datasketch方法处理top n,改回aggregate和window
This commit is contained in:
@@ -228,8 +228,8 @@ public class Toptask {
|
||||
case 2:
|
||||
//datasketch
|
||||
|
||||
//clientip聚合TOP
|
||||
|
||||
//clientip聚合TOP
|
||||
SingleOutputStreamOperator<Entity> clientipdStream2 = inputForSession.filter(new FilterFunction<Entity>() {
|
||||
@Override
|
||||
public boolean filter(Entity value) throws Exception {
|
||||
@@ -237,12 +237,16 @@ public class Toptask {
|
||||
}
|
||||
}).assignTimestampsAndWatermarks(strategyForSession);
|
||||
|
||||
SingleOutputStreamOperator<ResultEntity> windowedStream2 = clientipdStream2.keyBy(new groupBySelector("common_client_ip"))
|
||||
|
||||
|
||||
clientipdStream2.keyBy(new groupBySelector("common_client_ip"))
|
||||
.window(TumblingEventTimeWindows.of(Time.minutes(WINDOW_TIME_MINUTE)))
|
||||
.reduce(new metricsAggregationReduce(), new DatasketchMetricsCalculate(TOP_LIMIT, "common_client_ip")).setParallelism(TASK_PARALLELISM);
|
||||
DataStream<String> windoweddStream2 = windowedStream2.keyBy(new oneKeySelector())
|
||||
.process(new TopNHotItems(TOP_LIMIT)).setParallelism(3);
|
||||
windoweddStream2.addSink(getKafkaSink("TOP-CLIENT-IP")).setParallelism(3);
|
||||
.aggregate(new DatasketchForMetricsAggregate2("common_client_ip"), new UserCountWindowResult6())
|
||||
// .setParallelism(TASK_PARALLELISM)
|
||||
// .print();
|
||||
.addSink(getKafkaSink("TOP-CLIENT-IP")).setParallelism(3);
|
||||
|
||||
|
||||
|
||||
//serverip聚合TOP
|
||||
|
||||
@@ -253,12 +257,14 @@ public class Toptask {
|
||||
}
|
||||
}).assignTimestampsAndWatermarks(strategyForSession);
|
||||
|
||||
SingleOutputStreamOperator<ResultEntity> windowedStreamForServerIp2 = serveripdStream2.keyBy(new groupBySelector("common_server_ip"))
|
||||
|
||||
serveripdStream2.keyBy(new groupBySelector("common_server_ip"))
|
||||
.window(TumblingEventTimeWindows.of(Time.minutes(WINDOW_TIME_MINUTE)))
|
||||
.reduce(new metricsAggregationReduce(), new DatasketchMetricsCalculate(TOP_LIMIT, "common_server_ip")).setParallelism(TASK_PARALLELISM);
|
||||
DataStream<String> windoweddStreamForServerIp2 = windowedStreamForServerIp2.keyBy(new oneKeySelector())
|
||||
.process(new TopNHotItems(TOP_LIMIT)).setParallelism(3);
|
||||
windoweddStreamForServerIp2.addSink(getKafkaSink("TOP-SERVER-IP")).setParallelism(3);
|
||||
.aggregate(new DatasketchForMetricsAggregate2("common_server_ip"), new UserCountWindowResult6())
|
||||
// .print();
|
||||
.addSink(getKafkaSink("TOP-SERVER-IP")).setParallelism(3);
|
||||
|
||||
|
||||
|
||||
|
||||
//common_internal_ip聚合TOP
|
||||
@@ -269,13 +275,16 @@ public class Toptask {
|
||||
}
|
||||
}).assignTimestampsAndWatermarks(strategyForSession);
|
||||
|
||||
SingleOutputStreamOperator<ResultEntity> windowedStreamForInternal2 = internalStream2.keyBy(new groupBySelector("common_internal_ip"))
|
||||
.window(TumblingEventTimeWindows.of(Time.minutes(WINDOW_TIME_MINUTE)))
|
||||
.reduce(new metricsAggregationReduce(), new DatasketchMetricsCalculate(TOP_LIMIT, "common_internal_ip")).setParallelism(TASK_PARALLELISM);
|
||||
DataStream<String> WindoweddStreamForInternal2 = windowedStreamForInternal2.keyBy(new oneKeySelector())
|
||||
.process(new TopNHotItems(TOP_LIMIT)).setParallelism(3);
|
||||
WindoweddStreamForInternal2.addSink(getKafkaSink("TOP-INTERNAL-HOST")).setParallelism(3);
|
||||
|
||||
|
||||
internalStream2.keyBy(new groupBySelector("common_internal_ip"))
|
||||
.window(TumblingEventTimeWindows.of(Time.minutes(WINDOW_TIME_MINUTE)))
|
||||
.aggregate(new DatasketchForMetricsAggregate2("common_internal_ip"), new UserCountWindowResult6())
|
||||
// .print();
|
||||
.addSink(getKafkaSink("TOP-INTERNAL-HOST")).setParallelism(3);
|
||||
//
|
||||
//
|
||||
//
|
||||
//common_external_ip聚合TOP
|
||||
|
||||
SingleOutputStreamOperator<Entity> externalStream2 = inputForSession.filter(new FilterFunction<Entity>() {
|
||||
@@ -285,12 +294,14 @@ public class Toptask {
|
||||
}
|
||||
}).assignTimestampsAndWatermarks(strategyForSession);
|
||||
|
||||
SingleOutputStreamOperator<ResultEntity> windowedStreamForExternal2 = externalStream2.keyBy(new groupBySelector("common_external_ip"))
|
||||
|
||||
externalStream2.keyBy(new groupBySelector("common_external_ip"))
|
||||
.window(TumblingEventTimeWindows.of(Time.minutes(WINDOW_TIME_MINUTE)))
|
||||
.reduce(new metricsAggregationReduce(), new DatasketchMetricsCalculate(TOP_LIMIT, "common_external_ip")).setParallelism(TASK_PARALLELISM);
|
||||
DataStream<String> WindoweddStreamForExternal2 = windowedStreamForExternal2.keyBy(new oneKeySelector())
|
||||
.process(new TopNHotItems(TOP_LIMIT)).setParallelism(3);
|
||||
WindoweddStreamForExternal2.addSink(getKafkaSink("TOP-EXTERNAL-HOST")).setParallelism(3);
|
||||
.aggregate(new DatasketchForMetricsAggregate2("common_external_ip"), new UserCountWindowResult6())
|
||||
// .print();
|
||||
.addSink(getKafkaSink("TOP-EXTERNAL-HOST")).setParallelism(3);
|
||||
|
||||
|
||||
|
||||
//http_domain聚合TOP
|
||||
|
||||
@@ -301,13 +312,17 @@ public class Toptask {
|
||||
}
|
||||
}).assignTimestampsAndWatermarks(strategyForSession);
|
||||
|
||||
SingleOutputStreamOperator<ResultEntity> windowedStreamForDomain2 = domainStream2.keyBy(new groupBySelector("http_domain"))
|
||||
.window(TumblingEventTimeWindows.of(Time.minutes(WINDOW_TIME_MINUTE)))
|
||||
.reduce(new metricsAggregationReduce(), new DatasketchMetricsCalculate(TOP_LIMIT, "http_domain")).setParallelism(TASK_PARALLELISM);
|
||||
DataStream<String> WindoweddStreamForDomain2 = windowedStreamForDomain2.keyBy(new oneKeySelector())
|
||||
.process(new TopNHotItems(TOP_LIMIT)).setParallelism(3);
|
||||
WindoweddStreamForDomain2.addSink(getKafkaSink("TOP-WEBSITE-DOMAIN")).setParallelism(3);
|
||||
|
||||
|
||||
domainStream2.keyBy(new groupBySelector("http_domain"))
|
||||
.window(TumblingEventTimeWindows.of(Time.minutes(WINDOW_TIME_MINUTE)))
|
||||
.aggregate(new DatasketchForMetricsAggregate2("http_domain"), new UserCountWindowResult6())
|
||||
// .print();
|
||||
.addSink(getKafkaSink("TOP-WEBSITE-DOMAIN")).setParallelism(3);
|
||||
//
|
||||
//
|
||||
//
|
||||
//common_subscriber_id聚合TOP
|
||||
SingleOutputStreamOperator<Entity> userStream2 = inputForSession.filter(new FilterFunction<Entity>() {
|
||||
@Override
|
||||
public boolean filter(Entity value) throws Exception {
|
||||
@@ -315,13 +330,15 @@ public class Toptask {
|
||||
}
|
||||
}).assignTimestampsAndWatermarks(strategyForSession);
|
||||
|
||||
//common_subscriber_id聚合TOP
|
||||
SingleOutputStreamOperator<ResultEntity> windowedStreamForUser2 = userStream2.keyBy(new groupBySelector("common_subscriber_id"))
|
||||
|
||||
userStream2.keyBy(new groupBySelector("common_subscriber_id"))
|
||||
.window(TumblingEventTimeWindows.of(Time.minutes(WINDOW_TIME_MINUTE)))
|
||||
.reduce(new metricsAggregationReduce(), new DatasketchMetricsCalculate(TOP_LIMIT, "common_subscriber_id")).setParallelism(TASK_PARALLELISM);
|
||||
DataStream<String> WindoweddStreamForUser2 = windowedStreamForUser2.keyBy(new oneKeySelector())
|
||||
.process(new TopNHotItems(TOP_LIMIT)).setParallelism(3);
|
||||
WindoweddStreamForUser2.addSink(getKafkaSink("TOP-USER")).setParallelism(3);
|
||||
.aggregate(new DatasketchForMetricsAggregate2("common_subscriber_id"), new UserCountWindowResult6())
|
||||
// .print();
|
||||
.addSink(getKafkaSink("TOP-USER")).setParallelism(3);
|
||||
//
|
||||
|
||||
|
||||
|
||||
|
||||
//common_app_label聚合求全量
|
||||
@@ -336,7 +353,9 @@ public class Toptask {
|
||||
|
||||
appNameStream2.keyBy(new groupBySelector("common_app_label"))
|
||||
.window(TumblingEventTimeWindows.of(Time.minutes(WINDOW_TIME_MINUTE)))
|
||||
.reduce(new metricsAggregationReduce(), new metricsCalculateForApp()).addSink(getKafkaSink("TRAFFIC-APP-STAT")).setParallelism(TASK_PARALLELISM);
|
||||
.reduce(new metricsAggregationReduce(), new metricsCalculateForApp())
|
||||
.addSink(getKafkaSink("TRAFFIC-APP-STAT"))
|
||||
.setParallelism(TASK_PARALLELISM);
|
||||
|
||||
|
||||
|
||||
@@ -349,12 +368,14 @@ public class Toptask {
|
||||
}).assignTimestampsAndWatermarks(strategyForSecurity);
|
||||
|
||||
|
||||
SingleOutputStreamOperator<ResultEntity> windowedStreamForUrl2 = UrlStream2.keyBy(new twoKeySelector())
|
||||
|
||||
UrlStream2.keyBy(new twoKeySelector())
|
||||
.window(TumblingEventTimeWindows.of(Time.minutes(WINDOW_TIME_MINUTE)))
|
||||
.reduce(new UrlAggregationReduce(), new DatasketchUrlCalculate(URL_TOP_LIMIT)).setParallelism(TASK_PARALLELISM);
|
||||
DataStream<String> WindoweddStreamForUrl2 = windowedStreamForUrl2.keyBy(new oneKeySelector())
|
||||
.process(new TopNHotItemsForUrl(URL_TOP_LIMIT)).setParallelism(1);
|
||||
WindoweddStreamForUrl2.addSink(getKafkaSink("TOP-URLS")).setParallelism(3);
|
||||
.aggregate(new DatasketchForUrlAggregate2(), new UserCountWindowResult7())
|
||||
// .print();
|
||||
.addSink(getKafkaSink("TOP-URLS")).setParallelism(3);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -363,6 +384,168 @@ public class Toptask {
|
||||
|
||||
break;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// clientipdStream2.windowAll(TumblingEventTimeWindows.of(Time.minutes(WINDOW_TIME_MINUTE)))
|
||||
// .aggregate(new DatasketchForMetricsAggregate("clientIpSession"), new UserCountWindowResult5())
|
||||
//// .print();
|
||||
// .addSink(getKafkaSink("TOP-CLIENT-IP")).setParallelism(3);
|
||||
|
||||
// clientipdStream2.windowAll(TumblingEventTimeWindows.of(Time.minutes(WINDOW_TIME_MINUTE)))
|
||||
// .aggregate(new DatasketchForMetricsAggregate("clientIpPkt"), new UserCountWindowResult5())
|
||||
//// .print();
|
||||
// .addSink(getKafkaSink("TOP-CLIENT-IP")).setParallelism(3);
|
||||
//
|
||||
//
|
||||
// clientipdStream2.windowAll(TumblingEventTimeWindows.of(Time.minutes(WINDOW_TIME_MINUTE)))
|
||||
// .aggregate(new DatasketchForMetricsAggregate("clientIpByte"), new UserCountWindowResult5())
|
||||
//// .print();
|
||||
// .addSink(getKafkaSink("TOP-CLIENT-IP")).setParallelism(3);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
//
|
||||
// //clientip聚合TOP
|
||||
//
|
||||
// SingleOutputStreamOperator<Entity> clientipdStream2 = inputForSession.filter(new FilterFunction<Entity>() {
|
||||
// @Override
|
||||
// public boolean filter(Entity value) throws Exception {
|
||||
// return "IPv6_TCP".equals(value.getCommon_l4_protocol()) || "IPv4_TCP".equals(value.getCommon_l4_protocol());
|
||||
// }
|
||||
// }).assignTimestampsAndWatermarks(strategyForSession);
|
||||
//
|
||||
// SingleOutputStreamOperator<ResultEntity> windowedStream2 = clientipdStream2.keyBy(new groupBySelector("common_client_ip"))
|
||||
// .window(TumblingEventTimeWindows.of(Time.minutes(WINDOW_TIME_MINUTE)))
|
||||
// .reduce(new metricsAggregationReduce(), new DatasketchMetricsCalculate(TOP_LIMIT, "common_client_ip")).setParallelism(TASK_PARALLELISM);
|
||||
// DataStream<String> windoweddStream2 = windowedStream2.keyBy(new oneKeySelector())
|
||||
// .process(new TopNHotItems(TOP_LIMIT)).setParallelism(3);
|
||||
// windoweddStream2.addSink(getKafkaSink("TOP-CLIENT-IP")).setParallelism(3);
|
||||
//
|
||||
// //serverip聚合TOP
|
||||
//
|
||||
// SingleOutputStreamOperator<Entity> serveripdStream2 = inputForSession.filter(new FilterFunction<Entity>() {
|
||||
// @Override
|
||||
// public boolean filter(Entity value) throws Exception {
|
||||
// return "IPv6_TCP".equals(value.getCommon_l4_protocol()) || "IPv4_TCP".equals(value.getCommon_l4_protocol());
|
||||
// }
|
||||
// }).assignTimestampsAndWatermarks(strategyForSession);
|
||||
//
|
||||
// SingleOutputStreamOperator<ResultEntity> windowedStreamForServerIp2 = serveripdStream2.keyBy(new groupBySelector("common_server_ip"))
|
||||
// .window(TumblingEventTimeWindows.of(Time.minutes(WINDOW_TIME_MINUTE)))
|
||||
// .reduce(new metricsAggregationReduce(), new DatasketchMetricsCalculate(TOP_LIMIT, "common_server_ip")).setParallelism(TASK_PARALLELISM);
|
||||
// DataStream<String> windoweddStreamForServerIp2 = windowedStreamForServerIp2.keyBy(new oneKeySelector())
|
||||
// .process(new TopNHotItems(TOP_LIMIT)).setParallelism(3);
|
||||
// windoweddStreamForServerIp2.addSink(getKafkaSink("TOP-SERVER-IP")).setParallelism(3);
|
||||
//
|
||||
//
|
||||
// //common_internal_ip聚合TOP
|
||||
// SingleOutputStreamOperator<Entity> internalStream2 = inputForSession.filter(new FilterFunction<Entity>() {
|
||||
// @Override
|
||||
// public boolean filter(Entity value) throws Exception {
|
||||
// return StringUtil.isNotEmpty(value.getCommon_internal_ip());
|
||||
// }
|
||||
// }).assignTimestampsAndWatermarks(strategyForSession);
|
||||
//
|
||||
// SingleOutputStreamOperator<ResultEntity> windowedStreamForInternal2 = internalStream2.keyBy(new groupBySelector("common_internal_ip"))
|
||||
// .window(TumblingEventTimeWindows.of(Time.minutes(WINDOW_TIME_MINUTE)))
|
||||
// .reduce(new metricsAggregationReduce(), new DatasketchMetricsCalculate(TOP_LIMIT, "common_internal_ip")).setParallelism(TASK_PARALLELISM);
|
||||
// DataStream<String> WindoweddStreamForInternal2 = windowedStreamForInternal2.keyBy(new oneKeySelector())
|
||||
// .process(new TopNHotItems(TOP_LIMIT)).setParallelism(3);
|
||||
// WindoweddStreamForInternal2.addSink(getKafkaSink("TOP-INTERNAL-HOST")).setParallelism(3);
|
||||
//
|
||||
// //common_external_ip聚合TOP
|
||||
//
|
||||
// SingleOutputStreamOperator<Entity> externalStream2 = inputForSession.filter(new FilterFunction<Entity>() {
|
||||
// @Override
|
||||
// public boolean filter(Entity value) throws Exception {
|
||||
// return StringUtil.isNotEmpty(value.getCommon_external_ip());
|
||||
// }
|
||||
// }).assignTimestampsAndWatermarks(strategyForSession);
|
||||
//
|
||||
// SingleOutputStreamOperator<ResultEntity> windowedStreamForExternal2 = externalStream2.keyBy(new groupBySelector("common_external_ip"))
|
||||
// .window(TumblingEventTimeWindows.of(Time.minutes(WINDOW_TIME_MINUTE)))
|
||||
// .reduce(new metricsAggregationReduce(), new DatasketchMetricsCalculate(TOP_LIMIT, "common_external_ip")).setParallelism(TASK_PARALLELISM);
|
||||
// DataStream<String> WindoweddStreamForExternal2 = windowedStreamForExternal2.keyBy(new oneKeySelector())
|
||||
// .process(new TopNHotItems(TOP_LIMIT)).setParallelism(3);
|
||||
// WindoweddStreamForExternal2.addSink(getKafkaSink("TOP-EXTERNAL-HOST")).setParallelism(3);
|
||||
//
|
||||
// //http_domain聚合TOP
|
||||
//
|
||||
// SingleOutputStreamOperator<Entity> domainStream2 = inputForSession.filter(new FilterFunction<Entity>() {
|
||||
// @Override
|
||||
// public boolean filter(Entity value) throws Exception {
|
||||
// return StringUtil.isNotEmpty(value.getHttp_domain());
|
||||
// }
|
||||
// }).assignTimestampsAndWatermarks(strategyForSession);
|
||||
//
|
||||
// SingleOutputStreamOperator<ResultEntity> windowedStreamForDomain2 = domainStream2.keyBy(new groupBySelector("http_domain"))
|
||||
// .window(TumblingEventTimeWindows.of(Time.minutes(WINDOW_TIME_MINUTE)))
|
||||
// .reduce(new metricsAggregationReduce(), new DatasketchMetricsCalculate(TOP_LIMIT, "http_domain")).setParallelism(TASK_PARALLELISM);
|
||||
// DataStream<String> WindoweddStreamForDomain2 = windowedStreamForDomain2.keyBy(new oneKeySelector())
|
||||
// .process(new TopNHotItems(TOP_LIMIT)).setParallelism(3);
|
||||
// WindoweddStreamForDomain2.addSink(getKafkaSink("TOP-WEBSITE-DOMAIN")).setParallelism(3);
|
||||
//
|
||||
// SingleOutputStreamOperator<Entity> userStream2 = inputForSession.filter(new FilterFunction<Entity>() {
|
||||
// @Override
|
||||
// public boolean filter(Entity value) throws Exception {
|
||||
// return StringUtil.isNotEmpty(value.getCommon_subscriber_id());
|
||||
// }
|
||||
// }).assignTimestampsAndWatermarks(strategyForSession);
|
||||
//
|
||||
// //common_subscriber_id聚合TOP
|
||||
// SingleOutputStreamOperator<ResultEntity> windowedStreamForUser2 = userStream2.keyBy(new groupBySelector("common_subscriber_id"))
|
||||
// .window(TumblingEventTimeWindows.of(Time.minutes(WINDOW_TIME_MINUTE)))
|
||||
// .reduce(new metricsAggregationReduce(), new DatasketchMetricsCalculate(TOP_LIMIT, "common_subscriber_id")).setParallelism(TASK_PARALLELISM);
|
||||
// DataStream<String> WindoweddStreamForUser2 = windowedStreamForUser2.keyBy(new oneKeySelector())
|
||||
// .process(new TopNHotItems(TOP_LIMIT)).setParallelism(3);
|
||||
// WindoweddStreamForUser2.addSink(getKafkaSink("TOP-USER")).setParallelism(3);
|
||||
//
|
||||
//
|
||||
// //common_app_label聚合求全量
|
||||
// SingleOutputStreamOperator<Entity> appNameStream2 = inputForSession.filter(new FilterFunction<Entity>() {
|
||||
// @Override
|
||||
// public boolean filter(Entity value) throws Exception {
|
||||
// return StringUtil.isNotEmpty(value.getCommon_app_label());
|
||||
// }
|
||||
// }).assignTimestampsAndWatermarks(strategyForSession);
|
||||
//
|
||||
//
|
||||
//
|
||||
// appNameStream2.keyBy(new groupBySelector("common_app_label"))
|
||||
// .window(TumblingEventTimeWindows.of(Time.minutes(WINDOW_TIME_MINUTE)))
|
||||
// .reduce(new metricsAggregationReduce(), new metricsCalculateForApp()).addSink(getKafkaSink("TRAFFIC-APP-STAT")).setParallelism(TASK_PARALLELISM);
|
||||
//
|
||||
//
|
||||
//
|
||||
// //url聚合session求top
|
||||
// SingleOutputStreamOperator<UrlEntity> UrlStream2 = inputForUrl.filter(new FilterFunction<UrlEntity>() {
|
||||
// @Override
|
||||
// public boolean filter(UrlEntity value) throws Exception {
|
||||
// return StringUtil.isNotEmpty(value.getHttp_url());
|
||||
// }
|
||||
// }).assignTimestampsAndWatermarks(strategyForSecurity);
|
||||
//
|
||||
//
|
||||
// SingleOutputStreamOperator<ResultEntity> windowedStreamForUrl2 = UrlStream2.keyBy(new twoKeySelector())
|
||||
// .window(TumblingEventTimeWindows.of(Time.minutes(WINDOW_TIME_MINUTE)))
|
||||
// .reduce(new UrlAggregationReduce(), new DatasketchUrlCalculate(URL_TOP_LIMIT)).setParallelism(TASK_PARALLELISM);
|
||||
// DataStream<String> WindoweddStreamForUrl2 = windowedStreamForUrl2.keyBy(new oneKeySelector())
|
||||
// .process(new TopNHotItemsForUrl(URL_TOP_LIMIT)).setParallelism(1);
|
||||
// WindoweddStreamForUrl2.addSink(getKafkaSink("TOP-URLS")).setParallelism(3);
|
||||
//
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// break;
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user