重构版本
This commit is contained in:
@@ -1,18 +1,19 @@
|
||||
package com.galaxy.tsg;
|
||||
|
||||
import com.alibaba.fastjson.JSON;
|
||||
import com.galaxy.tsg.function.*;
|
||||
import com.galaxy.tsg.pojo.Entity;
|
||||
import com.galaxy.tsg.pojo.ResultEntity;
|
||||
import com.galaxy.tsg.pojo.UrlEntity;
|
||||
import com.alibaba.fastjson2.JSON;
|
||||
import com.galaxy.tsg.function.metricsAggregationReduce;
|
||||
import com.galaxy.tsg.function.metricsCalculate;
|
||||
import com.galaxy.tsg.function.topnHotItems;
|
||||
import com.galaxy.tsg.pojo.resultEntity;
|
||||
import com.galaxy.tsg.pojo.sessionEntity;
|
||||
import com.galaxy.tsg.pojo.transformEntity;
|
||||
import com.zdjizhi.utils.StringUtil;
|
||||
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
|
||||
import org.apache.flink.api.common.functions.FilterFunction;
|
||||
import org.apache.flink.api.common.functions.MapFunction;
|
||||
import org.apache.flink.api.java.functions.KeySelector;
|
||||
import org.apache.flink.api.java.tuple.Tuple1;
|
||||
import org.apache.flink.api.java.tuple.Tuple2;
|
||||
import org.apache.flink.api.java.tuple.Tuple4;
|
||||
import org.apache.flink.api.java.tuple.Tuple5;
|
||||
import org.apache.flink.streaming.api.datastream.DataStream;
|
||||
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
|
||||
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
|
||||
@@ -22,664 +23,198 @@ import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.time.Duration;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
|
||||
import static com.galaxy.tsg.config.commonConfig.*;
|
||||
import static com.galaxy.tsg.util.KafkaUtils.*;
|
||||
import static com.galaxy.tsg.util.kafkaUtils.getKafkaConsumer;
|
||||
import static com.galaxy.tsg.util.kafkaUtils.getKafkaSink;
|
||||
|
||||
public class Toptask {
|
||||
private static final Logger LOG = LoggerFactory.getLogger(Toptask.class);
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
|
||||
// 1. Create the stream execution environment
|
||||
final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
|
||||
// Use event time
|
||||
//env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
|
||||
|
||||
|
||||
DataStream<String> sourceForSession = env.addSource(getKafkaConsumer("SESSION-RECORD-COMPLETED")).setParallelism(KAFKA_CONSUMER_PARALLELISM);
|
||||
WatermarkStrategy<Entity> strategyForSession = WatermarkStrategy
|
||||
.<Entity>forBoundedOutOfOrderness(Duration.ofSeconds(WATERMARK_TIME))
|
||||
.withTimestampAssigner((Entity, timestamp) -> Entity.getCommon_recv_time() * 1000);
|
||||
|
||||
List<String> topics = new LinkedList<>();
|
||||
topics.add("SECURITY-EVENT-COMPLETED");
|
||||
topics.add("PROXY-EVENT-COMPLETED");
|
||||
DataStream<String> sourceForUrl = env.addSource(getKafkaConsumerLists(topics)).setParallelism(KAFKA_CONSUMER_TOPURL_PARALLELISM);
|
||||
WatermarkStrategy<UrlEntity> strategyForSecurity = WatermarkStrategy
|
||||
.<UrlEntity>forBoundedOutOfOrderness(Duration.ofSeconds(WATERMARK_TIME))
|
||||
.withTimestampAssigner((UrlEntity, timestamp) -> UrlEntity.getCommon_recv_time() * 1000);
|
||||
DataStream<String> sourceForSession = env.addSource(getKafkaConsumer(KAFKA_CONSUMER_TOPIC)).setParallelism(KAFKA_CONSUMER_PARALLELISM);
|
||||
WatermarkStrategy<transformEntity> strategyForSession = WatermarkStrategy
|
||||
.<transformEntity>forBoundedOutOfOrderness(Duration.ofSeconds(WATERMARK_TIME))
|
||||
.withTimestampAssigner((transformEntity, timestamp) -> transformEntity.getTimestamp() * 1000);
|
||||
|
||||
|
||||
SingleOutputStreamOperator<Entity> inputForSession = sourceForSession.map(new MapFunction<String, Entity>() {
|
||||
SingleOutputStreamOperator<transformEntity> inputForSession = sourceForSession.map(new MapFunction<String, transformEntity>() {
|
||||
@Override
|
||||
public Entity map(String message) {
|
||||
Entity entity = new Entity();
|
||||
public transformEntity map(String message) {
|
||||
transformEntity transformEntity = new transformEntity();
|
||||
try {
|
||||
entity = JSON.parseObject(message, Entity.class);
|
||||
sessionEntity sessionEntity = JSON.parseObject(message, com.galaxy.tsg.pojo.sessionEntity.class);
|
||||
transformEntity.setServer_ip(sessionEntity.getCommon_server_ip());
|
||||
transformEntity.setClient_ip(sessionEntity.getCommon_client_ip());
|
||||
|
||||
transformEntity.setSubscriber_id(sessionEntity.getCommon_subscriber_id());
|
||||
transformEntity.setFqdn(sessionEntity.getCommon_server_fqdn());
|
||||
transformEntity.setExternal_ip(sessionEntity.getCommon_external_ip());
|
||||
transformEntity.setInternal_ip(sessionEntity.getCommon_internal_ip());
|
||||
transformEntity.setDomain(sessionEntity.getHttp_domain());
|
||||
transformEntity.setDevice_group(sessionEntity.getCommon_device_group());
|
||||
transformEntity.setDevice_id(sessionEntity.getCommon_device_id());
|
||||
transformEntity.setData_center(sessionEntity.getCommon_data_center());
|
||||
transformEntity.setVsys_id(sessionEntity.getCommon_vsys_id());
|
||||
transformEntity.setTimestamp(sessionEntity.getCommon_recv_time());
|
||||
transformEntity.setSessions(sessionEntity.getCommon_sessions());
|
||||
|
||||
transformEntity.setL4_protocol(sessionEntity.getCommon_l4_protocol());
|
||||
|
||||
|
||||
if ((8L & sessionEntity.getCommon_flags()) == 8L) {
|
||||
|
||||
transformEntity.setOut_bytes(sessionEntity.getCommon_c2s_byte_num());
|
||||
transformEntity.setOut_pkts(sessionEntity.getCommon_c2s_pkt_num());
|
||||
transformEntity.setIn_bytes(sessionEntity.getCommon_s2c_byte_num());
|
||||
transformEntity.setIn_pkts(sessionEntity.getCommon_s2c_pkt_num());
|
||||
|
||||
} else {
|
||||
transformEntity.setOut_bytes(sessionEntity.getCommon_s2c_byte_num());
|
||||
transformEntity.setOut_pkts(sessionEntity.getCommon_s2c_pkt_num());
|
||||
transformEntity.setIn_bytes(sessionEntity.getCommon_c2s_byte_num());
|
||||
transformEntity.setIn_pkts(sessionEntity.getCommon_c2s_pkt_num());
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
} catch (Exception e) {
|
||||
LOG.error("Entity Parsing ERROR");
|
||||
entity.setIfError(1);
|
||||
transformEntity.setIfError(1);
|
||||
}
|
||||
return entity;
|
||||
return transformEntity;
|
||||
}
|
||||
}).filter(new FilterFunction<Entity>() {
|
||||
}).filter(new FilterFunction<transformEntity>() {
|
||||
@Override
|
||||
public boolean filter(Entity entity) throws Exception {
|
||||
public boolean filter(transformEntity entity) throws Exception {
|
||||
|
||||
return entity.ifError != 1;
|
||||
}
|
||||
});
|
||||
|
||||
SingleOutputStreamOperator<UrlEntity> inputForUrl = sourceForUrl.map(new MapFunction<String, UrlEntity>() {
|
||||
|
||||
// Top-N aggregation by client IP
|
||||
|
||||
SingleOutputStreamOperator<transformEntity> clientipdStream = inputForSession.filter(new FilterFunction<transformEntity>() {
|
||||
@Override
|
||||
public UrlEntity map(String message) {
|
||||
UrlEntity entity = new UrlEntity();
|
||||
try {
|
||||
entity = JSON.parseObject(message, UrlEntity.class);
|
||||
|
||||
} catch (Exception e) {
|
||||
LOG.error("Entity Parsing ERROR");
|
||||
entity.setIfError(1);
|
||||
}
|
||||
return entity;
|
||||
public boolean filter(transformEntity value) throws Exception {
|
||||
return "IPv6_TCP".equals(value.getL4_protocol()) || "IPv4_TCP".equals(value.getL4_protocol());
|
||||
}
|
||||
}).filter(new FilterFunction<UrlEntity>() {
|
||||
}).assignTimestampsAndWatermarks(strategyForSession);
|
||||
|
||||
SingleOutputStreamOperator<resultEntity> windowedStream = clientipdStream.keyBy(new groupBySelector("client_ip"))
|
||||
.window(TumblingEventTimeWindows.of(Time.minutes(WINDOW_TIME_MINUTE)))
|
||||
.reduce(new metricsAggregationReduce(), new metricsCalculate(TOP_LIMIT, "client_ip")).setParallelism(TASK_PARALLELISM).name("client_ip");;
|
||||
DataStream<String> Stream = windowedStream.keyBy(new oneKeySelector())
|
||||
.process(new topnHotItems(TOP_LIMIT)).setParallelism(ORDERBY_PARALLELISM);
|
||||
Stream.addSink(getKafkaSink(KAFKA_PRODUCER_TOPIC)).setParallelism(SINK_PARALLELISM);
|
||||
|
||||
|
||||
// Top-N aggregation by server IP
|
||||
|
||||
|
||||
SingleOutputStreamOperator<transformEntity> serveripdStream = inputForSession.filter(new FilterFunction<transformEntity>() {
|
||||
@Override
|
||||
public boolean filter(UrlEntity entity) throws Exception {
|
||||
|
||||
return entity.ifError != 1;
|
||||
public boolean filter(transformEntity value) throws Exception {
|
||||
return "IPv6_TCP".equals(value.getL4_protocol()) || "IPv4_TCP".equals(value.getL4_protocol());
|
||||
}
|
||||
});
|
||||
|
||||
switch (TMP_TEST_TYPE) {
|
||||
case 1:
|
||||
|
||||
//clientip聚合TOP
|
||||
|
||||
SingleOutputStreamOperator<Entity> clientipdStream = inputForSession.filter(new FilterFunction<Entity>() {
|
||||
@Override
|
||||
public boolean filter(Entity value) throws Exception {
|
||||
return "IPv6_TCP".equals(value.getCommon_l4_protocol()) || "IPv4_TCP".equals(value.getCommon_l4_protocol());
|
||||
}
|
||||
}).assignTimestampsAndWatermarks(strategyForSession);
|
||||
|
||||
SingleOutputStreamOperator<ResultEntity> windowedStream = clientipdStream.keyBy(new groupBySelector("common_client_ip"))
|
||||
.window(TumblingEventTimeWindows.of(Time.minutes(WINDOW_TIME_MINUTE)))
|
||||
.reduce(new metricsAggregationReduce(), new metricsCalculate(TOP_LIMIT, "common_client_ip")).setParallelism(TASK_PARALLELISM);
|
||||
DataStream<String> windoweddStream = windowedStream.keyBy(new oneKeySelector())
|
||||
.process(new TopNHotItems(TOP_LIMIT)).setParallelism(3);
|
||||
windoweddStream.addSink(getKafkaSink("TOP-CLIENT-IP")).setParallelism(3);
|
||||
|
||||
|
||||
|
||||
|
||||
//serverip聚合TOP
|
||||
|
||||
SingleOutputStreamOperator<Entity> serveripdStream = inputForSession.filter(new FilterFunction<Entity>() {
|
||||
@Override
|
||||
public boolean filter(Entity value) throws Exception {
|
||||
return "IPv6_TCP".equals(value.getCommon_l4_protocol()) || "IPv4_TCP".equals(value.getCommon_l4_protocol());
|
||||
}
|
||||
}).assignTimestampsAndWatermarks(strategyForSession);
|
||||
|
||||
SingleOutputStreamOperator<ResultEntity> windowedStreamForServerIp = serveripdStream.keyBy(new groupBySelector("common_server_ip"))
|
||||
.window(TumblingEventTimeWindows.of(Time.minutes(WINDOW_TIME_MINUTE)))
|
||||
.reduce(new metricsAggregationReduce(), new metricsCalculate(TOP_LIMIT, "common_server_ip")).setParallelism(TASK_PARALLELISM);
|
||||
DataStream<String> windoweddStreamForServerIp = windowedStreamForServerIp.keyBy(new oneKeySelector())
|
||||
.process(new TopNHotItems(TOP_LIMIT)).setParallelism(3);
|
||||
windoweddStreamForServerIp.addSink(getKafkaSink("TOP-SERVER-IP")).setParallelism(3);
|
||||
|
||||
|
||||
//common_internal_ip聚合TOP
|
||||
SingleOutputStreamOperator<Entity> internalStream = inputForSession.filter(new FilterFunction<Entity>() {
|
||||
@Override
|
||||
public boolean filter(Entity value) throws Exception {
|
||||
return StringUtil.isNotEmpty(value.getCommon_internal_ip());
|
||||
}
|
||||
}).assignTimestampsAndWatermarks(strategyForSession);
|
||||
|
||||
SingleOutputStreamOperator<ResultEntity> windowedStreamForInternal = internalStream.keyBy(new groupBySelector("common_internal_ip"))
|
||||
.window(TumblingEventTimeWindows.of(Time.minutes(WINDOW_TIME_MINUTE)))
|
||||
.reduce(new metricsAggregationReduce(), new metricsCalculate(TOP_LIMIT, "common_internal_ip")).setParallelism(TASK_PARALLELISM);
|
||||
DataStream<String> WindoweddStreamForInternal = windowedStreamForInternal.keyBy(new oneKeySelector())
|
||||
.process(new TopNHotItems(TOP_LIMIT)).setParallelism(3);
|
||||
WindoweddStreamForInternal.addSink(getKafkaSink("TOP-INTERNAL-HOST")).setParallelism(3);
|
||||
|
||||
//common_external_ip聚合TOP
|
||||
|
||||
SingleOutputStreamOperator<Entity> externalStream = inputForSession.filter(new FilterFunction<Entity>() {
|
||||
@Override
|
||||
public boolean filter(Entity value) throws Exception {
|
||||
return StringUtil.isNotEmpty(value.getCommon_external_ip());
|
||||
}
|
||||
}).assignTimestampsAndWatermarks(strategyForSession);
|
||||
|
||||
SingleOutputStreamOperator<ResultEntity> windowedStreamForExternal = externalStream.keyBy(new groupBySelector("common_external_ip"))
|
||||
.window(TumblingEventTimeWindows.of(Time.minutes(WINDOW_TIME_MINUTE)))
|
||||
.reduce(new metricsAggregationReduce(), new metricsCalculate(TOP_LIMIT, "common_external_ip")).setParallelism(TASK_PARALLELISM);
|
||||
DataStream<String> WindoweddStreamForExternal = windowedStreamForExternal.keyBy(new oneKeySelector())
|
||||
.process(new TopNHotItems(TOP_LIMIT)).setParallelism(3);
|
||||
WindoweddStreamForExternal.addSink(getKafkaSink("TOP-EXTERNAL-HOST")).setParallelism(3);
|
||||
|
||||
//http_domain聚合TOP
|
||||
|
||||
SingleOutputStreamOperator<Entity> domainStream = inputForSession.filter(new FilterFunction<Entity>() {
|
||||
@Override
|
||||
public boolean filter(Entity value) throws Exception {
|
||||
return StringUtil.isNotEmpty(value.getHttp_domain());
|
||||
}
|
||||
}).assignTimestampsAndWatermarks(strategyForSession);
|
||||
|
||||
SingleOutputStreamOperator<ResultEntity> windowedStreamForDomain = domainStream.keyBy(new groupBySelector("http_domain"))
|
||||
.window(TumblingEventTimeWindows.of(Time.minutes(WINDOW_TIME_MINUTE)))
|
||||
.reduce(new metricsAggregationReduce(), new metricsCalculate(TOP_LIMIT, "http_domain")).setParallelism(TASK_PARALLELISM);
|
||||
DataStream<String> WindoweddStreamForDomain = windowedStreamForDomain.keyBy(new oneKeySelector())
|
||||
.process(new TopNHotItems(TOP_LIMIT)).setParallelism(3);
|
||||
WindoweddStreamForDomain.addSink(getKafkaSink("TOP-WEBSITE-DOMAIN")).setParallelism(3);
|
||||
|
||||
SingleOutputStreamOperator<Entity> userStream = inputForSession.filter(new FilterFunction<Entity>() {
|
||||
@Override
|
||||
public boolean filter(Entity value) throws Exception {
|
||||
return StringUtil.isNotEmpty(value.getCommon_subscriber_id());
|
||||
}
|
||||
}).assignTimestampsAndWatermarks(strategyForSession);
|
||||
|
||||
//common_subscriber_id聚合TOP
|
||||
SingleOutputStreamOperator<ResultEntity> windowedStreamForUser = userStream.keyBy(new groupBySelector("common_subscriber_id"))
|
||||
.window(TumblingEventTimeWindows.of(Time.minutes(WINDOW_TIME_MINUTE)))
|
||||
.reduce(new metricsAggregationReduce(), new metricsCalculate(TOP_LIMIT, "common_subscriber_id")).setParallelism(TASK_PARALLELISM);
|
||||
DataStream<String> WindoweddStreamForUser = windowedStreamForUser.keyBy(new oneKeySelector())
|
||||
.process(new TopNHotItems(TOP_LIMIT)).setParallelism(3);
|
||||
WindoweddStreamForUser.addSink(getKafkaSink("TOP-USER")).setParallelism(3);
|
||||
|
||||
SingleOutputStreamOperator<Entity> appNameStream = inputForSession.filter(new FilterFunction<Entity>() {
|
||||
@Override
|
||||
public boolean filter(Entity value) throws Exception {
|
||||
return StringUtil.isNotEmpty(value.getCommon_app_label());
|
||||
}
|
||||
}).assignTimestampsAndWatermarks(strategyForSession);
|
||||
|
||||
|
||||
//common_app_label聚合求全量
|
||||
appNameStream.keyBy(new groupBySelector("common_app_label"))
|
||||
.window(TumblingEventTimeWindows.of(Time.minutes(WINDOW_TIME_MINUTE)))
|
||||
.reduce(new metricsAggregationReduce(), new metricsCalculateForApp()).addSink(getKafkaSink("TRAFFIC-APP-STAT")).setParallelism(TASK_PARALLELISM);
|
||||
|
||||
|
||||
SingleOutputStreamOperator<UrlEntity> UrlStream = inputForUrl.filter(new FilterFunction<UrlEntity>() {
|
||||
@Override
|
||||
public boolean filter(UrlEntity value) throws Exception {
|
||||
return StringUtil.isNotEmpty(value.getHttp_url());
|
||||
}
|
||||
}).assignTimestampsAndWatermarks(strategyForSecurity);
|
||||
|
||||
//url聚合session求top
|
||||
SingleOutputStreamOperator<ResultEntity> windowedStreamForUrl = UrlStream.keyBy(new twoKeySelector())
|
||||
.window(TumblingEventTimeWindows.of(Time.minutes(WINDOW_TIME_MINUTE)))
|
||||
.reduce(new UrlAggregationReduce(), new metricsCalculateForUrl(URL_TOP_LIMIT)).setParallelism(TASK_PARALLELISM);
|
||||
DataStream<String> WindoweddStreamForUrl = windowedStreamForUrl.keyBy(new oneKeySelector())
|
||||
.process(new TopNHotItemsForUrl(URL_TOP_LIMIT)).setParallelism(1);
|
||||
WindoweddStreamForUrl.addSink(getKafkaSink("TOP-URLS")).setParallelism(3);
|
||||
|
||||
break;
|
||||
case 2:
|
||||
//datasketch
|
||||
|
||||
|
||||
//clientip聚合TOP
|
||||
SingleOutputStreamOperator<Entity> clientipdStream2 = inputForSession.filter(new FilterFunction<Entity>() {
|
||||
@Override
|
||||
public boolean filter(Entity value) throws Exception {
|
||||
return "IPv6_TCP".equals(value.getCommon_l4_protocol()) || "IPv4_TCP".equals(value.getCommon_l4_protocol());
|
||||
}
|
||||
}).assignTimestampsAndWatermarks(strategyForSession);
|
||||
|
||||
|
||||
|
||||
clientipdStream2.keyBy(new groupBySelector("common_client_ip"))
|
||||
.window(TumblingEventTimeWindows.of(Time.minutes(WINDOW_TIME_MINUTE)))
|
||||
.aggregate(new DatasketchForMetricsAggregate2("common_client_ip"), new UserCountWindowResult6())
|
||||
// .setParallelism(TASK_PARALLELISM)
|
||||
// .print();
|
||||
.addSink(getKafkaSink("TOP-CLIENT-IP")).setParallelism(3);
|
||||
|
||||
|
||||
|
||||
//serverip聚合TOP
|
||||
|
||||
SingleOutputStreamOperator<Entity> serveripdStream2 = inputForSession.filter(new FilterFunction<Entity>() {
|
||||
@Override
|
||||
public boolean filter(Entity value) throws Exception {
|
||||
return "IPv6_TCP".equals(value.getCommon_l4_protocol()) || "IPv4_TCP".equals(value.getCommon_l4_protocol());
|
||||
}
|
||||
}).assignTimestampsAndWatermarks(strategyForSession);
|
||||
|
||||
|
||||
serveripdStream2.keyBy(new groupBySelector("common_server_ip"))
|
||||
.window(TumblingEventTimeWindows.of(Time.minutes(WINDOW_TIME_MINUTE)))
|
||||
.aggregate(new DatasketchForMetricsAggregate2("common_server_ip"), new UserCountWindowResult6())
|
||||
// .print();
|
||||
.addSink(getKafkaSink("TOP-SERVER-IP")).setParallelism(3);
|
||||
|
||||
|
||||
|
||||
|
||||
//common_internal_ip聚合TOP
|
||||
SingleOutputStreamOperator<Entity> internalStream2 = inputForSession.filter(new FilterFunction<Entity>() {
|
||||
@Override
|
||||
public boolean filter(Entity value) throws Exception {
|
||||
return StringUtil.isNotEmpty(value.getCommon_internal_ip());
|
||||
}
|
||||
}).assignTimestampsAndWatermarks(strategyForSession);
|
||||
|
||||
|
||||
|
||||
internalStream2.keyBy(new groupBySelector("common_internal_ip"))
|
||||
.window(TumblingEventTimeWindows.of(Time.minutes(WINDOW_TIME_MINUTE)))
|
||||
.aggregate(new DatasketchForMetricsAggregate2("common_internal_ip"), new UserCountWindowResult6())
|
||||
// .print();
|
||||
.addSink(getKafkaSink("TOP-INTERNAL-HOST")).setParallelism(3);
|
||||
//
|
||||
//
|
||||
//
|
||||
//common_external_ip聚合TOP
|
||||
|
||||
SingleOutputStreamOperator<Entity> externalStream2 = inputForSession.filter(new FilterFunction<Entity>() {
|
||||
@Override
|
||||
public boolean filter(Entity value) throws Exception {
|
||||
return StringUtil.isNotEmpty(value.getCommon_external_ip());
|
||||
}
|
||||
}).assignTimestampsAndWatermarks(strategyForSession);
|
||||
|
||||
|
||||
externalStream2.keyBy(new groupBySelector("common_external_ip"))
|
||||
.window(TumblingEventTimeWindows.of(Time.minutes(WINDOW_TIME_MINUTE)))
|
||||
.aggregate(new DatasketchForMetricsAggregate2("common_external_ip"), new UserCountWindowResult6())
|
||||
// .print();
|
||||
.addSink(getKafkaSink("TOP-EXTERNAL-HOST")).setParallelism(3);
|
||||
|
||||
|
||||
|
||||
//http_domain聚合TOP
|
||||
|
||||
SingleOutputStreamOperator<Entity> domainStream2 = inputForSession.filter(new FilterFunction<Entity>() {
|
||||
@Override
|
||||
public boolean filter(Entity value) throws Exception {
|
||||
return StringUtil.isNotEmpty(value.getHttp_domain());
|
||||
}
|
||||
}).assignTimestampsAndWatermarks(strategyForSession);
|
||||
|
||||
|
||||
|
||||
domainStream2.keyBy(new groupBySelector("http_domain"))
|
||||
.window(TumblingEventTimeWindows.of(Time.minutes(WINDOW_TIME_MINUTE)))
|
||||
.aggregate(new DatasketchForMetricsAggregate2("http_domain"), new UserCountWindowResult6())
|
||||
// .print();
|
||||
.addSink(getKafkaSink("TOP-WEBSITE-DOMAIN")).setParallelism(3);
|
||||
//
|
||||
//
|
||||
//
|
||||
//common_subscriber_id聚合TOP
|
||||
SingleOutputStreamOperator<Entity> userStream2 = inputForSession.filter(new FilterFunction<Entity>() {
|
||||
@Override
|
||||
public boolean filter(Entity value) throws Exception {
|
||||
return StringUtil.isNotEmpty(value.getCommon_subscriber_id());
|
||||
}
|
||||
}).assignTimestampsAndWatermarks(strategyForSession);
|
||||
|
||||
|
||||
userStream2.keyBy(new groupBySelector("common_subscriber_id"))
|
||||
.window(TumblingEventTimeWindows.of(Time.minutes(WINDOW_TIME_MINUTE)))
|
||||
.aggregate(new DatasketchForMetricsAggregate2("common_subscriber_id"), new UserCountWindowResult6())
|
||||
// .print();
|
||||
.addSink(getKafkaSink("TOP-USER")).setParallelism(3);
|
||||
//
|
||||
|
||||
|
||||
|
||||
|
||||
//common_app_label聚合求全量
|
||||
SingleOutputStreamOperator<Entity> appNameStream2 = inputForSession.filter(new FilterFunction<Entity>() {
|
||||
@Override
|
||||
public boolean filter(Entity value) throws Exception {
|
||||
return StringUtil.isNotEmpty(value.getCommon_app_label());
|
||||
}
|
||||
}).assignTimestampsAndWatermarks(strategyForSession);
|
||||
|
||||
|
||||
|
||||
appNameStream2.keyBy(new groupBySelector("common_app_label"))
|
||||
.window(TumblingEventTimeWindows.of(Time.minutes(WINDOW_TIME_MINUTE)))
|
||||
.reduce(new metricsAggregationReduce(), new metricsCalculateForApp())
|
||||
.addSink(getKafkaSink("TRAFFIC-APP-STAT"))
|
||||
.setParallelism(TASK_PARALLELISM);
|
||||
|
||||
|
||||
|
||||
//url聚合session求top
|
||||
SingleOutputStreamOperator<UrlEntity> UrlStream2 = inputForUrl.filter(new FilterFunction<UrlEntity>() {
|
||||
@Override
|
||||
public boolean filter(UrlEntity value) throws Exception {
|
||||
return StringUtil.isNotEmpty(value.getHttp_url());
|
||||
}
|
||||
}).assignTimestampsAndWatermarks(strategyForSecurity);
|
||||
|
||||
|
||||
|
||||
UrlStream2.keyBy(new twoKeySelector())
|
||||
.window(TumblingEventTimeWindows.of(Time.minutes(WINDOW_TIME_MINUTE)))
|
||||
.aggregate(new DatasketchForUrlAggregate2(), new UserCountWindowResult7())
|
||||
// .print();
|
||||
.addSink(getKafkaSink("TOP-URLS")).setParallelism(3);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
break;
|
||||
|
||||
case 3:
|
||||
|
||||
SingleOutputStreamOperator<Entity> clientipdStream3 = inputForSession.filter(new FilterFunction<Entity>() {
|
||||
@Override
|
||||
public boolean filter(Entity value) throws Exception {
|
||||
return "IPv6_TCP".equals(value.getCommon_l4_protocol()) || "IPv4_TCP".equals(value.getCommon_l4_protocol());
|
||||
}
|
||||
}).assignTimestampsAndWatermarks(strategyForSession);
|
||||
|
||||
SingleOutputStreamOperator<ResultEntity> windowedStream3 = clientipdStream3.keyBy(new groupBySelector("common_client_ip"))
|
||||
.window(TumblingEventTimeWindows.of(Time.minutes(WINDOW_TIME_MINUTE)))
|
||||
.aggregate(new metricsAggregation(TOP_LIMIT), new metricsCalculateTest(TOP_LIMIT, "common_client_ip")).setParallelism(TASK_PARALLELISM);
|
||||
DataStream<String> windoweddStream3 = windowedStream3.keyBy(new oneKeySelector())
|
||||
.process(new TopNHotItems(TOP_LIMIT)).setParallelism(1);
|
||||
windoweddStream3.addSink(getKafkaSink("TOP-CLIENT-IP")).setParallelism(1);
|
||||
|
||||
|
||||
|
||||
SingleOutputStreamOperator<Entity> serveripdStream3 = inputForSession.filter(new FilterFunction<Entity>() {
|
||||
@Override
|
||||
public boolean filter(Entity value) throws Exception {
|
||||
return "IPv6_TCP".equals(value.getCommon_l4_protocol()) || "IPv4_TCP".equals(value.getCommon_l4_protocol());
|
||||
}
|
||||
}).assignTimestampsAndWatermarks(strategyForSession);
|
||||
|
||||
SingleOutputStreamOperator<ResultEntity> windowedStreamForServerIp3 = serveripdStream3.keyBy(new groupBySelector("common_server_ip"))
|
||||
.window(TumblingEventTimeWindows.of(Time.minutes(WINDOW_TIME_MINUTE)))
|
||||
.aggregate(new metricsAggregation(TOP_LIMIT), new metricsCalculateTest(TOP_LIMIT, "common_server_ip")).setParallelism(TASK_PARALLELISM);
|
||||
DataStream<String> windoweddStreamForServerIp3 = windowedStreamForServerIp3.keyBy(new oneKeySelector())
|
||||
.process(new TopNHotItems(TOP_LIMIT)).setParallelism(1);
|
||||
windoweddStreamForServerIp3.addSink(getKafkaSink("TOP-SERVER-IP")).setParallelism(1);
|
||||
|
||||
|
||||
//common_internal_ip聚合TOP
|
||||
SingleOutputStreamOperator<Entity> internalStream3 = inputForSession.filter(new FilterFunction<Entity>() {
|
||||
@Override
|
||||
public boolean filter(Entity value) throws Exception {
|
||||
return StringUtil.isNotEmpty(value.getCommon_internal_ip());
|
||||
}
|
||||
}).assignTimestampsAndWatermarks(strategyForSession);
|
||||
|
||||
SingleOutputStreamOperator<ResultEntity> windowedStreamForInternal3 = internalStream3.keyBy(new groupBySelector("common_internal_ip"))
|
||||
.window(TumblingEventTimeWindows.of(Time.minutes(WINDOW_TIME_MINUTE)))
|
||||
.aggregate(new metricsAggregation(TOP_LIMIT), new metricsCalculateTest(TOP_LIMIT, "common_internal_ip")).setParallelism(TASK_PARALLELISM);
|
||||
DataStream<String> WindoweddStreamForInternal3 = windowedStreamForInternal3.keyBy(new oneKeySelector())
|
||||
.process(new TopNHotItems(TOP_LIMIT)).setParallelism(1);
|
||||
WindoweddStreamForInternal3.addSink(getKafkaSink("TOP-INTERNAL-HOST")).setParallelism(1);
|
||||
|
||||
//common_external_ip聚合TOP
|
||||
|
||||
SingleOutputStreamOperator<Entity> externalStream3 = inputForSession.filter(new FilterFunction<Entity>() {
|
||||
@Override
|
||||
public boolean filter(Entity value) throws Exception {
|
||||
return StringUtil.isNotEmpty(value.getCommon_external_ip());
|
||||
}
|
||||
}).assignTimestampsAndWatermarks(strategyForSession);
|
||||
|
||||
SingleOutputStreamOperator<ResultEntity> windowedStreamForExternal3= externalStream3.keyBy(new groupBySelector("common_external_ip"))
|
||||
.window(TumblingEventTimeWindows.of(Time.minutes(WINDOW_TIME_MINUTE)))
|
||||
.aggregate(new metricsAggregation(TOP_LIMIT), new metricsCalculateTest(TOP_LIMIT, "common_external_ip")).setParallelism(TASK_PARALLELISM);
|
||||
DataStream<String> WindoweddStreamForExternal3 = windowedStreamForExternal3.keyBy(new oneKeySelector())
|
||||
.process(new TopNHotItems(TOP_LIMIT)).setParallelism(1);
|
||||
WindoweddStreamForExternal3.addSink(getKafkaSink("TOP-EXTERNAL-HOST")).setParallelism(1);
|
||||
|
||||
//http_domain聚合TOP
|
||||
|
||||
SingleOutputStreamOperator<Entity> domainStream3 = inputForSession.filter(new FilterFunction<Entity>() {
|
||||
@Override
|
||||
public boolean filter(Entity value) throws Exception {
|
||||
return StringUtil.isNotEmpty(value.getHttp_domain());
|
||||
}
|
||||
}).assignTimestampsAndWatermarks(strategyForSession);
|
||||
|
||||
SingleOutputStreamOperator<ResultEntity> windowedStreamForDomain3 = domainStream3.keyBy(new groupBySelector("http_domain"))
|
||||
.window(TumblingEventTimeWindows.of(Time.minutes(WINDOW_TIME_MINUTE)))
|
||||
.aggregate(new metricsAggregation(TOP_LIMIT), new metricsCalculateTest(TOP_LIMIT, "http_domain")).setParallelism(TASK_PARALLELISM);
|
||||
DataStream<String> WindoweddStreamForDomain3 = windowedStreamForDomain3.keyBy(new oneKeySelector())
|
||||
.process(new TopNHotItems(TOP_LIMIT)).setParallelism(1);
|
||||
WindoweddStreamForDomain3.addSink(getKafkaSink("TOP-WEBSITE-DOMAIN")).setParallelism(1);
|
||||
|
||||
SingleOutputStreamOperator<Entity> userStream3 = inputForSession.filter(new FilterFunction<Entity>() {
|
||||
@Override
|
||||
public boolean filter(Entity value) throws Exception {
|
||||
return StringUtil.isNotEmpty(value.getCommon_subscriber_id());
|
||||
}
|
||||
}).assignTimestampsAndWatermarks(strategyForSession);
|
||||
|
||||
//common_subscriber_id聚合TOP
|
||||
SingleOutputStreamOperator<ResultEntity> windowedStreamForUser3 = userStream3.keyBy(new groupBySelector("common_subscriber_id"))
|
||||
.window(TumblingEventTimeWindows.of(Time.minutes(WINDOW_TIME_MINUTE)))
|
||||
.aggregate(new metricsAggregation(TOP_LIMIT), new metricsCalculateTest(TOP_LIMIT, "common_subscriber_id")).setParallelism(TASK_PARALLELISM);
|
||||
DataStream<String> WindoweddStreamForUser3 = windowedStreamForUser3.keyBy(new oneKeySelector())
|
||||
.process(new TopNHotItems(TOP_LIMIT)).setParallelism(1);
|
||||
WindoweddStreamForUser3.addSink(getKafkaSink("TOP-USER")).setParallelism(1);
|
||||
|
||||
SingleOutputStreamOperator<Entity> appNameStream3 = inputForSession.filter(new FilterFunction<Entity>() {
|
||||
@Override
|
||||
public boolean filter(Entity value) throws Exception {
|
||||
return StringUtil.isNotEmpty(value.getCommon_app_label());
|
||||
}
|
||||
}).assignTimestampsAndWatermarks(strategyForSession);
|
||||
|
||||
|
||||
//common_app_label聚合求全量
|
||||
appNameStream3.keyBy(new groupBySelector("common_app_label"))
|
||||
.window(TumblingEventTimeWindows.of(Time.minutes(WINDOW_TIME_MINUTE)))
|
||||
.reduce(new metricsAggregationReduce(), new metricsCalculateForApp()).addSink(getKafkaSink("TRAFFIC-APP-STAT")).setParallelism(TASK_PARALLELISM);
|
||||
|
||||
|
||||
SingleOutputStreamOperator<UrlEntity> UrlStream3 = inputForUrl.filter(new FilterFunction<UrlEntity>() {
|
||||
@Override
|
||||
public boolean filter(UrlEntity value) throws Exception {
|
||||
return StringUtil.isNotEmpty(value.getHttp_url());
|
||||
}
|
||||
}).assignTimestampsAndWatermarks(strategyForSecurity);
|
||||
|
||||
//url聚合session求top
|
||||
SingleOutputStreamOperator<ResultEntity> windowedStreamForUrl3 = UrlStream3.keyBy(new twoKeySelector())
|
||||
.window(TumblingEventTimeWindows.of(Time.minutes(WINDOW_TIME_MINUTE)))
|
||||
.reduce(new UrlAggregationReduce(), new metricsCalculateForUrl(URL_TOP_LIMIT)).setParallelism(TASK_PARALLELISM);
|
||||
DataStream<String> WindoweddStreamForUrl3 = windowedStreamForUrl3.keyBy(new oneKeySelector())
|
||||
.process(new TopNHotItemsForUrl(URL_TOP_LIMIT)).setParallelism(1);
|
||||
WindoweddStreamForUrl3.addSink(getKafkaSink("TOP-URLS")).setParallelism(1);
|
||||
|
||||
break;
|
||||
|
||||
|
||||
|
||||
// clientipdStream2.windowAll(TumblingEventTimeWindows.of(Time.minutes(WINDOW_TIME_MINUTE)))
|
||||
// .aggregate(new DatasketchForMetricsAggregate("clientIpSession"), new UserCountWindowResult5())
|
||||
//// .print();
|
||||
// .addSink(getKafkaSink("TOP-CLIENT-IP")).setParallelism(3);
|
||||
|
||||
// clientipdStream2.windowAll(TumblingEventTimeWindows.of(Time.minutes(WINDOW_TIME_MINUTE)))
|
||||
// .aggregate(new DatasketchForMetricsAggregate("clientIpPkt"), new UserCountWindowResult5())
|
||||
//// .print();
|
||||
// .addSink(getKafkaSink("TOP-CLIENT-IP")).setParallelism(3);
|
||||
//
|
||||
//
|
||||
// clientipdStream2.windowAll(TumblingEventTimeWindows.of(Time.minutes(WINDOW_TIME_MINUTE)))
|
||||
// .aggregate(new DatasketchForMetricsAggregate("clientIpByte"), new UserCountWindowResult5())
|
||||
//// .print();
|
||||
// .addSink(getKafkaSink("TOP-CLIENT-IP")).setParallelism(3);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
//
|
||||
// //clientip聚合TOP
|
||||
//
|
||||
// SingleOutputStreamOperator<Entity> clientipdStream2 = inputForSession.filter(new FilterFunction<Entity>() {
|
||||
// @Override
|
||||
// public boolean filter(Entity value) throws Exception {
|
||||
// return "IPv6_TCP".equals(value.getCommon_l4_protocol()) || "IPv4_TCP".equals(value.getCommon_l4_protocol());
|
||||
// }
|
||||
// }).assignTimestampsAndWatermarks(strategyForSession);
|
||||
//
|
||||
// SingleOutputStreamOperator<ResultEntity> windowedStream2 = clientipdStream2.keyBy(new groupBySelector("common_client_ip"))
|
||||
// .window(TumblingEventTimeWindows.of(Time.minutes(WINDOW_TIME_MINUTE)))
|
||||
// .reduce(new metricsAggregationReduce(), new DatasketchMetricsCalculate(TOP_LIMIT, "common_client_ip")).setParallelism(TASK_PARALLELISM);
|
||||
// DataStream<String> windoweddStream2 = windowedStream2.keyBy(new oneKeySelector())
|
||||
// .process(new TopNHotItems(TOP_LIMIT)).setParallelism(3);
|
||||
// windoweddStream2.addSink(getKafkaSink("TOP-CLIENT-IP")).setParallelism(3);
|
||||
//
|
||||
// //serverip聚合TOP
|
||||
//
|
||||
// SingleOutputStreamOperator<Entity> serveripdStream2 = inputForSession.filter(new FilterFunction<Entity>() {
|
||||
// @Override
|
||||
// public boolean filter(Entity value) throws Exception {
|
||||
// return "IPv6_TCP".equals(value.getCommon_l4_protocol()) || "IPv4_TCP".equals(value.getCommon_l4_protocol());
|
||||
// }
|
||||
// }).assignTimestampsAndWatermarks(strategyForSession);
|
||||
//
|
||||
// SingleOutputStreamOperator<ResultEntity> windowedStreamForServerIp2 = serveripdStream2.keyBy(new groupBySelector("common_server_ip"))
|
||||
// .window(TumblingEventTimeWindows.of(Time.minutes(WINDOW_TIME_MINUTE)))
|
||||
// .reduce(new metricsAggregationReduce(), new DatasketchMetricsCalculate(TOP_LIMIT, "common_server_ip")).setParallelism(TASK_PARALLELISM);
|
||||
// DataStream<String> windoweddStreamForServerIp2 = windowedStreamForServerIp2.keyBy(new oneKeySelector())
|
||||
// .process(new TopNHotItems(TOP_LIMIT)).setParallelism(3);
|
||||
// windoweddStreamForServerIp2.addSink(getKafkaSink("TOP-SERVER-IP")).setParallelism(3);
|
||||
//
|
||||
//
|
||||
// //common_internal_ip聚合TOP
|
||||
// SingleOutputStreamOperator<Entity> internalStream2 = inputForSession.filter(new FilterFunction<Entity>() {
|
||||
// @Override
|
||||
// public boolean filter(Entity value) throws Exception {
|
||||
// return StringUtil.isNotEmpty(value.getCommon_internal_ip());
|
||||
// }
|
||||
// }).assignTimestampsAndWatermarks(strategyForSession);
|
||||
//
|
||||
// SingleOutputStreamOperator<ResultEntity> windowedStreamForInternal2 = internalStream2.keyBy(new groupBySelector("common_internal_ip"))
|
||||
// .window(TumblingEventTimeWindows.of(Time.minutes(WINDOW_TIME_MINUTE)))
|
||||
// .reduce(new metricsAggregationReduce(), new DatasketchMetricsCalculate(TOP_LIMIT, "common_internal_ip")).setParallelism(TASK_PARALLELISM);
|
||||
// DataStream<String> WindoweddStreamForInternal2 = windowedStreamForInternal2.keyBy(new oneKeySelector())
|
||||
// .process(new TopNHotItems(TOP_LIMIT)).setParallelism(3);
|
||||
// WindoweddStreamForInternal2.addSink(getKafkaSink("TOP-INTERNAL-HOST")).setParallelism(3);
|
||||
//
|
||||
// //common_external_ip聚合TOP
|
||||
//
|
||||
// SingleOutputStreamOperator<Entity> externalStream2 = inputForSession.filter(new FilterFunction<Entity>() {
|
||||
// @Override
|
||||
// public boolean filter(Entity value) throws Exception {
|
||||
// return StringUtil.isNotEmpty(value.getCommon_external_ip());
|
||||
// }
|
||||
// }).assignTimestampsAndWatermarks(strategyForSession);
|
||||
//
|
||||
// SingleOutputStreamOperator<ResultEntity> windowedStreamForExternal2 = externalStream2.keyBy(new groupBySelector("common_external_ip"))
|
||||
// .window(TumblingEventTimeWindows.of(Time.minutes(WINDOW_TIME_MINUTE)))
|
||||
// .reduce(new metricsAggregationReduce(), new DatasketchMetricsCalculate(TOP_LIMIT, "common_external_ip")).setParallelism(TASK_PARALLELISM);
|
||||
// DataStream<String> WindoweddStreamForExternal2 = windowedStreamForExternal2.keyBy(new oneKeySelector())
|
||||
// .process(new TopNHotItems(TOP_LIMIT)).setParallelism(3);
|
||||
// WindoweddStreamForExternal2.addSink(getKafkaSink("TOP-EXTERNAL-HOST")).setParallelism(3);
|
||||
//
|
||||
// //http_domain聚合TOP
|
||||
//
|
||||
// SingleOutputStreamOperator<Entity> domainStream2 = inputForSession.filter(new FilterFunction<Entity>() {
|
||||
// @Override
|
||||
// public boolean filter(Entity value) throws Exception {
|
||||
// return StringUtil.isNotEmpty(value.getHttp_domain());
|
||||
// }
|
||||
// }).assignTimestampsAndWatermarks(strategyForSession);
|
||||
//
|
||||
// SingleOutputStreamOperator<ResultEntity> windowedStreamForDomain2 = domainStream2.keyBy(new groupBySelector("http_domain"))
|
||||
// .window(TumblingEventTimeWindows.of(Time.minutes(WINDOW_TIME_MINUTE)))
|
||||
// .reduce(new metricsAggregationReduce(), new DatasketchMetricsCalculate(TOP_LIMIT, "http_domain")).setParallelism(TASK_PARALLELISM);
|
||||
// DataStream<String> WindoweddStreamForDomain2 = windowedStreamForDomain2.keyBy(new oneKeySelector())
|
||||
// .process(new TopNHotItems(TOP_LIMIT)).setParallelism(3);
|
||||
// WindoweddStreamForDomain2.addSink(getKafkaSink("TOP-WEBSITE-DOMAIN")).setParallelism(3);
|
||||
//
|
||||
// SingleOutputStreamOperator<Entity> userStream2 = inputForSession.filter(new FilterFunction<Entity>() {
|
||||
// @Override
|
||||
// public boolean filter(Entity value) throws Exception {
|
||||
// return StringUtil.isNotEmpty(value.getCommon_subscriber_id());
|
||||
// }
|
||||
// }).assignTimestampsAndWatermarks(strategyForSession);
|
||||
//
|
||||
// //common_subscriber_id聚合TOP
|
||||
// SingleOutputStreamOperator<ResultEntity> windowedStreamForUser2 = userStream2.keyBy(new groupBySelector("common_subscriber_id"))
|
||||
// .window(TumblingEventTimeWindows.of(Time.minutes(WINDOW_TIME_MINUTE)))
|
||||
// .reduce(new metricsAggregationReduce(), new DatasketchMetricsCalculate(TOP_LIMIT, "common_subscriber_id")).setParallelism(TASK_PARALLELISM);
|
||||
// DataStream<String> WindoweddStreamForUser2 = windowedStreamForUser2.keyBy(new oneKeySelector())
|
||||
// .process(new TopNHotItems(TOP_LIMIT)).setParallelism(3);
|
||||
// WindoweddStreamForUser2.addSink(getKafkaSink("TOP-USER")).setParallelism(3);
|
||||
//
|
||||
//
|
||||
// //common_app_label聚合求全量
|
||||
// SingleOutputStreamOperator<Entity> appNameStream2 = inputForSession.filter(new FilterFunction<Entity>() {
|
||||
// @Override
|
||||
// public boolean filter(Entity value) throws Exception {
|
||||
// return StringUtil.isNotEmpty(value.getCommon_app_label());
|
||||
// }
|
||||
// }).assignTimestampsAndWatermarks(strategyForSession);
|
||||
//
|
||||
//
|
||||
//
|
||||
// appNameStream2.keyBy(new groupBySelector("common_app_label"))
|
||||
// .window(TumblingEventTimeWindows.of(Time.minutes(WINDOW_TIME_MINUTE)))
|
||||
// .reduce(new metricsAggregationReduce(), new metricsCalculateForApp()).addSink(getKafkaSink("TRAFFIC-APP-STAT")).setParallelism(TASK_PARALLELISM);
|
||||
//
|
||||
//
|
||||
//
|
||||
// //url聚合session求top
|
||||
// SingleOutputStreamOperator<UrlEntity> UrlStream2 = inputForUrl.filter(new FilterFunction<UrlEntity>() {
|
||||
// @Override
|
||||
// public boolean filter(UrlEntity value) throws Exception {
|
||||
// return StringUtil.isNotEmpty(value.getHttp_url());
|
||||
// }
|
||||
// }).assignTimestampsAndWatermarks(strategyForSecurity);
|
||||
//
|
||||
//
|
||||
// SingleOutputStreamOperator<ResultEntity> windowedStreamForUrl2 = UrlStream2.keyBy(new twoKeySelector())
|
||||
// .window(TumblingEventTimeWindows.of(Time.minutes(WINDOW_TIME_MINUTE)))
|
||||
// .reduce(new UrlAggregationReduce(), new DatasketchUrlCalculate(URL_TOP_LIMIT)).setParallelism(TASK_PARALLELISM);
|
||||
// DataStream<String> WindoweddStreamForUrl2 = windowedStreamForUrl2.keyBy(new oneKeySelector())
|
||||
// .process(new TopNHotItemsForUrl(URL_TOP_LIMIT)).setParallelism(1);
|
||||
// WindoweddStreamForUrl2.addSink(getKafkaSink("TOP-URLS")).setParallelism(3);
|
||||
//
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// break;
|
||||
|
||||
}
|
||||
|
||||
|
||||
env.execute("TOP-task");
|
||||
}).assignTimestampsAndWatermarks(strategyForSession);
|
||||
|
||||
SingleOutputStreamOperator<resultEntity> windowedStreamForServerIp = serveripdStream.keyBy(new groupBySelector("server_ip"))
|
||||
.window(TumblingEventTimeWindows.of(Time.minutes(WINDOW_TIME_MINUTE)))
|
||||
.reduce(new metricsAggregationReduce(), new metricsCalculate(TOP_LIMIT, "server_ip")).setParallelism(TASK_PARALLELISM).name("server_ip");;
|
||||
DataStream<String> StreamForServerIp = windowedStreamForServerIp.keyBy(new oneKeySelector())
|
||||
.process(new topnHotItems(TOP_LIMIT)).setParallelism(ORDERBY_PARALLELISM);
|
||||
StreamForServerIp.addSink(getKafkaSink(KAFKA_PRODUCER_TOPIC)).setParallelism(SINK_PARALLELISM);
|
||||
|
||||
|
||||
// Top-N aggregation keyed by internal_ip
|
||||
SingleOutputStreamOperator<transformEntity> internalStream = inputForSession.filter(new FilterFunction<transformEntity>() {
|
||||
@Override
|
||||
public boolean filter(transformEntity value) throws Exception {
|
||||
return StringUtil.isNotEmpty(value.getInternal_ip());
|
||||
}
|
||||
}).assignTimestampsAndWatermarks(strategyForSession);
|
||||
|
||||
SingleOutputStreamOperator<resultEntity> windowedStreamForInternal = internalStream.keyBy(new groupBySelector("internal_ip"))
|
||||
.window(TumblingEventTimeWindows.of(Time.minutes(WINDOW_TIME_MINUTE)))
|
||||
.reduce(new metricsAggregationReduce(), new metricsCalculate(TOP_LIMIT, "internal_ip")).setParallelism(TASK_PARALLELISM).name("internal_ip");;
|
||||
DataStream<String> StreamForInternal = windowedStreamForInternal.keyBy(new oneKeySelector())
|
||||
.process(new topnHotItems(TOP_LIMIT)).setParallelism(ORDERBY_PARALLELISM);
|
||||
StreamForInternal.addSink(getKafkaSink(KAFKA_PRODUCER_TOPIC)).setParallelism(SINK_PARALLELISM);
|
||||
|
||||
// Top-N aggregation keyed by external_ip
|
||||
|
||||
SingleOutputStreamOperator<transformEntity> externalStream = inputForSession.filter(new FilterFunction<transformEntity>() {
|
||||
@Override
|
||||
public boolean filter(transformEntity value) throws Exception {
|
||||
return StringUtil.isNotEmpty(value.getExternal_ip());
|
||||
}
|
||||
}).assignTimestampsAndWatermarks(strategyForSession);
|
||||
|
||||
SingleOutputStreamOperator<resultEntity> windowedStreamForExternal = externalStream.keyBy(new groupBySelector("external_ip"))
|
||||
.window(TumblingEventTimeWindows.of(Time.minutes(WINDOW_TIME_MINUTE)))
|
||||
.reduce(new metricsAggregationReduce(), new metricsCalculate(TOP_LIMIT, "external_ip")).setParallelism(TASK_PARALLELISM).name("external_ip");;
|
||||
DataStream<String> StreamForExternal = windowedStreamForExternal.keyBy(new oneKeySelector())
|
||||
.process(new topnHotItems(TOP_LIMIT)).setParallelism(ORDERBY_PARALLELISM);
|
||||
StreamForExternal.addSink(getKafkaSink(KAFKA_PRODUCER_TOPIC)).setParallelism(SINK_PARALLELISM);
|
||||
|
||||
// Top-N aggregation of HTTP domains (keyed by server_domain)
|
||||
|
||||
SingleOutputStreamOperator<transformEntity> domainStream = inputForSession.filter(new FilterFunction<transformEntity>() {
|
||||
@Override
|
||||
public boolean filter(transformEntity value) throws Exception {
|
||||
return StringUtil.isNotEmpty(value.getDomain());
|
||||
}
|
||||
}).assignTimestampsAndWatermarks(strategyForSession);
|
||||
|
||||
SingleOutputStreamOperator<resultEntity> windowedStreamForDomain = domainStream.keyBy(new groupBySelector("server_domain"))
|
||||
.window(TumblingEventTimeWindows.of(Time.minutes(WINDOW_TIME_MINUTE)))
|
||||
.reduce(new metricsAggregationReduce(), new metricsCalculate(TOP_LIMIT, "server_domain")).setParallelism(TASK_PARALLELISM).name("server_domain");;
|
||||
DataStream<String> StreamForDomain = windowedStreamForDomain.keyBy(new oneKeySelector())
|
||||
.process(new topnHotItems(TOP_LIMIT)).setParallelism(ORDERBY_PARALLELISM);
|
||||
StreamForDomain.addSink(getKafkaSink(KAFKA_PRODUCER_TOPIC)).setParallelism(SINK_PARALLELISM);
|
||||
|
||||
SingleOutputStreamOperator<transformEntity> userStream = inputForSession.filter(new FilterFunction<transformEntity>() {
|
||||
@Override
|
||||
public boolean filter(transformEntity value) throws Exception {
|
||||
return StringUtil.isNotEmpty(value.getSubscriber_id());
|
||||
}
|
||||
}).assignTimestampsAndWatermarks(strategyForSession);
|
||||
|
||||
// Top-N aggregation keyed by subscriber_id
|
||||
SingleOutputStreamOperator<resultEntity> windowedStreamForUser = userStream.keyBy(new groupBySelector("subscriber_id"))
|
||||
.window(TumblingEventTimeWindows.of(Time.minutes(WINDOW_TIME_MINUTE)))
|
||||
.reduce(new metricsAggregationReduce(), new metricsCalculate(TOP_LIMIT, "subscriber_id")).setParallelism(TASK_PARALLELISM).name("subscriber_id");;
|
||||
DataStream<String> StreamForUser = windowedStreamForUser.keyBy(new oneKeySelector())
|
||||
.process(new topnHotItems(TOP_LIMIT)).setParallelism(ORDERBY_PARALLELISM);
|
||||
StreamForUser.addSink(getKafkaSink(KAFKA_PRODUCER_TOPIC)).setParallelism(SINK_PARALLELISM);
|
||||
|
||||
SingleOutputStreamOperator<transformEntity> fqdnStream = inputForSession.filter(new FilterFunction<transformEntity>() {
|
||||
@Override
|
||||
public boolean filter(transformEntity value) throws Exception {
|
||||
return StringUtil.isNotEmpty(value.getFqdn());
|
||||
}
|
||||
}).assignTimestampsAndWatermarks(strategyForSession);
|
||||
|
||||
SingleOutputStreamOperator<resultEntity> windowedStreamForFqdn = fqdnStream.keyBy(new groupBySelector("server_fqdn"))
|
||||
.window(TumblingEventTimeWindows.of(Time.minutes(WINDOW_TIME_MINUTE)))
|
||||
.reduce(new metricsAggregationReduce(), new metricsCalculate(TOP_LIMIT, "server_fqdn")).setParallelism(TASK_PARALLELISM).name("server_fqdn");
|
||||
DataStream<String> StreamForFqdn = windowedStreamForFqdn.keyBy(new oneKeySelector())
|
||||
.process(new topnHotItems(TOP_LIMIT)).setParallelism(ORDERBY_PARALLELISM);
|
||||
StreamForFqdn.addSink(getKafkaSink(KAFKA_PRODUCER_TOPIC)).setParallelism(SINK_PARALLELISM);
|
||||
|
||||
env.execute(JOB_NAME);
|
||||
|
||||
}
|
||||
|
||||
|
||||
public static class groupBySelector implements KeySelector<Entity, Tuple4<String, Long, String, String>> {
|
||||
public static class groupBySelector implements KeySelector<transformEntity, Tuple5<String, Long, String, String, String>> {
|
||||
|
||||
public String key;
|
||||
|
||||
@@ -688,31 +223,32 @@ public class Toptask {
|
||||
}
|
||||
|
||||
@Override
|
||||
public Tuple4<String, Long, String, String> getKey(Entity entity) throws Exception {
|
||||
public Tuple5<String, Long, String, String, String> getKey(transformEntity transformEntity) throws Exception {
|
||||
|
||||
Tuple4<String, Long, String, String> tuple = null;
|
||||
Tuple5<String, Long, String, String, String> tuple = null;
|
||||
transformEntity.setKey_by(key);
|
||||
switch (key) {
|
||||
case "common_client_ip":
|
||||
tuple = new Tuple4<>(entity.getCommon_client_ip(), entity.getCommon_vsys_id(), entity.getCommon_device_group(), entity.getCommon_data_center());
|
||||
case "client_ip":
|
||||
tuple = new Tuple5<>(transformEntity.getClient_ip(), transformEntity.getVsys_id(), transformEntity.getDevice_group(), transformEntity.getData_center(), transformEntity.getDevice_id());
|
||||
break;
|
||||
case "common_server_ip":
|
||||
tuple = new Tuple4<>(entity.getCommon_server_ip(), entity.getCommon_vsys_id(), entity.getCommon_device_group(), entity.getCommon_data_center());
|
||||
case "server_ip":
|
||||
tuple = new Tuple5<>(transformEntity.getServer_ip(), transformEntity.getVsys_id(), transformEntity.getDevice_group(), transformEntity.getData_center(), transformEntity.getDevice_id());
|
||||
break;
|
||||
case "common_internal_ip":
|
||||
tuple = new Tuple4<>(entity.getCommon_internal_ip(), entity.getCommon_vsys_id(), entity.getCommon_device_group(), entity.getCommon_data_center());
|
||||
case "internal_ip":
|
||||
tuple = new Tuple5<>(transformEntity.getInternal_ip(), transformEntity.getVsys_id(), transformEntity.getDevice_group(), transformEntity.getData_center(), transformEntity.getDevice_id());
|
||||
break;
|
||||
case "common_external_ip":
|
||||
tuple = new Tuple4<>(entity.getCommon_external_ip(), entity.getCommon_vsys_id(), entity.getCommon_device_group(), entity.getCommon_data_center());
|
||||
case "external_ip":
|
||||
tuple = new Tuple5<>(transformEntity.getExternal_ip(), transformEntity.getVsys_id(), transformEntity.getDevice_group(), transformEntity.getData_center(), transformEntity.getDevice_id());
|
||||
break;
|
||||
case "http_domain":
|
||||
tuple = new Tuple4<>(entity.getHttp_domain(), entity.getCommon_vsys_id(), entity.getCommon_device_group(), entity.getCommon_data_center());
|
||||
case "server_domain":
|
||||
tuple = new Tuple5<>(transformEntity.getDomain(), transformEntity.getVsys_id(), transformEntity.getDevice_group(), transformEntity.getData_center(), transformEntity.getDevice_id());
|
||||
break;
|
||||
|
||||
case "common_subscriber_id":
|
||||
tuple = new Tuple4<>(entity.getCommon_subscriber_id(), entity.getCommon_vsys_id(), entity.getCommon_device_group(), entity.getCommon_data_center());
|
||||
case "subscriber_id":
|
||||
tuple = new Tuple5<>(transformEntity.getSubscriber_id(), transformEntity.getVsys_id(), transformEntity.getDevice_group(), transformEntity.getData_center(), transformEntity.getDevice_id());
|
||||
break;
|
||||
case "common_app_label":
|
||||
tuple = new Tuple4<>(entity.getCommon_app_label(), entity.getCommon_vsys_id(), entity.getCommon_device_group(), entity.getCommon_data_center());
|
||||
case "server_fqdn":
|
||||
tuple = new Tuple5<>(transformEntity.getFqdn(), transformEntity.getVsys_id(), transformEntity.getDevice_group(), transformEntity.getData_center(), transformEntity.getDevice_id());
|
||||
break;
|
||||
|
||||
default:
|
||||
@@ -723,19 +259,13 @@ public class Toptask {
|
||||
}
|
||||
|
||||
|
||||
/**
 * Key selector that keys a result record by its {@code order_by} value,
 * wrapped in a {@link Tuple1}.
 *
 * NOTE(review): the class header and the getKey signature each appear twice
 * below, once with ResultEntity and once with resultEntity. This looks like
 * the before/after lines of a diff; presumably the resultEntity variant is the
 * current one -- confirm against the real source file.
 */
public static class oneKeySelector implements KeySelector<ResultEntity, Tuple1<String>> {
|
||||
public static class oneKeySelector implements KeySelector<resultEntity, Tuple1<String>> {
|
||||
|
||||
@Override
|
||||
public Tuple1<String> getKey(ResultEntity entity) throws Exception {
|
||||
public Tuple1<String> getKey(resultEntity entity) throws Exception {
|
||||
// The key is the single-field tuple holding the entity's order_by value.
return new Tuple1<>(entity.getOrder_by());
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Key selector for {@code UrlEntity} records: the key is the pair
 * (http_url, common_vsys_id) returned as a {@link Tuple2}.
 */
public static class twoKeySelector implements KeySelector<UrlEntity, Tuple2<String, Long>> {
|
||||
|
||||
@Override
|
||||
public Tuple2<String, Long> getKey(UrlEntity entity) throws Exception {
|
||||
// Combine the URL with the vsys id to form the two-field key.
return new Tuple2<>(entity.getHttp_url(), entity.getCommon_vsys_id());
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user