package com.zdjizhi.topology;

import cn.hutool.log.Log;
import cn.hutool.log.LogFactory;
import com.zdjizhi.common.config.MergeConfigs;
import com.zdjizhi.common.config.MergeConfiguration;
import com.zdjizhi.common.pojo.Fields;
import com.zdjizhi.common.pojo.Metrics;
import com.zdjizhi.common.pojo.Tags;
import com.zdjizhi.utils.functions.keyby.DimensionKeyBy;
import com.zdjizhi.utils.functions.map.ResultFlatMap;
import com.zdjizhi.utils.functions.process.ParsingData;
import com.zdjizhi.utils.functions.statistics.DispersionCountWindow;
import com.zdjizhi.utils.functions.statistics.MergeCountWindow;
import com.zdjizhi.utils.kafka.KafkaConsumer;
import com.zdjizhi.utils.kafka.KafkaProducer;
import org.apache.flink.api.common.eventtime.*;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;

import java.time.Duration;

import static com.zdjizhi.common.config.MergeConfigs.*;

/**
 * @author qidaijie
 * @Package com.zdjizhi.topology
 * @Description: Flink topology that merges application-protocol traffic statistics: it parses records
 *               from Kafka, aggregates them per dimension in tumbling event-time windows, and writes
 *               the merged results back to Kafka.
 * @date 2021/5/20 16:42
 */
public class ApplicationProtocolTopology {
    private static final Log logger = LogFactory.get();

    public static void main(String[] args) {
        try {
            // param check
            if (args.length < 1) {
                throw new IllegalArgumentException("Error: Properties path not found. " +
                        "\nUsage: flink -c xxx xxx.jar app.properties.");
            }

            final StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();

            // load the job configuration from the properties file and register it as global job parameters
            ParameterTool tool = ParameterTool.fromPropertiesFile(args[0]);
            final Configuration config = tool.getConfiguration();
            environment.getConfig().setGlobalJobParameters(config);
            final MergeConfiguration fusionConfiguration = new MergeConfiguration(config);
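            // All MergeConfigs keys used below (Kafka topics, startup mode, window size, watermark bound)
            // are resolved from the properties file supplied as args[0].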

            // Watermark strategy: tolerate out-of-order events up to the configured bound;
            // the third tuple field (f2) carries the event-time timestamp.
            WatermarkStrategy<Tuple3<Tags, Fields, Long>> strategyForSession = WatermarkStrategy
                    .<Tuple3<Tags, Fields, Long>>forBoundedOutOfOrderness(Duration.ofSeconds(config.get(WARTERMARK_MAX_ORDERNESS)))
                    .withTimestampAssigner((element, timestamp) -> element.f2);
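            // Note: elements arriving after the watermark has passed the end of their window are dropped
            // by the event-time windows below, since no allowedLateness is configured.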

            // Source: consume raw records from Kafka
            DataStream<String> streamSource = environment.addSource(
                    KafkaConsumer.getKafkaConsumer(fusionConfiguration.getProperties(SOURCE_KAFKA_PROPERTIES_PREFIX),
                            config.get(SOURCE_KAFKA_TOPIC),
                            config.get(STARTUP_MODE)));

            // Parse raw messages into (Tags, Fields, eventTime) tuples and assign timestamps/watermarks
            SingleOutputStreamOperator<Tuple3<Tags, Fields, Long>> parseDataProcess = streamSource.process(new ParsingData())
                    .assignTimestampsAndWatermarks(strategyForSession)
                    .name("ParseDataProcess");

            // Incremental aggregation: key by dimension and merge metrics in tumbling event-time windows
            SingleOutputStreamOperator<Metrics> dispersionCountWindow = parseDataProcess.keyBy(new DimensionKeyBy())
                    .window(TumblingEventTimeWindows.of(Time.seconds(config.get(MergeConfigs.COUNT_WINDOW_TIME))))
                    .reduce(new DispersionCountWindow(), new MergeCountWindow())
                    .name("DispersionCountWindow");
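            // DispersionCountWindow pre-aggregates elements incrementally as they arrive, so only one
            // accumulated value per key and window is held in state; MergeCountWindow receives that value
            // together with the window metadata when the window fires.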

            // Flatten the aggregated metrics into output records
            SingleOutputStreamOperator<String> resultFlatMap = dispersionCountWindow.flatMap(new ResultFlatMap())
                    .name("ResultFlatMap");

            // Sink: write the merged results back to Kafka
            resultFlatMap.addSink(KafkaProducer.getKafkaProducer(fusionConfiguration.getProperties(SINK_KAFKA_PROPERTIES_PREFIX),
                    config.get(SINK_KAFKA_TOPIC),
                    config.get(LOG_FAILURES_ONLY)));

            environment.execute("APP-PROTOCOL-STAT-TRAFFIC-MERGE");
        } catch (Exception e) {
            logger.error(e, "This Flink task failed to start! Exception information is:");
        }
    }
}