Live Traffic Chart基于Sketch HLL支持独立客户端统计。(GAL-283)

2023-04-14 18:58:28 +08:00
parent 338c03f6eb
commit b7a9229aec
17 changed files with 598 additions and 217 deletions
--- a/src/main/java/com/zdjizhi/topology/StreamAggregateTopology.java
+++ b/src/main/java/com/zdjizhi/topology/StreamAggregateTopology.java
@@ -3,10 +3,10 @@ package com.zdjizhi.topology;
 import cn.hutool.log.Log;
 import cn.hutool.log.LogFactory;
 import com.zdjizhi.common.StreamAggregateConfig;
-import com.zdjizhi.utils.functions.*;
 import com.zdjizhi.utils.functions.keyby.FirstKeyByFunction;
 import com.zdjizhi.utils.functions.keyby.SecondKeyByFunction;
 import com.zdjizhi.utils.functions.parse.ParseMapFunction;
+import com.zdjizhi.utils.functions.result.ResultFlatMapFunction;
 import com.zdjizhi.utils.functions.statistics.FirstCountWindowFunction;
 import com.zdjizhi.utils.functions.statistics.SecondCountWindowFunction;
 import com.zdjizhi.utils.kafka.KafkaConsumer;
@@ -21,6 +21,8 @@ import org.apache.flink.streaming.api.windowing.assigners.TumblingProcessingTime
 import org.apache.flink.streaming.api.windowing.time.Time;
 import org.apache.flink.streaming.api.windowing.windows.TimeWindow;

+import java.util.Map;
+

 /**
 * @author qidaijie
@@ -38,29 +40,37 @@ public class StreamAggregateTopology {
            //两个输出之间的最大时间 (单位milliseconds)
            environment.setBufferTimeout(StreamAggregateConfig.BUFFER_TIMEOUT);

+            // Read raw logs from the Kafka source
            DataStream<String> streamSource = environment.addSource(KafkaConsumer.getKafkaConsumer())
                    .setParallelism(StreamAggregateConfig.SOURCE_PARALLELISM).name(StreamAggregateConfig.SOURCE_KAFKA_TOPIC);

-            SingleOutputStreamOperator<Tuple3<String, String, String>> parseDataMap = streamSource.map(new ParseMapFunction())
+            // Parse each raw log record and attach a custom key to mitigate data skew
+            SingleOutputStreamOperator<Tuple3<String, String, Map<String, Object>>> parseDataMap = streamSource.map(new ParseMapFunction())
                    .name("ParseDataMap")
                    .setParallelism(StreamAggregateConfig.PARSE_PARALLELISM);

-            WindowedStream<Tuple3<String, String, String>, String, TimeWindow> firstWindow = parseDataMap.keyBy(new FirstKeyByFunction())
+            //初步聚合计算，增加自定义key 缓解数据倾斜
+            WindowedStream<Tuple3<String, String, Map<String, Object>>, String, TimeWindow> firstWindow = parseDataMap.keyBy(new FirstKeyByFunction())
                    .window(TumblingProcessingTimeWindows.of(Time.seconds(StreamAggregateConfig.FIRST_COUNT_WINDOW_TIME)));

-            SingleOutputStreamOperator<Tuple2<String, String>> metricCountWindow = firstWindow.process(new FirstCountWindowFunction())
+            //初次聚合计算窗口
+            SingleOutputStreamOperator<Tuple2<String, Map<String, Object>>> metricCountWindow = firstWindow.process(new FirstCountWindowFunction())
                    .name("FirstCountWindow")
                    .setParallelism(StreamAggregateConfig.FIRST_WINDOW_PARALLELISM);

-            WindowedStream<Tuple2<String, String>, String, TimeWindow> secondWindow = metricCountWindow.keyBy(new SecondKeyByFunction())
+            //二次聚合计算，使用业务的key 进行数据汇总
+            WindowedStream<Tuple2<String, Map<String, Object>>, String, TimeWindow> secondWindow = metricCountWindow.keyBy(new SecondKeyByFunction())
                    .window(TumblingProcessingTimeWindows.of(Time.seconds(StreamAggregateConfig.SECOND_COUNT_WINDOW_TIME)));

-            SingleOutputStreamOperator<String> secondCountWindow = secondWindow.process(new SecondCountWindowFunction())
+            //二次聚合计算窗口
+            SingleOutputStreamOperator<Map<String, Object>> secondCountWindow = secondWindow.process(new SecondCountWindowFunction())
                    .name("SecondCountWindow").setParallelism(StreamAggregateConfig.SECOND_WINDOW_PARALLELISM);

+            //拆解结果数据按protocol id循环输出
            SingleOutputStreamOperator<String> resultFlatMap = secondCountWindow.flatMap(new ResultFlatMapFunction())
                    .name("ResultFlatMap").setParallelism(StreamAggregateConfig.SINK_PARALLELISM);

+            //输出到kafka
            resultFlatMap.addSink(KafkaProducer.getKafkaProducer()).name("LogSinkKafka")
                    .setParallelism(StreamAggregateConfig.SINK_PARALLELISM).name(StreamAggregateConfig.SINK_KAFKA_TOPIC);