Live Traffic Chart基于Sketch HLL支持独立客户端统计。(GAL-283)
This commit is contained in:
@@ -3,10 +3,10 @@ package com.zdjizhi.topology;
|
||||
import cn.hutool.log.Log;
|
||||
import cn.hutool.log.LogFactory;
|
||||
import com.zdjizhi.common.StreamAggregateConfig;
|
||||
import com.zdjizhi.utils.functions.*;
|
||||
import com.zdjizhi.utils.functions.keyby.FirstKeyByFunction;
|
||||
import com.zdjizhi.utils.functions.keyby.SecondKeyByFunction;
|
||||
import com.zdjizhi.utils.functions.parse.ParseMapFunction;
|
||||
import com.zdjizhi.utils.functions.result.ResultFlatMapFunction;
|
||||
import com.zdjizhi.utils.functions.statistics.FirstCountWindowFunction;
|
||||
import com.zdjizhi.utils.functions.statistics.SecondCountWindowFunction;
|
||||
import com.zdjizhi.utils.kafka.KafkaConsumer;
|
||||
@@ -21,6 +21,8 @@ import org.apache.flink.streaming.api.windowing.assigners.TumblingProcessingTime
|
||||
import org.apache.flink.streaming.api.windowing.time.Time;
|
||||
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
|
||||
/**
|
||||
* @author qidaijie
|
||||
@@ -38,29 +40,37 @@ public class StreamAggregateTopology {
|
||||
//两个输出之间的最大时间 (单位milliseconds)
|
||||
environment.setBufferTimeout(StreamAggregateConfig.BUFFER_TIMEOUT);
|
||||
|
||||
//解析原始日志
|
||||
DataStream<String> streamSource = environment.addSource(KafkaConsumer.getKafkaConsumer())
|
||||
.setParallelism(StreamAggregateConfig.SOURCE_PARALLELISM).name(StreamAggregateConfig.SOURCE_KAFKA_TOPIC);
|
||||
|
||||
SingleOutputStreamOperator<Tuple3<String, String, String>> parseDataMap = streamSource.map(new ParseMapFunction())
|
||||
//解析原始日志初步聚合计算,增加自定义key 缓解数据倾斜
|
||||
SingleOutputStreamOperator<Tuple3<String, String, Map<String, Object>>> parseDataMap = streamSource.map(new ParseMapFunction())
|
||||
.name("ParseDataMap")
|
||||
.setParallelism(StreamAggregateConfig.PARSE_PARALLELISM);
|
||||
|
||||
WindowedStream<Tuple3<String, String, String>, String, TimeWindow> firstWindow = parseDataMap.keyBy(new FirstKeyByFunction())
|
||||
//初步聚合计算,增加自定义key 缓解数据倾斜
|
||||
WindowedStream<Tuple3<String, String, Map<String, Object>>, String, TimeWindow> firstWindow = parseDataMap.keyBy(new FirstKeyByFunction())
|
||||
.window(TumblingProcessingTimeWindows.of(Time.seconds(StreamAggregateConfig.FIRST_COUNT_WINDOW_TIME)));
|
||||
|
||||
SingleOutputStreamOperator<Tuple2<String, String>> metricCountWindow = firstWindow.process(new FirstCountWindowFunction())
|
||||
//初次聚合计算窗口
|
||||
SingleOutputStreamOperator<Tuple2<String, Map<String, Object>>> metricCountWindow = firstWindow.process(new FirstCountWindowFunction())
|
||||
.name("FirstCountWindow")
|
||||
.setParallelism(StreamAggregateConfig.FIRST_WINDOW_PARALLELISM);
|
||||
|
||||
WindowedStream<Tuple2<String, String>, String, TimeWindow> secondWindow = metricCountWindow.keyBy(new SecondKeyByFunction())
|
||||
//二次聚合计算,使用业务的key 进行数据汇总
|
||||
WindowedStream<Tuple2<String, Map<String, Object>>, String, TimeWindow> secondWindow = metricCountWindow.keyBy(new SecondKeyByFunction())
|
||||
.window(TumblingProcessingTimeWindows.of(Time.seconds(StreamAggregateConfig.SECOND_COUNT_WINDOW_TIME)));
|
||||
|
||||
SingleOutputStreamOperator<String> secondCountWindow = secondWindow.process(new SecondCountWindowFunction())
|
||||
//二次聚合计算窗口
|
||||
SingleOutputStreamOperator<Map<String, Object>> secondCountWindow = secondWindow.process(new SecondCountWindowFunction())
|
||||
.name("SecondCountWindow").setParallelism(StreamAggregateConfig.SECOND_WINDOW_PARALLELISM);
|
||||
|
||||
//拆解结果数据按protocol id循环输出
|
||||
SingleOutputStreamOperator<String> resultFlatMap = secondCountWindow.flatMap(new ResultFlatMapFunction())
|
||||
.name("ResultFlatMap").setParallelism(StreamAggregateConfig.SINK_PARALLELISM);
|
||||
|
||||
//输出到kafka
|
||||
resultFlatMap.addSink(KafkaProducer.getKafkaProducer()).name("LogSinkKafka")
|
||||
.setParallelism(StreamAggregateConfig.SINK_PARALLELISM).name(StreamAggregateConfig.SINK_KAFKA_TOPIC);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user