diff --git a/pom.xml b/pom.xml
index e4f3384..fe9656f 100644
--- a/pom.xml
+++ b/pom.xml
@@ -6,7 +6,7 @@
     <groupId>com.zdjizhi</groupId>
     <artifactId>log-olap-analysis-schema</artifactId>
-    <version>220822-VSYS</version>
+    <version>230317-DataSketches</version>
     <name>log-olap-analysis-schema</name>
     <url>http://www.example.com</url>
@@ -16,7 +16,7 @@
             <id>nexus</id>
             <name>Team Nexus Repository</name>
-            <url>http://192.168.40.125:8099/content/groups/public</url>
+            <url>http://192.168.40.153:8099/content/groups/public</url>
@@ -188,7 +188,7 @@
             <groupId>com.jayway.jsonpath</groupId>
             <artifactId>json-path</artifactId>
-            <version>2.4.0</version>
+            <version>2.7.0</version>
@@ -209,6 +209,20 @@
             <groupId>com.alibaba.nacos</groupId>
             <artifactId>nacos-client</artifactId>
             <version>${nacos.version}</version>
+            <exclusions>
+                <exclusion>
+                    <groupId>com.google.guava</groupId>
+                    <artifactId>guava</artifactId>
+                </exclusion>
+                <exclusion>
+                    <artifactId>slf4j-log4j12</artifactId>
+                    <groupId>org.slf4j</groupId>
+                </exclusion>
+                <exclusion>
+                    <artifactId>log4j-over-slf4j</artifactId>
+                    <groupId>org.slf4j</groupId>
+                </exclusion>
+            </exclusions>
@@ -218,6 +232,12 @@
             <scope>test</scope>
+        <dependency>
+            <groupId>org.apache.datasketches</groupId>
+            <artifactId>datasketches-java</artifactId>
+            <version>3.2.0</version>
+        </dependency>
+
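Note: this change only adds the datasketches-java 3.2.0 dependency; nothing in the diff exercises it yet. A minimal sketch of the kind of use it enables, approximate distinct counting with an HLL sketch (the lgK value and the sample IPs below are illustrative assumptions, not taken from this change):

import org.apache.datasketches.hll.HllSketch;
import org.apache.datasketches.hll.Union;

public class HllSketchExample {
    public static void main(String[] args) {
        // One sketch per window/key; lgConfigK = 12 gives roughly 1.6% relative error.
        HllSketch sketch = new HllSketch(12);
        sketch.update("10.0.0.1");
        sketch.update("10.0.0.2");
        sketch.update("10.0.0.1");              // duplicates do not grow the estimate
        double distinct = sketch.getEstimate(); // ~2.0

        // Sketches built on parallel subtasks can be merged without losing accuracy.
        Union union = new Union(12);
        union.update(sketch);
        byte[] state = union.getResult().toCompactByteArray(); // compact, serializable state
        System.out.println(distinct + " distinct values, " + state.length + " bytes of sketch state");
    }
}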
diff --git a/src/main/java/com/zdjizhi/common/StreamAggregateConfig.java b/src/main/java/com/zdjizhi/common/StreamAggregateConfig.java
index 0243f97..9ea9df5 100644
--- a/src/main/java/com/zdjizhi/common/StreamAggregateConfig.java
+++ b/src/main/java/com/zdjizhi/common/StreamAggregateConfig.java
@@ -15,8 +15,24 @@ public class StreamAggregateConfig {
encryptor.setPassword("galaxy");
}
+    /**
+     * Default split delimiter
+     */
     public static final String FORMAT_SPLITTER = ",";
+    /**
+     * Protocol separator; needs to be escaped
+     */
     public static final String PROTOCOL_SPLITTER = "\\.";
+    /**
+     * Tag marking whether a field is a log field or a schema-specified field
+     */
+    public static final String IS_JSON_KEY_TAG = "$.";
+
+    /**
+     * Separator for joining if-function conditions
+     */
+    public static final String IF_CONDITION_SPLITTER = "=";
+
/**
* Nacos
@@ -27,7 +43,7 @@ public class StreamAggregateConfig {
public static final String NACOS_PIN = StreamAggregateConfigurations.getStringProperty(1, "nacos.pin");
public static final String NACOS_GROUP = StreamAggregateConfigurations.getStringProperty(1, "nacos.group");
public static final String NACOS_USERNAME = StreamAggregateConfigurations.getStringProperty(1, "nacos.username");
-
+
/**
* System
*/
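Note: IS_JSON_KEY_TAG and IF_CONDITION_SPLITTER are only declared here; the code that consumes them is outside this diff. One plausible reading, sketched with illustrative names (FieldResolverSketch and its map-lookup semantics are assumptions, not the project's actual logic): a field spec starting with "$." is resolved against the parsed log, anything else is treated as a schema-specified literal, and an if-condition of the form "field=expected" splits on "=".

import java.util.Map;

import com.zdjizhi.common.StreamAggregateConfig;

final class FieldResolverSketch {

    // Resolve a configured field spec against a parsed log record.
    static Object resolveField(String field, Map<String, Object> log) {
        if (field.startsWith(StreamAggregateConfig.IS_JSON_KEY_TAG)) {
            // "$.client_ip" -> look up "client_ip" in the parsed log map
            return log.get(field.substring(StreamAggregateConfig.IS_JSON_KEY_TAG.length()));
        }
        // no "$." prefix: the spec itself is the schema-specified value
        return field;
    }

    // Evaluate a "field=expected" condition against a parsed log record.
    static boolean evaluateIf(String condition, Map<String, Object> log) {
        String[] parts = condition.split(StreamAggregateConfig.IF_CONDITION_SPLITTER, 2);
        return parts.length == 2
                && String.valueOf(resolveField(parts[0], log)).equals(parts[1]);
    }
}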
diff --git a/src/main/java/com/zdjizhi/topology/StreamAggregateTopology.java b/src/main/java/com/zdjizhi/topology/StreamAggregateTopology.java
index c2d4f31..1813a89 100644
--- a/src/main/java/com/zdjizhi/topology/StreamAggregateTopology.java
+++ b/src/main/java/com/zdjizhi/topology/StreamAggregateTopology.java
@@ -3,10 +3,10 @@ package com.zdjizhi.topology;
import cn.hutool.log.Log;
import cn.hutool.log.LogFactory;
import com.zdjizhi.common.StreamAggregateConfig;
-import com.zdjizhi.utils.functions.*;
import com.zdjizhi.utils.functions.keyby.FirstKeyByFunction;
import com.zdjizhi.utils.functions.keyby.SecondKeyByFunction;
import com.zdjizhi.utils.functions.parse.ParseMapFunction;
+import com.zdjizhi.utils.functions.result.ResultFlatMapFunction;
import com.zdjizhi.utils.functions.statistics.FirstCountWindowFunction;
import com.zdjizhi.utils.functions.statistics.SecondCountWindowFunction;
import com.zdjizhi.utils.kafka.KafkaConsumer;
@@ -21,6 +21,8 @@ import org.apache.flink.streaming.api.windowing.assigners.TumblingProcessingTime
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
+import java.util.Map;
+
/**
* @author qidaijie
@@ -38,29 +40,37 @@ public class StreamAggregateTopology {
        //Maximum time between two outputs (in milliseconds)
environment.setBufferTimeout(StreamAggregateConfig.BUFFER_TIMEOUT);
+        //Parse the raw logs
DataStream streamSource = environment.addSource(KafkaConsumer.getKafkaConsumer())
.setParallelism(StreamAggregateConfig.SOURCE_PARALLELISM).name(StreamAggregateConfig.SOURCE_KAFKA_TOPIC);
-        SingleOutputStreamOperator<Map<String, Object>> parseDataMap = streamSource.map(new ParseMapFunction())
+        //Parse the raw logs for the first-stage aggregation and add a custom key to mitigate data skew
+        SingleOutputStreamOperator<Tuple2<String, Map<String, Object>>> parseDataMap = streamSource.map(new ParseMapFunction())
.name("ParseDataMap")
.setParallelism(StreamAggregateConfig.PARSE_PARALLELISM);
-        WindowedStream<Map<String, Object>, String, TimeWindow> firstWindow = parseDataMap.keyBy(new FirstKeyByFunction())
+        //First-stage aggregation: key by a custom key to mitigate data skew
+        WindowedStream<Tuple2<String, Map<String, Object>>, String, TimeWindow> firstWindow = parseDataMap.keyBy(new FirstKeyByFunction())
.window(TumblingProcessingTimeWindows.of(Time.seconds(StreamAggregateConfig.FIRST_COUNT_WINDOW_TIME)));
-        SingleOutputStreamOperator<Map<String, Object>> metricCountWindow = firstWindow.process(new FirstCountWindowFunction())
+        //First aggregation window
+        SingleOutputStreamOperator<Tuple2<String, Map<String, Object>>> metricCountWindow = firstWindow.process(new FirstCountWindowFunction())
.name("FirstCountWindow")
.setParallelism(StreamAggregateConfig.FIRST_WINDOW_PARALLELISM);
-        WindowedStream<Map<String, Object>, String, TimeWindow> secondWindow = metricCountWindow.keyBy(new SecondKeyByFunction())
+        //Second-stage aggregation: key by the business key to aggregate the data
+        WindowedStream<Tuple2<String, Map<String, Object>>, String, TimeWindow> secondWindow = metricCountWindow.keyBy(new SecondKeyByFunction())
.window(TumblingProcessingTimeWindows.of(Time.seconds(StreamAggregateConfig.SECOND_COUNT_WINDOW_TIME)));
- SingleOutputStreamOperator secondCountWindow = secondWindow.process(new SecondCountWindowFunction())
+        //Second aggregation window
+ SingleOutputStreamOperator
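Note: the topology changes above (the hunk is truncated here) describe a two-stage aggregation in which the parse step attaches a custom key so that hot business keys are spread across subtasks before the second, business-keyed window. A sketch of what the first-stage key selector could look like under that reading (the class body is illustrative; the real FirstKeyByFunction is not shown in this diff, only the assumption that the custom key travels in Tuple2.f0):

import java.util.Map;

import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.tuple.Tuple2;

// First stage: key on the pre-computed (salted) key carried in f0 to spread skewed keys.
// The second stage would key on the business key alone so the partial results are merged back together.
public class FirstStageKeySelectorSketch
        implements KeySelector<Tuple2<String, Map<String, Object>>, String> {

    @Override
    public String getKey(Tuple2<String, Map<String, Object>> value) {
        return value.f0;
    }
}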