diff --git a/pom.xml b/pom.xml
index de2b189..7cdc355 100644
--- a/pom.xml
+++ b/pom.xml
@@ -36,7 +36,7 @@
1.13.1
1.0.0
1.0.8
-
+ provided
@@ -65,7 +65,7 @@
-
+
org.apache.maven.plugins
@@ -243,11 +243,11 @@
clickhouse-jdbc
0.2.6
-
+
com.arangodb
arangodb-java-driver
diff --git a/properties/default_config.properties b/properties/default_config.properties
index aaeccfc..25a975d 100644
--- a/properties/default_config.properties
+++ b/properties/default_config.properties
@@ -8,58 +8,40 @@ max.poll.records=5000
#kafka source poll bytes
max.partition.fetch.bytes=31457280
#====================Kafka KafkaProducer====================#
-#Number of retry attempts for the producer
+#Number of retry attempts for the producer
retries=0
-#Maximum time after a batch is created before it must be sent, whether or not it is full
+#Maximum time after a batch is created before it must be sent, whether or not it is full
linger.ms=10
-#If no response is received before the timeout, the client will resend the request if necessary
+#If no response is received before the timeout, the client will resend the request if necessary
request.timeout.ms=30000
-#The producer sends records in batches; batch size, default: 16384
+#The producer sends records in batches; batch size, default: 16384
batch.size=262144
-#Size of the buffer the producer uses to cache messages
+#Size of the buffer the producer uses to cache messages
#128M
buffer.memory=134217728
-#Maximum size of each request sent to the Kafka server, default 1048576
+#Maximum size of each request sent to the Kafka server, default 1048576
#10M
max.request.size=10485760
#====================kafka default====================#
-#kafka SASL authentication username (encrypted)
+#kafka SASL authentication username (encrypted)
kafka.user=nsyGpHKGFA4KW0zro9MDdw==
-#kafka SASL and SSL authentication password (encrypted)
+#kafka SASL and SSL authentication password (encrypted)
kafka.pin=6MleDyA3Z73HSaXiKsDJ2k7Ys8YWLhEJ
-#Producer ack
+#Producer ack
producer.ack=1
-#====================nacos default====================#
-#nacos username
-nacos.username=nacos
-#nacos password
-nacos.pin=nacos
-
-#nacos group
-nacos.group=Galaxy
-#====================Topology Default====================#
-#hbase table name
-hbase.table.name=tsg_galaxy:relation_framedip_account
-
-#Default mail charset
+#Default mail charset
mail.default.charset=UTF-8
-#0 = no validation, 1 = weak type validation
+#0 = no validation, 1 = weak type validation
log.transform.type=1
-#Maximum time between two outputs (in milliseconds)
+#Maximum time between two outputs (in milliseconds)
buffer.timeout=5000
-#====================Temporary config - to be removed====================#
-#Gateway APP_ID lookup endpoint
-app.id.http=http://192.168.44.20:9999/open-api/appDicList
-
-#app_id refresh interval; set to 0 to skip the cache refresh
-app.tick.tuple.freq.secs=0
\ No newline at end of file
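
The producer keys touched in the hunk above are standard kafka-clients settings. A minimal sketch of how they could be wired into a KafkaProducer, using the values from default_config.properties; the wrapper class and the String serializers are illustrative, not the project's actual wiring:

    import java.util.Properties;
    import org.apache.kafka.clients.producer.KafkaProducer;
    import org.apache.kafka.clients.producer.ProducerConfig;
    import org.apache.kafka.common.serialization.StringSerializer;

    public class ProducerConfigSketch {
        // Builds a producer from the values shown in default_config.properties.
        public static KafkaProducer<String, String> build(String bootstrapServers) {
            Properties props = new Properties();
            props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
            props.put(ProducerConfig.RETRIES_CONFIG, 0);                  // retries
            props.put(ProducerConfig.LINGER_MS_CONFIG, 10);               // linger.ms
            props.put(ProducerConfig.REQUEST_TIMEOUT_MS_CONFIG, 30000);   // request.timeout.ms
            props.put(ProducerConfig.BATCH_SIZE_CONFIG, 262144);          // batch.size
            props.put(ProducerConfig.BUFFER_MEMORY_CONFIG, 134217728);    // buffer.memory (128 MB)
            props.put(ProducerConfig.MAX_REQUEST_SIZE_CONFIG, 10485760);  // max.request.size (10 MB)
            props.put(ProducerConfig.ACKS_CONFIG, "1");                   // producer.ack
            props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
            props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
            return new KafkaProducer<>(props);
        }
    }
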
diff --git a/properties/service_flow_config.properties b/properties/service_flow_config.properties
index 91858cd..4de148b 100644
--- a/properties/service_flow_config.properties
+++ b/properties/service_flow_config.properties
@@ -8,51 +8,25 @@ sink.kafka.servers=192.168.45.102:9092
#zookeeper address, used to configure log_id
zookeeper.servers=192.168.45.102:2181
-#HBase zookeeper address, used to connect to HBase
-hbase.zookeeper.servers=192.168.45.102:2181
-
#--------------------------------HTTP/geolocation library------------------------------#
#Geolocation library path
tools.library=D:\\workerspace\\dat\\
-#--------------------------------nacos config------------------------------#
-#nacos address
-nacos.server=192.168.45.102:8848
-
-#nacos namespace
-nacos.schema.namespace=prod
-
-#nacos data id
-nacos.data.id=session_record.json
-
#--------------------------------Kafka consumer/producer config------------------------------#
-
-#kafka topic for receiving data
-source.kafka.topic=atest
-
-#Output topic for enriched data
-sink.kafka.topic=atest2
-
#Consumer group for reading the topic; stores the consumption offset for this spout id (can be named after the topology), and the stored offset position ensures the next read does not re-consume data
-group.id=flinktest-102
+group.id=knowledge-group
#--------------------------------topology config------------------------------#
#consumer parallelism
source.parallelism=1
-
#transform function parallelism
transform.parallelism=1
-
#kafka producer parallelism
sink.parallelism=1
-
#Data center id, valid range (0-31)
data.center.id.num=0
-#HBase refresh interval; set to 0 to skip the cache refresh
-hbase.tick.tuple.freq.secs=180
-
#--------------------------------default value config------------------------------#
#1 = connection log, 2 = dns log
log.need.complete=2
@@ -60,7 +34,6 @@ log.need.complete=2
#Producer compression type: none or snappy
producer.kafka.compression.type=none
-
source.kafka.topic.connection=connection_record_log
source.kafka.topic.sketch=connection_sketch_record_log
source.kafka.topic.dns=dns_record_log
@@ -78,36 +51,30 @@ sink.arango.table.r.resolve.domain2ip=R_RESOLVE_DOMAIN2IP
sink.arango.table.r.nx.domain2domain=R_NX_DOMAIN2DOMAIN
#ClickHouse ingestion
-ck.hosts=192.168.45.102:8123
+ck.hosts=192.168.45.102:8123,192.168.45.102:8123
ck.database=tsg_galaxy_v3
ck.username=default
ck.pin=galaxy2019
#In milliseconds
ck.connection.timeout=10000
ck.socket.timeout=300000
+ck.batch=10000
-#connection_record_log
-
-flink.watermark.max.orderness=50
-#Aggregation interval in seconds
+#flink log delay timeout (watermark lateness)
+flink.watermark.max.delay.time=50
+#ck relation aggregation interval in seconds
log.aggregate.duration=5
+#arangodb aggregation interval in seconds
log.aggregate.duration.graph=5
#arangoDB parameter config
arangoDB.host=192.168.45.102
-#arangoDB.host=192.168.40.224
arangoDB.port=8529
arangoDB.user=root
arangoDB.password=galaxy_2019
arangoDB.DB.name=knowledge
arangoDB.batch=100000
arangoDB.ttl=3600
-
-arangoDB.read.limit=
-update.arango.batch=10000
-
thread.pool.number=10
-thread.await.termination.time=10
-sink.batch.time.out=5
-sink.batch=10000
\ No newline at end of file
+sink.batch.delay.time=5
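
The key renamed above (flink.watermark.max.orderness to flink.watermark.max.delay.time) reads like a bounded-out-of-orderness watermark setting. A minimal sketch of how it could feed a Flink 1.13 WatermarkStrategy, assuming the value is interpreted as seconds (the file does not state the unit) and using a hypothetical LogRecord event type:

    import java.time.Duration;
    import org.apache.flink.api.common.eventtime.WatermarkStrategy;

    public class WatermarkSketch {
        /** Hypothetical event type standing in for the project's log POJO. */
        public interface LogRecord {
            long getTimestampMillis();
        }

        // Builds a bounded-out-of-orderness strategy from flink.watermark.max.delay.time.
        public static WatermarkStrategy<LogRecord> strategy(int maxDelaySeconds) {
            return WatermarkStrategy
                    .<LogRecord>forBoundedOutOfOrderness(Duration.ofSeconds(maxDelaySeconds))
                    .withTimestampAssigner((record, previousTimestamp) -> record.getTimestampMillis());
        }
    }
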
diff --git a/src/main/java/com/zdjizhi/common/FlowWriteConfig.java b/src/main/java/com/zdjizhi/common/FlowWriteConfig.java
index 57f09a0..3b68285 100644
--- a/src/main/java/com/zdjizhi/common/FlowWriteConfig.java
+++ b/src/main/java/com/zdjizhi/common/FlowWriteConfig.java
@@ -38,17 +38,6 @@ public class FlowWriteConfig {
*/
public static final String ENCODING = "UTF8";
- /**
- * Nacos
- */
- public static final String NACOS_SERVER = FlowWriteConfigurations.getStringProperty(0, "nacos.server");
- public static final String NACOS_SCHEMA_NAMESPACE = FlowWriteConfigurations.getStringProperty(0, "nacos.schema.namespace");
- public static final String NACOS_COMMON_NAMESPACE = FlowWriteConfigurations.getStringProperty(0, "nacos.common.namespace");
- public static final String NACOS_DATA_ID = FlowWriteConfigurations.getStringProperty(0, "nacos.data.id");
- public static final String NACOS_PIN = FlowWriteConfigurations.getStringProperty(1, "nacos.pin");
- public static final String NACOS_GROUP = FlowWriteConfigurations.getStringProperty(1, "nacos.group");
- public static final String NACOS_USERNAME = FlowWriteConfigurations.getStringProperty(1, "nacos.username");
-
/**
* System config
*/
@@ -61,12 +50,6 @@ public class FlowWriteConfig {
public static final Integer LOG_TRANSFORM_TYPE = FlowWriteConfigurations.getIntProperty(1, "log.transform.type");
public static final Integer BUFFER_TIMEOUT = FlowWriteConfigurations.getIntProperty(1, "buffer.timeout");
- /**
- * HBase
- */
- public static final Integer HBASE_TICK_TUPLE_FREQ_SECS = FlowWriteConfigurations.getIntProperty(0, "hbase.tick.tuple.freq.secs");
- public static final String HBASE_TABLE_NAME = FlowWriteConfigurations.getStringProperty(1, "hbase.table.name");
-
/**
* kafka common
*/
@@ -76,7 +59,6 @@ public class FlowWriteConfig {
/**
* kafka source config
*/
- public static final String SOURCE_KAFKA_TOPIC = FlowWriteConfigurations.getStringProperty(0, "source.kafka.topic");
public static final String GROUP_ID = FlowWriteConfigurations.getStringProperty(0, "group.id");
public static final String SESSION_TIMEOUT_MS = FlowWriteConfigurations.getStringProperty(1, "session.timeout.ms");
public static final String MAX_POLL_RECORDS = FlowWriteConfigurations.getStringProperty(1, "max.poll.records");
@@ -85,7 +67,6 @@ public class FlowWriteConfig {
/**
* kafka sink config
*/
- public static final String SINK_KAFKA_TOPIC = FlowWriteConfigurations.getStringProperty(0, "sink.kafka.topic");
public static final String PRODUCER_ACK = FlowWriteConfigurations.getStringProperty(1, "producer.ack");
public static final String PRODUCER_KAFKA_COMPRESSION_TYPE = FlowWriteConfigurations.getStringProperty(0, "producer.kafka.compression.type");
@@ -99,12 +80,6 @@ public class FlowWriteConfig {
public static final Integer BUFFER_MEMORY = FlowWriteConfigurations.getIntProperty(1, "buffer.memory");
public static final Integer MAX_REQUEST_SIZE = FlowWriteConfigurations.getIntProperty(1, "max.request.size");
- /**
- * http
- */
- public static final String APP_ID_HTTP = FlowWriteConfigurations.getStringProperty(1, "app.id.http");
- public static final Integer APP_TICK_TUPLE_FREQ_SECS = FlowWriteConfigurations.getIntProperty(1, "app.tick.tuple.freq.secs");
-
/**
* common config
*/
@@ -112,7 +87,6 @@ public class FlowWriteConfig {
public static final String SINK_KAFKA_SERVERS = FlowWriteConfigurations.getStringProperty(0, "sink.kafka.servers");
public static final String ZOOKEEPER_SERVERS = FlowWriteConfigurations.getStringProperty(0, "zookeeper.servers");
public static final String TOOLS_LIBRARY = FlowWriteConfigurations.getStringProperty(0, "tools.library");
- public static final String HBASE_ZOOKEEPER_SERVERS = FlowWriteConfigurations.getStringProperty(0, "hbase.zookeeper.servers");
/*
@@ -125,7 +99,7 @@ public class FlowWriteConfig {
public static final int CK_CONNECTION_TIMEOUT = FlowWriteConfigurations.getIntProperty(0, "ck.connection.timeout");
public static final int CK_SOCKET_TIMEOUT = FlowWriteConfigurations.getIntProperty(0, "ck.socket.timeout");
- public static final int FLINK_WATERMARK_MAX_ORDERNESS = FlowWriteConfigurations.getIntProperty(0, "flink.watermark.max.orderness");
+ public static final int FLINK_WATERMARK_MAX_DELAY_TIME = FlowWriteConfigurations.getIntProperty(0, "flink.watermark.max.delay.time");
public static final int LOG_AGGREGATE_DURATION = FlowWriteConfigurations.getIntProperty(0, "log.aggregate.duration");
public static final int LOG_AGGREGATE_DURATION_GRAPH = FlowWriteConfigurations.getIntProperty(0, "log.aggregate.duration.graph");
public static final String SOURCE_KAFKA_TOPIC_DNS = FlowWriteConfigurations.getStringProperty(0, "source.kafka.topic.dns");
@@ -147,8 +121,6 @@ public class FlowWriteConfig {
public static final String R_NX_DOMAIN2DOMAIN = FlowWriteConfigurations.getStringProperty(0, "sink.arango.table.r.nx.domain2domain");
-
-
public static final String ARANGODB_HOST = FlowWriteConfigurations.getStringProperty(0, "arangoDB.host");
public static final Integer ARANGODB_PORT = FlowWriteConfigurations.getIntProperty(0, "arangoDB.port");
public static final String ARANGODB_USER = FlowWriteConfigurations.getStringProperty(0, "arangoDB.user");
@@ -156,11 +128,8 @@ public class FlowWriteConfig {
public static final String ARANGODB_DB_NAME = FlowWriteConfigurations.getStringProperty(0, "arangoDB.DB.name");
public static final Integer ARANGODB_TTL = FlowWriteConfigurations.getIntProperty(0, "arangoDB.ttl");
public static final Integer ARANGODB_BATCH = FlowWriteConfigurations.getIntProperty(0, "arangoDB.batch");
-
- public static final Integer UPDATE_ARANGO_BATCH = FlowWriteConfigurations.getIntProperty(0, "update.arango.batch");
- public static final String ARANGODB_READ_LIMIT = FlowWriteConfigurations.getStringProperty(0, "arangoDB.read.limit");
public static final Integer THREAD_POOL_NUMBER = FlowWriteConfigurations.getIntProperty(0, "thread.pool.number");
- public static final Integer THREAD_AWAIT_TERMINATION_TIME = FlowWriteConfigurations.getIntProperty(0, "thread.await.termination.time");
- public static final Integer SINK_BATCH_TIME_OUT = FlowWriteConfigurations.getIntProperty(0, "sink.batch.time.out");
- public static final Integer SINK_BATCH = FlowWriteConfigurations.getIntProperty(0, "sink.batch");
+
+ public static final Integer SINK_BATCH_DELAY_TIME = FlowWriteConfigurations.getIntProperty(0, "sink.batch.delay.time");
+ public static final Integer CK_BATCH = FlowWriteConfigurations.getIntProperty(0, "ck.batch");
}
\ No newline at end of file
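
The new constants CK_BATCH and SINK_BATCH_DELAY_TIME replace the old sink.batch / sink.batch.time.out pair, which suggests a size-bounded, time-bounded flush toward ClickHouse. A minimal sketch of the size-bounded part using plain JDBC against the ck.* values above; the table and columns are hypothetical and this is not the project's actual sink:

    import java.sql.Connection;
    import java.sql.DriverManager;
    import java.sql.PreparedStatement;
    import java.sql.SQLException;
    import java.util.List;

    public class ClickHouseBatchSketch {
        // Flushes rows in chunks of ck.batch (10000) to keep individual inserts bounded.
        public static void write(List<String[]> rows) throws SQLException {
            String url = "jdbc:clickhouse://192.168.45.102:8123/tsg_galaxy_v3"; // ck.hosts / ck.database
            try (Connection conn = DriverManager.getConnection(url, "default", "galaxy2019");
                 PreparedStatement ps = conn.prepareStatement(
                         "INSERT INTO relation_demo (src, dst) VALUES (?, ?)")) { // hypothetical table
                int pending = 0;
                for (String[] row : rows) {
                    ps.setString(1, row[0]);
                    ps.setString(2, row[1]);
                    ps.addBatch();
                    if (++pending >= 10000) {   // ck.batch
                        ps.executeBatch();
                        pending = 0;
                    }
                }
                if (pending > 0) {
                    ps.executeBatch();          // flush the remainder
                }
            }
        }
    }
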
diff --git a/src/main/java/com/zdjizhi/common/CKWindow.java b/src/main/java/com/zdjizhi/etl/CKBatchWindow.java
similarity index 84%
rename from src/main/java/com/zdjizhi/common/CKWindow.java
rename to src/main/java/com/zdjizhi/etl/CKBatchWindow.java
index b7c7b8c..f66455f 100644
--- a/src/main/java/com/zdjizhi/common/CKWindow.java
+++ b/src/main/java/com/zdjizhi/etl/CKBatchWindow.java
@@ -1,4 +1,4 @@
-package com.zdjizhi.common;
+package com.zdjizhi.etl;
import org.apache.flink.streaming.api.functions.windowing.AllWindowFunction;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
@@ -9,7 +9,7 @@ import java.util.Iterator;
import java.util.List;
import java.util.Map;
-public class CKWindow implements AllWindowFunction
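
The renamed class body is cut off at this point; judging from its imports (AllWindowFunction, TimeWindow, List, Map), a batching window function along the following lines is plausible. This is a hypothetical sketch, not the actual CKBatchWindow implementation, and the element type and chunking by batch size are assumptions:

    package com.zdjizhi.etl;

    import org.apache.flink.streaming.api.functions.windowing.AllWindowFunction;
    import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
    import org.apache.flink.util.Collector;

    import java.util.ArrayList;
    import java.util.List;
    import java.util.Map;

    // Hypothetical stand-in for CKBatchWindow: groups the window's records into
    // fixed-size batches so a downstream ClickHouse sink can insert them in bulk.
    public class CKBatchWindowSketch
            implements AllWindowFunction<Map<String, Object>, List<Map<String, Object>>, TimeWindow> {

        private final int batchSize;

        public CKBatchWindowSketch(int batchSize) {
            this.batchSize = batchSize;
        }

        @Override
        public void apply(TimeWindow window,
                          Iterable<Map<String, Object>> values,
                          Collector<List<Map<String, Object>>> out) {
            List<Map<String, Object>> batch = new ArrayList<>(batchSize);
            for (Map<String, Object> value : values) {
                batch.add(value);
                if (batch.size() >= batchSize) {   // e.g. ck.batch
                    out.collect(batch);
                    batch = new ArrayList<>(batchSize);
                }
            }
            if (!batch.isEmpty()) {
                out.collect(batch);                // emit whatever is left at window end
            }
        }
    }
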