From a9d7332acfe03c76ea2182526ec47754dc482b38 Mon Sep 17 00:00:00 2001 From: wangkuan Date: Wed, 10 Apr 2024 16:37:04 +0800 Subject: [PATCH] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E5=85=B7=E4=BD=93=E4=BB=BB?= =?UTF-8?q?=E5=8A=A1=E9=85=8D=E7=BD=AE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../TSG/集中式/dos_event_kafka_to_clickhouse | 49 ++++++ .../TSG/集中式/etl_proxy_event_kafka_to_clickhouse | 148 ++++++++++++++++ .../TSG/集中式/etl_session_record_kafka_to_clickhouse | 148 ++++++++++++++++ .../集中式/etl_transaction_record_kafka_to_clickhouse | 146 ++++++++++++++++ .../TSG/集中式/etl_voip_record_kafka_to_clickhouse | 148 ++++++++++++++++ .../Groot-Stream/集中式/dos_event_kafka_to_clickhouse | 49 ++++++ .../集中式/etl_proxy_event_kafka_to_clickhouse | 157 +++++++++++++++++ .../集中式/etl_session_record_kafka_to_clickhouse | 161 ++++++++++++++++++ .../集中式/etl_transaction_record_kafka_to_clickhouse | 155 +++++++++++++++++ .../集中式/etl_voip_record_kafka_to_clickhouse | 156 +++++++++++++++++ .../TSG-24.03/Groot-Stream/README.md | 7 + .../TSG-24.03/Groot-Stream/grootstream.yaml | 6 + .../Groot-Stream/集中式/dos_event_kafka_to_clickhouse | 49 ++++++ .../集中式/etl_proxy_event_kafka_to_clickhouse | 148 ++++++++++++++++ .../集中式/etl_session_record_kafka_to_clickhouse | 148 ++++++++++++++++ .../集中式/etl_transaction_record_kafka_to_clickhouse | 146 ++++++++++++++++ .../集中式/etl_voip_record_kafka_to_clickhouse | 148 ++++++++++++++++ 17 files changed, 1969 insertions(+) create mode 100644 Groot-Stream 最新全量配置模版/TSG/集中式/dos_event_kafka_to_clickhouse create mode 100644 Groot-Stream 最新全量配置模版/TSG/集中式/etl_proxy_event_kafka_to_clickhouse create mode 100644 Groot-Stream 最新全量配置模版/TSG/集中式/etl_session_record_kafka_to_clickhouse create mode 100644 Groot-Stream 最新全量配置模版/TSG/集中式/etl_transaction_record_kafka_to_clickhouse create mode 100644 Groot-Stream 最新全量配置模版/TSG/集中式/etl_voip_record_kafka_to_clickhouse create mode 100644 
TSG发布版本更新记录/TSG-24.02.1/Groot-Stream/集中式/dos_event_kafka_to_clickhouse create mode 100644 TSG发布版本更新记录/TSG-24.02.1/Groot-Stream/集中式/etl_proxy_event_kafka_to_clickhouse create mode 100644 TSG发布版本更新记录/TSG-24.02.1/Groot-Stream/集中式/etl_session_record_kafka_to_clickhouse create mode 100644 TSG发布版本更新记录/TSG-24.02.1/Groot-Stream/集中式/etl_transaction_record_kafka_to_clickhouse create mode 100644 TSG发布版本更新记录/TSG-24.02.1/Groot-Stream/集中式/etl_voip_record_kafka_to_clickhouse create mode 100644 TSG发布版本更新记录/TSG-24.03/Groot-Stream/README.md create mode 100644 TSG发布版本更新记录/TSG-24.03/Groot-Stream/grootstream.yaml create mode 100644 TSG发布版本更新记录/TSG-24.03/Groot-Stream/集中式/dos_event_kafka_to_clickhouse create mode 100644 TSG发布版本更新记录/TSG-24.03/Groot-Stream/集中式/etl_proxy_event_kafka_to_clickhouse create mode 100644 TSG发布版本更新记录/TSG-24.03/Groot-Stream/集中式/etl_session_record_kafka_to_clickhouse create mode 100644 TSG发布版本更新记录/TSG-24.03/Groot-Stream/集中式/etl_transaction_record_kafka_to_clickhouse create mode 100644 TSG发布版本更新记录/TSG-24.03/Groot-Stream/集中式/etl_voip_record_kafka_to_clickhouse diff --git a/Groot-Stream 最新全量配置模版/TSG/集中式/dos_event_kafka_to_clickhouse b/Groot-Stream 最新全量配置模版/TSG/集中式/dos_event_kafka_to_clickhouse new file mode 100644 index 0000000..b87db67 --- /dev/null +++ b/Groot-Stream 最新全量配置模版/TSG/集中式/dos_event_kafka_to_clickhouse @@ -0,0 +1,49 @@ +sources: + kafka_source: + type: kafka + properties: + topic: DOS-EVENT + kafka.bootstrap.servers: 192.168.44.11:9094,192.168.44.13:9094,192.168.44.14:9094,192.168.44.15:9094,192.168.44.16:9094 + kafka.session.timeout.ms: 60000 + kafka.max.poll.records: 3000 + kafka.max.partition.fetch.bytes: 31457280 + kafka.security.protocol: SASL_PLAINTEXT + kafka.ssl.keystore.location: + kafka.ssl.keystore.password: + kafka.ssl.truststore.location: + kafka.ssl.truststore.password: + kafka.ssl.key.password: + kafka.sasl.mechanism: PLAIN + kafka.sasl.jaas.config: 
454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a7ff0b2d3889a424249967b3870b50993d9644f239f0de82cdb13bdb502959e16afadffa49ef1e1d2b9c9b5113e619817 + kafka.buffer.memory: + kafka.group.id: dos_event_kafka_to_clickhouse-20231221 + kafka.auto.offset.reset: latest + kafka.max.request.size: + kafka.compression.type: none + format: json + + +sinks: + clickhouse_sink: + type: clickhouse + properties: + host: 192.168.44.13:9001,192.168.44.14:9001,192.168.44.15:9001,192.168.44.16:9001 + table: tsg_galaxy_v3.dos_event_local + batch.size: 100000 + batch.interval: 30s + connection.user: e54c9568586180eede1506eecf3574e9 + connection.password: 86cf0e2ffba3f541a6c6761313e5cc7e + + +application: + env: + name: dos_event_kafka_to_clickhouse + shade.identifier: aes + pipeline: + object-reuse: true # [boolean] Object Reuse, default is false + topology: + - name: kafka_source + downstream: [clickhouse_sink] + - name: clickhouse_sink + + diff --git a/Groot-Stream 最新全量配置模版/TSG/集中式/etl_proxy_event_kafka_to_clickhouse b/Groot-Stream 最新全量配置模版/TSG/集中式/etl_proxy_event_kafka_to_clickhouse new file mode 100644 index 0000000..39ab825 --- /dev/null +++ b/Groot-Stream 最新全量配置模版/TSG/集中式/etl_proxy_event_kafka_to_clickhouse @@ -0,0 +1,148 @@ +sources: + kafka_source: + type: kafka + # fields: # [array of object] Field List, if not set, all fields(Map) will be output. 
+ # watermark_timestamp: common_recv_time # [string] Watermark Field Name + # watermark_timestamp_unit: ms # [string] Watermark Unit, default is ms + # watermark_lag: 60 # [number] Watermark Lag, default is 60 + properties: + topic: PROXY-EVENT + kafka.bootstrap.servers: 192.168.44.11:9094,192.168.44.13:9094,192.168.44.14:9094,192.168.44.15:9094,192.168.44.16:9094 + kafka.session.timeout.ms: 60000 + kafka.max.poll.records: 3000 + kafka.max.partition.fetch.bytes: 31457280 + kafka.security.protocol: SASL_PLAINTEXT + kafka.ssl.keystore.location: + kafka.ssl.keystore.password: + kafka.ssl.truststore.location: + kafka.ssl.truststore.password: + kafka.ssl.key.password: + kafka.sasl.mechanism: PLAIN + kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a7ff0b2d3889a424249967b3870b50993d9644f239f0de82cdb13bdb502959e16afadffa49ef1e1d2b9c9b5113e619817 + kafka.buffer.memory: + kafka.group.id: etl_proxy_event_kafka_to_clickhouse-20231221 + kafka.auto.offset.reset: latest + kafka.max.request.size: + kafka.compression.type: none + format: json + +processing_pipelines: + etl_processor: # [object] Processing Pipeline + type: com.geedgenetworks.core.processor.projection.ProjectionProcessorImpl + remove_fields: + output_fields: + properties: + key: value + functions: # [array of object] Function List + + - function: SNOWFLAKE_ID + lookup_fields: [''] + output_fields: [log_id] + parameters: + data_center_id_num: 1 + + - function: JSON_EXTRACT + lookup_fields: [device_tag] + output_fields: [data_center] + filter: + parameters: + value_expression: $.tags[?(@.tag=='data_center')][0].value + + - function: JSON_EXTRACT + lookup_fields: [device_tag] + output_fields: [device_group] + filter: + parameters: + value_expression: $.tags[?(@.tag=='device_group')][0].value + + - function: CURRENT_UNIX_TIMESTAMP + output_fields: [processing_time] + parameters: + precision: seconds + + - function: 
UNIX_TIMESTAMP_CONVERTER + lookup_fields: [__timestamp] + output_fields: [recv_time] + parameters: + precision: seconds + + - function: EVAL + output_fields: [ingestion_time] + parameters: + value_expression: recv_time + + - function: DOMAIN + lookup_fields: [http_host, ssl_sni, dtls_sni, quic_sni] + output_fields: [server_domain] + parameters: + option: FIRST_SIGNIFICANT_SUBDOMAIN + + - function: BASE64_DECODE_TO_STRING + output_fields: [mail_subject] + parameters: + value_field: mail_subject + charset_field: mail_subject_charset + + - function: BASE64_DECODE_TO_STRING + output_fields: [mail_attachment_name] + parameters: + value_field: mail_attachment_name + charset_field: mail_attachment_name_charset + + - function: PATH_COMBINE + lookup_fields: [rtp_pcap_path] + output_fields: [rtp_pcap_path] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, rtp_pcap_path] + + - function: PATH_COMBINE + lookup_fields: [http_request_body] + output_fields: [http_request_body] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, http_request_body] + + - function: PATH_COMBINE + lookup_fields: [http_response_body] + output_fields: [http_response_body] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, http_response_body] + + - function: PATH_COMBINE + lookup_fields: [mail_eml_file] + output_fields: [mail_eml_file] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, mail_eml_file] + + - function: PATH_COMBINE + lookup_fields: [packet_capture_file] + output_fields: [packet_capture_file] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, packet_capture_file] + +sinks: + clickhouse_sink: + type: clickhouse + properties: + host: 192.168.44.13:9001,192.168.44.14:9001,192.168.44.15:9001,192.168.44.16:9001 + table: tsg_galaxy_v3.proxy_event_local + batch.size: 100000 + batch.interval: 30s + connection.user: e54c9568586180eede1506eecf3574e9 + connection.password: 
86cf0e2ffba3f541a6c6761313e5cc7e + + +application: + + env: # [object] Environment Variables + name: etl_proxy_event_kafka_to_clickhouse # [string] Job Name + shade.identifier: aes + pipeline: + object-reuse: true # [boolean] Object Reuse, default is false + topology: + - name: kafka_source + downstream: [etl_processor] + - name: etl_processor + downstream: [clickhouse_sink] + - name: clickhouse_sink + + diff --git a/Groot-Stream 最新全量配置模版/TSG/集中式/etl_session_record_kafka_to_clickhouse b/Groot-Stream 最新全量配置模版/TSG/集中式/etl_session_record_kafka_to_clickhouse new file mode 100644 index 0000000..643fa48 --- /dev/null +++ b/Groot-Stream 最新全量配置模版/TSG/集中式/etl_session_record_kafka_to_clickhouse @@ -0,0 +1,148 @@ +sources: + kafka_source: + type: kafka + # fields: # [array of object] Field List, if not set, all fields(Map) will be output. + # watermark_timestamp: common_recv_time # [string] Watermark Field Name + # watermark_timestamp_unit: ms # [string] Watermark Unit, default is ms + # watermark_lag: 60 # [number] Watermark Lag, default is 60 + properties: + topic: SESSION-RECORD + kafka.bootstrap.servers: 192.168.44.11:9094,192.168.44.13:9094,192.168.44.14:9094,192.168.44.15:9094,192.168.44.16:9094 + kafka.session.timeout.ms: 60000 + kafka.max.poll.records: 3000 + kafka.max.partition.fetch.bytes: 31457280 + kafka.security.protocol: SASL_PLAINTEXT + kafka.ssl.keystore.location: + kafka.ssl.keystore.password: + kafka.ssl.truststore.location: + kafka.ssl.truststore.password: + kafka.ssl.key.password: + kafka.sasl.mechanism: PLAIN + kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a7ff0b2d3889a424249967b3870b50993d9644f239f0de82cdb13bdb502959e16afadffa49ef1e1d2b9c9b5113e619817 + kafka.buffer.memory: + kafka.group.id: etl_session_record_kafka_to_clickhouse-20230125 + kafka.auto.offset.reset: latest + kafka.max.request.size: + kafka.compression.type: none + format: json + 
+processing_pipelines: + etl_processor: # [object] Processing Pipeline + type: com.geedgenetworks.core.processor.projection.ProjectionProcessorImpl + remove_fields: + output_fields: + properties: + key: value + functions: # [array of object] Function List + + - function: SNOWFLAKE_ID + lookup_fields: [''] + output_fields: [log_id] + parameters: + data_center_id_num: 1 + + - function: JSON_EXTRACT + lookup_fields: [device_tag] + output_fields: [data_center] + filter: + parameters: + value_expression: $.tags[?(@.tag=='data_center')][0].value + + - function: JSON_EXTRACT + lookup_fields: [device_tag] + output_fields: [device_group] + filter: + parameters: + value_expression: $.tags[?(@.tag=='device_group')][0].value + + - function: CURRENT_UNIX_TIMESTAMP + output_fields: [processing_time] + parameters: + precision: seconds + + - function: UNIX_TIMESTAMP_CONVERTER + lookup_fields: [__timestamp] + output_fields: [recv_time] + parameters: + precision: seconds + + - function: EVAL + output_fields: [ingestion_time] + parameters: + value_expression: recv_time + + - function: DOMAIN + lookup_fields: [http_host, ssl_sni, dtls_sni, quic_sni] + output_fields: [server_domain] + parameters: + option: FIRST_SIGNIFICANT_SUBDOMAIN + + - function: BASE64_DECODE_TO_STRING + output_fields: [mail_subject] + parameters: + value_field: mail_subject + charset_field: mail_subject_charset + + - function: BASE64_DECODE_TO_STRING + output_fields: [mail_attachment_name] + parameters: + value_field: mail_attachment_name + charset_field: mail_attachment_name_charset + + - function: PATH_COMBINE + lookup_fields: [rtp_pcap_path] + output_fields: [rtp_pcap_path] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, rtp_pcap_path] + + - function: PATH_COMBINE + lookup_fields: [http_request_body] + output_fields: [http_request_body] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, http_request_body] + + - function: PATH_COMBINE + lookup_fields: 
[http_response_body] + output_fields: [http_response_body] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, http_response_body] + + - function: PATH_COMBINE + lookup_fields: [mail_eml_file] + output_fields: [mail_eml_file] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, mail_eml_file] + + - function: PATH_COMBINE + lookup_fields: [packet_capture_file] + output_fields: [packet_capture_file] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, packet_capture_file] + +sinks: + clickhouse_sink: + type: clickhouse + properties: + host: 192.168.44.13:9001,192.168.44.14:9001,192.168.44.15:9001,192.168.44.16:9001 + table: tsg_galaxy_v3.session_record_local + batch.size: 100000 + batch.interval: 30s + connection.user: e54c9568586180eede1506eecf3574e9 + connection.password: 86cf0e2ffba3f541a6c6761313e5cc7e + + +application: + + env: # [object] Environment Variables + name: etl_session_record_kafka_to_clickhouse # [string] Job Name + shade.identifier: aes + pipeline: + object-reuse: true # [boolean] Object Reuse, default is false + topology: + - name: kafka_source + downstream: [etl_processor] + - name: etl_processor + downstream: [clickhouse_sink] + - name: clickhouse_sink + + diff --git a/Groot-Stream 最新全量配置模版/TSG/集中式/etl_transaction_record_kafka_to_clickhouse b/Groot-Stream 最新全量配置模版/TSG/集中式/etl_transaction_record_kafka_to_clickhouse new file mode 100644 index 0000000..4d28714 --- /dev/null +++ b/Groot-Stream 最新全量配置模版/TSG/集中式/etl_transaction_record_kafka_to_clickhouse @@ -0,0 +1,146 @@ +sources: + kafka_source: + type: kafka + # fields: # [array of object] Field List, if not set, all fields(Map) will be output. 
+ # watermark_timestamp: common_recv_time # [string] Watermark Field Name + # watermark_timestamp_unit: ms # [string] Watermark Unit, default is ms + # watermark_lag: 60 # [number] Watermark Lag, default is 60 + properties: + topic: TRANSACTION-RECORD + kafka.bootstrap.servers: 192.168.44.11:9094,192.168.44.13:9094,192.168.44.14:9094,192.168.44.15:9094,192.168.44.16:9094 + kafka.session.timeout.ms: 60000 + kafka.max.poll.records: 3000 + kafka.max.partition.fetch.bytes: 31457280 + kafka.security.protocol: SASL_PLAINTEXT + kafka.ssl.keystore.location: + kafka.ssl.keystore.password: + kafka.ssl.truststore.location: + kafka.ssl.truststore.password: + kafka.ssl.key.password: + kafka.sasl.mechanism: PLAIN + kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a7ff0b2d3889a424249967b3870b50993d9644f239f0de82cdb13bdb502959e16afadffa49ef1e1d2b9c9b5113e619817 + kafka.buffer.memory: + kafka.group.id: etl_transaction_record_kafka_to_clickhouse-20240308 + kafka.auto.offset.reset: latest + kafka.max.request.size: + kafka.compression.type: none + format: json + +processing_pipelines: + etl_processor: # [object] Processing Pipeline + type: com.geedgenetworks.core.processor.projection.ProjectionProcessorImpl + remove_fields: + output_fields: + properties: + key: value + functions: # [array of object] Function List + + - function: SNOWFLAKE_ID + lookup_fields: [''] + output_fields: [log_id] + parameters: + data_center_id_num: 1 + + - function: JSON_EXTRACT + lookup_fields: [device_tag] + output_fields: [data_center] + filter: + parameters: + value_expression: $.tags[?(@.tag=='data_center')][0].value + + - function: JSON_EXTRACT + lookup_fields: [device_tag] + output_fields: [device_group] + filter: + parameters: + value_expression: $.tags[?(@.tag=='device_group')][0].value + + - function: CURRENT_UNIX_TIMESTAMP + output_fields: [processing_time] + parameters: + precision: seconds + + - 
function: UNIX_TIMESTAMP_CONVERTER + lookup_fields: [__timestamp] + output_fields: [recv_time] + parameters: + precision: seconds + + - function: EVAL + output_fields: [ingestion_time] + parameters: + value_expression: recv_time + + - function: DOMAIN + lookup_fields: [http_host, ssl_sni, dtls_sni, quic_sni] + output_fields: [server_domain] + parameters: + option: FIRST_SIGNIFICANT_SUBDOMAIN + + - function: BASE64_DECODE_TO_STRING + output_fields: [mail_subject] + parameters: + value_field: mail_subject + charset_field: mail_subject_charset + + - function: BASE64_DECODE_TO_STRING + output_fields: [mail_attachment_name] + parameters: + value_field: mail_attachment_name + charset_field: mail_attachment_name_charset + + - function: PATH_COMBINE + lookup_fields: [rtp_pcap_path] + output_fields: [rtp_pcap_path] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, rtp_pcap_path] + + - function: PATH_COMBINE + lookup_fields: [http_request_body] + output_fields: [http_request_body] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, http_request_body] + + - function: PATH_COMBINE + lookup_fields: [http_response_body] + output_fields: [http_response_body] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, http_response_body] + + - function: PATH_COMBINE + lookup_fields: [mail_eml_file] + output_fields: [mail_eml_file] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, mail_eml_file] + + - function: PATH_COMBINE + lookup_fields: [packet_capture_file] + output_fields: [packet_capture_file] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, packet_capture_file] + +sinks: + clickhouse_sink: + type: clickhouse + properties: + host: 192.168.44.13:9001,192.168.44.14:9001,192.168.44.15:9001,192.168.44.16:9001 + table: tsg_galaxy_v3.transaction_record_local + batch.size: 100000 + batch.interval: 30s + connection.user: e54c9568586180eede1506eecf3574e9 + 
connection.password: 86cf0e2ffba3f541a6c6761313e5cc7e + + +application: + + env: # [object] Environment Variables + name: etl_transaction_record_kafka_to_clickhouse # [string] Job Name + shade.identifier: aes + pipeline: + object-reuse: true # [boolean] Object Reuse, default is false + topology: + - name: kafka_source + downstream: [etl_processor] + - name: etl_processor + downstream: [clickhouse_sink] + - name: clickhouse_sink diff --git a/Groot-Stream 最新全量配置模版/TSG/集中式/etl_voip_record_kafka_to_clickhouse b/Groot-Stream 最新全量配置模版/TSG/集中式/etl_voip_record_kafka_to_clickhouse new file mode 100644 index 0000000..90d3179 --- /dev/null +++ b/Groot-Stream 最新全量配置模版/TSG/集中式/etl_voip_record_kafka_to_clickhouse @@ -0,0 +1,148 @@ +sources: + kafka_source: + type: kafka + # fields: # [array of object] Field List, if not set, all fields(Map) will be output. + # watermark_timestamp: common_recv_time # [string] Watermark Field Name + # watermark_timestamp_unit: ms # [string] Watermark Unit, default is ms + # watermark_lag: 60 # [number] Watermark Lag, default is 60 + properties: + topic: VOIP-CONVERSATION-RECORD + kafka.bootstrap.servers: 192.168.44.11:9094,192.168.44.13:9094,192.168.44.14:9094,192.168.44.15:9094,192.168.44.16:9094 + kafka.session.timeout.ms: 60000 + kafka.max.poll.records: 3000 + kafka.max.partition.fetch.bytes: 31457280 + kafka.security.protocol: SASL_PLAINTEXT + kafka.ssl.keystore.location: + kafka.ssl.keystore.password: + kafka.ssl.truststore.location: + kafka.ssl.truststore.password: + kafka.ssl.key.password: + kafka.sasl.mechanism: PLAIN + kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a7ff0b2d3889a424249967b3870b50993d9644f239f0de82cdb13bdb502959e16afadffa49ef1e1d2b9c9b5113e619817 + kafka.buffer.memory: + kafka.group.id: etl_voip_record_kafka_to_clickhouse-20231221 + kafka.auto.offset.reset: latest + kafka.max.request.size: + kafka.compression.type: none + 
format: json + +processing_pipelines: + etl_processor: # [object] Processing Pipeline + type: com.geedgenetworks.core.processor.projection.ProjectionProcessorImpl + remove_fields: + output_fields: + properties: + key: value + functions: # [array of object] Function List + + - function: SNOWFLAKE_ID + lookup_fields: [''] + output_fields: [log_id] + parameters: + data_center_id_num: 1 + + - function: JSON_EXTRACT + lookup_fields: [device_tag] + output_fields: [data_center] + filter: + parameters: + value_expression: $.tags[?(@.tag=='data_center')][0].value + + - function: JSON_EXTRACT + lookup_fields: [device_tag] + output_fields: [device_group] + filter: + parameters: + value_expression: $.tags[?(@.tag=='device_group')][0].value + + - function: CURRENT_UNIX_TIMESTAMP + output_fields: [processing_time] + parameters: + precision: seconds + + - function: UNIX_TIMESTAMP_CONVERTER + lookup_fields: [__timestamp] + output_fields: [recv_time] + parameters: + precision: seconds + + - function: EVAL + output_fields: [ingestion_time] + parameters: + value_expression: recv_time + + - function: DOMAIN + lookup_fields: [http_host, ssl_sni, dtls_sni, quic_sni] + output_fields: [server_domain] + parameters: + option: FIRST_SIGNIFICANT_SUBDOMAIN + + - function: BASE64_DECODE_TO_STRING + output_fields: [mail_subject] + parameters: + value_field: mail_subject + charset_field: mail_subject_charset + + - function: BASE64_DECODE_TO_STRING + output_fields: [mail_attachment_name] + parameters: + value_field: mail_attachment_name + charset_field: mail_attachment_name_charset + + - function: PATH_COMBINE + lookup_fields: [rtp_pcap_path] + output_fields: [rtp_pcap_path] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, rtp_pcap_path] + + - function: PATH_COMBINE + lookup_fields: [http_request_body] + output_fields: [http_request_body] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, http_request_body] + + - function: PATH_COMBINE + 
lookup_fields: [http_response_body] + output_fields: [http_response_body] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, http_response_body] + + - function: PATH_COMBINE + lookup_fields: [mail_eml_file] + output_fields: [mail_eml_file] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, mail_eml_file] + + - function: PATH_COMBINE + lookup_fields: [packet_capture_file] + output_fields: [packet_capture_file] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, packet_capture_file] + +sinks: + clickhouse_sink: + type: clickhouse + properties: + host: 192.168.44.13:9001,192.168.44.14:9001,192.168.44.15:9001,192.168.44.16:9001 + table: tsg_galaxy_v3.voip_record_local + batch.size: 100000 + batch.interval: 30s + connection.user: e54c9568586180eede1506eecf3574e9 + connection.password: 86cf0e2ffba3f541a6c6761313e5cc7e + + +application: + + env: # [object] Environment Variables + name: etl_voip_record_kafka_to_clickhouse # [string] Job Name + shade.identifier: aes + pipeline: + object-reuse: true # [boolean] Object Reuse, default is false + topology: + - name: kafka_source + downstream: [etl_processor] + - name: etl_processor + downstream: [clickhouse_sink] + - name: clickhouse_sink + + diff --git a/TSG发布版本更新记录/TSG-24.02.1/Groot-Stream/集中式/dos_event_kafka_to_clickhouse b/TSG发布版本更新记录/TSG-24.02.1/Groot-Stream/集中式/dos_event_kafka_to_clickhouse new file mode 100644 index 0000000..6573973 --- /dev/null +++ b/TSG发布版本更新记录/TSG-24.02.1/Groot-Stream/集中式/dos_event_kafka_to_clickhouse @@ -0,0 +1,49 @@ +sources: + kafka_source: + type: kafka + # fields: # [array of object] Field List, if not set, all fields(Map) will be output. 
+ properties: + topic: DOS-EVENT + kafka.bootstrap.servers: "{{ kafka_source_servers }}" + kafka.session.timeout.ms: 60000 + kafka.max.poll.records: 3000 + kafka.max.partition.fetch.bytes: 31457280 + kafka.security.protocol: SSL + kafka.ssl.endpoint.identification.algorithm: "" + kafka.ssl.keystore.location: /data/tsg/olap/flink/topology/data/keystore.jks + kafka.ssl.keystore.password: 86cf0e2ffba3f541a6c6761313e5cc7e + kafka.ssl.truststore.location: /data/tsg/olap/flink/topology/data/truststore.jks + kafka.ssl.truststore.password: 86cf0e2ffba3f541a6c6761313e5cc7e + kafka.ssl.key.password: 86cf0e2ffba3f541a6c6761313e5cc7e + #kafka.security.protocol: SASL_PLAINTEXT + #kafka.sasl.mechanism: PLAIN + #kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a7ff0b2d3889a424249967b3870b50993d9644f239f0de82cdb13bdb502959e16afadffa49ef1e1d2b9c9b5113e619817 + kafka.group.id: dos_event_kafka_to_clickhouse-20231221 + kafka.auto.offset.reset: latest + format: json + + +sinks: + clickhouse_sink: + type: clickhouse + properties: + host: 192.168.44.13:9001,192.168.44.14:9001,192.168.44.15:9001,192.168.44.16:9001 + table: tsg_galaxy_v3.dos_event_local + batch.size: 100000 + batch.interval: 30s + connection.user: e54c9568586180eede1506eecf3574e9 + connection.password: 86cf0e2ffba3f541a6c6761313e5cc7e + + +application: + env: + name: dos_event_kafka_to_clickhouse + shade.identifier: aes + pipeline: + object-reuse: true # [boolean] Object Reuse, default is false + topology: + - name: kafka_source + downstream: [clickhouse_sink] + - name: clickhouse_sink + + diff --git a/TSG发布版本更新记录/TSG-24.02.1/Groot-Stream/集中式/etl_proxy_event_kafka_to_clickhouse b/TSG发布版本更新记录/TSG-24.02.1/Groot-Stream/集中式/etl_proxy_event_kafka_to_clickhouse new file mode 100644 index 0000000..277845c --- /dev/null +++ b/TSG发布版本更新记录/TSG-24.02.1/Groot-Stream/集中式/etl_proxy_event_kafka_to_clickhouse @@ -0,0 +1,157 @@ 
+sources: + kafka_source: + type: kafka + # fields: # [array of object] Field List, if not set, all fields(Map) will be output. + properties: + topic: PROXY-EVENT + kafka.bootstrap.servers: "{{ kafka_source_servers }}" + kafka.session.timeout.ms: 60000 + kafka.max.poll.records: 3000 + kafka.max.partition.fetch.bytes: 31457280 + kafka.security.protocol: SSL + kafka.ssl.endpoint.identification.algorithm: "" + kafka.ssl.keystore.location: /data/tsg/olap/flink/topology/data/keystore.jks + kafka.ssl.keystore.password: 86cf0e2ffba3f541a6c6761313e5cc7e + kafka.ssl.truststore.location: /data/tsg/olap/flink/topology/data/truststore.jks + kafka.ssl.truststore.password: 86cf0e2ffba3f541a6c6761313e5cc7e + kafka.ssl.key.password: 86cf0e2ffba3f541a6c6761313e5cc7e + #kafka.security.protocol: SASL_PLAINTEXT + #kafka.sasl.mechanism: PLAIN + #kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a7ff0b2d3889a424249967b3870b50993d9644f239f0de82cdb13bdb502959e16afadffa49ef1e1d2b9c9b5113e619817 + kafka.group.id: etl_proxy_event_kafka_to_clickhouse-20231221 + kafka.auto.offset.reset: latest + format: json + +processing_pipelines: + etl_processor: # [object] Processing Pipeline + type: com.geedgenetworks.core.processor.projection.ProjectionProcessorImpl + remove_fields: + output_fields: + properties: + key: value + functions: # [array of object] Function List + - function: ASN_LOOKUP + lookup_fields: [server_ip] + output_fields: [server_asn] + parameters: + option: IP_TO_ASN + kb_name: tsg_ip_asn + + - function: ASN_LOOKUP + lookup_fields: [client_ip] + output_fields: [client_asn] + parameters: + option: IP_TO_ASN + kb_name: tsg_ip_asn + + - function: SNOWFLAKE_ID + lookup_fields: [''] + output_fields: [log_id] + parameters: + data_center_id_num: 1 + + - function: JSON_EXTRACT + lookup_fields: [device_tag] + output_fields: [data_center] + filter: + parameters: + value_expression: 
$.tags[?(@.tag=='data_center')][0].value + + - function: JSON_EXTRACT + lookup_fields: [device_tag] + output_fields: [device_group] + filter: + parameters: + value_expression: $.tags[?(@.tag=='device_group')][0].value + + - function: CURRENT_UNIX_TIMESTAMP + output_fields: [processing_time] + parameters: + precision: seconds + + - function: UNIX_TIMESTAMP_CONVERTER + lookup_fields: [__timestamp] + output_fields: [recv_time] + parameters: + precision: seconds + + - function: EVAL + output_fields: [ingestion_time] + parameters: + value_expression: recv_time + + - function: DOMAIN + lookup_fields: [http_host, ssl_sni, dtls_sni, quic_sni] + output_fields: [server_domain] + parameters: + option: FIRST_SIGNIFICANT_SUBDOMAIN + + - function: BASE64_DECODE_TO_STRING + output_fields: [mail_subject] + parameters: + value_field: mail_subject + charset_field: mail_subject_charset + + - function: BASE64_DECODE_TO_STRING + output_fields: [mail_attachment_name] + parameters: + value_field: mail_attachment_name + charset_field: mail_attachment_name_charset + + - function: PATH_COMBINE + lookup_fields: [rtp_pcap_path] + output_fields: [rtp_pcap_path] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, rtp_pcap_path] + + - function: PATH_COMBINE + lookup_fields: [http_request_body] + output_fields: [http_request_body] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, http_request_body] + + - function: PATH_COMBINE + lookup_fields: [http_response_body] + output_fields: [http_response_body] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, http_response_body] + + - function: PATH_COMBINE + lookup_fields: [mail_eml_file] + output_fields: [mail_eml_file] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, mail_eml_file] + + - function: PATH_COMBINE + lookup_fields: [packet_capture_file] + output_fields: [packet_capture_file] + parameters: + path: [props.hos.path, 
props.hos.bucket.name.traffic_file, packet_capture_file] + +sinks: + clickhouse_sink: + type: clickhouse + properties: + host: 192.168.44.13:9001,192.168.44.14:9001,192.168.44.15:9001,192.168.44.16:9001 + table: tsg_galaxy_v3.proxy_event_local + batch.size: 100000 + batch.interval: 30s + connection.user: e54c9568586180eede1506eecf3574e9 + connection.password: 86cf0e2ffba3f541a6c6761313e5cc7e + + +application: + + env: # [object] Environment Variables + name: etl_proxy_event_kafka_to_clickhouse # [string] Job Name + shade.identifier: aes + pipeline: + object-reuse: true # [boolean] Object Reuse, default is false + topology: + - name: kafka_source + downstream: [etl_processor] + - name: etl_processor + downstream: [clickhouse_sink] + - name: clickhouse_sink + + diff --git a/TSG发布版本更新记录/TSG-24.02.1/Groot-Stream/集中式/etl_session_record_kafka_to_clickhouse b/TSG发布版本更新记录/TSG-24.02.1/Groot-Stream/集中式/etl_session_record_kafka_to_clickhouse new file mode 100644 index 0000000..aed5fcf --- /dev/null +++ b/TSG发布版本更新记录/TSG-24.02.1/Groot-Stream/集中式/etl_session_record_kafka_to_clickhouse @@ -0,0 +1,161 @@ +sources: + kafka_source: + type: kafka + # fields: # [array of object] Field List, if not set, all fields(Map) will be output. 
+ # watermark_timestamp: common_recv_time # [string] Watermark Field Name + # watermark_timestamp_unit: ms # [string] Watermark Unit, default is ms + # watermark_lag: 60 # [number] Watermark Lag, default is 60 + properties: + topic: SESSION-RECORD + kafka.bootstrap.servers: 192.168.44.11:9094,192.168.44.13:9094,192.168.44.14:9094,192.168.44.15:9094,192.168.44.16:9094 + kafka.session.timeout.ms: 60000 + kafka.max.poll.records: 3000 + kafka.max.partition.fetch.bytes: 31457280 + kafka.security.protocol: SASL_PLAINTEXT + kafka.ssl.keystore.location: + kafka.ssl.keystore.password: + kafka.ssl.truststore.location: + kafka.ssl.truststore.password: + kafka.ssl.key.password: + kafka.sasl.mechanism: PLAIN + kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a7ff0b2d3889a424249967b3870b50993d9644f239f0de82cdb13bdb502959e16afadffa49ef1e1d2b9c9b5113e619817 + kafka.buffer.memory: + kafka.group.id: etl_session_record_kafka_to_clickhouse-20230125 + kafka.auto.offset.reset: latest + kafka.max.request.size: + kafka.compression.type: none + format: json + +processing_pipelines: + etl_processor: # [object] Processing Pipeline + type: com.geedgenetworks.core.processor.projection.ProjectionProcessorImpl + remove_fields: + output_fields: + properties: + key: value + functions: # [array of object] Function List + - function: ASN_LOOKUP + lookup_fields: [server_ip] + output_fields: [server_asn] + parameters: + option: IP_TO_ASN + kb_name: tsg_ip_asn + + - function: ASN_LOOKUP + lookup_fields: [client_ip] + output_fields: [client_asn] + parameters: + option: IP_TO_ASN + kb_name: tsg_ip_asn + + - function: SNOWFLAKE_ID + lookup_fields: [''] + output_fields: [log_id] + parameters: + data_center_id_num: 1 + + - function: JSON_EXTRACT + lookup_fields: [device_tag] + output_fields: [data_center] + filter: + parameters: + value_expression: $.tags[?(@.tag=='data_center')][0].value + + - function: 
JSON_EXTRACT + lookup_fields: [device_tag] + output_fields: [device_group] + filter: + parameters: + value_expression: $.tags[?(@.tag=='device_group')][0].value + + - function: CURRENT_UNIX_TIMESTAMP + output_fields: [processing_time] + parameters: + precision: seconds + + - function: UNIX_TIMESTAMP_CONVERTER + lookup_fields: [__timestamp] + output_fields: [recv_time] + parameters: + precision: seconds + + - function: EVAL + output_fields: [ingestion_time] + parameters: + value_expression: recv_time + + - function: DOMAIN + lookup_fields: [http_host, ssl_sni, dtls_sni, quic_sni] + output_fields: [server_domain] + parameters: + option: FIRST_SIGNIFICANT_SUBDOMAIN + + - function: BASE64_DECODE_TO_STRING + output_fields: [mail_subject] + parameters: + value_field: mail_subject + charset_field: mail_subject_charset + + - function: BASE64_DECODE_TO_STRING + output_fields: [mail_attachment_name] + parameters: + value_field: mail_attachment_name + charset_field: mail_attachment_name_charset + + - function: PATH_COMBINE + lookup_fields: [rtp_pcap_path] + output_fields: [rtp_pcap_path] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, rtp_pcap_path] + + - function: PATH_COMBINE + lookup_fields: [http_request_body] + output_fields: [http_request_body] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, http_request_body] + + - function: PATH_COMBINE + lookup_fields: [http_response_body] + output_fields: [http_response_body] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, http_response_body] + + - function: PATH_COMBINE + lookup_fields: [mail_eml_file] + output_fields: [mail_eml_file] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, mail_eml_file] + + - function: PATH_COMBINE + lookup_fields: [packet_capture_file] + output_fields: [packet_capture_file] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, packet_capture_file] + +sinks: + clickhouse_sink: + 
type: clickhouse + properties: + host: 192.168.44.13:9001,192.168.44.14:9001,192.168.44.15:9001,192.168.44.16:9001 + table: tsg_galaxy_v3.session_record_local + batch.size: 100000 + batch.interval: 30s + connection.user: e54c9568586180eede1506eecf3574e9 + connection.password: 86cf0e2ffba3f541a6c6761313e5cc7e + + +application: + + env: # [object] Environment Variables + name: etl_session_record_kafka_to_clickhouse # [string] Job Name + shade.identifier: aes + pipeline: + object-reuse: true # [boolean] Object Reuse, default is false + topology: + - name: kafka_source + downstream: [etl_processor] + - name: etl_processor + downstream: [clickhouse_sink] + - name: clickhouse_sink + + diff --git a/TSG发布版本更新记录/TSG-24.02.1/Groot-Stream/集中式/etl_transaction_record_kafka_to_clickhouse b/TSG发布版本更新记录/TSG-24.02.1/Groot-Stream/集中式/etl_transaction_record_kafka_to_clickhouse new file mode 100644 index 0000000..6ad0945 --- /dev/null +++ b/TSG发布版本更新记录/TSG-24.02.1/Groot-Stream/集中式/etl_transaction_record_kafka_to_clickhouse @@ -0,0 +1,155 @@ +sources: + kafka_source: + type: kafka + # fields: # [array of object] Field List, if not set, all fields(Map) will be output. 
+ properties: + topic: TRANSACTION-RECORD # [string] Kafka topic consumed by this job (was SESSION-RECORD, copied from the session job) + kafka.bootstrap.servers: "{{ kafka_source_servers }}" + kafka.session.timeout.ms: 60000 + kafka.max.poll.records: 3000 + kafka.max.partition.fetch.bytes: 31457280 + kafka.security.protocol: SSL + kafka.ssl.endpoint.identification.algorithm: "" + kafka.ssl.keystore.location: /data/tsg/olap/flink/topology/data/keystore.jks + kafka.ssl.keystore.password: 86cf0e2ffba3f541a6c6761313e5cc7e + kafka.ssl.truststore.location: /data/tsg/olap/flink/topology/data/truststore.jks + kafka.ssl.truststore.password: 86cf0e2ffba3f541a6c6761313e5cc7e + kafka.ssl.key.password: 86cf0e2ffba3f541a6c6761313e5cc7e + #kafka.security.protocol: SASL_PLAINTEXT + #kafka.sasl.mechanism: PLAIN + #kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a7ff0b2d3889a424249967b3870b50993d9644f239f0de82cdb13bdb502959e16afadffa49ef1e1d2b9c9b5113e619817 + kafka.group.id: etl_transaction_record_kafka_to_clickhouse-20240308 # [string] Kafka consumer group id; no longer shared with the session/voip jobs + kafka.auto.offset.reset: latest + format: json + +processing_pipelines: + etl_processor: # [object] Processing Pipeline + type: com.geedgenetworks.core.processor.projection.ProjectionProcessorImpl + remove_fields: + output_fields: + properties: + key: value + functions: # [array of object] Function List + - function: ASN_LOOKUP + lookup_fields: [server_ip] + output_fields: [server_asn] + parameters: + option: IP_TO_ASN + kb_name: tsg_ip_asn + + - function: ASN_LOOKUP + lookup_fields: [client_ip] + output_fields: [client_asn] + parameters: + option: IP_TO_ASN + kb_name: tsg_ip_asn + + - function: SNOWFLAKE_ID + lookup_fields: [''] + output_fields: [log_id] + parameters: + data_center_id_num: 1 + + - function: JSON_EXTRACT + lookup_fields: [device_tag] + output_fields: [data_center] + filter: + parameters: + value_expression: $.tags[?(@.tag=='data_center')][0].value + + - function: JSON_EXTRACT + lookup_fields: [device_tag] + output_fields: [device_group] +
filter: + parameters: + value_expression: $.tags[?(@.tag=='device_group')][0].value + + - function: CURRENT_UNIX_TIMESTAMP + output_fields: [processing_time] + parameters: + precision: seconds + + - function: UNIX_TIMESTAMP_CONVERTER + lookup_fields: [__timestamp] + output_fields: [recv_time] + parameters: + precision: seconds + + - function: EVAL + output_fields: [ingestion_time] + parameters: + value_expression: recv_time + + - function: DOMAIN + lookup_fields: [http_host, ssl_sni, dtls_sni, quic_sni] + output_fields: [server_domain] + parameters: + option: FIRST_SIGNIFICANT_SUBDOMAIN + + - function: BASE64_DECODE_TO_STRING + output_fields: [mail_subject] + parameters: + value_field: mail_subject + charset_field: mail_subject_charset + + - function: BASE64_DECODE_TO_STRING + output_fields: [mail_attachment_name] + parameters: + value_field: mail_attachment_name + charset_field: mail_attachment_name_charset + + - function: PATH_COMBINE + lookup_fields: [rtp_pcap_path] + output_fields: [rtp_pcap_path] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, rtp_pcap_path] + + - function: PATH_COMBINE + lookup_fields: [http_request_body] + output_fields: [http_request_body] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, http_request_body] + + - function: PATH_COMBINE + lookup_fields: [http_response_body] + output_fields: [http_response_body] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, http_response_body] + + - function: PATH_COMBINE + lookup_fields: [mail_eml_file] + output_fields: [mail_eml_file] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, mail_eml_file] + + - function: PATH_COMBINE + lookup_fields: [packet_capture_file] + output_fields: [packet_capture_file] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, packet_capture_file] + +sinks: + clickhouse_sink: + type: clickhouse + properties: + host: 
192.168.44.13:9001,192.168.44.14:9001,192.168.44.15:9001,192.168.44.16:9001 + table: tsg_galaxy_v3.transaction_record_local + batch.size: 100000 + batch.interval: 30s + connection.user: e54c9568586180eede1506eecf3574e9 + connection.password: 86cf0e2ffba3f541a6c6761313e5cc7e + + +application: + + env: # [object] Environment Variables + name: etl_transaction_record_kafka_to_clickhouse # [string] Job Name + shade.identifier: aes + pipeline: + object-reuse: true # [boolean] Object Reuse, default is false + topology: + - name: kafka_source + downstream: [etl_processor] + - name: etl_processor + downstream: [clickhouse_sink] + - name: clickhouse_sink diff --git a/TSG发布版本更新记录/TSG-24.02.1/Groot-Stream/集中式/etl_voip_record_kafka_to_clickhouse b/TSG发布版本更新记录/TSG-24.02.1/Groot-Stream/集中式/etl_voip_record_kafka_to_clickhouse new file mode 100644 index 0000000..11ba71d --- /dev/null +++ b/TSG发布版本更新记录/TSG-24.02.1/Groot-Stream/集中式/etl_voip_record_kafka_to_clickhouse @@ -0,0 +1,156 @@ +sources: + kafka_source: + type: kafka + # fields: # [array of object] Field List, if not set, all fields(Map) will be output. 
+ properties: + topic: VOIP-CONVERSATION-RECORD # [string] Kafka topic consumed by this job (was SESSION-RECORD, copied from the session job) + kafka.bootstrap.servers: "{{ kafka_source_servers }}" + kafka.session.timeout.ms: 60000 + kafka.max.poll.records: 3000 + kafka.max.partition.fetch.bytes: 31457280 + kafka.security.protocol: SSL + kafka.ssl.endpoint.identification.algorithm: "" + kafka.ssl.keystore.location: /data/tsg/olap/flink/topology/data/keystore.jks + kafka.ssl.keystore.password: 86cf0e2ffba3f541a6c6761313e5cc7e + kafka.ssl.truststore.location: /data/tsg/olap/flink/topology/data/truststore.jks + kafka.ssl.truststore.password: 86cf0e2ffba3f541a6c6761313e5cc7e + kafka.ssl.key.password: 86cf0e2ffba3f541a6c6761313e5cc7e + #kafka.security.protocol: SASL_PLAINTEXT + #kafka.sasl.mechanism: PLAIN + #kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a7ff0b2d3889a424249967b3870b50993d9644f239f0de82cdb13bdb502959e16afadffa49ef1e1d2b9c9b5113e619817 + kafka.group.id: etl_voip_record_kafka_to_clickhouse-20231221 # [string] Kafka consumer group id; no longer shared with the session/transaction jobs + kafka.auto.offset.reset: latest + format: json + +processing_pipelines: + etl_processor: # [object] Processing Pipeline + type: com.geedgenetworks.core.processor.projection.ProjectionProcessorImpl + remove_fields: + output_fields: + + functions: # [array of object] Function List + - function: ASN_LOOKUP + lookup_fields: [server_ip] + output_fields: [server_asn] + parameters: + option: IP_TO_ASN + kb_name: tsg_ip_asn + + - function: ASN_LOOKUP + lookup_fields: [client_ip] + output_fields: [client_asn] + parameters: + option: IP_TO_ASN + kb_name: tsg_ip_asn + + - function: SNOWFLAKE_ID + lookup_fields: [''] + output_fields: [log_id] + parameters: + data_center_id_num: 1 + + - function: JSON_EXTRACT + lookup_fields: [device_tag] + output_fields: [data_center] + filter: + parameters: + value_expression: $.tags[?(@.tag=='data_center')][0].value + + - function: JSON_EXTRACT + lookup_fields: [device_tag] + output_fields: [device_group] + filter: + parameters: +
value_expression: $.tags[?(@.tag=='device_group')][0].value + + - function: CURRENT_UNIX_TIMESTAMP + output_fields: [processing_time] + parameters: + precision: seconds + + - function: UNIX_TIMESTAMP_CONVERTER + lookup_fields: [__timestamp] + output_fields: [recv_time] + parameters: + precision: seconds + + - function: EVAL + output_fields: [ingestion_time] + parameters: + value_expression: recv_time + + - function: DOMAIN + lookup_fields: [http_host, ssl_sni, dtls_sni, quic_sni] + output_fields: [server_domain] + parameters: + option: FIRST_SIGNIFICANT_SUBDOMAIN + + - function: BASE64_DECODE_TO_STRING + output_fields: [mail_subject] + parameters: + value_field: mail_subject + charset_field: mail_subject_charset + + - function: BASE64_DECODE_TO_STRING + output_fields: [mail_attachment_name] + parameters: + value_field: mail_attachment_name + charset_field: mail_attachment_name_charset + + - function: PATH_COMBINE + lookup_fields: [rtp_pcap_path] + output_fields: [rtp_pcap_path] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, rtp_pcap_path] + + - function: PATH_COMBINE + lookup_fields: [http_request_body] + output_fields: [http_request_body] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, http_request_body] + + - function: PATH_COMBINE + lookup_fields: [http_response_body] + output_fields: [http_response_body] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, http_response_body] + + - function: PATH_COMBINE + lookup_fields: [mail_eml_file] + output_fields: [mail_eml_file] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, mail_eml_file] + + - function: PATH_COMBINE + lookup_fields: [packet_capture_file] + output_fields: [packet_capture_file] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, packet_capture_file] + +sinks: + clickhouse_sink: + type: clickhouse + properties: + host: 
192.168.44.13:9001,192.168.44.14:9001,192.168.44.15:9001,192.168.44.16:9001 + table: tsg_galaxy_v3.voip_record_local + batch.size: 100000 + batch.interval: 30s + connection.user: e54c9568586180eede1506eecf3574e9 + connection.password: 86cf0e2ffba3f541a6c6761313e5cc7e + + +application: + + env: # [object] Environment Variables + name: etl_voip_record_kafka_to_clickhouse # [string] Job Name + shade.identifier: aes + pipeline: + object-reuse: true # [boolean] Object Reuse, default is false + topology: + - name: kafka_source + downstream: [etl_processor] + - name: etl_processor + downstream: [clickhouse_sink] + - name: clickhouse_sink + + diff --git a/TSG发布版本更新记录/TSG-24.03/Groot-Stream/README.md b/TSG发布版本更新记录/TSG-24.03/Groot-Stream/README.md new file mode 100644 index 0000000..eb19dc5 --- /dev/null +++ b/TSG发布版本更新记录/TSG-24.03/Groot-Stream/README.md @@ -0,0 +1,7 @@ +v1.2.2 (2024-04-08) +https://git.mesalab.cn/galaxy/platform/groot-stream/-/releases/v1.2.2 + +Core +配置文件敏感信息加密,AES Config Shade增加选项:kafka.ssl.keystore.password,kafka.ssl.truststore.password,kafka.ssl.key.password +Connector +ClickHouse Sink Connector兼容AggregateFunction类型 diff --git a/TSG发布版本更新记录/TSG-24.03/Groot-Stream/grootstream.yaml b/TSG发布版本更新记录/TSG-24.03/Groot-Stream/grootstream.yaml new file mode 100644 index 0000000..3a75067 --- /dev/null +++ b/TSG发布版本更新记录/TSG-24.03/Groot-Stream/grootstream.yaml @@ -0,0 +1,6 @@ +grootstream: + properties: + hos.path: http://192.168.44.12:9098/hos + hos.bucket.name.traffic_file: traffic_file_bucket + hos.bucket.name.troubleshooting_file: troubleshooting_file_bucket + scheduler.knowledge_base.update.interval.minutes: 5 diff --git a/TSG发布版本更新记录/TSG-24.03/Groot-Stream/集中式/dos_event_kafka_to_clickhouse b/TSG发布版本更新记录/TSG-24.03/Groot-Stream/集中式/dos_event_kafka_to_clickhouse new file mode 100644 index 0000000..b87db67 --- /dev/null +++ b/TSG发布版本更新记录/TSG-24.03/Groot-Stream/集中式/dos_event_kafka_to_clickhouse @@ -0,0 +1,49 @@ +sources: + kafka_source: + type: kafka + properties: + 
topic: DOS-EVENT + kafka.bootstrap.servers: 192.168.44.11:9094,192.168.44.13:9094,192.168.44.14:9094,192.168.44.15:9094,192.168.44.16:9094 + kafka.session.timeout.ms: 60000 + kafka.max.poll.records: 3000 + kafka.max.partition.fetch.bytes: 31457280 + kafka.security.protocol: SASL_PLAINTEXT + kafka.ssl.keystore.location: + kafka.ssl.keystore.password: + kafka.ssl.truststore.location: + kafka.ssl.truststore.password: + kafka.ssl.key.password: + kafka.sasl.mechanism: PLAIN + kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a7ff0b2d3889a424249967b3870b50993d9644f239f0de82cdb13bdb502959e16afadffa49ef1e1d2b9c9b5113e619817 + kafka.buffer.memory: + kafka.group.id: dos_event_kafka_to_clickhouse-20231221 + kafka.auto.offset.reset: latest + kafka.max.request.size: + kafka.compression.type: none + format: json + + +sinks: + clickhouse_sink: + type: clickhouse + properties: + host: 192.168.44.13:9001,192.168.44.14:9001,192.168.44.15:9001,192.168.44.16:9001 + table: tsg_galaxy_v3.dos_event_local + batch.size: 100000 + batch.interval: 30s + connection.user: e54c9568586180eede1506eecf3574e9 + connection.password: 86cf0e2ffba3f541a6c6761313e5cc7e + + +application: + env: + name: dos_event_kafka_to_clickhouse + shade.identifier: aes + pipeline: + object-reuse: true # [boolean] Object Reuse, default is false + topology: + - name: kafka_source + downstream: [clickhouse_sink] + - name: clickhouse_sink + + diff --git a/TSG发布版本更新记录/TSG-24.03/Groot-Stream/集中式/etl_proxy_event_kafka_to_clickhouse b/TSG发布版本更新记录/TSG-24.03/Groot-Stream/集中式/etl_proxy_event_kafka_to_clickhouse new file mode 100644 index 0000000..39ab825 --- /dev/null +++ b/TSG发布版本更新记录/TSG-24.03/Groot-Stream/集中式/etl_proxy_event_kafka_to_clickhouse @@ -0,0 +1,148 @@ +sources: + kafka_source: + type: kafka + # fields: # [array of object] Field List, if not set, all fields(Map) will be output. 
+ # watermark_timestamp: common_recv_time # [string] Watermark Field Name + # watermark_timestamp_unit: ms # [string] Watermark Unit, default is ms + # watermark_lag: 60 # [number] Watermark Lag, default is 60 + properties: + topic: PROXY-EVENT + kafka.bootstrap.servers: 192.168.44.11:9094,192.168.44.13:9094,192.168.44.14:9094,192.168.44.15:9094,192.168.44.16:9094 + kafka.session.timeout.ms: 60000 + kafka.max.poll.records: 3000 + kafka.max.partition.fetch.bytes: 31457280 + kafka.security.protocol: SASL_PLAINTEXT + kafka.ssl.keystore.location: + kafka.ssl.keystore.password: + kafka.ssl.truststore.location: + kafka.ssl.truststore.password: + kafka.ssl.key.password: + kafka.sasl.mechanism: PLAIN + kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a7ff0b2d3889a424249967b3870b50993d9644f239f0de82cdb13bdb502959e16afadffa49ef1e1d2b9c9b5113e619817 + kafka.buffer.memory: + kafka.group.id: etl_proxy_event_kafka_to_clickhouse-20231221 + kafka.auto.offset.reset: latest + kafka.max.request.size: + kafka.compression.type: none + format: json + +processing_pipelines: + etl_processor: # [object] Processing Pipeline + type: com.geedgenetworks.core.processor.projection.ProjectionProcessorImpl + remove_fields: + output_fields: + properties: + key: value + functions: # [array of object] Function List + + - function: SNOWFLAKE_ID + lookup_fields: [''] + output_fields: [log_id] + parameters: + data_center_id_num: 1 + + - function: JSON_EXTRACT + lookup_fields: [device_tag] + output_fields: [data_center] + filter: + parameters: + value_expression: $.tags[?(@.tag=='data_center')][0].value + + - function: JSON_EXTRACT + lookup_fields: [device_tag] + output_fields: [device_group] + filter: + parameters: + value_expression: $.tags[?(@.tag=='device_group')][0].value + + - function: CURRENT_UNIX_TIMESTAMP + output_fields: [processing_time] + parameters: + precision: seconds + + - function: 
UNIX_TIMESTAMP_CONVERTER + lookup_fields: [__timestamp] + output_fields: [recv_time] + parameters: + precision: seconds + + - function: EVAL + output_fields: [ingestion_time] + parameters: + value_expression: recv_time + + - function: DOMAIN + lookup_fields: [http_host, ssl_sni, dtls_sni, quic_sni] + output_fields: [server_domain] + parameters: + option: FIRST_SIGNIFICANT_SUBDOMAIN + + - function: BASE64_DECODE_TO_STRING + output_fields: [mail_subject] + parameters: + value_field: mail_subject + charset_field: mail_subject_charset + + - function: BASE64_DECODE_TO_STRING + output_fields: [mail_attachment_name] + parameters: + value_field: mail_attachment_name + charset_field: mail_attachment_name_charset + + - function: PATH_COMBINE + lookup_fields: [rtp_pcap_path] + output_fields: [rtp_pcap_path] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, rtp_pcap_path] + + - function: PATH_COMBINE + lookup_fields: [http_request_body] + output_fields: [http_request_body] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, http_request_body] + + - function: PATH_COMBINE + lookup_fields: [http_response_body] + output_fields: [http_response_body] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, http_response_body] + + - function: PATH_COMBINE + lookup_fields: [mail_eml_file] + output_fields: [mail_eml_file] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, mail_eml_file] + + - function: PATH_COMBINE + lookup_fields: [packet_capture_file] + output_fields: [packet_capture_file] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, packet_capture_file] + +sinks: + clickhouse_sink: + type: clickhouse + properties: + host: 192.168.44.13:9001,192.168.44.14:9001,192.168.44.15:9001,192.168.44.16:9001 + table: tsg_galaxy_v3.proxy_event_local + batch.size: 100000 + batch.interval: 30s + connection.user: e54c9568586180eede1506eecf3574e9 + connection.password: 
86cf0e2ffba3f541a6c6761313e5cc7e + + +application: + + env: # [object] Environment Variables + name: etl_proxy_event_kafka_to_clickhouse # [string] Job Name + shade.identifier: aes + pipeline: + object-reuse: true # [boolean] Object Reuse, default is false + topology: + - name: kafka_source + downstream: [etl_processor] + - name: etl_processor + downstream: [clickhouse_sink] + - name: clickhouse_sink + + diff --git a/TSG发布版本更新记录/TSG-24.03/Groot-Stream/集中式/etl_session_record_kafka_to_clickhouse b/TSG发布版本更新记录/TSG-24.03/Groot-Stream/集中式/etl_session_record_kafka_to_clickhouse new file mode 100644 index 0000000..643fa48 --- /dev/null +++ b/TSG发布版本更新记录/TSG-24.03/Groot-Stream/集中式/etl_session_record_kafka_to_clickhouse @@ -0,0 +1,148 @@ +sources: + kafka_source: + type: kafka + # fields: # [array of object] Field List, if not set, all fields(Map) will be output. + # watermark_timestamp: common_recv_time # [string] Watermark Field Name + # watermark_timestamp_unit: ms # [string] Watermark Unit, default is ms + # watermark_lag: 60 # [number] Watermark Lag, default is 60 + properties: + topic: SESSION-RECORD + kafka.bootstrap.servers: 192.168.44.11:9094,192.168.44.13:9094,192.168.44.14:9094,192.168.44.15:9094,192.168.44.16:9094 + kafka.session.timeout.ms: 60000 + kafka.max.poll.records: 3000 + kafka.max.partition.fetch.bytes: 31457280 + kafka.security.protocol: SASL_PLAINTEXT + kafka.ssl.keystore.location: + kafka.ssl.keystore.password: + kafka.ssl.truststore.location: + kafka.ssl.truststore.password: + kafka.ssl.key.password: + kafka.sasl.mechanism: PLAIN + kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a7ff0b2d3889a424249967b3870b50993d9644f239f0de82cdb13bdb502959e16afadffa49ef1e1d2b9c9b5113e619817 + kafka.buffer.memory: + kafka.group.id: etl_session_record_kafka_to_clickhouse-20230125 + kafka.auto.offset.reset: latest + kafka.max.request.size: + kafka.compression.type: 
none + format: json + +processing_pipelines: + etl_processor: # [object] Processing Pipeline + type: com.geedgenetworks.core.processor.projection.ProjectionProcessorImpl + remove_fields: + output_fields: + properties: + key: value + functions: # [array of object] Function List + + - function: SNOWFLAKE_ID + lookup_fields: [''] + output_fields: [log_id] + parameters: + data_center_id_num: 1 + + - function: JSON_EXTRACT + lookup_fields: [device_tag] + output_fields: [data_center] + filter: + parameters: + value_expression: $.tags[?(@.tag=='data_center')][0].value + + - function: JSON_EXTRACT + lookup_fields: [device_tag] + output_fields: [device_group] + filter: + parameters: + value_expression: $.tags[?(@.tag=='device_group')][0].value + + - function: CURRENT_UNIX_TIMESTAMP + output_fields: [processing_time] + parameters: + precision: seconds + + - function: UNIX_TIMESTAMP_CONVERTER + lookup_fields: [__timestamp] + output_fields: [recv_time] + parameters: + precision: seconds + + - function: EVAL + output_fields: [ingestion_time] + parameters: + value_expression: recv_time + + - function: DOMAIN + lookup_fields: [http_host, ssl_sni, dtls_sni, quic_sni] + output_fields: [server_domain] + parameters: + option: FIRST_SIGNIFICANT_SUBDOMAIN + + - function: BASE64_DECODE_TO_STRING + output_fields: [mail_subject] + parameters: + value_field: mail_subject + charset_field: mail_subject_charset + + - function: BASE64_DECODE_TO_STRING + output_fields: [mail_attachment_name] + parameters: + value_field: mail_attachment_name + charset_field: mail_attachment_name_charset + + - function: PATH_COMBINE + lookup_fields: [rtp_pcap_path] + output_fields: [rtp_pcap_path] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, rtp_pcap_path] + + - function: PATH_COMBINE + lookup_fields: [http_request_body] + output_fields: [http_request_body] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, http_request_body] + + - function: PATH_COMBINE + 
lookup_fields: [http_response_body] + output_fields: [http_response_body] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, http_response_body] + + - function: PATH_COMBINE + lookup_fields: [mail_eml_file] + output_fields: [mail_eml_file] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, mail_eml_file] + + - function: PATH_COMBINE + lookup_fields: [packet_capture_file] + output_fields: [packet_capture_file] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, packet_capture_file] + +sinks: + clickhouse_sink: + type: clickhouse + properties: + host: 192.168.44.13:9001,192.168.44.14:9001,192.168.44.15:9001,192.168.44.16:9001 + table: tsg_galaxy_v3.session_record_local + batch.size: 100000 + batch.interval: 30s + connection.user: e54c9568586180eede1506eecf3574e9 + connection.password: 86cf0e2ffba3f541a6c6761313e5cc7e + + +application: + + env: # [object] Environment Variables + name: etl_session_record_kafka_to_clickhouse # [string] Job Name + shade.identifier: aes + pipeline: + object-reuse: true # [boolean] Object Reuse, default is false + topology: + - name: kafka_source + downstream: [etl_processor] + - name: etl_processor + downstream: [clickhouse_sink] + - name: clickhouse_sink + + diff --git a/TSG发布版本更新记录/TSG-24.03/Groot-Stream/集中式/etl_transaction_record_kafka_to_clickhouse b/TSG发布版本更新记录/TSG-24.03/Groot-Stream/集中式/etl_transaction_record_kafka_to_clickhouse new file mode 100644 index 0000000..4d28714 --- /dev/null +++ b/TSG发布版本更新记录/TSG-24.03/Groot-Stream/集中式/etl_transaction_record_kafka_to_clickhouse @@ -0,0 +1,146 @@ +sources: + kafka_source: + type: kafka + # fields: # [array of object] Field List, if not set, all fields(Map) will be output. 
+ # watermark_timestamp: common_recv_time # [string] Watermark Field Name + # watermark_timestamp_unit: ms # [string] Watermark Unit, default is ms + # watermark_lag: 60 # [number] Watermark Lag, default is 60 + properties: + topic: TRANSACTION-RECORD + kafka.bootstrap.servers: 192.168.44.11:9094,192.168.44.13:9094,192.168.44.14:9094,192.168.44.15:9094,192.168.44.16:9094 + kafka.session.timeout.ms: 60000 + kafka.max.poll.records: 3000 + kafka.max.partition.fetch.bytes: 31457280 + kafka.security.protocol: SASL_PLAINTEXT + kafka.ssl.keystore.location: + kafka.ssl.keystore.password: + kafka.ssl.truststore.location: + kafka.ssl.truststore.password: + kafka.ssl.key.password: + kafka.sasl.mechanism: PLAIN + kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a7ff0b2d3889a424249967b3870b50993d9644f239f0de82cdb13bdb502959e16afadffa49ef1e1d2b9c9b5113e619817 + kafka.buffer.memory: + kafka.group.id: etl_transaction_record_kafka_to_clickhouse-20240308 + kafka.auto.offset.reset: latest + kafka.max.request.size: + kafka.compression.type: none + format: json + +processing_pipelines: + etl_processor: # [object] Processing Pipeline + type: com.geedgenetworks.core.processor.projection.ProjectionProcessorImpl + remove_fields: + output_fields: + properties: + key: value + functions: # [array of object] Function List + + - function: SNOWFLAKE_ID + lookup_fields: [''] + output_fields: [log_id] + parameters: + data_center_id_num: 1 + + - function: JSON_EXTRACT + lookup_fields: [device_tag] + output_fields: [data_center] + filter: + parameters: + value_expression: $.tags[?(@.tag=='data_center')][0].value + + - function: JSON_EXTRACT + lookup_fields: [device_tag] + output_fields: [device_group] + filter: + parameters: + value_expression: $.tags[?(@.tag=='device_group')][0].value + + - function: CURRENT_UNIX_TIMESTAMP + output_fields: [processing_time] + parameters: + precision: seconds + + - 
function: UNIX_TIMESTAMP_CONVERTER + lookup_fields: [__timestamp] + output_fields: [recv_time] + parameters: + precision: seconds + + - function: EVAL + output_fields: [ingestion_time] + parameters: + value_expression: recv_time + + - function: DOMAIN + lookup_fields: [http_host, ssl_sni, dtls_sni, quic_sni] + output_fields: [server_domain] + parameters: + option: FIRST_SIGNIFICANT_SUBDOMAIN + + - function: BASE64_DECODE_TO_STRING + output_fields: [mail_subject] + parameters: + value_field: mail_subject + charset_field: mail_subject_charset + + - function: BASE64_DECODE_TO_STRING + output_fields: [mail_attachment_name] + parameters: + value_field: mail_attachment_name + charset_field: mail_attachment_name_charset + + - function: PATH_COMBINE + lookup_fields: [rtp_pcap_path] + output_fields: [rtp_pcap_path] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, rtp_pcap_path] + + - function: PATH_COMBINE + lookup_fields: [http_request_body] + output_fields: [http_request_body] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, http_request_body] + + - function: PATH_COMBINE + lookup_fields: [http_response_body] + output_fields: [http_response_body] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, http_response_body] + + - function: PATH_COMBINE + lookup_fields: [mail_eml_file] + output_fields: [mail_eml_file] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, mail_eml_file] + + - function: PATH_COMBINE + lookup_fields: [packet_capture_file] + output_fields: [packet_capture_file] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, packet_capture_file] + +sinks: + clickhouse_sink: + type: clickhouse + properties: + host: 192.168.44.13:9001,192.168.44.14:9001,192.168.44.15:9001,192.168.44.16:9001 + table: tsg_galaxy_v3.transaction_record_local + batch.size: 100000 + batch.interval: 30s + connection.user: e54c9568586180eede1506eecf3574e9 + 
connection.password: 86cf0e2ffba3f541a6c6761313e5cc7e + + +application: + + env: # [object] Environment Variables + name: etl_transaction_record_kafka_to_clickhouse # [string] Job Name + shade.identifier: aes + pipeline: + object-reuse: true # [boolean] Object Reuse, default is false + topology: + - name: kafka_source + downstream: [etl_processor] + - name: etl_processor + downstream: [clickhouse_sink] + - name: clickhouse_sink diff --git a/TSG发布版本更新记录/TSG-24.03/Groot-Stream/集中式/etl_voip_record_kafka_to_clickhouse b/TSG发布版本更新记录/TSG-24.03/Groot-Stream/集中式/etl_voip_record_kafka_to_clickhouse new file mode 100644 index 0000000..90d3179 --- /dev/null +++ b/TSG发布版本更新记录/TSG-24.03/Groot-Stream/集中式/etl_voip_record_kafka_to_clickhouse @@ -0,0 +1,148 @@ +sources: + kafka_source: + type: kafka + # fields: # [array of object] Field List, if not set, all fields(Map) will be output. + # watermark_timestamp: common_recv_time # [string] Watermark Field Name + # watermark_timestamp_unit: ms # [string] Watermark Unit, default is ms + # watermark_lag: 60 # [number] Watermark Lag, default is 60 + properties: + topic: VOIP-CONVERSATION-RECORD + kafka.bootstrap.servers: 192.168.44.11:9094,192.168.44.13:9094,192.168.44.14:9094,192.168.44.15:9094,192.168.44.16:9094 + kafka.session.timeout.ms: 60000 + kafka.max.poll.records: 3000 + kafka.max.partition.fetch.bytes: 31457280 + kafka.security.protocol: SASL_PLAINTEXT + kafka.ssl.keystore.location: + kafka.ssl.keystore.password: + kafka.ssl.truststore.location: + kafka.ssl.truststore.password: + kafka.ssl.key.password: + kafka.sasl.mechanism: PLAIN + kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a7ff0b2d3889a424249967b3870b50993d9644f239f0de82cdb13bdb502959e16afadffa49ef1e1d2b9c9b5113e619817 + kafka.buffer.memory: + kafka.group.id: etl_voip_record_kafka_to_clickhouse-20231221 + kafka.auto.offset.reset: latest + kafka.max.request.size: + 
kafka.compression.type: none + format: json + +processing_pipelines: + etl_processor: # [object] Processing Pipeline + type: com.geedgenetworks.core.processor.projection.ProjectionProcessorImpl + remove_fields: + output_fields: + properties: + key: value + functions: # [array of object] Function List + + - function: SNOWFLAKE_ID + lookup_fields: [''] + output_fields: [log_id] + parameters: + data_center_id_num: 1 + + - function: JSON_EXTRACT + lookup_fields: [device_tag] + output_fields: [data_center] + filter: + parameters: + value_expression: $.tags[?(@.tag=='data_center')][0].value + + - function: JSON_EXTRACT + lookup_fields: [device_tag] + output_fields: [device_group] + filter: + parameters: + value_expression: $.tags[?(@.tag=='device_group')][0].value + + - function: CURRENT_UNIX_TIMESTAMP + output_fields: [processing_time] + parameters: + precision: seconds + + - function: UNIX_TIMESTAMP_CONVERTER + lookup_fields: [__timestamp] + output_fields: [recv_time] + parameters: + precision: seconds + + - function: EVAL + output_fields: [ingestion_time] + parameters: + value_expression: recv_time + + - function: DOMAIN + lookup_fields: [http_host, ssl_sni, dtls_sni, quic_sni] + output_fields: [server_domain] + parameters: + option: FIRST_SIGNIFICANT_SUBDOMAIN + + - function: BASE64_DECODE_TO_STRING + output_fields: [mail_subject] + parameters: + value_field: mail_subject + charset_field: mail_subject_charset + + - function: BASE64_DECODE_TO_STRING + output_fields: [mail_attachment_name] + parameters: + value_field: mail_attachment_name + charset_field: mail_attachment_name_charset + + - function: PATH_COMBINE + lookup_fields: [rtp_pcap_path] + output_fields: [rtp_pcap_path] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, rtp_pcap_path] + + - function: PATH_COMBINE + lookup_fields: [http_request_body] + output_fields: [http_request_body] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, http_request_body] + + - 
function: PATH_COMBINE + lookup_fields: [http_response_body] + output_fields: [http_response_body] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, http_response_body] + + - function: PATH_COMBINE + lookup_fields: [mail_eml_file] + output_fields: [mail_eml_file] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, mail_eml_file] + + - function: PATH_COMBINE + lookup_fields: [packet_capture_file] + output_fields: [packet_capture_file] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, packet_capture_file] + +sinks: + clickhouse_sink: + type: clickhouse + properties: + host: 192.168.44.13:9001,192.168.44.14:9001,192.168.44.15:9001,192.168.44.16:9001 + table: tsg_galaxy_v3.voip_record_local + batch.size: 100000 + batch.interval: 30s + connection.user: e54c9568586180eede1506eecf3574e9 + connection.password: 86cf0e2ffba3f541a6c6761313e5cc7e + + +application: + + env: # [object] Environment Variables + name: etl_voip_record_kafka_to_clickhouse # [string] Job Name + shade.identifier: aes + pipeline: + object-reuse: true # [boolean] Object Reuse, default is false + topology: + - name: kafka_source + downstream: [etl_processor] + - name: etl_processor + downstream: [clickhouse_sink] + - name: clickhouse_sink + +