diff --git a/tsg_olap/upgrade/TSG-24.02.1/Groot-Stream/README.md b/tsg_olap/upgrade/TSG-24.02.1/Groot-Stream/README.md deleted file mode 100644 index eb19dc5..0000000 --- a/tsg_olap/upgrade/TSG-24.02.1/Groot-Stream/README.md +++ /dev/null @@ -1,7 +0,0 @@ -v1.2.2 (2024-04-08) -https://git.mesalab.cn/galaxy/platform/groot-stream/-/releases/v1.2.2 - -Core -配置文件敏感信息加密,AES Config Shade增加选项:kafka.ssl.keystore.password,kafka.ssl.truststore.password,kafka.ssl.key.password -Connector -ClickHouse Sink Connector兼容AggregateFunction类型 diff --git a/tsg_olap/upgrade/TSG-24.02.1/Groot-Stream/etl_session_record_kafka_to_kafka b/tsg_olap/upgrade/TSG-24.02.1/Groot-Stream/etl_session_record_kafka_to_kafka deleted file mode 100644 index 1af1ba2..0000000 --- a/tsg_olap/upgrade/TSG-24.02.1/Groot-Stream/etl_session_record_kafka_to_kafka +++ /dev/null @@ -1,162 +0,0 @@ -sources: - kafka_source: - type: kafka - # fields: # [array of object] Field List, if not set, all fields(Map) will be output. - properties: - topic: SESSION-RECORD - kafka.bootstrap.servers: "{{ kafka_source_servers }}" - kafka.session.timeout.ms: 60000 - kafka.max.poll.records: 3000 - kafka.max.partition.fetch.bytes: 31457280 - kafka.security.protocol: SSL - kafka.ssl.endpoint.identification.algorithm: "" - kafka.ssl.keystore.location: /data/tsg/olap/flink/topology/data/keystore.jks - kafka.ssl.keystore.password: 86cf0e2ffba3f541a6c6761313e5cc7e - kafka.ssl.truststore.location: /data/tsg/olap/flink/topology/data/truststore.jks - kafka.ssl.truststore.password: 86cf0e2ffba3f541a6c6761313e5cc7e - kafka.ssl.key.password: 86cf0e2ffba3f541a6c6761313e5cc7e - #kafka.security.protocol: SASL_PLAINTEXT - #kafka.sasl.mechanism: PLAIN - #kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a7ff0b2d3889a424249967b3870b50993d9644f239f0de82cdb13bdb502959e16afadffa49ef1e1d2b9c9b5113e619817 - kafka.group.id: etl_session_record_kafka_to_kafka-20231221 - kafka.auto.offset.reset: latest - format: json - -processing_pipelines: - etl_processor: # [object] Processing Pipeline - type: com.geedgenetworks.core.processor.projection.ProjectionProcessorImpl - remove_fields: # [array of object] Field List, if not set, all fields(Map) will be output. - output_fields: # [array of object] Field List, if set, fields will be remove. - functions: # [array of object] Function List - - function: ASN_LOOKUP - lookup_fields: [server_ip] - output_fields: [server_asn] - parameters: - option: IP_TO_ASN - kb_name: tsg_ip_asn - - - function: ASN_LOOKUP - lookup_fields: [client_ip] - output_fields: [client_asn] - parameters: - option: IP_TO_ASN - kb_name: tsg_ip_asn - - - function: SNOWFLAKE_ID - lookup_fields: [''] - output_fields: [log_id] - parameters: - data_center_id_num: 1 - - - function: JSON_EXTRACT - lookup_fields: [device_tag] - output_fields: [data_center] - filter: - parameters: - value_expression: $.tags[?(@.tag=='data_center')][0].value - - - function: JSON_EXTRACT - lookup_fields: [device_tag] - output_fields: [device_group] - filter: - parameters: - value_expression: $.tags[?(@.tag=='device_group')][0].value - - - function: CURRENT_UNIX_TIMESTAMP - output_fields: [processing_time] - parameters: - precision: seconds - - - function: UNIX_TIMESTAMP_CONVERTER - lookup_fields: [__timestamp] - output_fields: [recv_time] - parameters: - precision: seconds - - - function: EVAL - output_fields: [ingestion_time] - parameters: - value_expression: recv_time - - - function: DOMAIN - lookup_fields: [http_host, ssl_sni, dtls_sni, quic_sni] - output_fields: [server_domain] - parameters: - option: FIRST_SIGNIFICANT_SUBDOMAIN - - - function: BASE64_DECODE_TO_STRING - output_fields: [mail_subject] - parameters: - value_field: mail_subject - charset_field: mail_subject_charset - - - function: BASE64_DECODE_TO_STRING - output_fields: [mail_attachment_name] - parameters: - value_field: mail_attachment_name - charset_field: mail_attachment_name_charset - - - function: PATH_COMBINE - lookup_fields: [rtp_pcap_path] - output_fields: [rtp_pcap_path] - parameters: - path: [props.hos.path, props.hos.bucket.name.traffic_file, rtp_pcap_path] - - - function: PATH_COMBINE - lookup_fields: [http_request_body] - output_fields: [http_request_body] - parameters: - path: [props.hos.path, props.hos.bucket.name.traffic_file, http_request_body] - - - function: PATH_COMBINE - lookup_fields: [http_response_body] - output_fields: [http_response_body] - parameters: - path: [props.hos.path, props.hos.bucket.name.traffic_file, http_response_body] - - - function: PATH_COMBINE - lookup_fields: [mail_eml_file] - output_fields: [mail_eml_file] - parameters: - path: [props.hos.path, props.hos.bucket.name.traffic_file, mail_eml_file] - - - function: PATH_COMBINE - lookup_fields: [packet_capture_file] - output_fields: [packet_capture_file] - parameters: - path: [props.hos.path, props.hos.bucket.name.traffic_file, packet_capture_file] - -sinks: - kafka_sink: - type : kafka - properties: - topic: SESSION-RECORD-PROCESSED - kafka.bootstrap.servers: {{ national_center_kafka_servers }} - kafka.retries: 0 - kafka.linger.ms: 10 - kafka.request.timeout.ms: 30000 - kafka.batch.size: 262144 - kafka.buffer.memory: 134217728 - kafka.max.request.size: 10485760 - kafka.compression.type: snappy - kafka.security.protocol: SASL_PLAINTEXT - kafka.sasl.mechanism: PLAIN - kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a7ff0b2d3889a424249967b3870b50993d9644f239f0de82cdb13bdb502959e16afadffa49ef1e1d2b9c9b5113e619817 - format: json - - -application: - - env: # [object] Environment Variables - name: etl_session_record_kafka_to_kafka # [string] Job Name - shade.identifier: aes - pipeline: - object-reuse: true # [boolean] Object Reuse, default is false - topology: - - name: kafka_source - downstream: [etl_processor] - - name: etl_processor - downstream: [kafka_sink] - - name: kafka_sink - - diff --git a/tsg_olap/upgrade/TSG-24.02.1/Groot-Stream/grootstream.yaml b/tsg_olap/upgrade/TSG-24.02.1/Groot-Stream/grootstream.yaml deleted file mode 100644 index a94e7e3..0000000 --- a/tsg_olap/upgrade/TSG-24.02.1/Groot-Stream/grootstream.yaml +++ /dev/null @@ -1,18 +0,0 @@ -grootstream: - knowledge_base: -# - name: tsg_ip_asn -# fs_type: http -# fs_path: http://192.168.44.12:9999/v1/knowledge_base -# files: -# - f9f6bc91-2142-4673-8249-e097c00fe1ea - - - name: tsg_ip_asn - fs_type: local - fs_path: /data/hdd/olap/flink/topology/data/ - files: - - asn_builtin.mmdb - properties: - hos.path: http://192.168.44.12:9098/hos - hos.bucket.name.traffic_file: traffic_file_bucket - hos.bucket.name.troubleshooting_file: troubleshooting_file_bucket - scheduler.knowledge_base.update.interval.minutes: 5 diff --git a/tsg_olap/upgrade/TSG-24.02.1/Groot-Stream/session_record_processed_kafka_to_clickhouse b/tsg_olap/upgrade/TSG-24.02.1/Groot-Stream/session_record_processed_kafka_to_clickhouse deleted file mode 100644 index 8b0eb85..0000000 --- a/tsg_olap/upgrade/TSG-24.02.1/Groot-Stream/session_record_processed_kafka_to_clickhouse +++ /dev/null @@ -1,49 +0,0 @@ -sources: - kafka_source: - type: kafka - # fields: # [array of object] Field List, if not set, all fields(Map) will be output. - properties: - topic: SESSION-RECORD - kafka.bootstrap.servers: "{{ kafka_sink_servers }}" - kafka.session.timeout.ms: 60000 - kafka.max.poll.records: 3000 - kafka.max.partition.fetch.bytes: 31457280 - #kafka.security.protocol: SSL - #kafka.ssl.endpoint.identification.algorithm: "" - #kafka.ssl.keystore.location: /data/tsg/olap/flink/topology/data/keystore.jks - #kafka.ssl.keystore.password: 86cf0e2ffba3f541a6c6761313e5cc7e - #kafka.ssl.truststore.location: /data/tsg/olap/flink/topology/data/truststore.jks - #kafka.ssl.truststore.password: 86cf0e2ffba3f541a6c6761313e5cc7e - #kafka.ssl.key.password: 86cf0e2ffba3f541a6c6761313e5cc7e - kafka.security.protocol: SASL_PLAINTEXT - kafka.sasl.mechanism: PLAIN - kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a7ff0b2d3889a424249967b3870b50993d9644f239f0de82cdb13bdb502959e16afadffa49ef1e1d2b9c9b5113e619817 - kafka.group.id: session_record_processed_kafka_to_clickhouse - kafka.auto.offset.reset: latest - format: json - -sinks: - clickhouse_sink: - type: clickhouse - properties: - host: "{{ clickhouse_servers }}" - table: tsg_galaxy_v3.session_record_local - batch.size: 100000 - batch.interval: 30s - connection.user: e54c9568586180eede1506eecf3574e9 - connection.password: 86cf0e2ffba3f541a6c6761313e5cc7e - connection.connect_timeout: 30 - connection.query_timeout: 300 - -application: - - env: # [object] Environment Variables - name: session_record_processed_kafka_to_clickhouse # [string] Job Name - shade.identifier: aes - pipeline: - object-reuse: true # [boolean] Object Reuse, default is false - topology: - - name: kafka_source - downstream: [clickhouse_sink] - - name: clickhouse_sink - diff --git a/tsg_olap/upgrade/TSG-24.02.1/Groot-Stream/udf.plugins b/tsg_olap/upgrade/TSG-24.02.1/Groot-Stream/udf.plugins deleted file mode 100644 index 1de2395..0000000 --- a/tsg_olap/upgrade/TSG-24.02.1/Groot-Stream/udf.plugins +++ /dev/null @@ -1,15 +0,0 @@ -com.geedgenetworks.core.udf.AsnLookup -com.geedgenetworks.core.udf.CurrentUnixTimestamp -com.geedgenetworks.core.udf.DecodeBase64 -com.geedgenetworks.core.udf.Domain -com.geedgenetworks.core.udf.Drop -com.geedgenetworks.core.udf.Eval -com.geedgenetworks.core.udf.FromUnixTimestamp -com.geedgenetworks.core.udf.GenerateStringArray -com.geedgenetworks.core.udf.GeoIpLookup -com.geedgenetworks.core.udf.JsonExtract -com.geedgenetworks.core.udf.PathCombine -com.geedgenetworks.core.udf.Rename -com.geedgenetworks.core.udf.SnowflakeId -com.geedgenetworks.core.udf.StringJoiner -com.geedgenetworks.core.udf.UnixTimestampConverter \ No newline at end of file diff --git a/tsg_olap/upgrade/TSG-24.02.1/Groot-Stream/集中式/dos_event_kafka_to_clickhouse b/tsg_olap/upgrade/TSG-24.02.1/Groot-Stream/集中式/dos_event_kafka_to_clickhouse deleted file mode 100644 index 15225dc..0000000 --- a/tsg_olap/upgrade/TSG-24.02.1/Groot-Stream/集中式/dos_event_kafka_to_clickhouse +++ /dev/null @@ -1,50 +0,0 @@ -sources: - kafka_source: - type: kafka - # fields: # [array of object] Field List, if not set, all fields(Map) will be output. - properties: - topic: SESSION-RECORD - kafka.bootstrap.servers: "{{ kafka_source_servers }}" - kafka.session.timeout.ms: 60000 - kafka.max.poll.records: 3000 - kafka.max.partition.fetch.bytes: 31457280 - kafka.security.protocol: SSL - kafka.ssl.endpoint.identification.algorithm: "" - kafka.ssl.keystore.location: /data/tsg/olap/flink/topology/data/keystore.jks - kafka.ssl.keystore.password: 86cf0e2ffba3f541a6c6761313e5cc7e - kafka.ssl.truststore.location: /data/tsg/olap/flink/topology/data/truststore.jks - kafka.ssl.truststore.password: 86cf0e2ffba3f541a6c6761313e5cc7e - kafka.ssl.key.password: 86cf0e2ffba3f541a6c6761313e5cc7e - #kafka.security.protocol: SASL_PLAINTEXT - #kafka.sasl.mechanism: PLAIN - #kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a7ff0b2d3889a424249967b3870b50993d9644f239f0de82cdb13bdb502959e16afadffa49ef1e1d2b9c9b5113e619817 - kafka.group.id: etl_session_record_kafka_to_kafka-20231221 - kafka.auto.offset.reset: latest - format: json - - -sinks: - clickhouse_sink: - type: clickhouse - properties: - host: 192.168.44.13:9001,192.168.44.14:9001,192.168.44.15:9001,192.168.44.16:9001 - table: tsg_galaxy_v3.dos_event_local - batch.size: 100000 - batch.interval: 30s - connection.user: e54c9568586180eede1506eecf3574e9 - connection.password: 86cf0e2ffba3f541a6c6761313e5cc7e - connection.connect_timeout: 30 - connection.query_timeout: 300 - -application: - env: - name: dos_event_kafka_to_clickhouse - shade.identifier: aes - pipeline: - object-reuse: true # [boolean] Object Reuse, default is false - topology: - - name: kafka_source - downstream: [clickhouse_sink] - - name: clickhouse_sink - - diff --git a/tsg_olap/upgrade/TSG-24.02.1/Groot-Stream/集中式/etl_proxy_event_kafka_to_clickhouse b/tsg_olap/upgrade/TSG-24.02.1/Groot-Stream/集中式/etl_proxy_event_kafka_to_clickhouse deleted file mode 100644 index bee28c8..0000000 --- a/tsg_olap/upgrade/TSG-24.02.1/Groot-Stream/集中式/etl_proxy_event_kafka_to_clickhouse +++ /dev/null @@ -1,158 +0,0 @@ -sources: - kafka_source: - type: kafka - # fields: # [array of object] Field List, if not set, all fields(Map) will be output. - properties: - topic: SESSION-RECORD - kafka.bootstrap.servers: "{{ kafka_source_servers }}" - kafka.session.timeout.ms: 60000 - kafka.max.poll.records: 3000 - kafka.max.partition.fetch.bytes: 31457280 - kafka.security.protocol: SSL - kafka.ssl.endpoint.identification.algorithm: "" - kafka.ssl.keystore.location: /data/tsg/olap/flink/topology/data/keystore.jks - kafka.ssl.keystore.password: 86cf0e2ffba3f541a6c6761313e5cc7e - kafka.ssl.truststore.location: /data/tsg/olap/flink/topology/data/truststore.jks - kafka.ssl.truststore.password: 86cf0e2ffba3f541a6c6761313e5cc7e - kafka.ssl.key.password: 86cf0e2ffba3f541a6c6761313e5cc7e - #kafka.security.protocol: SASL_PLAINTEXT - #kafka.sasl.mechanism: PLAIN - #kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a7ff0b2d3889a424249967b3870b50993d9644f239f0de82cdb13bdb502959e16afadffa49ef1e1d2b9c9b5113e619817 - kafka.group.id: etl_session_record_kafka_to_kafka-20231221 - kafka.auto.offset.reset: latest - format: json - -processing_pipelines: - etl_processor: # [object] Processing Pipeline - type: com.geedgenetworks.core.processor.projection.ProjectionProcessorImpl - remove_fields: - output_fields: - properties: - key: value - functions: # [array of object] Function List - - function: ASN_LOOKUP - lookup_fields: [server_ip] - output_fields: [server_asn] - parameters: - option: IP_TO_ASN - kb_name: tsg_ip_asn - - - function: ASN_LOOKUP - lookup_fields: [client_ip] - output_fields: [client_asn] - parameters: - option: IP_TO_ASN - kb_name: tsg_ip_asn - - - function: SNOWFLAKE_ID - lookup_fields: [''] - output_fields: [log_id] - parameters: - data_center_id_num: 1 - - - function: JSON_EXTRACT - lookup_fields: [device_tag] - output_fields: [data_center] - filter: - parameters: - value_expression: $.tags[?(@.tag=='data_center')][0].value - - - function: JSON_EXTRACT - lookup_fields: [device_tag] - output_fields: [device_group] - filter: - parameters: - value_expression: $.tags[?(@.tag=='device_group')][0].value - - - function: CURRENT_UNIX_TIMESTAMP - output_fields: [processing_time] - parameters: - precision: seconds - - - function: UNIX_TIMESTAMP_CONVERTER - lookup_fields: [__timestamp] - output_fields: [recv_time] - parameters: - precision: seconds - - - function: EVAL - output_fields: [ingestion_time] - parameters: - value_expression: recv_time - - - function: DOMAIN - lookup_fields: [http_host, ssl_sni, dtls_sni, quic_sni] - output_fields: [server_domain] - parameters: - option: FIRST_SIGNIFICANT_SUBDOMAIN - - - function: BASE64_DECODE_TO_STRING - output_fields: [mail_subject] - parameters: - value_field: mail_subject - charset_field: mail_subject_charset - - - function: BASE64_DECODE_TO_STRING - output_fields: [mail_attachment_name] - parameters: - value_field: mail_attachment_name - charset_field: mail_attachment_name_charset - - - function: PATH_COMBINE - lookup_fields: [rtp_pcap_path] - output_fields: [rtp_pcap_path] - parameters: - path: [props.hos.path, props.hos.bucket.name.traffic_file, rtp_pcap_path] - - - function: PATH_COMBINE - lookup_fields: [http_request_body] - output_fields: [http_request_body] - parameters: - path: [props.hos.path, props.hos.bucket.name.traffic_file, http_request_body] - - - function: PATH_COMBINE - lookup_fields: [http_response_body] - output_fields: [http_response_body] - parameters: - path: [props.hos.path, props.hos.bucket.name.traffic_file, http_response_body] - - - function: PATH_COMBINE - lookup_fields: [mail_eml_file] - output_fields: [mail_eml_file] - parameters: - path: [props.hos.path, props.hos.bucket.name.traffic_file, mail_eml_file] - - - function: PATH_COMBINE - lookup_fields: [packet_capture_file] - output_fields: [packet_capture_file] - parameters: - path: [props.hos.path, props.hos.bucket.name.traffic_file, packet_capture_file] - -sinks: - clickhouse_sink: - type: clickhouse - properties: - host: 192.168.44.13:9001,192.168.44.14:9001,192.168.44.15:9001,192.168.44.16:9001 - table: tsg_galaxy_v3.proxy_event_local - batch.size: 100000 - batch.interval: 30s - connection.user: e54c9568586180eede1506eecf3574e9 - connection.password: 86cf0e2ffba3f541a6c6761313e5cc7e - connection.connect_timeout: 30 - connection.query_timeout: 300 - -application: - - env: # [object] Environment Variables - name: etl_proxy_event_kafka_to_clickhouse # [string] Job Name - shade.identifier: aes - pipeline: - object-reuse: true # [boolean] Object Reuse, default is false - topology: - - name: kafka_source - downstream: [etl_processor] - - name: etl_processor - downstream: [clickhouse_sink] - - name: clickhouse_sink - - diff --git a/tsg_olap/upgrade/TSG-24.02.1/Groot-Stream/集中式/etl_session_record_kafka_to_clickhouse b/tsg_olap/upgrade/TSG-24.02.1/Groot-Stream/集中式/etl_session_record_kafka_to_clickhouse deleted file mode 100644 index 5866ca6..0000000 --- a/tsg_olap/upgrade/TSG-24.02.1/Groot-Stream/集中式/etl_session_record_kafka_to_clickhouse +++ /dev/null @@ -1,162 +0,0 @@ -sources: - kafka_source: - type: kafka - # fields: # [array of object] Field List, if not set, all fields(Map) will be output. - # watermark_timestamp: common_recv_time # [string] Watermark Field Name - # watermark_timestamp_unit: ms # [string] Watermark Unit, default is ms - # watermark_lag: 60 # [number] Watermark Lag, default is 60 - properties: - topic: SESSION-RECORD - kafka.bootstrap.servers: 192.168.44.11:9094,192.168.44.13:9094,192.168.44.14:9094,192.168.44.15:9094,192.168.44.16:9094 - kafka.session.timeout.ms: 60000 - kafka.max.poll.records: 3000 - kafka.max.partition.fetch.bytes: 31457280 - kafka.security.protocol: SASL_PLAINTEXT - kafka.ssl.keystore.location: - kafka.ssl.keystore.password: - kafka.ssl.truststore.location: - kafka.ssl.truststore.password: - kafka.ssl.key.password: - kafka.sasl.mechanism: PLAIN - kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a7ff0b2d3889a424249967b3870b50993d9644f239f0de82cdb13bdb502959e16afadffa49ef1e1d2b9c9b5113e619817 - kafka.buffer.memory: - kafka.group.id: etl_session_record_kafka_to_clickhouse-20230125 - kafka.auto.offset.reset: latest - kafka.max.request.size: - kafka.compression.type: none - format: json - -processing_pipelines: - etl_processor: # [object] Processing Pipeline - type: com.geedgenetworks.core.processor.projection.ProjectionProcessorImpl - remove_fields: - output_fields: - properties: - key: value - functions: # [array of object] Function List - - function: ASN_LOOKUP - lookup_fields: [server_ip] - output_fields: [server_asn] - parameters: - option: IP_TO_ASN - kb_name: tsg_ip_asn - - - function: ASN_LOOKUP - lookup_fields: [client_ip] - output_fields: [client_asn] - parameters: - option: IP_TO_ASN - kb_name: tsg_ip_asn - - - function: SNOWFLAKE_ID - lookup_fields: [''] - output_fields: [log_id] - parameters: - data_center_id_num: 1 - - - function: JSON_EXTRACT - lookup_fields: [device_tag] - output_fields: [data_center] - filter: - parameters: - value_expression: $.tags[?(@.tag=='data_center')][0].value - - - function: JSON_EXTRACT - lookup_fields: [device_tag] - output_fields: [device_group] - filter: - parameters: - value_expression: $.tags[?(@.tag=='device_group')][0].value - - - function: CURRENT_UNIX_TIMESTAMP - output_fields: [processing_time] - parameters: - precision: seconds - - - function: UNIX_TIMESTAMP_CONVERTER - lookup_fields: [__timestamp] - output_fields: [recv_time] - parameters: - precision: seconds - - - function: EVAL - output_fields: [ingestion_time] - parameters: - value_expression: recv_time - - - function: DOMAIN - lookup_fields: [http_host, ssl_sni, dtls_sni, quic_sni] - output_fields: [server_domain] - parameters: - option: FIRST_SIGNIFICANT_SUBDOMAIN - - - function: BASE64_DECODE_TO_STRING - output_fields: [mail_subject] - parameters: - value_field: mail_subject - charset_field: mail_subject_charset - - - function: BASE64_DECODE_TO_STRING - output_fields: [mail_attachment_name] - parameters: - value_field: mail_attachment_name - charset_field: mail_attachment_name_charset - - - function: PATH_COMBINE - lookup_fields: [rtp_pcap_path] - output_fields: [rtp_pcap_path] - parameters: - path: [props.hos.path, props.hos.bucket.name.traffic_file, rtp_pcap_path] - - - function: PATH_COMBINE - lookup_fields: [http_request_body] - output_fields: [http_request_body] - parameters: - path: [props.hos.path, props.hos.bucket.name.traffic_file, http_request_body] - - - function: PATH_COMBINE - lookup_fields: [http_response_body] - output_fields: [http_response_body] - parameters: - path: [props.hos.path, props.hos.bucket.name.traffic_file, http_response_body] - - - function: PATH_COMBINE - lookup_fields: [mail_eml_file] - output_fields: [mail_eml_file] - parameters: - path: [props.hos.path, props.hos.bucket.name.traffic_file, mail_eml_file] - - - function: PATH_COMBINE - lookup_fields: [packet_capture_file] - output_fields: [packet_capture_file] - parameters: - path: [props.hos.path, props.hos.bucket.name.traffic_file, packet_capture_file] - -sinks: - clickhouse_sink: - type: clickhouse - properties: - host: 192.168.44.13:9001,192.168.44.14:9001,192.168.44.15:9001,192.168.44.16:9001 - table: tsg_galaxy_v3.session_record_local - batch.size: 100000 - batch.interval: 30s - connection.user: e54c9568586180eede1506eecf3574e9 - connection.password: 86cf0e2ffba3f541a6c6761313e5cc7e - connection.connect_timeout: 30 - connection.query_timeout: 300 - -application: - - env: # [object] Environment Variables - name: etl_session_record_kafka_to_clickhouse # [string] Job Name - shade.identifier: aes - pipeline: - object-reuse: true # [boolean] Object Reuse, default is false - topology: - - name: kafka_source - downstream: [etl_processor] - - name: etl_processor - downstream: [clickhouse_sink] - - name: clickhouse_sink - - diff --git a/tsg_olap/upgrade/TSG-24.02.1/Groot-Stream/集中式/etl_transaction_record_kafka_to_clickhouse b/tsg_olap/upgrade/TSG-24.02.1/Groot-Stream/集中式/etl_transaction_record_kafka_to_clickhouse deleted file mode 100644 index 9cdc75e..0000000 --- a/tsg_olap/upgrade/TSG-24.02.1/Groot-Stream/集中式/etl_transaction_record_kafka_to_clickhouse +++ /dev/null @@ -1,156 +0,0 @@ -sources: - kafka_source: - type: kafka - # fields: # [array of object] Field List, if not set, all fields(Map) will be output. - properties: - topic: SESSION-RECORD - kafka.bootstrap.servers: "{{ kafka_source_servers }}" - kafka.session.timeout.ms: 60000 - kafka.max.poll.records: 3000 - kafka.max.partition.fetch.bytes: 31457280 - kafka.security.protocol: SSL - kafka.ssl.endpoint.identification.algorithm: "" - kafka.ssl.keystore.location: /data/tsg/olap/flink/topology/data/keystore.jks - kafka.ssl.keystore.password: 86cf0e2ffba3f541a6c6761313e5cc7e - kafka.ssl.truststore.location: /data/tsg/olap/flink/topology/data/truststore.jks - kafka.ssl.truststore.password: 86cf0e2ffba3f541a6c6761313e5cc7e - kafka.ssl.key.password: 86cf0e2ffba3f541a6c6761313e5cc7e - #kafka.security.protocol: SASL_PLAINTEXT - #kafka.sasl.mechanism: PLAIN - #kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a7ff0b2d3889a424249967b3870b50993d9644f239f0de82cdb13bdb502959e16afadffa49ef1e1d2b9c9b5113e619817 - kafka.group.id: etl_session_record_kafka_to_kafka-20231221 - kafka.auto.offset.reset: latest - format: json - -processing_pipelines: - etl_processor: # [object] Processing Pipeline - type: com.geedgenetworks.core.processor.projection.ProjectionProcessorImpl - remove_fields: - output_fields: - properties: - key: value - functions: # [array of object] Function List - - function: ASN_LOOKUP - lookup_fields: [server_ip] - output_fields: [server_asn] - parameters: - option: IP_TO_ASN - kb_name: tsg_ip_asn - - - function: ASN_LOOKUP - lookup_fields: [client_ip] - output_fields: [client_asn] - parameters: - option: IP_TO_ASN - kb_name: tsg_ip_asn - - - function: SNOWFLAKE_ID - lookup_fields: [''] - output_fields: [log_id] - parameters: - data_center_id_num: 1 - - - function: JSON_EXTRACT - lookup_fields: [device_tag] - output_fields: [data_center] - filter: - parameters: - value_expression: $.tags[?(@.tag=='data_center')][0].value - - - function: JSON_EXTRACT - lookup_fields: [device_tag] - output_fields: [device_group] - filter: - parameters: - value_expression: $.tags[?(@.tag=='device_group')][0].value - - - function: CURRENT_UNIX_TIMESTAMP - output_fields: [processing_time] - parameters: - precision: seconds - - - function: UNIX_TIMESTAMP_CONVERTER - lookup_fields: [__timestamp] - output_fields: [recv_time] - parameters: - precision: seconds - - - function: EVAL - output_fields: [ingestion_time] - parameters: - value_expression: recv_time - - - function: DOMAIN - lookup_fields: [http_host, ssl_sni, dtls_sni, quic_sni] - output_fields: [server_domain] - parameters: - option: FIRST_SIGNIFICANT_SUBDOMAIN - - - function: BASE64_DECODE_TO_STRING - output_fields: [mail_subject] - parameters: - value_field: mail_subject - charset_field: mail_subject_charset - - - function: BASE64_DECODE_TO_STRING - output_fields: [mail_attachment_name] - parameters: - value_field: mail_attachment_name - charset_field: mail_attachment_name_charset - - - function: PATH_COMBINE - lookup_fields: [rtp_pcap_path] - output_fields: [rtp_pcap_path] - parameters: - path: [props.hos.path, props.hos.bucket.name.traffic_file, rtp_pcap_path] - - - function: PATH_COMBINE - lookup_fields: [http_request_body] - output_fields: [http_request_body] - parameters: - path: [props.hos.path, props.hos.bucket.name.traffic_file, http_request_body] - - - function: PATH_COMBINE - lookup_fields: [http_response_body] - output_fields: [http_response_body] - parameters: - path: [props.hos.path, props.hos.bucket.name.traffic_file, http_response_body] - - - function: PATH_COMBINE - lookup_fields: [mail_eml_file] - output_fields: [mail_eml_file] - parameters: - path: [props.hos.path, props.hos.bucket.name.traffic_file, mail_eml_file] - - - function: PATH_COMBINE - lookup_fields: [packet_capture_file] - output_fields: [packet_capture_file] - parameters: - path: [props.hos.path, props.hos.bucket.name.traffic_file, packet_capture_file] - -sinks: - clickhouse_sink: - type: clickhouse - properties: - host: 192.168.44.13:9001,192.168.44.14:9001,192.168.44.15:9001,192.168.44.16:9001 - table: tsg_galaxy_v3.transaction_record_local - batch.size: 100000 - batch.interval: 30s - connection.user: e54c9568586180eede1506eecf3574e9 - connection.password: 86cf0e2ffba3f541a6c6761313e5cc7e - connection.connect_timeout: 30 - connection.query_timeout: 300 - -application: - - env: # [object] Environment Variables - name: etl_transaction_record_kafka_to_clickhouse # [string] Job Name - shade.identifier: aes - pipeline: - object-reuse: true # [boolean] Object Reuse, default is false - topology: - - name: kafka_source - downstream: [etl_processor] - - name: etl_processor - downstream: [clickhouse_sink] - - name: clickhouse_sink diff --git a/tsg_olap/upgrade/TSG-24.02.1/Groot-Stream/集中式/etl_voip_record_kafka_to_clickhouse b/tsg_olap/upgrade/TSG-24.02.1/Groot-Stream/集中式/etl_voip_record_kafka_to_clickhouse deleted file mode 100644 index 3fa47e9..0000000 --- a/tsg_olap/upgrade/TSG-24.02.1/Groot-Stream/集中式/etl_voip_record_kafka_to_clickhouse +++ /dev/null @@ -1,157 +0,0 @@ -sources: - kafka_source: - type: kafka - # fields: # [array of object] Field List, if not set, all fields(Map) will be output. - properties: - topic: SESSION-RECORD - kafka.bootstrap.servers: "{{ kafka_source_servers }}" - kafka.session.timeout.ms: 60000 - kafka.max.poll.records: 3000 - kafka.max.partition.fetch.bytes: 31457280 - kafka.security.protocol: SSL - kafka.ssl.endpoint.identification.algorithm: "" - kafka.ssl.keystore.location: /data/tsg/olap/flink/topology/data/keystore.jks - kafka.ssl.keystore.password: 86cf0e2ffba3f541a6c6761313e5cc7e - kafka.ssl.truststore.location: /data/tsg/olap/flink/topology/data/truststore.jks - kafka.ssl.truststore.password: 86cf0e2ffba3f541a6c6761313e5cc7e - kafka.ssl.key.password: 86cf0e2ffba3f541a6c6761313e5cc7e - #kafka.security.protocol: SASL_PLAINTEXT - #kafka.sasl.mechanism: PLAIN - #kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a7ff0b2d3889a424249967b3870b50993d9644f239f0de82cdb13bdb502959e16afadffa49ef1e1d2b9c9b5113e619817 - kafka.group.id: etl_session_record_kafka_to_kafka-20231221 - kafka.auto.offset.reset: latest - format: json - -processing_pipelines: - etl_processor: # [object] Processing Pipeline - type: com.geedgenetworks.core.processor.projection.ProjectionProcessorImpl - remove_fields: - output_fields: - - functions: # [array of object] Function List - - function: ASN_LOOKUP - lookup_fields: [server_ip] - output_fields: [server_asn] - parameters: - option: IP_TO_ASN - kb_name: tsg_ip_asn - - - function: ASN_LOOKUP - lookup_fields: [client_ip] - output_fields: [client_asn] - parameters: - option: IP_TO_ASN - kb_name: tsg_ip_asn - - - function: SNOWFLAKE_ID - lookup_fields: [''] - output_fields: [log_id] - parameters: - data_center_id_num: 1 - - - function: JSON_EXTRACT - lookup_fields: [device_tag] - output_fields: [data_center] - filter: - parameters: - value_expression: $.tags[?(@.tag=='data_center')][0].value - - - function: JSON_EXTRACT - lookup_fields: [device_tag] - output_fields: [device_group] - filter: - parameters: - value_expression: $.tags[?(@.tag=='device_group')][0].value - - - function: CURRENT_UNIX_TIMESTAMP - output_fields: [processing_time] - parameters: - precision: seconds - - - function: UNIX_TIMESTAMP_CONVERTER - lookup_fields: [__timestamp] - output_fields: [recv_time] - parameters: - precision: seconds - - - function: EVAL - output_fields: [ingestion_time] - parameters: - value_expression: recv_time - - - function: DOMAIN - lookup_fields: [http_host, ssl_sni, dtls_sni, quic_sni] - output_fields: [server_domain] - parameters: - option: FIRST_SIGNIFICANT_SUBDOMAIN - - - function: BASE64_DECODE_TO_STRING - output_fields: [mail_subject] - parameters: - value_field: mail_subject - charset_field: mail_subject_charset - - - function: BASE64_DECODE_TO_STRING - output_fields: [mail_attachment_name] - parameters: - value_field: mail_attachment_name - charset_field: mail_attachment_name_charset - - - function: PATH_COMBINE - lookup_fields: [rtp_pcap_path] - output_fields: [rtp_pcap_path] - parameters: - path: [props.hos.path, props.hos.bucket.name.traffic_file, rtp_pcap_path] - - - function: PATH_COMBINE - lookup_fields: [http_request_body] - output_fields: [http_request_body] - parameters: - path: [props.hos.path, props.hos.bucket.name.traffic_file, http_request_body] - - - function: PATH_COMBINE - lookup_fields: [http_response_body] - output_fields: [http_response_body] - parameters: - path: [props.hos.path, props.hos.bucket.name.traffic_file, http_response_body] - - - function: PATH_COMBINE - lookup_fields: [mail_eml_file] - output_fields: [mail_eml_file] - parameters: - path: [props.hos.path, props.hos.bucket.name.traffic_file, mail_eml_file] - - - function: PATH_COMBINE - lookup_fields: [packet_capture_file] - output_fields: [packet_capture_file] - parameters: - path: [props.hos.path, props.hos.bucket.name.traffic_file, packet_capture_file] - -sinks: - clickhouse_sink: - type: clickhouse - properties: - host: 192.168.44.13:9001,192.168.44.14:9001,192.168.44.15:9001,192.168.44.16:9001 - table: tsg_galaxy_v3.voip_record_local - batch.size: 100000 - batch.interval: 30s - connection.user: e54c9568586180eede1506eecf3574e9 - connection.password: 86cf0e2ffba3f541a6c6761313e5cc7e - connection.connect_timeout: 30 - connection.query_timeout: 300 - -application: - - env: # [object] Environment Variables - name: etl_voip_record_kafka_to_clickhouse # [string] Job Name - shade.identifier: aes - pipeline: - object-reuse: true # [boolean] Object Reuse, default is false - topology: - - name: kafka_source - downstream: [etl_processor] - - name: etl_processor - downstream: [clickhouse_sink] - - name: clickhouse_sink - -