diff --git a/README.md b/README.md
index a6800ff..9a263e1 100644
--- a/README.md
+++ b/README.md
@@ -6,3 +6,9 @@
 |:-----------------|:--------------|
 | shell-scripts | Stores installation and initialization scripts. |
 | config-templates | Stores configuration file templates. |
+
+## File Naming
+
+- `[index]_[create]_[project]_[feature]`: database or script initialization. Example: `001_create_tsg_olap_clickhouse_table.sql`.
+- `[index]_[upgrade]_[from version]_to_[current version]_[project]_[feature]`: upgrade file; upgrade records for multiple versions may be kept in a single file. Example for an LTS release: `101_upgrade_v2402_v2409_tsg_olap_clickhouse_table.sql`.
+
diff --git a/clickhouse/tsg_olap_clickhouse_ddl.sql b/clickhouse/001_create_tsg_olap_clickhouse_table.sql
similarity index 100%
rename from clickhouse/tsg_olap_clickhouse_ddl.sql
rename to clickhouse/001_create_tsg_olap_clickhouse_table.sql
diff --git a/clickhouse/tsg_olap_clickhouse_ddl_check.sql b/clickhouse/002_check_tsg_olap_clickhouse_table.sql
similarity index 100%
rename from clickhouse/tsg_olap_clickhouse_ddl_check.sql
rename to clickhouse/002_check_tsg_olap_clickhouse_table.sql
diff --git a/config-templates/README.md b/config-templates/README.md
index e69de29..2c45905 100644
--- a/config-templates/README.md
+++ b/config-templates/README.md
@@ -0,0 +1 @@
+Global installation configuration files
\ No newline at end of file
diff --git a/druid/README.md b/druid/README.md
index e69de29..d0aea9c 100644
--- a/druid/README.md
+++ b/druid/README.md
@@ -0,0 +1 @@
+Druid ingestion tasks
\ No newline at end of file
diff --git a/file-chunk-combiner/templates/agg_traffic_file_chunk_combiner b/file-chunk-combiner/agg_traffic_file_chunk_combiner
similarity index 100%
rename from file-chunk-combiner/templates/agg_traffic_file_chunk_combiner
rename to file-chunk-combiner/agg_traffic_file_chunk_combiner
diff --git a/file-chunk-combiner/集群/config/agg_traffic_eml_file_chunk_combiner b/file-chunk-combiner/cluster/config/agg_traffic_eml_file_chunk_combiner
similarity index 100%
rename from file-chunk-combiner/集群/config/agg_traffic_eml_file_chunk_combiner
rename to file-chunk-combiner/cluster/config/agg_traffic_eml_file_chunk_combiner
diff --git a/file-chunk-combiner/集群/config/agg_traffic_http_file_chunk_combiner b/file-chunk-combiner/cluster/config/agg_traffic_http_file_chunk_combiner
similarity index 100%
rename from file-chunk-combiner/集群/config/agg_traffic_http_file_chunk_combiner
rename to file-chunk-combiner/cluster/config/agg_traffic_http_file_chunk_combiner
diff --git a/file-chunk-combiner/集群/config/agg_traffic_policy_capture_file_chunk_combiner b/file-chunk-combiner/cluster/config/agg_traffic_policy_capture_file_chunk_combiner
similarity index 100%
rename from file-chunk-combiner/集群/config/agg_traffic_policy_capture_file_chunk_combiner
rename to file-chunk-combiner/cluster/config/agg_traffic_policy_capture_file_chunk_combiner
diff --git a/file-chunk-combiner/集群/config/agg_traffic_rtp_file_chunk_combiner b/file-chunk-combiner/cluster/config/agg_traffic_rtp_file_chunk_combiner
similarity index 100%
rename from file-chunk-combiner/集群/config/agg_traffic_rtp_file_chunk_combiner
rename to file-chunk-combiner/cluster/config/agg_traffic_rtp_file_chunk_combiner
diff --git a/file-chunk-combiner/集群/env/agg_traffic_eml_file_chunk_combiner.sh b/file-chunk-combiner/cluster/env/agg_traffic_eml_file_chunk_combiner.sh
similarity index 100%
rename from file-chunk-combiner/集群/env/agg_traffic_eml_file_chunk_combiner.sh
rename to file-chunk-combiner/cluster/env/agg_traffic_eml_file_chunk_combiner.sh
diff --git a/file-chunk-combiner/集群/env/agg_traffic_http_file_chunk_combiner.sh b/file-chunk-combiner/cluster/env/agg_traffic_http_file_chunk_combiner.sh
similarity index 100%
rename from file-chunk-combiner/集群/env/agg_traffic_http_file_chunk_combiner.sh
rename to file-chunk-combiner/cluster/env/agg_traffic_http_file_chunk_combiner.sh
diff --git a/file-chunk-combiner/集群/env/agg_traffic_policy_capture_file_chunk_combiner.sh b/file-chunk-combiner/cluster/env/agg_traffic_policy_capture_file_chunk_combiner.sh
similarity index 100%
rename from file-chunk-combiner/集群/env/agg_traffic_policy_capture_file_chunk_combiner.sh
rename to file-chunk-combiner/cluster/env/agg_traffic_policy_capture_file_chunk_combiner.sh
diff --git a/file-chunk-combiner/集群/env/agg_traffic_rtp_file_chunk_combiner.sh b/file-chunk-combiner/cluster/env/agg_traffic_rtp_file_chunk_combiner.sh
similarity index 100%
rename from file-chunk-combiner/集群/env/agg_traffic_rtp_file_chunk_combiner.sh
rename to file-chunk-combiner/cluster/env/agg_traffic_rtp_file_chunk_combiner.sh
diff --git a/file-chunk-combiner/单机/config/agg_traffic_eml_file_chunk_combiner b/file-chunk-combiner/standalone/config/agg_traffic_eml_file_chunk_combiner
similarity index 100%
rename from file-chunk-combiner/单机/config/agg_traffic_eml_file_chunk_combiner
rename to file-chunk-combiner/standalone/config/agg_traffic_eml_file_chunk_combiner
diff --git a/file-chunk-combiner/单机/config/agg_traffic_http_file_chunk_combiner b/file-chunk-combiner/standalone/config/agg_traffic_http_file_chunk_combiner
similarity index 100%
rename from file-chunk-combiner/单机/config/agg_traffic_http_file_chunk_combiner
rename to file-chunk-combiner/standalone/config/agg_traffic_http_file_chunk_combiner
diff --git a/file-chunk-combiner/单机/config/agg_traffic_policy_capture_file_chunk_combiner b/file-chunk-combiner/standalone/config/agg_traffic_policy_capture_file_chunk_combiner
similarity index 100%
rename from file-chunk-combiner/单机/config/agg_traffic_policy_capture_file_chunk_combiner
rename to file-chunk-combiner/standalone/config/agg_traffic_policy_capture_file_chunk_combiner
diff --git a/file-chunk-combiner/单机/config/agg_traffic_rtp_file_chunk_combiner b/file-chunk-combiner/standalone/config/agg_traffic_rtp_file_chunk_combiner
similarity index 100%
rename from file-chunk-combiner/单机/config/agg_traffic_rtp_file_chunk_combiner
rename to file-chunk-combiner/standalone/config/agg_traffic_rtp_file_chunk_combiner
diff --git a/file-chunk-combiner/单机/env/agg_traffic_eml_file_chunk_combiner.sh b/file-chunk-combiner/standalone/env/agg_traffic_eml_file_chunk_combiner.sh
similarity index 100%
rename from file-chunk-combiner/单机/env/agg_traffic_eml_file_chunk_combiner.sh
rename to file-chunk-combiner/standalone/env/agg_traffic_eml_file_chunk_combiner.sh
diff --git a/file-chunk-combiner/单机/env/agg_traffic_http_file_chunk_combiner.sh b/file-chunk-combiner/standalone/env/agg_traffic_http_file_chunk_combiner.sh
similarity index 100%
rename from file-chunk-combiner/单机/env/agg_traffic_http_file_chunk_combiner.sh
rename to file-chunk-combiner/standalone/env/agg_traffic_http_file_chunk_combiner.sh
diff --git a/file-chunk-combiner/单机/env/agg_traffic_policy_capture_file_chunk_combiner.sh b/file-chunk-combiner/standalone/env/agg_traffic_policy_capture_file_chunk_combiner.sh
similarity index 100%
rename from file-chunk-combiner/单机/env/agg_traffic_policy_capture_file_chunk_combiner.sh
rename to file-chunk-combiner/standalone/env/agg_traffic_policy_capture_file_chunk_combiner.sh
diff --git a/file-chunk-combiner/单机/env/agg_traffic_rtp_file_chunk_combiner.sh b/file-chunk-combiner/standalone/env/agg_traffic_rtp_file_chunk_combiner.sh
similarity index 100%
rename from file-chunk-combiner/单机/env/agg_traffic_rtp_file_chunk_combiner.sh
rename to file-chunk-combiner/standalone/env/agg_traffic_rtp_file_chunk_combiner.sh
diff --git a/groot-stream/README.md b/groot-stream/README.md
new file mode 100644
index 0000000..c2ef994
--- /dev/null
+++ b/groot-stream/README.md
@@ -0,0 +1,31 @@
+# Configuration Template Examples
+
+## session_record.yaml.j2 (session record ETL scenario)
+
+- Multi-datacenter deployment: each datacenter's Data Transporter pre-processes the records, which are then aggregated centrally at the national datacenter (NDC)
+  - etl_session_record_kafka_to_ndc_kafka (A-DT)
+    - Topology: kafka_source -> etl_processor -> kafka_sink
+    - Data Flow: SESSION-RECORD -> SESSION-RECORD-PROCESSED
+- Multi-datacenter deployment: the national datacenter loads session records and writes them into ClickHouse
+  - session_record_processed_kafka_to_clickhouse (A-NDC)
+    - Topology: kafka_source -> clickhouse_sink
+    - Data Flow: SESSION-RECORD-PROCESSED -> session_record_local
+- Centralized deployment: ingest session records, pre-process them, and write them into ClickHouse
+  - etl_session_record_kafka_to_clickhouse (B)
+    - Topology: kafka_source -> etl_processor -> clickhouse_sink
+    - Data Flow: SESSION-RECORD -> session_record_local
+
+## data_transporter.yaml.j2 (data backhaul scenario)
+
+- troubleshooting_file_stream_kafka_to_ndc_kafka
+  - Topology: kafka_source -> kafka_sink (format: raw)
+  - Data Flow: TROUBLESHOOTING-FILE-STREAM-RECORD -> TROUBLESHOOTING-FILE-STREAM-RECORD
+
+## realtime_log_streaming_cn_session_record.yaml.template (pushing to other vendors / third parties)
+
+`install_cn_udf.sh` installs the CN UDFs; `grootstream.yaml` defines the CN knowledge bases.
+
+- etl_session_record_kafka_to_cn_kafka
+  - Topology: kafka_source -> etl_processor -> post_output_field_processor -> kafka_sink
+  - Data Flow: SESSION-RECORD(SESSION-RECORD-PROCESSED) -> SESSION-RECORD-CN
+
diff --git a/groot-stream/multi-datacenter-examples/datacenter_dt/dos_sketch_kafka_to_ndc_kafka b/groot-stream/multi-datacenter-examples/datacenter_dt/dos_sketch_kafka_to_ndc_kafka
new file mode 100644
index 0000000..b131c5c
--- /dev/null
+++ b/groot-stream/multi-datacenter-examples/datacenter_dt/dos_sketch_kafka_to_ndc_kafka
@@ -0,0 +1,49 @@
+sources:
+  kafka_source:
+    type: kafka
+    properties:
+      topic: DOS-SKETCH-RECORD
+      kafka.bootstrap.servers: "{{ kafka_source_servers }}"
+      kafka.client.id: DOS-SKETCH-RECORD
+      kafka.session.timeout.ms: 60000
+      kafka.max.poll.records: 3000
+      kafka.max.partition.fetch.bytes: 31457280
+      kafka.security.protocol: SASL_PLAINTEXT
+      kafka.sasl.mechanism: PLAIN
+      kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
+      kafka.group.id: dos_sketch_record_kafka_to_kafka
+      kafka.auto.offset.reset: latest
+    format: raw
+
+
+sinks:
+  kafka_sink:
+    type: kafka
+    properties:
+      topic: DOS-SKETCH-RECORD
+      kafka.bootstrap.servers: "{{ kafka_sink_servers }}"
+      kafka.client.id: DOS-SKETCH-RECORD
+      kafka.retries: 0
+      kafka.linger.ms: 10
+      kafka.request.timeout.ms: 30000
+      kafka.batch.size: 262144
+      kafka.buffer.memory: 134217728
+      kafka.max.request.size: 10485760
+      kafka.compression.type: snappy
+      kafka.security.protocol: SASL_PLAINTEXT
+      kafka.sasl.mechanism: PLAIN
+      kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
+    format: raw
+
+
+application:
+  env:
+    name: dos_sketch_record_kafka_to_kafka
+    shade.identifier: aes
+  pipeline:
+    object-reuse: true # [boolean] Object Reuse, default is false
+  topology:
+    - name: kafka_source
+      downstream: [kafka_sink]
+
- name: kafka_sink + diff --git a/groot-stream/multi-datacenter-examples/datacenter_dt/etl_proxy_event_kafka_to_ndc_kafka b/groot-stream/multi-datacenter-examples/datacenter_dt/etl_proxy_event_kafka_to_ndc_kafka new file mode 100644 index 0000000..512ec94 --- /dev/null +++ b/groot-stream/multi-datacenter-examples/datacenter_dt/etl_proxy_event_kafka_to_ndc_kafka @@ -0,0 +1,154 @@ +sources: + kafka_source: + type: kafka + properties: + topic: PROXY-EVENT + kafka.bootstrap.servers: "{{ kafka_source_servers }}" + kafka.client.id: PROXY-EVENT + kafka.session.timeout.ms: 60000 + kafka.max.poll.records: 3000 + kafka.max.partition.fetch.bytes: 31457280 + kafka.security.protocol: SASL_PLAINTEXT + kafka.sasl.mechanism: PLAIN + kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252 + kafka.group.id: etl_proxy_event_kafka_to_ndc_kafka + kafka.auto.offset.reset: latest + format: json + +processing_pipelines: + etl_processor: # [object] Processing Pipeline + type: com.geedgenetworks.core.processor.projection.ProjectionProcessorImpl + remove_fields: + output_fields: + properties: + key: value + functions: # [array of object] Function List + + - function: ASN_LOOKUP + lookup_fields: [server_ip] + output_fields: [server_asn] + parameters: + option: IP_TO_ASN + kb_name: tsg_ip_asn + + - function: ASN_LOOKUP + lookup_fields: [client_ip] + output_fields: [client_asn] + parameters: + option: IP_TO_ASN + kb_name: tsg_ip_asn + + - function: SNOWFLAKE_ID + lookup_fields: [''] + output_fields: [log_id] + parameters: + data_center_id_num: 1 + + - function: JSON_EXTRACT + lookup_fields: [device_tag] + output_fields: [data_center] + filter: + parameters: + value_expression: $.tags[?(@.tag=='data_center')][0].value + + - function: JSON_EXTRACT + lookup_fields: [device_tag] + output_fields: [device_group] + filter: + parameters: + value_expression: $.tags[?(@.tag=='device_group')][0].value + + - function: CURRENT_UNIX_TIMESTAMP + output_fields: [processing_time] + parameters: + precision: seconds + + - function: UNIX_TIMESTAMP_CONVERTER + lookup_fields: [__timestamp] + output_fields: [recv_time] + parameters: + precision: seconds + + - function: EVAL + output_fields: [ingestion_time] + parameters: + value_expression: recv_time + + + - function: BASE64_DECODE_TO_STRING + output_fields: [mail_subject] + parameters: + value_field: mail_subject + charset_field: mail_subject_charset + + - function: BASE64_DECODE_TO_STRING + output_fields: [mail_attachment_name] + parameters: + value_field: mail_attachment_name + charset_field: mail_attachment_name_charset + + - function: PATH_COMBINE + lookup_fields: [rtp_pcap_path] + output_fields: [rtp_pcap_path] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, rtp_pcap_path] + + - function: PATH_COMBINE + lookup_fields: [http_request_body] + output_fields: [http_request_body] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, http_request_body] + + - function: PATH_COMBINE + lookup_fields: [http_response_body] + output_fields: [http_response_body] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, http_response_body] + + - function: PATH_COMBINE + lookup_fields: [mail_eml_file] + output_fields: [mail_eml_file] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, mail_eml_file] + + - function: 
PATH_COMBINE + lookup_fields: [packet_capture_file] + output_fields: [packet_capture_file] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, packet_capture_file] + +sinks: + kafka_sink: + type: kafka + properties: + topic: PROXY-EVENT-PROCESSED + kafka.bootstrap.servers: "{{ kafka_sink_servers }}" + kafka.client.id: PROXY-EVENT-PROCESSED + kafka.retries: 0 + kafka.linger.ms: 10 + kafka.request.timeout.ms: 30000 + kafka.batch.size: 262144 + kafka.buffer.memory: 134217728 + kafka.max.request.size: 10485760 + kafka.compression.type: snappy + kafka.security.protocol: SASL_PLAINTEXT + kafka.sasl.mechanism: PLAIN + kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252 + format: json + + +application: + + env: # [object] Environment Variables + name: etl_proxy_event_kafka_to_ndc_kafka # [string] Job Name + shade.identifier: aes + pipeline: + object-reuse: true # [boolean] Object Reuse, default is false + topology: + - name: kafka_source + downstream: [etl_processor] + - name: etl_processor + downstream: [kafka_sink] + - name: kafka_sink + + diff --git a/groot-stream/multi-datacenter-examples/datacenter_dt/etl_session_record_kafka_to_ndc_kafka b/groot-stream/multi-datacenter-examples/datacenter_dt/etl_session_record_kafka_to_ndc_kafka new file mode 100644 index 0000000..1aa840f --- /dev/null +++ b/groot-stream/multi-datacenter-examples/datacenter_dt/etl_session_record_kafka_to_ndc_kafka @@ -0,0 +1,154 @@ +sources: + kafka_source: + type: kafka + properties: + topic: SESSION-RECORD + kafka.bootstrap.servers: "{{ kafka_source_servers }}" + kafka.client.id: SESSION-RECORD + kafka.session.timeout.ms: 60000 + kafka.max.poll.records: 3000 + kafka.max.partition.fetch.bytes: 31457280 + kafka.security.protocol: SASL_PLAINTEXT + kafka.sasl.mechanism: PLAIN + kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252 + kafka.group.id: etl_session_record_kafka_to_ndc_kafka + kafka.auto.offset.reset: latest + format: json + +processing_pipelines: + etl_processor: + type: projection + functions: + + - function: ASN_LOOKUP + lookup_fields: [server_ip] + output_fields: [server_asn] + parameters: + option: IP_TO_ASN + kb_name: tsg_ip_asn + + - function: ASN_LOOKUP + lookup_fields: [client_ip] + output_fields: [client_asn] + parameters: + option: IP_TO_ASN + kb_name: tsg_ip_asn + + - function: SNOWFLAKE_ID + lookup_fields: [''] + output_fields: [log_id] + parameters: + data_center_id_num: 1 + + - function: JSON_EXTRACT + lookup_fields: [device_tag] + output_fields: [data_center] + filter: + parameters: + value_expression: $.tags[?(@.tag=='data_center')][0].value + + - function: JSON_EXTRACT + lookup_fields: [device_tag] + output_fields: [device_group] + filter: + parameters: + value_expression: $.tags[?(@.tag=='device_group')][0].value + + - function: CURRENT_UNIX_TIMESTAMP + output_fields: [processing_time] + parameters: + precision: seconds + + - function: UNIX_TIMESTAMP_CONVERTER + lookup_fields: [__timestamp] + output_fields: [recv_time] + parameters: + precision: seconds + + - function: EVAL + output_fields: [ingestion_time] + parameters: + value_expression: recv_time + + - function: BASE64_DECODE_TO_STRING 
+ output_fields: [mail_subject] + parameters: + value_field: mail_subject + charset_field: mail_subject_charset + + - function: BASE64_DECODE_TO_STRING + output_fields: [mail_attachment_name] + parameters: + value_field: mail_attachment_name + charset_field: mail_attachment_name_charset + + - function: PATH_COMBINE + lookup_fields: [rtp_pcap_path] + output_fields: [rtp_pcap_path] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, rtp_pcap_path] + + - function: PATH_COMBINE + lookup_fields: [http_request_body] + output_fields: [http_request_body] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, http_request_body] + + - function: PATH_COMBINE + lookup_fields: [http_response_body] + output_fields: [http_response_body] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, http_response_body] + + - function: PATH_COMBINE + lookup_fields: [mail_eml_file] + output_fields: [mail_eml_file] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, mail_eml_file] + + - function: PATH_COMBINE + lookup_fields: [packet_capture_file] + output_fields: [packet_capture_file] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, packet_capture_file] + +sinks: + kafka_sink: + type: kafka + properties: + topic: SESSION-RECORD-PROCESSED + kafka.bootstrap.servers: "{{ kafka_sink_servers }}" + kafka.client.id: SESSION-RECORD-PROCESSED + kafka.retries: 0 + kafka.linger.ms: 10 + kafka.request.timeout.ms: 30000 + kafka.batch.size: 262144 + kafka.buffer.memory: 134217728 + kafka.max.request.size: 10485760 + kafka.compression.type: snappy + kafka.security.protocol: SASL_PLAINTEXT + kafka.sasl.mechanism: PLAIN + kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252 + format: json + + +application: + + env: + name: etl_session_record_kafka_to_ndc_kafka + shade.identifier: aes + pipeline: + object-reuse: true + properties: + hos.bucket.name.rtp_file: traffic_rtp_file_bucket + hos.bucket.name.http_file: traffic_http_file_bucket + hos.bucket.name.eml_file: traffic_eml_file_bucket + hos.bucket.name.policy_capture_file: traffic_policy_capture_file_bucket + topology: + - name: kafka_source + downstream: [etl_processor] + - name: etl_processor + downstream: [kafka_sink] + - name: kafka_sink + + diff --git a/groot-stream/multi-datacenter-examples/datacenter_dt/etl_transaction_record_kafka_to_ndc_kafka b/groot-stream/multi-datacenter-examples/datacenter_dt/etl_transaction_record_kafka_to_ndc_kafka new file mode 100644 index 0000000..5f9c317 --- /dev/null +++ b/groot-stream/multi-datacenter-examples/datacenter_dt/etl_transaction_record_kafka_to_ndc_kafka @@ -0,0 +1,157 @@ +sources: + kafka_source: + type: kafka + # fields: # [array of object] Field List, if not set, all fields(Map) will be output. 
+ # watermark_timestamp: common_recv_time # [string] Watermark Field Name + # watermark_timestamp_unit: ms # [string] Watermark Unit, default is ms + # watermark_lag: 60 # [number] Watermark Lag, default is 60 + properties: + topic: TRANSACTION-RECORD + kafka.bootstrap.servers: "{{ kafka_source_servers }}" + kafka.client.id: TRANSACTION-RECORD + kafka.session.timeout.ms: 60000 + kafka.max.poll.records: 3000 + kafka.max.partition.fetch.bytes: 31457280 + kafka.security.protocol: SASL_PLAINTEXT + kafka.sasl.mechanism: PLAIN + kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252 + kafka.group.id: etl_transaction_record_kafka_to_ndc_kafka + kafka.auto.offset.reset: latest + format: json + +processing_pipelines: + etl_processor: # [object] Processing Pipeline + type: com.geedgenetworks.core.processor.projection.ProjectionProcessorImpl + remove_fields: + output_fields: + properties: + key: value + functions: # [array of object] Function List + + - function: ASN_LOOKUP + lookup_fields: [server_ip] + output_fields: [server_asn] + parameters: + option: IP_TO_ASN + kb_name: tsg_ip_asn + + - function: ASN_LOOKUP + lookup_fields: [client_ip] + output_fields: [client_asn] + parameters: + option: IP_TO_ASN + kb_name: tsg_ip_asn + + - function: SNOWFLAKE_ID + lookup_fields: [''] + output_fields: [log_id] + parameters: + data_center_id_num: 1 + + - function: JSON_EXTRACT + lookup_fields: [device_tag] + output_fields: [data_center] + filter: + parameters: + value_expression: $.tags[?(@.tag=='data_center')][0].value + + - function: JSON_EXTRACT + lookup_fields: [device_tag] + output_fields: [device_group] + filter: + parameters: + value_expression: $.tags[?(@.tag=='device_group')][0].value + + - function: CURRENT_UNIX_TIMESTAMP + output_fields: [processing_time] + parameters: + precision: seconds + + - function: UNIX_TIMESTAMP_CONVERTER + lookup_fields: [__timestamp] + output_fields: [recv_time] + parameters: + precision: seconds + + - function: EVAL + output_fields: [ingestion_time] + parameters: + value_expression: recv_time + + - function: BASE64_DECODE_TO_STRING + output_fields: [mail_subject] + parameters: + value_field: mail_subject + charset_field: mail_subject_charset + + - function: BASE64_DECODE_TO_STRING + output_fields: [mail_attachment_name] + parameters: + value_field: mail_attachment_name + charset_field: mail_attachment_name_charset + + - function: PATH_COMBINE + lookup_fields: [rtp_pcap_path] + output_fields: [rtp_pcap_path] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, rtp_pcap_path] + + - function: PATH_COMBINE + lookup_fields: [http_request_body] + output_fields: [http_request_body] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, http_request_body] + + - function: PATH_COMBINE + lookup_fields: [http_response_body] + output_fields: [http_response_body] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, http_response_body] + + - function: PATH_COMBINE + lookup_fields: [mail_eml_file] + output_fields: [mail_eml_file] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, mail_eml_file] + + - function: PATH_COMBINE + lookup_fields: [packet_capture_file] + output_fields: [packet_capture_file] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, packet_capture_file] + +sinks: + 
kafka_sink: + type: kafka + properties: + topic: TRANSACTION-RECORD-PROCESSED + kafka.bootstrap.servers: "{{ kafka_sink_servers }}" + kafka.client.id: TRANSACTION-RECORD-PROCESSED + kafka.retries: 0 + kafka.linger.ms: 10 + kafka.request.timeout.ms: 30000 + kafka.batch.size: 262144 + kafka.buffer.memory: 134217728 + kafka.max.request.size: 10485760 + kafka.compression.type: snappy + kafka.security.protocol: SASL_PLAINTEXT + kafka.sasl.mechanism: PLAIN + kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252 + format: json + + +application: + + env: # [object] Environment Variables + name: etl_transaction_record_kafka_to_ndc_kafka # [string] Job Name + shade.identifier: aes + pipeline: + object-reuse: true # [boolean] Object Reuse, default is false + topology: + - name: kafka_source + downstream: [etl_processor] + - name: etl_processor + downstream: [kafka_sink] + - name: kafka_sink + + diff --git a/groot-stream/multi-datacenter-examples/datacenter_dt/network_traffic_metrics_kafka_to_ndc_kafka b/groot-stream/multi-datacenter-examples/datacenter_dt/network_traffic_metrics_kafka_to_ndc_kafka new file mode 100644 index 0000000..a419fbd --- /dev/null +++ b/groot-stream/multi-datacenter-examples/datacenter_dt/network_traffic_metrics_kafka_to_ndc_kafka @@ -0,0 +1,48 @@ +sources: + kafka_source: + type: kafka + properties: + topic: NETWORK-TRAFFIC-METRIC + kafka.bootstrap.servers: "{{ kafka_source_servers }}" + kafka.client.id: NETWORK-TRAFFIC-METRIC + kafka.session.timeout.ms: 60000 + kafka.max.poll.records: 3000 + kafka.max.partition.fetch.bytes: 31457280 + kafka.security.protocol: SASL_PLAINTEXT + kafka.sasl.mechanism: PLAIN + kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252 + kafka.group.id: network_traffic_metrics_kafka_to_ndc_kafka + kafka.auto.offset.reset: latest + format: raw + + +sinks: + kafka_sink: + type: kafka + properties: + topic: NETWORK-TRAFFIC-METRIC + kafka.bootstrap.servers: "{{ kafka_sink_servers }}" + kafka.client.id: NETWORK-TRAFFIC-METRIC + kafka.retries: 0 + kafka.linger.ms: 10 + kafka.request.timeout.ms: 30000 + kafka.batch.size: 262144 + kafka.buffer.memory: 134217728 + kafka.max.request.size: 10485760 + kafka.compression.type: snappy + kafka.security.protocol: SASL_PLAINTEXT + kafka.sasl.mechanism: PLAIN + kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252 + format: raw + + +application: + env: + name: network_traffic_metrics_kafka_to_ndc_kafka + shade.identifier: aes + pipeline: + object-reuse: true + topology: + - name: kafka_source + downstream: [kafka_sink] + - name: kafka_sink diff --git a/groot-stream/multi-datacenter-examples/datacenter_dt/object_statistics_metric_kafka_to_ndc_kafka b/groot-stream/multi-datacenter-examples/datacenter_dt/object_statistics_metric_kafka_to_ndc_kafka new file mode 100644 index 0000000..2a698ad --- /dev/null +++ b/groot-stream/multi-datacenter-examples/datacenter_dt/object_statistics_metric_kafka_to_ndc_kafka @@ -0,0 +1,50 @@ +sources: + kafka_source: + 
type: kafka + properties: + topic: OBJECT-STATISTICS-METRIC + kafka.bootstrap.servers: "{{ kafka_source_servers }}" + kafka.client.id: OBJECT-STATISTICS-METRIC + kafka.session.timeout.ms: 60000 + kafka.max.poll.records: 3000 + kafka.max.partition.fetch.bytes: 31457280 + kafka.security.protocol: SASL_PLAINTEXT + kafka.sasl.mechanism: PLAIN + kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252 + kafka.group.id: object_statistics_metric_kafka_to_ndc_kafka + kafka.auto.offset.reset: latest + format: raw + + +sinks: + kafka_sink: + type: kafka + properties: + topic: OBJECT-STATISTICS-METRIC + kafka.bootstrap.servers: "{{ kafka_sink_servers }}" + kafka.client.id: OBJECT-STATISTICS-METRIC + kafka.retries: 0 + kafka.linger.ms: 10 + kafka.request.timeout.ms: 30000 + kafka.batch.size: 262144 + kafka.buffer.memory: 134217728 + kafka.max.request.size: 10485760 + kafka.compression.type: snappy + kafka.security.protocol: SASL_PLAINTEXT + kafka.sasl.mechanism: PLAIN + kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252 + format: raw + + +application: + env: + name: object_statistics_metric_kafka_to_ndc_kafka + shade.identifier: aes + pipeline: + object-reuse: true # [boolean] Object Reuse, default is false + topology: + - name: kafka_source + downstream: [kafka_sink] + - name: kafka_sink + + diff --git a/groot-stream/multi-datacenter-examples/datacenter_dt/policy_rule_metrics_kafka_to_ndc_kafka b/groot-stream/multi-datacenter-examples/datacenter_dt/policy_rule_metrics_kafka_to_ndc_kafka new file mode 100644 index 0000000..07f2a68 --- /dev/null +++ b/groot-stream/multi-datacenter-examples/datacenter_dt/policy_rule_metrics_kafka_to_ndc_kafka @@ -0,0 +1,50 @@ +sources: + kafka_source: + type: kafka + properties: + topic: POLICY-RULE-METRIC + kafka.bootstrap.servers: "{{ kafka_source_servers }}" + kafka.client.id: POLICY-RULE-METRIC + kafka.session.timeout.ms: 60000 + kafka.max.poll.records: 3000 + kafka.max.partition.fetch.bytes: 31457280 + kafka.security.protocol: SASL_PLAINTEXT + kafka.sasl.mechanism: PLAIN + kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252 + kafka.group.id: policy_rule_metrics_kafka_to_ndc_kafka + kafka.auto.offset.reset: latest + format: raw + + +sinks: + kafka_sink: + type: kafka + properties: + topic: POLICY-RULE-METRIC + kafka.bootstrap.servers: "{{ kafka_sink_servers }}" + kafka.client.id: POLICY-RULE-METRIC + kafka.retries: 0 + kafka.linger.ms: 10 + kafka.request.timeout.ms: 30000 + kafka.batch.size: 262144 + kafka.buffer.memory: 134217728 + kafka.max.request.size: 10485760 + kafka.compression.type: snappy + kafka.security.protocol: SASL_PLAINTEXT + kafka.sasl.mechanism: PLAIN + kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252 + format: raw + + +application: + env: + name: 
policy_rule_metrics_kafka_to_ndc_kafka + shade.identifier: aes + pipeline: + object-reuse: true # [boolean] Object Reuse, default is false + topology: + - name: kafka_source + downstream: [kafka_sink] + - name: kafka_sink + + diff --git a/groot-stream/multi-datacenter-examples/datacenter_dt/pxy_exch_intermedia_cert_kafka_to_ndc_kafka b/groot-stream/multi-datacenter-examples/datacenter_dt/pxy_exch_intermedia_cert_kafka_to_ndc_kafka new file mode 100644 index 0000000..8cca8b2 --- /dev/null +++ b/groot-stream/multi-datacenter-examples/datacenter_dt/pxy_exch_intermedia_cert_kafka_to_ndc_kafka @@ -0,0 +1,58 @@ +sources: + kafka_source: + type: kafka + properties: + topic: PXY-EXCH-INTERMEDIA-CERT + kafka.bootstrap.servers: "{{ kafka_source_servers }}" + kafka.client.id: PXY-EXCH-INTERMEDIA-CERT + kafka.session.timeout.ms: 60000 + kafka.max.poll.records: 3000 + kafka.max.partition.fetch.bytes: 31457280 + kafka.security.protocol: SASL_PLAINTEXT + kafka.ssl.keystore.location: + kafka.ssl.keystore.password: + kafka.ssl.truststore.location: + kafka.ssl.truststore.password: + kafka.ssl.key.password: + kafka.sasl.mechanism: PLAIN + kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252 + kafka.buffer.memory: + kafka.group.id: pxy_exch_intermedia_cert_kafka_to_ndc_kafka + kafka.auto.offset.reset: latest + kafka.max.request.size: + kafka.compression.type: none + format: raw + + +sinks: + kafka_sink: + type: kafka + properties: + topic: PXY-EXCH-INTERMEDIA-CERT + kafka.bootstrap.servers: "{{ kafka_sink_servers }}" + kafka.client.id: PXY-EXCH-INTERMEDIA-CERT + kafka.retries: 0 + kafka.linger.ms: 10 + kafka.request.timeout.ms: 30000 + kafka.batch.size: 262144 + kafka.buffer.memory: 134217728 + kafka.max.request.size: 10485760 + kafka.compression.type: snappy + kafka.security.protocol: SASL_PLAINTEXT + kafka.sasl.mechanism: PLAIN + kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252 + format: raw + + +application: + env: + name: pxy_exch_intermedia_cert_kafka_to_ndc_kafka + shade.identifier: aes + pipeline: + object-reuse: true # [boolean] Object Reuse, default is false + topology: + - name: kafka_source + downstream: [kafka_sink] + - name: kafka_sink + + diff --git a/groot-stream/multi-datacenter-examples/datacenter_dt/statistics_rule_metric_kafka_to_ndc_kafka b/groot-stream/multi-datacenter-examples/datacenter_dt/statistics_rule_metric_kafka_to_ndc_kafka new file mode 100644 index 0000000..ff83e39 --- /dev/null +++ b/groot-stream/multi-datacenter-examples/datacenter_dt/statistics_rule_metric_kafka_to_ndc_kafka @@ -0,0 +1,50 @@ +sources: + kafka_source: + type: kafka + properties: + topic: STATISTICS-RULE-METRIC + kafka.bootstrap.servers: "{{ kafka_source_servers }}" + kafka.client.id: STATISTICS-RULE-METRIC + kafka.session.timeout.ms: 60000 + kafka.max.poll.records: 3000 + kafka.max.partition.fetch.bytes: 31457280 + kafka.security.protocol: SASL_PLAINTEXT + kafka.sasl.mechanism: PLAIN + kafka.sasl.jaas.config: 
454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252 + kafka.group.id: statistics_rule_metric_kafka_to_ndc_kafka + kafka.auto.offset.reset: latest + format: raw + + +sinks: + kafka_sink: + type: kafka + properties: + topic: STATISTICS-RULE-METRIC + kafka.bootstrap.servers: "{{ kafka_sink_servers }}" + kafka.client.id: STATISTICS-RULE-METRIC + kafka.retries: 0 + kafka.linger.ms: 10 + kafka.request.timeout.ms: 30000 + kafka.batch.size: 262144 + kafka.buffer.memory: 134217728 + kafka.max.request.size: 10485760 + kafka.compression.type: snappy + kafka.security.protocol: SASL_PLAINTEXT + kafka.sasl.mechanism: PLAIN + kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252 + format: raw + + +application: + env: + name: statistics_rule_metric_kafka_to_ndc_kafka + shade.identifier: aes + pipeline: + object-reuse: true # [boolean] Object Reuse, default is false + topology: + - name: kafka_source + downstream: [kafka_sink] + - name: kafka_sink + + diff --git a/groot-stream/multi-datacenter-examples/datacenter_dt/troubleshooting_file_stream_record_kafka_to_ndc_kafka b/groot-stream/multi-datacenter-examples/datacenter_dt/troubleshooting_file_stream_record_kafka_to_ndc_kafka new file mode 100644 index 0000000..b880e8a --- /dev/null +++ b/groot-stream/multi-datacenter-examples/datacenter_dt/troubleshooting_file_stream_record_kafka_to_ndc_kafka @@ -0,0 +1,50 @@ +sources: + kafka_source: + type: kafka + properties: + topic: TROUBLESHOOTING-FILE-STREAM-RECORD + kafka.bootstrap.servers: "{{ kafka_source_servers }}" + kafka.client.id: TROUBLESHOOTING-FILE-STREAM-RECORD + kafka.session.timeout.ms: 60000 + kafka.max.poll.records: 3000 + kafka.max.partition.fetch.bytes: 31457280 + kafka.security.protocol: SASL_PLAINTEXT + kafka.sasl.mechanism: PLAIN + kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252 + kafka.group.id: troubleshooting_file_stream_record_kafka_to_ndc_kafka + kafka.auto.offset.reset: latest + format: raw + + +sinks: + kafka_sink: + type: kafka + properties: + topic: TROUBLESHOOTING-FILE-STREAM-RECORD + kafka.bootstrap.servers: "{{ kafka_sink_servers }}" + kafka.client.id: TROUBLESHOOTING-FILE-STREAM-RECORD + kafka.retries: 0 + kafka.linger.ms: 10 + kafka.request.timeout.ms: 30000 + kafka.batch.size: 262144 + kafka.buffer.memory: 134217728 + kafka.max.request.size: 10485760 + kafka.compression.type: snappy + kafka.security.protocol: SASL_PLAINTEXT + kafka.sasl.mechanism: PLAIN + kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252 + format: raw + + +application: + env: + name: troubleshooting_file_stream_record_kafka_to_ndc_kafka + shade.identifier: aes + pipeline: + object-reuse: true # [boolean] Object Reuse, default is false + topology: + - name: kafka_source + downstream: [kafka_sink] + - name: kafka_sink + + diff --git 
a/groot-stream/multi-datacenter-examples/datacenter_dt/voip_record_kafka_to_ndc_kafka b/groot-stream/multi-datacenter-examples/datacenter_dt/voip_record_kafka_to_ndc_kafka
new file mode 100644
index 0000000..0339db1
--- /dev/null
+++ b/groot-stream/multi-datacenter-examples/datacenter_dt/voip_record_kafka_to_ndc_kafka
@@ -0,0 +1,89 @@
+sources:
+  kafka_source:
+    type: kafka
+    properties:
+      topic: VOIP-RECORD
+      kafka.bootstrap.servers: "{{ kafka_source_servers }}"
+      kafka.client.id: VOIP-RECORD
+      kafka.session.timeout.ms: 60000
+      kafka.max.poll.records: 3000
+      kafka.max.partition.fetch.bytes: 31457280
+      kafka.security.protocol: SASL_PLAINTEXT
+      kafka.sasl.mechanism: PLAIN
+      kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
+      kafka.group.id: voip_record_kafka_to_ndc_kafka
+      kafka.auto.offset.reset: latest
+    format: json
+
+
+processing_pipelines:
+  etl_processor: # [object] Processing Pipeline
+    type: com.geedgenetworks.core.processor.projection.ProjectionProcessorImpl
+    remove_fields:
+    output_fields:
+    functions: # [array of object] Function List
+
+      - function: PATH_COMBINE
+        lookup_fields: [rtp_pcap_path]
+        output_fields: [rtp_pcap_path]
+        parameters:
+          path: [props.hos.path, props.hos.bucket.name.traffic_file, rtp_pcap_path]
+
+      - function: PATH_COMBINE
+        lookup_fields: [http_request_body]
+        output_fields: [http_request_body]
+        parameters:
+          path: [props.hos.path, props.hos.bucket.name.traffic_file, http_request_body]
+
+      - function: PATH_COMBINE
+        lookup_fields: [http_response_body]
+        output_fields: [http_response_body]
+        parameters:
+          path: [props.hos.path, props.hos.bucket.name.traffic_file, http_response_body]
+
+      - function: PATH_COMBINE
+        lookup_fields: [mail_eml_file]
+        output_fields: [mail_eml_file]
+        parameters:
+          path: [props.hos.path, props.hos.bucket.name.traffic_file, mail_eml_file]
+
+      - function: PATH_COMBINE
+        lookup_fields: [packet_capture_file]
+        output_fields: [packet_capture_file]
+        parameters:
+          path: [props.hos.path, props.hos.bucket.name.traffic_file, packet_capture_file]
+
+
+sinks:
+  kafka_sink:
+    type: kafka
+    properties:
+      topic: VOIP-RECORD
+      kafka.bootstrap.servers: "{{ kafka_sink_servers }}"
+      kafka.client.id: VOIP-RECORD
+      kafka.retries: 0
+      kafka.linger.ms: 10
+      kafka.request.timeout.ms: 30000
+      kafka.batch.size: 262144
+      kafka.buffer.memory: 134217728
+      kafka.max.request.size: 10485760
+      kafka.compression.type: snappy
+      kafka.security.protocol: SASL_PLAINTEXT
+      kafka.sasl.mechanism: PLAIN
+      kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
+    format: json
+
+
+
+application:
+  env:
+    name: voip_record_kafka_to_ndc_kafka
+    shade.identifier: aes
+  pipeline:
+    object-reuse: true # [boolean] Object Reuse, default is false
+  topology:
+    - name: kafka_source
+      downstream: [etl_processor]
+    - name: etl_processor
+      downstream: [kafka_sink]
+    - name: kafka_sink
diff --git a/groot-stream/multi-datacenter-examples/national_datacenter/dos_event_kafka_to_clickhouse b/groot-stream/multi-datacenter-examples/national_datacenter/dos_event_kafka_to_clickhouse
new file mode 100644
index 0000000..2800bf2
--- /dev/null
+++ 
b/groot-stream/multi-datacenter-examples/national_datacenter/dos_event_kafka_to_clickhouse @@ -0,0 +1,43 @@ +sources: + kafka_source: + type: kafka + properties: + topic: DOS-EVENT + kafka.bootstrap.servers: "{{ kafka_source_servers }}" + kafka.client.id: DOS-EVENT + kafka.session.timeout.ms: 60000 + kafka.max.poll.records: 3000 + kafka.max.partition.fetch.bytes: 31457280 + kafka.security.protocol: SASL_PLAINTEXT + kafka.sasl.mechanism: PLAIN + kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252 + kafka.group.id: dos_event_kafka_to_clickhouse + kafka.auto.offset.reset: latest + format: json + + +sinks: + clickhouse_sink: + type: clickhouse + properties: + host: "{{ clickhouse_servers }}" + table: tsg_galaxy_v3.dos_event_local + batch.size: 100000 + batch.interval: 30s + connection.user: e54c9568586180eede1506eecf3574e9 + connection.password: 86cf0e2ffba3f541a6c6761313e5cc7e + connection.connect_timeout: 30 + connection.query_timeout: 300 + +application: + env: + name: dos_event_kafka_to_clickhouse + shade.identifier: aes + pipeline: + object-reuse: true # [boolean] Object Reuse, default is false + topology: + - name: kafka_source + downstream: [clickhouse_sink] + - name: clickhouse_sink + + diff --git a/groot-stream/multi-datacenter-examples/national_datacenter/etl_session_record_processed_kafka_to_cn_kafka b/groot-stream/multi-datacenter-examples/national_datacenter/etl_session_record_processed_kafka_to_cn_kafka new file mode 100644 index 0000000..69bd6e8 --- /dev/null +++ b/groot-stream/multi-datacenter-examples/national_datacenter/etl_session_record_processed_kafka_to_cn_kafka @@ -0,0 +1,399 @@ +sources: + kafka_source: + type: kafka + properties: + topic: SESSION-RECORD-PROCESSED + kafka.bootstrap.servers: "{{ kafka_source_servers }}" + kafka.client.id: SESSION-RECORD-PROCESSED + kafka.session.timeout.ms: 60000 + kafka.max.poll.records: 3000 + kafka.max.partition.fetch.bytes: 31457280 + kafka.security.protocol: SASL_PLAINTEXT + kafka.sasl.mechanism: PLAIN + kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252 + kafka.group.id: etl_processed_session_record_kafka_to_cn_kafka + kafka.auto.offset.reset: latest + format: json + +processing_pipelines: + session_record_processor: + type: projection + remove_fields: + output_fields: + functions: # [array of object] Function List + - function: EVAL + output_fields: [ domain ] + parameters: + value_expression: server_fqdn + + - function: EVAL + output_fields: [ domain_sld ] + parameters: + value_expression: server_domain + + - function: CN_L7_PROTOCOL_AND_APP_EXTRACT + parameters: + decoded_path_field_name: decoded_path + app_transition_field_name: app_transition + l7_protocol_field_name: l7_protocol + app_field_name: app + l7_protocol: DHCP,DNS,FTP,GRE,GTP,HTTP,HTTPS,ICMP,IMAP,IMAPS,IPSEC,ISAKMP,XMPP,L2TP,LDAP,MMS,NETBIOS,NETFLOW,NTP,POP3,POP3S,RDP,PPTP,RADIUS,RTCP,RTP,RTSP,SIP,SMB,SMTP,SMTPS,SNMP,SSDP,SSH,SSL,STUN,TELNET,TFTP,OPENVPN,RTMP,TEREDO,FTPS,DTLS,SPDY,BJNP,QUIC,MDNS,Unknown TCP,Unknown UDP,Unknown Other,IKE,MAIL,SOCKS,DoH,SLP,SSL with ESNI,ISATAP,Stratum,SSL with ECH + + - function: GEOIP_LOOKUP + lookup_fields: [ client_ip ] + output_fields: [ ] + 
parameters: + kb_name: cn_ip_location + option: IP_TO_OBJECT + geolocation_field_mapping: + COUNTRY: client_country_region + PROVINCE: client_super_admin_area + CITY: client_admin_area + LONGITUDE: client_longitude + LATITUDE: client_latitude + ISP: client_isp + + - function: GEOIP_LOOKUP + lookup_fields: [ server_ip ] + output_fields: [ ] + parameters: + kb_name: cn_ip_location + option: IP_TO_OBJECT + geolocation_field_mapping: + COUNTRY: server_country_region + PROVINCE: server_super_admin_area + CITY: server_admin_area + LONGITUDE: server_longitude + LATITUDE: server_latitude + ISP: server_isp + + - function: ASN_LOOKUP + lookup_fields: [ client_ip ] + output_fields: [ client_asn ] + parameters: + option: IP_TO_ASN + kb_name: cn_ip_asn + + - function: ASN_LOOKUP + lookup_fields: [ server_ip ] + output_fields: [ server_asn ] + parameters: + option: IP_TO_ASN + kb_name: cn_ip_asn + + - function: CN_IDC_RENTER_LOOKUP + lookup_fields: [ client_ip ] + output_fields: [ client_idc_renter ] + parameters: + kb_name: cn_idc_renter + + - function: CN_IDC_RENTER_LOOKUP + lookup_fields: [ server_ip ] + output_fields: [ server_idc_renter ] + parameters: + kb_name: cn_idc_renter + + - function: CN_LINK_DIRECTION_LOOKUP + lookup_fields: [ in_link_id ] + output_fields: [ in_link_direction ] + parameters: + kb_name: cn_link_direction + + - function: CN_LINK_DIRECTION_LOOKUP + lookup_fields: [ out_link_id ] + output_fields: [ out_link_direction ] + parameters: + kb_name: cn_link_direction + + - function: CN_FQDN_CATEGORY_LOOKUP + lookup_fields: [ domain ] + parameters: + kb_name: cn_fqdn_category + field_mapping: + NAME: domain_category_name + GROUP: domain_category_group + REPUTATION_LEVEL: domain_reputation_level + + - function: CN_ICP_LOOKUP + lookup_fields: [ domain ] + output_fields: [ domain_icp_company_name ] + parameters: + kb_name: cn_fqdn_icp + + - function: CN_FQDN_WHOIS_LOOKUP + lookup_fields: [ domain ] + output_fields: [ domain_whois_org ] + parameters: + kb_name: cn_fqdn_whois + + - function: CN_DNS_SERVER_LOOKUP + lookup_fields: [ server_ip ] + output_fields: [ server_dns_server ] + parameters: + kb_name: cn_dns_server + + - function: CN_APP_CATEGORY_LOOKUP + lookup_fields: [ app ] + parameters: + kb_name: cn_app_category + field_mapping: + CATEGORY: app_category + SUBCATEGORY: app_subcategory + COMPANY: app_company + COMPANY_CATEGORY: app_company_category + + - function: EVAL + output_fields: [ client_zone ] + parameters: + value_expression: "flags & 8 == 8 ? 'internal' : 'external'" + + - function: EVAL + output_fields: [ server_zone ] + parameters: + value_expression: "flags & 16 == 16 ? 'internal' : 'external'" + + - function: CN_IP_ZONE_LOOKUP + lookup_fields: [ client_ip ] + output_fields: [ client_zone ] + parameters: + kb_name: none + #kb_name: cn_internal_ip + + - function: CN_IP_ZONE_LOOKUP + lookup_fields: [ server_ip ] + output_fields: [ server_zone ] + parameters: + kb_name: none + #kb_name: cn_internal_ip + + - function: EVAL + output_fields: [ sent_bytes ] + parameters: + value_expression: "sent_bytes == null ? 0 : sent_bytes" + + - function: EVAL + output_fields: [ sent_pkts ] + parameters: + value_expression: "sent_pkts == null ? 0 : sent_pkts" + + - function: EVAL + output_fields: [ received_bytes ] + parameters: + value_expression: "received_bytes == null ? 0 : received_bytes" + + - function: EVAL + output_fields: [ received_pkts ] + parameters: + value_expression: "received_pkts == null ? 
0 : received_pkts" + + - function: EVAL + output_fields: [ traffic_inbound_byte ] + parameters: + value_expression: "client_zone == 'internal' && server_zone == 'external' ? received_bytes : traffic_inbound_byte" + + - function: EVAL + output_fields: [ traffic_outbound_byte ] + parameters: + value_expression: "client_zone == 'external' && server_zone == 'internal' ? received_bytes : traffic_outbound_byte" + + - function: EVAL + output_fields: [ traffic_inbound_pkt ] + parameters: + value_expression: "client_zone == 'internal' && server_zone == 'external' ? received_pkts : traffic_inbound_pkt" + + - function: EVAL + output_fields: [ traffic_outbound_pkt ] + parameters: + value_expression: "client_zone == 'external' && server_zone == 'internal' ? received_pkts : traffic_outbound_pkt" + + - function: EVAL + output_fields: [ traffic_outbound_byte ] + parameters: + value_expression: "client_zone == 'internal' && server_zone == 'external' ? sent_bytes : traffic_outbound_byte" + + - function: EVAL + output_fields: [ traffic_inbound_byte ] + parameters: + value_expression: "client_zone == 'external' && server_zone == 'internal' ? sent_bytes : traffic_inbound_byte" + + - function: EVAL + output_fields: [ traffic_outbound_pkt ] + parameters: + value_expression: "client_zone == 'internal' && server_zone == 'external' ? sent_pkts : traffic_outbound_pkt" + + - function: EVAL + output_fields: [ traffic_inbound_pkt ] + parameters: + value_expression: "client_zone == 'external' && server_zone == 'internal' ? sent_pkts : traffic_inbound_pkt" + + - function: EVAL + output_fields: [ traffic_internal_byte ] + parameters: + value_expression: "client_zone == 'internal' && server_zone == 'internal' ? sent_bytes + received_bytes : traffic_internal_byte" + + - function: EVAL + output_fields: [ traffic_internal_pkt ] + parameters: + value_expression: "client_zone == 'internal' && server_zone == 'internal' ? sent_pkts + received_pkts : traffic_internal_pkt" + + - function: EVAL + output_fields: [ traffic_through_byte ] + parameters: + value_expression: "client_zone == 'external' && server_zone == 'external' ? sent_bytes + received_bytes : traffic_through_byte" + + - function: EVAL + output_fields: [ traffic_through_pkt ] + parameters: + value_expression: "client_zone == 'external' && server_zone == 'external' ? sent_pkts + received_pkts : traffic_through_pkt" + + - function: EVAL + output_fields: [ sessions ] + parameters: + value_expression: "1" + + - function: EVAL + output_fields: [ internal_query_num ] + parameters: + value_expression: "client_zone == 'internal' ? sessions : internal_query_num" + + - function: EVAL + output_fields: [ external_query_num ] + parameters: + value_expression: "client_zone == 'external' ? 
sessions : external_query_num" + + - function: CN_VPN_LOOKUP + lookup_fields: [ server_ip ] + output_fields: [ server_vpn_service_name ] + parameters: + kb_name: cn_vpn_learning_ip + option: IP_TO_VPN + + - function: CN_VPN_LOOKUP + lookup_fields: [ domain ] + output_fields: [ domain_vpn_service_name ] + parameters: + kb_name: cn_vpn_learning_domain + option: DOMAIN_TO_VPN + + - function: CN_IOC_LOOKUP + lookup_fields: [ server_ip ] + output_fields: [ server_malware ] + parameters: + kb_name: cn_ioc_malware + option: IP_TO_MALWARE + + - function: CN_IOC_LOOKUP + lookup_fields: [ domain ] + output_fields: [ domain_malware ] + parameters: + kb_name: cn_ioc_malware + option: DOMAIN_TO_MALWARE + + - function: CN_USER_DEFINE_TAG_LOOKUP + lookup_fields: [ client_ip ] + output_fields: [ client_ip_tags ] + parameters: + kb_name: cn_ip_tag_user_define + option: IP_TO_TAG + + - function: CN_USER_DEFINE_TAG_LOOKUP + lookup_fields: [ server_ip ] + output_fields: [ server_ip_tags ] + parameters: + kb_name: cn_ip_tag_user_define + option: IP_TO_TAG + + - function: CN_USER_DEFINE_TAG_LOOKUP + lookup_fields: [ domain ] + output_fields: [ domain_tags ] + parameters: + kb_name: cn_domain_tag_user_define + option: DOMAIN_TO_TAG + + - function: CN_USER_DEFINE_TAG_LOOKUP + lookup_fields: [ app ] + output_fields: [ app_tags ] + parameters: + kb_name: cn_app_tag_user_define + option: APP_TO_TAG + + - function: GENERATE_STRING_ARRAY + lookup_fields: [ client_idc_renter,client_ip_tags ] + output_fields: [ client_ip_tags ] + + - function: GENERATE_STRING_ARRAY + lookup_fields: [ server_idc_renter,server_dns_server,server_node_type,server_malware,server_vpn_service_name,server_ip_tags ] + output_fields: [ server_ip_tags ] + + - function: GENERATE_STRING_ARRAY + lookup_fields: [ domain_node_type,domain_malware,domain_vpn_service_name,domain_tags ] + output_fields: [ domain_tags ] + + - function: CN_ARRAY_ELEMENTS_PREPEND + lookup_fields: [ client_ip_tags ] + output_fields: [ client_ip_tags ] + parameters: + prefix: ip. + + - function: CN_ARRAY_ELEMENTS_PREPEND + lookup_fields: [ server_ip_tags ] + output_fields: [ server_ip_tags ] + parameters: + prefix: ip. + + - function: CN_ARRAY_ELEMENTS_PREPEND + lookup_fields: [ domain_tags ] + output_fields: [ domain_tags ] + parameters: + prefix: domain. + + - function: CN_ARRAY_ELEMENTS_PREPEND + lookup_fields: [ app_tags ] + output_fields: [ app_tags ] + parameters: + prefix: app. 
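+
+# The projection above enriches each session record with CN knowledge-base lookups
+# (GeoIP, ASN, IDC renter, link direction, FQDN/app category, ICP, WHOIS, IOC, VPN,
+# user-defined tags) and derives zone-based traffic counters with EVAL expressions.
+# The post-processing projection below keeps only the whitelisted output fields
+# before records are produced to the CN Kafka sink.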
+postprocessing_pipelines: + remove_field_processor: # [object] Processing Pipeline + type: com.geedgenetworks.core.processor.projection.ProjectionProcessorImpl + output_fields: [ recv_time,log_id,flags,start_timestamp_ms,end_timestamp_ms,duration_ms,decoded_as,client_ip,server_ip,client_port,server_port,app,app_transition,decoded_path,ip_protocol,l7_protocol,out_link_id,in_link_id,subscriber_id,imei,imsi,phone_number,apn,http_url,dns_rcode,dns_qname,dns_qtype,dns_rr,out_link_direction,in_link_direction,server_fqdn,server_domain,domain,domain_sld,domain_category_name,domain_category_group,domain_reputation_level,domain_icp_company_name,domain_whois_org,domain_tags,client_zone,client_country_region,client_super_admin_area,client_admin_area,client_longitude,client_latitude,client_isp,client_asn,client_ip_tags,server_zone,server_country_region,server_super_admin_area,server_admin_area,server_longitude,server_latitude,server_isp,server_asn,server_ip_tags,app_category,app_subcategory,app_company,app_company_category,app_tags,sent_pkts,sent_bytes,received_pkts,received_bytes,sessions,tcp_c2s_lost_bytes,tcp_s2c_lost_bytes,tcp_c2s_o3_pkts,tcp_s2c_o3_pkts,tcp_c2s_rtx_bytes,tcp_s2c_rtx_bytes,tcp_c2s_rtx_pkts,tcp_s2c_rtx_pkts,tcp_rtt_ms,http_response_latency_ms,ssl_handshake_latency_ms,dns_response_latency_ms,cn_internal_rule_id_list,cn_internal_ioc_type_list,traffic_inbound_byte,traffic_inbound_pkt,traffic_outbound_byte,traffic_outbound_pkt,traffic_internal_byte,traffic_internal_pkt,traffic_through_byte,traffic_through_pkt,internal_query_num,external_query_num ] + +sinks: + cn_kafka_sink: + type: kafka + properties: + topic: SESSION-RECORD-CN + kafka.bootstrap.servers: {{ national_center_cn_kafka_servers }} + kafka.client.id: SESSION-RECORD-CN + kafka.retries: 0 + kafka.linger.ms: 10 + kafka.request.timeout.ms: 30000 + kafka.batch.size: 262144 + kafka.buffer.memory: 134217728 + kafka.max.request.size: 10485760 + kafka.compression.type: snappy + kafka.security.protocol: SASL_PLAINTEXT + kafka.sasl.mechanism: PLAIN + kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252 + format: json + + +application: + env: + name: etl_session_record_processed_kafka_to_cn_kafka + shade.identifier: aes + pipeline: + object-reuse: true + properties: + hos.bucket.name.rtp_file: traffic_rtp_file_bucket + hos.bucket.name.http_file: traffic_http_file_bucket + hos.bucket.name.eml_file: traffic_eml_file_bucket + hos.bucket.name.policy_capture_file: traffic_policy_capture_file_bucket + topology: + - name: kafka_source + downstream: [ session_record_processor ] + - name: session_record_processor + downstream: [ remove_field_processor ] + - name: remove_field_processor + downstream: [ cn_kafka_sink ] + - name: cn_kafka_sink + downstream: [ ] diff --git a/groot-stream/multi-datacenter-examples/national_datacenter/etl_voip_record_kafka_to_clickhouse b/groot-stream/multi-datacenter-examples/national_datacenter/etl_voip_record_kafka_to_clickhouse new file mode 100644 index 0000000..7b46dc8 --- /dev/null +++ b/groot-stream/multi-datacenter-examples/national_datacenter/etl_voip_record_kafka_to_clickhouse @@ -0,0 +1,119 @@ +sources: + kafka_source: + type: kafka + properties: + topic: VOIP-CONVERSATION-RECORD + kafka.bootstrap.servers: "{{ kafka_source_servers }}" + kafka.client.id: VOIP-CONVERSATION-RECORD + kafka.session.timeout.ms: 60000 + 
kafka.max.poll.records: 3000 + kafka.max.partition.fetch.bytes: 31457280 + kafka.security.protocol: SASL_PLAINTEXT + kafka.sasl.mechanism: PLAIN + kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252 + kafka.group.id: etl_voip_record_kafka_to_clickhouse + kafka.auto.offset.reset: latest + format: json + +processing_pipelines: + etl_processor: # [object] Processing Pipeline + type: com.geedgenetworks.core.processor.projection.ProjectionProcessorImpl + remove_fields: + output_fields: + properties: + key: value + functions: # [array of object] Function List + + - function: ASN_LOOKUP + lookup_fields: [server_ip] + output_fields: [server_asn] + parameters: + option: IP_TO_ASN + kb_name: tsg_ip_asn + + - function: ASN_LOOKUP + lookup_fields: [client_ip] + output_fields: [client_asn] + parameters: + option: IP_TO_ASN + kb_name: tsg_ip_asn + + - function: SNOWFLAKE_ID + lookup_fields: [''] + output_fields: [log_id] + parameters: + data_center_id_num: 1 + + - function: JSON_EXTRACT + lookup_fields: [device_tag] + output_fields: [data_center] + filter: + parameters: + value_expression: $.tags[?(@.tag=='data_center')][0].value + + - function: JSON_EXTRACT + lookup_fields: [device_tag] + output_fields: [device_group] + filter: + parameters: + value_expression: $.tags[?(@.tag=='device_group')][0].value + + - function: CURRENT_UNIX_TIMESTAMP + output_fields: [processing_time] + parameters: + precision: seconds + + - function: UNIX_TIMESTAMP_CONVERTER + lookup_fields: [__timestamp] + output_fields: [recv_time] + parameters: + precision: seconds + + - function: EVAL + output_fields: [ingestion_time] + parameters: + value_expression: recv_time + + + - function: BASE64_DECODE_TO_STRING + output_fields: [mail_subject] + parameters: + value_field: mail_subject + charset_field: mail_subject_charset + + - function: BASE64_DECODE_TO_STRING + output_fields: [mail_attachment_name] + parameters: + value_field: mail_attachment_name + charset_field: mail_attachment_name_charset + + +sinks: + clickhouse_sink: + type: clickhouse + properties: + host: "{{ clickhouse_servers }}" + table: tsg_galaxy_v3.voip_record_local + batch.size: 100000 + batch.interval: 30s + connection.user: e54c9568586180eede1506eecf3574e9 + connection.password: 86cf0e2ffba3f541a6c6761313e5cc7e + connection.connect_timeout: 30 + connection.query_timeout: 300 + + +application: + + env: # [object] Environment Variables + name: etl_voip_record_kafka_to_clickhouse # [string] Job Name + shade.identifier: aes + pipeline: + object-reuse: true # [boolean] Object Reuse, default is false + topology: + - name: kafka_source + downstream: [etl_processor] + - name: etl_processor + downstream: [clickhouse_sink] + - name: clickhouse_sink + + diff --git a/groot-stream/multi-datacenter-examples/national_datacenter/proxy_event_processed_kafka_to_clickhouse b/groot-stream/multi-datacenter-examples/national_datacenter/proxy_event_processed_kafka_to_clickhouse new file mode 100644 index 0000000..8cdfae5 --- /dev/null +++ b/groot-stream/multi-datacenter-examples/national_datacenter/proxy_event_processed_kafka_to_clickhouse @@ -0,0 +1,42 @@ +sources: + kafka_source: + type: kafka + properties: + topic: PROXY-EVENT-PROCESSED + kafka.bootstrap.servers: "{{ kafka_source_servers }}" + kafka.client.id: PROXY-EVENT-PROCESSED + kafka.session.timeout.ms: 60000 + kafka.max.poll.records: 
3000 + kafka.max.partition.fetch.bytes: 31457280 + kafka.security.protocol: SASL_PLAINTEXT + kafka.sasl.mechanism: PLAIN + kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252 + kafka.group.id: proxy_event_processed_kafka_to_clickhouse + kafka.auto.offset.reset: latest + format: json + +sinks: + clickhouse_sink: + type: clickhouse + properties: + host: "{{ clickhouse_servers }}" + table: tsg_galaxy_v3.proxy_event_local + batch.size: 100000 + batch.interval: 30s + connection.user: e54c9568586180eede1506eecf3574e9 + connection.password: 86cf0e2ffba3f541a6c6761313e5cc7e + connection.connect_timeout: 30 + connection.query_timeout: 300 + + +application: + + env: # [object] Environment Variables + name: proxy_event_processed_kafka_to_clickhouse # [string] Job Name + shade.identifier: aes + pipeline: + object-reuse: true # [boolean] Object Reuse, default is false + topology: + - name: kafka_source + downstream: [clickhouse_sink] + - name: clickhouse_sink diff --git a/groot-stream/multi-datacenter-examples/national_datacenter/session_record_processed_kafka_to_clickhouse b/groot-stream/multi-datacenter-examples/national_datacenter/session_record_processed_kafka_to_clickhouse new file mode 100644 index 0000000..7d0f68b --- /dev/null +++ b/groot-stream/multi-datacenter-examples/national_datacenter/session_record_processed_kafka_to_clickhouse @@ -0,0 +1,42 @@ +sources: + kafka_source: + type: kafka + properties: + topic: SESSION-RECORD-PROCESSED + kafka.bootstrap.servers: "{{ kafka_source_servers }}" + kafka.client.id: SESSION-RECORD-PROCESSED + kafka.session.timeout.ms: 60000 + kafka.max.poll.records: 3000 + kafka.max.partition.fetch.bytes: 31457280 + kafka.security.protocol: SASL_PLAINTEXT + kafka.sasl.mechanism: PLAIN + kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252 + kafka.group.id: session_record_processed_kafka_to_clickhouse + kafka.auto.offset.reset: latest + format: json + +sinks: + clickhouse_sink: + type: clickhouse + properties: + host: "{{ clickhouse_servers }}" + table: tsg_galaxy_v3.session_record_local + batch.size: 100000 + batch.interval: 30s + connection.user: e54c9568586180eede1506eecf3574e9 + connection.password: 86cf0e2ffba3f541a6c6761313e5cc7e + connection.connect_timeout: 30 + connection.query_timeout: 300 + + +application: + + env: # [object] Environment Variables + name: session_record_processed_kafka_to_clickhouse # [string] Job Name + shade.identifier: aes + pipeline: + object-reuse: true # [boolean] Object Reuse, default is false + topology: + - name: kafka_source + downstream: [clickhouse_sink] + - name: clickhouse_sink diff --git a/groot-stream/multi-datacenter-examples/national_datacenter/transaction_record_processed_kafka_to_clickhouse b/groot-stream/multi-datacenter-examples/national_datacenter/transaction_record_processed_kafka_to_clickhouse new file mode 100644 index 0000000..83ce33d --- /dev/null +++ b/groot-stream/multi-datacenter-examples/national_datacenter/transaction_record_processed_kafka_to_clickhouse @@ -0,0 +1,42 @@ +sources: + kafka_source: + type: kafka + properties: + topic: TRANSACTION-RECORD-PROCESSED + kafka.bootstrap.servers: "{{ kafka_source_servers }}" + 
kafka.client.id: TRANSACTION-RECORD-PROCESSED + kafka.session.timeout.ms: 60000 + kafka.max.poll.records: 3000 + kafka.max.partition.fetch.bytes: 31457280 + kafka.security.protocol: SASL_PLAINTEXT + kafka.sasl.mechanism: PLAIN + kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252 + kafka.group.id: transaction_record_processed_kafka_to_clickhouse + kafka.auto.offset.reset: latest + format: json + +sinks: + clickhouse_sink: + type: clickhouse + properties: + host: "{{ clickhouse_servers }}" + table: tsg_galaxy_v3.transaction_record_local + batch.size: 100000 + batch.interval: 30s + connection.user: e54c9568586180eede1506eecf3574e9 + connection.password: 86cf0e2ffba3f541a6c6761313e5cc7e + connection.connect_timeout: 30 + connection.query_timeout: 300 + + +application: + + env: # [object] Environment Variables + name: transaction_record_processed_kafka_to_clickhouse # [string] Job Name + shade.identifier: aes + pipeline: + object-reuse: true # [boolean] Object Reuse, default is false + topology: + - name: kafka_source + downstream: [clickhouse_sink] + - name: clickhouse_sink diff --git a/groot-stream/single-cluster-examples/dos_event_kafka_to_clickhouse b/groot-stream/single-cluster-examples/dos_event_kafka_to_clickhouse new file mode 100644 index 0000000..5152734 --- /dev/null +++ b/groot-stream/single-cluster-examples/dos_event_kafka_to_clickhouse @@ -0,0 +1,50 @@ +sources: + kafka_source: + type: kafka + properties: + topic: DOS-EVENT + kafka.bootstrap.servers: "{{ kafka_source_servers }}" + kafka.client.id: DOS-EVENT + kafka.session.timeout.ms: 60000 + kafka.max.poll.records: 3000 + kafka.max.partition.fetch.bytes: 31457280 + kafka.security.protocol: SASL_PLAINTEXT + kafka.ssl.keystore.location: + kafka.ssl.keystore.password: + kafka.ssl.truststore.location: + kafka.ssl.truststore.password: + kafka.ssl.key.password: + kafka.sasl.mechanism: PLAIN + kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252 + kafka.buffer.memory: + kafka.group.id: dos_event_kafka_to_clickhouse-20231221 + kafka.auto.offset.reset: latest + kafka.max.request.size: + kafka.compression.type: none + format: json + + +sinks: + clickhouse_sink: + type: clickhouse + properties: + host: "{{ clickhouse_servers }}" + table: tsg_galaxy_v3.dos_event_local + batch.size: 100000 + batch.interval: 30s + connection.user: e54c9568586180eede1506eecf3574e9 + connection.password: 86cf0e2ffba3f541a6c6761313e5cc7e + + +application: + env: + name: dos_event_kafka_to_clickhouse + shade.identifier: aes + pipeline: + object-reuse: true # [boolean] Object Reuse, default is false + topology: + - name: kafka_source + downstream: [clickhouse_sink] + - name: clickhouse_sink + + diff --git a/groot-stream/single-cluster-examples/etl_datapath_telemetry_record_kafka_to_clickhouse b/groot-stream/single-cluster-examples/etl_datapath_telemetry_record_kafka_to_clickhouse new file mode 100644 index 0000000..9ae5b54 --- /dev/null +++ b/groot-stream/single-cluster-examples/etl_datapath_telemetry_record_kafka_to_clickhouse @@ -0,0 +1,72 @@ +sources: + kafka_source: + type: kafka + properties: + topic: DATAPATH-TELEMETRY-RECORD + kafka.bootstrap.servers: "{{ 
kafka_source_servers }}" + kafka.client.id: DATAPATH-TELEMETRY-RECORD + kafka.session.timeout.ms: 60000 + kafka.max.poll.records: 3000 + kafka.max.partition.fetch.bytes: 31457280 + #kafka.security.protocol: SSL + #kafka.ssl.endpoint.identification.algorithm: "" + #kafka.ssl.keystore.location: /data/tsg/olap/flink/topology/data/keystore.jks + #kafka.ssl.keystore.password: 86cf0e2ffba3f541a6c6761313e5cc7e + #kafka.ssl.truststore.location: /data/tsg/olap/flink/topology/data/truststore.jks + #kafka.ssl.truststore.password: 86cf0e2ffba3f541a6c6761313e5cc7e + #kafka.ssl.key.password: 86cf0e2ffba3f541a6c6761313e5cc7e + kafka.security.protocol: SASL_PLAINTEXT + kafka.sasl.mechanism: PLAIN + kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252 + kafka.group.id: etl_datapath_telemetry_record_kafka_to_clickhouse-20230125 + kafka.auto.offset.reset: latest + format: msgpack + +processing_pipelines: + etl_processor: # [object] Processing Pipeline + type: com.geedgenetworks.core.processor.projection.ProjectionProcessorImpl + remove_fields: + output_fields: + functions: + - function: SNOWFLAKE_ID + lookup_fields: [ '' ] + output_fields: [ log_id ] + parameters: + data_center_id_num: 1 + - function: UNIX_TIMESTAMP_CONVERTER + lookup_fields: [ __timestamp ] + output_fields: [ recv_time ] + parameters: + precision: seconds + - function: BASE64_ENCODE_TO_STRING + output_fields: [ packet ] + parameters: + value_field: packet + +sinks: + clickhouse_sink: + type: clickhouse + properties: + host: "{{ clickhouse_servers }}" + table: tsg_galaxy_v3.datapath_telemetry_record_local + batch.size: 5000 + batch.interval: 30s + connection.user: e54c9568586180eede1506eecf3574e9 + connection.password: 86cf0e2ffba3f541a6c6761313e5cc7e + + +application: + + env: # [object] Environment Variables + name: etl_datapath_telemetry_record_kafka_to_clickhouse # [string] Job Name + shade.identifier: aes + pipeline: + object-reuse: true # [boolean] Object Reuse, default is false + topology: + - name: kafka_source + downstream: [etl_processor] + - name: etl_processor + downstream: [clickhouse_sink] + - name: clickhouse_sink + + diff --git a/groot-stream/single-cluster-examples/etl_proxy_event_kafka_to_clickhouse b/groot-stream/single-cluster-examples/etl_proxy_event_kafka_to_clickhouse new file mode 100644 index 0000000..011eabb --- /dev/null +++ b/groot-stream/single-cluster-examples/etl_proxy_event_kafka_to_clickhouse @@ -0,0 +1,143 @@ +sources: + kafka_source: + type: kafka + # fields: # [array of object] Field List, if not set, all fields(Map) will be output. 
+ # watermark_timestamp: common_recv_time # [string] Watermark Field Name + # watermark_timestamp_unit: ms # [string] Watermark Unit, default is ms + # watermark_lag: 60 # [number] Watermark Lag, default is 60 + properties: + topic: PROXY-EVENT + kafka.bootstrap.servers: "{{ kafka_source_servers }}" + kafka.client.id: PROXY-EVENT + kafka.session.timeout.ms: 60000 + kafka.max.poll.records: 3000 + kafka.max.partition.fetch.bytes: 31457280 + kafka.security.protocol: SASL_PLAINTEXT + kafka.ssl.keystore.location: + kafka.ssl.keystore.password: + kafka.ssl.truststore.location: + kafka.ssl.truststore.password: + kafka.ssl.key.password: + kafka.sasl.mechanism: PLAIN + kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252 + kafka.buffer.memory: + kafka.group.id: etl_proxy_event_kafka_to_clickhouse-20231221 + kafka.auto.offset.reset: latest + kafka.max.request.size: + kafka.compression.type: none + format: json + +processing_pipelines: + etl_processor: # [object] Processing Pipeline + type: com.geedgenetworks.core.processor.projection.ProjectionProcessorImpl + remove_fields: + output_fields: + properties: + key: value + functions: # [array of object] Function List + + - function: SNOWFLAKE_ID + lookup_fields: [''] + output_fields: [log_id] + parameters: + data_center_id_num: 1 + + - function: JSON_EXTRACT + lookup_fields: [device_tag] + output_fields: [data_center] + filter: + parameters: + value_expression: $.tags[?(@.tag=='data_center')][0].value + + - function: JSON_EXTRACT + lookup_fields: [device_tag] + output_fields: [device_group] + filter: + parameters: + value_expression: $.tags[?(@.tag=='device_group')][0].value + + - function: CURRENT_UNIX_TIMESTAMP + output_fields: [processing_time] + parameters: + precision: seconds + + - function: UNIX_TIMESTAMP_CONVERTER + lookup_fields: [__timestamp] + output_fields: [recv_time] + parameters: + precision: seconds + + - function: EVAL + output_fields: [ingestion_time] + parameters: + value_expression: recv_time + + - function: BASE64_DECODE_TO_STRING + output_fields: [mail_subject] + parameters: + value_field: mail_subject + charset_field: mail_subject_charset + + - function: BASE64_DECODE_TO_STRING + output_fields: [mail_attachment_name] + parameters: + value_field: mail_attachment_name + charset_field: mail_attachment_name_charset + + - function: PATH_COMBINE + lookup_fields: [rtp_pcap_path] + output_fields: [rtp_pcap_path] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, rtp_pcap_path] + + - function: PATH_COMBINE + lookup_fields: [http_request_body] + output_fields: [http_request_body] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, http_request_body] + + - function: PATH_COMBINE + lookup_fields: [http_response_body] + output_fields: [http_response_body] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, http_response_body] + + - function: PATH_COMBINE + lookup_fields: [mail_eml_file] + output_fields: [mail_eml_file] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, mail_eml_file] + + - function: PATH_COMBINE + lookup_fields: [packet_capture_file] + output_fields: [packet_capture_file] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, packet_capture_file] + +sinks: + clickhouse_sink: + type: clickhouse + properties: + host: "{{ 
clickhouse_servers }}" + table: tsg_galaxy_v3.proxy_event_local + batch.size: 100000 + batch.interval: 30s + connection.user: e54c9568586180eede1506eecf3574e9 + connection.password: 86cf0e2ffba3f541a6c6761313e5cc7e + + +application: + + env: # [object] Environment Variables + name: etl_proxy_event_kafka_to_clickhouse # [string] Job Name + shade.identifier: aes + pipeline: + object-reuse: true # [boolean] Object Reuse, default is false + topology: + - name: kafka_source + downstream: [etl_processor] + - name: etl_processor + downstream: [clickhouse_sink] + - name: clickhouse_sink + + diff --git a/groot-stream/single-cluster-examples/etl_session_record_kafka_to_clickhouse b/groot-stream/single-cluster-examples/etl_session_record_kafka_to_clickhouse new file mode 100644 index 0000000..960c10e --- /dev/null +++ b/groot-stream/single-cluster-examples/etl_session_record_kafka_to_clickhouse @@ -0,0 +1,141 @@ +sources: + kafka_source: + type: kafka + properties: + topic: SESSION-RECORD + kafka.bootstrap.servers: "{{ kafka_source_servers }}" + kafka.client.id: SESSION-RECORD + kafka.session.timeout.ms: 60000 + kafka.max.poll.records: 3000 + kafka.max.partition.fetch.bytes: 31457280 + + # kafka.security.protocol: SSL + # kafka.ssl.endpoint.identification.algorithm: "" + # kafka.ssl.keystore.location: $GROOT_HOME/config/dat/keystore.jks + # kafka.ssl.keystore.password: 86cf0e2ffba3f541a6c6761313e5cc7e + # kafka.ssl.truststore.location: $GROOT_HOME/config/dat/truststore.jks + # kafka.ssl.truststore.password: 86cf0e2ffba3f541a6c6761313e5cc7e + # kafka.ssl.key.password: 86cf0e2ffba3f541a6c6761313e5cc7e + + kafka.security.protocol: SASL_PLAINTEXT + kafka.sasl.mechanism: PLAIN + kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252 + kafka.group.id: etl_session_record_kafka_to_clickhouse-20230125 + kafka.auto.offset.reset: latest + format: json + +processing_pipelines: + etl_processor: + type: projection + properties: + key: value + functions: + - function: SNOWFLAKE_ID + lookup_fields: [''] + output_fields: [log_id] + parameters: + data_center_id_num: 1 + + - function: JSON_EXTRACT + lookup_fields: [device_tag] + output_fields: [data_center] + filter: + parameters: + value_expression: $.tags[?(@.tag=='data_center')][0].value + + - function: JSON_EXTRACT + lookup_fields: [device_tag] + output_fields: [device_group] + filter: + parameters: + value_expression: $.tags[?(@.tag=='device_group')][0].value + + - function: CURRENT_UNIX_TIMESTAMP + output_fields: [processing_time] + parameters: + precision: seconds + + - function: UNIX_TIMESTAMP_CONVERTER + lookup_fields: [__timestamp] + output_fields: [recv_time] + parameters: + precision: seconds + + - function: EVAL + output_fields: [ingestion_time] + parameters: + value_expression: recv_time + + - function: BASE64_DECODE_TO_STRING + output_fields: [mail_subject] + parameters: + value_field: mail_subject + charset_field: mail_subject_charset + + - function: BASE64_DECODE_TO_STRING + output_fields: [mail_attachment_name] + parameters: + value_field: mail_attachment_name + charset_field: mail_attachment_name_charset + + - function: PATH_COMBINE + lookup_fields: [rtp_pcap_path] + output_fields: [rtp_pcap_path] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, rtp_pcap_path] + + - function: PATH_COMBINE + lookup_fields: [http_request_body] + 
output_fields: [http_request_body] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, http_request_body] + + - function: PATH_COMBINE + lookup_fields: [http_response_body] + output_fields: [http_response_body] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, http_response_body] + + - function: PATH_COMBINE + lookup_fields: [mail_eml_file] + output_fields: [mail_eml_file] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, mail_eml_file] + + - function: PATH_COMBINE + lookup_fields: [packet_capture_file] + output_fields: [packet_capture_file] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, packet_capture_file] + +sinks: + clickhouse_sink: + type: clickhouse + properties: + host: "{{ clickhouse_servers }}" + table: tsg_galaxy_v3.session_record_local + batch.size: 100000 + batch.interval: 30s + connection.user: e54c9568586180eede1506eecf3574e9 + connection.password: 86cf0e2ffba3f541a6c6761313e5cc7e + + +application: + env: + name: etl_session_record_kafka_to_clickhouse + shade.identifier: aes + pipeline: + object-reuse: true + properties: + hos.bucket.name.rtp_file: traffic_rtp_file_bucket + hos.bucket.name.http_file: traffic_http_file_bucket + hos.bucket.name.eml_file: traffic_eml_file_bucket + hos.bucket.name.policy_capture_file: traffic_policy_capture_file_bucket + topology: + - name: kafka_source + downstream: [etl_processor] + - name: etl_processor + downstream: [clickhouse_sink] + - name: clickhouse_sink + + diff --git a/groot-stream/single-cluster-examples/etl_traffic_sketch_metric_kafka_to_clickhouse b/groot-stream/single-cluster-examples/etl_traffic_sketch_metric_kafka_to_clickhouse new file mode 100644 index 0000000..9eee8c4 --- /dev/null +++ b/groot-stream/single-cluster-examples/etl_traffic_sketch_metric_kafka_to_clickhouse @@ -0,0 +1,93 @@ +sources: + kafka_source: + type: kafka + properties: + topic: TRAFFIC-SKETCH-METRIC + kafka.bootstrap.servers: "{{ kafka_source_servers }}" + kafka.client.id: TRAFFIC-SKETCH-METRIC + kafka.session.timeout.ms: 60000 + kafka.max.poll.records: 3000 + kafka.max.partition.fetch.bytes: 31457280 + kafka.security.protocol: SASL_PLAINTEXT + kafka.sasl.mechanism: PLAIN + kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252 + kafka.group.id: etl_traffic_sketch_metric + kafka.auto.offset.reset: latest + kafka.compression.type: none + format: json + +processing_pipelines: + etl_processor: # [object] Processing Pipeline + type: com.geedgenetworks.core.processor.projection.ProjectionProcessorImpl + remove_fields: + output_fields: + functions: # [array of object] Function List + + - function: FLATTEN + lookup_fields: [ fields,tags ] + output_fields: [ ] + parameters: + #prefix: "" + depth: 3 + # delimiter: "." + + - function: RENAME + lookup_fields: [ '' ] + output_fields: [ '' ] + filter: + parameters: + # parent_fields: [tags] + #rename_fields: + # tags: tags + rename_expression: key =string.replace_all(key,'tags.','');key =string.replace_all(key,'fields.','');return key; + + - function: EVAL + output_fields: [ internal_ip ] + parameters: + value_expression: 'direction=Outbound? client_ip : server_ip' + - function: EVAL + output_fields: [ external_ip ] + parameters: + value_expression: 'direction=Outbound? 
server_ip : client_ip' + + - function: UNIX_TIMESTAMP_CONVERTER + lookup_fields: [ timestamp_ms ] + output_fields: [ recv_time ] + parameters: + precision: seconds + + - function: SNOWFLAKE_ID + lookup_fields: [ '' ] + output_fields: [ log_id ] + filter: + parameters: + data_center_id_num: 1 + + +sinks: + clickhouse_sink: + type: clickhouse + properties: + host: "{{ clickhouse_servers }}" + table: tsg_galaxy_v3.traffic_sketch_metric_local + batch.size: 100000 + batch.interval: 30s + connection.user: e54c9568586180eede1506eecf3574e9 + connection.password: 86cf0e2ffba3f541a6c6761313e5cc7e + + +application: + + env: # [object] Environment Variables + name: etl_traffic_sketch_metric # [string] Job Name + shade.identifier: aes + pipeline: + object-reuse: true # [boolean] Object Reuse, default is false + topology: + - name: kafka_source + downstream: [etl_processor] + - name: etl_processor + downstream: [clickhouse_sink] + - name: clickhouse_sink + + diff --git a/groot-stream/single-cluster-examples/etl_transaction_record_kafka_to_clickhouse b/groot-stream/single-cluster-examples/etl_transaction_record_kafka_to_clickhouse new file mode 100644 index 0000000..03bd6d7 --- /dev/null +++ b/groot-stream/single-cluster-examples/etl_transaction_record_kafka_to_clickhouse @@ -0,0 +1,141 @@ +sources: + kafka_source: + type: kafka + # fields: # [array of object] Field List, if not set, all fields(Map) will be output. + # watermark_timestamp: common_recv_time # [string] Watermark Field Name + # watermark_timestamp_unit: ms # [string] Watermark Unit, default is ms + # watermark_lag: 60 # [number] Watermark Lag, default is 60 + properties: + topic: TRANSACTION-RECORD + kafka.bootstrap.servers: "{{ kafka_source_servers }}" + kafka.client.id: TRANSACTION-RECORD + kafka.session.timeout.ms: 60000 + kafka.max.poll.records: 3000 + kafka.max.partition.fetch.bytes: 31457280 + kafka.security.protocol: SASL_PLAINTEXT + kafka.ssl.keystore.location: + kafka.ssl.keystore.password: + kafka.ssl.truststore.location: + kafka.ssl.truststore.password: + kafka.ssl.key.password: + kafka.sasl.mechanism: PLAIN + kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252 + kafka.buffer.memory: + kafka.group.id: etl_transaction_record_kafka_to_clickhouse-20240308 + kafka.auto.offset.reset: latest + kafka.max.request.size: + kafka.compression.type: none + format: json + +processing_pipelines: + etl_processor: # [object] Processing Pipeline + type: com.geedgenetworks.core.processor.projection.ProjectionProcessorImpl + remove_fields: + output_fields: + properties: + key: value + functions: # [array of object] Function List + + - function: SNOWFLAKE_ID + lookup_fields: [''] + output_fields: [log_id] + parameters: + data_center_id_num: 1 + + - function: JSON_EXTRACT + lookup_fields: [device_tag] + output_fields: [data_center] + filter: + parameters: + value_expression: $.tags[?(@.tag=='data_center')][0].value + + - function: JSON_EXTRACT + lookup_fields: [device_tag] + output_fields: [device_group] + filter: + parameters: + value_expression: $.tags[?(@.tag=='device_group')][0].value + + - function: CURRENT_UNIX_TIMESTAMP + output_fields: [processing_time] + parameters: + precision: seconds + + - function: UNIX_TIMESTAMP_CONVERTER + lookup_fields: [__timestamp] + output_fields: [recv_time] + parameters: + precision: seconds + + - function: EVAL + 
output_fields: [ingestion_time] + parameters: + value_expression: recv_time + + - function: BASE64_DECODE_TO_STRING + output_fields: [mail_subject] + parameters: + value_field: mail_subject + charset_field: mail_subject_charset + + - function: BASE64_DECODE_TO_STRING + output_fields: [mail_attachment_name] + parameters: + value_field: mail_attachment_name + charset_field: mail_attachment_name_charset + + - function: PATH_COMBINE + lookup_fields: [rtp_pcap_path] + output_fields: [rtp_pcap_path] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, rtp_pcap_path] + + - function: PATH_COMBINE + lookup_fields: [http_request_body] + output_fields: [http_request_body] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, http_request_body] + + - function: PATH_COMBINE + lookup_fields: [http_response_body] + output_fields: [http_response_body] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, http_response_body] + + - function: PATH_COMBINE + lookup_fields: [mail_eml_file] + output_fields: [mail_eml_file] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, mail_eml_file] + + - function: PATH_COMBINE + lookup_fields: [packet_capture_file] + output_fields: [packet_capture_file] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, packet_capture_file] + +sinks: + clickhouse_sink: + type: clickhouse + properties: + host: "{{ clickhouse_servers }}" + table: tsg_galaxy_v3.transaction_record_local + batch.size: 100000 + batch.interval: 30s + connection.user: e54c9568586180eede1506eecf3574e9 + connection.password: 86cf0e2ffba3f541a6c6761313e5cc7e + + +application: + + env: # [object] Environment Variables + name: etl_transaction_record_kafka_to_clickhouse # [string] Job Name + shade.identifier: aes + pipeline: + object-reuse: true # [boolean] Object Reuse, default is false + topology: + - name: kafka_source + downstream: [etl_processor] + - name: etl_processor + downstream: [clickhouse_sink] + - name: clickhouse_sink diff --git a/groot-stream/single-cluster-examples/etl_voip_record_kafka_to_clickhouse b/groot-stream/single-cluster-examples/etl_voip_record_kafka_to_clickhouse new file mode 100644 index 0000000..6fde822 --- /dev/null +++ b/groot-stream/single-cluster-examples/etl_voip_record_kafka_to_clickhouse @@ -0,0 +1,143 @@ +sources: + kafka_source: + type: kafka + # fields: # [array of object] Field List, if not set, all fields(Map) will be output. 
+ # watermark_timestamp: common_recv_time # [string] Watermark Field Name + # watermark_timestamp_unit: ms # [string] Watermark Unit, default is ms + # watermark_lag: 60 # [number] Watermark Lag, default is 60 + properties: + topic: VOIP-CONVERSATION-RECORD + kafka.bootstrap.servers: "{{ kafka_source_servers }}" + kafka.client.id: VOIP-CONVERSATION-RECORD + kafka.session.timeout.ms: 60000 + kafka.max.poll.records: 3000 + kafka.max.partition.fetch.bytes: 31457280 + kafka.security.protocol: SASL_PLAINTEXT + kafka.ssl.keystore.location: + kafka.ssl.keystore.password: + kafka.ssl.truststore.location: + kafka.ssl.truststore.password: + kafka.ssl.key.password: + kafka.sasl.mechanism: PLAIN + kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252 + kafka.buffer.memory: + kafka.group.id: etl_voip_record_kafka_to_clickhouse-20231221 + kafka.auto.offset.reset: latest + kafka.max.request.size: + kafka.compression.type: none + format: json + +processing_pipelines: + etl_processor: # [object] Processing Pipeline + type: com.geedgenetworks.core.processor.projection.ProjectionProcessorImpl + remove_fields: + output_fields: + properties: + key: value + functions: # [array of object] Function List + + - function: SNOWFLAKE_ID + lookup_fields: [''] + output_fields: [log_id] + parameters: + data_center_id_num: 1 + + - function: JSON_EXTRACT + lookup_fields: [device_tag] + output_fields: [data_center] + filter: + parameters: + value_expression: $.tags[?(@.tag=='data_center')][0].value + + - function: JSON_EXTRACT + lookup_fields: [device_tag] + output_fields: [device_group] + filter: + parameters: + value_expression: $.tags[?(@.tag=='device_group')][0].value + + - function: CURRENT_UNIX_TIMESTAMP + output_fields: [processing_time] + parameters: + precision: seconds + + - function: UNIX_TIMESTAMP_CONVERTER + lookup_fields: [__timestamp] + output_fields: [recv_time] + parameters: + precision: seconds + + - function: EVAL + output_fields: [ingestion_time] + parameters: + value_expression: recv_time + + - function: BASE64_DECODE_TO_STRING + output_fields: [mail_subject] + parameters: + value_field: mail_subject + charset_field: mail_subject_charset + + - function: BASE64_DECODE_TO_STRING + output_fields: [mail_attachment_name] + parameters: + value_field: mail_attachment_name + charset_field: mail_attachment_name_charset + + - function: PATH_COMBINE + lookup_fields: [rtp_pcap_path] + output_fields: [rtp_pcap_path] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, rtp_pcap_path] + + - function: PATH_COMBINE + lookup_fields: [http_request_body] + output_fields: [http_request_body] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, http_request_body] + + - function: PATH_COMBINE + lookup_fields: [http_response_body] + output_fields: [http_response_body] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, http_response_body] + + - function: PATH_COMBINE + lookup_fields: [mail_eml_file] + output_fields: [mail_eml_file] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, mail_eml_file] + + - function: PATH_COMBINE + lookup_fields: [packet_capture_file] + output_fields: [packet_capture_file] + parameters: + path: [props.hos.path, props.hos.bucket.name.traffic_file, packet_capture_file] + +sinks: + clickhouse_sink: + type: clickhouse + 
properties: + host: "{{ clickhouse_servers }}" + table: tsg_galaxy_v3.voip_record_local + batch.size: 100000 + batch.interval: 30s + connection.user: e54c9568586180eede1506eecf3574e9 + connection.password: 86cf0e2ffba3f541a6c6761313e5cc7e + + +application: + + env: # [object] Environment Variables + name: etl_voip_record_kafka_to_clickhouse # [string] Job Name + shade.identifier: aes + pipeline: + object-reuse: true # [boolean] Object Reuse, default is false + topology: + - name: kafka_source + downstream: [etl_processor] + - name: etl_processor + downstream: [clickhouse_sink] + - name: clickhouse_sink + + diff --git a/groot-stream/templates/realtime_log_streaming_cn_session_record.yaml.template b/groot-stream/templates/realtime_log_streaming_cn_session_record.yaml.template new file mode 100644 index 0000000..3e9db4e --- /dev/null +++ b/groot-stream/templates/realtime_log_streaming_cn_session_record.yaml.template @@ -0,0 +1,387 @@ +sources: + kafka_source: + type: kafka + properties: + topic: {{ kafka_source_topic }} + kafka.bootstrap.servers: {{ kafka_source_bootstrap_servers }} + kafka.client.id: {{ kafka_source_topic }} + kafka.session.timeout.ms: 60000 + kafka.max.poll.records: 3000 + kafka.max.partition.fetch.bytes: 31457280 + kafka.security.protocol: SASL_PLAINTEXT + kafka.sasl.mechanism: PLAIN + kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252 + kafka.group.id: {{ kafka_source_group_id }} + kafka.auto.offset.reset: latest + format: json + json.ignore.parse.errors: false + + +processing_pipelines: + etl_processor: + type: com.geedgenetworks.core.processor.projection.ProjectionProcessorImpl + functions: + - function: SNOWFLAKE_ID + lookup_fields: [ '' ] + output_fields: [ cn_log_id ] + parameters: + data_center_id_num: 1 + + - function: EVAL + output_fields: [ log_id ] + parameters: + value_expression: "log_id == null ? cn_log_id : log_id" + + - function: UNIX_TIMESTAMP_CONVERTER + lookup_fields: [ __timestamp ] + output_fields: [ kafka_recv_time ] + parameters: + precision: seconds + + - function: EVAL + output_fields: [ recv_time ] + parameters: + value_expression: "recv_time == null ? 
kafka_recv_time : recv_time" + + - function: EVAL + output_fields: [ domain ] + parameters: + value_expression: server_fqdn + + - function: EVAL + output_fields: [ domain_sld ] + parameters: + value_expression: server_domain + + - function: CN_L7_PROTOCOL_AND_APP_EXTRACT + parameters: + decoded_path_field_name: decoded_path + app_transition_field_name: app_transition + l7_protocol_field_name: l7_protocol + app_field_name: app + l7_protocol: DHCP,DNS,FTP,GRE,GTP,HTTP,HTTPS,ICMP,IMAP,IMAPS,IPSEC,ISAKMP,XMPP,L2TP,LDAP,MMS,NETBIOS,NETFLOW,NTP,POP3,POP3S,RDP,PPTP,RADIUS,RTCP,RTP,RTSP,SIP,SMB,SMTP,SMTPS,SNMP,SSDP,SSH,SSL,STUN,TELNET,TFTP,OPENVPN,RTMP,TEREDO,FTPS,DTLS,SPDY,BJNP,QUIC,MDNS,Unknown TCP,Unknown UDP,Unknown Other,IKE,MAIL,SOCKS,DoH,SLP,SSL with ESNI,ISATAP,Stratum,SSL with ECH + + - function: GEOIP_LOOKUP + lookup_fields: [ client_ip ] + output_fields: [ ] + parameters: + kb_name: cn_ip_location + option: IP_TO_OBJECT + geolocation_field_mapping: + COUNTRY: client_country_region + PROVINCE: client_super_admin_area + CITY: client_admin_area + LONGITUDE: client_longitude + LATITUDE: client_latitude + ISP: client_isp + + - function: GEOIP_LOOKUP + lookup_fields: [ server_ip ] + output_fields: [ ] + parameters: + kb_name: cn_ip_location + option: IP_TO_OBJECT + geolocation_field_mapping: + COUNTRY: server_country_region + PROVINCE: server_super_admin_area + CITY: server_admin_area + LONGITUDE: server_longitude + LATITUDE: server_latitude + ISP: server_isp + + - function: ASN_LOOKUP + lookup_fields: [ client_ip ] + output_fields: [ client_asn ] + parameters: + option: IP_TO_ASN + kb_name: cn_ip_asn + + - function: ASN_LOOKUP + lookup_fields: [ server_ip ] + output_fields: [ server_asn ] + parameters: + option: IP_TO_ASN + kb_name: cn_ip_asn + + - function: CN_IDC_RENTER_LOOKUP + lookup_fields: [ client_ip ] + output_fields: [ client_idc_renter ] + parameters: + kb_name: cn_idc_renter + + - function: CN_IDC_RENTER_LOOKUP + lookup_fields: [ server_ip ] + output_fields: [ server_idc_renter ] + parameters: + kb_name: cn_idc_renter + + - function: CN_LINK_DIRECTION_LOOKUP + lookup_fields: [ in_link_id ] + output_fields: [ in_link_direction ] + parameters: + kb_name: cn_link_direction + + - function: CN_LINK_DIRECTION_LOOKUP + lookup_fields: [ out_link_id ] + output_fields: [ out_link_direction ] + parameters: + kb_name: cn_link_direction + + - function: CN_FQDN_CATEGORY_LOOKUP + lookup_fields: [ domain ] + parameters: + kb_name: cn_fqdn_category + field_mapping: + NAME: domain_category_name + GROUP: domain_category_group + REPUTATION_LEVEL: domain_reputation_level + + - function: CN_ICP_LOOKUP + lookup_fields: [ domain ] + output_fields: [ domain_icp_company_name ] + parameters: + kb_name: cn_fqdn_icp + + - function: CN_FQDN_WHOIS_LOOKUP + lookup_fields: [ domain ] + output_fields: [ domain_whois_org ] + parameters: + kb_name: cn_fqdn_whois + + - function: CN_DNS_SERVER_LOOKUP + lookup_fields: [ server_ip ] + output_fields: [ server_dns_server ] + parameters: + kb_name: cn_dns_server + + - function: CN_APP_CATEGORY_LOOKUP + lookup_fields: [ app ] + parameters: + kb_name: cn_app_category + field_mapping: + CATEGORY: app_category + SUBCATEGORY: app_subcategory + COMPANY: app_company + COMPANY_CATEGORY: app_company_category + + - function: EVAL + output_fields: [ client_zone ] + parameters: + value_expression: "flags & 8 == 8 ? 'internal' : 'external'" + + - function: EVAL + output_fields: [ server_zone ] + parameters: + value_expression: "flags & 16 == 16 ? 
'internal' : 'external'" + + - function: CN_IP_ZONE_LOOKUP + lookup_fields: [ client_ip ] + output_fields: [ client_zone ] + parameters: + kb_name: none + #kb_name: cn_internal_ip + + - function: CN_IP_ZONE_LOOKUP + lookup_fields: [ server_ip ] + output_fields: [ server_zone ] + parameters: + kb_name: none + #kb_name: cn_internal_ip + + - function: EVAL + output_fields: [ sent_bytes ] + parameters: + value_expression: "sent_bytes == null ? 0 : sent_bytes" + + - function: EVAL + output_fields: [ sent_pkts ] + parameters: + value_expression: "sent_pkts == null ? 0 : sent_pkts" + + - function: EVAL + output_fields: [ received_bytes ] + parameters: + value_expression: "received_bytes == null ? 0 : received_bytes" + + - function: EVAL + output_fields: [ received_pkts ] + parameters: + value_expression: "received_pkts == null ? 0 : received_pkts" + + - function: EVAL + output_fields: [ traffic_inbound_byte ] + parameters: + value_expression: "client_zone == 'internal' && server_zone == 'external' ? received_bytes : traffic_inbound_byte" + + - function: EVAL + output_fields: [ traffic_outbound_byte ] + parameters: + value_expression: "client_zone == 'external' && server_zone == 'internal' ? received_bytes : traffic_outbound_byte" + + - function: EVAL + output_fields: [ traffic_inbound_pkt ] + parameters: + value_expression: "client_zone == 'internal' && server_zone == 'external' ? received_pkts : traffic_inbound_pkt" + + - function: EVAL + output_fields: [ traffic_outbound_pkt ] + parameters: + value_expression: "client_zone == 'external' && server_zone == 'internal' ? received_pkts : traffic_outbound_pkt" + + - function: EVAL + output_fields: [ traffic_outbound_byte ] + parameters: + value_expression: "client_zone == 'internal' && server_zone == 'external' ? sent_bytes : traffic_outbound_byte" + + - function: EVAL + output_fields: [ traffic_inbound_byte ] + parameters: + value_expression: "client_zone == 'external' && server_zone == 'internal' ? sent_bytes : traffic_inbound_byte" + + - function: EVAL + output_fields: [ traffic_outbound_pkt ] + parameters: + value_expression: "client_zone == 'internal' && server_zone == 'external' ? sent_pkts : traffic_outbound_pkt" + + - function: EVAL + output_fields: [ traffic_inbound_pkt ] + parameters: + value_expression: "client_zone == 'external' && server_zone == 'internal' ? sent_pkts : traffic_inbound_pkt" + + - function: EVAL + output_fields: [ traffic_internal_byte ] + parameters: + value_expression: "client_zone == 'internal' && server_zone == 'internal' ? sent_bytes + received_bytes : traffic_internal_byte" + + - function: EVAL + output_fields: [ traffic_internal_pkt ] + parameters: + value_expression: "client_zone == 'internal' && server_zone == 'internal' ? sent_pkts + received_pkts : traffic_internal_pkt" + + - function: EVAL + output_fields: [ traffic_through_byte ] + parameters: + value_expression: "client_zone == 'external' && server_zone == 'external' ? sent_bytes + received_bytes : traffic_through_byte" + + - function: EVAL + output_fields: [ traffic_through_pkt ] + parameters: + value_expression: "client_zone == 'external' && server_zone == 'external' ? sent_pkts + received_pkts : traffic_through_pkt" + + - function: EVAL + output_fields: [ sessions ] + parameters: + value_expression: "1" + + - function: EVAL + output_fields: [ internal_query_num ] + parameters: + value_expression: "client_zone == 'internal' ? 
sessions : internal_query_num" + + - function: EVAL + output_fields: [ external_query_num ] + parameters: + value_expression: "client_zone == 'external' ? sessions : external_query_num" + + - function: CN_ANONYMITY_LOOKUP + lookup_fields: [ server_ip ] + output_fields: [ server_node_type ] + parameters: + kb_name: cn_ioc_darkweb + option: IP_TO_NODE_TYPE + + - function: CN_ANONYMITY_LOOKUP + lookup_fields: [ domain ] + output_fields: [ domain_node_type ] + parameters: + kb_name: cn_ioc_darkweb + option: DOMAIN_TO_NODE_TYPE + + - function: CN_IOC_LOOKUP + lookup_fields: [ server_ip ] + output_fields: [ server_malware ] + parameters: + kb_name: cn_ioc_malware + option: IP_TO_MALWARE + + - function: CN_IOC_LOOKUP + lookup_fields: [ domain ] + output_fields: [ domain_malware ] + parameters: + kb_name: cn_ioc_malware + option: DOMAIN_TO_MALWARE + + - function: CN_INTELLIGENCE_INDICATOR_LOOKUP + lookup_fields: [ client_ip ] + output_fields: [ client_ip_tags ] + parameters: + kb_name: cn_intelligence_indicator + option: IP_TO_TAG + + - function: CN_INTELLIGENCE_INDICATOR_LOOKUP + lookup_fields: [ server_ip ] + output_fields: [ server_ip_tags ] + parameters: + kb_name: cn_intelligence_indicator + option: IP_TO_TAG + + - function: CN_INTELLIGENCE_INDICATOR_LOOKUP + lookup_fields: [ domain ] + output_fields: [ domain_tags ] + parameters: + kb_name: cn_intelligence_indicator + option: DOMAIN_TO_TAG + + - function: GENERATE_STRING_ARRAY + lookup_fields: [ client_idc_renter,client_ip_tags ] + output_fields: [ client_ip_tags ] + + - function: GENERATE_STRING_ARRAY + lookup_fields: [ server_idc_renter,server_dns_server,server_node_type,server_malware,server_ip_tags ] + output_fields: [ server_ip_tags ] + + - function: GENERATE_STRING_ARRAY + lookup_fields: [ domain_node_type,domain_malware,domain_tags ] + output_fields: [ domain_tags ] + +postprocessing_pipelines: + post_output_field_processor: + type: com.geedgenetworks.core.processor.projection.ProjectionProcessorImpl + output_fields: [ recv_time,log_id,flags,start_timestamp_ms,end_timestamp_ms,duration_ms,decoded_as,client_ip,server_ip,client_port,server_port,app,app_transition,decoded_path,ip_protocol,l7_protocol,out_link_id,in_link_id,subscriber_id,imei,imsi,phone_number,apn,http_url,dns_rcode,dns_qname,dns_qtype,dns_rr,out_link_direction,in_link_direction,server_fqdn,server_domain,domain,domain_sld,domain_category_name,domain_category_group,domain_reputation_level,domain_icp_company_name,domain_whois_org,domain_tags,client_zone,client_country_region,client_super_admin_area,client_admin_area,client_longitude,client_latitude,client_isp,client_asn,client_ip_tags,server_zone,server_country_region,server_super_admin_area,server_admin_area,server_longitude,server_latitude,server_isp,server_asn,server_ip_tags,app_category,app_subcategory,app_company,app_company_category,app_tags,sent_pkts,sent_bytes,received_pkts,received_bytes,sessions,tcp_c2s_lost_bytes,tcp_s2c_lost_bytes,tcp_c2s_o3_pkts,tcp_s2c_o3_pkts,tcp_c2s_rtx_bytes,tcp_s2c_rtx_bytes,tcp_c2s_rtx_pkts,tcp_s2c_rtx_pkts,tcp_rtt_ms,http_response_latency_ms,ssl_handshake_latency_ms,dns_response_latency_ms,cn_internal_rule_id_list,cn_internal_ioc_type_list,traffic_inbound_byte,traffic_inbound_pkt,traffic_outbound_byte,traffic_outbound_pkt,traffic_internal_byte,traffic_internal_pkt,traffic_through_byte,traffic_through_pkt,internal_query_num,external_query_num ] + +sinks: + kafka_sink: + type: kafka + properties: + topic: {{ kafka_sink_topic }} + kafka.bootstrap.servers: {{ kafka_sink_bootstrap_servers }} 
+ kafka.client.id: {{ kafka_sink_topic }} + kafka.retries: 0 + kafka.linger.ms: 10 + kafka.request.timeout.ms: 30000 + kafka.batch.size: 262144 + kafka.buffer.memory: 134217728 + kafka.max.request.size: 10485760 + kafka.compression.type: snappy + kafka.security.protocol: SASL_PLAINTEXT + kafka.sasl.mechanism: PLAIN + kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252 + format: json + json.ignore.parse.errors: false + log.failures.only: true + +application: + env: + name: {{ job_name }} + shade.identifier: aes + pipeline: + object-reuse: true + topology: + - name: kafka_source + downstream: [ etl_processor ] + - name: etl_processor + downstream: [ post_output_field_processor ] + - name: post_output_field_processor + downstream: [ kafka_sink ] + - name: kafka_sink + downstream: [ ] diff --git a/hbase/tsg_olap_hbase_ddl.sql b/hbase/001_create_tsg_olap_hbase_table.sql similarity index 100% rename from hbase/tsg_olap_hbase_ddl.sql rename to hbase/001_create_tsg_olap_hbase_table.sql diff --git a/hbase/tsg_olap_phoenix_ddl.sql b/hbase/002_create_tsg_olap_hbase_phoenix_table.sql similarity index 100% rename from hbase/tsg_olap_phoenix_ddl.sql rename to hbase/002_create_tsg_olap_hbase_phoenix_table.sql diff --git a/hbase/update_hbase.sh b/hbase/101_upgrade_v2408_to_v2409_tsg_olap_hbase_table.sh similarity index 100% rename from hbase/update_hbase.sh rename to hbase/101_upgrade_v2408_to_v2409_tsg_olap_hbase_table.sh diff --git a/hos/create_bucket.sh b/hos/001_create_tsg_olap_hos_bucket.sh similarity index 100% rename from hos/create_bucket.sh rename to hos/001_create_tsg_olap_hos_bucket.sh diff --git a/hos/bucket_upgrade.sh b/hos/002_upgrade_tsg_olap_hos_bucket.sh similarity index 100% rename from hos/bucket_upgrade.sh rename to hos/002_upgrade_tsg_olap_hos_bucket.sh diff --git a/hos/galaxy-hos-service-24.09.yml b/hos/galaxy-hos-service-24.09.yml deleted file mode 100644 index b4ce78f..0000000 --- a/hos/galaxy-hos-service-24.09.yml +++ /dev/null @@ -1,97 +0,0 @@ -#服务端口 -server: - port: 8186 - max-http-header-size: 20MB - tomcat: - max-threads: 400 -#tomcat缓存大小,单位KB系统默认10M,配置10g -tomcat: - cacheMaxSize: 1000000 -#hbase参数 -hbase: - zookeeperQuorum: 192.168.44.11:2181,192.168.44.14:2181,192.168.44.15:2181 - zookeeperPort: 2181 - zookeeperNodeParent: /hbase - clientRetriesNumber: 9 - rpcTimeout: 100000 - connectPool: 10 - clientWriteBuffer: 10485760 - clientKeyValueMaxsize: 1073741824 - mobThreshold: 10485760 - #part的最大数量 - maxParts: 100000 - #每次获取的part数 - getPartBatch: 10 - #hbase索引表前缀,前缀为以下的都为索引表 - timeIndexTablePrefix: index_time_ - filenameIndexTablePrefix: index_filename_ - partFileIndexTablePrefix: index_partfile_ - systemBucketMeta: system:bucket_meta - #创建表的分区数 - regionCount: 16 - filenameHead: 0,1,2,3,4,5,6,7,8,9,a,b,c,d,e,f - partHead: 0,1,2,3,4,5,6,7,8,9,a,b,c,d,e,f - #获取文件大小的目录 - dataPath: /hbase - #hadoop集群namenode节点,单机为单个ip,集群为ip1,ip2 - hadoopNameNodes: 192.168.44.10,192.168.44.11 - #副本数,单机为1,集群为2 - hadoopReplication: 2 - #hadoop端口 - hadoopPort: 9000 - hadoopUser: root - hadoopNameServices: ns1 - hadoopNameNodesNs1: nn1,nn2 - asyncPut: 0 -#是否打开验证,0打开,打开需要使用S3身份验证或者token访问服务 -auth: - open: 0 - #http访问使用的token - token: ENC(vknRT6U4I739rLIha9CvojM+4uFyXZLEYpO2HZayLnRak1HPW0K2yZ3vnQBA2foo) - #s3验证 - s3: - accesskey: ENC(FUQDvVP+zqCiwHQhXcRvbw==) - secretkey: ENC(FUQDvVP+zqCiwHQhXcRvbw==) -hos: - 
#文件大小阈值 - maxFileSize: 5073741800 - #大文件阈值 - uploadThreshold: 104857600 - #长连接超时时间 - keepAliveTimeout: 60000 - #批量删除对象的最大数量 - deleteMultipleNumber: 1000 - #获取对象列表等操作的最大值 - maxResultLimit: 100000 - #分块上传的最大分块数 - maxPartNumber: 10000 - #追加上传的最大次数 - maxAppendNumber: 100000 - #是否快速上传 - isQuickUpload: 0 - #是否快速下载文件,1打开,hbase内存小于20G的集群设为0 - isQuickDownloadFile: 0 - #用户白名单(hbase的namespace),获取存储配额 - users: default - #是否打开限流,0:关闭,1:打开 - openRateLimiter: 0 - #限流每秒请求数 - rateLimiterQps: 20000 -#设置上传文件大小的最大值 -spring: - servlet: - multipart: - max-file-size: 5GB - max-request-size: 5GB -#Prometheus参数 - application: - name: HosServiceApplication -#Prometheus参数 -management: - endpoints: - web: - exposure: - include: '*' - metrics: - tags: - application: ${spring.application.name} \ No newline at end of file diff --git a/hos/hosutil/config.properties b/hos/hosutil/config.properties deleted file mode 100644 index fc486bf..0000000 --- a/hos/hosutil/config.properties +++ /dev/null @@ -1,21 +0,0 @@ -qgw.serverAddr=http://{{ vrrp_instance.default.virtual_ipaddress }}:9999 -hos.serverAddr=http://{{ vrrp_instance.oss.virtual_ipaddress }}:9098 -hos.token={{ hos_token }} -kafka.server={{ groups.kafka[0] }}:9092 -#延迟时间,校验多少秒之前的文件,单位秒 -check.time.delay=180 -hos.traffic.buckets=traffic_policy_capture_file_bucket,traffic_rtp_file_bucket,traffic_http_file_bucket,traffic_eml_file_bucket -kafka.traffic.topics=TRAFFIC-POLICY-CAPTURE-FILE-STREAM-RECORD,TRAFFIC-RTP-FILE-STREAM-RECORD,TRAFFIC-HTTP-FILE-STREAM-RECORD,TRAFFIC-EML-FILE-STREAM-RECORD -kafka.troubleshooting.topic=TROUBLESHOOTING-FILE-STREAM-RECORD -file.chunk.combiner.window.time=15000 -traffic.file.count=10 -threads=1 -max.threads=10 -print.out.interval=1000 -http.max.total=100 -http.default.max.per.route=100 -http.connect.timeout=5000 -http.connection.request.timeout=10000 -http.socket.timeout=-1 -hos.log.types=security_event,monitor_event,proxy_event,session_record,voip_record,assessment_event,transaction_record,troubleshooting -hos.log.types.file.types.url.fields=security_event:http-http_response_body&http_request_body,pcap-packet_capture_file&rtp_pcap_path,eml-mail_eml_file;proxy_event:http-http_response_body&http_request_body;session_record:http-http_response_body&http_request_body,pcap-packet_capture_file&rtp_pcap_path,eml-mail_eml_file;voip_record:pcap-rtp_pcap_path;assessment_event:other-assessment_file;transaction_record:http-http_response_body&http_request_body,eml-mail_eml_file;monitor_event:http-http_response_body&http_request_body,pcap-packet_capture_file&rtp_pcap_path,eml-mail_eml_file \ No newline at end of file diff --git a/hos/hosutil/galaxy-hos-util-1.4.jar b/hos/hosutil/galaxy-hos-util-1.4.jar deleted file mode 100644 index 9b05a71..0000000 Binary files a/hos/hosutil/galaxy-hos-util-1.4.jar and /dev/null differ diff --git a/hos/hosutil/hosutil.sh b/hos/hosutil/hosutil.sh deleted file mode 100644 index e74c7ff..0000000 --- a/hos/hosutil/hosutil.sh +++ /dev/null @@ -1,138 +0,0 @@ -#!/bin/bash - -version="1.4" -jar="galaxy-hos-util-$version.jar" - -usage() { - cat <&2; usage; exit 1 ;; - :) echo "Option -$OPTARG requires an argument" >&2; usage; exit 1 ;; - esac -done - -case "$operation" in - download) download ;; - upload) upload ;; - check) check ;; - combiner) combiner ;; - version) echo $version ;; - *) usage; exit 1 ;; -esac - diff --git a/mariadb/README.md b/mariadb/README.md index e69de29..4dfe1c6 100644 --- a/mariadb/README.md +++ b/mariadb/README.md @@ -0,0 +1 @@ +MariaDB 数据初始化脚本 \ No newline at end of file diff --git 
a/mariadb/galaxy-qgw-service/init_saved_query_job.sql b/mariadb/galaxy-qgw-service/V23.12__init_saved_query_job.sql similarity index 100% rename from mariadb/galaxy-qgw-service/init_saved_query_job.sql rename to mariadb/galaxy-qgw-service/V23.12__init_saved_query_job.sql diff --git a/mariadb/galaxy-qgw-service/init_sys_storage_event.sql b/mariadb/galaxy-qgw-service/V24.07__init_sys_storage_event.sql similarity index 100% rename from mariadb/galaxy-qgw-service/init_sys_storage_event.sql rename to mariadb/galaxy-qgw-service/V24.07__init_sys_storage_event.sql diff --git a/shell-scripts/README.md b/shell-scripts/README.md index e69de29..15f8338 100644 --- a/shell-scripts/README.md +++ b/shell-scripts/README.md @@ -0,0 +1 @@ +Global installation script management \ No newline at end of file
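
The groot-stream job files added above all follow the same four-part layout: a Kafka source, an optional projection-based processing (and sometimes postprocessing) pipeline, a ClickHouse or Kafka sink, and an application block that names the job and wires the topology. The sketch below is only a reading aid distilled from those examples, not a file shipped by this change; the topic, table, group id, and job name are placeholders, and the exact nesting is approximated because the examples appear here with their original line breaks collapsed.

# Condensed sketch of the shared job layout (placeholder values only).
sources:
  kafka_source:
    type: kafka
    properties:
      topic: EXAMPLE-TOPIC                                  # per-job source topic
      kafka.bootstrap.servers: "{{ kafka_source_servers }}"
      kafka.group.id: example_job                           # one consumer group per job
      kafka.auto.offset.reset: latest
    format: json                                            # json or msgpack in the examples above

processing_pipelines:                                       # omitted in the plain pass-through jobs
  etl_processor:
    type: com.geedgenetworks.core.processor.projection.ProjectionProcessorImpl
    functions:
      - function: SNOWFLAKE_ID                              # e.g. assign a log_id to each record
        lookup_fields: [ '' ]
        output_fields: [ log_id ]
        parameters:
          data_center_id_num: 1

sinks:
  clickhouse_sink:
    type: clickhouse
    properties:
      host: "{{ clickhouse_servers }}"
      table: tsg_galaxy_v3.example_local                    # per-job target table
      batch.size: 100000
      batch.interval: 30s

application:
  env:
    name: example_job                                       # job name
    shade.identifier: aes                                   # credential fields appear to be stored encrypted
  pipeline:
    object-reuse: true
  topology:                                                 # source -> processor -> sink wiring
    - name: kafka_source
      downstream: [ etl_processor ]
    - name: etl_processor
      downstream: [ clickhouse_sink ]
    - name: clickhouse_sink
      downstream: [ ]

From job to job the examples above differ mainly in the topic and table names, the consumer group id, the function list inside the processor, and the downstream wiring in the topology.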