增加24.09相关初始化sql及模版配置文件
This commit is contained in:
@@ -6,3 +6,9 @@
|
||||
|:-----------------|:--------------|
|
||||
| shell-scripts | 存储安装和初始化脚本。 |
|
||||
| config-templates | 存储配置文件模板。 |
|
||||
|
||||
## 文件命名
|
||||
|
||||
- `[索引号]_[create]_[项目名]_[功能名称]`:初始化数据库或脚本。例如:`001_create_tsg_olap_clickhouse_table.sql`。
|
||||
- `[索引号]_[upgrade]_[from version]_to_[current version]_[项目名]_[功能名称]`:升级文件,多个版本升级记录可以放在一个文件中。例如发布一个LTS版本:`101_upgrade_v2402_to_v2409_tsg_olap_clickhouse_table.sql`。
|
||||
|
||||
|
||||
@@ -0,0 +1 @@
|
||||
全局安装配置文件
|
||||
@@ -0,0 +1 @@
|
||||
Druid 摄入任务
|
||||
31
groot-stream/README.md
Normal file
31
groot-stream/README.md
Normal file
@@ -0,0 +1,31 @@
|
||||
# 配置模版举例
|
||||
|
||||
## session_record.yaml.j2 (会话日志ETL场景)
|
||||
|
||||
- 多数据中心部署场景:分中心 Data Transporter 预处理后,集中汇聚至国家中心(NDC)
|
||||
- etl_session_record_kafka_to_ndc_kafka (A-DT)
|
||||
- Topology: kafka_source -> etl_processor -> kafka_sink
|
||||
- Data Flow: SESSION-RECORD -> SESSION-RECORD-PROCESSED
|
||||
- 多数据中心部署场景:国家中心侧加载会话日志并写入 ClickHouse
|
||||
- session_record_processed_kafka_to_clickhouse(A-NDC)
|
||||
- Topology: kafka_source -> clickhouse_sink
|
||||
- Data Flow: SESSION-RECORD-PROCESSED -> session_record_local
|
||||
- 集中部署场景:摄入会话日志,预处理后写入ClickHouse
|
||||
- etl_session_record_kafka_to_clickhouse (B)
|
||||
- Topology: kafka_source -> etl_processor -> clickhouse_sink
|
||||
- Data Flow: SESSION-RECORD -> session_record_local
|
||||
|
||||
## data_transporter.yaml.j2 (数据回传场景)
|
||||
|
||||
- troubleshooting_file_stream_record_kafka_to_ndc_kafka
|
||||
- Topology: kafka_source -> kafka_sink (format:raw)
|
||||
- Data Flow: TROUBLESHOOTING-FILE-STREAM-RECORD -> TROUBLESHOOTING-FILE-STREAM-RECORD
|
||||
|
||||
## realtime_log_streaming_cn_session_record.yaml.template (向其它厂商/第三方推送场景)
|
||||
|
||||
`install_cn_udf.sh` 用于安装 CN UDFs;`grootstream.yaml` 用于定义 CN 知识库。
|
||||
|
||||
- etl_session_record_kafka_to_cn_kafka
|
||||
- Topology: kafka_source -> etl_processor -> post_output_field_processor -> kafka_sink
|
||||
- Data Flow: SESSION-RECORD(SESSION-RECORD-PROCESSED) -> SESSION-RECORD-CN
|
||||
|
||||
@@ -0,0 +1,49 @@
|
||||
sources:
|
||||
kafka_source:
|
||||
type: kafka
|
||||
properties:
|
||||
topic: DOS-SKETCH-RECORD
|
||||
kafka.bootstrap.servers: "{{ kafka_source_servers }}"
|
||||
kafka.client.id: DOS-SKETCH-RECORD
|
||||
kafka.session.timeout.ms: 60000
|
||||
kafka.max.poll.records: 3000
|
||||
kafka.max.partition.fetch.bytes: 31457280
|
||||
kafka.security.protocol: SASL_PLAINTEXT
|
||||
kafka.sasl.mechanism: PLAIN
|
||||
kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
|
||||
kafka.group.id: dos_sketch_record_kafka_to_kafka
|
||||
kafka.auto.offset.reset: latest
|
||||
format: raw
|
||||
|
||||
|
||||
sinks:
|
||||
kafka_sink:
|
||||
type: kafka
|
||||
properties:
|
||||
topic: DOS-SKETCH-RECORD
|
||||
kafka.bootstrap.servers: "{{ kafka_sink_servers }}"
|
||||
kafka.client.id: DOS-SKETCH-RECORD
|
||||
kafka.retries: 0
|
||||
kafka.linger.ms: 10
|
||||
kafka.request.timeout.ms: 30000
|
||||
kafka.batch.size: 262144
|
||||
kafka.buffer.memory: 134217728
|
||||
kafka.max.request.size: 10485760
|
||||
kafka.compression.type: snappy
|
||||
kafka.security.protocol: SASL_PLAINTEXT
|
||||
kafka.sasl.mechanism: PLAIN
|
||||
kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
|
||||
format: raw
|
||||
|
||||
|
||||
application:
|
||||
env:
|
||||
name: dos_sketch_record_kafka_to_kafka
|
||||
shade.identifier: aes
|
||||
pipeline:
|
||||
object-reuse: true # [boolean] Object Reuse, default is false
|
||||
topology:
|
||||
- name: kafka_source
|
||||
downstream: [kafka_sink]
|
||||
- name: kafka_sink
|
||||
|
||||
@@ -0,0 +1,154 @@
|
||||
sources:
|
||||
kafka_source:
|
||||
type: kafka
|
||||
properties:
|
||||
topic: PROXY-EVENT
|
||||
kafka.bootstrap.servers: "{{ kafka_source_servers }}"
|
||||
kafka.client.id: PROXY-EVENT
|
||||
kafka.session.timeout.ms: 60000
|
||||
kafka.max.poll.records: 3000
|
||||
kafka.max.partition.fetch.bytes: 31457280
|
||||
kafka.security.protocol: SASL_PLAINTEXT
|
||||
kafka.sasl.mechanism: PLAIN
|
||||
kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
|
||||
kafka.group.id: etl_proxy_event_kafka_to_ndc_kafka
|
||||
kafka.auto.offset.reset: latest
|
||||
format: json
|
||||
|
||||
processing_pipelines:
|
||||
etl_processor: # [object] Processing Pipeline
|
||||
type: com.geedgenetworks.core.processor.projection.ProjectionProcessorImpl
|
||||
remove_fields:
|
||||
output_fields:
|
||||
properties:
|
||||
key: value
|
||||
functions: # [array of object] Function List
|
||||
|
||||
- function: ASN_LOOKUP
|
||||
lookup_fields: [server_ip]
|
||||
output_fields: [server_asn]
|
||||
parameters:
|
||||
option: IP_TO_ASN
|
||||
kb_name: tsg_ip_asn
|
||||
|
||||
- function: ASN_LOOKUP
|
||||
lookup_fields: [client_ip]
|
||||
output_fields: [client_asn]
|
||||
parameters:
|
||||
option: IP_TO_ASN
|
||||
kb_name: tsg_ip_asn
|
||||
|
||||
- function: SNOWFLAKE_ID
|
||||
lookup_fields: ['']
|
||||
output_fields: [log_id]
|
||||
parameters:
|
||||
data_center_id_num: 1
|
||||
|
||||
- function: JSON_EXTRACT
|
||||
lookup_fields: [device_tag]
|
||||
output_fields: [data_center]
|
||||
filter:
|
||||
parameters:
|
||||
value_expression: $.tags[?(@.tag=='data_center')][0].value
|
||||
|
||||
- function: JSON_EXTRACT
|
||||
lookup_fields: [device_tag]
|
||||
output_fields: [device_group]
|
||||
filter:
|
||||
parameters:
|
||||
value_expression: $.tags[?(@.tag=='device_group')][0].value
|
||||
|
||||
- function: CURRENT_UNIX_TIMESTAMP
|
||||
output_fields: [processing_time]
|
||||
parameters:
|
||||
precision: seconds
|
||||
|
||||
- function: UNIX_TIMESTAMP_CONVERTER
|
||||
lookup_fields: [__timestamp]
|
||||
output_fields: [recv_time]
|
||||
parameters:
|
||||
precision: seconds
|
||||
|
||||
- function: EVAL
|
||||
output_fields: [ingestion_time]
|
||||
parameters:
|
||||
value_expression: recv_time
|
||||
|
||||
|
||||
- function: BASE64_DECODE_TO_STRING
|
||||
output_fields: [mail_subject]
|
||||
parameters:
|
||||
value_field: mail_subject
|
||||
charset_field: mail_subject_charset
|
||||
|
||||
- function: BASE64_DECODE_TO_STRING
|
||||
output_fields: [mail_attachment_name]
|
||||
parameters:
|
||||
value_field: mail_attachment_name
|
||||
charset_field: mail_attachment_name_charset
|
||||
|
||||
- function: PATH_COMBINE
|
||||
lookup_fields: [rtp_pcap_path]
|
||||
output_fields: [rtp_pcap_path]
|
||||
parameters:
|
||||
path: [props.hos.path, props.hos.bucket.name.traffic_file, rtp_pcap_path]
|
||||
|
||||
- function: PATH_COMBINE
|
||||
lookup_fields: [http_request_body]
|
||||
output_fields: [http_request_body]
|
||||
parameters:
|
||||
path: [props.hos.path, props.hos.bucket.name.traffic_file, http_request_body]
|
||||
|
||||
- function: PATH_COMBINE
|
||||
lookup_fields: [http_response_body]
|
||||
output_fields: [http_response_body]
|
||||
parameters:
|
||||
path: [props.hos.path, props.hos.bucket.name.traffic_file, http_response_body]
|
||||
|
||||
- function: PATH_COMBINE
|
||||
lookup_fields: [mail_eml_file]
|
||||
output_fields: [mail_eml_file]
|
||||
parameters:
|
||||
path: [props.hos.path, props.hos.bucket.name.traffic_file, mail_eml_file]
|
||||
|
||||
- function: PATH_COMBINE
|
||||
lookup_fields: [packet_capture_file]
|
||||
output_fields: [packet_capture_file]
|
||||
parameters:
|
||||
path: [props.hos.path, props.hos.bucket.name.traffic_file, packet_capture_file]
|
||||
|
||||
sinks:
|
||||
kafka_sink:
|
||||
type: kafka
|
||||
properties:
|
||||
topic: PROXY-EVENT-PROCESSED
|
||||
kafka.bootstrap.servers: "{{ kafka_sink_servers }}"
|
||||
kafka.client.id: PROXY-EVENT-PROCESSED
|
||||
kafka.retries: 0
|
||||
kafka.linger.ms: 10
|
||||
kafka.request.timeout.ms: 30000
|
||||
kafka.batch.size: 262144
|
||||
kafka.buffer.memory: 134217728
|
||||
kafka.max.request.size: 10485760
|
||||
kafka.compression.type: snappy
|
||||
kafka.security.protocol: SASL_PLAINTEXT
|
||||
kafka.sasl.mechanism: PLAIN
|
||||
kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
|
||||
format: json
|
||||
|
||||
|
||||
application:
|
||||
|
||||
env: # [object] Environment Variables
|
||||
name: etl_proxy_event_kafka_to_ndc_kafka # [string] Job Name
|
||||
shade.identifier: aes
|
||||
pipeline:
|
||||
object-reuse: true # [boolean] Object Reuse, default is false
|
||||
topology:
|
||||
- name: kafka_source
|
||||
downstream: [etl_processor]
|
||||
- name: etl_processor
|
||||
downstream: [kafka_sink]
|
||||
- name: kafka_sink
|
||||
|
||||
|
||||
@@ -0,0 +1,154 @@
|
||||
sources:
|
||||
kafka_source:
|
||||
type: kafka
|
||||
properties:
|
||||
topic: SESSION-RECORD
|
||||
kafka.bootstrap.servers: "{{ kafka_source_servers }}"
|
||||
kafka.client.id: SESSION-RECORD
|
||||
kafka.session.timeout.ms: 60000
|
||||
kafka.max.poll.records: 3000
|
||||
kafka.max.partition.fetch.bytes: 31457280
|
||||
kafka.security.protocol: SASL_PLAINTEXT
|
||||
kafka.sasl.mechanism: PLAIN
|
||||
kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
|
||||
kafka.group.id: etl_session_record_kafka_to_ndc_kafka
|
||||
kafka.auto.offset.reset: latest
|
||||
format: json
|
||||
|
||||
processing_pipelines:
|
||||
etl_processor:
|
||||
type: projection
|
||||
functions:
|
||||
|
||||
- function: ASN_LOOKUP
|
||||
lookup_fields: [server_ip]
|
||||
output_fields: [server_asn]
|
||||
parameters:
|
||||
option: IP_TO_ASN
|
||||
kb_name: tsg_ip_asn
|
||||
|
||||
- function: ASN_LOOKUP
|
||||
lookup_fields: [client_ip]
|
||||
output_fields: [client_asn]
|
||||
parameters:
|
||||
option: IP_TO_ASN
|
||||
kb_name: tsg_ip_asn
|
||||
|
||||
- function: SNOWFLAKE_ID
|
||||
lookup_fields: ['']
|
||||
output_fields: [log_id]
|
||||
parameters:
|
||||
data_center_id_num: 1
|
||||
|
||||
- function: JSON_EXTRACT
|
||||
lookup_fields: [device_tag]
|
||||
output_fields: [data_center]
|
||||
filter:
|
||||
parameters:
|
||||
value_expression: $.tags[?(@.tag=='data_center')][0].value
|
||||
|
||||
- function: JSON_EXTRACT
|
||||
lookup_fields: [device_tag]
|
||||
output_fields: [device_group]
|
||||
filter:
|
||||
parameters:
|
||||
value_expression: $.tags[?(@.tag=='device_group')][0].value
|
||||
|
||||
- function: CURRENT_UNIX_TIMESTAMP
|
||||
output_fields: [processing_time]
|
||||
parameters:
|
||||
precision: seconds
|
||||
|
||||
- function: UNIX_TIMESTAMP_CONVERTER
|
||||
lookup_fields: [__timestamp]
|
||||
output_fields: [recv_time]
|
||||
parameters:
|
||||
precision: seconds
|
||||
|
||||
- function: EVAL
|
||||
output_fields: [ingestion_time]
|
||||
parameters:
|
||||
value_expression: recv_time
|
||||
|
||||
- function: BASE64_DECODE_TO_STRING
|
||||
output_fields: [mail_subject]
|
||||
parameters:
|
||||
value_field: mail_subject
|
||||
charset_field: mail_subject_charset
|
||||
|
||||
- function: BASE64_DECODE_TO_STRING
|
||||
output_fields: [mail_attachment_name]
|
||||
parameters:
|
||||
value_field: mail_attachment_name
|
||||
charset_field: mail_attachment_name_charset
|
||||
|
||||
- function: PATH_COMBINE
|
||||
lookup_fields: [rtp_pcap_path]
|
||||
output_fields: [rtp_pcap_path]
|
||||
parameters:
|
||||
path: [props.hos.path, props.hos.bucket.name.traffic_file, rtp_pcap_path]
|
||||
|
||||
- function: PATH_COMBINE
|
||||
lookup_fields: [http_request_body]
|
||||
output_fields: [http_request_body]
|
||||
parameters:
|
||||
path: [props.hos.path, props.hos.bucket.name.traffic_file, http_request_body]
|
||||
|
||||
- function: PATH_COMBINE
|
||||
lookup_fields: [http_response_body]
|
||||
output_fields: [http_response_body]
|
||||
parameters:
|
||||
path: [props.hos.path, props.hos.bucket.name.traffic_file, http_response_body]
|
||||
|
||||
- function: PATH_COMBINE
|
||||
lookup_fields: [mail_eml_file]
|
||||
output_fields: [mail_eml_file]
|
||||
parameters:
|
||||
path: [props.hos.path, props.hos.bucket.name.traffic_file, mail_eml_file]
|
||||
|
||||
- function: PATH_COMBINE
|
||||
lookup_fields: [packet_capture_file]
|
||||
output_fields: [packet_capture_file]
|
||||
parameters:
|
||||
path: [props.hos.path, props.hos.bucket.name.traffic_file, packet_capture_file]
|
||||
|
||||
sinks:
|
||||
kafka_sink:
|
||||
type: kafka
|
||||
properties:
|
||||
topic: SESSION-RECORD-PROCESSED
|
||||
kafka.bootstrap.servers: "{{ kafka_sink_servers }}"
|
||||
kafka.client.id: SESSION-RECORD-PROCESSED
|
||||
kafka.retries: 0
|
||||
kafka.linger.ms: 10
|
||||
kafka.request.timeout.ms: 30000
|
||||
kafka.batch.size: 262144
|
||||
kafka.buffer.memory: 134217728
|
||||
kafka.max.request.size: 10485760
|
||||
kafka.compression.type: snappy
|
||||
kafka.security.protocol: SASL_PLAINTEXT
|
||||
kafka.sasl.mechanism: PLAIN
|
||||
kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
|
||||
format: json
|
||||
|
||||
|
||||
application:
|
||||
|
||||
env:
|
||||
name: etl_session_record_kafka_to_ndc_kafka
|
||||
shade.identifier: aes
|
||||
pipeline:
|
||||
object-reuse: true
|
||||
# Job-level properties; the PATH_COMBINE functions in this template read them through the `props.` prefix.
# NOTE(review): those functions reference `props.hos.path` and `props.hos.bucket.name.traffic_file`,
# but neither key is defined in this block (only the rtp_file / http_file / eml_file / policy_capture_file
# bucket names are). Confirm whether the missing keys are supplied elsewhere or must be added here.
properties:
|
||||
hos.bucket.name.rtp_file: traffic_rtp_file_bucket
|
||||
hos.bucket.name.http_file: traffic_http_file_bucket
|
||||
hos.bucket.name.eml_file: traffic_eml_file_bucket
|
||||
hos.bucket.name.policy_capture_file: traffic_policy_capture_file_bucket
|
||||
topology:
|
||||
- name: kafka_source
|
||||
downstream: [etl_processor]
|
||||
- name: etl_processor
|
||||
downstream: [kafka_sink]
|
||||
- name: kafka_sink
|
||||
|
||||
|
||||
@@ -0,0 +1,157 @@
|
||||
sources:
|
||||
kafka_source:
|
||||
type: kafka
|
||||
# fields: # [array of object] Field List, if not set, all fields(Map<String, Object>) will be output.
|
||||
# watermark_timestamp: common_recv_time # [string] Watermark Field Name
|
||||
# watermark_timestamp_unit: ms # [string] Watermark Unit, default is ms
|
||||
# watermark_lag: 60 # [number] Watermark Lag, default is 60
|
||||
properties:
|
||||
topic: TRANSACTION-RECORD
|
||||
kafka.bootstrap.servers: "{{ kafka_source_servers }}"
|
||||
kafka.client.id: TRANSACTION-RECORD
|
||||
kafka.session.timeout.ms: 60000
|
||||
kafka.max.poll.records: 3000
|
||||
kafka.max.partition.fetch.bytes: 31457280
|
||||
kafka.security.protocol: SASL_PLAINTEXT
|
||||
kafka.sasl.mechanism: PLAIN
|
||||
kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
|
||||
kafka.group.id: etl_transaction_record_kafka_to_ndc_kafka
|
||||
kafka.auto.offset.reset: latest
|
||||
format: json
|
||||
|
||||
processing_pipelines:
|
||||
etl_processor: # [object] Processing Pipeline
|
||||
type: com.geedgenetworks.core.processor.projection.ProjectionProcessorImpl
|
||||
remove_fields:
|
||||
output_fields:
|
||||
properties:
|
||||
key: value
|
||||
functions: # [array of object] Function List
|
||||
|
||||
- function: ASN_LOOKUP
|
||||
lookup_fields: [server_ip]
|
||||
output_fields: [server_asn]
|
||||
parameters:
|
||||
option: IP_TO_ASN
|
||||
kb_name: tsg_ip_asn
|
||||
|
||||
- function: ASN_LOOKUP
|
||||
lookup_fields: [client_ip]
|
||||
output_fields: [client_asn]
|
||||
parameters:
|
||||
option: IP_TO_ASN
|
||||
kb_name: tsg_ip_asn
|
||||
|
||||
- function: SNOWFLAKE_ID
|
||||
lookup_fields: ['']
|
||||
output_fields: [log_id]
|
||||
parameters:
|
||||
data_center_id_num: 1
|
||||
|
||||
- function: JSON_EXTRACT
|
||||
lookup_fields: [device_tag]
|
||||
output_fields: [data_center]
|
||||
filter:
|
||||
parameters:
|
||||
value_expression: $.tags[?(@.tag=='data_center')][0].value
|
||||
|
||||
- function: JSON_EXTRACT
|
||||
lookup_fields: [device_tag]
|
||||
output_fields: [device_group]
|
||||
filter:
|
||||
parameters:
|
||||
value_expression: $.tags[?(@.tag=='device_group')][0].value
|
||||
|
||||
- function: CURRENT_UNIX_TIMESTAMP
|
||||
output_fields: [processing_time]
|
||||
parameters:
|
||||
precision: seconds
|
||||
|
||||
- function: UNIX_TIMESTAMP_CONVERTER
|
||||
lookup_fields: [__timestamp]
|
||||
output_fields: [recv_time]
|
||||
parameters:
|
||||
precision: seconds
|
||||
|
||||
- function: EVAL
|
||||
output_fields: [ingestion_time]
|
||||
parameters:
|
||||
value_expression: recv_time
|
||||
|
||||
- function: BASE64_DECODE_TO_STRING
|
||||
output_fields: [mail_subject]
|
||||
parameters:
|
||||
value_field: mail_subject
|
||||
charset_field: mail_subject_charset
|
||||
|
||||
- function: BASE64_DECODE_TO_STRING
|
||||
output_fields: [mail_attachment_name]
|
||||
parameters:
|
||||
value_field: mail_attachment_name
|
||||
charset_field: mail_attachment_name_charset
|
||||
|
||||
- function: PATH_COMBINE
|
||||
lookup_fields: [rtp_pcap_path]
|
||||
output_fields: [rtp_pcap_path]
|
||||
parameters:
|
||||
path: [props.hos.path, props.hos.bucket.name.traffic_file, rtp_pcap_path]
|
||||
|
||||
- function: PATH_COMBINE
|
||||
lookup_fields: [http_request_body]
|
||||
output_fields: [http_request_body]
|
||||
parameters:
|
||||
path: [props.hos.path, props.hos.bucket.name.traffic_file, http_request_body]
|
||||
|
||||
- function: PATH_COMBINE
|
||||
lookup_fields: [http_response_body]
|
||||
output_fields: [http_response_body]
|
||||
parameters:
|
||||
path: [props.hos.path, props.hos.bucket.name.traffic_file, http_response_body]
|
||||
|
||||
- function: PATH_COMBINE
|
||||
lookup_fields: [mail_eml_file]
|
||||
output_fields: [mail_eml_file]
|
||||
parameters:
|
||||
path: [props.hos.path, props.hos.bucket.name.traffic_file, mail_eml_file]
|
||||
|
||||
- function: PATH_COMBINE
|
||||
lookup_fields: [packet_capture_file]
|
||||
output_fields: [packet_capture_file]
|
||||
parameters:
|
||||
path: [props.hos.path, props.hos.bucket.name.traffic_file, packet_capture_file]
|
||||
|
||||
sinks:
|
||||
kafka_sink:
|
||||
type: kafka
|
||||
properties:
|
||||
topic: TRANSACTION-RECORD-PROCESSED
|
||||
kafka.bootstrap.servers: "{{ kafka_sink_servers }}"
|
||||
kafka.client.id: TRANSACTION-RECORD-PROCESSED
|
||||
kafka.retries: 0
|
||||
kafka.linger.ms: 10
|
||||
kafka.request.timeout.ms: 30000
|
||||
kafka.batch.size: 262144
|
||||
kafka.buffer.memory: 134217728
|
||||
kafka.max.request.size: 10485760
|
||||
kafka.compression.type: snappy
|
||||
kafka.security.protocol: SASL_PLAINTEXT
|
||||
kafka.sasl.mechanism: PLAIN
|
||||
kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
|
||||
format: json
|
||||
|
||||
|
||||
application:
|
||||
|
||||
env: # [object] Environment Variables
|
||||
name: etl_transaction_record_kafka_to_ndc_kafka # [string] Job Name
|
||||
shade.identifier: aes
|
||||
pipeline:
|
||||
object-reuse: true # [boolean] Object Reuse, default is false
|
||||
topology:
|
||||
- name: kafka_source
|
||||
downstream: [etl_processor]
|
||||
- name: etl_processor
|
||||
downstream: [kafka_sink]
|
||||
- name: kafka_sink
|
||||
|
||||
|
||||
@@ -0,0 +1,48 @@
|
||||
sources:
|
||||
kafka_source:
|
||||
type: kafka
|
||||
properties:
|
||||
topic: NETWORK-TRAFFIC-METRIC
|
||||
kafka.bootstrap.servers: "{{ kafka_source_servers }}"
|
||||
kafka.client.id: NETWORK-TRAFFIC-METRIC
|
||||
kafka.session.timeout.ms: 60000
|
||||
kafka.max.poll.records: 3000
|
||||
kafka.max.partition.fetch.bytes: 31457280
|
||||
kafka.security.protocol: SASL_PLAINTEXT
|
||||
kafka.sasl.mechanism: PLAIN
|
||||
kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
|
||||
kafka.group.id: network_traffic_metrics_kafka_to_ndc_kafka
|
||||
kafka.auto.offset.reset: latest
|
||||
format: raw
|
||||
|
||||
|
||||
sinks:
|
||||
kafka_sink:
|
||||
type: kafka
|
||||
properties:
|
||||
topic: NETWORK-TRAFFIC-METRIC
|
||||
kafka.bootstrap.servers: "{{ kafka_sink_servers }}"
|
||||
kafka.client.id: NETWORK-TRAFFIC-METRIC
|
||||
kafka.retries: 0
|
||||
kafka.linger.ms: 10
|
||||
kafka.request.timeout.ms: 30000
|
||||
kafka.batch.size: 262144
|
||||
kafka.buffer.memory: 134217728
|
||||
kafka.max.request.size: 10485760
|
||||
kafka.compression.type: snappy
|
||||
kafka.security.protocol: SASL_PLAINTEXT
|
||||
kafka.sasl.mechanism: PLAIN
|
||||
kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
|
||||
format: raw
|
||||
|
||||
|
||||
application:
|
||||
env:
|
||||
name: network_traffic_metrics_kafka_to_ndc_kafka
|
||||
shade.identifier: aes
|
||||
pipeline:
|
||||
object-reuse: true
|
||||
topology:
|
||||
- name: kafka_source
|
||||
downstream: [kafka_sink]
|
||||
- name: kafka_sink
|
||||
@@ -0,0 +1,50 @@
|
||||
sources:
|
||||
kafka_source:
|
||||
type: kafka
|
||||
properties:
|
||||
topic: OBJECT-STATISTICS-METRIC
|
||||
kafka.bootstrap.servers: "{{ kafka_source_servers }}"
|
||||
kafka.client.id: OBJECT-STATISTICS-METRIC
|
||||
kafka.session.timeout.ms: 60000
|
||||
kafka.max.poll.records: 3000
|
||||
kafka.max.partition.fetch.bytes: 31457280
|
||||
kafka.security.protocol: SASL_PLAINTEXT
|
||||
kafka.sasl.mechanism: PLAIN
|
||||
kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
|
||||
kafka.group.id: object_statistics_metric_kafka_to_ndc_kafka
|
||||
kafka.auto.offset.reset: latest
|
||||
format: raw
|
||||
|
||||
|
||||
sinks:
|
||||
kafka_sink:
|
||||
type: kafka
|
||||
properties:
|
||||
topic: OBJECT-STATISTICS-METRIC
|
||||
kafka.bootstrap.servers: "{{ kafka_sink_servers }}"
|
||||
kafka.client.id: OBJECT-STATISTICS-METRIC
|
||||
kafka.retries: 0
|
||||
kafka.linger.ms: 10
|
||||
kafka.request.timeout.ms: 30000
|
||||
kafka.batch.size: 262144
|
||||
kafka.buffer.memory: 134217728
|
||||
kafka.max.request.size: 10485760
|
||||
kafka.compression.type: snappy
|
||||
kafka.security.protocol: SASL_PLAINTEXT
|
||||
kafka.sasl.mechanism: PLAIN
|
||||
kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
|
||||
format: raw
|
||||
|
||||
|
||||
application:
|
||||
env:
|
||||
name: object_statistics_metric_kafka_to_ndc_kafka
|
||||
shade.identifier: aes
|
||||
pipeline:
|
||||
object-reuse: true # [boolean] Object Reuse, default is false
|
||||
topology:
|
||||
- name: kafka_source
|
||||
downstream: [kafka_sink]
|
||||
- name: kafka_sink
|
||||
|
||||
|
||||
@@ -0,0 +1,50 @@
|
||||
sources:
|
||||
kafka_source:
|
||||
type: kafka
|
||||
properties:
|
||||
topic: POLICY-RULE-METRIC
|
||||
kafka.bootstrap.servers: "{{ kafka_source_servers }}"
|
||||
kafka.client.id: POLICY-RULE-METRIC
|
||||
kafka.session.timeout.ms: 60000
|
||||
kafka.max.poll.records: 3000
|
||||
kafka.max.partition.fetch.bytes: 31457280
|
||||
kafka.security.protocol: SASL_PLAINTEXT
|
||||
kafka.sasl.mechanism: PLAIN
|
||||
kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
|
||||
kafka.group.id: policy_rule_metrics_kafka_to_ndc_kafka
|
||||
kafka.auto.offset.reset: latest
|
||||
format: raw
|
||||
|
||||
|
||||
sinks:
|
||||
kafka_sink:
|
||||
type: kafka
|
||||
properties:
|
||||
topic: POLICY-RULE-METRIC
|
||||
kafka.bootstrap.servers: "{{ kafka_sink_servers }}"
|
||||
kafka.client.id: POLICY-RULE-METRIC
|
||||
kafka.retries: 0
|
||||
kafka.linger.ms: 10
|
||||
kafka.request.timeout.ms: 30000
|
||||
kafka.batch.size: 262144
|
||||
kafka.buffer.memory: 134217728
|
||||
kafka.max.request.size: 10485760
|
||||
kafka.compression.type: snappy
|
||||
kafka.security.protocol: SASL_PLAINTEXT
|
||||
kafka.sasl.mechanism: PLAIN
|
||||
kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
|
||||
format: raw
|
||||
|
||||
|
||||
application:
|
||||
env:
|
||||
name: policy_rule_metrics_kafka_to_ndc_kafka
|
||||
shade.identifier: aes
|
||||
pipeline:
|
||||
object-reuse: true # [boolean] Object Reuse, default is false
|
||||
topology:
|
||||
- name: kafka_source
|
||||
downstream: [kafka_sink]
|
||||
- name: kafka_sink
|
||||
|
||||
|
||||
@@ -0,0 +1,58 @@
|
||||
sources:
|
||||
kafka_source:
|
||||
type: kafka
|
||||
properties:
|
||||
topic: PXY-EXCH-INTERMEDIA-CERT
|
||||
kafka.bootstrap.servers: "{{ kafka_source_servers }}"
|
||||
kafka.client.id: PXY-EXCH-INTERMEDIA-CERT
|
||||
kafka.session.timeout.ms: 60000
|
||||
kafka.max.poll.records: 3000
|
||||
kafka.max.partition.fetch.bytes: 31457280
|
||||
kafka.security.protocol: SASL_PLAINTEXT
|
||||
kafka.ssl.keystore.location:
|
||||
kafka.ssl.keystore.password:
|
||||
kafka.ssl.truststore.location:
|
||||
kafka.ssl.truststore.password:
|
||||
kafka.ssl.key.password:
|
||||
kafka.sasl.mechanism: PLAIN
|
||||
kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
|
||||
# NOTE(review): `buffer.memory`, `max.request.size` and `compression.type` are Kafka producer settings,
# yet they appear on this source (consumer) block, and this key and `kafka.max.request.size` have no value.
# The `kafka.ssl.*` keys in this block are also left without a value while the protocol is SASL_PLAINTEXT.
# No other source block in these templates sets any of these keys — confirm they are intentional.
kafka.buffer.memory:
|
||||
kafka.group.id: pxy_exch_intermedia_cert_kafka_to_ndc_kafka
|
||||
kafka.auto.offset.reset: latest
|
||||
kafka.max.request.size:
|
||||
kafka.compression.type: none
|
||||
format: raw
|
||||
|
||||
|
||||
sinks:
|
||||
kafka_sink:
|
||||
type: kafka
|
||||
properties:
|
||||
topic: PXY-EXCH-INTERMEDIA-CERT
|
||||
kafka.bootstrap.servers: "{{ kafka_sink_servers }}"
|
||||
kafka.client.id: PXY-EXCH-INTERMEDIA-CERT
|
||||
kafka.retries: 0
|
||||
kafka.linger.ms: 10
|
||||
kafka.request.timeout.ms: 30000
|
||||
kafka.batch.size: 262144
|
||||
kafka.buffer.memory: 134217728
|
||||
kafka.max.request.size: 10485760
|
||||
kafka.compression.type: snappy
|
||||
kafka.security.protocol: SASL_PLAINTEXT
|
||||
kafka.sasl.mechanism: PLAIN
|
||||
kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
|
||||
format: raw
|
||||
|
||||
|
||||
application:
|
||||
env:
|
||||
name: pxy_exch_intermedia_cert_kafka_to_ndc_kafka
|
||||
shade.identifier: aes
|
||||
pipeline:
|
||||
object-reuse: true # [boolean] Object Reuse, default is false
|
||||
topology:
|
||||
- name: kafka_source
|
||||
downstream: [kafka_sink]
|
||||
- name: kafka_sink
|
||||
|
||||
|
||||
@@ -0,0 +1,50 @@
|
||||
sources:
|
||||
kafka_source:
|
||||
type: kafka
|
||||
properties:
|
||||
topic: STATISTICS-RULE-METRIC
|
||||
kafka.bootstrap.servers: "{{ kafka_source_servers }}"
|
||||
kafka.client.id: STATISTICS-RULE-METRIC
|
||||
kafka.session.timeout.ms: 60000
|
||||
kafka.max.poll.records: 3000
|
||||
kafka.max.partition.fetch.bytes: 31457280
|
||||
kafka.security.protocol: SASL_PLAINTEXT
|
||||
kafka.sasl.mechanism: PLAIN
|
||||
kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
|
||||
kafka.group.id: statistics_rule_metric_kafka_to_ndc_kafka
|
||||
kafka.auto.offset.reset: latest
|
||||
format: raw
|
||||
|
||||
|
||||
sinks:
|
||||
kafka_sink:
|
||||
type: kafka
|
||||
properties:
|
||||
topic: STATISTICS-RULE-METRIC
|
||||
kafka.bootstrap.servers: "{{ kafka_sink_servers }}"
|
||||
kafka.client.id: STATISTICS-RULE-METRIC
|
||||
kafka.retries: 0
|
||||
kafka.linger.ms: 10
|
||||
kafka.request.timeout.ms: 30000
|
||||
kafka.batch.size: 262144
|
||||
kafka.buffer.memory: 134217728
|
||||
kafka.max.request.size: 10485760
|
||||
kafka.compression.type: snappy
|
||||
kafka.security.protocol: SASL_PLAINTEXT
|
||||
kafka.sasl.mechanism: PLAIN
|
||||
kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
|
||||
format: raw
|
||||
|
||||
|
||||
application:
|
||||
env:
|
||||
name: statistics_rule_metric_kafka_to_ndc_kafka
|
||||
shade.identifier: aes
|
||||
pipeline:
|
||||
object-reuse: true # [boolean] Object Reuse, default is false
|
||||
topology:
|
||||
- name: kafka_source
|
||||
downstream: [kafka_sink]
|
||||
- name: kafka_sink
|
||||
|
||||
|
||||
@@ -0,0 +1,50 @@
|
||||
sources:
|
||||
kafka_source:
|
||||
type: kafka
|
||||
properties:
|
||||
topic: TROUBLESHOOTING-FILE-STREAM-RECORD
|
||||
kafka.bootstrap.servers: "{{ kafka_source_servers }}"
|
||||
kafka.client.id: TROUBLESHOOTING-FILE-STREAM-RECORD
|
||||
kafka.session.timeout.ms: 60000
|
||||
kafka.max.poll.records: 3000
|
||||
kafka.max.partition.fetch.bytes: 31457280
|
||||
kafka.security.protocol: SASL_PLAINTEXT
|
||||
kafka.sasl.mechanism: PLAIN
|
||||
kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
|
||||
kafka.group.id: troubleshooting_file_stream_record_kafka_to_ndc_kafka
|
||||
kafka.auto.offset.reset: latest
|
||||
format: raw
|
||||
|
||||
|
||||
sinks:
|
||||
kafka_sink:
|
||||
type: kafka
|
||||
properties:
|
||||
topic: TROUBLESHOOTING-FILE-STREAM-RECORD
|
||||
kafka.bootstrap.servers: "{{ kafka_sink_servers }}"
|
||||
kafka.client.id: TROUBLESHOOTING-FILE-STREAM-RECORD
|
||||
kafka.retries: 0
|
||||
kafka.linger.ms: 10
|
||||
kafka.request.timeout.ms: 30000
|
||||
kafka.batch.size: 262144
|
||||
kafka.buffer.memory: 134217728
|
||||
kafka.max.request.size: 10485760
|
||||
kafka.compression.type: snappy
|
||||
kafka.security.protocol: SASL_PLAINTEXT
|
||||
kafka.sasl.mechanism: PLAIN
|
||||
kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
|
||||
format: raw
|
||||
|
||||
|
||||
application:
|
||||
env:
|
||||
name: troubleshooting_file_stream_record_kafka_to_ndc_kafka
|
||||
shade.identifier: aes
|
||||
pipeline:
|
||||
object-reuse: true # [boolean] Object Reuse, default is false
|
||||
topology:
|
||||
- name: kafka_source
|
||||
downstream: [kafka_sink]
|
||||
- name: kafka_sink
|
||||
|
||||
|
||||
@@ -0,0 +1,89 @@
|
||||
sources:
|
||||
kafka_source:
|
||||
type: kafka
|
||||
properties:
|
||||
topic: VOIP-RECORD
|
||||
kafka.bootstrap.servers: "{{ kafka_source_servers }}"
|
||||
kafka.client.id: VOIP-RECORD
|
||||
kafka.session.timeout.ms: 60000
|
||||
kafka.max.poll.records: 3000
|
||||
kafka.max.partition.fetch.bytes: 31457280
|
||||
kafka.security.protocol: SASL_PLAINTEXT
|
||||
kafka.sasl.mechanism: PLAIN
|
||||
kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
|
||||
kafka.group.id: voip_record_kafka_to_ndc_kafka # consumer group dedicated to this job; same value as application.env.name
|
||||
kafka.auto.offset.reset: latest
|
||||
format: json
|
||||
|
||||
|
||||
processing_pipelines:
|
||||
etl_processor: # [object] Processing Pipeline
|
||||
type: com.geedgenetworks.core.processor.projection.ProjectionProcessorImpl
|
||||
remove_fields:
|
||||
output_fields:
|
||||
functions: # [array of object] Function List
|
||||
|
||||
- function: PATH_COMBINE
|
||||
lookup_fields: [rtp_pcap_path]
|
||||
output_fields: [rtp_pcap_path]
|
||||
parameters:
|
||||
path: [props.hos.path, props.hos.bucket.name.traffic_file, rtp_pcap_path]
|
||||
|
||||
- function: PATH_COMBINE
|
||||
lookup_fields: [http_request_body]
|
||||
output_fields: [http_request_body]
|
||||
parameters:
|
||||
path: [props.hos.path, props.hos.bucket.name.traffic_file, http_request_body]
|
||||
|
||||
- function: PATH_COMBINE
|
||||
lookup_fields: [http_response_body]
|
||||
output_fields: [http_response_body]
|
||||
parameters:
|
||||
path: [props.hos.path, props.hos.bucket.name.traffic_file, http_response_body]
|
||||
|
||||
- function: PATH_COMBINE
|
||||
lookup_fields: [mail_eml_file]
|
||||
output_fields: [mail_eml_file]
|
||||
parameters:
|
||||
path: [props.hos.path, props.hos.bucket.name.traffic_file, mail_eml_file]
|
||||
|
||||
- function: PATH_COMBINE
|
||||
lookup_fields: [packet_capture_file]
|
||||
output_fields: [packet_capture_file]
|
||||
parameters:
|
||||
path: [props.hos.path, props.hos.bucket.name.traffic_file, packet_capture_file]
|
||||
|
||||
|
||||
sinks:
|
||||
kafka_sink:
|
||||
type: kafka
|
||||
properties:
|
||||
topic: VOIP-RECORD
|
||||
kafka.bootstrap.servers: "{{ kafka_sink_servers }}"
|
||||
kafka.client.id: VOIP-RECORD
|
||||
kafka.retries: 0
|
||||
kafka.linger.ms: 10
|
||||
kafka.request.timeout.ms: 30000
|
||||
kafka.batch.size: 262144
|
||||
kafka.buffer.memory: 134217728
|
||||
kafka.max.request.size: 10485760
|
||||
kafka.compression.type: snappy
|
||||
kafka.security.protocol: SASL_PLAINTEXT
|
||||
kafka.sasl.mechanism: PLAIN
|
||||
kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
|
||||
format: json
|
||||
|
||||
|
||||
|
||||
application:
|
||||
env:
|
||||
name: voip_record_kafka_to_ndc_kafka
|
||||
shade.identifier: aes
|
||||
pipeline:
|
||||
object-reuse: true # [boolean] Object Reuse, default is false
|
||||
topology:
|
||||
- name: kafka_source
|
||||
downstream: [etl_processor]
|
||||
- name: etl_processor
|
||||
downstream: [kafka_sink]
|
||||
- name: kafka_sink
|
||||
@@ -0,0 +1,43 @@
|
||||
sources:
|
||||
kafka_source:
|
||||
type: kafka
|
||||
properties:
|
||||
topic: DOS-EVENT
|
||||
kafka.bootstrap.servers: "{{ kafka_source_servers }}"
|
||||
kafka.client.id: DOS-EVENT
|
||||
kafka.session.timeout.ms: 60000
|
||||
kafka.max.poll.records: 3000
|
||||
kafka.max.partition.fetch.bytes: 31457280
|
||||
kafka.security.protocol: SASL_PLAINTEXT
|
||||
kafka.sasl.mechanism: PLAIN
|
||||
kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
|
||||
kafka.group.id: dos_event_kafka_to_clickhouse
|
||||
kafka.auto.offset.reset: latest
|
||||
format: json
|
||||
|
||||
|
||||
sinks:
|
||||
clickhouse_sink:
|
||||
type: clickhouse
|
||||
properties:
|
||||
host: "{{ clickhouse_servers }}"
|
||||
table: tsg_galaxy_v3.dos_event_local
|
||||
batch.size: 100000
|
||||
batch.interval: 30s
|
||||
connection.user: e54c9568586180eede1506eecf3574e9
|
||||
connection.password: 86cf0e2ffba3f541a6c6761313e5cc7e
|
||||
connection.connect_timeout: 30
|
||||
connection.query_timeout: 300
|
||||
|
||||
application:
|
||||
env:
|
||||
name: dos_event_kafka_to_clickhouse
|
||||
shade.identifier: aes
|
||||
pipeline:
|
||||
object-reuse: true # [boolean] Object Reuse, default is false
|
||||
topology:
|
||||
- name: kafka_source
|
||||
downstream: [clickhouse_sink]
|
||||
- name: clickhouse_sink
|
||||
|
||||
|
||||
@@ -0,0 +1,399 @@
|
||||
sources:
|
||||
kafka_source:
|
||||
type: kafka
|
||||
properties:
|
||||
topic: SESSION-RECORD-PROCESSED
|
||||
kafka.bootstrap.servers: "{{ kafka_source_servers }}"
|
||||
kafka.client.id: SESSION-RECORD-PROCESSED
|
||||
kafka.session.timeout.ms: 60000
|
||||
kafka.max.poll.records: 3000
|
||||
kafka.max.partition.fetch.bytes: 31457280
|
||||
kafka.security.protocol: SASL_PLAINTEXT
|
||||
kafka.sasl.mechanism: PLAIN
|
||||
kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
|
||||
kafka.group.id: etl_processed_session_record_kafka_to_cn_kafka
|
||||
kafka.auto.offset.reset: latest
|
||||
format: json
|
||||
|
||||
processing_pipelines:
|
||||
session_record_processor:
|
||||
type: projection
|
||||
remove_fields:
|
||||
output_fields:
|
||||
functions: # [array of object] Function List
|
||||
- function: EVAL
|
||||
output_fields: [ domain ]
|
||||
parameters:
|
||||
value_expression: server_fqdn
|
||||
|
||||
- function: EVAL
|
||||
output_fields: [ domain_sld ]
|
||||
parameters:
|
||||
value_expression: server_domain
|
||||
|
||||
- function: CN_L7_PROTOCOL_AND_APP_EXTRACT
|
||||
parameters:
|
||||
decoded_path_field_name: decoded_path
|
||||
app_transition_field_name: app_transition
|
||||
l7_protocol_field_name: l7_protocol
|
||||
app_field_name: app
|
||||
l7_protocol: DHCP,DNS,FTP,GRE,GTP,HTTP,HTTPS,ICMP,IMAP,IMAPS,IPSEC,ISAKMP,XMPP,L2TP,LDAP,MMS,NETBIOS,NETFLOW,NTP,POP3,POP3S,RDP,PPTP,RADIUS,RTCP,RTP,RTSP,SIP,SMB,SMTP,SMTPS,SNMP,SSDP,SSH,SSL,STUN,TELNET,TFTP,OPENVPN,RTMP,TEREDO,FTPS,DTLS,SPDY,BJNP,QUIC,MDNS,Unknown TCP,Unknown UDP,Unknown Other,IKE,MAIL,SOCKS,DoH,SLP,SSL with ESNI,ISATAP,Stratum,SSL with ECH
|
||||
|
||||
- function: GEOIP_LOOKUP
|
||||
lookup_fields: [ client_ip ]
|
||||
output_fields: [ ]
|
||||
parameters:
|
||||
kb_name: cn_ip_location
|
||||
option: IP_TO_OBJECT
|
||||
geolocation_field_mapping:
|
||||
COUNTRY: client_country_region
|
||||
PROVINCE: client_super_admin_area
|
||||
CITY: client_admin_area
|
||||
LONGITUDE: client_longitude
|
||||
LATITUDE: client_latitude
|
||||
ISP: client_isp
|
||||
|
||||
- function: GEOIP_LOOKUP
|
||||
lookup_fields: [ server_ip ]
|
||||
output_fields: [ ]
|
||||
parameters:
|
||||
kb_name: cn_ip_location
|
||||
option: IP_TO_OBJECT
|
||||
geolocation_field_mapping:
|
||||
COUNTRY: server_country_region
|
||||
PROVINCE: server_super_admin_area
|
||||
CITY: server_admin_area
|
||||
LONGITUDE: server_longitude
|
||||
LATITUDE: server_latitude
|
||||
ISP: server_isp
|
||||
|
||||
- function: ASN_LOOKUP
|
||||
lookup_fields: [ client_ip ]
|
||||
output_fields: [ client_asn ]
|
||||
parameters:
|
||||
option: IP_TO_ASN
|
||||
kb_name: cn_ip_asn
|
||||
|
||||
- function: ASN_LOOKUP
|
||||
lookup_fields: [ server_ip ]
|
||||
output_fields: [ server_asn ]
|
||||
parameters:
|
||||
option: IP_TO_ASN
|
||||
kb_name: cn_ip_asn
|
||||
|
||||
- function: CN_IDC_RENTER_LOOKUP
|
||||
lookup_fields: [ client_ip ]
|
||||
output_fields: [ client_idc_renter ]
|
||||
parameters:
|
||||
kb_name: cn_idc_renter
|
||||
|
||||
- function: CN_IDC_RENTER_LOOKUP
|
||||
lookup_fields: [ server_ip ]
|
||||
output_fields: [ server_idc_renter ]
|
||||
parameters:
|
||||
kb_name: cn_idc_renter
|
||||
|
||||
- function: CN_LINK_DIRECTION_LOOKUP
|
||||
lookup_fields: [ in_link_id ]
|
||||
output_fields: [ in_link_direction ]
|
||||
parameters:
|
||||
kb_name: cn_link_direction
|
||||
|
||||
- function: CN_LINK_DIRECTION_LOOKUP
|
||||
lookup_fields: [ out_link_id ]
|
||||
output_fields: [ out_link_direction ]
|
||||
parameters:
|
||||
kb_name: cn_link_direction
|
||||
|
||||
- function: CN_FQDN_CATEGORY_LOOKUP
|
||||
lookup_fields: [ domain ]
|
||||
parameters:
|
||||
kb_name: cn_fqdn_category
|
||||
field_mapping:
|
||||
NAME: domain_category_name
|
||||
GROUP: domain_category_group
|
||||
REPUTATION_LEVEL: domain_reputation_level
|
||||
|
||||
- function: CN_ICP_LOOKUP
|
||||
lookup_fields: [ domain ]
|
||||
output_fields: [ domain_icp_company_name ]
|
||||
parameters:
|
||||
kb_name: cn_fqdn_icp
|
||||
|
||||
- function: CN_FQDN_WHOIS_LOOKUP
|
||||
lookup_fields: [ domain ]
|
||||
output_fields: [ domain_whois_org ]
|
||||
parameters:
|
||||
kb_name: cn_fqdn_whois
|
||||
|
||||
- function: CN_DNS_SERVER_LOOKUP
|
||||
lookup_fields: [ server_ip ]
|
||||
output_fields: [ server_dns_server ]
|
||||
parameters:
|
||||
kb_name: cn_dns_server
|
||||
|
||||
- function: CN_APP_CATEGORY_LOOKUP
|
||||
lookup_fields: [ app ]
|
||||
parameters:
|
||||
kb_name: cn_app_category
|
||||
field_mapping:
|
||||
CATEGORY: app_category
|
||||
SUBCATEGORY: app_subcategory
|
||||
COMPANY: app_company
|
||||
COMPANY_CATEGORY: app_company_category
|
||||
|
||||
- function: EVAL
|
||||
output_fields: [ client_zone ]
|
||||
parameters:
|
||||
value_expression: "flags & 8 == 8 ? 'internal' : 'external'"
|
||||
|
||||
- function: EVAL
|
||||
output_fields: [ server_zone ]
|
||||
parameters:
|
||||
value_expression: "flags & 16 == 16 ? 'internal' : 'external'"
|
||||
|
||||
- function: CN_IP_ZONE_LOOKUP
|
||||
lookup_fields: [ client_ip ]
|
||||
output_fields: [ client_zone ]
|
||||
parameters:
|
||||
kb_name: none
|
||||
#kb_name: cn_internal_ip
|
||||
|
||||
- function: CN_IP_ZONE_LOOKUP
|
||||
lookup_fields: [ server_ip ]
|
||||
output_fields: [ server_zone ]
|
||||
parameters:
|
||||
kb_name: none
|
||||
#kb_name: cn_internal_ip
|
||||
|
||||
- function: EVAL
|
||||
output_fields: [ sent_bytes ]
|
||||
parameters:
|
||||
value_expression: "sent_bytes == null ? 0 : sent_bytes"
|
||||
|
||||
- function: EVAL
|
||||
output_fields: [ sent_pkts ]
|
||||
parameters:
|
||||
value_expression: "sent_pkts == null ? 0 : sent_pkts"
|
||||
|
||||
- function: EVAL
|
||||
output_fields: [ received_bytes ]
|
||||
parameters:
|
||||
value_expression: "received_bytes == null ? 0 : received_bytes"
|
||||
|
||||
- function: EVAL
|
||||
output_fields: [ received_pkts ]
|
||||
parameters:
|
||||
value_expression: "received_pkts == null ? 0 : received_pkts"
|
||||
|
||||
- function: EVAL
|
||||
output_fields: [ traffic_inbound_byte ]
|
||||
parameters:
|
||||
value_expression: "client_zone == 'internal' && server_zone == 'external' ? received_bytes : traffic_inbound_byte"
|
||||
|
||||
- function: EVAL
|
||||
output_fields: [ traffic_outbound_byte ]
|
||||
parameters:
|
||||
value_expression: "client_zone == 'external' && server_zone == 'internal' ? received_bytes : traffic_outbound_byte"
|
||||
|
||||
- function: EVAL
|
||||
output_fields: [ traffic_inbound_pkt ]
|
||||
parameters:
|
||||
value_expression: "client_zone == 'internal' && server_zone == 'external' ? received_pkts : traffic_inbound_pkt"
|
||||
|
||||
- function: EVAL
|
||||
output_fields: [ traffic_outbound_pkt ]
|
||||
parameters:
|
||||
value_expression: "client_zone == 'external' && server_zone == 'internal' ? received_pkts : traffic_outbound_pkt"
|
||||
|
||||
- function: EVAL
|
||||
output_fields: [ traffic_outbound_byte ]
|
||||
parameters:
|
||||
value_expression: "client_zone == 'internal' && server_zone == 'external' ? sent_bytes : traffic_outbound_byte"
|
||||
|
||||
- function: EVAL
|
||||
output_fields: [ traffic_inbound_byte ]
|
||||
parameters:
|
||||
value_expression: "client_zone == 'external' && server_zone == 'internal' ? sent_bytes : traffic_inbound_byte"
|
||||
|
||||
- function: EVAL
|
||||
output_fields: [ traffic_outbound_pkt ]
|
||||
parameters:
|
||||
value_expression: "client_zone == 'internal' && server_zone == 'external' ? sent_pkts : traffic_outbound_pkt"
|
||||
|
||||
- function: EVAL
|
||||
output_fields: [ traffic_inbound_pkt ]
|
||||
parameters:
|
||||
value_expression: "client_zone == 'external' && server_zone == 'internal' ? sent_pkts : traffic_inbound_pkt"
|
||||
|
||||
- function: EVAL
|
||||
output_fields: [ traffic_internal_byte ]
|
||||
parameters:
|
||||
value_expression: "client_zone == 'internal' && server_zone == 'internal' ? sent_bytes + received_bytes : traffic_internal_byte"
|
||||
|
||||
- function: EVAL
|
||||
output_fields: [ traffic_internal_pkt ]
|
||||
parameters:
|
||||
value_expression: "client_zone == 'internal' && server_zone == 'internal' ? sent_pkts + received_pkts : traffic_internal_pkt"
|
||||
|
||||
- function: EVAL
|
||||
output_fields: [ traffic_through_byte ]
|
||||
parameters:
|
||||
value_expression: "client_zone == 'external' && server_zone == 'external' ? sent_bytes + received_bytes : traffic_through_byte"
|
||||
|
||||
- function: EVAL
|
||||
output_fields: [ traffic_through_pkt ]
|
||||
parameters:
|
||||
value_expression: "client_zone == 'external' && server_zone == 'external' ? sent_pkts + received_pkts : traffic_through_pkt"
|
||||
|
||||
- function: EVAL
|
||||
output_fields: [ sessions ]
|
||||
parameters:
|
||||
value_expression: "1"
|
||||
|
||||
- function: EVAL
|
||||
output_fields: [ internal_query_num ]
|
||||
parameters:
|
||||
value_expression: "client_zone == 'internal' ? sessions : internal_query_num"
|
||||
|
||||
- function: EVAL
|
||||
output_fields: [ external_query_num ]
|
||||
parameters:
|
||||
value_expression: "client_zone == 'external' ? sessions : external_query_num"
|
||||
|
||||
- function: CN_VPN_LOOKUP
|
||||
lookup_fields: [ server_ip ]
|
||||
output_fields: [ server_vpn_service_name ]
|
||||
parameters:
|
||||
kb_name: cn_vpn_learning_ip
|
||||
option: IP_TO_VPN
|
||||
|
||||
- function: CN_VPN_LOOKUP
|
||||
lookup_fields: [ domain ]
|
||||
output_fields: [ domain_vpn_service_name ]
|
||||
parameters:
|
||||
kb_name: cn_vpn_learning_domain
|
||||
option: DOMAIN_TO_VPN
|
||||
|
||||
- function: CN_IOC_LOOKUP
|
||||
lookup_fields: [ server_ip ]
|
||||
output_fields: [ server_malware ]
|
||||
parameters:
|
||||
kb_name: cn_ioc_malware
|
||||
option: IP_TO_MALWARE
|
||||
|
||||
- function: CN_IOC_LOOKUP
|
||||
lookup_fields: [ domain ]
|
||||
output_fields: [ domain_malware ]
|
||||
parameters:
|
||||
kb_name: cn_ioc_malware
|
||||
option: DOMAIN_TO_MALWARE
|
||||
|
||||
- function: CN_USER_DEFINE_TAG_LOOKUP
|
||||
lookup_fields: [ client_ip ]
|
||||
output_fields: [ client_ip_tags ]
|
||||
parameters:
|
||||
kb_name: cn_ip_tag_user_define
|
||||
option: IP_TO_TAG
|
||||
|
||||
- function: CN_USER_DEFINE_TAG_LOOKUP
|
||||
lookup_fields: [ server_ip ]
|
||||
output_fields: [ server_ip_tags ]
|
||||
parameters:
|
||||
kb_name: cn_ip_tag_user_define
|
||||
option: IP_TO_TAG
|
||||
|
||||
- function: CN_USER_DEFINE_TAG_LOOKUP
|
||||
lookup_fields: [ domain ]
|
||||
output_fields: [ domain_tags ]
|
||||
parameters:
|
||||
kb_name: cn_domain_tag_user_define
|
||||
option: DOMAIN_TO_TAG
|
||||
|
||||
- function: CN_USER_DEFINE_TAG_LOOKUP
|
||||
lookup_fields: [ app ]
|
||||
output_fields: [ app_tags ]
|
||||
parameters:
|
||||
kb_name: cn_app_tag_user_define
|
||||
option: APP_TO_TAG
|
||||
|
||||
- function: GENERATE_STRING_ARRAY
|
||||
lookup_fields: [ client_idc_renter,client_ip_tags ]
|
||||
output_fields: [ client_ip_tags ]
|
||||
|
||||
- function: GENERATE_STRING_ARRAY
|
||||
lookup_fields: [ server_idc_renter,server_dns_server,server_node_type,server_malware,server_vpn_service_name,server_ip_tags ]
|
||||
output_fields: [ server_ip_tags ]
|
||||
|
||||
- function: GENERATE_STRING_ARRAY
|
||||
lookup_fields: [ domain_node_type,domain_malware,domain_vpn_service_name,domain_tags ]
|
||||
output_fields: [ domain_tags ]
|
||||
|
||||
- function: CN_ARRAY_ELEMENTS_PREPEND
|
||||
lookup_fields: [ client_ip_tags ]
|
||||
output_fields: [ client_ip_tags ]
|
||||
parameters:
|
||||
prefix: ip.
|
||||
|
||||
- function: CN_ARRAY_ELEMENTS_PREPEND
|
||||
lookup_fields: [ server_ip_tags ]
|
||||
output_fields: [ server_ip_tags ]
|
||||
parameters:
|
||||
prefix: ip.
|
||||
|
||||
- function: CN_ARRAY_ELEMENTS_PREPEND
|
||||
lookup_fields: [ domain_tags ]
|
||||
output_fields: [ domain_tags ]
|
||||
parameters:
|
||||
prefix: domain.
|
||||
|
||||
- function: CN_ARRAY_ELEMENTS_PREPEND
|
||||
lookup_fields: [ app_tags ]
|
||||
output_fields: [ app_tags ]
|
||||
parameters:
|
||||
prefix: app.
|
||||
postprocessing_pipelines:
|
||||
remove_field_processor: # [object] Processing Pipeline
|
||||
type: com.geedgenetworks.core.processor.projection.ProjectionProcessorImpl
|
||||
output_fields: [ recv_time,log_id,flags,start_timestamp_ms,end_timestamp_ms,duration_ms,decoded_as,client_ip,server_ip,client_port,server_port,app,app_transition,decoded_path,ip_protocol,l7_protocol,out_link_id,in_link_id,subscriber_id,imei,imsi,phone_number,apn,http_url,dns_rcode,dns_qname,dns_qtype,dns_rr,out_link_direction,in_link_direction,server_fqdn,server_domain,domain,domain_sld,domain_category_name,domain_category_group,domain_reputation_level,domain_icp_company_name,domain_whois_org,domain_tags,client_zone,client_country_region,client_super_admin_area,client_admin_area,client_longitude,client_latitude,client_isp,client_asn,client_ip_tags,server_zone,server_country_region,server_super_admin_area,server_admin_area,server_longitude,server_latitude,server_isp,server_asn,server_ip_tags,app_category,app_subcategory,app_company,app_company_category,app_tags,sent_pkts,sent_bytes,received_pkts,received_bytes,sessions,tcp_c2s_lost_bytes,tcp_s2c_lost_bytes,tcp_c2s_o3_pkts,tcp_s2c_o3_pkts,tcp_c2s_rtx_bytes,tcp_s2c_rtx_bytes,tcp_c2s_rtx_pkts,tcp_s2c_rtx_pkts,tcp_rtt_ms,http_response_latency_ms,ssl_handshake_latency_ms,dns_response_latency_ms,cn_internal_rule_id_list,cn_internal_ioc_type_list,traffic_inbound_byte,traffic_inbound_pkt,traffic_outbound_byte,traffic_outbound_pkt,traffic_internal_byte,traffic_internal_pkt,traffic_through_byte,traffic_through_pkt,internal_query_num,external_query_num ]
|
||||
|
||||
sinks:
|
||||
cn_kafka_sink:
|
||||
type: kafka
|
||||
properties:
|
||||
topic: SESSION-RECORD-CN
|
||||
kafka.bootstrap.servers: "{{ national_center_cn_kafka_servers }}" # quoted so the placeholder is a YAML string, not a flow mapping
|
||||
kafka.client.id: SESSION-RECORD-CN
|
||||
kafka.retries: 0
|
||||
kafka.linger.ms: 10
|
||||
kafka.request.timeout.ms: 30000
|
||||
kafka.batch.size: 262144
|
||||
kafka.buffer.memory: 134217728
|
||||
kafka.max.request.size: 10485760
|
||||
kafka.compression.type: snappy
|
||||
kafka.security.protocol: SASL_PLAINTEXT
|
||||
kafka.sasl.mechanism: PLAIN
|
||||
kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
|
||||
format: json
|
||||
|
||||
|
||||
application:
|
||||
env:
|
||||
name: etl_session_record_processed_kafka_to_cn_kafka
|
||||
shade.identifier: aes
|
||||
pipeline:
|
||||
object-reuse: true
|
||||
properties:
|
||||
hos.bucket.name.rtp_file: traffic_rtp_file_bucket
|
||||
hos.bucket.name.http_file: traffic_http_file_bucket
|
||||
hos.bucket.name.eml_file: traffic_eml_file_bucket
|
||||
hos.bucket.name.policy_capture_file: traffic_policy_capture_file_bucket
|
||||
topology:
|
||||
- name: kafka_source
|
||||
downstream: [ session_record_processor ]
|
||||
- name: session_record_processor
|
||||
downstream: [ remove_field_processor ]
|
||||
- name: remove_field_processor
|
||||
downstream: [ cn_kafka_sink ]
|
||||
- name: cn_kafka_sink
|
||||
downstream: [ ]
|
||||
@@ -0,0 +1,119 @@
|
||||
sources:
|
||||
kafka_source:
|
||||
type: kafka
|
||||
properties:
|
||||
topic: VOIP-CONVERSATION-RECORD
|
||||
kafka.bootstrap.servers: "{{ kafka_source_servers }}"
|
||||
kafka.client.id: VOIP-CONVERSATION-RECORD
|
||||
kafka.session.timeout.ms: 60000
|
||||
kafka.max.poll.records: 3000
|
||||
kafka.max.partition.fetch.bytes: 31457280
|
||||
kafka.security.protocol: SASL_PLAINTEXT
|
||||
kafka.sasl.mechanism: PLAIN
|
||||
kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
|
||||
kafka.group.id: etl_voip_record_kafka_to_clickhouse
|
||||
kafka.auto.offset.reset: latest
|
||||
format: json
|
||||
|
||||
processing_pipelines:
|
||||
etl_processor: # [object] Processing Pipeline
|
||||
type: com.geedgenetworks.core.processor.projection.ProjectionProcessorImpl
|
||||
remove_fields:
|
||||
output_fields:
|
||||
properties:
|
||||
key: value
|
||||
functions: # [array of object] Function List
|
||||
|
||||
- function: ASN_LOOKUP
|
||||
lookup_fields: [server_ip]
|
||||
output_fields: [server_asn]
|
||||
parameters:
|
||||
option: IP_TO_ASN
|
||||
kb_name: tsg_ip_asn
|
||||
|
||||
- function: ASN_LOOKUP
|
||||
lookup_fields: [client_ip]
|
||||
output_fields: [client_asn]
|
||||
parameters:
|
||||
option: IP_TO_ASN
|
||||
kb_name: tsg_ip_asn
|
||||
|
||||
- function: SNOWFLAKE_ID
|
||||
lookup_fields: ['']
|
||||
output_fields: [log_id]
|
||||
parameters:
|
||||
data_center_id_num: 1
|
||||
|
||||
- function: JSON_EXTRACT
|
||||
lookup_fields: [device_tag]
|
||||
output_fields: [data_center]
|
||||
filter:
|
||||
parameters:
|
||||
value_expression: $.tags[?(@.tag=='data_center')][0].value
|
||||
|
||||
- function: JSON_EXTRACT
|
||||
lookup_fields: [device_tag]
|
||||
output_fields: [device_group]
|
||||
filter:
|
||||
parameters:
|
||||
value_expression: $.tags[?(@.tag=='device_group')][0].value
|
||||
|
||||
- function: CURRENT_UNIX_TIMESTAMP
|
||||
output_fields: [processing_time]
|
||||
parameters:
|
||||
precision: seconds
|
||||
|
||||
- function: UNIX_TIMESTAMP_CONVERTER
|
||||
lookup_fields: [__timestamp]
|
||||
output_fields: [recv_time]
|
||||
parameters:
|
||||
precision: seconds
|
||||
|
||||
- function: EVAL
|
||||
output_fields: [ingestion_time]
|
||||
parameters:
|
||||
value_expression: recv_time
|
||||
|
||||
|
||||
- function: BASE64_DECODE_TO_STRING
|
||||
output_fields: [mail_subject]
|
||||
parameters:
|
||||
value_field: mail_subject
|
||||
charset_field: mail_subject_charset
|
||||
|
||||
- function: BASE64_DECODE_TO_STRING
|
||||
output_fields: [mail_attachment_name]
|
||||
parameters:
|
||||
value_field: mail_attachment_name
|
||||
charset_field: mail_attachment_name_charset
|
||||
|
||||
|
||||
sinks:
|
||||
clickhouse_sink:
|
||||
type: clickhouse
|
||||
properties:
|
||||
host: "{{ clickhouse_servers }}"
|
||||
table: tsg_galaxy_v3.voip_record_local
|
||||
batch.size: 100000
|
||||
batch.interval: 30s
|
||||
connection.user: e54c9568586180eede1506eecf3574e9
|
||||
connection.password: 86cf0e2ffba3f541a6c6761313e5cc7e
|
||||
connection.connect_timeout: 30
|
||||
connection.query_timeout: 300
|
||||
|
||||
|
||||
application:
|
||||
|
||||
env: # [object] Environment Variables
|
||||
name: etl_voip_record_kafka_to_clickhouse # [string] Job Name
|
||||
shade.identifier: aes
|
||||
pipeline:
|
||||
object-reuse: true # [boolean] Object Reuse, default is false
|
||||
topology:
|
||||
- name: kafka_source
|
||||
downstream: [etl_processor]
|
||||
- name: etl_processor
|
||||
downstream: [clickhouse_sink]
|
||||
- name: clickhouse_sink
|
||||
|
||||
|
||||
@@ -0,0 +1,42 @@
|
||||
sources:
|
||||
kafka_source:
|
||||
type: kafka
|
||||
properties:
|
||||
topic: PROXY-EVENT-PROCESSED
|
||||
kafka.bootstrap.servers: "{{ kafka_source_servers }}"
|
||||
kafka.client.id: PROXY-EVENT-PROCESSED
|
||||
kafka.session.timeout.ms: 60000
|
||||
kafka.max.poll.records: 3000
|
||||
kafka.max.partition.fetch.bytes: 31457280
|
||||
kafka.security.protocol: SASL_PLAINTEXT
|
||||
kafka.sasl.mechanism: PLAIN
|
||||
kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
|
||||
kafka.group.id: proxy_event_processed_kafka_to_clickhouse
|
||||
kafka.auto.offset.reset: latest
|
||||
format: json
|
||||
|
||||
sinks:
|
||||
clickhouse_sink:
|
||||
type: clickhouse
|
||||
properties:
|
||||
host: "{{ clickhouse_servers }}"
|
||||
table: tsg_galaxy_v3.proxy_event_local
|
||||
batch.size: 100000
|
||||
batch.interval: 30s
|
||||
connection.user: e54c9568586180eede1506eecf3574e9
|
||||
connection.password: 86cf0e2ffba3f541a6c6761313e5cc7e
|
||||
connection.connect_timeout: 30
|
||||
connection.query_timeout: 300
|
||||
|
||||
|
||||
application:
|
||||
|
||||
env: # [object] Environment Variables
|
||||
name: proxy_event_processed_kafka_to_clickhouse # [string] Job Name
|
||||
shade.identifier: aes
|
||||
pipeline:
|
||||
object-reuse: true # [boolean] Object Reuse, default is false
|
||||
topology:
|
||||
- name: kafka_source
|
||||
downstream: [clickhouse_sink]
|
||||
- name: clickhouse_sink
|
||||
@@ -0,0 +1,42 @@
|
||||
sources:
|
||||
kafka_source:
|
||||
type: kafka
|
||||
properties:
|
||||
topic: SESSION-RECORD-PROCESSED
|
||||
kafka.bootstrap.servers: "{{ kafka_source_servers }}"
|
||||
kafka.client.id: SESSION-RECORD-PROCESSED
|
||||
kafka.session.timeout.ms: 60000
|
||||
kafka.max.poll.records: 3000
|
||||
kafka.max.partition.fetch.bytes: 31457280
|
||||
kafka.security.protocol: SASL_PLAINTEXT
|
||||
kafka.sasl.mechanism: PLAIN
|
||||
kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
|
||||
kafka.group.id: session_record_processed_kafka_to_clickhouse
|
||||
kafka.auto.offset.reset: latest
|
||||
format: json
|
||||
|
||||
sinks:
|
||||
clickhouse_sink:
|
||||
type: clickhouse
|
||||
properties:
|
||||
host: "{{ clickhouse_servers }}"
|
||||
table: tsg_galaxy_v3.session_record_local
|
||||
batch.size: 100000
|
||||
batch.interval: 30s
|
||||
connection.user: e54c9568586180eede1506eecf3574e9
|
||||
connection.password: 86cf0e2ffba3f541a6c6761313e5cc7e
|
||||
connection.connect_timeout: 30
|
||||
connection.query_timeout: 300
|
||||
|
||||
|
||||
application:
|
||||
|
||||
env: # [object] Environment Variables
|
||||
name: session_record_processed_kafka_to_clickhouse # [string] Job Name
|
||||
shade.identifier: aes
|
||||
pipeline:
|
||||
object-reuse: true # [boolean] Object Reuse, default is false
|
||||
topology:
|
||||
- name: kafka_source
|
||||
downstream: [clickhouse_sink]
|
||||
- name: clickhouse_sink
|
||||
@@ -0,0 +1,42 @@
|
||||
sources:
|
||||
kafka_source:
|
||||
type: kafka
|
||||
properties:
|
||||
topic: TRANSACTION-RECORD-PROCESSED
|
||||
kafka.bootstrap.servers: "{{ kafka_source_servers }}"
|
||||
kafka.client.id: TRANSACTION-RECORD-PROCESSED
|
||||
kafka.session.timeout.ms: 60000
|
||||
kafka.max.poll.records: 3000
|
||||
kafka.max.partition.fetch.bytes: 31457280
|
||||
kafka.security.protocol: SASL_PLAINTEXT
|
||||
kafka.sasl.mechanism: PLAIN
|
||||
kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
|
||||
kafka.group.id: transaction_record_processed_kafka_to_clickhouse
|
||||
kafka.auto.offset.reset: latest
|
||||
format: json
|
||||
|
||||
sinks:
|
||||
clickhouse_sink:
|
||||
type: clickhouse
|
||||
properties:
|
||||
host: "{{ clickhouse_servers }}"
|
||||
table: tsg_galaxy_v3.transaction_record_local
|
||||
batch.size: 100000
|
||||
batch.interval: 30s
|
||||
connection.user: e54c9568586180eede1506eecf3574e9
|
||||
connection.password: 86cf0e2ffba3f541a6c6761313e5cc7e
|
||||
connection.connect_timeout: 30
|
||||
connection.query_timeout: 300
|
||||
|
||||
|
||||
application:
|
||||
|
||||
env: # [object] Environment Variables
|
||||
name: transaction_record_processed_kafka_to_clickhouse # [string] Job Name
|
||||
shade.identifier: aes
|
||||
pipeline:
|
||||
object-reuse: true # [boolean] Object Reuse, default is false
|
||||
topology:
|
||||
- name: kafka_source
|
||||
downstream: [clickhouse_sink]
|
||||
- name: clickhouse_sink
|
||||
@@ -0,0 +1,50 @@
|
||||
sources:
|
||||
kafka_source:
|
||||
type: kafka
|
||||
properties:
|
||||
topic: DOS-EVENT
|
||||
kafka.bootstrap.servers: "{{ kafka_source_servers }}"
|
||||
kafka.client.id: DOS-EVENT
|
||||
kafka.session.timeout.ms: 60000
|
||||
kafka.max.poll.records: 3000
|
||||
kafka.max.partition.fetch.bytes: 31457280
|
||||
kafka.security.protocol: SASL_PLAINTEXT
|
||||
#kafka.ssl.keystore.location:    # SSL placeholder, disabled: an empty value parses as null and kafka.security.protocol is SASL_PLAINTEXT
|
||||
#kafka.ssl.keystore.password:    # SSL placeholder, disabled (see keystore.location)
|
||||
#kafka.ssl.truststore.location:  # SSL placeholder, disabled (see keystore.location)
|
||||
#kafka.ssl.truststore.password:  # SSL placeholder, disabled (see keystore.location)
|
||||
#kafka.ssl.key.password:         # SSL placeholder, disabled (see keystore.location)
|
||||
kafka.sasl.mechanism: PLAIN
|
||||
kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
|
||||
#kafka.buffer.memory:      # producer-only option with no value; disabled because this block configures a Kafka consumer
|
||||
kafka.group.id: dos_event_kafka_to_clickhouse-20231221
|
||||
kafka.auto.offset.reset: latest
|
||||
#kafka.max.request.size:   # producer-only option with no value; disabled because this block configures a Kafka consumer
|
||||
kafka.compression.type: none
|
||||
format: json
|
||||
|
||||
|
||||
sinks:
|
||||
clickhouse_sink:
|
||||
type: clickhouse
|
||||
properties:
|
||||
host: "{{ clickhouse_servers }}"
|
||||
table: tsg_galaxy_v3.dos_event_local
|
||||
batch.size: 100000
|
||||
batch.interval: 30s
|
||||
connection.user: e54c9568586180eede1506eecf3574e9
|
||||
connection.password: 86cf0e2ffba3f541a6c6761313e5cc7e
|
||||
|
||||
|
||||
application:
|
||||
env:
|
||||
name: dos_event_kafka_to_clickhouse
|
||||
shade.identifier: aes
|
||||
pipeline:
|
||||
object-reuse: true # [boolean] Object Reuse, default is false
|
||||
topology:
|
||||
- name: kafka_source
|
||||
downstream: [clickhouse_sink]
|
||||
- name: clickhouse_sink
|
||||
|
||||
|
||||
@@ -0,0 +1,72 @@
|
||||
sources:
|
||||
kafka_source:
|
||||
type: kafka
|
||||
properties:
|
||||
topic: DATAPATH-TELEMETRY-RECORD
|
||||
kafka.bootstrap.servers: "{{ kafka_source_servers }}"
|
||||
kafka.client.id: DATAPATH-TELEMETRY-RECORD
|
||||
kafka.session.timeout.ms: 60000
|
||||
kafka.max.poll.records: 3000
|
||||
kafka.max.partition.fetch.bytes: 31457280
|
||||
#kafka.security.protocol: SSL
|
||||
#kafka.ssl.endpoint.identification.algorithm: ""
|
||||
#kafka.ssl.keystore.location: /data/tsg/olap/flink/topology/data/keystore.jks
|
||||
#kafka.ssl.keystore.password: 86cf0e2ffba3f541a6c6761313e5cc7e
|
||||
#kafka.ssl.truststore.location: /data/tsg/olap/flink/topology/data/truststore.jks
|
||||
#kafka.ssl.truststore.password: 86cf0e2ffba3f541a6c6761313e5cc7e
|
||||
#kafka.ssl.key.password: 86cf0e2ffba3f541a6c6761313e5cc7e
|
||||
kafka.security.protocol: SASL_PLAINTEXT
|
||||
kafka.sasl.mechanism: PLAIN
|
||||
kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
|
||||
kafka.group.id: etl_datapath_telemetry_record_kafka_to_clickhouse-20230125
|
||||
kafka.auto.offset.reset: latest
|
||||
format: msgpack
|
||||
|
||||
processing_pipelines:
|
||||
etl_processor: # [object] Processing Pipeline
|
||||
type: com.geedgenetworks.core.processor.projection.ProjectionProcessorImpl
|
||||
remove_fields:
|
||||
output_fields:
|
||||
functions:
|
||||
- function: SNOWFLAKE_ID
|
||||
lookup_fields: [ '' ]
|
||||
output_fields: [ log_id ]
|
||||
parameters:
|
||||
data_center_id_num: 1
|
||||
- function: UNIX_TIMESTAMP_CONVERTER
|
||||
lookup_fields: [ __timestamp ]
|
||||
output_fields: [ recv_time ]
|
||||
parameters:
|
||||
precision: seconds
|
||||
- function: BASE64_ENCODE_TO_STRING
|
||||
output_fields: [ packet ]
|
||||
parameters:
|
||||
value_field: packet
|
||||
|
||||
sinks:
|
||||
clickhouse_sink:
|
||||
type: clickhouse
|
||||
properties:
|
||||
host: "{{ clickhouse_servers }}"
|
||||
table: tsg_galaxy_v3.datapath_telemetry_record_local
|
||||
batch.size: 5000
|
||||
batch.interval: 30s
|
||||
connection.user: e54c9568586180eede1506eecf3574e9
|
||||
connection.password: 86cf0e2ffba3f541a6c6761313e5cc7e
|
||||
|
||||
|
||||
application:
|
||||
|
||||
env: # [object] Environment Variables
|
||||
name: etl_datapath_telemetry_record_kafka_to_clickhouse # [string] Job Name
|
||||
shade.identifier: aes
|
||||
pipeline:
|
||||
object-reuse: true # [boolean] Object Reuse, default is false
|
||||
topology:
|
||||
- name: kafka_source
|
||||
downstream: [etl_processor]
|
||||
- name: etl_processor
|
||||
downstream: [clickhouse_sink]
|
||||
- name: clickhouse_sink
|
||||
|
||||
|
||||
@@ -0,0 +1,143 @@
|
||||
sources:
|
||||
kafka_source:
|
||||
type: kafka
|
||||
# fields: # [array of object] Field List; if not set, all fields (Map<String, Object>) are output.
|
||||
# watermark_timestamp: common_recv_time # [string] Watermark Field Name
|
||||
# watermark_timestamp_unit: ms # [string] Watermark Unit, default is ms
|
||||
# watermark_lag: 60 # [number] Watermark Lag, default is 60
|
||||
properties:
|
||||
topic: PROXY-EVENT
|
||||
kafka.bootstrap.servers: "{{ kafka_source_servers }}"
|
||||
kafka.client.id: PROXY-EVENT
|
||||
kafka.session.timeout.ms: 60000
|
||||
kafka.max.poll.records: 3000
|
||||
kafka.max.partition.fetch.bytes: 31457280
|
||||
kafka.security.protocol: SASL_PLAINTEXT
|
||||
kafka.ssl.keystore.location:
|
||||
kafka.ssl.keystore.password:
|
||||
kafka.ssl.truststore.location:
|
||||
kafka.ssl.truststore.password:
|
||||
kafka.ssl.key.password:
|
||||
kafka.sasl.mechanism: PLAIN
|
||||
kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
|
||||
kafka.buffer.memory:
|
||||
kafka.group.id: etl_proxy_event_kafka_to_clickhouse-20231221
|
||||
kafka.auto.offset.reset: latest
|
||||
kafka.max.request.size:
|
||||
kafka.compression.type: none
|
||||
format: json
|
||||
|
||||
processing_pipelines:
|
||||
etl_processor: # [object] Processing Pipeline
|
||||
type: com.geedgenetworks.core.processor.projection.ProjectionProcessorImpl
|
||||
remove_fields:
|
||||
output_fields:
|
||||
properties:
|
||||
key: value
|
||||
functions: # [array of object] Function List
|
||||
|
||||
- function: SNOWFLAKE_ID
|
||||
lookup_fields: ['']
|
||||
output_fields: [log_id]
|
||||
parameters:
|
||||
data_center_id_num: 1
|
||||
|
||||
- function: JSON_EXTRACT
|
||||
lookup_fields: [device_tag]
|
||||
output_fields: [data_center]
|
||||
filter:
|
||||
parameters:
|
||||
value_expression: $.tags[?(@.tag=='data_center')][0].value
|
||||
|
||||
- function: JSON_EXTRACT
|
||||
lookup_fields: [device_tag]
|
||||
output_fields: [device_group]
|
||||
filter:
|
||||
parameters:
|
||||
value_expression: $.tags[?(@.tag=='device_group')][0].value
|
||||
|
||||
- function: CURRENT_UNIX_TIMESTAMP
|
||||
output_fields: [processing_time]
|
||||
parameters:
|
||||
precision: seconds
|
||||
|
||||
- function: UNIX_TIMESTAMP_CONVERTER
|
||||
lookup_fields: [__timestamp]
|
||||
output_fields: [recv_time]
|
||||
parameters:
|
||||
precision: seconds
|
||||
|
||||
- function: EVAL
|
||||
output_fields: [ingestion_time]
|
||||
parameters:
|
||||
value_expression: recv_time
|
||||
|
||||
- function: BASE64_DECODE_TO_STRING
|
||||
output_fields: [mail_subject]
|
||||
parameters:
|
||||
value_field: mail_subject
|
||||
charset_field: mail_subject_charset
|
||||
|
||||
- function: BASE64_DECODE_TO_STRING
|
||||
output_fields: [mail_attachment_name]
|
||||
parameters:
|
||||
value_field: mail_attachment_name
|
||||
charset_field: mail_attachment_name_charset
|
||||
|
||||
- function: PATH_COMBINE
|
||||
lookup_fields: [rtp_pcap_path]
|
||||
output_fields: [rtp_pcap_path]
|
||||
parameters:
|
||||
path: [props.hos.path, props.hos.bucket.name.traffic_file, rtp_pcap_path]
|
||||
|
||||
- function: PATH_COMBINE
|
||||
lookup_fields: [http_request_body]
|
||||
output_fields: [http_request_body]
|
||||
parameters:
|
||||
path: [props.hos.path, props.hos.bucket.name.traffic_file, http_request_body]
|
||||
|
||||
- function: PATH_COMBINE
|
||||
lookup_fields: [http_response_body]
|
||||
output_fields: [http_response_body]
|
||||
parameters:
|
||||
path: [props.hos.path, props.hos.bucket.name.traffic_file, http_response_body]
|
||||
|
||||
- function: PATH_COMBINE
|
||||
lookup_fields: [mail_eml_file]
|
||||
output_fields: [mail_eml_file]
|
||||
parameters:
|
||||
path: [props.hos.path, props.hos.bucket.name.traffic_file, mail_eml_file]
|
||||
|
||||
- function: PATH_COMBINE
|
||||
lookup_fields: [packet_capture_file]
|
||||
output_fields: [packet_capture_file]
|
||||
parameters:
|
||||
path: [props.hos.path, props.hos.bucket.name.traffic_file, packet_capture_file]
|
||||
|
||||
sinks:
|
||||
clickhouse_sink:
|
||||
type: clickhouse
|
||||
properties:
|
||||
host: "{{ clickhouse_servers }}"
|
||||
table: tsg_galaxy_v3.proxy_event_local
|
||||
batch.size: 100000
|
||||
batch.interval: 30s
|
||||
connection.user: e54c9568586180eede1506eecf3574e9
|
||||
connection.password: 86cf0e2ffba3f541a6c6761313e5cc7e
|
||||
|
||||
|
||||
application:
|
||||
|
||||
env: # [object] Environment Variables
|
||||
name: etl_proxy_event_kafka_to_clickhouse # [string] Job Name
|
||||
shade.identifier: aes
|
||||
pipeline:
|
||||
object-reuse: true # [boolean] Object Reuse, default is false
|
||||
topology:
|
||||
- name: kafka_source
|
||||
downstream: [etl_processor]
|
||||
- name: etl_processor
|
||||
downstream: [clickhouse_sink]
|
||||
- name: clickhouse_sink
|
||||
|
||||
|
||||
@@ -0,0 +1,141 @@
|
||||
sources:
|
||||
kafka_source:
|
||||
type: kafka
|
||||
properties:
|
||||
topic: SESSION-RECORD
|
||||
kafka.bootstrap.servers: "{{ kafka_source_servers }}"
|
||||
kafka.client.id: SESSION-RECORD
|
||||
kafka.session.timeout.ms: 60000
|
||||
kafka.max.poll.records: 3000
|
||||
kafka.max.partition.fetch.bytes: 31457280
|
||||
|
||||
# kafka.security.protocol: SSL
|
||||
# kafka.ssl.endpoint.identification.algorithm: ""
|
||||
# kafka.ssl.keystore.location: $GROOT_HOME/config/dat/keystore.jks
|
||||
# kafka.ssl.keystore.password: 86cf0e2ffba3f541a6c6761313e5cc7e
|
||||
# kafka.ssl.truststore.location: $GROOT_HOME/config/dat/truststore.jks
|
||||
# kafka.ssl.truststore.password: 86cf0e2ffba3f541a6c6761313e5cc7e
|
||||
# kafka.ssl.key.password: 86cf0e2ffba3f541a6c6761313e5cc7e
|
||||
|
||||
kafka.security.protocol: SASL_PLAINTEXT
|
||||
kafka.sasl.mechanism: PLAIN
|
||||
kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
|
||||
kafka.group.id: etl_session_record_kafka_to_clickhouse-20230125
|
||||
kafka.auto.offset.reset: latest
|
||||
format: json
|
||||
|
||||
processing_pipelines:
|
||||
etl_processor:
|
||||
type: projection
|
||||
properties:
|
||||
key: value
|
||||
functions:
|
||||
- function: SNOWFLAKE_ID
|
||||
lookup_fields: ['']
|
||||
output_fields: [log_id]
|
||||
parameters:
|
||||
data_center_id_num: 1
|
||||
|
||||
- function: JSON_EXTRACT
|
||||
lookup_fields: [device_tag]
|
||||
output_fields: [data_center]
|
||||
filter:
|
||||
parameters:
|
||||
value_expression: $.tags[?(@.tag=='data_center')][0].value
|
||||
|
||||
- function: JSON_EXTRACT
|
||||
lookup_fields: [device_tag]
|
||||
output_fields: [device_group]
|
||||
filter:
|
||||
parameters:
|
||||
value_expression: $.tags[?(@.tag=='device_group')][0].value
|
||||
|
||||
- function: CURRENT_UNIX_TIMESTAMP
|
||||
output_fields: [processing_time]
|
||||
parameters:
|
||||
precision: seconds
|
||||
|
||||
- function: UNIX_TIMESTAMP_CONVERTER
|
||||
lookup_fields: [__timestamp]
|
||||
output_fields: [recv_time]
|
||||
parameters:
|
||||
precision: seconds
|
||||
|
||||
- function: EVAL
|
||||
output_fields: [ingestion_time]
|
||||
parameters:
|
||||
value_expression: recv_time
|
||||
|
||||
- function: BASE64_DECODE_TO_STRING
|
||||
output_fields: [mail_subject]
|
||||
parameters:
|
||||
value_field: mail_subject
|
||||
charset_field: mail_subject_charset
|
||||
|
||||
- function: BASE64_DECODE_TO_STRING
|
||||
output_fields: [mail_attachment_name]
|
||||
parameters:
|
||||
value_field: mail_attachment_name
|
||||
charset_field: mail_attachment_name_charset
|
||||
|
||||
- function: PATH_COMBINE
|
||||
lookup_fields: [rtp_pcap_path]
|
||||
output_fields: [rtp_pcap_path]
|
||||
parameters:
|
||||
path: [props.hos.path, props.hos.bucket.name.traffic_file, rtp_pcap_path]
|
||||
|
||||
- function: PATH_COMBINE
|
||||
lookup_fields: [http_request_body]
|
||||
output_fields: [http_request_body]
|
||||
parameters:
|
||||
path: [props.hos.path, props.hos.bucket.name.traffic_file, http_request_body]
|
||||
|
||||
- function: PATH_COMBINE
|
||||
lookup_fields: [http_response_body]
|
||||
output_fields: [http_response_body]
|
||||
parameters:
|
||||
path: [props.hos.path, props.hos.bucket.name.traffic_file, http_response_body]
|
||||
|
||||
- function: PATH_COMBINE
|
||||
lookup_fields: [mail_eml_file]
|
||||
output_fields: [mail_eml_file]
|
||||
parameters:
|
||||
path: [props.hos.path, props.hos.bucket.name.traffic_file, mail_eml_file]
|
||||
|
||||
- function: PATH_COMBINE
|
||||
lookup_fields: [packet_capture_file]
|
||||
output_fields: [packet_capture_file]
|
||||
parameters:
|
||||
path: [props.hos.path, props.hos.bucket.name.traffic_file, packet_capture_file]
|
||||
|
||||
sinks:
|
||||
clickhouse_sink:
|
||||
type: clickhouse
|
||||
properties:
|
||||
host: "{{ clickhouse_servers }}"
|
||||
table: tsg_galaxy_v3.session_record_local
|
||||
batch.size: 100000
|
||||
batch.interval: 30s
|
||||
connection.user: e54c9568586180eede1506eecf3574e9
|
||||
connection.password: 86cf0e2ffba3f541a6c6761313e5cc7e
|
||||
|
||||
|
||||
application:
|
||||
env:
|
||||
name: etl_session_record_kafka_to_clickhouse
|
||||
shade.identifier: aes
|
||||
pipeline:
|
||||
object-reuse: true
|
||||
properties:
|
||||
hos.bucket.name.rtp_file: traffic_rtp_file_bucket
|
||||
hos.bucket.name.http_file: traffic_http_file_bucket
|
||||
hos.bucket.name.eml_file: traffic_eml_file_bucket
|
||||
hos.bucket.name.policy_capture_file: traffic_policy_capture_file_bucket
|
||||
topology:
|
||||
- name: kafka_source
|
||||
downstream: [etl_processor]
|
||||
- name: etl_processor
|
||||
downstream: [clickhouse_sink]
|
||||
- name: clickhouse_sink
|
||||
|
||||
|
||||
@@ -0,0 +1,93 @@
|
||||
sources:
|
||||
kafka_source:
|
||||
type: kafka
|
||||
properties:
|
||||
topic: TRAFFIC-SKETCH-METRIC
|
||||
kafka.bootstrap.servers: "{{ kafka_source_servers }}"
|
||||
kafka.client.id: TRAFFIC-SKETCH-METRIC
|
||||
kafka.session.timeout.ms: 60000
|
||||
kafka.max.poll.records: 3000
|
||||
kafka.max.partition.fetch.bytes: 31457280
|
||||
kafka.security.protocol: SASL_PLAINTEXT
|
||||
kafka.sasl.mechanism: PLAIN
|
||||
kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
|
||||
kafka.group.id: etl_traffic_sketch_metric
|
||||
kafka.auto.offset.reset: latest
|
||||
kafka.compression.type: none
|
||||
format: json
|
||||
|
||||
processing_pipelines:
|
||||
etl_processor: # [object] Processing Pipeline
|
||||
type: com.geedgenetworks.core.processor.projection.ProjectionProcessorImpl
|
||||
remove_fields:
|
||||
output_fields:
|
||||
functions: # [array of object] Function List
|
||||
|
||||
- function: FLATTEN
|
||||
lookup_fields: [ fields,tags ]
|
||||
output_fields: [ ]
|
||||
parameters:
|
||||
#prefix: ""
|
||||
depth: 3
|
||||
# delimiter: "."
|
||||
|
||||
- function: RENAME
|
||||
lookup_fields: [ '' ]
|
||||
output_fields: [ '' ]
|
||||
filter:
|
||||
parameters:
|
||||
# parent_fields: [tags]
|
||||
#rename_fields:
|
||||
# tags: tags
|
||||
rename_expression: key =string.replace_all(key,'tags.','');key =string.replace_all(key,'fields.','');return key;
|
||||
|
||||
- function: EVAL
|
||||
output_fields: [ internal_ip ]
|
||||
parameters:
|
||||
value_expression: "direction == 'Outbound' ? client_ip : server_ip"
|
||||
- function: EVAL
|
||||
output_fields: [ external_ip ]
|
||||
parameters:
|
||||
value_expression: "direction == 'Outbound' ? server_ip : client_ip"
|
||||
|
||||
- function: UNIX_TIMESTAMP_CONVERTER
|
||||
lookup_fields: [ timestamp_ms ]
|
||||
output_fields: [ recv_time ]
|
||||
parameters:
|
||||
precision: seconds
|
||||
|
||||
- function: SNOWFLAKE_ID
|
||||
lookup_fields: [ '' ]
|
||||
output_fields: [ log_id ]
|
||||
filter:
|
||||
parameters:
|
||||
data_center_id_num: 1
|
||||
|
||||
|
||||
sinks:
|
||||
clickhouse_sink:
|
||||
type: clickhouse
|
||||
properties:
|
||||
host: "{{ clickhouse_servers }}"
|
||||
table: tsg_galaxy_v3.traffic_sketch_metric_local
|
||||
batch.size: 100000
|
||||
batch.interval: 30s
|
||||
connection.user: e54c9568586180eede1506eecf3574e9
|
||||
connection.password: 86cf0e2ffba3f541a6c6761313e5cc7e
|
||||
|
||||
|
||||
application:
|
||||
|
||||
env: # [object] Environment Variables
|
||||
name: etl_traffic_sketch_metric # [string] Job Name
|
||||
shade.identifier: aes
|
||||
pipeline:
|
||||
object-reuse: true # [boolean] Object Reuse, default is false
|
||||
topology:
|
||||
- name: kafka_source
|
||||
downstream: [etl_processor]
|
||||
- name: etl_processor
|
||||
downstream: [clickhouse_sink]
|
||||
- name: clickhouse_sink
|
||||
|
||||
|
||||
@@ -0,0 +1,141 @@
|
||||
sources:
|
||||
kafka_source:
|
||||
type: kafka
|
||||
# fields: # [array of object] Field List; if not set, all fields (Map<String, Object>) are output.
|
||||
# watermark_timestamp: common_recv_time # [string] Watermark Field Name
|
||||
# watermark_timestamp_unit: ms # [string] Watermark Unit, default is ms
|
||||
# watermark_lag: 60 # [number] Watermark Lag, default is 60
|
||||
properties:
|
||||
topic: TRANSACTION-RECORD
|
||||
kafka.bootstrap.servers: "{{ kafka_source_servers }}"
|
||||
kafka.client.id: TRANSACTION-RECORD
|
||||
kafka.session.timeout.ms: 60000
|
||||
kafka.max.poll.records: 3000
|
||||
kafka.max.partition.fetch.bytes: 31457280
|
||||
kafka.security.protocol: SASL_PLAINTEXT
|
||||
kafka.ssl.keystore.location:
|
||||
kafka.ssl.keystore.password:
|
||||
kafka.ssl.truststore.location:
|
||||
kafka.ssl.truststore.password:
|
||||
kafka.ssl.key.password:
|
||||
kafka.sasl.mechanism: PLAIN
|
||||
kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
|
||||
kafka.buffer.memory:
|
||||
kafka.group.id: etl_transaction_record_kafka_to_clickhouse-20240308
|
||||
kafka.auto.offset.reset: latest
|
||||
kafka.max.request.size:
|
||||
kafka.compression.type: none
|
||||
format: json
|
||||
|
||||
processing_pipelines:
|
||||
etl_processor: # [object] Processing Pipeline
|
||||
type: com.geedgenetworks.core.processor.projection.ProjectionProcessorImpl
|
||||
remove_fields:
|
||||
output_fields:
|
||||
properties:
|
||||
key: value
|
||||
functions: # [array of object] Function List
|
||||
|
||||
- function: SNOWFLAKE_ID
|
||||
lookup_fields: ['']
|
||||
output_fields: [log_id]
|
||||
parameters:
|
||||
data_center_id_num: 1
|
||||
|
||||
- function: JSON_EXTRACT
|
||||
lookup_fields: [device_tag]
|
||||
output_fields: [data_center]
|
||||
filter:
|
||||
parameters:
|
||||
value_expression: $.tags[?(@.tag=='data_center')][0].value
|
||||
|
||||
- function: JSON_EXTRACT
|
||||
lookup_fields: [device_tag]
|
||||
output_fields: [device_group]
|
||||
filter:
|
||||
parameters:
|
||||
value_expression: $.tags[?(@.tag=='device_group')][0].value
|
||||
|
||||
- function: CURRENT_UNIX_TIMESTAMP
|
||||
output_fields: [processing_time]
|
||||
parameters:
|
||||
precision: seconds
|
||||
|
||||
- function: UNIX_TIMESTAMP_CONVERTER
|
||||
lookup_fields: [__timestamp]
|
||||
output_fields: [recv_time]
|
||||
parameters:
|
||||
precision: seconds
|
||||
|
||||
- function: EVAL
|
||||
output_fields: [ingestion_time]
|
||||
parameters:
|
||||
value_expression: recv_time
|
||||
|
||||
- function: BASE64_DECODE_TO_STRING
|
||||
output_fields: [mail_subject]
|
||||
parameters:
|
||||
value_field: mail_subject
|
||||
charset_field: mail_subject_charset
|
||||
|
||||
- function: BASE64_DECODE_TO_STRING
|
||||
output_fields: [mail_attachment_name]
|
||||
parameters:
|
||||
value_field: mail_attachment_name
|
||||
charset_field: mail_attachment_name_charset
|
||||
|
||||
- function: PATH_COMBINE
|
||||
lookup_fields: [rtp_pcap_path]
|
||||
output_fields: [rtp_pcap_path]
|
||||
parameters:
|
||||
path: [props.hos.path, props.hos.bucket.name.traffic_file, rtp_pcap_path]
|
||||
|
||||
- function: PATH_COMBINE
|
||||
lookup_fields: [http_request_body]
|
||||
output_fields: [http_request_body]
|
||||
parameters:
|
||||
path: [props.hos.path, props.hos.bucket.name.traffic_file, http_request_body]
|
||||
|
||||
- function: PATH_COMBINE
|
||||
lookup_fields: [http_response_body]
|
||||
output_fields: [http_response_body]
|
||||
parameters:
|
||||
path: [props.hos.path, props.hos.bucket.name.traffic_file, http_response_body]
|
||||
|
||||
- function: PATH_COMBINE
|
||||
lookup_fields: [mail_eml_file]
|
||||
output_fields: [mail_eml_file]
|
||||
parameters:
|
||||
path: [props.hos.path, props.hos.bucket.name.traffic_file, mail_eml_file]
|
||||
|
||||
- function: PATH_COMBINE
|
||||
lookup_fields: [packet_capture_file]
|
||||
output_fields: [packet_capture_file]
|
||||
parameters:
|
||||
path: [props.hos.path, props.hos.bucket.name.traffic_file, packet_capture_file]
|
||||
|
||||
sinks:
|
||||
clickhouse_sink:
|
||||
type: clickhouse
|
||||
properties:
|
||||
host: "{{ clickhouse_servers }}"
|
||||
table: tsg_galaxy_v3.transaction_record_local
|
||||
batch.size: 100000
|
||||
batch.interval: 30s
|
||||
connection.user: e54c9568586180eede1506eecf3574e9
|
||||
connection.password: 86cf0e2ffba3f541a6c6761313e5cc7e
|
||||
|
||||
|
||||
application:
|
||||
|
||||
env: # [object] Environment Variables
|
||||
name: etl_transaction_record_kafka_to_clickhouse # [string] Job Name
|
||||
shade.identifier: aes
|
||||
pipeline:
|
||||
object-reuse: true # [boolean] Object Reuse, default is false
|
||||
topology:
|
||||
- name: kafka_source
|
||||
downstream: [etl_processor]
|
||||
- name: etl_processor
|
||||
downstream: [clickhouse_sink]
|
||||
- name: clickhouse_sink
|
||||
@@ -0,0 +1,143 @@
|
||||
sources:
|
||||
kafka_source:
|
||||
type: kafka
|
||||
# fields: # [array of object] Field List; if not set, all fields (Map<String, Object>) are output.
|
||||
# watermark_timestamp: common_recv_time # [string] Watermark Field Name
|
||||
# watermark_timestamp_unit: ms # [string] Watermark Unit, default is ms
|
||||
# watermark_lag: 60 # [number] Watermark Lag, default is 60
|
||||
properties:
|
||||
topic: VOIP-CONVERSATION-RECORD
|
||||
kafka.bootstrap.servers: "{{ kafka_source_servers }}"
|
||||
kafka.client.id: VOIP-CONVERSATION-RECORD
|
||||
kafka.session.timeout.ms: 60000
|
||||
kafka.max.poll.records: 3000
|
||||
kafka.max.partition.fetch.bytes: 31457280
|
||||
kafka.security.protocol: SASL_PLAINTEXT
|
||||
kafka.ssl.keystore.location:
|
||||
kafka.ssl.keystore.password:
|
||||
kafka.ssl.truststore.location:
|
||||
kafka.ssl.truststore.password:
|
||||
kafka.ssl.key.password:
|
||||
kafka.sasl.mechanism: PLAIN
|
||||
kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
|
||||
kafka.buffer.memory:
|
||||
kafka.group.id: etl_voip_record_kafka_to_clickhouse-20231221
|
||||
kafka.auto.offset.reset: latest
|
||||
kafka.max.request.size:
|
||||
kafka.compression.type: none
|
||||
format: json
|
||||
|
||||
processing_pipelines:
|
||||
etl_processor: # [object] Processing Pipeline
|
||||
type: com.geedgenetworks.core.processor.projection.ProjectionProcessorImpl
|
||||
remove_fields:
|
||||
output_fields:
|
||||
properties:
|
||||
key: value
|
||||
functions: # [array of object] Function List
|
||||
|
||||
- function: SNOWFLAKE_ID
|
||||
lookup_fields: ['']
|
||||
output_fields: [log_id]
|
||||
parameters:
|
||||
data_center_id_num: 1
|
||||
|
||||
- function: JSON_EXTRACT
|
||||
lookup_fields: [device_tag]
|
||||
output_fields: [data_center]
|
||||
filter:
|
||||
parameters:
|
||||
value_expression: $.tags[?(@.tag=='data_center')][0].value
|
||||
|
||||
- function: JSON_EXTRACT
|
||||
lookup_fields: [device_tag]
|
||||
output_fields: [device_group]
|
||||
filter:
|
||||
parameters:
|
||||
value_expression: $.tags[?(@.tag=='device_group')][0].value
|
||||
|
||||
- function: CURRENT_UNIX_TIMESTAMP
|
||||
output_fields: [processing_time]
|
||||
parameters:
|
||||
precision: seconds
|
||||
|
||||
- function: UNIX_TIMESTAMP_CONVERTER
|
||||
lookup_fields: [__timestamp]
|
||||
output_fields: [recv_time]
|
||||
parameters:
|
||||
precision: seconds
|
||||
|
||||
- function: EVAL
|
||||
output_fields: [ingestion_time]
|
||||
parameters:
|
||||
value_expression: recv_time
|
||||
|
||||
- function: BASE64_DECODE_TO_STRING
|
||||
output_fields: [mail_subject]
|
||||
parameters:
|
||||
value_field: mail_subject
|
||||
charset_field: mail_subject_charset
|
||||
|
||||
- function: BASE64_DECODE_TO_STRING
|
||||
output_fields: [mail_attachment_name]
|
||||
parameters:
|
||||
value_field: mail_attachment_name
|
||||
charset_field: mail_attachment_name_charset
|
||||
|
||||
- function: PATH_COMBINE
|
||||
lookup_fields: [rtp_pcap_path]
|
||||
output_fields: [rtp_pcap_path]
|
||||
parameters:
|
||||
path: [props.hos.path, props.hos.bucket.name.traffic_file, rtp_pcap_path]
|
||||
|
||||
- function: PATH_COMBINE
|
||||
lookup_fields: [http_request_body]
|
||||
output_fields: [http_request_body]
|
||||
parameters:
|
||||
path: [props.hos.path, props.hos.bucket.name.traffic_file, http_request_body]
|
||||
|
||||
- function: PATH_COMBINE
|
||||
lookup_fields: [http_response_body]
|
||||
output_fields: [http_response_body]
|
||||
parameters:
|
||||
path: [props.hos.path, props.hos.bucket.name.traffic_file, http_response_body]
|
||||
|
||||
- function: PATH_COMBINE
|
||||
lookup_fields: [mail_eml_file]
|
||||
output_fields: [mail_eml_file]
|
||||
parameters:
|
||||
path: [props.hos.path, props.hos.bucket.name.traffic_file, mail_eml_file]
|
||||
|
||||
- function: PATH_COMBINE
|
||||
lookup_fields: [packet_capture_file]
|
||||
output_fields: [packet_capture_file]
|
||||
parameters:
|
||||
path: [props.hos.path, props.hos.bucket.name.traffic_file, packet_capture_file]
|
||||
|
||||
sinks:
|
||||
clickhouse_sink:
|
||||
type: clickhouse
|
||||
properties:
|
||||
host: "{{ clickhouse_servers }}"
|
||||
table: tsg_galaxy_v3.voip_record_local
|
||||
batch.size: 100000
|
||||
batch.interval: 30s
|
||||
connection.user: e54c9568586180eede1506eecf3574e9
|
||||
connection.password: 86cf0e2ffba3f541a6c6761313e5cc7e
|
||||
|
||||
|
||||
application:
|
||||
|
||||
env: # [object] Environment Variables
|
||||
name: etl_voip_record_kafka_to_clickhouse # [string] Job Name
|
||||
shade.identifier: aes
|
||||
pipeline:
|
||||
object-reuse: true # [boolean] Object Reuse, default is false
|
||||
topology:
|
||||
- name: kafka_source
|
||||
downstream: [etl_processor]
|
||||
- name: etl_processor
|
||||
downstream: [clickhouse_sink]
|
||||
- name: clickhouse_sink
|
||||
|
||||
|
||||
@@ -0,0 +1,387 @@
|
||||
sources:
|
||||
kafka_source:
|
||||
type: kafka
|
||||
properties:
|
||||
topic: {{ kafka_source_topic }}
|
||||
kafka.bootstrap.servers: {{ kafka_source_bootstrap_servers }}
|
||||
kafka.client.id: {{ kafka_source_topic }}
|
||||
kafka.session.timeout.ms: 60000
|
||||
kafka.max.poll.records: 3000
|
||||
kafka.max.partition.fetch.bytes: 31457280
|
||||
kafka.security.protocol: SASL_PLAINTEXT
|
||||
kafka.sasl.mechanism: PLAIN
|
||||
kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
|
||||
kafka.group.id: {{ kafka_source_group_id }}
|
||||
kafka.auto.offset.reset: latest
|
||||
format: json
|
||||
json.ignore.parse.errors: false
|
||||
|
||||
|
||||
processing_pipelines:
|
||||
etl_processor:
|
||||
type: com.geedgenetworks.core.processor.projection.ProjectionProcessorImpl
|
||||
functions:
|
||||
- function: SNOWFLAKE_ID
|
||||
lookup_fields: [ '' ]
|
||||
output_fields: [ cn_log_id ]
|
||||
parameters:
|
||||
data_center_id_num: 1
|
||||
|
||||
- function: EVAL
|
||||
output_fields: [ log_id ]
|
||||
parameters:
|
||||
value_expression: "log_id == null ? cn_log_id : log_id"
|
||||
|
||||
- function: UNIX_TIMESTAMP_CONVERTER
|
||||
lookup_fields: [ __timestamp ]
|
||||
output_fields: [ kafka_recv_time ]
|
||||
parameters:
|
||||
precision: seconds
|
||||
|
||||
- function: EVAL
|
||||
output_fields: [ recv_time ]
|
||||
parameters:
|
||||
value_expression: "recv_time == null ? kafka_recv_time : recv_time"
|
||||
|
||||
- function: EVAL
|
||||
output_fields: [ domain ]
|
||||
parameters:
|
||||
value_expression: server_fqdn
|
||||
|
||||
- function: EVAL
|
||||
output_fields: [ domain_sld ]
|
||||
parameters:
|
||||
value_expression: server_domain
|
||||
|
||||
- function: CN_L7_PROTOCOL_AND_APP_EXTRACT
|
||||
parameters:
|
||||
decoded_path_field_name: decoded_path
|
||||
app_transition_field_name: app_transition
|
||||
l7_protocol_field_name: l7_protocol
|
||||
app_field_name: app
|
||||
l7_protocol: DHCP,DNS,FTP,GRE,GTP,HTTP,HTTPS,ICMP,IMAP,IMAPS,IPSEC,ISAKMP,XMPP,L2TP,LDAP,MMS,NETBIOS,NETFLOW,NTP,POP3,POP3S,RDP,PPTP,RADIUS,RTCP,RTP,RTSP,SIP,SMB,SMTP,SMTPS,SNMP,SSDP,SSH,SSL,STUN,TELNET,TFTP,OPENVPN,RTMP,TEREDO,FTPS,DTLS,SPDY,BJNP,QUIC,MDNS,Unknown TCP,Unknown UDP,Unknown Other,IKE,MAIL,SOCKS,DoH,SLP,SSL with ESNI,ISATAP,Stratum,SSL with ECH
|
||||
|
||||
- function: GEOIP_LOOKUP
|
||||
lookup_fields: [ client_ip ]
|
||||
output_fields: [ ]
|
||||
parameters:
|
||||
kb_name: cn_ip_location
|
||||
option: IP_TO_OBJECT
|
||||
geolocation_field_mapping:
|
||||
COUNTRY: client_country_region
|
||||
PROVINCE: client_super_admin_area
|
||||
CITY: client_admin_area
|
||||
LONGITUDE: client_longitude
|
||||
LATITUDE: client_latitude
|
||||
ISP: client_isp
|
||||
|
||||
- function: GEOIP_LOOKUP
|
||||
lookup_fields: [ server_ip ]
|
||||
output_fields: [ ]
|
||||
parameters:
|
||||
kb_name: cn_ip_location
|
||||
option: IP_TO_OBJECT
|
||||
geolocation_field_mapping:
|
||||
COUNTRY: server_country_region
|
||||
PROVINCE: server_super_admin_area
|
||||
CITY: server_admin_area
|
||||
LONGITUDE: server_longitude
|
||||
LATITUDE: server_latitude
|
||||
ISP: server_isp
|
||||
|
||||
- function: ASN_LOOKUP
|
||||
lookup_fields: [ client_ip ]
|
||||
output_fields: [ client_asn ]
|
||||
parameters:
|
||||
option: IP_TO_ASN
|
||||
kb_name: cn_ip_asn
|
||||
|
||||
- function: ASN_LOOKUP
|
||||
lookup_fields: [ server_ip ]
|
||||
output_fields: [ server_asn ]
|
||||
parameters:
|
||||
option: IP_TO_ASN
|
||||
kb_name: cn_ip_asn
|
||||
|
||||
- function: CN_IDC_RENTER_LOOKUP
|
||||
lookup_fields: [ client_ip ]
|
||||
output_fields: [ client_idc_renter ]
|
||||
parameters:
|
||||
kb_name: cn_idc_renter
|
||||
|
||||
- function: CN_IDC_RENTER_LOOKUP
|
||||
lookup_fields: [ server_ip ]
|
||||
output_fields: [ server_idc_renter ]
|
||||
parameters:
|
||||
kb_name: cn_idc_renter
|
||||
|
||||
- function: CN_LINK_DIRECTION_LOOKUP
|
||||
lookup_fields: [ in_link_id ]
|
||||
output_fields: [ in_link_direction ]
|
||||
parameters:
|
||||
kb_name: cn_link_direction
|
||||
|
||||
- function: CN_LINK_DIRECTION_LOOKUP
|
||||
lookup_fields: [ out_link_id ]
|
||||
output_fields: [ out_link_direction ]
|
||||
parameters:
|
||||
kb_name: cn_link_direction
|
||||
|
||||
- function: CN_FQDN_CATEGORY_LOOKUP
|
||||
lookup_fields: [ domain ]
|
||||
parameters:
|
||||
kb_name: cn_fqdn_category
|
||||
field_mapping:
|
||||
NAME: domain_category_name
|
||||
GROUP: domain_category_group
|
||||
REPUTATION_LEVEL: domain_reputation_level
|
||||
|
||||
- function: CN_ICP_LOOKUP
|
||||
lookup_fields: [ domain ]
|
||||
output_fields: [ domain_icp_company_name ]
|
||||
parameters:
|
||||
kb_name: cn_fqdn_icp
|
||||
|
||||
- function: CN_FQDN_WHOIS_LOOKUP
|
||||
lookup_fields: [ domain ]
|
||||
output_fields: [ domain_whois_org ]
|
||||
parameters:
|
||||
kb_name: cn_fqdn_whois
|
||||
|
||||
- function: CN_DNS_SERVER_LOOKUP
|
||||
lookup_fields: [ server_ip ]
|
||||
output_fields: [ server_dns_server ]
|
||||
parameters:
|
||||
kb_name: cn_dns_server
|
||||
|
||||
- function: CN_APP_CATEGORY_LOOKUP
|
||||
lookup_fields: [ app ]
|
||||
parameters:
|
||||
kb_name: cn_app_category
|
||||
field_mapping:
|
||||
CATEGORY: app_category
|
||||
SUBCATEGORY: app_subcategory
|
||||
COMPANY: app_company
|
||||
COMPANY_CATEGORY: app_company_category
|
||||
|
||||
- function: EVAL
|
||||
output_fields: [ client_zone ]
|
||||
parameters:
|
||||
value_expression: "(flags & 8) == 8 ? 'internal' : 'external'"
|
||||
|
||||
- function: EVAL
|
||||
output_fields: [ server_zone ]
|
||||
parameters:
|
||||
value_expression: "(flags & 16) == 16 ? 'internal' : 'external'"
|
||||
|
||||
- function: CN_IP_ZONE_LOOKUP
|
||||
lookup_fields: [ client_ip ]
|
||||
output_fields: [ client_zone ]
|
||||
parameters:
|
||||
kb_name: none
|
||||
#kb_name: cn_internal_ip
|
||||
|
||||
- function: CN_IP_ZONE_LOOKUP
|
||||
lookup_fields: [ server_ip ]
|
||||
output_fields: [ server_zone ]
|
||||
parameters:
|
||||
kb_name: none
|
||||
#kb_name: cn_internal_ip
|
||||
|
||||
- function: EVAL
|
||||
output_fields: [ sent_bytes ]
|
||||
parameters:
|
||||
value_expression: "sent_bytes == null ? 0 : sent_bytes"
|
||||
|
||||
- function: EVAL
|
||||
output_fields: [ sent_pkts ]
|
||||
parameters:
|
||||
value_expression: "sent_pkts == null ? 0 : sent_pkts"
|
||||
|
||||
- function: EVAL
|
||||
output_fields: [ received_bytes ]
|
||||
parameters:
|
||||
value_expression: "received_bytes == null ? 0 : received_bytes"
|
||||
|
||||
- function: EVAL
|
||||
output_fields: [ received_pkts ]
|
||||
parameters:
|
||||
value_expression: "received_pkts == null ? 0 : received_pkts"
|
||||
|
||||
- function: EVAL
|
||||
output_fields: [ traffic_inbound_byte ]
|
||||
parameters:
|
||||
value_expression: "client_zone == 'internal' && server_zone == 'external' ? received_bytes : traffic_inbound_byte"
|
||||
|
||||
- function: EVAL
|
||||
output_fields: [ traffic_outbound_byte ]
|
||||
parameters:
|
||||
value_expression: "client_zone == 'external' && server_zone == 'internal' ? received_bytes : traffic_outbound_byte"
|
||||
|
||||
- function: EVAL
|
||||
output_fields: [ traffic_inbound_pkt ]
|
||||
parameters:
|
||||
value_expression: "client_zone == 'internal' && server_zone == 'external' ? received_pkts : traffic_inbound_pkt"
|
||||
|
||||
- function: EVAL
|
||||
output_fields: [ traffic_outbound_pkt ]
|
||||
parameters:
|
||||
value_expression: "client_zone == 'external' && server_zone == 'internal' ? received_pkts : traffic_outbound_pkt"
|
||||
|
||||
- function: EVAL
|
||||
output_fields: [ traffic_outbound_byte ]
|
||||
parameters:
|
||||
value_expression: "client_zone == 'internal' && server_zone == 'external' ? sent_bytes : traffic_outbound_byte"
|
||||
|
||||
- function: EVAL
|
||||
output_fields: [ traffic_inbound_byte ]
|
||||
parameters:
|
||||
value_expression: "client_zone == 'external' && server_zone == 'internal' ? sent_bytes : traffic_inbound_byte"
|
||||
|
||||
- function: EVAL
|
||||
output_fields: [ traffic_outbound_pkt ]
|
||||
parameters:
|
||||
value_expression: "client_zone == 'internal' && server_zone == 'external' ? sent_pkts : traffic_outbound_pkt"
|
||||
|
||||
- function: EVAL
|
||||
output_fields: [ traffic_inbound_pkt ]
|
||||
parameters:
|
||||
value_expression: "client_zone == 'external' && server_zone == 'internal' ? sent_pkts : traffic_inbound_pkt"
|
||||
|
||||
- function: EVAL
|
||||
output_fields: [ traffic_internal_byte ]
|
||||
parameters:
|
||||
value_expression: "client_zone == 'internal' && server_zone == 'internal' ? sent_bytes + received_bytes : traffic_internal_byte"
|
||||
|
||||
- function: EVAL
|
||||
output_fields: [ traffic_internal_pkt ]
|
||||
parameters:
|
||||
value_expression: "client_zone == 'internal' && server_zone == 'internal' ? sent_pkts + received_pkts : traffic_internal_pkt"
|
||||
|
||||
- function: EVAL
|
||||
output_fields: [ traffic_through_byte ]
|
||||
parameters:
|
||||
value_expression: "client_zone == 'external' && server_zone == 'external' ? sent_bytes + received_bytes : traffic_through_byte"
|
||||
|
||||
- function: EVAL
|
||||
output_fields: [ traffic_through_pkt ]
|
||||
parameters:
|
||||
value_expression: "client_zone == 'external' && server_zone == 'external' ? sent_pkts + received_pkts : traffic_through_pkt"
|
||||
|
||||
- function: EVAL
|
||||
output_fields: [ sessions ]
|
||||
parameters:
|
||||
value_expression: "1"
|
||||
|
||||
- function: EVAL
|
||||
output_fields: [ internal_query_num ]
|
||||
parameters:
|
||||
value_expression: "client_zone == 'internal' ? sessions : internal_query_num"
|
||||
|
||||
- function: EVAL
|
||||
output_fields: [ external_query_num ]
|
||||
parameters:
|
||||
value_expression: "client_zone == 'external' ? sessions : external_query_num"
|
||||
|
||||
- function: CN_ANONYMITY_LOOKUP
|
||||
lookup_fields: [ server_ip ]
|
||||
output_fields: [ server_node_type ]
|
||||
parameters:
|
||||
kb_name: cn_ioc_darkweb
|
||||
option: IP_TO_NODE_TYPE
|
||||
|
||||
- function: CN_ANONYMITY_LOOKUP
|
||||
lookup_fields: [ domain ]
|
||||
output_fields: [ domain_node_type ]
|
||||
parameters:
|
||||
kb_name: cn_ioc_darkweb
|
||||
option: DOMAIN_TO_NODE_TYPE
|
||||
|
||||
- function: CN_IOC_LOOKUP
|
||||
lookup_fields: [ server_ip ]
|
||||
output_fields: [ server_malware ]
|
||||
parameters:
|
||||
kb_name: cn_ioc_malware
|
||||
option: IP_TO_MALWARE
|
||||
|
||||
- function: CN_IOC_LOOKUP
|
||||
lookup_fields: [ domain ]
|
||||
output_fields: [ domain_malware ]
|
||||
parameters:
|
||||
kb_name: cn_ioc_malware
|
||||
option: DOMAIN_TO_MALWARE
|
||||
|
||||
- function: CN_INTELLIGENCE_INDICATOR_LOOKUP
|
||||
lookup_fields: [ client_ip ]
|
||||
output_fields: [ client_ip_tags ]
|
||||
parameters:
|
||||
kb_name: cn_intelligence_indicator
|
||||
option: IP_TO_TAG
|
||||
|
||||
- function: CN_INTELLIGENCE_INDICATOR_LOOKUP
|
||||
lookup_fields: [ server_ip ]
|
||||
output_fields: [ server_ip_tags ]
|
||||
parameters:
|
||||
kb_name: cn_intelligence_indicator
|
||||
option: IP_TO_TAG
|
||||
|
||||
- function: CN_INTELLIGENCE_INDICATOR_LOOKUP
|
||||
lookup_fields: [ domain ]
|
||||
output_fields: [ domain_tags ]
|
||||
parameters:
|
||||
kb_name: cn_intelligence_indicator
|
||||
option: DOMAIN_TO_TAG
|
||||
|
||||
- function: GENERATE_STRING_ARRAY
|
||||
lookup_fields: [ client_idc_renter,client_ip_tags ]
|
||||
output_fields: [ client_ip_tags ]
|
||||
|
||||
- function: GENERATE_STRING_ARRAY
|
||||
lookup_fields: [ server_idc_renter,server_dns_server,server_node_type,server_malware,server_ip_tags ]
|
||||
output_fields: [ server_ip_tags ]
|
||||
|
||||
- function: GENERATE_STRING_ARRAY
|
||||
lookup_fields: [ domain_node_type,domain_malware,domain_tags ]
|
||||
output_fields: [ domain_tags ]
|
||||
|
||||
postprocessing_pipelines:
|
||||
post_output_field_processor:
|
||||
type: com.geedgenetworks.core.processor.projection.ProjectionProcessorImpl
|
||||
output_fields: [ recv_time,log_id,flags,start_timestamp_ms,end_timestamp_ms,duration_ms,decoded_as,client_ip,server_ip,client_port,server_port,app,app_transition,decoded_path,ip_protocol,l7_protocol,out_link_id,in_link_id,subscriber_id,imei,imsi,phone_number,apn,http_url,dns_rcode,dns_qname,dns_qtype,dns_rr,out_link_direction,in_link_direction,server_fqdn,server_domain,domain,domain_sld,domain_category_name,domain_category_group,domain_reputation_level,domain_icp_company_name,domain_whois_org,domain_tags,client_zone,client_country_region,client_super_admin_area,client_admin_area,client_longitude,client_latitude,client_isp,client_asn,client_ip_tags,server_zone,server_country_region,server_super_admin_area,server_admin_area,server_longitude,server_latitude,server_isp,server_asn,server_ip_tags,app_category,app_subcategory,app_company,app_company_category,app_tags,sent_pkts,sent_bytes,received_pkts,received_bytes,sessions,tcp_c2s_lost_bytes,tcp_s2c_lost_bytes,tcp_c2s_o3_pkts,tcp_s2c_o3_pkts,tcp_c2s_rtx_bytes,tcp_s2c_rtx_bytes,tcp_c2s_rtx_pkts,tcp_s2c_rtx_pkts,tcp_rtt_ms,http_response_latency_ms,ssl_handshake_latency_ms,dns_response_latency_ms,cn_internal_rule_id_list,cn_internal_ioc_type_list,traffic_inbound_byte,traffic_inbound_pkt,traffic_outbound_byte,traffic_outbound_pkt,traffic_internal_byte,traffic_internal_pkt,traffic_through_byte,traffic_through_pkt,internal_query_num,external_query_num ]
|
||||
|
||||
sinks:
|
||||
kafka_sink:
|
||||
type: kafka
|
||||
properties:
|
||||
topic: {{ kafka_sink_topic }}
|
||||
kafka.bootstrap.servers: {{ kafka_sink_bootstrap_servers }}
|
||||
kafka.client.id: {{ kafka_sink_topic }}
|
||||
kafka.retries: 0
|
||||
kafka.linger.ms: 10
|
||||
kafka.request.timeout.ms: 30000
|
||||
kafka.batch.size: 262144
|
||||
kafka.buffer.memory: 134217728
|
||||
kafka.max.request.size: 10485760
|
||||
kafka.compression.type: snappy
|
||||
kafka.security.protocol: SASL_PLAINTEXT
|
||||
kafka.sasl.mechanism: PLAIN
|
||||
kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
|
||||
format: json
|
||||
json.ignore.parse.errors: false
|
||||
log.failures.only: true
|
||||
|
||||
application:
|
||||
env:
|
||||
name: {{ job_name }}
|
||||
shade.identifier: aes
|
||||
pipeline:
|
||||
object-reuse: true
|
||||
topology:
|
||||
- name: kafka_source
|
||||
downstream: [ etl_processor ]
|
||||
- name: etl_processor
|
||||
downstream: [ post_output_field_processor ]
|
||||
- name: post_output_field_processor
|
||||
downstream: [ kafka_sink ]
|
||||
- name: kafka_sink
|
||||
downstream: [ ]
|
||||
@@ -1,97 +0,0 @@
|
||||
#服务端口
|
||||
server:
|
||||
port: 8186
|
||||
max-http-header-size: 20MB
|
||||
tomcat:
|
||||
max-threads: 400
|
||||
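# Tomcat resource cache size, in KB (Tomcat's default is 10 MB). NOTE(review): the value below, 1000000 KB, is about 1 GB, not the "10g" the original comment stated - confirm which one is intended.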
|
||||
tomcat:
|
||||
cacheMaxSize: 1000000
|
||||
#hbase参数
|
||||
hbase:
|
||||
zookeeperQuorum: 192.168.44.11:2181,192.168.44.14:2181,192.168.44.15:2181
|
||||
zookeeperPort: 2181
|
||||
zookeeperNodeParent: /hbase
|
||||
clientRetriesNumber: 9
|
||||
rpcTimeout: 100000
|
||||
connectPool: 10
|
||||
clientWriteBuffer: 10485760
|
||||
clientKeyValueMaxsize: 1073741824
|
||||
mobThreshold: 10485760
|
||||
#part的最大数量
|
||||
maxParts: 100000
|
||||
#每次获取的part数
|
||||
getPartBatch: 10
|
||||
#hbase索引表前缀,前缀为以下的都为索引表
|
||||
timeIndexTablePrefix: index_time_
|
||||
filenameIndexTablePrefix: index_filename_
|
||||
partFileIndexTablePrefix: index_partfile_
|
||||
systemBucketMeta: system:bucket_meta
|
||||
#创建表的分区数
|
||||
regionCount: 16
|
||||
filenameHead: 0,1,2,3,4,5,6,7,8,9,a,b,c,d,e,f
|
||||
partHead: 0,1,2,3,4,5,6,7,8,9,a,b,c,d,e,f
|
||||
#获取文件大小的目录
|
||||
dataPath: /hbase
|
||||
#hadoop集群namenode节点,单机为单个ip,集群为ip1,ip2
|
||||
hadoopNameNodes: 192.168.44.10,192.168.44.11
|
||||
#副本数,单机为1,集群为2
|
||||
hadoopReplication: 2
|
||||
#hadoop端口
|
||||
hadoopPort: 9000
|
||||
hadoopUser: root
|
||||
hadoopNameServices: ns1
|
||||
hadoopNameNodesNs1: nn1,nn2
|
||||
asyncPut: 0
|
||||
#是否打开验证,0打开,打开需要使用S3身份验证或者token访问服务
|
||||
auth:
|
||||
open: 0
|
||||
#http访问使用的token
|
||||
token: ENC(vknRT6U4I739rLIha9CvojM+4uFyXZLEYpO2HZayLnRak1HPW0K2yZ3vnQBA2foo)
|
||||
#s3验证
|
||||
s3:
|
||||
accesskey: ENC(FUQDvVP+zqCiwHQhXcRvbw==)
|
||||
secretkey: ENC(FUQDvVP+zqCiwHQhXcRvbw==)
|
||||
hos:
|
||||
#文件大小阈值
|
||||
maxFileSize: 5073741800
|
||||
#大文件阈值
|
||||
uploadThreshold: 104857600
|
||||
#长连接超时时间
|
||||
keepAliveTimeout: 60000
|
||||
#批量删除对象的最大数量
|
||||
deleteMultipleNumber: 1000
|
||||
#获取对象列表等操作的最大值
|
||||
maxResultLimit: 100000
|
||||
#分块上传的最大分块数
|
||||
maxPartNumber: 10000
|
||||
#追加上传的最大次数
|
||||
maxAppendNumber: 100000
|
||||
#是否快速上传
|
||||
isQuickUpload: 0
|
||||
#是否快速下载文件,1打开,hbase内存小于20G的集群设为0
|
||||
isQuickDownloadFile: 0
|
||||
#用户白名单(hbase的namespace),获取存储配额
|
||||
users: default
|
||||
#是否打开限流,0:关闭,1:打开
|
||||
openRateLimiter: 0
|
||||
#限流每秒请求数
|
||||
rateLimiterQps: 20000
|
||||
#设置上传文件大小的最大值
|
||||
spring:
|
||||
servlet:
|
||||
multipart:
|
||||
max-file-size: 5GB
|
||||
max-request-size: 5GB
|
||||
#Prometheus参数
|
||||
application:
|
||||
name: HosServiceApplication
|
||||
#Prometheus参数
|
||||
management:
|
||||
endpoints:
|
||||
web:
|
||||
exposure:
|
||||
include: '*'
|
||||
metrics:
|
||||
tags:
|
||||
application: ${spring.application.name}
|
||||
@@ -1,21 +0,0 @@
|
||||
qgw.serverAddr=http://{{ vrrp_instance.default.virtual_ipaddress }}:9999
|
||||
hos.serverAddr=http://{{ vrrp_instance.oss.virtual_ipaddress }}:9098
|
||||
hos.token={{ hos_token }}
|
||||
kafka.server={{ groups.kafka[0] }}:9092
|
||||
#延迟时间,校验多少秒之前的文件,单位秒
|
||||
check.time.delay=180
|
||||
hos.traffic.buckets=traffic_policy_capture_file_bucket,traffic_rtp_file_bucket,traffic_http_file_bucket,traffic_eml_file_bucket
|
||||
kafka.traffic.topics=TRAFFIC-POLICY-CAPTURE-FILE-STREAM-RECORD,TRAFFIC-RTP-FILE-STREAM-RECORD,TRAFFIC-HTTP-FILE-STREAM-RECORD,TRAFFIC-EML-FILE-STREAM-RECORD
|
||||
kafka.troubleshooting.topic=TROUBLESHOOTING-FILE-STREAM-RECORD
|
||||
file.chunk.combiner.window.time=15000
|
||||
traffic.file.count=10
|
||||
threads=1
|
||||
max.threads=10
|
||||
print.out.interval=1000
|
||||
http.max.total=100
|
||||
http.default.max.per.route=100
|
||||
http.connect.timeout=5000
|
||||
http.connection.request.timeout=10000
|
||||
http.socket.timeout=-1
|
||||
hos.log.types=security_event,monitor_event,proxy_event,session_record,voip_record,assessment_event,transaction_record,troubleshooting
|
||||
hos.log.types.file.types.url.fields=security_event:http-http_response_body&http_request_body,pcap-packet_capture_file&rtp_pcap_path,eml-mail_eml_file;proxy_event:http-http_response_body&http_request_body;session_record:http-http_response_body&http_request_body,pcap-packet_capture_file&rtp_pcap_path,eml-mail_eml_file;voip_record:pcap-rtp_pcap_path;assessment_event:other-assessment_file;transaction_record:http-http_response_body&http_request_body,eml-mail_eml_file;monitor_event:http-http_response_body&http_request_body,pcap-packet_capture_file&rtp_pcap_path,eml-mail_eml_file
|
||||
Binary file not shown.
@@ -1,138 +0,0 @@
|
||||
#!/bin/bash
# Command-line wrapper around the galaxy-hos-util jar.

# Release number of the utility; it is also part of the jar file name.
version='1.4'
# Jar launched by every sub-command (given as a bare file name, so it is
# looked up relative to the current working directory).
jar="galaxy-hos-util-${version}.jar"
|
||||
|
||||
# Print the command-line help text to stdout.
# Takes no arguments and does not exit; callers decide the exit status.
usage() {
    cat <<EOF

Usage: ./hosutil.sh [command] [-h] [options...]

Available commands:
download Download individual or batch files
upload Upload individual or batch files
check Check file availability
combiner Verify if the file-chunk-combiner data stream is correct
version Print the version

Options for 'download' command:
-b, --bucket The bucket to access.
-d, --directory Directory to save files. If not exists, will be created. Default is ./download/.
-k, --keys Files to download. Can be a single or multiple files separated by commas.
-p, --prefix Prefix for batch downloading files based on file name.
-s, --start_time Start time in UTC format (yyyyMMdd, yyyy-MM-dd, yyyyMMddHHmmss). Default is the previous day's time.
-e, --end_time End time in UTC format (yyyyMMdd, yyyy-MM-dd, yyyyMMddHHmmss). Default is current time.
-c, --count Number of files to download. Default is 1000, maximum is 100000.
-t, --threads Number of threads. Default is 1, maximum is 10.

Options for 'upload' command:
-b, --bucket The bucket to access.
-d, --directory Directory where files to upload are located. Default is ./upload/.
-t, --threads Number of threads. Default is 1, maximum is 10.

Options for 'check' command:
-s, --start_time Start time in UTC format (yyyyMMdd, yyyy-MM-dd, yyyyMMddHHmmss). Default is the previous day's time.
-e, --end_time End time in UTC format (yyyyMMdd, yyyy-MM-dd, yyyyMMddHHmmss). Default is current time.
-c, --count Number of logs to evaluate. Default is 1000, maximum is 100000.
-d, --data_center Specify the data centers to evaluate, separated by commas. If not specified, all data centers are evaluated.
-l, --log_type Specify the logs to evaluate, separated by commas. If not specified, all logs are evaluated.
Supported logs: security_event, monitor_event, proxy_event, session_record, voip_record, assessment_event, transaction_record, troubleshooting.
-f, --file_type Specify file types. If not specified, all types are evaluated. Supported types: eml, http, pcap, other.
Only session_record, security_event, monitor_event, transaction_record support multiple types.
-t, --threads Number of threads. Default is 1, maximum is 10.

Options for 'combiner' command:
-j, --job Job to verify. Options: traffic, troubleshooting. Default is traffic.(Troubleshooting job removed in version 24.05)

EOF
}
|
||||
|
||||
# Default values for every command-line option.
# String-valued options start out empty.
bucket= directory= keys= prefix=
start_time= end_time=
log_type= file_type= data_center=
# Options that carry a non-empty default.
count=1000
threads=1
job_name=traffic
|
||||
|
||||
# Verify that the mandatory options of the selected operation are present.
# Reads the globals $operation and $bucket. Only download and upload have a
# mandatory option (the bucket); all other operations pass unconditionally.
# Prints an error message and exits with status 1 when the bucket is missing.
check_required() {
    # Operations other than download/upload need no specific option.
    if [[ $operation != download && $operation != upload ]]; then
        return 0
    fi
    if [[ -n $bucket ]]; then
        return 0
    fi
    echo "Error: bucket is required for $operation."
    exit 1
}
|
||||
|
||||
# Download one or more files from a bucket through the utility jar.
# Reads the globals $jar, $bucket, $directory, $keys, $prefix, $count,
# $start_time, $end_time and $threads. The target directory falls back to
# ./download/ when none was given; the bucket is validated by check_required.
download() {
    directory=${directory:-"./download/"}
    check_required
    # Every expansion is quoted so that values containing spaces or glob
    # characters reach the jar as single, unmodified arguments.
    java -jar "$jar" download "$bucket" "$directory" \
        "keys=$keys" "prefix=$prefix" "max_keys=$count" \
        "time_range=$start_time/$end_time" "thread_num=$threads"
}
|
||||
|
||||
# Upload the files found in a local directory to a bucket through the jar.
# Reads the globals $jar, $bucket, $directory and $threads. The source
# directory falls back to ./upload/ when none was given; the bucket is
# validated by check_required.
upload() {
    directory=${directory:-"./upload/"}
    check_required
    # Quoted expansions keep paths with spaces or glob characters intact.
    java -jar "$jar" upload "$bucket" "$directory" "thread_num=$threads"
}
|
||||
|
||||
# Run the jar's 'check' command (file availability check).
# Reads the globals $jar, $data_center, $log_type, $file_type, $count,
# $start_time, $end_time and $threads and forwards them as key=value pairs.
check() {
    # Quoted expansions prevent word splitting and globbing of the values.
    java -jar "$jar" check \
        "data_center=$data_center" "log_type=$log_type" "file_type=$file_type" \
        "max_logs=$count" "time_range=$start_time/$end_time" "thread_num=$threads"
}
|
||||
|
||||
# Run the jar's 'combiner' command for the job named by the global $job_name
# (default "traffic").
combiner() {
    # Quoted so the job name is always passed as exactly one argument.
    java -jar "$jar" combiner "$job_name"
}
|
||||
|
||||
# Main flow: when the script is called without any argument, show the help
# text and stop successfully.
if [[ $# -eq 0 ]]; then
    usage
    exit 0
fi

# The first argument names the operation; drop it so that only the
# operation's options remain in the positional parameters.
operation=$1
shift
|
||||
# Parse the short options that follow the operation name.
# The leading ':' in the optstring selects silent error reporting, so the
# '\?' (unknown option) and ':' (missing argument) arms print the diagnostics.
# -h takes no argument, hence no ':' after 'h' in the optstring.
# NOTE: getopts only understands short options; the long forms listed in the
# usage text (e.g. --bucket) are not parsed by this loop.
while getopts ":hb:d:k:p:s:e:c:t:l:f:j:" opt; do
    case $opt in
        h) usage; exit 0 ;;
        b) bucket=$OPTARG ;;
        # -d is the data center for 'check' and the directory otherwise.
        d) if [ "$operation" == "check" ]; then data_center=$OPTARG; else directory=$OPTARG; fi ;;
        k) keys=$OPTARG ;;
        p) prefix=$OPTARG ;;
        s) start_time=$OPTARG ;;
        # Must be spelled end_time: that is the variable download/check read.
        e) end_time=$OPTARG ;;
        c) count=$OPTARG ;;
        t) threads=$OPTARG ;;
        l) log_type=$OPTARG ;;
        f) file_type=$OPTARG ;;
        j) job_name=$OPTARG ;;
        \?) echo "Invalid option: -$OPTARG" >&2; usage; exit 1 ;;
        :) echo "Option -$OPTARG requires an argument" >&2; usage; exit 1 ;;
    esac
done
|
||||
|
||||
# Dispatch to the handler of the requested operation; anything unknown
# prints the help text and exits with status 1.
case "$operation" in
    download)
        download
        ;;
    upload)
        upload
        ;;
    check)
        check
        ;;
    combiner)
        combiner
        ;;
    version)
        printf '%s\n' "$version"
        ;;
    *)
        usage
        exit 1
        ;;
esac
|
||||
|
||||
@@ -0,0 +1 @@
|
||||
MariaDB 数据初始化脚本
|
||||
@@ -0,0 +1 @@
|
||||
全局安装脚本管理
|
||||
Reference in New Issue
Block a user