TSG-21568 ETL任务不再填充server_domain
This commit is contained in:
@@ -73,11 +73,6 @@ processing_pipelines:
|
||||
parameters:
|
||||
value_expression: recv_time
|
||||
|
||||
- function: DOMAIN
|
||||
lookup_fields: [http_host, ssl_sni, dtls_sni, quic_sni]
|
||||
output_fields: [server_domain]
|
||||
parameters:
|
||||
option: FIRST_SIGNIFICANT_SUBDOMAIN
|
||||
|
||||
- function: BASE64_DECODE_TO_STRING
|
||||
output_fields: [mail_subject]
|
||||
|
||||
@@ -73,12 +73,6 @@ processing_pipelines:
|
||||
parameters:
|
||||
value_expression: recv_time
|
||||
|
||||
- function: DOMAIN
|
||||
lookup_fields: [http_host, ssl_sni, dtls_sni, quic_sni]
|
||||
output_fields: [server_domain]
|
||||
parameters:
|
||||
option: FIRST_SIGNIFICANT_SUBDOMAIN
|
||||
|
||||
- function: BASE64_DECODE_TO_STRING
|
||||
output_fields: [mail_subject]
|
||||
parameters:
|
||||
|
||||
@@ -77,12 +77,6 @@ processing_pipelines:
|
||||
parameters:
|
||||
value_expression: recv_time
|
||||
|
||||
- function: DOMAIN
|
||||
lookup_fields: [http_host, ssl_sni, dtls_sni, quic_sni]
|
||||
output_fields: [server_domain]
|
||||
parameters:
|
||||
option: FIRST_SIGNIFICANT_SUBDOMAIN
|
||||
|
||||
- function: BASE64_DECODE_TO_STRING
|
||||
output_fields: [mail_subject]
|
||||
parameters:
|
||||
|
||||
@@ -73,11 +73,6 @@ processing_pipelines:
|
||||
parameters:
|
||||
value_expression: recv_time
|
||||
|
||||
- function: DOMAIN
|
||||
lookup_fields: [http_host, ssl_sni, dtls_sni, quic_sni]
|
||||
output_fields: [server_domain]
|
||||
parameters:
|
||||
option: FIRST_SIGNIFICANT_SUBDOMAIN
|
||||
|
||||
- function: BASE64_DECODE_TO_STRING
|
||||
output_fields: [mail_subject]
|
||||
|
||||
@@ -3,7 +3,7 @@ sources:
|
||||
type: kafka
|
||||
properties:
|
||||
topic: DOS-EVENT
|
||||
kafka.bootstrap.servers: 192.168.44.11:9094,192.168.44.13:9094,192.168.44.14:9094,192.168.44.15:9094,192.168.44.16:9094
|
||||
kafka.bootstrap.servers: "{{ kafka_source_servers }}"
|
||||
kafka.session.timeout.ms: 60000
|
||||
kafka.max.poll.records: 3000
|
||||
kafka.max.partition.fetch.bytes: 31457280
|
||||
@@ -27,7 +27,7 @@ sinks:
|
||||
clickhouse_sink:
|
||||
type: clickhouse
|
||||
properties:
|
||||
host: 192.168.44.13:9001,192.168.44.14:9001,192.168.44.15:9001,192.168.44.16:9001
|
||||
host: "{{ clickhouse_servers }}"
|
||||
table: tsg_galaxy_v3.dos_event_local
|
||||
batch.size: 100000
|
||||
batch.interval: 30s
|
||||
|
||||
@@ -3,7 +3,7 @@ sources:
|
||||
type: kafka
|
||||
properties:
|
||||
topic: DATAPATH-TELEMETRY-RECORD
|
||||
kafka.bootstrap.servers: 192.168.44.11:9094,192.168.44.13:9094,192.168.44.14:9094,192.168.44.15:9094,192.168.44.16:9094
|
||||
kafka.bootstrap.servers: "{{ kafka_source_servers }}"
|
||||
kafka.session.timeout.ms: 60000
|
||||
kafka.max.poll.records: 3000
|
||||
kafka.max.partition.fetch.bytes: 31457280
|
||||
@@ -46,7 +46,7 @@ sinks:
|
||||
clickhouse_sink:
|
||||
type: clickhouse
|
||||
properties:
|
||||
host: 192.168.44.13:9001,192.168.44.14:9001,192.168.44.15:9001,192.168.44.16:9001
|
||||
host: "{{ clickhouse_servers }}"
|
||||
table: tsg_galaxy_v3.datapath_telemetry_record_local
|
||||
batch.size: 5000
|
||||
batch.interval: 30s
|
||||
|
||||
@@ -7,7 +7,7 @@ sources:
|
||||
# watermark_lag: 60 # [number] Watermark Lag, default is 60
|
||||
properties:
|
||||
topic: PROXY-EVENT
|
||||
kafka.bootstrap.servers: 192.168.44.11:9094,192.168.44.13:9094,192.168.44.14:9094,192.168.44.15:9094,192.168.44.16:9094
|
||||
kafka.bootstrap.servers: "{{ kafka_source_servers }}"
|
||||
kafka.session.timeout.ms: 60000
|
||||
kafka.max.poll.records: 3000
|
||||
kafka.max.partition.fetch.bytes: 31457280
|
||||
@@ -71,12 +71,6 @@ processing_pipelines:
|
||||
parameters:
|
||||
value_expression: recv_time
|
||||
|
||||
- function: DOMAIN
|
||||
lookup_fields: [http_host, ssl_sni, dtls_sni, quic_sni]
|
||||
output_fields: [server_domain]
|
||||
parameters:
|
||||
option: FIRST_SIGNIFICANT_SUBDOMAIN
|
||||
|
||||
- function: BASE64_DECODE_TO_STRING
|
||||
output_fields: [mail_subject]
|
||||
parameters:
|
||||
@@ -123,7 +117,7 @@ sinks:
|
||||
clickhouse_sink:
|
||||
type: clickhouse
|
||||
properties:
|
||||
host: 192.168.44.13:9001,192.168.44.14:9001,192.168.44.15:9001,192.168.44.16:9001
|
||||
host: "{{ clickhouse_servers }}"
|
||||
table: tsg_galaxy_v3.proxy_event_local
|
||||
batch.size: 100000
|
||||
batch.interval: 30s
|
||||
|
||||
@@ -7,7 +7,7 @@ sources:
|
||||
# watermark_lag: 60 # [number] Watermark Lag, default is 60
|
||||
properties:
|
||||
topic: SESSION-RECORD
|
||||
kafka.bootstrap.servers: 192.168.44.11:9094,192.168.44.13:9094,192.168.44.14:9094,192.168.44.15:9094,192.168.44.16:9094
|
||||
kafka.bootstrap.servers: "{{ kafka_source_servers }}"
|
||||
kafka.session.timeout.ms: 60000
|
||||
kafka.max.poll.records: 3000
|
||||
kafka.max.partition.fetch.bytes: 31457280
|
||||
@@ -71,12 +71,6 @@ processing_pipelines:
|
||||
parameters:
|
||||
value_expression: recv_time
|
||||
|
||||
- function: DOMAIN
|
||||
lookup_fields: [http_host, ssl_sni, dtls_sni, quic_sni]
|
||||
output_fields: [server_domain]
|
||||
parameters:
|
||||
option: FIRST_SIGNIFICANT_SUBDOMAIN
|
||||
|
||||
- function: BASE64_DECODE_TO_STRING
|
||||
output_fields: [mail_subject]
|
||||
parameters:
|
||||
@@ -123,7 +117,7 @@ sinks:
|
||||
clickhouse_sink:
|
||||
type: clickhouse
|
||||
properties:
|
||||
host: 192.168.44.13:9001,192.168.44.14:9001,192.168.44.15:9001,192.168.44.16:9001
|
||||
host: "{{ clickhouse_servers }}"
|
||||
table: tsg_galaxy_v3.session_record_local
|
||||
batch.size: 100000
|
||||
batch.interval: 30s
|
||||
|
||||
@@ -0,0 +1,92 @@
|
||||
sources:
|
||||
kafka_source:
|
||||
type: kafka
|
||||
properties:
|
||||
topic: TRAFFIC-SKETCH-METRIC
|
||||
kafka.bootstrap.servers: "{{ kafka_source_servers }}"
|
||||
kafka.session.timeout.ms: 60000
|
||||
kafka.max.poll.records: 3000
|
||||
kafka.max.partition.fetch.bytes: 31457280
|
||||
kafka.security.protocol: SASL_PLAINTEXT
|
||||
kafka.sasl.mechanism: PLAIN
|
||||
kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a7ff0b2d3889a424249967b3870b50993d9644f239f0de82cdb13bdb502959e16afadffa49ef1e1d2b9c9b5113e619817
|
||||
kafka.group.id: etl_traffic_sketch_metric
|
||||
kafka.auto.offset.reset: latest
|
||||
kafka.compression.type: none
|
||||
format: json
|
||||
|
||||
processing_pipelines:
|
||||
etl_processor: # [object] Processing Pipeline
|
||||
type: com.geedgenetworks.core.processor.projection.ProjectionProcessorImpl
|
||||
remove_fields:
|
||||
output_fields:
|
||||
functions: # [array of object] Function List
|
||||
|
||||
- function: FLATTEN
|
||||
lookup_fields: [ fields,tags ]
|
||||
output_fields: [ ]
|
||||
parameters:
|
||||
#prefix: ""
|
||||
depth: 3
|
||||
# delimiter: "."
|
||||
|
||||
- function: RENAME
|
||||
lookup_fields: [ '' ]
|
||||
output_fields: [ '' ]
|
||||
filter:
|
||||
parameters:
|
||||
# parent_fields: [tags]
|
||||
#rename_fields:
|
||||
# tags: tags
|
||||
rename_expression: key =string.replace_all(key,'tags.','');key =string.replace_all(key,'fields.','');return key;
|
||||
|
||||
- function: EVAL
|
||||
output_fields: [ internal_ip ]
|
||||
parameters:
|
||||
value_expression: 'direction=Outbound? client_ip : server_ip'
|
||||
- function: EVAL
|
||||
output_fields: [ external_ip ]
|
||||
parameters:
|
||||
value_expression: 'direction=Outbound? server_ip : client_ip'
|
||||
|
||||
- function: UNIX_TIMESTAMP_CONVERTER
|
||||
lookup_fields: [ timestamp_ms ]
|
||||
output_fields: [ recv_time ]
|
||||
parameters:
|
||||
precision: seconds
|
||||
|
||||
- function: SNOWFLAKE_ID
|
||||
lookup_fields: [ '' ]
|
||||
output_fields: [ log_id ]
|
||||
filter:
|
||||
parameters:
|
||||
data_center_id_num: 1
|
||||
|
||||
|
||||
sinks:
|
||||
clickhouse_sink:
|
||||
type: clickhouse
|
||||
properties:
|
||||
host: "{{ clickhouse_servers }}"
|
||||
table: tsg_galaxy_v3.traffic_sketch_metric_local
|
||||
batch.size: 100000
|
||||
batch.interval: 30s
|
||||
connection.user: e54c9568586180eede1506eecf3574e9
|
||||
connection.password: 86cf0e2ffba3f541a6c6761313e5cc7e
|
||||
|
||||
|
||||
application:
|
||||
|
||||
env: # [object] Environment Variables
|
||||
name: etl_traffic_sketch_metric # [string] Job Name
|
||||
shade.identifier: aes
|
||||
pipeline:
|
||||
object-reuse: true # [boolean] Object Reuse, default is false
|
||||
topology:
|
||||
- name: kafka_source
|
||||
downstream: [etl_processor]
|
||||
- name: etl_processor
|
||||
downstream: [clickhouse_sink]
|
||||
- name: clickhouse_sink
|
||||
|
||||
|
||||
@@ -7,7 +7,7 @@ sources:
|
||||
# watermark_lag: 60 # [number] Watermark Lag, default is 60
|
||||
properties:
|
||||
topic: TRANSACTION-RECORD
|
||||
kafka.bootstrap.servers: 192.168.44.11:9094,192.168.44.13:9094,192.168.44.14:9094,192.168.44.15:9094,192.168.44.16:9094
|
||||
kafka.bootstrap.servers: "{{ kafka_source_servers }}"
|
||||
kafka.session.timeout.ms: 60000
|
||||
kafka.max.poll.records: 3000
|
||||
kafka.max.partition.fetch.bytes: 31457280
|
||||
@@ -71,12 +71,6 @@ processing_pipelines:
|
||||
parameters:
|
||||
value_expression: recv_time
|
||||
|
||||
- function: DOMAIN
|
||||
lookup_fields: [http_host, ssl_sni, dtls_sni, quic_sni]
|
||||
output_fields: [server_domain]
|
||||
parameters:
|
||||
option: FIRST_SIGNIFICANT_SUBDOMAIN
|
||||
|
||||
- function: BASE64_DECODE_TO_STRING
|
||||
output_fields: [mail_subject]
|
||||
parameters:
|
||||
@@ -123,7 +117,7 @@ sinks:
|
||||
clickhouse_sink:
|
||||
type: clickhouse
|
||||
properties:
|
||||
host: 192.168.44.13:9001,192.168.44.14:9001,192.168.44.15:9001,192.168.44.16:9001
|
||||
host: "{{ clickhouse_servers }}"
|
||||
table: tsg_galaxy_v3.transaction_record_local
|
||||
batch.size: 100000
|
||||
batch.interval: 30s
|
||||
|
||||
@@ -7,7 +7,7 @@ sources:
|
||||
# watermark_lag: 60 # [number] Watermark Lag, default is 60
|
||||
properties:
|
||||
topic: VOIP-CONVERSATION-RECORD
|
||||
kafka.bootstrap.servers: 192.168.44.11:9094,192.168.44.13:9094,192.168.44.14:9094,192.168.44.15:9094,192.168.44.16:9094
|
||||
kafka.bootstrap.servers: "{{ kafka_source_servers }}"
|
||||
kafka.session.timeout.ms: 60000
|
||||
kafka.max.poll.records: 3000
|
||||
kafka.max.partition.fetch.bytes: 31457280
|
||||
@@ -71,12 +71,6 @@ processing_pipelines:
|
||||
parameters:
|
||||
value_expression: recv_time
|
||||
|
||||
- function: DOMAIN
|
||||
lookup_fields: [http_host, ssl_sni, dtls_sni, quic_sni]
|
||||
output_fields: [server_domain]
|
||||
parameters:
|
||||
option: FIRST_SIGNIFICANT_SUBDOMAIN
|
||||
|
||||
- function: BASE64_DECODE_TO_STRING
|
||||
output_fields: [mail_subject]
|
||||
parameters:
|
||||
@@ -123,7 +117,7 @@ sinks:
|
||||
clickhouse_sink:
|
||||
type: clickhouse
|
||||
properties:
|
||||
host: 192.168.44.13:9001,192.168.44.14:9001,192.168.44.15:9001,192.168.44.16:9001
|
||||
host: "{{ clickhouse_servers }}"
|
||||
table: tsg_galaxy_v3.voip_record_local
|
||||
batch.size: 100000
|
||||
batch.interval: 30s
|
||||
|
||||
@@ -0,0 +1,152 @@
|
||||
sources:
|
||||
kafka_source:
|
||||
type: kafka
|
||||
properties:
|
||||
topic: PROXY-EVENT
|
||||
kafka.bootstrap.servers: "{{ kafka_source_servers }}"
|
||||
kafka.session.timeout.ms: 60000
|
||||
kafka.max.poll.records: 3000
|
||||
kafka.max.partition.fetch.bytes: 31457280
|
||||
kafka.security.protocol: SASL_PLAINTEXT
|
||||
kafka.sasl.mechanism: PLAIN
|
||||
kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a7ff0b2d3889a424249967b3870b50993d9644f239f0de82cdb13bdb502959e16afadffa49ef1e1d2b9c9b5113e619817
|
||||
kafka.group.id: etl_proxy_event_kafka_to_ndc_kafka
|
||||
kafka.auto.offset.reset: latest
|
||||
format: json
|
||||
|
||||
processing_pipelines:
|
||||
etl_processor: # [object] Processing Pipeline
|
||||
type: com.geedgenetworks.core.processor.projection.ProjectionProcessorImpl
|
||||
remove_fields:
|
||||
output_fields:
|
||||
properties:
|
||||
key: value
|
||||
functions: # [array of object] Function List
|
||||
|
||||
- function: ASN_LOOKUP
|
||||
lookup_fields: [server_ip]
|
||||
output_fields: [server_asn]
|
||||
parameters:
|
||||
option: IP_TO_ASN
|
||||
kb_name: tsg_ip_asn
|
||||
|
||||
- function: ASN_LOOKUP
|
||||
lookup_fields: [client_ip]
|
||||
output_fields: [client_asn]
|
||||
parameters:
|
||||
option: IP_TO_ASN
|
||||
kb_name: tsg_ip_asn
|
||||
|
||||
- function: SNOWFLAKE_ID
|
||||
lookup_fields: ['']
|
||||
output_fields: [log_id]
|
||||
parameters:
|
||||
data_center_id_num: 1
|
||||
|
||||
- function: JSON_EXTRACT
|
||||
lookup_fields: [device_tag]
|
||||
output_fields: [data_center]
|
||||
filter:
|
||||
parameters:
|
||||
value_expression: $.tags[?(@.tag=='data_center')][0].value
|
||||
|
||||
- function: JSON_EXTRACT
|
||||
lookup_fields: [device_tag]
|
||||
output_fields: [device_group]
|
||||
filter:
|
||||
parameters:
|
||||
value_expression: $.tags[?(@.tag=='device_group')][0].value
|
||||
|
||||
- function: CURRENT_UNIX_TIMESTAMP
|
||||
output_fields: [processing_time]
|
||||
parameters:
|
||||
precision: seconds
|
||||
|
||||
- function: UNIX_TIMESTAMP_CONVERTER
|
||||
lookup_fields: [__timestamp]
|
||||
output_fields: [recv_time]
|
||||
parameters:
|
||||
precision: seconds
|
||||
|
||||
- function: EVAL
|
||||
output_fields: [ingestion_time]
|
||||
parameters:
|
||||
value_expression: recv_time
|
||||
|
||||
|
||||
- function: BASE64_DECODE_TO_STRING
|
||||
output_fields: [mail_subject]
|
||||
parameters:
|
||||
value_field: mail_subject
|
||||
charset_field: mail_subject_charset
|
||||
|
||||
- function: BASE64_DECODE_TO_STRING
|
||||
output_fields: [mail_attachment_name]
|
||||
parameters:
|
||||
value_field: mail_attachment_name
|
||||
charset_field: mail_attachment_name_charset
|
||||
|
||||
- function: PATH_COMBINE
|
||||
lookup_fields: [rtp_pcap_path]
|
||||
output_fields: [rtp_pcap_path]
|
||||
parameters:
|
||||
path: [props.hos.path, props.hos.bucket.name.traffic_file, rtp_pcap_path]
|
||||
|
||||
- function: PATH_COMBINE
|
||||
lookup_fields: [http_request_body]
|
||||
output_fields: [http_request_body]
|
||||
parameters:
|
||||
path: [props.hos.path, props.hos.bucket.name.traffic_file, http_request_body]
|
||||
|
||||
- function: PATH_COMBINE
|
||||
lookup_fields: [http_response_body]
|
||||
output_fields: [http_response_body]
|
||||
parameters:
|
||||
path: [props.hos.path, props.hos.bucket.name.traffic_file, http_response_body]
|
||||
|
||||
- function: PATH_COMBINE
|
||||
lookup_fields: [mail_eml_file]
|
||||
output_fields: [mail_eml_file]
|
||||
parameters:
|
||||
path: [props.hos.path, props.hos.bucket.name.traffic_file, mail_eml_file]
|
||||
|
||||
- function: PATH_COMBINE
|
||||
lookup_fields: [packet_capture_file]
|
||||
output_fields: [packet_capture_file]
|
||||
parameters:
|
||||
path: [props.hos.path, props.hos.bucket.name.traffic_file, packet_capture_file]
|
||||
|
||||
sinks:
|
||||
kafka_sink:
|
||||
type : kafka
|
||||
properties:
|
||||
topic: PROXY-EVENT-PROCESSED
|
||||
kafka.bootstrap.servers: "{{ kafka_sink_servers }}"
|
||||
kafka.retries: 0
|
||||
kafka.linger.ms: 10
|
||||
kafka.request.timeout.ms: 30000
|
||||
kafka.batch.size: 262144
|
||||
kafka.buffer.memory: 134217728
|
||||
kafka.max.request.size: 10485760
|
||||
kafka.compression.type: snappy
|
||||
kafka.security.protocol: SASL_PLAINTEXT
|
||||
kafka.sasl.mechanism: PLAIN
|
||||
kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a7ff0b2d3889a424249967b3870b50993d9644f239f0de82cdb13bdb502959e16afadffa49ef1e1d2b9c9b5113e619817
|
||||
format: json
|
||||
|
||||
|
||||
application:
|
||||
|
||||
env: # [object] Environment Variables
|
||||
name: etl_proxy_event_kafka_to_ndc_kafka # [string] Job Name
|
||||
shade.identifier: aes
|
||||
pipeline:
|
||||
object-reuse: true # [boolean] Object Reuse, default is false
|
||||
topology:
|
||||
- name: kafka_source
|
||||
downstream: [etl_processor]
|
||||
- name: etl_processor
|
||||
downstream: [kafka_sink]
|
||||
- name: kafka_sink
|
||||
|
||||
|
||||
@@ -0,0 +1,151 @@
|
||||
sources:
|
||||
kafka_source:
|
||||
type: kafka
|
||||
properties:
|
||||
topic: SESSION-RECORD
|
||||
kafka.bootstrap.servers: "{{ kafka_source_servers }}"
|
||||
kafka.session.timeout.ms: 60000
|
||||
kafka.max.poll.records: 3000
|
||||
kafka.max.partition.fetch.bytes: 31457280
|
||||
kafka.security.protocol: SASL_PLAINTEXT
|
||||
kafka.sasl.mechanism: PLAIN
|
||||
kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a7ff0b2d3889a424249967b3870b50993d9644f239f0de82cdb13bdb502959e16afadffa49ef1e1d2b9c9b5113e619817
|
||||
kafka.group.id: etl_session_record_kafka_to_ndc_kafka
|
||||
kafka.auto.offset.reset: latest
|
||||
format: json
|
||||
|
||||
processing_pipelines:
|
||||
etl_processor: # [object] Processing Pipeline
|
||||
type: com.geedgenetworks.core.processor.projection.ProjectionProcessorImpl
|
||||
remove_fields:
|
||||
output_fields:
|
||||
properties:
|
||||
key: value
|
||||
functions: # [array of object] Function List
|
||||
|
||||
- function: ASN_LOOKUP
|
||||
lookup_fields: [server_ip]
|
||||
output_fields: [server_asn]
|
||||
parameters:
|
||||
option: IP_TO_ASN
|
||||
kb_name: tsg_ip_asn
|
||||
|
||||
- function: ASN_LOOKUP
|
||||
lookup_fields: [client_ip]
|
||||
output_fields: [client_asn]
|
||||
parameters:
|
||||
option: IP_TO_ASN
|
||||
kb_name: tsg_ip_asn
|
||||
|
||||
- function: SNOWFLAKE_ID
|
||||
lookup_fields: ['']
|
||||
output_fields: [log_id]
|
||||
parameters:
|
||||
data_center_id_num: 1
|
||||
|
||||
- function: JSON_EXTRACT
|
||||
lookup_fields: [device_tag]
|
||||
output_fields: [data_center]
|
||||
filter:
|
||||
parameters:
|
||||
value_expression: $.tags[?(@.tag=='data_center')][0].value
|
||||
|
||||
- function: JSON_EXTRACT
|
||||
lookup_fields: [device_tag]
|
||||
output_fields: [device_group]
|
||||
filter:
|
||||
parameters:
|
||||
value_expression: $.tags[?(@.tag=='device_group')][0].value
|
||||
|
||||
- function: CURRENT_UNIX_TIMESTAMP
|
||||
output_fields: [processing_time]
|
||||
parameters:
|
||||
precision: seconds
|
||||
|
||||
- function: UNIX_TIMESTAMP_CONVERTER
|
||||
lookup_fields: [__timestamp]
|
||||
output_fields: [recv_time]
|
||||
parameters:
|
||||
precision: seconds
|
||||
|
||||
- function: EVAL
|
||||
output_fields: [ingestion_time]
|
||||
parameters:
|
||||
value_expression: recv_time
|
||||
|
||||
- function: BASE64_DECODE_TO_STRING
|
||||
output_fields: [mail_subject]
|
||||
parameters:
|
||||
value_field: mail_subject
|
||||
charset_field: mail_subject_charset
|
||||
|
||||
- function: BASE64_DECODE_TO_STRING
|
||||
output_fields: [mail_attachment_name]
|
||||
parameters:
|
||||
value_field: mail_attachment_name
|
||||
charset_field: mail_attachment_name_charset
|
||||
|
||||
- function: PATH_COMBINE
|
||||
lookup_fields: [rtp_pcap_path]
|
||||
output_fields: [rtp_pcap_path]
|
||||
parameters:
|
||||
path: [props.hos.path, props.hos.bucket.name.traffic_file, rtp_pcap_path]
|
||||
|
||||
- function: PATH_COMBINE
|
||||
lookup_fields: [http_request_body]
|
||||
output_fields: [http_request_body]
|
||||
parameters:
|
||||
path: [props.hos.path, props.hos.bucket.name.traffic_file, http_request_body]
|
||||
|
||||
- function: PATH_COMBINE
|
||||
lookup_fields: [http_response_body]
|
||||
output_fields: [http_response_body]
|
||||
parameters:
|
||||
path: [props.hos.path, props.hos.bucket.name.traffic_file, http_response_body]
|
||||
|
||||
- function: PATH_COMBINE
|
||||
lookup_fields: [mail_eml_file]
|
||||
output_fields: [mail_eml_file]
|
||||
parameters:
|
||||
path: [props.hos.path, props.hos.bucket.name.traffic_file, mail_eml_file]
|
||||
|
||||
- function: PATH_COMBINE
|
||||
lookup_fields: [packet_capture_file]
|
||||
output_fields: [packet_capture_file]
|
||||
parameters:
|
||||
path: [props.hos.path, props.hos.bucket.name.traffic_file, packet_capture_file]
|
||||
|
||||
sinks:
|
||||
kafka_sink:
|
||||
type : kafka
|
||||
properties:
|
||||
topic: SESSION-RECORD-PROCESSED
|
||||
kafka.bootstrap.servers: "{{ kafka_sink_servers }}"
|
||||
kafka.retries: 0
|
||||
kafka.linger.ms: 10
|
||||
kafka.request.timeout.ms: 30000
|
||||
kafka.batch.size: 262144
|
||||
kafka.buffer.memory: 134217728
|
||||
kafka.max.request.size: 10485760
|
||||
kafka.compression.type: snappy
|
||||
kafka.security.protocol: SASL_PLAINTEXT
|
||||
kafka.sasl.mechanism: PLAIN
|
||||
kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a7ff0b2d3889a424249967b3870b50993d9644f239f0de82cdb13bdb502959e16afadffa49ef1e1d2b9c9b5113e619817
|
||||
format: json
|
||||
|
||||
|
||||
application:
|
||||
|
||||
env: # [object] Environment Variables
|
||||
name: etl_session_record_kafka_to_ndc_kafka # [string] Job Name
|
||||
shade.identifier: aes
|
||||
pipeline:
|
||||
object-reuse: true # [boolean] Object Reuse, default is false
|
||||
topology:
|
||||
- name: kafka_source
|
||||
downstream: [etl_processor]
|
||||
- name: etl_processor
|
||||
downstream: [kafka_sink]
|
||||
- name: kafka_sink
|
||||
|
||||
|
||||
@@ -0,0 +1,155 @@
|
||||
sources:
|
||||
kafka_source:
|
||||
type: kafka
|
||||
# fields: # [array of object] Field List, if not set, all fields(Map<String, Object>) will be output.
|
||||
# watermark_timestamp: common_recv_time # [string] Watermark Field Name
|
||||
# watermark_timestamp_unit: ms # [string] Watermark Unit, default is ms
|
||||
# watermark_lag: 60 # [number] Watermark Lag, default is 60
|
||||
properties:
|
||||
topic: TRANSACTION-RECORD
|
||||
kafka.bootstrap.servers: "{{ kafka_source_servers }}"
|
||||
kafka.session.timeout.ms: 60000
|
||||
kafka.max.poll.records: 3000
|
||||
kafka.max.partition.fetch.bytes: 31457280
|
||||
kafka.security.protocol: SASL_PLAINTEXT
|
||||
kafka.sasl.mechanism: PLAIN
|
||||
kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a7ff0b2d3889a424249967b3870b50993d9644f239f0de82cdb13bdb502959e16afadffa49ef1e1d2b9c9b5113e619817
|
||||
kafka.group.id: etl_transaction_record_kafka_to_ndc_kafka
|
||||
kafka.auto.offset.reset: latest
|
||||
format: json
|
||||
|
||||
processing_pipelines:
|
||||
etl_processor: # [object] Processing Pipeline
|
||||
type: com.geedgenetworks.core.processor.projection.ProjectionProcessorImpl
|
||||
remove_fields:
|
||||
output_fields:
|
||||
properties:
|
||||
key: value
|
||||
functions: # [array of object] Function List
|
||||
|
||||
- function: ASN_LOOKUP
|
||||
lookup_fields: [server_ip]
|
||||
output_fields: [server_asn]
|
||||
parameters:
|
||||
option: IP_TO_ASN
|
||||
kb_name: tsg_ip_asn
|
||||
|
||||
- function: ASN_LOOKUP
|
||||
lookup_fields: [client_ip]
|
||||
output_fields: [client_asn]
|
||||
parameters:
|
||||
option: IP_TO_ASN
|
||||
kb_name: tsg_ip_asn
|
||||
|
||||
- function: SNOWFLAKE_ID
|
||||
lookup_fields: ['']
|
||||
output_fields: [log_id]
|
||||
parameters:
|
||||
data_center_id_num: 1
|
||||
|
||||
- function: JSON_EXTRACT
|
||||
lookup_fields: [device_tag]
|
||||
output_fields: [data_center]
|
||||
filter:
|
||||
parameters:
|
||||
value_expression: $.tags[?(@.tag=='data_center')][0].value
|
||||
|
||||
- function: JSON_EXTRACT
|
||||
lookup_fields: [device_tag]
|
||||
output_fields: [device_group]
|
||||
filter:
|
||||
parameters:
|
||||
value_expression: $.tags[?(@.tag=='device_group')][0].value
|
||||
|
||||
- function: CURRENT_UNIX_TIMESTAMP
|
||||
output_fields: [processing_time]
|
||||
parameters:
|
||||
precision: seconds
|
||||
|
||||
- function: UNIX_TIMESTAMP_CONVERTER
|
||||
lookup_fields: [__timestamp]
|
||||
output_fields: [recv_time]
|
||||
parameters:
|
||||
precision: seconds
|
||||
|
||||
- function: EVAL
|
||||
output_fields: [ingestion_time]
|
||||
parameters:
|
||||
value_expression: recv_time
|
||||
|
||||
- function: BASE64_DECODE_TO_STRING
|
||||
output_fields: [mail_subject]
|
||||
parameters:
|
||||
value_field: mail_subject
|
||||
charset_field: mail_subject_charset
|
||||
|
||||
- function: BASE64_DECODE_TO_STRING
|
||||
output_fields: [mail_attachment_name]
|
||||
parameters:
|
||||
value_field: mail_attachment_name
|
||||
charset_field: mail_attachment_name_charset
|
||||
|
||||
- function: PATH_COMBINE
|
||||
lookup_fields: [rtp_pcap_path]
|
||||
output_fields: [rtp_pcap_path]
|
||||
parameters:
|
||||
path: [props.hos.path, props.hos.bucket.name.traffic_file, rtp_pcap_path]
|
||||
|
||||
- function: PATH_COMBINE
|
||||
lookup_fields: [http_request_body]
|
||||
output_fields: [http_request_body]
|
||||
parameters:
|
||||
path: [props.hos.path, props.hos.bucket.name.traffic_file, http_request_body]
|
||||
|
||||
- function: PATH_COMBINE
|
||||
lookup_fields: [http_response_body]
|
||||
output_fields: [http_response_body]
|
||||
parameters:
|
||||
path: [props.hos.path, props.hos.bucket.name.traffic_file, http_response_body]
|
||||
|
||||
- function: PATH_COMBINE
|
||||
lookup_fields: [mail_eml_file]
|
||||
output_fields: [mail_eml_file]
|
||||
parameters:
|
||||
path: [props.hos.path, props.hos.bucket.name.traffic_file, mail_eml_file]
|
||||
|
||||
- function: PATH_COMBINE
|
||||
lookup_fields: [packet_capture_file]
|
||||
output_fields: [packet_capture_file]
|
||||
parameters:
|
||||
path: [props.hos.path, props.hos.bucket.name.traffic_file, packet_capture_file]
|
||||
|
||||
sinks:
|
||||
kafka_sink:
|
||||
type : kafka
|
||||
properties:
|
||||
topic: TRANSACTION-RECORD-PROCESSED
|
||||
kafka.bootstrap.servers: "{{ kafka_sink_servers }}"
|
||||
kafka.retries: 0
|
||||
kafka.linger.ms: 10
|
||||
kafka.request.timeout.ms: 30000
|
||||
kafka.batch.size: 262144
|
||||
kafka.buffer.memory: 134217728
|
||||
kafka.max.request.size: 10485760
|
||||
kafka.compression.type: snappy
|
||||
kafka.security.protocol: SASL_PLAINTEXT
|
||||
kafka.sasl.mechanism: PLAIN
|
||||
kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a7ff0b2d3889a424249967b3870b50993d9644f239f0de82cdb13bdb502959e16afadffa49ef1e1d2b9c9b5113e619817
|
||||
format: json
|
||||
|
||||
|
||||
application:
|
||||
|
||||
env: # [object] Environment Variables
|
||||
name: etl_transaction_record_kafka_to_ndc_kafka # [string] Job Name
|
||||
shade.identifier: aes
|
||||
pipeline:
|
||||
object-reuse: true # [boolean] Object Reuse, default is false
|
||||
topology:
|
||||
- name: kafka_source
|
||||
downstream: [etl_processor]
|
||||
- name: etl_processor
|
||||
downstream: [kafka_sink]
|
||||
- name: kafka_sink
|
||||
|
||||
|
||||
@@ -0,0 +1,118 @@
|
||||
sources:
|
||||
kafka_source:
|
||||
type: kafka
|
||||
properties:
|
||||
topic: VOIP-CONVERSATION-RECORD
|
||||
kafka.bootstrap.servers: "{{ kafka_source_servers }}"
|
||||
kafka.session.timeout.ms: 60000
|
||||
kafka.max.poll.records: 3000
|
||||
kafka.max.partition.fetch.bytes: 31457280
|
||||
kafka.security.protocol: SASL_PLAINTEXT
|
||||
kafka.sasl.mechanism: PLAIN
|
||||
kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a7ff0b2d3889a424249967b3870b50993d9644f239f0de82cdb13bdb502959e16afadffa49ef1e1d2b9c9b5113e619817
|
||||
kafka.group.id: etl_voip_record_kafka_to_clickhouse
|
||||
kafka.auto.offset.reset: latest
|
||||
format: json
|
||||
|
||||
processing_pipelines:
|
||||
etl_processor: # [object] Processing Pipeline
|
||||
type: com.geedgenetworks.core.processor.projection.ProjectionProcessorImpl
|
||||
remove_fields:
|
||||
output_fields:
|
||||
properties:
|
||||
key: value
|
||||
functions: # [array of object] Function List
|
||||
|
||||
- function: ASN_LOOKUP
|
||||
lookup_fields: [server_ip]
|
||||
output_fields: [server_asn]
|
||||
parameters:
|
||||
option: IP_TO_ASN
|
||||
kb_name: tsg_ip_asn
|
||||
|
||||
- function: ASN_LOOKUP
|
||||
lookup_fields: [client_ip]
|
||||
output_fields: [client_asn]
|
||||
parameters:
|
||||
option: IP_TO_ASN
|
||||
kb_name: tsg_ip_asn
|
||||
|
||||
- function: SNOWFLAKE_ID
|
||||
lookup_fields: ['']
|
||||
output_fields: [log_id]
|
||||
parameters:
|
||||
data_center_id_num: 1
|
||||
|
||||
- function: JSON_EXTRACT
|
||||
lookup_fields: [device_tag]
|
||||
output_fields: [data_center]
|
||||
filter:
|
||||
parameters:
|
||||
value_expression: $.tags[?(@.tag=='data_center')][0].value
|
||||
|
||||
- function: JSON_EXTRACT
|
||||
lookup_fields: [device_tag]
|
||||
output_fields: [device_group]
|
||||
filter:
|
||||
parameters:
|
||||
value_expression: $.tags[?(@.tag=='device_group')][0].value
|
||||
|
||||
- function: CURRENT_UNIX_TIMESTAMP
|
||||
output_fields: [processing_time]
|
||||
parameters:
|
||||
precision: seconds
|
||||
|
||||
- function: UNIX_TIMESTAMP_CONVERTER
|
||||
lookup_fields: [__timestamp]
|
||||
output_fields: [recv_time]
|
||||
parameters:
|
||||
precision: seconds
|
||||
|
||||
- function: EVAL
|
||||
output_fields: [ingestion_time]
|
||||
parameters:
|
||||
value_expression: recv_time
|
||||
|
||||
|
||||
- function: BASE64_DECODE_TO_STRING
|
||||
output_fields: [mail_subject]
|
||||
parameters:
|
||||
value_field: mail_subject
|
||||
charset_field: mail_subject_charset
|
||||
|
||||
- function: BASE64_DECODE_TO_STRING
|
||||
output_fields: [mail_attachment_name]
|
||||
parameters:
|
||||
value_field: mail_attachment_name
|
||||
charset_field: mail_attachment_name_charset
|
||||
|
||||
|
||||
sinks:
|
||||
clickhouse_sink:
|
||||
type: clickhouse
|
||||
properties:
|
||||
host: "{{ clickhouse_servers }}"
|
||||
table: tsg_galaxy_v3.voip_record_local
|
||||
batch.size: 100000
|
||||
batch.interval: 30s
|
||||
connection.user: e54c9568586180eede1506eecf3574e9
|
||||
connection.password: 86cf0e2ffba3f541a6c6761313e5cc7e
|
||||
connection.connect_timeout: 30
|
||||
connection.query_timeout: 300
|
||||
|
||||
|
||||
application:
|
||||
|
||||
env: # [object] Environment Variables
|
||||
name: etl_voip_record_kafka_to_clickhouse # [string] Job Name
|
||||
shade.identifier: aes
|
||||
pipeline:
|
||||
object-reuse: true # [boolean] Object Reuse, default is false
|
||||
topology:
|
||||
- name: kafka_source
|
||||
downstream: [etl_processor]
|
||||
- name: etl_processor
|
||||
downstream: [clickhouse_sink]
|
||||
- name: clickhouse_sink
|
||||
|
||||
|
||||
@@ -0,0 +1,142 @@
|
||||
sources:
|
||||
kafka_source:
|
||||
type: kafka
|
||||
# fields: # [array of object] Field List, if not set, all fields(Map<String, Object>) will be output.
|
||||
# watermark_timestamp: common_recv_time # [string] Watermark Field Name
|
||||
# watermark_timestamp_unit: ms # [string] Watermark Unit, default is ms
|
||||
# watermark_lag: 60 # [number] Watermark Lag, default is 60
|
||||
properties:
|
||||
topic: PROXY-EVENT
|
||||
kafka.bootstrap.servers: "{{ kafka_source_servers }}"
|
||||
kafka.session.timeout.ms: 60000
|
||||
kafka.max.poll.records: 3000
|
||||
kafka.max.partition.fetch.bytes: 31457280
|
||||
kafka.security.protocol: SASL_PLAINTEXT
|
||||
kafka.ssl.keystore.location:
|
||||
kafka.ssl.keystore.password:
|
||||
kafka.ssl.truststore.location:
|
||||
kafka.ssl.truststore.password:
|
||||
kafka.ssl.key.password:
|
||||
kafka.sasl.mechanism: PLAIN
|
||||
kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a7ff0b2d3889a424249967b3870b50993d9644f239f0de82cdb13bdb502959e16afadffa49ef1e1d2b9c9b5113e619817
|
||||
kafka.buffer.memory:
|
||||
kafka.group.id: etl_proxy_event_kafka_to_clickhouse-20231221
|
||||
kafka.auto.offset.reset: latest
|
||||
kafka.max.request.size:
|
||||
kafka.compression.type: none
|
||||
format: json
|
||||
|
||||
processing_pipelines:
|
||||
etl_processor: # [object] Processing Pipeline
|
||||
type: com.geedgenetworks.core.processor.projection.ProjectionProcessorImpl
|
||||
remove_fields:
|
||||
output_fields:
|
||||
properties:
|
||||
key: value
|
||||
functions: # [array of object] Function List
|
||||
|
||||
- function: SNOWFLAKE_ID
|
||||
lookup_fields: ['']
|
||||
output_fields: [log_id]
|
||||
parameters:
|
||||
data_center_id_num: 1
|
||||
|
||||
- function: JSON_EXTRACT
|
||||
lookup_fields: [device_tag]
|
||||
output_fields: [data_center]
|
||||
filter:
|
||||
parameters:
|
||||
value_expression: $.tags[?(@.tag=='data_center')][0].value
|
||||
|
||||
- function: JSON_EXTRACT
|
||||
lookup_fields: [device_tag]
|
||||
output_fields: [device_group]
|
||||
filter:
|
||||
parameters:
|
||||
value_expression: $.tags[?(@.tag=='device_group')][0].value
|
||||
|
||||
- function: CURRENT_UNIX_TIMESTAMP
|
||||
output_fields: [processing_time]
|
||||
parameters:
|
||||
precision: seconds
|
||||
|
||||
- function: UNIX_TIMESTAMP_CONVERTER
|
||||
lookup_fields: [__timestamp]
|
||||
output_fields: [recv_time]
|
||||
parameters:
|
||||
precision: seconds
|
||||
|
||||
- function: EVAL
|
||||
output_fields: [ingestion_time]
|
||||
parameters:
|
||||
value_expression: recv_time
|
||||
|
||||
- function: BASE64_DECODE_TO_STRING
|
||||
output_fields: [mail_subject]
|
||||
parameters:
|
||||
value_field: mail_subject
|
||||
charset_field: mail_subject_charset
|
||||
|
||||
- function: BASE64_DECODE_TO_STRING
|
||||
output_fields: [mail_attachment_name]
|
||||
parameters:
|
||||
value_field: mail_attachment_name
|
||||
charset_field: mail_attachment_name_charset
|
||||
|
||||
- function: PATH_COMBINE
|
||||
lookup_fields: [rtp_pcap_path]
|
||||
output_fields: [rtp_pcap_path]
|
||||
parameters:
|
||||
path: [props.hos.path, props.hos.bucket.name.traffic_file, rtp_pcap_path]
|
||||
|
||||
- function: PATH_COMBINE
|
||||
lookup_fields: [http_request_body]
|
||||
output_fields: [http_request_body]
|
||||
parameters:
|
||||
path: [props.hos.path, props.hos.bucket.name.traffic_file, http_request_body]
|
||||
|
||||
- function: PATH_COMBINE
|
||||
lookup_fields: [http_response_body]
|
||||
output_fields: [http_response_body]
|
||||
parameters:
|
||||
path: [props.hos.path, props.hos.bucket.name.traffic_file, http_response_body]
|
||||
|
||||
- function: PATH_COMBINE
|
||||
lookup_fields: [mail_eml_file]
|
||||
output_fields: [mail_eml_file]
|
||||
parameters:
|
||||
path: [props.hos.path, props.hos.bucket.name.traffic_file, mail_eml_file]
|
||||
|
||||
- function: PATH_COMBINE
|
||||
lookup_fields: [packet_capture_file]
|
||||
output_fields: [packet_capture_file]
|
||||
parameters:
|
||||
path: [props.hos.path, props.hos.bucket.name.traffic_file, packet_capture_file]
|
||||
|
||||
sinks:
|
||||
clickhouse_sink:
|
||||
type: clickhouse
|
||||
properties:
|
||||
host: "{{ clickhouse_servers }}"
|
||||
table: tsg_galaxy_v3.proxy_event_local
|
||||
batch.size: 100000
|
||||
batch.interval: 30s
|
||||
connection.user: e54c9568586180eede1506eecf3574e9
|
||||
connection.password: 86cf0e2ffba3f541a6c6761313e5cc7e
|
||||
|
||||
|
||||
application:
|
||||
|
||||
env: # [object] Environment Variables
|
||||
name: etl_proxy_event_kafka_to_clickhouse # [string] Job Name
|
||||
shade.identifier: aes
|
||||
pipeline:
|
||||
object-reuse: true # [boolean] Object Reuse, default is false
|
||||
topology:
|
||||
- name: kafka_source
|
||||
downstream: [etl_processor]
|
||||
- name: etl_processor
|
||||
downstream: [clickhouse_sink]
|
||||
- name: clickhouse_sink
|
||||
|
||||
|
||||
@@ -0,0 +1,142 @@
|
||||
sources:
|
||||
kafka_source:
|
||||
type: kafka
|
||||
# fields: # [array of object] Field List, if not set, all fields(Map<String, Object>) will be output.
|
||||
# watermark_timestamp: common_recv_time # [string] Watermark Field Name
|
||||
# watermark_timestamp_unit: ms # [string] Watermark Unit, default is ms
|
||||
# watermark_lag: 60 # [number] Watermark Lag, default is 60
|
||||
properties:
|
||||
topic: SESSION-RECORD
|
||||
kafka.bootstrap.servers: "{{ kafka_source_servers }}"
|
||||
kafka.session.timeout.ms: 60000
|
||||
kafka.max.poll.records: 3000
|
||||
kafka.max.partition.fetch.bytes: 31457280
|
||||
kafka.security.protocol: SASL_PLAINTEXT
|
||||
kafka.ssl.keystore.location:
|
||||
kafka.ssl.keystore.password:
|
||||
kafka.ssl.truststore.location:
|
||||
kafka.ssl.truststore.password:
|
||||
kafka.ssl.key.password:
|
||||
kafka.sasl.mechanism: PLAIN
|
||||
kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a7ff0b2d3889a424249967b3870b50993d9644f239f0de82cdb13bdb502959e16afadffa49ef1e1d2b9c9b5113e619817
|
||||
kafka.buffer.memory:
|
||||
kafka.group.id: etl_session_record_kafka_to_clickhouse-20230125
|
||||
kafka.auto.offset.reset: latest
|
||||
kafka.max.request.size:
|
||||
kafka.compression.type: none
|
||||
format: json
|
||||
|
||||
processing_pipelines:
|
||||
etl_processor: # [object] Processing Pipeline
|
||||
type: com.geedgenetworks.core.processor.projection.ProjectionProcessorImpl
|
||||
remove_fields:
|
||||
output_fields:
|
||||
properties:
|
||||
key: value
|
||||
functions: # [array of object] Function List
|
||||
|
||||
- function: SNOWFLAKE_ID
|
||||
lookup_fields: ['']
|
||||
output_fields: [log_id]
|
||||
parameters:
|
||||
data_center_id_num: 1
|
||||
|
||||
- function: JSON_EXTRACT
|
||||
lookup_fields: [device_tag]
|
||||
output_fields: [data_center]
|
||||
filter:
|
||||
parameters:
|
||||
value_expression: $.tags[?(@.tag=='data_center')][0].value
|
||||
|
||||
- function: JSON_EXTRACT
|
||||
lookup_fields: [device_tag]
|
||||
output_fields: [device_group]
|
||||
filter:
|
||||
parameters:
|
||||
value_expression: $.tags[?(@.tag=='device_group')][0].value
|
||||
|
||||
- function: CURRENT_UNIX_TIMESTAMP
|
||||
output_fields: [processing_time]
|
||||
parameters:
|
||||
precision: seconds
|
||||
|
||||
- function: UNIX_TIMESTAMP_CONVERTER
|
||||
lookup_fields: [__timestamp]
|
||||
output_fields: [recv_time]
|
||||
parameters:
|
||||
precision: seconds
|
||||
|
||||
- function: EVAL
|
||||
output_fields: [ingestion_time]
|
||||
parameters:
|
||||
value_expression: recv_time
|
||||
|
||||
- function: BASE64_DECODE_TO_STRING
|
||||
output_fields: [mail_subject]
|
||||
parameters:
|
||||
value_field: mail_subject
|
||||
charset_field: mail_subject_charset
|
||||
|
||||
- function: BASE64_DECODE_TO_STRING
|
||||
output_fields: [mail_attachment_name]
|
||||
parameters:
|
||||
value_field: mail_attachment_name
|
||||
charset_field: mail_attachment_name_charset
|
||||
|
||||
- function: PATH_COMBINE
|
||||
lookup_fields: [rtp_pcap_path]
|
||||
output_fields: [rtp_pcap_path]
|
||||
parameters:
|
||||
path: [props.hos.path, props.hos.bucket.name.traffic_file, rtp_pcap_path]
|
||||
|
||||
- function: PATH_COMBINE
|
||||
lookup_fields: [http_request_body]
|
||||
output_fields: [http_request_body]
|
||||
parameters:
|
||||
path: [props.hos.path, props.hos.bucket.name.traffic_file, http_request_body]
|
||||
|
||||
- function: PATH_COMBINE
|
||||
lookup_fields: [http_response_body]
|
||||
output_fields: [http_response_body]
|
||||
parameters:
|
||||
path: [props.hos.path, props.hos.bucket.name.traffic_file, http_response_body]
|
||||
|
||||
- function: PATH_COMBINE
|
||||
lookup_fields: [mail_eml_file]
|
||||
output_fields: [mail_eml_file]
|
||||
parameters:
|
||||
path: [props.hos.path, props.hos.bucket.name.traffic_file, mail_eml_file]
|
||||
|
||||
- function: PATH_COMBINE
|
||||
lookup_fields: [packet_capture_file]
|
||||
output_fields: [packet_capture_file]
|
||||
parameters:
|
||||
path: [props.hos.path, props.hos.bucket.name.traffic_file, packet_capture_file]
|
||||
|
||||
sinks:
|
||||
clickhouse_sink:
|
||||
type: clickhouse
|
||||
properties:
|
||||
host: "{{ clickhouse_servers }}"
|
||||
table: tsg_galaxy_v3.session_record_local
|
||||
batch.size: 100000
|
||||
batch.interval: 30s
|
||||
connection.user: e54c9568586180eede1506eecf3574e9
|
||||
connection.password: 86cf0e2ffba3f541a6c6761313e5cc7e
|
||||
|
||||
|
||||
application:
|
||||
|
||||
env: # [object] Environment Variables
|
||||
name: etl_session_record_kafka_to_clickhouse # [string] Job Name
|
||||
shade.identifier: aes
|
||||
pipeline:
|
||||
object-reuse: true # [boolean] Object Reuse, default is false
|
||||
topology:
|
||||
- name: kafka_source
|
||||
downstream: [etl_processor]
|
||||
- name: etl_processor
|
||||
downstream: [clickhouse_sink]
|
||||
- name: clickhouse_sink
|
||||
|
||||
|
||||
@@ -0,0 +1,92 @@
|
||||
sources:
|
||||
kafka_source:
|
||||
type: kafka
|
||||
properties:
|
||||
topic: TRAFFIC-SKETCH-METRIC
|
||||
kafka.bootstrap.servers: "{{ kafka_source_servers }}"
|
||||
kafka.session.timeout.ms: 60000
|
||||
kafka.max.poll.records: 3000
|
||||
kafka.max.partition.fetch.bytes: 31457280
|
||||
kafka.security.protocol: SASL_PLAINTEXT
|
||||
kafka.sasl.mechanism: PLAIN
|
||||
kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a7ff0b2d3889a424249967b3870b50993d9644f239f0de82cdb13bdb502959e16afadffa49ef1e1d2b9c9b5113e619817
|
||||
kafka.group.id: etl_traffic_sketch_metric
|
||||
kafka.auto.offset.reset: latest
|
||||
kafka.compression.type: none
|
||||
format: json
|
||||
|
||||
processing_pipelines:
|
||||
etl_processor: # [object] Processing Pipeline
|
||||
type: com.geedgenetworks.core.processor.projection.ProjectionProcessorImpl
|
||||
remove_fields:
|
||||
output_fields:
|
||||
functions: # [array of object] Function List
|
||||
|
||||
- function: FLATTEN
|
||||
lookup_fields: [ fields,tags ]
|
||||
output_fields: [ ]
|
||||
parameters:
|
||||
#prefix: ""
|
||||
depth: 3
|
||||
# delimiter: "."
|
||||
|
||||
- function: RENAME
|
||||
lookup_fields: [ '' ]
|
||||
output_fields: [ '' ]
|
||||
filter:
|
||||
parameters:
|
||||
# parent_fields: [tags]
|
||||
#rename_fields:
|
||||
# tags: tags
|
||||
rename_expression: key =string.replace_all(key,'tags.','');key =string.replace_all(key,'fields.','');return key;
|
||||
|
||||
- function: EVAL
|
||||
output_fields: [ internal_ip ]
|
||||
parameters:
|
||||
value_expression: 'direction=Outbound? client_ip : server_ip'
|
||||
- function: EVAL
|
||||
output_fields: [ external_ip ]
|
||||
parameters:
|
||||
value_expression: 'direction=Outbound? server_ip : client_ip'
|
||||
|
||||
- function: UNIX_TIMESTAMP_CONVERTER
|
||||
lookup_fields: [ timestamp_ms ]
|
||||
output_fields: [ recv_time ]
|
||||
parameters:
|
||||
precision: seconds
|
||||
|
||||
- function: SNOWFLAKE_ID
|
||||
lookup_fields: [ '' ]
|
||||
output_fields: [ log_id ]
|
||||
filter:
|
||||
parameters:
|
||||
data_center_id_num: 1
|
||||
|
||||
|
||||
sinks:
|
||||
clickhouse_sink:
|
||||
type: clickhouse
|
||||
properties:
|
||||
host: "{{ clickhouse_servers }}"
|
||||
table: tsg_galaxy_v3.traffic_sketch_metric_local
|
||||
batch.size: 100000
|
||||
batch.interval: 30s
|
||||
connection.user: e54c9568586180eede1506eecf3574e9
|
||||
connection.password: 86cf0e2ffba3f541a6c6761313e5cc7e
|
||||
|
||||
|
||||
application:
|
||||
|
||||
env: # [object] Environment Variables
|
||||
name: etl_traffic_sketch_metric # [string] Job Name
|
||||
shade.identifier: aes
|
||||
pipeline:
|
||||
object-reuse: true # [boolean] Object Reuse, default is false
|
||||
topology:
|
||||
- name: kafka_source
|
||||
downstream: [etl_processor]
|
||||
- name: etl_processor
|
||||
downstream: [clickhouse_sink]
|
||||
- name: clickhouse_sink
|
||||
|
||||
|
||||
@@ -0,0 +1,140 @@
|
||||
sources:
|
||||
kafka_source:
|
||||
type: kafka
|
||||
# fields: # [array of object] Field List, if not set, all fields(Map<String, Object>) will be output.
|
||||
# watermark_timestamp: common_recv_time # [string] Watermark Field Name
|
||||
# watermark_timestamp_unit: ms # [string] Watermark Unit, default is ms
|
||||
# watermark_lag: 60 # [number] Watermark Lag, default is 60
|
||||
properties:
|
||||
topic: TRANSACTION-RECORD
|
||||
kafka.bootstrap.servers: "{{ kafka_source_servers }}"
|
||||
kafka.session.timeout.ms: 60000
|
||||
kafka.max.poll.records: 3000
|
||||
kafka.max.partition.fetch.bytes: 31457280
|
||||
kafka.security.protocol: SASL_PLAINTEXT
|
||||
kafka.ssl.keystore.location:
|
||||
kafka.ssl.keystore.password:
|
||||
kafka.ssl.truststore.location:
|
||||
kafka.ssl.truststore.password:
|
||||
kafka.ssl.key.password:
|
||||
kafka.sasl.mechanism: PLAIN
|
||||
kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a7ff0b2d3889a424249967b3870b50993d9644f239f0de82cdb13bdb502959e16afadffa49ef1e1d2b9c9b5113e619817
|
||||
kafka.buffer.memory:
|
||||
kafka.group.id: etl_transaction_record_kafka_to_clickhouse-20240308
|
||||
kafka.auto.offset.reset: latest
|
||||
kafka.max.request.size:
|
||||
kafka.compression.type: none
|
||||
format: json
|
||||
|
||||
processing_pipelines:
|
||||
etl_processor: # [object] Processing Pipeline
|
||||
type: com.geedgenetworks.core.processor.projection.ProjectionProcessorImpl
|
||||
remove_fields:
|
||||
output_fields:
|
||||
properties:
|
||||
key: value
|
||||
functions: # [array of object] Function List
|
||||
|
||||
- function: SNOWFLAKE_ID
|
||||
lookup_fields: ['']
|
||||
output_fields: [log_id]
|
||||
parameters:
|
||||
data_center_id_num: 1
|
||||
|
||||
- function: JSON_EXTRACT
|
||||
lookup_fields: [device_tag]
|
||||
output_fields: [data_center]
|
||||
filter:
|
||||
parameters:
|
||||
value_expression: $.tags[?(@.tag=='data_center')][0].value
|
||||
|
||||
- function: JSON_EXTRACT
|
||||
lookup_fields: [device_tag]
|
||||
output_fields: [device_group]
|
||||
filter:
|
||||
parameters:
|
||||
value_expression: $.tags[?(@.tag=='device_group')][0].value
|
||||
|
||||
- function: CURRENT_UNIX_TIMESTAMP
|
||||
output_fields: [processing_time]
|
||||
parameters:
|
||||
precision: seconds
|
||||
|
||||
- function: UNIX_TIMESTAMP_CONVERTER
|
||||
lookup_fields: [__timestamp]
|
||||
output_fields: [recv_time]
|
||||
parameters:
|
||||
precision: seconds
|
||||
|
||||
- function: EVAL
|
||||
output_fields: [ingestion_time]
|
||||
parameters:
|
||||
value_expression: recv_time
|
||||
|
||||
- function: BASE64_DECODE_TO_STRING
|
||||
output_fields: [mail_subject]
|
||||
parameters:
|
||||
value_field: mail_subject
|
||||
charset_field: mail_subject_charset
|
||||
|
||||
- function: BASE64_DECODE_TO_STRING
|
||||
output_fields: [mail_attachment_name]
|
||||
parameters:
|
||||
value_field: mail_attachment_name
|
||||
charset_field: mail_attachment_name_charset
|
||||
|
||||
- function: PATH_COMBINE
|
||||
lookup_fields: [rtp_pcap_path]
|
||||
output_fields: [rtp_pcap_path]
|
||||
parameters:
|
||||
path: [props.hos.path, props.hos.bucket.name.traffic_file, rtp_pcap_path]
|
||||
|
||||
- function: PATH_COMBINE
|
||||
lookup_fields: [http_request_body]
|
||||
output_fields: [http_request_body]
|
||||
parameters:
|
||||
path: [props.hos.path, props.hos.bucket.name.traffic_file, http_request_body]
|
||||
|
||||
- function: PATH_COMBINE
|
||||
lookup_fields: [http_response_body]
|
||||
output_fields: [http_response_body]
|
||||
parameters:
|
||||
path: [props.hos.path, props.hos.bucket.name.traffic_file, http_response_body]
|
||||
|
||||
- function: PATH_COMBINE
|
||||
lookup_fields: [mail_eml_file]
|
||||
output_fields: [mail_eml_file]
|
||||
parameters:
|
||||
path: [props.hos.path, props.hos.bucket.name.traffic_file, mail_eml_file]
|
||||
|
||||
- function: PATH_COMBINE
|
||||
lookup_fields: [packet_capture_file]
|
||||
output_fields: [packet_capture_file]
|
||||
parameters:
|
||||
path: [props.hos.path, props.hos.bucket.name.traffic_file, packet_capture_file]
|
||||
|
||||
sinks:
|
||||
clickhouse_sink:
|
||||
type: clickhouse
|
||||
properties:
|
||||
host: "{{ clickhouse_servers }}"
|
||||
table: tsg_galaxy_v3.transaction_record_local
|
||||
batch.size: 100000
|
||||
batch.interval: 30s
|
||||
connection.user: e54c9568586180eede1506eecf3574e9
|
||||
connection.password: 86cf0e2ffba3f541a6c6761313e5cc7e
|
||||
|
||||
|
||||
application:
|
||||
|
||||
env: # [object] Environment Variables
|
||||
name: etl_transaction_record_kafka_to_clickhouse # [string] Job Name
|
||||
shade.identifier: aes
|
||||
pipeline:
|
||||
object-reuse: true # [boolean] Object Reuse, default is false
|
||||
topology:
|
||||
- name: kafka_source
|
||||
downstream: [etl_processor]
|
||||
- name: etl_processor
|
||||
downstream: [clickhouse_sink]
|
||||
- name: clickhouse_sink
|
||||
@@ -0,0 +1,142 @@
|
||||
sources:
|
||||
kafka_source:
|
||||
type: kafka
|
||||
# fields: # [array of object] Field List, if not set, all fields(Map<String, Object>) will be output.
|
||||
# watermark_timestamp: common_recv_time # [string] Watermark Field Name
|
||||
# watermark_timestamp_unit: ms # [string] Watermark Unit, default is ms
|
||||
# watermark_lag: 60 # [number] Watermark Lag, default is 60
|
||||
properties:
|
||||
topic: VOIP-CONVERSATION-RECORD
|
||||
kafka.bootstrap.servers: "{{ kafka_source_servers }}"
|
||||
kafka.session.timeout.ms: 60000
|
||||
kafka.max.poll.records: 3000
|
||||
kafka.max.partition.fetch.bytes: 31457280
|
||||
kafka.security.protocol: SASL_PLAINTEXT
|
||||
kafka.ssl.keystore.location:
|
||||
kafka.ssl.keystore.password:
|
||||
kafka.ssl.truststore.location:
|
||||
kafka.ssl.truststore.password:
|
||||
kafka.ssl.key.password:
|
||||
kafka.sasl.mechanism: PLAIN
|
||||
kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a7ff0b2d3889a424249967b3870b50993d9644f239f0de82cdb13bdb502959e16afadffa49ef1e1d2b9c9b5113e619817
|
||||
kafka.buffer.memory:
|
||||
kafka.group.id: etl_voip_record_kafka_to_clickhouse-20231221
|
||||
kafka.auto.offset.reset: latest
|
||||
kafka.max.request.size:
|
||||
kafka.compression.type: none
|
||||
format: json
|
||||
|
||||
processing_pipelines:
|
||||
etl_processor: # [object] Processing Pipeline
|
||||
type: com.geedgenetworks.core.processor.projection.ProjectionProcessorImpl
|
||||
remove_fields:
|
||||
output_fields:
|
||||
properties:
|
||||
key: value
|
||||
functions: # [array of object] Function List
|
||||
|
||||
- function: SNOWFLAKE_ID
|
||||
lookup_fields: ['']
|
||||
output_fields: [log_id]
|
||||
parameters:
|
||||
data_center_id_num: 1
|
||||
|
||||
- function: JSON_EXTRACT
|
||||
lookup_fields: [device_tag]
|
||||
output_fields: [data_center]
|
||||
filter:
|
||||
parameters:
|
||||
value_expression: $.tags[?(@.tag=='data_center')][0].value
|
||||
|
||||
- function: JSON_EXTRACT
|
||||
lookup_fields: [device_tag]
|
||||
output_fields: [device_group]
|
||||
filter:
|
||||
parameters:
|
||||
value_expression: $.tags[?(@.tag=='device_group')][0].value
|
||||
|
||||
- function: CURRENT_UNIX_TIMESTAMP
|
||||
output_fields: [processing_time]
|
||||
parameters:
|
||||
precision: seconds
|
||||
|
||||
- function: UNIX_TIMESTAMP_CONVERTER
|
||||
lookup_fields: [__timestamp]
|
||||
output_fields: [recv_time]
|
||||
parameters:
|
||||
precision: seconds
|
||||
|
||||
- function: EVAL
|
||||
output_fields: [ingestion_time]
|
||||
parameters:
|
||||
value_expression: recv_time
|
||||
|
||||
- function: BASE64_DECODE_TO_STRING
|
||||
output_fields: [mail_subject]
|
||||
parameters:
|
||||
value_field: mail_subject
|
||||
charset_field: mail_subject_charset
|
||||
|
||||
- function: BASE64_DECODE_TO_STRING
|
||||
output_fields: [mail_attachment_name]
|
||||
parameters:
|
||||
value_field: mail_attachment_name
|
||||
charset_field: mail_attachment_name_charset
|
||||
|
||||
- function: PATH_COMBINE
|
||||
lookup_fields: [rtp_pcap_path]
|
||||
output_fields: [rtp_pcap_path]
|
||||
parameters:
|
||||
path: [props.hos.path, props.hos.bucket.name.traffic_file, rtp_pcap_path]
|
||||
|
||||
- function: PATH_COMBINE
|
||||
lookup_fields: [http_request_body]
|
||||
output_fields: [http_request_body]
|
||||
parameters:
|
||||
path: [props.hos.path, props.hos.bucket.name.traffic_file, http_request_body]
|
||||
|
||||
- function: PATH_COMBINE
|
||||
lookup_fields: [http_response_body]
|
||||
output_fields: [http_response_body]
|
||||
parameters:
|
||||
path: [props.hos.path, props.hos.bucket.name.traffic_file, http_response_body]
|
||||
|
||||
- function: PATH_COMBINE
|
||||
lookup_fields: [mail_eml_file]
|
||||
output_fields: [mail_eml_file]
|
||||
parameters:
|
||||
path: [props.hos.path, props.hos.bucket.name.traffic_file, mail_eml_file]
|
||||
|
||||
- function: PATH_COMBINE
|
||||
lookup_fields: [packet_capture_file]
|
||||
output_fields: [packet_capture_file]
|
||||
parameters:
|
||||
path: [props.hos.path, props.hos.bucket.name.traffic_file, packet_capture_file]
|
||||
|
||||
sinks:
|
||||
clickhouse_sink:
|
||||
type: clickhouse
|
||||
properties:
|
||||
host: "{{ clickhouse_servers }}"
|
||||
table: tsg_galaxy_v3.voip_record_local
|
||||
batch.size: 100000
|
||||
batch.interval: 30s
|
||||
connection.user: e54c9568586180eede1506eecf3574e9
|
||||
connection.password: 86cf0e2ffba3f541a6c6761313e5cc7e
|
||||
|
||||
|
||||
application:
|
||||
|
||||
env: # [object] Environment Variables
|
||||
name: etl_voip_record_kafka_to_clickhouse # [string] Job Name
|
||||
shade.identifier: aes
|
||||
pipeline:
|
||||
object-reuse: true # [boolean] Object Reuse, default is false
|
||||
topology:
|
||||
- name: kafka_source
|
||||
downstream: [etl_processor]
|
||||
- name: etl_processor
|
||||
downstream: [clickhouse_sink]
|
||||
- name: clickhouse_sink
|
||||
|
||||
|
||||
Reference in New Issue
Block a user