# Recovered from a file-viewer listing (94 lines, 2.8 KiB, shown as plain text).
# Source connectors for the job.
# NOTE(review): this section was recovered from a listing that had lost all
# indentation; the nesting below is reconstructed — confirm against the
# deployed file.
sources:
  kafka_source:
    type: kafka
    properties:
      topic: TRAFFIC-SKETCH-METRIC
      # Rendered by the templating layer before this file is parsed as YAML.
      kafka.bootstrap.servers: "{{ kafka_source_servers }}"
      kafka.client.id: TRAFFIC-SKETCH-METRIC
      kafka.session.timeout.ms: 60000
      kafka.max.poll.records: 3000
      kafka.max.partition.fetch.bytes: 31457280
      kafka.security.protocol: SASL_PLAINTEXT
      kafka.sasl.mechanism: PLAIN
      # Opaque hex string, quoted so that no tool can retype or reflow it.
      # Presumably ciphertext (application.env sets shade.identifier: aes) —
      # confirm.
      kafka.sasl.jaas.config: "454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252"
      kafka.group.id: etl_traffic_sketch_metric
      kafka.auto.offset.reset: latest
      kafka.compression.type: none
      # NOTE(review): assumed to be a connector property (sibling of topic),
      # not a sibling of `properties` — confirm.
      format: json

# Processing stage: one projection processor that applies the functions
# below in the order listed.
# NOTE(review): this section was recovered from a listing that had lost all
# indentation; the nesting below is reconstructed — confirm against the
# deployed file.
processing_pipelines:
  etl_processor:  # [object] Processing Pipeline
    type: com.geedgenetworks.core.processor.projection.ProjectionProcessorImpl
    # Both were bare (empty) keys in the original, which YAML reads as null;
    # written explicitly so the intent is visible.
    remove_fields: null
    output_fields: null
    functions:  # [array of object] Function List

      - function: FLATTEN
        lookup_fields: [fields, tags]
        output_fields: []
        parameters:
          # prefix: ""
          depth: 3
          # delimiter: "."

      - function: RENAME
        # A list holding one empty string, exactly as in the original.
        lookup_fields: ['']
        output_fields: ['']
        filter: null
        parameters:
          # parent_fields: [tags]
          # rename_fields:
          #   tags: tags
          # Strips the literal substrings 'tags.' and 'fields.' from each key.
          rename_expression: "key =string.replace_all(key,'tags.','');key =string.replace_all(key,'fields.','');return key;"

      # NOTE(review): both EVAL expressions use a single '=' and an unquoted
      # Outbound. Confirm the expression engine evaluates this as an equality
      # test against the direction value rather than as an assignment.
      - function: EVAL
        output_fields: [internal_ip]
        parameters:
          value_expression: 'direction=Outbound? client_ip : server_ip'
      - function: EVAL
        output_fields: [external_ip]
        parameters:
          value_expression: 'direction=Outbound? server_ip : client_ip'

      - function: UNIX_TIMESTAMP_CONVERTER
        lookup_fields: [timestamp_ms]
        output_fields: [recv_time]
        parameters:
          precision: seconds

      - function: SNOWFLAKE_ID
        lookup_fields: ['']
        output_fields: [log_id]
        filter: null
        parameters:
          data_center_id_num: 1

# Sink connectors for the job.
# NOTE(review): this section was recovered from a listing that had lost all
# indentation; the nesting below is reconstructed — confirm against the
# deployed file.
sinks:
  clickhouse_sink:
    type: clickhouse
    properties:
      # Rendered by the templating layer before this file is parsed as YAML.
      host: "{{ clickhouse_servers }}"
      table: tsg_galaxy_v3.traffic_sketch_metric_local
      batch.size: 100000
      batch.interval: 30s
      # Opaque hex strings, quoted so that no tool can retype them.
      # Presumably ciphertext (application.env sets shade.identifier: aes) —
      # confirm.
      connection.user: "e54c9568586180eede1506eecf3574e9"
      connection.password: "86cf0e2ffba3f541a6c6761313e5cc7e"

# Job-level settings and the wiring between the named source, processor
# and sink.
# NOTE(review): this section was recovered from a listing that had lost all
# indentation; the nesting below is reconstructed — confirm against the
# deployed file.
application:

  env:  # [object] Environment Variables
    name: etl_traffic_sketch_metric  # [string] Job Name
    # NOTE(review): presumably selects how the hex-encoded credentials in the
    # source and sink sections are decoded — confirm.
    shade.identifier: aes
    pipeline:
      object-reuse: true  # [boolean] Object Reuse, default is false
  # Each entry names a node and the nodes it feeds; the last node has no
  # downstream.
  topology:
    - name: kafka_source
      downstream: [etl_processor]
    - name: etl_processor
      downstream: [clickhouse_sink]
    - name: clickhouse_sink