# Job configuration: etl_traffic_sketch_metric
#
# Data flow (declared in application.topology at the bottom of this file):
#   kafka_source -> etl_processor -> clickhouse_sink

sources:
  kafka_source:
    type: kafka
    properties:
      topic: TRAFFIC-SKETCH-METRIC
      # Templated value; kept quoted so an empty or special-character
      # expansion still yields a string.
      kafka.bootstrap.servers: "{{ kafka_source_servers }}"
      kafka.client.id: TRAFFIC-SKETCH-METRIC
      kafka.session.timeout.ms: 60000
      kafka.max.poll.records: 3000
      kafka.max.partition.fetch.bytes: 31457280
      kafka.security.protocol: SASL_PLAINTEXT
      kafka.sasl.mechanism: PLAIN
      # Hex-encoded value; presumably ciphertext handled according to
      # application.env.shade.identifier - confirm. Quoted so that no tool
      # re-types or reformats it.
      kafka.sasl.jaas.config: "454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252"
      kafka.group.id: etl_traffic_sketch_metric
      kafka.auto.offset.reset: latest
      kafka.compression.type: none
      # NOTE(review): `format` is kept with the other source properties,
      # following the key order of the original; confirm it is not expected
      # as a sibling of `type`.
      format: json

processing_pipelines:
  # [object] Processing Pipeline
  etl_processor:
    type: com.geedgenetworks.core.processor.projection.ProjectionProcessorImpl
    # Both keys below are left without a value (they parse as null).
    remove_fields:
    output_fields:
    # [array of object] Function List, written in the order shown.
    functions:
      # Operates on the `fields` and `tags` entries with depth 3.
      - function: FLATTEN
        lookup_fields: [fields, tags]
        output_fields: []
        parameters:
          # prefix: ""
          depth: 3
          # delimiter: "."

      # Rewrites each key by removing every match of 'tags.' and 'fields.'.
      # NOTE(review): if string.replace_all takes a regular expression, the
      # unescaped '.' matches any character and the match is not anchored to
      # the start of the key - confirm this is intended.
      - function: RENAME
        lookup_fields: ['']
        output_fields: ['']
        filter:
        parameters:
          # parent_fields: [tags]
          # rename_fields:
          #   tags: tags
          rename_expression: "key =string.replace_all(key,'tags.','');key =string.replace_all(key,'fields.','');return key;"

      # internal_ip takes client_ip when the condition holds, else server_ip.
      # NOTE(review): the condition uses a single `=` and an unquoted
      # `Outbound`; confirm the expression engine evaluates this as an
      # equality test against the string value "Outbound".
      - function: EVAL
        output_fields: [internal_ip]
        parameters:
          value_expression: 'direction=Outbound? client_ip : server_ip'

      # Mirror of the rule above: external_ip takes server_ip when the
      # condition holds, else client_ip. The same review note applies.
      - function: EVAL
        output_fields: [external_ip]
        parameters:
          value_expression: 'direction=Outbound? server_ip : client_ip'

      # Reads timestamp_ms and writes recv_time with `precision: seconds`.
      - function: UNIX_TIMESTAMP_CONVERTER
        lookup_fields: [timestamp_ms]
        output_fields: [recv_time]
        parameters:
          precision: seconds

      # Writes log_id; data_center_id_num is fixed to 1 for this job.
      - function: SNOWFLAKE_ID
        lookup_fields: ['']
        output_fields: [log_id]
        filter:
        parameters:
          data_center_id_num: 1

sinks:
  clickhouse_sink:
    type: clickhouse
    properties:
      host: "{{ clickhouse_servers }}"
      table: tsg_galaxy_v3.traffic_sketch_metric_local
      batch.size: 100000
      batch.interval: 30s
      # Hex-encoded credentials; presumably ciphertext handled according to
      # application.env.shade.identifier - confirm. Quoted to keep them
      # strings under any YAML loader.
      connection.user: "e54c9568586180eede1506eecf3574e9"
      connection.password: "86cf0e2ffba3f541a6c6761313e5cc7e"

application:
  # [object] Environment Variables
  env:
    name: etl_traffic_sketch_metric  # [string] Job Name
    shade.identifier: aes
  pipeline:
    object-reuse: true  # [boolean] Object Reuse, default is false
  # Each entry names a node and the nodes it feeds; the sink has no
  # downstream.
  topology:
    - name: kafka_source
      downstream: [etl_processor]
    - name: etl_processor
      downstream: [clickhouse_sink]
    - name: clickhouse_sink