补充提交

This commit is contained in:
wangkuan
2024-10-21 18:18:48 +08:00
parent 28dac5f4da
commit 7daea6cb26

View File

@@ -0,0 +1,144 @@
sources:
  kafka_source:
    type: kafka
    # Event-time watermark: taken from the `timestamp_ms` field (milliseconds),
    # allowing up to 60 000 ms of out-of-order lag.
    watermark_timestamp: timestamp_ms
    watermark_timestamp_unit: ms
    watermark_lag: 60000
    properties:
      topic: DOS-EVENT
      kafka.bootstrap.servers: "{{ kafka_source_servers }}"  # templated; quoted so an empty expansion stays a string
      kafka.client.id: DOS-PROTECTION-RULE-METRIC
      kafka.session.timeout.ms: 60000
      kafka.max.poll.records: 3000
      kafka.max.partition.fetch.bytes: 31457280  # 30 MiB per-partition fetch cap
      kafka.security.protocol: SASL_PLAINTEXT
      # SSL keys below are intentionally left unset: security.protocol is
      # SASL_PLAINTEXT, so no TLS keystore/truststore is used.
      kafka.ssl.keystore.location:
      kafka.ssl.keystore.password:
      kafka.ssl.truststore.location:
      kafka.ssl.truststore.password:
      kafka.ssl.key.password:
      kafka.sasl.mechanism: PLAIN
      # NOTE(review): value looks like an encrypted blob — presumably decoded via
      # application.env.shade.identifier (aes); confirm before editing.
      kafka.sasl.jaas.config: 454f65ea6eef1256e3067104f82730e737b68959560966b811e7ff364116b03124917eb2b0f3596f14733aa29ebad9352644ce1a5c85991c6f01ba8a5e8f177a80bea937958aaa485c2acc2b475603495a23eb59f055e037c0b186acb22886bd0275ca91f1633441d9943e7962942252
      kafka.buffer.memory:        # unset — use client default
      kafka.group.id: dos_event_kafka_to_clickhouse-20231221
      kafka.auto.offset.reset: latest
      kafka.max.request.size:     # unset — use client default
      kafka.compression.type: none
    # NOTE(review): nesting reconstructed from a flattened paste — `format` is
    # assumed to be a source-level option (payload deserialization), not a kafka property.
    format: json
processing_pipelines:
  pre_etl_processor:  # [object] Processing Pipeline
    type: projection
    functions:  # [array of object] Function List
      # Stamp each record with its ingestion time (whole seconds) as `recv_time`.
      - function: CURRENT_UNIX_TIMESTAMP
        output_fields: [ recv_time ]
        parameters:
          precision: seconds
  aggregate_processor:
    type: aggregate
    # One aggregate row per (vsys, DoS rule, server, client) combination.
    group_by_fields: [ vsys_id, rule_uuid, server_ip, client_ip ]
    window_type: tumbling_event_time  # tumbling_event_time,sliding_processing_time,sliding_event_time
    window_size: 600  # NOTE(review): presumably seconds (10-minute window) — confirm unit
    functions:
      # Totals over the window; without output_fields the result keeps the input name.
      - function: NUMBER_SUM
        lookup_fields: [ sessions ]
      - function: NUMBER_SUM
        lookup_fields: [ bytes ]
      - function: NUMBER_SUM
        lookup_fields: [ pkts ]
        output_fields: [ packets ]
      - function: FIRST_VALUE
        lookup_fields: [ client_country ]
      - function: FIRST_VALUE
        lookup_fields: [ server_country ]
      # Window bounds: earliest/latest event timestamp seen in the window.
      - function: NUMBER_MIN
        lookup_fields: [ timestamp_ms ]
        output_fields: [ start_timestamp_ms ]
      - function: NUMBER_MIN
        lookup_fields: [ recv_time ]
      - function: NUMBER_MAX
        lookup_fields: [ timestamp_ms ]
        output_fields: [ end_timestamp_ms ]
      - function: FIRST_VALUE
        lookup_fields: [ duration ]
  post_etl_processor:  # [object] Processing Pipeline
    type: projection
    remove_fields:   # unset — nothing removed
    output_fields:   # unset — all fields pass through
    functions:  # [array of object] Function List
      # Millisecond epoch bounds -> second-precision times.
      - function: UNIX_TIMESTAMP_CONVERTER
        lookup_fields: [ end_timestamp_ms ]
        output_fields: [ end_time ]
        parameters:
          precision: seconds
      - function: UNIX_TIMESTAMP_CONVERTER
        lookup_fields: [ start_timestamp_ms ]
        output_fields: [ start_time ]
        parameters:
          precision: seconds
      # Prefer the observed window span; fall back to the event's own
      # duration field (ms -> s) when start == end.
      - function: EVAL
        output_fields: [ duration ]
        parameters:
          value_expression: "((end_time-start_time) > 0)? (end_time-start_time) : (duration/1000)"
      - function: EVAL
        output_fields: [ end_time ]
        parameters:
          value_expression: start_time + duration
      # Per-second rates rounded to two decimal places.
      - function: EVAL
        output_fields: [ session_rate ]
        parameters:
          value_expression: math.round((double(sessions) / duration )*100)/100.0
      - function: EVAL
        output_fields: [ packet_rate ]
        parameters:
          value_expression: math.round((double(packets) / duration ) *100)/100.0
      - function: EVAL
        output_fields: [ bit_rate ]
        parameters:
          value_expression: math.round((double((bytes*8)) / duration) *100)/100.0
      # Normalize client/server naming to source/destination for the sink table.
      - function: RENAME
        parameters:
          rename_fields:
            client_ip: source_ip
            client_country: source_country
            server_ip: destination_ip
            server_country: destination_country
      # Unique log_id per row; lookup_fields takes no input here (empty string).
      - function: SNOWFLAKE_ID
        lookup_fields: ['']
        output_fields: [log_id]
        parameters:
          data_center_id_num: 1
sinks:
  clickhouse_sink:
    type: clickhouse
    properties:
      host: "{{ clickhouse_servers }}"  # templated; quoted so an empty expansion stays a string
      table: tsg_galaxy_v3.dos_event_local
      # Flush in batches of up to 100k rows, at least every 30 s.
      # NOTE(review): exact size-vs-interval semantics depend on the sink — confirm.
      batch.size: 100000
      batch.interval: 30s
      # NOTE(review): values look AES-encrypted (see application.env.shade.identifier) — confirm.
      connection.user: e54c9568586180eede1506eecf3574e9
      connection.password: 86cf0e2ffba3f541a6c6761313e5cc7e
application:
  env:
    name: dos_event_kafka_to_clickhouse
    shade.identifier: aes  # cipher id for the shaded (encrypted) property values in this file
  pipeline:
    object-reuse: true  # [boolean] Object Reuse, default is false

# Operator wiring is injected at render time; the commented sample below shows
# the expected expansion (source -> pre_etl -> aggregate -> post_etl -> sink).
{{ topology }}
# topology:
#   - name: kafka_source
#     downstream: [pre_etl_processor]
#   - name: pre_etl_processor
#     downstream: [aggregate_processor]
#   - name: aggregate_processor
#     downstream: [post_etl_processor]
#   - name: post_etl_processor
#     downstream: [clickhouse_sink]
#   - name: clickhouse_sink