CN Groot-Stream 配置模版

This commit is contained in:
gujinkai
2024-04-09 11:08:01 +08:00
parent 4461905788
commit 9d6260ba68
9 changed files with 456 additions and 32 deletions

View File

@@ -0,0 +1,2 @@
v1.2.2 (2024-04-08)
https://git.mesalab.cn/galaxy/platform/groot-stream/-/releases/v1.2.2

View File

@@ -29,24 +29,6 @@ processing_pipelines:
remove_fields: remove_fields:
output_fields: output_fields:
functions: # [array of object] Function List functions: # [array of object] Function List
- function: SNOWFLAKE_ID
lookup_fields: [ '' ]
output_fields: [ log_id ]
filter:
parameters:
data_center_id_num: 1
- function: UNIX_TIMESTAMP_CONVERTER
lookup_fields: [ __timestamp ]
output_fields: [ recv_time ]
parameters:
precision: seconds
- function: EVAL
output_fields: [ ingestion_time ]
parameters:
value_expression: recv_time
- function: EVAL - function: EVAL
output_fields: [ domain ] output_fields: [ domain ]
parameters: parameters:

View File

@@ -103,6 +103,4 @@ grootstream:
token: 1a653ea0-d39b-4246-94b0-1ba95db4b6a7 token: 1a653ea0-d39b-4246-94b0-1ba95db4b6a7
properties: properties:
hos.path: http://192.168.44.12:8089 scheduler.knowledge_base.update.interval.minutes: 5
hos.bucket.name.traffic_file: traffic_file_bucket
hos.bucket.name.troubleshooting_file: troubleshooting_file_bucket

View File

@@ -1,18 +1,7 @@
com.geedgenetworks.core.udf.AsnLookup com.geedgenetworks.core.udf.AsnLookup
com.geedgenetworks.core.udf.CurrentUnixTimestamp
com.geedgenetworks.core.udf.DecodeBase64
com.geedgenetworks.core.udf.Domain
com.geedgenetworks.core.udf.Drop
com.geedgenetworks.core.udf.Eval com.geedgenetworks.core.udf.Eval
com.geedgenetworks.core.udf.FromUnixTimestamp
com.geedgenetworks.core.udf.GenerateStringArray com.geedgenetworks.core.udf.GenerateStringArray
com.geedgenetworks.core.udf.GeoIpLookup com.geedgenetworks.core.udf.GeoIpLookup
com.geedgenetworks.core.udf.JsonExtract
com.geedgenetworks.core.udf.PathCombine
com.geedgenetworks.core.udf.Rename
com.geedgenetworks.core.udf.SnowflakeId
com.geedgenetworks.core.udf.StringJoiner
com.geedgenetworks.core.udf.UnixTimestampConverter
com.geedgenetworks.core.udf.cn.L7ProtocolAndAppExtract com.geedgenetworks.core.udf.cn.L7ProtocolAndAppExtract
com.geedgenetworks.core.udf.cn.IdcRenterLookup com.geedgenetworks.core.udf.cn.IdcRenterLookup
com.geedgenetworks.core.udf.cn.LinkDirectionLookup com.geedgenetworks.core.udf.cn.LinkDirectionLookup

View File

@@ -0,0 +1,2 @@
v1.2.2 (2024-04-08)
https://git.mesalab.cn/galaxy/platform/groot-stream/-/releases/v1.2.2

View File

@@ -0,0 +1,326 @@
# Input connectors, keyed by node name. `kafka_source` is the name the
# application topology uses for this node.
# NOTE(review): the nesting was restored from a view that had lost its
# indentation; confirm that `format` belongs under `properties`.
sources:
  kafka_source:
    type: kafka
    # fields: # [array of object] Field List, if not set, all fields(Map<String, Object>) will be output.
    properties:  # [object] Source Properties
      topic: SESSION-RECORD
      kafka.bootstrap.servers: 192.168.44.11:9094,192.168.44.13:9094,192.168.44.14:9094,192.168.44.15:9094,192.168.44.16:9094
      kafka.session.timeout.ms: 60000
      kafka.max.poll.records: 3000
      kafka.max.partition.fetch.bytes: 31457280
      kafka.security.protocol: SASL_PLAINTEXT
      # The SSL entries below are intentionally left without a value
      # (YAML reads a bare `key:` as null).
      kafka.ssl.keystore.location:
      kafka.ssl.keystore.password:
      kafka.ssl.truststore.location:
      kafka.ssl.truststore.password:
      kafka.ssl.key.password:
      kafka.sasl.mechanism: PLAIN
      # NOTE(review): username and password are committed in plain text;
      # replace them per deployment.
      kafka.sasl.jaas.config: org.apache.kafka.common.security.plain.PlainLoginModule required username="admin" password="galaxy2019";
      kafka.buffer.memory:
      # NOTE(review): looks like a test-environment consumer group; confirm
      # before reusing this template.
      kafka.group.id: 44.55-test
      kafka.auto.offset.reset: latest
      kafka.max.request.size:
      kafka.compression.type: none
      format: json  # [string] Data Format, default is json
# Enrichment pipeline applied to every session record. Functions run in the
# order listed; later entries reuse fields written by earlier ones (for
# example `domain` and `app`).
# NOTE(review): the nesting was restored from a view that had lost its
# indentation; verify against the consumer's schema.
processing_pipelines:
  session_record_processor:  # [object] Processing Pipeline
    type: com.geedgenetworks.core.processor.projection.ProjectionProcessorImpl
    remove_fields:
    output_fields:
    functions:  # [array of object] Function List
      # Copy server_fqdn / server_domain into the fields used by the
      # domain lookups below.
      - function: EVAL
        output_fields: [ domain ]
        parameters:
          value_expression: server_fqdn
      - function: EVAL
        output_fields: [ domain_sld ]
        parameters:
          value_expression: server_domain
      - function: CN_L7_PROTOCOL_AND_APP_EXTRACT
        parameters:
          decoded_path_field_name: decoded_path
          app_transition_field_name: app_transition
          l7_protocol_field_name: l7_protocol
          app_field_name: app
          l7_protocol: DHCP,DNS,FTP,GRE,GTP,HTTP,HTTPS,ICMP,IMAP,IMAPS,IPSEC,ISAKMP,XMPP,L2TP,LDAP,MMS,NETBIOS,NETFLOW,NTP,POP3,POP3S,RDP,PPTP,RADIUS,RTCP,RTP,RTSP,SIP,SMB,SMTP,SMTPS,SNMP,SSDP,SSH,SSL,STUN,TELNET,TFTP,OPENVPN,RTMP,TEREDO,FTPS,DTLS,SPDY,BJNP,QUIC,MDNS,Unknown TCP,Unknown UDP,Unknown Other,IKE,MAIL,SOCKS,DoH,SLP,SSL with ESNI,ISATAP,Stratum,SSL with ECH
      # Geolocation, once per endpoint; the mapping names the output field
      # for each attribute.
      - function: GEOIP_LOOKUP
        lookup_fields: [ client_ip ]
        output_fields: [ ]
        parameters:
          kb_name: cn_ip_location
          option: IP_TO_OBJECT
          geolocation_field_mapping:
            COUNTRY: client_country_region
            PROVINCE: client_super_admin_area
            CITY: client_admin_area
            LONGITUDE: client_longitude
            LATITUDE: client_latitude
            ISP: client_isp
      - function: GEOIP_LOOKUP
        lookup_fields: [ server_ip ]
        output_fields: [ ]
        parameters:
          kb_name: cn_ip_location
          option: IP_TO_OBJECT
          geolocation_field_mapping:
            COUNTRY: server_country_region
            PROVINCE: server_super_admin_area
            CITY: server_admin_area
            LONGITUDE: server_longitude
            LATITUDE: server_latitude
            ISP: server_isp
      - function: ASN_LOOKUP
        lookup_fields: [ client_ip ]
        output_fields: [ client_asn ]
        parameters:
          option: IP_TO_ASN
          kb_name: cn_ip_asn
      - function: ASN_LOOKUP
        lookup_fields: [ server_ip ]
        output_fields: [ server_asn ]
        parameters:
          option: IP_TO_ASN
          kb_name: cn_ip_asn
      - function: CN_IDC_RENTER_LOOKUP
        lookup_fields: [ client_ip ]
        output_fields: [ client_idc_renter ]
        parameters:
          kb_name: cn_idc_renter
      - function: CN_IDC_RENTER_LOOKUP
        lookup_fields: [ server_ip ]
        output_fields: [ server_idc_renter ]
        parameters:
          kb_name: cn_idc_renter
      - function: CN_LINK_DIRECTION_LOOKUP
        lookup_fields: [ in_link_id ]
        output_fields: [ in_link_direction ]
        parameters:
          kb_name: cn_link_direction
      - function: CN_LINK_DIRECTION_LOOKUP
        lookup_fields: [ out_link_id ]
        output_fields: [ out_link_direction ]
        parameters:
          kb_name: cn_link_direction
      # Domain-based lookups, all keyed on the `domain` field set above.
      - function: CN_FQDN_CATEGORY_LOOKUP
        lookup_fields: [ domain ]
        parameters:
          kb_name: cn_fqdn_category
          field_mapping:
            NAME: domain_category_name
            GROUP: domain_category_group
            REPUTATION_LEVEL: domain_reputation_level
      - function: CN_ICP_LOOKUP
        lookup_fields: [ domain ]
        output_fields: [ domain_icp_company_name ]
        parameters:
          kb_name: cn_fqdn_icp
      - function: CN_FQDN_WHOIS_LOOKUP
        lookup_fields: [ domain ]
        output_fields: [ domain_whois_org ]
        parameters:
          kb_name: cn_fqdn_whois
      - function: CN_DNS_SERVER_LOOKUP
        lookup_fields: [ server_ip ]
        output_fields: [ server_dns_server ]
        parameters:
          kb_name: cn_dns_server
      - function: CN_APP_CATEGORY_LOOKUP
        lookup_fields: [ app ]
        parameters:
          kb_name: cn_app_category
          field_mapping:
            CATEGORY: app_category
            SUBCATEGORY: app_subcategory
            COMPANY: app_company
            COMPANY_CATEGORY: app_company_category
      # Zone from the `flags` bitmask. The bit test is parenthesised so the
      # result does not depend on how the expression engine ranks `&`
      # against `==` (with Java-style precedence `==` would bind first).
      - function: EVAL
        output_fields: [ client_zone ]
        parameters:
          value_expression: "(flags & 8) == 8 ? 'internal' : 'external'"
      - function: EVAL
        output_fields: [ server_zone ]
        parameters:
          value_expression: "(flags & 16) == 16 ? 'internal' : 'external'"
      # NOTE(review): these write the same output fields as the two EVAL
      # entries above; with `kb_name: none` they presumably leave the value
      # untouched. Confirm.
      - function: CN_IP_ZONE_LOOKUP
        lookup_fields: [ client_ip ]
        output_fields: [ client_zone ]
        parameters:
          kb_name: none
          # kb_name: cn_internal_ip
      - function: CN_IP_ZONE_LOOKUP
        lookup_fields: [ server_ip ]
        output_fields: [ server_zone ]
        parameters:
          kb_name: none
          # kb_name: cn_internal_ip
      - function: CN_VPN_LOOKUP
        lookup_fields: [ server_ip ]
        output_fields: [ server_vpn_service_name ]
        parameters:
          kb_name: cn_vpn_learning_ip
          option: IP_TO_VPN
      - function: CN_VPN_LOOKUP
        lookup_fields: [ domain ]
        output_fields: [ domain_vpn_service_name ]
        parameters:
          kb_name: cn_vpn_learning_domain
          option: DOMAIN_TO_VPN
      - function: CN_ANONYMITY_LOOKUP
        lookup_fields: [ server_ip ]
        output_fields: [ server_node_type ]
        parameters:
          kb_name: cn_ioc_darkweb
          option: IP_TO_NODE_TYPE
      - function: CN_ANONYMITY_LOOKUP
        lookup_fields: [ domain ]
        output_fields: [ domain_node_type ]
        parameters:
          kb_name: cn_ioc_darkweb
          option: DOMAIN_TO_NODE_TYPE
      - function: CN_IOC_LOOKUP
        lookup_fields: [ server_ip ]
        output_fields: [ server_malware ]
        parameters:
          kb_name: cn_ioc_malware
          option: IP_TO_MALWARE
      - function: CN_IOC_LOOKUP
        lookup_fields: [ domain ]
        output_fields: [ domain_malware ]
        parameters:
          kb_name: cn_ioc_malware
          option: DOMAIN_TO_MALWARE
      - function: CN_USER_DEFINE_TAG_LOOKUP
        lookup_fields: [ client_ip ]
        output_fields: [ client_ip_tags ]
        parameters:
          kb_name: cn_ip_tag_user_define
          option: IP_TO_TAG
      - function: CN_USER_DEFINE_TAG_LOOKUP
        lookup_fields: [ server_ip ]
        output_fields: [ server_ip_tags ]
        parameters:
          kb_name: cn_ip_tag_user_define
          option: IP_TO_TAG
      - function: CN_USER_DEFINE_TAG_LOOKUP
        lookup_fields: [ domain ]
        output_fields: [ domain_tags ]
        parameters:
          kb_name: cn_domain_tag_user_define
          option: DOMAIN_TO_TAG
      - function: CN_USER_DEFINE_TAG_LOOKUP
        lookup_fields: [ app ]
        output_fields: [ app_tags ]
        parameters:
          kb_name: cn_app_tag_user_define
          option: APP_TO_TAG
      # Each *_tags field is rebuilt from the lookup results listed in
      # lookup_fields; the output field is itself one of the inputs.
      - function: GENERATE_STRING_ARRAY
        lookup_fields: [ client_idc_renter,client_ip_tags ]
        output_fields: [ client_ip_tags ]
      - function: GENERATE_STRING_ARRAY
        lookup_fields: [ server_idc_renter,server_dns_server,server_node_type,server_malware,server_vpn_service_name,server_ip_tags ]
        output_fields: [ server_ip_tags ]
      - function: GENERATE_STRING_ARRAY
        lookup_fields: [ domain_node_type,domain_malware,domain_vpn_service_name,domain_tags ]
        output_fields: [ domain_tags ]
      # Rewrite each tag array in place using the configured prefix.
      - function: CN_ARRAY_ELEMENTS_PREPEND
        lookup_fields: [ client_ip_tags ]
        output_fields: [ client_ip_tags ]
        parameters:
          prefix: ip.
      - function: CN_ARRAY_ELEMENTS_PREPEND
        lookup_fields: [ server_ip_tags ]
        output_fields: [ server_ip_tags ]
        parameters:
          prefix: ip.
      - function: CN_ARRAY_ELEMENTS_PREPEND
        lookup_fields: [ domain_tags ]
        output_fields: [ domain_tags ]
        parameters:
          prefix: domain.
      - function: CN_ARRAY_ELEMENTS_PREPEND
        lookup_fields: [ app_tags ]
        output_fields: [ app_tags ]
        parameters:
          prefix: app.
# Final projection: only the fields listed under output_fields are named
# for the record handed to the sink. The list is written as a block
# sequence (one field per line) so additions and removals diff cleanly;
# its contents and order are unchanged.
postprocessing_pipelines:
  remove_field_processor:  # [object] Processing Pipeline
    type: com.geedgenetworks.core.processor.projection.ProjectionProcessorImpl
    output_fields:
      - recv_time
      - log_id
      - flags
      - start_timestamp_ms
      - end_timestamp_ms
      - duration_ms
      - decoded_as
      - client_ip
      - server_ip
      - client_port
      - server_port
      - app
      - app_transition
      - decoded_path
      - ip_protocol
      - l7_protocol
      - out_link_id
      - in_link_id
      - subscriber_id
      - imei
      - imsi
      - phone_number
      - apn
      - http_url
      - dns_rcode
      - dns_qname
      - dns_qtype
      - dns_rr
      - out_link_direction
      - in_link_direction
      - server_fqdn
      - server_domain
      - domain
      - domain_sld
      - domain_category_name
      - domain_category_group
      - domain_reputation_level
      - domain_icp_company_name
      - domain_whois_org
      - domain_tags
      - client_zone
      - client_country_region
      - client_super_admin_area
      - client_admin_area
      - client_longitude
      - client_latitude
      - client_isp
      - client_asn
      - client_ip_tags
      - server_zone
      - server_country_region
      - server_super_admin_area
      - server_admin_area
      - server_longitude
      - server_latitude
      - server_isp
      - server_asn
      - server_ip_tags
      - app_category
      - app_subcategory
      - app_company
      - app_company_category
      - app_tags
      - sent_pkts
      - sent_bytes
      - received_pkts
      - received_bytes
      - sessions
      - tcp_c2s_lost_bytes
      - tcp_s2c_lost_bytes
      - tcp_c2s_o3_pkts
      - tcp_s2c_o3_pkts
      - tcp_c2s_rtx_bytes
      - tcp_s2c_rtx_bytes
      - tcp_c2s_rtx_pkts
      - tcp_s2c_rtx_pkts
      - tcp_rtt_ms
      - http_response_latency_ms
      - ssl_handshake_latency_ms
      - dns_response_latency_ms
      - cn_internal_rule_id_list
      - cn_internal_ioc_type_list
# Output connectors, keyed by node name. `kafka_sink_a` is the last node in
# the application topology.
# NOTE(review): the nesting was restored from a view that had lost its
# indentation; confirm that `format` belongs under `properties`.
sinks:
  kafka_sink_a:
    type: kafka
    properties:
      topic: SESSION-RECORD-CN
      kafka.bootstrap.servers: 192.168.44.55:9092
      kafka.retries: 0
      kafka.linger.ms: 10
      kafka.request.timeout.ms: 30000
      kafka.batch.size: 262144
      kafka.buffer.memory: 134217728
      kafka.max.request.size: 10485760
      kafka.compression.type: snappy
      # Security settings are left without a value (YAML reads a bare
      # `key:` as null); fill them in when the target cluster requires
      # SASL/SSL.
      kafka.security.protocol:
      kafka.ssl.keystore.location:
      kafka.ssl.keystore.password:
      kafka.ssl.truststore.location:
      kafka.ssl.truststore.password:
      kafka.ssl.key.password:
      kafka.sasl.mechanism:
      kafka.sasl.jaas.config:
      format: json
# Job-level settings and the node graph that wires source -> processor ->
# post-processor -> sink.
# NOTE(review): the nesting was restored from a view that had lost its
# indentation; verify against the consumer's schema.
application:  # [object] Application Configuration
  env:  # [object] Environment Variables
    name: groot-stream-job  # [string] Job Name
    parallelism: 3  # [number] Job-Level Parallelism
    pipeline:
      object-reuse: true  # [boolean] Object Reuse, default is false
  topology:  # [array of object] Node List. It will be used build data flow for job dag graph.
    # [string] Node Name, must be unique. It will be used as the name of the
    # corresponding Flink operator. eg. kafka_source the processor type as SOURCE.
    - name: kafka_source
      # parallelism: 1 # [number] Operator-Level Parallelism.
      downstream: [ session_record_processor ]  # [array of string] Downstream Node Name List.
    - name: session_record_processor
      downstream: [ remove_field_processor ]
    - name: remove_field_processor
      downstream: [ kafka_sink_a ]
    # Terminal node: nothing downstream.
    - name: kafka_sink_a
      downstream: [ ]

View File

@@ -0,0 +1,106 @@
# Engine-level configuration: the knowledge bases that the job's lookup
# functions reference through `kb_name`, plus global properties.
# NOTE(review): the nesting was restored from a view that had lost its
# indentation; verify against the consumer's schema.
grootstream:
  knowledge_base:
    # Entries with a `files` list name specific file ids under the shared
    # knowledge-base endpoint.
    - name: cn_ip_location
      fs_type: http
      fs_path: http://192.168.44.55:9999/v1/knowledge_base
      files:
        - 1
    - name: cn_ip_asn
      fs_type: http
      fs_path: http://192.168.44.55:9999/v1/knowledge_base
      files:
        - 2
    - name: cn_idc_renter
      fs_type: http
      fs_path: http://192.168.44.55:9999/v1/knowledge_base
      files:
        - 11
    - name: cn_link_direction
      fs_type: http
      fs_path: http://192.168.44.55:9999/v1/knowledge_base
      files:
        - 13
    - name: cn_fqdn_category
      fs_type: http
      fs_path: http://192.168.44.55:9999/v1/knowledge_base
      files:
        - 5
    - name: cn_fqdn_icp
      fs_type: http
      fs_path: http://192.168.44.55:9999/v1/knowledge_base
      files:
        - 4
    - name: cn_fqdn_whois
      fs_type: http
      fs_path: http://192.168.44.55:9999/v1/knowledge_base
      files:
        - 6
    - name: cn_dns_server
      fs_type: http
      fs_path: http://192.168.44.55:9999/v1/knowledge_base
      files:
        - 3
    - name: cn_app_category
      fs_type: http
      fs_path: http://192.168.44.55:9999/v1/knowledge_base
      files:
        - 9
    - name: cn_internal_ip
      fs_type: http
      fs_path: http://192.168.44.55:9999/v1/knowledge_base
      files:
        - 12
    - name: cn_vpn_learning_ip
      fs_type: http
      fs_path: http://192.168.44.55:9999/v1/knowledge_base
      files:
        - 15
    - name: cn_vpn_learning_domain
      fs_type: http
      fs_path: http://192.168.44.55:9999/v1/knowledge_base
      files:
        - 14
    - name: cn_ioc_darkweb
      fs_type: http
      fs_path: http://192.168.44.55:9999/v1/knowledge_base
      files:
        - 8
    - name: cn_ioc_malware
      fs_type: http
      fs_path: http://192.168.44.55:9999/v1/knowledge_base
      files:
        - 7
    # User-defined tag bases carry no `files` list; the category is
    # selected in the URL query instead. Note the spelling difference:
    # the names end in `_define`, the categories in `_defined`.
    - name: cn_ip_tag_user_define
      fs_type: http
      fs_path: http://192.168.44.55:9999/v1/knowledge_base?category=cn_ip_tag_user_defined
    - name: cn_domain_tag_user_define
      fs_type: http
      fs_path: http://192.168.44.55:9999/v1/knowledge_base?category=cn_domain_tag_user_defined
    - name: cn_app_tag_user_define
      fs_type: http
      fs_path: http://192.168.44.55:9999/v1/knowledge_base?category=cn_app_tag_user_defined
    - name: cn_rule
      fs_type: http
      fs_path: http://192.168.44.54:8090
      properties:
        # NOTE(review): access token committed in plain text; replace it
        # per deployment.
        token: 1a653ea0-d39b-4246-94b0-1ba95db4b6a7
  properties:
    scheduler.knowledge_base.update.interval.minutes: 5

View File

@@ -0,0 +1,19 @@
com.geedgenetworks.core.udf.AsnLookup
com.geedgenetworks.core.udf.Eval
com.geedgenetworks.core.udf.GenerateStringArray
com.geedgenetworks.core.udf.GeoIpLookup
com.geedgenetworks.core.udf.cn.L7ProtocolAndAppExtract
com.geedgenetworks.core.udf.cn.IdcRenterLookup
com.geedgenetworks.core.udf.cn.LinkDirectionLookup
com.geedgenetworks.core.udf.cn.FqdnCategoryLookup
com.geedgenetworks.core.udf.cn.IcpLookup
com.geedgenetworks.core.udf.cn.FqdnWhoisLookup
com.geedgenetworks.core.udf.cn.DnsServerInfoLookup
com.geedgenetworks.core.udf.cn.AppCategoryLookup
com.geedgenetworks.core.udf.cn.IpZoneLookup
com.geedgenetworks.core.udf.cn.VpnLookup
com.geedgenetworks.core.udf.cn.AnonymityLookup
com.geedgenetworks.core.udf.cn.IocLookup
com.geedgenetworks.core.udf.cn.UserDefineTagLookup
com.geedgenetworks.core.udf.cn.FieldsMerge
com.geedgenetworks.core.udf.cn.ArrayElementsPrepend