diff --git a/ip-learning-spark/src/main/scala/cn/ac/iie/dao/BaseClickhouseData.scala b/ip-learning-spark/src/main/scala/cn/ac/iie/dao/BaseClickhouseData.scala index 51ac9e6..b3e3c6c 100644 --- a/ip-learning-spark/src/main/scala/cn/ac/iie/dao/BaseClickhouseData.scala +++ b/ip-learning-spark/src/main/scala/cn/ac/iie/dao/BaseClickhouseData.scala @@ -99,16 +99,16 @@ object BaseClickhouseData { val sql = s""" |(SELECT * FROM - |((SELECT ssl_sni AS FQDN,server_ip AS destination_ip,MAX(recv_time) AS LAST_FOUND_TIME,MIN(recv_time) AS FIRST_FOUND_TIME,COUNT(*) AS COUNT_TOTAL, - |toString(groupUniqArray(${ApplicationConfig.DISTINCT_CLIENT_IP_NUM})(source_ip)) AS DIST_CIP_RECENT,'TLS' AS decoded_as_list, vsys_id AS VSYS_ID + |(SELECT ssl_sni AS FQDN,destination_ip AS server_ip, MAX(recv_time) AS LAST_FOUND_TIME,MIN(recv_time) AS FIRST_FOUND_TIME,COUNT(*) AS COUNT_TOTAL, + |toString(groupUniqArray(${ApplicationConfig.DISTINCT_CLIENT_IP_NUM})(source_ip)) AS DIST_CIP_RECENT, 'TLS' AS decoded_as_list, vsys_id AS VSYS_ID |FROM ${ApplicationConfig.SPARK_READ_CLICKHOUSE_SESSION_TABLE} - |WHERE $where and decoded_as = 'SSL' GROUP BY ssl_sni,server_ip,vsys_id) + |WHERE $where and decoded_as = 'SSL' and notEmpty(ssl_sni) and notEmpty(destination_ip) and vsys_id IS NOT NULL GROUP BY ssl_sni,destination_ip,vsys_id) |UNION ALL - |(SELECT http_host AS FQDN,server_ip AS destination_ip,MAX(recv_time) AS LAST_FOUND_TIME,MIN(recv_time) AS FIRST_FOUND_TIME,COUNT(*) AS COUNT_TOTAL, - |toString(groupUniqArray(${ApplicationConfig.DISTINCT_CLIENT_IP_NUM})(source_ip)) AS DIST_CIP_RECENT,'HTTP' AS decoded_as_list,vsys_id AS VSYS_ID + |(SELECT http_host AS FQDN,destination_ip AS server_ip, MAX(recv_time) AS LAST_FOUND_TIME,MIN(recv_time) AS FIRST_FOUND_TIME,COUNT(*) AS COUNT_TOTAL, + |toString(groupUniqArray(${ApplicationConfig.DISTINCT_CLIENT_IP_NUM})(source_ip)) AS DIST_CIP_RECENT, 'HTTP' AS decoded_as_list,vsys_id AS VSYS_ID |FROM ${ApplicationConfig.SPARK_READ_CLICKHOUSE_SESSION_TABLE} - |WHERE $where and decoded_as = 'HTTP' GROUP BY http_host,server_ip,vsys_id)) - |WHERE FQDN != '') as dbtable + |WHERE $where and decoded_as = 'HTTP' and notEmpty(http_host) and notEmpty(destination_ip) and vsys_id IS NOT NULL GROUP BY http_host,destination_ip,vsys_id) + |) as dbtable """.stripMargin LOG.warn(sql) val frame = initClickhouseData(sql)