[Fix] IP Learning: adapt to the Client/Server field rename (TSG-23853)

zhanghongqing
2024-11-28 11:12:39 +08:00
parent 88451c2b3a
commit d30e8d86aa
6 changed files with 17 additions and 22 deletions


@@ -83,7 +83,7 @@ public class ArangoDBConnect {
                 collection.replaceDocuments(docUpdate);
             }
         } catch (Exception e) {
-            LOG.error("update failure" + e.toString());
+            LOG.error("update failure: " + e.toString());
         } finally {
             docInsert.clear();
             docInsert.clear();
@@ -101,11 +101,11 @@ public class ArangoDBConnect {
                 MultiDocumentEntity<DocumentCreateEntity<T>> documentCreateEntityMultiDocumentEntity = collection.insertDocuments(docOverwrite, documentCreateOptions);
                 Collection<ErrorEntity> errors = documentCreateEntityMultiDocumentEntity.getErrors();
                 for (ErrorEntity errorEntity : errors) {
-                    LOG.warn("write arangoDB error" + errorEntity.getErrorMessage());
+                    LOG.warn("write arangoDB error: " + errorEntity.getErrorMessage());
                 }
             }
         } catch (Exception e) {
-            LOG.error("update failure" + e.toString());
+            LOG.error("update failure: " + e.toString());
         } finally {
             docOverwrite.clear();
         }


@@ -99,13 +99,13 @@ object BaseClickhouseData {
     val sql =
       s"""
          |(SELECT * FROM
-         |((SELECT ssl_sni AS FQDN,server_ip,MAX(recv_time) AS LAST_FOUND_TIME,MIN(recv_time) AS FIRST_FOUND_TIME,COUNT(*) AS COUNT_TOTAL,
-         |toString(groupUniqArray(${ApplicationConfig.DISTINCT_CLIENT_IP_NUM})(client_ip)) AS DIST_CIP_RECENT,'TLS' AS decoded_as_list, vsys_id AS VSYS_ID
+         |((SELECT ssl_sni AS FQDN,server_ip AS destination_ip,MAX(recv_time) AS LAST_FOUND_TIME,MIN(recv_time) AS FIRST_FOUND_TIME,COUNT(*) AS COUNT_TOTAL,
+         |toString(groupUniqArray(${ApplicationConfig.DISTINCT_CLIENT_IP_NUM})(source_ip)) AS DIST_CIP_RECENT,'TLS' AS decoded_as_list, vsys_id AS VSYS_ID
          |FROM ${ApplicationConfig.SPARK_READ_CLICKHOUSE_SESSION_TABLE}
          |WHERE $where and decoded_as = 'SSL' GROUP BY ssl_sni,server_ip,vsys_id)
          |UNION ALL
-         |(SELECT http_host AS FQDN,server_ip,MAX(recv_time) AS LAST_FOUND_TIME,MIN(recv_time) AS FIRST_FOUND_TIME,COUNT(*) AS COUNT_TOTAL,
-         |toString(groupUniqArray(${ApplicationConfig.DISTINCT_CLIENT_IP_NUM})(client_ip)) AS DIST_CIP_RECENT,'HTTP' AS decoded_as_list,vsys_id AS VSYS_ID
+         |(SELECT http_host AS FQDN,server_ip AS destination_ip,MAX(recv_time) AS LAST_FOUND_TIME,MIN(recv_time) AS FIRST_FOUND_TIME,COUNT(*) AS COUNT_TOTAL,
+         |toString(groupUniqArray(${ApplicationConfig.DISTINCT_CLIENT_IP_NUM})(source_ip)) AS DIST_CIP_RECENT,'HTTP' AS decoded_as_list,vsys_id AS VSYS_ID
          |FROM ${ApplicationConfig.SPARK_READ_CLICKHOUSE_SESSION_TABLE}
          |WHERE $where and decoded_as = 'HTTP' GROUP BY http_host,server_ip,vsys_id))
          |WHERE FQDN != '') as dbtable
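
For reference, the aliased subquery above ends in "as dbtable", which suggests it is handed to Spark's JDBC data source as a derived table. A minimal sketch of that usage, assuming a JDBC-based ClickHouse reader; the URL, driver class, and helper name below are illustrative and not taken from this repository:

import org.apache.spark.sql.{DataFrame, SparkSession}

// Hypothetical helper: pass the "(SELECT ...) as dbtable" string as the JDBC dbtable option.
def readSessionSlice(spark: SparkSession, dbtable: String): DataFrame =
  spark.read
    .format("jdbc")
    .option("url", "jdbc:clickhouse://ch-host:8123/default")   // illustrative endpoint
    .option("driver", "com.clickhouse.jdbc.ClickHouseDriver")  // illustrative driver class
    .option("dbtable", dbtable)                                // the derived-table subquery built above
    .load()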


@@ -50,8 +50,10 @@ object MergeDataFrame {
   def mergeRelationFqdnLocateIp(): RDD[(String, (Option[BaseEdgeDocument], Row))] = {
     val frame = BaseClickhouseData.getRelationFqdnLocateIpDf
-      .repartition()
-      .filter(row => isDomain(row.getAs[String]("FQDN")))
+      .repartition(ApplicationConfig.SPARK_SQL_SHUFFLE_PARTITIONS)
+      .filter(row => isDomain(row.getAs[String]("FQDN")) &&
+        row.getAs[String]("server_ip") != null &&
+        row.getAs[Integer]("VSYS_ID") != null)
       .groupBy("FQDN", "server_ip", "VSYS_ID")
       .agg(
         min("FIRST_FOUND_TIME").alias("FIRST_FOUND_TIME"),
@@ -61,14 +63,7 @@ object MergeDataFrame {
         collect_set("DIST_CIP_RECENT").alias("DIST_CIP_RECENT")
       )
-    val fqdnLocIpRddRow = frame.rdd
-      .filter(row => {
-        // 检查 server_ip 和 VSYS_ID 是否为 null
-        val serverIp = row.getAs[String]("server_ip")
-        val vsysId = row.getAs[Integer]("VSYS_ID")
-        serverIp != null && vsysId != null
-      })
-      .map(row => {
+    val fqdnLocIpRddRow = frame.rdd.map(row => {
       val fqdn = row.getAs[String]("FQDN")
       val serverIp = row.getAs[String]("server_ip")
       val vsysId = row.getAs[Integer]("VSYS_ID").toLong
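
For context, the two hunks above move the server_ip / VSYS_ID null guard out of the post-aggregation RDD filter and into the DataFrame filter that runs before groupBy, so null grouping keys never reach the aggregation. A minimal, self-contained sketch of that pattern with toy data and a local master (not code from this repository):

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.min

object NullGuardBeforeGroupBySketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("null-guard-sketch").getOrCreate()
    import spark.implicits._

    val rows = Seq(
      (Some("1.2.3.4"), Some(1), 100L),
      (None,            Some(1), 200L),  // null server_ip: dropped before aggregation
      (Some("5.6.7.8"), None,    300L)   // null VSYS_ID: dropped before aggregation
    ).toDF("server_ip", "VSYS_ID", "FIRST_FOUND_TIME")

    val grouped = rows
      .filter($"server_ip".isNotNull && $"VSYS_ID".isNotNull)  // guard nulls before grouping
      .groupBy("server_ip", "VSYS_ID")
      .agg(min("FIRST_FOUND_TIME").alias("FIRST_FOUND_TIME"))

    grouped.show()  // only the (1.2.3.4, 1) group survives
    spark.stop()
  }
}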


@@ -72,7 +72,6 @@ object UpdateDocument {
         val document: T = getDocumentRow(row)
         if (document != null) {
           fqdnAccmu.add(1)
-          // println(document)
           resultDocumentList.add(document)
         }
         i += 1
@@ -91,7 +90,7 @@ object UpdateDocument {
       LOG.warn(s"update $collName rows: ${fqdnAccmu.value}")
       val last = System.currentTimeMillis()
-      LOG.warn(s"update $collName time: ${last - start}")
+      LOG.warn(s"update $collName time: ${last - start}ms")
     } catch {
       case e: Exception => e.printStackTrace()
     }


@@ -49,10 +49,10 @@ class ArangoRdd[T: ClassTag](@transient override val sparkContext: SparkContext,
       val separate = split.separate
       val collection = options.collection
       val sql = s"FOR doc IN $collection limit $offset,$separate RETURN doc"
-      LOG.info(sql)
+      LOG.info(s"Executing query: $sql")
       arangoCursor = arangoDB.db(options.database).query(sql,bindVars,queryOptions,clazz.runtimeClass.asInstanceOf[Class[T]])
     }catch {
-      case e: Exception => LOG.error(s"创建Cursor异常:${e.getMessage}")
+      case e: Exception => LOG.error(s"create Cursor error: ${e.getMessage}")
     }finally {
       arangoDB.shutdown()
     }
@@ -81,7 +81,7 @@ class ArangoRdd[T: ClassTag](@transient override val sparkContext: SparkContext,
         cnt = ApplicationConfig.ARANGODB_TOTAL_NUM
       }
     } catch {
-      case e: Exception => LOG.error(sql + s"执行异常:${e.getMessage}")
+      case e: Exception => LOG.error(sql + s"execute error: ${e.getMessage}")
     }finally {
       arangoDB.shutdown()
     }
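
For context, each partition of this RDD pages through the collection with the AQL built above (FOR doc IN <collection> limit offset,separate RETURN doc). A small sketch of how such offset/length splits can be planned; the case class, counts, and collection name are illustrative, not this class's actual fields:

// Illustrative split planning for offset/length paging over an ArangoDB collection.
case class SplitPlan(index: Int, offset: Long, separate: Long)

def planSplits(totalDocs: Long, numPartitions: Int): Seq[SplitPlan] = {
  val chunk = math.ceil(totalDocs.toDouble / numPartitions).toLong
  (0 until numPartitions).map { i =>
    val offset = i * chunk
    SplitPlan(i, offset, math.min(chunk, math.max(0L, totalDocs - offset)))
  }
}

// Example: 10 documents over 3 partitions -> limit 0,4 / limit 4,4 / limit 8,2
planSplits(10, 3).foreach { s =>
  println(s"FOR doc IN my_collection limit ${s.offset},${s.separate} RETURN doc")
}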


@@ -27,6 +27,7 @@ object SparkSessionUtil {
       .config("arangodb.hosts", s"${ApplicationConfig.ARANGODB_HOST}:${ApplicationConfig.ARANGODB_PORT}")
       .config("arangodb.user", ApplicationConfig.ARANGODB_USER)
       .config("arangodb.password", ApplicationConfig.ARANGODB_PASSWORD)
+      .config("spark.sql.objectHashAggregate.sortBased.fallbackThreshold", Integer.MAX_VALUE)
       .master(ApplicationConfig.MASTER)
       .getOrCreate()
     LOG.warn("spark session start success")