[Fix] IP Learning: adapt to the Client/Server field renaming TSG-23853
@@ -83,7 +83,7 @@ public class ArangoDBConnect {
                 collection.replaceDocuments(docUpdate);
             }
         } catch (Exception e) {
-            LOG.error("update failure:" + e.toString());
+            LOG.error("update failure: " + e.toString());
         } finally {
             docInsert.clear();
             docInsert.clear();
@@ -101,11 +101,11 @@ public class ArangoDBConnect {
                 MultiDocumentEntity<DocumentCreateEntity<T>> documentCreateEntityMultiDocumentEntity = collection.insertDocuments(docOverwrite, documentCreateOptions);
                 Collection<ErrorEntity> errors = documentCreateEntityMultiDocumentEntity.getErrors();
                 for (ErrorEntity errorEntity : errors) {
-                    LOG.warn("write arangoDB error:" + errorEntity.getErrorMessage());
+                    LOG.warn("write arangoDB error: " + errorEntity.getErrorMessage());
                 }
             }
         } catch (Exception e) {
-            LOG.error("update failure:" + e.toString());
+            LOG.error("update failure: " + e.toString());
         } finally {
             docOverwrite.clear();
         }
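The two ArangoDBConnect hunks only touch log formatting, but the surrounding pattern is worth seeing whole: a batch insert with overwrite semantics, followed by per-document error inspection. Below is a minimal sketch of that pattern in Scala, assuming the com.arangodb Java driver; the host, database, and collection names are illustrative placeholders, not taken from this repo.

import com.arangodb.ArangoDB
import com.arangodb.entity.BaseDocument
import com.arangodb.model.DocumentCreateOptions
import scala.collection.JavaConverters._

object BatchUpsertSketch {
  def main(args: Array[String]): Unit = {
    val arango = new ArangoDB.Builder().host("127.0.0.1", 8529).user("root").build()
    try {
      // "ip_learning" / "fqdn" are hypothetical names for this sketch
      val collection = arango.db("ip_learning").collection("fqdn")
      val docs = (1 to 3).map { i =>
        val d = new BaseDocument()
        d.setKey(s"example-$i")
        d.addAttribute("FQDN", s"host$i.example.com")
        d
      }.asJava
      // overwrite(true) makes the insert replace an existing document with the
      // same key, so a single round trip covers both insert and update.
      val result = collection.insertDocuments(docs, new DocumentCreateOptions().overwrite(true))
      // Failures come back per document rather than as one exception; the
      // commit logs each ErrorEntity's message the same way.
      result.getErrors.asScala.foreach(e => println(s"write arangoDB error: ${e.getErrorMessage}"))
    } finally {
      arango.shutdown()
    }
  }
}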
@@ -99,13 +99,13 @@ object BaseClickhouseData {
     val sql =
       s"""
          |(SELECT * FROM
-         |((SELECT ssl_sni AS FQDN,server_ip,MAX(recv_time) AS LAST_FOUND_TIME,MIN(recv_time) AS FIRST_FOUND_TIME,COUNT(*) AS COUNT_TOTAL,
-         |toString(groupUniqArray(${ApplicationConfig.DISTINCT_CLIENT_IP_NUM})(client_ip)) AS DIST_CIP_RECENT,'TLS' AS decoded_as_list, vsys_id AS VSYS_ID
+         |((SELECT ssl_sni AS FQDN,server_ip AS destination_ip,MAX(recv_time) AS LAST_FOUND_TIME,MIN(recv_time) AS FIRST_FOUND_TIME,COUNT(*) AS COUNT_TOTAL,
+         |toString(groupUniqArray(${ApplicationConfig.DISTINCT_CLIENT_IP_NUM})(source_ip)) AS DIST_CIP_RECENT,'TLS' AS decoded_as_list, vsys_id AS VSYS_ID
          |FROM ${ApplicationConfig.SPARK_READ_CLICKHOUSE_SESSION_TABLE}
          |WHERE $where and decoded_as = 'SSL' GROUP BY ssl_sni,server_ip,vsys_id)
          |UNION ALL
-         |(SELECT http_host AS FQDN,server_ip,MAX(recv_time) AS LAST_FOUND_TIME,MIN(recv_time) AS FIRST_FOUND_TIME,COUNT(*) AS COUNT_TOTAL,
-         |toString(groupUniqArray(${ApplicationConfig.DISTINCT_CLIENT_IP_NUM})(client_ip)) AS DIST_CIP_RECENT,'HTTP' AS decoded_as_list,vsys_id AS VSYS_ID
+         |(SELECT http_host AS FQDN,server_ip AS destination_ip,MAX(recv_time) AS LAST_FOUND_TIME,MIN(recv_time) AS FIRST_FOUND_TIME,COUNT(*) AS COUNT_TOTAL,
+         |toString(groupUniqArray(${ApplicationConfig.DISTINCT_CLIENT_IP_NUM})(source_ip)) AS DIST_CIP_RECENT,'HTTP' AS decoded_as_list,vsys_id AS VSYS_ID
          |FROM ${ApplicationConfig.SPARK_READ_CLICKHOUSE_SESSION_TABLE}
          |WHERE $where and decoded_as = 'HTTP' GROUP BY http_host,server_ip,vsys_id))
          |WHERE FQDN != '') as dbtable
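The query above is handed to Spark as a parenthesized subquery in the dbtable option, which is how the JDBC source pushes the whole aggregation down to ClickHouse. A trimmed sketch of that pattern, assuming a hypothetical local ClickHouse with a "sessions" table and the legacy ru.yandex JDBC driver class (swap in your actual URL and driver):

import org.apache.spark.sql.SparkSession

object SubqueryReadSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("subquery-read").master("local[*]").getOrCreate()

    // Spark's JDBC source accepts a parenthesized SELECT wherever a table name
    // is expected, so ClickHouse runs the aggregation and only the grouped
    // rows cross the wire.
    val dbtable =
      """(SELECT ssl_sni AS FQDN, server_ip AS destination_ip,
        |        MAX(recv_time) AS LAST_FOUND_TIME
        |FROM sessions
        |WHERE decoded_as = 'SSL'
        |GROUP BY ssl_sni, server_ip) as dbtable""".stripMargin

    val df = spark.read
      .format("jdbc")
      .option("url", "jdbc:clickhouse://127.0.0.1:8123/default") // placeholder URL
      .option("driver", "ru.yandex.clickhouse.ClickHouseDriver")
      .option("dbtable", dbtable)
      .load()

    df.printSchema()
    spark.stop()
  }
}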
@@ -50,8 +50,10 @@ object MergeDataFrame {
 
   def mergeRelationFqdnLocateIp(): RDD[(String, (Option[BaseEdgeDocument], Row))] = {
     val frame = BaseClickhouseData.getRelationFqdnLocateIpDf
-      .repartition()
-      .filter(row => isDomain(row.getAs[String]("FQDN")))
+      .repartition(ApplicationConfig.SPARK_SQL_SHUFFLE_PARTITIONS)
+      .filter(row => isDomain(row.getAs[String]("FQDN")) &&
+        row.getAs[String]("server_ip") != null &&
+        row.getAs[Integer]("VSYS_ID") != null)
       .groupBy("FQDN", "server_ip", "VSYS_ID")
       .agg(
         min("FIRST_FOUND_TIME").alias("FIRST_FOUND_TIME"),
@@ -61,14 +63,7 @@ object MergeDataFrame {
         collect_set("DIST_CIP_RECENT").alias("DIST_CIP_RECENT")
       )
 
-    val fqdnLocIpRddRow = frame.rdd
-      .filter(row => {
-        // 检查 server_ip 和 VSYS_ID 是否为 null
-        val serverIp = row.getAs[String]("server_ip")
-        val vsysId = row.getAs[Integer]("VSYS_ID")
-        serverIp != null && vsysId != null
-      })
-      .map(row => {
+    val fqdnLocIpRddRow = frame.rdd.map(row => {
       val fqdn = row.getAs[String]("FQDN")
       val serverIp = row.getAs[String]("server_ip")
       val vsysId = row.getAs[Integer]("VSYS_ID").toLong
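The two MergeDataFrame hunks replace a post-aggregation RDD filter with null checks inside the DataFrame filter, so rows with a null server_ip or VSYS_ID never enter the shuffle. A self-contained sketch of that idea on synthetic data (column names follow the diff; everything else is made up for illustration):

import org.apache.spark.sql.{Row, SparkSession}
import org.apache.spark.sql.functions.min
import org.apache.spark.sql.types._

object FilterBeforeGroupBySketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("filter-first").master("local[*]").getOrCreate()

    val schema = StructType(Seq(
      StructField("FQDN", StringType),
      StructField("server_ip", StringType),
      StructField("VSYS_ID", IntegerType),
      StructField("FIRST_FOUND_TIME", LongType)))
    val rows = java.util.Arrays.asList(
      Row("a.example.com", "10.0.0.1", 1, 100L),
      Row("b.example.com", null, 1, 200L),           // dropped: null server_ip
      Row("c.example.com", "10.0.0.2", null, 300L))  // dropped: null VSYS_ID

    val frame = spark.createDataFrame(rows, schema)
      // Checking for nulls before groupBy keeps invalid keys out of the
      // shuffle, instead of aggregating them and discarding the rows later.
      .filter(row => row.getAs[String]("server_ip") != null &&
        row.getAs[Integer]("VSYS_ID") != null)
      .groupBy("FQDN", "server_ip", "VSYS_ID")
      .agg(min("FIRST_FOUND_TIME").alias("FIRST_FOUND_TIME"))

    // With invalid rows already gone, the RDD stage is a plain map, as in the
    // rewritten mergeRelationFqdnLocateIp.
    frame.rdd
      .map(row => (s"${row.getAs[String]("FQDN")}_${row.getAs[String]("server_ip")}", row))
      .collect()
      .foreach(println)
    spark.stop()
  }
}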
@@ -72,7 +72,6 @@ object UpdateDocument {
       val document: T = getDocumentRow(row)
       if (document != null) {
         fqdnAccmu.add(1)
-        // println(document)
         resultDocumentList.add(document)
       }
       i += 1
@@ -91,7 +90,7 @@ object UpdateDocument {
       LOG.warn(s"update $collName rows: ${fqdnAccmu.value}")
 
       val last = System.currentTimeMillis()
-      LOG.warn(s"update $collName time: ${last - start}")
+      LOG.warn(s"update $collName time: ${last - start}ms")
     } catch {
       case e: Exception => e.printStackTrace()
     }
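UpdateDocument counts written documents with an accumulator and logs wall-clock time; this commit only drops a debug println and appends the ms unit. For context, a minimal sketch of the accumulator-plus-timer pattern (the accumulator name and work loop are illustrative, not the repo's actual job):

import org.apache.spark.sql.SparkSession

object AccumulatorTimingSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("acc-timing").master("local[*]").getOrCreate()
    val sc = spark.sparkContext

    // Accumulators are the only safe way to count work done inside executors;
    // a plain var would be captured by value and never updated on the driver.
    val fqdnAccmu = sc.longAccumulator("updated-docs")
    val start = System.currentTimeMillis()

    sc.parallelize(1 to 1000, 4).foreach(_ => fqdnAccmu.add(1))

    val last = System.currentTimeMillis()
    // Appending the unit, as the commit does, keeps the log unambiguous.
    println(s"update docs rows: ${fqdnAccmu.value}")
    println(s"update docs time: ${last - start}ms")
    spark.stop()
  }
}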
@@ -49,10 +49,10 @@ class ArangoRdd[T: ClassTag](@transient override val sparkContext: SparkContext,
       val separate = split.separate
       val collection = options.collection
       val sql = s"FOR doc IN $collection limit $offset,$separate RETURN doc"
-      LOG.info(sql)
+      LOG.info(s"Executing query: $sql")
       arangoCursor = arangoDB.db(options.database).query(sql,bindVars,queryOptions,clazz.runtimeClass.asInstanceOf[Class[T]])
     }catch {
-      case e: Exception => LOG.error(s"创建Cursor异常:${e.getMessage}")
+      case e: Exception => LOG.error(s"create Cursor error: ${e.getMessage}")
     }finally {
       arangoDB.shutdown()
     }
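ArangoRdd pages through a collection by giving every Spark partition its own offset/separate window in an AQL LIMIT clause, so the collection is scanned exactly once across all partitions. A minimal standalone sketch of that query, assuming the com.arangodb Java driver and placeholder database/collection names:

import com.arangodb.ArangoDB
import com.arangodb.entity.BaseDocument
import scala.collection.JavaConverters._

object AqlPagingSketch {
  def main(args: Array[String]): Unit = {
    val arangoDB = new ArangoDB.Builder().host("127.0.0.1", 8529).user("root").build()
    try {
      // One (offset, separate) window per Spark partition; here a single
      // window stands in for the whole scheme.
      val offset = 0
      val separate = 100
      val sql = s"FOR doc IN fqdn LIMIT $offset, $separate RETURN doc"
      val cursor = arangoDB.db("ip_learning").query(sql, null, null, classOf[BaseDocument])
      cursor.asScala.foreach(doc => println(doc.getKey))
    } catch {
      case e: Exception => println(s"create Cursor error: ${e.getMessage}")
    } finally {
      // Shut down the client once the cursor is drained, as ArangoRdd does.
      arangoDB.shutdown()
    }
  }
}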
@@ -81,7 +81,7 @@ class ArangoRdd[T: ClassTag](@transient override val sparkContext: SparkContext,
         cnt = ApplicationConfig.ARANGODB_TOTAL_NUM
       }
     } catch {
-      case e: Exception => LOG.error(sql + s"执行异常:${e.getMessage}")
+      case e: Exception => LOG.error(sql + s"execute error: ${e.getMessage}")
     }finally {
       arangoDB.shutdown()
     }
@@ -27,6 +27,7 @@ object SparkSessionUtil {
       .config("arangodb.hosts", s"${ApplicationConfig.ARANGODB_HOST}:${ApplicationConfig.ARANGODB_PORT}")
       .config("arangodb.user", ApplicationConfig.ARANGODB_USER)
       .config("arangodb.password", ApplicationConfig.ARANGODB_PASSWORD)
+      .config("spark.sql.objectHashAggregate.sortBased.fallbackThreshold", Integer.MAX_VALUE)
       .master(ApplicationConfig.MASTER)
       .getOrCreate()
     LOG.warn("spark session start success!!!")
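The one functional addition here is the objectHashAggregate fallback threshold. ObjectHashAggregateExec backs typed aggregates such as collect_set (used in MergeDataFrame above) and falls back to slower sort-based aggregation once its in-memory hash map exceeds the threshold (a small default of 128); raising it to Integer.MAX_VALUE keeps the hash path throughout. A minimal sketch with placeholder app settings:

import org.apache.spark.sql.SparkSession

object SessionConfigSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("ip-learning") // placeholder app name
      // Effectively disables the sort-based fallback for object hash
      // aggregation, trading memory for shuffle/sort time on collect_set.
      .config("spark.sql.objectHashAggregate.sortBased.fallbackThreshold", Integer.MAX_VALUE)
      .master("local[*]")
      .getOrCreate()

    println(spark.conf.get("spark.sql.objectHashAggregate.sortBased.fallbackThreshold"))
    spark.stop()
  }
}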