Format code
@@ -19,6 +19,7 @@ import java.util.concurrent.CountDownLatch;
  * @author wlh
  * Reads the full arangoDb history data on multiple threads and packs it into a map
  */
+@SuppressWarnings("unchecked")
 public class ReadHistoryArangoData<T extends BaseDocument> extends Thread {
     public static long currentHour = System.currentTimeMillis() / (60 * 60 * 1000) * 60 * 60;
     private static final Logger LOG = LoggerFactory.getLogger(ReadHistoryArangoData.class);
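A note on the new class-level annotation: BaseDocument.getAttribute returns Object, so casts such as (ArrayList<Long>) doc.getAttribute(...) in the hunks below are unchecked, and the annotation silences that compiler warning for the whole class. A minimal sketch of the pattern (the method and attribute names here are illustrative, not from the patch):

import java.util.ArrayList;
import com.arangodb.entity.BaseDocument;

public class UncheckedCastSketch {
    // getAttribute returns Object; the cast's type argument cannot be verified
    // at runtime, so javac emits an "unchecked cast" warning without the annotation.
    @SuppressWarnings("unchecked")
    static ArrayList<Long> recentCounts(BaseDocument doc, String attribute) {
        return (ArrayList<Long>) doc.getAttribute(attribute);
    }
}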
@@ -92,7 +93,7 @@ public class ReadHistoryArangoData<T extends BaseDocument> extends Thread {
         for (String protocol : PROTOCOL_SET) {
             String protocolRecent = protocol + "_CNT_RECENT";
             ArrayList<Long> cntRecent = (ArrayList<Long>) doc.getAttribute(protocolRecent);
-            Long[] cntRecentsSrc = cntRecent.toArray(new Long[cntRecent.size()]);
+            Long[] cntRecentsSrc = cntRecent.toArray(new Long[0]);
             Long[] cntRecentsDst = new Long[RECENT_COUNT_HOUR];
             System.arraycopy(cntRecentsSrc, 0, cntRecentsDst, 1, cntRecentsSrc.length - 1);
             cntRecentsDst[0] = 0L;
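The toArray change follows the widely recommended Java idiom: passing a zero-length array is at least as fast on modern JVMs, since the runtime allocates a correctly sized result in one step, and it avoids handing toArray a presized array that could go stale if the list changes between size() and toArray(). A small demonstration:

import java.util.ArrayList;
import java.util.Arrays;

public class ToArraySketch {
    public static void main(String[] args) {
        ArrayList<Long> cntRecent = new ArrayList<>(Arrays.asList(1L, 2L, 3L));
        // The runtime allocates an array of exactly cntRecent.size() elements;
        // presizing the argument saves nothing and costs an extra allocation up front.
        Long[] src = cntRecent.toArray(new Long[0]);
        System.out.println(Arrays.toString(src)); // [1, 2, 3]
    }
}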
@@ -104,6 +105,11 @@ public class ReadHistoryArangoData<T extends BaseDocument> extends Thread {
     private void deleteDistinctClientIpByTime(T doc) {
         ArrayList<String> distCip = (ArrayList<String>) doc.getAttribute("DIST_CIP");
         ArrayList<Long> distCipTs = (ArrayList<Long>) doc.getAttribute("DIST_CIP_TS");
+        if (distCip == null || distCip.isEmpty()){
+            doc.updateAttribute("DIST_CIP", new String[0]);
+            doc.updateAttribute("DIST_CIP_TS", new long[0]);
+            return;
+        }
         distCipTs.add(currentHour - RECENT_COUNT_HOUR * 3600);
         Collections.sort(distCipTs);
         int index = distCipTs.indexOf(currentHour - RECENT_COUNT_HOUR * 3600);
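For context on the lines kept below the new guard: the method plants the window cutoff into the timestamp list, sorts, and uses indexOf to locate the first entry equal to the cutoff, so every entry before that index is older than the retention window. A self-contained sketch of that trick (the values are illustrative):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;

public class CutoffIndexSketch {
    public static void main(String[] args) {
        long cutoff = 1_000L; // stands in for currentHour - RECENT_COUNT_HOUR * 3600
        ArrayList<Long> ts = new ArrayList<>(Arrays.asList(800L, 1_200L, 900L, 1_500L));
        ts.add(cutoff);                 // plant the sentinel
        Collections.sort(ts);           // [800, 900, 1000, 1200, 1500]
        int index = ts.indexOf(cutoff); // first position holding the cutoff: 2
        // Entries before 'index' are strictly older than the window.
        System.out.println(ts.subList(0, index)); // [800, 900]
    }
}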
@@ -70,26 +70,6 @@ public class ArangoDBConnect {
         }
     }
 
-    @Deprecated
-    public <T> void insertAndUpdate(ArrayList<T> docInsert,ArrayList<T> docUpdate,String collectionName){
-        ArangoDatabase database = getDatabase();
-        try {
-            ArangoCollection collection = database.collection(collectionName);
-            if (!docInsert.isEmpty()){
-                collection.importDocuments(docInsert);
-            }
-            if (!docUpdate.isEmpty()){
-                collection.replaceDocuments(docUpdate);
-            }
-        }catch (Exception e){
-            System.out.println("Update failed");
-            e.printStackTrace();
-        }finally {
-            docInsert.clear();
-            docInsert.clear();
-        }
-    }
-
     public <T> void overwrite(ArrayList<T> docOverwrite,String collectionName){
         ArangoDatabase database = getDatabase();
         try {
@@ -101,11 +81,11 @@ public class ArangoDBConnect {
             MultiDocumentEntity<DocumentCreateEntity<T>> documentCreateEntityMultiDocumentEntity = collection.insertDocuments(docOverwrite, documentCreateOptions);
             Collection<ErrorEntity> errors = documentCreateEntityMultiDocumentEntity.getErrors();
             for (ErrorEntity errorEntity:errors){
-                LOG.warn("arangoDB write error: "+errorEntity.getErrorMessage());
+                LOG.debug("arangoDB write error: "+errorEntity.getErrorMessage());
             }
         }catch (Exception e){
-            System.out.println("Update failed: "+e.toString());
+            LOG.error("Failed to update arangoDB: "+e.toString());
         }finally {
             docOverwrite.clear();
         }
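The deleted insertAndUpdate had to split documents into an import batch and a replace batch (and its finally block cleared docInsert twice, never docUpdate). The surviving overwrite method folds both paths into a single insertDocuments call; a minimal sketch of how its documentCreateOptions is presumably configured, assuming a driver version that exposes DocumentCreateOptions#overwrite (the collection name is illustrative):

import java.util.ArrayList;
import com.arangodb.ArangoCollection;
import com.arangodb.ArangoDatabase;
import com.arangodb.entity.BaseDocument;
import com.arangodb.model.DocumentCreateOptions;

public class OverwriteSketch {
    // One round trip: new documents are inserted, existing _key values are replaced,
    // so callers no longer pre-sort documents into insert and update batches.
    static void upsertAll(ArangoDatabase database, ArrayList<BaseDocument> docs) {
        ArangoCollection collection = database.collection("ip_dictionary"); // illustrative name
        DocumentCreateOptions options = new DocumentCreateOptions().overwrite(true);
        collection.insertDocuments(docs, options);
    }
}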
@@ -8,11 +8,9 @@ spark.serializer=org.apache.spark.serializer.KryoSerializer
 master=local[*]
 #config for spark reading from clickhouse
 spark.read.clickhouse.url=jdbc:clickhouse://192.168.44.12:8123/tsg_galaxy_v3
-#spark.read.clickhouse.url=jdbc:clickhouse://192.168.40.186:8123/tsg_galaxy_v3
 spark.read.clickhouse.driver=ru.yandex.clickhouse.ClickHouseDriver
 spark.read.clickhouse.user=default
 spark.read.clickhouse.password=ceiec2019
-#spark.read.clickhouse.password=111111
 spark.read.clickhouse.numPartitions=5
 spark.read.clickhouse.fetchsize=10000
 spark.read.clickhouse.partitionColumn=LAST_FOUND_TIME
@@ -107,45 +107,35 @@ object BaseClickhouseData {
 
   def getVertexIpDf: DataFrame ={
     loadConnectionDataFromCk()
+    val where = "common_recv_time >= " + timeLimit._2 + " AND common_recv_time < " + timeLimit._1
     val sql =
-      """
-        |SELECT
-        | *
-        |FROM
-        | (
-        | (
-        | SELECT
-        | common_client_ip AS IP,
-        | MIN(common_recv_time) AS FIRST_FOUND_TIME,
-        | MAX(common_recv_time) AS LAST_FOUND_TIME,
-        | count(*) as SESSION_COUNT,
-        | sum(common_c2s_byte_num) as BYTES_SUM,
-        | 'client' as ip_type
-        | FROM
-        | global_temp.dbtable
-        | GROUP BY
-        | IP
-        | )
-        | UNION ALL
-        | (
-        | SELECT
-        | common_server_ip AS IP,
-        | MIN(common_recv_time) AS FIRST_FOUND_TIME,
-        | MAX(common_recv_time) AS LAST_FOUND_TIME,
-        | count(*) as SESSION_COUNT,
-        | sum(common_s2c_byte_num) as BYTES_SUM,
-        | 'server' as ip_type
-        | FROM
-        | global_temp.dbtable
-        | GROUP BY
-        | IP
-        | )
-        | )
+      s"""
+        |(SELECT * FROM
+        |((SELECT common_client_ip AS IP,MIN(common_end_time) AS FIRST_FOUND_TIME,
+        |MAX(common_end_time) AS LAST_FOUND_TIME,
+        |count(*) as SESSION_COUNT,
+        |SUM(common_c2s_byte_num+common_s2c_byte_num) as BYTES_SUM,
+        |groupUniqArray(2)(common_link_info_c2s)[2] as common_link_info,
+        |'client' as ip_type
+        |FROM tsg_galaxy_v3.connection_record_log
+        |where $where
+        |group by common_client_ip)
+        |UNION ALL
+        |(SELECT common_server_ip AS IP,
+        |MIN(common_end_time) AS FIRST_FOUND_TIME,
+        |MAX(common_end_time) AS LAST_FOUND_TIME,
+        |count(*) as SESSION_COUNT,
+        |SUM(common_c2s_byte_num+common_s2c_byte_num) as BYTES_SUM,
+        |groupUniqArray(2)(common_link_info_s2c)[2] as common_link_info,
+        |'server' as ip_type
+        |FROM tsg_galaxy_v3.connection_record_log
+        |where $where
+        |group by common_server_ip))) as dbtable
       """.stripMargin
     LOG.warn(sql)
-    val vertexIpDf = spark.sql(sql)
-    vertexIpDf.printSchema()
-    vertexIpDf
+    val frame = initClickhouseData(sql)
+    frame.printSchema()
+    frame
   }
 
   /*
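The rewritten query no longer selects from the Spark view global_temp.dbtable; the whole UNION is parenthesized and aliased as dbtable, which lets it be passed to Spark's JDBC reader so that ClickHouse itself runs the GROUP BY (including the ClickHouse-specific groupUniqArray) and only aggregated rows cross the wire. A sketch of what initClickhouseData plausibly does, reusing the connection settings from the properties file above (the method body is an assumption, not the project's actual code):

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

public class ClickhousePushdownSketch {
    // Spark treats the parenthesized, aliased SELECT as if it were a table name,
    // so ClickHouse executes the aggregation and Spark reads the reduced result.
    static Dataset<Row> initClickhouseData(SparkSession spark, String aliasedSubquery) {
        return spark.read()
                .format("jdbc")
                .option("url", "jdbc:clickhouse://192.168.44.12:8123/tsg_galaxy_v3")
                .option("driver", "ru.yandex.clickhouse.ClickHouseDriver")
                .option("user", "default")
                .option("password", "ceiec2019")
                .option("dbtable", aliasedSubquery) // e.g. "(SELECT ...) as dbtable"
                .load();
    }
}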
@@ -196,16 +186,16 @@ object BaseClickhouseData {
   */
 
   def getRelationFqdnLocateIpDf: DataFrame ={
-    val where = "common_end_time >= " + timeLimit._2 + " AND common_end_time < " + timeLimit._1
+    val where = "common_recv_time >= " + timeLimit._2 + " AND common_recv_time < " + timeLimit._1
     val sql =
       s"""
         |(SELECT * FROM
-        |((SELECT ssl_sni AS FQDN,common_server_ip,MAX(common_end_time) AS LAST_FOUND_TIME,MIN(common_end_time) AS FIRST_FOUND_TIME,COUNT(*) AS COUNT_TOTAL,
+        |((SELECT ssl_sni AS FQDN,common_server_ip,MAX(common_recv_time) AS LAST_FOUND_TIME,MIN(common_recv_time) AS FIRST_FOUND_TIME,COUNT(*) AS COUNT_TOTAL,
         |toString(groupUniqArray(${ApplicationConfig.DISTINCT_CLIENT_IP_NUM})(common_client_ip)) AS DIST_CIP_RECENT,'TLS' AS schema_type
         |FROM tsg_galaxy_v3.connection_record_log
         |WHERE $where and common_schema_type = 'SSL' GROUP BY ssl_sni,common_server_ip)
         |UNION ALL
-        |(SELECT http_host AS FQDN,common_server_ip,MAX(common_end_time) AS LAST_FOUND_TIME,MIN(common_end_time) AS FIRST_FOUND_TIME,COUNT(*) AS COUNT_TOTAL,
+        |(SELECT http_host AS FQDN,common_server_ip,MAX(common_recv_time) AS LAST_FOUND_TIME,MIN(common_recv_time) AS FIRST_FOUND_TIME,COUNT(*) AS COUNT_TOTAL,
         |toString(groupUniqArray(${ApplicationConfig.DISTINCT_CLIENT_IP_NUM})(common_client_ip)) AS DIST_CIP_RECENT,'HTTP' AS schema_type
         |FROM tsg_galaxy_v3.connection_record_log
         |WHERE $where and common_schema_type = 'HTTP' GROUP BY http_host,common_server_ip))
@@ -38,7 +38,8 @@ object MergeDataFrame {
       max("LAST_FOUND_TIME").alias("LAST_FOUND_TIME"),
       collect_list("SESSION_COUNT").alias("SESSION_COUNT_LIST"),
       collect_list("BYTES_SUM").alias("BYTES_SUM_LIST"),
-      collect_list("ip_type").alias("ip_type_list")
+      collect_list("ip_type").alias("ip_type_list"),
+      last("common_link_info").alias("common_link_info")
     )
     val values = frame.rdd.map(row => (row.get(0), row))
       .partitionBy(new CustomPartitioner(ApplicationConfig.SPARK_SQL_SHUFFLE_PARTITIONS)).values
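The extra aggregation column carries one common_link_info value per IP group into the merged frame; last() keeps a single, non-deterministically chosen value per group rather than a collected list. A Java-API rendering of the amended aggregation, limited to the columns visible in this hunk (the surrounding frame is assumed):

import static org.apache.spark.sql.functions.collect_list;
import static org.apache.spark.sql.functions.last;
import static org.apache.spark.sql.functions.max;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;

public class MergeAggSketch {
    // Groups per IP; the lists keep one entry per source row, while last()
    // reduces common_link_info to a single representative value per group.
    static Dataset<Row> aggregate(Dataset<Row> vertexIpDf) {
        return vertexIpDf.groupBy("IP").agg(
                max("LAST_FOUND_TIME").alias("LAST_FOUND_TIME"),
                collect_list("SESSION_COUNT").alias("SESSION_COUNT_LIST"),
                collect_list("BYTES_SUM").alias("BYTES_SUM_LIST"),
                collect_list("ip_type").alias("ip_type_list"),
                last("common_link_info").alias("common_link_info"));
    }
}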
@@ -26,6 +26,10 @@ object UpdateDocHandler {
     hisDoc.addAttribute(attributeName,newAttribute+hisAttritube)
   }
 
+  def replaceAttribute(hisDoc: BaseDocument,newAttribute:String,attributeName:String): Unit ={
+    hisDoc.addAttribute(attributeName,newAttribute)
+  }
+
   def separateAttributeByIpType(ipTypeList:ofRef[String],
                                 sessionCountList:ofRef[AnyRef],
                                 bytesSumList:ofRef[AnyRef]): (Long,Long,Long,Long) ={
@@ -174,6 +174,7 @@ object UpdateDocument {
       val sessionCountList = row.getAs[ofRef[AnyRef]]("SESSION_COUNT_LIST")
       val bytesSumList = row.getAs[ofRef[AnyRef]]("BYTES_SUM_LIST")
       val ipTypeList = row.getAs[ofRef[String]]("ip_type_list")
+      val linkInfo = row.getAs[String]("common_link_info")
       val sepAttributeTuple = separateAttributeByIpType(ipTypeList, sessionCountList, bytesSumList)
 
       var document = dictionaryMap.getOrDefault(ip, null)
@@ -183,6 +184,7 @@ object UpdateDocument {
         updateSumAttribute(document, sepAttributeTuple._2, "SERVER_BYTES_SUM")
         updateSumAttribute(document, sepAttributeTuple._3, "CLIENT_SESSION_COUNT")
         updateSumAttribute(document, sepAttributeTuple._4, "CLIENT_BYTES_SUM")
+        replaceAttribute(document,linkInfo,"COMMON_LINK_INFO")
       } else {
         document = new BaseDocument
         document.setKey(ip)
@@ -193,7 +195,7 @@ object UpdateDocument {
         document.addAttribute("SERVER_BYTES_SUM", sepAttributeTuple._2)
         document.addAttribute("CLIENT_SESSION_COUNT", sepAttributeTuple._3)
         document.addAttribute("CLIENT_BYTES_SUM", sepAttributeTuple._4)
-        document.addAttribute("COMMON_LINK_INFO", "")
+        document.addAttribute("COMMON_LINK_INFO", linkInfo)
       }
       document
     }