Merge branch 'master' into ip-learning-graph-datacenter
This commit is contained in:
@@ -19,6 +19,7 @@ import static cn.ac.iie.service.ingestion.ReadClickhouseData.*;
|
||||
* @author wlh
|
||||
* 多线程全量读取arangoDb历史数据,封装到map
|
||||
*/
|
||||
@SuppressWarnings("unchecked")
|
||||
public class ReadHistoryArangoData<T extends BaseDocument> extends Thread {
|
||||
private static final Logger LOG = LoggerFactory.getLogger(ReadHistoryArangoData.class);
|
||||
|
||||
@@ -84,7 +85,7 @@ public class ReadHistoryArangoData<T extends BaseDocument> extends Thread {
|
||||
for (String protocol : ReadClickhouseData.PROTOCOL_SET) {
|
||||
String protocolRecent = protocol + "_CNT_RECENT";
|
||||
ArrayList<Long> cntRecent = (ArrayList<Long>) doc.getAttribute(protocolRecent);
|
||||
Long[] cntRecentsSrc = cntRecent.toArray(new Long[cntRecent.size()]);
|
||||
Long[] cntRecentsSrc = cntRecent.toArray(new Long[0]);
|
||||
Long[] cntRecentsDst = new Long[RECENT_COUNT_HOUR];
|
||||
System.arraycopy(cntRecentsSrc, 0, cntRecentsDst, 1, cntRecentsSrc.length - 1);
|
||||
cntRecentsDst[0] = 0L;
|
||||
|
||||
@@ -16,6 +16,7 @@ import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
public class ArangoDBConnect {
|
||||
@@ -71,7 +72,7 @@ public class ArangoDBConnect {
|
||||
}
|
||||
}
|
||||
|
||||
public <T> void overwrite(ArrayList<T> docOverwrite,String collectionName){
|
||||
public <T> void overwrite(List<T> docOverwrite, String collectionName){
|
||||
ArangoDatabase database = getDatabase();
|
||||
try {
|
||||
ArangoCollection collection = database.collection(collectionName);
|
||||
|
||||
@@ -1,10 +1,25 @@
|
||||
package cn.ac.iie;
|
||||
|
||||
import cn.ac.iie.utils.ArangoDBConnect;
|
||||
import com.arangodb.ArangoCursor;
|
||||
import com.arangodb.entity.BaseEdgeDocument;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
public class TestList {
|
||||
public static void main(String[] args) {
|
||||
ArangoDBConnect arangoConnect = ArangoDBConnect.getInstance();
|
||||
ArangoCursor<BaseEdgeDocument> documents = arangoConnect.executorQuery("FOR doc IN R_LOCATE_FQDN2IP filter doc.FIRST_FOUND_TIME >= 1596080839 and doc.FIRST_FOUND_TIME <= 1596395473 RETURN doc", BaseEdgeDocument.class);
|
||||
List<BaseEdgeDocument> baseEdgeDocuments = documents.asListRemaining();
|
||||
for (BaseEdgeDocument doc: baseEdgeDocuments){
|
||||
doc.updateAttribute("PROTOCOL_TYPE","123");
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
ArrayList<Integer> integers = new ArrayList<>();
|
||||
integers.add(10);
|
||||
integers.add(8);
|
||||
@@ -14,11 +29,17 @@ public class TestList {
|
||||
integers.add(4);
|
||||
integers.add(4);
|
||||
integers.add(12);
|
||||
|
||||
Integer[] objects = integers.toArray(new Integer[2]);
|
||||
System.out.println(Arrays.toString(objects));
|
||||
|
||||
|
||||
Collections.sort(integers);
|
||||
System.out.println(integers);
|
||||
integers.add(5);
|
||||
Collections.sort(integers);
|
||||
System.out.println(integers);
|
||||
System.out.println(integers.indexOf(5));
|
||||
*/
|
||||
}
|
||||
}
|
||||
|
||||
@@ -42,4 +42,4 @@ update.arango.batch=10000
|
||||
distinct.client.ip.num=10000
|
||||
recent.count.hour=24
|
||||
|
||||
update.interval=3600
|
||||
update.interval=10800
|
||||
|
||||
@@ -1,22 +1,10 @@
|
||||
package cn.ac.iie.main
|
||||
|
||||
import cn.ac.iie.service.update.UpdateDocument._
|
||||
import cn.ac.iie.utils.{ExecutorThreadPool, SparkSessionUtil}
|
||||
import cn.ac.iie.service.update.UpdateDocument
|
||||
|
||||
object IpLearningApplication {
|
||||
private val pool = ExecutorThreadPool.getInstance
|
||||
|
||||
def main(args: Array[String]): Unit = {
|
||||
try {
|
||||
updateVertexFqdn()
|
||||
updateVertexIp()
|
||||
updateRelationFqdnLocateIp()
|
||||
}catch {
|
||||
case e:Exception => e.printStackTrace()
|
||||
}finally {
|
||||
pool.shutdown()
|
||||
arangoManger.clean()
|
||||
SparkSessionUtil.closeSpark()
|
||||
}
|
||||
UpdateDocument.update()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,12 +1,14 @@
|
||||
package cn.ac.iie.service.update
|
||||
|
||||
import java.util
|
||||
import java.util.concurrent.ConcurrentHashMap
|
||||
|
||||
import cn.ac.iie.config.ApplicationConfig
|
||||
import cn.ac.iie.dao.BaseArangoData
|
||||
import cn.ac.iie.dao.BaseArangoData._
|
||||
import cn.ac.iie.service.transform.MergeDataFrame._
|
||||
import cn.ac.iie.service.update.UpdateDocHandler._
|
||||
import cn.ac.iie.utils.ArangoDBConnect
|
||||
import cn.ac.iie.utils.{ArangoDBConnect, ExecutorThreadPool, SparkSessionUtil}
|
||||
import cn.ac.iie.utils.SparkSessionUtil.spark
|
||||
import com.arangodb.entity.{BaseDocument, BaseEdgeDocument}
|
||||
import org.apache.spark.TaskContext
|
||||
@@ -17,27 +19,70 @@ import org.slf4j.LoggerFactory
|
||||
import scala.collection.mutable.WrappedArray.ofRef
|
||||
|
||||
object UpdateDocument {
|
||||
|
||||
val arangoManger: ArangoDBConnect = ArangoDBConnect.getInstance()
|
||||
private val pool = ExecutorThreadPool.getInstance
|
||||
private val arangoManger: ArangoDBConnect = ArangoDBConnect.getInstance()
|
||||
private val LOG = LoggerFactory.getLogger(UpdateDocument.getClass)
|
||||
private val baseArangoData = new BaseArangoData()
|
||||
|
||||
def updateVertexFqdn(): Unit ={
|
||||
baseArangoData.readHistoryData("FQDN",historyVertexFqdnMap,classOf[BaseDocument])
|
||||
val hisVerFqdnBc = spark.sparkContext.broadcast(historyVertexFqdnMap)
|
||||
/**
  * Entry point for one full update run.
  *
  * Updates the "FQDN", "IP" and "R_LOCATE_FQDN2IP" collections, in that order,
  * by delegating each one to `updateDocument`. Whatever the outcome, the thread
  * pool is shut down, the ArangoDB connection is cleaned and the Spark session
  * is closed afterwards.
  *
  * An exception escaping one of the `updateDocument` calls aborts the remaining
  * updates; it is logged here and not rethrown.
  */
def update(): Unit = {
  try {
    updateDocument("FQDN", historyVertexFqdnMap, getVertexFqdnRow, classOf[BaseDocument], mergeVertexFqdn)
    updateDocument("IP", historyVertexIpMap, getVertexIpRow, classOf[BaseDocument], mergeVertexIp)
    updateDocument("R_LOCATE_FQDN2IP", historyRelationFqdnAddressIpMap, getRelationFqdnLocateIpRow, classOf[BaseEdgeDocument], mergeRelationFqdnLocateIp)
  } catch {
    // Report through the logger this object already uses, keeping the cause and
    // stack trace, instead of dumping the trace to stderr.
    case e: Exception => LOG.error("Document update failed", e)
  } finally {
    // Release shared resources even when an update failed.
    pool.shutdown()
    arangoManger.clean()
    SparkSessionUtil.closeSpark()
  }
}
|
||||
|
||||
private def updateDocument[T <: BaseDocument](collName: String,
|
||||
historyMap: ConcurrentHashMap[Integer, ConcurrentHashMap[String, T]],
|
||||
getDocumentRow: (Row, ConcurrentHashMap[String, T]) => T,
|
||||
clazz: Class[T],
|
||||
getNewDataRdd: () => RDD[Row]
|
||||
): Unit = {
|
||||
baseArangoData.readHistoryData(collName, historyMap, clazz)
|
||||
val hisBc = spark.sparkContext.broadcast(historyMap)
|
||||
try {
|
||||
val start = System.currentTimeMillis()
|
||||
val mergeVertexFqdnDf: RDD[Row] = mergeVertexFqdn()
|
||||
mergeVertexFqdnDf.foreachPartition(iter => {
|
||||
val newDataRdd = getNewDataRdd()
|
||||
newDataRdd.foreachPartition(iter => {
|
||||
val partitionId: Int = TaskContext.get.partitionId
|
||||
val hisVerFqdnMapTmp = hisVerFqdnBc.value.get(partitionId)
|
||||
val resultDocumentList: util.ArrayList[BaseDocument] = new util.ArrayList[BaseDocument]
|
||||
val dictionaryMap: ConcurrentHashMap[String, T] = hisBc.value.get(partitionId)
|
||||
val resultDocumentList = new util.ArrayList[T]
|
||||
var i = 0
|
||||
iter.foreach(row => {
|
||||
val document = getDocumentRow(row, dictionaryMap)
|
||||
resultDocumentList.add(document)
|
||||
i += 1
|
||||
if (i >= ApplicationConfig.UPDATE_ARANGO_BATCH) {
|
||||
arangoManger.overwrite(resultDocumentList, collName)
|
||||
LOG.warn(s"更新:$collName" + i)
|
||||
i = 0
|
||||
}
|
||||
})
|
||||
if (i != 0) {
|
||||
arangoManger.overwrite(resultDocumentList, collName)
|
||||
LOG.warn(s"更新$collName:" + i)
|
||||
}
|
||||
})
|
||||
val last = System.currentTimeMillis()
|
||||
LOG.warn(s"更新$collName 时间:${last - start}")
|
||||
} catch {
|
||||
case e: Exception => e.printStackTrace()
|
||||
} finally {
|
||||
hisBc.destroy()
|
||||
}
|
||||
}
|
||||
|
||||
private def getVertexFqdnRow(row: Row, dictionaryMap: ConcurrentHashMap[String, BaseDocument]): BaseDocument = {
|
||||
val fqdn = row.getAs[String]("FQDN")
|
||||
val lastFoundTime = row.getAs[Long]("LAST_FOUND_TIME")
|
||||
val firstFoundTime = row.getAs[Long]("FIRST_FOUND_TIME")
|
||||
var document: BaseDocument = hisVerFqdnMapTmp.getOrDefault(fqdn,null)
|
||||
var document: BaseDocument = dictionaryMap.getOrDefault(fqdn, null)
|
||||
if (document != null) {
|
||||
updateMaxAttribute(document, lastFoundTime, "LAST_FOUND_TIME")
|
||||
} else {
|
||||
@@ -47,40 +92,10 @@ object UpdateDocument {
|
||||
document.addAttribute("FIRST_FOUND_TIME", firstFoundTime)
|
||||
document.addAttribute("LAST_FOUND_TIME", lastFoundTime)
|
||||
}
|
||||
resultDocumentList.add(document)
|
||||
i+=1
|
||||
if (i >= ApplicationConfig.UPDATE_ARANGO_BATCH){
|
||||
arangoManger.overwrite(resultDocumentList, "FQDN")
|
||||
LOG.warn("更新FQDN:" + i)
|
||||
i = 0
|
||||
}
|
||||
})
|
||||
if (i != 0) {
|
||||
arangoManger.overwrite(resultDocumentList, "FQDN")
|
||||
LOG.warn("更新FQDN:" + i)
|
||||
}
|
||||
})
|
||||
val last = System.currentTimeMillis()
|
||||
LOG.warn(s"更新FQDN时间:${last-start}")
|
||||
}catch {
|
||||
case e:Exception => e.printStackTrace()
|
||||
}finally {
|
||||
hisVerFqdnBc.destroy()
|
||||
}
|
||||
document
|
||||
}
|
||||
|
||||
def updateVertexIp(): Unit ={
|
||||
baseArangoData.readHistoryData("IP",historyVertexIpMap,classOf[BaseDocument])
|
||||
val hisVerIpBc = spark.sparkContext.broadcast(historyVertexIpMap)
|
||||
try {
|
||||
val start = System.currentTimeMillis()
|
||||
val mergeVertexIpDf = mergeVertexIp()
|
||||
mergeVertexIpDf.foreachPartition(iter => {
|
||||
val partitionId: Int = TaskContext.get.partitionId
|
||||
val hisVerIpMapTmp = hisVerIpBc.value.get(partitionId)
|
||||
val resultDocumentList: util.ArrayList[BaseDocument] = new util.ArrayList[BaseDocument]
|
||||
var i = 0
|
||||
iter.foreach(row => {
|
||||
private def getVertexIpRow(row: Row, dictionaryMap: ConcurrentHashMap[String, BaseDocument]): BaseDocument = {
|
||||
val ip = row.getAs[String]("IP")
|
||||
val firstFoundTime = row.getAs[Long]("FIRST_FOUND_TIME")
|
||||
val lastFoundTime = row.getAs[Long]("LAST_FOUND_TIME")
|
||||
@@ -89,7 +104,7 @@ object UpdateDocument {
|
||||
val ipTypeList = row.getAs[ofRef[String]]("ip_type_list")
|
||||
val sepAttributeTuple = separateAttributeByIpType(ipTypeList, sessionCountList, bytesSumList)
|
||||
|
||||
var document = hisVerIpMapTmp.getOrDefault(ip,null)
|
||||
var document = dictionaryMap.getOrDefault(ip, null)
|
||||
if (document != null) {
|
||||
updateMaxAttribute(document, lastFoundTime, "LAST_FOUND_TIME")
|
||||
updateSumAttribute(document, sepAttributeTuple._1, "SERVER_SESSION_COUNT")
|
||||
@@ -108,40 +123,10 @@ object UpdateDocument {
|
||||
document.addAttribute("CLIENT_BYTES_SUM", sepAttributeTuple._4)
|
||||
document.addAttribute("COMMON_LINK_INFO", "")
|
||||
}
|
||||
resultDocumentList.add(document)
|
||||
i+=1
|
||||
if (i >= ApplicationConfig.UPDATE_ARANGO_BATCH){
|
||||
arangoManger.overwrite(resultDocumentList, "IP")
|
||||
LOG.warn("更新IP:" + i)
|
||||
i = 0
|
||||
}
|
||||
})
|
||||
if (i != 0) {
|
||||
arangoManger.overwrite(resultDocumentList, "IP")
|
||||
LOG.warn("更新IP:" + i)
|
||||
}
|
||||
})
|
||||
val last = System.currentTimeMillis()
|
||||
LOG.warn(s"更新IP时间:${last-start}")
|
||||
}catch {
|
||||
case e:Exception => e.printStackTrace()
|
||||
}finally {
|
||||
hisVerIpBc.destroy()
|
||||
}
|
||||
document
|
||||
}
|
||||
|
||||
def updateRelationFqdnLocateIp(): Unit ={
|
||||
baseArangoData.readHistoryData("R_LOCATE_FQDN2IP", historyRelationFqdnAddressIpMap, classOf[BaseEdgeDocument])
|
||||
val hisReFqdnLocIpBc = spark.sparkContext.broadcast(historyRelationFqdnAddressIpMap)
|
||||
try {
|
||||
val start = System.currentTimeMillis()
|
||||
val mergeRelationFqdnLocateIpDf = mergeRelationFqdnLocateIp()
|
||||
mergeRelationFqdnLocateIpDf.foreachPartition(iter => {
|
||||
val partitionId: Int = TaskContext.get.partitionId
|
||||
val hisRelaFqdnLocaIpMapTmp = hisReFqdnLocIpBc.value.get(partitionId)
|
||||
val resultDocumentList: util.ArrayList[BaseEdgeDocument] = new util.ArrayList[BaseEdgeDocument]
|
||||
var i = 0
|
||||
iter.foreach(row => {
|
||||
private def getRelationFqdnLocateIpRow(row: Row, dictionaryMap: ConcurrentHashMap[String, BaseEdgeDocument]): BaseEdgeDocument = {
|
||||
val fqdn = row.getAs[String]("FQDN")
|
||||
val serverIp = row.getAs[String]("common_server_ip")
|
||||
val firstFoundTime = row.getAs[Long]("FIRST_FOUND_TIME")
|
||||
@@ -154,7 +139,7 @@ object UpdateDocument {
|
||||
val distinctIp: Array[String] = mergeDistinctIp(distCipRecent)
|
||||
|
||||
val key = fqdn.concat("-" + serverIp)
|
||||
var document: BaseEdgeDocument = hisRelaFqdnLocaIpMapTmp.getOrDefault(key,null)
|
||||
var document = dictionaryMap.getOrDefault(key, null)
|
||||
if (document != null) {
|
||||
updateMaxAttribute(document, lastFoundTime, "LAST_FOUND_TIME")
|
||||
updateProtocolAttritube(document, sepAttritubeMap)
|
||||
@@ -169,26 +154,7 @@ object UpdateDocument {
|
||||
putProtocolAttritube(document, sepAttritubeMap)
|
||||
putDistinctIp(document, distinctIp)
|
||||
}
|
||||
resultDocumentList.add(document)
|
||||
i+=1
|
||||
if (i >= ApplicationConfig.UPDATE_ARANGO_BATCH){
|
||||
arangoManger.overwrite(resultDocumentList, "R_LOCATE_FQDN2IP")
|
||||
LOG.warn("更新R_LOCATE_FQDN2IP:" + i)
|
||||
i = 0
|
||||
}
|
||||
})
|
||||
if (i != 0) {
|
||||
arangoManger.overwrite(resultDocumentList, "R_LOCATE_FQDN2IP")
|
||||
LOG.warn("更新R_LOCATE_FQDN2IP:" + i)
|
||||
}
|
||||
})
|
||||
val last = System.currentTimeMillis()
|
||||
LOG.warn(s"更新R_LOCATE_FQDN2IP时间:${last-start}")
|
||||
}catch {
|
||||
case e:Exception => e.printStackTrace()
|
||||
}finally {
|
||||
hisReFqdnLocIpBc.destroy()
|
||||
}
|
||||
document
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -0,0 +1,18 @@
|
||||
package cn.ac.iie.test;
|
||||
|
||||
/**
 * Small demo in which several threads execute the same {@link Runnable} instance
 * and therefore share its {@code count} field.
 *
 * <p>{@link #run()} is {@code synchronized}: the decrement and the subsequent read
 * for printing happen as one step per thread, so no decrement is lost and every
 * thread prints a distinct value.
 */
public class threadTest implements Runnable {

    /** Counter shared by every thread running this instance; guarded by {@code this}. */
    private int count = 10;

    /**
     * Decrements the shared counter once and prints the current thread's name
     * together with the counter value after the decrement.
     */
    @Override
    public synchronized void run() {
        // count-- is a read-modify-write; without the lock two threads could
        // interleave here and print the same or a stale value.
        count--;
        System.out.println(Thread.currentThread().getName() + " count = " + count);
    }

    /**
     * Starts five threads named THREAD0..THREAD4 that all run the same instance.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        threadTest t = new threadTest();
        for (int i = 0; i < 5; i++) {
            new Thread(t, "THREAD" + i).start();
        }
    }

}
|
||||
Reference in New Issue
Block a user