Format code

Author: wanglihui
Date: 2021-08-02 18:49:43 +08:00
parent 51d2549902
commit 2f7cceb826
6 changed files with 27 additions and 143 deletions


@@ -33,20 +33,21 @@ object BaseClickhouseData {
   }

   def getVertexFqdnDf: DataFrame = {
+    val where = "common_recv_time >= " + timeLimit._2 + " AND common_recv_time < " + timeLimit._1
     val sql =
-      """
+      s"""
         |(SELECT
         | FQDN,MAX( LAST_FOUND_TIME ) AS LAST_FOUND_TIME,MIN( FIRST_FOUND_TIME ) AS FIRST_FOUND_TIME
         |FROM
         | ((SELECT
         | ssl_sni AS FQDN,MAX( common_recv_time ) AS LAST_FOUND_TIME,MIN( common_recv_time ) AS FIRST_FOUND_TIME
         | FROM connection_record_log
-        | WHERE common_schema_type = 'SSL' GROUP BY ssl_sni
+        | WHERE $where and common_schema_type = 'SSL' GROUP BY ssl_sni
         | )UNION ALL
         | (SELECT
         | http_host AS FQDN,MAX( common_recv_time ) AS LAST_FOUND_TIME,MIN( common_recv_time ) AS FIRST_FOUND_TIME
         | FROM connection_record_log
-        | WHERE common_schema_type = 'HTTP' GROUP BY http_host))
+        | WHERE $where and common_schema_type = 'HTTP' GROUP BY http_host))
         |GROUP BY FQDN HAVING FQDN != '') as dbtable
       """.stripMargin
     LOG.warn(sql)
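
Note: the interpolated $where clause limits both branches of the UNION to the [timeLimit._2, timeLimit._1) window, so ClickHouse prunes old records instead of Spark filtering them afterwards, and the whole parenthesised query is handed to Spark as a dbtable subquery. A minimal, hypothetical sketch of how such a subquery string is typically fed to Spark's JDBC reader; the URL and driver class below are placeholders, not this project's configuration:

    import org.apache.spark.sql.{DataFrame, SparkSession}

    object FqdnQuerySketch {
      // Hand the "(SELECT ...) as dbtable" string to the JDBC source so the WHERE
      // time window is evaluated inside ClickHouse rather than in Spark.
      def readFqdn(spark: SparkSession, subquery: String): DataFrame =
        spark.read
          .format("jdbc")
          .option("url", "jdbc:clickhouse://ch-host:8123/default")   // placeholder URL
          .option("driver", "ru.yandex.clickhouse.ClickHouseDriver") // assumed driver class
          .option("dbtable", subquery)
          .load()
    }
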
@@ -164,7 +165,8 @@ object BaseClickhouseData {
     val sql =
       s"""
         |(
-        |SELECT DISTINCT radius_framed_ip,common_recv_time as LAST_FOUND_TIME FROM radius_record_log WHERE $where
+        |SELECT radius_framed_ip,MAX(common_recv_time) as LAST_FOUND_TIME FROM radius_record_log WHERE $where
+        |GROUP BY radius_framed_ip
         |)as dbtable
       """.stripMargin
     LOG.warn(sql)
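
Note: replacing SELECT DISTINCT with GROUP BY radius_framed_ip and MAX(common_recv_time) collapses the result to one row per IP carrying its latest observation time, whereas the DISTINCT form returned one row per (ip, time) combination. The DataFrame-side equivalent, as a sketch with an assumed input frame holding radius_framed_ip and common_recv_time columns:

    import org.apache.spark.sql.DataFrame
    import org.apache.spark.sql.functions.max

    // One row per radius_framed_ip, keeping only its most recent common_recv_time.
    def latestPerIp(radius: DataFrame): DataFrame =
      radius.groupBy("radius_framed_ip")
        .agg(max("common_recv_time").alias("LAST_FOUND_TIME"))
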


@@ -18,9 +18,9 @@ object MergeDataFrame {
   def mergeVertexFqdn(): RDD[(String, (Option[BaseDocument], Row))] = {
     val fqdnRddRow: RDD[(String, Row)] = BaseClickhouseData.getVertexFqdnDf
-      .rdd.filter(row => isDomain(row.getAs[String](0))).map(row => {
+      .repartition().rdd.filter(row => isDomain(row.getAs[String](0))).map(row => {
       (row.getAs[String]("FQDN"), row)
-    }).partitionBy(new CustomPartitioner(ApplicationConfig.SPARK_SQL_SHUFFLE_PARTITIONS))
+    })/*.partitionBy(new CustomPartitioner(ApplicationConfig.SPARK_SQL_SHUFFLE_PARTITIONS))*/
     val fqdnRddDoc: ArangoRdd[BaseDocument] = BaseArangoData.loadArangoRdd[BaseDocument]("FQDN")
@@ -29,7 +29,7 @@ object MergeDataFrame {
   def mergeVertexIp(): RDD[(String, (Option[BaseDocument], Row))] = {
     val vertexIpDf = BaseClickhouseData.getVertexIpDf
-    val frame = vertexIpDf.groupBy("IP").agg(
+    val frame = vertexIpDf.repartition().groupBy("IP").agg(
       min("FIRST_FOUND_TIME").alias("FIRST_FOUND_TIME"),
       max("LAST_FOUND_TIME").alias("LAST_FOUND_TIME"),
       collect_list("SESSION_COUNT").alias("SESSION_COUNT_LIST"),
@@ -39,14 +39,15 @@ object MergeDataFrame {
     )
     val ipRddRow = frame.rdd.map(row => {
       (row.getAs[String]("IP"), row)
-    }).partitionBy(new CustomPartitioner(ApplicationConfig.SPARK_SQL_SHUFFLE_PARTITIONS))
+    })/*.partitionBy(new CustomPartitioner(ApplicationConfig.SPARK_SQL_SHUFFLE_PARTITIONS))*/
     val ipRddDoc = BaseArangoData.loadArangoRdd[BaseDocument]("IP")
     ipRddDoc.map(doc => (doc.getKey, doc)).rightOuterJoin(ipRddRow)
   }

   def mergeRelationFqdnLocateIp(): RDD[(String, (Option[BaseEdgeDocument], Row))] = {
-    val frame = BaseClickhouseData.getRelationFqdnLocateIpDf.filter(row => isDomain(row.getAs[String]("FQDN")))
+    val frame = BaseClickhouseData.getRelationFqdnLocateIpDf
+      .repartition().filter(row => isDomain(row.getAs[String]("FQDN")))
       .groupBy("FQDN", "common_server_ip")
       .agg(
         min("FIRST_FOUND_TIME").alias("FIRST_FOUND_TIME"),
@@ -60,7 +61,7 @@ object MergeDataFrame {
         val serverIp = row.getAs[String]("common_server_ip")
         val key = fqdn.concat("-" + serverIp)
         (key, row)
-    }).partitionBy(new CustomPartitioner(ApplicationConfig.SPARK_SQL_SHUFFLE_PARTITIONS))
+    })/*.partitionBy(new CustomPartitioner(ApplicationConfig.SPARK_SQL_SHUFFLE_PARTITIONS))*/
     val fqdnLocIpRddDoc = BaseArangoData.loadArangoRdd[BaseEdgeDocument]("R_LOCATE_FQDN2IP")
     fqdnLocIpRddDoc.map(doc => (doc.getKey, doc)).rightOuterJoin(fqdnLocIpRddRow)
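
Note: each merge above follows the same shape: ClickHouse rows are keyed by the document key, ArangoDB documents are keyed by getKey, and a rightOuterJoin keeps every row while attaching the existing document as an Option. The explicit CustomPartitioner step is commented out in favour of a plain repartition() on the source DataFrame. A simplified sketch of that join contract, with illustrative names only (the real code goes through ArangoRdd and CustomPartitioner):

    import org.apache.spark.rdd.RDD
    import scala.reflect.ClassTag

    // Rows with no matching document come through as (None, row), which is what
    // lets the update step choose between insert and update downstream.
    def mergeByKey[D: ClassTag, R: ClassTag](docs: RDD[(String, D)],
                                             rows: RDD[(String, R)]): RDD[(String, (Option[D], R))] =
      docs.rightOuterJoin(rows)
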


@@ -2,10 +2,10 @@ package cn.ac.iie.service.update
 import java.util
-import scala.collection.JavaConversions._
+import scala.collection.JavaConversions._
 import cn.ac.iie.config.ApplicationConfig
-import cn.ac.iie.service.read.ReadHistoryArangoData
+import cn.ac.iie.dao.BaseClickhouseData
 import com.arangodb.entity.{BaseDocument, BaseEdgeDocument}
 import scala.collection.mutable
@@ -132,7 +132,7 @@ object UpdateDocHandler {
   def putDistinctIp(doc: BaseEdgeDocument, newDistinctIp: Array[String]): Unit = {
     val map = newDistinctIp.map(ip => {
-      (ip, ReadHistoryArangoData.currentHour)
+      (ip, BaseClickhouseData.currentHour)
     }).toMap
     doc.addAttribute("DIST_CIP", map.keys.toArray)
     doc.addAttribute("DIST_CIP_TS", map.values.toArray)
@@ -146,7 +146,7 @@ object UpdateDocHandler {
     val distCipToTsMap: Map[String, Long] = hisDistCip.zip(hisDistCipTs).toMap
     val muDistCipToTsMap: mutable.Map[String, Long] = mutable.Map(distCipToTsMap.toSeq: _*)
     newDistinctIp.foreach(cip => {
-      muDistCipToTsMap.put(cip, ReadHistoryArangoData.currentHour)
+      muDistCipToTsMap.put(cip, BaseClickhouseData.currentHour)
     })
     val resultMap = muDistCipToTsMap.toList.sortBy(-_._2).take(ApplicationConfig.DISTINCT_CLIENT_IP_NUM).toMap
     hisDoc.addAttribute("DIST_CIP", resultMap.keys.toArray)
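
Note: the only functional change here is reading the current hour from BaseClickhouseData instead of ReadHistoryArangoData; the DIST_CIP bookkeeping is unchanged: historical (client IP -> last seen) pairs are merged with the new IPs stamped at the current hour, and only the most recent entries are kept. The same logic detached from the document API, as a sketch where currentHour and maxEntries stand in for BaseClickhouseData.currentHour and ApplicationConfig.DISTINCT_CLIENT_IP_NUM:

    // Merge new client IPs into the historical map (newest timestamp wins),
    // then keep only the N most recently seen IPs.
    def mergeDistinctClientIps(history: Map[String, Long],
                               newIps: Array[String],
                               currentHour: Long,
                               maxEntries: Int): Map[String, Long] = {
      val merged = history ++ newIps.map(ip => ip -> currentHour)
      merged.toList.sortBy(-_._2).take(maxEntries).toMap
    }
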


@@ -21,11 +21,11 @@ object UpdateDocument {
     try {
       updateDocument("FQDN", getVertexFqdnRow, mergeVertexFqdn)
-      updateDocument("SUBSCRIBER", getVertexSubidRow, mergeVertexSubid)
+      // updateDocument("SUBSCRIBER", getVertexSubidRow, mergeVertexSubid)
-      insertFrameIp()
+      // insertFrameIp()
-      updateDocument("R_LOCATE_SUBSCRIBER2IP", getRelationSubidLocateIpRow, mergeRelationSubidLocateIp)
+      // updateDocument("R_LOCATE_SUBSCRIBER2IP", getRelationSubidLocateIpRow, mergeRelationSubidLocateIp)
       updateDocument("R_LOCATE_FQDN2IP", getRelationFqdnLocateIpRow, mergeRelationFqdnLocateIp)


@@ -1,11 +1,15 @@
 package cn.ac.iie.spark.rdd
 import java.util
+import scala.collection.JavaConverters.asScalaIteratorConverter
+import cn.ac.iie.config.ApplicationConfig
 import cn.ac.iie.service.update.UpdateDocument
 import cn.ac.iie.spark
 import cn.ac.iie.spark.partition.QueryArangoPartition
 import com.arangodb.ArangoCursor
+import com.arangodb.model.AqlQueryOptions
+import com.arangodb.util.MapBuilder
 import org.apache.spark.{Partition, SparkContext, TaskContext}
 import org.apache.spark.rdd.RDD
 import org.slf4j.LoggerFactory
@@ -38,13 +42,15 @@ class ArangoRdd[T: ClassTag](@transient override val sparkContext: SparkContext,
     var arangoCursor:ArangoCursor[T] = null
     val arangoDB = spark.createArangoBuilder(options).build()
+    val bindVars: util.Map[String, AnyRef] = new MapBuilder().get
+    val queryOptions: AqlQueryOptions = new AqlQueryOptions().ttl(ApplicationConfig.ARANGODB_TTL)
     try {
       val offset = split.offset
       val separate = split.separate
       val collection = options.collection
       val sql = s"FOR doc IN $collection limit $offset,$separate RETURN doc"
       LOG.info(sql)
-      arangoCursor = arangoDB.db(options.database).query(sql,clazz.runtimeClass.asInstanceOf[Class[T]])
+      arangoCursor = arangoDB.db(options.database).query(sql,bindVars,queryOptions,clazz.runtimeClass.asInstanceOf[Class[T]])
     }catch {
      case e: Exception => LOG.error(s"Failed to create cursor: ${e.getMessage}")
     }finally {
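
Note: the cursor is now opened with explicit bind variables and an AqlQueryOptions carrying a TTL from ApplicationConfig.ARANGODB_TTL, so the server-side cursor survives long partition reads instead of expiring at the driver default. A self-contained sketch of that call against the ArangoDB Java driver; host wiring, the database name and the 3600-second TTL are placeholders:

    import java.util
    import com.arangodb.{ArangoCursor, ArangoDB}
    import com.arangodb.entity.BaseDocument
    import com.arangodb.model.AqlQueryOptions
    import com.arangodb.util.MapBuilder

    object CursorTtlSketch {
      def openCursor(arangoDB: ArangoDB, database: String, aql: String): ArangoCursor[BaseDocument] = {
        // Empty bind-variable map; the paging values are interpolated into the AQL string itself.
        val bindVars: util.Map[String, AnyRef] = new MapBuilder().get
        // Keep the cursor alive long enough for a slow partition scan.
        val queryOptions = new AqlQueryOptions().ttl(3600)
        arangoDB.db(database).query(aql, bindVars, queryOptions, classOf[BaseDocument])
      }
    }
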