Change how the arango read time range is determined, for easier debugging
@@ -21,11 +21,19 @@ public class ApplicationConfig {
     public static final Long READ_CLICKHOUSE_MAX_TIME = ConfigUtils.getLongProperty("read.clickhouse.max.time");
     public static final Long READ_CLICKHOUSE_MIN_TIME = ConfigUtils.getLongProperty("read.clickhouse.min.time");
 
-    public static final Integer TIME_LIMIT_TYPE = ConfigUtils.getIntProperty("time.limit.type");
+    public static final Integer CLICKHOUSE_TIME_LIMIT_TYPE = ConfigUtils.getIntProperty("clickhouse.time.limit.type");
     public static final Integer UPDATE_INTERVAL = ConfigUtils.getIntProperty("update.interval");
 
     public static final Integer DISTINCT_CLIENT_IP_NUM = ConfigUtils.getIntProperty("distinct.client.ip.num");
     public static final Integer RECENT_COUNT_HOUR = ConfigUtils.getIntProperty("recent.count.hour");
 
+    public static final String TOP_DOMAIN_FILE_NAME = ConfigUtils.getStringProperty("top.domain.file.name");
+
+    public static final String ARANGODB_READ_LIMIT = ConfigUtils.getStringProperty("arangoDB.read.limit");
+
+    public static final Integer ARANGO_TIME_LIMIT_TYPE = ConfigUtils.getIntProperty("arango.time.limit.type");
+
+    public static final Long READ_ARANGO_MAX_TIME = ConfigUtils.getLongProperty("read.arango.max.time");
+    public static final Long READ_ARANGO_MIN_TIME = ConfigUtils.getLongProperty("read.arango.min.time");
 
 }
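
ConfigUtils itself is not part of this diff. As a hedged sketch only, the new getters can be assumed to resolve keys such as arango.time.limit.type against the properties file shown further down, roughly like this (the class name ConfigUtilsSketch and the application.properties file name are assumptions):

import java.io.InputStream;
import java.util.Properties;

// Hedged sketch of what ConfigUtils presumably does; not taken from this repository.
public final class ConfigUtilsSketch {
    private static final Properties PROPS = new Properties();

    static {
        try (InputStream in = ConfigUtilsSketch.class.getClassLoader()
                .getResourceAsStream("application.properties")) {   // assumed file name
            PROPS.load(in);
        } catch (Exception e) {
            throw new ExceptionInInitializerError(e);
        }
    }

    public static Long getLongProperty(String key) {      // e.g. "read.arango.max.time"
        String v = PROPS.getProperty(key);
        return (v == null || v.isEmpty()) ? null : Long.parseLong(v.trim());
    }

    public static Integer getIntProperty(String key) {    // e.g. "arango.time.limit.type"
        String v = PROPS.getProperty(key);
        return (v == null || v.isEmpty()) ? null : Integer.parseInt(v.trim());
    }

    public static String getStringProperty(String key) {  // e.g. "arangoDB.read.limit"
        return PROPS.getProperty(key, "");
    }
}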
@@ -15,77 +15,89 @@ import java.util.concurrent.CountDownLatch;
 
 /**
  * Fetches historical arangoDB data
  *
  * @author wlh
  */
 public class BaseArangoData {
     private static final Logger LOG = LoggerFactory.getLogger(BaseArangoData.class);
 
-    static ConcurrentHashMap<Integer,ConcurrentHashMap<String, BaseDocument>> historyVertexFqdnMap = new ConcurrentHashMap<>();
-    static ConcurrentHashMap<Integer,ConcurrentHashMap<String, BaseDocument>> historyVertexIpMap = new ConcurrentHashMap<>();
-    static ConcurrentHashMap<Integer,ConcurrentHashMap<String, BaseDocument>> historyVertexSubscriberMap = new ConcurrentHashMap<>();
-    static ConcurrentHashMap<Integer,ConcurrentHashMap<String, BaseEdgeDocument>> historyRelationFqdnAddressIpMap = new ConcurrentHashMap<>();
-    static ConcurrentHashMap<Integer,ConcurrentHashMap<String, BaseEdgeDocument>> historyRelationIpVisitFqdnMap = new ConcurrentHashMap<>();
-    static ConcurrentHashMap<Integer,ConcurrentHashMap<String, BaseEdgeDocument>> historyRelationFqdnSameFqdnMap = new ConcurrentHashMap<>();
-    static ConcurrentHashMap<Integer,ConcurrentHashMap<String, BaseEdgeDocument>> historyRelationSubsciberLocateIpMap = new ConcurrentHashMap<>();
+    static ConcurrentHashMap<Integer, ConcurrentHashMap<String, BaseDocument>> historyVertexFqdnMap = new ConcurrentHashMap<>();
+    static ConcurrentHashMap<Integer, ConcurrentHashMap<String, BaseDocument>> historyVertexIpMap = new ConcurrentHashMap<>();
+    static ConcurrentHashMap<Integer, ConcurrentHashMap<String, BaseDocument>> historyVertexSubscriberMap = new ConcurrentHashMap<>();
+    static ConcurrentHashMap<Integer, ConcurrentHashMap<String, BaseEdgeDocument>> historyRelationFqdnAddressIpMap = new ConcurrentHashMap<>();
+    static ConcurrentHashMap<Integer, ConcurrentHashMap<String, BaseEdgeDocument>> historyRelationIpVisitFqdnMap = new ConcurrentHashMap<>();
+    static ConcurrentHashMap<Integer, ConcurrentHashMap<String, BaseEdgeDocument>> historyRelationFqdnSameFqdnMap = new ConcurrentHashMap<>();
+    static ConcurrentHashMap<Integer, ConcurrentHashMap<String, BaseEdgeDocument>> historyRelationSubsciberLocateIpMap = new ConcurrentHashMap<>();
 
     private static ArangoDBConnect arangoDBConnect = ArangoDBConnect.getInstance();
 
     private ExecutorThreadPool threadPool = ExecutorThreadPool.getInstance();
 
-    <T extends BaseDocument> void readHistoryData(String table, ConcurrentHashMap<Integer,ConcurrentHashMap<String, T>> map, Class<T> type){
+    <T extends BaseDocument> void readHistoryData(String table,
+                                                  ConcurrentHashMap<Integer, ConcurrentHashMap<String, T>> historyMap,
+                                                  Class<T> type) {
         try {
-            LOG.info("Start updating "+table);
+            LOG.info("Start updating " + table);
            long start = System.currentTimeMillis();
-            for (int i = 0; i < ApplicationConfig.THREAD_POOL_NUMBER; i++){
-                map.put(i,new ConcurrentHashMap<>());
+            for (int i = 0; i < ApplicationConfig.THREAD_POOL_NUMBER; i++) {
+                historyMap.put(i, new ConcurrentHashMap<>());
             }
             CountDownLatch countDownLatch = new CountDownLatch(ApplicationConfig.THREAD_POOL_NUMBER);
             long[] timeRange = getTimeRange(table);
             for (int i = 0; i < ApplicationConfig.THREAD_POOL_NUMBER; i++) {
                 String sql = getQuerySql(timeRange, i, table);
-                ReadHistoryArangoData<T> readHistoryArangoData = new ReadHistoryArangoData<>(arangoDBConnect, sql, map,type,table,countDownLatch);
+                ReadHistoryArangoData<T> readHistoryArangoData = new ReadHistoryArangoData<>(arangoDBConnect, sql, historyMap, type, table, countDownLatch);
                 threadPool.executor(readHistoryArangoData);
             }
             countDownLatch.await();
             long last = System.currentTimeMillis();
-            LOG.info("Reading "+table+" from arangoDB took: "+(last-start));
-        }catch (Exception e){
+            LOG.info("Reading " + table + " from arangoDB took: " + (last - start));
+        } catch (Exception e) {
             e.printStackTrace();
         }
     }
 
-    private long[] getTimeRange(String table){
+    private long[] getTimeRange(String table) {
         long minTime = 0L;
         long maxTime = 0L;
         long startTime = System.currentTimeMillis();
-        String sql = "LET doc = (FOR doc IN "+table+" RETURN doc) return {max_time:MAX(doc[*].FIRST_FOUND_TIME),min_time:MIN(doc[*].FIRST_FOUND_TIME)}";
-        ArangoCursor<BaseDocument> timeDoc = arangoDBConnect.executorQuery(sql, BaseDocument.class);
-        try {
-            if (timeDoc != null){
-                while (timeDoc.hasNext()) {
-                    BaseDocument doc = timeDoc.next();
-                    maxTime = Long.parseLong(doc.getAttribute("max_time").toString()) + ApplicationConfig.THREAD_POOL_NUMBER;
-                    minTime = Long.parseLong(doc.getAttribute("min_time").toString());
-                }
-                long lastTime = System.currentTimeMillis();
-                LOG.info(sql+"\nQuerying max/min time took: " + (lastTime - startTime));
-            }else {
-                LOG.warn("ArangoDb time range is empty");
-            }
-        }catch (Exception e){
-            e.printStackTrace();
-        }
+        String sql = "LET doc = (FOR doc IN " + table + " RETURN doc) return {max_time:MAX(doc[*].FIRST_FOUND_TIME),min_time:MIN(doc[*].FIRST_FOUND_TIME)}";
+        switch (ApplicationConfig.ARANGO_TIME_LIMIT_TYPE) {
+            case 0:
+                ArangoCursor<BaseDocument> timeDoc = arangoDBConnect.executorQuery(sql, BaseDocument.class);
+                try {
+                    if (timeDoc != null) {
+                        while (timeDoc.hasNext()) {
+                            BaseDocument doc = timeDoc.next();
+                            maxTime = Long.parseLong(doc.getAttribute("max_time").toString()) + ApplicationConfig.THREAD_POOL_NUMBER;
+                            minTime = Long.parseLong(doc.getAttribute("min_time").toString());
+                        }
+                    } else {
+                        LOG.warn("ArangoDb time range is empty");
+                    }
+                } catch (Exception e) {
+                    e.printStackTrace();
+                }
+                break;
+            case 1:
+                maxTime = ApplicationConfig.READ_ARANGO_MAX_TIME;
+                minTime = ApplicationConfig.READ_ARANGO_MIN_TIME;
+                break;
+            default:
+        }
+        long lastTime = System.currentTimeMillis();
+        LOG.info(sql + "\nQuerying max/min time took: " + (lastTime - startTime));
         return new long[]{minTime, maxTime};
 
     }
 
-    private String getQuerySql(long[] timeRange,int threadNumber,String table){
+    private String getQuerySql(long[] timeRange, int threadNumber, String table) {
         long minTime = timeRange[0];
         long maxTime = timeRange[1];
         long diffTime = (maxTime - minTime) / ApplicationConfig.THREAD_POOL_NUMBER;
-        long maxThreadTime = minTime + (threadNumber + 1)* diffTime;
+        long maxThreadTime = minTime + (threadNumber + 1) * diffTime;
         long minThreadTime = minTime + threadNumber * diffTime;
-        return "FOR doc IN "+table+" filter doc.FIRST_FOUND_TIME >= "+minThreadTime+" and doc.FIRST_FOUND_TIME <= "+maxThreadTime+" limit 100 RETURN doc";
+        return "FOR doc IN " + table + " filter doc.FIRST_FOUND_TIME >= " + minThreadTime + " and doc.FIRST_FOUND_TIME <= " + maxThreadTime + " " + ApplicationConfig.ARANGODB_READ_LIMIT + " RETURN doc";
     }
 
 }
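
Two details worth noting in getTimeRange/getQuerySql: in case 0 the discovered maxTime is padded by THREAD_POOL_NUMBER so the truncating integer division in getQuerySql cannot strand the newest documents outside the last window, and in case 1 the window comes straight from the debug properties with no padding. A minimal standalone sketch of the per-thread split under the debug window, assuming THREAD_POOL_NUMBER = 4 (the collection name VERTEX_FQDN is invented for illustration):

// Hedged sketch of the partitioning performed by getQuerySql; not project code.
public class ArangoWindowSketch {
    public static void main(String[] args) {
        long minTime = 1571245210L;   // read.arango.min.time
        long maxTime = 1571245220L;   // read.arango.max.time (case 1: no padding applied)
        int threads = 4;              // assumed THREAD_POOL_NUMBER
        long diffTime = (maxTime - minTime) / threads;   // 10 / 4 = 2; truncating division
        for (int i = 0; i < threads; i++) {
            long lo = minTime + i * diffTime;
            long hi = minTime + (i + 1) * diffTime;
            // ApplicationConfig.ARANGODB_READ_LIMIT is spliced in verbatim ("" or e.g. "limit 100")
            System.out.println("FOR doc IN VERTEX_FQDN filter doc.FIRST_FOUND_TIME >= " + lo
                    + " and doc.FIRST_FOUND_TIME <= " + hi + " RETURN doc");
        }
        // Note: the last window tops out at 1571245218 < maxTime, which is exactly the
        // truncation that the "+ ApplicationConfig.THREAD_POOL_NUMBER" padding in case 0 guards against.
    }
}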
@@ -36,10 +36,13 @@ public class BaseClickhouseData {
     private DruidPooledConnection connection;
     private Statement statement;
 
-    <T extends BaseDocument> void baseDocumentFromClickhouse(HashMap<Integer, HashMap<String, ArrayList<T>>> newMap, Supplier<String> getSqlSupplier, Function<ResultSet,T> formatResultFunc){
+    <T extends BaseDocument> void baseDocumentFromClickhouse(HashMap<Integer, HashMap<String, ArrayList<T>>> newMap,
+                                                             Supplier<String> getSqlSupplier,
+                                                             Function<ResultSet,T> formatResultFunc){
         long start = System.currentTimeMillis();
         initializeMap(newMap);
         String sql = getSqlSupplier.get();
+        LOG.info(sql);
         try {
             connection = manger.getConnection();
             statement = connection.createStatement();
@@ -53,7 +56,7 @@ public class BaseClickhouseData {
                 }
             }
             long last = System.currentTimeMillis();
-            LOG.info(sql + "\nRead "+i+" rows, elapsed: " + (last - start));
+            LOG.info("Read "+i+" rows, elapsed: " + (last - start));
         }catch (Exception e){
             e.printStackTrace();
         }finally {
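
The Supplier/Function pair keeps SQL generation separate from row mapping. A hedged usage sketch, assuming a call site in the same package; only the method signature comes from the diff, while the reader instance and the row mapper are illustrative assumptions:

// Hedged call-site sketch; column names match getVertexFqdnSql() below.
HashMap<Integer, HashMap<String, ArrayList<BaseDocument>>> newVertexFqdnMap = new HashMap<>();
new BaseClickhouseData().baseDocumentFromClickhouse(
        newVertexFqdnMap,
        ReadClickhouseData::getVertexFqdnSql,   // Supplier<String>: builds the SQL
        rs -> {                                 // Function<ResultSet, BaseDocument>: one row -> one doc
            BaseDocument doc = new BaseDocument();
            try {
                doc.setKey(rs.getString("FQDN"));
                doc.addAttribute("FIRST_FOUND_TIME", rs.getLong("FIRST_FOUND_TIME"));
                doc.addAttribute("LAST_FOUND_TIME", rs.getLong("LAST_FOUND_TIME"));
            } catch (SQLException e) {
                throw new RuntimeException(e);
            }
            return doc;
        });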
@@ -4,12 +4,10 @@ import cn.ac.iie.config.ApplicationConfig;
 import cn.ac.iie.service.read.ReadClickhouseData;
 import cn.ac.iie.service.update.Document;
 import cn.ac.iie.service.update.relationship.LocateFqdn2Ip;
 import cn.ac.iie.service.update.relationship.LocateSubscriber2Ip;
 import cn.ac.iie.service.update.relationship.SameFqdn2Fqdn;
 import cn.ac.iie.service.update.relationship.VisitIp2Fqdn;
 import cn.ac.iie.service.update.vertex.Fqdn;
 import cn.ac.iie.service.update.vertex.Ip;
 import cn.ac.iie.service.update.vertex.Subscriber;
 import cn.ac.iie.utils.ArangoDBConnect;
 import cn.ac.iie.utils.ExecutorThreadPool;
 import com.arangodb.entity.BaseDocument;
@@ -100,7 +98,7 @@ public class UpdateGraphData {
                     String.class,
                     ConcurrentHashMap.class,
                     CountDownLatch.class);
-            Document<T> docTask = (Document<T>)constructor.newInstance(tmpNewMap, arangoManger, collection, tmpHisMap, countDownLatch);
+            Document docTask = (Document)constructor.newInstance(tmpNewMap, arangoManger, collection, tmpHisMap, countDownLatch);
             pool.executor(docTask);
         }
         countDownLatch.await();
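
Dropping the <T> on Document merely silences the unchecked generic cast around Constructor.newInstance, which returns Object either way. A hedged sketch of the reflective dispatch this hunk sits in; the first two parameter types and the Fqdn example are assumptions, only the last three parameter classes appear above:

// Hedged sketch; parameter types before String.class are guesses from the argument names.
Constructor<?> constructor = Fqdn.class.getConstructor(
        HashMap.class,            // tmpNewMap (assumed type)
        ArangoDBConnect.class,    // arangoManger (assumed type)
        String.class,             // collection name
        ConcurrentHashMap.class,  // tmpHisMap
        CountDownLatch.class);
Document docTask = (Document) constructor.newInstance(
        tmpNewMap, arangoManger, collection, tmpHisMap, countDownLatch);
pool.executor(docTask);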
@@ -25,8 +25,10 @@ public class ReadClickhouseData {
 
 
     private static long[] timeLimit = getTimeLimit();
+    private static long maxTime = timeLimit[0];
+    private static long minTime = timeLimit[1];
     public static final Integer DISTINCT_CLIENT_IP_NUM = ApplicationConfig.DISTINCT_CLIENT_IP_NUM;
-    public static final Integer RECENT_COUNT_HOUR = ApplicationConfig.RECENT_COUNT_HOUR;
+    static final Integer RECENT_COUNT_HOUR = ApplicationConfig.RECENT_COUNT_HOUR;
     public static final HashSet<String> PROTOCOL_SET;
 
     static {
@@ -278,8 +280,6 @@ public class ReadClickhouseData {
     }
 
     public static String getVertexFqdnSql() {
-        long maxTime = timeLimit[0];
-        long minTime = timeLimit[1];
         String where = "recv_time >= "+minTime+" and recv_time <= "+maxTime;
         String mediaDomainSql = "SELECT s1_domain AS FQDN,MIN(recv_time) AS FIRST_FOUND_TIME,MAX(recv_time) AS LAST_FOUND_TIME FROM media_expire_patch WHERE "+where+" and s1_domain != '' GROUP BY s1_domain";
         String refererSql = "SELECT s1_referer AS FQDN,MIN(recv_time) AS FIRST_FOUND_TIME,MAX(recv_time) AS LAST_FOUND_TIME FROM media_expire_patch WHERE "+where+" and s1_referer != '' GROUP BY s1_referer";
@@ -287,8 +287,6 @@ public class ReadClickhouseData {
     }
 
     public static String getVertexIpSql() {
-        long maxTime = timeLimit[0];
-        long minTime = timeLimit[1];
         String where = " recv_time >= " + minTime + " AND recv_time < " + maxTime;
         String clientIpSql = "SELECT s1_s_ip AS IP, MIN(recv_time) AS FIRST_FOUND_TIME,MAX(recv_time) AS LAST_FOUND_TIME,count(*) as SESSION_COUNT,sum(media_len) as BYTES_SUM,'client' as ip_type FROM media_expire_patch where " + where + " group by IP";
         String serverIpSql = "SELECT s1_d_ip AS IP, MIN(recv_time) AS FIRST_FOUND_TIME,MAX(recv_time) AS LAST_FOUND_TIME,count(*) as SESSION_COUNT,sum(media_len) as BYTES_SUM,'server' as ip_type FROM media_expire_patch where " + where + " group by IP";
@@ -296,36 +294,26 @@ public class ReadClickhouseData {
     }
 
     public static String getRelationshipFqdnAddressIpSql() {
-        long maxTime = timeLimit[0];
-        long minTime = timeLimit[1];
         String where = "recv_time >= "+minTime+" and recv_time <= "+maxTime +" AND s1_domain != '' AND s1_d_ip != '' ";
         return "SELECT s1_domain AS FQDN,s1_d_ip AS common_server_ip,MIN( recv_time ) AS FIRST_FOUND_TIME,MAX( recv_time ) AS LAST_FOUND_TIME,COUNT( * ) AS COUNT_TOTAL,groupUniqArray("+DISTINCT_CLIENT_IP_NUM+")(s1_s_ip) AS DIST_CIP_RECENT FROM media_expire_patch WHERE "+where+" GROUP BY s1_d_ip,s1_domain";
     }
 
     public static String getRelationshipFqdnSameFqdnSql(){
-        long maxTime = timeLimit[0];
-        long minTime = timeLimit[1];
         String where = "recv_time >= "+minTime+" and recv_time <= "+maxTime +" AND s1_domain != '' AND s1_referer != '' ";
         return "SELECT s1_domain AS domainFqdn,s1_referer AS referer,MIN(recv_time) AS FIRST_FOUND_TIME,MAX(recv_time) AS LAST_FOUND_TIME,COUNT(*) AS COUNT_TOTAL FROM media_expire_patch where "+where+" GROUP BY s1_domain,s1_referer";
     }
 
     public static String getRelationshipIpVisitFqdnSql() {
-        long maxTime = timeLimit[0];
-        long minTime = timeLimit[1];
         String where = "recv_time >= "+minTime+" and recv_time <= "+maxTime+" AND s1_s_ip != '' AND s1_domain != '' ";
         return "SELECT s1_s_ip AS common_client_ip,s1_domain AS FQDN,MIN( recv_time ) AS FIRST_FOUND_TIME,MAX( recv_time ) AS LAST_FOUND_TIME,COUNT( * ) AS COUNT_TOTAL FROM media_expire_patch WHERE "+where+" GROUP BY s1_s_ip,s1_domain";
     }
 
     public static String getVertexSubscriberSql() {
-        long maxTime = timeLimit[0];
-        long minTime = timeLimit[1];
         String where = " common_recv_time >= " + minTime + " AND common_recv_time < " + maxTime + " AND common_subscriber_id != '' AND radius_packet_type = 4 AND radius_acct_status_type = 1";
         return "SELECT common_subscriber_id,MAX(common_recv_time) as LAST_FOUND_TIME,MIN(common_recv_time) as FIRST_FOUND_TIME FROM radius_record_log WHERE" + where + " GROUP BY common_subscriber_id";
     }
 
     public static String getRelationshipSubsciberLocateIpSql() {
-        long maxTime = timeLimit[0];
-        long minTime = timeLimit[1];
         String where = " common_recv_time >= " + minTime + " AND common_recv_time < " + maxTime + " AND common_subscriber_id != '' AND radius_framed_ip != '' AND radius_packet_type = 4 AND radius_acct_status_type = 1";
         return "SELECT common_subscriber_id,radius_framed_ip,MAX(common_recv_time) as LAST_FOUND_TIME,MIN(common_recv_time) as FIRST_FOUND_TIME,COUNT(*) as COUNT_TOTAL FROM radius_record_log WHERE" + where + " GROUP BY common_subscriber_id,radius_framed_ip";
     }
@@ -333,7 +321,7 @@ public class ReadClickhouseData {
     private static long[] getTimeLimit() {
         long maxTime = 0L;
         long minTime = 0L;
-        switch (ApplicationConfig.TIME_LIMIT_TYPE) {
+        switch (ApplicationConfig.CLICKHOUSE_TIME_LIMIT_TYPE) {
             case 0:
                 maxTime = currentHour;
                 minTime = maxTime - ApplicationConfig.UPDATE_INTERVAL;
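
Hoisting maxTime/minTime to static fields removes the repeated locals from every SQL builder. As a worked example: with clickhouse.time.limit.type=1 and the window from the properties below, and assuming the case 1 branch of getTimeLimit() (not shown in this hunk) returns {READ_CLICKHOUSE_MAX_TIME, READ_CLICKHOUSE_MIN_TIME}, the mediaDomainSql half of getVertexFqdnSql() renders as (line-broken here for readability):

SELECT s1_domain AS FQDN,MIN(recv_time) AS FIRST_FOUND_TIME,MAX(recv_time) AS LAST_FOUND_TIME
FROM media_expire_patch
WHERE recv_time >= 1571245210 and recv_time <= 1571245220 and s1_domain != ''
GROUP BY s1_domain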
@@ -1,12 +1,11 @@
 package cn.ac.iie.utils;
 
 import cn.ac.iie.config.ApplicationConfig;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.InputStreamReader;
+import java.io.*;
+import java.net.URL;
 import java.util.HashMap;
 
 public class TopDomainUtils {
@@ -17,13 +16,13 @@ public class TopDomainUtils {
         String[] split = urlDomain.split("\\.");
         String secDomain = null;
         for (int i = split.length - 1; i >= 0; i--) {
-            int maps_index = split.length - (i + 1);
-            HashMap<String, String> innerMap = maps.get("map_id_" + maps_index);
+            int mapsIndex = split.length - (i + 1);
+            HashMap<String, String> innerMap = maps.get("map_id_" + mapsIndex);
             HashMap<String, String> fullTop = maps.get("full");
             if (!(innerMap.containsKey(split[i]))) {
-                String strSec = "";
+                StringBuilder strSec = new StringBuilder();
                 for (int j = i; j < split.length; j++) {
-                    strSec += (split[j] + ".");
+                    strSec.append(split[j]).append(".");
                 }
                 secDomain = strSec.substring(0, strSec.length() - 1);
                 if (fullTop.containsKey(getTopDomainFromSecDomain(secDomain))) {
@@ -47,11 +46,25 @@ public class TopDomainUtils {
         return quFirstDian;
     }
 
-    public static HashMap<String, HashMap<String, String>> readTopDomainFile(String filePath) {
-        HashMap<String, HashMap<String, String>> maps = makeHashMap(filePath);
+    private static File getTopDomainFile(){
+        URL url = TopDomainUtils.class.getClassLoader().getResource(ApplicationConfig.TOP_DOMAIN_FILE_NAME);
+        File file = null;
+        if (url!=null){
+            file = new File(url.getFile());
+        }
+        if (file != null && file.isFile() && file.exists()){
+            return file;
+        }
+        return null;
+    }
+
+    public static HashMap<String, HashMap<String, String>> readTopDomainFile() {
+        URL url = TopDomainUtils.class.getClassLoader().getResource(ApplicationConfig.TOP_DOMAIN_FILE_NAME);
+        assert url != null;
+        HashMap<String, HashMap<String, String>> maps = makeHashMap(url.getFile());
         try {
             String encoding = "UTF-8";
-            File file = new File(filePath);
+            File file = new File(url.getFile());
             if (file.isFile() && file.exists()) {
                 InputStreamReader read = new InputStreamReader(
                         new FileInputStream(file), encoding);
@@ -63,15 +76,13 @@ public class TopDomainUtils {
                     maps.put("full", fullTop);
                     String[] split = lineTxt.split("\\.");
                     for (int i = split.length - 1; i >= 0; i--) {
-                        int maps_index = split.length - (i + 1);
-                        HashMap<String, String> innerMap = maps.get("map_id_" + maps_index);
+                        int mapsIndex = split.length - (i + 1);
+                        HashMap<String, String> innerMap = maps.get("map_id_" + mapsIndex);
                         innerMap.put(split[i], split[i]);
-                        maps.put("map_id_" + maps_index, innerMap);
+                        maps.put("map_id_" + mapsIndex, innerMap);
                     }
                 }
                 read.close();
-            } else {
-                logger.error("TopDomainUtils>=>readTopDomainFile filePath is wrong--->{" + filePath + "}<---");
             }
         } catch (Exception e) {
             logger.error("TopDomainUtils>=>readTopDomainFile get filePathData error--->{" + e + "}<---");
@@ -89,7 +100,7 @@ public class TopDomainUtils {
             InputStreamReader read = new InputStreamReader(
                     new FileInputStream(file), encoding);
             BufferedReader bufferedReader = new BufferedReader(read);
-            String lineTxt = null;
+            String lineTxt;
             while ((lineTxt = bufferedReader.readLine()) != null) {
                 String[] split = lineTxt.split("\\.");
                 if (split.length > lengthDomain) {
@@ -109,7 +120,7 @@ public class TopDomainUtils {
 
     private static HashMap<String, HashMap<String, String>> makeHashMap(String filePath) {
         int maxLength = getMaxLength(filePath);
-        HashMap<String, HashMap<String, String>> maps = new HashMap<String, HashMap<String, String>>();
+        HashMap<String, HashMap<String, String>> maps = new HashMap<>();
         for (int i = 0; i < maxLength; i++) {
             maps.put("map_id_" + i, new HashMap<String, String>());
         }
@@ -117,12 +128,16 @@ public class TopDomainUtils {
         return maps;
     }
 
-    //General-purpose helper: takes a url, returns its domain; the domain carries no port, and one containing ':' must be IPv6
+    /**
+     * General-purpose helper: takes a url, returns its domain; the domain carries no port, and one containing ':' must be IPv6
+     * @param oriUrl
+     * @return
+     */
     public static String getDomainFromUrl(String oriUrl) {
         //Split on '?' first to drop the query string
         String url = oriUrl.split("[?]")[0];
         //Strip the http:// or https:// prefix
-        url = url.replaceAll("https://","").replaceAll("http://","");
+        url = url.replaceAll("https://", "").replaceAll("http://", "");
         String domain;
 
         //Extract the domain
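
A hedged usage sketch of the reworked API: readTopDomainFile() now locates topDomain.txt on the classpath itself instead of taking a path argument. Both methods appear in this diff; the sample URL and the expected output are illustrative assumptions:

HashMap<String, HashMap<String, String>> maps = TopDomainUtils.readTopDomainFile();
String domain = TopDomainUtils.getDomainFromUrl("https://video.example.com/play?id=42");
System.out.println(domain);                    // expected: video.example.com
System.out.println(maps.containsKey("full"));  // "full" holds the complete top-domain set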
@@ -16,10 +16,19 @@ thread.await.termination.time=10
 
 
 # clickhouse read-window mode. 0: read the past hour, 1: use the fixed range below
-time.limit.type=1
+clickhouse.time.limit.type=1
 read.clickhouse.max.time=1571245220
 read.clickhouse.min.time=1571245210
 
+# arangoDB read-window mode. 0: normal read, 1: use the fixed range below
+arango.time.limit.type=1
+read.arango.max.time=1571245220
+read.arango.min.time=1571245210
+
 update.interval=3600
 distinct.client.ip.num=10000
 recent.count.hour=24
 
+top.domain.file.name=topDomain.txt
+
+arangoDB.read.limit=
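
Note how arangoDB.read.limit interacts with getQuerySql above: the value is spliced verbatim into the AQL, so the empty default removes the previously hardcoded limit 100. To restore the capped debug reads, one would presumably set:

arangoDB.read.limit=limit 100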
ip-learning-java-test/src/main/resources/topDomain.txt (new file, 8911 lines)
File diff suppressed because it is too large
@@ -1,20 +1,30 @@
 package cn.ac.iie;
 
 import java.io.*;
+import java.net.URL;
 import java.util.TreeMap;
+import java.util.regex.Pattern;
 
 public class TestReadLine {
     public static void main(String[] args) throws Exception {
+        Pattern pattern = Pattern.compile("^[^.]*$");
         String encoding = "UTF-8";
-        File file = new File("C:\\Users\\94976\\Desktop\\test.txt");
+//        File file = new File("C:\\Users\\94976\\Desktop\\test.txt");
+        URL url = TestReadLine.class.getClassLoader().getResource("topDomain.txt");
+        assert url != null;
+        File file = new File(url.getFile());
         InputStreamReader read = new InputStreamReader(
                 new FileInputStream(file), encoding);
         BufferedReader bufferedReader = new BufferedReader(read);
-        String lineTxt = null;
-        long sum = 0L;
+        String lineTxt;
+        int cnt = 0;
         while ((lineTxt = bufferedReader.readLine()) != null){
-            long num = Long.parseLong(lineTxt);
-            sum = sum + num;
+            if (pattern.matcher(lineTxt).matches()){
+                cnt += 1;
+                System.out.println(lineTxt);
+            }
         }
-        System.out.println(sum);
+        System.out.println(cnt);
+        System.out.println(url.getFile());
     }
 }
ip-learning-java-test/src/test/java/cn/ac/iie/TestRegex.java (new file, 10 lines)
@@ -0,0 +1,10 @@
+package cn.ac.iie;
+
+import java.util.regex.Pattern;
+
+public class TestRegex {
+    public static void main(String[] args) {
+        Pattern pattern = Pattern.compile("^[^.]*$");
+        System.out.println(pattern.matcher("com.dz").matches());
+    }
+}
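
For the record, ^[^.]*$ only matches strings containing no dot at all, so the line above prints false; a bare label would match:

System.out.println(pattern.matcher("com").matches());   // true: no '.' present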