diff --git a/IP-learning-graph/src/main/java/cn/ac/iie/dao/BaseClickhouseData.java b/IP-learning-graph/src/main/java/cn/ac/iie/dao/BaseClickhouseData.java index 0030470..6663b35 100644 --- a/IP-learning-graph/src/main/java/cn/ac/iie/dao/BaseClickhouseData.java +++ b/IP-learning-graph/src/main/java/cn/ac/iie/dao/BaseClickhouseData.java @@ -25,14 +25,14 @@ public class BaseClickhouseData { private static final ClickhouseConnect manger = ClickhouseConnect.getInstance(); private static HashMap> vFqdnMap = new HashMap<>(); private static HashMap> vIpMap = new HashMap<>(); - private static HashMap> eFqdnAddressIpMap = new HashMap<>(); - private static HashMap> eIpVisitFqdnMap = new HashMap<>(); + private static HashMap>> eFqdnAddressIpMap = new HashMap<>(); + private static HashMap>> eIpVisitFqdnMap = new HashMap<>(); private static long[] getTimeLimit() { -// long maxTime = System.currentTimeMillis() / 1000; -// long minTime = maxTime - 3600; - long maxTime = ApplicationConfig.READ_CLICKHOUSE_MAX_TIME; - long minTime = ApplicationConfig.READ_CLICKHOUSE_MIN_TIME; + long maxTime = System.currentTimeMillis() / 1000; + long minTime = maxTime - 3600; +// long maxTime = ApplicationConfig.READ_CLICKHOUSE_MAX_TIME; +// long minTime = ApplicationConfig.READ_CLICKHOUSE_MIN_TIME; return new long[]{maxTime, minTime}; } @@ -59,40 +59,35 @@ public class BaseClickhouseData { } public static void BaseVFqdn() { - long[] timeLimit = getTimeLimit(); - long maxTime = timeLimit[0]; - long minTime = timeLimit[1]; - String where = "common_recv_time >= " + minTime + " AND common_recv_time <= " + maxTime + " AND (common_schema_type = 'HTTP' or common_schema_type = 'SSL')"; - String sql = "SELECT common_schema_type,http_host,ssl_sni,MAX(common_recv_time) as LAST_FOUND_TIME,MIN(common_recv_time) as FIRST_FOUND_TIME FROM tsg_galaxy_v3.connection_record_log WHERE " + where + " GROUP BY common_schema_type,http_host,ssl_sni "; -// LOG.info(sql); + + String sql = getVFqdnSql(); long start = System.currentTimeMillis(); try { DruidPooledConnection connection = manger.getConnection(); Statement statement = connection.createStatement(); ResultSet resultSet = statement.executeQuery(sql); - HashSet fqdnSet = new HashSet<>(); +// HashSet fqdnSet = new HashSet<>(); while (resultSet.next()) { - String commonSchemaType = resultSet.getString("common_schema_type"); - String fqdnName = commonSchemaGetFqdn(commonSchemaType,resultSet); - if (!fqdnName.equals("") || !fqdnSet.contains(fqdnName)){ - fqdnSet.add(fqdnName); - long firstFoundTime = resultSet.getLong("FIRST_FOUND_TIME"); - long lastFoundTime = resultSet.getLong("LAST_FOUND_TIME"); - BaseDocument newDoc = new BaseDocument(); - newDoc.setKey(fqdnName); - newDoc.addAttribute("FQDN_NAME", fqdnName); - newDoc.addAttribute("FIRST_FOUND_TIME", firstFoundTime); - newDoc.addAttribute("LAST_FOUND_TIME", lastFoundTime); - int i = Math.abs(fqdnName.hashCode()) % ApplicationConfig.THREAD_POOL_NUMBER; - ArrayList documentList = vFqdnMap.getOrDefault(i, new ArrayList<>()); - documentList.add(newDoc); - } +// String commonSchemaType = resultSet.getString("common_schema_type"); +// String fqdnName = commonSchemaGetFqdn(commonSchemaType,resultSet); + String fqdnName = resultSet.getString("FQDN"); +// fqdnSet.add(fqdnName); + long firstFoundTime = resultSet.getLong("FIRST_FOUND_TIME"); + long lastFoundTime = resultSet.getLong("LAST_FOUND_TIME"); + BaseDocument newDoc = new BaseDocument(); + newDoc.setKey(fqdnName); + newDoc.addAttribute("FQDN_NAME", fqdnName); + newDoc.addAttribute("FIRST_FOUND_TIME", firstFoundTime); + newDoc.addAttribute("LAST_FOUND_TIME", lastFoundTime); + int i = Math.abs(fqdnName.hashCode()) % ApplicationConfig.THREAD_POOL_NUMBER; + ArrayList documentList = vFqdnMap.getOrDefault(i, new ArrayList<>()); + documentList.add(newDoc); } long last = System.currentTimeMillis(); - LOG.info(sql+"\n读取clickhouse v_FQDN时间:" + (last - start)); + LOG.info(sql + "\n读取clickhouse v_FQDN时间:" + (last - start)); for (int i = 0; i < ApplicationConfig.THREAD_POOL_NUMBER; i++) { ArrayList baseDocumentList = vFqdnMap.get(i); - LOG.info("vFqdn baseDocumentHashMap大小:"+baseDocumentList.size()); + LOG.info("vFqdn baseDocumentHashMap大小:" + baseDocumentList.size()); UpdateVFqdn updateVFqdn = new UpdateVFqdn(baseDocumentList); updateVFqdn.run(); } @@ -102,12 +97,7 @@ public class BaseClickhouseData { } public static void BaseVIp() { - long[] timeLimit = getTimeLimit(); - long maxTime = timeLimit[0]; - long minTime = timeLimit[1]; - String where = " common_recv_time >= " + minTime + " AND common_recv_time <= " + maxTime+ " AND (common_schema_type = 'HTTP' or common_schema_type = 'SSL')"; - String sql = "SELECT IP,location,MIN(common_recv_time) AS FIRST_FOUND_TIME,MAX(common_recv_time) AS LAST_FOUND_TIME,COUNT(*) AS IP_COUNT_TOTAL FROM(( SELECT common_client_ip AS IP, common_client_location AS location, common_recv_time FROM tsg_galaxy_v3.connection_record_log where "+where+" ) UNION ALL ( SELECT common_server_ip AS IP, common_server_location AS location, common_recv_time FROM tsg_galaxy_v3.connection_record_log where "+where+" )) GROUP BY IP,location"; -// LOG.info(sql); + String sql = getVIpSql(); long start = System.currentTimeMillis(); try { DruidPooledConnection connection = manger.getConnection(); @@ -119,10 +109,10 @@ public class BaseClickhouseData { String[] locationSplit = location.split(";"); String ipLocationNation; String ipLocationRegion; - if (locationSplit.length == 3){ + if (locationSplit.length == 3) { ipLocationNation = locationSplit[0]; ipLocationRegion = locationSplit[1]; - }else { + } else { ipLocationNation = location; ipLocationRegion = location; } @@ -132,7 +122,7 @@ public class BaseClickhouseData { newDoc.setKey(ip); newDoc.addAttribute("IP", ip); newDoc.addAttribute("IP_LOCATION_NATION", ipLocationNation); - newDoc.addAttribute("IP_LOCATION_REGION",ipLocationRegion); + newDoc.addAttribute("IP_LOCATION_REGION", ipLocationRegion); newDoc.addAttribute("FIRST_FOUND_TIME", firstFoundTime); newDoc.addAttribute("LAST_FOUND_TIME", lastFoundTime); int i = Math.abs(ip.hashCode()) % ApplicationConfig.THREAD_POOL_NUMBER; @@ -140,10 +130,10 @@ public class BaseClickhouseData { documentList.add(newDoc); } long last = System.currentTimeMillis(); - LOG.info(sql+"\n读取clickhouse v_IP时间:" + (last - start)); + LOG.info(sql + "\n读取clickhouse v_IP时间:" + (last - start)); for (int i = 0; i < ApplicationConfig.THREAD_POOL_NUMBER; i++) { ArrayList baseDocumentList = vIpMap.get(i); - LOG.info("vIp baseDocumentHashMap大小:"+baseDocumentList.size()); + LOG.info("vIp baseDocumentHashMap大小:" + baseDocumentList.size()); UpdateVIP updateVIp = new UpdateVIP(baseDocumentList); updateVIp.run(); } @@ -153,60 +143,46 @@ public class BaseClickhouseData { } public static void BaseEFqdnAddressIp() { - long[] timeLimit = getTimeLimit(); - long maxTime = timeLimit[0]; - long minTime = timeLimit[1]; - String where = " common_recv_time >= " + minTime + " AND common_recv_time <= " + maxTime+ " AND (common_schema_type = 'HTTP' or common_schema_type = 'SSL')"; - String sql = "SELECT common_schema_type,http_host,ssl_sni,common_server_ip,MAX(common_recv_time) as LAST_FOUND_TIME,MIN(common_recv_time) as FIRST_FOUND_TIME,COUNT(*) as COUNT_TOTAL,groupArray(30)(common_client_ip) as DIST_CIP_RECENT FROM tsg_galaxy_v3.connection_record_log WHERE "+where+" GROUP BY common_schema_type,http_host,ssl_sni,common_server_ip"; -// LOG.info(sql); + + String sql = getEFqdnAddressIpSql(); long start = System.currentTimeMillis(); try { DruidPooledConnection connection = manger.getConnection(); Statement statement = connection.createStatement(); ResultSet resultSet = statement.executeQuery(sql); - HashMap> schemaHashMap = new HashMap<>(); -// ArrayList baseEdgeDocuments = new ArrayList<>(); while (resultSet.next()) { String commonSchemaType = resultSet.getString("common_schema_type"); - String vFqdn = commonSchemaGetFqdn(commonSchemaType,resultSet); - if (!vFqdn.equals("")){ -// String vFqdn = resultSet.getString("http_host"); - String vIp = resultSet.getString("common_server_ip"); - long firstFoundTime = resultSet.getLong("FIRST_FOUND_TIME"); - long lastFoundTime = resultSet.getLong("LAST_FOUND_TIME"); - long countTotal = resultSet.getLong("COUNT_TOTAL"); - String[] distCipRecents = (String[]) resultSet.getArray("DIST_CIP_RECENT").getArray(); - String key = vFqdn + "-" + vIp; + String vFqdn = resultSet.getString("FQDN"); + String vIp = resultSet.getString("common_server_ip"); + long firstFoundTime = resultSet.getLong("FIRST_FOUND_TIME"); + long lastFoundTime = resultSet.getLong("LAST_FOUND_TIME"); + long countTotal = resultSet.getLong("COUNT_TOTAL"); + String[] distCipRecents = (String[]) resultSet.getArray("DIST_CIP_RECENT").getArray(); - HashMap map = schemaHashMap.getOrDefault(key, new HashMap<>()); - Long httpCount = map.getOrDefault(commonSchemaType, 0L); - map.put(commonSchemaType,httpCount+countTotal); - schemaHashMap.put(key,map); + String key = vFqdn + "-" + vIp; + BaseEdgeDocument newDoc = new BaseEdgeDocument(); + newDoc.setKey(key); + newDoc.setFrom("FQDN/" + vFqdn); + newDoc.setTo("IP/" + vIp); + newDoc.addAttribute("FIRST_FOUND_TIME", firstFoundTime); + newDoc.addAttribute("LAST_FOUND_TIME", lastFoundTime); + newDoc.addAttribute("COUNT_TOTAL", countTotal); + newDoc.addAttribute("DIST_CIP_RECENT", distCipRecents); + newDoc.addAttribute("DIST_CIP_TOTAL", distCipRecents); - BaseEdgeDocument newDoc = new BaseEdgeDocument(); - newDoc.setKey(key); - newDoc.setFrom("FQDN/" + vFqdn); - newDoc.setTo("IP/" + vIp); - newDoc.addAttribute("FIRST_FOUND_TIME", firstFoundTime); - newDoc.addAttribute("LAST_FOUND_TIME", lastFoundTime); - newDoc.addAttribute("TLS_CNT_TOTAL", map.getOrDefault("SSL",0L)); - newDoc.addAttribute("HTTP_CNT_TOTAL", map.getOrDefault("HTTP",0L)); - newDoc.addAttribute("DIST_CIP_RECENT", distCipRecents); - newDoc.addAttribute("DIST_CIP_TOTAL", distCipRecents); -// baseEdgeDocuments.add(newDoc); - int i = Math.abs(key.hashCode()) % ApplicationConfig.THREAD_POOL_NUMBER; - HashMap documentHashMap = eFqdnAddressIpMap.getOrDefault(i, new HashMap()); - documentHashMap.put(key, newDoc); - } + int hashMod = Math.abs(key.hashCode()) % ApplicationConfig.THREAD_POOL_NUMBER; + HashMap> documentHashMap = eFqdnAddressIpMap.getOrDefault(hashMod, new HashMap()); + + HashMap schemaHashMap = documentHashMap.getOrDefault(key, new HashMap<>()); + schemaHashMap.put(commonSchemaType, newDoc); + documentHashMap.put(key, schemaHashMap); } -// ArangoDBConnect.getInstance().insertAndUpdate(baseEdgeDocuments,null,"R_LOCATE_FQDN2IP"); - schemaHashMap.clear(); long last = System.currentTimeMillis(); - LOG.info(sql+"\n读取clickhouse EFqdnAddressIp时间:" + (last - start)); + LOG.info(sql + "\n读取clickhouse EFqdnAddressIp时间:" + (last - start)); for (int i = 0; i < ApplicationConfig.THREAD_POOL_NUMBER; i++) { - HashMap baseDocumentHashMap = eFqdnAddressIpMap.get(i); - LOG.info("EFqdnAddressIp baseDocumentHashMap大小:"+baseDocumentHashMap.size()); + HashMap> baseDocumentHashMap = eFqdnAddressIpMap.get(i); + LOG.info("EFqdnAddressIp baseDocumentHashMap大小:" + baseDocumentHashMap.size()); UpdateEFqdnAddressIp updateEFqdnAddressIp = new UpdateEFqdnAddressIp(baseDocumentHashMap); updateEFqdnAddressIp.run(); } @@ -216,50 +192,41 @@ public class BaseClickhouseData { } public static void BaseEIpVisitFqdn() { - long[] timeLimit = getTimeLimit(); - long maxTime = timeLimit[0]; - long minTime = timeLimit[1]; - String where = " common_recv_time >= " + minTime + " AND common_recv_time <= " + maxTime+ " AND (common_schema_type = 'HTTP' or common_schema_type = 'SSL')"; - String sql = "SELECT common_schema_type,http_host,ssl_sni,common_client_ip,MAX(common_recv_time) as LAST_FOUND_TIME,MIN(common_recv_time) as FIRST_FOUND_TIME,count(*) as COUNT_TOTAL FROM tsg_galaxy_v3.connection_record_log WHERE "+where+" GROUP BY common_schema_type,http_host,ssl_sni,common_client_ip"; -// LOG.info(sql); + String sql = getEIpVisitFqdnSql(); long start = System.currentTimeMillis(); try { DruidPooledConnection connection = manger.getConnection(); Statement statement = connection.createStatement(); ResultSet resultSet = statement.executeQuery(sql); - HashMap> schemaHashMap = new HashMap<>(); + while (resultSet.next()) { String commonSchemaType = resultSet.getString("common_schema_type"); String vIp = resultSet.getString("common_client_ip"); - String vFqdn = commonSchemaGetFqdn(commonSchemaType,resultSet); - if (!vFqdn.equals("")){ - String key = vIp +"-"+vFqdn; - long firstFoundTime = resultSet.getLong("FIRST_FOUND_TIME"); - long lastFoundTime = resultSet.getLong("LAST_FOUND_TIME"); - long countTotal = resultSet.getLong("COUNT_TOTAL"); - HashMap map = schemaHashMap.getOrDefault(key, new HashMap<>()); - Long httpCount = map.getOrDefault(commonSchemaType, 0L); - map.put(commonSchemaType,httpCount+countTotal); - schemaHashMap.put(key,map); - BaseEdgeDocument newDoc = new BaseEdgeDocument(); - newDoc.setKey(key); - newDoc.setFrom("IP/" + vIp); - newDoc.setTo("FQDN/" + vFqdn); - newDoc.addAttribute("FIRST_FOUND_TIME", firstFoundTime); - newDoc.addAttribute("LAST_FOUND_TIME", lastFoundTime); - newDoc.addAttribute("TLS_CNT_TOTAL", map.getOrDefault("SSL",0L)); - newDoc.addAttribute("HTTP_CNT_TOTAL", map.getOrDefault("HTTP",0L)); - int i = Math.abs(key.hashCode()) % ApplicationConfig.THREAD_POOL_NUMBER; - HashMap documentHashMap = eIpVisitFqdnMap.getOrDefault(i, new HashMap()); - documentHashMap.put(key, newDoc); - } + String vFqdn = resultSet.getString("FQDN"); + String key = vIp + "-" + vFqdn; + long firstFoundTime = resultSet.getLong("FIRST_FOUND_TIME"); + long lastFoundTime = resultSet.getLong("LAST_FOUND_TIME"); + long countTotal = resultSet.getLong("COUNT_TOTAL"); + + BaseEdgeDocument newDoc = new BaseEdgeDocument(); + newDoc.setKey(key); + newDoc.setFrom("IP/" + vIp); + newDoc.setTo("FQDN/" + vFqdn); + newDoc.addAttribute("FIRST_FOUND_TIME", firstFoundTime); + newDoc.addAttribute("LAST_FOUND_TIME", lastFoundTime); + newDoc.addAttribute("COUNT_TOTAL", countTotal); + int i = Math.abs(key.hashCode()) % ApplicationConfig.THREAD_POOL_NUMBER; + HashMap> documentHashMap = eIpVisitFqdnMap.getOrDefault(i, new HashMap()); + + HashMap schemaHashMap = documentHashMap.getOrDefault(key, new HashMap<>()); + schemaHashMap.put(commonSchemaType, newDoc); + documentHashMap.put(key, schemaHashMap); } - schemaHashMap.clear(); long last = System.currentTimeMillis(); - LOG.info(sql+"\n读取clickhouse EIpVisitFqdn时间:" + (last - start)); + LOG.info(sql + "\n读取clickhouse EIpVisitFqdn时间:" + (last - start)); for (int i = 0; i < ApplicationConfig.THREAD_POOL_NUMBER; i++) { - HashMap baseDocumentHashMap = eIpVisitFqdnMap.get(i); - LOG.info("EIpVisitFqdn baseDocumentHashMap大小:"+baseDocumentHashMap.size()); + HashMap> baseDocumentHashMap = eIpVisitFqdnMap.get(i); + LOG.info("EIpVisitFqdn baseDocumentHashMap大小:" + baseDocumentHashMap.size()); UpdateEIpVisitFqdn updateEIpVisitFqdn = new UpdateEIpVisitFqdn(baseDocumentHashMap); updateEIpVisitFqdn.run(); } @@ -268,10 +235,10 @@ public class BaseClickhouseData { } } - private static String commonSchemaGetFqdn(String commonSchemaType,ResultSet resultSet){ + private static String commonSchemaGetFqdn(String commonSchemaType, ResultSet resultSet) { String vFqdn = ""; try { - switch (commonSchemaType){ + switch (commonSchemaType) { case "HTTP": vFqdn = resultSet.getString("http_host"); break; @@ -281,36 +248,76 @@ public class BaseClickhouseData { default: LOG.warn("不支持该类型common_schema_type:" + commonSchemaType); } - }catch (Exception e){ + } catch (Exception e) { LOG.error(e.getMessage()); } - if (isDomain(vFqdn)){ + if (isDomain(vFqdn)) { return vFqdn; } return ""; } - private static boolean isDomain(String fqdn){ + private static boolean isDomain(String fqdn) { try { String[] fqdnArr = fqdn.split("\\."); - if (fqdnArr.length < 4 || fqdnArr.length > 4){ + if (fqdnArr.length < 4 || fqdnArr.length > 4) { return true; } Pattern pattern = Pattern.compile("^[\\d]*$"); - for (String f:fqdnArr){ - if (pattern.matcher(f).matches()){ + for (String f : fqdnArr) { + if (pattern.matcher(f).matches()) { int i = Integer.parseInt(f); - if (i < 0 || i > 255){ + if (i < 0 || i > 255) { return true; } - }else { + } else { return true; } } - }catch (Exception e){ - LOG.error("解析域名 "+fqdn+" 失败:\n"+e.toString()); + } catch (Exception e) { + LOG.error("解析域名 " + fqdn + " 失败:\n" + e.toString()); } return false; } + private static String getVFqdnSql() { + long[] timeLimit = getTimeLimit(); + long maxTime = timeLimit[0]; + long minTime = timeLimit[1]; + String where = "common_recv_time >= " + minTime + " AND common_recv_time <= " + maxTime; + String sslSql = "SELECT ssl_sni AS FQDN,MAX( common_recv_time ) AS LAST_FOUND_TIME,MIN( common_recv_time ) AS FIRST_FOUND_TIME FROM tsg_galaxy_v3.connection_record_log WHERE " + where + " and common_schema_type = 'SSL' GROUP BY ssl_sni"; + String httpSql = "SELECT http_host AS FQDN,MAX( common_recv_time ) AS LAST_FOUND_TIME,MIN( common_recv_time ) AS FIRST_FOUND_TIME FROM tsg_galaxy_v3.connection_record_log WHERE " + where + " and common_schema_type = 'HTTP' GROUP BY http_host"; + return "SELECT FQDN,MAX( LAST_FOUND_TIME ) AS LAST_FOUND_TIME,MIN( FIRST_FOUND_TIME ) AS FIRST_FOUND_TIME FROM ((" + sslSql + ") UNION ALL (" + httpSql + ")) GROUP BY FQDN HAVING FQDN != ''"; + } + + private static String getVIpSql() { + long[] timeLimit = getTimeLimit(); + long maxTime = timeLimit[0]; + long minTime = timeLimit[1]; + String where = " common_recv_time >= " + minTime + " AND common_recv_time <= " + maxTime + " AND (common_schema_type = 'HTTP' or common_schema_type = 'SSL')"; + String clientIpSql = "SELECT common_client_ip AS IP, common_client_location AS location, common_recv_time FROM tsg_galaxy_v3.connection_record_log where " + where; + String serverIpSql = "SELECT common_server_ip AS IP, common_server_location AS location, common_recv_time FROM tsg_galaxy_v3.connection_record_log where " + where; + return "SELECT IP,location,MIN(common_recv_time) AS FIRST_FOUND_TIME,MAX(common_recv_time) AS LAST_FOUND_TIME,COUNT(*) AS IP_COUNT_TOTAL FROM((" + clientIpSql + ") UNION ALL (" + serverIpSql + ")) GROUP BY IP,location"; + } + + private static String getEFqdnAddressIpSql() { + long[] timeLimit = getTimeLimit(); + long maxTime = timeLimit[0]; + long minTime = timeLimit[1]; + String where = " common_recv_time >= " + minTime + " AND common_recv_time <= " + maxTime; + String sslSql = "SELECT ssl_sni AS FQDN,common_server_ip,MAX(common_recv_time) AS LAST_FOUND_TIME,MIN(common_recv_time) AS FIRST_FOUND_TIME,COUNT(*) AS COUNT_TOTAL,groupUniqArray(30)(common_client_ip) AS DIST_CIP_RECENT,'SSL' AS common_schema_type FROM tsg_galaxy_v3.connection_record_log WHERE " + where + " and common_schema_type = 'SSL' GROUP BY ssl_sni,common_server_ip"; + String httpSql = "SELECT http_host AS FQDN,common_server_ip,MAX(common_recv_time) AS LAST_FOUND_TIME,MIN(common_recv_time) AS FIRST_FOUND_TIME,COUNT(*) AS COUNT_TOTAL,groupUniqArray(30)(common_client_ip) AS DIST_CIP_RECENT,'HTTP' AS common_schema_type FROM tsg_galaxy_v3.connection_record_log WHERE " + where + " and common_schema_type = 'HTTP' GROUP BY http_host,common_server_ip"; + return "SELECT * FROM ((" + sslSql + ") UNION ALL (" + httpSql + "))WHERE FQDN != ''"; + } + + private static String getEIpVisitFqdnSql() { + long[] timeLimit = getTimeLimit(); + long maxTime = timeLimit[0]; + long minTime = timeLimit[1]; + String where = " common_recv_time >= " + minTime + " AND common_recv_time <= " + maxTime; + String httpSql = "SELECT http_host AS FQDN,common_client_ip,MAX(common_recv_time) AS LAST_FOUND_TIME,MIN(common_recv_time) AS FIRST_FOUND_TIME,COUNT(*) AS COUNT_TOTAL,'HTTP' AS common_schema_type FROM tsg_galaxy_v3.connection_record_log WHERE " + where + " and common_schema_type = 'HTTP' GROUP BY http_host,common_client_ip"; + String sslSql = "SELECT ssl_sni AS FQDN,common_client_ip,MAX(common_recv_time) AS LAST_FOUND_TIME,MIN(common_recv_time) AS FIRST_FOUND_TIME,COUNT(*) AS COUNT_TOTAL,'SSL' AS common_schema_type FROM tsg_galaxy_v3.connection_record_log WHERE common_schema_type = 'SSL' GROUP BY ssl_sni,common_client_ip"; + return "SELECT * FROM ((" + sslSql + ") UNION ALL (" + httpSql + "))WHERE FQDN != ''"; + } + } diff --git a/IP-learning-graph/src/main/java/cn/ac/iie/etl/BaseUpdateEtl.java b/IP-learning-graph/src/main/java/cn/ac/iie/etl/BaseUpdateEtl.java new file mode 100644 index 0000000..75f7383 --- /dev/null +++ b/IP-learning-graph/src/main/java/cn/ac/iie/etl/BaseUpdateEtl.java @@ -0,0 +1,138 @@ +package cn.ac.iie.etl; + +import com.arangodb.entity.BaseEdgeDocument; + +import java.util.*; + +public class BaseUpdateEtl { + + public static BaseEdgeDocument mergeFqdn2IpBySchema(HashMap newEdgeDocumentSchemaMap){ + + BaseEdgeDocument newBaseEdgeDocument = new BaseEdgeDocument(); + Set schemaSets = newEdgeDocumentSchemaMap.keySet(); + for (String schema : schemaSets) { + BaseEdgeDocument schemaEdgeDoc = newEdgeDocumentSchemaMap.get(schema); + setSchemaCnt(schema,schemaEdgeDoc,newBaseEdgeDocument); + if (newBaseEdgeDocument.getKey() != null){ + Map properties = newBaseEdgeDocument.getProperties(); + setFoundTime(properties,schemaEdgeDoc); + setDistinctClientIpBySchema(properties,schemaEdgeDoc); + }else { + Map properties = schemaEdgeDoc.getProperties(); + properties.remove("COUNT_TOTAL"); + newBaseEdgeDocument = schemaEdgeDoc; + } + } + return newBaseEdgeDocument; + } + + public static BaseEdgeDocument mergeIp2FqdnBySchema(HashMap newEdgeDocumentMap){ + BaseEdgeDocument newBaseEdgeDocument = new BaseEdgeDocument(); + Set schemaSets = newEdgeDocumentMap.keySet(); + for (String schema : schemaSets) { + BaseEdgeDocument schemaEdgeDoc = newEdgeDocumentMap.get(schema); + setSchemaCnt(schema,schemaEdgeDoc,newBaseEdgeDocument); + if (newBaseEdgeDocument.getKey() != null){ + Map properties = newBaseEdgeDocument.getProperties(); + setFoundTime(properties,schemaEdgeDoc); + }else { + Map properties = schemaEdgeDoc.getProperties(); + properties.remove("COUNT_TOTAL"); + newBaseEdgeDocument = schemaEdgeDoc; + } + } + return newBaseEdgeDocument; + } + + public static void mergeIp2FqdnByHistory(BaseEdgeDocument newEdgeDocument,BaseEdgeDocument edgeDocument){ + updateCommonProperty(newEdgeDocument,edgeDocument); + } + + public static void mergeFqdn2IpByHistory(BaseEdgeDocument newEdgeDocument,BaseEdgeDocument edgeDocument){ + updateCommonProperty(newEdgeDocument,edgeDocument); + setDistinctClientIpByHistory(newEdgeDocument,edgeDocument); + } + + private static void setDistinctClientIpByHistory(BaseEdgeDocument newEdgeDocument,BaseEdgeDocument edgeDocument){ + ArrayList distCipTotal = (ArrayList) edgeDocument.getAttribute("DIST_CIP_TOTAL"); + String[] distCipTotalsSrc = distCipTotal.toArray(new String[distCipTotal.size()]); + + String[] distCipRecentsSrc = (String[]) newEdgeDocument.getAttribute("DIST_CIP_RECENT"); + if (distCipTotalsSrc.length == 30) { + Object[] distCipTotals = mergeClientIp(distCipTotalsSrc, distCipRecentsSrc); + edgeDocument.addAttribute("DIST_CIP_TOTAL", distCipTotals); + } + edgeDocument.addAttribute("DIST_CIP_RECENT", distCipRecentsSrc); + } + + private static void updateCommonProperty(BaseEdgeDocument newEdgeDocument,BaseEdgeDocument edgeDocument){ + Object lastFoundTime = newEdgeDocument.getAttribute("LAST_FOUND_TIME"); + edgeDocument.addAttribute("LAST_FOUND_TIME", lastFoundTime); + + setSchemaCntByHistory(edgeDocument,"TLS_CNT_RECENT","TLS_CNT_TOTAL",newEdgeDocument); + setSchemaCntByHistory(edgeDocument,"HTTP_CNT_RECENT","HTTP_CNT_TOTAL",newEdgeDocument); + + } + + private static void setSchemaCntByHistory(BaseEdgeDocument edgeDocument,String schema,String totalSchema,BaseEdgeDocument newEdgeDocument){ + long countTotal = Long.parseLong(newEdgeDocument.getAttribute(totalSchema).toString()); + long updateCountTotal = Long.parseLong(edgeDocument.getAttribute(totalSchema).toString()); + + ArrayList cntRecent = (ArrayList) edgeDocument.getAttribute(schema); + Long[] cntRecentsSrc = cntRecent.toArray(new Long[cntRecent.size()]); + Long[] cntRecentsDst = new Long[7]; + System.arraycopy(cntRecentsSrc, 0, cntRecentsDst, 1, cntRecentsSrc.length - 1); + cntRecentsDst[0] = countTotal; + + edgeDocument.addAttribute(schema, cntRecentsDst); + edgeDocument.addAttribute(totalSchema, countTotal + updateCountTotal); + } + + private static Object[] mergeClientIp(String[] distCipTotalsSrc,String[] distCipRecentsSrc){ + HashSet dIpSet = new HashSet<>(); + dIpSet.addAll(Arrays.asList(distCipRecentsSrc)); + dIpSet.addAll(Arrays.asList(distCipTotalsSrc)); + Object[] distCipTotals = dIpSet.toArray(); + if (distCipTotals.length > 30) { + System.arraycopy(distCipTotals, 0, distCipTotals, 0, 30); + } + return distCipTotals; + } + + private static void setDistinctClientIpBySchema(Map properties,BaseEdgeDocument schemaEdgeDoc){ + String[] schemaDistCipRecents = (String[]) schemaEdgeDoc.getAttribute("DIST_CIP_RECENT"); + String[] distCipRecents = (String[]) properties.get("DIST_CIP_RECENT"); + Object[] mergeClientIp = mergeClientIp(schemaDistCipRecents, distCipRecents); + properties.put("DIST_CIP_RECENT",mergeClientIp); + properties.put("DIST_CIP_TOTAL",mergeClientIp); + } + + private static void setFoundTime(Map properties,BaseEdgeDocument schemaEdgeDoc){ + long schemaFirstFoundTime = Long.parseLong(schemaEdgeDoc.getAttribute("FIRST_FOUND_TIME").toString()); + long firstFoundTime = Long.parseLong(properties.get("FIRST_FOUND_TIME").toString()); + properties.put("FIRST_FOUND_TIME",schemaFirstFoundTimelastFoundTime?schemaLastFoundTime:lastFoundTime); + } + + private static void setSchemaCnt(String schema,BaseEdgeDocument schemaEdgeDoc,BaseEdgeDocument newBaseEdgeDocument){ + switch (schema) { + case "HTTP": + long httpCntTotal = Long.parseLong(schemaEdgeDoc.getAttribute("COUNT_TOTAL").toString()); + newBaseEdgeDocument.addAttribute("HTTP_CNT_TOTAL", httpCntTotal); + long[] httpCntRecentsDst = new long[7]; + httpCntRecentsDst[0] = httpCntTotal; + newBaseEdgeDocument.addAttribute("HTTP_CNT_RECENT", httpCntRecentsDst); + break; + case "SSL": + long tlsCntTotal = Long.parseLong(schemaEdgeDoc.getAttribute("COUNT_TOTAL").toString()); + newBaseEdgeDocument.addAttribute("TLS_CNT_TOTAL", tlsCntTotal); + long[] tlsCntRecentsDst = new long[7]; + tlsCntRecentsDst[0] = tlsCntTotal; + newBaseEdgeDocument.addAttribute("TLS_CNT_RECENT", tlsCntRecentsDst); + break; + } + } + +} diff --git a/IP-learning-graph/src/main/java/cn/ac/iie/etl/fqdn2ip/UpdateEFqdnAddressIp.java b/IP-learning-graph/src/main/java/cn/ac/iie/etl/fqdn2ip/UpdateEFqdnAddressIp.java index 6ae4401..58502a8 100644 --- a/IP-learning-graph/src/main/java/cn/ac/iie/etl/fqdn2ip/UpdateEFqdnAddressIp.java +++ b/IP-learning-graph/src/main/java/cn/ac/iie/etl/fqdn2ip/UpdateEFqdnAddressIp.java @@ -2,6 +2,7 @@ package cn.ac.iie.etl.fqdn2ip; import cn.ac.iie.config.ApplicationConfig; import cn.ac.iie.dao.BaseArangoData; +import cn.ac.iie.etl.BaseUpdateEtl; import cn.ac.iie.utils.ArangoDBConnect; import com.arangodb.entity.BaseEdgeDocument; import org.slf4j.Logger; @@ -11,11 +12,11 @@ import java.util.*; public class UpdateEFqdnAddressIp implements Runnable { private static final Logger LOG = LoggerFactory.getLogger(UpdateEFqdnAddressIp.class); - private HashMap documentHashMap; + private HashMap> documentHashMap; private static final ArangoDBConnect arangoManger = ArangoDBConnect.getInstance(); - public UpdateEFqdnAddressIp(HashMap documentHashMap) { + public UpdateEFqdnAddressIp(HashMap> documentHashMap) { this.documentHashMap = documentHashMap; } @@ -23,87 +24,29 @@ public class UpdateEFqdnAddressIp implements Runnable { public void run() { Set keySet = documentHashMap.keySet(); ArrayList docInsert = new ArrayList<>(); - ArrayList docUpdate = new ArrayList<>(); int i = 0; try { for (String key : keySet) { - BaseEdgeDocument newEdgeDocument = documentHashMap.getOrDefault(key, null); - if (newEdgeDocument != null) { + HashMap newEdgeDocumentSchmeaMap = documentHashMap.getOrDefault(key, null); + if (newEdgeDocumentSchmeaMap != null) { + BaseEdgeDocument newEdgeDocument = BaseUpdateEtl.mergeFqdn2IpBySchema(newEdgeDocumentSchmeaMap); i += 1; BaseEdgeDocument edgeDocument = BaseArangoData.e_Fqdn_Address_Ip_Map.getOrDefault(key, null); - - Object lastFoundTime = newEdgeDocument.getAttribute("LAST_FOUND_TIME"); - long tlsCountTotal = Long.parseLong(newEdgeDocument.getAttribute("TLS_CNT_TOTAL").toString()); - long httpCountTotal = Long.parseLong(newEdgeDocument.getAttribute("HTTP_CNT_TOTAL").toString()); - if (edgeDocument != null) { - long tlsUpdateCountTotal = Long.parseLong(edgeDocument.getAttribute("TLS_CNT_TOTAL").toString()); - long httpUpdateCountTotal = Long.parseLong(edgeDocument.getAttribute("HTTP_CNT_TOTAL").toString()); - - edgeDocument.addAttribute("LAST_FOUND_TIME", lastFoundTime); - edgeDocument.addAttribute("TLS_CNT_TOTAL", tlsCountTotal + tlsUpdateCountTotal); - edgeDocument.addAttribute("HTTP_CNT_TOTAL", httpCountTotal + httpUpdateCountTotal); - - ArrayList tlsCntRecent = (ArrayList) edgeDocument.getAttribute("TLS_CNT_RECENT"); - Long[] tlsCntRecentsSrc = tlsCntRecent.toArray(new Long[tlsCntRecent.size()]); -// Long[] tlsCntRecentsSrc = (Long[]) edgeDocument.getAttribute("TLS_CNT_RECENT"); - Long[] tlsCntRecentsDst = new Long[7]; - System.arraycopy(tlsCntRecentsSrc, 0, tlsCntRecentsDst, 1, tlsCntRecentsSrc.length - 1); - tlsCntRecentsDst[0] = tlsCountTotal; - edgeDocument.addAttribute("TLS_CNT_RECENT", tlsCntRecentsDst); - - ArrayList httpCntRecent = (ArrayList) edgeDocument.getAttribute("HTTP_CNT_RECENT"); - Long[] httpCntRecentsSrc = httpCntRecent.toArray(new Long[httpCntRecent.size()]); -// Long[] httpCntRecentsSrc = (Long[]) edgeDocument.getAttribute("HTTP_CNT_RECENT"); - Long[] httpCntRecentsDst = new Long[7]; - System.arraycopy(httpCntRecentsSrc, 0, httpCntRecentsDst, 1, httpCntRecentsDst.length - 1); - httpCntRecentsDst[0] = httpCountTotal; - edgeDocument.addAttribute("HTTP_CNT_RECENT", httpCntRecentsDst); - - ArrayList distCipTotal = (ArrayList) edgeDocument.getAttribute("DIST_CIP_TOTAL"); - String[] distCipTotalsSrc = distCipTotal.toArray(new String[distCipTotal.size()]); -// String[] distCipTotalsSrc = (String[]) edgeDocument.getAttribute("DIST_CIP_TOTAL"); - -// ArrayList distCipRecent = (ArrayList) newEdgeDocument.getAttribute("DIST_CIP_RECENT"); -// String[] distCipRecentsSrc = distCipRecent.toArray(new String[distCipRecent.size()]); - String[] distCipRecentsSrc = (String[]) newEdgeDocument.getAttribute("DIST_CIP_RECENT"); - - if (distCipTotalsSrc.length == 30) { - HashSet dIpSet = new HashSet<>(); - dIpSet.addAll(Arrays.asList(distCipRecentsSrc)); - dIpSet.addAll(Arrays.asList(distCipTotalsSrc)); - Object[] distCipTotals = dIpSet.toArray(); - if (distCipTotals.length > 30) { - System.arraycopy(distCipTotals, 0, distCipTotals, 0, 30); - } - edgeDocument.addAttribute("DIST_CIP_TOTAL", distCipTotals); - } - edgeDocument.addAttribute("DIST_CIP_RECENT", distCipRecentsSrc); - -// docUpdate.add(edgeDocument); + BaseUpdateEtl.mergeFqdn2IpByHistory(newEdgeDocument,edgeDocument); docInsert.add(edgeDocument); } else { - long[] tlsCntRecentsDst = new long[7]; - tlsCntRecentsDst[0] = tlsCountTotal; - newEdgeDocument.addAttribute("TLS_CNT_RECENT", tlsCntRecentsDst); - - long[] httpCntRecentsDst = new long[7]; - httpCntRecentsDst[0] = httpCountTotal; - newEdgeDocument.addAttribute("HTTP_CNT_RECENT", httpCntRecentsDst); - docInsert.add(newEdgeDocument); } if (i >= ApplicationConfig.UPDATE_ARANGO_BATCH) { -// arangoManger.insertAndUpdate(docInsert, docUpdate, "R_LOCATE_FQDN2IP"); - arangoManger.overwrite(docInsert,"R_LOCATE_FQDN2IP"); + arangoManger.overwrite(docInsert, "R_LOCATE_FQDN2IP"); LOG.info("更新R_LOCATE_FQDN2IP:" + i); i = 0; } } } if (i != 0) { -// arangoManger.insertAndUpdate(docInsert, docUpdate, "R_LOCATE_FQDN2IP"); - arangoManger.overwrite(docInsert,"R_LOCATE_FQDN2IP"); + arangoManger.overwrite(docInsert, "R_LOCATE_FQDN2IP"); LOG.info("更新R_LOCATE_FQDN2IP:" + i); } } catch (Exception e) { diff --git a/IP-learning-graph/src/main/java/cn/ac/iie/etl/ip2fqdn/UpdateEIpVisitFqdn.java b/IP-learning-graph/src/main/java/cn/ac/iie/etl/ip2fqdn/UpdateEIpVisitFqdn.java index 90ee673..c741667 100644 --- a/IP-learning-graph/src/main/java/cn/ac/iie/etl/ip2fqdn/UpdateEIpVisitFqdn.java +++ b/IP-learning-graph/src/main/java/cn/ac/iie/etl/ip2fqdn/UpdateEIpVisitFqdn.java @@ -2,6 +2,7 @@ package cn.ac.iie.etl.ip2fqdn; import cn.ac.iie.config.ApplicationConfig; import cn.ac.iie.dao.BaseArangoData; +import cn.ac.iie.etl.BaseUpdateEtl; import cn.ac.iie.utils.ArangoDBConnect; import com.arangodb.entity.BaseEdgeDocument; import org.slf4j.Logger; @@ -13,11 +14,11 @@ import java.util.Set; public class UpdateEIpVisitFqdn implements Runnable { private static final Logger LOG = LoggerFactory.getLogger(UpdateEIpVisitFqdn.class); - private HashMap documentHashMap; + private HashMap> documentHashMap; private static final ArangoDBConnect arangoManger = ArangoDBConnect.getInstance(); - public UpdateEIpVisitFqdn(HashMap documentHashMap) { + public UpdateEIpVisitFqdn(HashMap> documentHashMap) { this.documentHashMap = documentHashMap; } @@ -25,60 +26,23 @@ public class UpdateEIpVisitFqdn implements Runnable { public void run() { Set keySet = documentHashMap.keySet(); ArrayList docInsert = new ArrayList<>(); - ArrayList docUpdate = new ArrayList<>(); int i = 0; try { for (String key : keySet) { - - BaseEdgeDocument newEdgeDocument = documentHashMap.getOrDefault(key, null); - if (newEdgeDocument != null) { + HashMap newEdgeDocumentMap = documentHashMap.getOrDefault(key, null); + if (newEdgeDocumentMap != null) { + BaseEdgeDocument newEdgeDocument = BaseUpdateEtl.mergeIp2FqdnBySchema(newEdgeDocumentMap); i += 1; BaseEdgeDocument edgeDocument = BaseArangoData.e_Ip_Visit_Fqdn_Map.getOrDefault(key, null); - Object lastFoundTime = newEdgeDocument.getAttribute("LAST_FOUND_TIME"); - long tlsCountTotal = Long.parseLong(newEdgeDocument.getAttribute("TLS_CNT_TOTAL").toString()); - long httpCountTotal = Long.parseLong(newEdgeDocument.getAttribute("HTTP_CNT_TOTAL").toString()); - if (edgeDocument != null) { - long tlsUpdateCountTotal = Long.parseLong(edgeDocument.getAttribute("TLS_CNT_TOTAL").toString()); - long httpUpdateCountTotal = Long.parseLong(edgeDocument.getAttribute("HTTP_CNT_TOTAL").toString()); - - edgeDocument.addAttribute("LAST_FOUND_TIME", lastFoundTime); - edgeDocument.addAttribute("TLS_CNT_TOTAL", tlsCountTotal + tlsUpdateCountTotal); - edgeDocument.addAttribute("HTTP_CNT_TOTAL", httpCountTotal + httpUpdateCountTotal); - - ArrayList tlsCntRecent = (ArrayList) edgeDocument.getAttribute("TLS_CNT_RECENT"); - Long[] tlsCntRecentsSrc = tlsCntRecent.toArray(new Long[tlsCntRecent.size()]); -// Long[] tlsCntRecentsSrc = (Long[]) edgeDocument.getAttribute("TLS_CNT_RECENT"); - Long[] tlsCntRecentsDst = new Long[7]; - System.arraycopy(tlsCntRecentsSrc, 0, tlsCntRecentsDst, 1, tlsCntRecentsSrc.length - 1); - tlsCntRecentsDst[0] = tlsCountTotal; - edgeDocument.addAttribute("TLS_CNT_RECENT", tlsCntRecentsDst); - - ArrayList httpCntRecent = (ArrayList) edgeDocument.getAttribute("HTTP_CNT_RECENT"); - Long[] httpCntRecentsSrc = httpCntRecent.toArray(new Long[httpCntRecent.size()]); -// Long[] httpCntRecentsSrc = (Long[]) edgeDocument.getAttribute("HTTP_CNT_RECENT"); - Long[] httpCntRecentsDst = new Long[7]; - System.arraycopy(httpCntRecentsSrc, 0, httpCntRecentsDst, 1, httpCntRecentsDst.length - 1); - httpCntRecentsDst[0] = httpCountTotal; - edgeDocument.addAttribute("HTTP_CNT_RECENT", httpCntRecentsDst); - -// docUpdate.add(edgeDocument); + BaseUpdateEtl.mergeIp2FqdnByHistory(newEdgeDocument,edgeDocument); docInsert.add(edgeDocument); } else { - long[] tlsCntRecentsDst = new long[7]; - tlsCntRecentsDst[0] = tlsCountTotal; - newEdgeDocument.addAttribute("TLS_CNT_RECENT", tlsCntRecentsDst); - - long[] httpCntRecentsDst = new long[7]; - httpCntRecentsDst[0] = httpCountTotal; - newEdgeDocument.addAttribute("HTTP_CNT_RECENT", httpCntRecentsDst); - docInsert.add(newEdgeDocument); } if (i >= ApplicationConfig.UPDATE_ARANGO_BATCH) { -// arangoManger.insertAndUpdate(docInsert, docUpdate, "R_VISIT_IP2FQDN"); arangoManger.overwrite(docInsert,"R_VISIT_IP2FQDN"); LOG.info("更新R_VISIT_IP2FQDN:" + i); i = 0; @@ -86,7 +50,6 @@ public class UpdateEIpVisitFqdn implements Runnable { } } if (i != 0) { -// arangoManger.insertAndUpdate(docInsert, docUpdate, "R_VISIT_IP2FQDN"); arangoManger.overwrite(docInsert,"R_VISIT_IP2FQDN"); LOG.info("更新R_VISIT_IP2FQDN:" + i); } diff --git a/IP-learning-graph/src/main/java/cn/ac/iie/pojo/VertexFqdn.java b/IP-learning-graph/src/main/java/cn/ac/iie/pojo/VertexFqdn.java deleted file mode 100644 index 52b3653..0000000 --- a/IP-learning-graph/src/main/java/cn/ac/iie/pojo/VertexFqdn.java +++ /dev/null @@ -1,5 +0,0 @@ -package cn.ac.iie.pojo; - -public class VertexFqdn { - -} diff --git a/IP-learning-graph/src/test/java/cn/ac/iie/TestMap.java b/IP-learning-graph/src/test/java/cn/ac/iie/TestMap.java index cb4d2cf..34a39c5 100644 --- a/IP-learning-graph/src/test/java/cn/ac/iie/TestMap.java +++ b/IP-learning-graph/src/test/java/cn/ac/iie/TestMap.java @@ -44,11 +44,8 @@ public class TestMap { for (long c:longs1){ System.out.println(c); } - */ - - String[] distCipRecents = new String[]{"2.3"}; ArrayList baseEdgeDocuments = new ArrayList<>(); BaseDocument newDoc = new BaseDocument(); @@ -66,11 +63,13 @@ public class TestMap { baseEdgeDocuments.add(document); ArangoDBConnect instance = ArangoDBConnect.getInstance(); - instance.overwrite(baseEdgeDocuments,"FQDN"); ArangoDBConnect.clean(); +*/ + BaseEdgeDocument baseEdgeDocument = new BaseEdgeDocument(); + System.out.println(baseEdgeDocument.getProperties().getOrDefault("1",155)); /*