性能优化,批量读取查询,sql功能优化
This commit is contained in:
5
pom.xml
5
pom.xml
@@ -33,9 +33,8 @@
|
|||||||
<groupId>org.apache.maven.plugins</groupId>
|
<groupId>org.apache.maven.plugins</groupId>
|
||||||
<artifactId>maven-compiler-plugin</artifactId>
|
<artifactId>maven-compiler-plugin</artifactId>
|
||||||
<configuration>
|
<configuration>
|
||||||
<source>7</source>
|
<source>8</source>
|
||||||
<target>7</target>
|
<target>8</target>
|
||||||
|
|
||||||
</configuration>
|
</configuration>
|
||||||
</plugin>
|
</plugin>
|
||||||
</plugins>
|
</plugins>
|
||||||
|
|||||||
@@ -13,6 +13,7 @@ public class ApplicationConfig {
|
|||||||
public static final Integer OFFLINE_SCHEDULE_SECOND = ConfigUtils.getIntProperty("offline.schedule.second");
|
public static final Integer OFFLINE_SCHEDULE_SECOND = ConfigUtils.getIntProperty("offline.schedule.second");
|
||||||
public static final String OFFLINE_IMPORT_PATH = ConfigUtils.getStringProperty("offline.import.path");
|
public static final String OFFLINE_IMPORT_PATH = ConfigUtils.getStringProperty("offline.import.path");
|
||||||
public static final String OFFLINE_OUTPUT_PATH = ConfigUtils.getStringProperty("offline.output.path");
|
public static final String OFFLINE_OUTPUT_PATH = ConfigUtils.getStringProperty("offline.output.path");
|
||||||
|
public static final Integer OFFLINE_READIN_BATCH = ConfigUtils.getIntProperty("offline.readin.batch");
|
||||||
|
|
||||||
|
|
||||||
public static final Boolean UPDATE_SWITCH = ConfigUtils.getBooleanProperty("update.switch");
|
public static final Boolean UPDATE_SWITCH = ConfigUtils.getBooleanProperty("update.switch");
|
||||||
@@ -27,7 +28,7 @@ public class ApplicationConfig {
|
|||||||
public static final Integer LOG_OFFLINE_NUMBER = ConfigUtils.getIntProperty("log.offline.number");
|
public static final Integer LOG_OFFLINE_NUMBER = ConfigUtils.getIntProperty("log.offline.number");
|
||||||
public static final String LOG_BC_QUERY_REPORT_FILE = ConfigUtils.getStringProperty("log.bc.query.report.file");
|
public static final String LOG_BC_QUERY_REPORT_FILE = ConfigUtils.getStringProperty("log.bc.query.report.file");
|
||||||
|
|
||||||
public static final Integer MAXIMUM_URL_ONCE = ConfigUtils.getIntProperty("maximum.url.once");
|
public static final Integer MAXIMUM_URL_ONCE_BC_QUERY = ConfigUtils.getIntProperty("maximum.url.once.bc.query");
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,6 +1,5 @@
|
|||||||
package cn.ac.iie.dao;
|
package cn.ac.iie.dao;
|
||||||
|
|
||||||
|
|
||||||
import cn.ac.iie.config.MariaDBConfig;
|
import cn.ac.iie.config.MariaDBConfig;
|
||||||
import cn.ac.iie.utils.TimeUtils;
|
import cn.ac.iie.utils.TimeUtils;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
@@ -10,6 +9,7 @@ import java.util.Arrays;
|
|||||||
import java.util.Date;
|
import java.util.Date;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Properties;
|
import java.util.Properties;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @author yjy
|
* @author yjy
|
||||||
@@ -67,11 +67,17 @@ public class BaseMariaDB {
|
|||||||
return querySqlExecute(getQueryRecordSql(fqdn));
|
return querySqlExecute(getQueryRecordSql(fqdn));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public ResultSet getDatabaseRecord(List<String> fqdns){
|
||||||
|
return querySqlExecute(getQueryRecordSql(fqdns));
|
||||||
|
}
|
||||||
|
|
||||||
public void writeSqlExecute(String sql){
|
public void writeSqlExecute(String sql){
|
||||||
try {
|
try {
|
||||||
statement.executeUpdate(sql);
|
statement.executeUpdate(sql);
|
||||||
|
} catch (SQLIntegrityConstraintViolationException e){
|
||||||
|
LOG.error("Duplicated entry for key 'PRIMARY'");
|
||||||
} catch (SQLException exception) {
|
} catch (SQLException exception) {
|
||||||
LOG.debug("Sql : " + sql);
|
LOG.error("Sql : " + sql);
|
||||||
exception.printStackTrace();
|
exception.printStackTrace();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -150,10 +156,18 @@ public class BaseMariaDB {
|
|||||||
.append(MariaDBConfig.MARIADB_TABLE).append(' ')
|
.append(MariaDBConfig.MARIADB_TABLE).append(' ')
|
||||||
.append(" WHERE fqdn = '").append(fqdn).append('\'');
|
.append(" WHERE fqdn = '").append(fqdn).append('\'');
|
||||||
|
|
||||||
String resSql = sql.toString();
|
return sql.toString();
|
||||||
resSql = resSql.replace("'null'", "null");
|
}
|
||||||
|
|
||||||
return resSql;
|
public String getQueryRecordSql(List<String> fqdns){
|
||||||
|
StringBuilder sql = new StringBuilder("SELECT * FROM ");
|
||||||
|
String queryFqdns = fqdns.stream().map(s -> "'" + s + "'").collect(Collectors.joining(","));
|
||||||
|
|
||||||
|
sql.append(MariaDBConfig.MARIADB_DATABASE).append(".")
|
||||||
|
.append(MariaDBConfig.MARIADB_TABLE).append(' ')
|
||||||
|
.append(" WHERE fqdn in (").append(queryFqdns).append(")");
|
||||||
|
|
||||||
|
return sql.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String getExpiredRecordSql(){
|
public static String getExpiredRecordSql(){
|
||||||
|
|||||||
@@ -5,8 +5,8 @@ import cn.ac.iie.dao.BaseMariaDB;
|
|||||||
import cn.ac.iie.dao.FqdnFile;
|
import cn.ac.iie.dao.FqdnFile;
|
||||||
import cn.ac.iie.utils.BrightCloudUtils;
|
import cn.ac.iie.utils.BrightCloudUtils;
|
||||||
import cn.ac.iie.utils.FileUtils;
|
import cn.ac.iie.utils.FileUtils;
|
||||||
import cn.ac.iie.utils.MariaDBConnect;
|
|
||||||
import cn.ac.iie.utils.LogUtils;
|
import cn.ac.iie.utils.LogUtils;
|
||||||
|
import cn.ac.iie.utils.MariaDBConnect;
|
||||||
import com.alibaba.fastjson.JSONObject;
|
import com.alibaba.fastjson.JSONObject;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
|
|
||||||
@@ -17,7 +17,9 @@ import java.sql.ResultSet;
|
|||||||
import java.sql.SQLException;
|
import java.sql.SQLException;
|
||||||
import java.sql.Statement;
|
import java.sql.Statement;
|
||||||
import java.text.DecimalFormat;
|
import java.text.DecimalFormat;
|
||||||
import java.util.*;
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.TimerTask;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @author yjy
|
* @author yjy
|
||||||
@@ -38,7 +40,6 @@ public class OfflineTask extends TimerTask {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private void runTask() throws SQLException, IOException {
|
private void runTask() throws SQLException, IOException {
|
||||||
|
|
||||||
List<String> fileNames = catpureNewFiles();
|
List<String> fileNames = catpureNewFiles();
|
||||||
List<String> fqdns;
|
List<String> fqdns;
|
||||||
|
|
||||||
@@ -51,119 +52,123 @@ public class OfflineTask extends TimerTask {
|
|||||||
for (String fileName : fileNames) {
|
for (String fileName : fileNames) {
|
||||||
File importFile = new File(fileName);
|
File importFile = new File(fileName);
|
||||||
String importFileName = FileUtils.getFileName(importFile);
|
String importFileName = FileUtils.getFileName(importFile);
|
||||||
|
InputStreamReader inputStreamReader = new InputStreamReader(
|
||||||
|
new FileInputStream(importFile), "GBK");
|
||||||
|
BufferedReader bufferedReader = new BufferedReader(inputStreamReader);
|
||||||
|
|
||||||
|
Long fileLineCount = FileUtils.getFileLineNum(importFile);
|
||||||
|
|
||||||
|
// 输出结果保存文件
|
||||||
LOG.info("[Offline import file query]-" + importFileName + ": File Found.");
|
LOG.info("[Offline import file query]-" + importFileName + ": File Found.");
|
||||||
String outputFileName =
|
String outputFileName =
|
||||||
importFileName.substring(0, importFileName.length() - ApplicationConfig.OFFLINE_IMPORT_FILENAME_SUFFIX.length())
|
importFileName.substring(0, importFileName.length() - ApplicationConfig.OFFLINE_IMPORT_FILENAME_SUFFIX.length())
|
||||||
+ ApplicationConfig.OFFLINE_OUTPUT_FILENAME_SUFFIX;
|
+ ApplicationConfig.OFFLINE_OUTPUT_FILENAME_SUFFIX;
|
||||||
// TODO 遍历处理消耗太大
|
File outputFile = new File(ApplicationConfig.OFFLINE_OUTPUT_PATH + "/" + outputFileName);
|
||||||
fqdns = FileUtils.readTxtFileIntoStringArrList(importFile.toString());
|
if (!outputFile.exists()) {
|
||||||
fqdns = SingleTermTask.getCheckedFqdn(fqdns);
|
FileUtils.createFile(new File(ApplicationConfig.OFFLINE_OUTPUT_PATH), outputFileName);
|
||||||
|
}
|
||||||
|
OutputStream outStream = new FileOutputStream(outputFile);
|
||||||
|
OutputStreamWriter outWriter = new OutputStreamWriter(outStream, StandardCharsets.UTF_8);
|
||||||
|
// 添加表头
|
||||||
|
outWriter.write(FqdnFile.getKeys() + "\n");
|
||||||
|
|
||||||
long standardFqdnNum = fqdns.size();
|
List<String> toQueryBC = new ArrayList<>();
|
||||||
|
|
||||||
|
long standardFqdnNum = 0;
|
||||||
|
long complNum = 0;
|
||||||
long dbQueryNum = 0;
|
long dbQueryNum = 0;
|
||||||
long bcQueryNum = 0;
|
long bcQueryNum = 0;
|
||||||
long failQueryNum = 0;
|
long failQueryNum = 0;
|
||||||
long effecResNum = 0;
|
long effecResNum = 0;
|
||||||
long noLabelNum = 0;
|
long noLabelNum = 0;
|
||||||
if (standardFqdnNum>0){
|
|
||||||
|
|
||||||
// 创建结果保存文件
|
// 批量读取数据,读取时去空行&去空格
|
||||||
File outputFile = new File(ApplicationConfig.OFFLINE_OUTPUT_PATH + "/" + outputFileName);
|
LOG.info("[Offline import file query]-" + importFileName + " Progress: 0.00%");
|
||||||
if (!outputFile.exists()){
|
while ((fqdns = FileUtils.getBatchFqdnReadIn(bufferedReader, ApplicationConfig.OFFLINE_READIN_BATCH)).size()>0) {
|
||||||
FileUtils.createFile(new File(ApplicationConfig.OFFLINE_OUTPUT_PATH), outputFileName);
|
// 校验格式&去重
|
||||||
}
|
fqdns = SingleTermTask.getCheckedFqdn(fqdns);
|
||||||
|
standardFqdnNum += fqdns.size();
|
||||||
|
|
||||||
OutputStream outStream = new FileOutputStream(outputFile);
|
// 批量db查询
|
||||||
OutputStreamWriter outWriter = new OutputStreamWriter(outStream, StandardCharsets.UTF_8);
|
ResultSet dbResult = mariaDB.getDatabaseRecord(fqdns);
|
||||||
outWriter.write(FqdnFile.getKeys() + "\n");
|
List<String> dbQueryFqdns = new ArrayList<>();
|
||||||
|
while (dbResult.next()) {
|
||||||
List<String> fqdnToQuery = new ArrayList<>();
|
try {
|
||||||
// 遍历列表域名
|
FqdnFile fqdnFile = SingleTermTask.ResSet2FqdnFile(dbResult);
|
||||||
LOG.info("[Offline import file query]-" + importFileName + " Progress: 0%");
|
dbQueryFqdns.add(fqdnFile.getFqdn());
|
||||||
for (int index=0; index<standardFqdnNum; index=index+1){
|
|
||||||
String fqdn = fqdns.get(index);
|
|
||||||
FqdnFile fqdnFile;
|
|
||||||
if (fqdn.equals("")){
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
ResultSet rs = mariaDB.getDatabaseRecord(fqdn);
|
|
||||||
if (rs.next()){
|
|
||||||
// 查db
|
|
||||||
fqdnFile = SingleTermTask.ResSet2FqdnFile(rs);
|
|
||||||
dbQueryNum += 1;
|
|
||||||
outWriter.write(fqdnFile.getValues() + "\n");
|
outWriter.write(fqdnFile.getValues() + "\n");
|
||||||
if (fqdnFile.getQuery_success().equals(false)){
|
|
||||||
failQueryNum += 1;
|
|
||||||
} else if (fqdnFile.getCategory_id().equals(0)){
|
|
||||||
noLabelNum += 1;
|
|
||||||
} else {
|
|
||||||
effecResNum += 1;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// 查bc
|
|
||||||
fqdnToQuery.add(fqdn);
|
|
||||||
bcQueryNum = bcQueryNum + 1;
|
|
||||||
|
|
||||||
// 缓存满
|
// 计数
|
||||||
if (fqdnToQuery.size() == ApplicationConfig.MAXIMUM_URL_ONCE){
|
dbQueryNum += 1;
|
||||||
// 批量查
|
complNum += 1;
|
||||||
JSONObject resObj = brightCloudUtils.getQueryResults(fqdnToQuery);
|
failQueryNum = fqdnFile.getQuery_success().equals(false) ? failQueryNum + 1 : failQueryNum;
|
||||||
|
noLabelNum = fqdnFile.getCategory_id().equals(0) ? noLabelNum + 1 : noLabelNum;
|
||||||
|
effecResNum = (fqdnFile.getQuery_success().equals(false) | fqdnFile.getCategory_id().equals(0)) ?
|
||||||
|
effecResNum : effecResNum+1;
|
||||||
|
|
||||||
|
// 打印进度日志
|
||||||
|
if (complNum > 0 && complNum % ApplicationConfig.LOG_OFFLINE_NUMBER == 0) {
|
||||||
|
String percent = new DecimalFormat("##.00%").format((float) complNum / fileLineCount);
|
||||||
|
LOG.info("[Offline import file query]-" + importFileName + " Progress:" + percent);
|
||||||
|
}
|
||||||
|
|
||||||
|
} catch (Exception e) {
|
||||||
|
LOG.error("[Offline import file query]-" + importFileName + ": Wrong in database query");
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// 添加bc查询目标
|
||||||
|
fqdns.removeAll(dbQueryFqdns);
|
||||||
|
toQueryBC.addAll(fqdns);
|
||||||
|
while (toQueryBC.size() > ApplicationConfig.MAXIMUM_URL_ONCE_BC_QUERY) {
|
||||||
|
// LOG.debug("Execute batch bc query...");
|
||||||
|
JSONObject resObj = brightCloudUtils.getQueryResults(toQueryBC.subList(0, ApplicationConfig.MAXIMUM_URL_ONCE_BC_QUERY));
|
||||||
List<FqdnFile> fqdnFiles = brightCloudUtils.responseSparse(resObj);
|
List<FqdnFile> fqdnFiles = brightCloudUtils.responseSparse(resObj);
|
||||||
assert fqdnFiles.size() > 0;
|
assert fqdnFiles.size() > 0;
|
||||||
|
|
||||||
// 存数据库
|
// 存数据库
|
||||||
mariaDB.insertRecords(fqdnFiles);
|
mariaDB.insertRecords(fqdnFiles);
|
||||||
|
|
||||||
// 写入output
|
|
||||||
for (FqdnFile tmpFile : fqdnFiles) {
|
for (FqdnFile tmpFile : fqdnFiles) {
|
||||||
|
// 写入output
|
||||||
outWriter.write(tmpFile.getValues() + "\n");
|
outWriter.write(tmpFile.getValues() + "\n");
|
||||||
if (tmpFile.getQuery_success().equals(false)){
|
// 计数
|
||||||
failQueryNum += 1;
|
bcQueryNum += 1;
|
||||||
} else if (tmpFile.getCategory_id().equals(0)){
|
complNum += 1;
|
||||||
noLabelNum += 1;
|
failQueryNum = tmpFile.getQuery_success().equals(false) ? failQueryNum + 1 : failQueryNum;
|
||||||
} else {
|
noLabelNum = tmpFile.getCategory_id().equals(0) ? noLabelNum + 1 : noLabelNum;
|
||||||
effecResNum += 1;
|
effecResNum = (tmpFile.getQuery_success().equals(false) | tmpFile.getCategory_id().equals(0)) ? effecResNum : effecResNum+1;
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// // 打印处理进度日志
|
// 打印进度日志
|
||||||
// String percent = new DecimalFormat("##.0%").format((float)index/standardFqdnNum);
|
if (complNum > 0 && complNum % ApplicationConfig.LOG_OFFLINE_NUMBER == 0) {
|
||||||
// LOG.info("[Offline import file query]-" + importFileName + " Progress:" + percent);
|
String percent = new DecimalFormat("##.00%").format((float) complNum / fileLineCount);
|
||||||
|
|
||||||
// 缓存复位
|
|
||||||
fqdnToQuery = new ArrayList<>();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// 打印处理进度至日志
|
|
||||||
if (index>0 && index % ApplicationConfig.LOG_OFFLINE_NUMBER==0){
|
|
||||||
String percent = new DecimalFormat("##.0%").format((float)index/standardFqdnNum);
|
|
||||||
LOG.info("[Offline import file query]-" + importFileName + " Progress:" + percent);
|
LOG.info("[Offline import file query]-" + importFileName + " Progress:" + percent);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
toQueryBC = toQueryBC.subList(ApplicationConfig.MAXIMUM_URL_ONCE_BC_QUERY, toQueryBC.size());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// 剩余待查bc
|
// 完成剩余bc查询
|
||||||
if (fqdnToQuery.size()>0){
|
if (toQueryBC.size()>0){
|
||||||
JSONObject resObj = brightCloudUtils.getQueryResults(fqdnToQuery);
|
JSONObject resObj = brightCloudUtils.getQueryResults(toQueryBC);
|
||||||
List<FqdnFile> fqdnFiles = brightCloudUtils.responseSparse(resObj);
|
List<FqdnFile> fqdnFiles = brightCloudUtils.responseSparse(resObj);
|
||||||
assert fqdnFiles.size() > 0;
|
assert fqdnFiles.size() > 0;
|
||||||
// 存数据库
|
// 存数据库
|
||||||
mariaDB.insertRecords(fqdnFiles);
|
mariaDB.insertRecords(fqdnFiles);
|
||||||
// 写入output
|
|
||||||
for (FqdnFile tmpFile : fqdnFiles) {
|
for (FqdnFile tmpFile : fqdnFiles) {
|
||||||
|
// 写入output
|
||||||
outWriter.write(tmpFile.getValues() + "\n");
|
outWriter.write(tmpFile.getValues() + "\n");
|
||||||
if (tmpFile.getQuery_success().equals(false)){
|
// 计数
|
||||||
failQueryNum += 1;
|
bcQueryNum += 1;
|
||||||
} else if (tmpFile.getCategory_id().equals(0)){
|
complNum += 1;
|
||||||
noLabelNum += 1;
|
failQueryNum = tmpFile.getQuery_success().equals(false) ? failQueryNum + 1 : failQueryNum;
|
||||||
} else {
|
noLabelNum = tmpFile.getCategory_id().equals(0) ? noLabelNum + 1 : noLabelNum;
|
||||||
effecResNum += 1;
|
effecResNum = (tmpFile.getQuery_success().equals(true) && tmpFile.getCategory_id().equals(1)) ? effecResNum + 1 : effecResNum;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
LOG.info("[Offline import file query]-" + importFileName + " Progress: 100%");
|
LOG.info("[Offline import file query]-" + importFileName + " Progress: 100.00%");
|
||||||
// 打印处理结果至日志
|
// 打印处理结果至日志
|
||||||
LOG.info("[Offline import file query]-" + importFileName + " "
|
LOG.info("[Offline import file query]-" + importFileName + " "
|
||||||
+ "Query result: submit " + standardFqdnNum + " valid fqdns, "
|
+ "Query result: submit " + standardFqdnNum + " valid fqdns, "
|
||||||
@@ -186,15 +191,12 @@ public class OfflineTask extends TimerTask {
|
|||||||
}
|
}
|
||||||
|
|
||||||
FileUtils.writerClose(outWriter, outStream);
|
FileUtils.writerClose(outWriter, outStream);
|
||||||
} else {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
//查询结束修改后缀
|
//查询结束修改后缀
|
||||||
importFile.renameTo(new File(
|
importFile.renameTo(new File(importFile.toString().substring(0, importFile.toString().length() - ApplicationConfig.OFFLINE_IMPORT_FILENAME_SUFFIX.length())
|
||||||
importFile.toString().substring(0, importFile.toString().length()-ApplicationConfig.OFFLINE_IMPORT_FILENAME_SUFFIX.length())
|
|
||||||
+ ApplicationConfig.OFFLINE_IMPORT_FILEDONE_SUFFIX));
|
+ ApplicationConfig.OFFLINE_IMPORT_FILEDONE_SUFFIX));
|
||||||
|
|
||||||
|
FileUtils.readerClose(bufferedReader, inputStreamReader);
|
||||||
}
|
}
|
||||||
|
|
||||||
MariaDBConnect.close(mariaStat, mariaConn);
|
MariaDBConnect.close(mariaStat, mariaConn);
|
||||||
@@ -226,12 +228,6 @@ public class OfflineTask extends TimerTask {
|
|||||||
}
|
}
|
||||||
return newFiles;
|
return newFiles;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
private Boolean checkFileName(String fileName) {
|
|
||||||
boolean isStandard = true;
|
|
||||||
if (!fileName.endsWith(ApplicationConfig.OFFLINE_IMPORT_FILENAME_SUFFIX)) {
|
|
||||||
isStandard = false;
|
|
||||||
}
|
|
||||||
return isStandard;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -1,8 +1,10 @@
|
|||||||
package cn.ac.iie.service;
|
package cn.ac.iie.service;
|
||||||
|
|
||||||
import cn.ac.iie.config.ApplicationConfig;
|
import cn.ac.iie.config.ApplicationConfig;
|
||||||
|
import cn.ac.iie.dao.BaseMariaDB;
|
||||||
import cn.ac.iie.dao.FqdnFile;
|
import cn.ac.iie.dao.FqdnFile;
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
import org.apache.log4j.Logger;
|
||||||
|
|
||||||
import java.sql.ResultSet;
|
import java.sql.ResultSet;
|
||||||
import java.sql.SQLException;
|
import java.sql.SQLException;
|
||||||
@@ -18,6 +20,7 @@ import java.util.regex.Pattern;
|
|||||||
* @date 2021/2/25 2:40 下午
|
* @date 2021/2/25 2:40 下午
|
||||||
*/
|
*/
|
||||||
public class SingleTermTask {
|
public class SingleTermTask {
|
||||||
|
private static final Logger LOG = Logger.getLogger(SingleTermTask.class);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 提取三级域名
|
* 提取三级域名
|
||||||
@@ -71,6 +74,8 @@ public class SingleTermTask {
|
|||||||
// 去重 & 校验
|
// 去重 & 校验
|
||||||
if (isValidDomain(fqdn) && !res.contains(fqdn)){
|
if (isValidDomain(fqdn) && !res.contains(fqdn)){
|
||||||
res.add(fqdn.toLowerCase());
|
res.add(fqdn.toLowerCase());
|
||||||
|
} else {
|
||||||
|
LOG.debug("Bad or duplicated fqdn:" + fqdn);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return res;
|
return res;
|
||||||
|
|||||||
@@ -10,13 +10,15 @@ import cn.ac.iie.utils.MariaDBConnect;
|
|||||||
import com.alibaba.fastjson.JSONObject;
|
import com.alibaba.fastjson.JSONObject;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
|
|
||||||
import java.io.*;
|
import java.io.FileOutputStream;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.OutputStream;
|
||||||
|
import java.io.OutputStreamWriter;
|
||||||
import java.nio.charset.StandardCharsets;
|
import java.nio.charset.StandardCharsets;
|
||||||
import java.sql.Connection;
|
import java.sql.Connection;
|
||||||
import java.sql.ResultSet;
|
import java.sql.ResultSet;
|
||||||
import java.sql.SQLException;
|
import java.sql.SQLException;
|
||||||
import java.sql.Statement;
|
import java.sql.Statement;
|
||||||
import java.text.DecimalFormat;
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.TimerTask;
|
import java.util.TimerTask;
|
||||||
@@ -63,7 +65,6 @@ public class UpdateTask extends TimerTask {
|
|||||||
}
|
}
|
||||||
long unlabeledNum = updateFqdns.size() - expiredNum;
|
long unlabeledNum = updateFqdns.size() - expiredNum;
|
||||||
|
|
||||||
//TODO 分批查询
|
|
||||||
if (updateFqdns.size()>0){
|
if (updateFqdns.size()>0){
|
||||||
JSONObject jsonObj = brightCloudUtils.getQueryResults(updateFqdns);
|
JSONObject jsonObj = brightCloudUtils.getQueryResults(updateFqdns);
|
||||||
List<FqdnFile> updateFiles = brightCloudUtils.responseSparse(jsonObj);
|
List<FqdnFile> updateFiles = brightCloudUtils.responseSparse(jsonObj);
|
||||||
|
|||||||
@@ -1,11 +1,10 @@
|
|||||||
package cn.ac.iie.utils;
|
package cn.ac.iie.utils;
|
||||||
|
|
||||||
import cn.ac.iie.dao.FqdnFile;
|
|
||||||
import cn.ac.iie.config.ApplicationConfig;
|
import cn.ac.iie.config.ApplicationConfig;
|
||||||
|
import cn.ac.iie.dao.FqdnFile;
|
||||||
import com.alibaba.fastjson.JSON;
|
import com.alibaba.fastjson.JSON;
|
||||||
import com.alibaba.fastjson.JSONArray;
|
import com.alibaba.fastjson.JSONArray;
|
||||||
import com.alibaba.fastjson.JSONObject;
|
import com.alibaba.fastjson.JSONObject;
|
||||||
import com.google.common.collect.Lists;
|
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
|
|
||||||
import java.io.*;
|
import java.io.*;
|
||||||
@@ -55,7 +54,7 @@ public class BrightCloudUtils {
|
|||||||
private final HashMap<Integer, List<String>> catId2Info = new HashMap<>();
|
private final HashMap<Integer, List<String>> catId2Info = new HashMap<>();
|
||||||
|
|
||||||
public JSONObject getQueryResults (List<String> urls) {
|
public JSONObject getQueryResults (List<String> urls) {
|
||||||
if (urls.size()>ApplicationConfig.MAXIMUM_URL_ONCE){
|
if (urls.size()>ApplicationConfig.MAXIMUM_URL_ONCE_BC_QUERY){
|
||||||
LOG.warn("Too many urls in a http post request!");
|
LOG.warn("Too many urls in a http post request!");
|
||||||
}
|
}
|
||||||
JSONObject jsonRes = null;
|
JSONObject jsonRes = null;
|
||||||
|
|||||||
@@ -1,11 +1,13 @@
|
|||||||
package cn.ac.iie.utils;
|
package cn.ac.iie.utils;
|
||||||
|
|
||||||
|
import cn.ac.iie.config.ApplicationConfig;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
|
|
||||||
import java.io.*;
|
import java.io.*;
|
||||||
import java.nio.charset.StandardCharsets;
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @author yjy
|
* @author yjy
|
||||||
@@ -52,6 +54,22 @@ public class FileUtils {
|
|||||||
return list;
|
return list;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static List<String> getBatchFqdnReadIn(BufferedReader bufferedReader, int batchSize){
|
||||||
|
List<String> list = new ArrayList<>();
|
||||||
|
String lineTxt;
|
||||||
|
try{
|
||||||
|
while ((lineTxt = bufferedReader.readLine()) != null && list.size()<batchSize)
|
||||||
|
{
|
||||||
|
if (!lineTxt.equals("")) {
|
||||||
|
list.add(lineTxt.trim());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (IOException e){
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
return list;
|
||||||
|
}
|
||||||
|
|
||||||
public static void createFile(File filePath, String fileName){
|
public static void createFile(File filePath, String fileName){
|
||||||
try {
|
try {
|
||||||
File file = new File(filePath.toString() + "/" + fileName);
|
File file = new File(filePath.toString() + "/" + fileName);
|
||||||
@@ -119,4 +137,37 @@ public class FileUtils {
|
|||||||
outStream.close();
|
outStream.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static void readerClose(BufferedReader bufferedReader, InputStreamReader inputStreamReader) throws IOException {
|
||||||
|
assert inputStreamReader != null;
|
||||||
|
bufferedReader.close();
|
||||||
|
inputStreamReader.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
//执行cmd命令,获取返回结果
|
||||||
|
public static String execCMD(String command) {
|
||||||
|
StringBuilder sb =new StringBuilder();
|
||||||
|
try {
|
||||||
|
Process process=Runtime.getRuntime().exec(command);
|
||||||
|
BufferedReader bufferedReader=new BufferedReader(new InputStreamReader(process.getInputStream()));
|
||||||
|
String line;
|
||||||
|
while((line=bufferedReader.readLine())!=null)
|
||||||
|
{
|
||||||
|
sb.append(line+"\n");
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
return e.toString();
|
||||||
|
}
|
||||||
|
return sb.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
public static Long getFileLineNum(File file){
|
||||||
|
Long num = 0L;
|
||||||
|
if (!file.exists()){
|
||||||
|
LOG.error("File not exist: " + file.toString());
|
||||||
|
} else {
|
||||||
|
String res = FileUtils.execCMD("wc -l " + file.toString());
|
||||||
|
num = Long.parseLong(res.trim().split(" ")[0]);
|
||||||
|
}
|
||||||
|
return num;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,7 +1,8 @@
|
|||||||
package cn.ac.iie.utils;
|
package cn.ac.iie.utils;
|
||||||
|
|
||||||
import cn.ac.iie.config.ApplicationConfig;
|
import cn.ac.iie.config.ApplicationConfig;
|
||||||
import java.io.*;
|
|
||||||
|
import java.io.File;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @author yjy
|
* @author yjy
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
package cn.ac.iie.utils;
|
package cn.ac.iie.utils;
|
||||||
|
|
||||||
import cn.ac.iie.config.ApplicationConfig;
|
import cn.ac.iie.config.ApplicationConfig;
|
||||||
|
|
||||||
import java.util.Calendar;
|
import java.util.Calendar;
|
||||||
import java.util.Date;
|
import java.util.Date;
|
||||||
|
|
||||||
|
|||||||
@@ -27,18 +27,26 @@ update.schedule.day = 7
|
|||||||
database = web_sketch
|
database = web_sketch
|
||||||
table = fqdn_profile_via_brightcloud
|
table = fqdn_profile_via_brightcloud
|
||||||
|
|
||||||
|
# offline读取批处理量
|
||||||
|
offline.readin.batch = 1000
|
||||||
|
|
||||||
# bc api单次查询url长度限制 API最高限制
|
# bc api单次查询url长度限制 API最高限制
|
||||||
maximum.url.once = 100
|
maximum.url.once.bc.query = 100
|
||||||
|
|
||||||
# log
|
# log
|
||||||
# 打印进度日志的查询条数
|
# 打印进度日志的查询条数
|
||||||
log.offline.number = 10000
|
log.offline.number = 10000
|
||||||
log.bc.query.report.file = /home/WebSKT/Data/bright_cloud_query_count.csv
|
log.bc.query.report.file = /home/WebSKT/Data/bright_cloud_query_count.csv
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
########################
|
||||||
|
### for test ###########
|
||||||
|
########################
|
||||||
## 离线导入指定目录
|
## 离线导入指定目录
|
||||||
#offline.import.path = /Users/joy/work/iie/project/cyber_narrator/APP/WebSketch/QueryAgentV1/files/import_file
|
#offline.import.path = /Users/joy/work/iie/project/cyber_narrator/APP/WebSketch/QueryAgentV3/files/import_file
|
||||||
## 离线指定查询结果保存目录
|
## 离线指定查询结果保存目录
|
||||||
#offline.output.path = /Users/joy/work/iie/project/cyber_narrator/APP/WebSketch/QueryAgentV1/files/output_file
|
#offline.output.path = /Users/joy/work/iie/project/cyber_narrator/APP/WebSketch/QueryAgentV3/files/output_file
|
||||||
## 服务调用统计结果
|
## 服务调用统计结果
|
||||||
#log.bc.query.report.file = /Users/joy/work/iie/project/cyber_narrator/App/WebSketch/LOG_IMPORTANT/bright_cloud_query_count.csv
|
#log.bc.query.report.file = /Users/joy/work/iie/project/cyber_narrator/App/WebSketch/LOG_IMPORTANT/bright_cloud_query_count.csv
|
||||||
|
|
||||||
|
|||||||
@@ -16,8 +16,6 @@ bc.api.reputation = 1
|
|||||||
bc.api.xml = 0
|
bc.api.xml = 0
|
||||||
|
|
||||||
bc.cateinfo.filepath = /home/WebSKT/Data/categoryinfo.json
|
bc.cateinfo.filepath = /home/WebSKT/Data/categoryinfo.json
|
||||||
# for test
|
|
||||||
#bc.cateinfo.filepath = /Users/joy/work/iie/project/cyber_narrator/App/WebSketch/QueryAgentV1/src/main/resources/categoryinfo.json
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
########################## logger ##############################
|
########################## logger ##############################
|
||||||
|
|
||||||
### 设置###
|
### 设置###
|
||||||
log4j.rootLogger = debug,E,stdout
|
log4j.rootLogger = info,E,stdout
|
||||||
|
|
||||||
log4j.appender.stdout = org.apache.log4j.ConsoleAppender
|
log4j.appender.stdout = org.apache.log4j.ConsoleAppender
|
||||||
log4j.appender.stdout.Target = System.out
|
log4j.appender.stdout.Target = System.out
|
||||||
@@ -20,4 +20,4 @@ log4j.appender.E.layout.ConversionPattern = %-d{yyyy-MM-dd HH:mm:ss} [ %t:%r ]
|
|||||||
#########################
|
#########################
|
||||||
#### for test ###########
|
#### for test ###########
|
||||||
#########################
|
#########################
|
||||||
#log4j.appender.E.File =/Users/joy/work/iie/project/cyber_narrator/App/WebSketch/QueryAgentV1/logs/run.log
|
#log4j.appender.E.File =/Users/joy/work/iie/project/cyber_narrator/App/WebSketch/QueryAgentV3/logs/run.log
|
||||||
Reference in New Issue
Block a user