分类/信誉/whois结果分表存储,分开查询;修改查询逻辑,二级域名截断

This commit is contained in:
yinjiangyi
2021-06-17 10:31:07 +08:00
parent 428e0b2e2c
commit 7006046e68
17 changed files with 10240 additions and 465 deletions

View File

@@ -1,8 +1,8 @@
package cn.ac.iie;
import cn.ac.iie.config.ApplicationConfig;
import cn.ac.iie.service.OfflineTask;
import cn.ac.iie.config.CommonConfig;
import cn.ac.iie.service.UpdateTask;
import cn.ac.iie.service.OfflineTask;
import cn.ac.iie.utils.TimeUtils;
import org.apache.log4j.Logger;
@@ -16,6 +16,7 @@ import java.util.Timer;
* @date 2021/2/25 11:27 上午
*/
public class MainScheduleTasks {
private static final Logger LOG = Logger.getLogger(MainScheduleTasks.class);
private Date offlineStartTime;
@@ -25,25 +26,26 @@ public class MainScheduleTasks {
*/
private void Timers() {
if (ApplicationConfig.OFFLINE_SWITCH){
if (CommonConfig.OFFLINE_SWITCH){
LOG.info("Start offline schedule task");
Timer offlineTimer = new Timer();
Date offlineStartTime = Calendar.getInstance().getTime();
offlineTimer.schedule(new OfflineTask(), offlineStartTime,
ApplicationConfig.OFFLINE_SCHEDULE_SECOND * TimeUtils.SECOND_TO_MILLSEDONDS);
CommonConfig.OFFLINE_SCHEDULE_SECOND * TimeUtils.SECOND_TO_MILLSEDONDS);
}
if (ApplicationConfig.UPDATE_SWITCH){
if (CommonConfig.UPDATE_SWITCH){
LOG.info("Start update schedule task");
Timer updateTimer = new Timer();
Date updateStartTime = TimeUtils.getStartOfDay(1);
updateTimer.schedule(new UpdateTask(), updateStartTime,
ApplicationConfig.UPDATE_SCHEDULE_DAY * TimeUtils.DAY_TO_MILLSEDONDS);
CommonConfig.UPDATE_SCHEDULE_DAY * TimeUtils.DAY_TO_MILLSEDONDS);
}
}
public static void main(String[] args) {
MainScheduleTasks tasks = new MainScheduleTasks();
tasks.Timers();

View File

@@ -0,0 +1,50 @@
package cn.ac.iie.config;
import cn.ac.iie.utils.ConfigUtils;
/**
 * Central holder for the application's configuration constants.
 *
 * <p>Every value is read once, through {@link ConfigUtils}, when this class is
 * initialized. The class only exposes static constants, so it is declared
 * {@code final} and cannot be instantiated.
 *
 * <p>NOTE(review): the values are boxed ({@code Boolean}/{@code Integer}); whether
 * {@code ConfigUtils} can return {@code null} for a missing key is not visible
 * here — confirm before unboxing them in conditions or arithmetic.
 */
public final class CommonConfig {

    /** Not instantiable: this class is a namespace for constants only. */
    private CommonConfig() {
        throw new AssertionError("CommonConfig must not be instantiated");
    }

    // Application
    public static final Boolean OFFLINE_SWITCH = ConfigUtils.getBooleanProperty("offline.switch");
    public static final String OFFLINE_IMPORT_FILENAME_SUFFIX = ConfigUtils.getStringProperty("offline.import.filename.suffix");
    public static final String OFFLINE_IMPORT_FILEDONE_SUFFIX = ConfigUtils.getStringProperty("offline.import.filedone.suffix");
    public static final String OFFLINE_OUTPUT_FILENAME_SUFFIX = ConfigUtils.getStringProperty("offline.output.filename.suffix");
    public static final Integer OFFLINE_SCHEDULE_SECOND = ConfigUtils.getIntProperty("offline.schedule.second");
    public static final String OFFLINE_IMPORT_PATH = ConfigUtils.getStringProperty("offline.import.path");
    public static final String OFFLINE_OUTPUT_PATH = ConfigUtils.getStringProperty("offline.output.path");
    public static final Integer OFFLINE_READIN_BATCH = ConfigUtils.getIntProperty("offline.readin.batch");
    public static final Boolean UPDATE_SWITCH = ConfigUtils.getBooleanProperty("update.switch");
    public static final Integer UPDATE_SCHEDULE_DAY = ConfigUtils.getIntProperty("update.schedule.day");
    public static final Integer QUERY_STANDARD_FQDN_LEVEL = ConfigUtils.getIntProperty("query.standard.fqdn.level");
    public static final Boolean QUERY_URL_INFO_SWITCH = ConfigUtils.getBooleanProperty("query.url_info.switch");
    public static final Boolean QUERY_URL_REP_SWITCH = ConfigUtils.getBooleanProperty("query.url_rep.switch");
    public static final Boolean QUERY_URL_WHOIS_SWITCH = ConfigUtils.getBooleanProperty("query.url_whois.switch");
    public static final Integer LOG_OFFLINE_NUMBER = ConfigUtils.getIntProperty("log.offline.number");
    public static final String LOG_BC_QUERY_REPORT_FILE = ConfigUtils.getStringProperty("log.bc.query.report.file");
    public static final Integer MAXIMUM_URL_ONCE_BC_QUERY = ConfigUtils.getIntProperty("maximum.url.once.bc.query");
    public static final String TLD_FILEPATH = ConfigUtils.getStringProperty("tld.filepath");

    // BrightCloud: names of the query types (category / reputation / whois)
    public static final String BC_API_NAME_CATEGORY = ConfigUtils.getStringProperty("bc.api.name.category");
    public static final String BC_API_NAME_REPUTATION = ConfigUtils.getStringProperty("bc.api.name.reputation");
    public static final String BC_API_NAME_WHOIS = ConfigUtils.getStringProperty("bc.api.name.whois");

    // Mariadb: database name and one result table per query type
    public static final String MARIADB_DATABASE = ConfigUtils.getStringProperty("database");
    public static final String MARIADB_CATE_TABLE = ConfigUtils.getStringProperty("category_info_table");
    public static final String MARIADB_REPU_TABLE = ConfigUtils.getStringProperty("reputation_info_table");
    public static final String MARIADB_WHOIS_TABLE = ConfigUtils.getStringProperty("whois_info_table");
}

View File

@@ -1,6 +1,6 @@
package cn.ac.iie.dao;
import cn.ac.iie.config.MariaDBConfig;
import cn.ac.iie.config.CommonConfig;
import cn.ac.iie.utils.TimeUtils;
import org.apache.log4j.Logger;
@@ -27,30 +27,29 @@ public class BaseMariaDB {
statement = stat;
}
public void insertRecords(List<FqdnFile> fqdnFiles){
public void insertRecords(List<FqdnFile> fqdnFiles, String queryType){
for (FqdnFile fqdnFile : fqdnFiles) {
fqdnFileWrite(fqdnFile, "insert");
fqdnFileWrite(fqdnFile, "insert", queryType);
}
}
public void updateRecords(List<FqdnFile> fqdnFiles){
for (FqdnFile fqdnFile : fqdnFiles) {
fqdnFileWrite(fqdnFile, "update");
fqdnFileWrite(fqdnFile, "update", CommonConfig.BC_API_NAME_CATEGORY);
}
long last = System.currentTimeMillis();
}
/**
* 单条FqdnFile写入操作更新、插入
*/
public void fqdnFileWrite(FqdnFile fqdnFile, String method){
public void fqdnFileWrite(FqdnFile fqdnFile, String method, String queryType){
String sql = null;
List<String> methods = Arrays.asList("insert", "update");
assert methods.contains(method);
if (method.equals("insert")){
sql = getInsertSql(fqdnFile);
sql = getInsertSql(fqdnFile, queryType);
} else {
sql = getUpdateSql(fqdnFile);
sql = getUpdateSql(fqdnFile, queryType);
}
writeSqlExecute(sql);
}
@@ -63,12 +62,8 @@ public class BaseMariaDB {
return querySqlExecute(getUnlabelRecordSql());
}
public ResultSet getDatabaseRecord(String fqdn){
return querySqlExecute(getQueryRecordSql(fqdn));
}
public ResultSet getDatabaseRecord(List<String> fqdns){
return querySqlExecute(getQueryRecordSql(fqdns));
public ResultSet getDatabaseRecord(List<String> fqdns, String queryType){
return querySqlExecute(getQueryRecordSql(fqdns, queryType));
}
public void writeSqlExecute(String sql){
@@ -92,45 +87,38 @@ public class BaseMariaDB {
return set;
}
public Boolean isInDatabase(String standardFqdn){
boolean isInDb = false;
try {
String querySql = getQueryExistSql(standardFqdn);
ResultSet set = statement.executeQuery(querySql);
set.next();
isInDb = set.getInt("COUNT(*)") > 0;
public String getInsertSql(FqdnFile fqdnFile, String queryType){
String tableName = CommonConfig.MARIADB_CATE_TABLE;
} catch (SQLException e) {
e.printStackTrace();
if (queryType.equals(CommonConfig.BC_API_NAME_REPUTATION)){
tableName = CommonConfig.MARIADB_REPU_TABLE;
} else if (queryType.equals(CommonConfig.BC_API_NAME_WHOIS)){
tableName = CommonConfig.MARIADB_WHOIS_TABLE;
}
return isInDb;
}
public String getInsertSql(FqdnFile fqdnFile){
long start = System.currentTimeMillis();
StringBuilder sql = new StringBuilder("INSERT INTO ");
sql.append(MariaDBConfig.MARIADB_DATABASE).append(".")
.append(MariaDBConfig.MARIADB_TABLE).append(' ');
sql.append(CommonConfig.MARIADB_DATABASE).append(".")
.append(tableName).append(' ');
sql.append(" (fqdn,query_success,reputation_score,reputation_level,")
.append("category_id,category_name,category_group,category_conf,is_a1_cat,popularity,observ_age,country,threat_history,")
.append("whois_domain,whois_update_date,whois_create_date,whois_expire_date,whois_email,whois_ns,")
.append("whois_registrar_name,whois_registrant_org,whois_regisrante_name,whois_regisrante_street,")
.append("whois_regisrante_city,whois_regisrante_state,whois_regisrante_postcode,whois_regisrante_country,")
.append("whois_regisrante_phone) values");
sql.append(" (").append(FqdnFile.getKeys(queryType)).append(") values");
sql.append('(').append(fqdnFile.getValues()).append(')');
sql.append('(').append(fqdnFile.getValues(queryType)).append(')');
String resSql = sql.toString();
resSql = resSql.replace("'null'", "null");
return resSql;
}
public String getUpdateSql(FqdnFile fqdnFile){
public String getUpdateSql(FqdnFile fqdnFile, String queryType){
String tableName = CommonConfig.MARIADB_CATE_TABLE;
if (queryType.equals(CommonConfig.BC_API_NAME_REPUTATION)){
tableName = CommonConfig.MARIADB_REPU_TABLE;
} else if (queryType.equals(CommonConfig.BC_API_NAME_WHOIS)){
tableName = CommonConfig.MARIADB_WHOIS_TABLE;
}
StringBuilder sql = new StringBuilder("UPDATE ");
sql.append(MariaDBConfig.MARIADB_DATABASE).append(".")
.append(MariaDBConfig.MARIADB_TABLE).append(' ')
sql.append(CommonConfig.MARIADB_DATABASE).append(".")
.append(tableName).append(' ')
.append("SET ").append(fqdnFile.getKeyValues())
.append(", update_time = current_time() ")
.append(" WHERE fqdn = '").append(fqdnFile.getFqdn()).append('\'');
@@ -141,40 +129,35 @@ public class BaseMariaDB {
return resSql;
}
public String getQueryExistSql(String fqdn){
StringBuilder sql = new StringBuilder("SELECT COUNT(*) FROM ");
sql.append(MariaDBConfig.MARIADB_DATABASE).append(".")
.append(MariaDBConfig.MARIADB_TABLE).append(' ')
.append(" WHERE fqdn = '").append(fqdn).append('\'');
return sql.toString();
}
public String getQueryRecordSql(List<String> fqdns, String queryType){
String tableName = CommonConfig.MARIADB_CATE_TABLE;
public String getQueryRecordSql(String fqdn){
StringBuilder sql = new StringBuilder("SELECT * FROM ");
sql.append(MariaDBConfig.MARIADB_DATABASE).append(".")
.append(MariaDBConfig.MARIADB_TABLE).append(' ')
.append(" WHERE fqdn = '").append(fqdn).append('\'');
if (queryType.equals(CommonConfig.BC_API_NAME_REPUTATION)){
tableName = CommonConfig.MARIADB_REPU_TABLE;
} else if (queryType.equals(CommonConfig.BC_API_NAME_WHOIS)){
tableName = CommonConfig.MARIADB_WHOIS_TABLE;
}
return sql.toString();
}
public String getQueryRecordSql(List<String> fqdns){
StringBuilder sql = new StringBuilder("SELECT * FROM ");
String queryFqdns = fqdns.stream().map(s -> "'" + s + "'").collect(Collectors.joining(","));
sql.append(MariaDBConfig.MARIADB_DATABASE).append(".")
.append(MariaDBConfig.MARIADB_TABLE).append(' ')
sql.append(CommonConfig.MARIADB_DATABASE).append(".")
.append(tableName).append(' ')
.append(" WHERE fqdn in (").append(queryFqdns).append(")");
return sql.toString();
}
/**
* 只更新分类结果表
* @return
*/
public static String getExpiredRecordSql(){
Date lastUpdateTime = new Timestamp(TimeUtils.getExpiredTime().getTime());
String resSql = "SELECT x.fqdn FROM " + MariaDBConfig.MARIADB_DATABASE + "." +
MariaDBConfig.MARIADB_TABLE +
String resSql = "SELECT x.fqdn FROM " + CommonConfig.MARIADB_DATABASE + "." +
CommonConfig.MARIADB_CATE_TABLE +
" x WHERE x.update_time < '" + lastUpdateTime + '\'';
LOG.debug("Update task: expired query sql" + resSql);
@@ -184,8 +167,8 @@ public class BaseMariaDB {
public static String getUnlabelRecordSql(){
return "SELECT x.fqdn FROM " + MariaDBConfig.MARIADB_DATABASE + "." +
MariaDBConfig.MARIADB_TABLE +
return "SELECT x.fqdn FROM " + CommonConfig.MARIADB_DATABASE + "." +
CommonConfig.MARIADB_CATE_TABLE +
" x WHERE x.category_id = 0 or x.query_success != 1";
}
}

View File

@@ -1,5 +1,10 @@
package cn.ac.iie.dao;
import cn.ac.iie.config.CommonConfig;
import cn.ac.iie.utils.TopDomainUtils;
import org.apache.log4j.Logger;
import java.sql.Timestamp;
import java.util.Date;
/**
@@ -10,10 +15,12 @@ import java.util.Date;
*
*/
public class FqdnFile
{
public class FqdnFile {
private static final Logger LOG = Logger.getLogger(FqdnFile.class);
private String fqdn;
private Boolean query_success;
private Integer match_pattern;
private Integer reputation_score;
private String reputation_level;
private Integer category_id;
@@ -26,9 +33,9 @@ public class FqdnFile
private String country;
private Integer threat_history;
private String whois_domain;
private java.sql.Timestamp whois_update_date;
private java.sql.Timestamp whois_create_date;
private java.sql.Timestamp whois_expire_date;
private Timestamp whois_update_date;
private Timestamp whois_create_date;
private Timestamp whois_expire_date;
private String whois_email;
private String whois_ns;
private String whois_registrar_name;
@@ -41,19 +48,13 @@ public class FqdnFile
private String whois_regisrante_country;
private String whois_regisrante_phone;
// update
public FqdnFile(String fqdn, Boolean query_success,
Integer reputation_score, String reputationLevel,
Integer categoryId, String categoryName, String categoryGroup, Integer categoryConf, Boolean isA1Cat,
Integer popularity, Integer observAge, String country, Integer threatHistory,
String whoisDomain, Date whoisUpdateDate, Date whoisCreateDate, Date whoisExpireDate,
String whoisEmail, String whoisNs, String whoisRegistrarName, String whoisRegistrantOrg,
String whoisRegisranteName, String whoisRegisranteStreet, String whoisRegisranteCity,
String whoisRegisranteState, String whoisRegisrantePostcode, String whoisRegisranteCountry,
String whoisRegisrantePhone) {
Integer categoryId, String categoryName, String categoryGroup, Integer categoryConf, Boolean isA1Cat) {
this.fqdn = fqdn;
this.query_success = query_success;
this.match_pattern = getMatchPattern(fqdn);
this.reputation_score = reputation_score;
this.reputation_level = getEffectiveString(reputationLevel);
@@ -62,15 +63,39 @@ public class FqdnFile
this.category_group = getEffectiveString(categoryGroup);
this.category_conf = categoryConf;
this.is_a1_cat = isA1Cat;
}
/**
 * Builds a record that carries only the reputation-related fields.
 *
 * <p>The match pattern is derived from {@code fqdn} via {@link #getMatchPattern(String)}.
 * String arguments pass through {@code getEffectiveString}, which stores an empty
 * string as {@code null}.
 *
 * @param fqdn             domain name this record describes
 * @param query_success    whether the lookup succeeded
 * @param reputation_score reputation score
 * @param reputationLevel  reputation level label
 * @param popularity       popularity value
 * @param observAge        observation age value
 * @param country          country value
 * @param threatHistory    threat history value
 */
public FqdnFile(String fqdn, Boolean query_success,
                Integer reputation_score, String reputationLevel,
                Integer popularity, Integer observAge, String country, Integer threatHistory) {
    // Identity and lookup status.
    this.fqdn = fqdn;
    this.match_pattern = getMatchPattern(fqdn);
    this.query_success = query_success;

    // Plain numeric reputation attributes are stored as given.
    this.reputation_score = reputation_score;
    this.popularity = popularity;
    this.observ_age = observAge;
    this.threat_history = threatHistory;

    // Textual attributes are normalized (empty -> null).
    this.reputation_level = getEffectiveString(reputationLevel);
    this.country = getEffectiveString(country);
}
public FqdnFile(String fqdn, Boolean query_success,
String whoisDomain, Date whoisUpdateDate, Date whoisCreateDate, Date whoisExpireDate,
String whoisEmail, String whoisNs, String whoisRegistrarName, String whoisRegistrantOrg,
String whoisRegisranteName, String whoisRegisranteStreet, String whoisRegisranteCity,
String whoisRegisranteState, String whoisRegisrantePostcode, String whoisRegisranteCountry,
String whoisRegisrantePhone) {
this.fqdn = fqdn;
this.query_success = query_success;
this.match_pattern = getMatchPattern(fqdn);
this.whois_domain = getEffectiveString(whoisDomain);
this.whois_update_date = whoisUpdateDate == null? null : new java.sql.Timestamp(whoisUpdateDate.getTime());
this.whois_create_date = whoisCreateDate == null? null : new java.sql.Timestamp(whoisCreateDate.getTime());
this.whois_expire_date = whoisExpireDate == null? null : new java.sql.Timestamp(whoisExpireDate.getTime());
this.whois_update_date = whoisUpdateDate == null ? null : new Timestamp(whoisUpdateDate.getTime());
this.whois_create_date = whoisCreateDate == null ? null : new Timestamp(whoisCreateDate.getTime());
this.whois_expire_date = whoisExpireDate == null ? null : new Timestamp(whoisExpireDate.getTime());
this.whois_email = getEffectiveString(whoisEmail);
this.whois_ns = getEffectiveString(whoisNs);
this.whois_registrar_name = getEffectiveString(whoisRegistrarName);
@@ -84,14 +109,52 @@ public class FqdnFile
this.whois_regisrante_phone = getEffectiveString(whoisRegisrantePhone);
}
private String getEffectiveString(String s){
if (!(s ==null)){
private String getEffectiveString(String s) {
if (!(s == null)) {
return s.length() == 0 ? null : s;
} else{
} else {
return null;
}
}
/**
 * Determines the match pattern to store for a domain name.
 *
 * <p>Second-level domain: right match (1). Anything else: full match (2).
 * A name counts as a second-level domain when it equals the value returned by
 * {@code TopDomainUtils.getSecDomain(fqdn)}.
 *
 * @param fqdn domain name to classify; must not be {@code null}
 * @return 1 when {@code fqdn} is its own second-level domain, otherwise 2
 */
public static Integer getMatchPattern(String fqdn) {
    boolean isSecondLevelDomain = fqdn.equals(TopDomainUtils.getSecDomain(fqdn));
    return isSecondLevelDomain ? 1 : 2;
}
/**
 * @return the stored match pattern (see {@link #getMatchPattern(String)} for the values)
 */
public Integer getMatch_pattern() {
    return this.match_pattern;
}
/**
 * Overwrites the stored match pattern.
 *
 * @param match_pattern new match pattern value
 */
public void setMatch_pattern(Integer match_pattern) {
    this.match_pattern = match_pattern;
}
/**
 * @return the stored {@code is_a1_cat} flag (may be {@code null} if never set)
 */
public Boolean getIs_a1_cat() {
    return this.is_a1_cat;
}
/**
 * Overwrites the stored {@code is_a1_cat} flag.
 *
 * @param is_a1_cat new flag value
 */
public void setIs_a1_cat(Boolean is_a1_cat) {
    this.is_a1_cat = is_a1_cat;
}
/**
 * @return the stored whois creation timestamp (may be {@code null})
 */
public Timestamp getWhois_create_date() {
    return this.whois_create_date;
}
/**
 * Overwrites the stored whois creation timestamp.
 *
 * @param whois_create_date new creation timestamp
 */
public void setWhois_create_date(Timestamp whois_create_date) {
    this.whois_create_date = whois_create_date;
}
public String getFqdn() {
return fqdn;
@@ -209,7 +272,7 @@ public class FqdnFile
return whois_update_date;
}
public void setWhois_update_date(java.sql.Timestamp whois_update_date) {
public void setWhois_update_date(Timestamp whois_update_date) {
this.whois_update_date = whois_update_date;
}
@@ -217,7 +280,7 @@ public class FqdnFile
return whois_create_date;
}
public void setWhoisCreateDate(java.sql.Timestamp whoisCreateDate) {
public void setWhoisCreateDate(Timestamp whoisCreateDate) {
this.whois_create_date = whoisCreateDate;
}
@@ -225,7 +288,7 @@ public class FqdnFile
return whois_expire_date;
}
public void setWhois_expire_date(java.sql.Timestamp whois_expire_date) {
public void setWhois_expire_date(Timestamp whois_expire_date) {
this.whois_expire_date = whois_expire_date;
}
@@ -351,99 +414,199 @@ public class FqdnFile
'}';
}
public String getValues(){
String resString =
"'" + fqdn + '\'' +
"," + query_success +
", " + reputation_score +
", '" + reputation_level + '\'' +
", " + category_id +
", '" + category_name + '\'' +
", '" + category_group + '\'' +
", " + category_conf +
", " + is_a1_cat +
", " + popularity +
", " + observ_age +
", '" + country + '\'' +
", " + threat_history +
", '" + whois_domain + '\'' +
", '" + whois_update_date + '\'' +
", '" + whois_create_date + '\'' +
", '" + whois_expire_date + '\'' +
", '" + whois_email + '\'' +
", '" + whois_ns + '\'' +
", '" + whois_registrar_name + '\'' +
", '" + whois_registrant_org + '\'' +
", '" + whois_regisrante_name + '\'' +
", '" + whois_regisrante_street + '\'' +
", '" + whois_regisrante_city + '\'' +
", '" + whois_regisrante_state + '\'' +
", '" + whois_regisrante_postcode + '\'' +
", '" + whois_regisrante_country + '\'' +
", '" + whois_regisrante_phone + '\'' ;
return resString.replace("'null'", "null");
// public String getValues() {
// String resString =
// "'" + fqdn + '\'' +
// "," + query_success +
// ", " + reputation_score +
// ", '" + reputation_level + '\'' +
// ", " + category_id +
// ", '" + category_name + '\'' +
// ", '" + category_group + '\'' +
// ", " + category_conf +
// ", " + is_a1_cat +
// ", " + popularity +
// ", " + observ_age +
// ", '" + country + '\'' +
// ", " + threat_history +
// ", '" + whois_domain + '\'' +
// ", '" + whois_update_date + '\'' +
// ", '" + whois_create_date + '\'' +
// ", '" + whois_expire_date + '\'' +
// ", '" + whois_email + '\'' +
// ", '" + whois_ns + '\'' +
// ", '" + whois_registrar_name + '\'' +
// ", '" + whois_registrant_org + '\'' +
// ", '" + whois_regisrante_name + '\'' +
// ", '" + whois_regisrante_street + '\'' +
// ", '" + whois_regisrante_city + '\'' +
// ", '" + whois_regisrante_state + '\'' +
// ", '" + whois_regisrante_postcode + '\'' +
// ", '" + whois_regisrante_country + '\'' +
// ", '" + whois_regisrante_phone + '\'';
// return resString.replace("'null'", "null");
// }
/**
 * Renders this record's values as a comma-separated list for the given query type,
 * in the same column order as {@link #getKeys(String)}.
 *
 * <p>The list always starts with fqdn, query_success and match_pattern, followed by
 * the columns of the category, reputation or whois result set. For any other query
 * type only that leading part (ending in {@code ", "}) is produced.
 *
 * <p>Text values are wrapped in single quotes without any escaping, and every
 * occurrence of {@code 'null'} in the final text is rewritten to an unquoted
 * {@code null}.
 *
 * @param queryType one of the {@code CommonConfig.BC_API_NAME_*} values; must not be {@code null}
 * @return the rendered value list
 */
public String getValues(String queryType) {
    StringBuilder row = new StringBuilder();

    // Columns shared by every result table.
    row.append('\'').append(fqdn).append('\'')
            .append(", ").append(query_success)
            .append(", ").append(match_pattern)
            .append(", ");

    if (queryType.equals(CommonConfig.BC_API_NAME_CATEGORY)) {
        row.append(reputation_score)
                .append(", '").append(reputation_level).append('\'')
                .append(", ").append(category_id)
                .append(", '").append(category_name).append('\'')
                .append(", '").append(category_group).append('\'')
                .append(", ").append(category_conf)
                .append(", ").append(is_a1_cat);
    } else if (queryType.equals(CommonConfig.BC_API_NAME_REPUTATION)) {
        row.append(reputation_score)
                .append(", '").append(reputation_level).append('\'')
                .append(", ").append(popularity)
                .append(", ").append(observ_age)
                .append(", '").append(country).append('\'')
                .append(", ").append(threat_history);
    } else if (queryType.equals(CommonConfig.BC_API_NAME_WHOIS)) {
        // Every whois column is rendered as a quoted value, in table column order.
        Object[] whoisColumns = {
                whois_domain, whois_update_date, whois_create_date, whois_expire_date,
                whois_email, whois_ns, whois_registrar_name, whois_registrant_org,
                whois_regisrante_name, whois_regisrante_street, whois_regisrante_city,
                whois_regisrante_state, whois_regisrante_postcode, whois_regisrante_country,
                whois_regisrante_phone
        };
        for (int i = 0; i < whoisColumns.length; i++) {
            if (i > 0) {
                row.append(", ");
            }
            row.append('\'').append(whoisColumns[i]).append('\'');
        }
    }

    // Missing values were rendered as the quoted text 'null'; turn them into SQL NULL.
    return row.toString().replace("'null'", "null");
}
public static String getKeys() {
return "fqdn" +
", query_success" +
", reputation_score" +
", reputation_level" +
", category_id" +
", category_name" +
", category_group" +
", category_conf" +
", is_a1_cat" +
", popularity" +
", observ_age" +
", country" +
", threat_history" +
", whois_domain" +
", whois_update_date" +
", whois_create_date" +
", whois_expire_date" +
", whois_email" +
", whois_ns" +
", whois_registrar_name" +
", whois_registrant_org" +
", whois_regisrante_name" +
", whois_regisrante_street" +
", whois_regisrante_city" +
", whois_regisrante_state" +
", whois_regisrante_postcode" +
", whois_regisrante_country" +
", whois_regisrante_phone";
/**
 * Returns the comma-separated column names for the given query type.
 *
 * <p>Each list starts with fqdn, query_success and match_pattern, followed by the
 * columns specific to the category, reputation or whois result set.
 *
 * @param queryType one of the {@code CommonConfig.BC_API_NAME_*} values; must not be {@code null}
 * @return the column list, or an empty string for any other query type
 */
public static String getKeys(String queryType) {
    if (queryType.equals(CommonConfig.BC_API_NAME_CATEGORY)) {
        return String.join(", ",
                "fqdn", "query_success", "match_pattern",
                "reputation_score", "reputation_level",
                "category_id", "category_name", "category_group", "category_conf",
                "is_a1_cat");
    }
    if (queryType.equals(CommonConfig.BC_API_NAME_REPUTATION)) {
        return String.join(", ",
                "fqdn", "query_success", "match_pattern",
                "reputation_score", "reputation_level",
                "popularity", "observ_age", "country", "threat_history");
    }
    if (queryType.equals(CommonConfig.BC_API_NAME_WHOIS)) {
        return String.join(", ",
                "fqdn", "query_success", "match_pattern",
                "whois_domain", "whois_update_date", "whois_create_date", "whois_expire_date",
                "whois_email", "whois_ns", "whois_registrar_name", "whois_registrant_org",
                "whois_regisrante_name", "whois_regisrante_street", "whois_regisrante_city",
                "whois_regisrante_state", "whois_regisrante_postcode", "whois_regisrante_country",
                "whois_regisrante_phone");
    }
    // Unrecognized query type: no columns.
    return "";
}
public String getKeyValues(){
public String getKeyValues() {
String resString =
"query_success=" + query_success +
", reputation_score=" + reputation_score +
", reputation_level='" + reputation_level + '\'' +
", category_id=" + category_id +
", category_name='" + category_name + '\'' +
", category_group='" + category_group + '\'' +
", category_conf=" + category_conf +
", is_a1_cat=" + is_a1_cat +
", popularity=" + popularity +
", observ_age=" + observ_age +
", country='" + country + '\'' +
", threat_history=" + threat_history +
", whois_domain='" + whois_domain + '\'' +
", whois_update_date='" + whois_update_date + '\'' +
", whois_create_date='" + whois_create_date + '\'' +
", whois_expire_date='" + whois_expire_date + '\'' +
", whois_email='" + whois_email + '\'' +
", whois_ns='" + whois_ns + '\'' +
", whois_registrar_name='" + whois_registrar_name + '\'' +
", whois_registrant_org='" + whois_registrant_org + '\'' +
", whois_regisrante_name='" + whois_regisrante_name + '\'' +
", whois_regisrante_street='" + whois_regisrante_street + '\'' +
", whois_regisrante_city='" + whois_regisrante_city + '\'' +
", whois_regisrante_state='" + whois_regisrante_state + '\'' +
", whois_regisrante_postcode='" + whois_regisrante_postcode + '\'' +
", whois_regisrante_country='" + whois_regisrante_country + '\'' +
", whois_regisrante_phone='" + whois_regisrante_phone + '\'';
return resString.replace("'null'", "null");
", reputation_score=" + reputation_score +
", reputation_level='" + reputation_level + '\'' +
", category_id=" + category_id +
", category_name='" + category_name + '\'' +
", category_group='" + category_group + '\'' +
", category_conf=" + category_conf +
", is_a1_cat=" + is_a1_cat +
", popularity=" + popularity +
", observ_age=" + observ_age +
", country='" + country + '\'' +
", threat_history=" + threat_history +
", whois_domain='" + whois_domain + '\'' +
", whois_update_date='" + whois_update_date + '\'' +
", whois_create_date='" + whois_create_date + '\'' +
", whois_expire_date='" + whois_expire_date + '\'' +
", whois_email='" + whois_email + '\'' +
", whois_ns='" + whois_ns + '\'' +
", whois_registrar_name='" + whois_registrar_name + '\'' +
", whois_registrant_org='" + whois_registrant_org + '\'' +
", whois_regisrante_name='" + whois_regisrante_name + '\'' +
", whois_regisrante_street='" + whois_regisrante_street + '\'' +
", whois_regisrante_city='" + whois_regisrante_city + '\'' +
", whois_regisrante_state='" + whois_regisrante_state + '\'' +
", whois_regisrante_postcode='" + whois_regisrante_postcode + '\'' +
", whois_regisrante_country='" + whois_regisrante_country + '\'' +
", whois_regisrante_phone='" + whois_regisrante_phone + '\'';
return resString.replace("'null'", "null");
}
/**
 * Renders this record as a comma-separated list of {@code column=value} pairs for
 * the given query type.
 *
 * <p>Each list starts with query_success and match_pattern, followed by the columns
 * specific to the category, reputation or whois result set. Text values are wrapped
 * in single quotes without any escaping, and every occurrence of {@code 'null'} in
 * the final text is rewritten to an unquoted {@code null}.
 *
 * @param queryType one of the {@code CommonConfig.BC_API_NAME_*} values; must not be {@code null}
 * @return the rendered assignment list
 * @throws IllegalArgumentException if {@code queryType} is none of the three known
 *         query types (previously guarded only by an {@code assert}, which is skipped
 *         when assertions are disabled and then surfaced as a NullPointerException)
 */
public String getKeyValues(String queryType) {
    String resString;
    if (queryType.equals(CommonConfig.BC_API_NAME_CATEGORY)) {
        resString =
                "query_success=" + query_success +
                ", match_pattern=" + match_pattern +
                ", reputation_score=" + reputation_score +
                ", reputation_level='" + reputation_level + '\'' +
                ", category_id=" + category_id +
                ", category_name='" + category_name + '\'' +
                ", category_group='" + category_group + '\'' +
                ", category_conf=" + category_conf +
                ", is_a1_cat=" + is_a1_cat;
    } else if (queryType.equals(CommonConfig.BC_API_NAME_REPUTATION)) {
        resString =
                "query_success=" + query_success +
                ", match_pattern=" + match_pattern +
                ", reputation_score=" + reputation_score +
                ", reputation_level='" + reputation_level + '\'' +
                ", popularity=" + popularity +
                ", observ_age=" + observ_age +
                ", country='" + country + '\'' +
                ", threat_history=" + threat_history;
    } else if (queryType.equals(CommonConfig.BC_API_NAME_WHOIS)) {
        resString =
                "query_success=" + query_success +
                ", match_pattern=" + match_pattern +
                ", whois_domain='" + whois_domain + '\'' +
                ", whois_update_date='" + whois_update_date + '\'' +
                ", whois_create_date='" + whois_create_date + '\'' +
                ", whois_expire_date='" + whois_expire_date + '\'' +
                ", whois_email='" + whois_email + '\'' +
                ", whois_ns='" + whois_ns + '\'' +
                ", whois_registrar_name='" + whois_registrar_name + '\'' +
                ", whois_registrant_org='" + whois_registrant_org + '\'' +
                ", whois_regisrante_name='" + whois_regisrante_name + '\'' +
                ", whois_regisrante_street='" + whois_regisrante_street + '\'' +
                ", whois_regisrante_city='" + whois_regisrante_city + '\'' +
                ", whois_regisrante_state='" + whois_regisrante_state + '\'' +
                ", whois_regisrante_postcode='" + whois_regisrante_postcode + '\'' +
                ", whois_regisrante_country='" + whois_regisrante_country + '\'' +
                ", whois_regisrante_phone='" + whois_regisrante_phone + '\'';
    } else {
        // Fail with a clear message regardless of whether the JVM runs with -ea.
        throw new IllegalArgumentException("Unsupported query type: " + queryType);
    }
    // Missing values were rendered as the quoted text 'null'; turn them into SQL NULL.
    return resString.replace("'null'", "null");
}
}

View File

@@ -1,12 +1,9 @@
package cn.ac.iie.service;
import cn.ac.iie.config.ApplicationConfig;
import cn.ac.iie.config.CommonConfig;
import cn.ac.iie.dao.BaseMariaDB;
import cn.ac.iie.dao.FqdnFile;
import cn.ac.iie.utils.BrightCloudUtils;
import cn.ac.iie.utils.FileUtils;
import cn.ac.iie.utils.LogUtils;
import cn.ac.iie.utils.MariaDBConnect;
import cn.ac.iie.utils.*;
import com.alibaba.fastjson.JSONObject;
import org.apache.log4j.Logger;
@@ -17,9 +14,7 @@ import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.List;
import java.util.TimerTask;
import java.util.*;
/**
* @author yjy
@@ -30,6 +25,34 @@ public class OfflineTask extends TimerTask {
private static final Logger LOG = Logger.getLogger(OfflineTask.class);
private static final LogUtils logutils = new LogUtils();
private BrightCloudUtils brightCloudUtils = new BrightCloudUtils();
private Connection mariaConn;
private Statement mariaStat;
private BaseMariaDB mariaDB;
private OutputStream outStream;
private OutputStreamWriter outWriter;
private List<String> queryTypes = BrightCloudUtils.getQueryTypes();
private String currentType;
private File importFile;
private String importFileName;
private File outputFile;
private InputStreamReader inputStreamReader;
private BufferedReader bufferedReader;
long standardFqdnNum;
long complNum;
long dbQueryNum;
long bcQueryNum;
long failQueryNum;
long effecResNum;
long noLabelNum;
long fileLineCount;
@Override
public void run() {
try {
@@ -39,195 +62,282 @@ public class OfflineTask extends TimerTask {
}
}
/**
* runTask函数对新增导入文件执行查询任务
* @throws SQLException
* @throws IOException
*/
private void runTask() throws SQLException, IOException {
List<String> fileNames = catpureNewFiles();
List<String> fqdns;
List<String> fileNames = captureNewFiles();
Connection mariaConn = MariaDBConnect.getConnection();
Statement mariaStat = mariaConn.createStatement();
BaseMariaDB mariaDB = new BaseMariaDB(mariaConn, mariaStat);
BrightCloudUtils brightCloudUtils = new BrightCloudUtils();
List<String> queryTypes = BrightCloudUtils.getQueryTypes();
mariaConn = MariaDBConnect.getConnection();
mariaStat = mariaConn.createStatement();
mariaDB = new BaseMariaDB(mariaConn, mariaStat);
for (String fileName : fileNames) {
File importFile = new File(fileName);
String importFileName = FileUtils.getFileName(importFile);
InputStreamReader inputStreamReader = new InputStreamReader(
new FileInputStream(importFile), "GBK");
BufferedReader bufferedReader = new BufferedReader(inputStreamReader);
Long fileLineCount = FileUtils.getFileLineNum(importFile);
// 输出结果保存文件
LOG.info("[Offline import file query]-" + importFileName + ": File Found.");
String outputFileName =
importFileName.substring(0, importFileName.length() - ApplicationConfig.OFFLINE_IMPORT_FILENAME_SUFFIX.length())
+ ApplicationConfig.OFFLINE_OUTPUT_FILENAME_SUFFIX;
File outputFile = new File(ApplicationConfig.OFFLINE_OUTPUT_PATH + "/" + outputFileName);
if (!outputFile.exists()) {
FileUtils.createFile(new File(ApplicationConfig.OFFLINE_OUTPUT_PATH), outputFileName);
importFile = new File(fileName);
for (String type: queryTypes){
currentType = type;
runFileQuery(importFile);
}
OutputStream outStream = new FileOutputStream(outputFile);
OutputStreamWriter outWriter = new OutputStreamWriter(outStream, StandardCharsets.UTF_8);
// 添加表头
outWriter.write(FqdnFile.getKeys() + "\n");
List<String> toQueryBC = new ArrayList<>();
long standardFqdnNum = 0;
long complNum = 0;
long dbQueryNum = 0;
long bcQueryNum = 0;
long failQueryNum = 0;
long effecResNum = 0;
long noLabelNum = 0;
// 批量读取数据,读取时去空行&去空格
LOG.info("[Offline import file query]-" + importFileName + " Progress: 0.00%");
while ((fqdns = FileUtils.getBatchFqdnReadIn(bufferedReader, ApplicationConfig.OFFLINE_READIN_BATCH)).size()>0) {
// 校验格式&去重
fqdns = SingleTermTask.getCheckedFqdn(fqdns);
standardFqdnNum += fqdns.size();
// 批量db查询
ResultSet dbResult = mariaDB.getDatabaseRecord(fqdns);
List<String> dbQueryFqdns = new ArrayList<>();
while (dbResult.next()) {
try {
FqdnFile fqdnFile = SingleTermTask.ResSet2FqdnFile(dbResult);
dbQueryFqdns.add(fqdnFile.getFqdn());
outWriter.write(fqdnFile.getValues() + "\n");
// 计数
dbQueryNum += 1;
complNum += 1;
failQueryNum = fqdnFile.getQuery_success().equals(false) ? failQueryNum + 1 : failQueryNum;
noLabelNum = fqdnFile.getCategory_id().equals(0) ? noLabelNum + 1 : noLabelNum;
effecResNum = (fqdnFile.getQuery_success().equals(false) | fqdnFile.getCategory_id().equals(0)) ?
effecResNum : effecResNum+1;
// 打印进度日志
if (complNum > 0 && complNum % ApplicationConfig.LOG_OFFLINE_NUMBER == 0) {
String percent = new DecimalFormat("##.00%").format((float) complNum / fileLineCount);
LOG.info("[Offline import file query]-" + importFileName + " Progress:" + percent);
}
} catch (Exception e) {
LOG.error("[Offline import file query]-" + importFileName + ": Wrong in database query");
e.printStackTrace();
}
}
// 添加bc查询目标
fqdns.removeAll(dbQueryFqdns);
toQueryBC.addAll(fqdns);
while (toQueryBC.size() > ApplicationConfig.MAXIMUM_URL_ONCE_BC_QUERY) {
// LOG.debug("Execute batch bc query...");
JSONObject resObj = brightCloudUtils.getQueryResults(toQueryBC.subList(0, ApplicationConfig.MAXIMUM_URL_ONCE_BC_QUERY));
List<FqdnFile> fqdnFiles = brightCloudUtils.responseSparse(resObj);
assert fqdnFiles.size() > 0;
// 存数据库
mariaDB.insertRecords(fqdnFiles);
for (FqdnFile tmpFile : fqdnFiles) {
// 写入output
outWriter.write(tmpFile.getValues() + "\n");
// 计数
bcQueryNum += 1;
complNum += 1;
failQueryNum = tmpFile.getQuery_success().equals(false) ? failQueryNum + 1 : failQueryNum;
noLabelNum = tmpFile.getCategory_id().equals(0) ? noLabelNum + 1 : noLabelNum;
effecResNum = (tmpFile.getQuery_success().equals(false) | tmpFile.getCategory_id().equals(0)) ? effecResNum : effecResNum+1;
// 打印进度日志
if (complNum > 0 && complNum % ApplicationConfig.LOG_OFFLINE_NUMBER == 0) {
String percent = new DecimalFormat("##.00%").format((float) complNum / fileLineCount);
LOG.info("[Offline import file query]-" + importFileName + " Progress:" + percent);
}
}
toQueryBC = toQueryBC.subList(ApplicationConfig.MAXIMUM_URL_ONCE_BC_QUERY, toQueryBC.size());
}
}
// 完成剩余bc查询
if (toQueryBC.size()>0){
JSONObject resObj = brightCloudUtils.getQueryResults(toQueryBC);
List<FqdnFile> fqdnFiles = brightCloudUtils.responseSparse(resObj);
assert fqdnFiles.size() > 0;
// 存数据库
mariaDB.insertRecords(fqdnFiles);
for (FqdnFile tmpFile : fqdnFiles) {
// 写入output
outWriter.write(tmpFile.getValues() + "\n");
// 计数
bcQueryNum += 1;
complNum += 1;
failQueryNum = tmpFile.getQuery_success().equals(false) ? failQueryNum + 1 : failQueryNum;
noLabelNum = tmpFile.getCategory_id().equals(0) ? noLabelNum + 1 : noLabelNum;
effecResNum = (tmpFile.getQuery_success().equals(true) && tmpFile.getCategory_id().equals(1)) ? effecResNum + 1 : effecResNum;
}
}
LOG.info("[Offline import file query]-" + importFileName + " Progress: 100.00%");
// 打印处理结果至日志
LOG.info("[Offline import file query]-" + importFileName + " "
+ "Query result: submit " + standardFqdnNum + " valid fqdns, "
+ dbQueryNum + " (" + new DecimalFormat("##.0%").format((float) dbQueryNum / standardFqdnNum) + ")" + " results from database"
+ bcQueryNum + " (" + new DecimalFormat("##.0%").format((float) bcQueryNum / standardFqdnNum) + ")" + " results from bright cloud. "
+ effecResNum + " (" + new DecimalFormat("##.0%").format((float) effecResNum / standardFqdnNum) + ")" + " effective results"
+ failQueryNum + " (" + new DecimalFormat("##.0%").format((float) failQueryNum / standardFqdnNum) + ")" + " failed queries"
+ noLabelNum + " (" + new DecimalFormat("##.0%").format((float) noLabelNum / standardFqdnNum) + ")" + " unlabeled results");
LOG.info("[Offline import file query]-" + importFileName + " Results saved in " + outputFile.toString());
// 打印查询服务调用记录日志
if (bcQueryNum > 0) {
OutputStream bcQueryLogStream = new FileOutputStream(ApplicationConfig.LOG_BC_QUERY_REPORT_FILE, true);
OutputStreamWriter bcQueryLogWriter = new OutputStreamWriter(bcQueryLogStream, StandardCharsets.UTF_8);
for (String type : queryTypes) {
java.sql.Date d = new java.sql.Date(System.currentTimeMillis());
bcQueryLogWriter.write(d + "," + "OfflineTask," + importFileName + "," + type + "," + bcQueryNum + "\n");
}
FileUtils.writerClose(bcQueryLogWriter, bcQueryLogStream);
}
FileUtils.writerClose(outWriter, outStream);
//查询结束修改后缀
importFile.renameTo(new File(importFile.toString().substring(0, importFile.toString().length() - ApplicationConfig.OFFLINE_IMPORT_FILENAME_SUFFIX.length())
+ ApplicationConfig.OFFLINE_IMPORT_FILEDONE_SUFFIX));
importFile.renameTo(new File(importFile.toString().substring(0, importFile.toString().length() - CommonConfig.OFFLINE_IMPORT_FILENAME_SUFFIX.length())
+ CommonConfig.OFFLINE_IMPORT_FILEDONE_SUFFIX));
FileUtils.readerClose(bufferedReader, inputStreamReader);
}
MariaDBConnect.close(mariaStat, mariaConn);
}
private List<String> catpureNewFiles() {
/**
 * Runs the query pipeline for a single import file and the current query type:
 * exact-match database lookup, then database lookup by truncated second-level
 * domain, then BrightCloud lookup for everything still unresolved.
 *
 * <p>The import file is decoded as GBK and read in batches; results are written
 * as UTF-8 to an output file derived from the import file name. The reader and
 * the writer are released even when a lookup or a write fails.
 *
 * @param importFile file containing the fqdns to query
 * @throws IOException  if reading the import file or writing the output fails
 * @throws SQLException if a database lookup fails
 */
private void runFileQuery(File importFile) throws IOException, SQLException {
    List<String> fqdns;
    importFileName = FileUtils.getFileName(importFile);
    inputStreamReader = new InputStreamReader(
            new FileInputStream(importFile), "GBK");
    bufferedReader = new BufferedReader(inputStreamReader);
    try {
        fileLineCount = FileUtils.getFileLineNum(importFile);
        // Reset the per-file statistics.
        standardFqdnNum = 0;
        complNum = 0;
        dbQueryNum = 0;
        bcQueryNum = 0;
        failQueryNum = 0;
        effecResNum = 0;
        noLabelNum = 0;
        // Create the file that receives the query results.
        LOG.info("[Offline import file query]-" + importFileName + ": File Found.");
        LOG.info("[Offline import file query]-" + currentType + ".");
        String outputFileName =
                importFileName.substring(0, importFileName.length() - CommonConfig.OFFLINE_IMPORT_FILENAME_SUFFIX.length())
                        + CommonConfig.OFFLINE_OUTPUT_FILENAME_SUFFIX;
        // Only non-category query types get the type appended to the file name.
        outputFileName = currentType.equals(CommonConfig.BC_API_NAME_CATEGORY) ? outputFileName : outputFileName + '.' + currentType;
        outputFile = new File(CommonConfig.OFFLINE_OUTPUT_PATH + "/" + outputFileName);
        if (!outputFile.exists()) {
            FileUtils.createFile(new File(CommonConfig.OFFLINE_OUTPUT_PATH), outputFileName);
        }
        outStream = new FileOutputStream(outputFile);
        outWriter = new OutputStreamWriter(outStream, StandardCharsets.UTF_8);
        try {
            // Header row.
            outWriter.write(FqdnFile.getKeys(currentType) + "\n");
            // Fqdns that still have to be sent to BrightCloud.
            List<String> toQueryBc = new ArrayList<>();
            // Batched read; blank lines and surrounding spaces are dropped while reading.
            LOG.info("[Offline import file query]-" + currentType + "-" + importFileName + " Progress: 0.00%");
            while ((fqdns = FileUtils.getBatchFqdnReadIn(bufferedReader, CommonConfig.OFFLINE_READIN_BATCH)).size() > 0) {
                // Validate the format and de-duplicate.
                fqdns = SingleTermTask.getCheckedFqdn(fqdns);
                standardFqdnNum += fqdns.size();
                // Batch database lookup on the exact fqdn.
                List<String> dbQueryFqdns = fetchFromDb(fqdns);
                fqdns.removeAll(dbQueryFqdns);
                // Misses are retried by their truncated second-level domain.
                List<String> dbSecDomainQueryFqdns = fetchFromDbSecDomain(fqdns);
                fqdns.removeAll(dbSecDomainQueryFqdns);
                // Whatever is left goes to BrightCloud.
                toQueryBc.addAll(fqdns);
                while (toQueryBc.size() > CommonConfig.MAXIMUM_URL_ONCE_BC_QUERY) {
                    // Take the head of the pending list and then really remove it, instead of
                    // re-pointing the variable at a subList view of a subList view: the chained
                    // views never release processed entries and nest one level deeper per batch.
                    List<String> head = toQueryBc.subList(0, CommonConfig.MAXIMUM_URL_ONCE_BC_QUERY);
                    fetchFromBc(new ArrayList<>(head));
                    head.clear();
                }
            }
            // Flush the remaining BrightCloud queries.
            if (toQueryBc.size() > 0) {
                fetchFromBc(toQueryBc);
            }
            LOG.info("[Offline import file query]-" + currentType + "-" + importFileName + " Progress: 100.00%");
            // Summary logs.
            logQueryFileStatistic();
            logBcCount();
        } finally {
            FileUtils.writerClose(outWriter, outStream);
        }
    } finally {
        FileUtils.readerClose(bufferedReader, inputStreamReader);
    }
}
/**
 * Looks up a batch of fqdns in the database for the current query type and
 * writes every hit to the output file.
 *
 * <p>A row that cannot be converted or written is logged and skipped; it does
 * not abort the batch.
 *
 * @param fqdns fqdns to look up
 * @return the fqdns for which a database record was found and written
 * @throws SQLException if running or iterating the query fails
 */
private List<String> fetchFromDb(List<String> fqdns) throws SQLException {
    List<String> dbQueryFqdns = new ArrayList<>();
    // try-with-resources: the result set was previously never closed.
    try (ResultSet dbResult = mariaDB.getDatabaseRecord(fqdns, currentType)) {
        while (dbResult.next()) {
            try {
                FqdnFile fqdnFile = SingleTermTask.ResSet2FqdnFile(dbResult, currentType);
                dbQueryFqdns.add(fqdnFile.getFqdn());
                outWriter.write(fqdnFile.getValues(currentType) + "\n");
                // Update the statistics.
                singleQueryCumulate(fqdnFile, "db");
                // Emit a progress line when a checkpoint is reached.
                singleQueryLogger();
            } catch (Exception e) {
                // Pass the exception to the logger so the stack trace lands in the log file
                // rather than on stderr.
                LOG.error("[Offline import file query]-" + currentType + "-" + importFileName + ": Wrong in database query", e);
            }
        }
    }
    return dbQueryFqdns;
}
/**
 * Retries fqdns that had no exact database match by looking up their
 * second-level domain instead. Every fqdn whose second-level domain has a
 * record is written to the output file with that record's values.
 *
 * @param fqdns fqdns that were not found by exact match
 * @return the fqdns that were resolved through their second-level domain
 * @throws SQLException if running or iterating the query fails
 */
private List<String> fetchFromDbSecDomain(List<String> fqdns) throws SQLException {
    List<String> dbQueryFqdnsSecDomain = new ArrayList<>();
    if (fqdns.isEmpty()) {
        return dbQueryFqdnsSecDomain;
    }
    // Resolve each fqdn individually against a suffix table loaded once. The list-based
    // TopDomainUtils.getSecDomain(List) drops entries it cannot resolve, so pairing its
    // result with fqdns by index could attach an fqdn to the wrong second-level domain.
    HashMap<String, HashMap<String, String>> tldMaps = TopDomainUtils.readTopDomainFile(CommonConfig.TLD_FILEPATH);
    Map<String, List<String>> secDomain2Fqdn = new HashMap<String, List<String>>();
    for (String fqdn : fqdns) {
        String secDomain = TopDomainUtils.getSecDomain(fqdn, tldMaps);
        // "---no---return---" is the failure marker returned by TopDomainUtils.getSecDomain.
        if (secDomain == null || secDomain.trim().isEmpty() || "---no---return---".equals(secDomain)) {
            continue;
        }
        List<String> group = secDomain2Fqdn.get(secDomain);
        if (group == null) {
            group = new ArrayList<>();
            secDomain2Fqdn.put(secDomain, group);
        }
        group.add(fqdn);
    }
    if (secDomain2Fqdn.isEmpty()) {
        // Nothing could be truncated, so there is nothing to query.
        return dbQueryFqdnsSecDomain;
    }
    // Query the second-level domains; the result set is closed when done.
    try (ResultSet dbResult = mariaDB.getDatabaseRecord(new ArrayList<>(secDomain2Fqdn.keySet()), currentType)) {
        while (dbResult.next()) {
            try {
                FqdnFile secDomainFile = SingleTermTask.ResSet2FqdnFile(dbResult, currentType);
                for (String fqdn : secDomain2Fqdn.get(secDomainFile.getFqdn())) {
                    dbQueryFqdnsSecDomain.add(fqdn);
                    // Report the record under the original fqdn, not the truncated domain.
                    secDomainFile.setFqdn(fqdn);
                    outWriter.write(secDomainFile.getValues(currentType) + "\n");
                    // Update the statistics.
                    singleQueryCumulate(secDomainFile, "db");
                    // Emit a progress line when a checkpoint is reached.
                    singleQueryLogger();
                }
            } catch (Exception e) {
                LOG.error("[Offline import file query]-" + currentType + "-"+ importFileName + ": Wrong in database query", e);
            }
        }
    }
    return dbQueryFqdnsSecDomain;
}
/**
 * Queries BrightCloud for the given fqdns with the current query type, stores
 * the parsed records in the database and appends them to the output file.
 *
 * @param fqdns fqdns to send to BrightCloud in one request
 * @throws IOException if writing to the output file fails
 */
private void fetchFromBc(List<String> fqdns) throws IOException {
    List<FqdnFile> bcRecords = brightCloudUtils.responseSparse(
            brightCloudUtils.getQueryResults(fqdns, currentType), currentType);
    assert bcRecords.size() > 0;
    // Persist first, then report.
    mariaDB.insertRecords(bcRecords, currentType);
    for (int i = 0; i < bcRecords.size(); i++) {
        FqdnFile record = bcRecords.get(i);
        // One output row per record.
        outWriter.write(record.getValues(currentType) + "\n");
        // Update the statistics.
        singleQueryCumulate(record, "bc");
        // Emit a progress line when a checkpoint is reached.
        singleQueryLogger();
    }
}
/**
 * Logs the summary statistics of the file that was just processed: number of
 * valid fqdns submitted and, each with its share of that number, the results
 * served from the database, the results served by BrightCloud, the effective
 * results, the failed queries and the unlabeled results.
 */
private void logQueryFileStatistic() {
    LOG.info("[Offline import file query]-" + currentType + "-" + importFileName + " "
            + "Query result: submit " + standardFqdnNum + " valid fqdns, "
            + dbQueryNum + " (" + formatShare(dbQueryNum) + ")" + " results from database, "
            + bcQueryNum + " (" + formatShare(bcQueryNum) + ")" + " results from bright cloud. "
            + effecResNum + " (" + formatShare(effecResNum) + ")" + " effective results, "
            + failQueryNum + " (" + formatShare(failQueryNum) + ")" + " failed queries, "
            + noLabelNum + " (" + formatShare(noLabelNum) + ")" + " unlabeled results");
    LOG.info("[Offline import file query]-" + importFileName + " Results saved in " + outputFile.toString());
}

/**
 * Formats {@code count} as a percentage of the submitted valid fqdns.
 * Yields the formatted zero share when nothing was submitted, instead of
 * dividing by zero and printing NaN.
 */
private String formatShare(long count) {
    float share = standardFqdnNum == 0 ? 0f : (float) count / standardFqdnNum;
    return new DecimalFormat("##.0%").format(share);
}
/**
 * Appends one line to the BrightCloud query report file recording how many
 * BrightCloud results this file produced (used for service-call statistics).
 * Nothing is written when no BrightCloud result was counted.
 *
 * <p>Line format: {@code date,OfflineTask,importFileName,queryType,count}.
 *
 * @throws IOException if the report file cannot be opened or written
 */
private void logBcCount() throws IOException {
    if (bcQueryNum > 0) {
        // Opened in append mode so earlier report lines are kept.
        OutputStream bcQueryLogStream = new FileOutputStream(CommonConfig.LOG_BC_QUERY_REPORT_FILE, true);
        OutputStreamWriter bcQueryLogWriter = new OutputStreamWriter(bcQueryLogStream, StandardCharsets.UTF_8);
        try {
            java.sql.Date d = new java.sql.Date(System.currentTimeMillis());
            bcQueryLogWriter.write(d + "," + "OfflineTask," + importFileName + "," + currentType + "," + bcQueryNum + "\n");
        } finally {
            // Release the file handle even when the write fails.
            FileUtils.writerClose(bcQueryLogWriter, bcQueryLogStream);
        }
    }
}
/**
 * Updates the running statistics with one query result.
 *
 * @param fqdnFile     the result that was just produced
 * @param cumulateType source of the result: "db" counts towards database
 *                     results, "bc" towards BrightCloud results; any other
 *                     value only counts towards the completed total
 */
private void singleQueryCumulate(FqdnFile fqdnFile, String cumulateType) {
    switch (cumulateType) {
        case "db":
            dbQueryNum += 1;
            break;
        case "bc":
            bcQueryNum += 1;
            break;
        default:
            break;
    }
    complNum += 1;

    boolean failed = fqdnFile.getQuery_success().equals(false);
    if (failed) {
        failQueryNum += 1;
    }

    // Label statistics are only tracked for the category query type.
    if (!currentType.equals(CommonConfig.BC_API_NAME_CATEGORY)) {
        return;
    }
    boolean unlabeled = fqdnFile.getCategory_id().equals(0);
    if (unlabeled) {
        noLabelNum += 1;
    }
    // A result is effective when the query succeeded and a category was assigned.
    if (!failed && !unlabeled) {
        effecResNum += 1;
    }
}
/**
 * Logs the progress of the current file each time the number of completed
 * queries reaches a positive multiple of the configured logging interval.
 */
private void singleQueryLogger(){
    boolean atCheckpoint = complNum > 0 && complNum % CommonConfig.LOG_OFFLINE_NUMBER == 0;
    if (!atCheckpoint) {
        return;
    }
    DecimalFormat progressFormat = new DecimalFormat("##.00%");
    String progress = progressFormat.format((float) complNum / fileLineCount);
    LOG.info("[Offline import file query]-" + currentType + "-"+ importFileName + " Progress:" + progress);
}
/**
* 获取新增倒入csv文件
* @return 新增的文件列表
*/
private List<String> captureNewFiles() {
List<String> newFiles = new ArrayList<>();
// Get all files
File dir = new File(ApplicationConfig.OFFLINE_IMPORT_PATH);
File dir = new File(CommonConfig.OFFLINE_IMPORT_PATH);
if (!dir.exists()) {
boolean isCreateDir = dir.mkdir();
if (isCreateDir) {
LOG.info("Create new dictionary: " + ApplicationConfig.OFFLINE_IMPORT_PATH);
LOG.info("Create new dictionary: " + CommonConfig.OFFLINE_IMPORT_PATH);
} else {
LOG.error("Failed to create import dictionary: " + ApplicationConfig.OFFLINE_IMPORT_PATH);
LOG.error("Failed to create import dictionary: " + CommonConfig.OFFLINE_IMPORT_PATH);
}
}
File[] allFiles = dir.listFiles();
if (allFiles == null) {
LOG.info("Dir is empty: " + ApplicationConfig.OFFLINE_IMPORT_PATH + System.currentTimeMillis());
LOG.info("Dir is empty: " + CommonConfig.OFFLINE_IMPORT_PATH + System.currentTimeMillis());
} else {
for (File file : allFiles) {
if (file.isFile() && file.getName().endsWith(ApplicationConfig.OFFLINE_IMPORT_FILENAME_SUFFIX)) {
if (file.isFile() && file.getName().endsWith(CommonConfig.OFFLINE_IMPORT_FILENAME_SUFFIX)) {
newFiles.add(file.toString());
}
}
}
return newFiles;
}
}

View File

@@ -1,7 +1,6 @@
package cn.ac.iie.service;
import cn.ac.iie.config.ApplicationConfig;
import cn.ac.iie.dao.BaseMariaDB;
import cn.ac.iie.config.CommonConfig;
import cn.ac.iie.dao.FqdnFile;
import org.apache.commons.lang3.StringUtils;
import org.apache.log4j.Logger;
@@ -45,9 +44,9 @@ public class SingleTermTask {
}
}
if (host.split("\\.").length > ApplicationConfig.QUERY_STANDARD_FQDN_LEVEL){
if (host.split("\\.").length > CommonConfig.QUERY_STANDARD_FQDN_LEVEL){
List<String> domains = Arrays.asList(host.split("\\."));
host = StringUtils.join(domains.subList(domains.size()-ApplicationConfig.QUERY_STANDARD_FQDN_LEVEL,
host = StringUtils.join(domains.subList(domains.size()- CommonConfig.QUERY_STANDARD_FQDN_LEVEL,
domains.size()), ".");
}
return host;
@@ -55,7 +54,7 @@ public class SingleTermTask {
private static boolean isValidDomain(String str)
{
String regex = "^((?!-)[A-Za-z0-9-]"
String regex = "^((?!-)[A-Za-z0-9-_]"
+ "{1,63}(?<!-)\\.)"
+ "+[A-Za-z]{2,6}";
Pattern p = Pattern.compile(regex);
@@ -71,6 +70,8 @@ public class SingleTermTask {
public static List<String> getCheckedFqdn(List<String> fqdns){
List<String> res = new ArrayList<>();
for (String fqdn:fqdns){
//去端口号
fqdn = fqdn.split(":")[0];
// 去重 & 校验
if (isValidDomain(fqdn) && !res.contains(fqdn)){
res.add(fqdn.toLowerCase());
@@ -81,40 +82,51 @@ public class SingleTermTask {
return res;
}
public static FqdnFile ResSet2FqdnFile(ResultSet resultSet){
public static FqdnFile ResSet2FqdnFile(ResultSet resultSet, String queryType){
FqdnFile fqdnFile = null;
try {
fqdnFile = new FqdnFile(
resultSet.getString("fqdn"),
resultSet.getBoolean("query_success"),
resultSet.getInt("reputation_score"),
resultSet.getString("reputation_level"),
resultSet.getInt("category_id"),
resultSet.getString("category_Name"),
resultSet.getString("category_group"),
resultSet.getInt("category_conf"),
resultSet.getBoolean("is_a1_cat"),
resultSet.getInt("popularity"),
resultSet.getInt("observ_age"),
resultSet.getString("country"),
resultSet.getInt("threat_history"),
resultSet.getString("whois_domain"),
resultSet.getDate("whois_update_date"),
resultSet.getDate("whois_create_date"),
resultSet.getDate("whois_expire_date"),
resultSet.getString("whois_email"),
resultSet.getString("whois_ns"),
resultSet.getString("whois_registrar_name"),
resultSet.getString("whois_registrant_org"),
resultSet.getString("whois_regisrante_name"),
resultSet.getString("whois_regisrante_street"),
resultSet.getString("whois_regisrante_city"),
resultSet.getString("whois_regisrante_state"),
resultSet.getString("whois_regisrante_postcode"),
resultSet.getString("whois_regisrante_country"),
resultSet.getString("whois_regisrante_phone")
);
if (queryType.equals(CommonConfig.BC_API_NAME_CATEGORY)){
fqdnFile = new FqdnFile(
resultSet.getString("fqdn"),
resultSet.getBoolean("query_success"),
resultSet.getInt("reputation_score"),
resultSet.getString("reputation_level"),
resultSet.getInt("category_id"),
resultSet.getString("category_Name"),
resultSet.getString("category_group"),
resultSet.getInt("category_conf"),
resultSet.getBoolean("is_a1_cat"));
} else if (queryType.equals(CommonConfig.BC_API_NAME_REPUTATION)) {
fqdnFile = new FqdnFile(
resultSet.getString("fqdn"),
resultSet.getBoolean("query_success"),
resultSet.getInt("reputation_score"),
resultSet.getString("reputation_level"),
resultSet.getInt("popularity"),
resultSet.getInt("observ_age"),
resultSet.getString("country"),
resultSet.getInt("threat_history"));
} else if (queryType.equals(CommonConfig.BC_API_NAME_WHOIS)){
fqdnFile = new FqdnFile(
resultSet.getString("fqdn"),
resultSet.getBoolean("query_success"),
resultSet.getString("whois_domain"),
resultSet.getDate("whois_update_date"),
resultSet.getDate("whois_create_date"),
resultSet.getDate("whois_expire_date"),
resultSet.getString("whois_email"),
resultSet.getString("whois_ns"),
resultSet.getString("whois_registrar_name"),
resultSet.getString("whois_registrant_org"),
resultSet.getString("whois_regisrante_name"),
resultSet.getString("whois_regisrante_street"),
resultSet.getString("whois_regisrante_city"),
resultSet.getString("whois_regisrante_state"),
resultSet.getString("whois_regisrante_postcode"),
resultSet.getString("whois_regisrante_country"),
resultSet.getString("whois_regisrante_phone")
);
}
} catch (SQLException e) {
e.printStackTrace();
}

View File

@@ -1,6 +1,6 @@
package cn.ac.iie.service;
import cn.ac.iie.config.ApplicationConfig;
import cn.ac.iie.config.CommonConfig;
import cn.ac.iie.dao.BaseMariaDB;
import cn.ac.iie.dao.FqdnFile;
import cn.ac.iie.utils.BrightCloudUtils;
@@ -75,7 +75,7 @@ public class UpdateTask extends TimerTask {
LOG.info("[UpdateTask]-update records: " + expiredNum +" expired records, " + unlabeledNum + " unlabeled or failed-query records");
// 打印查询操作记录日志
OutputStream bcQueryLogStream = new FileOutputStream(ApplicationConfig.LOG_BC_QUERY_REPORT_FILE, true);
OutputStream bcQueryLogStream = new FileOutputStream(CommonConfig.LOG_BC_QUERY_REPORT_FILE, true);
OutputStreamWriter bcQueryLogWriter = new OutputStreamWriter(bcQueryLogStream, StandardCharsets.UTF_8);
for (String type : queryTypes) {
java.sql.Date d = new java.sql.Date(System.currentTimeMillis());

View File

@@ -1,6 +1,6 @@
package cn.ac.iie.utils;
import cn.ac.iie.config.ApplicationConfig;
import cn.ac.iie.config.CommonConfig;
import cn.ac.iie.dao.FqdnFile;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
@@ -11,6 +11,7 @@ import java.io.*;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.sql.Connection;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@@ -35,14 +36,14 @@ public class BrightCloudUtils {
e.printStackTrace();
}
if (ApplicationConfig.QUERY_URL_INFO_SWITCH){
queryTypes.add(props.getProperty("bc.api.queries.urlcat"));
if (CommonConfig.QUERY_URL_INFO_SWITCH){
queryTypes.add(props.getProperty("bc.api.name.category"));
}
if (ApplicationConfig.QUERY_URL_REP_SWITCH){
queryTypes.add(props.getProperty("bc.api.queries.urlrep"));
if (CommonConfig.QUERY_URL_REP_SWITCH){
queryTypes.add(props.getProperty("bc.api.name.reputation"));
}
if (ApplicationConfig.QUERY_URL_WHOIS_SWITCH){
queryTypes.add(props.getProperty("bc.api.queries.urlwhois"));
if (CommonConfig.QUERY_URL_WHOIS_SWITCH){
queryTypes.add(props.getProperty("bc.api.name.whois"));
}
assert queryTypes.size()>0: "Switch of all query type has been turned off, please edit the application.properties";
}
@@ -54,7 +55,11 @@ public class BrightCloudUtils {
private final HashMap<Integer, List<String>> catId2Info = new HashMap<>();
public JSONObject getQueryResults (List<String> urls) {
if (urls.size()>ApplicationConfig.MAXIMUM_URL_ONCE_BC_QUERY){
return getQueryResults(urls, CommonConfig.BC_API_NAME_CATEGORY);
}
public JSONObject getQueryResults (List<String> urls, String queryType) {
if (urls.size()> CommonConfig.MAXIMUM_URL_ONCE_BC_QUERY){
LOG.warn("Too many urls in a http post request!");
}
JSONObject jsonRes = null;
@@ -74,7 +79,7 @@ public class BrightCloudUtils {
param.put("deviceid", props.getProperty("bc.deviceid"));
param.put("uid", props.getProperty("bc.uid"));
param.put("queries", queryTypes);
param.put("queries", new ArrayList<>(Collections.singletonList(queryType)));
param.put("a1cat", props.getProperty("bc.api.a1cat"));
param.put("reputation", props.getProperty("bc.api.reputation"));
param.put("xml", props.getProperty("bc.api.xml"));
@@ -115,6 +120,10 @@ public class BrightCloudUtils {
}
public List<FqdnFile> responseSparse(JSONObject records){
return responseSparse(records, CommonConfig.BC_API_NAME_CATEGORY);
}
public List<FqdnFile> responseSparse(JSONObject records, String queryType){
List<FqdnFile> fqdnFiles = new ArrayList<>();
Boolean querySucess = records.get("status").equals(200);
@@ -128,54 +137,56 @@ public class BrightCloudUtils {
// json处理
JSONObject queries = jo.getJSONObject("queries");
JSONObject getInfo = ApplicationConfig.QUERY_URL_INFO_SWITCH ?
queries.getJSONObject(props.getProperty("bc.api.queries.urlcat")): new JSONObject();
JSONObject getRepInfo = ApplicationConfig.QUERY_URL_REP_SWITCH ?
queries.getJSONObject(props.getProperty("bc.api.queries.urlrep")): new JSONObject();
JSONObject getWhoisInfo = ApplicationConfig.QUERY_URL_WHOIS_SWITCH ?
queries.getJSONObject(props.getProperty("bc.api.queries.urlwhois")): new JSONObject();
JSONObject cat = getInfo.getJSONArray("cats").getJSONObject(0);
Integer catId = cat.getInteger("catid");
JSONObject getInfo = queries.getJSONObject(queryType);
String whoisEmail = "";
if (isEmail(getWhoisInfo.getString("contactemail"))){
whoisEmail = getWhoisInfo.getString("contactemail");
if (queryType.equals(CommonConfig.BC_API_NAME_CATEGORY)){
JSONObject cat = getInfo.getJSONArray("cats").getJSONObject(0);
Integer catId = cat.getInteger("catid");
fqdnFiles.add(new FqdnFile(
jo.getString("url"),
querySucess,
getInfo.getInteger("reputation"),
getRepLevel(getInfo.getInteger("reputation")),
catId,
getCatInfo(catId).get(0),
getCatInfo(catId).get(1),
cat.getInteger("conf"),
getInfo.getBoolean("a1cat")));
} else if (queryType.equals(CommonConfig.BC_API_NAME_REPUTATION)){
fqdnFiles.add(new FqdnFile(
jo.getString("url"),
querySucess,
getInfo.getInteger("reputation"),
getRepLevel(getInfo.getInteger("reputation")),
getInfo.getInteger("popularity"),
getInfo.getInteger("age"),
getInfo.getString("country"),
getInfo.getInteger("threathistory")));
} else if (queryType.equals(CommonConfig.BC_API_NAME_WHOIS)){
String whoisEmail = "";
if (isEmail(getInfo.getString("contactemail"))){
whoisEmail = getInfo.getString("contactemail");
}
fqdnFiles.add(new FqdnFile(
jo.getString("url"),
querySucess,
getInfo.getString("domainname"),
getInfo.getDate("audit_auditupdateddate"),
getInfo.getDate("createddate"),
getInfo.getDate("expiresdate"),
whoisEmail,
getInfo.getString("nameservers"),
getInfo.getString("registrarname"),
getInfo.getString("registrant_organization"),
getInfo.getString("registrant_name"),
getInfo.getString("registrant_street1"),
getInfo.getString("registrant_city"),
getInfo.getString("registrant_state"),
getInfo.getString("registrant_postalcode"),
getInfo.getString("registrant_country"),
getInfo.getString("registrant_telephone")));
}
fqdnFiles.add(new FqdnFile(
jo.getString("url"),
querySucess,
getInfo.getInteger("reputation"),
getRepLevel(getInfo.getInteger("reputation")),
catId,
getCatInfo(catId).get(0),
getCatInfo(catId).get(1),
cat.getInteger("conf"),
getInfo.getBoolean("a1cat"),
getRepInfo.getInteger("popularity"),
getRepInfo.getInteger("age"),
getRepInfo.getString("country"),
getRepInfo.getInteger("threathistory"),
getWhoisInfo.getString("domainname"),
getWhoisInfo.getDate("audit_auditupdateddate"),
getWhoisInfo.getDate("createddate"),
getWhoisInfo.getDate("expiresdate"),
whoisEmail,
getWhoisInfo.getString("nameservers"),
getWhoisInfo.getString("registrarname"),
getWhoisInfo.getString("registrant_organization"),
getWhoisInfo.getString("registrant_name"),
getWhoisInfo.getString("registrant_street1"),
getWhoisInfo.getString("registrant_city"),
getWhoisInfo.getString("registrant_state"),
getWhoisInfo.getString("registrant_postalcode"),
getWhoisInfo.getString("registrant_country"),
getWhoisInfo.getString("registrant_telephone")));
}
}
return fqdnFiles;
@@ -183,17 +194,18 @@ public class BrightCloudUtils {
private String getRepLevel(Integer repScore){
String level = null; //用str存放数据
if (repScore > 80) level="Trustworthy";
else if (repScore > 60) level="Low Risk";
else if (repScore > 40) level="Moderate Risk";
else if (repScore > 20) level="Suspicious";
else if (repScore > 0) level="High Risk";
if (repScore > 80){ level="Trustworthy";}
else if (repScore > 60){ level="Low Risk";}
else if (repScore > 40){ level="Moderate Risk";}
else if (repScore > 20){ level="Suspicious";}
else if (repScore > 0){ level="High Risk";}
return level;
}
public static boolean isEmail(String string) {
if (string == null)
if (string == null){
return false;
}
String regEx1 = "^([a-z0-9A-Z]+[-|\\.]?)+[a-z0-9A-Z]@([a-z0-9A-Z]+(-[a-z0-9A-Z]+)?\\.)+[a-zA-Z]{2,}$";
Pattern p;
Matcher m;

View File

@@ -31,6 +31,7 @@ public class ConfigUtils {
try {
propCommon.load(ConfigUtils.class.getClassLoader().getResourceAsStream("application.properties"));
propCommon.load(MariaDbConnection.class.getClassLoader().getResourceAsStream("druid.properties"));
propCommon.load(MariaDbConnection.class.getClassLoader().getResourceAsStream("brightcloud.properties"));
} catch (Exception e) {

View File

@@ -1,13 +1,10 @@
package cn.ac.iie.utils;
import cn.ac.iie.config.ApplicationConfig;
import org.apache.log4j.Logger;
import java.io.*;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
/**
* @author yjy

View File

@@ -1,6 +1,6 @@
package cn.ac.iie.utils;
import cn.ac.iie.config.ApplicationConfig;
import cn.ac.iie.config.CommonConfig;
import java.io.File;
@@ -12,8 +12,8 @@ import java.io.File;
public class LogUtils {
// 确认BrightCloud查询记录日志文件存在
static {
assert new File(ApplicationConfig.LOG_BC_QUERY_REPORT_FILE).exists():
"Cannot find Bright Cloud query log file: " + ApplicationConfig.LOG_BC_QUERY_REPORT_FILE;
assert new File(CommonConfig.LOG_BC_QUERY_REPORT_FILE).exists():
"Cannot find Bright Cloud query log file: " + CommonConfig.LOG_BC_QUERY_REPORT_FILE;
}
}

View File

@@ -1,6 +1,6 @@
package cn.ac.iie.utils;
import cn.ac.iie.config.ApplicationConfig;
import cn.ac.iie.config.CommonConfig;
import java.util.Calendar;
import java.util.Date;
@@ -53,7 +53,7 @@ public class TimeUtils {
}
public static Date getExpiredTime(){
return new java.sql.Timestamp(TimeUtils.getStartOfDay(-ApplicationConfig.UPDATE_SCHEDULE_DAY).getTime());
return new java.sql.Timestamp(TimeUtils.getStartOfDay(-CommonConfig.UPDATE_SCHEDULE_DAY).getTime());
}
public static Date getExpiredTime(String test){

View File

@@ -0,0 +1,294 @@
package cn.ac.iie.utils;
import org.apache.log4j.Logger;
import org.apache.commons.lang3.StringUtils;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import cn.ac.iie.config.CommonConfig;
public class TopDomainUtils {
private static Logger logger = Logger.getLogger(TopDomainUtils.class);
/**
 * Extracts the second-level domain of a host name using the top-level-domain
 * tables built by {@link #readTopDomainFile(String)}.
 *
 * <p>Labels are scanned from right to left. The first label that is not known
 * at its position in any listed suffix starts the candidate; leading labels
 * are then stripped from the candidate until the part after its first label
 * is a listed suffix or has no dot left.
 *
 * @param urlDomain source domain
 * @param maps      suffix tables: key "full" holds every listed suffix, key
 *                  "map_id_N" holds the labels seen N positions from the right
 * @return the second-level domain; {@code urlDomain} itself when every label
 *         matches a suffix label; the marker "---no---return---" when the
 *         lookup fails with an exception
 */
public static String getSecDomain(String urlDomain, HashMap<String, HashMap<String, String>> maps) {
    try {
        String[] split = urlDomain.split("\\.");
        HashMap<String, String> fullTop = maps.get("full");
        String secDomain = null;
        for (int i = split.length - 1; i >= 0; i--) {
            int mapsIndex = split.length - (i + 1);
            HashMap<String, String> innerMap = maps.get("map_id_" + mapsIndex);
            if (innerMap == null) {
                if (mapsIndex == 0) {
                    // No table at all: the suffix list was not loaded, so fail as before.
                    throw new IllegalStateException("top-level domain table is empty");
                }
                // Deeper than the longest listed suffix: this label cannot belong to a
                // suffix. Previously this position raised a NullPointerException.
            } else if (innerMap.containsKey(split[i])) {
                continue;
            }
            // Candidate = labels i..end joined by dots.
            StringBuilder candidate = new StringBuilder(split[i]);
            for (int j = i + 1; j < split.length; j++) {
                candidate.append('.').append(split[j]);
            }
            secDomain = candidate.toString();
            // Strip leading labels until the remainder after the first label is a listed suffix.
            String top = getTopDomainFromSecDomain(secDomain);
            while (!fullTop.containsKey(top) && top.contains(".")) {
                secDomain = top;
                top = getTopDomainFromSecDomain(secDomain);
            }
            break;
        }
        // Every label matched a suffix label: return the input unchanged.
        if (secDomain == null) {
            secDomain = urlDomain;
        }
        return secDomain;
    } catch (Exception e) {
        logger.error("urlDomain:" + urlDomain, e);
        return "---no---return---";
    }
}
/**
 * Extracts the second-level domain of a single fqdn, loading the suffix
 * tables from the configured top-level-domain file on every call.
 *
 * @param fqdn source domain
 * @return the result of {@link #getSecDomain(String, HashMap)} for that fqdn
 */
public static String getSecDomain(String fqdn){
    return getSecDomain(fqdn, readTopDomainFile(CommonConfig.TLD_FILEPATH));
}
/**
 * Extracts the second-level domain of every domain in the list, loading the
 * suffix tables once for the whole batch.
 *
 * <p>Domains whose lookup fails or yields a blank value are left out of the
 * result, so the returned list can be shorter than the input and its positions
 * do not necessarily line up with the input positions.
 *
 * @param urlDomains source domains
 * @return the second-level domains that could be resolved, in input order
 */
public static List<String> getSecDomain(List<String> urlDomains) {
    HashMap<String, HashMap<String, String>> maps = readTopDomainFile(CommonConfig.TLD_FILEPATH);
    List<String> secDomainList = new ArrayList<>();
    for (String oriDomain : urlDomains) {
        String secDomain = getSecDomain(oriDomain, maps);
        if (StringUtils.isNotBlank(secDomain) && !("---no---return---".equals(secDomain))) {
            secDomainList.add(secDomain);
        } else {
            // Report through the logger instead of printing to stdout.
            logger.warn("TopDomainUtils>=>getSecDomain cannot resolve second-level domain--->{" + oriDomain + "}<---");
        }
    }
    return secDomainList;
}
/**
 * Drops the first label of a domain.
 *
 * @param secDomain domain to shorten
 * @return everything after the first dot, or the input unchanged when it
 *         contains no dot
 */
public static String getTopDomainFromSecDomain(String secDomain) {
    int firstDot = secDomain.indexOf('.');
    if (firstDot < 0) {
        return secDomain;
    }
    return secDomain.substring(firstDot + 1);
}
/**
 * Reads the top-level-domain list (one suffix per line, UTF-8) into lookup
 * tables.
 *
 * <p>The table under key "full" maps every line to itself. The table under key
 * "map_id_N" holds the labels found N positions from the right across all
 * lines. When the path is not an existing regular file, or reading fails, the
 * error is logged and the tables filled so far are returned.
 *
 * @param filePath path of the top-level-domain list
 * @return the lookup tables, possibly empty or partially filled
 */
public static HashMap<String, HashMap<String, String>> readTopDomainFile(String filePath) {
    HashMap<String, HashMap<String, String>> maps = makeHashMap(filePath);
    try {
        File file = new File(filePath);
        if (file.isFile() && file.exists()) {
            // try-with-resources closes the reader even when a line fails to process.
            try (BufferedReader bufferedReader = new BufferedReader(
                    new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8))) {
                HashMap<String, String> fullTop = maps.get("full");
                String lineTxt;
                while ((lineTxt = bufferedReader.readLine()) != null) {
                    fullTop.put(lineTxt, lineTxt);
                    String[] split = lineTxt.split("\\.");
                    for (int i = split.length - 1; i >= 0; i--) {
                        // Position of this label counted from the right-most label.
                        int mapsIndex = split.length - (i + 1);
                        maps.get("map_id_" + mapsIndex).put(split[i], split[i]);
                    }
                }
            }
        } else {
            logger.error("TopDomainUtils>=>readTopDomainFile filePath is wrong--->{" + filePath + "}<---");
        }
    } catch (Exception e) {
        logger.error("TopDomainUtils>=>readTopDomainFile get filePathData error--->{" + e + "}<---", e);
    }
    return maps;
}
/**
 * Determines the largest number of dot-separated labels on any line of the
 * top-level-domain list.
 *
 * <p>When the path is not an existing regular file, or reading fails, the
 * error is logged and the maximum found so far (0 if none) is returned.
 *
 * @param filePath path of the top-level-domain list
 * @return the maximum label count over all lines read
 */
public static int getMaxLength(String filePath) {
    int lengthDomain = 0;
    try {
        File file = new File(filePath);
        if (file.isFile() && file.exists()) {
            // try-with-resources closes the reader even when reading fails part-way.
            try (BufferedReader bufferedReader = new BufferedReader(
                    new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8))) {
                String lineTxt;
                while ((lineTxt = bufferedReader.readLine()) != null) {
                    lengthDomain = Math.max(lengthDomain, lineTxt.split("\\.").length);
                }
            }
        } else {
            logger.error("TopDomainUtils>>getMaxLength filePath is wrong--->{" + filePath + "}<---");
        }
    } catch (Exception e) {
        logger.error("TopDomainUtils>=>getMaxLength get filePathData error--->{" + e + "}<---", e);
    }
    return lengthDomain;
}
/**
 * Creates the empty lookup tables for a top-level-domain list: one table
 * "map_id_N" for each label position up to the longest suffix in the file,
 * plus the table "full".
 *
 * @param filePath path of the top-level-domain list
 * @return the empty tables, keyed by "map_id_0" .. "map_id_(max-1)" and "full"
 */
public static HashMap<String, HashMap<String, String>> makeHashMap(String filePath) {
    HashMap<String, HashMap<String, String>> tables = new HashMap<>();
    final int depth = getMaxLength(filePath);
    int level = 0;
    while (level < depth) {
        tables.put("map_id_" + level, new HashMap<String, String>());
        level++;
    }
    tables.put("full", new HashMap<String, String>());
    return tables;
}
// /**
// * 读取文件比对后写入另一个文件
// *
// */
// public static void main(String[] args) {
//
// try {
// /**
// * 读ysp2019.csv写入ysp2019-sec.csv
// */
// ArrayList<String> oriDomainList = readFile("D:/inputDomainFile/ysp2019.csv");
// writeFile("D:/outputSecDomainFile/ysp2019-sec.csv", oriDomainList);
//
// /**
// * 将文件内容去重
// */
// RemoveDuplicates("D:/outputSecDomainFile/repetitive/ysp2019-sec.csv",
// "D:/outputSecDomainFile/no-repetitive/ysp2019-sec.csv");
// } catch (Exception e) {
// e.printStackTrace();
// }
//
// }
//
//
// /**
// * 文件写入
// * @param outputFilePath
// * @param oriDomainList
// * @throws IOException
// */
// public static void writeFile(String outputFilePath, ArrayList<String> oriDomainList) throws IOException {
// ArrayList<String> secDomainList = new ArrayList<>();
// HashMap<String, HashMap<String, String>> maps = readTopDomainFile("D:/domainsuffix/public_suffix_list_only.dat");
//
//
// for (String oriDomain : oriDomainList) {
// String secDomain = getSecDomain(oriDomain, maps);
// if (StringUtils.isNotBlank(secDomain) && !("---no---return---".equals(secDomain))) {
// secDomainList.add(secDomain);
// }
// }
//
//
// FileOutputStream fos = new FileOutputStream(new File(outputFilePath));
// OutputStreamWriter osw = new OutputStreamWriter(fos, StandardCharsets.UTF_8);
// BufferedWriter bw = new BufferedWriter(osw);
// for (String secDomain : secDomainList) {
// bw.write(secDomain + "\t\n");
// }
//
// bw.close();
// osw.close();
// fos.close();
// }
//
// /**
// * 读文件将其放入ArrayList<String>
// *
// * @param inputFilePath
// * @throws IOException
// */
// public static ArrayList<String> readFile(String inputFilePath) throws IOException {
// ArrayList<String> oriDomainList = new ArrayList<>();
//
// FileInputStream fis = new FileInputStream(inputFilePath);
// InputStreamReader isr = new InputStreamReader(fis, "UTF-8");
// BufferedReader br = new BufferedReader(isr);
// String line = "";
// while ((line = br.readLine()) != null) {
// if (StringUtils.isNotBlank(line)) {
// oriDomainList.add(line);
// }
//
// }
// br.close();
// isr.close();
// fis.close();
//
//
// return oriDomainList;
// }
//
// /**
// * 读取文件写入HashSet去重
// * @param inputFilePath
// * @param outputFilePath
// * @throws IOException
// */
// public static void RemoveDuplicates(String inputFilePath,String outputFilePath) throws IOException {
// HashSet<String> secDomainHashSet = new HashSet<>();
//
// FileInputStream fis = new FileInputStream(inputFilePath);
// InputStreamReader isr = new InputStreamReader(fis, "UTF-8");
// BufferedReader br = new BufferedReader(isr);
// String line = "";
// while ((line = br.readLine()) != null) {
// if (StringUtils.isNotBlank(line)) {
// secDomainHashSet.add(line);
// }
//
// }
// br.close();
// isr.close();
// fis.close();
//
// FileOutputStream fos = new FileOutputStream(new File(outputFilePath));
// OutputStreamWriter osw = new OutputStreamWriter(fos, StandardCharsets.UTF_8);
// BufferedWriter bw = new BufferedWriter(osw);
//
// for (String secDomain : secDomainHashSet) {
// bw.write(secDomain + "\t\n");
// }
//
// bw.close();
// osw.close();
// fos.close();
//
// }
}

View File

@@ -25,7 +25,9 @@ update.schedule.day = 7
# mariadb
database = web_sketch
table = fqdn_profile_via_brightcloud
category_info_table = fqdn_category_via_brightcloud
reputation_info_table = fqdn_reputation_via_brightcloud
whois_info_table = fqdn_whois_via_brightcloud
# offline读取批处理量
offline.readin.batch = 1000
@@ -38,15 +40,20 @@ maximum.url.once.bc.query = 100
log.offline.number = 10000
log.bc.query.report.file = /home/WebSKT/Data/bright_cloud_query_count.csv
# resource
tld.filepath = /home/WebSKT/Data/public_suffix_list_only.dat
########################
### for test ###########
########################
## 离线导入指定目录
#offline.import.path = /Users/joy/work/iie/project/cyber_narrator/APP/WebSketch/QueryAgentV3/files/import_file
## 离线指定查询结果保存目录
#offline.output.path = /Users/joy/work/iie/project/cyber_narrator/APP/WebSketch/QueryAgentV3/files/output_file
## 服务调用统计结果
######################
# for test ###########
######################
# 离线导入指定目录
#offline.import.path = /Users/joy/work/iie/project/cyber_narrator/APP/WebSketch/QueryAgentV4/files/import_file
# 离线指定查询结果保存目录
#offline.output.path = /Users/joy/work/iie/project/cyber_narrator/APP/WebSketch/QueryAgentV4/files/output_file
# 服务调用统计结果
#log.bc.query.report.file = /Users/joy/work/iie/project/cyber_narrator/App/WebSketch/LOG_IMPORTANT/bright_cloud_query_count.csv
#tld.filepath = /Users/joy/work/iie/project/cyber_narrator/App/WebSketch/QueryAgentV4/src/main/resources/public_suffix_list_only.dat

View File

@@ -6,9 +6,9 @@ bc.api.url = https://api.bcti.brightcloud.com/1.0/url/getinfo
bc.api.method = POST
bc.api.type = url
bc.api.queries.urlcat = getinfo
bc.api.queries.urlrep = getrepinfo
bc.api.queries.urlwhois = getwhoisinfo
bc.api.name.category = getinfo
bc.api.name.reputation = getrepinfo
bc.api.name.whois = getwhoisinfo
bc.api.a1cat = 1
# 是否默认返回信誉评分
bc.api.reputation = 1
@@ -16,6 +16,8 @@ bc.api.reputation = 1
bc.api.xml = 0
bc.cateinfo.filepath = /home/WebSKT/Data/categoryinfo.json
# for test
#bc.cateinfo.filepath = /Users/joy/work/iie/project/cyber_narrator/App/WebSketch/QueryAgentV4/src/main/resources/categoryinfo.json

View File

@@ -20,4 +20,4 @@ log4j.appender.E.layout.ConversionPattern = %-d{yyyy-MM-dd HH:mm:ss} [ %t:%r ]
#########################
#### for test ###########
#########################
#log4j.appender.E.File =/Users/joy/work/iie/project/cyber_narrator/App/WebSketch/QueryAgentV3/logs/run.log
#log4j.appender.E.File =/Users/joy/work/iie/project/cyber_narrator/App/WebSketch/QueryAgentV4/logs/run.log

File diff suppressed because it is too large Load Diff