diff --git a/pom.xml b/pom.xml new file mode 100644 index 0000000..ea6236c --- /dev/null +++ b/pom.xml @@ -0,0 +1,99 @@ + + + 4.0.0 + + iie.ac.cn + QueryAgent + 1.0-SNAPSHOT + + + + org.apache.maven.plugins + maven-shade-plugin + 3.0.0 + + + package + + shade + + + + + cn.ac.iie.MainScheduleTasks + + + + + + + + org.apache.maven.plugins + maven-compiler-plugin + + 7 + 7 + + + + + + + + + + log4j + log4j + 1.2.17 + + + org.mariadb.jdbc + mariadb-java-client + 2.7.1 + + + org.apache.httpcomponents + httpclient + 4.5.6 + + + org.springframework + spring-core + 2.5.6 + + + com.alibaba + fastjson + 1.2.31 + + + commons-io + commons-io + 2.4 + + + com.alibaba + druid + 1.0.9 + + + me.geso + jdbcutils + 1.0.0 + + + com.google.guava + guava + 21.0 + + + org.apache.commons + commons-lang3 + 3.4 + + + + + \ No newline at end of file diff --git a/src/main/java/cn/ac/iie/MainScheduleTasks.java b/src/main/java/cn/ac/iie/MainScheduleTasks.java new file mode 100644 index 0000000..5567530 --- /dev/null +++ b/src/main/java/cn/ac/iie/MainScheduleTasks.java @@ -0,0 +1,55 @@ +package cn.ac.iie; + +import cn.ac.iie.config.ApplicationConfig; +import cn.ac.iie.service.OfflineTask; +import cn.ac.iie.service.UpdateTask; +import cn.ac.iie.utils.TimeUtils; +import org.apache.log4j.Logger; + +import java.util.Calendar; +import java.util.Date; +import java.util.Timer; + +/** + * @author yjy + * @version 1.0 + * @date 2021/2/25 11:27 上午 + */ +public class MainScheduleTasks { + private static final Logger LOG = Logger.getLogger(MainScheduleTasks.class); + private Date offlineStartTime; + + /** + * offline 每小时扫描一次离线目录 + * update 每天00:00更新一次库表内容 + */ + private void Timers() { + + if (ApplicationConfig.OFFLINE_SWITCH){ + LOG.info("Start offline schedule task"); + Timer offlineTimer = new Timer(); + Date offlineStartTime = Calendar.getInstance().getTime(); + + offlineTimer.schedule(new OfflineTask(), offlineStartTime, + ApplicationConfig.OFFLINE_SCHEDULE_SECOND * TimeUtils.SECOND_TO_MILLSEDONDS); + 
} + + if (ApplicationConfig.UPDATE_SWITCH){ + LOG.info("Start update schedule task"); + Timer updateTimer = new Timer(); + Date updateStartTime = TimeUtils.getStartOfDay(1); + + updateTimer.schedule(new UpdateTask(), updateStartTime, + ApplicationConfig.UPDATE_SCHEDULE_DAY * TimeUtils.DAY_TO_MILLSEDONDS); + } + } + + public static void main(String[] args) { + MainScheduleTasks tasks = new MainScheduleTasks(); + tasks.Timers(); + } + + +} + + diff --git a/src/main/java/cn/ac/iie/config/ApplicationConfig.java b/src/main/java/cn/ac/iie/config/ApplicationConfig.java new file mode 100644 index 0000000..50fe885 --- /dev/null +++ b/src/main/java/cn/ac/iie/config/ApplicationConfig.java @@ -0,0 +1,33 @@ +package cn.ac.iie.config; + + +import cn.ac.iie.utils.ConfigUtils; + +public class ApplicationConfig { + + public static final Boolean OFFLINE_SWITCH = ConfigUtils.getBooleanProperty("offline.switch"); + public static final String OFFLINE_IMPORT_FILENAME_SUFFIX = ConfigUtils.getStringProperty("offline.import.filename.suffix"); + public static final String OFFLINE_IMPORT_FILEDONE_SUFFIX = ConfigUtils.getStringProperty("offline.import.filedone.suffix"); + public static final String OFFLINE_OUTPUT_FILENAME_SUFFIX = ConfigUtils.getStringProperty("offline.output.filename.suffix"); + + public static final Integer OFFLINE_SCHEDULE_SECOND = ConfigUtils.getIntProperty("offline.schedule.second"); + public static final String OFFLINE_IMPORT_PATH = ConfigUtils.getStringProperty("offline.import.path"); + public static final String OFFLINE_OUTPUT_PATH = ConfigUtils.getStringProperty("offline.output.path"); + + + public static final Boolean UPDATE_SWITCH = ConfigUtils.getBooleanProperty("update.switch"); + public static final Integer UPDATE_SCHEDULE_DAY = ConfigUtils.getIntProperty("update.schedule.day"); + + public static final Integer QUERY_STANDARD_FQDN_LEVEL = ConfigUtils.getIntProperty("query.standard.fqdn.level"); + public static final Boolean QUERY_URL_INFO_SWITCH = 
ConfigUtils.getBooleanProperty("query.url_info.switch"); + public static final Boolean QUERY_URL_REP_SWITCH = ConfigUtils.getBooleanProperty("query.url_rep.switch"); + public static final Boolean QUERY_URL_WHOIS_SWITCH = ConfigUtils.getBooleanProperty("query.url_whois.switch"); + + + public static final Integer LOG_OFFLINE_NUMBER = ConfigUtils.getIntProperty("log.offline.number"); + public static final String LOG_BC_QUERY_REPORT_FILE = ConfigUtils.getStringProperty("log.bc.query.report.file"); + + public static final Integer MAXIMUM_URL_ONCE = ConfigUtils.getIntProperty("maximum.url.once"); + + +} diff --git a/src/main/java/cn/ac/iie/config/MariaDBConfig.java b/src/main/java/cn/ac/iie/config/MariaDBConfig.java new file mode 100644 index 0000000..de51cd9 --- /dev/null +++ b/src/main/java/cn/ac/iie/config/MariaDBConfig.java @@ -0,0 +1,16 @@ +package cn.ac.iie.config; + +import cn.ac.iie.utils.ConfigUtils; + +/** + * @author yjy + * @version 1.0 + * @date 2021/2/25 11:09 上午 + */ + +public class MariaDBConfig { + + public static final String MARIADB_DATABASE = ConfigUtils.getStringProperty("database"); + public static final String MARIADB_TABLE = ConfigUtils.getStringProperty("table"); + +} diff --git a/src/main/java/cn/ac/iie/dao/BaseMariaDB.java b/src/main/java/cn/ac/iie/dao/BaseMariaDB.java new file mode 100644 index 0000000..ebf4677 --- /dev/null +++ b/src/main/java/cn/ac/iie/dao/BaseMariaDB.java @@ -0,0 +1,177 @@ +package cn.ac.iie.dao; + + +import cn.ac.iie.config.MariaDBConfig; +import cn.ac.iie.utils.TimeUtils; +import org.apache.log4j.Logger; + +import java.sql.*; +import java.util.Arrays; +import java.util.Date; +import java.util.List; +import java.util.Properties; + +/** + * @author yjy + * @version 1.0 + * @date 2021/2/19 6:25 下午 + * //TODO 非持久连接,一次实例化满足一次数据库操作 + */ +public class BaseMariaDB { + private static final Logger LOG = Logger.getLogger(BaseMariaDB.class); + private static final Properties props = new Properties(); + + private Statement statement; 
+
+    /**
+     * Creates a data-access helper that issues every SQL call through the given statement.
+     *
+     * @param conn connection the statement belongs to; it is not stored or used here
+     * @param stat open statement used for all reads and writes of this instance
+     */
+    public BaseMariaDB(Connection conn, Statement stat) {
+        statement = stat;
+    }
+
+    /**
+     * Inserts every record, one INSERT statement per element, in list order.
+     *
+     * @param fqdnFiles records to insert
+     */
+    public void insertRecords(List<FqdnFile> fqdnFiles){
+        for (FqdnFile fqdnFile : fqdnFiles) {
+            fqdnFileWrite(fqdnFile, "insert");
+        }
+    }
+
+    /**
+     * Updates every record, one UPDATE statement per element, in list order.
+     *
+     * @param fqdnFiles records to update
+     */
+    public void updateRecords(List<FqdnFile> fqdnFiles){
+        for (FqdnFile fqdnFile : fqdnFiles) {
+            fqdnFileWrite(fqdnFile, "update");
+        }
+    }
+
+    /**
+     * Writes a single FqdnFile, either as an insert or as an update.
+     *
+     * @param fqdnFile record to write
+     * @param method   {@code "insert"} or {@code "update"}
+     * @throws IllegalArgumentException if {@code method} is anything else (previously this was
+     *         only guarded by an {@code assert}, so with assertions disabled an unknown value
+     *         silently ran an UPDATE)
+     */
+    public void fqdnFileWrite(FqdnFile fqdnFile, String method){
+        final String sql;
+        if ("insert".equals(method)) {
+            sql = getInsertSql(fqdnFile);
+        } else if ("update".equals(method)) {
+            sql = getUpdateSql(fqdnFile);
+        } else {
+            throw new IllegalArgumentException("Unsupported write method: " + method);
+        }
+        writeSqlExecute(sql);
+    }
+
+    /**
+     * Runs the query built by {@code getExpiredRecordSql()}.
+     *
+     * @return the open result set, or {@code null} if the query failed
+     */
+    public ResultSet getExpiredRecord(){
+        return querySqlExecute(getExpiredRecordSql());
+    }
+
+    /**
+     * Runs the query built by {@code getUnlabelRecordSql()}.
+     *
+     * @return the open result set, or {@code null} if the query failed
+     */
+    public ResultSet getUnlabelRecord(){
+        return querySqlExecute(getUnlabelRecordSql());
+    }
+
+    /**
+     * Runs the query built by {@code getQueryRecordSql(fqdn)}.
+     *
+     * @param fqdn domain name to look up
+     * @return the open result set, or {@code null} if the query failed
+     */
+    public ResultSet getDatabaseRecord(String fqdn){
+        return querySqlExecute(getQueryRecordSql(fqdn));
+    }
+
+    /**
+     * Executes an INSERT/UPDATE statement. Failures are logged and not rethrown, so a bad
+     * record does not abort the batch that is being written.
+     *
+     * @param sql complete SQL text to execute
+     */
+    public void writeSqlExecute(String sql){
+        try {
+            statement.executeUpdate(sql);
+        } catch (SQLException exception) {
+            LOG.debug("Sql : " + sql);
+            LOG.error("Write statement failed", exception);
+        }
+    }
+
+    /**
+     * Executes a SELECT statement. The result set is not closed here; that is left to the caller.
+     *
+     * @param sql complete SQL text to execute
+     * @return the open result set, or {@code null} if execution failed (the failure is logged)
+     */
+    public ResultSet querySqlExecute(String sql){
+        ResultSet set = null;
+        try {
+            set = statement.executeQuery(sql);
+        } catch (SQLException exception) {
+            LOG.error("Query statement failed", exception);
+        }
+        return set;
+    }
+
+    /**
+     * Tells whether at least one row exists for the given domain name.
+     *
+     * @param standardFqdn domain name to look for
+     * @return {@code true} if the COUNT(*) query reports one or more rows; {@code false} if it
+     *         reports none, returns no row at all, or fails (the failure is logged)
+     */
+    public Boolean isInDatabase(String standardFqdn){
+        String querySql = getQueryExistSql(standardFqdn);
+        // try-with-resources closes the result set; the original left it open.
+        try (ResultSet set = statement.executeQuery(querySql)) {
+            // The query selects a single COUNT(*) column, so it is read by position.
+            return set.next() && set.getInt(1) > 0;
+        } catch (SQLException e) {
+            LOG.error("Existence check failed for fqdn " + standardFqdn, e);
+            return false;
+        }
+    }
+
+
+    public String getInsertSql(FqdnFile fqdnFile){
+        long start = System.currentTimeMillis();
+        StringBuilder sql = new StringBuilder("INSERT INTO ");
+        sql.append(MariaDBConfig.MARIADB_DATABASE).append(".")
+ 
.append(MariaDBConfig.MARIADB_TABLE).append(' '); + + sql.append(" (fqdn,query_success,reputation_score,reputation_level,") + .append("category_id,category_name,category_group,category_conf,is_a1_cat,popularity,observ_age,country,threat_history,") + .append("whois_domain,whois_update_date,whois_create_date,whois_expire_date,whois_email,whois_ns,") + .append("whois_registrar_name,whois_registrant_org,whois_regisrante_name,whois_regisrante_street,") + .append("whois_regisrante_city,whois_regisrante_state,whois_regisrante_postcode,whois_regisrante_country,") + .append("whois_regisrante_phone) values"); + + sql.append('(').append(fqdnFile.getValues()).append(')'); + String resSql = sql.toString(); + resSql = resSql.replace("'null'", "null"); + + return resSql; + } + + public String getUpdateSql(FqdnFile fqdnFile){ + StringBuilder sql = new StringBuilder("UPDATE "); + sql.append(MariaDBConfig.MARIADB_DATABASE).append(".") + .append(MariaDBConfig.MARIADB_TABLE).append(' ') + .append("SET ").append(fqdnFile.getKeyValues()) + .append(", update_time = current_time() ") + .append(" WHERE fqdn = '").append(fqdnFile.getFqdn()).append('\''); + + String resSql = sql.toString(); + resSql = resSql.replace("'null'", "null"); + + return resSql; + } + + public String getQueryExistSql(String fqdn){ + StringBuilder sql = new StringBuilder("SELECT COUNT(*) FROM "); + sql.append(MariaDBConfig.MARIADB_DATABASE).append(".") + .append(MariaDBConfig.MARIADB_TABLE).append(' ') + .append(" WHERE fqdn = '").append(fqdn).append('\''); + + return sql.toString(); + } + + public String getQueryRecordSql(String fqdn){ + StringBuilder sql = new StringBuilder("SELECT * FROM "); + sql.append(MariaDBConfig.MARIADB_DATABASE).append(".") + .append(MariaDBConfig.MARIADB_TABLE).append(' ') + .append(" WHERE fqdn = '").append(fqdn).append('\''); + + String resSql = sql.toString(); + resSql = resSql.replace("'null'", "null"); + + return resSql; + } + + public static String getExpiredRecordSql(){ + Date 
lastUpdateTime = new Timestamp(TimeUtils.getExpiredTime().getTime()); + + String resSql = "SELECT x.fqdn FROM " + MariaDBConfig.MARIADB_DATABASE + "." + + MariaDBConfig.MARIADB_TABLE + + " x WHERE x.update_time < '" + lastUpdateTime + '\''; + + LOG.debug("Update task: expired query sql" + resSql); + + return resSql; + } + + public static String getUnlabelRecordSql(){ + + return "SELECT x.fqdn FROM " + MariaDBConfig.MARIADB_DATABASE + "." + + MariaDBConfig.MARIADB_TABLE + + " x WHERE x.category_id = 0 or x.query_success != 1"; + } +} diff --git a/src/main/java/cn/ac/iie/dao/FqdnFile.java b/src/main/java/cn/ac/iie/dao/FqdnFile.java new file mode 100644 index 0000000..48a5791 --- /dev/null +++ b/src/main/java/cn/ac/iie/dao/FqdnFile.java @@ -0,0 +1,449 @@ +package cn.ac.iie.dao; + +import java.util.Date; + +/** + * @author yjy + * @version 1.0 + * @date 2021/2/22 7:40 下午 + * field name 和 mariadb 一致 + * + */ + +public class FqdnFile +{ + private String fqdn; + private Boolean query_success; + private Integer reputation_score; + private String reputation_level; + private Integer category_id; + private String category_name; + private String category_group; + private Integer category_conf; + private Boolean is_a1_cat; + private Integer popularity; + private Integer observ_age; + private String country; + private Integer threat_history; + private String whois_domain; + private java.sql.Timestamp whois_update_date; + private java.sql.Timestamp whois_create_date; + private java.sql.Timestamp whois_expire_date; + private String whois_email; + private String whois_ns; + private String whois_registrar_name; + private String whois_registrant_org; + private String whois_regisrante_name; + private String whois_regisrante_street; + private String whois_regisrante_city; + private String whois_regisrante_state; + private String whois_regisrante_postcode; + private String whois_regisrante_country; + private String whois_regisrante_phone; + + // update + public FqdnFile(String fqdn, 
Boolean query_success, + Integer reputation_score, String reputationLevel, + Integer categoryId, String categoryName, String categoryGroup, Integer categoryConf, Boolean isA1Cat, + Integer popularity, Integer observAge, String country, Integer threatHistory, + String whoisDomain, Date whoisUpdateDate, Date whoisCreateDate, Date whoisExpireDate, + String whoisEmail, String whoisNs, String whoisRegistrarName, String whoisRegistrantOrg, + String whoisRegisranteName, String whoisRegisranteStreet, String whoisRegisranteCity, + String whoisRegisranteState, String whoisRegisrantePostcode, String whoisRegisranteCountry, + String whoisRegisrantePhone) { + + this.fqdn = fqdn; + this.query_success = query_success; + + this.reputation_score = reputation_score; + this.reputation_level = getEffectiveString(reputationLevel); + this.category_id = categoryId; + this.category_name = getEffectiveString(categoryName); + this.category_group = getEffectiveString(categoryGroup); + this.category_conf = categoryConf; + this.is_a1_cat = isA1Cat; + this.popularity = popularity; + this.observ_age = observAge; + this.country = getEffectiveString(country); + this.threat_history = threatHistory; + + this.whois_domain = getEffectiveString(whoisDomain); + this.whois_update_date = whoisUpdateDate == null? null : new java.sql.Timestamp(whoisUpdateDate.getTime()); + this.whois_create_date = whoisCreateDate == null? null : new java.sql.Timestamp(whoisCreateDate.getTime()); + this.whois_expire_date = whoisExpireDate == null? 
null : new java.sql.Timestamp(whoisExpireDate.getTime()); + this.whois_email = getEffectiveString(whoisEmail); + this.whois_ns = getEffectiveString(whoisNs); + this.whois_registrar_name = getEffectiveString(whoisRegistrarName); + this.whois_registrant_org = getEffectiveString(whoisRegistrantOrg); + this.whois_regisrante_name = getEffectiveString(whoisRegisranteName); + this.whois_regisrante_street = getEffectiveString(whoisRegisranteStreet); + this.whois_regisrante_city = getEffectiveString(whoisRegisranteCity); + this.whois_regisrante_state = getEffectiveString(whoisRegisranteState); + this.whois_regisrante_postcode = getEffectiveString(whoisRegisrantePostcode); + this.whois_regisrante_country = getEffectiveString(whoisRegisranteCountry); + this.whois_regisrante_phone = getEffectiveString(whoisRegisrantePhone); + } + + private String getEffectiveString(String s){ + if (!(s ==null)){ + return s.length() == 0 ? null : s; + } else{ + return null; + } + } + + + public String getFqdn() { + return fqdn; + } + + public void setFqdn(String fqdn) { + this.fqdn = fqdn; + } + + public Boolean getQuery_success() { + return query_success; + } + + public void setQuery_success(Boolean query_success) { + this.query_success = query_success; + } + + public Integer getReputation_score() { + return reputation_score; + } + + public void setReputation_score(Integer reputation_score) { + this.reputation_score = reputation_score; + } + + public String getReputation_level() { + return reputation_level; + } + + public void setReputation_level(String reputation_level) { + this.reputation_level = reputation_level; + } + + public Integer getCategory_id() { + return category_id; + } + + public void setCategory_id(Integer category_id) { + this.category_id = category_id; + } + + public String getCategory_name() { + return category_name; + } + + public void setCategory_name(String category_name) { + this.category_name = category_name; + } + + public String getCategory_group() { + return 
category_group; + } + + public void setCategory_group(String category_group) { + this.category_group = category_group; + } + + public Integer getCategory_conf() { + return category_conf; + } + + public void setCategory_conf(Integer category_conf) { + this.category_conf = category_conf; + } + + public Boolean getA1Cat() { + return is_a1_cat; + } + + public void setA1Cat(Boolean a1Cat) { + is_a1_cat = a1Cat; + } + + public Integer getPopularity() { + return popularity; + } + + public void setPopularity(Integer popularity) { + this.popularity = popularity; + } + + public String getCountry() { + return country; + } + + public void setCountry(String country) { + this.country = country; + } + + public Integer getObserv_age() { + return observ_age; + } + + public void setObserv_age(Integer observ_age) { + this.observ_age = observ_age; + } + + public Integer getThreat_history() { + return threat_history; + } + + public void setThreat_history(Integer threat_history) { + this.threat_history = threat_history; + } + + public String getWhois_domain() { + return whois_domain; + } + + public void setWhois_domain(String whois_domain) { + this.whois_domain = whois_domain; + } + + public Date getWhois_update_date() { + return whois_update_date; + } + + public void setWhois_update_date(java.sql.Timestamp whois_update_date) { + this.whois_update_date = whois_update_date; + } + + public Date getWhoisCreateDate() { + return whois_create_date; + } + + public void setWhoisCreateDate(java.sql.Timestamp whoisCreateDate) { + this.whois_create_date = whoisCreateDate; + } + + public Date getWhois_expire_date() { + return whois_expire_date; + } + + public void setWhois_expire_date(java.sql.Timestamp whois_expire_date) { + this.whois_expire_date = whois_expire_date; + } + + public String getWhois_email() { + return whois_email; + } + + public void setWhois_email(String whois_email) { + this.whois_email = whois_email; + } + + public String getWhois_ns() { + return whois_ns; + } + + public void 
setWhois_ns(String whois_ns) { + this.whois_ns = whois_ns; + } + + public String getWhois_registrar_name() { + return whois_registrar_name; + } + + public void setWhois_registrar_name(String whois_registrar_name) { + this.whois_registrar_name = whois_registrar_name; + } + + public String getWhois_registrant_org() { + return whois_registrant_org; + } + + public void setWhois_registrant_org(String whois_registrant_org) { + this.whois_registrant_org = whois_registrant_org; + } + + public String getWhois_regisrante_name() { + return whois_regisrante_name; + } + + public void setWhois_regisrante_name(String whois_regisrante_name) { + this.whois_regisrante_name = whois_regisrante_name; + } + + public String getWhois_regisrante_street() { + return whois_regisrante_street; + } + + public void setWhois_regisrante_street(String whois_regisrante_street) { + this.whois_regisrante_street = whois_regisrante_street; + } + + public String getWhois_regisrante_city() { + return whois_regisrante_city; + } + + public void setWhois_regisrante_city(String whois_regisrante_city) { + this.whois_regisrante_city = whois_regisrante_city; + } + + public String getWhois_regisrante_state() { + return whois_regisrante_state; + } + + public void setWhois_regisrante_state(String whois_regisrante_state) { + this.whois_regisrante_state = whois_regisrante_state; + } + + public String getWhois_regisrante_postcode() { + return whois_regisrante_postcode; + } + + public void setWhois_regisrante_postcode(String whois_regisrante_postcode) { + this.whois_regisrante_postcode = whois_regisrante_postcode; + } + + public String getWhois_regisrante_country() { + return whois_regisrante_country; + } + + public void setWhois_regisrante_country(String whois_regisrante_country) { + this.whois_regisrante_country = whois_regisrante_country; + } + + public String getWhois_regisrante_phone() { + return whois_regisrante_phone; + } + + public void setWhois_regisrante_phone(String whois_regisrante_phone) { + 
this.whois_regisrante_phone = whois_regisrante_phone; + } + + @Override + public String toString() { + return "FqdnFile{" + + "fqdn='" + fqdn + '\'' + + ", query_success=" + query_success + + ", reputation_score=" + reputation_score + + ", reputation_level='" + reputation_level + '\'' + + ", category_id=" + category_id + + ", category_name='" + category_name + '\'' + + ", category_group='" + category_group + '\'' + + ", category_conf=" + category_conf + + ", is_a1_cat=" + is_a1_cat + + ", popularity=" + popularity + + ", observ_age=" + observ_age + + ", country='" + country + '\'' + + ", threat_history=" + threat_history + + ", whois_domain='" + whois_domain + '\'' + + ", whois_update_date=" + whois_update_date + + ", whois_create_date=" + whois_create_date + + ", whois_expire_date=" + whois_expire_date + + ", whois_email='" + whois_email + '\'' + + ", whois_ns='" + whois_ns + '\'' + + ", whois_registrar_name='" + whois_registrar_name + '\'' + + ", whois_registrant_org='" + whois_registrant_org + '\'' + + ", whois_regisrante_name='" + whois_regisrante_name + '\'' + + ", whois_regisrante_street='" + whois_regisrante_street + '\'' + + ", whois_regisrante_city='" + whois_regisrante_city + '\'' + + ", whois_regisrante_state='" + whois_regisrante_state + '\'' + + ", whois_regisrante_postcode='" + whois_regisrante_postcode + '\'' + + ", whois_regisrante_country='" + whois_regisrante_country + '\'' + + ", whois_regisrante_phone='" + whois_regisrante_phone + '\'' + + '}'; + } + + public String getValues(){ + String resString = + "'" + fqdn + '\'' + + "," + query_success + + ", " + reputation_score + + ", '" + reputation_level + '\'' + + ", " + category_id + + ", '" + category_name + '\'' + + ", '" + category_group + '\'' + + ", " + category_conf + + ", " + is_a1_cat + + ", " + popularity + + ", " + observ_age + + ", '" + country + '\'' + + ", " + threat_history + + ", '" + whois_domain + '\'' + + ", '" + whois_update_date + '\'' + + ", '" + whois_create_date + '\'' + + ", 
'" + whois_expire_date + '\'' + + ", '" + whois_email + '\'' + + ", '" + whois_ns + '\'' + + ", '" + whois_registrar_name + '\'' + + ", '" + whois_registrant_org + '\'' + + ", '" + whois_regisrante_name + '\'' + + ", '" + whois_regisrante_street + '\'' + + ", '" + whois_regisrante_city + '\'' + + ", '" + whois_regisrante_state + '\'' + + ", '" + whois_regisrante_postcode + '\'' + + ", '" + whois_regisrante_country + '\'' + + ", '" + whois_regisrante_phone + '\'' ; + return resString.replace("'null'", "null"); + } + + public static String getKeys() { + return "fqdn" + + ", query_success" + + ", reputation_score" + + ", reputation_level" + + ", category_id" + + ", category_name" + + ", category_group" + + ", category_conf" + + ", is_a1_cat" + + ", popularity" + + ", observ_age" + + ", country" + + ", threat_history" + + ", whois_domain" + + ", whois_update_date" + + ", whois_create_date" + + ", whois_expire_date" + + ", whois_email" + + ", whois_ns" + + ", whois_registrar_name" + + ", whois_registrant_org" + + ", whois_regisrante_name" + + ", whois_regisrante_street" + + ", whois_regisrante_city" + + ", whois_regisrante_state" + + ", whois_regisrante_postcode" + + ", whois_regisrante_country" + + ", whois_regisrante_phone"; + } + + public String getKeyValues(){ + String resString = + "query_success=" + query_success + + ", reputation_score=" + reputation_score + + ", reputation_level='" + reputation_level + '\'' + + ", category_id=" + category_id + + ", category_name='" + category_name + '\'' + + ", category_group='" + category_group + '\'' + + ", category_conf=" + category_conf + + ", is_a1_cat=" + is_a1_cat + + ", popularity=" + popularity + + ", observ_age=" + observ_age + + ", country='" + country + '\'' + + ", threat_history=" + threat_history + + ", whois_domain='" + whois_domain + '\'' + + ", whois_update_date='" + whois_update_date + '\'' + + ", whois_create_date='" + whois_create_date + '\'' + + ", whois_expire_date='" + whois_expire_date + '\'' + + ", 
whois_email='" + whois_email + '\'' + + ", whois_ns='" + whois_ns + '\'' + + ", whois_registrar_name='" + whois_registrar_name + '\'' + + ", whois_registrant_org='" + whois_registrant_org + '\'' + + ", whois_regisrante_name='" + whois_regisrante_name + '\'' + + ", whois_regisrante_street='" + whois_regisrante_street + '\'' + + ", whois_regisrante_city='" + whois_regisrante_city + '\'' + + ", whois_regisrante_state='" + whois_regisrante_state + '\'' + + ", whois_regisrante_postcode='" + whois_regisrante_postcode + '\'' + + ", whois_regisrante_country='" + whois_regisrante_country + '\'' + + ", whois_regisrante_phone='" + whois_regisrante_phone + '\''; + return resString.replace("'null'", "null"); + } +} \ No newline at end of file diff --git a/src/main/java/cn/ac/iie/service/OfflineTask.java b/src/main/java/cn/ac/iie/service/OfflineTask.java new file mode 100644 index 0000000..0a495a8 --- /dev/null +++ b/src/main/java/cn/ac/iie/service/OfflineTask.java @@ -0,0 +1,237 @@ +package cn.ac.iie.service; + +import cn.ac.iie.config.ApplicationConfig; +import cn.ac.iie.dao.BaseMariaDB; +import cn.ac.iie.dao.FqdnFile; +import cn.ac.iie.utils.BrightCloudUtils; +import cn.ac.iie.utils.FileUtils; +import cn.ac.iie.utils.MariaDBConnect; +import cn.ac.iie.utils.LogUtils; +import com.alibaba.fastjson.JSONObject; +import org.apache.log4j.Logger; + +import java.io.*; +import java.nio.charset.StandardCharsets; +import java.sql.Connection; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Statement; +import java.text.DecimalFormat; +import java.util.*; + +/** + * @author yjy + * @version 1.0 + * @date 2021/2/25 11:29 上午 + */ +public class OfflineTask extends TimerTask { + private static final Logger LOG = Logger.getLogger(OfflineTask.class); + private static final LogUtils logutils = new LogUtils(); + + @Override + public void run() { + try { + runTask(); + } catch (SQLException | IOException exception) { + exception.printStackTrace(); + } + } + + private void 
runTask() throws SQLException, IOException { + + List fileNames = catpureNewFiles(); + List fqdns; + + Connection mariaConn = MariaDBConnect.getConnection(); + Statement mariaStat = mariaConn.createStatement(); + BaseMariaDB mariaDB = new BaseMariaDB(mariaConn, mariaStat); + BrightCloudUtils brightCloudUtils = new BrightCloudUtils(); + List queryTypes = BrightCloudUtils.getQueryTypes(); + + for (String fileName: fileNames){ + File importFile = new File(fileName); + String importFileName = FileUtils.getFileName(importFile); + LOG.info("[Offline import file query]-" + importFileName + ": File Found."); + String outputFileName = + importFileName.substring(0, importFileName.length()-ApplicationConfig.OFFLINE_IMPORT_FILENAME_SUFFIX.length()) + + ApplicationConfig.OFFLINE_OUTPUT_FILENAME_SUFFIX; + // TODO 遍历处理消耗太大 + fqdns = FileUtils.readTxtFileIntoStringArrList(importFile.toString()); + fqdns = SingleTermTask.getCheckedFqdn(fqdns); + + long standardFqdnNum = fqdns.size(); + long dbQueryNum = 0; + long bcQueryNum = 0; + long failQueryNum = 0; + long effecResNum = 0; + long noLabelNum = 0; + if (standardFqdnNum>0){ + + // 创建结果保存文件 + File outputFile = new File(ApplicationConfig.OFFLINE_OUTPUT_PATH + "/" + outputFileName); + if (!outputFile.exists()){ + FileUtils.createFile(new File(ApplicationConfig.OFFLINE_OUTPUT_PATH), outputFileName); + } + + OutputStream outStream = new FileOutputStream(outputFile); + OutputStreamWriter outWriter = new OutputStreamWriter(outStream, StandardCharsets.UTF_8); + outWriter.write(FqdnFile.getKeys() + "\n"); + + List fqdnToQuery = new ArrayList<>(); + // 遍历列表域名 + LOG.info("[Offline import file query]-" + importFileName + " Progress: 0%"); + for (int index=0; index fqdnFiles = brightCloudUtils.responseSparse(resObj); + assert fqdnFiles.size()>0; + + // 存数据库 + mariaDB.insertRecords(fqdnFiles); + + // 写入output + for (FqdnFile tmpFile:fqdnFiles){ + outWriter.write(tmpFile.getValues() + "\n"); + if (tmpFile.getQuery_success().equals(false)){ + 
failQueryNum += 1; + } else if (tmpFile.getCategory_id().equals(0)){ + noLabelNum += 1; + } else { + effecResNum += 1; + } + } + +// // 打印处理进度日志 +// String percent = new DecimalFormat("##.0%").format((float)index/standardFqdnNum); +// LOG.info("[Offline import file query]-" + importFileName + " Progress:" + percent); + + // 缓存复位 + fqdnToQuery = new ArrayList<>(); + } + } + + // 打印处理进度至日志 + if (index>0 && index % ApplicationConfig.LOG_OFFLINE_NUMBER==0){ + String percent = new DecimalFormat("##.0%").format((float)index/standardFqdnNum); + LOG.info("[Offline import file query]-" + importFileName + " Progress:" + percent); + } + } + + // 剩余待查bc + if (fqdnToQuery.size()>0){ + JSONObject resObj = brightCloudUtils.getQueryResults(fqdnToQuery); + List fqdnFiles = brightCloudUtils.responseSparse(resObj); + assert fqdnFiles.size()>0; + // 存数据库 + mariaDB.insertRecords(fqdnFiles); + // 写入output + for (FqdnFile tmpFile:fqdnFiles){ + outWriter.write(tmpFile.getValues() + "\n"); + if (tmpFile.getQuery_success().equals(false)){ + failQueryNum += 1; + } else if (tmpFile.getCategory_id().equals(0)){ + noLabelNum += 1; + } else { + effecResNum += 1; + } + } + } + + LOG.info("[Offline import file query]-" + importFileName + " Progress: 100%"); + // 打印处理结果至日志 + LOG.info("[Offline import file query]-" + importFileName + " " + + "Query result: submit " + standardFqdnNum+" valid fqdns, " + + dbQueryNum + " (" + new DecimalFormat("##.0%").format((float)dbQueryNum/standardFqdnNum) + ")" + " results from database," + + bcQueryNum + " (" + new DecimalFormat("##.0%").format((float)bcQueryNum/standardFqdnNum) + ")" + " results from bright cloud. 
" + + effecResNum + " (" + new DecimalFormat("##.0%").format((float)effecResNum/standardFqdnNum) + ")" + " effective results," + + failQueryNum + " (" + new DecimalFormat("##.0%").format((float)failQueryNum/standardFqdnNum) + ")" + " failed queries," + + noLabelNum + " (" + new DecimalFormat("##.0%").format((float)noLabelNum/standardFqdnNum) + ")" + " unlabeled results"); + LOG.info("[Offline import file query]-" + importFileName + " Results saved in " + outputFile.toString()); + // 打印查询服务调用记录日志 + if (bcQueryNum > 0){ + OutputStream bcQueryLogStream = new FileOutputStream(ApplicationConfig.LOG_BC_QUERY_REPORT_FILE, true); + OutputStreamWriter bcQueryLogWriter = new OutputStreamWriter(bcQueryLogStream, StandardCharsets.UTF_8); + for (String type : queryTypes) { + java.sql.Date d = new java.sql.Date(System.currentTimeMillis()); + bcQueryLogWriter.write(d + "," + "OfflineTask," + importFileName + "," + type + "," + bcQueryNum + "\n"); + } + + FileUtils.writerClose(bcQueryLogWriter, bcQueryLogStream); + } + + FileUtils.writerClose(outWriter, outStream); + } else { + continue; + } + + // 查询结束修改后缀 + importFile.renameTo(new File( + importFile.toString().substring(0, importFile.toString().length()-ApplicationConfig.OFFLINE_IMPORT_FILENAME_SUFFIX.length()) + + ApplicationConfig.OFFLINE_IMPORT_FILEDONE_SUFFIX)); + + } + + MariaDBConnect.close(mariaStat, mariaConn); + } + + private List catpureNewFiles() { + List newFiles = new ArrayList<>(); + + // Get all files + File dir = new File(ApplicationConfig.OFFLINE_IMPORT_PATH); + if (!dir.exists()) { + boolean isCreateDir = dir.mkdir(); + if (isCreateDir) { + LOG.info("Create new dictionary: " + ApplicationConfig.OFFLINE_IMPORT_PATH); + } else { + LOG.error("Failed to create import dictionary: " + ApplicationConfig.OFFLINE_IMPORT_PATH); + } + } + File[] allFiles = dir.listFiles(); + + if (allFiles == null) { + LOG.info("Dir is empty: " + ApplicationConfig.OFFLINE_IMPORT_PATH + System.currentTimeMillis()); + } else { + for (File 
file : allFiles) {
+                if (file.isFile() && file.getName().endsWith(ApplicationConfig.OFFLINE_IMPORT_FILENAME_SUFFIX)) {
+                    newFiles.add(file.toString());
+                }
+            }
+        }
+        return newFiles;
+    }
+
+    private Boolean checkFileName(String fileName) {
+        boolean isStandard = true;
+        if (!fileName.endsWith(ApplicationConfig.OFFLINE_IMPORT_FILENAME_SUFFIX)) {
+            isStandard = false;
+        }
+        return isStandard;
+    }
+}
diff --git a/src/main/java/cn/ac/iie/service/SingleTermTask.java b/src/main/java/cn/ac/iie/service/SingleTermTask.java
new file mode 100644
index 0000000..5d938d1
--- /dev/null
+++ b/src/main/java/cn/ac/iie/service/SingleTermTask.java
@@ -0,0 +1,119 @@
+package cn.ac.iie.service;
+
+import cn.ac.iie.config.ApplicationConfig;
+import cn.ac.iie.dao.FqdnFile;
+import org.apache.commons.lang3.StringUtils;
+
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * @author yjy
+ * @version 1.0
+ * @date 2021/2/25 2:40 PM
+ */
+public class SingleTermTask {
+
+    /**
+     * Host-name shape accepted by {@link #getStandardFqdn(String)}: one or more dot-terminated
+     * labels followed by a 2-6 letter suffix, anchored at the start of the input.
+     * Compiled once; a Pattern is immutable and may be shared between threads.
+     */
+    private static final Pattern HOST_PATTERN = Pattern.compile(
+            "^((http://)|(https://)|())?([a-zA-Z0-9]([a-zA-Z0-9\\-]{0,61})?\\.)+[a-zA-Z]{2,6}");
+
+    /**
+     * Extracts the host name from a URL and keeps at most
+     * {@code ApplicationConfig.QUERY_STANDARD_FQDN_LEVEL} of its right-most labels.
+     *
+     * @param url URL or bare host name; may be {@code null}
+     * @return the (possibly truncated) host name, or the empty string "" when the input is
+     *         {@code null}, blank, or does not start with something matching the host pattern
+     */
+    public static String getStandardFqdn(String url){
+        if (url == null || "".equals(url.trim())) {
+            return "";
+        }
+        if (url.contains("://")) {
+            // Limit -1 keeps trailing empty strings, so element 1 exists even for an input
+            // such as "http://"; the plain split dropped it and indexing [1] threw
+            // ArrayIndexOutOfBoundsException.
+            url = url.split("://", -1)[1];
+        }
+
+        String host = "";
+        Matcher matcher = HOST_PATTERN.matcher(url);
+        if (matcher.find()) {
+            // Every match ends in the 2-6 letter suffix, so the former extra
+            // "contains a letter" regex could never reject it and has been dropped.
+            host = matcher.group();
+        }
+        // NOTE(review): the {2,6} suffix bound cuts longer TLDs short
+        // ("foo.website" yields "foo.websit") - confirm whether that is intended.
+
+        String[] labels = host.split("\\.");
+        if (labels.length > ApplicationConfig.QUERY_STANDARD_FQDN_LEVEL) {
+            List<String> domains = Arrays.asList(labels);
+            host = StringUtils.join(domains.subList(domains.size() - ApplicationConfig.QUERY_STANDARD_FQDN_LEVEL,
+                    domains.size()), ".");
+        }
+        return host;
+    }
+
+    private static boolean 
isValidDomain(String str) + { + String regex = "^((?!-)[A-Za-z0-9-]" + + "{1,63}(? getCheckedFqdn(List fqdns){ + List res = new ArrayList<>(); + for (String fqdn:fqdns){ + // 去重 & 校验 + if (isValidDomain(fqdn) && !res.contains(fqdn)){ + res.add(fqdn.toLowerCase()); + } + } + return res; + } + + public static FqdnFile ResSet2FqdnFile(ResultSet resultSet){ + FqdnFile fqdnFile = null; + + try { + fqdnFile = new FqdnFile( + resultSet.getString("fqdn"), + resultSet.getBoolean("query_success"), + resultSet.getInt("reputation_score"), + resultSet.getString("reputation_level"), + resultSet.getInt("category_id"), + resultSet.getString("category_Name"), + resultSet.getString("category_group"), + resultSet.getInt("category_conf"), + resultSet.getBoolean("is_a1_cat"), + resultSet.getInt("popularity"), + resultSet.getInt("observ_age"), + resultSet.getString("country"), + resultSet.getInt("threat_history"), + resultSet.getString("whois_domain"), + resultSet.getDate("whois_update_date"), + resultSet.getDate("whois_create_date"), + resultSet.getDate("whois_expire_date"), + resultSet.getString("whois_email"), + resultSet.getString("whois_ns"), + resultSet.getString("whois_registrar_name"), + resultSet.getString("whois_registrant_org"), + resultSet.getString("whois_regisrante_name"), + resultSet.getString("whois_regisrante_street"), + resultSet.getString("whois_regisrante_city"), + resultSet.getString("whois_regisrante_state"), + resultSet.getString("whois_regisrante_postcode"), + resultSet.getString("whois_regisrante_country"), + resultSet.getString("whois_regisrante_phone") + ); + } catch (SQLException e) { + e.printStackTrace(); + } + return fqdnFile; + } + +} diff --git a/src/main/java/cn/ac/iie/service/UpdateTask.java b/src/main/java/cn/ac/iie/service/UpdateTask.java new file mode 100644 index 0000000..a6bba9e --- /dev/null +++ b/src/main/java/cn/ac/iie/service/UpdateTask.java @@ -0,0 +1,91 @@ +package cn.ac.iie.service; + +import cn.ac.iie.config.ApplicationConfig; +import 
cn.ac.iie.dao.BaseMariaDB; +import cn.ac.iie.dao.FqdnFile; +import cn.ac.iie.utils.BrightCloudUtils; +import cn.ac.iie.utils.FileUtils; +import cn.ac.iie.utils.LogUtils; +import cn.ac.iie.utils.MariaDBConnect; +import com.alibaba.fastjson.JSONObject; +import org.apache.log4j.Logger; + +import java.io.*; +import java.nio.charset.StandardCharsets; +import java.sql.Connection; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Statement; +import java.text.DecimalFormat; +import java.util.ArrayList; +import java.util.List; +import java.util.TimerTask; + +/** + * @author yjy + * @version 1.0 + * @date 2021/2/25 11:29 上午 + */ +public class UpdateTask extends TimerTask { + private static final Logger LOG = Logger.getLogger(UpdateTask.class); + private static final LogUtils logutils = new LogUtils(); + + @Override + public void run() { + try { + runTask(); + } catch (SQLException | IOException exception) { + exception.printStackTrace(); + } + } + + private void runTask() throws SQLException, IOException { + LOG.info("Start update task ..."); + List updateFqdns = new ArrayList<>(); + + Connection mariaConn = MariaDBConnect.getConnection(); + Statement mariaStat = mariaConn.createStatement(); + BaseMariaDB mariaDB = new BaseMariaDB(mariaConn, mariaStat); + BrightCloudUtils brightCloudUtils = new BrightCloudUtils(); + List queryTypes = BrightCloudUtils.getQueryTypes(); + + // expired records + ResultSet expiredSet = mariaDB.getExpiredRecord(); + while (expiredSet.next()) { + updateFqdns.add(expiredSet.getString("fqdn")); + } + long expiredNum = updateFqdns.size(); + + // unlabelled records + ResultSet unlabeledSet = mariaDB.getUnlabelRecord(); + while (unlabeledSet.next()) { + updateFqdns.add(unlabeledSet.getString("fqdn")); + } + long unlabeledNum = updateFqdns.size() - expiredNum; + + //TODO 分批查询 + if (updateFqdns.size()>0){ + JSONObject jsonObj = brightCloudUtils.getQueryResults(updateFqdns); + List updateFiles = 
brightCloudUtils.responseSparse(jsonObj); + mariaDB.updateRecords(updateFiles); + MariaDBConnect.close(mariaStat, mariaConn); + + // 打印处理结果至日志 + LOG.info("[UpdateTask]-update records: " + expiredNum +" expired records, " + unlabeledNum + " unlabeled or failed-query records"); + + // 打印查询操作记录日志 + OutputStream bcQueryLogStream = new FileOutputStream(ApplicationConfig.LOG_BC_QUERY_REPORT_FILE, true); + OutputStreamWriter bcQueryLogWriter = new OutputStreamWriter(bcQueryLogStream, StandardCharsets.UTF_8); + for (String type : queryTypes) { + java.sql.Date d = new java.sql.Date(System.currentTimeMillis()); + bcQueryLogWriter.write(d + "," + "UpdateTask," + null + "," + type + "," + updateFqdns.size() + "\n"); + } + + FileUtils.writerClose(bcQueryLogWriter, bcQueryLogStream); + } + + } +} + + + diff --git a/src/main/java/cn/ac/iie/utils/BrightCloudUtils.java b/src/main/java/cn/ac/iie/utils/BrightCloudUtils.java new file mode 100644 index 0000000..45b68e3 --- /dev/null +++ b/src/main/java/cn/ac/iie/utils/BrightCloudUtils.java @@ -0,0 +1,253 @@ +package cn.ac.iie.utils; + +import cn.ac.iie.dao.FqdnFile; +import cn.ac.iie.config.ApplicationConfig; +import com.alibaba.fastjson.JSON; +import com.alibaba.fastjson.JSONArray; +import com.alibaba.fastjson.JSONObject; +import com.google.common.collect.Lists; +import org.apache.log4j.Logger; + +import java.io.*; +import java.net.HttpURLConnection; +import java.net.URL; +import java.nio.charset.StandardCharsets; +import java.util.*; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + + +/** + * @author yjy + * @version 1.0 + * @date 2021/2/22 2:37 下午 + */ + +public class BrightCloudUtils { + private static final Logger LOG = Logger.getLogger(BrightCloudUtils.class); + private static final Properties props = new Properties(); + private HttpURLConnection con; + private static List queryTypes = new ArrayList<>(); + + static { + try { + 
props.load(BrightCloudUtils.class.getClassLoader().getResourceAsStream("brightcloud.properties")); + } catch (IOException e) { + e.printStackTrace(); + } + + if (ApplicationConfig.QUERY_URL_INFO_SWITCH){ + queryTypes.add(props.getProperty("bc.api.queries.urlcat")); + } + if (ApplicationConfig.QUERY_URL_REP_SWITCH){ + queryTypes.add(props.getProperty("bc.api.queries.urlrep")); + } + if (ApplicationConfig.QUERY_URL_WHOIS_SWITCH){ + queryTypes.add(props.getProperty("bc.api.queries.urlwhois")); + } + assert queryTypes.size()>0: "Switch of all query type has been turned off, please edit the application.properties"; + } + + public HashMap> getCatId2Info() { + return catId2Info; + } + + private final HashMap> catId2Info = new HashMap<>(); + + public JSONObject getQueryResults (List urls) { + if (urls.size()>ApplicationConfig.MAXIMUM_URL_ONCE){ + LOG.warn("Too many urls in a http post request!"); + } + JSONObject jsonRes = null; + try { + + URL url = new URL(props.getProperty("bc.api.url")); + // 打开和URL之间的连接 + con = (HttpURLConnection) url.openConnection(); + con.setRequestMethod(props.getProperty("bc.api.method")); + con.setDoOutput(true); + con.setDoInput(true); + + con.setRequestProperty("Content-Type", "application/json"); + + JSONObject param = new JSONObject(); + param.put("oemid", props.getProperty("bc.oemid")); + param.put("deviceid", props.getProperty("bc.deviceid")); + param.put("uid", props.getProperty("bc.uid")); + + param.put("queries", queryTypes); + param.put("a1cat", props.getProperty("bc.api.a1cat")); + param.put("reputation", props.getProperty("bc.api.reputation")); + param.put("xml", props.getProperty("bc.api.xml")); + + param.put("urls", urls); + + // 建立实际的连接 + con.connect(); + OutputStreamWriter writer = new OutputStreamWriter(this.con.getOutputStream(), StandardCharsets.UTF_8); + writer.write(param.toString()); + writer.flush(); + } catch (IOException e) { + e.printStackTrace(); + } + + try { + // 获取服务端响应,通过输入流来读取URL的响应 + InputStream is = 
con.getInputStream(); + BufferedReader reader = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8)); + StringBuffer sbf = new StringBuffer(); + String strRead = null; + while ((strRead = reader.readLine()) != null) { + sbf.append(strRead); + sbf.append("\r\n"); + } + reader.close(); + + jsonRes = JSONObject.parseObject(sbf.toString()); + con.disconnect(); + } catch (IOException e) { + e.printStackTrace(); + } + return jsonRes; + } + + public HttpURLConnection getCon() { + return con; + } + + public List responseSparse(JSONObject records){ + List fqdnFiles = new ArrayList<>(); + Boolean querySucess = records.get("status").equals(200); + + if (!querySucess) { + System.out.print(records.toString()); + LOG.error("Wrong query. Query type: " + records.get("type")); + } else { + JSONArray array = records.getJSONArray("results"); + for (int i = 0; i < array.size(); i++) { + JSONObject jo = array.getJSONObject(i); + + // json处理 + JSONObject queries = jo.getJSONObject("queries"); + JSONObject getInfo = ApplicationConfig.QUERY_URL_INFO_SWITCH ? + queries.getJSONObject(props.getProperty("bc.api.queries.urlcat")): new JSONObject(); + JSONObject getRepInfo = ApplicationConfig.QUERY_URL_REP_SWITCH ? + queries.getJSONObject(props.getProperty("bc.api.queries.urlrep")): new JSONObject(); + JSONObject getWhoisInfo = ApplicationConfig.QUERY_URL_WHOIS_SWITCH ? 
+ queries.getJSONObject(props.getProperty("bc.api.queries.urlwhois")): new JSONObject(); + + JSONObject cat = getInfo.getJSONArray("cats").getJSONObject(0); + Integer catId = cat.getInteger("catid"); + + String whoisEmail = ""; + if (isEmail(getWhoisInfo.getString("contactemail"))){ + whoisEmail = getWhoisInfo.getString("contactemail"); + } + + fqdnFiles.add(new FqdnFile( + jo.getString("url"), + querySucess, + + getInfo.getInteger("reputation"), + getRepLevel(getInfo.getInteger("reputation")), + catId, + getCatInfo(catId).get(0), + getCatInfo(catId).get(1), + cat.getInteger("conf"), + getInfo.getBoolean("a1cat"), + + getRepInfo.getInteger("popularity"), + getRepInfo.getInteger("age"), + getRepInfo.getString("country"), + getRepInfo.getInteger("threathistory"), + + getWhoisInfo.getString("domainname"), + getWhoisInfo.getDate("audit_auditupdateddate"), + getWhoisInfo.getDate("createddate"), + getWhoisInfo.getDate("expiresdate"), + whoisEmail, + getWhoisInfo.getString("nameservers"), + getWhoisInfo.getString("registrarname"), + getWhoisInfo.getString("registrant_organization"), + getWhoisInfo.getString("registrant_name"), + getWhoisInfo.getString("registrant_street1"), + getWhoisInfo.getString("registrant_city"), + getWhoisInfo.getString("registrant_state"), + getWhoisInfo.getString("registrant_postalcode"), + getWhoisInfo.getString("registrant_country"), + getWhoisInfo.getString("registrant_telephone"))); + + } + } + return fqdnFiles; + } + + private String getRepLevel(Integer repScore){ + String level = null; //用str存放数据 + if (repScore > 80) level="Trustworthy"; + else if (repScore > 60) level="Low Risk"; + else if (repScore > 40) level="Moderate Risk"; + else if (repScore > 20) level="Suspicious"; + else if (repScore > 0) level="High Risk"; + return level; + } + + public static boolean isEmail(String string) { + if (string == null) + return false; + String regEx1 = "^([a-z0-9A-Z]+[-|\\.]?)+[a-z0-9A-Z]@([a-z0-9A-Z]+(-[a-z0-9A-Z]+)?\\.)+[a-zA-Z]{2,}$"; + Pattern p; + 
Matcher m; + p = Pattern.compile(regEx1); + m = p.matcher(string); + return m.matches(); + } + + + private void geneCatInfo(){ + if (catId2Info.size()==0){ + + JSONObject jsonObject = null; + + String s = FileUtils.readJsonFile(props.getProperty("bc.cateinfo.filepath")); + jsonObject = JSON.parseObject(s); + + if (!(jsonObject==null)){ + JSONObject tmp = (JSONObject) jsonObject.getJSONArray("results").get(0); + JSONArray catInfoArray = tmp.getJSONObject("queries").getJSONObject("getcatlist").getJSONArray("cats"); + + for (int i = 0; i < catInfoArray.size(); i++){ + JSONObject keyObject = catInfoArray.getJSONObject(i); + List value = new ArrayList<>(Arrays.asList( + keyObject.getString("catname"), + keyObject.getString("catgroup"))); + catId2Info.put(i+1, value); + } + } + } + } + + public List getCatInfo(Integer catId){ + List info = Arrays.asList("", ""); + + if (0 < catId && catId <= 83) { + if (catId2Info.size()==0){ + geneCatInfo(); + } + + info = catId2Info.get(catId); + + if (info == null){ + LOG.error("Failed at geneCatInfo function"); + System.out.print("Failed at geneCatInfo function"); + } + } + + return info; + } + + public static List getQueryTypes() { + return queryTypes; + } +} + diff --git a/src/main/java/cn/ac/iie/utils/ConfigUtils.java b/src/main/java/cn/ac/iie/utils/ConfigUtils.java new file mode 100644 index 0000000..88a7318 --- /dev/null +++ b/src/main/java/cn/ac/iie/utils/ConfigUtils.java @@ -0,0 +1,41 @@ +package cn.ac.iie.utils; + + +import org.apache.log4j.Logger; +import org.mariadb.jdbc.MariaDbConnection; + +import java.util.Properties; + +public class ConfigUtils { + private static final Logger LOG = Logger.getLogger(ConfigUtils.class); + private static Properties propCommon = new Properties(); + + public static String getStringProperty(String key) { + return propCommon.getProperty(key); + } + + + public static Integer getIntProperty(String key) { + return Integer.parseInt(propCommon.getProperty(key)); + } + + public static Long 
getLongProperty(String key) { + return Long.parseLong(propCommon.getProperty(key)); + } + + public static Boolean getBooleanProperty(String key) { + return "true".equals(propCommon.getProperty(key).toLowerCase().trim()); + } + + static { + try { + propCommon.load(ConfigUtils.class.getClassLoader().getResourceAsStream("application.properties")); + propCommon.load(MariaDbConnection.class.getClassLoader().getResourceAsStream("druid.properties")); + + + } catch (Exception e) { + propCommon = null; + LOG.error("配置加载失败"); + } + } +} diff --git a/src/main/java/cn/ac/iie/utils/FileUtils.java b/src/main/java/cn/ac/iie/utils/FileUtils.java new file mode 100644 index 0000000..90df21b --- /dev/null +++ b/src/main/java/cn/ac/iie/utils/FileUtils.java @@ -0,0 +1,122 @@ +package cn.ac.iie.utils; + +import org.apache.log4j.Logger; + +import java.io.*; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.List; + +/** + * @author yjy + * @version 1.0 + * @date 2021/2/25 6:11 下午 + */ +public class FileUtils { + private static final Logger LOG = Logger.getLogger(FileUtils.class); + + public static List readTxtFileIntoStringArrList(String filePath) + { + List list = new ArrayList<>(); + try + { + String encoding = "GBK"; + File file = new File(filePath); + if (file.isFile() && file.exists()) + { // 判断文件是否存在 + InputStreamReader read = new InputStreamReader( + new FileInputStream(file), encoding); + BufferedReader bufferedReader = new BufferedReader(read); + String lineTxt = null; + + while ((lineTxt = bufferedReader.readLine()) != null) + { + if (!lineTxt.equals("")) { + list.add(lineTxt.trim()); + } + } + bufferedReader.close(); + read.close(); + } + else + { + System.out.println("Can not find file: " + filePath); + } + } + catch (Exception e) + { + System.out.println("Error occurred in Function 'readTxtFileIntoStringArrList'"); + e.printStackTrace(); + } + + return list; + } + + public static void createFile(File filePath, String fileName){ + try { 
+ File file = new File(filePath.toString() + "/" + fileName); + + if (!filePath.exists()){ + filePath.mkdirs(); + } + + boolean isCreate = file.createNewFile(); + if (isCreate){ + LOG.info("File " + fileName + " is created."); + } + } catch (IOException e) { + e.printStackTrace(); + } + + } + + public static void createFile(File file){ + try { + boolean isCreate = file.createNewFile(); + if (isCreate){ + LOG.info("File " + file + " is created."); + } + } catch (IOException e) { + e.printStackTrace(); + } + + } + + + public static String readJsonFile(String fileName) { + String jsonStr = ""; + try { + File jsonFile = new File(fileName); + FileReader fileReader = new FileReader(jsonFile); + + Reader reader = new InputStreamReader(new FileInputStream(jsonFile), "utf-8"); + int ch = 0; + StringBuffer sb = new StringBuffer(); + while ((ch = reader.read()) != -1) { + sb.append((char) ch); + } + + fileReader.close(); + reader.close(); + jsonStr = sb.toString(); + return jsonStr; + } catch (IOException e) { + e.printStackTrace(); + return null; + } + } + + public static String getFileName(File file){ + String[] tmp = file.toString().split("/"); + String fileName = tmp[tmp.length-1]; + return fileName; + } + + + public static void writerClose(OutputStreamWriter outWriter, OutputStream outStream) throws IOException { + assert outWriter != null; + outWriter.close(); + outStream.close(); + } + +} diff --git a/src/main/java/cn/ac/iie/utils/LogUtils.java b/src/main/java/cn/ac/iie/utils/LogUtils.java new file mode 100644 index 0000000..8d38b1c --- /dev/null +++ b/src/main/java/cn/ac/iie/utils/LogUtils.java @@ -0,0 +1,18 @@ +package cn.ac.iie.utils; + +import cn.ac.iie.config.ApplicationConfig; +import java.io.*; + +/** + * @author yjy + * @version 1.0 + * @date 2021/3/10 3:31 下午 + */ +public class LogUtils { + // 确认BrightCloud查询记录日志文件存在 + static { + assert new File(ApplicationConfig.LOG_BC_QUERY_REPORT_FILE).exists(): + "Cannot find Bright Cloud query log file: " + 
ApplicationConfig.LOG_BC_QUERY_REPORT_FILE; + } + +} diff --git a/src/main/java/cn/ac/iie/utils/MariaDBConnect.java b/src/main/java/cn/ac/iie/utils/MariaDBConnect.java new file mode 100644 index 0000000..0e7df36 --- /dev/null +++ b/src/main/java/cn/ac/iie/utils/MariaDBConnect.java @@ -0,0 +1,92 @@ +package cn.ac.iie.utils; /** + * @author yjy + * @version 1.0 + * @date 2021/2/24 4:17 下午 + */ +import com.alibaba.druid.pool.DruidDataSourceFactory; +import me.geso.jdbcutils.JDBCUtils; + +import javax.sql.DataSource; +import java.io.IOException; +import java.sql.Connection; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Statement; +import java.util.Properties; + +/** + * Druid连接池的工具类 + */ +public class MariaDBConnect { + + //1.定义成员变量 DataSource + private static DataSource ds ; + + static{ + try { + //1.加载配置文件 + Properties pro = new Properties(); + pro.load(JDBCUtils.class.getClassLoader().getResourceAsStream("druid.properties")); + //2.获取DataSource + ds = DruidDataSourceFactory.createDataSource(pro); + } catch (IOException e) { + e.printStackTrace(); + } catch (Exception e) { + e.printStackTrace(); + } + } + + /** + * 获取连接 + */ + public static Connection getConnection() throws SQLException { + return ds.getConnection(); + } + + /** + * 释放资源 + */ + public static void close(Statement stmt,Connection conn){ + + close(null,stmt,conn); + } + + + public static void close(ResultSet rs , Statement stmt, Connection conn){ + + + if(rs != null){ + try { + rs.close(); + } catch (SQLException e) { + e.printStackTrace(); + } + } + + + if(stmt != null){ + try { + stmt.close(); + } catch (SQLException e) { + e.printStackTrace(); + } + } + + if(conn != null){ + try { + conn.close(); + } catch (SQLException e) { + e.printStackTrace(); + } + } + } + + /** + * 获取连接池方法 + */ + + public static DataSource getDataSource(){ + return ds; + } + +} \ No newline at end of file diff --git a/src/main/java/cn/ac/iie/utils/TimeUtils.java 
b/src/main/java/cn/ac/iie/utils/TimeUtils.java new file mode 100644 index 0000000..eaf07b4 --- /dev/null +++ b/src/main/java/cn/ac/iie/utils/TimeUtils.java @@ -0,0 +1,65 @@ +package cn.ac.iie.utils; + +import cn.ac.iie.config.ApplicationConfig; +import java.util.Calendar; +import java.util.Date; + +/** + * @author yjy + * @version 1.0 + * @date 2021/2/25 11:26 上午 + */ +public class TimeUtils { + public static final Long HOUR_TO_MILLISECONDS = 3600000L; + public static final Long DAY_TO_MILLSEDONDS = 86400000L; + public static final Integer SECOND_TO_MILLSEDONDS = 1000; + + + public static Date getStartOfHour() { + return getStartOfHour(0); + } + + public static Date getStartOfHour(Integer offset) { + Calendar ca = Calendar.getInstance(); + ca.add(Calendar.HOUR, offset); + ca.set(Calendar.MINUTE, 0); + ca.set(Calendar.SECOND, 0); + ca.set(Calendar.MILLISECOND, 0); + return ca.getTime(); + } + + public static Date getStartOfDay() { + return getStartOfDay(0); + } + public static Date getStartOfDay(Integer bias) { + Calendar ca = Calendar.getInstance(); + ca.add(Calendar.DATE, bias); + ca.set(Calendar.HOUR, -12); + ca.set(Calendar.MINUTE, 0); + ca.set(Calendar.SECOND, 0); + ca.set(Calendar.MILLISECOND, 0); + return ca.getTime(); + } + + public static Date getStartOfMonth() { + Calendar ca = Calendar.getInstance(); + ca.set(Calendar.DATE, 1); + ca.set(Calendar.HOUR, -12); + ca.set(Calendar.MINUTE, 0); + ca.set(Calendar.SECOND, 0); + ca.set(Calendar.MILLISECOND, 0); + return ca.getTime(); + } + + public static Date getExpiredTime(){ + return new java.sql.Timestamp(TimeUtils.getStartOfDay(-ApplicationConfig.UPDATE_SCHEDULE_DAY).getTime()); + } + + public static Date getExpiredTime(String test){ + Date date = Calendar.getInstance().getTime(); + date.setHours(11); + return date; + } + +} + diff --git a/src/main/resources/application.properties b/src/main/resources/application.properties new file mode 100644 index 0000000..c9987b5 --- /dev/null +++ 
b/src/main/resources/application.properties @@ -0,0 +1,44 @@ +# fqdn域名保留层级数 +query.standard.fqdn.level = 3 + +# 离线导入指定目录 +offline.import.path = /home/WebSKT/Data/request_file +# 离线指定查询结果保存目录 +offline.output.path = /home/WebSKT/Data/output_file + +# switch +offline.switch = true +update.switch = false +query.url_info.switch = true +query.url_rep.switch = false +query.url_whois.switch = false + +# 离线导入文件后缀名 +offline.import.filename.suffix = .compl +offline.import.filedone.suffix = .done +offline.output.filename.suffix = .result + +# 离线导入定时任务时间间隔10s +offline.schedule.second = 5 +# 数据库更新定时任务时间间隔1d 1000 * 3600 * 24 = 86400000 +update.schedule.day = 7 + +# mariadb +database = web_sketch +table = fqdn_profile_via_brightcloud + +# bc api单次查询url长度限制 API最高限制 +maximum.url.once = 100 + +# log +# 打印进度日志的查询条数 +log.offline.number = 10000 +log.bc.query.report.file = /home/WebSKT/Data/bright_cloud_query_count.csv + +## 离线导入指定目录 +#offline.import.path = /Users/joy/work/iie/project/cyber_narrator/APP/WebSketch/QueryAgentV1/files/import_file +## 离线指定查询结果保存目录 +#offline.output.path = /Users/joy/work/iie/project/cyber_narrator/APP/WebSketch/QueryAgentV1/files/output_file +## 服务调用统计结果 +#log.bc.query.report.file = /Users/joy/work/iie/project/cyber_narrator/App/WebSketch/LOG_IMPORTANT/bright_cloud_query_count.csv + diff --git a/src/main/resources/brightcloud.properties b/src/main/resources/brightcloud.properties new file mode 100644 index 0000000..e787ac3 --- /dev/null +++ b/src/main/resources/brightcloud.properties @@ -0,0 +1,23 @@ +bc.oemid = GeedgeNet +bc.deviceid = TSG-Dev +bc.uid = GN0001 + +bc.api.url = https://api.bcti.brightcloud.com/1.0/url/getinfo +bc.api.method = POST +bc.api.type = url + +bc.api.queries.urlcat = getinfo +bc.api.queries.urlrep = getrepinfo +bc.api.queries.urlwhois = getwhoisinfo +bc.api.a1cat = 1 +# 是否默认返回信誉评分 +bc.api.reputation = 1 +# 返回json格式 +bc.api.xml = 0 + +bc.cateinfo.filepath = /home/WebSKT/Data/categoryinfo.json +# for test +#bc.cateinfo.filepath = 
/Users/joy/work/iie/project/cyber_narrator/App/WebSketch/QueryAgentV1/src/main/resources/categoryinfo.json + + + diff --git a/src/main/resources/categoryinfo.json b/src/main/resources/categoryinfo.json new file mode 100644 index 0000000..aa50426 --- /dev/null +++ b/src/main/resources/categoryinfo.json @@ -0,0 +1,431 @@ +{ + "status": 200, + "requestid": "12345", + "type": "url", + "results": [ + { + "url": "getcatlist", + "queries": { + "getcatlist": { + "cats": [ + { + "catid": 1, + "catname": "Real Estate", + "catgroup": "Productivity" + }, + { + "catid": 2, + "catname": "Computer and Internet Security", + "catgroup": "Productivity" + }, + { + "catid": 3, + "catname": "Financial Services", + "catgroup": "Privacy" + }, + { + "catid": 4, + "catname": "Business and Economy", + "catgroup": "Productivity" + }, + { + "catid": 5, + "catname": "Computer and Internet Info", + "catgroup": "Productivity" + }, + { + "catid": 6, + "catname": "Auctions", + "catgroup": "Productivity" + }, + { + "catid": 7, + "catname": "Shopping", + "catgroup": "Productivity" + }, + { + "catid": 8, + "catname": "Cult and Occult", + "catgroup": "Sensitive" + }, + { + "catid": 9, + "catname": "Travel", + "catgroup": "Productivity" + }, + { + "catid": 10, + "catname": "Abused Drugs", + "catgroup": "Sensitive" + }, + { + "catid": 11, + "catname": "Adult and Pornography", + "catgroup": "Sensitive" + }, + { + "catid": 12, + "catname": "Home and Garden", + "catgroup": "Productivity" + }, + { + "catid": 13, + "catname": "Military", + "catgroup": "Productivity" + }, + { + "catid": 14, + "catname": "Social Networking", + "catgroup": "Productivity" + }, + { + "catid": 15, + "catname": "Dead Sites", + "catgroup": "Misc" + }, + { + "catid": 16, + "catname": "Individual Stock Advice and Tools", + "catgroup": "Productivity" + }, + { + "catid": 17, + "catname": "Training and Tools", + "catgroup": "Productivity" + }, + { + "catid": 18, + "catname": "Dating", + "catgroup": "Sensitive" + }, + { + "catid": 19, + 
"catname": "Sex Education", + "catgroup": "Sensitive" + }, + { + "catid": 20, + "catname": "Religion", + "catgroup": "Sensitive" + }, + { + "catid": 21, + "catname": "Entertainment and Arts", + "catgroup": "Productivity" + }, + { + "catid": 22, + "catname": "Personal sites and Blogs", + "catgroup": "Productivity" + }, + { + "catid": 23, + "catname": "Legal", + "catgroup": "Privacy" + }, + { + "catid": 24, + "catname": "Local Information", + "catgroup": "Productivity" + }, + { + "catid": 25, + "catname": "Streaming Media", + "catgroup": "IT Resources" + }, + { + "catid": 26, + "catname": "Job Search", + "catgroup": "Productivity" + }, + { + "catid": 27, + "catname": "Gambling", + "catgroup": "Sensitive" + }, + { + "catid": 28, + "catname": "Translation", + "catgroup": "Sensitive" + }, + { + "catid": 29, + "catname": "Reference and Research", + "catgroup": "Productivity" + }, + { + "catid": 30, + "catname": "Shareware and Freeware", + "catgroup": "IT Resources" + }, + { + "catid": 31, + "catname": "Peer to Peer", + "catgroup": "IT Resources" + }, + { + "catid": 32, + "catname": "Marijuana", + "catgroup": "Sensitive" + }, + { + "catid": 33, + "catname": "Hacking", + "catgroup": "Sensitive" + }, + { + "catid": 34, + "catname": "Games", + "catgroup": "Productivity" + }, + { + "catid": 35, + "catname": "Philosophy and Political Advocacy", + "catgroup": "Productivity" + }, + { + "catid": 36, + "catname": "Weapons", + "catgroup": "Sensitive" + }, + { + "catid": 37, + "catname": "Pay to Surf", + "catgroup": "Productivity" + }, + { + "catid": 38, + "catname": "Hunting and Fishing", + "catgroup": "Productivity" + }, + { + "catid": 39, + "catname": "Society", + "catgroup": "Productivity" + }, + { + "catid": 40, + "catname": "Educational Institutions", + "catgroup": "Productivity" + }, + { + "catid": 41, + "catname": "Online Greeting Cards", + "catgroup": "IT Resources" + }, + { + "catid": 42, + "catname": "Sports", + "catgroup": "Productivity" + }, + { + "catid": 43, + 
"catname": "Swimsuits and Intimate Apparel", + "catgroup": "Sensitive" + }, + { + "catid": 44, + "catname": "Questionable", + "catgroup": "Sensitive" + }, + { + "catid": 45, + "catname": "Kids", + "catgroup": "Productivity" + }, + { + "catid": 46, + "catname": "Hate and Racism", + "catgroup": "Sensitive" + }, + { + "catid": 47, + "catname": "Personal Storage", + "catgroup": "IT Resources" + }, + { + "catid": 48, + "catname": "Violence", + "catgroup": "Sensitive" + }, + { + "catid": 49, + "catname": "Keyloggers and Monitoring", + "catgroup": "Security" + }, + { + "catid": 50, + "catname": "Search Engines", + "catgroup": "Productivity" + }, + { + "catid": 51, + "catname": "Internet Portals", + "catgroup": "Productivity" + }, + { + "catid": 52, + "catname": "Web Advertisements", + "catgroup": "IT Resources" + }, + { + "catid": 53, + "catname": "Cheating", + "catgroup": "Sensitive" + }, + { + "catid": 54, + "catname": "Gross", + "catgroup": "Sensitive" + }, + { + "catid": 55, + "catname": "Web-based Email", + "catgroup": "Privacy" + }, + { + "catid": 56, + "catname": "Malware Sites", + "catgroup": "Security" + }, + { + "catid": 57, + "catname": "Phishing and Other Frauds", + "catgroup": "Security" + }, + { + "catid": 58, + "catname": "Proxy Avoidance and Anonymizers", + "catgroup": "Security" + }, + { + "catid": 59, + "catname": "Spyware and Adware", + "catgroup": "Security" + }, + { + "catid": 60, + "catname": "Music", + "catgroup": "Productivity" + }, + { + "catid": 61, + "catname": "Government", + "catgroup": "Privacy" + }, + { + "catid": 62, + "catname": "Nudity", + "catgroup": "Sensitive" + }, + { + "catid": 63, + "catname": "News and Media", + "catgroup": "Productivity" + }, + { + "catid": 64, + "catname": "Illegal", + "catgroup": "Sensitive" + }, + { + "catid": 65, + "catname": "Content Delivery Networks", + "catgroup": "IT Resources" + }, + { + "catid": 66, + "catname": "Internet Communications", + "catgroup": "IT Resources" + }, + { + "catid": 67, + "catname": 
"Bot Nets", + "catgroup": "Security" + }, + { + "catid": 68, + "catname": "Abortion", + "catgroup": "Sensitive" + }, + { + "catid": 69, + "catname": "Health and Medicine", + "catgroup": "Privacy" + }, + { + "catid": 70, + "catname": "Confirmed SPAM Sources", + "catgroup": "Unmaintained" + }, + { + "catid": 71, + "catname": "SPAM URLs", + "catgroup": "Security" + }, + { + "catid": 72, + "catname": "Unconfirmed SPAM Sources", + "catgroup": "Unmaintained" + }, + { + "catid": 73, + "catname": "Open HTTP Proxies", + "catgroup": "Unmaintained" + }, + { + "catid": 74, + "catname": "Dynamically Generated Content", + "catgroup": "Productivity" + }, + { + "catid": 75, + "catname": "Parked Domains", + "catgroup": "Sensitive" + }, + { + "catid": 76, + "catname": "Alcohol and Tobacco", + "catgroup": "Sensitive" + }, + { + "catid": 77, + "catname": "Private IP Addresses", + "catgroup": "Unmaintained" + }, + { + "catid": 78, + "catname": "Image and Video Search", + "catgroup": "Sensitive" + }, + { + "catid": 79, + "catname": "Fashion and Beauty", + "catgroup": "Productivity" + }, + { + "catid": 80, + "catname": "Recreation and Hobbies", + "catgroup": "Productivity" + }, + { + "catid": 81, + "catname": "Motor Vehicles", + "catgroup": "Productivity" + }, + { + "catid": 82, + "catname": "Web Hosting", + "catgroup": "IT Resources" + }, + { + "catid": 83, + "catname": "Food and Dining", + "catgroup": "Unmaintained" + } + ] + } + } + } + ] +} \ No newline at end of file diff --git a/src/main/resources/druid.properties b/src/main/resources/druid.properties new file mode 100644 index 0000000..cf055f0 --- /dev/null +++ b/src/main/resources/druid.properties @@ -0,0 +1,9 @@ +driverClassName=org.mariadb.jdbc.Driver +url=jdbc:mariadb://api.geedge.net:3306/web_sketch +username=root +password=webskt2021 +initialSize=5 +maxActive=10 +maxWait=1000 + +validationQuery=SELECT 1 \ No newline at end of file diff --git a/src/main/resources/log4j.properties b/src/main/resources/log4j.properties new file 
mode 100644 index 0000000..405080d --- /dev/null +++ b/src/main/resources/log4j.properties @@ -0,0 +1,23 @@ + +########################## logger ############################## + +### 设置### +log4j.rootLogger = debug,E,stdout + +log4j.appender.stdout = org.apache.log4j.ConsoleAppender +log4j.appender.stdout.Target = System.out +log4j.appender.stdout.layout = org.apache.log4j.PatternLayout +log4j.appender.stdout.layout.ConversionPattern = [%-5p] %d{yyyy-MM-dd HH:mm:ss,SSS} method:%l%n%m%n + + +log4j.appender.E = org.apache.log4j.DailyRollingFileAppender +log4j.appender.E.File =/home/WebSKT/Source/query_agent/logs/run.log +log4j.appender.E.Append = true +log4j.appender.E.Threshold = INFO +log4j.appender.E.layout = org.apache.log4j.PatternLayout +log4j.appender.E.layout.ConversionPattern = %-d{yyyy-MM-dd HH:mm:ss} [ %t:%r ] - [ %p ] %m%n + +######################### +#### for test ########### +######################### +#log4j.appender.E.File =/Users/joy/work/iie/project/cyber_narrator/App/WebSketch/QueryAgentV1/logs/run.log \ No newline at end of file