diff --git a/src/entry/FQDN_engine.cpp b/src/entry/FQDN_engine.cpp new file mode 100644 index 0000000..7546850 --- /dev/null +++ b/src/entry/FQDN_engine.cpp @@ -0,0 +1,343 @@ +/* + * + * Copyright (c) 2020 + * String Algorithms Research Group + * Institute of Information Engineering, Chinese Academy of Sciences (IIE-CAS) + * National Engineering Laboratory for Information Security Technologies (NELIST) + * All rights reserved + * + * Written by: LIU YANBING (liuyanbing@iie.ac.cn) + * Last modification: 2020-09-01 + * + * This code is the exclusive and proprietary property of IIE-CAS and NELIST. + * Usage for direct or indirect commercial advantage is not allowed without + * written permission from the authors. + * + */ + +#include "FQDN_engine.h" +#include +#include +#include +#include +#include + +/*************************************************************************************/ +#include +#define popcnt_u64 _mm_popcnt_u64 + +#define FOR(i, n) for(int i=0, _n=(int)(n); i<_n; i++) + +struct packedRT_t /* Rank-table block covering 256 hash slots: bitmap marks the slots hit by a complete rule, A is the number of set bits in all preceding blocks, B[i] the number of set bits in the bitmap words before word i (filled in by initialize). */ +{ + unsigned long long bitmap[4]; + unsigned int A; + unsigned char B[4]; +}; + +void * aligned_malloc(size_t size, size_t align) /* Returns size bytes aligned to align, or a null pointer when align is zero or not a power of two, size is zero, or malloc fails. The raw malloc pointer is stored just below the returned address so that aligned_free can release it. */ +{ + void * malloc_ptr; + void * aligned_ptr; + + /* Error if align is not a power of two. 
*/ + if (align & (align - 1)) + { + return ((void*) 0); + } + + if (align==0 || size == 0) + { + return ((void *) 0); + } + + malloc_ptr = malloc (sizeof(void *) + align - 1 + size); + if (!malloc_ptr) + { + return ((void *) 0); + } + + aligned_ptr = (void *) (((size_t)malloc_ptr + sizeof(void *) + align-1) & ~(align-1)); + + ((void **) aligned_ptr) [-1] = malloc_ptr; + + return aligned_ptr; +} + +void aligned_free(void * aligned_ptr) /* Releases a block obtained from aligned_malloc; a null pointer is ignored. */ +{ + if (aligned_ptr) + { + free (((void **) aligned_ptr) [-1]); + } +} + +/*************************************************************************************/ +struct domain_impl_t /* One rule as stored by the engine; entries that share a bucket of m_matched are chained through next. */ +{ + unsigned int id; + int suf_match; + unsigned int len; + unsigned long long hash; /* a 64-bit hash value is used to uniquely represent a domain name */ + domain_impl_t * next; + void * utag; +}; + +class CHashTrieFQDN /* Hash-based FQDN matcher: initialize() builds the tables from a rule array, search() looks up one FQDN. */ +{ +public: + CHashTrieFQDN(); + ~CHashTrieFQDN(); + + int initialize(const struct FQDN_rule * rules, size_t n_rule); + int search(const char * FQDN, size_t FQDN_len, struct FQDN_match * results, size_t n_result); + +protected: + unsigned int rank(unsigned int h); + +protected: + unsigned int m_num; + domain_impl_t * m_domains; + unsigned int m_H; + unsigned int m_max_pat_len; + unsigned long long * m_B; + packedRT_t * m_RT; + domain_impl_t ** m_matched; + unsigned char m_case_tab[256]; +}; + + +const unsigned long long ONE=1; + +#define is_bit_set(tbl, off) ( tbl[off>>6] & ( ONE<<(off&63) ) ) +#define set_bit(tbl, off) ( tbl[off>>6] |= ( ONE<<(off&63) ) ) + +const unsigned long long A=6364136223846793005LL; /* multiplier of the rolling hash (hash = A*hash + byte) */ + +CHashTrieFQDN::CHashTrieFQDN() +{ + m_num=0; + m_domains=NULL; + m_B=NULL; + m_RT=NULL; + m_matched=NULL; + FOR(c, 256) m_case_tab[c]=tolower(c); +} + +CHashTrieFQDN::~CHashTrieFQDN() +{ + if(m_domains!=NULL) + { + delete [] m_domains; + } + + if(m_B!=NULL) + { + delete [] m_B; + } + + if(m_RT!=NULL) + { + aligned_free(m_RT); + } + + if(m_matched!=NULL) + { + delete [] m_matched; + } +} + +int CHashTrieFQDN::initialize(const struct FQDN_rule * rules, size_t n_rule) /* Builds the lookup tables from rules[0..n_rule-1]. Returns -1 when n_rule is 0 or the rank table cannot be allocated, 1 on success. */ +{ + long
long mem_bytes=0; /* running total of table memory, printed at the end */ + + if(n_rule==0) return -1; + m_num=n_rule; + m_domains=new domain_impl_t[m_num]; + mem_bytes+=m_num*sizeof(domain_impl_t); + + unsigned int N=m_num; + m_max_pat_len=0; + + FOR(k, m_num) + { + m_domains[k].id =rules[k].id; + m_domains[k].suf_match=rules[k].is_suffix_match; + m_domains[k].len=rules[k].len; + m_domains[k].next=NULL; + m_domains[k].utag=rules[k].user_tag; + + FOR(j, rules[k].len) + { + if(rules[k].FQDN[j]=='.') ++N; + } + + if(m_max_pat_len>6]; + mem_bytes+=(m_H>>6)*sizeof(unsigned long long); + FOR(i, (m_H>>6)) m_B[i]=0; + + m_RT=(packedRT_t *)aligned_malloc(sizeof(packedRT_t)*((m_H>>8)+1), 64); if(m_RT==NULL) return -1; /* allocation failed; the destructor releases what was already allocated */ + mem_bytes+=((m_H>>8)+1)*sizeof(packedRT_t); + FOR(i, (m_H>>8)) + { + FOR(j, 4) m_RT[i].bitmap[j]=0; + } + + FOR(k, m_num) + { + const unsigned char * pb=(const unsigned char *)rules[k].FQDN; + m_domains[k].hash=0; /* hashed from the last byte to the first, lower-cased, dots skipped; every dot boundary and the full name mark their slot in m_B, the full name also marks the rank-table bitmap */ + + for(int j=rules[k].len-1; j>=-1; --j) + { + if(j==-1 || pb[j]=='.') + { + unsigned int h=m_domains[k].hash&(m_H-1); + set_bit(m_B, h); + if(j==-1) + { + int q=h&255; + m_RT[h>>8].bitmap[q>>6]|=(ONE<<(q&63)); + } + } + else + { + m_domains[k].hash=A*m_domains[k].hash+m_case_tab[pb[j]]; + } + } + } + + m_RT[0].A=0; /* fill the cumulative bit counts A and B of every block */ + FOR(i, (m_H>>8)) + { + m_RT[i].B[0]=0; + m_RT[i].B[1]= popcnt_u64(m_RT[i].bitmap[0]); + m_RT[i].B[2]= m_RT[i].B[1]+popcnt_u64(m_RT[i].bitmap[1]); + m_RT[i].B[3]= m_RT[i].B[2]+popcnt_u64(m_RT[i].bitmap[2]); + m_RT[i+1].A=m_RT[i].A+m_RT[i].B[3]+popcnt_u64(m_RT[i].bitmap[3]); + } + + int tn=m_RT[m_H>>8].A; /* total number of set bitmap bits, used as the number of buckets */ + + m_matched=new domain_impl_t *[tn]; + mem_bytes+=tn*sizeof(domain_impl_t *); + FOR(i, tn) m_matched[i]=NULL; + + FOR(k, m_num) + { + unsigned int h=m_domains[k].hash&(m_H-1); + unsigned idx=rank(h); + m_domains[k].next=m_matched[idx]; + m_matched[idx]=&(m_domains[k]); + } + + printf("mem_bytes=%lld(MB)\n", mem_bytes/(1LL<<20)); + + return 1; +} + +unsigned int CHashTrieFQDN::rank(unsigned int h) +{ + int p=(h>>8); + int r=((h&255)>>6); + int s=(h&63); + unsigned long long e=m_RT[p].bitmap[r]&((ONE<=-1; --j) + { +
if(j==-1 || pb[j]=='.') + { + unsigned int h=hash&(m_H-1); + if(is_bit_set(m_B, h)==0) break; + HASH[t]=hash; + P[t]=j+1; + ++t; + } + else if(j+m_max_pat_len=0; t--) + { + unsigned int h=HASH[t]&(m_H-1); + int q=h&255; + + if(m_RT[h>>8].bitmap[q>>6]&(ONE<<(q&63))) + { + unsigned idx=rank(h); + + for(domain_impl_t * pt=m_matched[idx]; pt!=NULL; pt=pt->next) + { + if(P[t]!=0 && pt->suf_match==0) continue; + if(pt->len+P[t]==FQDN_len && pt->hash==HASH[t]) + { + if(match_num>0 && P[t]!=results[match_num-1].offset) return match_num; /* stop before mixing offsets: every reported match shares one offset */ + results[match_num].id=pt->id; + results[match_num].offset=P[t]; + results[match_num].user_tag=pt->utag; + ++match_num; + if(match_num==n_result) return match_num; + } + } + } + } + + return match_num; +} + +/*************************************************************************************/ +struct FQDN_engine +{ + CHashTrieFQDN ht; +}; + +struct FQDN_engine * FQDN_engine_new(const struct FQDN_rule * rules, size_t n_rule) /* Returns a new engine, or NULL when initialize() reports failure. */ +{ + struct FQDN_engine * instance=new struct FQDN_engine; + if(instance->ht.initialize(rules, n_rule)<0) + { + delete instance; + return NULL; + } + else + { + return instance; + } +} + +int FQDN_engine_search(struct FQDN_engine * instance, const char * FQDN, size_t FQDN_len, struct FQDN_match * results, size_t n_result) /* Returns -1 for a NULL instance, otherwise the result of CHashTrieFQDN::search. */ +{ + if(instance==NULL) return -1; + return instance->ht.search(FQDN, FQDN_len, results, n_result); +} + +void FQDN_engine_free(struct FQDN_engine * instance) +{ + if(instance!=NULL) delete instance; +} diff --git a/src/entry/Maat_rule.cpp b/src/entry/Maat_rule.cpp index e6c30e1..3630135 100644 --- a/src/entry/Maat_rule.cpp +++ b/src/entry/Maat_rule.cpp @@ -1392,6 +1392,11 @@ int add_digest_rule(struct Maat_table_schema* table, struct db_digest_rule* db_r scanner->gie_update_q_size++; return 0; } +int add_fqdn_rule(struct Maat_table_schema* table, struct db_fqdn_rule* db_rule, struct Maat_scanner *scanner,void* logger) +{ + return 0; /* TODO: not implemented yet; returns success so the non-void function does not fall off its end */ +} + int del_region_rule(struct Maat_table_schema* table, int region_id,
int group_id, int rule_type, struct Maat_scanner *maat_scanner, void* logger) { int i=0; @@ -1430,6 +1435,10 @@ int del_region_rule(struct Maat_table_schema* table, int region_id, int group_id table_rt=Maat_table_runtime_get(maat_scanner->table_rt_mgr, table->table_id); Maat_table_runtime_digest_del(table_rt, region->expr_id_lb); maat_scanner->gie_update_q_size++; + break; + case TABLE_TYPE_FQDN: + case TABLE_TYPE_FQDN_PLUGIN: + break; default: assert(0); @@ -2163,6 +2172,65 @@ error_out: digest_rule=NULL; return; } +void update_fqdn_rule(struct Maat_table_schema* table, const char* table_line, struct Maat_scanner *scanner, void* logger) /* Parses one fqdn table line (region_id, group_id, is_suffix_match, fqdn, is_valid) and adds or deletes the rule according to is_valid. */ +{ + struct Maat_table_runtime* table_rt=Maat_table_runtime_get(scanner->table_rt_mgr, table->table_id); + struct db_fqdn_rule* fqdn_rule=ALLOC(struct db_fqdn_rule, 1); + int ret=0; + char fqdn_buff[MAX_TABLE_LINE_SIZE]={'\0'}; + + ret=sscanf(table_line,"%d\t%d\t%d\t%s\t%d", &(fqdn_rule->region_id), + &(fqdn_rule->group_id), + &(fqdn_rule->is_suffix_match), + fqdn_buff, + &(fqdn_rule->is_valid)); + + fqdn_rule->fqdn=fqdn_buff; + if(ret!=5) + { + MESA_handle_runtime_log(logger,RLOG_LV_FATAL,maat_module , + "update error, invalid format of fqdn table %s:%s" + ,table->table_name[table->updating_name],table_line); + table->udpate_err_cnt++; + goto error_out; + } + + if(fqdn_rule->is_valid==FALSE) + { + //digest rule is not built with rulescan, this rule type is useless in count_rs_region function. NOTE(review): comment inherited from the digest path; confirm it also holds for fqdn tables.
+ ret=del_region_rule(table, fqdn_rule->region_id, fqdn_rule->group_id, 0, scanner, logger); + if(ret<0) + { + table->udpate_err_cnt++; + } + else + { + table_rt->origin_rule_num--; + } + + } + else + { + ret=add_fqdn_rule(table, fqdn_rule,scanner,logger); + if(ret<0) + { + MESA_handle_runtime_log(logger,RLOG_LV_INFO,maat_module , + "duplicate config of fqdn table %s config_id=%d" + ,table->table_name[table->updating_name],fqdn_rule->region_id); + table->udpate_err_cnt++; + } + else + { + table_rt->origin_rule_num++; + } + + } +error_out: + fqdn_rule->fqdn=NULL; + free(fqdn_rule); + fqdn_rule=NULL; + return; +} void update_plugin_table(struct Maat_table_schema* table_schema, const char* row, Maat_scanner* scanner, const struct rule_tag* tags, int n_tags, void* logger) { diff --git a/src/entry/Maat_table.cpp b/src/entry/Maat_table.cpp index 2994445..3efc780 100644 --- a/src/entry/Maat_table.cpp +++ b/src/entry/Maat_table.cpp @@ -491,6 +491,8 @@ struct Maat_table_manager* Maat_table_manager_create(const char* table_info_path map_register(string2int_map,"compile", TABLE_TYPE_COMPILE); map_register(string2int_map,"plugin", TABLE_TYPE_PLUGIN); map_register(string2int_map,"ip_plugin", TABLE_TYPE_IP_PLUGIN); + map_register(string2int_map,"fqdn", TABLE_TYPE_FQDN); + map_register(string2int_map,"fqdn_plugin", TABLE_TYPE_FQDN_PLUGIN); map_register(string2int_map,"intval", TABLE_TYPE_INTERVAL); map_register(string2int_map,"interval", TABLE_TYPE_INTERVAL); map_register(string2int_map,"intval_plus", TABLE_TYPE_INTERVAL_PLUS); @@ -616,6 +618,9 @@ struct Maat_table_manager* Maat_table_manager_create(const char* table_info_path { p->compile.user_region_encoding=USER_REGION_ENCODE_NONE; } + break; + case TABLE_TYPE_FQDN: + default: break; } diff --git a/src/entry/Maat_table_runtime.cpp b/src/entry/Maat_table_runtime.cpp index fe0c574..48a5388 100644 --- a/src/entry/Maat_table_runtime.cpp +++ b/src/entry/Maat_table_runtime.cpp @@ -357,6 +357,10 @@ void
Maat_table_runtime_digest_del(struct Maat_table_runtime* table_rt, int expr ,NULL); MESA_lqueue_join_tail(table_rt->similar.update_q,&digest_rule, sizeof(void*)); return; +} +void Maat_table_runtime_fqdn_add(struct Maat_table_runtime* table_rt, int expr_id, const char* fqdn, int is_suffix_match, void* tag) +{ + /* TODO: not implemented yet */ } int Maat_table_runtime_digest_batch_udpate(struct Maat_table_runtime* table_rt) { diff --git a/src/inc_internal/FQDN_engine.h b/src/inc_internal/FQDN_engine.h new file mode 100644 index 0000000..d2213fc --- /dev/null +++ b/src/inc_internal/FQDN_engine.h @@ -0,0 +1,69 @@ +/* + * + * Copyright (c) 2020 + * String Algorithms Research Group + * Institute of Information Engineering, Chinese Academy of Sciences (IIE-CAS) + * National Engineering Laboratory for Information Security Technologies (NELIST) + * All rights reserved + * + * Written by: LIU YANBING (liuyanbing@iie.ac.cn) + * Last modification: 2020-09-01 + * + * This code is the exclusive and proprietary property of IIE-CAS and NELIST. + * Usage for direct or indirect commercial advantage is not allowed without + * written permission from the authors. + * + */ + +#ifndef H_FQDN_ENGINE_H +#define H_FQDN_ENGINE_H + +#ifdef __cplusplus +extern "C" { +#endif + + #include + + struct FQDN_rule + { + unsigned int id; + int is_suffix_match; /* is_suffix_match==0: exact match; is_suffix_match==1: longest suffix matching. */ + size_t len; + char * FQDN; /* Non-ASCII character is allowed. */ + void * user_tag; /* A transparent user tag for convenient accessing, the caller is responsible for its memory management. */ + }; + + struct FQDN_engine; + + struct FQDN_engine * FQDN_engine_new(const struct FQDN_rule * rules, size_t n_rule); + + struct FQDN_match + { + unsigned int id; + unsigned int offset; /* offset==0 for exact matching; offset>0 for longest suffix matching. 
*/ + void * user_tag; + }; + + /* + *Function: + * Search FQDN in the rule base + *Parameters: + * instance[in]: Instance of FQDN engine + * FQDN[in]: FQDN for search + * FQDN_len[in]: Length of FQDN + * results[out]: An array to store matched FQDNs + * n_result[in]: Number of elements in the result array + * Return: + * 0: No matched FQDN; + * >0: Number of matched FQDNs which were stored in results; + * <0: Error. + */ + int FQDN_engine_search(struct FQDN_engine * instance, const char * FQDN, size_t FQDN_len, struct FQDN_match * results, size_t n_result); + + void FQDN_engine_free(struct FQDN_engine * instance); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/inc_internal/Maat_rule_internal.h b/src/inc_internal/Maat_rule_internal.h index 39016f3..634f3d5 100644 --- a/src/inc_internal/Maat_rule_internal.h +++ b/src/inc_internal/Maat_rule_internal.h @@ -16,6 +16,7 @@ #include "stream_fuzzy_hash.h" #include "gram_index_engine.h" #include "alignment_int64.h" +#include "FQDN_engine.h" #include #include #include @@ -78,6 +79,15 @@ struct db_digest_rule short confidence_degree; int is_valid; }; +struct db_fqdn_rule +{ + int region_id; + int group_id; + int is_suffix_match; + char* fqdn; + int is_valid; +}; + struct Maat_rule_head { int config_id; diff --git a/src/inc_internal/Maat_table.h b/src/inc_internal/Maat_table.h index f607827..85d0eb3 100644 --- a/src/inc_internal/Maat_table.h +++ b/src/inc_internal/Maat_table.h @@ -36,7 +36,9 @@ enum MAAT_TABLE_TYPE TABLE_TYPE_COMPILE, TABLE_TYPE_PLUGIN, TABLE_TYPE_IP_PLUGIN, - TABLE_TYPE_INTERVAL_PLUS + TABLE_TYPE_INTERVAL_PLUS, + TABLE_TYPE_FQDN, + TABLE_TYPE_FQDN_PLUGIN }; struct compile_ex_data_idx diff --git a/src/inc_internal/Maat_table_runtime.h b/src/inc_internal/Maat_table_runtime.h index 15fc7a6..60d1f49 100644 --- a/src/inc_internal/Maat_table_runtime.h +++ b/src/inc_internal/Maat_table_runtime.h @@ -4,6 +4,7 @@ #include "IPMatcher.h" #include "gram_index_engine.h" +#include "FQDN_engine.h" #include
"alignment_int64.h" #include "dynamic_array.h" #include @@ -14,7 +15,14 @@ struct similar_runtime GIE_handle_t* gie_handle; MESA_lqueue_head update_q; }; - +struct fqdn_runtime +{ + struct FQDN_engine* fqdn_engine; + struct FQDN_engine* old_fqdn_engine; + struct EX_data_rt* ex_data_rt; //for fqdn_plugin ONLY + struct Maat_garbage_bin* bin; + int changed_flag; +}; struct plugin_runtime { struct EX_data_rt* ex_data_rt; @@ -51,6 +59,7 @@ struct Maat_table_runtime union { struct similar_runtime similar; //for digest and similarity + struct fqdn_runtime fqdn;//for fqdn and fqdn_plugin struct plugin_runtime plugin; struct ip_plugin_runtime ip_plugin; struct expr_runtime expr;