diff --git a/src/entry/Maat_rule.cpp b/src/entry/Maat_rule.cpp index 9d5fcd5..3fce008 100644 --- a/src/entry/Maat_rule.cpp +++ b/src/entry/Maat_rule.cpp @@ -977,8 +977,7 @@ void EMPTY_FREE(void*p) } struct _Maat_compile_inner_t * create_compile_rule(int compile_id) { - struct _Maat_compile_inner_t* p=NULL; - p=(struct _Maat_compile_inner_t*)calloc(sizeof(struct _Maat_compile_inner_t),1); + struct _Maat_compile_inner_t* p=ALLOC(struct _Maat_compile_inner_t,1); p->compile_id=compile_id; p->group_cnt=0; p->group_boundary=1; @@ -1741,7 +1740,7 @@ unsigned int del_region_from_group(struct _Maat_group_inner_t* group,int region_ return j; } -int add_group_to_compile(struct _Maat_compile_inner_t*a_compile_rule,struct _Maat_group_inner_t* a_rule_group) +int add_group_to_compile(struct _Maat_compile_inner_t*a_compile_rule,struct _Maat_group_inner_t* a_rule_group, int not_flag) { int i=0,ret=-1; int write_pos=-1; @@ -1781,7 +1780,15 @@ int add_group_to_compile(struct _Maat_compile_inner_t*a_compile_rule,struct _Maa write_pos=a_compile_rule->group_boundary; a_compile_rule->group_boundary++; } - dynamic_array_write(a_compile_rule->groups,write_pos, a_rule_group); + dynamic_array_write(a_compile_rule->groups, write_pos, a_rule_group); + if(not_flag) + { + a_compile_rule->not_flag[write_pos]=1; + } + else + { + a_compile_rule->not_flag[write_pos]=0; + } a_compile_rule->group_cnt++; a_rule_group->ref_cnt++; //member group->compile_shortcut may set to NULL and compile rule pointer repeatly,until rule build finish. 
@@ -2318,7 +2325,7 @@ int add_group_rule(struct Maat_table_desc* table,struct db_group_rule_t* db_grou group_rule=create_group_rule(db_group_rule->group_id); group_rule->table_id=table->table_id; - ret=HASH_add_by_id(scanner->group_hash, db_group_rule->group_id,group_rule); + ret=HASH_add_by_id(scanner->group_hash, db_group_rule->group_id, group_rule); assert(ret>=0); } @@ -2326,10 +2333,10 @@ int add_group_rule(struct Maat_table_desc* table,struct db_group_rule_t* db_grou if(compile_rule==NULL) { compile_rule=create_compile_rule(db_group_rule->compile_id); - ret=HASH_add_by_id(scanner->compile_hash,db_group_rule->compile_id, compile_rule); + ret=HASH_add_by_id(scanner->compile_hash, db_group_rule->compile_id, compile_rule); assert(ret>=0); } - ret=add_group_to_compile(compile_rule,group_rule); + ret=add_group_to_compile(compile_rule, group_rule, db_group_rule->not_flag); if(ret<0) { MESA_handle_runtime_log(logger,RLOG_LV_FATAL,maat_module, @@ -2437,35 +2444,40 @@ void update_group_rule(struct Maat_table_desc* table,const char* table_line,stru struct db_group_rule_t db_group_rule; struct Maat_table_runtime* table_rt=scanner->table_rt[table->table_id]; int ret=0; - ret=sscanf(table_line,"%d\t%d\t%d",&(db_group_rule.group_id) - ,&(db_group_rule.compile_id) - ,&(db_group_rule.is_valid)); - if(ret!=3) + ret=sscanf(table_line,"%d\t%d\t%d\t%d", &(db_group_rule.group_id), + &(db_group_rule.compile_id), + &(db_group_rule.is_valid), + &(db_group_rule.not_flag)); + if(ret!=3&&ret!=4) { MESA_handle_runtime_log(logger,RLOG_LV_INFO,maat_module , - "update error,invalid format of group table %s:%s" - ,table->table_name[table->updating_name], table_line); + "update error,invalid format of group table %s:%s", + table->table_name[table->updating_name], table_line); table->udpate_err_cnt++; return; } if(db_group_rule.is_valid==FALSE) { - del_group_rule(table, &db_group_rule,scanner,logger); + del_group_rule(table, &db_group_rule, scanner, logger); //leave no trace when 
compatible_group_update calling if(table->table_type==TABLE_TYPE_GROUP) { table_rt->origin_rule_num--; + if(db_group_rule.not_flag) + { + table_rt->group.not_flag_group--; + } } } else { - ret=add_group_rule(table,&db_group_rule, scanner,logger); + ret=add_group_rule(table,&db_group_rule, scanner, logger); if(ret<0) { MESA_handle_runtime_log(logger,RLOG_LV_INFO,maat_module , - "duplicate config of group table %s group_id %d compile_id %d.",table->table_name[0] - ,db_group_rule.group_id - ,db_group_rule.compile_id); + "duplicate config of group table %s group_id %d compile_id %d.", table->table_name[0], + db_group_rule.group_id, + db_group_rule.compile_id); } else @@ -2474,6 +2486,10 @@ void update_group_rule(struct Maat_table_desc* table,const char* table_line,stru if(table->table_type==TABLE_TYPE_GROUP) { table_rt->origin_rule_num++; + if(db_group_rule.not_flag) + { + table_rt->group.not_flag_group++; + } } } } diff --git a/src/entry/bool_matcher.cpp b/src/entry/bool_matcher.cpp new file mode 100644 index 0000000..2a35393 --- /dev/null +++ b/src/entry/bool_matcher.cpp @@ -0,0 +1,296 @@ +#include "bool_matcher.h" +#include +#include +#include +using namespace std; +#include +#include + +static const unsigned int MAX_ARRAY_SIZE=65536; + +struct thread_local_data_t +{ + unsigned int mapped_ids[MAX_ARRAY_SIZE]; + unsigned int used_cells[MAX_ARRAY_SIZE]; + void * cached_results[MAX_ARRAY_SIZE]; + unsigned char * multiexpr_bitmap; + unsigned int * singlexpr_bitmap; +}; + +struct bool_matcher +{ + unsigned int max_thread_num; + unsigned int bool_expr_num; + unsigned int multi_expr_num; + void ** bool_expr_ids; + unsigned char * multi_expr_size; + unsigned char * multi_expr_mask; + unsigned int bool_item_id_num; + unsigned long long min_item_id; + unsigned long long max_item_id; + unsigned long long * bool_item_ids; + unsigned int * mapped_ptr; + unsigned int * mapped_ids; + unsigned int theta; + unsigned int L[65537]; + thread_local_data_t * thread_data; +}; + +struct 
bool_matcher * bool_matcher_new(struct bool_expr * exprs, size_t expr_num, unsigned int max_thread_num, size_t * mem_size) +{ + if(exprs==NULL || expr_num==0 || max_thread_num==0) return NULL; + + for(unsigned int i=0; iMAX_ITEMS_PER_BOOL_EXPR) + { + return NULL; + } + } + + int I=-1, J=(int)expr_num; + while(I1) I++; + if(I==J) break; + J--; + while(J>I && exprs[J].item_num==1) J--; + if(J==I) break; + swap(exprs[I], exprs[J]); + } + + for(int k=0; k<(int)expr_num; k++) + { + if((k=I && exprs[k].item_num>1)) + { + printf("[%s:%d]: fatal error!\n", __FILE__, __LINE__); + return NULL; + } + } + + unsigned int mem_bytes=0; + + struct bool_matcher * matcher=new struct bool_matcher; + mem_bytes+=sizeof(bool_matcher); + + matcher->max_thread_num=max_thread_num; + matcher->bool_expr_num=(unsigned int)expr_num; + matcher->multi_expr_num=I; + + matcher->bool_expr_ids=new void *[expr_num]; + mem_bytes+=(unsigned int)expr_num*sizeof(void *); + + matcher->multi_expr_size=new unsigned char[matcher->multi_expr_num+1]; + mem_bytes+=(matcher->multi_expr_num+1)*sizeof(unsigned char); + + matcher->multi_expr_mask=new unsigned char[matcher->multi_expr_num+1]; + mem_bytes+=(matcher->multi_expr_num+1)*sizeof(unsigned char); + + matcher->thread_data=new thread_local_data_t[max_thread_num]; + mem_bytes+=max_thread_num*sizeof(thread_local_data_t); + + for(unsigned int i=0; ithread_data[i].multiexpr_bitmap=new unsigned char[matcher->multi_expr_num+1]; + mem_bytes+=(matcher->multi_expr_num+1)*sizeof(unsigned char); + + unsigned int size=(unsigned int)(expr_num-matcher->multi_expr_num); + size=(size>>5)+1; + matcher->thread_data[i].singlexpr_bitmap=new unsigned int[size]; + mem_bytes+=size*sizeof(unsigned int); + } + + map< unsigned long long, vector > M; + unsigned int count=0; + for(unsigned int i=0; ibool_expr_ids[i]=exprs[i].user_tag; + if(imulti_expr_num) + { + matcher->multi_expr_size[i]=(unsigned int)exprs[i].item_num; + } + count+=(unsigned int)exprs[i].item_num; + unsigned char 
mask=0; + for(unsigned int j=0; jmulti_expr_num) matcher->multi_expr_mask[i]=mask; + } + + matcher->bool_item_id_num=(unsigned int)M.size(); + matcher->bool_item_ids=new unsigned long long[M.size()]; + matcher->mapped_ptr =new unsigned int[M.size()+1]; + matcher->mapped_ids =new unsigned int[count]; + mem_bytes+=((unsigned int)M.size()+1+count)*sizeof(unsigned int)+(unsigned int)M.size()*sizeof(unsigned long long); + + matcher->mapped_ptr[0]=0; + map< unsigned long long, vector >::const_iterator it=M.begin(); + for(unsigned int k=0; kbool_item_ids[k]=it->first; + copy(it->second.begin(), it->second.end(), matcher->mapped_ids+matcher->mapped_ptr[k]); + matcher->mapped_ptr[k+1]=matcher->mapped_ptr[k]+(unsigned int)it->second.size(); + } + + matcher->min_item_id=matcher->bool_item_ids[0]; + matcher->max_item_id=matcher->bool_item_ids[M.size()-1]; + for(unsigned int k=0; kbool_item_ids[k]-=matcher->min_item_id; + } + + const unsigned long long ONE=1; + unsigned int theta=0; + while((ONE<<(theta+16))<=matcher->bool_item_ids[M.size()-1]) theta++; + matcher->theta=theta; + + matcher->L[0]=0; + for(unsigned int i=1; i<65536; i++) + { + matcher->L[i]=(unsigned int)(lower_bound(matcher->bool_item_ids, matcher->bool_item_ids+M.size(), i*(ONE<bool_item_ids); + } + matcher->L[65536]=(unsigned int)M.size(); + + M.clear(); + + *mem_size=mem_bytes; + return matcher; +} + +int bool_matcher_match(struct bool_matcher * matcher, unsigned int thread_id, unsigned long long * item_ids, size_t item_num, void ** result, size_t size) +{ + if(matcher==NULL) return -1; + if(thread_id>=matcher->max_thread_num) return -1; + + unsigned int * mapped_ids=matcher->thread_data[thread_id].mapped_ids; + unsigned int ids_num=0; + for(unsigned int i=0; imin_item_id || item_ids[i]>matcher->max_item_id) continue; + + unsigned long long id=item_ids[i]-matcher->min_item_id; + unsigned int k=(unsigned int)(id>>matcher->theta); + + int l=matcher->L[k], h=(int)matcher->L[k+1]-1; + if(hbool_item_ids[m]) h=m-1; 
+ else l=m+1; + } + if(h<(int)matcher->L[k] || matcher->bool_item_ids[h]!=id) continue; + + for(unsigned int j=matcher->mapped_ptr[h]; jmapped_ptr[h+1]; j++) + { + if(ids_num==MAX_ARRAY_SIZE) return -1; + mapped_ids[ids_num++]=matcher->mapped_ids[j]; + } + } + + unsigned int * used_cells=matcher->thread_data[thread_id].used_cells; + unsigned int used_num=0; + for(unsigned int i=0; i>3); + } + + unsigned char * m_bitmap=matcher->thread_data[thread_id].multiexpr_bitmap; + unsigned int * s_bitmap=matcher->thread_data[thread_id].singlexpr_bitmap; + unsigned char * m_mask=matcher->multi_expr_mask; + for(unsigned int i=0; imulti_expr_num) + { + m_bitmap[used_cells[i]]=m_mask[used_cells[i]]; + } + else + { + unsigned int j=used_cells[i]-matcher->multi_expr_num; + s_bitmap[j>>5]&=~(1U<<(j&31)); + } + } + + for(unsigned int i=0; i>3); + if(xmulti_expr_num) + { + unsigned int y=(mapped_ids[i]&7); + if(m_mask[x]&(1U<multi_expr_num; + s_bitmap[j>>5]|=(1U<<(j&31)); + } + } + + unsigned int r=0; + void ** cached_results=matcher->thread_data[thread_id].cached_results; + + for(unsigned int i=0; imulti_expr_num) + { + if(m_bitmap[x]==(1U<multi_expr_size[x])-1) + { + if(rbool_expr_ids[x]; + } + } + else + { + unsigned int j=used_cells[i]-matcher->multi_expr_num; + if((s_bitmap[j>>5]&(1U<<(j&31)))!=0) + { + if(rbool_expr_ids[x]; + } + } + } + + sort(cached_results, cached_results+r); + + int I=0; + for(unsigned int J=0; Jbool_expr_ids; + delete [] matcher->multi_expr_size; + delete [] matcher->multi_expr_mask; + delete [] matcher->bool_item_ids; + delete [] matcher->mapped_ptr; + delete [] matcher->mapped_ids; + for(unsigned int i=0; imax_thread_num; i++) + { + delete [] matcher->thread_data[i].multiexpr_bitmap; + delete [] matcher->thread_data[i].singlexpr_bitmap; + } + delete [] matcher->thread_data; + delete matcher; + return; +} diff --git a/src/inc_internal/Maat_rule_internal.h b/src/inc_internal/Maat_rule_internal.h index 0c783fc..3075e63 100644 --- 
a/src/inc_internal/Maat_rule_internal.h +++ b/src/inc_internal/Maat_rule_internal.h @@ -108,6 +108,7 @@ struct db_group_rule_t int group_id; int compile_id; int is_valid; + int not_flag; }; struct op_expr_t { @@ -144,7 +145,8 @@ struct _Maat_group_inner_t struct _Maat_compile_inner_t { struct db_compile_rule_t *db_c_rule; - dynamic_array_t *groups; + dynamic_array_t *groups; //element is struct _Maat_group_inner_t* + char not_flag[MAX_ITEMS_PER_BOOL_EXPR]; int is_valid; int compile_id;//equal to db_c_rule->m_rule.config_id const struct Maat_table_desc* ref_table; @@ -234,6 +236,10 @@ struct ip_runtime long long ipv6_rule_cnt; }; +struct group_runtime +{ + long long not_flag_group; +}; struct Maat_table_runtime { enum MAAT_TABLE_TYPE table_type; @@ -244,6 +250,7 @@ struct Maat_table_runtime struct plugin_runtime plugin; struct expr_runtime expr; struct ip_runtime ip; + struct group_runtime group; void * other; }; mcore_long_t scan_cnt; @@ -266,10 +273,7 @@ struct _Maat_scanner_t long gie_total_q_size; struct Maat_table_runtime* table_rt[MAX_TABLE_NUM]; -/* - struct similar_runtime gie_aux[MAX_TABLE_NUM]; - struct plugin_runtime plugin_aux[MAX_TABLE_NUM]; -*/ + MESA_htable_handle region_hash; MESA_htable_handle group_hash; MESA_htable_handle compile_hash; @@ -283,7 +287,7 @@ struct _Maat_scanner_t void * bool_macher_expr_compiler; scan_result_t *region_rslt_buff; MESA_lqueue_head tomb_ref;//reference of g_feather->garbage_q -// struct _region_stat_t region_counter[MAX_TABLE_NUM]; + int max_thread_num; iconv_t iconv_handle[MAX_CHARSET_NUM][MAX_CHARSET_NUM];//iconv_handle[to][from] }; @@ -374,7 +378,7 @@ struct _Maat_feather_t int active_plugin_table_num; int is_last_plugin_table_updating; -//for stat>>>> +//for scanner independent stat>>>> int backgroud_update_enabled; screen_stat_handle_t stat_handle; int total_stat_id; diff --git a/src/inc_internal/bool_matcher.h b/src/inc_internal/bool_matcher.h new file mode 100644 index 0000000..619eea4 --- /dev/null +++ 
b/src/inc_internal/bool_matcher.h @@ -0,0 +1,56 @@ +/* + * + * Copyright (c) 2018 + * String Algorithms Research Group + * Institute of Information Engineering, Chinese Academy of Sciences (IIE-CAS) + * National Engineering Laboratory for Information Security Technologies (NELIST) + * All rights reserved + * + * Written by: LIU YANBING (liuyanbing@iie.ac.cn) + * Last modification: 2018-12-31 + * + * This code is the exclusive and proprietary property of IIE-CAS and NELIST. + * Usage for direct or indirect commercial advantage is not allowed without + * written permission from the authors. + * + */ + +#ifndef INCLUDE_BOOL_MATCHER_H +#define INCLUDE_BOOL_MATCHER_H +#include + +#ifdef __cplusplus +extern "C" +{ +#endif + #define MAX_ITEMS_PER_BOOL_EXPR 8 + + /* One boolean item of an expression. not_flag=0 means the item item_id must be present; not_flag=1 means the item item_id must not be present. */ + struct bool_item + { + unsigned long long item_id; + unsigned char not_flag; + }; + + /* A boolean expression of up to MAX_ITEMS_PER_BOOL_EXPR items. Note: an expression whose items are all negated ("NOT") is not supported. */ + struct bool_expr + { + void * user_tag; + size_t item_num; + struct bool_item items[MAX_ITEMS_PER_BOOL_EXPR]; + }; + + struct bool_matcher; + + /* Note: calling this function swaps the positions of elements inside the exprs array. Returns NULL when exprs is NULL, expr_num is 0 or max_thread_num is 0; on success the accounted memory size in bytes is written to *mem_size. */ + struct bool_matcher * bool_matcher_new(struct bool_expr * exprs, size_t expr_num, unsigned int max_thread_num, size_t * mem_size); + + int bool_matcher_match(struct bool_matcher * matcher, unsigned int thread_id, unsigned long long * item_ids, size_t item_num, void ** result, size_t size); + + void bool_matcher_free(struct bool_matcher * matcher); + +#ifdef __cplusplus +} +#endif + +#endif