diff --git a/src/entry/Maat_api.cpp b/src/entry/Maat_api.cpp index 82e3bb5..44da318 100644 --- a/src/entry/Maat_api.cpp +++ b/src/entry/Maat_api.cpp @@ -417,7 +417,7 @@ Maat_feather_t Maat_feather(int max_thread_num,const char* table_info_path,void* _Maat_feather_t* feather=(_Maat_feather_t*)calloc(sizeof(struct _Maat_feather_t),1); feather->table_cnt=read_table_info(feather->p_table_info, MAX_TABLE_NUM,table_info_path,max_thread_num,logger); feather->map_tablename2id=map_create(); - int i=0; + int i=0,j=0,ret=0; for(i=0;ip_table_info[i]!=NULL) @@ -426,7 +426,18 @@ Maat_feather_t Maat_feather(int max_thread_num,const char* table_info_path,void* { feather->GROUP_MODE_ON=1; } - map_register(feather->map_tablename2id,feather->p_table_info[i]->table_name,feather->p_table_info[i]->table_id); + for(j=0;jp_table_info[i]->conj_cnt;j++) + { + ret=map_register(feather->map_tablename2id,feather->p_table_info[i]->table_name[j],feather->p_table_info[i]->table_id); + if(ret<0) + { + MESA_handle_runtime_log(feather->logger,RLOG_LV_FATAL,maat_module , + "Duplicate table name %s of table id %d" + ,feather->p_table_info[i]->table_name[j] + ,feather->p_table_info[i]->table_id); + continue; + } + } } } feather->logger=logger; @@ -434,7 +445,9 @@ Maat_feather_t Maat_feather(int max_thread_num,const char* table_info_path,void* feather->garbage_q=MESA_lqueue_create(0,0); feather->effect_interval_ms=60*1000; feather->scan_interval_ms=1*1000; - feather->rule_scan_type=2; + //Prepare for comptetion,return no hit detail as default for temporary. + feather->rule_scan_type=0; + //feather->rule_scan_type=2; feather->thread_call_cnt=aligment_int64_array_alloc(max_thread_num); feather->outer_mid_cnt=aligment_int64_array_alloc(max_thread_num); feather->inner_mid_cnt=aligment_int64_array_alloc(max_thread_num); @@ -1044,6 +1057,10 @@ stream_para_t Maat_stream_scan_string_start(Maat_feather_t feather,int table_id, { return NULL; } + if(p_table->quick_expr_switch==1) + { + return NULL; + } struct _stream_para_t* sp=(struct _stream_para_t*)calloc(sizeof(struct _stream_para_t),1); scanner=_feather->scanner; sp->feather=_feather; @@ -1470,6 +1487,10 @@ int Maat_set_scan_status(Maat_feather_t feather,scan_status_t* mid,enum MAAT_SCA switch(type) { case MAAT_SET_SCAN_DISTRICT: + if(value==NULL||size<=0) + { + return -1; + } map_ret=map_str2int(_feather->scanner->district_map,(const char*)value,&(_mid->district_id)); if(map_ret<0) { diff --git a/src/entry/Maat_rule.cpp b/src/entry/Maat_rule.cpp index 65c5375..f460534 100644 --- a/src/entry/Maat_rule.cpp +++ b/src/entry/Maat_rule.cpp @@ -26,7 +26,7 @@ #include "mesa_fuzzy.h" #include "great_index_engine.h" -int MAAT_FRAME_VERSION_1_8_20160603=1; +int MAAT_FRAME_VERSION_1_8_20160909_TABLE_CONJ=1; const char *maat_module="MAAT Frame"; const char* CHARSET_STRING[]={"NONE","gbk","big5","unicode","utf8","bin", @@ -338,6 +338,42 @@ char *str_unescape_and(char*s) s[j]='\0'; return s; } +char* str_unescape(char* s) +{ + int i=0,j=0; + int len=strlen(s); + for(i=0,j=0;iconj_cnt=1; p->scan_cnt=aligment_int64_array_alloc(max_thread_num); p->scan_cpu_time=aligment_int64_array_alloc(max_thread_num); p->input_bytes=aligment_int64_array_alloc(max_thread_num); p->stream_num=aligment_int64_array_alloc(max_thread_num); p->hit_cnt=aligment_int64_array_alloc(max_thread_num); p->cross_cache_size=0; + p->quick_expr_switch=0; return p; } void destroy_table_info(struct _Maat_table_info_t*p) @@ -404,10 +442,11 @@ int read_table_info(struct _Maat_table_info_t** p_table_info,int num,const char* FILE*fp=NULL; char line[MAX_TABLE_LINE_SIZE]; int i=0,j=0,ret[4]={0},table_cnt=0; - char table_type[16],src_charset[256],dst_charset[256],merge[4]; + char table_type[16],src_charset[256],dst_charset[256],merge[4],quick_str_scan[32]={0}; MESA_htable_handle string2int_map=map_create(); char *token=NULL,*sub_token=NULL,*saveptr; struct _Maat_table_info_t*p=NULL; + struct _Maat_table_info_t*conj_table=NULL; map_register(string2int_map,"expr", TABLE_TYPE_EXPR); map_register(string2int_map,"ip", TABLE_TYPE_IP); @@ -417,6 +456,8 @@ int read_table_info(struct _Maat_table_info_t** p_table_info,int num,const char* map_register(string2int_map,"digest", TABLE_TYPE_DIGEST); map_register(string2int_map,"expr_plus", TABLE_TYPE_EXPR_PLUS); map_register(string2int_map,"group", TABLE_TYPE_GROUP); + map_register(string2int_map,"quickoff",0); + map_register(string2int_map,"quickon",1); for(i=0;i0) @@ -458,23 +499,30 @@ int read_table_info(struct _Maat_table_info_t** p_table_info,int num,const char* } p=create_table_info(max_thread_num); - sscanf(line,"%hu\t%s\t%s\t%s\t%s\t%s\t%d",&(p->table_id) - ,p->table_name + sscanf(line,"%hu\t%s\t%s\t%s\t%s\t%s\t%d\t%s",&(p->table_id) + ,p->table_name[0] ,table_type ,src_charset ,dst_charset ,merge - ,&(p->cross_cache_size)); + ,&(p->cross_cache_size) + ,quick_str_scan); ret[0]=map_str2int(string2int_map,strlwr(table_type),(int*)&(p->table_type)); ret[1]=map_str2int(string2int_map,strlwr(src_charset),(int*)&(p->src_charset)); ret[2]=map_str2int(string2int_map,strlwr(merge),&(p->do_charset_merge)); - for(j=0;j<3;j++) + if(strlen(quick_str_scan)>0) + { + ret[3]=map_str2int(string2int_map,strlwr(quick_str_scan),&(p->quick_expr_switch)); + } + memset(quick_str_scan,0,sizeof(quick_str_scan)); + + for(j=0;j<4;j++) { if(ret[j]<0) { fprintf(stderr,"Maat read table info %s line %d error:unknown column.\n",table_info_path,i); MESA_handle_runtime_log(logger, RLOG_LV_FATAL,maat_module, - "Maat read table info %s line %d error:unknown column.\n",table_info_path,i); + "Maat read table info %s line %d error:unknown column.",table_info_path,i); goto error_jump; } } @@ -497,25 +545,36 @@ int read_table_info(struct _Maat_table_info_t** p_table_info,int num,const char* { fprintf(stderr,"Maat read table info %s line %d error:unknown dest charset %s.\n",table_info_path,i,sub_token); MESA_handle_runtime_log(logger, RLOG_LV_FATAL,maat_module, - "Maat read table info %s line %d error: unknown dest charset %s.\n",table_info_path,i,sub_token); + "Maat read table info %s line %d error: unknown dest charset %s.",table_info_path,i,sub_token); goto error_jump; } } if(p->table_id>=num) { - fprintf(stderr,"Maat read table info %s line %d error: table id %uh > %d.\n",table_info_path,i,p->table_id,num); + fprintf(stderr,"Maat read table info %s:%d error: table id %uh > %d.\n",table_info_path,i,p->table_id,num); MESA_handle_runtime_log(logger, RLOG_LV_FATAL,maat_module, "Maat read table info %s line %d error: table id %uh > %d.\n",table_info_path,i,p->table_id,num); goto error_jump; } - if(p_table_info[p->table_id]!=NULL) + if(p_table_info[p->table_id]!=NULL)//duplicate table_id,means conjunction table; { - fprintf(stderr,"Maat read table info %s line %d error:duplicated table id %d.\n",table_info_path,i,p->table_id); - MESA_handle_runtime_log(logger, RLOG_LV_FATAL,maat_module, - "Maat read table info %s line %d error:duplicated table id %d.\n",table_info_path,i,p->table_id); - + conj_table=p_table_info[p->table_id]; + if(conj_table->conj_cnt==MAX_CONJUNCTION_TABLE_NUM) + { + MESA_handle_runtime_log(logger, RLOG_LV_FATAL,maat_module, + "Maat read table info %s line %d error:reach tableid %d conjunction upper limit." + ,table_info_path,i,p->table_id); + goto error_jump; + } + memcpy(conj_table->table_name[conj_table->conj_cnt],p->table_name[0],MAX_TABLE_NAME_LEN); + conj_table->conj_cnt++; + MESA_handle_runtime_log(logger, RLOG_LV_INFO,maat_module, + "Maat read table info %s:%d:conjunction %s with %s (id=%d,total=%d)." + ,table_info_path,i,p->table_name[0] + ,conj_table->table_name[0],conj_table->table_id,conj_table->conj_cnt); + //use goto to free the conjunctioned table_info goto error_jump; } if(p->table_type==TABLE_TYPE_PLUGIN) @@ -765,6 +824,7 @@ struct op_expr_t* create_op_expr(unsigned int expr_id,int operation,void* u_para struct op_expr_t* op_expr=NULL; op_expr=(struct op_expr_t*)calloc(sizeof(struct op_expr_t),1); op_expr->no_effect_convert_cnt=0; + op_expr->convert_failed=0; op_expr->p_expr=(boolean_expr_t*)calloc(sizeof(boolean_expr_t),1); op_expr->p_expr->expr_id=expr_id; op_expr->p_expr->operation=operation; @@ -837,9 +897,16 @@ void destroy_digest_rule(GIE_digest_t*rule) rule=NULL; return; } -struct _Maat_scanner_t* create_maat_scanner(unsigned int version,int scan_thread_num,MESA_lqueue_head tomb,int rs_scan_type) +struct _Maat_scanner_t* create_maat_scanner(unsigned int version,_Maat_feather_t *feather) { - int i=0; + int scan_thread_num=feather->scan_thread_num; + MESA_lqueue_head tomb=feather->garbage_q; +// int rs_scan_type=feather->rule_scan_type; + struct _Maat_table_info_t ** pp_table=feather->p_table_info; + + int i=0,j=0; + unsigned int sub_type=0; + int ret=0; MESA_htable_create_args_t hargs; memset(&hargs,0,sizeof(hargs)); @@ -880,14 +947,45 @@ struct _Maat_scanner_t* create_maat_scanner(unsigned int version,int scan_thread scanner->ref_cnt=aligment_int64_array_alloc(scan_thread_num); scanner->region_update_q=MESA_lqueue_create(0,0); scanner->region=rulescan_initialize(scan_thread_num); - rulescan_set_param(scanner->region,rs_scan_type); + + //For best performance test: + //1.Do NOT set this option,rulescan return no hit detail as default; + //2.Set necessary STR rule to QUICK; + //rulescan_set_param(scanner->region,RULESCAN_DETAIL_RESULT,); scanner->tomb_ref=tomb; scanner->region_rslt_buff=(scan_result_t*)malloc(sizeof(scan_result_t)*MAX_SCANNER_HIT_NUM*scan_thread_num); for(i=0;idigest_update_q[i]=MESA_lqueue_create(0,0); - pthread_rwlock_init(&(scanner->digest_rwlock[i]),NULL); + if(pp_table[i]==NULL) + { + continue; + } + switch(pp_table[i]->table_type) + { + case TABLE_TYPE_DIGEST: + scanner->digest_update_q[i]=MESA_lqueue_create(0,0); + pthread_rwlock_init(&(scanner->digest_rwlock[i]),NULL); + break; + case TABLE_TYPE_EXPR: + case TABLE_TYPE_EXPR_PLUS: + if(pp_table[i]->quick_expr_switch==1) + { + for(j=0;jdst_charset[j]!=CHARSET_NONE;j++) + { + sub_type=make_sub_type(pp_table[i]->table_id, pp_table[i]->dst_charset[j], pp_table[i]->do_charset_merge); + ret=rulescan_set_param(scanner->region,RULESCAN_QUICK_SCAN,&sub_type,sizeof(sub_type)); + assert(ret==1); + if(pp_table[i]->do_charset_merge==1) + { + break; + } + } + } + break; + default: + break; + } } return scanner; } @@ -905,6 +1003,8 @@ void destroy_maat_scanner(struct _Maat_scanner_t*scanner) MESA_htable_destroy(scanner->group_hash, NULL); MESA_htable_destroy(scanner->region_hash, NULL); map_destroy(scanner->district_map); + scanner->district_map=NULL; + assert(scanner->tmp_district_map==NULL); destroy_bool_matcher((void*)scanner->expr_compiler); q_cnt=MESA_lqueue_get_count(scanner->region_update_q); for(i=0;idigest_handle[i]); } + if(scanner->digest_update_q[i]==NULL) + { + continue; + } q_cnt=MESA_lqueue_get_count(scanner->digest_update_q[i]); for(j=0;jdistrict_map, district_str,&district_id); if(map_ret<0) { - district_id= scanner->district_num; - map_register(scanner->district_map,district_str, district_id); - scanner->district_num++; + if(scanner->tmp_district_map==NULL) + { + scanner->tmp_district_map=map_duplicate(scanner->district_map); + } + map_ret=map_str2int(scanner->tmp_district_map, district_str,&district_id); + if(map_ret<0) + { + district_id= scanner->district_num; + map_register(scanner->tmp_district_map,district_str, district_id); + scanner->district_num++; + } } return district_id; } @@ -1294,7 +1406,7 @@ int add_expr_rule(struct _Maat_table_info_t* table,struct db_str_rule_t* db_rule { MESA_handle_runtime_log(logger,RLOG_LV_FATAL,maat_module , "Table %s region cfg %d is EXPR_TYPE_AND,but match method is not MATCH_METHOD_SUB,force fixed.", - table->table_name,db_rule->region_id); + table->table_name[table->updating_name],db_rule->region_id); db_rule->match_method=MATCH_METHOD_SUB; } @@ -1303,7 +1415,7 @@ int add_expr_rule(struct _Maat_table_info_t* table,struct db_str_rule_t* db_rule if(i>=MAAT_MAX_EXPR_ITEM_NUM) { MESA_handle_runtime_log(logger,RLOG_LV_FATAL,maat_module , - "Table %s region cfg %d too many expr.",table->table_name,db_rule->region_id); + "Table %s region cfg %d too many expr.",table->table_name[table->updating_name],db_rule->region_id); return -1; } sub_key_array[i]=strtok_r_esc(p,'&',&saveptr); @@ -1311,7 +1423,7 @@ int add_expr_rule(struct _Maat_table_info_t* table,struct db_str_rule_t* db_rule { break; } - sub_key_array[i]=str_unescape_and(sub_key_array[i]); + sub_key_array[i]=str_unescape(sub_key_array[i]); } sub_expr_cnt=i; table->expr_rule_cnt++; @@ -1322,7 +1434,7 @@ int add_expr_rule(struct _Maat_table_info_t* table,struct db_str_rule_t* db_rule if(i>=MAAT_MAX_EXPR_ITEM_NUM) { MESA_handle_runtime_log(logger,RLOG_LV_FATAL,maat_module , - "Table %s region cfg %d too many expr.",table->table_name,db_rule->region_id); + "Table %s region cfg %d too many expr.",table->table_name[table->updating_name],db_rule->region_id); return -1; } sub_key_array[i]=strtok_r_esc(p,'&',&saveptr); @@ -1334,18 +1446,18 @@ int add_expr_rule(struct _Maat_table_info_t* table,struct db_str_rule_t* db_rule if(!(key_left_offset[i]>=0&&key_right_offset[i]>0&&key_left_offset[i]table_name,db_rule->region_id); + "Table %s region cfg %d invalid offset.",table->table_name[table->updating_name],db_rule->region_id); return -1; } sub_key_array[i]=(char*)memchr(sub_key_array[i],':',strlen(sub_key_array[i])); if(sub_key_array[i]==NULL) { MESA_handle_runtime_log(logger,RLOG_LV_FATAL,maat_module , - "Table %s region cfg %d invalid keywords format.",table->table_name,db_rule->region_id); + "Table %s region cfg %d invalid keywords format.",table->table_name[table->updating_name],db_rule->region_id); return -1; } sub_key_array[i]++;//jump over ':' - sub_key_array[i]=str_unescape_and(sub_key_array[i]); + sub_key_array[i]=str_unescape(sub_key_array[i]); } sub_expr_cnt=i; table->expr_rule_cnt++; @@ -1365,7 +1477,7 @@ int add_expr_rule(struct _Maat_table_info_t* table,struct db_str_rule_t* db_rule if(i>=MAAT_MAX_EXPR_ITEM_NUM) { MESA_handle_runtime_log(logger,RLOG_LV_FATAL,maat_module , - "Table %s region cfg %d too many expr.",table->table_name,db_rule->region_id); + "Table %s region cfg %d too many expr.",table->table_name[table->updating_name],db_rule->region_id); return -1; } sub_key_array[i]=strtok_r_esc(p,'&',&saveptr); @@ -1373,7 +1485,7 @@ int add_expr_rule(struct _Maat_table_info_t* table,struct db_str_rule_t* db_rule { break; } - sub_key_array[i]=str_unescape_and(sub_key_array[i]); + sub_key_array[i]=str_unescape_and(sub_key_array[i]);//regex remain use str_unescape_and p_rule=create_rs_str_rule(make_sub_type(table->table_id,CHARSET_NONE,0) ,MATCH_METHOD_SUB//not care db_rule->match_method ,db_rule->is_case_sensitive @@ -1392,7 +1504,7 @@ int add_expr_rule(struct _Maat_table_info_t* table,struct db_str_rule_t* db_rule case EXPR_TYPE_STRING: sub_expr_cnt=1; sub_key_array[0]=db_rule->keywords; - sub_key_array[0]=str_unescape_and(sub_key_array[0]); + sub_key_array[0]=str_unescape(sub_key_array[0]); table->expr_rule_cnt++; break; default: @@ -1404,7 +1516,7 @@ int add_expr_rule(struct _Maat_table_info_t* table,struct db_str_rule_t* db_rule { MESA_handle_runtime_log(logger,RLOG_LV_FATAL,maat_module , "Table %s region cfg %d has an empty sub string.", - table->table_name,db_rule->region_id); + table->table_name[table->updating_name],db_rule->region_id); //this sub string will jump over before iconv_convert } } @@ -1449,7 +1561,8 @@ int add_expr_rule(struct _Maat_table_info_t* table,struct db_str_rule_t* db_rule ,CHARSET_STRING[table->src_charset] ,CHARSET_STRING[dst_charset]); free(region_string); - continue; + op_expr->convert_failed++; + break; } if(region_str_len==(int)strlen(sub_key_array[k])&& 0==memcmp(sub_key_array[k],region_string,region_str_len)) @@ -1474,7 +1587,9 @@ int add_expr_rule(struct _Maat_table_info_t* table,struct db_str_rule_t* db_rule region_string=NULL; } //if each sub string's convert take no effect and src charset is one of the dst. - if(TRUE==table->src_charset_in_dst&&op_expr->no_effect_convert_cnt==sub_expr_cnt) + //if any sub expr convert failed + if((TRUE==table->src_charset_in_dst&&op_expr->no_effect_convert_cnt==sub_expr_cnt)|| + op_expr->convert_failed>0) { destroy_op_expr(op_expr); op_expr=NULL; @@ -1620,7 +1735,7 @@ int del_region_rule(struct _Maat_table_info_t* table,int region_id,int group_id, { MESA_handle_runtime_log(logger,RLOG_LV_FATAL,maat_module , "update error,table %s group id %u not exist,while delete region id %d." - ,table->table_name + ,table->table_name[table->updating_name] ,group_id ,region_id); return -1; @@ -1632,7 +1747,7 @@ int del_region_rule(struct _Maat_table_info_t* table,int region_id,int group_id, MESA_handle_runtime_log(logger,RLOG_LV_INFO,maat_module , "region delete error,id %d table %s region not in group id %d." ,region_id - ,table->table_name + ,table->table_name[table->updating_name] ,group_id); return -1; } @@ -1667,7 +1782,7 @@ int del_region_rule(struct _Maat_table_info_t* table,int region_id,int group_id, MESA_handle_runtime_log(logger,RLOG_LV_INFO,maat_module , "last region rule of group id %d in table %s region id %d has been delete." ,group_id - ,table->table_name + ,table->table_name[table->updating_name] ,region_id); } return 1; @@ -1697,7 +1812,7 @@ int add_group_rule(struct _Maat_table_info_t* table,struct db_group_rule_t* db_g { MESA_handle_runtime_log(logger,RLOG_LV_FATAL,maat_module, "update error,add %s group %d to compile %d error,compile rule is full or duplicate group." - ,table->table_name + ,table->table_name[table->updating_name] ,db_group_rule->group_id ,db_group_rule->compile_id); return -1; @@ -1714,7 +1829,7 @@ void del_group_rule(struct _Maat_table_info_t* table,struct db_group_rule_t* db_ { MESA_handle_runtime_log(logger,RLOG_LV_FATAL,maat_module , "update error,delete %s group rule error : compile id %d does not exisit." - ,table->table_name + ,table->table_name[table->updating_name] ,db_group_rule->compile_id); return; } @@ -1723,7 +1838,7 @@ void del_group_rule(struct _Maat_table_info_t* table,struct db_group_rule_t* db_ { MESA_handle_runtime_log(logger,RLOG_LV_FATAL,maat_module , "update error,delete %s group rule error : group id %d not in compile id %d." - ,table->table_name + ,table->table_name[table->updating_name] ,db_group_rule->group_id ,db_group_rule->compile_id); return; @@ -1739,7 +1854,7 @@ void del_group_rule(struct _Maat_table_info_t* table,struct db_group_rule_t* db_ garbage_bagging(GARBAGE_GROUP_RULE, group_rule, scanner->tomb_ref); MESA_handle_runtime_log(logger,RLOG_LV_INFO,maat_module , "table %s group id %d been eternal delete." - ,table->table_name + ,table->table_name[table->updating_name] ,db_group_rule->group_id); } return; @@ -1770,7 +1885,7 @@ int del_compile_rule(struct _Maat_table_info_t* table,struct db_compile_rule_t* { MESA_handle_runtime_log(logger,RLOG_LV_FATAL,maat_module , "update error,delete %s compile rule error : congfig id %d does not exisit." - ,table->table_name + ,table->table_name[table->updating_name] ,db_compile_rule->m_rule_head.config_id); return -1; } @@ -1807,7 +1922,7 @@ void update_group_rule(struct _Maat_table_info_t* table,const char* table_line,s if(ret<0) { MESA_handle_runtime_log(logger,RLOG_LV_INFO,maat_module , - "duplicate config of group table %s group_id %d compile_id %d.",table->table_name + "duplicate config of group table %s group_id %d compile_id %d.",table->table_name[table->conj_cnt] ,db_group_rule.group_id ,db_group_rule.compile_id); @@ -1847,7 +1962,7 @@ void update_expr_rule(struct _Maat_table_info_t* table,const char* table_line,st if(ret!=7) { MESA_handle_runtime_log(logger,RLOG_LV_FATAL,maat_module , - "update error,invalid format of expr table %s:%s",table->table_name,table_line); + "update error,invalid format of expr table %s:%s",table->table_name[table->updating_name],table_line); free(maat_str_rule); maat_str_rule=NULL; return; @@ -1865,7 +1980,7 @@ void update_expr_rule(struct _Maat_table_info_t* table,const char* table_line,st if(ret!=8) { MESA_handle_runtime_log(logger,RLOG_LV_FATAL,maat_module , - "update error,invalid format of expr_plus table %s:%s",table->table_name,table_line); + "update error,invalid format of expr_plus table %s:%s",table->table_name[table->updating_name],table_line); free(maat_str_rule); maat_str_rule=NULL; return; @@ -1891,12 +2006,13 @@ void update_expr_rule(struct _Maat_table_info_t* table,const char* table_line,st break; default: MESA_handle_runtime_log(logger,RLOG_LV_FATAL,maat_module , - "update error,invalid hexbin value of expr table %s:%s",table->table_name,table_line); + "update error,invalid hexbin value of expr table %s:%s" + ,table->table_name[table->updating_name],table_line); goto error_out; } ret=sync_region(scanner->region_hash ,maat_str_rule->region_id - ,table->table_name + ,table->table_name[table->updating_name] ,maat_str_rule->is_valid,logger); if(ret<0) { @@ -1928,7 +2044,8 @@ void update_expr_rule(struct _Maat_table_info_t* table,const char* table_line,st if(ret<0) { MESA_handle_runtime_log(logger,RLOG_LV_INFO,maat_module , - "duplicate config of expr table %s region_id=%d",table->table_name,maat_str_rule->region_id); + "duplicate config of expr table %s region_id=%d" + ,table->table_name[table->updating_name],maat_str_rule->region_id); } else @@ -1946,7 +2063,6 @@ void update_ip_rule(struct _Maat_table_info_t* table,const char* table_line,stru char src_ip[40],mask_src_ip[40],dst_ip[40],mask_dst_ip[40]; unsigned short i_src_port,i_sport_mask,i_dst_port,i_dport_mask; - struct in6_addr v6_src_mask,v6_dst_mask; int protocol=0,direction=0; int ret=0; int ret_array[8]={1},i=0; @@ -1970,7 +2086,8 @@ void update_ip_rule(struct _Maat_table_info_t* table,const char* table_line,stru ||(direction!=0&&direction!=1)) { MESA_handle_runtime_log(logger,RLOG_LV_FATAL,maat_module , - "update error,invalid format of ip table %s:%s",table->table_name,table_line); + "update error,invalid format of ip table %s:%s" + ,table->table_name[table->updating_name],table_line); goto error_out; } if(ip_rule->addr_type==4) @@ -1998,13 +2115,13 @@ void update_ip_rule(struct _Maat_table_info_t* table,const char* table_line,stru { ret_array[0]=inet_pton(AF_INET6,src_ip,&(ip_rule->ipv6_rule.saddr)); ipv6_ntoh(ip_rule->ipv6_rule.saddr); - ret_array[1]=inet_pton(AF_INET6,mask_src_ip,&(v6_src_mask)); - ip_rule->ipv6_rule.smask_bits=128-cnt_maskbits(v6_src_mask); - + ret_array[1]=inet_pton(AF_INET6,mask_src_ip,&(ip_rule->ipv6_rule.smask)); + ipv6_ntoh(ip_rule->ipv6_rule.smask); + ret_array[2]=inet_pton(AF_INET6,dst_ip,&(ip_rule->ipv6_rule.daddr)); ipv6_ntoh(ip_rule->ipv6_rule.daddr); - ret_array[3]=inet_pton(AF_INET6,mask_dst_ip,&(v6_dst_mask)); - ip_rule->ipv6_rule.dmask_bits=128-cnt_maskbits(v6_dst_mask); + ret_array[3]=inet_pton(AF_INET6,mask_dst_ip,&(ip_rule->ipv6_rule.dmask)); + ipv6_ntoh(ip_rule->ipv6_rule.dmask); ip_rule->ipv6_rule.min_sport=i_src_port&i_sport_mask; ip_rule->ipv6_rule.max_sport=(i_src_port&i_sport_mask)+(~i_sport_mask); @@ -2020,13 +2137,14 @@ void update_ip_rule(struct _Maat_table_info_t* table,const char* table_line,stru if(ret_array[i]<=0) { MESA_handle_runtime_log(logger,RLOG_LV_FATAL,maat_module , - "update error,invalid format of ip table %s:%s",table->table_name,table_line); + "update error,invalid format of ip table %s:%s" + ,table->table_name[table->updating_name],table_line); goto error_out; } } ret=sync_region(scanner->region_hash ,ip_rule->region_id - ,table->table_name + ,table->table_name[table->updating_name] ,ip_rule->is_valid,logger); if(ret<0) { @@ -2065,7 +2183,8 @@ void update_ip_rule(struct _Maat_table_info_t* table,const char* table_line,stru if(ret<0) { MESA_handle_runtime_log(logger,RLOG_LV_INFO,maat_module , - "duplicate config of ip table %s config_id=%d",table->table_name,ip_rule->region_id); + "duplicate config of ip table %s config_id=%d" + ,table->table_name[table->updating_name],ip_rule->region_id); } else @@ -2099,12 +2218,13 @@ void update_intval_rule(struct _Maat_table_info_t* table,const char* table_line, if(ret!=5||intval_rule->intval.ubintval.lb) { MESA_handle_runtime_log(logger,RLOG_LV_FATAL,maat_module , - "update error,invalid format of interval table %s:%s",table->table_name,table_line); + "update error,invalid format of interval table %s:%s" + ,table->table_name[table->updating_name],table_line); goto error_out; } ret=sync_region(scanner->region_hash ,intval_rule->region_id - ,table->table_name + ,table->table_name[table->updating_name] ,intval_rule->is_valid,logger); if(ret<0) { @@ -2135,7 +2255,8 @@ void update_intval_rule(struct _Maat_table_info_t* table,const char* table_line, if(ret<0) { MESA_handle_runtime_log(logger,RLOG_LV_INFO,maat_module , - "duplicate config of intval table %s config_id=%d",table->table_name,intval_rule->region_id); + "duplicate config of intval table %s config_id=%d" + ,table->table_name[table->updating_name],intval_rule->region_id); } else { @@ -2166,7 +2287,8 @@ void update_compile_rule(struct _Maat_table_info_t* table,const char* table_line if((ret!=8&&ret!=9)||strlen(user_region)>MAX_SERVICE_DEFINE_LEN||p_compile->declare_grp_num>MAAT_MAX_EXPR_ITEM_NUM) { MESA_handle_runtime_log(logger,RLOG_LV_FATAL,maat_module , - "update error,invalid format of compile table %s:%s",table->table_name,table_line); + "update error,invalid format of compile table %s:%s" + ,table->table_name[table->updating_name],table_line); free(p_compile); p_compile=NULL; return; @@ -2193,7 +2315,8 @@ void update_compile_rule(struct _Maat_table_info_t* table,const char* table_line if(ret<0) { MESA_handle_runtime_log(logger,RLOG_LV_INFO,maat_module , - "duplicate config of compile table %s config_id=%d",table->table_name,p_m_rule->config_id); + "duplicate config of compile table %s config_id=%d" + ,table->table_name[table->updating_name],p_m_rule->config_id); free(p_compile->service_defined); p_compile->service_defined=NULL; free(p_compile); @@ -2225,12 +2348,13 @@ void update_digest_rule(struct _Maat_table_info_t* table,const char* table_line, if(ret!=6||digest_rule->confidence_degree>10||digest_rule->confidence_degree<0) { MESA_handle_runtime_log(logger,RLOG_LV_FATAL,maat_module , - "update error,invalid format of digest table %s:%s",table->table_name,table_line); + "update error,invalid format of digest table %s:%s" + ,table->table_name[table->updating_name],table_line); goto error_out; } ret=sync_region(scanner->region_hash ,digest_rule->region_id - ,table->table_name + ,table->table_name[table->updating_name] ,digest_rule->is_valid,logger); if(ret<0) { @@ -2261,7 +2385,8 @@ void update_digest_rule(struct _Maat_table_info_t* table,const char* table_line, if(ret<0) { MESA_handle_runtime_log(logger,RLOG_LV_INFO,maat_module , - "duplicate config of intval table %s config_id=%d",table->table_name,digest_rule->region_id); + "duplicate config of intval table %s config_id=%d" + ,table->table_name[table->updating_name],digest_rule->region_id); } else { @@ -2337,6 +2462,9 @@ void garbage_bury(MESA_lqueue_head garbage_q,void *logger) case GARBAGE_BOOL_MATCHER: destroy_bool_matcher(bag->bool_matcher); break; + case GARBAGE_MAP_STR2INT: + map_destroy(bag->str2int_map); + break; default: assert(0); } @@ -2378,6 +2506,7 @@ void plugin_table_callback(struct _Maat_table_info_t* table,const char* table_li void do_scanner_update(struct _Maat_scanner_t* scanner,MESA_lqueue_head garbage_q,int scan_thread_num,void* logger) { void *tmp1=NULL,*tmp2=NULL; + MESA_htable_handle tmp_map=NULL; int i=0; long q_cnt; GIE_create_para_t para; @@ -2399,6 +2528,10 @@ void do_scanner_update(struct _Maat_scanner_t* scanner,MESA_lqueue_head garbage_ logger); for(i=0;idigest_update_q[i]==NULL) + { + continue; + } q_cnt=MESA_lqueue_get_count(scanner->digest_update_q[i]); if(q_cnt==0) { @@ -2412,6 +2545,13 @@ void do_scanner_update(struct _Maat_scanner_t* scanner,MESA_lqueue_head garbage_ digest_batch_update(scanner->digest_handle[i], scanner->digest_update_q[i],logger); pthread_rwlock_unlock(&(scanner->digest_rwlock[i])); } + if(scanner->tmp_district_map!=NULL) + { + tmp_map=scanner->district_map; + scanner->district_map=scanner->tmp_district_map; + scanner->tmp_district_map=NULL; + garbage_bagging(GARBAGE_MAP_STR2INT, tmp_map, garbage_q); + } scanner->last_update_time=time(NULL); return; @@ -2425,7 +2565,7 @@ void maat_start_cb(unsigned int new_version,int update_type,void*u_para) int i=0,j=0; if(update_type==CM_UPDATE_TYPE_FULL) { - feather->update_tmp_scanner=create_maat_scanner(new_version,feather->scan_thread_num,feather->garbage_q,feather->rule_scan_type); + feather->update_tmp_scanner=create_maat_scanner(new_version,feather); MESA_handle_runtime_log(feather->logger,RLOG_LV_INFO,maat_module, "Full config version %u -> %u update start", feather->maat_version,new_version); @@ -2438,7 +2578,7 @@ void maat_start_cb(unsigned int new_version,int update_type,void*u_para) feather->maat_version,new_version); feather->maat_version=new_version; } - for(i=0;itable_cnt;i++) + for(i=0;ip_table_info[i]; if(p_table==NULL||p_table->table_type!=TABLE_TYPE_PLUGIN) @@ -2530,9 +2670,10 @@ void maat_finish_cb(void* u_para) void maat_update_cb(const char* table_name,const char* line,void *u_para) { struct _Maat_feather_t *feather=(struct _Maat_feather_t *)u_para; - int ret=-1; + int ret=-1,i=0; int table_id=-1; _Maat_scanner_t* scanner=NULL; + struct _Maat_table_info_t* p_table=NULL; if(feather->update_tmp_scanner!=NULL) { scanner=feather->update_tmp_scanner; @@ -2547,6 +2688,16 @@ void maat_update_cb(const char* table_name,const char* line,void *u_para) MESA_handle_runtime_log(feather->logger,RLOG_LV_INFO,maat_module ,"update warning,unknown table name %s",table_name); return; } + p_table=feather->p_table_info[table_id]; + for(i=0;iconj_cnt;i++) + { + if(0==memcmp(p_table->table_name[i],table_name,strlen(table_name))) + { + p_table->updating_name=i; + } + } + assert(i<=p_table->conj_cnt); + switch(feather->p_table_info[table_id]->table_type) { case TABLE_TYPE_EXPR: diff --git a/src/entry/Maat_rule_internal.h b/src/entry/Maat_rule_internal.h index 0dcdbda..9be0022 100644 --- a/src/entry/Maat_rule_internal.h +++ b/src/entry/Maat_rule_internal.h @@ -35,6 +35,7 @@ typedef int atomic_t; #define FALSE 0 #define MAX_TABLE_NUM 256 +#define MAX_CONJUNCTION_TABLE_NUM 8 #define MAX_CHARSET_NUM 16 #define MAX_TABLE_NAME_LEN 256 #define MAX_TABLE_LINE_SIZE (1024*4) @@ -167,6 +168,7 @@ struct op_expr_t { boolean_expr_t* p_expr; scan_rule_t* p_rules[MAAT_MAX_EXPR_ITEM_NUM]; + int convert_failed; int no_effect_convert_cnt; }; @@ -222,7 +224,9 @@ struct _plugin_table_info struct _Maat_table_info_t { unsigned short table_id; - char table_name[MAX_TABLE_NAME_LEN]; + unsigned short conj_cnt; + unsigned short updating_name; + char table_name[MAX_CONJUNCTION_TABLE_NUM][MAX_TABLE_NAME_LEN]; enum MAAT_TABLE_TYPE table_type; enum MAAT_CHARSET src_charset; enum MAAT_CHARSET dst_charset[MAX_CHARSET_NUM]; @@ -230,6 +234,7 @@ struct _Maat_table_info_t int do_charset_merge; int cfg_num; int cross_cache_size; + int quick_expr_switch; union { int expr_rule_cnt; //expr_type=0,1,3 @@ -271,7 +276,8 @@ enum maat_garbage_type GARBAGE_SCANNER=0, GARBAGE_GROUP_RULE, GARBAGE_COMPILE_RULE, - GARBAGE_BOOL_MATCHER + GARBAGE_BOOL_MATCHER, + GARBAGE_MAP_STR2INT }; struct iconv_handle_t { @@ -310,6 +316,7 @@ struct _Maat_scanner_t MESA_htable_handle group_hash; MESA_htable_handle compile_hash; MESA_htable_handle district_map; + MESA_htable_handle tmp_district_map; unsigned int district_num; unsigned int cfg_num; unsigned int exprid_generator; @@ -366,6 +373,7 @@ struct _maat_garbage_t struct _Maat_compile_rule_t* compile_rule; void* bool_matcher; void * raw; + MESA_htable_handle str2int_map; }; }; void garbage_bagging(enum maat_garbage_type type,void *p,MESA_lqueue_head garbage_q); diff --git a/src/entry/Maat_stat.cpp b/src/entry/Maat_stat.cpp index 6a32535..60f8fdd 100644 --- a/src/entry/Maat_stat.cpp +++ b/src/entry/Maat_stat.cpp @@ -23,11 +23,13 @@ enum MAAT_FS_COLUMN COLUMN_TABLE_CPU_TIME,//microseconds COLUMN_TABLE_HIT_CNT, }; +#define MAX_CONJ_NAME_LEN 22 void maat_stat_init(struct _Maat_feather_t* feather) { int value=0; - int i=0; + int i=0,j=0,offset=0; struct _Maat_table_info_t* p_table=NULL; + char conj_table_name[(MAX_TABLE_NAME_LEN+1)*MAX_CONJUNCTION_TABLE_NUM]={0}; feather->stat_handle=FS_create_handle(); FS_set_para(feather->stat_handle, OUTPUT_DEVICE, feather->stat_file, strlen(feather->stat_file)+1); @@ -94,7 +96,20 @@ void maat_stat_init(struct _Maat_feather_t* feather) { continue; } - p_table->stat_line_id=FS_register(feather->stat_handle, FS_STYLE_LINE, FS_CALC_CURRENT,p_table->table_name); + offset=0; + for(j=0;jconj_cnt;j++) + { + offset+=snprintf(conj_table_name+offset,sizeof(conj_table_name)-offset + ,"%s/",p_table->table_name[j]); + } + conj_table_name[offset-1]='\0';//delete the last slash + if(strlen(conj_table_name)>MAX_CONJ_NAME_LEN) + { + snprintf(conj_table_name+MAX_CONJ_NAME_LEN-2,sizeof(conj_table_name)-MAX_CONJ_NAME_LEN-2,".."); + } + p_table->stat_line_id=FS_register(feather->stat_handle + , FS_STYLE_LINE, FS_CALC_CURRENT + ,conj_table_name); } FS_start(feather->stat_handle); return; diff --git a/src/entry/Makefile b/src/entry/Makefile index 4b72255..65c5b52 100644 --- a/src/entry/Makefile +++ b/src/entry/Makefile @@ -4,10 +4,13 @@ CC = gcc CCC = g++ CFLAGS = -Wall -g -fPIC +GCOV_FLAGS = -fprofile-arcs -ftest-coverage CFLAGS += $(OPTFLAGS) +#CFLAGS += $(GCOV_FLAGS) LDDICTATOR = -Wl,-wrap,malloc -Wl,-wrap,calloc -Wl,-wrap,free -Wl,-wrap,realloc -LDFLAGS = -lMESA_handle_logger -lMESA_htable -lpthread -lrt -lm -lrulescan -lpcre -lMESA_field_stat2 +LDFLAGS = -lMESA_handle_logger -lMESA_htable -lpthread -lrt -lm -lrulescan -lpcre -lMESA_field_stat2 -lgcov #LDFLAGS += $(LDDICTATOR) +LDFLAGS += $(GCOV_FLAGS) MAILLIB = ../lib G_H_DIR =../inc_internal @@ -34,7 +37,7 @@ $(LIBMAAT): $(OBJS) cp $(LIBMAAT) ../../lib/ clean: - rm -rf *.o $(LIBMAAT) *~ + rm -rf *.o *.gcov *.gcno $(LIBMAAT) *~ opt: $(MAKE) all diff --git a/src/entry/map_str2int.cpp b/src/entry/map_str2int.cpp index 2e8752e..f8cf554 100644 --- a/src/entry/map_str2int.cpp +++ b/src/entry/map_str2int.cpp @@ -1,3 +1,4 @@ +#include #include void map_tmp_free(void* ptr) @@ -22,7 +23,7 @@ MESA_htable_handle map_create(void) MESA_htable_handle string2int_map; MESA_htable_create_args_t hargs; memset(&hargs,0,sizeof(hargs)); - hargs.thread_safe=8; + hargs.thread_safe=0; hargs.hash_slot_size = 4*1024; hargs.max_elem_num = 0; hargs.eliminate_type = HASH_ELIMINATE_ALGO_FIFO; @@ -74,4 +75,23 @@ int map_str2int(MESA_htable_handle handle,const char* string,int* value) return -1; } } +void duplicate_func(const uchar * key, uint size, void * data, void * user) +{ + MESA_htable_handle target=(MESA_htable_handle)user; + int value=*(int*)data; + int ret=0; + char* string=(char*)calloc(sizeof(char),(size+1)); + memcpy(string,key,size); + ret=map_register(target,string,value); + assert(ret>0); +} +MESA_htable_handle map_duplicate(MESA_htable_handle origin_map) +{ + int ret=0; + MESA_htable_handle target=NULL; + target=map_create(); + ret=MESA_htable_iterate(origin_map, duplicate_func, target); + assert(ret==0); + return target; +} diff --git a/src/entry/map_str2int.h b/src/entry/map_str2int.h index 4652ee9..132a1e0 100644 --- a/src/entry/map_str2int.h +++ b/src/entry/map_str2int.h @@ -4,5 +4,6 @@ MESA_htable_handle map_create(void); void map_destroy(MESA_htable_handle p); int map_register(MESA_htable_handle handle,const char* string,int value); int map_str2int(MESA_htable_handle handle,const char* string,int* value); +MESA_htable_handle map_duplicate(MESA_htable_handle origin_map); #endif diff --git a/src/entry/mesa_fuzzy.c b/src/entry/mesa_fuzzy.c index 88a1b6c..b529ad9 100644 --- a/src/entry/mesa_fuzzy.c +++ b/src/entry/mesa_fuzzy.c @@ -11,20 +11,12 @@ //#define DEBUG_PRINT #define INIT_SIZE 128 #define ENTROPY_THRESHOLD 0.5 -int count = 0; -const char * b64 = + +const char * sfh_b64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; - -struct entry -{ - unsigned int * r_array; - unsigned int r_index; - unsigned int r_size; -}; - double get_rs_entropy(unsigned int * r_array, unsigned int r_index); -int cmp(const void * a, const void * b); +int loop_cmp(const void * a, const void * b); void sfh_rs_entropy(IVI_seg_t * seg, void * user_para); void sfh_tune_simulation(IVI_seg_t * seg, void * user_para); @@ -339,7 +331,7 @@ unsigned int segment_overlap(fuzzy_handle_inner_t * _handle, unsigned int size, return effective_length; } -int cmp(const void * a, const void * b) +int loop_cmp(const void * a, const void * b) { unsigned int tmp_a = *(unsigned int *)a; unsigned int tmp_b = *(unsigned int *)b; @@ -359,7 +351,7 @@ int cmp(const void * a, const void * b) double get_rs_entropy(unsigned int * r_array, unsigned int r_index) { - qsort(r_array, r_index, sizeof(unsigned int), cmp); + qsort(r_array, r_index, sizeof(unsigned int), loop_cmp); unsigned int current_r = r_array[0]; unsigned int * tmp_r = r_array; unsigned int count = 0; @@ -626,17 +618,17 @@ void sfh_output_state(IVI_seg_t * seg, void * user_para) memset(hash_result,0,sizeof(hash_result)); if(seg->left == 0) { - hash_result[j] = b64[zt_hash_code(&(node->ps)) & 0x3F]; + hash_result[j] = sfh_b64[zt_hash_code(&(node->ps)) & 0x3F]; j++; } for(i = 0; i < node->s_cnt; i++,j++) { - hash_result[j] = b64[(node->s_array[i].val) & 0x3F]; + hash_result[j] = sfh_b64[(node->s_array[i].val) & 0x3F]; } hash_result[j+1]='\0'; if(0!=memcmp(&(node->s_state),ZT_INIT_VAL,sizeof(ZT_INIT_VAL))) { - result->last_char=b64[zt_hash_code(&(node->s_state)) & 0x3F]; + result->last_char=sfh_b64[zt_hash_code(&(node->s_state)) & 0x3F]; } else { diff --git a/src/entry/zt_hash.h b/src/entry/zt_hash.h index 8acf0be..a8b45a7 100644 --- a/src/entry/zt_hash.h +++ b/src/entry/zt_hash.h @@ -10,7 +10,7 @@ struct zt_state_t }; }; -const unsigned char table[256][4] = +const unsigned char table_char2matrix[256][4] = { {76,28,128,81},{76,204,128,209},{204,128,209,81},{204,76,209,128},{238,209,196,115},{238,63,196,183},{63,209,183,115},{63,238,183,196},{230,196,193,123},{230,34,193,186},{34,196,186,123},{34,230,186,193},{0,183,175,76},{0,183,175,227},{183,183,227,76},{183,0,227,175},{228,193,192,121},{228,37,192,185},{37,193,185,121},{37,228,185,192},{18,186,164,75},{18,168,164,239},{168,186,239,75},{168,18,239,164},{15,175,169,67},{15,160,169,234},{160,175,234,67},{160,15,234,169},{151,227,247,108},{151,116,247,155},{116,227,155,108},{116,151,155,247},{228,192,193,121},{228,36,193,184},{36,192,184,121},{36,228,184,193},{22,185,167,74},{22,175,167,237},{175,185,237,74},{175,22,237,167},{30,164,162,71},{30,186,162,229},{186,164,229,71},{186,30,229,162},{136,239,250,107},{136,103,250,145},{103,239,145,107},{103,136,145,250},{12,169,169,64},{12,165,169,233},{165,169,233,64},{165,12,233,169},{138,234,251,105},{138,96,251,146},{96,234,146,105},{96,138,146,251},{159,247,243,100},{159,104,243,151},{104,247,151,100},{104,159,151,243},{71,155,133,95},{71,220,133,218},{220,155,218,95},{220,71,218,133},{230,193,196,123},{230,39,196,191},{39,193,191,123},{39,230,191,196},{20,184,160,73},{20,172,160,233},{172,184,233,73},{172,20,233,160},{25,167,167,69},{25,190,167,226},{190,167,226,69},{190,25,226,167},{141,237,253,104},{141,96,253,149},{96,237,149,104},{96,141,149,253},{30,162,164,71},{30,188,164,227},{188,162,227,71},{188,30,227,164},{144,229,240,109},{144,117,240,157},{117,229,157,109},{117,144,157,240},{130,250,251,97},{130,120,251,154},{120,250,154,97},{120,130,154,251},{84,145,137,88},{84,197,137,209},{197,145,209,88},{197,84,209,137},{15,169,175,67},{15,166,175,236},{166,169,236,67},{166,15,236,175},{143,233,253,106},{143,102,253,151},{102,233,151,106},{102,143,151,253},{130,251,250,97},{130,121,250,155},{121,251,155,97},{121,130,155,250},{80,146,138,89},{80,194,138,211},{194,146,211,89},{194,80,211,138},{159,243,247,100},{159,108,247,147},{108,243,147,100},{108,159,147,247},{87,151,137,91},{87,192,137,210},{192,151,210,91},{192,87,210,137},{72,133,133,80},{72,205,133,213},{205,133,213,80},{205,72,213,133},{246,218,207,117},{246,44,207,186},{44,218,186,117},{44,246,186,207},{238,196,209,115},{238,42,209,162},{42,196,162,115},{42,238,162,209},{24,191,191,68},{24,167,191,251},{167,191,251,68},{167,24,251,191},{20,160,184,73},{20,180,184,241},{180,160,241,73},{180,20,241,184},{134,233,236,99},{134,111,236,143},{111,233,143,99},{111,134,143,236},{22,167,185,74},{22,177,185,243},{177,167,243,74},{177,22,243,185},{156,226,227,103},{156,126,227,132},{126,226,132,103},{126,156,132,227},{143,253,233,106},{143,114,233,131},{114,253,131,106},{114,143,131,233},{95,149,147,87},{95,202,147,196},{202,149,196,87},{202,95,196,147},{18,164,186,75},{18,182,186,241},{182,164,241,75},{182,18,241,186},{156,227,226,103},{156,127,226,133},{127,227,133,103},{127,156,133,226},{144,240,229,109},{144,96,229,136},{96,240,136,109},{96,144,136,229},{74,157,155,82},{74,215,155,201},{215,157,201,82},{215,74,201,155},{138,251,234,105},{138,113,234,131},{113,251,131,105},{113,138,131,234},{72,154,154,81},{72,210,154,203},{210,154,203,81},{210,72,203,154},{87,137,151,91},{87,222,151,204},{222,137,204,91},{222,87,204,151},{231,209,213,122},{231,54,213,175},{54,209,175,122},{54,231,175,213},{0,175,183,76},{0,175,183,251},{175,175,251,76},{175,0,251,183},{134,236,233,99},{134,106,233,138},{106,236,138,99},{106,134,138,233},{141,253,237,104},{141,112,237,133},{112,253,133,104},{112,141,133,237},{89,151,151,85},{89,206,151,194},{206,151,194,85},{206,89,194,151},{136,250,239,107},{136,114,239,132},{114,250,132,107},{114,136,132,239},{74,155,157,82},{74,209,157,207},{209,155,207,82},{209,74,207,157},{80,138,146,89},{80,218,146,203},{218,138,203,89},{218,80,203,146},{226,211,210,121},{226,49,210,171},{49,211,171,121},{49,226,171,210},{151,247,227,108},{151,96,227,143},{96,247,143,108},{96,151,143,227},{95,147,149,87},{95,204,149,194},{204,147,194,87},{204,95,194,149},{84,137,145,88},{84,221,145,201},{221,137,201,88},{221,84,201,145},{226,210,211,121},{226,48,211,170},{48,210,170,121},{48,226,170,211},{71,133,155,95},{71,194,155,196},{194,133,196,95},{194,71,196,155},{231,213,209,122},{231,50,209,171},{50,213,171,122},{50,231,171,209},{246,207,218,117},{246,57,218,175},{57,207,175,117},{57,246,175,218},{28,186,186,69},{28,166,186,255},{166,186,255,69},{166,28,255,186} }; @@ -124,7 +124,7 @@ static inline void zt_hash_arymul(struct zt_state_t * a, struct zt_state_t* b) } /* - ** this function is used to create the table[4][256] + ** this function is used to create the table_char2matrix[4][256] */ /*void convert(int number, unsigned char * ret) { @@ -172,26 +172,26 @@ static inline void zt_hash_arymul(struct zt_state_t * a, struct zt_state_t* b) /* - ** this function is used to create table[4][256] + ** this function is used to create table_char2matrix[4][256] */ /*void zt_hash_create_table() { unsigned char ret[4]={0}; int i = 0; FILE * fp; - fp = fopen("/home/lixiang/zt_hash/table.txt","a"); + fp = fopen("/home/lixiang/zt_hash/table_char2matrix.txt","a"); //galois_create_mult_tables(8); //it should not be a comment for(i = 0; i < 256; i++) { convert(i, ret); - table[i].matrix[0] = ret[0]; - table[i].matrix[1] = ret[1]; - table[i].matrix[2] = ret[2]; - table[i].matrix[3] = ret[3]; - fprintf(fp, "{%d,%d,%d,%d},", table[i].matrix[0], - table[i].matrix[1], - table[i].matrix[2], - table[i].matrix[3]); + table_char2matrix[i].matrix[0] = ret[0]; + table_char2matrix[i].matrix[1] = ret[1]; + table_char2matrix[i].matrix[2] = ret[2]; + table_char2matrix[i].matrix[3] = ret[3]; + fprintf(fp, "{%d,%d,%d,%d},", table_char2matrix[i].matrix[0], + table_char2matrix[i].matrix[1], + table_char2matrix[i].matrix[2], + table_char2matrix[i].matrix[3]); } } @@ -207,7 +207,7 @@ void zt_hash_destroy_table() inline void zt_hash(struct zt_state_t* array, unsigned char c) { - zt_hash_arymul(array, (struct zt_state_t *)(table[c])); + zt_hash_arymul(array, (struct zt_state_t *)(table_char2matrix[c])); } unsigned char ZT_INIT_VAL[4]={1,0,0,1}; diff --git a/src/inc_internal/rulescan.h b/src/inc_internal/rulescan.h index 8ee4b80..a2644b8 100644 --- a/src/inc_internal/rulescan.h +++ b/src/inc_internal/rulescan.h @@ -7,7 +7,7 @@ * All rights reserved * * Written by: LIU YANBING (liuyanbing@iie.ac.cn) - * Last modification: 2015-02-03 + * Last modification: 2016-06-05 * * This code is the exclusive and proprietary property of IIE-CAS and NELIST. * Usage for direct or indirect commercial advantage is not allowed without @@ -23,12 +23,12 @@ extern "C" { #endif - /* 定义不同的扫描类型 */ - enum ScanType + /* rulescan_set_param函数可设置的参数类型 */ + enum RULESCAN_PARA_NAME { - SCANTYPE_DEFAULT = 0, - SCANTYPE_DETAIL_RESULT = 1, - SCANTYPE_REGEX_GROUP = 2 + RULESCAN_DETAIL_RESULT=1, /* 本标志位表示:返回详细命中位置等信息, optval设为NULL,optlen设为0。默认不返回详细信息*/ + RULESCAN_REGEX_GROUP =2, /* 本标志位表示:返回正则表达式匹配的分组信息;开启本字段,需要先设置RULESCAN_DETAIL_RESULT标志位,optval设为NULL,optlen设为0。默认不返回分组信息 */ + RULESCAN_QUICK_SCAN /* 设置需要快速扫描模式, 由用户自己定义,optval设为NULL,optlen设为0。默认设置是普通扫描模式。*/ }; #define MAX_REGEX_GROUP_NUM 5 /* 对于正则表达式,所支持的最大分组的个数 */ @@ -84,9 +84,9 @@ extern "C" typedef struct _ipv6_rule_t { unsigned int saddr[4]; /* 源IP地址;0表示忽略本字段 */ - unsigned char smask_bits;/* 源IP地址掩码位数;0表示固定IP=saddr */ + unsigned int smask[4]; /* 源IP地址掩码;0表示固定IP=saddr */ unsigned int daddr[4]; /* 目的IP地址;0表示忽略本字段 */ - unsigned char dmask_bits;/* 目的IP地址掩码位数;0表示固定IP=daddr */ + unsigned int dmask[4]; /* 目的IP地址掩码;0表示固定IP=daddr */ unsigned short int min_sport; /* 源端口范围下界;0表示忽略本字段 */ unsigned short int max_sport; /* 源端口范围上界;0表示固定端口=min_sport */ unsigned short int min_dport; /* 目的端口范围下界;0表示忽略本字段 */ @@ -208,14 +208,16 @@ extern "C" /* 功能: - 设置扫描参数,本函数在rulescan_update之前可多次调用,每次设置一种扫描类型 + 设置扫描参数,本函数在rulescan_update之前可多次调用,每次设置一种扫描类型。Rulescan中默认不返回命中位置等详细信息 参数: instance[in]: 扫描器对象指针; - scan_type_flag[in]: 扫描类型的标志信息,0代表不返回位置等信息;1代表返回位置等信息,但是不返回正则分组信息;2代表返回位置信息和正则分组信息 + optname [in]: 参数类型; + optval [in]: optval和optlen表示参数的具体内容; + optlen [in]: optval和optlen表示参数的具体内容。 返回值: 1:正确设置,-1:设置失败。 */ - int rulescan_set_param(void * instance, int scan_type_flag); + int rulescan_set_param(void * instance, enum RULESCAN_PARA_NAME optname, const void * optval, unsigned int optlen); /* 功能:动态注册一组与表达式,更新扫描器对象。对于同一个instance句柄,不允许同时有多个更新线程。 diff --git a/test/conf/This folder is used to set rulescan scan para.txt b/test/conf/This folder is used to set rulescan scan para.txt new file mode 100644 index 0000000..e69de29 diff --git a/test/conf/config.txt b/test/conf/config.txt new file mode 100644 index 0000000..5a2112e --- /dev/null +++ b/test/conf/config.txt @@ -0,0 +1,30 @@ +[URL50] +group_num = {2} +group_algor_0 = {AC} +group_algor_1 = {KRF} +group_len_0 = {11} +krf_scale_bits = {5} +[URL100] +group_num = {2} +group_algor_0 = {AC} +group_algor_1 = {KRF} +group_len_0 = {11} +krf_scale_bits = {4} +[URL200] +group_num = {2} +group_algor_0 = {AC} +group_algor_1 = {KRF} +group_len_0 = {11} +krf_scale_bits = {3} +[URL500] +group_num = {2} +group_algor_0 = {AC} +group_algor_1 = {KRF} +group_len_0 = {11} +krf_scale_bits = {3} +[URL1000] +group_num = {2} +group_algor_0 = {AC} +group_algor_1 = {KRF} +group_len_0 = {10} +krf_scale_bits = {5} diff --git a/test/maat_json.json b/test/maat_json.json index b133216..0804d84 100644 --- a/test/maat_json.json +++ b/test/maat_json.json @@ -297,6 +297,88 @@ ] } ] + }, + { + "compile_id": 132, + "service": 1, + "action": 1, + "do_blacklist": 1, + "do_log": 1, + "effective_rage": 0, + "user_region": "string\\bunescape", + "is_valid": "yes", + "groups": [ + { + "group_name": "group_12", + "regions": [ + { + "table_name": "KEYWORDS_TABLE", + "table_type": "string", + "table_content": { + "keywords": "Take\\bme\\bHome&Batman\\", + "expr_type": "and", + "match_method": "sub", + "format": "uncase plain" + } + } + ] + } + ] + }, + { + "compile_id": 133, + "service": 1, + "action": 1, + "do_blacklist": 1, + "do_log": 1, + "effective_rage": 0, + "user_region": "table_conjunction_test_part1", + "is_valid": "yes", + "groups": [ + { + "group_name": "group_13", + "regions": [ + { + "table_name": "HTTP_HOST", + "table_type": "string", + "table_content": { + "keywords": "www.3300av.com", + "expr_type": "none", + "match_method": "sub", + "format": "uncase plain" + } + } + ] + } + ] + } +, + { + "compile_id": 134, + "service": 1, + "action": 1, + "do_blacklist": 1, + "do_log": 1, + "effective_rage": 0, + "user_region": "table_conjunction_test_part2", + "is_valid": "yes", + "groups": [ + { + "group_name": "group_14", + "regions": [ + { + "table_name": "HTTP_URL", + "table_type": "string", + "table_content": { + "keywords": "novel&27122.txt", + "expr_type": "and", + "match_method": "sub", + "format": "uncase plain" + } + } + ] + } + ] } ], "plugin_table": [ diff --git a/test/maat_test.cpp b/test/maat_test.cpp index 7f66b2c..bb3ecc9 100644 --- a/test/maat_test.cpp +++ b/test/maat_test.cpp @@ -59,7 +59,7 @@ void print_maat_ret(int ret) } return; } -const char* print_maat_rule(struct Maat_rule_t* result,int ret) +const char* print_maat_result(struct Maat_rule_t* result,int ret) { static char buff[1024]={0}; int i=0,j=0; @@ -101,7 +101,28 @@ int test_string_full_scan(Maat_feather_t feather,const char* table_name,scan_sta ret=Maat_full_scan_string(feather, table_id,CHARSET_GBK, scan_data, strlen(scan_data), result,found_pos, 4, mid, 0); - print_maat_ret(ret); + printf("Full String Scan:%s\n",print_maat_result(result,ret)); + return ret; +} +int test_unescape_string_scan(Maat_feather_t feather,const char* table_name,scan_status_t* mid) +{ + int ret=0; + int table_id=0; + struct Maat_rule_t result[4]; + int found_pos[4]; + const char* scan_data="Batman\\:Take me Home.Superman/:Fine,stay with me."; + table_id=Maat_table_register(feather,table_name); + if(table_id==-1) + { + printf("Database table %s register failed.\n",table_name); + return -1; + } + + ret=Maat_full_scan_string(feather, table_id,CHARSET_GBK, scan_data, strlen(scan_data), + result,found_pos, 4, + mid, 0); + printf("Unescape String Scan:%s\n",print_maat_result(result,ret)); + return ret; } int test_intval_scan(Maat_feather_t feather,const char* table_name,scan_status_t* mid) @@ -118,7 +139,7 @@ int test_intval_scan(Maat_feather_t feather,const char* table_name,scan_status_t else { ret=Maat_scan_intval(feather, table_id, scan_val, result,4,mid, 0); - print_maat_ret(ret); + printf("Intval Scan:%s\n",print_maat_result(result,ret)); } return ret; } @@ -149,7 +170,7 @@ int test_str_stream_scan(Maat_feather_t feather,const char* table_name,scan_stat ,&detail_ret,mid); Maat_stream_scan_string_end(&sp); free(hit_detail); - print_maat_ret(ret); + printf("Stream String Scan:%s\n",print_maat_result(result,ret)); return ret; } int test_ipv4_scan(Maat_feather_t feather,const char* table_name,scan_status_t* mid) @@ -174,10 +195,7 @@ int test_ipv4_scan(Maat_feather_t feather,const char* table_name,scan_status_t* else { ret=Maat_scan_proto_addr(feather,table_id,&ipv4_addr,6,result,4, mid,0); - if(ret>0) - { - printf("ipv4 scan hit compile rule id %d.\n",result[0].config_id); - } + printf("IPv4 addr Scan:%s\n",print_maat_result(result,ret)); } return ret; } @@ -204,11 +222,9 @@ int test_ipv6_scan(Maat_feather_t feather,const char* table_name,scan_status_t* else { ret=Maat_scan_proto_addr(feather,table_id,&ipv6_addr,6,result,4, mid,0); - if(ret==-2) - { - printf("ipv6 scan hit region.\n"); - } - else + printf("IPv6 addr Scan:%s\n",print_maat_result(result,ret)); + + if(ret!=-2) { printf("ipv6 scan result:%d ,shoulde be -2.\n",ret); @@ -249,7 +265,7 @@ int test_digest_scan(Maat_feather_t feather,const char* table_name,scan_status_t scan_offset+=read_size; if(ret>0) { - printf("digest scan hit %d.\n",result[0].config_id); + printf("Digest Scan:%s\n",print_maat_result(result,ret)); } } @@ -300,12 +316,12 @@ int test_url_encode(Maat_feather_t feather,const char* table_name,scan_status_t* ret=Maat_full_scan_string(feather, table_id,CHARSET_GBK, url_utf8, strlen(url_utf8), result,found_pos, 4, mid, 0); - printf("URL encode scan utf8 url %s\n",print_maat_rule(result,ret)); + printf("URL encode scan utf8 url: %s\n",print_maat_result(result,ret)); ret=Maat_full_scan_string(feather, table_id,CHARSET_GBK, url_gb2312, strlen(url_gb2312), result,found_pos, 4, mid, 0); - printf("URL encode scan gb2312 url %s\n",print_maat_rule(result,ret)); + printf("URL encode scan gb2312 url: %s\n",print_maat_result(result,ret)); return 0; } @@ -363,7 +379,7 @@ int test_unicode_esc(Maat_feather_t feather,const char* table_name,scan_status_t read_len=fread(buff,1,sizeof(buff),fp); if(ret>0) { - printf("UNI2ASCII file %s,%s\n",file_path,print_maat_rule(result,ret)); + printf("UNI2ASCII file %s,%s\n",file_path,print_maat_result(result,ret)); } } Maat_stream_scan_string_end(&sp); @@ -415,6 +431,31 @@ int test_expr_plus(Maat_feather_t feather,const char* table_name,scan_status_t* return ret; } +int test_table_conjunction(Maat_feather_t feather,const char* table_name,const char* conj_table_name,scan_status_t* mid) +{ + int ret=0; + int table_id=0,conj_table_id=0; + struct Maat_rule_t result[4]; + int found_pos[4]; + const char* scan_data="soq is using table conjunction function.http://www.3300av.com/novel/27122.txt"; + + table_id=Maat_table_register(feather,table_name); + if(table_id==-1) + { + printf("Database table %s register failed.\n",table_name); + return -1; + } + conj_table_id=Maat_table_register(feather,conj_table_name); + assert(conj_table_id==table_id); + ret=Maat_full_scan_string(feather, conj_table_id,CHARSET_GBK, scan_data, strlen(scan_data), + result,found_pos, 4, + mid, 0); + if(ret>=2) + { + printf("Table conjunction success %s\n",print_maat_result(result,ret)); + } + return 0; +} int main(int argc,char* argv[]) { Maat_feather_t feather=NULL; @@ -468,7 +509,16 @@ int main(int argc,char* argv[]) test_unicode_esc(feather,"KEYWORDS_TABLE",&mid); Maat_clean_status(&mid); - + + test_unescape_string_scan(feather,"KEYWORDS_TABLE",&mid); + Maat_clean_status(&mid); + + test_str_stream_scan(feather,"HTTP_URL", &mid); + Maat_clean_status(&mid); + + test_table_conjunction(feather, "HTTP_URL", "HTTP_HOST", &mid); + Maat_clean_status(&mid); + sleep(4); Maat_burn_feather(feather); diff --git a/test/right_result.txt b/test/right_result.txt index 2897e09..d42d57d 100644 --- a/test/right_result.txt +++ b/test/right_result.txt @@ -1,8 +1,35 @@ Load entry id 101 SUCCESS. hit 1 rules -hit current region,but not hit compile rule. ipv4 scan hit compile rule id 123. +hit current region,but not hit compile rule. ipv6 scan hit region. digest scan hit 127. digest scan hit 127. digest scan hit 127. +digest scan hit 127. +digest scan hit 127. +digest scan hit 127. +Hit expr_plus rule 128. +URL encode scan utf8 url hit 1 rules, hit ruleid=129 +URL encode scan gb2312 url hit 1 rules, hit ruleid=129 +test_unicode_esc processing ./testdata_uni2ascii/original_Uygur_webpage.html +UNI2ASCII file ./testdata_uni2ascii/original_Uygur_webpage.html,hit 2 rules, hit ruleid=130 131 +UNI2ASCII file ./testdata_uni2ascii/original_Uygur_webpage.html,hit 1 rules, hit ruleid=131 +UNI2ASCII file ./testdata_uni2ascii/original_Uygur_webpage.html,hit 1 rules, hit ruleid=131 +UNI2ASCII file ./testdata_uni2ascii/original_Uygur_webpage.html,hit 1 rules, hit ruleid=131 +UNI2ASCII file ./testdata_uni2ascii/original_Uygur_webpage.html,hit 1 rules, hit ruleid=131 +UNI2ASCII file ./testdata_uni2ascii/original_Uygur_webpage.html,hit 1 rules, hit ruleid=131 +UNI2ASCII file ./testdata_uni2ascii/original_Uygur_webpage.html,hit 1 rules, hit ruleid=131 +UNI2ASCII file ./testdata_uni2ascii/original_Uygur_webpage.html,hit 1 rules, hit ruleid=131 +UNI2ASCII file ./testdata_uni2ascii/original_Uygur_webpage.html,hit 1 rules, hit ruleid=131 +UNI2ASCII file ./testdata_uni2ascii/original_Uygur_webpage.html,hit 1 rules, hit ruleid=131 +test_unicode_esc processing ./testdata_uni2ascii/original_uy.txt +UNI2ASCII file ./testdata_uni2ascii/original_uy.txt,hit 2 rules, hit ruleid=130 131 +test_unicode_esc processing ./testdata_uni2ascii/qq_mail_https.txt +UNI2ASCII file ./testdata_uni2ascii/qq_mail_https.txt,hit 2 rules, hit ruleid=130 131 +UNI2ASCII file ./testdata_uni2ascii/qq_mail_https.txt,hit 1 rules, hit ruleid=130 +test_unicode_esc processing ./testdata_uni2ascii/sina_read_mail.txt +UNI2ASCII file ./testdata_uni2ascii/sina_read_mail.txt,hit 2 rules, hit ruleid=130 131 +test_unicode_esc processing ./testdata_uni2ascii/sohu_mail_unicode.txt +UNI2ASCII file ./testdata_uni2ascii/sohu_mail_unicode.txt,hit 2 rules, hit ruleid=130 131 +UNI2ASCII file ./testdata_uni2ascii/sohu_mail_unicode.txt,hit 2 rules, hit ruleid=130 131 diff --git a/test/table_info.conf b/test/table_info.conf index a4cb5b9..4e51399 100644 --- a/test/table_info.conf +++ b/test/table_info.conf @@ -8,7 +8,8 @@ #id name type src_charset dst_charset do_merge 0 COMPILE compile GBK GBK no 0 1 GROUP group GBK GBK no 0 -2 HTTP_URL expr UTF8 GBK/BIG5/UNICODE/UTF8/url_encode_gb2312/url_encode_utf8 yes 128 +2 HTTP_URL expr UTF8 GBK/BIG5/UNICODE/UTF8/url_encode_gb2312/url_encode_utf8 yes 128 quickoff +2 HTTP_HOST expr UTF8 GBK/BIG5/UNICODE/UTF8/url_encode_gb2312/url_encode_utf8 yes 128 quickoff 3 KEYWORDS_TABLE expr UTF8 GBK/BIG5/UNICODE/UTF8/unicode_ascii_esc/unicode_ascii_aligned/unicode_ncr_dec/unicode_ncr_hex yes 0 4 IP_CONFIG ip GBK GBK no 0 5 CONTENT_SIZE intval GBK GBK no 0