diff --git a/src/entry/Maat_api.cpp b/src/entry/Maat_api.cpp index 6908002..466796d 100644 --- a/src/entry/Maat_api.cpp +++ b/src/entry/Maat_api.cpp @@ -64,7 +64,8 @@ void Maat_clean_status(scan_status_t* mid) alignment_int64_array_add(_mid->feather->outer_mid_cnt,_mid->thread_num,-1); if(_mid->inner!=NULL) { - free(_mid->inner->hitted_group_id); + dynamic_array_destroy(_mid->inner->cur_hit_groups, NULL); + free(_mid->inner->all_hit_group_array); free(_mid->inner); alignment_int64_array_add(_mid->feather->inner_mid_cnt,_mid->thread_num,-1); } @@ -117,15 +118,17 @@ int insert_set_id(unsigned long long **set, size_t* size, size_t cnt, unsigned l } } size_t pickup_hit_region_from_compile(struct bool_expr *compile_hit, - const unsigned long long* hitted_id, size_t hit_cnt, int* region_pos, size_t size) + struct dynamic_array_t* hitted_id, size_t hit_cnt, int* region_pos, size_t size) { size_t i=0, j=0; size_t k=0; + unsigned long long group_id=0; for(i=0;iitem_num; j++) - { - if(hitted_id[i]==compile_hit->items[j].item_id) + { + if(group_id==compile_hit->items[j].item_id) { region_pos[k]=i; k++; @@ -141,12 +144,19 @@ void fill_maat_rule(struct Maat_rule_t *rule, const struct _head_Maat_rule* rule memcpy(rule->service_defined, srv_def, MIN(srv_def_len,MAX_SERVICE_DEFINE_LEN)); return; } -static int compare_compile_id(const void *a, const void *b) +static int compare_compile_inner(const void *a, const void *b) { - struct Maat_rule_t *ra=(struct Maat_rule_t *)a; - struct Maat_rule_t *rb=(struct Maat_rule_t *)b; + const struct Maat_compile_inner *ra=*(const struct Maat_compile_inner **)a; + const struct Maat_compile_inner *rb=*(const struct Maat_compile_inner **)b; - return (rb->config_id-ra->config_id); + if(ra->group_cnt!=rb->group_cnt) + { + return (ra->group_cnt-rb->group_cnt); + } + else + { + return (rb->compile_id-ra->compile_id); + } } int region_compile(_Maat_feather_t*feather,struct _INNER_scan_status_t *_mid,int is_last_region,void* region_hit,int region_type_size,int group_offset,int region_hit_num,struct Maat_rule_t* result,_compile_result_t *rs_result, int size,int thread_num) @@ -163,7 +173,7 @@ int region_compile(_Maat_feather_t*feather,struct _INNER_scan_status_t *_mid,int struct Maat_compile_inner* compile_rule=NULL; int region_pos[MAX_SCANNER_HIT_NUM]; - _mid->cur_hit_cnt=0; + _mid->cur_hit_group_cnt=0; for(i=0;icompile_shortcut); shortcut_avilable_cnt++; } - for(j=0; jendpoint_cnt; j++) + for(j=0; jtop_group_cnt; j++) { - _mid->cur_hit_id[_mid->cur_hit_cnt]=group_rule->endpoints[j]; - _mid->cur_hit_cnt++; - ret=insert_set_id(&(_mid->hitted_group_id), - &(_mid->hit_group_size), - _mid->hit_group_cnt, - group_rule->endpoints[j]); - _mid->hit_group_cnt+=ret; + if(_mid->cur_hit_group_cntcur_hit_groups, _mid->cur_hit_group_cnt, (void*)group_rule->top_groups[j]); + _mid->cur_hit_group_cnt++; + } + ret=insert_set_id(&(_mid->all_hit_group_array), + &(_mid->all_hit_group_array_sz), + _mid->all_hit_group_cnt, + group_rule->top_groups[j]); + _mid->all_hit_group_cnt+=ret; } } if((region_hit_num>0 &&shortcut_avilable_cnt==region_hit_num) || @@ -194,7 +207,7 @@ int region_compile(_Maat_feather_t*feather,struct _INNER_scan_status_t *_mid,int scan_ret=shortcut_avilable_cnt; alignment_int64_array_add(feather->orphan_group_saving, thread_num, 1); } - else if(0&&shortcut_avilable_cnt==0&®ion_hit_num==1&&_mid->hit_group_cnt==1&&is_last_region==1) + else if(0&&shortcut_avilable_cnt==0&®ion_hit_num==1&&_mid->all_hit_group_cnt==1&&is_last_region==1) { //This shortcut is NO longger valid after bool macher support NOT-logic. //short cut for last scan and combination rules @@ -205,9 +218,14 @@ int region_compile(_Maat_feather_t*feather,struct _INNER_scan_status_t *_mid,int else { scan_ret=bool_matcher_match(bm, thread_num, - _mid->hitted_group_id, _mid->hit_group_cnt, + _mid->all_hit_group_array, _mid->all_hit_group_cnt, (void **)compile_arrary, MAX_SCANNER_HIT_NUM); } + if(scan_ret>1) + { + qsort(compile_arrary, scan_ret, sizeof(struct Maat_compile_inner**), + compare_compile_inner); + } for(i=0;icur_hit_id, _mid->cur_hit_cnt, + r_in_c_cnt=pickup_hit_region_from_compile(&(rs_result[result_cnt].group_set), _mid->cur_hit_groups, _mid->cur_hit_group_cnt, region_pos, MAX_SCANNER_HIT_NUM); if(r_in_c_cnt>0 || //compile config hitted becasue of new reigon - _mid->cur_hit_cnt==0) //or ever hit a compile that refer a NOT-logic group + _mid->cur_hit_group_cnt==0) //or ever hit a compile that refer a NOT-logic group { fill_maat_rule(&(result[result_cnt]), &(compile_rule->db_c_rule->m_rule_head), compile_rule->db_c_rule->service_defined ,compile_rule->db_c_rule->m_rule_head.serv_def_len); @@ -244,9 +262,7 @@ int region_compile(_Maat_feather_t*feather,struct _INNER_scan_status_t *_mid,int } if(result_cnt>0) { - qsort(result, result_cnt, sizeof(struct Maat_rule_t), - compare_compile_id); - alignment_int64_array_add(feather->hit_cnt,thread_num,1); + alignment_int64_array_add(feather->hit_cnt, thread_num, 1); } if(region_hit_num==0&&result_cnt>0) { @@ -388,7 +404,7 @@ int fill_region_hit_detail(const char* scan_buff,const _INNER_scan_status_t* _mi for(i=0;icur_hit_id, _mid->cur_hit_cnt, + r_in_c_cnt=pickup_hit_region_from_compile(&(compile_hit[i].group_set), _mid->cur_hit_groups, _mid->cur_hit_group_cnt, region_pos, MAX_SCANNER_HIT_NUM); assert(r_in_c_cnt>0);//previous hitted compile was elimited in region_compile for(j=0,k=0;jcur_hit_cnt=0; - inner_mid->hit_group_cnt=0; - inner_mid->hit_group_size=4; - inner_mid->hitted_group_id= ALLOC(unsigned long long, inner_mid->hit_group_size); + inner_mid->cur_hit_group_cnt=0; + inner_mid->cur_hit_groups=dynamic_array_create(32, 32); + inner_mid->all_hit_group_cnt=0; + inner_mid->all_hit_group_array_sz=4; + inner_mid->all_hit_group_array= ALLOC(unsigned long long, inner_mid->all_hit_group_array_sz); return inner_mid; } struct _OUTER_scan_status_t* _make_outer_status(_Maat_feather_t *feather, int thread_num) diff --git a/src/entry/Maat_rule.cpp b/src/entry/Maat_rule.cpp index ec5b3b0..83b7aaa 100644 --- a/src/entry/Maat_rule.cpp +++ b/src/entry/Maat_rule.cpp @@ -865,8 +865,8 @@ void _destroy_group_rule(struct Maat_group_inner* group) group->table_id=-1; free(group->group_name); group->group_name=NULL; - free(group->endpoints); - group->endpoints=NULL; + free(group->top_groups); + group->top_groups=NULL; pthread_mutex_destroy(&(group->mutex)); free(group); @@ -3374,8 +3374,8 @@ void walk_group_hash(const uchar * key, uint size, void * data, void * user) long int i=0; - int* temp_group_ids=ALLOC(int, igraph_vector_size(&vids)); - size_t path_endpoint_cnt=0; + long long* temp_group_ids=ALLOC(long long, igraph_vector_size(&vids)); + size_t top_group_cnt=0; for(i=0; ivertex_id2group, tmp_vid); if(parent_group->has_compile_neighbors)//including itself { - temp_group_ids[path_endpoint_cnt]=parent_group->group_id; - path_endpoint_cnt++; + temp_group_ids[top_group_cnt]=parent_group->group_id; + top_group_cnt++; } } pthread_mutex_lock(&(group_rule->mutex)); - free(group_rule->endpoints); - group_rule->endpoint_cnt=path_endpoint_cnt; - group_rule->endpoints=ALLOC(int, group_rule->endpoint_cnt); - memcpy(group_rule->endpoints, temp_group_ids, sizeof(int)*group_rule->endpoint_cnt); + free(group_rule->top_groups); + group_rule->top_group_cnt=top_group_cnt; + group_rule->top_groups=ALLOC(long long, group_rule->top_group_cnt); + memcpy(group_rule->top_groups, temp_group_ids, sizeof(long long)*group_rule->top_group_cnt); + if(group_rule->top_group_cnt>scanner->max_presented_top_group_cnt) + { + scanner->max_presented_top_group_cnt=group_rule->top_group_cnt; + scanner->most_popular_sub_group=group_rule->group_id; + } pthread_mutex_unlock(&(group_rule->mutex)); igraph_vector_destroy(&vids); free(temp_group_ids); @@ -3431,7 +3436,11 @@ void do_scanner_update(struct Maat_scanner_t* scanner,MESA_lqueue_head garbage_q garbage_bagging(GARBAGE_BOOL_MATCHER, tmp2, garbage_q); } MESA_handle_runtime_log(logger,RLOG_LV_INFO,maat_module , - "Version %d dedup string rule %lu",scanner->version,scanner->dedup_expr_num); + "Version %d: dedup string rule %lu, sub group %d presents %d top groups", + scanner->version, + scanner->dedup_expr_num, + scanner->most_popular_sub_group, + scanner->max_presented_top_group_cnt); scanner->dedup_expr_num=0; rulescan_batch_update(scanner->region, scanner->region_update_q, diff --git a/src/inc_internal/Maat_rule_internal.h b/src/inc_internal/Maat_rule_internal.h index 2527f95..b55082b 100644 --- a/src/inc_internal/Maat_rule_internal.h +++ b/src/inc_internal/Maat_rule_internal.h @@ -143,8 +143,8 @@ struct Maat_group_inner char* group_name; int has_compile_neighbors; int vertex_id; - int endpoint_cnt; - int* endpoints; + int top_group_cnt; + long long* top_groups; dynamic_array_t *regions; void* compile_shortcut; pthread_mutex_t mutex; @@ -170,11 +170,11 @@ struct _compile_result_t }; struct _INNER_scan_status_t { - size_t cur_hit_cnt; - size_t hit_group_cnt; - size_t hit_group_size; - unsigned long long cur_hit_id[MAX_SCANNER_HIT_NUM]; - unsigned long long *hitted_group_id; + size_t cur_hit_group_cnt; + size_t all_hit_group_cnt; + size_t all_hit_group_array_sz; + struct dynamic_array_t* cur_hit_groups; + unsigned long long *all_hit_group_array; char not_grp_compile_hitted_flag; }; struct _OUTER_scan_status_t @@ -292,6 +292,8 @@ struct Maat_scanner_t igraph_t group_graph; int grp_vertex_id_generator; + int most_popular_sub_group; + long long max_presented_top_group_cnt; unsigned int district_num; unsigned int cfg_num; diff --git a/test/test_maatframe.cpp b/test/test_maatframe.cpp index b3c5002..3d856f6 100644 --- a/test/test_maatframe.cpp +++ b/test/test_maatframe.cpp @@ -325,8 +325,8 @@ TEST(StringScan, PrefixAndSuffix) ret=Maat_full_scan_string(g_feather, mail_addr_table,CHARSET_GBK, hit_twice, strlen(hit_twice), result,found_pos, 4, &mid, 0); EXPECT_EQ(ret, 2); - EXPECT_EQ(result[0].config_id, 152); - EXPECT_EQ(result[1].config_id, 151); + EXPECT_EQ(result[0].config_id, 151);//compile has more groups is priority + EXPECT_EQ(result[1].config_id, 152); Maat_clean_status(&mid); ret=Maat_full_scan_string(g_feather, mail_addr_table,CHARSET_GBK, hit_suffix, strlen(hit_suffix), @@ -1650,7 +1650,7 @@ TEST_F(MaatCmdTest, RuleIDRecycle) struct Maat_rule_t result; scan_status_t mid=NULL; - + memset(&result, 0, sizeof(result)); int table_id=0; table_id=Maat_table_register(feather,table_name); ASSERT_GT(table_id, 0); @@ -1719,6 +1719,7 @@ TEST_F(MaatCmdTest, ReturnRuleIDWithDescendingOrder) ret=Maat_cmd_commit(feather); EXPECT_TRUE(ret>=0); usleep(WAIT_FOR_EFFECTIVE_US);//waiting for commands go into effect + memset(&result, 0, sizeof(result)); ret=Maat_full_scan_string(feather, table_id,CHARSET_GBK, scan_data, strlen(scan_data), result, NULL, 8, &mid, 0);