多命中情况下,按包含分组数由多到少返回;分组数相同时,按编译配置ID由大到小的顺序返回。

This commit is contained in:
zhengchao
2019-05-12 16:59:22 +08:00
parent 683c367ac1
commit 3f35cee7aa
4 changed files with 79 additions and 50 deletions

View File

@@ -64,7 +64,8 @@ void Maat_clean_status(scan_status_t* mid)
alignment_int64_array_add(_mid->feather->outer_mid_cnt,_mid->thread_num,-1); alignment_int64_array_add(_mid->feather->outer_mid_cnt,_mid->thread_num,-1);
if(_mid->inner!=NULL) if(_mid->inner!=NULL)
{ {
free(_mid->inner->hitted_group_id); dynamic_array_destroy(_mid->inner->cur_hit_groups, NULL);
free(_mid->inner->all_hit_group_array);
free(_mid->inner); free(_mid->inner);
alignment_int64_array_add(_mid->feather->inner_mid_cnt,_mid->thread_num,-1); alignment_int64_array_add(_mid->feather->inner_mid_cnt,_mid->thread_num,-1);
} }
@@ -117,15 +118,17 @@ int insert_set_id(unsigned long long **set, size_t* size, size_t cnt, unsigned l
} }
} }
size_t pickup_hit_region_from_compile(struct bool_expr *compile_hit, size_t pickup_hit_region_from_compile(struct bool_expr *compile_hit,
const unsigned long long* hitted_id, size_t hit_cnt, int* region_pos, size_t size) struct dynamic_array_t* hitted_id, size_t hit_cnt, int* region_pos, size_t size)
{ {
size_t i=0, j=0; size_t i=0, j=0;
size_t k=0; size_t k=0;
unsigned long long group_id=0;
for(i=0;i<hit_cnt;i++) for(i=0;i<hit_cnt;i++)
{ {
group_id=(unsigned long long)dynamic_array_read(hitted_id, i);
for(j=0; j<compile_hit->item_num; j++) for(j=0; j<compile_hit->item_num; j++)
{ {
if(hitted_id[i]==compile_hit->items[j].item_id) if(group_id==compile_hit->items[j].item_id)
{ {
region_pos[k]=i; region_pos[k]=i;
k++; k++;
@@ -141,12 +144,19 @@ void fill_maat_rule(struct Maat_rule_t *rule, const struct _head_Maat_rule* rule
memcpy(rule->service_defined, srv_def, MIN(srv_def_len,MAX_SERVICE_DEFINE_LEN)); memcpy(rule->service_defined, srv_def, MIN(srv_def_len,MAX_SERVICE_DEFINE_LEN));
return; return;
} }
static int compare_compile_id(const void *a, const void *b) static int compare_compile_inner(const void *a, const void *b)
{ {
struct Maat_rule_t *ra=(struct Maat_rule_t *)a; const struct Maat_compile_inner *ra=*(const struct Maat_compile_inner **)a;
struct Maat_rule_t *rb=(struct Maat_rule_t *)b; const struct Maat_compile_inner *rb=*(const struct Maat_compile_inner **)b;
return (rb->config_id-ra->config_id); if(ra->group_cnt!=rb->group_cnt)
{
return (ra->group_cnt-rb->group_cnt);
}
else
{
return (rb->compile_id-ra->compile_id);
}
} }
int region_compile(_Maat_feather_t*feather,struct _INNER_scan_status_t *_mid,int is_last_region,void* region_hit,int region_type_size,int group_offset,int region_hit_num,struct Maat_rule_t* result,_compile_result_t *rs_result, int size,int thread_num) int region_compile(_Maat_feather_t*feather,struct _INNER_scan_status_t *_mid,int is_last_region,void* region_hit,int region_type_size,int group_offset,int region_hit_num,struct Maat_rule_t* result,_compile_result_t *rs_result, int size,int thread_num)
@@ -163,7 +173,7 @@ int region_compile(_Maat_feather_t*feather,struct _INNER_scan_status_t *_mid,int
struct Maat_compile_inner* compile_rule=NULL; struct Maat_compile_inner* compile_rule=NULL;
int region_pos[MAX_SCANNER_HIT_NUM]; int region_pos[MAX_SCANNER_HIT_NUM];
_mid->cur_hit_cnt=0; _mid->cur_hit_group_cnt=0;
for(i=0;i<region_hit_num;i++) for(i=0;i<region_hit_num;i++)
{ {
group_rule=*(struct Maat_group_inner**)((char*)region_hit+region_type_size*i+group_offset); group_rule=*(struct Maat_group_inner**)((char*)region_hit+region_type_size*i+group_offset);
@@ -176,15 +186,18 @@ int region_compile(_Maat_feather_t*feather,struct _INNER_scan_status_t *_mid,int
compile_arrary[shortcut_avilable_cnt]=(struct Maat_compile_inner*)(group_rule->compile_shortcut); compile_arrary[shortcut_avilable_cnt]=(struct Maat_compile_inner*)(group_rule->compile_shortcut);
shortcut_avilable_cnt++; shortcut_avilable_cnt++;
} }
for(j=0; j<group_rule->endpoint_cnt; j++) for(j=0; j<group_rule->top_group_cnt; j++)
{ {
_mid->cur_hit_id[_mid->cur_hit_cnt]=group_rule->endpoints[j]; if(_mid->cur_hit_group_cnt<MAX_SCANNER_HIT_NUM)
_mid->cur_hit_cnt++; {
ret=insert_set_id(&(_mid->hitted_group_id), dynamic_array_write(_mid->cur_hit_groups, _mid->cur_hit_group_cnt, (void*)group_rule->top_groups[j]);
&(_mid->hit_group_size), _mid->cur_hit_group_cnt++;
_mid->hit_group_cnt, }
group_rule->endpoints[j]); ret=insert_set_id(&(_mid->all_hit_group_array),
_mid->hit_group_cnt+=ret; &(_mid->all_hit_group_array_sz),
_mid->all_hit_group_cnt,
group_rule->top_groups[j]);
_mid->all_hit_group_cnt+=ret;
} }
} }
if((region_hit_num>0 &&shortcut_avilable_cnt==region_hit_num) || if((region_hit_num>0 &&shortcut_avilable_cnt==region_hit_num) ||
@@ -194,7 +207,7 @@ int region_compile(_Maat_feather_t*feather,struct _INNER_scan_status_t *_mid,int
scan_ret=shortcut_avilable_cnt; scan_ret=shortcut_avilable_cnt;
alignment_int64_array_add(feather->orphan_group_saving, thread_num, 1); alignment_int64_array_add(feather->orphan_group_saving, thread_num, 1);
} }
else if(0&&shortcut_avilable_cnt==0&&region_hit_num==1&&_mid->hit_group_cnt==1&&is_last_region==1) else if(0&&shortcut_avilable_cnt==0&&region_hit_num==1&&_mid->all_hit_group_cnt==1&&is_last_region==1)
{ {
//This shortcut is NO longger valid after bool macher support NOT-logic. //This shortcut is NO longger valid after bool macher support NOT-logic.
//short cut for last scan and combination rules //short cut for last scan and combination rules
@@ -205,9 +218,14 @@ int region_compile(_Maat_feather_t*feather,struct _INNER_scan_status_t *_mid,int
else else
{ {
scan_ret=bool_matcher_match(bm, thread_num, scan_ret=bool_matcher_match(bm, thread_num,
_mid->hitted_group_id, _mid->hit_group_cnt, _mid->all_hit_group_array, _mid->all_hit_group_cnt,
(void **)compile_arrary, MAX_SCANNER_HIT_NUM); (void **)compile_arrary, MAX_SCANNER_HIT_NUM);
} }
if(scan_ret>1)
{
qsort(compile_arrary, scan_ret, sizeof(struct Maat_compile_inner**),
compare_compile_inner);
}
for(i=0;i<scan_ret&&result_cnt<size;i++) for(i=0;i<scan_ret&&result_cnt<size;i++)
{ {
compile_rule=compile_arrary[i]; compile_rule=compile_arrary[i];
@@ -227,10 +245,10 @@ int region_compile(_Maat_feather_t*feather,struct _INNER_scan_status_t *_mid,int
else else
{ {
make_group_set(compile_rule, &(rs_result[result_cnt].group_set), &has_not_flag); make_group_set(compile_rule, &(rs_result[result_cnt].group_set), &has_not_flag);
r_in_c_cnt=pickup_hit_region_from_compile(&(rs_result[result_cnt].group_set), _mid->cur_hit_id, _mid->cur_hit_cnt, r_in_c_cnt=pickup_hit_region_from_compile(&(rs_result[result_cnt].group_set), _mid->cur_hit_groups, _mid->cur_hit_group_cnt,
region_pos, MAX_SCANNER_HIT_NUM); region_pos, MAX_SCANNER_HIT_NUM);
if(r_in_c_cnt>0 || //compile config hitted becasue of new reigon if(r_in_c_cnt>0 || //compile config hitted becasue of new reigon
_mid->cur_hit_cnt==0) //or ever hit a compile that refer a NOT-logic group _mid->cur_hit_group_cnt==0) //or ever hit a compile that refer a NOT-logic group
{ {
fill_maat_rule(&(result[result_cnt]), &(compile_rule->db_c_rule->m_rule_head), fill_maat_rule(&(result[result_cnt]), &(compile_rule->db_c_rule->m_rule_head),
compile_rule->db_c_rule->service_defined ,compile_rule->db_c_rule->m_rule_head.serv_def_len); compile_rule->db_c_rule->service_defined ,compile_rule->db_c_rule->m_rule_head.serv_def_len);
@@ -244,8 +262,6 @@ int region_compile(_Maat_feather_t*feather,struct _INNER_scan_status_t *_mid,int
} }
if(result_cnt>0) if(result_cnt>0)
{ {
qsort(result, result_cnt, sizeof(struct Maat_rule_t),
compare_compile_id);
alignment_int64_array_add(feather->hit_cnt, thread_num, 1); alignment_int64_array_add(feather->hit_cnt, thread_num, 1);
} }
if(region_hit_num==0&&result_cnt>0) if(region_hit_num==0&&result_cnt>0)
@@ -388,7 +404,7 @@ int fill_region_hit_detail(const char* scan_buff,const _INNER_scan_status_t* _mi
for(i=0;i<compile_cnt&&i<detail_num;i++) for(i=0;i<compile_cnt&&i<detail_num;i++)
{ {
hit_detail[i].config_id=compile_hit[i].compile_id; hit_detail[i].config_id=compile_hit[i].compile_id;
r_in_c_cnt=pickup_hit_region_from_compile(&(compile_hit[i].group_set), _mid->cur_hit_id, _mid->cur_hit_cnt, r_in_c_cnt=pickup_hit_region_from_compile(&(compile_hit[i].group_set), _mid->cur_hit_groups, _mid->cur_hit_group_cnt,
region_pos, MAX_SCANNER_HIT_NUM); region_pos, MAX_SCANNER_HIT_NUM);
assert(r_in_c_cnt>0);//previous hitted compile was elimited in region_compile assert(r_in_c_cnt>0);//previous hitted compile was elimited in region_compile
for(j=0,k=0;j<r_in_c_cnt&&k<MAAT_MAX_HIT_RULE_NUM;j++) for(j=0,k=0;j<r_in_c_cnt&&k<MAAT_MAX_HIT_RULE_NUM;j++)
@@ -431,10 +447,11 @@ struct _INNER_scan_status_t* _make_inner_status(void)
{ {
struct _INNER_scan_status_t* inner_mid=NULL; struct _INNER_scan_status_t* inner_mid=NULL;
inner_mid=ALLOC(struct _INNER_scan_status_t, 1); inner_mid=ALLOC(struct _INNER_scan_status_t, 1);
inner_mid->cur_hit_cnt=0; inner_mid->cur_hit_group_cnt=0;
inner_mid->hit_group_cnt=0; inner_mid->cur_hit_groups=dynamic_array_create(32, 32);
inner_mid->hit_group_size=4; inner_mid->all_hit_group_cnt=0;
inner_mid->hitted_group_id= ALLOC(unsigned long long, inner_mid->hit_group_size); inner_mid->all_hit_group_array_sz=4;
inner_mid->all_hit_group_array= ALLOC(unsigned long long, inner_mid->all_hit_group_array_sz);
return inner_mid; return inner_mid;
} }
struct _OUTER_scan_status_t* _make_outer_status(_Maat_feather_t *feather, int thread_num) struct _OUTER_scan_status_t* _make_outer_status(_Maat_feather_t *feather, int thread_num)

View File

@@ -865,8 +865,8 @@ void _destroy_group_rule(struct Maat_group_inner* group)
group->table_id=-1; group->table_id=-1;
free(group->group_name); free(group->group_name);
group->group_name=NULL; group->group_name=NULL;
free(group->endpoints); free(group->top_groups);
group->endpoints=NULL; group->top_groups=NULL;
pthread_mutex_destroy(&(group->mutex)); pthread_mutex_destroy(&(group->mutex));
free(group); free(group);
@@ -3374,8 +3374,8 @@ void walk_group_hash(const uchar * key, uint size, void * data, void * user)
long int i=0; long int i=0;
int* temp_group_ids=ALLOC(int, igraph_vector_size(&vids)); long long* temp_group_ids=ALLOC(long long, igraph_vector_size(&vids));
size_t path_endpoint_cnt=0; size_t top_group_cnt=0;
for(i=0; i<igraph_vector_size(&vids); i++) for(i=0; i<igraph_vector_size(&vids); i++)
{ {
tmp_vid=(int) VECTOR(vids)[i]; tmp_vid=(int) VECTOR(vids)[i];
@@ -3386,15 +3386,20 @@ void walk_group_hash(const uchar * key, uint size, void * data, void * user)
parent_group=(struct Maat_group_inner*)HASH_fetch_by_id(scanner->vertex_id2group, tmp_vid); parent_group=(struct Maat_group_inner*)HASH_fetch_by_id(scanner->vertex_id2group, tmp_vid);
if(parent_group->has_compile_neighbors)//including itself if(parent_group->has_compile_neighbors)//including itself
{ {
temp_group_ids[path_endpoint_cnt]=parent_group->group_id; temp_group_ids[top_group_cnt]=parent_group->group_id;
path_endpoint_cnt++; top_group_cnt++;
} }
} }
pthread_mutex_lock(&(group_rule->mutex)); pthread_mutex_lock(&(group_rule->mutex));
free(group_rule->endpoints); free(group_rule->top_groups);
group_rule->endpoint_cnt=path_endpoint_cnt; group_rule->top_group_cnt=top_group_cnt;
group_rule->endpoints=ALLOC(int, group_rule->endpoint_cnt); group_rule->top_groups=ALLOC(long long, group_rule->top_group_cnt);
memcpy(group_rule->endpoints, temp_group_ids, sizeof(int)*group_rule->endpoint_cnt); memcpy(group_rule->top_groups, temp_group_ids, sizeof(long long)*group_rule->top_group_cnt);
if(group_rule->top_group_cnt>scanner->max_presented_top_group_cnt)
{
scanner->max_presented_top_group_cnt=group_rule->top_group_cnt;
scanner->most_popular_sub_group=group_rule->group_id;
}
pthread_mutex_unlock(&(group_rule->mutex)); pthread_mutex_unlock(&(group_rule->mutex));
igraph_vector_destroy(&vids); igraph_vector_destroy(&vids);
free(temp_group_ids); free(temp_group_ids);
@@ -3431,7 +3436,11 @@ void do_scanner_update(struct Maat_scanner_t* scanner,MESA_lqueue_head garbage_q
garbage_bagging(GARBAGE_BOOL_MATCHER, tmp2, garbage_q); garbage_bagging(GARBAGE_BOOL_MATCHER, tmp2, garbage_q);
} }
MESA_handle_runtime_log(logger,RLOG_LV_INFO,maat_module , MESA_handle_runtime_log(logger,RLOG_LV_INFO,maat_module ,
"Version %d dedup string rule %lu",scanner->version,scanner->dedup_expr_num); "Version %d: dedup string rule %lu, sub group %d presents %d top groups",
scanner->version,
scanner->dedup_expr_num,
scanner->most_popular_sub_group,
scanner->max_presented_top_group_cnt);
scanner->dedup_expr_num=0; scanner->dedup_expr_num=0;
rulescan_batch_update(scanner->region, rulescan_batch_update(scanner->region,
scanner->region_update_q, scanner->region_update_q,

View File

@@ -143,8 +143,8 @@ struct Maat_group_inner
char* group_name; char* group_name;
int has_compile_neighbors; int has_compile_neighbors;
int vertex_id; int vertex_id;
int endpoint_cnt; int top_group_cnt;
int* endpoints; long long* top_groups;
dynamic_array_t *regions; dynamic_array_t *regions;
void* compile_shortcut; void* compile_shortcut;
pthread_mutex_t mutex; pthread_mutex_t mutex;
@@ -170,11 +170,11 @@ struct _compile_result_t
}; };
struct _INNER_scan_status_t struct _INNER_scan_status_t
{ {
size_t cur_hit_cnt; size_t cur_hit_group_cnt;
size_t hit_group_cnt; size_t all_hit_group_cnt;
size_t hit_group_size; size_t all_hit_group_array_sz;
unsigned long long cur_hit_id[MAX_SCANNER_HIT_NUM]; struct dynamic_array_t* cur_hit_groups;
unsigned long long *hitted_group_id; unsigned long long *all_hit_group_array;
char not_grp_compile_hitted_flag; char not_grp_compile_hitted_flag;
}; };
struct _OUTER_scan_status_t struct _OUTER_scan_status_t
@@ -292,6 +292,8 @@ struct Maat_scanner_t
igraph_t group_graph; igraph_t group_graph;
int grp_vertex_id_generator; int grp_vertex_id_generator;
int most_popular_sub_group;
long long max_presented_top_group_cnt;
unsigned int district_num; unsigned int district_num;
unsigned int cfg_num; unsigned int cfg_num;

View File

@@ -325,8 +325,8 @@ TEST(StringScan, PrefixAndSuffix)
ret=Maat_full_scan_string(g_feather, mail_addr_table,CHARSET_GBK, hit_twice, strlen(hit_twice), ret=Maat_full_scan_string(g_feather, mail_addr_table,CHARSET_GBK, hit_twice, strlen(hit_twice),
result,found_pos, 4, &mid, 0); result,found_pos, 4, &mid, 0);
EXPECT_EQ(ret, 2); EXPECT_EQ(ret, 2);
EXPECT_EQ(result[0].config_id, 152); EXPECT_EQ(result[0].config_id, 151);//compile has more groups is priority
EXPECT_EQ(result[1].config_id, 151); EXPECT_EQ(result[1].config_id, 152);
Maat_clean_status(&mid); Maat_clean_status(&mid);
ret=Maat_full_scan_string(g_feather, mail_addr_table,CHARSET_GBK, hit_suffix, strlen(hit_suffix), ret=Maat_full_scan_string(g_feather, mail_addr_table,CHARSET_GBK, hit_suffix, strlen(hit_suffix),
@@ -1650,7 +1650,7 @@ TEST_F(MaatCmdTest, RuleIDRecycle)
struct Maat_rule_t result; struct Maat_rule_t result;
scan_status_t mid=NULL; scan_status_t mid=NULL;
memset(&result, 0, sizeof(result));
int table_id=0; int table_id=0;
table_id=Maat_table_register(feather,table_name); table_id=Maat_table_register(feather,table_name);
ASSERT_GT(table_id, 0); ASSERT_GT(table_id, 0);
@@ -1719,6 +1719,7 @@ TEST_F(MaatCmdTest, ReturnRuleIDWithDescendingOrder)
ret=Maat_cmd_commit(feather); ret=Maat_cmd_commit(feather);
EXPECT_TRUE(ret>=0); EXPECT_TRUE(ret>=0);
usleep(WAIT_FOR_EFFECTIVE_US);//waiting for commands go into effect usleep(WAIT_FOR_EFFECTIVE_US);//waiting for commands go into effect
memset(&result, 0, sizeof(result));
ret=Maat_full_scan_string(feather, table_id,CHARSET_GBK, scan_data, strlen(scan_data), ret=Maat_full_scan_string(feather, table_id,CHARSET_GBK, scan_data, strlen(scan_data),
result, NULL, 8, result, NULL, 8,
&mid, 0); &mid, 0);