Feature: faster bool matcher on repeated items
This commit is contained in:
@@ -7,7 +7,7 @@
|
||||
* All rights reserved
|
||||
*
|
||||
* Written by: LIU YANBING (liuyanbing@iie.ac.cn)
|
||||
* Last modification: 2018-12-31
|
||||
* Last modification: 2021-06-12
|
||||
*
|
||||
* This code is the exclusive and proprietary property of IIE-CAS and NELIST.
|
||||
* Usage for direct or indirect commercial advantage is not allowed without
|
||||
@@ -25,27 +25,35 @@ extern "C"
|
||||
#endif
|
||||
#define MAX_ITEMS_PER_BOOL_EXPR 8
|
||||
|
||||
/*not_flag=0表示布尔项item_id必须出现;not_flag=1表示布尔项item_id不能出现*/
|
||||
/* One term of a boolean expression: not_flag=0 means the item item_id must be present; not_flag=1 means the item item_id must be absent. */
|
||||
struct bool_item
|
||||
{
|
||||
unsigned long long item_id; /* identifier of the item this term refers to */
|
||||
unsigned char not_flag; /* 0: item must be present; 1: item must be absent */
|
||||
};
|
||||
|
||||
/*注意:不支持布尔项全“非”的情形*/
|
||||
/* A boolean expression: a conjunction of up to MAX_ITEMS_PER_BOOL_EXPR terms. At least one item's not_flag should be 0. */
|
||||
struct bool_expr
|
||||
{
|
||||
unsigned long long expr_id; /* expression identifier, copied into bool_expr_match on a hit */
|
||||
void * user_tag; /* opaque caller pointer, copied into bool_expr_match on a hit */
|
||||
size_t item_num; /* number of used entries in items[]; bool_matcher_new rejects 0 and values above MAX_ITEMS_PER_BOOL_EXPR */
|
||||
struct bool_item items[MAX_ITEMS_PER_BOOL_EXPR]; /* the terms; only the first item_num entries are read */
|
||||
};
|
||||
|
||||
/* One match result: the expr_id and user_tag of an expression that matched. */
struct bool_expr_match
|
||||
{
|
||||
unsigned long long expr_id; /* expr_id of the matched bool_expr */
|
||||
void * user_tag; /* user_tag of the matched bool_expr */
|
||||
};
|
||||
|
||||
struct bool_matcher;
|
||||
|
||||
/*注意:本函数调用会交换bool_exprs中元素的位置*/
|
||||
struct bool_matcher * bool_matcher_new(struct bool_expr * exprs, size_t expr_num, unsigned int max_thread_num, size_t * mem_size);
|
||||
|
||||
int bool_matcher_match(struct bool_matcher * matcher, unsigned int thread_id, const unsigned long long * item_ids, size_t item_num, void ** result, size_t size);
|
||||
/* Returned results are sorted by expr_id in descending order. */
|
||||
/* This function sorts and de-duplicates the item_ids array in place, so the order of its elements will change. */
|
||||
int bool_matcher_match(struct bool_matcher * matcher, unsigned int thread_id, unsigned long long * item_ids, size_t item_num, struct bool_expr_match * results, size_t n_result);
|
||||
|
||||
void bool_matcher_free(struct bool_matcher * matcher);
|
||||
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
cmake_minimum_required(VERSION 3.5)
|
||||
|
||||
set(MAAT_FRAME_MAJOR_VERSION 3)
|
||||
set(MAAT_FRAME_MINOR_VERSION 1)
|
||||
set(MAAT_FRAME_PATCH_VERSION 20)
|
||||
set(MAAT_FRAME_MINOR_VERSION 2)
|
||||
set(MAAT_FRAME_PATCH_VERSION 1)
|
||||
set(MAAT_FRAME_VERSION ${MAAT_FRAME_MAJOR_VERSION}.${MAAT_FRAME_MINOR_VERSION}.${MAAT_FRAME_PATCH_VERSION})
|
||||
|
||||
message(STATUS "Maat Frame, Version: ${MAAT_FRAME_VERSION}")
|
||||
|
||||
@@ -123,7 +123,7 @@ struct Maat_hierarchy
|
||||
int thread_num;
|
||||
struct Maat_garbage_bin* ref_garbage_bin;
|
||||
void* logger;
|
||||
void **expr_match_buff;
|
||||
struct bool_expr_match *expr_match_buff;
|
||||
};
|
||||
|
||||
int compare_literal_id(const void *pa, const void *pb)
|
||||
@@ -344,7 +344,7 @@ struct Maat_hierarchy* Maat_hierarchy_new(int thread_num, void* mesa_handle_logg
|
||||
hier->hash_dedup_clause_by_literals=NULL;
|
||||
hier->clause_id_generator=0;
|
||||
hier->ref_garbage_bin=bin;
|
||||
hier->expr_match_buff=ALLOC(void*, thread_num*MAX_SCANNER_HIT_NUM);
|
||||
hier->expr_match_buff=ALLOC(struct bool_expr_match, thread_num*MAX_SCANNER_HIT_NUM);
|
||||
|
||||
ret=igraph_empty(&hier->group_graph, 0, IGRAPH_DIRECTED);
|
||||
assert(ret==IGRAPH_SUCCESS);
|
||||
@@ -923,6 +923,7 @@ static struct bool_matcher* Maat_hierarchy_build_bool_matcher(struct Maat_hierar
|
||||
//some compile may have zero groups, e.g. default policy.
|
||||
if(j==(size_t)compile->declared_clause_num&&j>0)
|
||||
{
|
||||
bool_expr_array[expr_cnt].expr_id=compile->compile_id;
|
||||
bool_expr_array[expr_cnt].user_tag=compile;
|
||||
bool_expr_array[expr_cnt].item_num=j;
|
||||
expr_cnt++;
|
||||
@@ -1356,7 +1357,7 @@ int Maat_hierarchy_region_compile(struct Maat_hierarchy* hier, struct Maat_hiera
|
||||
{
|
||||
int bool_match_ret=0, i=0;
|
||||
struct Maat_hierarchy_compile* compile=NULL;
|
||||
void **expr_match=hier->expr_match_buff+mid->thread_num*MAX_SCANNER_HIT_NUM;
|
||||
struct bool_expr_match *expr_match=hier->expr_match_buff+mid->thread_num*MAX_SCANNER_HIT_NUM;
|
||||
|
||||
size_t r_in_c_cnt=0, this_scan_region_hits=mid->this_scan_region_hit_cnt;
|
||||
size_t ud_result_cnt=0;
|
||||
@@ -1371,8 +1372,9 @@ int Maat_hierarchy_region_compile(struct Maat_hierarchy* hier, struct Maat_hiera
|
||||
expr_match, MAX_SCANNER_HIT_NUM);
|
||||
for(i=0; i<bool_match_ret && ud_result_cnt<ud_array_sz; i++)
|
||||
{
|
||||
compile=(struct Maat_hierarchy_compile*)expr_match[i];
|
||||
compile=(struct Maat_hierarchy_compile*)expr_match[i].user_tag;
|
||||
assert(compile->magic==MAAT_HIER_COMPILE_MAGIC);
|
||||
assert((unsigned long long)compile->compile_id==expr_match[i].expr_id);
|
||||
if(compile->actual_clause_num==0)
|
||||
{
|
||||
continue;
|
||||
|
||||
@@ -57,7 +57,7 @@ extern "C"
|
||||
}
|
||||
#endif
|
||||
|
||||
int MAAT_FRAME_VERSION_3_1_22_20210601=1;
|
||||
int MAAT_FRAME_VERSION_3_2_1_20210613=1;
|
||||
|
||||
int is_valid_table_name(const char* str)
|
||||
{
|
||||
|
||||
@@ -5,274 +5,198 @@
|
||||
using namespace std;
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
static const unsigned int MAX_ARRAY_SIZE=65536;
|
||||
|
||||
struct thread_local_data_t
|
||||
struct bool_expr_item
|
||||
{
|
||||
unsigned int mapped_ids[MAX_ARRAY_SIZE];
|
||||
unsigned int used_cells[MAX_ARRAY_SIZE];
|
||||
void * cached_results[MAX_ARRAY_SIZE];
|
||||
unsigned char * multiexpr_bitmap;
|
||||
unsigned int * singlexpr_bitmap;
|
||||
size_t item_num;
|
||||
struct bool_item * items;
|
||||
};
|
||||
|
||||
struct bool_matcher
|
||||
{
|
||||
unsigned int max_thread_num;
|
||||
unsigned int bool_expr_num;
|
||||
unsigned int multi_expr_num;
|
||||
void ** bool_expr_ids;
|
||||
unsigned char * multi_expr_size;
|
||||
unsigned char * multi_expr_mask;
|
||||
unsigned int bool_item_id_num;
|
||||
unsigned long long min_item_id;
|
||||
unsigned long long max_item_id;
|
||||
unsigned long long * bool_item_ids;
|
||||
unsigned int * mapped_ptr;
|
||||
unsigned int * mapped_ids;
|
||||
unsigned int theta;
|
||||
unsigned int L[65537];
|
||||
thread_local_data_t * thread_data;
|
||||
unsigned int bool_expr_num;
|
||||
struct bool_expr_match * bool_expr_ids;
|
||||
struct bool_expr_item * bool_expr_items;
|
||||
unsigned int bool_item_num;
|
||||
unsigned long long * bool_items;
|
||||
unsigned int * mapped_ptr;
|
||||
unsigned int * mapped_ids;
|
||||
unsigned int bitmap_size;
|
||||
unsigned char * bitmap;
|
||||
};
|
||||
|
||||
bool operator<(const struct bool_item & lhs, const struct bool_item & rhs)
|
||||
{
|
||||
return lhs.item_id<rhs.item_id;
|
||||
}
|
||||
|
||||
struct bool_matcher * bool_matcher_new(struct bool_expr * exprs, size_t expr_num, unsigned int max_thread_num, size_t * mem_size)
|
||||
{
|
||||
if(exprs==NULL || expr_num==0 || max_thread_num==0) return NULL;
|
||||
|
||||
for(unsigned int i=0; i<expr_num; i++)
|
||||
{
|
||||
if(exprs[i].item_num==0 || exprs[i].item_num>MAX_ITEMS_PER_BOOL_EXPR)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
int I=-1, J=(int)expr_num;
|
||||
while(I<J)
|
||||
{
|
||||
I++;
|
||||
while(I<J && exprs[I].item_num>1) I++;
|
||||
if(I==J) break;
|
||||
J--;
|
||||
while(J>I && exprs[J].item_num==1) J--;
|
||||
if(J==I) break;
|
||||
swap(exprs[I], exprs[J]);
|
||||
}
|
||||
|
||||
for(int k=0; k<(int)expr_num; k++)
|
||||
{
|
||||
if((k<I && exprs[k].item_num==1) || (k>=I && exprs[k].item_num>1))
|
||||
{
|
||||
printf("[%s:%d]: fatal error!\n", __FILE__, __LINE__);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
unsigned int mem_bytes=0;
|
||||
|
||||
struct bool_matcher * matcher=new struct bool_matcher;
|
||||
mem_bytes+=sizeof(bool_matcher);
|
||||
|
||||
matcher->max_thread_num=max_thread_num;
|
||||
matcher->bool_expr_num=(unsigned int)expr_num;
|
||||
matcher->multi_expr_num=I;
|
||||
|
||||
matcher->bool_expr_ids=new void *[expr_num];
|
||||
mem_bytes+=(unsigned int)expr_num*sizeof(void *);
|
||||
|
||||
matcher->multi_expr_size=new unsigned char[matcher->multi_expr_num+1];
|
||||
mem_bytes+=(matcher->multi_expr_num+1)*sizeof(unsigned char);
|
||||
|
||||
matcher->multi_expr_mask=new unsigned char[matcher->multi_expr_num+1];
|
||||
mem_bytes+=(matcher->multi_expr_num+1)*sizeof(unsigned char);
|
||||
|
||||
matcher->thread_data=new thread_local_data_t[max_thread_num];
|
||||
mem_bytes+=max_thread_num*sizeof(thread_local_data_t);
|
||||
|
||||
for(unsigned int i=0; i<max_thread_num; i++)
|
||||
{
|
||||
matcher->thread_data[i].multiexpr_bitmap=new unsigned char[matcher->multi_expr_num+1];
|
||||
mem_bytes+=(matcher->multi_expr_num+1)*sizeof(unsigned char);
|
||||
|
||||
unsigned int size=(unsigned int)(expr_num-matcher->multi_expr_num);
|
||||
size=(size>>5)+1;
|
||||
matcher->thread_data[i].singlexpr_bitmap=new unsigned int[size];
|
||||
mem_bytes+=size*sizeof(unsigned int);
|
||||
}
|
||||
|
||||
map< unsigned long long, vector<unsigned int> > M;
|
||||
unsigned int count=0;
|
||||
matcher->bool_expr_ids =new struct bool_expr_match[expr_num];
|
||||
matcher->bool_expr_items=new struct bool_expr_item[expr_num];
|
||||
mem_bytes+=(unsigned int)expr_num*(sizeof(struct bool_expr_match)+sizeof(struct bool_expr_item));
|
||||
for(unsigned int i=0; i<expr_num; i++)
|
||||
{
|
||||
matcher->bool_expr_ids[i]=exprs[i].user_tag;
|
||||
if(i<matcher->multi_expr_num)
|
||||
{
|
||||
matcher->multi_expr_size[i]=(unsigned int)exprs[i].item_num;
|
||||
}
|
||||
count+=(unsigned int)exprs[i].item_num;
|
||||
unsigned char mask=0;
|
||||
for(unsigned int j=0; j<exprs[i].item_num; j++)
|
||||
{
|
||||
if(exprs[i].items[j].not_flag==1) mask|=(1U<<j);
|
||||
M[exprs[i].items[j].item_id].push_back((i<<3)|j);
|
||||
}
|
||||
if(i<matcher->multi_expr_num) matcher->multi_expr_mask[i]=mask;
|
||||
matcher->bool_expr_ids[i].expr_id =exprs[i].expr_id;
|
||||
matcher->bool_expr_ids[i].user_tag =exprs[i].user_tag;
|
||||
matcher->bool_expr_items[i].item_num=exprs[i].item_num;
|
||||
matcher->bool_expr_items[i].items=new struct bool_item[exprs[i].item_num];
|
||||
mem_bytes+=(unsigned int)exprs[i].item_num*sizeof(struct bool_item);
|
||||
copy(exprs[i].items, exprs[i].items+exprs[i].item_num, matcher->bool_expr_items[i].items);
|
||||
sort(matcher->bool_expr_items[i].items, matcher->bool_expr_items[i].items+exprs[i].item_num);
|
||||
}
|
||||
|
||||
matcher->bool_item_id_num=(unsigned int)M.size();
|
||||
matcher->bool_item_ids=new unsigned long long[M.size()];
|
||||
matcher->mapped_ptr =new unsigned int[M.size()+1];
|
||||
matcher->mapped_ids =new unsigned int[count];
|
||||
mem_bytes+=((unsigned int)M.size()+1+count)*sizeof(unsigned int)+(unsigned int)M.size()*sizeof(unsigned long long);
|
||||
map<unsigned long long, unsigned int> M1;
|
||||
for(unsigned int i=0; i<expr_num; i++)
|
||||
{
|
||||
for(unsigned int j=0; j<exprs[i].item_num; j++)
|
||||
{
|
||||
if(exprs[i].items[j].not_flag==0) M1[exprs[i].items[j].item_id]++;
|
||||
}
|
||||
}
|
||||
|
||||
map< unsigned long long, vector<unsigned int> > M2;
|
||||
for(unsigned int i=0; i<expr_num; i++)
|
||||
{
|
||||
unsigned int min_count=-1;
|
||||
unsigned long long item_id;
|
||||
for(unsigned int j=0; j<exprs[i].item_num; j++)
|
||||
{
|
||||
if(exprs[i].items[j].not_flag==0)
|
||||
{
|
||||
unsigned int c=M1[exprs[i].items[j].item_id];
|
||||
if(c<min_count)
|
||||
{
|
||||
min_count=c;
|
||||
item_id=exprs[i].items[j].item_id;
|
||||
}
|
||||
}
|
||||
}
|
||||
M2[item_id].push_back(i);
|
||||
}
|
||||
|
||||
matcher->bool_item_num=(unsigned int)M2.size();
|
||||
matcher->bool_items =new unsigned long long[M2.size()];
|
||||
matcher->mapped_ptr =new unsigned int[M2.size()+1];
|
||||
matcher->mapped_ids =new unsigned int[matcher->bool_expr_num];
|
||||
mem_bytes+=((unsigned int)M2.size()+1+matcher->bool_expr_num)*sizeof(unsigned int)+(unsigned int)M2.size()*sizeof(unsigned long long);
|
||||
|
||||
matcher->mapped_ptr[0]=0;
|
||||
map< unsigned long long, vector<unsigned int> >::const_iterator it=M.begin();
|
||||
for(unsigned int k=0; k<M.size(); ++k, ++it)
|
||||
map< unsigned long long, vector<unsigned int> >::const_iterator it=M2.begin();
|
||||
for(unsigned int k=0; k<M2.size(); ++k, ++it)
|
||||
{
|
||||
matcher->bool_item_ids[k]=it->first;
|
||||
matcher->bool_items[k]=it->first;
|
||||
copy(it->second.begin(), it->second.end(), matcher->mapped_ids+matcher->mapped_ptr[k]);
|
||||
matcher->mapped_ptr[k+1]=matcher->mapped_ptr[k]+(unsigned int)it->second.size();
|
||||
}
|
||||
|
||||
matcher->min_item_id=matcher->bool_item_ids[0];
|
||||
matcher->max_item_id=matcher->bool_item_ids[M.size()-1];
|
||||
for(unsigned int k=0; k<M.size(); ++k)
|
||||
M1.clear();
|
||||
M2.clear();
|
||||
|
||||
matcher->bitmap_size=(1U<<27);
|
||||
matcher->bitmap=new unsigned char[(matcher->bitmap_size)>>3];
|
||||
mem_bytes+=(matcher->bitmap_size)>>3;
|
||||
memset(matcher->bitmap, 0, (matcher->bitmap_size)>>3);
|
||||
|
||||
for(unsigned int i=0; i<matcher->bool_item_num; i++)
|
||||
{
|
||||
matcher->bool_item_ids[k]-=matcher->min_item_id;
|
||||
unsigned int j=matcher->bool_items[i]&(matcher->bitmap_size-1);
|
||||
matcher->bitmap[j>>3]|=(1U<<(j&7));
|
||||
}
|
||||
|
||||
const unsigned long long ONE=1;
|
||||
unsigned int theta=0;
|
||||
while((ONE<<(theta+16))<=matcher->bool_item_ids[M.size()-1]) theta++;
|
||||
matcher->theta=theta;
|
||||
|
||||
matcher->L[0]=0;
|
||||
for(unsigned int i=1; i<65536; i++)
|
||||
{
|
||||
matcher->L[i]=(unsigned int)(lower_bound(matcher->bool_item_ids, matcher->bool_item_ids+M.size(), i*(ONE<<theta))-matcher->bool_item_ids);
|
||||
}
|
||||
matcher->L[65536]=(unsigned int)M.size();
|
||||
|
||||
M.clear();
|
||||
|
||||
*mem_size=mem_bytes;
|
||||
if(mem_size!=NULL) *mem_size=mem_bytes;
|
||||
return matcher;
|
||||
}
|
||||
|
||||
int bool_matcher_match(struct bool_matcher * matcher, unsigned int thread_id, const unsigned long long * item_ids, size_t item_num, void ** result, size_t size)
|
||||
int res_comp(const void * lhs, const void * rhs)
|
||||
{
|
||||
bool_expr_match * _lhs=(bool_expr_match *)lhs;
|
||||
bool_expr_match * _rhs=(bool_expr_match *)rhs;
|
||||
return (_lhs->expr_id<_rhs->expr_id) ? 1 : -1;
|
||||
}
|
||||
|
||||
int do_match(struct bool_expr_item * expr, unsigned long long * item_ids, size_t item_num)
|
||||
{
|
||||
unsigned int i=0;
|
||||
for(unsigned int j=0; j<expr->item_num; ++j)
|
||||
{
|
||||
if(expr->items[j].not_flag==0)
|
||||
{
|
||||
while(i<item_num && item_ids[i]<expr->items[j].item_id) ++i;
|
||||
if(i==item_num || item_ids[i]>expr->items[j].item_id) return 0;
|
||||
++i;
|
||||
}
|
||||
else
|
||||
{
|
||||
while(i<item_num && item_ids[i]<expr->items[j].item_id) ++i;
|
||||
if(i<item_num && item_ids[i]==expr->items[j].item_id) return 0;
|
||||
}
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
int bool_matcher_match(struct bool_matcher * matcher, unsigned int thread_id, unsigned long long * item_ids, size_t item_num, struct bool_expr_match * results, size_t n_result)
|
||||
{
|
||||
if(matcher==NULL) return -1;
|
||||
if(thread_id>=matcher->max_thread_num) return -1;
|
||||
if(item_num==0) return 0;
|
||||
|
||||
sort(item_ids, item_ids+item_num);
|
||||
size_t J=0;
|
||||
for(unsigned int i=1; i<item_num; i++)
|
||||
{
|
||||
if(item_ids[i]!=item_ids[J]) item_ids[++J]=item_ids[i];
|
||||
}
|
||||
item_num=J+1;
|
||||
|
||||
unsigned int r=0;
|
||||
|
||||
unsigned int * mapped_ids=matcher->thread_data[thread_id].mapped_ids;
|
||||
unsigned int ids_num=0;
|
||||
for(unsigned int i=0; i<item_num; i++)
|
||||
{
|
||||
if(item_ids[i]<matcher->min_item_id || item_ids[i]>matcher->max_item_id) continue;
|
||||
unsigned int t=item_ids[i]&(matcher->bitmap_size-1);
|
||||
if((matcher->bitmap[t>>3]&(1U<<(t&7)))==0) continue;
|
||||
|
||||
unsigned long long id=item_ids[i]-matcher->min_item_id;
|
||||
unsigned int k=(unsigned int)(id>>matcher->theta);
|
||||
|
||||
int l=matcher->L[k], h=(int)matcher->L[k+1]-1;
|
||||
if(h<l) continue;
|
||||
int l=0, h=(int)matcher->bool_item_num-1;
|
||||
while(l<=h)
|
||||
{
|
||||
int m=(l+h)/2;
|
||||
if(id<matcher->bool_item_ids[m]) h=m-1;
|
||||
else l=m+1;
|
||||
}
|
||||
if(h<(int)matcher->L[k] || matcher->bool_item_ids[h]!=id) continue;
|
||||
|
||||
for(unsigned int j=matcher->mapped_ptr[h]; j<matcher->mapped_ptr[h+1]; j++)
|
||||
{
|
||||
if(ids_num==MAX_ARRAY_SIZE) return -1;
|
||||
mapped_ids[ids_num++]=matcher->mapped_ids[j];
|
||||
}
|
||||
}
|
||||
|
||||
unsigned int * used_cells=matcher->thread_data[thread_id].used_cells;
|
||||
unsigned int used_num=0;
|
||||
for(unsigned int i=0; i<ids_num; i++)
|
||||
{
|
||||
if(used_num==MAX_ARRAY_SIZE) return -1;
|
||||
used_cells[used_num++]=(mapped_ids[i]>>3);
|
||||
}
|
||||
|
||||
unsigned char * m_bitmap=matcher->thread_data[thread_id].multiexpr_bitmap;
|
||||
unsigned int * s_bitmap=matcher->thread_data[thread_id].singlexpr_bitmap;
|
||||
unsigned char * m_mask=matcher->multi_expr_mask;
|
||||
for(unsigned int i=0; i<used_num; i++)
|
||||
{
|
||||
if(used_cells[i]<matcher->multi_expr_num)
|
||||
{
|
||||
m_bitmap[used_cells[i]]=m_mask[used_cells[i]];
|
||||
}
|
||||
else
|
||||
{
|
||||
unsigned int j=used_cells[i]-matcher->multi_expr_num;
|
||||
s_bitmap[j>>5]&=~(1U<<(j&31));
|
||||
}
|
||||
}
|
||||
|
||||
for(unsigned int i=0; i<ids_num; i++)
|
||||
{
|
||||
unsigned int x=(mapped_ids[i]>>3);
|
||||
if(x<matcher->multi_expr_num)
|
||||
{
|
||||
unsigned int y=(mapped_ids[i]&7);
|
||||
if(m_mask[x]&(1U<<y))
|
||||
if(item_ids[i]==matcher->bool_items[m])
|
||||
{
|
||||
m_bitmap[x]&=~(1U<<y);
|
||||
for(unsigned int j=matcher->mapped_ptr[m]; j<matcher->mapped_ptr[m+1]; j++)
|
||||
{
|
||||
unsigned int idx=matcher->mapped_ids[j];
|
||||
int ret=do_match(matcher->bool_expr_items+idx, item_ids, item_num);
|
||||
if(ret==1)
|
||||
{
|
||||
if(r==n_result) goto END;
|
||||
results[r++]=matcher->bool_expr_ids[idx];
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
else if(item_ids[i]<matcher->bool_items[m])
|
||||
{
|
||||
h=m-1;
|
||||
}
|
||||
else
|
||||
{
|
||||
m_bitmap[x]|=(1U<<y);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
unsigned int j=x-matcher->multi_expr_num;
|
||||
s_bitmap[j>>5]|=(1U<<(j&31));
|
||||
}
|
||||
}
|
||||
|
||||
unsigned int r=0;
|
||||
void ** cached_results=matcher->thread_data[thread_id].cached_results;
|
||||
|
||||
for(unsigned int i=0; i<used_num; i++)
|
||||
{
|
||||
unsigned int x=used_cells[i];
|
||||
if(x<matcher->multi_expr_num)
|
||||
{
|
||||
if(m_bitmap[x]==(1U<<matcher->multi_expr_size[x])-1)
|
||||
{
|
||||
if(r<MAX_ARRAY_SIZE) cached_results[r++]=matcher->bool_expr_ids[x];
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
unsigned int j=used_cells[i]-matcher->multi_expr_num;
|
||||
if((s_bitmap[j>>5]&(1U<<(j&31)))!=0)
|
||||
{
|
||||
if(r<MAX_ARRAY_SIZE) cached_results[r++]=matcher->bool_expr_ids[x];
|
||||
l=m+1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
sort(cached_results, cached_results+r);
|
||||
|
||||
int I=0;
|
||||
for(unsigned int J=0; J<r; ++J)
|
||||
{
|
||||
if(I==0 || cached_results[J]!=result[I-1])
|
||||
{
|
||||
if(I==(int)size) return I;
|
||||
result[I++]=cached_results[J];
|
||||
}
|
||||
}
|
||||
|
||||
return I;
|
||||
END:
|
||||
qsort(results, r, sizeof(bool_expr_match), res_comp);
|
||||
return r;
|
||||
}
|
||||
|
||||
void bool_matcher_free(struct bool_matcher * matcher)
|
||||
@@ -280,17 +204,13 @@ void bool_matcher_free(struct bool_matcher * matcher)
|
||||
if(matcher==NULL) return;
|
||||
|
||||
delete [] matcher->bool_expr_ids;
|
||||
delete [] matcher->multi_expr_size;
|
||||
delete [] matcher->multi_expr_mask;
|
||||
delete [] matcher->bool_item_ids;
|
||||
for(unsigned int i=0; i<matcher->bool_expr_num; i++) delete [] matcher->bool_expr_items[i].items;
|
||||
delete [] matcher->bool_expr_items;
|
||||
|
||||
delete [] matcher->bool_items;
|
||||
delete [] matcher->mapped_ptr;
|
||||
delete [] matcher->mapped_ids;
|
||||
for(unsigned int i=0; i<matcher->max_thread_num; i++)
|
||||
{
|
||||
delete [] matcher->thread_data[i].multiexpr_bitmap;
|
||||
delete [] matcher->thread_data[i].singlexpr_bitmap;
|
||||
}
|
||||
delete [] matcher->thread_data;
|
||||
delete [] matcher->bitmap;
|
||||
delete matcher;
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -1,10 +1,141 @@
|
||||
#include "Maat_rule.h"
|
||||
#include "bool_matcher.h"
|
||||
#include "stream_fuzzy_hash.h"
|
||||
#include "Maat_command.h"
|
||||
#include <MESA/MESA_handle_logger.h>
|
||||
#include <gtest/gtest.h>
|
||||
#include <stdlib.h>
|
||||
#include <arpa/inet.h>
|
||||
#include <sys/queue.h>
|
||||
struct bool_expr_wrapper
|
||||
{
|
||||
struct bool_expr expr;
|
||||
TAILQ_ENTRY(bool_expr_wrapper) entries;
|
||||
};
|
||||
TAILQ_HEAD(bool_expr_q, bool_expr_wrapper);
|
||||
|
||||
/*
 * Functional and throughput test for bool_matcher.
 *
 * Loads expressions from ./testdata/bool-matcher-test-exprs.txt (one per line:
 * expr_id followed by 1..8 item ids), builds a matcher, then:
 *   1. scans test_count inputs, each built from a randomly chosen expression's
 *      items plus 8 random ids, expecting at least one hit every time;
 *   2. scans a fixed input test_count times, expecting no hit;
 *   3. checks that the combined rate exceeds one million scans per second.
 */
TEST(BoolMatcher, Match)
{
	struct bool_matcher * bm=NULL;
	struct bool_expr *expr_array=NULL;
	struct bool_expr_wrapper *p=NULL;
	bool_expr_q expr_queue;
	unsigned long long i=0;
	TAILQ_INIT(&expr_queue);
	const char* bool_expr_filename="./testdata/bool-matcher-test-exprs.txt";
	char line[512]={0};
	int ret=0, expr_num=0;

	FILE* fp=fopen(bool_expr_filename, "r");
	/* Without this check a missing data file would crash inside fgets(). */
	ASSERT_TRUE(fp!=NULL);

	while(NULL!=fgets(line,sizeof(line),fp))
	{
		/* Skip comments, indented lines and lines too short to hold an expression. */
		if(line[0]=='#'||line[0]==' '||line[0]=='\t'||strlen(line)<4)
		{
			continue;
		}
		p=(struct bool_expr_wrapper*)calloc(1, sizeof(struct bool_expr_wrapper));
		ASSERT_TRUE(p!=NULL);
		/* The targets are unsigned long long, so the conversion must be %llu. */
		ret=sscanf(line, "%llu %llu %llu %llu %llu %llu %llu %llu %llu",
				&p->expr.expr_id,
				&p->expr.items[0].item_id,
				&p->expr.items[1].item_id,
				&p->expr.items[2].item_id,
				&p->expr.items[3].item_id,
				&p->expr.items[4].item_id,
				&p->expr.items[5].item_id,
				&p->expr.items[6].item_id,
				&p->expr.items[7].item_id);
		if(ret<2)
		{
			/* Need an expr_id and at least one item. */
			free(p);
			continue;
		}
		p->expr.item_num=ret-1;
		p->expr.user_tag=NULL;
		TAILQ_INSERT_TAIL(&expr_queue, p, entries);
		expr_num++;
	}
	fclose(fp);

	/* An empty expression set would make random()%expr_num divide by zero below. */
	ASSERT_GT(expr_num, 0);

	expr_array=(struct bool_expr*)malloc(sizeof(struct bool_expr)*expr_num);
	ASSERT_TRUE(expr_array!=NULL);

	/* Flatten the queue into a contiguous array, releasing the wrappers. */
	i=0;
	while((p=TAILQ_FIRST(&expr_queue))!=NULL)
	{
		TAILQ_REMOVE(&expr_queue, p, entries);
		memcpy(expr_array+i, &(p->expr), sizeof(p->expr));
		free(p);
		i++;
	}

	size_t mem_size=0;
	bm=bool_matcher_new(expr_array, expr_num, 4, &mem_size);
	if(bm==NULL)
	{
		free(expr_array);
		expr_array=NULL;
	}
	ASSERT_TRUE(bm!=NULL);

	unsigned long long test_count=2*1000*1000, match_count=0, unmatch_count=0;
	long int j=0;
	size_t k=0;
	unsigned long long input_item_ids[256], time_elapse_ms=0, scan_per_second=0;
	size_t input_item_num=0;
	struct bool_expr_match result_array[1024];
	srand(19);
	struct timespec start,end;
	clock_gettime(CLOCK_MONOTONIC, &start);

	/* Phase 1: every input contains all items of one known expression. */
	for(i=0; i<test_count;i++)
	{
		input_item_num=0;
		j=random()%expr_num;
		for(k=0; k<expr_array[j].item_num; k++)
		{
			input_item_ids[k]=expr_array[j].items[k].item_id;
			input_item_num++;
		}
		for(k=0; k<8; k++)
		{
			input_item_ids[input_item_num]=random();
			input_item_num++;
		}
		ret=bool_matcher_match(bm, 1, input_item_ids, input_item_num, result_array, 1024);
		if(ret>0)
		{
			match_count++;
		}
	}
	EXPECT_EQ(match_count, test_count);

	/* Phase 2: a fixed input that is expected to hit nothing. Only the first
	 * input_item_num (8) entries are passed to the matcher. */
	input_item_ids[0]=123;
	input_item_ids[1]=124;
	input_item_ids[2]=125;
	input_item_ids[3]=7;
	input_item_ids[4]=3;
	input_item_ids[5]=128;
	input_item_ids[6]=129;
	input_item_ids[7]=130;
	input_item_ids[8]=131;
	input_item_ids[9]=132;
	input_item_ids[10]=133;
	input_item_ids[11]=777;
	input_item_ids[12]=999;
	input_item_ids[13]=788;
	input_item_ids[14]=222;
	input_item_ids[15]=333;
	input_item_num=8;
	for(i=0; i<test_count; i++)
	{
		ret=bool_matcher_match(bm, 1, input_item_ids, input_item_num, result_array, 1024);
		if(ret==0)
		{
			unmatch_count++;
		}
	}
	EXPECT_EQ(unmatch_count, test_count);

	clock_gettime(CLOCK_MONOTONIC, &end);
	time_elapse_ms=(end.tv_sec-start.tv_sec)*1000+(end.tv_nsec-start.tv_nsec)/1000000;
	/* Guard the division on a very fast machine. */
	if(time_elapse_ms==0)
	{
		time_elapse_ms=1;
	}
	scan_per_second=test_count*2*1000/time_elapse_ms;
	//At least 1 million scan per second
	EXPECT_GT(scan_per_second, 1000000ULL);
	printf("Bool matcher memsize %zu, speed %llu lookups/s\n", mem_size, scan_per_second);

	free(expr_array);
	expr_array=NULL;
	bool_matcher_free(bm);
}
|
||||
|
||||
#define WAIT_FOR_EFFECTIVE_SECOND 4
|
||||
|
||||
void ipv4_addr_set_copy(struct ipaddr *ipv4_addr, struct stream_tuple4_v4* v4_addr,
|
||||
@@ -436,8 +567,8 @@ TEST_F(MaatCMDPerfTest, UpdateFQDNPlugin)
|
||||
fqdn_plugin_EX_free_cb(0, (void**)&(result[i]), 0, NULL);
|
||||
}
|
||||
|
||||
printf("ready to sleep\n");
|
||||
sleep(300);
|
||||
// printf("ready to sleep\n");
|
||||
// sleep(300);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
77893
test/testdata/bool-matcher-test-exprs.txt
vendored
Normal file
77893
test/testdata/bool-matcher-test-exprs.txt
vendored
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user