/*
 * bool_matcher: builds a lookup structure over a set of boolean expressions
 * (struct bool_expr, declared in bool_matcher.h) and matches lists of item ids
 * against it.
 *
 * NOTE(review): this copy of the file appears to have lost every piece of text
 * that sat between a '<' and a following '>' (the #include targets, the
 * vector template argument, several loop conditions, and the signature of the
 * trailing free routine). It is not compilable as shown; restore the damaged
 * spans from version control before changing any logic. The notes below
 * describe only what the surviving text demonstrates.
 *
 * Data layout
 *   - thread_local_data_t: per-thread scratch space. Three fixed arrays of
 *     MAX_ARRAY_SIZE (65536) entries plus two heap-allocated bitmaps, one
 *     byte per multi-item expression and one bit per remaining expression.
 *   - bool_matcher::bool_item_ids: the distinct item ids in std::map key
 *     order, rebased by subtracting min_item_id.
 *   - bool_matcher::mapped_ptr / mapped_ids: for item k, the slice
 *     mapped_ids[mapped_ptr[k] .. mapped_ptr[k+1]) holds its entries. The
 *     matcher reads each entry with ">>3" and "&7", so an entry appears to
 *     pack an expression index with a 3-bit item position.
 *   - bool_matcher::theta / L: theta is the smallest shift for which
 *     (1 << (theta+16)) exceeds the largest rebased id, and L has 65537
 *     entries with L[0]=0 and L[65536]=number of ids. The matcher uses
 *     L[id>>theta] and L[(id>>theta)+1] as the bounds of its binary search
 *     over bool_item_ids.
 *
 * bool_matcher_new(exprs, expr_num, max_thread_num, mem_size)
 *   - Returns NULL when exprs is NULL, expr_num is 0 or max_thread_num is 0,
 *     and on a per-expression check involving MAX_ITEMS_PER_BOOL_EXPR (the
 *     condition is truncated here).
 *   - Reorders the caller's exprs array in place with swap(); the later
 *     sanity check suggests expressions with item_num>1 end up first, and
 *     their count is stored in multi_expr_num.
 *   - Prints "fatal error!" and returns NULL if that ordering does not hold;
 *     this happens before any allocation.
 *   - Writes the accumulated allocation size through mem_size without a NULL
 *     check. NOTE(review): the running total is an unsigned int, so it can
 *     wrap for very large inputs.
 *
 * bool_matcher_match(matcher, thread_id, item_ids, item_num, result, size)
 *   - Returns -1 when matcher is NULL, when thread_id >= max_thread_num, or
 *     when the number of collected entries reaches MAX_ARRAY_SIZE.
 *   - Skips item ids outside [min_item_id, max_item_id], and ids that the
 *     binary search does not find.
 *   - Takes its scratch buffers from thread_data[thread_id], collects
 *     candidate results into cached_results and sorts them. Everything after
 *     the sort, including how result/size are filled and what is returned on
 *     success, is missing from this copy.
 *
 * Trailing delete [] sequence
 *   - Releases each array allocated by bool_matcher_new, the per-thread
 *     bitmaps, the thread_data array and finally the matcher itself. The
 *     enclosing function header is not visible here.
 */
#include "bool_matcher.h" #include #include #include using namespace std; #include #include static const unsigned int MAX_ARRAY_SIZE=65536; struct thread_local_data_t { unsigned int mapped_ids[MAX_ARRAY_SIZE]; unsigned int used_cells[MAX_ARRAY_SIZE]; void * cached_results[MAX_ARRAY_SIZE]; unsigned char * multiexpr_bitmap; unsigned int * singlexpr_bitmap; }; struct bool_matcher { unsigned int max_thread_num; unsigned int bool_expr_num; unsigned int multi_expr_num; void ** bool_expr_ids; unsigned char * multi_expr_size; unsigned char * multi_expr_mask; unsigned int bool_item_id_num; unsigned long long min_item_id; unsigned long long max_item_id; unsigned long long * bool_item_ids; unsigned int * mapped_ptr; unsigned int * mapped_ids; unsigned int theta; unsigned int L[65537]; thread_local_data_t * thread_data; }; struct bool_matcher * bool_matcher_new(struct bool_expr * exprs, size_t expr_num, unsigned int max_thread_num, size_t * mem_size) { if(exprs==NULL || expr_num==0 || max_thread_num==0) return NULL; for(unsigned int i=0; iMAX_ITEMS_PER_BOOL_EXPR) { return NULL; } } int I=-1, J=(int)expr_num; while(I1) I++; if(I==J) break; J--; while(J>I && exprs[J].item_num==1) J--; if(J==I) break; swap(exprs[I], exprs[J]); } for(int k=0; k<(int)expr_num; k++) { if((k=I && exprs[k].item_num>1)) { printf("[%s:%d]: fatal error!\n", __FILE__, __LINE__); return NULL; } } unsigned int mem_bytes=0; struct bool_matcher * matcher=new struct bool_matcher; mem_bytes+=sizeof(bool_matcher); matcher->max_thread_num=max_thread_num; matcher->bool_expr_num=(unsigned int)expr_num; matcher->multi_expr_num=I; matcher->bool_expr_ids=new void *[expr_num]; mem_bytes+=(unsigned int)expr_num*sizeof(void *); matcher->multi_expr_size=new unsigned char[matcher->multi_expr_num+1]; mem_bytes+=(matcher->multi_expr_num+1)*sizeof(unsigned char); matcher->multi_expr_mask=new unsigned char[matcher->multi_expr_num+1]; mem_bytes+=(matcher->multi_expr_num+1)*sizeof(unsigned char); matcher->thread_data=new 
thread_local_data_t[max_thread_num]; mem_bytes+=max_thread_num*sizeof(thread_local_data_t); for(unsigned int i=0; ithread_data[i].multiexpr_bitmap=new unsigned char[matcher->multi_expr_num+1]; mem_bytes+=(matcher->multi_expr_num+1)*sizeof(unsigned char); unsigned int size=(unsigned int)(expr_num-matcher->multi_expr_num); size=(size>>5)+1; matcher->thread_data[i].singlexpr_bitmap=new unsigned int[size]; mem_bytes+=size*sizeof(unsigned int); } map< unsigned long long, vector > M; unsigned int count=0; for(unsigned int i=0; ibool_expr_ids[i]=exprs[i].user_tag; if(imulti_expr_num) { matcher->multi_expr_size[i]=(unsigned int)exprs[i].item_num; } count+=(unsigned int)exprs[i].item_num; unsigned char mask=0; for(unsigned int j=0; jmulti_expr_num) matcher->multi_expr_mask[i]=mask; } matcher->bool_item_id_num=(unsigned int)M.size(); matcher->bool_item_ids=new unsigned long long[M.size()]; matcher->mapped_ptr =new unsigned int[M.size()+1]; matcher->mapped_ids =new unsigned int[count]; mem_bytes+=((unsigned int)M.size()+1+count)*sizeof(unsigned int)+(unsigned int)M.size()*sizeof(unsigned long long); matcher->mapped_ptr[0]=0; map< unsigned long long, vector >::const_iterator it=M.begin(); for(unsigned int k=0; kbool_item_ids[k]=it->first; copy(it->second.begin(), it->second.end(), matcher->mapped_ids+matcher->mapped_ptr[k]); matcher->mapped_ptr[k+1]=matcher->mapped_ptr[k]+(unsigned int)it->second.size(); } matcher->min_item_id=matcher->bool_item_ids[0]; matcher->max_item_id=matcher->bool_item_ids[M.size()-1]; for(unsigned int k=0; kbool_item_ids[k]-=matcher->min_item_id; } const unsigned long long ONE=1; unsigned int theta=0; while((ONE<<(theta+16))<=matcher->bool_item_ids[M.size()-1]) theta++; matcher->theta=theta; matcher->L[0]=0; for(unsigned int i=1; i<65536; i++) { matcher->L[i]=(unsigned int)(lower_bound(matcher->bool_item_ids, matcher->bool_item_ids+M.size(), i*(ONE<bool_item_ids); } matcher->L[65536]=(unsigned int)M.size(); M.clear(); *mem_size=mem_bytes; return 
matcher; } int bool_matcher_match(struct bool_matcher * matcher, unsigned int thread_id, unsigned long long * item_ids, size_t item_num, void ** result, size_t size) { if(matcher==NULL) return -1; if(thread_id>=matcher->max_thread_num) return -1; unsigned int * mapped_ids=matcher->thread_data[thread_id].mapped_ids; unsigned int ids_num=0; for(unsigned int i=0; imin_item_id || item_ids[i]>matcher->max_item_id) continue; unsigned long long id=item_ids[i]-matcher->min_item_id; unsigned int k=(unsigned int)(id>>matcher->theta); int l=matcher->L[k], h=(int)matcher->L[k+1]-1; if(hbool_item_ids[m]) h=m-1; else l=m+1; } if(h<(int)matcher->L[k] || matcher->bool_item_ids[h]!=id) continue; for(unsigned int j=matcher->mapped_ptr[h]; jmapped_ptr[h+1]; j++) { if(ids_num==MAX_ARRAY_SIZE) return -1; mapped_ids[ids_num++]=matcher->mapped_ids[j]; } } unsigned int * used_cells=matcher->thread_data[thread_id].used_cells; unsigned int used_num=0; for(unsigned int i=0; i>3); } unsigned char * m_bitmap=matcher->thread_data[thread_id].multiexpr_bitmap; unsigned int * s_bitmap=matcher->thread_data[thread_id].singlexpr_bitmap; unsigned char * m_mask=matcher->multi_expr_mask; for(unsigned int i=0; imulti_expr_num) { m_bitmap[used_cells[i]]=m_mask[used_cells[i]]; } else { unsigned int j=used_cells[i]-matcher->multi_expr_num; s_bitmap[j>>5]&=~(1U<<(j&31)); } } for(unsigned int i=0; i>3); if(xmulti_expr_num) { unsigned int y=(mapped_ids[i]&7); if(m_mask[x]&(1U<multi_expr_num; s_bitmap[j>>5]|=(1U<<(j&31)); } } unsigned int r=0; void ** cached_results=matcher->thread_data[thread_id].cached_results; for(unsigned int i=0; imulti_expr_num) { if(m_bitmap[x]==(1U<multi_expr_size[x])-1) { if(rbool_expr_ids[x]; } } else { unsigned int j=used_cells[i]-matcher->multi_expr_num; if((s_bitmap[j>>5]&(1U<<(j&31)))!=0) { if(rbool_expr_ids[x]; } } } sort(cached_results, cached_results+r); int I=0; for(unsigned int J=0; Jbool_expr_ids; delete [] matcher->multi_expr_size; delete [] matcher->multi_expr_mask; delete 
[] matcher->bool_item_ids; delete [] matcher->mapped_ptr; delete [] matcher->mapped_ids; for(unsigned int i=0; imax_thread_num; i++) { delete [] matcher->thread_data[i].multiexpr_bitmap; delete [] matcher->thread_data[i].singlexpr_bitmap; } delete [] matcher->thread_data; delete matcher; return; }