1、更新SFH和GIE;2、支持相似性字符串匹配;
This commit is contained in:
@@ -216,7 +216,10 @@ int Maat_stream_scan_digest(stream_para_t* stream_para
|
||||
,scan_status_t* mid);
|
||||
void Maat_stream_scan_digest_end(stream_para_t* stream_para);
|
||||
|
||||
|
||||
int Maat_similar_scan_string(Maat_feather_t feather,int table_id
|
||||
,const char* data,int data_len
|
||||
,struct Maat_rule_t*result,int rule_num
|
||||
,scan_status_t* mid,int thread_num);
|
||||
|
||||
void Maat_clean_status(scan_status_t* mid);
|
||||
|
||||
|
||||
@@ -1569,11 +1569,10 @@ int Maat_stream_scan_digest(stream_para_t * stream_para, const char * data, int
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
GIE_handle_t* GIE_handle=sp->feather->scanner->digest_handle[sp->table_id];
|
||||
GIE_handle_t* GIE_handle=sp->feather->scanner->gie_aux[sp->table_id].gie_handle;
|
||||
unsigned long long digest_len=0;
|
||||
char* digest_buff=NULL;
|
||||
struct _OUTER_scan_status_t* _mid=NULL;
|
||||
pthread_rwlock_t *GIE_rwlock=&(sp->feather->scanner->digest_rwlock[sp->table_id]);
|
||||
struct timespec start,end;
|
||||
if(sp->feather->perf_on==1)
|
||||
{
|
||||
@@ -1603,14 +1602,12 @@ int Maat_stream_scan_digest(stream_para_t * stream_para, const char * data, int
|
||||
pthread_mutex_lock(&(sp->fuzzy_mutex));
|
||||
fuzzy_digest(sp->fuzzy_hash_handle,digest_buff, digest_len);
|
||||
pthread_mutex_unlock(&(sp->fuzzy_mutex));
|
||||
if(0==pthread_rwlock_tryrdlock(GIE_rwlock))
|
||||
{
|
||||
|
||||
if(GIE_handle!=NULL)
|
||||
{
|
||||
hit_region_cnt=GIE_query(GIE_handle, sp->total_len, digest_buff, query_result, MAX_SCANNER_HIT_NUM);
|
||||
}
|
||||
pthread_rwlock_unlock(GIE_rwlock);
|
||||
hit_region_cnt=GIE_query(GIE_handle, digest_buff,(int)strlen(digest_buff), query_result, MAX_SCANNER_HIT_NUM);
|
||||
}
|
||||
|
||||
free(digest_buff);
|
||||
digest_buff=NULL;
|
||||
if(hit_region_cnt<0)//error occurs
|
||||
@@ -1714,6 +1711,89 @@ int Maat_set_scan_status(Maat_feather_t feather,scan_status_t* mid,enum MAAT_SCA
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
int Maat_similar_scan_string(Maat_feather_t feather,int table_id
|
||||
,const char* data,int data_len
|
||||
,struct Maat_rule_t*result,int rule_num
|
||||
,scan_status_t* mid,int thread_num)
|
||||
{
|
||||
int region_ret=0,compile_ret=0;
|
||||
struct _OUTER_scan_status_t* _mid=NULL;
|
||||
GIE_result_t region_result[MAX_SCANNER_HIT_NUM];
|
||||
_compile_result_t compile_result[rule_num];
|
||||
struct _Maat_feather_t* _feather=(_Maat_feather_t*)feather;
|
||||
struct _Maat_scanner_t* my_scanner=NULL;
|
||||
_Maat_table_info_t* p_table=NULL;
|
||||
struct timespec start,end;
|
||||
if(_feather->perf_on==1)
|
||||
{
|
||||
clock_gettime(CLOCK_MONOTONIC,&start);
|
||||
}
|
||||
p_table=acqurie_table(_feather,table_id,TABLE_TYPE_SIMILARITY);
|
||||
if(p_table==NULL)
|
||||
{
|
||||
_feather->scan_err_cnt++;
|
||||
return -1;
|
||||
}
|
||||
if(p_table->cfg_num==0)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
my_scanner=_feather->scanner;
|
||||
if(my_scanner==NULL)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
GIE_handle_t* gie_handle=my_scanner->gie_aux[table_id].gie_handle;
|
||||
struct _region_stat_t * region_stat=NULL;
|
||||
region_stat=&(my_scanner->region_counter[p_table->table_id]);
|
||||
if(region_stat->cfg_num==0)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
aligment_int64_array_add(_feather->thread_call_cnt, thread_num, 1);
|
||||
|
||||
INC_SCANNER_REF(my_scanner,thread_num);
|
||||
region_ret=GIE_query(gie_handle, data, data_len,region_result, MAX_SCANNER_HIT_NUM);
|
||||
if(region_ret<0)
|
||||
{
|
||||
DEC_SCANNER_REF(my_scanner, thread_num);
|
||||
_feather->scan_err_cnt++;
|
||||
return -1;
|
||||
}
|
||||
else if(region_ret>0)
|
||||
{
|
||||
aligment_int64_array_add(p_table->hit_cnt, thread_num,1);
|
||||
_mid=grab_mid(mid, _feather, thread_num, 1);
|
||||
compile_ret=region_compile(_feather,_mid->inner,
|
||||
_mid->is_last_region,
|
||||
region_result,sizeof(GIE_result_t),offsetof(GIE_result_t, tag),
|
||||
region_ret,
|
||||
result,compile_result,rule_num,
|
||||
thread_num);
|
||||
assert(_mid->is_last_region<2);
|
||||
if(_mid->is_last_region==1)
|
||||
{
|
||||
_mid->is_last_region=2;
|
||||
}
|
||||
}
|
||||
|
||||
DEC_SCANNER_REF(my_scanner,thread_num);
|
||||
if(_feather->perf_on==1)
|
||||
{
|
||||
clock_gettime(CLOCK_MONOTONIC,&end);
|
||||
maat_stat_table(p_table,0,&start, &end,thread_num);
|
||||
}
|
||||
else
|
||||
{
|
||||
maat_stat_table(p_table,0,NULL, NULL,thread_num);
|
||||
}
|
||||
if(compile_ret==0&®ion_ret>0)
|
||||
{
|
||||
return -2;
|
||||
}
|
||||
return compile_ret;
|
||||
|
||||
}
|
||||
void Maat_clean_status(scan_status_t* mid)
|
||||
{
|
||||
struct _OUTER_scan_status_t* _mid=NULL;
|
||||
|
||||
@@ -26,7 +26,7 @@
|
||||
#include "rulescan.h"
|
||||
#include "UniversalBoolMatch.h"
|
||||
#include "mesa_fuzzy.h"
|
||||
#include "great_index_engine.h"
|
||||
#include "gram_index_engine.h"
|
||||
|
||||
int MAAT_FRAME_VERSION_2_0_20170701=1;
|
||||
const char *maat_module="MAAT Frame";
|
||||
@@ -556,6 +556,7 @@ int read_table_info(struct _Maat_table_info_t** p_table_info,int num,const char*
|
||||
map_register(string2int_map,"digest", TABLE_TYPE_DIGEST);
|
||||
map_register(string2int_map,"expr_plus", TABLE_TYPE_EXPR_PLUS);
|
||||
map_register(string2int_map,"group", TABLE_TYPE_GROUP);
|
||||
map_register(string2int_map,"similar", TABLE_TYPE_SIMILARITY);
|
||||
map_register(string2int_map,"quickoff",0);
|
||||
map_register(string2int_map,"quickon",1);
|
||||
for(i=0;i<MAX_CHARSET_NUM;i++)
|
||||
@@ -952,18 +953,18 @@ void op_expr_add_rule(struct op_expr_t* op_expr,scan_rule_t* p_rule)
|
||||
op_expr->rule_type=p_rule->rule_type;
|
||||
return;
|
||||
}
|
||||
GIE_digest_t* create_digest_rule(int id,short op,unsigned long long origin_len,const char* digest,
|
||||
GIE_digest_t* create_digest_rule(int id,short op,const char* digest,
|
||||
short cfds_lvl,struct _Maat_group_inner_t* tag)
|
||||
{
|
||||
GIE_digest_t* rule=(GIE_digest_t*)calloc(sizeof(GIE_digest_t),1);
|
||||
int digest_len=strlen(digest);
|
||||
rule->id=id;
|
||||
rule->operation=op;
|
||||
rule->origin_len=origin_len;
|
||||
rule->sfh_length=digest_len;
|
||||
if(digest!=NULL)
|
||||
{
|
||||
rule->fuzzy_hash=(char*)calloc(sizeof(char),digest_len+1);
|
||||
memcpy(rule->fuzzy_hash,digest,digest_len);
|
||||
rule->sfh=(char*)calloc(sizeof(char),digest_len+1);
|
||||
memcpy(rule->sfh,digest,digest_len);
|
||||
|
||||
}
|
||||
rule->cfds_lvl=cfds_lvl;
|
||||
@@ -972,10 +973,10 @@ GIE_digest_t* create_digest_rule(int id,short op,unsigned long long origin_len,c
|
||||
}
|
||||
void destroy_digest_rule(GIE_digest_t*rule)
|
||||
{
|
||||
if(rule->fuzzy_hash!=NULL)
|
||||
if(rule->sfh!=NULL)
|
||||
{
|
||||
free(rule->fuzzy_hash);
|
||||
rule->fuzzy_hash=NULL;
|
||||
free(rule->sfh);
|
||||
rule->sfh=NULL;
|
||||
}
|
||||
free(rule);
|
||||
rule=NULL;
|
||||
@@ -1059,8 +1060,9 @@ struct _Maat_scanner_t* create_maat_scanner(unsigned int version,_Maat_feather_t
|
||||
switch(pp_table[i]->table_type)
|
||||
{
|
||||
case TABLE_TYPE_DIGEST:
|
||||
scanner->digest_update_q[i]=MESA_lqueue_create(0,0);
|
||||
pthread_rwlock_init(&(scanner->digest_rwlock[i]),NULL);
|
||||
case TABLE_TYPE_SIMILARITY:
|
||||
scanner->gie_aux[i].table_type=pp_table[i]->table_type;
|
||||
scanner->gie_aux[i].update_q=MESA_lqueue_create(0,0);
|
||||
break;
|
||||
case TABLE_TYPE_EXPR:
|
||||
case TABLE_TYPE_EXPR_PLUS:
|
||||
@@ -1126,24 +1128,23 @@ void destroy_maat_scanner(struct _Maat_scanner_t*scanner)
|
||||
}
|
||||
for(i=0;i<MAX_TABLE_NUM;i++)
|
||||
{
|
||||
if(scanner->digest_handle[i]!=NULL)
|
||||
if(scanner->gie_aux[i].gie_handle!=NULL)
|
||||
{
|
||||
GIE_destory(scanner->digest_handle[i]);
|
||||
GIE_destory(scanner->gie_aux[i].gie_handle);
|
||||
}
|
||||
if(scanner->digest_update_q[i]==NULL)
|
||||
if(scanner->gie_aux[i].update_q==NULL)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
q_cnt=MESA_lqueue_get_count(scanner->digest_update_q[i]);
|
||||
q_cnt=MESA_lqueue_get_count(scanner->gie_aux[i].update_q);
|
||||
for(j=0;j<q_cnt;j++)
|
||||
{
|
||||
data_size=sizeof(GIE_digest_t*);
|
||||
q_ret=(MESA_queue_errno_t)MESA_lqueue_get_head(scanner->digest_update_q[i],&digest_rule,&data_size);
|
||||
q_ret=(MESA_queue_errno_t)MESA_lqueue_get_head(scanner->gie_aux[i].update_q,&digest_rule,&data_size);
|
||||
assert(data_size==sizeof(void*)&&q_ret==MESA_QUEUE_RET_OK);
|
||||
destroy_digest_rule(digest_rule);
|
||||
}
|
||||
MESA_lqueue_destroy(scanner->digest_update_q[i], lqueue_destroy_cb, NULL);
|
||||
pthread_rwlock_destroy(&(scanner->digest_rwlock[i]));
|
||||
MESA_lqueue_destroy(scanner->gie_aux[i].update_q, lqueue_destroy_cb, NULL);
|
||||
}
|
||||
free(scanner);
|
||||
return;
|
||||
@@ -1931,12 +1932,15 @@ int add_digest_rule(struct _Maat_table_info_t* table,struct db_digest_rule_t* db
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
digest_rule=create_digest_rule(expr_id, 0
|
||||
,db_digest_rule->orgin_len
|
||||
if(table->table_type==TABLE_TYPE_SIMILARITY)
|
||||
{
|
||||
db_digest_rule->digest_string=str_unescape(db_digest_rule->digest_string);
|
||||
}
|
||||
digest_rule=create_digest_rule(expr_id, GIE_INSERT_OPT
|
||||
,db_digest_rule->digest_string
|
||||
,db_digest_rule->confidence_degree
|
||||
,group_rule);
|
||||
MESA_lqueue_join_tail(scanner->digest_update_q[table->table_id], &digest_rule, sizeof(void*));
|
||||
MESA_lqueue_join_tail(scanner->gie_aux[table->table_id].update_q, &digest_rule, sizeof(void*));
|
||||
return 0;
|
||||
}
|
||||
int del_region_rule(struct _Maat_table_info_t* table,int region_id,int group_id,int rule_type,struct _Maat_scanner_t *maat_scanner,void* logger)
|
||||
@@ -1981,14 +1985,14 @@ int del_region_rule(struct _Maat_table_info_t* table,int region_id,int group_id,
|
||||
MESA_lqueue_join_tail(maat_scanner->region_update_q,&op_expr, sizeof(void*));
|
||||
}
|
||||
break;
|
||||
case TABLE_TYPE_SIMILARITY:
|
||||
case TABLE_TYPE_DIGEST:
|
||||
assert(expr_num==1);
|
||||
digest_rule=create_digest_rule(expr_id[0], 1 //del digest
|
||||
,0
|
||||
digest_rule=create_digest_rule(expr_id[0], GIE_DELETE_OPT //del digest
|
||||
,NULL
|
||||
,0
|
||||
,NULL);
|
||||
MESA_lqueue_join_tail(maat_scanner->digest_update_q[table->table_id],&digest_rule, sizeof(void*));
|
||||
MESA_lqueue_join_tail(maat_scanner->gie_aux[i].update_q,&digest_rule, sizeof(void*));
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
@@ -2643,14 +2647,30 @@ void update_digest_rule(struct _Maat_table_info_t* table,const char* table_line,
|
||||
struct db_digest_rule_t* digest_rule=(struct db_digest_rule_t*)calloc(sizeof(struct db_digest_rule_t),1);
|
||||
int ret=0;
|
||||
char digest_buff[MAX_TABLE_LINE_SIZE]={'\0'};
|
||||
if(table->table_type==TABLE_TYPE_DIGEST)
|
||||
{
|
||||
ret=sscanf(table_line,"%d\t%d\t%llu\t%s\t%hd\t%d",&(digest_rule->region_id)
|
||||
,&(digest_rule->group_id)
|
||||
,&(digest_rule->orgin_len)
|
||||
,digest_buff
|
||||
,&(digest_rule->confidence_degree)
|
||||
,&(digest_rule->is_valid));
|
||||
}
|
||||
else if(table->table_type==TABLE_TYPE_SIMILARITY)
|
||||
{
|
||||
digest_rule->orgin_len=0;
|
||||
ret=sscanf(table_line,"%d\t%d\t%s\t%hd\t%d",&(digest_rule->region_id)
|
||||
,&(digest_rule->group_id)
|
||||
,digest_buff
|
||||
,&(digest_rule->confidence_degree)
|
||||
,&(digest_rule->is_valid));
|
||||
}
|
||||
else
|
||||
{
|
||||
assert(0);
|
||||
}
|
||||
digest_rule->digest_string=digest_buff;
|
||||
if(ret!=6||digest_rule->confidence_degree>10||digest_rule->confidence_degree<0)
|
||||
if(!(ret==6||ret==5)||digest_rule->confidence_degree>100||digest_rule->confidence_degree<0)
|
||||
{
|
||||
MESA_handle_runtime_log(logger,RLOG_LV_FATAL,maat_module ,
|
||||
"update error,invalid format of digest table %s:%s"
|
||||
@@ -2821,8 +2841,8 @@ void do_scanner_update(struct _Maat_scanner_t* scanner,MESA_lqueue_head garbage_
|
||||
int i=0;
|
||||
long q_cnt;
|
||||
GIE_create_para_t para;
|
||||
para.index_interval=100;
|
||||
para.query_accuracy=0.1;
|
||||
para.gram_value=7;
|
||||
para.position_accuracy=10;
|
||||
tmp1=create_bool_matcher(scanner->compile_hash,
|
||||
scan_thread_num,
|
||||
logger);
|
||||
@@ -2843,26 +2863,34 @@ void do_scanner_update(struct _Maat_scanner_t* scanner,MESA_lqueue_head garbage_
|
||||
,scanner);
|
||||
for(i=0;i<MAX_TABLE_NUM;i++)
|
||||
{
|
||||
if(scanner->digest_update_q[i]==NULL)
|
||||
if(scanner->gie_aux[i].update_q==NULL)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
q_cnt=MESA_lqueue_get_count(scanner->digest_update_q[i]);
|
||||
q_cnt=MESA_lqueue_get_count(scanner->gie_aux[i].update_q);
|
||||
if(q_cnt==0)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
pthread_rwlock_wrlock(&(scanner->digest_rwlock[i]));
|
||||
if(scanner->digest_handle[i]==NULL)
|
||||
if(scanner->gie_aux[i].gie_handle==NULL)
|
||||
{
|
||||
scanner->digest_handle[i]=GIE_create(&para);
|
||||
if(scanner->gie_aux[i].table_type==TABLE_TYPE_SIMILARITY)
|
||||
{
|
||||
para.ED_reexamine=1;
|
||||
para.format=GIE_INPUT_FORMAT_PLAIN;
|
||||
}
|
||||
digest_batch_update(scanner->digest_handle[i]
|
||||
,scanner->digest_update_q[i]
|
||||
else
|
||||
{
|
||||
para.ED_reexamine=0;
|
||||
para.format=GIE_INPUT_FORMAT_SFH;
|
||||
}
|
||||
scanner->gie_aux[i].gie_handle=GIE_create(&para);
|
||||
}
|
||||
digest_batch_update(scanner->gie_aux[i].gie_handle
|
||||
,scanner->gie_aux[i].update_q
|
||||
,logger
|
||||
,scanner
|
||||
,i);
|
||||
pthread_rwlock_unlock(&(scanner->digest_rwlock[i]));
|
||||
}
|
||||
if(scanner->tmp_district_map!=NULL)
|
||||
{
|
||||
@@ -3060,6 +3088,7 @@ void maat_update_cb(const char* table_name,const char* line,void *u_para)
|
||||
update_intval_rule(feather->p_table_info[table_id], line, scanner,feather->logger,feather->GROUP_MODE_ON);
|
||||
break;
|
||||
case TABLE_TYPE_DIGEST:
|
||||
case TABLE_TYPE_SIMILARITY:
|
||||
update_digest_rule(feather->p_table_info[table_id], line, scanner,feather->logger,feather->GROUP_MODE_ON);
|
||||
break;
|
||||
case TABLE_TYPE_COMPILE:
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
#include "hiredis.h"
|
||||
|
||||
#include "mesa_fuzzy.h"
|
||||
#include "great_index_engine.h"
|
||||
#include "gram_index_engine.h"
|
||||
#include "aligment_int64.h"
|
||||
#include <pthread.h>
|
||||
#include <iconv.h>
|
||||
@@ -124,7 +124,7 @@ struct db_digest_rule_t
|
||||
int region_id;
|
||||
int group_id;
|
||||
unsigned long long orgin_len;
|
||||
const char* digest_string;
|
||||
char* digest_string;
|
||||
short confidence_degree;
|
||||
int is_valid;
|
||||
};
|
||||
@@ -318,14 +318,19 @@ struct _stream_para_t
|
||||
pthread_mutex_t fuzzy_mutex;
|
||||
unsigned char query_point[8];
|
||||
};
|
||||
struct GIE_aux_t
|
||||
{
|
||||
enum MAAT_TABLE_TYPE table_type;
|
||||
GIE_handle_t* gie_handle;
|
||||
MESA_lqueue_head update_q;
|
||||
};
|
||||
struct _Maat_scanner_t
|
||||
{
|
||||
int version;
|
||||
time_t last_update_time;
|
||||
long long *ref_cnt; //optimized for cache_alignment 64
|
||||
rule_scanner_t region;
|
||||
pthread_rwlock_t digest_rwlock[MAX_TABLE_NUM];
|
||||
GIE_handle_t* digest_handle[MAX_TABLE_NUM];
|
||||
struct GIE_aux_t gie_aux[MAX_TABLE_NUM];
|
||||
MESA_htable_handle region_hash;
|
||||
MESA_htable_handle group_hash;
|
||||
MESA_htable_handle compile_hash;
|
||||
@@ -336,7 +341,6 @@ struct _Maat_scanner_t
|
||||
unsigned int exprid_generator;
|
||||
unsigned int dedup_expr_num;
|
||||
MESA_lqueue_head region_update_q;
|
||||
MESA_lqueue_head digest_update_q[MAX_TABLE_NUM];
|
||||
void * expr_compiler;
|
||||
scan_result_t *region_rslt_buff;
|
||||
MESA_lqueue_head tomb_ref;//reference of feather->garbage_q
|
||||
|
||||
@@ -19,7 +19,7 @@ LIBMAAT = libmaatframe.a
|
||||
LIBMAAT_SO = libmaatframe.so
|
||||
|
||||
OBJS=config_monitor.o Maat_rule.o Maat_api.o Maat_command.o Maat_stat.o UniversalBoolMatch.o dynamic_array.o\
|
||||
cJSON.o json2iris.o map_str2int.o interval_index.o great_index_engine.o mesa_fuzzy.o rbtree.o
|
||||
cJSON.o json2iris.o map_str2int.o interval_index.o gram_index_engine.o mesa_fuzzy.o rbtree.o
|
||||
.c.o:
|
||||
$(CC) -c $(CFLAGS) -I. $(H_DIR) $<
|
||||
|
||||
|
||||
1359
src/entry/gram_index_engine.c
Normal file
1359
src/entry/gram_index_engine.c
Normal file
File diff suppressed because it is too large
Load Diff
@@ -1,5 +1,5 @@
|
||||
#ifndef _GREAT_INDEX_ENGINE_
|
||||
#define _GREAT_INDEX_ENGINE_
|
||||
#ifndef _GRAM_INDEX_ENGINE_
|
||||
#define _GRAM_INDEX_ENGINE_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
@@ -7,6 +7,9 @@ extern "C" {
|
||||
|
||||
#define GIE_INSERT_OPT 0
|
||||
#define GIE_DELETE_OPT 1
|
||||
#define GIE_INPUT_FORMAT_SFH 1
|
||||
#define GIE_INPUT_FORMAT_PLAIN 0
|
||||
|
||||
|
||||
typedef struct
|
||||
{
|
||||
@@ -17,10 +20,10 @@ typedef struct
|
||||
typedef struct
|
||||
{
|
||||
unsigned int id;
|
||||
unsigned int sfh_length;//size of fuzzy_hash
|
||||
short operation;//GIE_INSERT_OPT or GIE_DELETE_OPT.if operation is GIE_DELETE_OPT, only id is needed;
|
||||
short cfds_lvl;
|
||||
unsigned long long origin_len;
|
||||
char * fuzzy_hash;
|
||||
char * sfh;
|
||||
void * tag;
|
||||
}GIE_digest_t;
|
||||
|
||||
@@ -29,16 +32,18 @@ typedef struct
|
||||
{
|
||||
unsigned int id;
|
||||
short cfds_lvl;
|
||||
unsigned long long origin_len;
|
||||
void * tag;
|
||||
}GIE_result_t;
|
||||
|
||||
|
||||
/* Parameters consumed by GIE_create() when building an index handle. */
typedef struct
{
	unsigned long long index_interval;
	double query_accuracy;
	unsigned int gram_value;
	unsigned int position_accuracy;
	/*
	 * Input format: GIE_INPUT_FORMAT_SFH means the input string is an SFH
	 * string; otherwise (GIE_INPUT_FORMAT_PLAIN) it is an ordinary string.
	 */
	short format;
	/* When 1, the edit distance is calculated to verify the final result. */
	short ED_reexamine;
}GIE_create_para_t;
|
||||
|
||||
|
||||
@@ -47,11 +52,11 @@ GIE_handle_t * GIE_create(const GIE_create_para_t * para);
|
||||
|
||||
int GIE_update(GIE_handle_t * handle, GIE_digest_t ** digests, int size);
|
||||
|
||||
|
||||
//return actual matched result count
|
||||
//return 0 when matched nothing;
|
||||
//return -1 when error occurs;
|
||||
int GIE_query(GIE_handle_t * handle, unsigned long long origin_len, const char * fuzzy_string, GIE_result_t * results, int result_size);
|
||||
|
||||
int GIE_query(GIE_handle_t * handle, const char * data, int data_len, GIE_result_t * results, int result_size);
|
||||
|
||||
void GIE_destory(GIE_handle_t * handle);
|
||||
|
||||
@@ -59,5 +64,4 @@ void GIE_destory(GIE_handle_t * handle);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -1,840 +0,0 @@
|
||||
#include<stdio.h>
|
||||
#include<stdlib.h>
|
||||
#include<string.h>
|
||||
#include<math.h>
|
||||
#include<assert.h>
|
||||
#include<MESA/MESA_htable.h>
|
||||
#include "great_index_engine.h"
|
||||
#include "queue.h"
|
||||
/* Version marker symbol for this engine build. */
int GIE_VERSION_1_0_20151109=1;

/*
 * Slot count of both hash tables. Parenthesized so the macro expands as a
 * single value inside larger expressions (e.g. x / HTABLE_SIZE).
 */
#define HTABLE_SIZE (1024*1024)
/* NOTE(review): MAX, TOLERENCE_SIZE and CONF_MAX are not used in the visible part of the file. */
#define MAX 10000
#define TOLERENCE_SIZE 0
#define CONF_MAX 10
/* Base factor of the fuzzy-hash block size computed by calc_fh_blocksize(). */
#define BLOCKSIZE_MIN 3
/* Sentinel stored in prev_value/next_value when an index key has no neighbour. */
#define MAX_UINT64 (0xFFFFFFFFFFFFFFFFULL)
|
||||
|
||||
typedef struct
|
||||
{
|
||||
unsigned long long user_precision;
|
||||
double user_query_accuracy;
|
||||
MESA_htable_handle id_table;
|
||||
MESA_htable_handle index_table;
|
||||
struct VL * valuelist;
|
||||
}GIE_handle_inner_t;
|
||||
|
||||
/* One entry of the sorted list of index keys. */
struct valuelist_node
{
	/* The index key. */
	unsigned long long value;
	/* Head of the list this node is linked into. */
	struct VL * valuelist_name;
	TAILQ_ENTRY(valuelist_node) vlistentry;
};
|
||||
|
||||
/* One digest entry inside the per-key list of the index table. */
struct linklist_node
{
	/* Index key under which this node is stored. */
	unsigned long long index_key;
	/* Head of the list this node is linked into. */
	struct TQ * listname;
	/* Digest record this node refers to. */
	struct id_table_data * basicinfo;
	TAILQ_ENTRY(linklist_node) listentry;
};
|
||||
|
||||
/* Value stored in the index table for one index key. */
struct index_table_data
{
	/* List of digest nodes sharing this key, kept in ascending id order. */
	struct TQ * listhead;
	/* Number of nodes in listhead. */
	int cnt;
	/* Neighbouring keys in the sorted key list; MAX_UINT64 when there is none. */
	unsigned long long prev_value;
	unsigned long long next_value;
};
|
||||
|
||||
/* Digest record stored in the id table. */
struct id_table_data
{
	unsigned int id;
	/* Length of the original data the fuzzy hash was computed from. */
	unsigned long long origin_len;
	/* Block size derived from origin_len by calc_fh_blocksize(). */
	unsigned long long blocksize;
	/* Heap copy of the fuzzy hash string; released by idtable_free(). */
	char * fh;
	short cfds_lvl;
	/* Caller-supplied tag, copied from the digest as-is. */
	void * tag;
	/* Node in the index table that refers back to this record. */
	struct linklist_node * backtrack;
};
|
||||
|
||||
TAILQ_HEAD(TQ, linklist_node);
|
||||
TAILQ_HEAD(VL, valuelist_node);
|
||||
|
||||
void idtable_free(void * data);
|
||||
void indextable_free(void * data);
|
||||
int GIE_insert_indextable(GIE_handle_inner_t * handle, struct id_table_data * info, unsigned long long index_key);
|
||||
int GIE_delete_from_indextable_by_key(GIE_handle_inner_t * handle, struct linklist_node * backtrack);
|
||||
int GIE_delete(GIE_handle_inner_t * handle, GIE_digest_t ** digests, int size);
|
||||
int GIE_union(struct TQ ** union_list, int list_num, struct id_table_data ** result,\
|
||||
unsigned long long min, unsigned long long max, unsigned long long query_blocksize);
|
||||
|
||||
struct TQ * linklist_union(struct TQ * list_first, struct TQ * list_second, unsigned long long min, unsigned long long max,\
|
||||
unsigned long long query_blocksize);
|
||||
|
||||
|
||||
int minof3(int x, int y, int z);
|
||||
int GIE_edit_distance(char* w1, int l1, const char* w2, int l2);
|
||||
int GIE_edit_distance_with_position(char * fh, const char * fuzzy_string, unsigned long long orilen, int * fuzzy_actual_size,\
|
||||
unsigned long long * calculate_len);
|
||||
|
||||
GIE_handle_t * GIE_create(const GIE_create_para_t * para)
|
||||
{
|
||||
GIE_handle_inner_t * handle = (GIE_handle_inner_t *)malloc(sizeof(GIE_handle_inner_t));
|
||||
handle->user_precision = para->index_interval;
|
||||
handle->user_query_accuracy = para->query_accuracy;
|
||||
|
||||
struct VL * head = (struct VL *)malloc(sizeof(struct VL));
|
||||
TAILQ_INIT(head);
|
||||
handle->valuelist = head;
|
||||
|
||||
|
||||
MESA_htable_create_args_t idtable_args,indextable_args;
|
||||
memset(&idtable_args, 0, sizeof(idtable_args));
|
||||
memset(&indextable_args, 0, sizeof(indextable_args));
|
||||
|
||||
|
||||
idtable_args.thread_safe = 0;
|
||||
idtable_args.hash_slot_size = HTABLE_SIZE;
|
||||
idtable_args.max_elem_num = 4 * HTABLE_SIZE;
|
||||
idtable_args.expire_time = 0;
|
||||
idtable_args.eliminate_type = HASH_ELIMINATE_ALGO_FIFO;
|
||||
idtable_args.key_comp = NULL;
|
||||
idtable_args.key2index = NULL;
|
||||
idtable_args.data_free = idtable_free;
|
||||
idtable_args.data_expire_with_condition = NULL;
|
||||
idtable_args.recursive = 1;
|
||||
|
||||
indextable_args.thread_safe = 0;
|
||||
indextable_args.hash_slot_size = HTABLE_SIZE;
|
||||
indextable_args.max_elem_num = 4 * HTABLE_SIZE;
|
||||
indextable_args.expire_time = 0;
|
||||
indextable_args.eliminate_type = HASH_ELIMINATE_ALGO_FIFO;
|
||||
indextable_args.key_comp = NULL;
|
||||
indextable_args.key2index = NULL;
|
||||
indextable_args.data_free = indextable_free;
|
||||
indextable_args.data_expire_with_condition = NULL;
|
||||
indextable_args.recursive = 1;
|
||||
|
||||
handle->id_table = MESA_htable_create(&idtable_args, sizeof(idtable_args));
|
||||
handle->index_table = MESA_htable_create(&indextable_args, sizeof(indextable_args));
|
||||
|
||||
return (GIE_handle_t *)(handle);
|
||||
}
|
||||
|
||||
void idtable_free(void * data)
|
||||
{
|
||||
struct id_table_data * tmp = (struct id_table_data *)data;
|
||||
free(tmp->fh);
|
||||
free(tmp);
|
||||
// printf("free id_table_data!\n");
|
||||
return;
|
||||
}
|
||||
|
||||
void indextable_free(void * data)
|
||||
{
|
||||
// printf("free index_table_data!\n");
|
||||
struct index_table_data * tmp = (struct index_table_data *)data;
|
||||
struct linklist_node * tmp_node = TAILQ_FIRST(tmp->listhead);
|
||||
while(tmp_node != NULL)
|
||||
{
|
||||
struct linklist_node * linklist_tmp = TAILQ_NEXT(tmp_node, listentry);
|
||||
free(tmp_node);
|
||||
// printf("free list_node_data!\n");
|
||||
tmp_node = linklist_tmp;
|
||||
}
|
||||
free(tmp->listhead);
|
||||
free(tmp);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
 * Release a handle created by GIE_create(): both hash tables are destroyed
 * (index table first), then the sorted key list and the handle itself.
 */
void GIE_destory(GIE_handle_t * handle)
{
	GIE_handle_inner_t * inner = (GIE_handle_inner_t *)(handle);
	struct valuelist_node * cur = NULL;
	struct valuelist_node * following = NULL;

	MESA_htable_destroy(inner->index_table, NULL);
	MESA_htable_destroy(inner->id_table, NULL);

	for(cur = TAILQ_FIRST(inner->valuelist); cur != NULL; cur = following)
	{
		/* Fetch the successor before the current node is released. */
		following = TAILQ_NEXT(cur, vlistentry);
		free(cur);
	}
	free(inner->valuelist);
	free(inner);
}
|
||||
|
||||
|
||||
unsigned long long calc_fh_blocksize(unsigned long long orilen)
|
||||
{
|
||||
double tmp = orilen/(64 * BLOCKSIZE_MIN);
|
||||
double index = floor(log(tmp)/log(2));
|
||||
double tmp_t = pow(2, index);
|
||||
unsigned long long blocksize = (unsigned long long)(tmp_t * BLOCKSIZE_MIN);
|
||||
return blocksize;
|
||||
}
|
||||
|
||||
/*
 * Debug helper with a hash-table iteration callback signature: prints the
 * index key of one index-table entry followed by the id and original length
 * of every digest in its list. key, size and user are unused.
 */
void print_item_iterate(const uchar * key, uint size, void * data, void * user)
{
	struct index_table_data * entry = (struct index_table_data *)data;
	struct linklist_node * node = TAILQ_FIRST(entry->listhead);

	/* The key is read from the first node, so the list must not be empty. */
	printf("index_key = %llu\n", node->index_key);
	for(node = TAILQ_FIRST(entry->listhead); node != NULL; node = TAILQ_NEXT(node, listentry))
	{
		printf("id = %u orilen = %llu ", node->basicinfo->id, node->basicinfo->origin_len);
	}
	printf("\n");
}
|
||||
|
||||
/*
 * Apply a batch of insert/delete operations to the index.
 *
 * handle  - handle returned by GIE_create().
 * digests - array of `size` operations; each entry's `operation` field
 *           selects GIE_INSERT_OPT or GIE_DELETE_OPT, any other value is
 *           ignored.
 *
 * For an insert, a private copy of the digest is registered in the id table
 * and linked into the index table under the key
 * origin_len rounded down to a multiple of the handle's index interval.
 * Inserts without a fuzzy hash, or for which memory cannot be allocated, are
 * skipped.
 *
 * Returns the number of operations that succeeded.
 */
int GIE_update(GIE_handle_t * handle, GIE_digest_t ** digests, int size)
{
	GIE_handle_inner_t * _handle = (GIE_handle_inner_t *)(handle);
	struct id_table_data * info = NULL;
	unsigned long long first_index_key = 0;
	size_t input_fh_len = 0;
	int success_cnt = 0;
	int i = 0;

	for(i = 0; i < size; i++)
	{
		switch(digests[i]->operation)
		{
			case GIE_INSERT_OPT:
			{
				if(digests[i]->fuzzy_hash == NULL)
				{
					/* Nothing to index; strlen() on NULL would be undefined. */
					break;
				}
				first_index_key = (digests[i]->origin_len)/(_handle->user_precision)*(_handle->user_precision);
				info = (struct id_table_data *)malloc(sizeof(struct id_table_data));
				if(info == NULL)
				{
					break;
				}
				input_fh_len = strlen(digests[i]->fuzzy_hash);
				info->fh = (char *)calloc(sizeof(char), input_fh_len+1);
				if(info->fh == NULL)
				{
					free(info);
					break;
				}
				memcpy(info->fh, digests[i]->fuzzy_hash, input_fh_len);

				info->origin_len = digests[i]->origin_len;
				info->blocksize = calc_fh_blocksize(digests[i]->origin_len);
				info->tag = digests[i]->tag;
				info->id = digests[i]->id;
				info->cfds_lvl = digests[i]->cfds_lvl;
				info->backtrack = NULL;

				if(MESA_htable_add(_handle->id_table, (const uchar *)(&(digests[i]->id)), sizeof(digests[i]->id), (const void *)info) < 0)
				{
					/* The table did not take the record, so it is still ours to free. */
					printf("add %d id_table failed!",digests[i]->id);
					free(info->fh);
					free(info);
					break;
				}
				if(GIE_insert_indextable(_handle, info, first_index_key) < 0)
				{
					printf("insert %d first failed\n",info->id);
					assert(0);
					/*
					 * The record is already registered in id_table, so it is
					 * unregistered (which releases it through idtable_free)
					 * instead of being freed directly; a direct free would
					 * leave a dangling entry in the table.
					 */
					MESA_htable_del(_handle->id_table, (const uchar *)(&(digests[i]->id)), sizeof(digests[i]->id), idtable_free);
					break;
				}
				success_cnt++;
				break;
			}
			case GIE_DELETE_OPT:
			{
				/*
				 * NOTE(review): the prototype names the third parameter
				 * `size`, but the current index is passed; GIE_delete() is
				 * not visible here -- confirm this is intended.
				 */
				success_cnt += GIE_delete(_handle, digests, i);
				break;
			}
			default:
				break;
		}
	}
	return success_cnt;
}
|
||||
|
||||
|
||||
int GIE_insert_indextable(GIE_handle_inner_t * handle, struct id_table_data * info, unsigned long long index_key)
|
||||
{
|
||||
struct linklist_node * node_data = (struct linklist_node *)malloc(sizeof(struct linklist_node));
|
||||
//printf("linklist_node malloc success\n");
|
||||
node_data->basicinfo = info;
|
||||
node_data->index_key = index_key;
|
||||
node_data->listname = NULL;
|
||||
|
||||
info->backtrack = node_data; //Backtracking pointer to index table, it is a pointer to a structure pointer
|
||||
//printf("1: (info->first_backtrack)->index_key = %llu\n", (info->first_backtrack)->index_key);
|
||||
|
||||
struct index_table_data * ret = (struct index_table_data *)(MESA_htable_search_cb(handle->index_table, (const uchar *)(&index_key), sizeof(index_key), NULL, NULL, NULL));
|
||||
if(ret != NULL)
|
||||
{
|
||||
//printf("ret != NULL\n");
|
||||
struct linklist_node * tmp = NULL;
|
||||
node_data->listname = ret->listhead;
|
||||
//If there are linked list exists in index table, sorted according to id
|
||||
TAILQ_FOREACH(tmp, ret->listhead, listentry)
|
||||
{
|
||||
if(tmp->basicinfo->id > node_data->basicinfo->id)
|
||||
{
|
||||
TAILQ_INSERT_BEFORE(tmp, node_data, listentry);
|
||||
ret->cnt++;
|
||||
return 0;
|
||||
}
|
||||
if(node_data->basicinfo->id == tmp->basicinfo->id)
|
||||
{
|
||||
printf("invalid insert!");
|
||||
return -1;
|
||||
}
|
||||
//TODO <20><><EFBFBD><EFBFBD>id<69><64><EFBFBD><EFBFBD>ǰ<EFBFBD><C7B0><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>id<69><64><EFBFBD><EFBFBD>Ҫ<EFBFBD><D2AA><EFBFBD><EFBFBD>invalid insert
|
||||
}
|
||||
TAILQ_INSERT_TAIL(ret->listhead, node_data, listentry);
|
||||
ret->cnt ++;
|
||||
}
|
||||
else
|
||||
{
|
||||
struct index_table_data * index_data = (struct index_table_data *)malloc(sizeof(struct index_table_data));
|
||||
|
||||
struct valuelist_node * tmp_t = NULL;
|
||||
struct valuelist_node * value_data = (struct valuelist_node *)malloc(sizeof(struct valuelist_node));
|
||||
value_data->value = index_key;
|
||||
value_data->valuelist_name = handle->valuelist;
|
||||
|
||||
int insert_flag = 0;
|
||||
TAILQ_FOREACH(tmp_t, handle->valuelist, vlistentry)
|
||||
{
|
||||
if(tmp_t->value > value_data->value)
|
||||
{
|
||||
TAILQ_INSERT_BEFORE(tmp_t, value_data, vlistentry);
|
||||
insert_flag = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if(!insert_flag)
|
||||
{
|
||||
TAILQ_INSERT_TAIL(handle->valuelist, value_data, vlistentry);
|
||||
}
|
||||
|
||||
struct valuelist_node * tmp_prev = TAILQ_PREV(value_data, VL, vlistentry);
|
||||
struct valuelist_node * tmp_next = TAILQ_NEXT(value_data, vlistentry);
|
||||
|
||||
if(tmp_prev != NULL && tmp_next != NULL)
|
||||
{
|
||||
struct index_table_data * index_tmp_prev = MESA_htable_search(handle->index_table, (const uchar *)(&(tmp_prev->value)),\
|
||||
sizeof(tmp_prev->value));
|
||||
|
||||
struct index_table_data * index_tmp_next = MESA_htable_search(handle->index_table, (const uchar *)(&(tmp_next->value)),\
|
||||
sizeof(tmp_next->value));
|
||||
index_tmp_prev->next_value = value_data->value;
|
||||
index_data->prev_value = tmp_prev->value;
|
||||
index_data->next_value = tmp_next->value;
|
||||
index_tmp_next->prev_value = value_data->value;
|
||||
}
|
||||
if(tmp_prev != NULL && tmp_next == NULL)
|
||||
{
|
||||
|
||||
struct index_table_data * index_tmp_prev = MESA_htable_search(handle->index_table, (const uchar *)(&(tmp_prev->value)),\
|
||||
sizeof(tmp_prev->value));
|
||||
|
||||
index_tmp_prev->next_value = value_data->value;
|
||||
index_data->prev_value = tmp_prev->value;
|
||||
index_data->next_value = MAX_UINT64;
|
||||
}
|
||||
if(tmp_prev == NULL && tmp_next != NULL)
|
||||
{
|
||||
|
||||
struct index_table_data * index_tmp_next = MESA_htable_search(handle->index_table, (const uchar *)(&(tmp_next->value)),\
|
||||
sizeof(tmp_next->value));
|
||||
|
||||
index_data->prev_value = MAX_UINT64;
|
||||
index_data->next_value = tmp_next->value;
|
||||
index_tmp_next->prev_value = value_data->value;
|
||||
}
|
||||
if(tmp_prev == NULL && tmp_next == NULL)
|
||||
{
|
||||
index_data->prev_value = MAX_UINT64;
|
||||
index_data->next_value = MAX_UINT64;
|
||||
}
|
||||
|
||||
|
||||
//If there are no entries, we have to create a list head pointer,
|
||||
//and add the corresponding entry in the index table, the data link to the back
|
||||
|
||||
struct TQ * head = (struct TQ *)malloc(sizeof(struct TQ));
|
||||
index_data->listhead = head;
|
||||
index_data->cnt = 0;
|
||||
|
||||
TAILQ_INIT(head);
|
||||
TAILQ_INSERT_TAIL(head, node_data, listentry);
|
||||
index_data->cnt++;
|
||||
node_data->listname = index_data->listhead;
|
||||
|
||||
if(MESA_htable_add(handle->index_table, (const uchar *)(&index_key), sizeof(index_key), (const void *)index_data) < 0)
|
||||
{
|
||||
printf("add index_table failed!\n");
|
||||
assert(0);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// struct index_table_data * tmp_v = (struct index_table_data *)(MESA_htable_search_cb(handle->index_table, (const uchar *)(&index_key), sizeof(index_key), NULL, NULL, NULL));
|
||||
// printf("index_data->prev_value = %llu ", index_data->prev_value);
|
||||
// printf("index_data->next_value = %llu ", index_data->next_value);
|
||||
// printf("index_key = %llu ", index_key);
|
||||
// printf("prev_value = %llu ", tmp_v->prev_value);
|
||||
// printf("next_value = %llu\n", tmp_v->next_value);
|
||||
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
 * Unlink one list node from the index list it belongs to and free it.
 *
 * If the node was the last one in its list, the list's entry is also removed
 * from handle->index_table (released through indextable_free), the matching
 * node is removed from handle->valuelist, and the prev_value/next_value
 * fields of the neighbouring index entries are re-linked around the hole
 * (MAX_UINT64 marks "no neighbour").
 *
 * handle    - index handle owning index_table and valuelist.
 * backtrack - node to remove; it is freed before this function returns.
 *
 * Returns 0 on success, -1 if the index-table entry could not be deleted.
 */
int GIE_delete_from_indextable_by_key(GIE_handle_inner_t * handle, struct linklist_node * backtrack)
{
    /* Copy the key first: it is still needed after the hash entry is deleted. */
    unsigned long long tmp_key = backtrack->index_key;

    TAILQ_REMOVE(backtrack->listname, backtrack, listentry);

    /* The list became empty: drop its record from the hash table as well. */
    if(TAILQ_EMPTY(backtrack->listname))
    {
        struct valuelist_node * victim = NULL;

        if(MESA_htable_del(handle->index_table, (const uchar *)(&tmp_key), sizeof(tmp_key), indextable_free) < 0)
        {
            printf("indextable backtrack delete error!\n");
            assert(0);
            /* The node is already unlinked; free it so it is not leaked. */
            free(backtrack);
            return -1;
        }

        TAILQ_FOREACH(victim, handle->valuelist, vlistentry)
        {
            if(victim->value == tmp_key)
            {
                break;
            }
        }

        /* TAILQ_FOREACH leaves victim == NULL when the key is not in the value list. */
        if(victim != NULL)
        {
            struct valuelist_node * prev = TAILQ_PREV(victim, VL, vlistentry);
            struct valuelist_node * next = TAILQ_NEXT(victim, vlistentry);

            if(prev != NULL)
            {
                struct index_table_data * prev_entry = (struct index_table_data *)MESA_htable_search_cb(handle->index_table,
                        (const uchar *)(&(prev->value)), sizeof(prev->value), NULL, NULL, NULL);
                if(prev_entry != NULL)
                {
                    prev_entry->next_value = (next != NULL) ? next->value : MAX_UINT64;
                }
            }
            if(next != NULL)
            {
                struct index_table_data * next_entry = (struct index_table_data *)MESA_htable_search_cb(handle->index_table,
                        (const uchar *)(&(next->value)), sizeof(next->value), NULL, NULL, NULL);
                if(next_entry != NULL)
                {
                    next_entry->prev_value = (prev != NULL) ? prev->value : MAX_UINT64;
                }
            }

            TAILQ_REMOVE(handle->valuelist, victim, vlistentry);
            free(victim);
        }
    }

    free(backtrack);
    return 0;
}
|
||||
|
||||
|
||||
/*
 * Delete the digest digests[i] from the index.
 *
 * The id is looked up in handle->id_table. When found, its list node is
 * removed from the index table and the id record is deleted (released
 * through idtable_free). When the id is unknown, a message is printed and
 * nothing is deleted; the id table is not asked to delete a key that the
 * lookup just reported as missing.
 *
 * Returns the number of records deleted: 1 if the id existed, 0 otherwise.
 */
int GIE_delete(GIE_handle_inner_t * handle, GIE_digest_t ** digests, int i)
{
    int success_cnt = 0;
    struct id_table_data * ret = (struct id_table_data *) MESA_htable_search(handle->id_table,
            (const uchar *)(&(digests[i]->id)), sizeof(digests[i]->id));

    if(ret == NULL)
    {
        printf("del %d doesn't exist!\n",digests[i]->id);
        return success_cnt;
    }

    /* Remove the list node first: it is reached through the id record deleted below. */
    GIE_delete_from_indextable_by_key(handle, ret->backtrack);
    success_cnt++;

    if(MESA_htable_del(handle->id_table, (const uchar *)(&(digests[i]->id)), sizeof(digests[i]->id), idtable_free) < 0)
    {
        printf("delete id failed!");
        assert(0);
    }
    return success_cnt;
}
|
||||
|
||||
|
||||
|
||||
int GIE_union(struct TQ ** union_list, int list_num, struct id_table_data ** result,\
|
||||
unsigned long long min, unsigned long long max, unsigned long long query_blocksize)
|
||||
{
|
||||
struct TQ * tmp_list = (struct TQ *)malloc(sizeof(struct TQ));
|
||||
TAILQ_INIT(tmp_list);
|
||||
struct linklist_node * tmp_node = NULL;
|
||||
int size = 0;
|
||||
TAILQ_FOREACH(tmp_node, union_list[0], listentry)
|
||||
{
|
||||
if(tmp_node->basicinfo->origin_len >= min && tmp_node->basicinfo->origin_len <= max && tmp_node->basicinfo->blocksize == query_blocksize)
|
||||
{
|
||||
struct linklist_node * new_node = (struct linklist_node *)malloc(sizeof(struct linklist_node));
|
||||
new_node->index_key = tmp_node->index_key;
|
||||
new_node->basicinfo = tmp_node->basicinfo;
|
||||
new_node->listname = tmp_list;
|
||||
TAILQ_INSERT_TAIL(tmp_list, new_node, listentry);
|
||||
}
|
||||
}
|
||||
int i = 0;
|
||||
for(i = 1; i < list_num; i++)
|
||||
{
|
||||
tmp_list = linklist_union(tmp_list, union_list[i], min, max, query_blocksize);
|
||||
}
|
||||
|
||||
struct linklist_node * tmp_node_t = NULL;
|
||||
TAILQ_FOREACH(tmp_node_t, tmp_list, listentry)
|
||||
{
|
||||
result[size++] = tmp_node_t->basicinfo;
|
||||
}
|
||||
|
||||
struct linklist_node * first_node = TAILQ_FIRST(tmp_list);
|
||||
while(first_node != NULL)
|
||||
{
|
||||
struct linklist_node * linklist_tmp = TAILQ_NEXT(first_node, listentry);
|
||||
free(first_node);
|
||||
first_node = linklist_tmp;
|
||||
}
|
||||
free(tmp_list);
|
||||
return size;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
struct TQ * linklist_union(struct TQ * list_first, struct TQ * list_second, unsigned long long min, unsigned long long max,\
|
||||
unsigned long long query_blocksize)
|
||||
{
|
||||
struct TQ * link_result = (struct TQ *)malloc(sizeof(struct TQ));
|
||||
TAILQ_INIT(link_result);
|
||||
struct linklist_node * tmp_first = TAILQ_FIRST(list_first);
|
||||
struct linklist_node * tmp_second = TAILQ_FIRST(list_second);
|
||||
while(tmp_first != NULL && tmp_second != NULL)
|
||||
{
|
||||
//When combined final result in a relatively small deposit on id, id small pointer will move backward,
|
||||
// if both are equal, both pointers move backward until a move to the tail end of the list
|
||||
if(tmp_first->basicinfo->id < tmp_second->basicinfo->id)
|
||||
{
|
||||
if(tmp_first->basicinfo->origin_len >= min && tmp_first->basicinfo->origin_len <= max && tmp_first->basicinfo->blocksize == query_blocksize)
|
||||
{
|
||||
struct linklist_node * new_node = (struct linklist_node *)malloc(sizeof(struct linklist_node));
|
||||
new_node->index_key = tmp_first->index_key;
|
||||
new_node->basicinfo = tmp_first->basicinfo;
|
||||
new_node->listname = link_result;
|
||||
TAILQ_INSERT_TAIL(link_result, new_node, listentry);
|
||||
|
||||
}
|
||||
tmp_first = TAILQ_NEXT(tmp_first, listentry);
|
||||
}
|
||||
else if(tmp_first->basicinfo->id > tmp_second->basicinfo->id)
|
||||
{
|
||||
if(tmp_second->basicinfo->origin_len >= min && tmp_second->basicinfo->origin_len <= max && tmp_second->basicinfo->blocksize == query_blocksize)
|
||||
{
|
||||
struct linklist_node * new_node = (struct linklist_node *)malloc(sizeof(struct linklist_node));
|
||||
new_node->index_key = tmp_second->index_key;
|
||||
new_node->basicinfo = tmp_second->basicinfo;
|
||||
new_node->listname = link_result;
|
||||
TAILQ_INSERT_TAIL(link_result, new_node, listentry);
|
||||
}
|
||||
tmp_second = TAILQ_NEXT(tmp_second, listentry);
|
||||
}
|
||||
|
||||
/*else
|
||||
{
|
||||
if(tmp_first->basicinfo->origin_len >= min && tmp_first->basicinfo->origin_len <= max && tmp_first->basicinfo->blocksize == query_blocksize)
|
||||
{
|
||||
struct linklist_node * new_node = (struct linklist_node *)malloc(sizeof(struct linklist_node));
|
||||
new_node->index_key = tmp_first->index_key;
|
||||
new_node->basicinfo = tmp_first->basicinfo;
|
||||
new_node->listname = link_result;
|
||||
TAILQ_INSERT_TAIL(link_result, new_node, listentry);
|
||||
}
|
||||
tmp_first = TAILQ_NEXT(tmp_first, listentry);
|
||||
tmp_second = TAILQ_NEXT(tmp_second, listentry);
|
||||
}*/
|
||||
|
||||
}
|
||||
|
||||
//The list is not linked to the end nodes remaining deposit to results
|
||||
while(tmp_first != NULL)
|
||||
{
|
||||
if(tmp_first->basicinfo->origin_len >= min && tmp_first->basicinfo->origin_len <= max && tmp_first->basicinfo->blocksize == query_blocksize)
|
||||
{
|
||||
struct linklist_node * new_node = (struct linklist_node *)malloc(sizeof(struct linklist_node));
|
||||
new_node->index_key = tmp_first->index_key;
|
||||
new_node->basicinfo = tmp_first->basicinfo;
|
||||
new_node->listname = link_result;
|
||||
TAILQ_INSERT_TAIL(link_result, new_node, listentry);
|
||||
}
|
||||
tmp_first = TAILQ_NEXT(tmp_first, listentry);
|
||||
}
|
||||
while(tmp_second != NULL)
|
||||
{
|
||||
if(tmp_second->basicinfo->origin_len >= min && tmp_second->basicinfo->origin_len <= max && tmp_second->basicinfo->blocksize == query_blocksize)
|
||||
{
|
||||
struct linklist_node * new_node = (struct linklist_node *)malloc(sizeof(struct linklist_node));
|
||||
new_node->index_key = tmp_second->index_key;
|
||||
new_node->basicinfo = tmp_second->basicinfo;
|
||||
new_node->listname = link_result;
|
||||
TAILQ_INSERT_TAIL(link_result, new_node, listentry);
|
||||
}
|
||||
tmp_second = TAILQ_NEXT(tmp_second, listentry);
|
||||
}
|
||||
|
||||
|
||||
struct linklist_node * first_node = TAILQ_FIRST(list_first);
|
||||
while(first_node != NULL)
|
||||
{
|
||||
struct linklist_node * linklist_tmp = TAILQ_NEXT(first_node, listentry);
|
||||
free(first_node);
|
||||
first_node = linklist_tmp;
|
||||
}
|
||||
free(list_first);
|
||||
|
||||
|
||||
return link_result;
|
||||
}
|
||||
|
||||
|
||||
/* Return the smallest of the three integers. */
int minof3(int x, int y, int z)
{
    int smallest = x;

    if(y < smallest)
    {
        smallest = y;
    }
    if(z < smallest)
    {
        smallest = z;
    }
    return smallest;
}
|
||||
|
||||
|
||||
/*
 * Edit (Levenshtein) distance between the first l1 bytes of w1 and the
 * first l2 bytes of w2; insert, delete and replace each cost 1.
 *
 * Only one row of the dynamic-programming matrix is kept, so the working
 * memory is O(l2) instead of O(l1*l2).
 *
 * Negative lengths are treated as 0 (empty string). If the row cannot be
 * allocated, the upper bound max(l1, l2) is returned, which is the distance
 * between two strings with no bytes in common.
 */
int GIE_edit_distance(char* w1, int l1, const char* w2, int l2)
{
    int i, j;
    int result;
    int * row = NULL;

    if(l1 < 0) l1 = 0;
    if(l2 < 0) l2 = 0;

    row = (int *)malloc(sizeof(int) * ((size_t)l2 + 1));
    if(row == NULL)
    {
        return (l1 > l2) ? l1 : l2;
    }

    /* Row 0: turning the empty prefix of w1 into w2[0..j) takes j inserts. */
    for(j = 0; j <= l2; j++)
    {
        row[j] = j;
    }

    for(i = 1; i <= l1; i++)
    {
        int diag = row[0];  /* dp[i-1][j-1] */
        row[0] = i;         /* dp[i][0]: i deletions */
        for(j = 1; j <= l2; j++)
        {
            int up = row[j];                                    /* dp[i-1][j] */
            int best = diag + ((w1[i-1] != w2[j-1]) ? 1 : 0);   /* replace or keep */
            if(row[j-1] + 1 < best) best = row[j-1] + 1;        /* insert */
            if(up + 1 < best) best = up + 1;                    /* delete */
            row[j] = best;
            diag = up;
        }
    }

    result = row[l2];
    free(row);
    return result;
}
|
||||
|
||||
|
||||
int GIE_edit_distance_with_position(char * fh, const char * fuzzy_string, unsigned long long orilen, int * fuzzy_actual_size, unsigned long long * calculate_len)
|
||||
{
|
||||
*fuzzy_actual_size = 0;
|
||||
*calculate_len = 0;
|
||||
int edit_distance = 0;
|
||||
const char * tmpstr = fuzzy_string;
|
||||
const char * tmp_fuzzy = fuzzy_string;
|
||||
char * fh_tmp = fh;
|
||||
int tmp_fuzzy_len = 0;
|
||||
int fh_actual_len = 0;
|
||||
unsigned long long blocksize = 0;
|
||||
while(*fh_tmp != '\0')
|
||||
{
|
||||
if(*fh_tmp == '[')
|
||||
{
|
||||
break;
|
||||
}
|
||||
fh_actual_len ++;
|
||||
fh_tmp++;
|
||||
}
|
||||
//*fuzzy_all_actual_size = fh_actual_len;
|
||||
if(fh_actual_len != 0)
|
||||
{
|
||||
blocksize = (orilen - 1)/fh_actual_len;
|
||||
}
|
||||
else
|
||||
{
|
||||
blocksize = calc_fh_blocksize(orilen);
|
||||
}
|
||||
while(*tmpstr != '\0')
|
||||
{
|
||||
|
||||
int left = 0;
|
||||
int right = 0;
|
||||
if(*tmpstr == '[')
|
||||
{
|
||||
char numleft[100],numright[100];
|
||||
int i = 0 , j = 0;
|
||||
tmpstr ++;
|
||||
memset(numleft, '\0', sizeof(numleft));
|
||||
memset(numright, '\0', sizeof(numright));
|
||||
while(*tmpstr != '\0' && *tmpstr != ':')
|
||||
{
|
||||
numleft[i++] = *tmpstr;
|
||||
tmpstr ++;
|
||||
}
|
||||
//printf("i = %d\n", i);
|
||||
left = atoi(numleft);
|
||||
tmpstr++;
|
||||
while(*tmpstr != '\0' && *tmpstr !=']')
|
||||
{
|
||||
numright[j++] = *tmpstr;
|
||||
tmpstr ++;
|
||||
}
|
||||
//printf("j = %d\n", j);
|
||||
right = atoi(numright);
|
||||
*calculate_len += right - left;
|
||||
|
||||
//TODO: edit distance compare
|
||||
int index = left/blocksize - TOLERENCE_SIZE > 0 ? left/blocksize - TOLERENCE_SIZE: 0;
|
||||
int fh_size = right/blocksize + TOLERENCE_SIZE - index > fh_actual_len - index ? fh_actual_len - index: right/blocksize + TOLERENCE_SIZE - index;
|
||||
if(tmp_fuzzy_len != 0)
|
||||
{
|
||||
edit_distance += GIE_edit_distance(fh + index, fh_size, tmp_fuzzy, tmp_fuzzy_len);
|
||||
}
|
||||
*fuzzy_actual_size += tmp_fuzzy_len;
|
||||
|
||||
if(*tmpstr == ']')
|
||||
{
|
||||
tmp_fuzzy = tmpstr + 1;
|
||||
tmp_fuzzy_len = 0;
|
||||
}
|
||||
tmpstr ++;
|
||||
}
|
||||
else
|
||||
{
|
||||
tmp_fuzzy_len++;
|
||||
tmpstr ++;
|
||||
}
|
||||
}
|
||||
return edit_distance;
|
||||
}
|
||||
|
||||
|
||||
/*
 * Look up stored digests similar to a query fuzzy string.
 *
 * Candidate index keys are taken from the range
 * [origin_len*(1-accuracy), origin_len*(1+accuracy)], rounded down to
 * multiples of user_precision. The lists found under those keys are merged
 * with GIE_union(); every candidate is then scored against fuzzy_string and
 * reported when its score reaches the candidate's own cfds_lvl.
 *
 * handle       - index handle (a GIE_handle_inner_t behind the opaque type).
 * origin_len   - original length belonging to the query string.
 * fuzzy_string - query fuzzy hash.
 * results      - out: array with room for at least size entries.
 * size         - capacity of results; nothing is written when size <= 0.
 *
 * Returns the number of entries written to results, 0 if no candidate list
 * exists, or -1 if the candidate buffer cannot be allocated.
 */
int GIE_query(GIE_handle_t * handle, unsigned long long origin_len, const char * fuzzy_string, GIE_result_t * results, int size)
{
    GIE_handle_inner_t * _handle = (GIE_handle_inner_t *)handle;

    //find min_index
    double min_tmp = (double)(origin_len * (1 - _handle->user_query_accuracy));
    unsigned long long min_tmp_t = (unsigned long long)(floor(min_tmp));
    unsigned long long min_index = min_tmp_t/(_handle->user_precision)*(_handle->user_precision);

    //find max_index
    double max_tmp = (double)(origin_len * (1 + _handle->user_query_accuracy));
    unsigned long long max_tmp_t = (unsigned long long)(floor(max_tmp));
    unsigned long long max_index = max_tmp_t/(_handle->user_precision)*(_handle->user_precision);

    unsigned long long tmp_size = (max_index - min_index)/(_handle->user_precision) + 1;
    struct TQ * union_list[tmp_size];

    unsigned long long i = min_index;
    unsigned long long query_blocksize = calc_fh_blocksize(origin_len);
    struct id_table_data ** result_union = NULL;
    int list_num = 0;
    int union_size = 0;
    int union_size_max = 0;
    int ret_size = 0;
    int k = 0;

    /* Collect the lists in range; never store more than union_list can hold. */
    while(i <= max_index && (unsigned long long)list_num < tmp_size)
    {
        struct index_table_data * list_tmp = (struct index_table_data *)MESA_htable_search_cb(_handle->index_table, (const uchar * )(&i), \
                sizeof(i), NULL, NULL, NULL);
        if(list_tmp != NULL)
        {
            union_list[list_num++] = list_tmp->listhead;
            i = list_tmp->next_value;   /* jump straight to the next existing key */
            union_size_max += list_tmp->cnt;
        }
        else
        {
            i = i + _handle->user_precision;
        }
    }

    if(list_num == 0)
    {
        printf("the fh doesn't exsit!\n");
        return 0;
    }

    result_union = (struct id_table_data **)malloc(sizeof(struct id_table_data *)*union_size_max);
    if(result_union == NULL)
    {
        return -1;
    }

    union_size = GIE_union(union_list, list_num, result_union, min_tmp_t, max_tmp_t, query_blocksize);

    /* The capacity check sits in the loop condition so size <= 0 writes nothing. */
    for(k = 0; k < union_size && ret_size < size; k++)
    {
        int fuzzy_actual_len;
        unsigned long long calculate_len;
        short conf_tmp;
        int edit_distance = GIE_edit_distance_with_position(result_union[k]->fh, fuzzy_string, origin_len, &fuzzy_actual_len, &calculate_len);

        /* origin_len is a divisor below, so a zero length scores 0. */
        if(fuzzy_actual_len != 0 && origin_len != 0 && edit_distance < fuzzy_actual_len)
        {
            conf_tmp = (fuzzy_actual_len - edit_distance)*(calculate_len + 1)*CONF_MAX/(fuzzy_actual_len * origin_len);
        }
        else
        {
            conf_tmp = 0;
        }

        if(conf_tmp >= result_union[k]->cfds_lvl)
        {
            results[ret_size].cfds_lvl = conf_tmp;
            results[ret_size].id = result_union[k]->id;
            results[ret_size].origin_len = result_union[k]->origin_len;
            results[ret_size++].tag = result_union[k]->tag;
        }
    }

    free(result_union);
    return ret_size;
}
|
||||
@@ -99,6 +99,7 @@ int set_iris_descriptor(const char* json_file,cJSON *json,const char*compile_tn,
|
||||
map_register(iris_cfg->str2int_map, "expr_plus",TABLE_TYPE_EXPR_PLUS);
|
||||
map_register(iris_cfg->str2int_map, "intval",TABLE_TYPE_INTERVAL);
|
||||
map_register(iris_cfg->str2int_map, "digest",TABLE_TYPE_DIGEST);
|
||||
map_register(iris_cfg->str2int_map, "similar",TABLE_TYPE_SIMILARITY);
|
||||
|
||||
|
||||
map_register(iris_cfg->str2int_map, "ipv4",4);
|
||||
@@ -460,6 +461,35 @@ int write_digest_rule(cJSON *region_json,struct iris_description_t *p_iris,const
|
||||
|
||||
return direct_write_rule(region_json, p_iris->str2int_map,json_cmd, cmd_cnt,path,logger);
|
||||
|
||||
}
|
||||
/*
 * Write one "similar" region rule.
 *
 * Builds the column description (region_id, group_id, target, threshold,
 * is_valid, in that order) and passes it to direct_write_rule() together
 * with the region JSON and the output path. Returns whatever
 * direct_write_rule() returns.
 */
int write_similar_rule(cJSON *region_json,struct iris_description_t *p_iris,const char* path,void * logger)
{
	struct traslate_command_t json_cmd[MAX_COLUMN_NUM];
	struct traslate_command_t *col=NULL;
	int cmd_cnt=0;

	memset(json_cmd,0,sizeof(json_cmd));

	col=&json_cmd[cmd_cnt++];
	col->json_string="region_id";
	col->json_type=cJSON_Number;

	col=&json_cmd[cmd_cnt++];
	col->json_string="group_id";
	col->json_type=cJSON_Number;

	col=&json_cmd[cmd_cnt++];
	col->json_string="target";
	col->json_type=cJSON_String;

	col=&json_cmd[cmd_cnt++];
	col->json_string="threshold";
	col->json_type=cJSON_Number;

	col=&json_cmd[cmd_cnt++];
	col->json_string="is_valid";
	col->json_type=cJSON_Number;

	return direct_write_rule(region_json, p_iris->str2int_map,json_cmd, cmd_cnt,path,logger);
}
|
||||
struct iris_table_t* query_table_info(iris_description_t* p_iris,const char* table_name)
|
||||
{
|
||||
@@ -605,6 +635,9 @@ int write_region_rule(cJSON* region_json,int compile_id,int group_id,iris_descri
|
||||
case TABLE_TYPE_DIGEST:
|
||||
ret=write_digest_rule(table_content, p_iris, table_info->table_path, logger);
|
||||
break;
|
||||
case TABLE_TYPE_SIMILARITY:
|
||||
write_similar_rule(table_content, p_iris,table_info->table_path, logger);
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
break;
|
||||
|
||||
@@ -11,15 +11,24 @@
|
||||
//#define DEBUG_PRINT
|
||||
#define INIT_SIZE 128
|
||||
#define ENTROPY_THRESHOLD 0.5
|
||||
|
||||
const char * sfh_b64 =
|
||||
#define MULTIPLE 4
|
||||
int count = 0;
|
||||
const char * map_to64bytes =
|
||||
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
|
||||
|
||||
|
||||
/*
 * Bundle of an unsigned-int array with two counters.
 * NOTE(review): presumably a growable array of rolling-hash values with its
 * used count and capacity - confirm against the code that fills it.
 */
struct entry
{
    unsigned int * r_array;   /* array storage */
    unsigned int r_index;     /* presumably the next free slot - TODO confirm */
    unsigned int r_size;      /* presumably the allocated capacity - TODO confirm */
};
|
||||
|
||||
double get_rs_entropy(unsigned int * r_array, unsigned int r_index);
|
||||
int loop_cmp(const void * a, const void * b);
|
||||
int cmp(const void * a, const void * b);
|
||||
void sfh_rs_entropy(IVI_seg_t * seg, void * user_para);
|
||||
void sfh_tune_simulation(IVI_seg_t * seg, void * user_para);
|
||||
|
||||
void sfh_output_state_t(IVI_seg_t * seg, void * user_para);
|
||||
void write_uint_array(fuzzy_handle_inner_t * handle,unsigned int ** array, unsigned int *index,unsigned int *size,unsigned int value);
|
||||
/**
|
||||
* Initialization of roll_state
|
||||
@@ -88,8 +97,9 @@ fuzzy_handle_t * fuzzy_create_handle(unsigned long long origin_len)
|
||||
handle->effective_length = 0;
|
||||
handle->length_increase = 0;
|
||||
handle->sim_tuned_rs_cnt = 0;
|
||||
handle->blocksize=tmp_blksize;
|
||||
handle->do_tune=0;
|
||||
//handle->blocksize=tmp_blksize;
|
||||
handle->blocksize = 3;
|
||||
handle->do_tune=1;
|
||||
return (fuzzy_handle_t *)handle;
|
||||
}
|
||||
|
||||
@@ -126,23 +136,24 @@ unsigned int fuzzy_feed(fuzzy_handle_t * handle, const char * data, unsigned int
|
||||
_handle->length_increase += length;
|
||||
if(_handle->s_state_cnt>EXPECT_SIGNATURE_LEN&&_handle->do_tune==1)
|
||||
{
|
||||
|
||||
|
||||
//printf("s_state_cnt before:%d\n", _handle->s_state_cnt);
|
||||
//printf("blocksize before:%llu\n", _handle->blocksize);
|
||||
unsigned long long check_length = (_handle->effective_length/_handle->s_state_cnt)*EXPECT_SIGNATURE_LEN;
|
||||
|
||||
if(_handle->length_increase > check_length)
|
||||
{
|
||||
|
||||
|
||||
IVI_traverse(_handle->ivi, sfh_tune_simulation, (void *)_handle);
|
||||
//printf("sim_rs_cnt:%d\n", _handle->sim_tuned_rs_cnt);
|
||||
if(_handle->sim_tuned_rs_cnt>EXPECT_SIGNATURE_LEN)
|
||||
{
|
||||
_handle->blocksize*=2;
|
||||
_handle->blocksize*= MULTIPLE;
|
||||
IVI_traverse(_handle->ivi, sfh_tune_seg, (void *)_handle);
|
||||
}
|
||||
_handle->sim_tuned_rs_cnt = 0;
|
||||
_handle->length_increase = 0;
|
||||
}
|
||||
//printf("s_state_cnt after:%d\n", _handle->s_state_cnt);
|
||||
//printf("blocksize after:%llu\n", _handle->blocksize);
|
||||
}
|
||||
#if 0
|
||||
fuzzy_digest(handle,result, sizeof(result));
|
||||
@@ -157,7 +168,7 @@ void sfh_tune_simulation(IVI_seg_t * seg, void * user_para)
|
||||
sfh_seg_t * tmp = (sfh_seg_t *)(seg->data);
|
||||
int i = 0;
|
||||
fuzzy_handle_inner_t * _handle = (fuzzy_handle_inner_t *)user_para;
|
||||
unsigned long long blocksize = _handle->blocksize * 2;
|
||||
unsigned long long blocksize = _handle->blocksize * MULTIPLE;
|
||||
for(i = 0; i < tmp->r_cnt; i++)
|
||||
{
|
||||
if(tmp->r_array[i] % blocksize == blocksize -1)
|
||||
@@ -331,7 +342,7 @@ unsigned int segment_overlap(fuzzy_handle_inner_t * _handle, unsigned int size,
|
||||
return effective_length;
|
||||
}
|
||||
|
||||
int loop_cmp(const void * a, const void * b)
|
||||
int cmp(const void * a, const void * b)
|
||||
{
|
||||
unsigned int tmp_a = *(unsigned int *)a;
|
||||
unsigned int tmp_b = *(unsigned int *)b;
|
||||
@@ -351,7 +362,7 @@ int loop_cmp(const void * a, const void * b)
|
||||
|
||||
double get_rs_entropy(unsigned int * r_array, unsigned int r_index)
|
||||
{
|
||||
qsort(r_array, r_index, sizeof(unsigned int), loop_cmp);
|
||||
qsort(r_array, r_index, sizeof(unsigned int), cmp);
|
||||
unsigned int current_r = r_array[0];
|
||||
unsigned int * tmp_r = r_array;
|
||||
unsigned int count = 0;
|
||||
@@ -410,6 +421,28 @@ void sfh_tune_seg(IVI_seg_t * seg, void * user_para)
|
||||
fuzzy_handle_inner_t * _handle = (fuzzy_handle_inner_t *)user_para;
|
||||
unsigned long long blocksize = _handle->blocksize;
|
||||
|
||||
/* memcpy(&(p->ps_t),&(p->ps), sizeof(struct zt_state_t));
|
||||
memcpy(&(p->s_state_t),&(p->s_state), sizeof(struct zt_state_t));
|
||||
memcpy(&(p->r_state_t),&(p->r_state), sizeof(struct roll_state_t));
|
||||
|
||||
if(p->r_array_t!=NULL)
|
||||
{
|
||||
free(p->r_array_t);
|
||||
}
|
||||
p->r_cnt_t = p->r_cnt;
|
||||
p->r_size_t = p->r_size;
|
||||
p->r_array_t = (unsigned int *)malloc(sizeof(unsigned int)*(p->r_size_t));
|
||||
memcpy(p->r_array_t, p->r_array, sizeof(unsigned int)*(p->r_cnt_t));
|
||||
|
||||
if(p->s_array_t!=NULL)
|
||||
{
|
||||
free(p->s_array_t);
|
||||
}
|
||||
p->s_cnt_t = p->s_cnt;
|
||||
p->s_size_t = p->s_size;
|
||||
p->s_array_t = (struct zt_state_t *)malloc(sizeof(struct zt_state_t)*(p->s_size_t));
|
||||
memcpy(p->s_array_t, p->s_array, sizeof(struct zt_state_t)*(p->s_cnt_t));*/
|
||||
|
||||
struct zt_state_t tmp_zt;
|
||||
int new_zt_cnt=0;
|
||||
zt_hash_initial(&tmp_zt);
|
||||
@@ -591,12 +624,17 @@ int fuzzy_digest(fuzzy_handle_t * handle, char * result, unsigned int size)
|
||||
final_result * temp = (final_result *)malloc(sizeof(final_result));
|
||||
fuzzy_handle_inner_t* _handle=(fuzzy_handle_inner_t *)handle;
|
||||
temp->data = result;
|
||||
temp->size = size-1;
|
||||
temp->size = size;
|
||||
temp->offset = 0;
|
||||
temp->first_ZTH_offset = 0;
|
||||
temp->last_ZTH_offset = 0;
|
||||
temp->offset+=snprintf(temp->data,temp->size,"%llu:",_handle->blocksize);
|
||||
temp->offset += snprintf(temp->data,temp->size,"%llu:",_handle->blocksize);
|
||||
IVI_traverse(_handle->ivi, sfh_output_state, (void *) temp);
|
||||
_handle->blocksize*= MULTIPLE;
|
||||
IVI_traverse(_handle->ivi, sfh_tune_seg, (void *)_handle);
|
||||
temp->offset += snprintf(temp->data+temp->offset,temp->size,"#%llu:",_handle->blocksize);
|
||||
IVI_traverse(_handle->ivi, sfh_output_state, (void *) temp);
|
||||
//IVI_traverse(_handle->ivi, sfh_output_state_t, (void *) temp);
|
||||
result[temp->offset] = '\0';
|
||||
free(temp);
|
||||
temp = NULL;
|
||||
@@ -611,24 +649,24 @@ void sfh_output_state(IVI_seg_t * seg, void * user_para)
|
||||
char hash_result[node->r_cnt + 1];
|
||||
hash_result[node->r_cnt] = '\0';
|
||||
int i = 0, j = 0, to_copy_len=0,this_len=0;
|
||||
if(node->s_cnt==0&&!(seg->left==0&&node->s_cnt>0))
|
||||
if(node->s_cnt==0&&!(seg->left==0&&node->s_cnt > 0))
|
||||
{
|
||||
return;
|
||||
}
|
||||
memset(hash_result,0,sizeof(hash_result));
|
||||
if(seg->left == 0)
|
||||
{
|
||||
hash_result[j] = sfh_b64[zt_hash_code(&(node->ps)) & 0x3F];
|
||||
hash_result[j] = map_to64bytes[zt_hash_code(&(node->ps)) & 0x3F];
|
||||
j++;
|
||||
}
|
||||
for(i = 0; i < node->s_cnt; i++,j++)
|
||||
{
|
||||
hash_result[j] = sfh_b64[(node->s_array[i].val) & 0x3F];
|
||||
hash_result[j] = map_to64bytes[(node->s_array[i].val) & 0x3F];
|
||||
}
|
||||
hash_result[j+1]='\0';
|
||||
if(0!=memcmp(&(node->s_state),ZT_INIT_VAL,sizeof(ZT_INIT_VAL)))
|
||||
{
|
||||
result->last_char=sfh_b64[zt_hash_code(&(node->s_state)) & 0x3F];
|
||||
result->last_char=map_to64bytes[zt_hash_code(&(node->s_state)) & 0x3F];
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -646,6 +684,52 @@ void sfh_output_state(IVI_seg_t * seg, void * user_para)
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
 * IVI_traverse callback: append one segment of the "_t" hash state to the
 * output buffer described by user_para (a final_result).
 *
 * Output per segment: one char for ps_t when the segment starts at offset 0,
 * one char per s_array_t entry, then the range text "[left:right]". Each
 * char is picked from map_to64bytes with the low 6 bits of the hash value.
 * result->last_char receives the char for the unfinished s_state_t, or '\0'
 * when s_state_t still equals ZT_INIT_VAL.
 *
 * Nothing is written past result->size: whatever does not fit is dropped.
 *
 * NOTE(review): the original guard was
 * "s_cnt_t==0 && !(left==0 && s_cnt_t>0)", which reduces to "s_cnt_t==0";
 * fuzzy_hash_length() tests r_cnt in the same position - confirm which
 * counter is intended.
 */
void sfh_output_state_t(IVI_seg_t * seg, void * user_para)
{
	char range_text[2000];
	final_result * result = (final_result *)user_para;
	sfh_seg_t * node = (sfh_seg_t *)(seg->data);
	char * out = NULL;
	long long room = 0;      /* bytes still free in result->data */
	long long written = 0;   /* hash chars emitted for this segment */
	int range_len = 0;
	unsigned int i = 0;

	if(node->s_cnt_t==0)
	{
		return;
	}

	room = (long long)result->size - (long long)result->offset;
	if(room < 0)
	{
		room = 0;
	}
	out = result->data + result->offset;

	/* Hash chars go straight into the output, so no temporary buffer can overflow. */
	if(seg->left == 0 && written < room)
	{
		out[written++] = map_to64bytes[zt_hash_code(&(node->ps_t)) & 0x3F];
	}
	for(i = 0; i < node->s_cnt_t && written < room; i++)
	{
		out[written++] = map_to64bytes[(node->s_array_t[i].val) & 0x3F];
	}

	if(0!=memcmp(&(node->s_state_t),ZT_INIT_VAL,sizeof(ZT_INIT_VAL)))
	{
		result->last_char=map_to64bytes[zt_hash_code(&(node->s_state_t)) & 0x3F];
	}
	else
	{
		result->last_char='\0';
	}

	/* Range text, cut to the space that is left. */
	range_len=snprintf(range_text,sizeof(range_text), "[%llu:%llu]",seg->left, seg->right);
	if(range_len < 0)
	{
		range_len = 0;
	}
	if(range_len > (int)sizeof(range_text) - 1)
	{
		range_len = (int)sizeof(range_text) - 1;
	}
	if((long long)range_len > room - written)
	{
		range_len = (int)(room - written);
	}
	memcpy(out + written, range_text, (size_t)range_len);

	result->offset += written + range_len;
	return;
}
|
||||
|
||||
|
||||
/**
|
||||
* Compute the various lengths of the fuzzy_hash
|
||||
*/
|
||||
@@ -685,10 +769,13 @@ void fuzzy_hash_length(IVI_seg_t * seg, void * user_para)
|
||||
char buffer[100];
|
||||
final_length * tmp = (final_length *)user_para;
|
||||
sfh_seg_t * node = (sfh_seg_t *)(seg->data);
|
||||
|
||||
sprintf(buffer, "[%llu:%llu]", seg->left, seg->right);
|
||||
|
||||
tmp->hash_length += node->r_cnt*sizeof(char) + strlen(buffer);
|
||||
if(node->s_cnt==0&&!(seg->left==0&&node->r_cnt > 0))
|
||||
{
|
||||
return;
|
||||
}
|
||||
snprintf(buffer, sizeof(buffer), "[%llu:%llu]", seg->left, seg->right);
|
||||
tmp->hash_length += 2*node->r_cnt*sizeof(char) + 2*strlen(buffer);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -46,17 +46,33 @@ typedef struct
|
||||
char pad[8-ROLLING_WINDOW+1];
|
||||
int slice_num;
|
||||
unsigned int msize;
|
||||
|
||||
struct zt_state_t ps; //partial strong hash value
|
||||
struct zt_state_t s_state;//strong hash state
|
||||
struct zt_state_t s_state; //strong hash state
|
||||
struct roll_state_t r_state;
|
||||
|
||||
struct zt_state_t ps_t;
|
||||
struct zt_state_t s_state_t;
|
||||
struct roll_state_t r_state_t;
|
||||
|
||||
|
||||
unsigned long long left_offset;
|
||||
unsigned long long right_offset;
|
||||
struct roll_state_t r_state;
|
||||
|
||||
unsigned int * r_array; //array to store rolling hash value
|
||||
unsigned int r_cnt;
|
||||
unsigned int r_size;
|
||||
struct zt_state_t * s_array; //array to store strong(Tillichi-Zemor) hash value
|
||||
unsigned int s_cnt; //always point to the next available position
|
||||
unsigned int s_size;
|
||||
|
||||
unsigned int * r_array_t;
|
||||
unsigned int r_cnt_t;
|
||||
unsigned int r_size_t;
|
||||
struct zt_state_t * s_array_t;
|
||||
unsigned int s_cnt_t;
|
||||
unsigned int s_size_t;
|
||||
|
||||
}sfh_seg_t;
|
||||
|
||||
|
||||
@@ -104,3 +120,4 @@ void sfh_output_state(IVI_seg_t * seg, void * user_para);
|
||||
void fuzzy_hash_length(IVI_seg_t * seg, void * user_para);
|
||||
unsigned long long fuzzy_status(fuzzy_handle_t * handle, int type);
|
||||
#endif
|
||||
|
||||
|
||||
@@ -10,7 +10,7 @@ struct zt_state_t
|
||||
};
|
||||
};
|
||||
|
||||
const unsigned char table_char2matrix[256][4] =
|
||||
const unsigned char zt_multi_table[256][4] =
|
||||
{
|
||||
{76,28,128,81},{76,204,128,209},{204,128,209,81},{204,76,209,128},{238,209,196,115},{238,63,196,183},{63,209,183,115},{63,238,183,196},{230,196,193,123},{230,34,193,186},{34,196,186,123},{34,230,186,193},{0,183,175,76},{0,183,175,227},{183,183,227,76},{183,0,227,175},{228,193,192,121},{228,37,192,185},{37,193,185,121},{37,228,185,192},{18,186,164,75},{18,168,164,239},{168,186,239,75},{168,18,239,164},{15,175,169,67},{15,160,169,234},{160,175,234,67},{160,15,234,169},{151,227,247,108},{151,116,247,155},{116,227,155,108},{116,151,155,247},{228,192,193,121},{228,36,193,184},{36,192,184,121},{36,228,184,193},{22,185,167,74},{22,175,167,237},{175,185,237,74},{175,22,237,167},{30,164,162,71},{30,186,162,229},{186,164,229,71},{186,30,229,162},{136,239,250,107},{136,103,250,145},{103,239,145,107},{103,136,145,250},{12,169,169,64},{12,165,169,233},{165,169,233,64},{165,12,233,169},{138,234,251,105},{138,96,251,146},{96,234,146,105},{96,138,146,251},{159,247,243,100},{159,104,243,151},{104,247,151,100},{104,159,151,243},{71,155,133,95},{71,220,133,218},{220,155,218,95},{220,71,218,133},{230,193,196,123},{230,39,196,191},{39,193,191,123},{39,230,191,196},{20,184,160,73},{20,172,160,233},{172,184,233,73},{172,20,233,160},{25,167,167,69},{25,190,167,226},{190,167,226,69},{190,25,226,167},{141,237,253,104},{141,96,253,149},{96,237,149,104},{96,141,149,253},{30,162,164,71},{30,188,164,227},{188,162,227,71},{188,30,227,164},{144,229,240,109},{144,117,240,157},{117,229,157,109},{117,144,157,240},{130,250,251,97},{130,120,251,154},{120,250,154,97},{120,130,154,251},{84,145,137,88},{84,197,137,209},{197,145,209,88},{197,84,209,137},{15,169,175,67},{15,166,175,236},{166,169,236,67},{166,15,236,175},{143,233,253,106},{143,102,253,151},{102,233,151,106},{102,143,151,253},{130,251,250,97},{130,121,250,155},{121,251,155,97},{121,130,155,250},{80,146,138,89},{80,194,138,211},{194,146,211,89},{194,80,211,138},{159,243,247,100},{159,108,247,147},{108,243,147,100},{108,159,147,247},{87,151,137
,91},{87,192,137,210},{192,151,210,91},{192,87,210,137},{72,133,133,80},{72,205,133,213},{205,133,213,80},{205,72,213,133},{246,218,207,117},{246,44,207,186},{44,218,186,117},{44,246,186,207},{238,196,209,115},{238,42,209,162},{42,196,162,115},{42,238,162,209},{24,191,191,68},{24,167,191,251},{167,191,251,68},{167,24,251,191},{20,160,184,73},{20,180,184,241},{180,160,241,73},{180,20,241,184},{134,233,236,99},{134,111,236,143},{111,233,143,99},{111,134,143,236},{22,167,185,74},{22,177,185,243},{177,167,243,74},{177,22,243,185},{156,226,227,103},{156,126,227,132},{126,226,132,103},{126,156,132,227},{143,253,233,106},{143,114,233,131},{114,253,131,106},{114,143,131,233},{95,149,147,87},{95,202,147,196},{202,149,196,87},{202,95,196,147},{18,164,186,75},{18,182,186,241},{182,164,241,75},{182,18,241,186},{156,227,226,103},{156,127,226,133},{127,227,133,103},{127,156,133,226},{144,240,229,109},{144,96,229,136},{96,240,136,109},{96,144,136,229},{74,157,155,82},{74,215,155,201},{215,157,201,82},{215,74,201,155},{138,251,234,105},{138,113,234,131},{113,251,131,105},{113,138,131,234},{72,154,154,81},{72,210,154,203},{210,154,203,81},{210,72,203,154},{87,137,151,91},{87,222,151,204},{222,137,204,91},{222,87,204,151},{231,209,213,122},{231,54,213,175},{54,209,175,122},{54,231,175,213},{0,175,183,76},{0,175,183,251},{175,175,251,76},{175,0,251,183},{134,236,233,99},{134,106,233,138},{106,236,138,99},{106,134,138,233},{141,253,237,104},{141,112,237,133},{112,253,133,104},{112,141,133,237},{89,151,151,85},{89,206,151,194},{206,151,194,85},{206,89,194,151},{136,250,239,107},{136,114,239,132},{114,250,132,107},{114,136,132,239},{74,155,157,82},{74,209,157,207},{209,155,207,82},{209,74,207,157},{80,138,146,89},{80,218,146,203},{218,138,203,89},{218,80,203,146},{226,211,210,121},{226,49,210,171},{49,211,171,121},{49,226,171,210},{151,247,227,108},{151,96,227,143},{96,247,143,108},{96,151,143,227},{95,147,149,87},{95,204,149,194},{204,147,194,87},{204,95,194,149},{84,137,145,88},{84,221
,145,201},{221,137,201,88},{221,84,201,145},{226,210,211,121},{226,48,211,170},{48,210,170,121},{48,226,170,211},{71,133,155,95},{71,194,155,196},{194,133,196,95},{194,71,196,155},{231,213,209,122},{231,50,209,171},{50,213,171,122},{50,231,171,209},{246,207,218,117},{246,57,218,175},{57,207,175,117},{57,246,175,218},{28,186,186,69},{28,166,186,255},{166,186,255,69},{166,28,255,186}
|
||||
};
|
||||
@@ -124,7 +124,7 @@ static inline void zt_hash_arymul(struct zt_state_t * a, struct zt_state_t* b)
|
||||
}
|
||||
|
||||
/*
|
||||
** this function is used to create the table_char2matrix[4][256]
|
||||
** this function is used to create the table[256][4]
|
||||
*/
|
||||
/*void convert(int number, unsigned char * ret)
|
||||
{
|
||||
@@ -172,26 +172,26 @@ static inline void zt_hash_arymul(struct zt_state_t * a, struct zt_state_t* b)
|
||||
|
||||
|
||||
/*
|
||||
** this function is used to create table_char2matrix[4][256]
|
||||
** this function is used to create table[256][4]
|
||||
*/
|
||||
/*void zt_hash_create_table()
|
||||
{
|
||||
unsigned char ret[4]={0};
|
||||
int i = 0;
|
||||
FILE * fp;
|
||||
fp = fopen("/home/lixiang/zt_hash/table_char2matrix.txt","a");
|
||||
fp = fopen("/home/lixiang/zt_hash/table.txt","a");
|
||||
//galois_create_mult_tables(8); //it should not be a comment
|
||||
for(i = 0; i < 256; i++)
|
||||
{
|
||||
convert(i, ret);
|
||||
table_char2matrix[i].matrix[0] = ret[0];
|
||||
table_char2matrix[i].matrix[1] = ret[1];
|
||||
table_char2matrix[i].matrix[2] = ret[2];
|
||||
table_char2matrix[i].matrix[3] = ret[3];
|
||||
fprintf(fp, "{%d,%d,%d,%d},", table_char2matrix[i].matrix[0],
|
||||
table_char2matrix[i].matrix[1],
|
||||
table_char2matrix[i].matrix[2],
|
||||
table_char2matrix[i].matrix[3]);
|
||||
table[i].matrix[0] = ret[0];
|
||||
table[i].matrix[1] = ret[1];
|
||||
table[i].matrix[2] = ret[2];
|
||||
table[i].matrix[3] = ret[3];
|
||||
fprintf(fp, "{%d,%d,%d,%d},", table[i].matrix[0],
|
||||
table[i].matrix[1],
|
||||
table[i].matrix[2],
|
||||
table[i].matrix[3]);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -207,7 +207,7 @@ void zt_hash_destroy_table()
|
||||
|
||||
/*
 * Feed one input byte into a hash state.
 *
 * array: hash state handed to zt_hash_arymul() as its first argument;
 *        presumably updated in place, since this function returns nothing
 *        (confirm against zt_hash_arymul()).
 * c:     input byte; used as the row index into a 256-row, 4-byte-per-row
 *        lookup table.
 *
 * The selected row is reinterpreted as a struct zt_state_t and passed as the
 * second argument of zt_hash_arymul(). The cast drops the table's const
 * qualifier and assumes struct zt_state_t is layout-compatible with
 * unsigned char[4] -- TODO confirm against the struct definition.
 */
inline void zt_hash(struct zt_state_t* array, unsigned char c)
|
||||
{
|
||||
/* NOTE(review): the two calls below differ only in the table name
 * (table_char2matrix vs. zt_multi_table). In this rendering they look like
 * the removed and the added version of one call rather than two successive
 * multiplications -- confirm against the actual file. */
zt_hash_arymul(array, (struct zt_state_t *)(table_char2matrix[c]));
|
||||
zt_hash_arymul(array, (struct zt_state_t *)(zt_multi_table[c]));
|
||||
}
|
||||
|
||||
unsigned char ZT_INIT_VAL[4]={1,0,0,1};
|
||||
|
||||
@@ -74,7 +74,7 @@ void dir_digest(int argc, char * argv[])
|
||||
}
|
||||
while((file = readdir(dir)) != NULL)
|
||||
{
|
||||
if(!strcmp(file->d_name, ".") ||!strcmp(file->d_name, "..")||file->d_type!=DT_REG)
|
||||
if(!strcmp(file->d_name, ".") ||!strcmp(file->d_name, ".."))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -180,8 +180,8 @@
|
||||
"table_type": "digest",
|
||||
"table_content": {
|
||||
"raw_len": 1160164,
|
||||
"digest": "12288:UChtbFS6pypdTy4m2[0:1160163]",
|
||||
"cfds_level": 3
|
||||
"digest": "3072:Xk/maCm4yLYtRIFDFnVfHH+CAQI6VD5mekDmaa/4qCuFnqak1s3/+Gn1IJHa/AvybUsbGWcIAy9grTp2s5bbj/TaKxONfb[0:1160163]#12288:UChtbFS6pypdTy4m2[0:1160163]",
|
||||
"cfds_level": 70
|
||||
}
|
||||
}
|
||||
]
|
||||
@@ -350,8 +350,7 @@
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
,
|
||||
},
|
||||
{
|
||||
"compile_id": 134,
|
||||
"service": 1,
|
||||
@@ -378,6 +377,31 @@
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"compile_id": 135,
|
||||
"service": 1,
|
||||
"action": 1,
|
||||
"do_blacklist": 1,
|
||||
"do_log": 1,
|
||||
"effective_rage": 0,
|
||||
"user_region": "anything",
|
||||
"is_valid": "yes",
|
||||
"groups": [
|
||||
{
|
||||
"group_name": "Untitled",
|
||||
"regions": [
|
||||
{
|
||||
"table_name": "SIM_URL",
|
||||
"table_type": "similar",
|
||||
"table_content": {
|
||||
"target": "mwss.xiu.youku.com/live/hls/v1/0000000000000000000000001526a0a8/709.ts?&token=98765",
|
||||
"threshold": 90
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"plugin_table": [
|
||||
|
||||
@@ -442,6 +442,25 @@ int test_expr_plus(Maat_feather_t feather,const char* table_name,scan_status_t*
|
||||
return ret;
|
||||
|
||||
}
|
||||
/*
 * Smoke test for Maat_similar_scan_string().
 *
 * feather:    Maat handle used for both table registration and the scan.
 * table_name: name of the table to register and scan against.
 * mid:        scan status passed straight through to the scan call.
 *
 * Registers table_name, scans one fixed sample URL on thread 0 and prints
 * the formatted result. Returns -1 when registration fails; otherwise
 * returns whatever Maat_similar_scan_string() returned.
 */
int test_string_similar_scan(Maat_feather_t feather,const char* table_name,scan_status_t* mid)
{
	struct Maat_rule_t hits[4];
	const char* sample_url="mwss.xiu.youku.com/live/hls/v1/0000000000000000000000001526a0a8/714.ts?&token=98765";
	int hit_cnt=0;
	int tid=Maat_table_register(feather,table_name);

	/* Nothing to scan if the table is unknown. */
	if(tid==-1)
	{
		printf("Database table %s register failed.\n",table_name);
		return -1;
	}

	hit_cnt=Maat_similar_scan_string(feather, tid,
						sample_url, strlen(sample_url),
						hits, (int)(sizeof(hits)/sizeof(hits[0])),
						mid, 0);
	printf("Similar String Scan:%s\n",print_maat_result(hits,hit_cnt));
	return hit_cnt;
}
|
||||
int test_table_conjunction(Maat_feather_t feather,const char* table_name,const char* conj_table_name,scan_status_t* mid)
|
||||
{
|
||||
int ret=0;
|
||||
@@ -695,6 +714,9 @@ int main(int argc,char* argv[])
|
||||
test_str_stream_scan(feather,"HTTP_URL", &mid);
|
||||
Maat_clean_status(&mid);
|
||||
|
||||
test_string_similar_scan(feather,"SIM_URL",&mid);
|
||||
Maat_clean_status(&mid);
|
||||
|
||||
test_table_conjunction(feather, "HTTP_URL", "HTTP_HOST", &mid);
|
||||
Maat_clean_status(&mid);
|
||||
if(1==using_redis)
|
||||
|
||||
@@ -16,3 +16,4 @@
|
||||
6 QD_ENTRY_INFO plugin 4
|
||||
7 FILE_DIGEST digest --
|
||||
8 HTTP_REGION expr_plus GBK GBK no 0
|
||||
9 SIM_URL similar --
|
||||
|
||||
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user