diff --git a/inc/mesa_fuzzy.h b/inc/stream_fuzzy_hash.h similarity index 80% rename from inc/mesa_fuzzy.h rename to inc/stream_fuzzy_hash.h index ddd97c6..5afa9b4 100644 --- a/inc/mesa_fuzzy.h +++ b/inc/stream_fuzzy_hash.h @@ -25,22 +25,22 @@ extern "C" { #define EFFECTIVE_LENGTH 1 #define HASH_LENGTH 2 -// typedef fuzzy_handle_t void*; +// typedef sfh_instance_t void*; typedef struct { -}fuzzy_handle_t; +}sfh_instance_t; /** * create a fuzzy hash handle and return it. * @return [handle] */ -fuzzy_handle_t * fuzzy_create_handle(unsigned long long origin_len); +sfh_instance_t * SFH_instance(unsigned long long origin_len); /** * destroy context by a fuzzy hash handle. * @param handle [handle] */ -void fuzzy_destroy_handle(fuzzy_handle_t * handle); +void SFH_release(sfh_instance_t * handle); /** * Feed the function your data. @@ -51,7 +51,7 @@ void fuzzy_destroy_handle(fuzzy_handle_t * handle); * @param offset [offset] * @return [return effective data length in current feed] */ -unsigned int fuzzy_feed(fuzzy_handle_t * handle, const char* data, unsigned int size, unsigned long long offset); +unsigned int SFH_feed(sfh_instance_t * handle, const char* data, unsigned int size, unsigned long long offset); /** * Obtain the fuzzy hash values. @@ -62,7 +62,7 @@ unsigned int fuzzy_feed(fuzzy_handle_t * handle, const char* data, unsigned int * @param size [@result size] * @return [return zero on success, non-zero on error] */ -int fuzzy_digest(fuzzy_handle_t * handle, char* result, unsigned int size); +int SFH_digest(sfh_instance_t * handle, char* result, unsigned int size); /** * Obtain certain length of fuzzy hash status. @@ -74,7 +74,7 @@ int fuzzy_digest(fuzzy_handle_t * handle, char* result, unsigned int size); * HASH_LENGTH:Hash result length. * @return [length value] */ -unsigned long long fuzzy_status(fuzzy_handle_t * handle, int type); +unsigned long long SFH_status(sfh_instance_t * handle, int type); #ifdef __cplusplus diff --git a/src/entry/Maat_api.cpp b/src/entry/Maat_api.cpp index 817a4d8..e31fefb 100644 --- a/src/entry/Maat_api.cpp +++ b/src/entry/Maat_api.cpp @@ -1500,7 +1500,7 @@ stream_para_t Maat_stream_scan_digest_start(Maat_feather_t feather,int table_id, { struct _Maat_feather_t* _feather=(_Maat_feather_t*)feather; struct _Maat_scanner_t* scanner=NULL; - fuzzy_handle_t * tmp_fuzzy_handle=NULL; + sfh_instance_t * tmp_fuzzy_handle=NULL; struct _Maat_table_info_t *p_table=NULL; p_table=acqurie_table(_feather, table_id, TABLE_TYPE_DIGEST); if(p_table==NULL) @@ -1508,7 +1508,7 @@ stream_para_t Maat_stream_scan_digest_start(Maat_feather_t feather,int table_id, _feather->scan_err_cnt++; return NULL; } - tmp_fuzzy_handle=fuzzy_create_handle(total_len); + tmp_fuzzy_handle=SFH_instance(total_len); if(tmp_fuzzy_handle==NULL) { _feather->scan_err_cnt++; @@ -1584,7 +1584,7 @@ int Maat_stream_scan_digest(stream_para_t * stream_para, const char * data, int } aligment_int64_array_add(sp->feather->thread_call_cnt, sp->thread_num, 1); pthread_mutex_lock(&(sp->fuzzy_mutex)); - sp->acc_scan_len+=fuzzy_feed(sp->fuzzy_hash_handle, data, (unsigned int)data_len,offset); + sp->acc_scan_len+=SFH_feed(sp->fuzzy_hash_handle, data, (unsigned int)data_len,offset); pthread_mutex_unlock(&(sp->fuzzy_mutex)); do_query=REACH_QUERY_THRESH(sp->total_len, sp->acc_scan_len, sp->query_point,8); if(do_query==0) @@ -1592,7 +1592,7 @@ int Maat_stream_scan_digest(stream_para_t * stream_para, const char * data, int goto fast_out; } pthread_mutex_lock(&(sp->fuzzy_mutex)); - digest_len=fuzzy_status(sp->fuzzy_hash_handle, HASH_LENGTH); + digest_len=SFH_status(sp->fuzzy_hash_handle, HASH_LENGTH); pthread_mutex_unlock(&(sp->fuzzy_mutex)); if(digest_len==0) { @@ -1600,7 +1600,7 @@ int Maat_stream_scan_digest(stream_para_t * stream_para, const char * data, int } digest_buff=(char*)malloc(sizeof(char)*digest_len); pthread_mutex_lock(&(sp->fuzzy_mutex)); - fuzzy_digest(sp->fuzzy_hash_handle,digest_buff, digest_len); + SFH_digest(sp->fuzzy_hash_handle,digest_buff, digest_len); pthread_mutex_unlock(&(sp->fuzzy_mutex)); if(GIE_handle!=NULL) @@ -1661,7 +1661,7 @@ void Maat_stream_scan_digest_end(stream_para_t* stream_para) DEC_SCANNER_REF(scanner, sp->thread_num); } } - fuzzy_destroy_handle(sp->fuzzy_hash_handle); + SFH_release(sp->fuzzy_hash_handle); pthread_mutex_destroy(&(sp->fuzzy_mutex)); assert(sp->last_cache==NULL); assert(sp->scan_buff==NULL); diff --git a/src/entry/Maat_rule_internal.h b/src/entry/Maat_rule_internal.h index 9d832af..c74dde2 100644 --- a/src/entry/Maat_rule_internal.h +++ b/src/entry/Maat_rule_internal.h @@ -12,7 +12,7 @@ #include "rulescan.h" #include "hiredis.h" -#include "mesa_fuzzy.h" +#include "stream_fuzzy_hash.h" #include "gram_index_engine.h" #include "aligment_int64.h" #include @@ -314,7 +314,7 @@ struct _stream_para_t void* rs_stream_para; long acc_scan_len; unsigned long long total_len; - fuzzy_handle_t *fuzzy_hash_handle; + sfh_instance_t *fuzzy_hash_handle; pthread_mutex_t fuzzy_mutex; unsigned char query_point[8]; }; diff --git a/src/entry/Makefile b/src/entry/Makefile index 826d7fe..5c8f6ec 100644 --- a/src/entry/Makefile +++ b/src/entry/Makefile @@ -19,7 +19,7 @@ LIBMAAT = libmaatframe.a LIBMAAT_SO = libmaatframe.so OBJS=config_monitor.o Maat_rule.o Maat_api.o Maat_command.o Maat_stat.o UniversalBoolMatch.o dynamic_array.o\ - cJSON.o json2iris.o map_str2int.o interval_index.o gram_index_engine.o mesa_fuzzy.o rbtree.o + cJSON.o json2iris.o map_str2int.o interval_index.o gram_index_engine.o stream_fuzzy_hash.o rbtree.o .c.o: $(CC) -c $(CFLAGS) -I. $(H_DIR) $< diff --git a/src/entry/sfh_internal.h b/src/entry/sfh_internal.h index 86fb064..a259826 100644 --- a/src/entry/sfh_internal.h +++ b/src/entry/sfh_internal.h @@ -1,6 +1,6 @@ -#include -#include -#include +#include "zt_hash.h" +#include "interval_index.h" +#include "stream_fuzzy_hash.h" #ifndef __SFH_INTERNAL_H_INCLUDE_ #define __SFH_INTERNAL_H_INCLUDE_ @@ -106,6 +106,5 @@ unsigned int segment_overlap(fuzzy_handle_inner_t * handle, unsigned int size, u void sfh_tune_callback(IVI_seg_t * seg, void * user_para); void sfh_output_callback(IVI_seg_t * seg, void * user_para); void fuzzy_hash_length(IVI_seg_t * seg, void * user_para); -unsigned long long fuzzy_status(fuzzy_handle_t * handle, int type); #endif diff --git a/src/entry/mesa_fuzzy.c b/src/entry/stream_fuzzy_hash.c similarity index 97% rename from src/entry/mesa_fuzzy.c rename to src/entry/stream_fuzzy_hash.c index ab4161b..2b0b6e6 100644 --- a/src/entry/mesa_fuzzy.c +++ b/src/entry/stream_fuzzy_hash.c @@ -72,7 +72,7 @@ static inline unsigned int sum_hash(unsigned char c, unsigned int h) /** * 创建handle */ -fuzzy_handle_t * fuzzy_create_handle(unsigned long long origin_len) +sfh_instance_t * SFH_instance(unsigned long long origin_len) { fuzzy_handle_inner_t * handle = NULL; unsigned long long tmp_blksize = 0; @@ -94,7 +94,7 @@ fuzzy_handle_t * fuzzy_create_handle(unsigned long long origin_len) //handle->blocksize=tmp_blksize; handle->blocksize = 3; handle->do_tune=1; - return (fuzzy_handle_t *)handle; + return (sfh_instance_t *)handle; } @@ -110,7 +110,7 @@ void fuzzy_node_free(IVI_seg_t * seg, void * usr_para) } -void fuzzy_destroy_handle(fuzzy_handle_t * handle) +void SFH_release(sfh_instance_t * handle) { IVI_destroy(((fuzzy_handle_inner_t *)handle)->ivi, fuzzy_node_free, (void *)handle); ((fuzzy_handle_inner_t *)handle)->fuzzy_node_memory -= sizeof(fuzzy_handle_inner_t); @@ -118,7 +118,7 @@ void fuzzy_destroy_handle(fuzzy_handle_t * handle) return; } -unsigned int fuzzy_feed(fuzzy_handle_t * handle, const char * data, unsigned int size, unsigned long long offset) +unsigned int SFH_feed(sfh_instance_t * handle, const char * data, unsigned int size, unsigned long long offset) { fuzzy_handle_inner_t * _handle=(fuzzy_handle_inner_t *)handle; if(data == NULL || size == 0) @@ -150,7 +150,7 @@ unsigned int fuzzy_feed(fuzzy_handle_t * handle, const char * data, unsigned int //printf("blocksize after:%llu\n", _handle->blocksize); } #if 0 - fuzzy_digest(handle,result, sizeof(result)); + SFH_digest(handle,result, sizeof(result)); printf("%llu %s\n",offset,result); #endif return length; @@ -575,7 +575,7 @@ int sfh_merge_seg(fuzzy_handle_inner_t * _handle, sfh_seg_t * p, sfh_seg_t * n,u /** * 取出区间链表里面的hash_result值,并进行拼接,形成最后的result输出,并且满足abc[1:100]def[200:300]这种格式 */ -int fuzzy_digest(fuzzy_handle_t * handle, char * hash_buffer, unsigned int size) +int SFH_digest(sfh_instance_t * handle, char * hash_buffer, unsigned int size) { fuzzy_handle_inner_t* _handle=(fuzzy_handle_inner_t *)handle; unsigned int estimate_len=_handle->s_state_cnt+IVI_seg_cnt(_handle->ivi)*24+1; @@ -666,7 +666,7 @@ void sfh_output_callback(IVI_seg_t * seg, void * user_para) /** * 计算fuzzy_hash的各种长度 */ -unsigned long long fuzzy_status(fuzzy_handle_t * handle, int type) +unsigned long long SFH_status(sfh_instance_t * handle, int type) { unsigned long long length; fuzzy_handle_inner_t * _handle = (fuzzy_handle_inner_t *)(handle); diff --git a/test/digest_gen.c b/test/digest_gen.c index 7f0a81e..96232a9 100644 --- a/test/digest_gen.c +++ b/test/digest_gen.c @@ -1,12 +1,12 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include "mesa_fuzzy.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include "stream_fuzzy_hash.h" void* entropy_start(void) { @@ -40,96 +40,60 @@ double entropy_stop(void* handle) free(handle); return (-sum); } - -void dir_digest(int argc, char * argv[]) +void hash_file(const char* path) { - - if(argc != 2) - { - printf("uasge: ./digest_gen [Dir]\n"); - exit(-1); - } - - DIR * dir; - struct dirent * file; - char * dir_path = argv[1]; - char read_buff[1024*4]; unsigned long long read_size=0,feed_offset=0; - dir = opendir(dir_path); - chdir(dir_path); - int ret =0; - unsigned int file_id = 1; - unsigned long hash_length=0,file_effective_length=0; - FILE * result_fp = NULL,*fp=NULL; - struct stat digest_fstat; - char * digest_result_buff=NULL; - const char* result_file="./digest_result.txt"; - result_fp = fopen(result_file,"a"); + char read_buff[1024*4]; void * entropy_handle=NULL; double file_entropy=0.0; - if(NULL == result_fp) + int hash_length; + char * digest_result_buff=NULL; + struct stat digest_fstat; + FILE* fp; + stat(path,&digest_fstat); + fp = fopen(path, "r"); + if(NULL == fp) { - printf("open file failed!"); - exit(-1); + printf("Open %s failed\n", path); + return; } - while((file = readdir(dir)) != NULL) + read_size=0; + feed_offset=0; + sfh_instance_t * fhandle = SFH_instance(0); + entropy_handle=entropy_start(); + while(0==feof(fp)) { - if(!strcmp(file->d_name, ".") ||!strcmp(file->d_name, "..")) - { - continue; - } - ret=stat(file->d_name,&digest_fstat); - if(ret!=0) - { - printf("fstat %s error.\n",file->d_name); - continue; - } - - off_t file_size = digest_fstat.st_size; - fp = fopen(file->d_name, "r"); - if(NULL == fp) - { - printf("Can't open file %s\n", file->d_name); - continue; - } - read_size=0; - feed_offset=0; - fuzzy_handle_t * fhandle = fuzzy_create_handle((unsigned long long)file_size); - entropy_handle=entropy_start(); - while(0==feof(fp)) - { - read_size=fread(read_buff,1,sizeof(read_buff),fp); - fuzzy_feed(fhandle,read_buff,read_size,feed_offset); - feed_offset+=read_size; - entropy_feed(entropy_handle,(const unsigned char*) read_buff, read_size); - } - file_entropy=entropy_stop(entropy_handle); - hash_length = fuzzy_status(fhandle, HASH_LENGTH); - file_effective_length = fuzzy_status(fhandle, EFFECTIVE_LENGTH); - digest_result_buff= (char *)malloc(sizeof(char) * (hash_length)); - if(fuzzy_digest(fhandle, digest_result_buff, hash_length) < 0) - { - printf("error\n"); - continue; - } - fprintf(result_fp, "%u\t%s\t%llu\t%lu\t%lf\n", file_id, file->d_name,file_size, hash_length,file_entropy); - fprintf(result_fp, "%s\n", digest_result_buff); - printf("%u %s\n", file_id,file->d_name); - file_id++; - fuzzy_destroy_handle(fhandle); - fclose(fp); - free(digest_result_buff); + read_size=fread(read_buff,1,sizeof(read_buff),fp); + SFH_feed(fhandle,read_buff,read_size,feed_offset); + feed_offset+=read_size; + entropy_feed(entropy_handle,(const unsigned char*) read_buff, read_size); } - fclose(result_fp); - closedir(dir); - printf("write result to %s\n", result_file); - + file_entropy=entropy_stop(entropy_handle); + hash_length = SFH_status(fhandle, HASH_LENGTH); + digest_result_buff= (char *)malloc(sizeof(char) * (hash_length)); + SFH_digest(fhandle, digest_result_buff, hash_length); + printf("%s %u %lf %s\n",path,digest_fstat.st_size,file_entropy,digest_result_buff); + SFH_release(fhandle); + free(digest_result_buff); + fclose(fp); } - int main(int argc, char * argv[]) { - dir_digest(argc, argv); - //overlap_test(argc, argv); + char path[256]; + if(argc == 2) + { + hash_file(argv[1]); + } + else if(NULL!=fgets(path,sizeof(path),stdin)) + { + hash_file(path); + } + else + { + printf("SFH uasge: ./digest_gen [Dir]\n"); + exit(-1); + } + return 0; }