diff --git a/inc/stream_fuzzy_hash.h b/inc/stream_fuzzy_hash.h index 5afa9b4..9e85e81 100644 --- a/inc/stream_fuzzy_hash.h +++ b/inc/stream_fuzzy_hash.h @@ -1,16 +1,9 @@ -#ifndef _MESA_FUZZY_ -#define _MESA_FUZZY_ +#ifndef _STREAM_FUZZY_HASH_ +#define _STREAM_FUZZY_HASH_ /* * Copyright (C) MESA 2015 - * - * These functions allow a programmer to compute the fuzzy hashes - * (also called the context-triggered piecewise hashes) of - * buffer[s] of text. - * - * See also: - * ssdeep, and - * Identifying almost identical files using context triggered piecewise hashing + * */ diff --git a/src/entry/Maat_rule.cpp b/src/entry/Maat_rule.cpp index 5cb979c..1fa3f49 100644 --- a/src/entry/Maat_rule.cpp +++ b/src/entry/Maat_rule.cpp @@ -30,7 +30,7 @@ #include "stream_fuzzy_hash.h" #include "gram_index_engine.h" -int MAAT_FRAME_VERSION_2_1_20171011=1; +int MAAT_FRAME_VERSION_2_1_20171107=1; const char* CHARSET_STRING[]={"NONE","gbk","big5","unicode","utf8","bin", "unicode_ascii_esc","unicode_ascii_aligned","unicode_ncr_dec","unicode_ncr_hex","url_encode_gb2312","url_encode_utf8",""}; diff --git a/src/entry/sfh_internal.h b/src/entry/sfh_internal.h index 58fd214..83c8f13 100644 --- a/src/entry/sfh_internal.h +++ b/src/entry/sfh_internal.h @@ -96,7 +96,6 @@ typedef struct unsigned long long hash_length; }final_length; -sfh_seg_t* create_sfh_seg(fuzzy_handle_inner_t * _handle); int destroy_sfh_seg(sfh_seg_t*p); unsigned long long get_blocksize(unsigned long long orilen); int sfh_merge_seg(fuzzy_handle_inner_t * _handle,sfh_seg_t * seg, sfh_seg_t * next_seg, unsigned long long blocksize); diff --git a/src/entry/stream_fuzzy_hash.c b/src/entry/stream_fuzzy_hash.c index eb4dca5..48ed48c 100644 --- a/src/entry/stream_fuzzy_hash.c +++ b/src/entry/stream_fuzzy_hash.c @@ -21,7 +21,6 @@ const char * map_to64bytes = double get_rs_entropy(unsigned int * r_array, unsigned int r_index); int cmp(const void * a, const void * b); void sfh_rs_entropy(IVI_seg_t * seg, void * user_para); -void sfh_tune_simulation(IVI_seg_t * seg, void * user_para); void sfh_output_state_t(IVI_seg_t * seg, void * user_para); int write_uint_array(unsigned int ** array, unsigned int *index,unsigned int *size,unsigned int value); /** @@ -117,46 +116,6 @@ void SFH_release(sfh_instance_t * handle) free((fuzzy_handle_inner_t *)handle); return; } - -unsigned int SFH_feed(sfh_instance_t * handle, const char * data, unsigned int size, unsigned long long offset) -{ - fuzzy_handle_inner_t * _handle=(fuzzy_handle_inner_t *)handle; - if(data == NULL || size == 0) - { - return 0; - } - unsigned int length = segment_overlap(_handle, size, offset, data); - _handle->effective_length += length; - _handle->length_increase += length; - if(_handle->s_state_cnt>EXPECT_SIGNATURE_LEN&&_handle->do_tune==1) - { - //printf("s_state_cnt before:%d\n", _handle->s_state_cnt); - //printf("blocksize before:%llu\n", _handle->blocksize); - unsigned long long check_length = (_handle->effective_length/_handle->s_state_cnt)*EXPECT_SIGNATURE_LEN; - - if(_handle->length_increase > check_length) - { - IVI_traverse(_handle->ivi, sfh_tune_simulation, (void *)_handle); - //printf("sim_rs_cnt:%d\n", _handle->sim_tuned_rs_cnt); - if(_handle->sim_tuned_rs_cnt>EXPECT_SIGNATURE_LEN) - { - _handle->blocksize*= MULTIPLE; - IVI_traverse(_handle->ivi, sfh_tune_callback, (void *)_handle); - } - _handle->sim_tuned_rs_cnt = 0; - _handle->length_increase = 0; - } - //printf("s_state_cnt after:%d\n", _handle->s_state_cnt); - //printf("blocksize after:%llu\n", _handle->blocksize); - } -#if 0 - SFH_digest(handle,result, sizeof(result)); - printf("%llu %s\n",offset,result); -#endif - return length; -} - - void sfh_tune_simulation(IVI_seg_t * seg, void * user_para) { sfh_seg_t * tmp = (sfh_seg_t *)(seg->data); @@ -171,6 +130,113 @@ void sfh_tune_simulation(IVI_seg_t * seg, void * user_para) } } } +void sfh_tune_seg(sfh_seg_t * p, unsigned long long blocksize) +{ + int i = 0, j = 0; + struct zt_state_t tmp_zt; + int new_zt_cnt=0; + zt_hash_initial(&tmp_zt); + + for(j = 0; j < p->r_cnt; j++) + { + if(j == 0) + { + zt_hash_arymul(&tmp_zt, &(p->p_state)); + } + else + { + zt_hash_arymul(&tmp_zt, &(p->s_array[j - 1])); + } + if(p->r_array[j] % blocksize == blocksize - 1) + { + p->r_array[i]=p->r_array[j]; + i++; + if(i>1) + { + p->s_array[new_zt_cnt].val=tmp_zt.val; + new_zt_cnt++; + } + else + { + p->p_state.val=tmp_zt.val; + } + zt_hash_initial(&tmp_zt); + } + } + zt_hash_arymul(&tmp_zt, &(p->s_state)); + if(i == 0) + { + zt_hash_initial(&(p->p_state)); + } + p->s_state.val = tmp_zt.val; + p->s_cnt = new_zt_cnt; + p->r_cnt = i; + assert(p->r_cnt>=p->s_cnt); +} +void sfh_tune_callback(IVI_seg_t * seg, void * user_para) +{ + sfh_seg_t * p = (sfh_seg_t *)(seg->data); + if(p->r_cnt== 0) + { + return; + } + + fuzzy_handle_inner_t * _handle = (fuzzy_handle_inner_t *)user_para; + unsigned long long blocksize = _handle->blocksize; + _handle->s_state_cnt-=p->s_cnt; + sfh_tune_seg(p, blocksize); + _handle->s_state_cnt+=p->s_cnt; + //printf("after state_cnt:%d,block:%llu\n",_handle->s_state_cnt,_handle->blocksize); +} + +void do_sfh_tune(sfh_instance_t * handle) +{ + fuzzy_handle_inner_t * _handle=(fuzzy_handle_inner_t *)handle; + do{ + _handle->sim_tuned_rs_cnt = 0; + IVI_traverse(_handle->ivi, sfh_tune_simulation, (void *)_handle); + if(_handle->sim_tuned_rs_cnt>EXPECT_SIGNATURE_LEN) + { + _handle->blocksize*= MULTIPLE; + IVI_traverse(_handle->ivi, sfh_tune_callback, (void *)_handle); + } + else + { + break; + } + + }while(_handle->s_state_cnt>EXPECT_SIGNATURE_LEN); + return; +} +unsigned int SFH_feed(sfh_instance_t * handle, const char * data, unsigned int size, unsigned long long offset) +{ + fuzzy_handle_inner_t * _handle=(fuzzy_handle_inner_t *)handle; + if(data == NULL || size == 0) + { + return 0; + } + unsigned int length = segment_overlap(_handle, size, offset, data); + _handle->effective_length += length; + _handle->length_increase += length; + if(_handle->s_state_cnt>EXPECT_SIGNATURE_LEN&&_handle->do_tune==1) + { + unsigned long long check_length = (_handle->effective_length/_handle->s_state_cnt)*EXPECT_SIGNATURE_LEN; + + if(_handle->length_increase > check_length) + { + do_sfh_tune(handle); + _handle->length_increase = 0; + } + } +#if 0 + SFH_digest(handle,result, sizeof(result)); + printf("%llu %s\n",offset,result); +#endif + return length; +} + + + unsigned long long get_blocksize(unsigned long long orilen) @@ -187,7 +253,7 @@ unsigned long long get_blocksize(unsigned long long orilen) // return BLOCKSIZE_MIN; } -sfh_seg_t* create_sfh_seg(fuzzy_handle_inner_t * _handle) +sfh_seg_t* create_sfh_seg(fuzzy_handle_inner_t * _handle,unsigned long long offset) { sfh_seg_t*p=(sfh_seg_t*)calloc(sizeof(sfh_seg_t),1); roll_init(&(p->r_state)); @@ -195,6 +261,7 @@ sfh_seg_t* create_sfh_seg(fuzzy_handle_inner_t * _handle) p->s_cnt=0; p->r_size = INIT_SIZE; p->r_cnt=0; + p->left_offset=p->right_offset=offset; p->r_array = (unsigned int*)malloc(sizeof(unsigned int)*(p->r_size)); _handle->fuzzy_node_memory+=sizeof(unsigned int)*(p->r_size); p->s_array = (struct zt_state_t*)malloc(sizeof(struct zt_state_t)*(p->s_size)); @@ -254,7 +321,7 @@ unsigned int segment_overlap(fuzzy_handle_inner_t * _handle, unsigned int size, if(overlap_segnum==0||offsetleft) { - sfh_seg=create_sfh_seg(_handle); + sfh_seg=create_sfh_seg(_handle,offset); calc_begin=offset; if(overlap_segnum == 0) { @@ -389,64 +456,7 @@ double get_rs_entropy(unsigned int * r_array, unsigned int r_index) } -void sfh_tune_seg(sfh_seg_t * p, unsigned long long blocksize) -{ - int i = 0, j = 0; - struct zt_state_t tmp_zt; - int new_zt_cnt=0; - zt_hash_initial(&tmp_zt); - for(j = 0; j < p->r_cnt; j++) - { - if(j == 0) - { - zt_hash_arymul(&tmp_zt, &(p->p_state)); - } - else - { - zt_hash_arymul(&tmp_zt, &(p->s_array[j - 1])); - } - if(p->r_array[j] % blocksize == blocksize - 1) - { - p->r_array[i]=p->r_array[j]; - i++; - if(i>1) - { - p->s_array[new_zt_cnt].val=tmp_zt.val; - new_zt_cnt++; - } - else - { - p->p_state.val=tmp_zt.val; - } - zt_hash_initial(&tmp_zt); - } - } - zt_hash_arymul(&tmp_zt, &(p->s_state)); - if(i == 0) - { - zt_hash_initial(&(p->p_state)); - } - p->s_state.val = tmp_zt.val; - p->s_cnt = new_zt_cnt; - p->r_cnt = i; - assert(p->r_cnt>=p->s_cnt); -} -void sfh_tune_callback(IVI_seg_t * seg, void * user_para) -{ - sfh_seg_t * p = (sfh_seg_t *)(seg->data); - if(p->r_cnt== 0) - { - return; - } - - fuzzy_handle_inner_t * _handle = (fuzzy_handle_inner_t *)user_para; - unsigned long long blocksize = _handle->blocksize; - _handle->s_state_cnt-=p->s_cnt; - sfh_tune_seg(p, blocksize); - _handle->s_state_cnt+=p->s_cnt; - //printf("after state_cnt:%d,block:%llu\n",_handle->s_state_cnt,_handle->blocksize); -} int write_uint_array(unsigned int ** array,unsigned int *index, unsigned int *size,unsigned int value) { @@ -568,7 +578,7 @@ int sfh_merge_seg(fuzzy_handle_inner_t * _handle, sfh_seg_t * p, sfh_seg_t * n,u } memcpy(&(p->r_state),&(n->r_state),sizeof(p->r_state)); assert(p->r_cnt>=p->s_cnt); - + p->right_offset=n->right_offset; return state_inc_cnt; } @@ -676,8 +686,6 @@ void sfh_output_callback(IVI_seg_t * seg, void * user_para) return; } - - /** * 计算fuzzy_hash的各种长度 */ diff --git a/test/maat_test.cpp b/test/maat_test.cpp index 1dea4b7..bdc006d 100644 --- a/test/maat_test.cpp +++ b/test/maat_test.cpp @@ -1,4 +1,5 @@ #include "Maat_rule.h" +#include "stream_fuzzy_hash.h" #include "Maat_command.h" #include #include @@ -248,10 +249,9 @@ int test_ipv6_scan(Maat_feather_t feather,const char* table_name,scan_status_t* } return ret; } -int test_digest_scan(Maat_feather_t feather,const char* table_name,scan_status_t* mid) +int test_digest_scan(Maat_feather_t feather,const char* table_name,const char* file_name,scan_status_t* mid) { int table_id=0,ret=0; - const char* digest_test_file="./testdata/digest_test.data"; struct stat digest_fstat; unsigned long long read_size=0,scan_offset=0; char digest_test_buff[4096]={0}; @@ -264,13 +264,13 @@ int test_digest_scan(Maat_feather_t feather,const char* table_name,scan_status_t printf("registe table %s error.\n",table_name); return 0; } - ret=stat(digest_test_file,&digest_fstat); + ret=stat(file_name,&digest_fstat); if(ret!=0) { - printf("fstat %s error.\n",digest_test_file); + printf("fstat %s error.\n",file_name); return 0; } - FILE* fp=fopen(digest_test_file,"r"); + FILE* fp=fopen(file_name,"r"); if(fp!=NULL) { sp=Maat_stream_scan_digest_start(feather, table_id, digest_fstat.st_size, 0); @@ -289,7 +289,7 @@ int test_digest_scan(Maat_feather_t feather,const char* table_name,scan_status_t } else { - printf("fopen %s error.\n",digest_test_file); + printf("fopen %s error.\n",file_name); } Maat_stream_scan_string_end(&sp); return ret; @@ -808,6 +808,95 @@ void test_command(Maat_feather_t feather) } Maat_clean_status(&mid); } +#define FILE_CHUNK_SIZE 4096 +void test_sfh_digest(const char* filename) +{ + char * file_buff=NULL,*sfh_ordered=NULL,*sfh_unorder=NULL; + int read_size=0,ret=0,chunk_num=0,i=0,idx=0; + unsigned long long *offset=NULL; + unsigned long long file_size=0,tmp=0,hash_length=0; + FILE* fp=fopen(filename,"r"); + sfh_instance_t * fhandle = NULL; + struct stat file_info; + ret=stat(filename, &file_info); + if(ret!=0) + { + printf("%s stat file %s error.\n",__FUNCTION__,filename); + goto error_out; + } + file_size=file_info.st_size; + file_buff=(char*)malloc(file_size); + ret=fread(file_buff,1,file_size,fp); + if((unsigned long long)ret!=file_size) + { + printf("%s read file %s error.\n",__FUNCTION__,filename); + free(file_buff); + goto error_out; + } + chunk_num=file_size/FILE_CHUNK_SIZE; + if(file_size%FILE_CHUNK_SIZE==0) + { + chunk_num=file_size/FILE_CHUNK_SIZE; + } + else + { + chunk_num=file_size/FILE_CHUNK_SIZE+1; + } + offset=(unsigned long long*)malloc(sizeof(unsigned long long)*chunk_num); + for(i=0;ifile_size) + { + read_size=file_size-offset[i]; + } + else + { + read_size=FILE_CHUNK_SIZE; + } + SFH_feed(fhandle,file_buff+offset[i],read_size,offset[i]); + } + hash_length = SFH_status(fhandle, HASH_LENGTH); + sfh_unorder=(char*)malloc(hash_length); + SFH_digest(fhandle, sfh_unorder, hash_length); + //printf("%s %u %lf %s\n",path,digest_fstat.st_size,file_entropy,digest_result_buff); + SFH_release(fhandle); + if(0==strcmp(sfh_ordered,sfh_unorder)) + { + printf("Test SFH success.\n"); + } + else + { + printf("Test SFH failed.\n"); + } +error_out: + fclose(fp); + free(file_buff); + free(sfh_ordered); + free(sfh_unorder); + free(offset); +} void maat_test_print_usage(void) { printf("Maat Test Usage:\n"); @@ -830,6 +919,7 @@ int main(int argc,char* argv[]) const char* log_file="./test.log"; const char* stat_file="./scan_staus.log"; const char* decrypt_key="mesa2017wy"; + const char* test_digest_file="./testdata/digest_test.data"; int scan_interval_ms=10; int effective_interval_ms=10; @@ -913,7 +1003,7 @@ int main(int argc,char* argv[]) test_ipv6_scan(feather, "IP_CONFIG", &mid); Maat_clean_status(&mid); - test_digest_scan(feather,"FILE_DIGEST", &mid); + test_digest_scan(feather,"FILE_DIGEST",test_digest_file,&mid); Maat_clean_status(&mid); test_expr_plus(feather, "HTTP_REGION", &mid); @@ -945,6 +1035,7 @@ int main(int argc,char* argv[]) test_set_cmd_line(feather); test_add_ip_command(feather,"IP_CONFIG"); } + test_sfh_digest(test_digest_file); sleep(wait_second); Maat_burn_feather(feather);