1)修复SFH摘要偏移量输出错误的bug,2)修复不同输入次数导致摘要值错误的bug,其原因是tune的次数由feed触发;
This commit is contained in:
@@ -30,7 +30,7 @@
|
||||
#include "stream_fuzzy_hash.h"
|
||||
#include "gram_index_engine.h"
|
||||
|
||||
int MAAT_FRAME_VERSION_2_1_20171011=1;
|
||||
int MAAT_FRAME_VERSION_2_1_20171107=1;
|
||||
|
||||
const char* CHARSET_STRING[]={"NONE","gbk","big5","unicode","utf8","bin",
|
||||
"unicode_ascii_esc","unicode_ascii_aligned","unicode_ncr_dec","unicode_ncr_hex","url_encode_gb2312","url_encode_utf8",""};
|
||||
|
||||
@@ -96,7 +96,6 @@ typedef struct
|
||||
unsigned long long hash_length;
|
||||
}final_length;
|
||||
|
||||
sfh_seg_t* create_sfh_seg(fuzzy_handle_inner_t * _handle);
|
||||
int destroy_sfh_seg(sfh_seg_t*p);
|
||||
unsigned long long get_blocksize(unsigned long long orilen);
|
||||
int sfh_merge_seg(fuzzy_handle_inner_t * _handle,sfh_seg_t * seg, sfh_seg_t * next_seg, unsigned long long blocksize);
|
||||
|
||||
@@ -21,7 +21,6 @@ const char * map_to64bytes =
|
||||
double get_rs_entropy(unsigned int * r_array, unsigned int r_index);
|
||||
int cmp(const void * a, const void * b);
|
||||
void sfh_rs_entropy(IVI_seg_t * seg, void * user_para);
|
||||
void sfh_tune_simulation(IVI_seg_t * seg, void * user_para);
|
||||
void sfh_output_state_t(IVI_seg_t * seg, void * user_para);
|
||||
int write_uint_array(unsigned int ** array, unsigned int *index,unsigned int *size,unsigned int value);
|
||||
/**
|
||||
@@ -117,46 +116,6 @@ void SFH_release(sfh_instance_t * handle)
|
||||
free((fuzzy_handle_inner_t *)handle);
|
||||
return;
|
||||
}
|
||||
|
||||
/**
 * Feed one chunk of input, located at `offset`, into the hash instance.
 *
 * The chunk is handed to segment_overlap(); the length it reports is added
 * to both effective_length and length_increase. When tuning is enabled
 * (do_tune == 1) and the state count exceeds EXPECT_SIGNATURE_LEN, at most
 * one simulate-then-tune pass is run per call.
 *
 * @param handle  hash instance; dereferenced unconditionally once the
 *                data/size check has passed, so it must not be NULL
 * @param data    chunk bytes; NULL makes the call a no-op
 * @param size    chunk length in bytes; 0 makes the call a no-op
 * @param offset  position of the chunk, forwarded to segment_overlap()
 * @return the length reported by segment_overlap(), or 0 for a no-op call
 */
unsigned int SFH_feed(sfh_instance_t * handle, const char * data, unsigned int size, unsigned long long offset)
{
    fuzzy_handle_inner_t *inner = (fuzzy_handle_inner_t *)handle;
    unsigned int accepted;

    if (size == 0 || data == NULL)
    {
        return 0;
    }

    accepted = segment_overlap(inner, size, offset, data);
    inner->effective_length += accepted;
    inner->length_increase += accepted;

    /* The state-count test also guards the division below against zero. */
    if (inner->do_tune == 1 && inner->s_state_cnt > EXPECT_SIGNATURE_LEN)
    {
        unsigned long long threshold = (inner->effective_length/inner->s_state_cnt)*EXPECT_SIGNATURE_LEN;

        if (inner->length_increase > threshold)
        {
            /* Dry run first: the simulation traversal updates sim_tuned_rs_cnt. */
            IVI_traverse(inner->ivi, sfh_tune_simulation, (void *)inner);
            if (inner->sim_tuned_rs_cnt > EXPECT_SIGNATURE_LEN)
            {
                inner->blocksize *= MULTIPLE;
                IVI_traverse(inner->ivi, sfh_tune_callback, (void *)inner);
            }
            inner->sim_tuned_rs_cnt = 0;
            inner->length_increase = 0;
        }
    }
#if 0
    SFH_digest(handle,result, sizeof(result));
    printf("%llu %s\n",offset,result);
#endif
    return accepted;
}
|
||||
|
||||
|
||||
void sfh_tune_simulation(IVI_seg_t * seg, void * user_para)
|
||||
{
|
||||
sfh_seg_t * tmp = (sfh_seg_t *)(seg->data);
|
||||
@@ -171,6 +130,113 @@ void sfh_tune_simulation(IVI_seg_t * seg, void * user_para)
|
||||
}
|
||||
}
|
||||
}
|
||||
/**
 * Re-filter one segment's reset points for a new block size, in place.
 *
 * A reset point r_array[k] is kept only when
 * r_array[k] % blocksize == blocksize - 1. The hash states between dropped
 * points are folded together with zt_hash_arymul(), so each kept point ends
 * up with the combined state of everything since the previous kept point.
 * The first kept state goes to p_state, later ones to s_array[], and
 * whatever is left after the last kept point (combined with the old
 * s_state) becomes the new s_state. r_cnt and s_cnt are updated to match.
 *
 * @param p          segment to rewrite
 * @param blocksize  new block size; used as a modulus, so it must be non-zero
 */
void sfh_tune_seg(sfh_seg_t * p, unsigned long long blocksize)
{
    struct zt_state_t acc;
    int kept = 0;      /* reset points surviving the filter */
    int stored = 0;    /* states written back into s_array */
    int idx;

    zt_hash_initial(&acc);

    for (idx = 0; idx < p->r_cnt; ++idx)
    {
        /* Fold in the state that precedes reset point idx. */
        if (idx != 0)
        {
            zt_hash_arymul(&acc, &(p->s_array[idx - 1]));
        }
        else
        {
            zt_hash_arymul(&acc, &(p->p_state));
        }

        if (p->r_array[idx] % blocksize != blocksize - 1)
        {
            continue;   /* dropped point: keep accumulating */
        }

        /* Compaction is safe in place: kept <= idx and stored <= idx - 1,
         * so every slot written here has already been read. */
        p->r_array[kept] = p->r_array[idx];
        ++kept;
        if (kept == 1)
        {
            p->p_state.val = acc.val;
        }
        else
        {
            p->s_array[stored].val = acc.val;
            ++stored;
        }
        zt_hash_initial(&acc);
    }

    zt_hash_arymul(&acc, &(p->s_state));
    if (kept == 0)
    {
        /* No point survived: everything was folded into acc. */
        zt_hash_initial(&(p->p_state));
    }
    p->s_state.val = acc.val;
    p->r_cnt = kept;
    p->s_cnt = stored;
    assert(p->r_cnt>=p->s_cnt);
}
|
||||
/**
 * Traversal callback that retunes one segment to the handle's current
 * block size and keeps the handle's total state count in step.
 *
 * Segments with no reset points (r_cnt == 0) are left untouched.
 *
 * @param seg        traversal node; seg->data is the sfh_seg_t to retune
 * @param user_para  the fuzzy_handle_inner_t that owns the segment
 */
void sfh_tune_callback(IVI_seg_t * seg, void * user_para)
{
    sfh_seg_t *node = (sfh_seg_t *)(seg->data);

    if (node->r_cnt != 0)
    {
        fuzzy_handle_inner_t *inner = (fuzzy_handle_inner_t *)user_para;

        /* Swap this segment's old contribution for its new one. */
        inner->s_state_cnt -= node->s_cnt;
        sfh_tune_seg(node, inner->blocksize);
        inner->s_state_cnt += node->s_cnt;
    }
}
|
||||
|
||||
/**
 * Repeatedly enlarge the block size until the signature is short enough.
 *
 * Each round resets sim_tuned_rs_cnt and runs the simulation traversal.
 * If the simulated count is still above EXPECT_SIGNATURE_LEN, blocksize is
 * multiplied by MULTIPLE and every segment is retuned. The loop ends when
 * the simulated count is not above the limit, or when the real state count
 * has dropped to the limit or below.
 *
 * @param handle  hash instance to tune; must not be NULL
 */
void do_sfh_tune(sfh_instance_t * handle)
{
    fuzzy_handle_inner_t *inner = (fuzzy_handle_inner_t *)handle;

    for (;;)
    {
        inner->sim_tuned_rs_cnt = 0;
        IVI_traverse(inner->ivi, sfh_tune_simulation, (void *)inner);

        if (inner->sim_tuned_rs_cnt <= EXPECT_SIGNATURE_LEN)
        {
            break;
        }

        inner->blocksize *= MULTIPLE;
        IVI_traverse(inner->ivi, sfh_tune_callback, (void *)inner);

        if (inner->s_state_cnt <= EXPECT_SIGNATURE_LEN)
        {
            break;
        }
    }
}
|
||||
/**
 * Feed one chunk of input, located at `offset`, into the hash instance.
 *
 * The chunk is handed to segment_overlap(); the length it reports is added
 * to both effective_length and length_increase. When tuning is enabled
 * (do_tune == 1), the state count exceeds EXPECT_SIGNATURE_LEN, and enough
 * new data has arrived since the last tune, do_sfh_tune() is run and
 * length_increase is reset.
 *
 * @param handle  hash instance; dereferenced unconditionally once the
 *                data/size check has passed, so it must not be NULL
 * @param data    chunk bytes; NULL makes the call a no-op
 * @param size    chunk length in bytes; 0 makes the call a no-op
 * @param offset  position of the chunk, forwarded to segment_overlap()
 * @return the length reported by segment_overlap(), or 0 for a no-op call
 */
unsigned int SFH_feed(sfh_instance_t * handle, const char * data, unsigned int size, unsigned long long offset)
{
    fuzzy_handle_inner_t *inner = (fuzzy_handle_inner_t *)handle;
    unsigned int accepted;

    if (size == 0 || data == NULL)
    {
        return 0;
    }

    accepted = segment_overlap(inner, size, offset, data);
    inner->effective_length += accepted;
    inner->length_increase += accepted;

    /* The state-count test also guards the division below against zero. */
    if (inner->do_tune == 1 && inner->s_state_cnt > EXPECT_SIGNATURE_LEN)
    {
        unsigned long long threshold = (inner->effective_length/inner->s_state_cnt)*EXPECT_SIGNATURE_LEN;

        if (inner->length_increase > threshold)
        {
            do_sfh_tune(handle);
            inner->length_increase = 0;
        }
    }
#if 0
    SFH_digest(handle,result, sizeof(result));
    printf("%llu %s\n",offset,result);
#endif
    return accepted;
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
unsigned long long get_blocksize(unsigned long long orilen)
|
||||
@@ -187,7 +253,7 @@ unsigned long long get_blocksize(unsigned long long orilen)
|
||||
// return BLOCKSIZE_MIN;
|
||||
}
|
||||
|
||||
sfh_seg_t* create_sfh_seg(fuzzy_handle_inner_t * _handle)
|
||||
sfh_seg_t* create_sfh_seg(fuzzy_handle_inner_t * _handle,unsigned long long offset)
|
||||
{
|
||||
sfh_seg_t*p=(sfh_seg_t*)calloc(sizeof(sfh_seg_t),1);
|
||||
roll_init(&(p->r_state));
|
||||
@@ -195,6 +261,7 @@ sfh_seg_t* create_sfh_seg(fuzzy_handle_inner_t * _handle)
|
||||
p->s_cnt=0;
|
||||
p->r_size = INIT_SIZE;
|
||||
p->r_cnt=0;
|
||||
p->left_offset=p->right_offset=offset;
|
||||
p->r_array = (unsigned int*)malloc(sizeof(unsigned int)*(p->r_size));
|
||||
_handle->fuzzy_node_memory+=sizeof(unsigned int)*(p->r_size);
|
||||
p->s_array = (struct zt_state_t*)malloc(sizeof(struct zt_state_t)*(p->s_size));
|
||||
@@ -254,7 +321,7 @@ unsigned int segment_overlap(fuzzy_handle_inner_t * _handle, unsigned int size,
|
||||
|
||||
if(overlap_segnum==0||offset<overlap_segs[0]->left)
|
||||
{
|
||||
sfh_seg=create_sfh_seg(_handle);
|
||||
sfh_seg=create_sfh_seg(_handle,offset);
|
||||
calc_begin=offset;
|
||||
if(overlap_segnum == 0)
|
||||
{
|
||||
@@ -389,64 +456,7 @@ double get_rs_entropy(unsigned int * r_array, unsigned int r_index)
|
||||
}
|
||||
|
||||
|
||||
/**
 * Re-filter one segment's reset points for a new block size, in place.
 *
 * A reset point r_array[k] is kept only when
 * r_array[k] % blocksize == blocksize - 1. The hash states between dropped
 * points are folded together with zt_hash_arymul(), so each kept point ends
 * up with the combined state of everything since the previous kept point.
 * The first kept state goes to p_state, later ones to s_array[], and
 * whatever is left after the last kept point (combined with the old
 * s_state) becomes the new s_state. r_cnt and s_cnt are updated to match.
 *
 * @param p          segment to rewrite
 * @param blocksize  new block size; used as a modulus, so it must be non-zero
 */
void sfh_tune_seg(sfh_seg_t * p, unsigned long long blocksize)
{
    struct zt_state_t acc;
    int kept = 0;      /* reset points surviving the filter */
    int stored = 0;    /* states written back into s_array */
    int idx;

    zt_hash_initial(&acc);

    for (idx = 0; idx < p->r_cnt; ++idx)
    {
        /* Fold in the state that precedes reset point idx. */
        if (idx != 0)
        {
            zt_hash_arymul(&acc, &(p->s_array[idx - 1]));
        }
        else
        {
            zt_hash_arymul(&acc, &(p->p_state));
        }

        if (p->r_array[idx] % blocksize != blocksize - 1)
        {
            continue;   /* dropped point: keep accumulating */
        }

        /* Compaction is safe in place: kept <= idx and stored <= idx - 1,
         * so every slot written here has already been read. */
        p->r_array[kept] = p->r_array[idx];
        ++kept;
        if (kept == 1)
        {
            p->p_state.val = acc.val;
        }
        else
        {
            p->s_array[stored].val = acc.val;
            ++stored;
        }
        zt_hash_initial(&acc);
    }

    zt_hash_arymul(&acc, &(p->s_state));
    if (kept == 0)
    {
        /* No point survived: everything was folded into acc. */
        zt_hash_initial(&(p->p_state));
    }
    p->s_state.val = acc.val;
    p->r_cnt = kept;
    p->s_cnt = stored;
    assert(p->r_cnt>=p->s_cnt);
}
|
||||
/**
 * Traversal callback that retunes one segment to the handle's current
 * block size and keeps the handle's total state count in step.
 *
 * Segments with no reset points (r_cnt == 0) are left untouched.
 *
 * @param seg        traversal node; seg->data is the sfh_seg_t to retune
 * @param user_para  the fuzzy_handle_inner_t that owns the segment
 */
void sfh_tune_callback(IVI_seg_t * seg, void * user_para)
{
    sfh_seg_t *node = (sfh_seg_t *)(seg->data);

    if (node->r_cnt != 0)
    {
        fuzzy_handle_inner_t *inner = (fuzzy_handle_inner_t *)user_para;

        /* Swap this segment's old contribution for its new one. */
        inner->s_state_cnt -= node->s_cnt;
        sfh_tune_seg(node, inner->blocksize);
        inner->s_state_cnt += node->s_cnt;
    }
}
|
||||
|
||||
int write_uint_array(unsigned int ** array,unsigned int *index, unsigned int *size,unsigned int value)
|
||||
{
|
||||
@@ -568,7 +578,7 @@ int sfh_merge_seg(fuzzy_handle_inner_t * _handle, sfh_seg_t * p, sfh_seg_t * n,u
|
||||
}
|
||||
memcpy(&(p->r_state),&(n->r_state),sizeof(p->r_state));
|
||||
assert(p->r_cnt>=p->s_cnt);
|
||||
|
||||
p->right_offset=n->right_offset;
|
||||
return state_inc_cnt;
|
||||
}
|
||||
|
||||
@@ -676,8 +686,6 @@ void sfh_output_callback(IVI_seg_t * seg, void * user_para)
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Various lengths of the fuzzy_hash (text recovered from a mis-decoded GBK comment; the leading verb is unrecoverable)
|
||||
*/
|
||||
|
||||
Reference in New Issue
Block a user