2015-11-13 11:41:52 +08:00
|
|
|
|
#include <stdio.h>
|
|
|
|
|
|
#include <string.h>
|
|
|
|
|
|
#include <stdint.h>
|
|
|
|
|
|
#include <stdlib.h>
|
|
|
|
|
|
#include <string.h>
|
|
|
|
|
|
#include <assert.h>
|
|
|
|
|
|
#include <math.h>
|
|
|
|
|
|
#include "mesa_fuzzy.h"
|
|
|
|
|
|
#include "interval_index.h"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* Number of bytes held in the rolling-hash window (see struct roll_state). */
#define ROLLING_WINDOW  7
/* Base block size; get_blocksize() returns this value times a power of two. */
#define BLOCKSIZE_MIN   3
/* Size in bytes of the on-stack scratch buffer used while formatting a digest. */
#define MAXSIZE         10000
/* Multiplier applied by sum_hash() at every byte. */
#define HASH_PRIME      0x01000193
/* Value the per-slice hash restarts from after every slice boundary. */
#define HASH_INIT       0x28021967

/* Debug switch; it is not referenced in the visible part of this file. */
#define DEBUG (0)
|
|
|
|
|
|
|
|
|
|
|
|
struct roll_state
|
|
|
|
|
|
{
|
|
|
|
|
|
unsigned char window[ROLLING_WINDOW];
|
|
|
|
|
|
unsigned int h1, h2, h3;
|
|
|
|
|
|
unsigned int n;
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * Per-segment bookkeeping stored as the user data of an interval-index segment.
 */
typedef struct
{
    char *left_data;                    /* heap copy of the first left_len bytes of the segment */
    unsigned int left_len;              /* bytes up to and including the first slice boundary,
                                           or the whole segment when it has no boundary */

    char *hash_result;                  /* NUL-terminated string, one b64 character per slice boundary */
    unsigned long long left_offset;     /* set by fuzzy_hash_calculate() */
    unsigned long long right_offset;    /* set by fuzzy_hash_calculate() */

    struct roll_state *right_status_r;  /* rolling-hash state after the last byte of the segment */
    unsigned int right_status_shash;    /* per-slice hash value after the last byte of the segment */
    unsigned int right_len;             /* bytes after the last slice boundary (0 when there is none) */
    int slice_num;                      /* number of slice boundaries found in the segment */

} fuzzy_node;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
typedef struct
|
|
|
|
|
|
{
|
2015-11-30 16:44:23 +08:00
|
|
|
|
unsigned long long orilen;
|
|
|
|
|
|
IVI_t * ivi; //ÿһ<C3BF><D2BB>handle<6C><65><EFBFBD>汣<EFBFBD><E6B1A3>һ<EFBFBD><D2BB>IVIָ<49>룬һ<EBA3AC><D2BB>IVI<56><49><EFBFBD>汣<EFBFBD><E6B1A3><EFBFBD><EFBFBD><EFBFBD><EFBFBD>һ<EFBFBD><D2BB><EFBFBD>ļ<EFBFBD><C4BC><EFBFBD><EFBFBD><EFBFBD>Ƭ
|
|
|
|
|
|
unsigned long long effective_length;
|
|
|
|
|
|
unsigned long long blocksize;
|
2015-11-13 11:41:52 +08:00
|
|
|
|
}fuzzy_handle_inner_t;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * Accumulator handed to the traversal callback while a digest is assembled.
 */
typedef struct
{
    char *head;                          /* caller-supplied output buffer */
    unsigned int size;                   /* capacity of head in bytes */
    unsigned int offset;                 /* starts at 0; presumably the write position in head -- confirm in the merge callback */
    unsigned long long first_FNV_offset; /* updated by the merge callback */
    unsigned long long last_FNV_offset;  /* updated by the merge callback */
} final_result;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * Accumulator for length statistics.
 * NOTE(review): presumably filled by fuzzy_hash_length(); its body is not visible here.
 */
typedef struct
{
    unsigned long long first_FNV_offset;
    unsigned long long last_FNV_offset;
    unsigned long long hash_length;
} final_length;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
unsigned int fuzzy_hash_calculate(IVI_seg_t * seg, const char * data, unsigned long long offset, unsigned long long blocksize);
|
|
|
|
|
|
void fuzzy_calculate_self(IVI_seg_t * seg, const char * data, unsigned long long offset, unsigned long long blocksize);
|
|
|
|
|
|
void fuzzy_calculate_self_with_prev(IVI_seg_t * prev_seg, IVI_seg_t * seg, const char * data, unsigned long long blocksize);
|
|
|
|
|
|
void fuzzy_modify_next(IVI_seg_t * seg, IVI_seg_t * next_seg, unsigned long long blocksize);
|
|
|
|
|
|
unsigned long long get_prev_continous_length(IVI_seg_t * seg);
|
|
|
|
|
|
unsigned int segment_overlap(fuzzy_handle_t * handle, fuzzy_node * fnode, unsigned int size, unsigned long long offset, const char * data);
|
|
|
|
|
|
void fuzzy_hash_merge(IVI_seg_t * seg, void * user_para);
|
|
|
|
|
|
void fuzzy_hash_merge_new(IVI_seg_t * seg, void * user_para);
|
|
|
|
|
|
void fuzzy_hash_length(IVI_seg_t * seg, void * user_para);
|
|
|
|
|
|
unsigned long long fuzzy_status(fuzzy_handle_t * handle, int type);
|
2015-11-30 16:44:23 +08:00
|
|
|
|
unsigned long long get_blocksize(unsigned long long orilen);
|
2015-11-13 11:41:52 +08:00
|
|
|
|
|
|
|
|
|
|
/* 64-character alphabet; each slice hash is emitted as b64[hash % 64]. */
char * b64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
|
* roll_state<EFBFBD><EFBFBD>ʼ<EFBFBD><EFBFBD>
|
|
|
|
|
|
*/
|
|
|
|
|
|
static void roll_init(struct roll_state * self)
|
|
|
|
|
|
{
|
|
|
|
|
|
memset(self, 0, sizeof(struct roll_state));
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
|
* <EFBFBD><EFBFBD><EFBFBD><EFBFBD>roll_hashֵ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ⲿ<EFBFBD><EFBFBD><EFBFBD>ݶ<EFBFBD>ȡ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
|
|
|
|
|
*/
|
|
|
|
|
|
static void roll_hash(struct roll_state * self, unsigned char c)
|
|
|
|
|
|
{
|
|
|
|
|
|
self->h2 -= self->h1;
|
|
|
|
|
|
self->h2 += ROLLING_WINDOW * (unsigned int)c;
|
|
|
|
|
|
|
|
|
|
|
|
self->h1 += (unsigned int)c;
|
|
|
|
|
|
self->h1 -= (unsigned int)self->window[self->n];
|
|
|
|
|
|
|
|
|
|
|
|
self->window[self->n] = c;
|
|
|
|
|
|
self->n++;
|
|
|
|
|
|
if (self->n == ROLLING_WINDOW)
|
|
|
|
|
|
self->n = 0;
|
|
|
|
|
|
self->h3 <<= 5;
|
|
|
|
|
|
self->h3 ^= c;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
|
* <EFBFBD><EFBFBD><EFBFBD>㴰<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>roll_hashֵ<EFBFBD><EFBFBD>ÿ<EFBFBD><EFBFBD>roll_hashֵ<EFBFBD><EFBFBD><EFBFBD><EFBFBD>һ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>Ƭ
|
|
|
|
|
|
*/
|
|
|
|
|
|
static unsigned int roll_sum(const struct roll_state * self)
|
|
|
|
|
|
{
|
|
|
|
|
|
return self->h1 + self->h2 + self->h3;
|
|
|
|
|
|
/* return self->h1 + self->h2; */
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
|
* <EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>Ƭ<EFBFBD><EFBFBD>FNVֵ
|
|
|
|
|
|
*/
|
|
|
|
|
|
static unsigned int sum_hash(unsigned char c, unsigned int h)
|
|
|
|
|
|
{
|
|
|
|
|
|
return (h * HASH_PRIME) ^ c;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
|
* <EFBFBD><EFBFBD><EFBFBD><EFBFBD>handle
|
|
|
|
|
|
*/
|
|
|
|
|
|
fuzzy_handle_t * fuzzy_create_handle(unsigned long long origin_len)
|
|
|
|
|
|
{
|
2015-11-30 16:44:23 +08:00
|
|
|
|
fuzzy_handle_inner_t * handle = NULL;
|
|
|
|
|
|
unsigned long long tmp_blksize=get_blocksize(origin_len);
|
|
|
|
|
|
if(tmp_blksize==0)
|
|
|
|
|
|
{
|
|
|
|
|
|
return NULL;
|
|
|
|
|
|
}
|
|
|
|
|
|
handle = (fuzzy_handle_inner_t *)malloc(sizeof(fuzzy_handle_inner_t));
|
2015-11-13 11:41:52 +08:00
|
|
|
|
handle->orilen = origin_len;
|
2015-11-30 16:44:23 +08:00
|
|
|
|
handle->ivi = IVI_create();
|
|
|
|
|
|
handle->effective_length = 0;
|
|
|
|
|
|
handle->blocksize=tmp_blksize;
|
|
|
|
|
|
return (fuzzy_handle_t *)handle;
|
2015-11-13 11:41:52 +08:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
|
* IVI_destroy<EFBFBD>Ļص<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>IVI<EFBFBD>е<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
|
|
|
|
|
*/
|
|
|
|
|
|
void fuzzy_node_free(IVI_seg_t * seg, void * usr_para)
|
|
|
|
|
|
{
|
|
|
|
|
|
//printf("free seg[%lu, %lu]\n", seg->left, seg->right);
|
|
|
|
|
|
fuzzy_node * temp = (fuzzy_node*)(seg->data);
|
|
|
|
|
|
if(temp->left_data != NULL)
|
|
|
|
|
|
{
|
|
|
|
|
|
free(temp->left_data);
|
|
|
|
|
|
temp->left_data = NULL;
|
|
|
|
|
|
}
|
|
|
|
|
|
if(temp->hash_result != NULL)
|
|
|
|
|
|
{
|
|
|
|
|
|
free(temp->hash_result);
|
|
|
|
|
|
temp->hash_result = NULL;
|
|
|
|
|
|
}
|
|
|
|
|
|
free(temp->right_status_r);
|
|
|
|
|
|
temp->right_status_r = NULL;
|
|
|
|
|
|
free(temp);
|
|
|
|
|
|
temp = NULL;
|
|
|
|
|
|
return;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
|
* <EFBFBD><EFBFBD><EFBFBD><EFBFBD>handle
|
|
|
|
|
|
*/
|
|
|
|
|
|
void fuzzy_destroy_handle(fuzzy_handle_t * handle)
|
|
|
|
|
|
{
|
|
|
|
|
|
IVI_destroy(((fuzzy_handle_inner_t *)handle)->ivi, fuzzy_node_free, NULL);
|
|
|
|
|
|
free((fuzzy_handle_inner_t *)handle);
|
|
|
|
|
|
return;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
|
* <EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ݣ<EFBFBD><EFBFBD><EFBFBD><EFBFBD>Ҽ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ݵ<EFBFBD>fuzzy_hashֵ
|
|
|
|
|
|
*/
|
|
|
|
|
|
unsigned int fuzzy_feed(fuzzy_handle_t * handle, const char * data, unsigned int size, unsigned long long offset)
|
|
|
|
|
|
{
|
2015-11-19 16:34:40 +08:00
|
|
|
|
if(data == NULL || size == 0)
|
2015-11-13 18:08:55 +08:00
|
|
|
|
{
|
|
|
|
|
|
return 0;
|
|
|
|
|
|
}
|
2015-11-13 11:41:52 +08:00
|
|
|
|
fuzzy_node * node = (fuzzy_node *)calloc(sizeof(fuzzy_node), 1);
|
|
|
|
|
|
node->right_status_r = (struct roll_state *)calloc(sizeof (struct roll_state), 1);
|
|
|
|
|
|
roll_init(node->right_status_r);
|
|
|
|
|
|
node->slice_num = 0;
|
|
|
|
|
|
unsigned int length = segment_overlap(handle, node, size, offset, data);
|
|
|
|
|
|
if(offset == 0)
|
|
|
|
|
|
{
|
|
|
|
|
|
((fuzzy_handle_inner_t *)handle)->effective_length += size - node->right_len;
|
|
|
|
|
|
return (size - node->right_len);
|
|
|
|
|
|
}
|
|
|
|
|
|
else
|
|
|
|
|
|
{
|
|
|
|
|
|
((fuzzy_handle_inner_t *)handle)->effective_length += length;
|
|
|
|
|
|
}
|
|
|
|
|
|
return length; //<2F><><EFBFBD><EFBFBD><EFBFBD>Ѿ<EFBFBD><D1BE><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>Ч<EFBFBD><D0A7><EFBFBD><EFBFBD>
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
unsigned long long get_blocksize(unsigned long long orilen)
|
|
|
|
|
|
{
|
|
|
|
|
|
double tmp = orilen/(64 * BLOCKSIZE_MIN);
|
|
|
|
|
|
double index = floor(log(tmp)/log(2));
|
|
|
|
|
|
double tmp_t = pow(2, index);
|
|
|
|
|
|
unsigned long long blocksize = (unsigned long long)(tmp_t * BLOCKSIZE_MIN);
|
|
|
|
|
|
return blocksize;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
|
* <EFBFBD>ж<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>Ƿ<EFBFBD><EFBFBD><EFBFBD><EFBFBD>Ѿ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>и<EFBFBD><EFBFBD><EFBFBD>
|
|
|
|
|
|
*/
|
|
|
|
|
|
unsigned int segment_overlap(fuzzy_handle_t * handle, fuzzy_node * fnode, unsigned int size, unsigned long long offset, const char * data)
|
|
|
|
|
|
{
|
2015-11-30 16:44:23 +08:00
|
|
|
|
IVI_seg_t ** overlap_segs = NULL;
|
|
|
|
|
|
IVI_seg_t * seg = IVI_seg_malloc(offset, offset + size -1, (void *)fnode);
|
|
|
|
|
|
int overlap_segnum = 0;
|
|
|
|
|
|
unsigned int effective_length = 0;
|
|
|
|
|
|
unsigned int total_length = 0;
|
|
|
|
|
|
unsigned long long blocksize = ((fuzzy_handle_inner_t *)handle)->blocksize;
|
2015-11-13 11:41:52 +08:00
|
|
|
|
|
|
|
|
|
|
/*<2A><>ѯ<EFBFBD>Ƿ<EFBFBD><C7B7>и<EFBFBD><D0B8>ǣ<EFBFBD><C7A3><EFBFBD><EFBFBD><EFBFBD><EFBFBD>и<EFBFBD><D0B8>ǣ<EFBFBD><C7A3><EFBFBD><EFBFBD>ظ<EFBFBD><D8B8>ǵ<EFBFBD>segment<6E><74>Ƭ<EFBFBD><C6AC><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>û<EFBFBD>и<EFBFBD><D0B8>ǣ<EFBFBD><C7A3><EFBFBD><EFBFBD><EFBFBD>0*/
|
|
|
|
|
|
overlap_segnum = IVI_query(((fuzzy_handle_inner_t *)handle)->ivi, offset, offset + size - 1, &overlap_segs);
|
|
|
|
|
|
|
|
|
|
|
|
/*<2A><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ֵΪ<D6B5><CEAA><EFBFBD><EFBFBD><EFBFBD><EFBFBD>˵<EFBFBD><CBB5><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>IJ<EFBFBD><C4B2><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>⣬<EFBFBD><E2A3AC>ӡ<EFBFBD><D3A1><EFBFBD><EFBFBD><EFBFBD><EFBFBD>Ϣ*/
|
|
|
|
|
|
if(overlap_segnum < 0)
|
|
|
|
|
|
{
|
|
|
|
|
|
printf("fragment info error!\n");
|
|
|
|
|
|
IVI_seg_free(seg, fuzzy_node_free, NULL);
|
|
|
|
|
|
return 0;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*<2A><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ֵΪ0<CEAA><30>˵<EFBFBD><CBB5>û<EFBFBD>и<EFBFBD><D0B8>ǵ<EFBFBD><C7B5><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ֱ<EFBFBD>Ӳ<EFBFBD><D3B2><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>*/
|
|
|
|
|
|
if(overlap_segnum == 0)
|
|
|
|
|
|
{
|
|
|
|
|
|
IVI_insert(((fuzzy_handle_inner_t *)handle)->ivi,seg);
|
|
|
|
|
|
effective_length = fuzzy_hash_calculate(seg, data, offset, blocksize);
|
|
|
|
|
|
|
|
|
|
|
|
total_length = seg->right - seg->left + 1;
|
|
|
|
|
|
return effective_length;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*<2A><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ֵΪ<D6B5><CEAA><EFBFBD>ǵ<EFBFBD>Ƭ<EFBFBD><C6AC><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>Ҫ<EFBFBD><D2AA><EFBFBD>ݸ<EFBFBD><DDB8><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>һһ<D2BB><D2BB><EFBFBD>д<EFBFBD><D0B4><EFBFBD>*/
|
|
|
|
|
|
int flag = 0;
|
|
|
|
|
|
int i;
|
|
|
|
|
|
for(i = 0; i < overlap_segnum; i++)
|
|
|
|
|
|
{
|
|
|
|
|
|
switch(IVI_relative_position(seg, overlap_segs[i]))
|
|
|
|
|
|
{
|
|
|
|
|
|
case LEFT_OVERLAP: //<2F>ǣ<F3B8B2B8><C7A3><EFBFBD>seg<65><67><EFBFBD><EFBFBD>ֵ<EFBFBD><D6B5>Ϊoverlap_seg<65><67><EFBFBD><EFBFBD>ֵ
|
|
|
|
|
|
{
|
|
|
|
|
|
seg->right = overlap_segs[i]->left - 1;
|
|
|
|
|
|
break;
|
|
|
|
|
|
}
|
|
|
|
|
|
case CONTAIN: //<2F><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ϵ<EFBFBD><CFB5><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>Dz<EFBFBD><C7B2><EFBFBD>ֱ<EFBFBD>Ӳ<EFBFBD><D3B2>룬Ȼ<EBA3AC><C8BB><EFBFBD>ı<EFBFBD>seg<65><67><EFBFBD><EFBFBD>ֵ<EFBFBD><D6B5><EFBFBD><EFBFBD><EFBFBD><EFBFBD>data<74>ƶ<EFBFBD><C6B6><EFBFBD>ָ<EFBFBD><D6B8><EFBFBD><EFBFBD>λ<EFBFBD><CEBB>
|
|
|
|
|
|
{
|
2015-11-19 16:34:40 +08:00
|
|
|
|
if(overlap_segs[i]->left >= 1 && overlap_segs[i]->left - 1 >= seg->left)
|
2015-11-13 11:41:52 +08:00
|
|
|
|
{
|
|
|
|
|
|
fuzzy_node * node = (fuzzy_node *)calloc(sizeof(fuzzy_node), 1);
|
|
|
|
|
|
memcpy(node, fnode, sizeof(fuzzy_node));
|
|
|
|
|
|
node->right_status_r = (struct roll_state *)calloc(sizeof (struct roll_state), 1);
|
|
|
|
|
|
roll_init(node->right_status_r);
|
|
|
|
|
|
IVI_seg_t * thseg = IVI_seg_malloc(seg->left, overlap_segs[i]->left - 1, (void *)node);
|
|
|
|
|
|
IVI_insert(((fuzzy_handle_inner_t *)handle)->ivi,thseg);
|
|
|
|
|
|
effective_length += fuzzy_hash_calculate(thseg, data, offset, blocksize);
|
|
|
|
|
|
total_length += thseg->right - thseg->left + 1;
|
|
|
|
|
|
}
|
|
|
|
|
|
seg->left = overlap_segs[i]->right + 1;
|
|
|
|
|
|
data = data + ((seg->left) - offset);
|
|
|
|
|
|
offset = seg->left;
|
|
|
|
|
|
break;
|
|
|
|
|
|
}
|
|
|
|
|
|
case RIGHT_OVERLAP: //<2F>Ҹ<EFBFBD><D2B8>ǣ<EFBFBD><C7A3><EFBFBD>seg<65><67><EFBFBD><EFBFBD>ֵ<EFBFBD><D6B5>Ϊoverlap_seg<65><67><EFBFBD><EFBFBD>ֵ
|
|
|
|
|
|
{
|
|
|
|
|
|
seg->left = overlap_segs[i]->right + 1;
|
|
|
|
|
|
data = data + ((seg->left) - offset);
|
|
|
|
|
|
offset = seg->left;
|
|
|
|
|
|
break;
|
|
|
|
|
|
}
|
|
|
|
|
|
case CONTAINED: //<2F><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ֱ<EFBFBD><D6B1><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>Ƭ
|
|
|
|
|
|
{
|
|
|
|
|
|
flag = 1;
|
|
|
|
|
|
//printf("contained! free seg\n");
|
|
|
|
|
|
IVI_seg_free(seg, fuzzy_node_free, NULL);
|
|
|
|
|
|
free(overlap_segs);
|
|
|
|
|
|
break;
|
|
|
|
|
|
}
|
|
|
|
|
|
default:
|
|
|
|
|
|
break;
|
|
|
|
|
|
}
|
|
|
|
|
|
if(flag == 1)
|
|
|
|
|
|
{
|
|
|
|
|
|
return 0;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*<2A><><EFBFBD><EFBFBD><EFBFBD>µ<EFBFBD><C2B5><EFBFBD><EFBFBD>ݲ<EFBFBD><DDB2>뵽<EFBFBD><EBB5BD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>棬<EFBFBD><E6A3AC><EFBFBD>ҽ<EFBFBD><D2BD>м<EFBFBD><D0BC><EFBFBD>*/
|
|
|
|
|
|
if(seg->left <= seg->right)
|
|
|
|
|
|
{
|
|
|
|
|
|
IVI_insert(((fuzzy_handle_inner_t *)handle)->ivi, seg);
|
|
|
|
|
|
effective_length += fuzzy_hash_calculate(seg, data, offset, blocksize);
|
|
|
|
|
|
total_length += seg->right - seg->left + 1;
|
|
|
|
|
|
//((fuzzy_handle_inner_t *)handle)->effective_length += effective_length;
|
|
|
|
|
|
}
|
|
|
|
|
|
else
|
|
|
|
|
|
{
|
|
|
|
|
|
IVI_seg_free(seg, fuzzy_node_free, NULL);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
free(overlap_segs);
|
|
|
|
|
|
return effective_length;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
|
* <EFBFBD><EFBFBD><EFBFBD><EFBFBD>ǰ<EFBFBD><EFBFBD><EFBFBD><EFBFBD>Ƭ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>fuzzy_hashֵ
|
|
|
|
|
|
*/
|
|
|
|
|
|
unsigned int fuzzy_hash_calculate(IVI_seg_t * seg, const char * data, unsigned long long offset, unsigned long long blocksize)
|
|
|
|
|
|
{
|
|
|
|
|
|
IVI_seg_t * prev_seg;
|
|
|
|
|
|
IVI_seg_t * next_seg;
|
|
|
|
|
|
unsigned int effective_length = 0;
|
|
|
|
|
|
|
|
|
|
|
|
prev_seg = IVI_prev_continuous_seg(seg);
|
|
|
|
|
|
next_seg = IVI_next_continuous_seg(seg);
|
|
|
|
|
|
//printf("seg->right = %lu, seg->left = %lu\n", seg->right, seg->left);
|
|
|
|
|
|
unsigned int size = seg->right - seg->left + 1;
|
|
|
|
|
|
fuzzy_node * node = (fuzzy_node *)(seg->data);
|
|
|
|
|
|
if(NULL == prev_seg)
|
|
|
|
|
|
{
|
|
|
|
|
|
//<2F><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ǰ<EFBFBD><C7B0>Ƭ<EFBFBD><C6AC>ֱ<EFBFBD>ӳ<EFBFBD>ʼ<EFBFBD><CABC>roll_state<74><65><EFBFBD>м<EFBFBD><D0BC><EFBFBD>
|
|
|
|
|
|
roll_init(node->right_status_r);
|
|
|
|
|
|
fuzzy_calculate_self(seg, data, offset, blocksize);
|
|
|
|
|
|
effective_length = size - node->left_len;
|
|
|
|
|
|
node->left_offset = offset + node->left_len;
|
|
|
|
|
|
}
|
|
|
|
|
|
else
|
|
|
|
|
|
{
|
|
|
|
|
|
//<2F><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ǰ<EFBFBD><C7B0>Ƭ<EFBFBD><C6AC>ȡ<EFBFBD><C8A1>ǰ<EFBFBD><C7B0>Ƭ<EFBFBD><C6AC><EFBFBD>ұ߽<D2B1><DFBD><EFBFBD><EFBFBD>м<EFBFBD>״ֵ̬<CCAC><D6B5><EFBFBD>м<EFBFBD><D0BC><EFBFBD>
|
|
|
|
|
|
|
|
|
|
|
|
fuzzy_calculate_self_with_prev(prev_seg, seg, data, blocksize);
|
|
|
|
|
|
effective_length = size + ((fuzzy_node *)(prev_seg->data))->right_len;
|
|
|
|
|
|
node->left_offset = offset - ((fuzzy_node *)(prev_seg->data))->right_len;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* <20><><EFBFBD><EFBFBD><EFBFBD>к<EFBFBD><D0BA><EFBFBD>Ƭ,<2C><><EFBFBD><EFBFBD><EFBFBD>Լ<EFBFBD><D4BC><EFBFBD><EFBFBD><EFBFBD><EFBFBD>Ľ<EFBFBD><C4BD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>Ҫ<EFBFBD><D2AA>Ƭ,<2C><><EFBFBD>ĺ<DEB8><C4BA><EFBFBD><EFBFBD>ķ<EFBFBD>Ƭ */
|
|
|
|
|
|
if(next_seg != NULL)
|
|
|
|
|
|
{
|
|
|
|
|
|
//<2F><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ں<EFBFBD><DABA><EFBFBD>Ƭ<EFBFBD><C6AC><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>Ƭ<EFBFBD><C6AC><EFBFBD>ұ߽<D2B1><DFBD><EFBFBD><EFBFBD>м<EFBFBD>״ֵ̬ȡ<D6B5><C8A1><EFBFBD><EFBFBD><EFBFBD>ͺ<EFBFBD><CDBA><EFBFBD>Ƭ<EFBFBD><C6AC><EFBFBD><EFBFBD><EFBFBD>߽<EFBFBD><DFBD><EFBFBD><EFBFBD>м<EFBFBD>״̬<D7B4><CCAC><EFBFBD>м<EFBFBD><D0BC><EFBFBD>
|
|
|
|
|
|
fuzzy_modify_next(seg, next_seg, blocksize);
|
|
|
|
|
|
|
|
|
|
|
|
effective_length += ((fuzzy_node *)(next_seg->data))->left_len;
|
|
|
|
|
|
node->right_offset = offset + size + ((fuzzy_node *)(next_seg->data))->left_len;
|
|
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
else
|
|
|
|
|
|
{
|
|
|
|
|
|
effective_length -= node->right_len;
|
|
|
|
|
|
node->right_offset = offset + (size - (node->right_len));
|
|
|
|
|
|
}
|
|
|
|
|
|
return effective_length;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Hash a segment that has no directly preceding neighbour.
 *
 * The bytes are run through the rolling hash kept in the node and through the
 * per-slice hash, which starts from HASH_INIT. Whenever at least
 * ROLLING_WINDOW bytes have been consumed and the rolling value modulo
 * blocksize equals blocksize - 1, a slice boundary is recorded and one b64
 * character is appended to the node's hash string.
 *
 * On return the node holds the hash string, a copy of the bytes up to the
 * first boundary, the lengths of the unsliced head and tail, and the slice
 * hash state after the last byte.
 *
 * @param seg        segment to hash
 * @param data       bytes of the segment
 * @param offset     unused
 * @param blocksize  block size used for boundary detection; must be non-zero
 */
void fuzzy_calculate_self(IVI_seg_t * seg, const char * data, unsigned long long offset, unsigned long long blocksize)
{
    fuzzy_node * const node = (fuzzy_node *)(seg->data);
    struct roll_state * const roll = node->right_status_r;
    const unsigned long long seg_len = seg->right - seg->left + 1;
    const unsigned long long trigger = blocksize - 1;
    /* At most one character per byte can be emitted. */
    char * const slice_chars = (char *)malloc(sizeof(char) * seg_len);
    unsigned long long emitted = 0;
    unsigned long long last_cut = 0;
    unsigned long long pos;
    unsigned int slice_hash = HASH_INIT;

    (void)offset;

    for (pos = 0; pos < seg_len; pos++)
    {
        const char byte = data[pos];

        roll_hash(roll, byte);
        slice_hash = sum_hash(byte, slice_hash);

        /* No boundary before the window is full or when the rolling value misses. */
        if (pos < ROLLING_WINDOW - 1 || roll_sum(roll) % blocksize != trigger)
        {
            continue;
        }

        node->slice_num++;
        if (node->slice_num == 1)
        {
            node->left_len = pos + 1;
        }
        last_cut = pos;

        /* Emit the slice character and restart the slice hash. */
        slice_chars[emitted++] = b64[slice_hash % 64];
        slice_hash = HASH_INIT;
    }

    if (node->slice_num != 0)
    {
        node->right_len = seg_len - last_cut - 1;
    }
    else
    {
        /* No boundary at all: the whole segment counts as unsliced head. */
        node->left_len = seg_len;
        node->right_len = 0;
    }
    node->right_status_shash = slice_hash;

    /* Store the emitted characters as a NUL-terminated string. */
    node->hash_result = (char *)malloc(sizeof(char) * (emitted + 1));
    memcpy(node->hash_result, slice_chars, emitted);
    node->hash_result[emitted] = '\0';

    /* Keep the unsliced head so that it can be re-hashed later. */
    node->left_data = (char *)malloc(sizeof(char) * (node->left_len));
    memcpy(node->left_data, data, node->left_len);

    free(slice_chars);
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Sum the lengths of seg and of the segments directly preceding it, walking
 * backwards through IVI_prev_continuous_seg().
 *
 * The walk stops as soon as the total reaches ROLLING_WINDOW, so the result is
 * exact only while it is smaller than ROLLING_WINDOW.
 *
 * @param seg  segment to start from; NULL yields 0
 * @return the accumulated length
 */
unsigned long long get_prev_continous_length(IVI_seg_t * seg)
{
    unsigned long long total = 0;
    IVI_seg_t * cursor;

    for (cursor = seg; cursor != NULL; cursor = IVI_prev_continuous_seg(cursor))
    {
        total += cursor->right - cursor->left + 1;
        if (total >= ROLLING_WINDOW)
        {
            break;
        }
    }
    return total;
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
|
* <EFBFBD><EFBFBD><EFBFBD><EFBFBD>ǰ<EFBFBD>εı<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
|
|
|
|
|
*/
|
|
|
|
|
|
void fuzzy_calculate_self_with_prev(IVI_seg_t * prev_seg, IVI_seg_t * seg, const char * data, unsigned long long blocksize)
|
|
|
|
|
|
{
|
|
|
|
|
|
fuzzy_node * prev_node = (fuzzy_node *)(prev_seg->data);
|
|
|
|
|
|
fuzzy_node * node = (fuzzy_node *)(seg->data);
|
|
|
|
|
|
|
|
|
|
|
|
/* ʹ<><CAB9>ǰ<EFBFBD>ε<EFBFBD>roll state */
|
|
|
|
|
|
memcpy(node->right_status_r, prev_node->right_status_r, sizeof(struct roll_state));
|
|
|
|
|
|
struct roll_state * rs = node->right_status_r;
|
|
|
|
|
|
unsigned long long size = seg->right - seg->left + 1;
|
|
|
|
|
|
unsigned int FNV_hash_value = prev_node->right_status_shash;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
char * FNV_hash = (char *)malloc(sizeof(char)*size);
|
2015-11-30 16:44:23 +08:00
|
|
|
|
unsigned long long fnv_index = 0, i=0, last_slice_index=0;
|
2015-11-13 11:41:52 +08:00
|
|
|
|
unsigned int roll_hash_value;
|
|
|
|
|
|
unsigned long long prev_len = get_prev_continous_length(prev_seg);
|
|
|
|
|
|
|
|
|
|
|
|
for(i = 0; i < size; i++)
|
|
|
|
|
|
{
|
|
|
|
|
|
roll_hash(rs, data[i]);
|
|
|
|
|
|
roll_hash_value = roll_sum(rs);
|
|
|
|
|
|
FNV_hash_value = sum_hash(data[i], FNV_hash_value);
|
|
|
|
|
|
if(i + prev_len >= ROLLING_WINDOW \
|
|
|
|
|
|
&& roll_hash_value % blocksize == blocksize - 1)
|
|
|
|
|
|
{
|
|
|
|
|
|
node->slice_num ++;
|
|
|
|
|
|
if(node->slice_num == 1)
|
|
|
|
|
|
{
|
|
|
|
|
|
node->left_len = i + 1;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
last_slice_index = i;
|
|
|
|
|
|
/* <20><><EFBFBD><EFBFBD>FNV<4E><56>ֵ */
|
|
|
|
|
|
FNV_hash[fnv_index ++] = b64[FNV_hash_value % 64];
|
|
|
|
|
|
//printf("data[%lu]=%c, FNV_hash = %c\n", i, data[i], b64[FNV_hash_value % 64]);
|
|
|
|
|
|
FNV_hash_value = HASH_INIT;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* һƬ<D2BB><C6AC>û<EFBFBD><C3BB><EFBFBD>ҵ<EFBFBD> */
|
|
|
|
|
|
if(node->slice_num == 0)
|
|
|
|
|
|
{
|
|
|
|
|
|
node->left_len = size;
|
|
|
|
|
|
node->right_len = 0;
|
|
|
|
|
|
}
|
|
|
|
|
|
else
|
|
|
|
|
|
{
|
|
|
|
|
|
node->right_len = size - last_slice_index - 1;
|
|
|
|
|
|
}
|
|
|
|
|
|
node->right_status_shash = FNV_hash_value;
|
|
|
|
|
|
|
|
|
|
|
|
/* <20><><EFBFBD>ƽ<EFBFBD><C6BD><EFBFBD><EFBFBD><EFBFBD>hash_result<6C><74> */
|
|
|
|
|
|
node->hash_result = (char *)malloc(sizeof(char) * (fnv_index + 1));
|
|
|
|
|
|
memcpy(node->hash_result, FNV_hash, fnv_index);
|
|
|
|
|
|
(node->hash_result)[fnv_index] = '\0';
|
|
|
|
|
|
|
|
|
|
|
|
node->left_data = (char *)malloc(sizeof(char) * (node->left_len));
|
|
|
|
|
|
memcpy(node->left_data, data, node->left_len);
|
|
|
|
|
|
|
|
|
|
|
|
free(FNV_hash);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Re-hash an already stored segment after a segment has appeared directly in
 * front of it.
 *
 * Works like fuzzy_calculate_self_with_prev(), except that the node's previous
 * hash string and head copy are released before the new ones are stored.
 * data must not alias node->left_data, because that buffer is freed here.
 *
 * NOTE(review): a boundary is accepted once i + prev_len >= ROLLING_WINDOW,
 * whereas fuzzy_calculate_self() accepts it once i >= ROLLING_WINDOW - 1 --
 * confirm whether the one-byte difference is intended.
 *
 * @param prev_seg   segment directly preceding seg
 * @param seg        segment to re-hash
 * @param data       bytes of seg (at least the segment's length)
 * @param blocksize  block size used for boundary detection; must be non-zero
 */
void fuzzy_modify_self_with_prev(IVI_seg_t * prev_seg, IVI_seg_t * seg, char * data, unsigned long long blocksize)
{
    fuzzy_node * const before = (fuzzy_node *)(prev_seg->data);
    fuzzy_node * const node = (fuzzy_node *)(seg->data);
    struct roll_state * const roll = node->right_status_r;
    const unsigned long long seg_len = seg->right - seg->left + 1;
    const unsigned long long trigger = blocksize - 1;
    unsigned int slice_hash = before->right_status_shash;
    unsigned long long emitted = 0;
    unsigned long long last_cut = 0;
    unsigned long long pos;

    /* Continue from the rolling state at the end of the previous segment. */
    *roll = *before->right_status_r;

    /* At most one character per byte can be emitted. */
    char * const slice_chars = (char *)malloc(sizeof(char) * seg_len);
    const unsigned long long primed = get_prev_continous_length(prev_seg);

    for (pos = 0; pos < seg_len; pos++)
    {
        const char byte = data[pos];

        roll_hash(roll, byte);
        slice_hash = sum_hash(byte, slice_hash);

        if (pos + primed < ROLLING_WINDOW || roll_sum(roll) % blocksize != trigger)
        {
            continue;
        }

        node->slice_num++;
        if (node->slice_num == 1)
        {
            node->left_len = pos + 1;
        }
        last_cut = pos;

        /* Emit the slice character and restart the slice hash. */
        slice_chars[emitted++] = b64[slice_hash % 64];
        slice_hash = HASH_INIT;
    }

    if (node->slice_num != 0)
    {
        node->right_len = seg_len - last_cut - 1;
    }
    else
    {
        /* No boundary at all: the whole segment counts as unsliced head. */
        node->left_len = seg_len;
        node->right_len = 0;
    }
    node->right_status_shash = slice_hash;

    /* Replace the old hash string with the newly emitted characters. */
    free(node->hash_result);
    node->hash_result = (char *)malloc(sizeof(char) * (emitted + 1));
    memcpy(node->hash_result, slice_chars, emitted);
    node->hash_result[emitted] = '\0';

    /* Replace the stored head with the bytes up to the new first boundary. */
    free(node->left_data);
    node->left_data = (char *)malloc(sizeof(char) * (node->left_len));
    memcpy(node->left_data, data, node->left_len);

    free(slice_chars);
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
|
* <EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>εı<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
|
|
|
|
|
*/
|
|
|
|
|
|
void fuzzy_modify_next(IVI_seg_t * seg, IVI_seg_t * next_seg, unsigned long long blocksize)
|
|
|
|
|
|
{
|
|
|
|
|
|
IVI_seg_t * tmp_curr_seg = seg;
|
|
|
|
|
|
IVI_seg_t * tmp_next_seg = next_seg;
|
|
|
|
|
|
while(tmp_next_seg != NULL)
|
|
|
|
|
|
{
|
|
|
|
|
|
fuzzy_node * tmp_next_node = (fuzzy_node *)(tmp_next_seg->data);
|
|
|
|
|
|
if(tmp_next_node->slice_num != 0)
|
|
|
|
|
|
{
|
|
|
|
|
|
break;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* <20><>һ<EFBFBD><D2BB>û<EFBFBD>з<EFBFBD>Ƭ, <20><><EFBFBD><EFBFBD><EFBFBD>¼<EFBFBD><C2BC><EFBFBD> */
|
|
|
|
|
|
|
|
|
|
|
|
char * data = (char *)malloc(sizeof(char) * (tmp_next_node->left_len));
|
|
|
|
|
|
memcpy(data, tmp_next_node->left_data, tmp_next_node->left_len);
|
|
|
|
|
|
fuzzy_modify_self_with_prev(tmp_curr_seg, tmp_next_seg, data, blocksize);
|
|
|
|
|
|
free(data);
|
|
|
|
|
|
|
|
|
|
|
|
tmp_curr_seg = tmp_next_seg;
|
|
|
|
|
|
tmp_next_seg = IVI_next_continuous_seg(tmp_next_seg);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
unsigned long long prev_len = get_prev_continous_length(tmp_curr_seg);
|
|
|
|
|
|
/* tmp_next_seg<65><67><EFBFBD><EFBFBD><EFBFBD>з<EFBFBD>Ƭ<EFBFBD><C6AC> */
|
|
|
|
|
|
if(tmp_next_seg != NULL)
|
|
|
|
|
|
{
|
|
|
|
|
|
fuzzy_node * tmp_curr_node = (fuzzy_node *)(tmp_curr_seg->data);
|
|
|
|
|
|
fuzzy_node * tmp_next_node = (fuzzy_node *)(tmp_next_seg->data);
|
|
|
|
|
|
|
|
|
|
|
|
unsigned long long size = tmp_next_node->left_len;
|
|
|
|
|
|
|
|
|
|
|
|
char * FNV_hash = (char *)malloc(sizeof(char)*size);
|
|
|
|
|
|
unsigned long long fnv_index = 0, i;
|
|
|
|
|
|
unsigned int roll_hash_value;
|
|
|
|
|
|
|
|
|
|
|
|
struct roll_state rs;
|
|
|
|
|
|
memcpy(&rs, tmp_curr_node->right_status_r, sizeof(struct roll_state));
|
|
|
|
|
|
char * data = tmp_next_node->left_data;
|
|
|
|
|
|
unsigned int FNV_hash_value = tmp_curr_node->right_status_shash;
|
|
|
|
|
|
for(i = 0; i < size; i++)
|
|
|
|
|
|
{
|
|
|
|
|
|
roll_hash(&rs, data[i]);
|
|
|
|
|
|
roll_hash_value = roll_sum(&rs);
|
|
|
|
|
|
FNV_hash_value = sum_hash(data[i], FNV_hash_value);
|
|
|
|
|
|
|
|
|
|
|
|
if((i + prev_len >= ROLLING_WINDOW) \
|
|
|
|
|
|
&& roll_hash_value % blocksize == blocksize - 1)
|
|
|
|
|
|
{
|
|
|
|
|
|
tmp_next_node->slice_num ++;
|
|
|
|
|
|
FNV_hash[fnv_index ++] = b64[FNV_hash_value % 64];
|
|
|
|
|
|
//printf("data[%lu]=%c, FNV_hash = %c\n", i, data[i], b64[FNV_hash_value % 64]);
|
|
|
|
|
|
FNV_hash_value = HASH_INIT;
|
|
|
|
|
|
|
|
|
|
|
|
if(fnv_index == 1)
|
|
|
|
|
|
{
|
|
|
|
|
|
tmp_next_node->left_len = i + 1;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
tmp_next_node->slice_num --;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* <20><><EFBFBD>ƽ<EFBFBD><C6BD><EFBFBD><EFBFBD><EFBFBD>hash_result<6C><74> */
|
|
|
|
|
|
unsigned long long old_len = strlen(tmp_next_node->hash_result);
|
|
|
|
|
|
if(old_len == 1)
|
|
|
|
|
|
{
|
|
|
|
|
|
free(tmp_next_node->hash_result);
|
|
|
|
|
|
tmp_next_node->hash_result = (char *)malloc(sizeof(char) * (fnv_index + 1));
|
|
|
|
|
|
memcpy(tmp_next_node->hash_result, FNV_hash, fnv_index);
|
|
|
|
|
|
(tmp_next_node->hash_result)[fnv_index] = '\0';
|
|
|
|
|
|
}
|
|
|
|
|
|
else
|
|
|
|
|
|
{
|
|
|
|
|
|
unsigned long long new_len = old_len - 1 + fnv_index;
|
|
|
|
|
|
char tmp[old_len - 1];
|
|
|
|
|
|
char * old_hash = (tmp_next_node->hash_result) + 1;
|
|
|
|
|
|
memcpy(tmp, old_hash, old_len - 1);
|
|
|
|
|
|
free(tmp_next_node->hash_result);
|
|
|
|
|
|
tmp_next_node->hash_result = (char *)malloc(sizeof(char) * (new_len + 1));
|
|
|
|
|
|
memset(tmp_next_node->hash_result, '\0', (new_len + 1));
|
|
|
|
|
|
memcpy(tmp_next_node->hash_result, FNV_hash, fnv_index);
|
|
|
|
|
|
strncat(tmp_next_node->hash_result, tmp, old_len - 1);
|
|
|
|
|
|
(tmp_next_node->hash_result)[new_len] = '\0';
|
|
|
|
|
|
}
|
|
|
|
|
|
free(FNV_hash);
|
|
|
|
|
|
}
|
|
|
|
|
|
return;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
|
* ȡ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>hash_resultֵ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ƴ<EFBFBD>ӣ<EFBFBD><EFBFBD>γ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>result<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>abc[1:100]def[200:300]<EFBFBD><EFBFBD><EFBFBD>ָ<EFBFBD>ʽ
|
|
|
|
|
|
*/
|
|
|
|
|
|
int fuzzy_digest(fuzzy_handle_t * handle, char * result, unsigned int size)
|
|
|
|
|
|
{
|
|
|
|
|
|
final_result * temp = (final_result *)malloc(sizeof(final_result));
|
|
|
|
|
|
temp->head = result;
|
|
|
|
|
|
temp->size = size;
|
|
|
|
|
|
temp->offset = 0;
|
|
|
|
|
|
temp->first_FNV_offset = 0;
|
|
|
|
|
|
temp->last_FNV_offset = 0;
|
|
|
|
|
|
//final_result * temp = (final_result *)malloc(sizeof(final_result));
|
|
|
|
|
|
//temp->offset = 0;
|
|
|
|
|
|
IVI_traverse(((fuzzy_handle_inner_t *)handle)->ivi, fuzzy_hash_merge_new, (void *) temp);
|
|
|
|
|
|
result[size - 1] = '\0';
|
|
|
|
|
|
//memcpy(result, temp->result, size);
|
|
|
|
|
|
free(temp);
|
|
|
|
|
|
return 0;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
void fuzzy_hash_merge_new(IVI_seg_t * seg, void * user_para)
|
|
|
|
|
|
{
|
|
|
|
|
|
IVI_seg_t * prev_seg;
|
|
|
|
|
|
IVI_seg_t * next_seg;
|
|
|
|
|
|
prev_seg = IVI_prev_continuous_seg(seg);
|
|
|
|
|
|
next_seg = IVI_next_continuous_seg(seg);
|
|
|
|
|
|
char buffer[MAXSIZE];
|
|
|
|
|
|
final_result * tmp = (final_result *)user_para;
|
|
|
|
|
|
fuzzy_node * node = (fuzzy_node *)(seg->data);
|
|
|
|
|
|
if(node->slice_num != 0)
|
|
|
|
|
|
{
|
|
|
|
|
|
tmp->last_FNV_offset = seg->right - node->right_len;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if(prev_seg == NULL && next_seg == NULL) //<2F><><EFBFBD><EFBFBD>ǰ<EFBFBD><C7B0>Ƭ<EFBFBD>ͺ<EFBFBD><CDBA><EFBFBD>Ƭ<EFBFBD><C6AC>Ϊ<EFBFBD>գ<EFBFBD><D5A3><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ƴ<EFBFBD><C6B4>
|
|
|
|
|
|
{
|
|
|
|
|
|
tmp->first_FNV_offset = seg->left;
|
|
|
|
|
|
tmp->last_FNV_offset = seg->right - node->right_len;
|
|
|
|
|
|
sprintf(buffer, "%s[%llu:%llu]", node->hash_result, tmp->first_FNV_offset, seg->right);
|
|
|
|
|
|
}
|
|
|
|
|
|
if(prev_seg == NULL && next_seg != NULL) //<2F><><EFBFBD><EFBFBD>ǰ<EFBFBD><C7B0>ƬΪ<C6AC>գ<EFBFBD><D5A3><EFBFBD><EFBFBD><EFBFBD>Ƭ<EFBFBD><C6AC>Ϊ<EFBFBD>գ<EFBFBD><D5A3><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ֵ<EFBFBD><D6B5><EFBFBD><EFBFBD>FNVֵ<56><D6B5><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ȥ
|
|
|
|
|
|
{
|
|
|
|
|
|
tmp->first_FNV_offset = seg->left;
|
|
|
|
|
|
|
|
|
|
|
|
sprintf(buffer, "%s", node->hash_result);
|
|
|
|
|
|
}
|
|
|
|
|
|
if(prev_seg != NULL && next_seg == NULL) //<2F><><EFBFBD><EFBFBD>ǰ<EFBFBD><C7B0>Ƭ<EFBFBD><C6AC>Ϊ<EFBFBD>գ<EFBFBD><D5A3><EFBFBD><EFBFBD><EFBFBD>ƬΪ<C6AC>գ<EFBFBD><D5A3><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ֵ<EFBFBD><D6B5><EFBFBD><EFBFBD>ƫ<EFBFBD><C6AB>
|
|
|
|
|
|
{
|
|
|
|
|
|
|
|
|
|
|
|
sprintf(buffer, "%s[%llu:%llu]", node->hash_result, tmp->first_FNV_offset, seg->right);
|
|
|
|
|
|
}
|
|
|
|
|
|
if(prev_seg != NULL && next_seg != NULL) //<2F><><EFBFBD><EFBFBD>ǰ<EFBFBD><C7B0>Ƭ<EFBFBD><C6AC>Ϊ<EFBFBD>գ<EFBFBD><D5A3><EFBFBD><EFBFBD><EFBFBD>Ƭ<EFBFBD><C6AC>Ϊ<EFBFBD>գ<EFBFBD><D5A3><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>FNVֵ<56><D6B5>ȥ
|
|
|
|
|
|
{
|
|
|
|
|
|
sprintf(buffer, "%s", node->hash_result);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
unsigned int inner_size = strlen(buffer);
|
|
|
|
|
|
tmp->offset += inner_size;
|
|
|
|
|
|
if(tmp->offset <= tmp->size)
|
|
|
|
|
|
{
|
|
|
|
|
|
memcpy(tmp->head, buffer, inner_size);
|
|
|
|
|
|
tmp->head += inner_size;
|
|
|
|
|
|
}
|
|
|
|
|
|
else
|
|
|
|
|
|
{
|
|
|
|
|
|
unsigned int length = (tmp->size - (tmp->offset - inner_size));
|
|
|
|
|
|
if(length != 0)
|
|
|
|
|
|
{
|
|
|
|
|
|
memcpy(tmp->head, buffer, length);
|
|
|
|
|
|
}
|
|
|
|
|
|
tmp->offset = tmp->size;
|
|
|
|
|
|
tmp->head += length;
|
|
|
|
|
|
}
|
|
|
|
|
|
return;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
|
* <EFBFBD><EFBFBD><EFBFBD><EFBFBD>fuzzy_hash<EFBFBD>ĸ<EFBFBD><EFBFBD>ֳ<EFBFBD><EFBFBD><EFBFBD>
|
|
|
|
|
|
*/
|
|
|
|
|
|
unsigned long long fuzzy_status(fuzzy_handle_t * handle, int type)
|
|
|
|
|
|
{
|
2015-11-30 16:44:23 +08:00
|
|
|
|
unsigned long long length;
|
|
|
|
|
|
final_length tmp_length;
|
|
|
|
|
|
fuzzy_handle_inner_t * _handle = (fuzzy_handle_inner_t *)(handle);
|
|
|
|
|
|
switch(type)
|
|
|
|
|
|
{
|
|
|
|
|
|
case TOTAL_LENGTH: //<2F>Ѿ<EFBFBD><D1BE><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>hashֵ<68><D6B5>ȫ<EFBFBD><C8AB><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
|
|
|
|
|
length = IVI_seg_length(_handle->ivi);
|
|
|
|
|
|
break;
|
|
|
|
|
|
case EFFECTIVE_LENGTH: //<2F><><EFBFBD><EFBFBD><EFBFBD>ڼ<EFBFBD><DABC><EFBFBD>hashֵ<68><D6B5><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>Ч<EFBFBD><D0A7><EFBFBD><EFBFBD>
|
|
|
|
|
|
length = _handle->effective_length;
|
|
|
|
|
|
break;
|
|
|
|
|
|
case HASH_LENGTH: //<2F><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ϣ<EFBFBD><CFA3><EFBFBD><EFBFBD><EFBFBD>ij<EFBFBD><C4B3><EFBFBD>
|
|
|
|
|
|
tmp_length.hash_length = 0;
|
|
|
|
|
|
tmp_length.first_FNV_offset = 0;
|
|
|
|
|
|
tmp_length.last_FNV_offset = 0;
|
|
|
|
|
|
IVI_traverse(_handle->ivi, fuzzy_hash_length, (void *)&tmp_length);
|
|
|
|
|
|
length = tmp_length.hash_length + 1;
|
|
|
|
|
|
break;
|
|
|
|
|
|
default:
|
|
|
|
|
|
return 0;
|
|
|
|
|
|
}
|
|
|
|
|
|
return length;
|
2015-11-13 11:41:52 +08:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * IVI_traverse callback: add the length of the digest text one segment
 * would contribute to the running total in a final_length.
 *
 * @param seg        segment being visited; seg->data is its fuzzy_node.
 * @param user_para  the final_length accumulator.
 *
 * Every segment counts strlen(hash_result). A segment with no continuous
 * predecessor records seg->left as first_FNV_offset. A segment with no
 * continuous successor also counts the "[first_FNV_offset:seg->right]"
 * suffix.
 *
 * The length is computed without formatting the hash into a fixed-size
 * scratch buffer, so a long hash_result cannot overflow the stack.
 */
void fuzzy_hash_length(IVI_seg_t * seg, void * user_para)
{
    IVI_seg_t * prev_seg = IVI_prev_continuous_seg(seg);
    IVI_seg_t * next_seg = IVI_next_continuous_seg(seg);
    final_length * tmp = (final_length *)user_para;
    fuzzy_node * node = (fuzzy_node *)(seg->data);
    /* "[%llu:%llu]" needs at most 1 + 20 + 1 + 20 + 1 characters plus NUL. */
    char range[64];

    if(node->slice_num != 0)
    {
        tmp->last_FNV_offset = seg->right - node->right_len;
    }

    if(prev_seg == NULL)
    {
        /* No continuous predecessor: this segment opens a new range. */
        tmp->first_FNV_offset = seg->left;
        if(next_seg == NULL)
        {
            /* Isolated segment: it is also the end of its own range. */
            tmp->last_FNV_offset = seg->right - node->right_len;
        }
    }

    tmp->hash_length += strlen(node->hash_result);

    if(next_seg == NULL)
    {
        /* No continuous successor: the range suffix is part of the text. */
        int n = snprintf(range, sizeof(range), "[%llu:%llu]",
                         (unsigned long long)tmp->first_FNV_offset,
                         (unsigned long long)seg->right);
        if(n > 0)
        {
            size_t range_len = (size_t)n;
            if(range_len >= sizeof(range))
            {
                range_len = sizeof(range) - 1;
            }
            tmp->hash_length += range_len;
        }
    }

    return;
}
|
|
|
|
|
|
|