This repository has been archived on 2025-09-14. You can view files and clone it, but cannot push or open issues or pull requests.
Files
tango-maat/src/entry/mesa_fuzzy.c

829 lines
25 KiB
C
Raw Normal View History

#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <math.h>
#include "mesa_fuzzy.h"
#include "interval_index.h"
/* Number of most recent bytes kept in the rolling-hash window (see struct roll_state). */
#define ROLLING_WINDOW 7
/* Base factor of the slice block size; get_blocksize() scales it by a power of two. */
#define BLOCKSIZE_MIN 3
/* Size constant for fixed-length scratch buffers. */
#define MAXSIZE 10000
/* Multiplier applied by sum_hash(); 0x01000193 is the 32-bit FNV prime. */
#define HASH_PRIME 0x01000193
/* Starting value of the per-slice running hash; the scanners reset to it after each slice boundary. */
#define HASH_INIT 0x28021967
/* Debug switch; NOTE(review): not referenced in the part of the file visible here. */
#define DEBUG (0)
/* Rolling-hash state carried from byte to byte (and from one segment to the next). */
struct roll_state
{
unsigned char window[ROLLING_WINDOW]; /* the last ROLLING_WINDOW bytes, used as a circular buffer */
unsigned int h1, h2, h3; /* h1: sum of the window bytes; h2: position-weighted sum; h3: shift/xor accumulator */
unsigned int n; /* index in window[] that the next byte will overwrite */
};
/* Per-segment hashing record stored as the user data of an IVI segment. */
typedef struct
{
char * left_data; // heap copy of the segment's first left_len bytes
unsigned int left_len; // bytes up to and including the first slice boundary, or the whole segment when no boundary was found
char * hash_result; // NUL-terminated string holding one base64 character per slice boundary found
unsigned long long left_offset; // written by fuzzy_hash_calculate()
unsigned long long right_offset; // written by fuzzy_hash_calculate()
struct roll_state * right_status_r; // rolling-hash state after the segment's last byte
unsigned int right_status_shash; // running FNV value after the segment's last byte
unsigned int right_len;// bytes after the last slice boundary (0 when no boundary was found)
int slice_num; // number of slice boundaries recorded for this segment
}fuzzy_node;
/* Concrete layout behind the opaque fuzzy_handle_t pointer handed to callers. */
typedef struct
{
unsigned long long orilen; // length given to fuzzy_create_handle(); get_blocksize() derives the block size from it
IVI_t * ivi; // interval index owned by this handle; it holds every segment fed so far
unsigned long long effective_length; // running total accumulated by fuzzy_feed()
}fuzzy_handle_inner_t;
/* Traversal context used by fuzzy_digest() / fuzzy_hash_merge_new() while writing the digest text. */
typedef struct
{
char * head; // write cursor inside the caller's output buffer; advanced after every copy
unsigned int size; // capacity of the caller's output buffer in bytes
unsigned int offset; // number of bytes written so far (never allowed to exceed size)
unsigned long long first_FNV_offset; // left edge of the segment that starts the current contiguous run
unsigned long long last_FNV_offset; // seg->right - right_len of the latest segment that had a slice boundary
}final_result;
/* Traversal context used by fuzzy_status(HASH_LENGTH) / fuzzy_hash_length() to size the digest text. */
typedef struct
{
unsigned long long first_FNV_offset; // left edge of the segment that starts the current contiguous run
unsigned long long last_FNV_offset; // seg->right - right_len of the latest segment that had a slice boundary
unsigned long long hash_length; // accumulated length, in characters, of all digest pieces seen so far
}final_length;
/*
 * Forward declarations for functions that are used before their definitions.
 * NOTE(review): fuzzy_hash_merge() is declared here but no definition is visible
 * in this part of the file - confirm it still exists.
 */
unsigned int fuzzy_hash_calculate(IVI_seg_t * seg, const char * data, unsigned long long offset, unsigned long long blocksize);
void fuzzy_calculate_self(IVI_seg_t * seg, const char * data, unsigned long long offset, unsigned long long blocksize);
void fuzzy_calculate_self_with_prev(IVI_seg_t * prev_seg, IVI_seg_t * seg, const char * data, unsigned long long blocksize);
void fuzzy_modify_next(IVI_seg_t * seg, IVI_seg_t * next_seg, unsigned long long blocksize);
unsigned long long get_prev_continous_length(IVI_seg_t * seg);
unsigned int segment_overlap(fuzzy_handle_t * handle, fuzzy_node * fnode, unsigned int size, unsigned long long offset, const char * data);
void fuzzy_hash_merge(IVI_seg_t * seg, void * user_para);
void fuzzy_hash_merge_new(IVI_seg_t * seg, void * user_para);
void fuzzy_hash_length(IVI_seg_t * seg, void * user_para);
unsigned long long fuzzy_status(fuzzy_handle_t * handle, int type);
/* 64-character alphabet; each slice's running hash value modulo 64 selects one character of the digest. */
char * b64 =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
/**
 * Reset a rolling-hash state: every byte of the structure (window, sums and
 * write index) is cleared to zero.
 */
static void roll_init(struct roll_state * self)
{
    memset(self, 0, sizeof *self);
}
/**
 * Feed one byte into the rolling hash.
 *
 * The byte replaces the oldest entry of the circular window; h1, h2 and h3
 * are updated so that roll_sum() reflects the new window contents.
 */
static void roll_hash(struct roll_state * self, unsigned char c)
{
    const unsigned int incoming = (unsigned int)c;
    const unsigned int outgoing = (unsigned int)self->window[self->n];

    /* h2 must be updated with the *old* h1, so it goes first. */
    self->h2 = self->h2 - self->h1 + ROLLING_WINDOW * incoming;
    self->h1 = self->h1 + incoming - outgoing;

    /* Store the byte and advance the circular write index. */
    self->window[self->n] = c;
    if (++self->n == ROLLING_WINDOW)
    {
        self->n = 0;
    }

    self->h3 = (self->h3 << 5) ^ c;
}
/**
 * Current rolling-hash value: the sum of the three accumulators.
 * The scanners compare this value, modulo the block size, against
 * blocksize - 1 to decide where a slice ends.
 */
static unsigned int roll_sum(const struct roll_state * self)
{
    unsigned int total = self->h1;

    total += self->h2;
    total += self->h3;
    return total;
}
/**
 * One step of the per-slice running hash: multiply the previous value by
 * HASH_PRIME, then xor in the new byte.
 *
 * @param c  byte being added
 * @param h  hash value before this byte
 * @return   hash value after this byte
 */
static unsigned int sum_hash(unsigned char c, unsigned int h)
{
    const unsigned int scaled = h * HASH_PRIME;

    return scaled ^ c;
}
/**
 * Create a fuzzy-hash handle.
 *
 * @param origin_len  length of the complete original data; it is stored in the
 *                    handle and later used to derive the slice block size.
 * @return a new handle to be released with fuzzy_destroy_handle(), or NULL
 *         when the handle or its interval index could not be allocated.
 */
fuzzy_handle_t * fuzzy_create_handle(unsigned long long origin_len)
{
    fuzzy_handle_inner_t * handle = (fuzzy_handle_inner_t *)malloc(sizeof(fuzzy_handle_inner_t));

    if (handle == NULL)
    {
        return NULL;
    }
    handle->orilen = origin_len;
    handle->effective_length = 0;
    handle->ivi = IVI_create();
    if (handle->ivi == NULL)
    {
        /* A handle without an index is unusable; do not hand it out. */
        free(handle);
        return NULL;
    }
    return (fuzzy_handle_t *)handle;
}
/**
 * Segment-data destructor passed to IVI_destroy() / IVI_seg_free().
 *
 * Releases the fuzzy_node attached to the segment together with the three
 * heap buffers it owns.
 *
 * @param seg       segment whose data pointer is a fuzzy_node
 * @param usr_para  unused
 */
void fuzzy_node_free(IVI_seg_t * seg, void * usr_para)
{
    fuzzy_node * node = (fuzzy_node *)(seg->data);

    /* free(NULL) is a no-op, so buffers that were never filled need no guard. */
    free(node->left_data);
    free(node->hash_result);
    free(node->right_status_r);
    free(node);
}
/**
 * Destroy a handle created by fuzzy_create_handle().
 *
 * Destroys the interval index (freeing every stored segment through
 * fuzzy_node_free) and then the handle itself. Passing NULL is a no-op.
 */
void fuzzy_destroy_handle(fuzzy_handle_t * handle)
{
    fuzzy_handle_inner_t * inner = (fuzzy_handle_inner_t *)handle;

    if (inner == NULL)
    {
        return;
    }
    IVI_destroy(inner->ivi, fuzzy_node_free, NULL);
    free(inner);
}
/* Worker behind segment_overlap(); additionally reports whether fnode is still alive. */
static unsigned int segment_overlap_tracked(fuzzy_handle_t * handle, fuzzy_node * fnode, unsigned int size,
                                            unsigned long long offset, const char * data, int * node_stored);

/**
 * Feed one piece of data and update the fuzzy hash of the stored segments.
 *
 * @param handle  handle from fuzzy_create_handle()
 * @param data    the bytes of this piece
 * @param size    number of bytes in data
 * @param offset  position of data[0] within the original data
 * @return the length counted as effective for this call; the same amount is
 *         added to the handle's effective_length. Returns 0 for invalid
 *         arguments, for an empty piece, or when allocation fails.
 */
unsigned int fuzzy_feed(fuzzy_handle_t * handle, const char * data, unsigned int size, unsigned long long offset)
{
    fuzzy_handle_inner_t * inner = (fuzzy_handle_inner_t *)handle;
    fuzzy_node * node;
    unsigned int length;
    int node_stored = 0;

    /* size == 0 would make the range [offset, offset + size - 1] wrap around. */
    if (inner == NULL || data == NULL || size == 0)
    {
        return 0;
    }

    node = (fuzzy_node *)calloc(1, sizeof(fuzzy_node));
    if (node == NULL)
    {
        return 0;
    }
    /* calloc leaves the rolling state zeroed, i.e. already initialised. */
    node->right_status_r = (struct roll_state *)calloc(1, sizeof(struct roll_state));
    if (node->right_status_r == NULL)
    {
        free(node);
        return 0;
    }

    /* Ownership of node passes to the call below, which may free it. */
    length = segment_overlap_tracked(handle, node, size, offset, data, &node_stored);

    /*
     * A piece that starts the data counts everything except its unfinished
     * tail. node may only be read when it ended up in the index; otherwise it
     * has already been freed and the computed length is used instead.
     */
    if (offset == 0 && node_stored)
    {
        length = size - node->right_len;
    }
    inner->effective_length += length;
    return length;
}

/**
 * Derive the slice block size from the length of the original data:
 * BLOCKSIZE_MIN * 2^k, where 2^k is the largest power of two not exceeding
 * orilen / (64 * BLOCKSIZE_MIN).
 *
 * The result is never smaller than BLOCKSIZE_MIN, so callers can safely use it
 * as a modulus even for very short (or unknown-length) inputs. Integer
 * arithmetic is used so the result does not depend on floating-point rounding.
 */
unsigned long long get_blocksize(unsigned long long orilen)
{
    unsigned long long units = orilen / (64ULL * BLOCKSIZE_MIN);
    unsigned long long blocksize = BLOCKSIZE_MIN;

    while (units > 1)
    {
        blocksize <<= 1;
        units >>= 1;
    }
    return blocksize;
}

/**
 * Insert the range [offset, offset + size - 1] into the handle's index,
 * skipping every part that is already covered by stored segments, and hash
 * each newly inserted part.
 *
 * fnode is always consumed: it is either attached to a segment stored in the
 * index (*node_stored = 1) or freed (*node_stored = 0).
 *
 * @return the sum of the effective lengths reported by fuzzy_hash_calculate()
 *         for the inserted parts; 0 when the query fails or the range is
 *         entirely covered by one existing segment.
 */
static unsigned int segment_overlap_tracked(fuzzy_handle_t * handle, fuzzy_node * fnode, unsigned int size,
                                            unsigned long long offset, const char * data, int * node_stored)
{
    fuzzy_handle_inner_t * inner = (fuzzy_handle_inner_t *)handle;
    IVI_seg_t ** overlap_segs = NULL;
    IVI_seg_t * seg = IVI_seg_malloc(offset, offset + size - 1, (void *)fnode);
    unsigned long long blocksize = get_blocksize(inner->orilen);
    unsigned int effective_length = 0;
    int overlap_segnum;
    int i;

    *node_stored = 0;

    /* Ask the index which stored segments intersect the new range. */
    overlap_segnum = IVI_query(inner->ivi, offset, offset + size - 1, &overlap_segs);
    if (overlap_segnum < 0)
    {
        /* Negative result: the query itself failed. */
        printf("fragment info error!\n");
        IVI_seg_free(seg, fuzzy_node_free, NULL);
        return 0;
    }

    if (overlap_segnum == 0)
    {
        /* Nothing overlaps: store the range as is. */
        free(overlap_segs);
        IVI_insert(inner->ivi, seg);
        *node_stored = 1;
        return fuzzy_hash_calculate(seg, data, offset, blocksize);
    }

    /* Trim the new range against every overlapping segment in turn. */
    for (i = 0; i < overlap_segnum; i++)
    {
        IVI_seg_t * other = overlap_segs[i];

        switch (IVI_relative_position(seg, other))
        {
            case LEFT_OVERLAP:
                /* The existing segment covers our tail: cut the tail off. */
                seg->right = other->left - 1;
                break;

            case CONTAIN:
                /*
                 * The existing segment lies inside our range. The part to its
                 * left (if any) is stored as a segment of its own, then our
                 * range restarts right after the existing segment.
                 * "other->left > seg->left" is used instead of
                 * "other->left - 1 >= seg->left" so that left == 0 cannot wrap.
                 */
                if (other->left > seg->left)
                {
                    fuzzy_node * piece = (fuzzy_node *)calloc(1, sizeof(fuzzy_node));
                    struct roll_state * piece_rs = (struct roll_state *)calloc(1, sizeof(struct roll_state));

                    if (piece != NULL && piece_rs != NULL)
                    {
                        memcpy(piece, fnode, sizeof(fuzzy_node));
                        piece->right_status_r = piece_rs;
                        IVI_seg_t * left_seg = IVI_seg_malloc(seg->left, other->left - 1, (void *)piece);
                        IVI_insert(inner->ivi, left_seg);
                        effective_length += fuzzy_hash_calculate(left_seg, data, offset, blocksize);
                    }
                    else
                    {
                        /* Out of memory: this left part cannot be recorded. */
                        free(piece);
                        free(piece_rs);
                    }
                }
                seg->left = other->right + 1;
                data = data + ((seg->left) - offset);
                offset = seg->left;
                break;

            case RIGHT_OVERLAP:
                /* The existing segment covers our head: start after it. */
                seg->left = other->right + 1;
                data = data + ((seg->left) - offset);
                offset = seg->left;
                break;

            case CONTAINED:
                /* Fully covered by an existing segment: nothing new to store. */
                IVI_seg_free(seg, fuzzy_node_free, NULL);
                free(overlap_segs);
                return 0;

            default:
                break;
        }
    }

    /* Store whatever is left of the range after trimming. */
    if (seg->left <= seg->right)
    {
        IVI_insert(inner->ivi, seg);
        *node_stored = 1;
        effective_length += fuzzy_hash_calculate(seg, data, offset, blocksize);
    }
    else
    {
        IVI_seg_free(seg, fuzzy_node_free, NULL);
    }
    free(overlap_segs);
    return effective_length;
}

/**
 * Check a new range against the segments already stored, insert the parts
 * that are not covered yet and hash them.
 *
 * fnode is consumed by this call (stored in the index or freed); the caller
 * must not use it afterwards.
 *
 * @return the effective length contributed by the newly inserted parts.
 */
unsigned int segment_overlap(fuzzy_handle_t * handle, fuzzy_node * fnode, unsigned int size, unsigned long long offset, const char * data)
{
    int node_stored = 0;

    return segment_overlap_tracked(handle, fnode, size, offset, data, &node_stored);
}
/**
 * Hash a segment that has just been inserted into the index and fix up its
 * neighbourhood.
 *
 * With a contiguous predecessor the scan continues from that segment's
 * right-edge state; without one it starts from a fresh rolling state. When a
 * contiguous successor exists its leading part is re-hashed through
 * fuzzy_modify_next().
 *
 * @param seg        the newly inserted segment (its data is a fuzzy_node)
 * @param data       bytes of the segment
 * @param offset     position of data[0] within the original data
 * @param blocksize  slice block size
 * @return the effective length computed for this segment
 */
unsigned int fuzzy_hash_calculate(IVI_seg_t * seg, const char * data, unsigned long long offset, unsigned long long blocksize)
{
    IVI_seg_t * const before = IVI_prev_continuous_seg(seg);
    IVI_seg_t * const after = IVI_next_continuous_seg(seg);
    fuzzy_node * const self = (fuzzy_node *)(seg->data);
    const unsigned int seg_bytes = seg->right - seg->left + 1;
    unsigned int counted;

    if (before != NULL)
    {
        /* Continue from the predecessor's right-edge state. */
        fuzzy_node * before_node = (fuzzy_node *)(before->data);

        fuzzy_calculate_self_with_prev(before, seg, data, blocksize);
        counted = seg_bytes + before_node->right_len;
        self->left_offset = offset - before_node->right_len;
    }
    else
    {
        /* No predecessor: start from a clean rolling state. */
        roll_init(self->right_status_r);
        fuzzy_calculate_self(seg, data, offset, blocksize);
        counted = seg_bytes - self->left_len;
        self->left_offset = offset + self->left_len;
    }

    if (after == NULL)
    {
        /* Nothing follows: the bytes after the last boundary stay open. */
        counted -= self->right_len;
        self->right_offset = offset + (seg_bytes - (self->right_len));
        return counted;
    }

    /* The successor's leading part is re-hashed first; its left_len is read afterwards. */
    fuzzy_modify_next(seg, after, blocksize);
    {
        fuzzy_node * after_node = (fuzzy_node *)(after->data);

        counted += after_node->left_len;
        self->right_offset = offset + seg_bytes + after_node->left_len;
    }
    return counted;
}
void fuzzy_calculate_self(IVI_seg_t * seg, const char * data, unsigned long long offset, unsigned long long blocksize)
{
fuzzy_node * node = (fuzzy_node *)(seg->data);
struct roll_state * rs = node->right_status_r;
unsigned long long size = seg->right - seg->left + 1;
unsigned int FNV_hash_value = HASH_INIT;
char * FNV_hash = (char *)malloc(sizeof(char)*size);
unsigned long long fnv_index = 0, i, last_slice_index;
unsigned int roll_hash_value;
for(i = 0; i < size; i++)
{
roll_hash(rs, data[i]);
roll_hash_value = roll_sum(rs);
FNV_hash_value = sum_hash(data[i], FNV_hash_value);
if(i >= ROLLING_WINDOW - 1 && roll_hash_value % blocksize == blocksize - 1)
{
node->slice_num ++;
if(node->slice_num == 1)
{
node->left_len = i + 1;
}
last_slice_index = i;
/* <20><><EFBFBD><EFBFBD>FNV<4E><56>ֵ */
FNV_hash[fnv_index ++] = b64[FNV_hash_value % 64];
//printf("data[%lu]=%c, FNV_hash = %c\n", i, data[i], b64[FNV_hash_value % 64]);
FNV_hash_value = HASH_INIT;
}
}
/* һƬ<D2BB><C6AC>û<EFBFBD><C3BB><EFBFBD>ҵ<EFBFBD> */
if(node->slice_num == 0)
{
node->left_len = size;
node->right_len = 0;
}
else
{
node->right_len = size - last_slice_index - 1;
}
node->right_status_shash = FNV_hash_value;
/* <20><><EFBFBD>ƽ<EFBFBD><C6BD><EFBFBD><EFBFBD><EFBFBD>hash_result<6C><74> */
node->hash_result = (char *)malloc(sizeof(char) * (fnv_index + 1));
memcpy(node->hash_result, FNV_hash, fnv_index);
(node->hash_result)[fnv_index] = '\0';
node->left_data = (char *)malloc(sizeof(char) * (node->left_len));
memcpy(node->left_data, data, node->left_len);
free(FNV_hash);
return;
}
unsigned long long get_prev_continous_length(IVI_seg_t * seg)
{
unsigned long long length = 0;
IVI_seg_t * temp = seg;
while(temp != NULL)
{
length += temp->right - temp->left + 1;
if(length >= ROLLING_WINDOW)
return length;
temp = IVI_prev_continuous_seg(temp);
}
return length;
}
/**
* <EFBFBD><EFBFBD><EFBFBD><EFBFBD>ǰ<EFBFBD>εı<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
*/
void fuzzy_calculate_self_with_prev(IVI_seg_t * prev_seg, IVI_seg_t * seg, const char * data, unsigned long long blocksize)
{
fuzzy_node * prev_node = (fuzzy_node *)(prev_seg->data);
fuzzy_node * node = (fuzzy_node *)(seg->data);
/* ʹ<><CAB9>ǰ<EFBFBD>ε<EFBFBD>roll state */
memcpy(node->right_status_r, prev_node->right_status_r, sizeof(struct roll_state));
struct roll_state * rs = node->right_status_r;
unsigned long long size = seg->right - seg->left + 1;
unsigned int FNV_hash_value = prev_node->right_status_shash;
char * FNV_hash = (char *)malloc(sizeof(char)*size);
unsigned long long fnv_index = 0, i, last_slice_index;
unsigned int roll_hash_value;
unsigned long long prev_len = get_prev_continous_length(prev_seg);
for(i = 0; i < size; i++)
{
roll_hash(rs, data[i]);
roll_hash_value = roll_sum(rs);
FNV_hash_value = sum_hash(data[i], FNV_hash_value);
if(i + prev_len >= ROLLING_WINDOW \
&& roll_hash_value % blocksize == blocksize - 1)
{
node->slice_num ++;
if(node->slice_num == 1)
{
node->left_len = i + 1;
}
last_slice_index = i;
/* <20><><EFBFBD><EFBFBD>FNV<4E><56>ֵ */
FNV_hash[fnv_index ++] = b64[FNV_hash_value % 64];
//printf("data[%lu]=%c, FNV_hash = %c\n", i, data[i], b64[FNV_hash_value % 64]);
FNV_hash_value = HASH_INIT;
}
}
/* һƬ<D2BB><C6AC>û<EFBFBD><C3BB><EFBFBD>ҵ<EFBFBD> */
if(node->slice_num == 0)
{
node->left_len = size;
node->right_len = 0;
}
else
{
node->right_len = size - last_slice_index - 1;
}
node->right_status_shash = FNV_hash_value;
/* <20><><EFBFBD>ƽ<EFBFBD><C6BD><EFBFBD><EFBFBD><EFBFBD>hash_result<6C><74> */
node->hash_result = (char *)malloc(sizeof(char) * (fnv_index + 1));
memcpy(node->hash_result, FNV_hash, fnv_index);
(node->hash_result)[fnv_index] = '\0';
node->left_data = (char *)malloc(sizeof(char) * (node->left_len));
memcpy(node->left_data, data, node->left_len);
free(FNV_hash);
}
void fuzzy_modify_self_with_prev(IVI_seg_t * prev_seg, IVI_seg_t * seg, char * data, unsigned long long blocksize)
{
fuzzy_node * prev_node = (fuzzy_node *)(prev_seg->data);
fuzzy_node * node = (fuzzy_node *)(seg->data);
/* ʹ<><CAB9>ǰ<EFBFBD>ε<EFBFBD>roll state */
memcpy(node->right_status_r, prev_node->right_status_r, sizeof(struct roll_state));
struct roll_state * rs = node->right_status_r;
unsigned long long size = seg->right - seg->left + 1;
unsigned int FNV_hash_value = prev_node->right_status_shash;
char * FNV_hash = (char *)malloc(sizeof(char)*size);
unsigned long long fnv_index = 0, i, last_slice_index;
unsigned int roll_hash_value;
unsigned long long prev_len = get_prev_continous_length(prev_seg);
for(i = 0; i < size; i++)
{
roll_hash(rs, data[i]);
roll_hash_value = roll_sum(rs);
FNV_hash_value = sum_hash(data[i], FNV_hash_value);
if(i + prev_len >= ROLLING_WINDOW \
&& roll_hash_value % blocksize == blocksize- 1)
{
node->slice_num ++;
if(node->slice_num == 1)
{
node->left_len = i + 1;
}
last_slice_index = i;
/* <20><><EFBFBD><EFBFBD>FNV<4E><56>ֵ */
FNV_hash[fnv_index ++] = b64[FNV_hash_value % 64];
//printf("data[%lu]=%c, FNV_hash = %c\n", i, data[i], b64[FNV_hash_value % 64]);
FNV_hash_value = HASH_INIT;
}
}
/* һƬ<D2BB><C6AC>û<EFBFBD><C3BB><EFBFBD>ҵ<EFBFBD> */
if(node->slice_num == 0)
{
node->left_len = size;
node->right_len = 0;
}
else
{
node->right_len = size - last_slice_index - 1;
}
node->right_status_shash = FNV_hash_value;
/* <20><><EFBFBD>ƽ<EFBFBD><C6BD><EFBFBD><EFBFBD><EFBFBD>hash_result<6C><74> */
free(node->hash_result);
node->hash_result = (char *)malloc(sizeof(char) * (fnv_index + 1));
memcpy(node->hash_result, FNV_hash, fnv_index);
(node->hash_result)[fnv_index] = '\0';
//printf("old node->left_data = %s\n", node->left_data);
free(node->left_data);
node->left_data = (char *)malloc(sizeof(char) * (node->left_len));
memcpy(node->left_data, data, node->left_len);
//printf("new node->left_data = %s\n", node->left_data);
free(FNV_hash);
}
/**
 * Propagate a segment's right-edge state into the segments contiguously
 * following it.
 *
 * Followers that have no slice boundary are re-hashed completely, one after
 * another, each continuing from its predecessor. For the first follower that
 * does have boundaries only its leading part (left_data) is re-hashed: the
 * characters found there replace the first character of its hash_result, and
 * its left_len and slice_num are adjusted accordingly.
 *
 * The merged digest is assembled on the heap with memcpy; a blocksize of 0
 * records no boundary instead of dividing by zero. If an allocation fails the
 * function stops and leaves the affected follower unchanged.
 */
void fuzzy_modify_next(IVI_seg_t * seg, IVI_seg_t * next_seg, unsigned long long blocksize)
{
    IVI_seg_t * curr = seg;
    IVI_seg_t * next = next_seg;

    while (next != NULL)
    {
        fuzzy_node * follower = (fuzzy_node *)(next->data);
        char * copy;

        if (follower->slice_num != 0)
        {
            break;
        }
        /*
         * fuzzy_modify_self_with_prev() frees follower->left_data before it
         * copies from its data argument, so it must be given a private copy.
         */
        copy = (char *)malloc(sizeof(char) * (follower->left_len));
        if (copy == NULL)
        {
            return;
        }
        memcpy(copy, follower->left_data, follower->left_len);
        fuzzy_modify_self_with_prev(curr, next, copy, blocksize);
        free(copy);
        curr = next;
        next = IVI_next_continuous_seg(next);
    }

    if (next == NULL)
    {
        return;
    }

    /* next is the first follower that already has slice boundaries. */
    {
        fuzzy_node * curr_node = (fuzzy_node *)(curr->data);
        fuzzy_node * next_node = (fuzzy_node *)(next->data);
        const char * left = next_node->left_data;
        unsigned long long left_size = next_node->left_len;
        unsigned long long prev_len = get_prev_continous_length(curr);
        unsigned int fnv_state = curr_node->right_status_shash;
        unsigned long long cut_count = 0;
        unsigned long long first_cut_len = 0;
        unsigned long long i;
        struct roll_state rs;
        size_t old_len;
        size_t kept;
        char * cuts;
        char * merged;

        cuts = (char *)malloc(sizeof(char) * left_size);
        if (cuts == NULL)
        {
            return;
        }

        /* Scan on a private copy of the state; curr's stored state stays untouched. */
        memcpy(&rs, curr_node->right_status_r, sizeof(struct roll_state));
        for (i = 0; i < left_size; i++)
        {
            unsigned int rolling;

            roll_hash(&rs, left[i]);
            rolling = roll_sum(&rs);
            fnv_state = sum_hash(left[i], fnv_state);
            if ((i + prev_len >= ROLLING_WINDOW) && blocksize != 0
                && rolling % blocksize == blocksize - 1)
            {
                cuts[cut_count ++] = b64[fnv_state % 64];
                fnv_state = HASH_INIT;
                if (cut_count == 1)
                {
                    first_cut_len = i + 1;
                }
            }
        }

        /* New digest = characters just found + old digest without its first character. */
        old_len = strlen(next_node->hash_result);
        kept = (old_len > 0) ? old_len - 1 : 0;
        merged = (char *)malloc(sizeof(char) * (cut_count + kept + 1));
        if (merged == NULL)
        {
            free(cuts);
            return;
        }
        memcpy(merged, cuts, cut_count);
        memcpy(merged + cut_count, next_node->hash_result + (old_len - kept), kept);
        merged[cut_count + kept] = '\0';

        free(next_node->hash_result);
        next_node->hash_result = merged;
        if (cut_count > 0)
        {
            next_node->left_len = first_cut_len;
        }
        /* cut_count boundaries were added and the old first one was dropped. */
        next_node->slice_num += (int)cut_count - 1;

        free(cuts);
    }
    return;
}
/**
 * Write the digest of everything fed so far into result.
 *
 * Each stored segment contributes its hash_result; the last segment of every
 * contiguous run is followed by "[first:last]" offsets, giving text such as
 * abc[1:100]def[200:300]. Output that does not fit is truncated.
 *
 * @param handle  handle from fuzzy_create_handle()
 * @param result  caller-provided output buffer
 * @param size    capacity of result in bytes, including the terminating NUL
 * @return 0 on success, -1 when handle or result is NULL or size is 0.
 *
 * The string is terminated directly after the last character written (and, as
 * before, at result[size - 1]), so the buffer does not need to be cleared by
 * the caller.
 */
int fuzzy_digest(fuzzy_handle_t * handle, char * result, unsigned int size)
{
    final_result cursor;

    if (handle == NULL || result == NULL || size == 0)
    {
        return -1;
    }

    cursor.head = result;
    cursor.size = size;
    cursor.offset = 0;
    cursor.first_FNV_offset = 0;
    cursor.last_FNV_offset = 0;

    IVI_traverse(((fuzzy_handle_inner_t *)handle)->ivi, fuzzy_hash_merge_new, (void *)&cursor);

    /* cursor.offset is the number of bytes the traversal wrote (at most size). */
    if (cursor.offset < size)
    {
        result[cursor.offset] = '\0';
    }
    result[size - 1] = '\0';
    return 0;
}
void fuzzy_hash_merge_new(IVI_seg_t * seg, void * user_para)
{
IVI_seg_t * prev_seg;
IVI_seg_t * next_seg;
prev_seg = IVI_prev_continuous_seg(seg);
next_seg = IVI_next_continuous_seg(seg);
char buffer[MAXSIZE];
final_result * tmp = (final_result *)user_para;
fuzzy_node * node = (fuzzy_node *)(seg->data);
if(node->slice_num != 0)
{
tmp->last_FNV_offset = seg->right - node->right_len;
}
if(prev_seg == NULL && next_seg == NULL) //<2F><><EFBFBD><EFBFBD>ǰ<EFBFBD><C7B0>Ƭ<EFBFBD>ͺ<EFBFBD><CDBA><EFBFBD>Ƭ<EFBFBD><C6AC>Ϊ<EFBFBD>գ<EFBFBD><D5A3><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ƴ<EFBFBD><C6B4>
{
tmp->first_FNV_offset = seg->left;
tmp->last_FNV_offset = seg->right - node->right_len;
sprintf(buffer, "%s[%llu:%llu]", node->hash_result, tmp->first_FNV_offset, seg->right);
}
if(prev_seg == NULL && next_seg != NULL) //<2F><><EFBFBD><EFBFBD>ǰ<EFBFBD><C7B0>ƬΪ<C6AC>գ<EFBFBD><D5A3><EFBFBD><EFBFBD><EFBFBD>Ƭ<EFBFBD><C6AC>Ϊ<EFBFBD>գ<EFBFBD><D5A3><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ֵ<EFBFBD><D6B5><EFBFBD><EFBFBD>FNVֵ<56><D6B5><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ȥ
{
tmp->first_FNV_offset = seg->left;
sprintf(buffer, "%s", node->hash_result);
}
if(prev_seg != NULL && next_seg == NULL) //<2F><><EFBFBD><EFBFBD>ǰ<EFBFBD><C7B0>Ƭ<EFBFBD><C6AC>Ϊ<EFBFBD>գ<EFBFBD><D5A3><EFBFBD><EFBFBD><EFBFBD>ƬΪ<C6AC>գ<EFBFBD><D5A3><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ֵ<EFBFBD><D6B5><EFBFBD><EFBFBD>ƫ<EFBFBD><C6AB>
{
sprintf(buffer, "%s[%llu:%llu]", node->hash_result, tmp->first_FNV_offset, seg->right);
}
if(prev_seg != NULL && next_seg != NULL) //<2F><><EFBFBD><EFBFBD>ǰ<EFBFBD><C7B0>Ƭ<EFBFBD><C6AC>Ϊ<EFBFBD>գ<EFBFBD><D5A3><EFBFBD><EFBFBD><EFBFBD>Ƭ<EFBFBD><C6AC>Ϊ<EFBFBD>գ<EFBFBD><D5A3><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>FNVֵ<56><D6B5>ȥ
{
sprintf(buffer, "%s", node->hash_result);
}
unsigned int inner_size = strlen(buffer);
tmp->offset += inner_size;
if(tmp->offset <= tmp->size)
{
memcpy(tmp->head, buffer, inner_size);
tmp->head += inner_size;
}
else
{
unsigned int length = (tmp->size - (tmp->offset - inner_size));
if(length != 0)
{
memcpy(tmp->head, buffer, length);
}
tmp->offset = tmp->size;
tmp->head += length;
}
return;
}
/**
 * Report one of the handle's length statistics.
 *
 * @param handle  handle from fuzzy_create_handle()
 * @param type    TOTAL_LENGTH     - value of IVI_seg_length() for the index
 *                EFFECTIVE_LENGTH - total accumulated by fuzzy_feed()
 *                HASH_LENGTH      - length of the digest text plus one
 * @return the requested value, or 0 for an unknown type
 */
unsigned long long fuzzy_status(fuzzy_handle_t * handle, int type)
{
    fuzzy_handle_inner_t * inner = (fuzzy_handle_inner_t *)(handle);

    if (type == TOTAL_LENGTH)
    {
        return IVI_seg_length(inner->ivi);
    }

    if (type == EFFECTIVE_LENGTH)
    {
        return inner->effective_length;
    }

    if (type == HASH_LENGTH)
    {
        /* Walk all segments and add up the size of each digest piece. */
        final_length totals;

        totals.first_FNV_offset = 0;
        totals.last_FNV_offset = 0;
        totals.hash_length = 0;
        IVI_traverse(inner->ivi, fuzzy_hash_length, (void *)&totals);
        return totals.hash_length + 1;
    }

    return 0;
}
/**
 * IVI_traverse() callback used by fuzzy_status(HASH_LENGTH): adds the length
 * of one segment's digest piece to the running total.
 *
 * The piece is the segment's hash_result, followed by
 * "[run start:own right edge]" when the segment has no contiguous successor.
 * Only lengths are computed; nothing is formatted into a fixed-size buffer,
 * so long digests cannot overflow the stack.
 *
 * @param seg        segment being visited (its data is a fuzzy_node)
 * @param user_para  the final_length context of the traversal
 */
void fuzzy_hash_length(IVI_seg_t * seg, void * user_para)
{
    IVI_seg_t * prev_seg = IVI_prev_continuous_seg(seg);
    IVI_seg_t * next_seg = IVI_next_continuous_seg(seg);
    final_length * totals = (final_length *)user_para;
    fuzzy_node * node = (fuzzy_node *)(seg->data);

    if (node->slice_num != 0 || (prev_seg == NULL && next_seg == NULL))
    {
        totals->last_FNV_offset = seg->right - node->right_len;
    }
    if (prev_seg == NULL)
    {
        /* This segment starts a new contiguous run. */
        totals->first_FNV_offset = seg->left;
    }

    if (node->hash_result != NULL)
    {
        totals->hash_length += strlen(node->hash_result);
    }

    if (next_seg == NULL)
    {
        /* snprintf with a NULL buffer and size 0 only reports the length needed. */
        int range_len = snprintf(NULL, 0, "[%llu:%llu]",
                                 totals->first_FNV_offset, (unsigned long long)seg->right);

        if (range_len > 0)
        {
            totals->hash_length += (unsigned long long)range_len;
        }
    }
    return;
}