将mesafuzzy和GIE的源代码集成进入Maat,并修改Makefile。
This commit is contained in:
@@ -1,11 +1,11 @@
|
||||
#opt: OPTFLAGS = -O2
|
||||
#export OPTFLAGS
|
||||
|
||||
CC = g++
|
||||
CC = gcc
|
||||
CCC = g++
|
||||
CFLAGS = -Wall -g -fPIC
|
||||
CFLAGS += $(OPTFLAGS)
|
||||
LDFLAGS = -lMESA_handle_logger -lMESA_htable -lpthread
|
||||
LDFLAGS = -lMESA_handle_logger -lMESA_htable -lpthread -lm
|
||||
MAILLIB = ../lib
|
||||
|
||||
G_H_DIR =../inc_internal
|
||||
@@ -13,7 +13,8 @@ H_DIR =-I$(G_H_DIR) -I../../inc
|
||||
LIBMAAT = libmaatframe.a
|
||||
LIBMAAT_SO = libmaatframe.so
|
||||
|
||||
OBJS=config_monitor.o Maat_rule.o Maat_api.o UniversalBoolMatch.o dynamic_array.o cJSON.o json2iris.o map_str2int.o
|
||||
OBJS=config_monitor.o Maat_rule.o Maat_api.o UniversalBoolMatch.o dynamic_array.o cJSON.o json2iris.o map_str2int.o\
|
||||
interval_index.o great_index_engine.o mesa_fuzzy.o
|
||||
.c.o:
|
||||
$(CC) -c $(CFLAGS) -I. $(H_DIR) $<
|
||||
|
||||
|
||||
864
src/entry/great_index_engine.c
Normal file
864
src/entry/great_index_engine.c
Normal file
@@ -0,0 +1,864 @@
|
||||
#include<stdio.h>
|
||||
#include<stdlib.h>
|
||||
#include<string.h>
|
||||
#include<math.h>
|
||||
#include<assert.h>
|
||||
#include<MESA/MESA_htable.h>
|
||||
#include "great_index_engine.h"
|
||||
#include "queue.h"
|
||||
/* Version marker symbol of this engine build. */
int GIE_VERSION_1_0_20151109=1;

/* Slot count used for both hash tables created in GIE_create().
 * Parenthesized so the macro expands safely inside larger expressions
 * (e.g. `x / HTABLE_SIZE`). */
#define HTABLE_SIZE (1024*1024)
/* NOTE(review): MAX is not referenced in the visible part of this file. */
#define MAX 10000
/* Flag values for GIE_insert_indextable(): which backtrack pointer the new node fills. */
#define FIRST_INSERT 1
#define SECOND_INSERT 0
/* Extra hash blocks added on each side of a compared window. */
#define TOLERENCE_SIZE 0
/* Scale factor of the confidence level computed in GIE_query(). */
#define CONF_MAX 10
/* Base multiplier of the block size computed by calc_fh_blocksize(). */
#define BLOCKSIZE_MIN 3
/* Sentinel stored in prev_value/next_value when an index key has no neighbour. */
#define MAX_UINT64 (0xFFFFFFFFFFFFFFFFULL)
|
||||
|
||||
/* Internal representation behind the opaque GIE_handle_t returned by GIE_create(). */
typedef struct
|
||||
{
|
||||
/* Index interval (para->index_interval): origin lengths are rounded to multiples of it to form index keys. */
unsigned long long user_precision;
|
||||
//int user_confidence_level_threshold;
|
||||
/* Relative tolerance (para->query_accuracy) applied to the queried length to get the [min, max] search range. */
double user_query_accuracy;
|
||||
/* Hash table: digest id -> struct id_table_data. */
MESA_htable_handle id_table;
|
||||
/* Hash table: index key -> struct index_table_data. */
MESA_htable_handle index_table;
|
||||
/* Ascending list of the index keys currently stored in index_table. */
struct VL * valuelist;
|
||||
}GIE_handle_inner_t;
|
||||
|
||||
/* One entry of the handle's sorted list of index keys. */
struct valuelist_node
|
||||
{
|
||||
/* The index key this entry stands for. */
unsigned long long value;
|
||||
/* Head of the list this node is linked into (set to handle->valuelist on insert). */
struct VL * valuelist_name;
|
||||
/* Tail-queue linkage. */
TAILQ_ENTRY(valuelist_node) vlistentry;
|
||||
};
|
||||
|
||||
/* One element of a per-index-key list; refers to a digest record without owning it. */
struct linklist_node
|
||||
{
|
||||
/* Index key of the list this node belongs to. */
unsigned long long index_key;
|
||||
/* Head of the list this node is linked into. */
struct TQ * listname;
|
||||
/* The digest record this node refers to (owned by the id table). */
struct id_table_data * basicinfo;
|
||||
/* Tail-queue linkage; lists are kept in ascending id order on insert. */
TAILQ_ENTRY(linklist_node) listentry;
|
||||
};
|
||||
|
||||
/* Value stored in index_table for one index key. */
struct index_table_data
|
||||
{
|
||||
/* Head of the list of nodes filed under this key. */
struct TQ * listhead;
|
||||
/* Node counter, incremented on every insert into listhead. */
int cnt;
|
||||
/* Nearest smaller index key present in the table, or MAX_UINT64 if none. */
unsigned long long prev_value;
|
||||
/* Nearest larger index key present in the table, or MAX_UINT64 if none. */
unsigned long long next_value;
|
||||
};
|
||||
|
||||
/* Value stored in id_table for one inserted digest. */
struct id_table_data
|
||||
{
|
||||
/* Digest id; also the id_table key. */
unsigned int id;
|
||||
/* Origin length copied from the digest. */
unsigned long long origin_len;
|
||||
/* Block size derived from origin_len by calc_fh_blocksize(). */
unsigned long long blocksize;
|
||||
/* Heap copy of the fuzzy hash string; released by idtable_free(). */
char * fh;
|
||||
/* Minimum confidence a query result must reach to be reported for this digest. */
short cfds_lvl;
|
||||
/* Caller-supplied pointer, copied verbatim into query results. */
void * tag;
|
||||
/* Node filed under the lower index key (origin_len rounded down). */
struct linklist_node * first_backtrack;
|
||||
/* Node filed under the next higher index key. */
struct linklist_node * second_backtrack;
|
||||
};
|
||||
|
||||
/* struct TQ: head type for lists of linklist_node. */
TAILQ_HEAD(TQ, linklist_node);
|
||||
/* struct VL: head type for the list of valuelist_node. */
TAILQ_HEAD(VL, valuelist_node);
|
||||
|
||||
void idtable_free(void * data);
|
||||
void indextable_free(void * data);
|
||||
int GIE_insert_indextable(GIE_handle_inner_t * handle, struct id_table_data * info, unsigned long long index_key, int flag);
|
||||
int GIE_delete_from_indextable_by_key(GIE_handle_inner_t * handle, struct linklist_node * backtrack);
|
||||
int GIE_delete(GIE_handle_inner_t * handle, GIE_digest_t ** digests, int size);
|
||||
int GIE_union(struct TQ ** union_list, int list_num, struct id_table_data ** result,\
|
||||
unsigned long long min, unsigned long long max, unsigned long long query_blocksize);
|
||||
|
||||
struct TQ * linklist_union(struct TQ * list_first, struct TQ * list_second, unsigned long long min, unsigned long long max,\
|
||||
unsigned long long query_blocksize);
|
||||
|
||||
|
||||
int minof3(int x, int y, int z);
|
||||
int GIE_edit_distance(char* w1, int l1, const char* w2, int l2);
|
||||
int GIE_edit_distance_with_position(char * fh, const char * fuzzy_string, unsigned long long orilen, int * fuzzy_actual_size,\
|
||||
unsigned long long * calculate_len);
|
||||
|
||||
GIE_handle_t * GIE_create(const GIE_create_para_t * para)
|
||||
{
|
||||
GIE_handle_inner_t * handle = (GIE_handle_inner_t *)malloc(sizeof(GIE_handle_inner_t));
|
||||
handle->user_precision = para->index_interval;
|
||||
//handle->user_confidence_level_threshold = para->confidence_level_threshold;
|
||||
handle->user_query_accuracy = para->query_accuracy;
|
||||
|
||||
struct VL * head = (struct VL *)malloc(sizeof(struct VL));
|
||||
TAILQ_INIT(head);
|
||||
handle->valuelist = head;
|
||||
|
||||
|
||||
MESA_htable_create_args_t idtable_args,indextable_args;
|
||||
memset(&idtable_args, 0, sizeof(idtable_args));
|
||||
memset(&indextable_args, 0, sizeof(indextable_args));
|
||||
|
||||
|
||||
idtable_args.thread_safe = 0;
|
||||
idtable_args.hash_slot_size = HTABLE_SIZE;
|
||||
idtable_args.max_elem_num = 4 * HTABLE_SIZE;
|
||||
idtable_args.expire_time = 0;
|
||||
idtable_args.eliminate_type = HASH_ELIMINATE_ALGO_LRU;
|
||||
idtable_args.key_comp = NULL;
|
||||
idtable_args.key2index = NULL;
|
||||
idtable_args.data_free = idtable_free;
|
||||
idtable_args.data_expire_with_condition = NULL;
|
||||
idtable_args.recursive = 1;
|
||||
|
||||
indextable_args.thread_safe = 0;
|
||||
indextable_args.hash_slot_size = HTABLE_SIZE;
|
||||
indextable_args.max_elem_num = 4 * HTABLE_SIZE;
|
||||
indextable_args.expire_time = 0;
|
||||
indextable_args.eliminate_type = HASH_ELIMINATE_ALGO_LRU;
|
||||
indextable_args.key_comp = NULL;
|
||||
indextable_args.key2index = NULL;
|
||||
indextable_args.data_free = indextable_free;
|
||||
indextable_args.data_expire_with_condition = NULL;
|
||||
indextable_args.recursive = 1;
|
||||
|
||||
handle->id_table = MESA_htable_create(&idtable_args, sizeof(idtable_args));
|
||||
handle->index_table = MESA_htable_create(&indextable_args, sizeof(indextable_args));
|
||||
|
||||
return (GIE_handle_t *)(handle);
|
||||
}
|
||||
|
||||
void idtable_free(void * data)
|
||||
{
|
||||
struct id_table_data * tmp = (struct id_table_data *)data;
|
||||
free(tmp->fh);
|
||||
free(tmp);
|
||||
// printf("free id_table_data!\n");
|
||||
return;
|
||||
}
|
||||
|
||||
void indextable_free(void * data)
|
||||
{
|
||||
// printf("free index_table_data!\n");
|
||||
struct index_table_data * tmp = (struct index_table_data *)data;
|
||||
struct linklist_node * tmp_node = TAILQ_FIRST(tmp->listhead);
|
||||
while(tmp_node != NULL)
|
||||
{
|
||||
struct linklist_node * linklist_tmp = TAILQ_NEXT(tmp_node, listentry);
|
||||
free(tmp_node);
|
||||
// printf("free list_node_data!\n");
|
||||
tmp_node = linklist_tmp;
|
||||
}
|
||||
free(tmp->listhead);
|
||||
free(tmp);
|
||||
return;
|
||||
}
|
||||
|
||||
/**
 * Destroy a handle created by GIE_create(): both hash tables, the list of
 * index keys and the handle itself. A NULL handle is ignored.
 */
void GIE_destory(GIE_handle_t * handle)
{
	GIE_handle_inner_t * _handle = (GIE_handle_inner_t *)(handle);

	if(_handle == NULL)
	{
		return;
	}

	MESA_htable_destroy(_handle->index_table, NULL);
	MESA_htable_destroy(_handle->id_table, NULL);

	if(_handle->valuelist != NULL)
	{
		struct valuelist_node * node = TAILQ_FIRST(_handle->valuelist);
		while(node != NULL)
		{
			/* fetch the successor before the node is released */
			struct valuelist_node * following = TAILQ_NEXT(node, vlistentry);
			free(node);
			node = following;
		}
		free(_handle->valuelist);
	}
	free(_handle);
}
|
||||
|
||||
|
||||
unsigned long long calc_fh_blocksize(unsigned long long orilen)
|
||||
{
|
||||
double tmp = orilen/(64 * BLOCKSIZE_MIN);
|
||||
double index = floor(log(tmp)/log(2));
|
||||
double tmp_t = pow(2, index);
|
||||
unsigned long long blocksize = (unsigned long long)(tmp_t * BLOCKSIZE_MIN);
|
||||
return blocksize;
|
||||
}
|
||||
|
||||
/**
 * Debug callback: prints the index key of one index-table entry followed by
 * the id and origin length of every node in its list.
 *
 * `key`, `size` and `user` are not used. Entries without a list or with an
 * empty list print nothing (previously a NULL dereference).
 */
void print_item_iterate(const uchar * key, uint size, void * data, void * user)
{
	struct index_table_data * index_data = (struct index_table_data *)data;
	struct linklist_node * node = NULL;

	(void)key;
	(void)size;
	(void)user;

	if(index_data == NULL || index_data->listhead == NULL || TAILQ_EMPTY(index_data->listhead))
	{
		return;
	}

	/* every node of a list carries the same index key; take it from the first */
	printf("index_key = %llu\n", TAILQ_FIRST(index_data->listhead)->index_key);
	TAILQ_FOREACH(node, index_data->listhead, listentry)
	{
		printf("id = %u orilen = %llu ", node->basicinfo->id, node->basicinfo->origin_len);
	}
	printf("\n");
}
|
||||
|
||||
/**
 * Apply a batch of insert/delete operations.
 *
 * For GIE_INSERT_OPT a record is created (with its own copy of the fuzzy
 * hash), added to the id table, and filed under two index keys: origin_len
 * rounded down to a multiple of the index interval, and the next multiple.
 * For GIE_DELETE_OPT the digest is removed via GIE_delete().
 *
 * When filing under an index key fails, the record is removed again through
 * the id table (which owns it once MESA_htable_add succeeded) instead of
 * being freed directly, and a node already filed under the first key is
 * unlinked, so no table keeps a dangling pointer.
 *
 * @param handle   engine handle
 * @param digests  array of `size` digest pointers; NULL entries are skipped
 * @param size     number of entries in `digests`
 * @return number of operations that succeeded
 */
int GIE_update(GIE_handle_t * handle, GIE_digest_t ** digests, int size)
{
	GIE_handle_inner_t * _handle = (GIE_handle_inner_t *)(handle);
	int success_cnt = 0;
	int i = 0;

	if(_handle == NULL || digests == NULL)
	{
		return 0;
	}

	for(i = 0; i < size; i++)
	{
		if(digests[i] == NULL)
		{
			continue;
		}
		switch(digests[i]->operation)
		{
			case GIE_INSERT_OPT:
			{
				struct id_table_data * info = NULL;
				unsigned long long first_index_key = 0;
				unsigned long long second_index_key = 0;
				size_t input_fh_len = 0;

				/* the interval is a divisor below; the hash is copied with strlen() */
				if(_handle->user_precision == 0 || digests[i]->fuzzy_hash == NULL)
				{
					break;
				}
				first_index_key = (digests[i]->origin_len)/(_handle->user_precision)*(_handle->user_precision);
				second_index_key = ((digests[i]->origin_len)/(_handle->user_precision) + 1)*(_handle->user_precision);

				info = (struct id_table_data *)calloc(1, sizeof(struct id_table_data));
				if(info == NULL)
				{
					break;
				}
				input_fh_len = strlen(digests[i]->fuzzy_hash);
				info->fh = (char *)calloc(input_fh_len + 1, sizeof(char));
				if(info->fh == NULL)
				{
					free(info);
					break;
				}
				memcpy(info->fh, digests[i]->fuzzy_hash, input_fh_len);

				info->origin_len = digests[i]->origin_len;
				info->blocksize = calc_fh_blocksize(digests[i]->origin_len);
				info->tag = digests[i]->tag;
				info->id = digests[i]->id;
				info->cfds_lvl = digests[i]->cfds_lvl;
				info->first_backtrack = NULL;
				info->second_backtrack = NULL;

				if(MESA_htable_add(_handle->id_table, (const uchar *)(&(digests[i]->id)), sizeof(digests[i]->id), (const void *)info) < 0)
				{
					/* the table did not take ownership: release the record here */
					printf("add %u id_table failed!", info->id);
					free(info->fh);
					free(info);
					break;
				}

				if(GIE_insert_indextable(_handle, info, first_index_key, FIRST_INSERT) < 0)
				{
					printf("insert %u first failed\n", info->id);
					assert(0);
					/* the id table owns info now; deleting the entry frees it exactly once */
					MESA_htable_del(_handle->id_table, (const uchar *)(&(digests[i]->id)), sizeof(digests[i]->id), idtable_free);
					break;
				}

				if(GIE_insert_indextable(_handle, info, second_index_key, SECOND_INSERT) < 0)
				{
					printf("insert %u second failed\n", info->id);
					assert(0);
					/* undo the first filing before the record goes away */
					GIE_delete_from_indextable_by_key(_handle, info->first_backtrack);
					info->first_backtrack = NULL;
					MESA_htable_del(_handle->id_table, (const uchar *)(&(digests[i]->id)), sizeof(digests[i]->id), idtable_free);
					break;
				}

				success_cnt++;
				break;
			}
			case GIE_DELETE_OPT:
			{
				success_cnt += GIE_delete(_handle, digests, i);
				break;
			}
			default:
				break;
		}
	}
	return success_cnt;
}
|
||||
|
||||
|
||||
/**
 * File a digest record under one index key.
 *
 * If the key already exists, a new node is inserted into its list, which is
 * kept in ascending id order; a record whose id is already in the list is
 * rejected. Otherwise a new index-table entry with a one-node list is
 * created, the key is inserted into the handle's sorted key list, and the
 * prev_value/next_value links of the new entry and of its neighbours are
 * updated.
 *
 * On success the new node is stored in info->first_backtrack (flag ==
 * FIRST_INSERT) or info->second_backtrack (any other flag). On failure
 * nothing is left allocated or linked and the backtrack pointers are not
 * modified.
 *
 * @return 0 on success, -1 on duplicate id, allocation failure or hash-table
 *         insertion failure.
 */
int GIE_insert_indextable(GIE_handle_inner_t * handle, struct id_table_data * info, unsigned long long index_key, int flag)
{
	struct index_table_data * ret = NULL;
	struct linklist_node * node_data = NULL;

	if(handle == NULL || info == NULL)
	{
		return -1;
	}

	node_data = (struct linklist_node *)malloc(sizeof(struct linklist_node));
	if(node_data == NULL)
	{
		return -1;
	}
	node_data->basicinfo = info;
	node_data->index_key = index_key;
	node_data->listname = NULL;

	ret = (struct index_table_data *)(MESA_htable_search_cb(handle->index_table, (const uchar *)(&index_key), sizeof(index_key), NULL, NULL, NULL));
	if(ret != NULL)
	{
		/* Key exists: find the first node with a larger id, rejecting duplicates. */
		struct linklist_node * tmp = NULL;
		struct linklist_node * successor = NULL;

		TAILQ_FOREACH(tmp, ret->listhead, listentry)
		{
			if(tmp->basicinfo->id == info->id)
			{
				printf("invalid insert!");
				free(node_data);
				return -1;
			}
			if(tmp->basicinfo->id > info->id)
			{
				successor = tmp;
				break;
			}
		}
		node_data->listname = ret->listhead;
		if(successor != NULL)
		{
			TAILQ_INSERT_BEFORE(successor, node_data, listentry);
		}
		else
		{
			TAILQ_INSERT_TAIL(ret->listhead, node_data, listentry);
		}
		ret->cnt++;
	}
	else
	{
		/* New key: build the entry completely before publishing anything. */
		struct index_table_data * index_data = (struct index_table_data *)malloc(sizeof(struct index_table_data));
		struct valuelist_node * value_data = (struct valuelist_node *)malloc(sizeof(struct valuelist_node));
		struct TQ * head = (struct TQ *)malloc(sizeof(struct TQ));
		struct valuelist_node * tmp_t = NULL;
		struct valuelist_node * tmp_prev = NULL;
		struct valuelist_node * tmp_next = NULL;
		int insert_flag = 0;

		if(index_data == NULL || value_data == NULL || head == NULL)
		{
			free(head);
			free(value_data);
			free(index_data);
			free(node_data);
			return -1;
		}

		TAILQ_INIT(head);
		TAILQ_INSERT_TAIL(head, node_data, listentry);
		node_data->listname = head;
		index_data->listhead = head;
		index_data->cnt = 1;
		index_data->prev_value = MAX_UINT64;
		index_data->next_value = MAX_UINT64;

		/* Add to the table first: if this fails no other structure has been touched yet. */
		if(MESA_htable_add(handle->index_table, (const uchar *)(&index_key), sizeof(index_key), (const void *)index_data) < 0)
		{
			printf("add index_table failed!\n");
			assert(0);
			free(head);
			free(value_data);
			free(index_data);
			free(node_data);
			return -1;
		}

		/* Insert the key into the ascending key list. */
		value_data->value = index_key;
		value_data->valuelist_name = handle->valuelist;
		TAILQ_FOREACH(tmp_t, handle->valuelist, vlistentry)
		{
			if(tmp_t->value > value_data->value)
			{
				TAILQ_INSERT_BEFORE(tmp_t, value_data, vlistentry);
				insert_flag = 1;
				break;
			}
		}
		if(!insert_flag)
		{
			TAILQ_INSERT_TAIL(handle->valuelist, value_data, vlistentry);
		}

		/* Link the new entry with its neighbouring keys (MAX_UINT64 = no neighbour). */
		tmp_prev = TAILQ_PREV(value_data, VL, vlistentry);
		tmp_next = TAILQ_NEXT(value_data, vlistentry);
		if(tmp_prev != NULL)
		{
			struct index_table_data * index_tmp_prev = (struct index_table_data *)MESA_htable_search(handle->index_table, (const uchar *)(&(tmp_prev->value)),\
															sizeof(tmp_prev->value));
			index_data->prev_value = tmp_prev->value;
			if(index_tmp_prev != NULL)
			{
				index_tmp_prev->next_value = index_key;
			}
		}
		if(tmp_next != NULL)
		{
			struct index_table_data * index_tmp_next = (struct index_table_data *)MESA_htable_search(handle->index_table, (const uchar *)(&(tmp_next->value)),\
															sizeof(tmp_next->value));
			index_data->next_value = tmp_next->value;
			if(index_tmp_next != NULL)
			{
				index_tmp_next->prev_value = index_key;
			}
		}
	}

	/* Publish the backtrack pointer only once the node is really linked. */
	if(flag == FIRST_INSERT)
	{
		info->first_backtrack = node_data;
	}
	else
	{
		info->second_backtrack = node_data;
	}
	return 0;
}
|
||||
|
||||
|
||||
|
||||
/**
 * Unlink and free one index node (a record's backtrack pointer).
 *
 * If other nodes remain under the key, only the entry's node counter is
 * decremented. If the list becomes empty, the index-table entry is deleted,
 * the key is removed from the handle's sorted key list, and the
 * prev_value/next_value links of the neighbouring keys are joined
 * (MAX_UINT64 where no neighbour remains).
 *
 * @param handle     engine handle
 * @param backtrack  node to remove; freed by this call. NULL is rejected.
 * @return 0 on success, -1 when `backtrack` is NULL/unlinked or the
 *         index-table entry could not be deleted.
 */
int GIE_delete_from_indextable_by_key(GIE_handle_inner_t * handle, struct linklist_node * backtrack)
{
	unsigned long long tmp_key = 0;
	struct TQ * list = NULL;
	struct valuelist_node * value_node = NULL;

	if(handle == NULL || backtrack == NULL || backtrack->listname == NULL)
	{
		return -1;
	}
	tmp_key = backtrack->index_key;
	list = backtrack->listname;

	TAILQ_REMOVE(list, backtrack, listentry);

	if(!TAILQ_EMPTY(list))
	{
		/* other records still use this key: keep the entry, fix its counter */
		struct index_table_data * entry = (struct index_table_data *)MESA_htable_search_cb(handle->index_table, (const uchar *)(&tmp_key), \
											sizeof(tmp_key), NULL, NULL, NULL);
		if(entry != NULL && entry->cnt > 0)
		{
			entry->cnt--;
		}
		free(backtrack);
		return 0;
	}

	/* The list is empty: drop the entry; indextable_free also releases the list head. */
	if(MESA_htable_del(handle->index_table, (const uchar *)(&tmp_key), sizeof(tmp_key), indextable_free) < 0)
	{
		printf("indextable backtrack delete error!\n");
		assert(0);
		free(backtrack);	/* already unlinked, nobody else refers to it */
		return -1;
	}

	TAILQ_FOREACH(value_node, handle->valuelist, vlistentry)
	{
		if(value_node->value == tmp_key)
		{
			break;
		}
	}
	if(value_node != NULL)
	{
		struct valuelist_node * tmp_prev = TAILQ_PREV(value_node, VL, vlistentry);
		struct valuelist_node * tmp_next = TAILQ_NEXT(value_node, vlistentry);

		if(tmp_prev != NULL)
		{
			struct index_table_data * index_tmp_prev = (struct index_table_data *)MESA_htable_search_cb(handle->index_table, (const uchar *)(&(tmp_prev->value)), \
												sizeof(tmp_prev->value), NULL, NULL, NULL);
			if(index_tmp_prev != NULL)
			{
				index_tmp_prev->next_value = (tmp_next != NULL) ? tmp_next->value : MAX_UINT64;
			}
		}
		if(tmp_next != NULL)
		{
			struct index_table_data * index_tmp_next = (struct index_table_data *)MESA_htable_search_cb(handle->index_table, (const uchar *)(&(tmp_next->value)), \
												sizeof(tmp_next->value), NULL, NULL, NULL);
			if(index_tmp_next != NULL)
			{
				index_tmp_next->prev_value = (tmp_prev != NULL) ? tmp_prev->value : MAX_UINT64;
			}
		}
		TAILQ_REMOVE(handle->valuelist, value_node, vlistentry);
		free(value_node);
	}

	free(backtrack);
	return 0;
}
|
||||
|
||||
|
||||
/**
 * Delete the digest digests[i] from the engine: its two index nodes are
 * unlinked, then its record is removed from the id table.
 *
 * Deleting an id that is not stored is reported and counted as a failure; it
 * no longer attempts the id-table deletion (which ended in assert(0)).
 *
 * @param handle   engine handle
 * @param digests  digest array passed to GIE_update()
 * @param i        index of the digest to delete
 * @return 1 when the digest was found and removed, 0 otherwise.
 */
int GIE_delete(GIE_handle_inner_t * handle, GIE_digest_t ** digests, int i)
{
	struct id_table_data * ret = (struct id_table_data *) MESA_htable_search(handle->id_table, \
									(const uchar *)(&(digests[i]->id)), sizeof(digests[i]->id));

	if(ret == NULL)
	{
		printf("del %d doesn't exist!\n",digests[i]->id);
		return 0;
	}

	/* unlink the index nodes first: they point into the record freed below */
	if(ret->first_backtrack != NULL)
	{
		GIE_delete_from_indextable_by_key(handle, ret->first_backtrack);
		ret->first_backtrack = NULL;
	}
	if(ret->second_backtrack != NULL)
	{
		GIE_delete_from_indextable_by_key(handle, ret->second_backtrack);
		ret->second_backtrack = NULL;
	}

	if(MESA_htable_del(handle->id_table, (const uchar *)(&(digests[i]->id)), sizeof(digests[i]->id), idtable_free) < 0)
	{
		printf("delete id failed!");
		assert(0);
	}
	return 1;
}
|
||||
|
||||
|
||||
|
||||
int GIE_union(struct TQ ** union_list, int list_num, struct id_table_data ** result,\
|
||||
unsigned long long min, unsigned long long max, unsigned long long query_blocksize)
|
||||
{
|
||||
struct TQ * tmp_list = (struct TQ *)malloc(sizeof(struct TQ));
|
||||
TAILQ_INIT(tmp_list);
|
||||
struct linklist_node * tmp_node = NULL;
|
||||
int size = 0;
|
||||
TAILQ_FOREACH(tmp_node, union_list[0], listentry)
|
||||
{
|
||||
if(tmp_node->basicinfo->origin_len >= min && tmp_node->basicinfo->origin_len <= max && tmp_node->basicinfo->blocksize == query_blocksize)
|
||||
{
|
||||
struct linklist_node * new_node = (struct linklist_node *)malloc(sizeof(struct linklist_node));
|
||||
new_node->index_key = tmp_node->index_key;
|
||||
new_node->basicinfo = tmp_node->basicinfo;
|
||||
new_node->listname = tmp_list;
|
||||
TAILQ_INSERT_TAIL(tmp_list, new_node, listentry);
|
||||
}
|
||||
}
|
||||
int i = 0;
|
||||
for(i = 1; i < list_num; i++)
|
||||
{
|
||||
tmp_list = linklist_union(tmp_list, union_list[i], min, max, query_blocksize);
|
||||
}
|
||||
|
||||
struct linklist_node * tmp_node_t = NULL;
|
||||
TAILQ_FOREACH(tmp_node_t, tmp_list, listentry)
|
||||
{
|
||||
result[size++] = tmp_node_t->basicinfo;
|
||||
}
|
||||
|
||||
struct linklist_node * first_node = TAILQ_FIRST(tmp_list);
|
||||
while(first_node != NULL)
|
||||
{
|
||||
struct linklist_node * linklist_tmp = TAILQ_NEXT(first_node, listentry);
|
||||
free(first_node);
|
||||
first_node = linklist_tmp;
|
||||
}
|
||||
free(tmp_list);
|
||||
return size;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
struct TQ * linklist_union(struct TQ * list_first, struct TQ * list_second, unsigned long long min, unsigned long long max,\
|
||||
unsigned long long query_blocksize)
|
||||
{
|
||||
struct TQ * link_result = (struct TQ *)malloc(sizeof(struct TQ));
|
||||
TAILQ_INIT(link_result);
|
||||
struct linklist_node * tmp_first = TAILQ_FIRST(list_first);
|
||||
struct linklist_node * tmp_second = TAILQ_FIRST(list_second);
|
||||
while(tmp_first != NULL && tmp_second != NULL)
|
||||
{
|
||||
//When combined final result in a relatively small deposit on id, id small pointer will move backward,
|
||||
// if both are equal, both pointers move backward until a move to the tail end of the list
|
||||
if(tmp_first->basicinfo->id < tmp_second->basicinfo->id)
|
||||
{
|
||||
if(tmp_first->basicinfo->origin_len >= min && tmp_first->basicinfo->origin_len <= max && tmp_first->basicinfo->blocksize == query_blocksize)
|
||||
{
|
||||
struct linklist_node * new_node = (struct linklist_node *)malloc(sizeof(struct linklist_node));
|
||||
new_node->index_key = tmp_first->index_key;
|
||||
new_node->basicinfo = tmp_first->basicinfo;
|
||||
new_node->listname = link_result;
|
||||
TAILQ_INSERT_TAIL(link_result, new_node, listentry);
|
||||
|
||||
}
|
||||
tmp_first = TAILQ_NEXT(tmp_first, listentry);
|
||||
}
|
||||
else if(tmp_first->basicinfo->id > tmp_second->basicinfo->id)
|
||||
{
|
||||
if(tmp_second->basicinfo->origin_len >= min && tmp_second->basicinfo->origin_len <= max && tmp_second->basicinfo->blocksize == query_blocksize)
|
||||
{
|
||||
struct linklist_node * new_node = (struct linklist_node *)malloc(sizeof(struct linklist_node));
|
||||
new_node->index_key = tmp_second->index_key;
|
||||
new_node->basicinfo = tmp_second->basicinfo;
|
||||
new_node->listname = link_result;
|
||||
TAILQ_INSERT_TAIL(link_result, new_node, listentry);
|
||||
}
|
||||
tmp_second = TAILQ_NEXT(tmp_second, listentry);
|
||||
}
|
||||
else
|
||||
{
|
||||
if(tmp_first->basicinfo->origin_len >= min && tmp_first->basicinfo->origin_len <= max && tmp_first->basicinfo->blocksize == query_blocksize)
|
||||
{
|
||||
struct linklist_node * new_node = (struct linklist_node *)malloc(sizeof(struct linklist_node));
|
||||
new_node->index_key = tmp_first->index_key;
|
||||
new_node->basicinfo = tmp_first->basicinfo;
|
||||
new_node->listname = link_result;
|
||||
TAILQ_INSERT_TAIL(link_result, new_node, listentry);
|
||||
}
|
||||
tmp_first = TAILQ_NEXT(tmp_first, listentry);
|
||||
tmp_second = TAILQ_NEXT(tmp_second, listentry);
|
||||
}
|
||||
}
|
||||
|
||||
//The list is not linked to the end nodes remaining deposit to results
|
||||
while(tmp_first != NULL)
|
||||
{
|
||||
if(tmp_first->basicinfo->origin_len >= min && tmp_first->basicinfo->origin_len <= max && tmp_first->basicinfo->blocksize == query_blocksize)
|
||||
{
|
||||
struct linklist_node * new_node = (struct linklist_node *)malloc(sizeof(struct linklist_node));
|
||||
new_node->index_key = tmp_first->index_key;
|
||||
new_node->basicinfo = tmp_first->basicinfo;
|
||||
new_node->listname = link_result;
|
||||
TAILQ_INSERT_TAIL(link_result, new_node, listentry);
|
||||
}
|
||||
tmp_first = TAILQ_NEXT(tmp_first, listentry);
|
||||
}
|
||||
while(tmp_second != NULL)
|
||||
{
|
||||
if(tmp_second->basicinfo->origin_len >= min && tmp_second->basicinfo->origin_len <= max && tmp_second->basicinfo->blocksize == query_blocksize)
|
||||
{
|
||||
struct linklist_node * new_node = (struct linklist_node *)malloc(sizeof(struct linklist_node));
|
||||
new_node->index_key = tmp_second->index_key;
|
||||
new_node->basicinfo = tmp_second->basicinfo;
|
||||
new_node->listname = link_result;
|
||||
TAILQ_INSERT_TAIL(link_result, new_node, listentry);
|
||||
}
|
||||
tmp_second = TAILQ_NEXT(tmp_second, listentry);
|
||||
}
|
||||
|
||||
|
||||
struct linklist_node * first_node = TAILQ_FIRST(list_first);
|
||||
while(first_node != NULL)
|
||||
{
|
||||
struct linklist_node * linklist_tmp = TAILQ_NEXT(first_node, listentry);
|
||||
free(first_node);
|
||||
first_node = linklist_tmp;
|
||||
}
|
||||
free(list_first);
|
||||
|
||||
|
||||
return link_result;
|
||||
}
|
||||
|
||||
|
||||
/* Return the smallest of three integers. */
int minof3(int x, int y, int z)
{
	int smallest = x;

	if(!(smallest < y))
	{
		smallest = y;
	}
	if(!(smallest < z))
	{
		smallest = z;
	}
	return smallest;
}
|
||||
|
||||
|
||||
/**
 * Levenshtein edit distance between the first l1 characters of w1 and the
 * first l2 characters of w2 (insert, delete and substitute each cost 1).
 *
 * Uses two rolling rows, i.e. O(l2) memory instead of a full (l1+1)x(l2+1)
 * matrix. A negative length or NULL string is treated as an empty string
 * (negative lengths previously caused an out-of-bounds write).
 *
 * @return the edit distance; if the working rows cannot be allocated, the
 *         larger of the two lengths (the maximum possible distance).
 */
int GIE_edit_distance(char* w1, int l1, const char* w2, int l2)
{
	int i, j;
	int result;
	int * prev_row = NULL;
	int * cur_row = NULL;

	if(w1 == NULL || l1 < 0)
	{
		l1 = 0;
	}
	if(w2 == NULL || l2 < 0)
	{
		l2 = 0;
	}
	/* distance to/from an empty string is the length of the other one */
	if(l1 == 0)
	{
		return l2;
	}
	if(l2 == 0)
	{
		return l1;
	}

	prev_row = (int *)malloc(sizeof(int) * ((size_t)l2 + 1));
	cur_row = (int *)malloc(sizeof(int) * ((size_t)l2 + 1));
	if(prev_row == NULL || cur_row == NULL)
	{
		free(prev_row);
		free(cur_row);
		return (l1 > l2) ? l1 : l2;
	}

	/* row 0: turning an empty prefix of w1 into w2[0..j) takes j insertions */
	for(j = 0; j <= l2; j++)
	{
		prev_row[j] = j;
	}

	for(i = 1; i <= l1; i++)
	{
		int * swap = NULL;

		cur_row[0] = i;
		for(j = 1; j <= l2; j++)
		{
			/* substitution is free when the characters already match */
			int best = prev_row[j-1] + ((w1[i-1] != w2[j-1]) ? 1 : 0);
			int insert_cost = cur_row[j-1] + 1;
			int delete_cost = prev_row[j] + 1;

			if(insert_cost < best)
			{
				best = insert_cost;
			}
			if(delete_cost < best)
			{
				best = delete_cost;
			}
			cur_row[j] = best;
		}
		swap = prev_row;
		prev_row = cur_row;
		cur_row = swap;
	}

	/* after the final swap the last computed row is prev_row */
	result = prev_row[l2];
	free(prev_row);
	free(cur_row);
	return result;
}
|
||||
|
||||
|
||||
int GIE_edit_distance_with_position(char * fh, const char * fuzzy_string, unsigned long long orilen, int * fuzzy_actual_size, unsigned long long * calculate_len)
|
||||
{
|
||||
*fuzzy_actual_size = 0;
|
||||
*calculate_len = 0;
|
||||
int edit_distance = 0;
|
||||
const char * tmpstr = fuzzy_string;
|
||||
const char * tmp_fuzzy = fuzzy_string;
|
||||
char * fh_tmp = fh;
|
||||
int tmp_fuzzy_len = 0;
|
||||
int fh_actual_len = 0;
|
||||
unsigned long long blocksize = 0;
|
||||
while(*fh_tmp != '\0')
|
||||
{
|
||||
if(*fh_tmp == '[')
|
||||
{
|
||||
break;
|
||||
}
|
||||
fh_actual_len ++;
|
||||
fh_tmp++;
|
||||
}
|
||||
//*fuzzy_all_actual_size = fh_actual_len;
|
||||
if(fh_actual_len != 0)
|
||||
{
|
||||
blocksize = (orilen - 1)/fh_actual_len;
|
||||
}
|
||||
else
|
||||
{
|
||||
blocksize = calc_fh_blocksize(orilen);
|
||||
}
|
||||
while(*tmpstr != '\0')
|
||||
{
|
||||
|
||||
int left = 0;
|
||||
int right = 0;
|
||||
if(*tmpstr == '[')
|
||||
{
|
||||
char numleft[100],numright[100];
|
||||
int i = 0 , j = 0;
|
||||
tmpstr ++;
|
||||
memset(numleft, '\0', sizeof(char));
|
||||
memset(numright, '\0', sizeof(char));
|
||||
while(*tmpstr != '\0' && *tmpstr != ':')
|
||||
{
|
||||
numleft[i++] = *tmpstr;
|
||||
tmpstr ++;
|
||||
}
|
||||
//printf("i = %d\n", i);
|
||||
left = atoi(numleft);
|
||||
tmpstr++;
|
||||
while(*tmpstr != '\0' && *tmpstr !=']')
|
||||
{
|
||||
numright[j++] = *tmpstr;
|
||||
tmpstr ++;
|
||||
}
|
||||
//printf("j = %d\n", j);
|
||||
right = atoi(numright);
|
||||
*calculate_len += right - left;
|
||||
|
||||
//TODO: edit distance compare
|
||||
int index = left/blocksize - TOLERENCE_SIZE > 0 ? left/blocksize - TOLERENCE_SIZE: 0;
|
||||
int fh_size = right/blocksize + TOLERENCE_SIZE - index > fh_actual_len - index ? fh_actual_len - index: right/blocksize + TOLERENCE_SIZE - index;
|
||||
edit_distance += GIE_edit_distance(fh + index, fh_size, tmp_fuzzy, tmp_fuzzy_len);
|
||||
*fuzzy_actual_size += tmp_fuzzy_len;
|
||||
|
||||
if(*tmpstr !=']')
|
||||
{
|
||||
tmp_fuzzy = tmpstr + 1;
|
||||
tmp_fuzzy_len = 0;
|
||||
}
|
||||
tmpstr ++;
|
||||
}
|
||||
else
|
||||
{
|
||||
tmp_fuzzy_len++;
|
||||
tmpstr ++;
|
||||
}
|
||||
}
|
||||
return edit_distance;
|
||||
}
|
||||
|
||||
|
||||
/**
 * Find stored digests similar to a queried fuzzy string.
 *
 * Steps:
 *  1. derive the length range [origin_len*(1-accuracy), origin_len*(1+accuracy)]
 *     and round it outwards to index keys;
 *  2. collect the lists of all index keys in that range, following each
 *     entry's next_value link to skip absent keys;
 *  3. merge them with GIE_union() (length range and block size filter);
 *  4. score every candidate with GIE_edit_distance_with_position() and report
 *     those whose confidence reaches the candidate's own cfds_lvl.
 *
 * Changes over the previous version: the list array lives on the heap and
 * grows on demand (it was a stack VLA sized by the whole key range); nothing
 * is written when `size` <= 0; a zero origin length or zero index interval no
 * longer divides by zero; a negative lower bound is clamped to 0.
 *
 * @param handle        engine handle
 * @param origin_len    origin length of the queried content
 * @param fuzzy_string  positional fuzzy string of the queried content
 * @param results       output array with room for `size` entries
 * @param size          capacity of `results`
 * @return number of entries written to `results`
 */
int GIE_query(GIE_handle_t * handle, unsigned long long origin_len, const char * fuzzy_string, GIE_result_t * results, int size)
{
	GIE_handle_inner_t * _handle = (GIE_handle_inner_t *)handle;
	double min_tmp = 0;
	double max_tmp = 0;
	unsigned long long min_tmp_t = 0;
	unsigned long long max_tmp_t = 0;
	unsigned long long min_index = 0;
	unsigned long long max_index = 0;
	unsigned long long key = 0;
	unsigned long long query_blocksize = 0;
	struct TQ ** union_list = NULL;
	struct id_table_data ** result_union = NULL;
	int list_cap = 0;
	int list_num = 0;
	int union_size = 0;
	int union_size_max = 0;
	int ret_size = 0;
	int k = 0;

	if(_handle == NULL || fuzzy_string == NULL || results == NULL || size <= 0 || _handle->user_precision == 0)
	{
		return 0;
	}

	/* length range, rounded outwards to index keys */
	min_tmp = (double)(origin_len * (1 - _handle->user_query_accuracy));
	if(min_tmp < 0)
	{
		min_tmp = 0;	/* accuracy > 1 would make the lower bound negative */
	}
	min_tmp_t = (unsigned long long )(floor(min_tmp));
	min_index = min_tmp_t/(_handle->user_precision)*(_handle->user_precision);

	max_tmp = (double)(origin_len * (1 + _handle->user_query_accuracy));
	max_tmp_t = (unsigned long long)(floor(max_tmp));
	max_index = (max_tmp_t/(_handle->user_precision) + 1)*(_handle->user_precision);

	query_blocksize = calc_fh_blocksize(origin_len);

	/* collect the lists of every index key present in [min_index, max_index] */
	key = min_index;
	while(key <= max_index)
	{
		struct index_table_data * list_tmp = (struct index_table_data *)MESA_htable_search_cb(_handle->index_table, (const uchar * )(&key), \
											sizeof(key), NULL, NULL, NULL);
		if(list_tmp != NULL)
		{
			if(list_num == list_cap)
			{
				int new_cap = (list_cap == 0) ? 16 : list_cap * 2;
				struct TQ ** grown = (struct TQ **)realloc(union_list, sizeof(struct TQ *) * (size_t)new_cap);
				if(grown == NULL)
				{
					free(union_list);
					return 0;
				}
				union_list = grown;
				list_cap = new_cap;
			}
			union_list[list_num++] = list_tmp->listhead;
			union_size_max += list_tmp->cnt;
			key = list_tmp->next_value;	/* MAX_UINT64 when there is no larger key */
		}
		else
		{
			key = key + _handle->user_precision;
		}
	}

	if(list_num == 0)
	{
		printf("the fh doesn't exsit!\n");
		free(union_list);
		return 0;
	}

	result_union = (struct id_table_data **)malloc(sizeof(struct id_table_data *) * (size_t)union_size_max);
	if(result_union == NULL)
	{
		free(union_list);
		return 0;
	}

	union_size = GIE_union(union_list, list_num, result_union, min_tmp_t, max_tmp_t, query_blocksize);
	free(union_list);

	for(k = 0; k < union_size && ret_size < size; k++)
	{
		int fuzzy_actual_len = 0;
		unsigned long long calculate_len = 0;
		short conf_tmp = 0;
		int edit_distance = GIE_edit_distance_with_position(result_union[k]->fh, fuzzy_string, origin_len, &fuzzy_actual_len, &calculate_len);

		/* confidence = matched share of the compared hash, weighted by covered length, scaled to CONF_MAX */
		if(origin_len != 0 && fuzzy_actual_len != 0 && edit_distance < fuzzy_actual_len)
		{
			conf_tmp = (fuzzy_actual_len - edit_distance)*(calculate_len + 1)*CONF_MAX/(fuzzy_actual_len * origin_len);
		}

		if(conf_tmp >= result_union[k]->cfds_lvl)
		{
			results[ret_size].cfds_lvl = conf_tmp;
			results[ret_size].id = result_union[k]->id;
			results[ret_size].origin_len = result_union[k]->origin_len;
			results[ret_size++].tag = result_union[k]->tag;
		}
	}

	free(result_union);
	return ret_size;
}
|
||||
|
||||
|
||||
|
||||
736
src/entry/interval_index.c
Normal file
736
src/entry/interval_index.c
Normal file
@@ -0,0 +1,736 @@
|
||||
#include<stdio.h>
|
||||
#include<stdlib.h>
|
||||
#include"interval_index.h"
|
||||
|
||||
|
||||
/**
|
||||
* There is a trick here. In order to hide specific
|
||||
* realization of some structures, we use some approaches.
|
||||
* Then the inner structure is named with "shadow", and
|
||||
* the outer structure is named with "light". These words
|
||||
* come from movie <<The Grand Master>>. Enjoy it :)
|
||||
**/
|
||||
|
||||
|
||||
/**
|
||||
* Structure of inner segment
|
||||
**/
|
||||
typedef struct __IVI_shadow_seg_t{
|
||||
IVI_seg_t lightseg;
|
||||
TAILQ_ENTRY(__IVI_shadow_seg_t) ENTRY;
|
||||
}IVI_shadow_seg_t;
|
||||
|
||||
|
||||
TAILQ_HEAD(TQ, __IVI_shadow_seg_t);
|
||||
|
||||
/* Structure of inner InterVal Index */
|
||||
typedef struct __IVI_shadow_t{
|
||||
struct TQ ivi_queue;
|
||||
int segs_cnt;
|
||||
OFFSET_TYPE segs_length;
|
||||
}IVI_shadow_t;
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* new is closer to head or tail ?
|
||||
* Return 1 if closer to head than tail
|
||||
* Else return 0
|
||||
*/
|
||||
int closer_to_head(IVI_shadow_seg_t * head, IVI_shadow_seg_t * tail, OFFSET_TYPE target)
|
||||
{
|
||||
if(head == NULL || tail == NULL)
|
||||
return 1;
|
||||
S_OFFSET_TYPE tmp1 = (S_OFFSET_TYPE)(target - head->lightseg.left);
|
||||
S_OFFSET_TYPE tmp2 = (S_OFFSET_TYPE)(target - tail->lightseg.left);
|
||||
S_OFFSET_TYPE distance_to_head = tmp1 > 0 ? tmp1 : -tmp1;
|
||||
S_OFFSET_TYPE distance_to_tail = tmp2 > 0 ? tmp2 : -tmp2;
|
||||
return (distance_to_tail - distance_to_head > 0);
|
||||
}
|
||||
|
||||
|
||||
/**
 * Name:
 * 		IVI_prev_continuous_seg
 * Description:
 * 		Get the segment queued directly before seg, provided that
 * 		continuous(prev.right, seg->left) holds.
 * Return:
 * 		The previous segment, or NULL when seg is NULL, has no
 * 		predecessor, or the predecessor is not continuous with it.
 **/
IVI_seg_t * IVI_prev_continuous_seg(IVI_seg_t * seg)
{
	IVI_shadow_seg_t * self;
	IVI_shadow_seg_t * neighbour;

	if(seg == NULL)
		return NULL;

	self = (IVI_shadow_seg_t *)seg;
	neighbour = TAILQ_PREV(self, TQ, ENTRY);
	if(neighbour != NULL && continuous(neighbour->lightseg.right, seg->left))
		return (IVI_seg_t *)neighbour;

	return NULL;
}
|
||||
|
||||
|
||||
/**
 * Name:
 * 		IVI_next_continuous_seg
 * Description:
 * 		Get the segment queued directly after seg, provided that
 * 		continuous(seg->right, next.left) holds.
 * Return:
 * 		The next segment, or NULL when seg is NULL, has no successor,
 * 		or the successor is not continuous with it.
 **/
IVI_seg_t * IVI_next_continuous_seg(IVI_seg_t * seg)
{
	IVI_shadow_seg_t * self;
	IVI_shadow_seg_t * neighbour;

	if(seg == NULL)
		return NULL;

	self = (IVI_shadow_seg_t *)seg;
	neighbour = TAILQ_NEXT(self, ENTRY);
	if(neighbour != NULL && continuous(seg->right, neighbour->lightseg.left))
		return (IVI_seg_t *)neighbour;

	return NULL;
}
|
||||
|
||||
|
||||
/**
|
||||
* Name:
|
||||
* IVI_relative_position
|
||||
* Description:
|
||||
* Get relative position of given two interval segments
|
||||
* Params:
|
||||
* seg1: Subject of relation
|
||||
* seg2: Object of relation
|
||||
* Relation:
|
||||
* On success, return the relation of two segments with enum;
|
||||
* Else, return ERROR in enum;
|
||||
**/
|
||||
Relation_t IVI_relative_position(IVI_seg_t * seg1, IVI_seg_t * seg2)
|
||||
{
|
||||
if(NULL == seg1 || NULL == seg2)
|
||||
{
|
||||
return ERROR;
|
||||
}
|
||||
|
||||
if(before(seg1->right, seg2->left))
|
||||
{
|
||||
return LEFT_NO_OVERLAP;
|
||||
}
|
||||
|
||||
if(!before(seg1->right, seg2->left) && before(seg1->right, seg2->right) && before(seg1->left, seg2->left))
|
||||
{
|
||||
return LEFT_OVERLAP;
|
||||
}
|
||||
|
||||
if(!before(seg1->left, seg2->left) && !after(seg1->right, seg2->right))
|
||||
{
|
||||
return CONTAINED;
|
||||
}
|
||||
|
||||
if(!after(seg1->left, seg2->left) && !before(seg1->right, seg2->right))
|
||||
{
|
||||
return CONTAIN;
|
||||
}
|
||||
|
||||
if(!after(seg1->left, seg2->right) && after(seg1->right, seg2->right) && after(seg1->left, seg2->left))
|
||||
{
|
||||
return RIGHT_OVERLAP;
|
||||
}
|
||||
|
||||
if(after(seg1->left, seg2->right))
|
||||
{
|
||||
return RIGHT_NO_OVERLAP;
|
||||
}
|
||||
return ERROR;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Name:
|
||||
* IVI_create
|
||||
* Description:
|
||||
* Create an InterVal Index
|
||||
* Params:
|
||||
* void
|
||||
* Return:
|
||||
* Return a handler of this InterVal Index
|
||||
**/
|
||||
IVI_t * IVI_create(void)
|
||||
{
|
||||
IVI_shadow_t * shadow_ivi = (IVI_shadow_t *)malloc(sizeof(IVI_shadow_t));
|
||||
TAILQ_INIT(&(shadow_ivi->ivi_queue));
|
||||
shadow_ivi->segs_cnt = 0;
|
||||
shadow_ivi->segs_length = 0;
|
||||
return (IVI_t *)shadow_ivi;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Name:
|
||||
* IVI_destroy
|
||||
* Description:
|
||||
* Destroy a given InterVal Index's handler
|
||||
* Params:
|
||||
* handler: The InterVal Index you want to destroy
|
||||
* cb: Callback function for user to free data in segement
|
||||
* usr_para: User parameter
|
||||
* Return:
|
||||
* void
|
||||
**/
|
||||
void IVI_destroy(IVI_t * handler, IVI_callback_t cb, void * usr_para)
|
||||
{
|
||||
if(handler == NULL)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
IVI_shadow_t * shadow_ivi = (IVI_shadow_t *)handler;
|
||||
IVI_shadow_seg_t * tmpseg = TAILQ_FIRST(&(shadow_ivi->ivi_queue));
|
||||
IVI_shadow_seg_t * tmp;
|
||||
/* Free each seg in IVI */
|
||||
while(tmpseg != NULL)
|
||||
{
|
||||
tmp = TAILQ_NEXT(tmpseg, ENTRY);
|
||||
/* Free *data in seg */
|
||||
if(NULL != cb)
|
||||
{
|
||||
cb(&(tmpseg->lightseg), usr_para);
|
||||
}
|
||||
free(tmpseg);
|
||||
tmpseg = tmp;
|
||||
}
|
||||
|
||||
/* Free IVI */
|
||||
free(shadow_ivi);
|
||||
handler = NULL;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Name:
|
||||
* IVI_seg_malloc
|
||||
* Description:
|
||||
* Malloc a segment with given parameters
|
||||
* Params:
|
||||
* left: Left point of segment
|
||||
* right: Right point of segment
|
||||
* data: User data
|
||||
* Return:
|
||||
* Return a pointer of segment structure.
|
||||
**/
|
||||
IVI_seg_t * IVI_seg_malloc(OFFSET_TYPE left, OFFSET_TYPE right, void * data)
|
||||
{
|
||||
/* Left must <= Right */
|
||||
if(after(left, right))
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
IVI_shadow_seg_t * shadow_seg = (IVI_shadow_seg_t *)malloc(sizeof(IVI_shadow_seg_t));
|
||||
shadow_seg->lightseg.left = left;
|
||||
shadow_seg->lightseg.right= right;
|
||||
shadow_seg->lightseg.data = data;
|
||||
|
||||
return (IVI_seg_t *)shadow_seg;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Name:
|
||||
* IVI_seg_free
|
||||
* Description:
|
||||
* Free the memory of given segment
|
||||
* Params:
|
||||
* seg: The segment that you want to free
|
||||
* cb: Callback function for user to free *data in seg
|
||||
* usr_para: User parameter for cb
|
||||
* Return:
|
||||
* void
|
||||
**/
|
||||
void IVI_seg_free(IVI_seg_t * seg, IVI_callback_t cb, void * usr_para)
|
||||
{
|
||||
/* Free user data first */
|
||||
if(cb != NULL)
|
||||
{
|
||||
cb(seg, usr_para);
|
||||
}
|
||||
IVI_shadow_seg_t * shadow_seg = (IVI_shadow_seg_t *)seg;
|
||||
|
||||
/* Free seg */
|
||||
free(shadow_seg);
|
||||
seg = NULL;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Name:
|
||||
* IVI_insert
|
||||
* Description:
|
||||
* Insert a segment to an InterVal Index handler,and the segment
|
||||
* MUST not be overlapped with others in handler.
|
||||
* Params:
|
||||
* handler: The handler of InterVal Index created by IVI_create
|
||||
* seg: A segment that user wants to add. It MUST be created
|
||||
* by IVI_seg_malloc.
|
||||
* Return:
|
||||
* On success, 0 is returned;
|
||||
* Else when overlapp occures or error occures, -1 is returned.
|
||||
**/
|
||||
int IVI_insert(IVI_t * handler, IVI_seg_t * seg)
|
||||
{
|
||||
IVI_shadow_t * shadow_ivi;
|
||||
IVI_shadow_seg_t *head, *tail, *new_seg, *tmp_seg;
|
||||
|
||||
if(NULL == handler || NULL == seg)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
||||
shadow_ivi = (IVI_shadow_t *)handler;
|
||||
new_seg = (IVI_shadow_seg_t *)seg;
|
||||
head = TAILQ_FIRST(&(shadow_ivi->ivi_queue));
|
||||
tail = TAILQ_LAST(&(shadow_ivi->ivi_queue), TQ);
|
||||
|
||||
if(closer_to_head(head, tail, seg->left))
|
||||
{
|
||||
TAILQ_FOREACH(tmp_seg, &(shadow_ivi->ivi_queue), ENTRY)
|
||||
{
|
||||
/* Find the first seg whose left is bigger than given seg's right, we will insert new seg before it */
|
||||
if(after(tmp_seg->lightseg.left, new_seg->lightseg.right))
|
||||
{
|
||||
TAILQ_INSERT_BEFORE(tmp_seg, new_seg, ENTRY);
|
||||
shadow_ivi->segs_cnt ++;
|
||||
shadow_ivi->segs_length += (seg->right - seg->left + 1);
|
||||
return 0;
|
||||
}
|
||||
else if(before(tmp_seg->lightseg.right, new_seg->lightseg.left))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
else /* Overlap */
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
/* If have searched to the end of list, we will inset it to the tail */
|
||||
TAILQ_INSERT_TAIL(&(shadow_ivi->ivi_queue), new_seg, ENTRY);
|
||||
shadow_ivi->segs_cnt ++;
|
||||
shadow_ivi->segs_length += (seg->right - seg->left + 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
TAILQ_FOREACH_REVERSE(tmp_seg, &(shadow_ivi->ivi_queue), TQ, ENTRY)
|
||||
{
|
||||
/* Find the first seg whose right is smaller than given seg's left, we will insert new seg after it */
|
||||
if(before(tmp_seg->lightseg.right, new_seg->lightseg.left))
|
||||
{
|
||||
TAILQ_INSERT_AFTER(&(shadow_ivi->ivi_queue), tmp_seg, new_seg, ENTRY);
|
||||
shadow_ivi->segs_cnt ++;
|
||||
shadow_ivi->segs_length += (seg->right - seg->left + 1);
|
||||
return 0;
|
||||
}
|
||||
else if(after(tmp_seg->lightseg.left, new_seg->lightseg.right))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
else /* Overlap */
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
/* If have searched to the head of list, we will inset it to the head */
|
||||
TAILQ_INSERT_HEAD(&(shadow_ivi->ivi_queue), new_seg, ENTRY);
|
||||
shadow_ivi->segs_cnt ++;
|
||||
shadow_ivi->segs_length += (seg->right - seg->left + 1);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Name:
|
||||
* IVI_remove
|
||||
* Description:
|
||||
* Remove a given segment from given InterVal Index handler.
|
||||
* Params:
|
||||
* handler: The handler of InterVal Index created by IVI_create
|
||||
* seg: A segment that user wants to delete. It MUST be created
|
||||
* by IVI_seg_malloc.
|
||||
* Return:
|
||||
* On success, 0 is returned;
|
||||
* Else when overlapp occures, -1 is returned.
|
||||
**/
|
||||
int IVI_remove(IVI_t * handler, IVI_seg_t * seg)
|
||||
{
|
||||
if(NULL == handler || NULL == seg)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
||||
IVI_shadow_t * shadow_ivi = (IVI_shadow_t *)handler;
|
||||
IVI_shadow_seg_t * shadow_seg = (IVI_shadow_seg_t *)seg;
|
||||
|
||||
TAILQ_REMOVE(&(shadow_ivi->ivi_queue), shadow_seg, ENTRY);
|
||||
shadow_ivi->segs_cnt --;
|
||||
shadow_ivi->segs_length -= (seg->right - seg->left + 1);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Name:
|
||||
* IVI_query
|
||||
* Description:
|
||||
* Query from given InterVal Index and get the number of segments
|
||||
* which are overlapped with given interval, and store those segments
|
||||
* in the last parameter.
|
||||
* Params:
|
||||
* handler: The handler of interval index created by IVI_create
|
||||
* left: Left point of given interval
|
||||
* right: Right point of given interval
|
||||
* segs: An address of a segment pointer array to store those segments which
|
||||
* are overlapped with given interval. NOTE that user should not malloc
|
||||
* the array, and segs need to be freed by user. The element of *segs
|
||||
* MUST not be freed by user.
|
||||
* Return:
|
||||
* Return the number of segments which are overlapped with given interval
|
||||
**/
|
||||
int IVI_query(IVI_t * handler, OFFSET_TYPE left, OFFSET_TYPE right, IVI_seg_t *** segs)
|
||||
{
|
||||
IVI_shadow_t * shadow_ivi;
|
||||
IVI_shadow_seg_t *head, *tail, *tmp, *left_tmp, *right_tmp;
|
||||
int interval_cnt = 0, i;
|
||||
|
||||
if(NULL == handler || after(left, right))
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
||||
shadow_ivi = (IVI_shadow_t *)handler;
|
||||
head = TAILQ_FIRST(&(shadow_ivi->ivi_queue));
|
||||
tail = TAILQ_LAST(&(shadow_ivi->ivi_queue), TQ);
|
||||
|
||||
/* Traverse from head or tail? We need to decide */
|
||||
if(closer_to_head(head, tail, left))
|
||||
{
|
||||
tmp = head;
|
||||
while(tmp != NULL)
|
||||
{
|
||||
if(after(left, tmp->lightseg.right))
|
||||
{
|
||||
tmp = TAILQ_NEXT(tmp, ENTRY);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Get the seg which left is in or before*/
|
||||
left_tmp = tmp;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if(tmp == NULL)
|
||||
{
|
||||
*segs = NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Get the num of overlapped segs */
|
||||
while(tmp != NULL)
|
||||
{
|
||||
if(!before(right, tmp->lightseg.left))
|
||||
{
|
||||
tmp = TAILQ_NEXT(tmp, ENTRY);
|
||||
interval_cnt ++;
|
||||
}
|
||||
else
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
tmp = left_tmp;
|
||||
if(interval_cnt == 0)
|
||||
{
|
||||
*segs = NULL;
|
||||
return 0;
|
||||
}
|
||||
*segs = (IVI_seg_t **)malloc(interval_cnt * sizeof(IVI_seg_t *));
|
||||
for(i = 0; i < interval_cnt; i++)
|
||||
{
|
||||
(*segs)[i] = (IVI_seg_t *)tmp;
|
||||
tmp = TAILQ_NEXT(tmp, ENTRY);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
tmp = tail;
|
||||
while(tmp != NULL)
|
||||
{
|
||||
if(before(right, tmp->lightseg.left))
|
||||
{
|
||||
tmp = TAILQ_PREV(tmp, TQ, ENTRY);
|
||||
}
|
||||
else
|
||||
{
|
||||
right_tmp = tmp;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if(tmp == NULL)
|
||||
{
|
||||
*segs = NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Get the num of overlapped segs */
|
||||
while(tmp != NULL)
|
||||
{
|
||||
if(!after(left, tmp->lightseg.right))
|
||||
{
|
||||
tmp = TAILQ_PREV(tmp, TQ, ENTRY);
|
||||
interval_cnt ++;
|
||||
}
|
||||
else
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
tmp = right_tmp;
|
||||
if(interval_cnt == 0)
|
||||
{
|
||||
*segs = NULL;
|
||||
return 0;
|
||||
}
|
||||
*segs = (IVI_seg_t **)malloc(interval_cnt * sizeof(IVI_seg_t *));
|
||||
for(i = interval_cnt - 1; i >= 0; i--)
|
||||
{
|
||||
(*segs)[i] = (IVI_seg_t *)tmp;
|
||||
tmp = TAILQ_PREV(tmp, TQ, ENTRY);
|
||||
}
|
||||
}
|
||||
return interval_cnt;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Name:
|
||||
* IVI_query_continuous
|
||||
* Description:
|
||||
* Query from interval index handler and get the number of continous segments
|
||||
* which are overlapped with given interval.
|
||||
* Params:
|
||||
* handler: The handler of InterVal Index created by IVI_create.
|
||||
* left: Left point of given interval
|
||||
* right: Right point of given interval
|
||||
* segs: An address of a segment pointer array to store those segments which
|
||||
* are overlapped with given interval. NOTE that user should not malloc
|
||||
* the array, and segs need to be freed by user. The element of *segs
|
||||
* MUST not be freed by user.
|
||||
* Return:
|
||||
* Return the number of continous segments which are overlapped with given interval
|
||||
**/
|
||||
int IVI_query_continuous(IVI_t * handler, OFFSET_TYPE left, OFFSET_TYPE right, IVI_seg_t *** segs)
|
||||
{
|
||||
IVI_shadow_t * shadow_ivi;
|
||||
IVI_shadow_seg_t *head, *tail, *tmp, *left_tmp, *right_tmp;
|
||||
int interval_cnt = 0, i;
|
||||
|
||||
if(NULL == handler || after(left, right))
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
shadow_ivi = (IVI_shadow_t *)handler;
|
||||
head = TAILQ_FIRST(&(shadow_ivi->ivi_queue));
|
||||
tail = TAILQ_LAST(&(shadow_ivi->ivi_queue), TQ);
|
||||
|
||||
|
||||
|
||||
/* Traverse from head or tail? We need to decide */
|
||||
if(closer_to_head(head, tail, left))
|
||||
{
|
||||
tmp = head;
|
||||
while(tmp != NULL)
|
||||
{
|
||||
if(after(left, tmp->lightseg.right))
|
||||
{
|
||||
tmp = TAILQ_NEXT(tmp, ENTRY);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Get the seg which left is in or before*/
|
||||
left_tmp = tmp;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if(tmp == NULL)
|
||||
{
|
||||
*segs = NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Get the num of overlapped segs */
|
||||
while(tmp != NULL)
|
||||
{
|
||||
if(!before(right, tmp->lightseg.left))
|
||||
{
|
||||
tmp = TAILQ_NEXT(tmp, ENTRY);
|
||||
interval_cnt ++;
|
||||
}
|
||||
else
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
IVI_shadow_seg_t * prev = TAILQ_PREV(tmp, TQ, ENTRY);
|
||||
if(tmp != NULL && !continuous(prev->lightseg.right, tmp->lightseg.left))
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
tmp = left_tmp;
|
||||
if(interval_cnt == 0)
|
||||
{
|
||||
*segs = NULL;
|
||||
return 0;
|
||||
}
|
||||
*segs = (IVI_seg_t **)malloc(interval_cnt * sizeof(IVI_seg_t *));
|
||||
for(i = 0; i < interval_cnt; i++)
|
||||
{
|
||||
(*segs)[i] = (IVI_seg_t *)tmp;
|
||||
tmp = TAILQ_NEXT(tmp, ENTRY);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
tmp = tail;
|
||||
while(tmp != NULL)
|
||||
{
|
||||
if(before(right, tmp->lightseg.left))
|
||||
{
|
||||
tmp = TAILQ_PREV(tmp, TQ, ENTRY);
|
||||
}
|
||||
else
|
||||
{
|
||||
right_tmp = tmp;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if(tmp == NULL)
|
||||
{
|
||||
*segs = NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Get the num of overlapped segs */
|
||||
while(tmp != NULL)
|
||||
{
|
||||
if(!after(left, tmp->lightseg.right))
|
||||
{
|
||||
tmp = TAILQ_PREV(tmp, TQ, ENTRY);
|
||||
interval_cnt ++;
|
||||
}
|
||||
else
|
||||
{
|
||||
break;
|
||||
}
|
||||
IVI_shadow_seg_t * next = TAILQ_NEXT(tmp, ENTRY);
|
||||
if(tmp != NULL && !continuous(tmp->lightseg.right, next->lightseg.left))
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
tmp = right_tmp;
|
||||
if(interval_cnt == 0)
|
||||
{
|
||||
*segs = NULL;
|
||||
return 0;
|
||||
}
|
||||
*segs = (IVI_seg_t **)malloc(interval_cnt * sizeof(IVI_seg_t *));
|
||||
for(i = interval_cnt - 1; i >= 0; i--)
|
||||
{
|
||||
(*segs)[i] = (IVI_seg_t *)tmp;
|
||||
tmp = TAILQ_PREV(tmp, TQ, ENTRY);
|
||||
}
|
||||
}
|
||||
|
||||
return interval_cnt;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Name:
|
||||
* IVI_seg_cnt
|
||||
* Description:
|
||||
* Get the count of segments in given interval index handler
|
||||
* Params:
|
||||
* handler: The handler of InterVal Index created by IVI_create.
|
||||
* Return:
|
||||
* Return the count of segments in given interval index handler
|
||||
**/
|
||||
int IVI_seg_cnt(IVI_t * handler)
|
||||
{
|
||||
if(handler == NULL)
|
||||
return -1;
|
||||
IVI_shadow_t * shadow_ivi = (IVI_shadow_t *)handler;
|
||||
return shadow_ivi->segs_cnt;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Name:
|
||||
* IVI_seg_len
|
||||
* Description:
|
||||
* Get the length of whole segments in given interval index handler
|
||||
* Params:
|
||||
* handler: The handler of InterVal Index created by IVI_create.
|
||||
* Return:
|
||||
* Return the length of whole segments in given interval index handler
|
||||
**/
|
||||
OFFSET_TYPE IVI_seg_length(IVI_t * handler)
|
||||
{
|
||||
if(handler == NULL)
|
||||
return -1;
|
||||
IVI_shadow_t * shadow_ivi = (IVI_shadow_t *)handler;
|
||||
return shadow_ivi->segs_length;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Name:
|
||||
* IVI_traverse
|
||||
* Description:
|
||||
* Traverse given InterVal Index and execute given callback function
|
||||
* one time for each seg in InterVal Index.
|
||||
* Params:
|
||||
* handler: The handler of InterVal Index created by IVI_create.
|
||||
* IVI_callback_t: Callback function for user to define.
|
||||
* usr_para: Parameter user want to pass to callback function.
|
||||
* Return:
|
||||
* void
|
||||
**/
|
||||
void IVI_traverse(IVI_t * handler, IVI_callback_t cb, void * usr_para)
|
||||
{
|
||||
if(NULL == handler || NULL == cb)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
IVI_shadow_t * shadow_ivi = (IVI_shadow_t *)handler;
|
||||
IVI_shadow_seg_t * tmp_seg = TAILQ_FIRST(&(shadow_ivi->ivi_queue));
|
||||
IVI_shadow_seg_t * tmp;
|
||||
/* Traverse the IVI */
|
||||
while(tmp_seg != NULL)
|
||||
{
|
||||
/*
|
||||
* The place we can't use TAILQ_FOREACH because we
|
||||
* do not no what will callback funciton does.
|
||||
* */
|
||||
tmp = TAILQ_NEXT(tmp_seg, ENTRY);
|
||||
cb((IVI_seg_t *)tmp_seg, usr_para);
|
||||
tmp_seg = tmp;
|
||||
}
|
||||
}
|
||||
828
src/entry/mesa_fuzzy.c
Normal file
828
src/entry/mesa_fuzzy.c
Normal file
@@ -0,0 +1,828 @@
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
#include <math.h>
|
||||
#include "mesa_fuzzy.h"
|
||||
#include "interval_index.h"
|
||||
|
||||
|
||||
/* Size of the byte window kept by the rolling hash (struct roll_state) */
#define ROLLING_WINDOW	7
/* Factor applied to the power of two computed in get_blocksize */
#define BLOCKSIZE_MIN	3
/* NOTE(review): not referenced in this part of the file - purpose to be confirmed */
#define MAXSIZE			10000
/* Multiplier used by sum_hash; 0x01000193 is the 32-bit FNV prime */
#define HASH_PRIME		0x01000193
/* NOTE(review): presumably the start value of the sum_hash chain - its use is not in this part of the file */
#define HASH_INIT		0x28021967

/* NOTE(review): presumably a switch for debug output - its use is not in this part of the file */
#define DEBUG			(0)
|
||||
|
||||
struct roll_state
|
||||
{
|
||||
unsigned char window[ROLLING_WINDOW];
|
||||
unsigned int h1, h2, h3;
|
||||
unsigned int n;
|
||||
};
|
||||
|
||||
|
||||
/*
 * Per-segment hashing data; stored as the user data of an IVI segment and
 * released by fuzzy_node_free.
 * NOTE(review): the field notes below were translated from comments whose
 * encoding was damaged - please verify them against the code that fills
 * the fields.
 */
typedef struct
{
	char * left_data;			/* presumably: start of the data kept for the left edge; heap memory */
	unsigned int left_len;		/* presumably: length of the data kept in left_data */

	char * hash_result;			/* presumably: FNV-based hash output of the segment; heap memory */
	unsigned long long left_offset;
	unsigned long long right_offset;

	struct roll_state * right_status_r;	/* presumably: rolling hash state at the right edge; heap memory */
	unsigned int right_status_shash;	/* presumably: FNV value at the right edge */
	unsigned int right_len;				/* presumably: length belonging to the right edge; read by fuzzy_feed */
	int slice_num;
} fuzzy_node;
|
||||
|
||||
|
||||
typedef struct
|
||||
{
|
||||
unsigned long long orilen;
|
||||
IVI_t * ivi; //ÿһ<C3BF><D2BB>handle<6C><65><EFBFBD>汣<EFBFBD><E6B1A3>һ<EFBFBD><D2BB>IVIָ<49>룬һ<EBA3AC><D2BB>IVI<56><49><EFBFBD>汣<EFBFBD><E6B1A3><EFBFBD><EFBFBD><EFBFBD><EFBFBD>һ<EFBFBD><D2BB><EFBFBD>ļ<EFBFBD><C4BC><EFBFBD><EFBFBD><EFBFBD>Ƭ
|
||||
unsigned long long effective_length;
|
||||
}fuzzy_handle_inner_t;
|
||||
|
||||
|
||||
/*
 * NOTE(review): the code using this type is not in this part of the file;
 * the field notes were translated from comments whose encoding was
 * damaged - please verify.
 */
typedef struct
{
	char * head;						/* presumably: char array receiving the result */
	unsigned int size;
	unsigned int offset;				/* presumably: length of the array content */
	unsigned long long first_FNV_offset;
	unsigned long long last_FNV_offset;
} final_result;
|
||||
|
||||
|
||||
/* NOTE(review): the code using this type is not in this part of the file - meaning of the fields to be confirmed */
typedef struct
{
	unsigned long long first_FNV_offset;
	unsigned long long last_FNV_offset;
	unsigned long long hash_length;
} final_length;
|
||||
|
||||
|
||||
unsigned int fuzzy_hash_calculate(IVI_seg_t * seg, const char * data, unsigned long long offset, unsigned long long blocksize);
|
||||
void fuzzy_calculate_self(IVI_seg_t * seg, const char * data, unsigned long long offset, unsigned long long blocksize);
|
||||
void fuzzy_calculate_self_with_prev(IVI_seg_t * prev_seg, IVI_seg_t * seg, const char * data, unsigned long long blocksize);
|
||||
void fuzzy_modify_next(IVI_seg_t * seg, IVI_seg_t * next_seg, unsigned long long blocksize);
|
||||
unsigned long long get_prev_continous_length(IVI_seg_t * seg);
|
||||
unsigned int segment_overlap(fuzzy_handle_t * handle, fuzzy_node * fnode, unsigned int size, unsigned long long offset, const char * data);
|
||||
void fuzzy_hash_merge(IVI_seg_t * seg, void * user_para);
|
||||
void fuzzy_hash_merge_new(IVI_seg_t * seg, void * user_para);
|
||||
void fuzzy_hash_length(IVI_seg_t * seg, void * user_para);
|
||||
unsigned long long fuzzy_status(fuzzy_handle_t * handle, int type);
|
||||
|
||||
/* Base64 alphabet: upper-case letters, lower-case letters, digits, '+' and '/' */
char * b64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "abcdefghijklmnopqrstuvwxyz" "0123456789+/";
|
||||
|
||||
|
||||
/**
|
||||
* roll_state<74><65>ʼ<EFBFBD><CABC>
|
||||
*/
|
||||
static void roll_init(struct roll_state * self)
|
||||
{
|
||||
memset(self, 0, sizeof(struct roll_state));
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* <20><><EFBFBD><EFBFBD>roll_hashֵ<68><D6B5><EFBFBD><EFBFBD><EFBFBD>ⲿ<EFBFBD><E2B2BF><EFBFBD>ݶ<EFBFBD>ȡ<EFBFBD><C8A1><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
||||
*/
|
||||
static void roll_hash(struct roll_state * self, unsigned char c)
|
||||
{
|
||||
self->h2 -= self->h1;
|
||||
self->h2 += ROLLING_WINDOW * (unsigned int)c;
|
||||
|
||||
self->h1 += (unsigned int)c;
|
||||
self->h1 -= (unsigned int)self->window[self->n];
|
||||
|
||||
self->window[self->n] = c;
|
||||
self->n++;
|
||||
if (self->n == ROLLING_WINDOW)
|
||||
self->n = 0;
|
||||
self->h3 <<= 5;
|
||||
self->h3 ^= c;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* <20><><EFBFBD>㴰<EFBFBD><E3B4B0><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>roll_hashֵ<68><D6B5>ÿ<EFBFBD><C3BF>roll_hashֵ<68><D6B5><EFBFBD><EFBFBD>һ<EFBFBD><D2BB><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>Ƭ
|
||||
*/
|
||||
static unsigned int roll_sum(const struct roll_state * self)
|
||||
{
|
||||
return self->h1 + self->h2 + self->h3;
|
||||
/* return self->h1 + self->h2; */
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>Ƭ<EFBFBD><C6AC>FNVֵ
|
||||
*/
|
||||
static unsigned int sum_hash(unsigned char c, unsigned int h)
|
||||
{
|
||||
return (h * HASH_PRIME) ^ c;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* <20><><EFBFBD><EFBFBD>handle
|
||||
*/
|
||||
fuzzy_handle_t * fuzzy_create_handle(unsigned long long origin_len)
|
||||
{
|
||||
fuzzy_handle_inner_t * handle = (fuzzy_handle_inner_t *)malloc(sizeof(fuzzy_handle_inner_t));
|
||||
handle->orilen = origin_len;
|
||||
handle->ivi = IVI_create();
|
||||
handle->effective_length = 0;
|
||||
return (fuzzy_handle_t *)handle;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* IVI_destroy<6F>Ļص<C4BB><D8B5><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>IVI<56>е<EFBFBD><D0B5><EFBFBD><EFBFBD><EFBFBD>
|
||||
*/
|
||||
void fuzzy_node_free(IVI_seg_t * seg, void * usr_para)
|
||||
{
|
||||
//printf("free seg[%lu, %lu]\n", seg->left, seg->right);
|
||||
fuzzy_node * temp = (fuzzy_node*)(seg->data);
|
||||
if(temp->left_data != NULL)
|
||||
{
|
||||
free(temp->left_data);
|
||||
temp->left_data = NULL;
|
||||
}
|
||||
if(temp->hash_result != NULL)
|
||||
{
|
||||
free(temp->hash_result);
|
||||
temp->hash_result = NULL;
|
||||
}
|
||||
free(temp->right_status_r);
|
||||
temp->right_status_r = NULL;
|
||||
free(temp);
|
||||
temp = NULL;
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* <20><><EFBFBD><EFBFBD>handle
|
||||
*/
|
||||
void fuzzy_destroy_handle(fuzzy_handle_t * handle)
|
||||
{
|
||||
IVI_destroy(((fuzzy_handle_inner_t *)handle)->ivi, fuzzy_node_free, NULL);
|
||||
free((fuzzy_handle_inner_t *)handle);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ݣ<EFBFBD><DDA3><EFBFBD><EFBFBD>Ҽ<EFBFBD><D2BC><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ݵ<EFBFBD>fuzzy_hashֵ
|
||||
*/
|
||||
unsigned int fuzzy_feed(fuzzy_handle_t * handle, const char * data, unsigned int size, unsigned long long offset)
|
||||
{
|
||||
fuzzy_node * node = (fuzzy_node *)calloc(sizeof(fuzzy_node), 1);
|
||||
node->right_status_r = (struct roll_state *)calloc(sizeof (struct roll_state), 1);
|
||||
roll_init(node->right_status_r);
|
||||
node->slice_num = 0;
|
||||
unsigned int length = segment_overlap(handle, node, size, offset, data);
|
||||
if(offset == 0)
|
||||
{
|
||||
((fuzzy_handle_inner_t *)handle)->effective_length += size - node->right_len;
|
||||
return (size - node->right_len);
|
||||
}
|
||||
else
|
||||
{
|
||||
((fuzzy_handle_inner_t *)handle)->effective_length += length;
|
||||
}
|
||||
return length; //<2F><><EFBFBD><EFBFBD><EFBFBD>Ѿ<EFBFBD><D1BE><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>Ч<EFBFBD><D0A7><EFBFBD><EFBFBD>
|
||||
}
|
||||
|
||||
|
||||
|
||||
unsigned long long get_blocksize(unsigned long long orilen)
|
||||
{
|
||||
double tmp = orilen/(64 * BLOCKSIZE_MIN);
|
||||
double index = floor(log(tmp)/log(2));
|
||||
double tmp_t = pow(2, index);
|
||||
unsigned long long blocksize = (unsigned long long)(tmp_t * BLOCKSIZE_MIN);
|
||||
return blocksize;
|
||||
}
|
||||
|
||||
/**
|
||||
* <20>ж<EFBFBD><D0B6><EFBFBD><EFBFBD><EFBFBD><EFBFBD>Ƿ<EFBFBD><C7B7><EFBFBD><EFBFBD>Ѿ<EFBFBD><D1BE><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>и<EFBFBD><D0B8><EFBFBD>
|
||||
*/
|
||||
unsigned int segment_overlap(fuzzy_handle_t * handle, fuzzy_node * fnode, unsigned int size, unsigned long long offset, const char * data)
|
||||
{
|
||||
IVI_seg_t ** overlap_segs = NULL;
|
||||
IVI_seg_t * seg = IVI_seg_malloc(offset, offset + size -1, (void *)fnode);
|
||||
int overlap_segnum = 0;
|
||||
unsigned int effective_length = 0;
|
||||
unsigned int total_length = 0;
|
||||
unsigned long long blocksize = get_blocksize(((fuzzy_handle_inner_t *)handle)->orilen);
|
||||
|
||||
/*<2A><>ѯ<EFBFBD>Ƿ<EFBFBD><C7B7>и<EFBFBD><D0B8>ǣ<EFBFBD><C7A3><EFBFBD><EFBFBD><EFBFBD><EFBFBD>и<EFBFBD><D0B8>ǣ<EFBFBD><C7A3><EFBFBD><EFBFBD>ظ<EFBFBD><D8B8>ǵ<EFBFBD>segment<6E><74>Ƭ<EFBFBD><C6AC><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>û<EFBFBD>и<EFBFBD><D0B8>ǣ<EFBFBD><C7A3><EFBFBD><EFBFBD><EFBFBD>0*/
|
||||
overlap_segnum = IVI_query(((fuzzy_handle_inner_t *)handle)->ivi, offset, offset + size - 1, &overlap_segs);
|
||||
|
||||
/*<2A><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ֵΪ<D6B5><CEAA><EFBFBD><EFBFBD><EFBFBD><EFBFBD>˵<EFBFBD><CBB5><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>IJ<EFBFBD><C4B2><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>⣬<EFBFBD><E2A3AC>ӡ<EFBFBD><D3A1><EFBFBD><EFBFBD><EFBFBD><EFBFBD>Ϣ*/
|
||||
if(overlap_segnum < 0)
|
||||
{
|
||||
printf("fragment info error!\n");
|
||||
IVI_seg_free(seg, fuzzy_node_free, NULL);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*<2A><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ֵΪ0<CEAA><30>˵<EFBFBD><CBB5>û<EFBFBD>и<EFBFBD><D0B8>ǵ<EFBFBD><C7B5><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ֱ<EFBFBD>Ӳ<EFBFBD><D3B2><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>*/
|
||||
if(overlap_segnum == 0)
|
||||
{
|
||||
IVI_insert(((fuzzy_handle_inner_t *)handle)->ivi,seg);
|
||||
effective_length = fuzzy_hash_calculate(seg, data, offset, blocksize);
|
||||
|
||||
total_length = seg->right - seg->left + 1;
|
||||
return effective_length;
|
||||
}
|
||||
|
||||
/*<2A><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ֵΪ<D6B5><CEAA><EFBFBD>ǵ<EFBFBD>Ƭ<EFBFBD><C6AC><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>Ҫ<EFBFBD><D2AA><EFBFBD>ݸ<EFBFBD><DDB8><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>һһ<D2BB><D2BB><EFBFBD>д<EFBFBD><D0B4><EFBFBD>*/
|
||||
int flag = 0;
|
||||
int i;
|
||||
for(i = 0; i < overlap_segnum; i++)
|
||||
{
|
||||
switch(IVI_relative_position(seg, overlap_segs[i]))
|
||||
{
|
||||
case LEFT_OVERLAP: //<2F>ǣ<F3B8B2B8><C7A3><EFBFBD>seg<65><67><EFBFBD><EFBFBD>ֵ<EFBFBD><D6B5>Ϊoverlap_seg<65><67><EFBFBD><EFBFBD>ֵ
|
||||
{
|
||||
seg->right = overlap_segs[i]->left - 1;
|
||||
break;
|
||||
}
|
||||
case CONTAIN: //<2F><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ϵ<EFBFBD><CFB5><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>Dz<EFBFBD><C7B2><EFBFBD>ֱ<EFBFBD>Ӳ<EFBFBD><D3B2>룬Ȼ<EBA3AC><C8BB><EFBFBD>ı<EFBFBD>seg<65><67><EFBFBD><EFBFBD>ֵ<EFBFBD><D6B5><EFBFBD><EFBFBD><EFBFBD><EFBFBD>data<74>ƶ<EFBFBD><C6B6><EFBFBD>ָ<EFBFBD><D6B8><EFBFBD><EFBFBD>λ<EFBFBD><CEBB>
|
||||
{
|
||||
if(overlap_segs[i]->left - 1 >= seg->left)
|
||||
{
|
||||
fuzzy_node * node = (fuzzy_node *)calloc(sizeof(fuzzy_node), 1);
|
||||
memcpy(node, fnode, sizeof(fuzzy_node));
|
||||
node->right_status_r = (struct roll_state *)calloc(sizeof (struct roll_state), 1);
|
||||
roll_init(node->right_status_r);
|
||||
IVI_seg_t * thseg = IVI_seg_malloc(seg->left, overlap_segs[i]->left - 1, (void *)node);
|
||||
IVI_insert(((fuzzy_handle_inner_t *)handle)->ivi,thseg);
|
||||
effective_length += fuzzy_hash_calculate(thseg, data, offset, blocksize);
|
||||
total_length += thseg->right - thseg->left + 1;
|
||||
}
|
||||
seg->left = overlap_segs[i]->right + 1;
|
||||
data = data + ((seg->left) - offset);
|
||||
offset = seg->left;
|
||||
break;
|
||||
}
|
||||
case RIGHT_OVERLAP: //<2F>Ҹ<EFBFBD><D2B8>ǣ<EFBFBD><C7A3><EFBFBD>seg<65><67><EFBFBD><EFBFBD>ֵ<EFBFBD><D6B5>Ϊoverlap_seg<65><67><EFBFBD><EFBFBD>ֵ
|
||||
{
|
||||
seg->left = overlap_segs[i]->right + 1;
|
||||
data = data + ((seg->left) - offset);
|
||||
offset = seg->left;
|
||||
break;
|
||||
}
|
||||
case CONTAINED: //<2F><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ֱ<EFBFBD><D6B1><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>Ƭ
|
||||
{
|
||||
flag = 1;
|
||||
//printf("contained! free seg\n");
|
||||
IVI_seg_free(seg, fuzzy_node_free, NULL);
|
||||
free(overlap_segs);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
if(flag == 1)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/* Insert whatever remains of the new data into the interval index and compute its hash. */
|
||||
if(seg->left <= seg->right)
|
||||
{
|
||||
IVI_insert(((fuzzy_handle_inner_t *)handle)->ivi, seg);
|
||||
effective_length += fuzzy_hash_calculate(seg, data, offset, blocksize);
|
||||
total_length += seg->right - seg->left + 1;
|
||||
//((fuzzy_handle_inner_t *)handle)->effective_length += effective_length;
|
||||
}
|
||||
else
|
||||
{
|
||||
IVI_seg_free(seg, fuzzy_node_free, NULL);
|
||||
}
|
||||
|
||||
free(overlap_segs);
|
||||
return effective_length;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* <20><><EFBFBD><EFBFBD>ǰ<EFBFBD><C7B0><EFBFBD><EFBFBD>Ƭ<EFBFBD><C6AC><EFBFBD><EFBFBD><EFBFBD><EFBFBD>fuzzy_hashֵ
|
||||
*/
|
||||
unsigned int fuzzy_hash_calculate(IVI_seg_t * seg, const char * data, unsigned long long offset, unsigned long long blocksize)
|
||||
{
|
||||
IVI_seg_t * prev_seg;
|
||||
IVI_seg_t * next_seg;
|
||||
unsigned int effective_length = 0;
|
||||
|
||||
prev_seg = IVI_prev_continuous_seg(seg);
|
||||
next_seg = IVI_next_continuous_seg(seg);
|
||||
//printf("seg->right = %lu, seg->left = %lu\n", seg->right, seg->left);
|
||||
unsigned int size = seg->right - seg->left + 1;
|
||||
fuzzy_node * node = (fuzzy_node *)(seg->data);
|
||||
if(NULL == prev_seg)
|
||||
{
|
||||
//<2F><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ǰ<EFBFBD><C7B0>Ƭ<EFBFBD><C6AC>ֱ<EFBFBD>ӳ<EFBFBD>ʼ<EFBFBD><CABC>roll_state<74><65><EFBFBD>м<EFBFBD><D0BC><EFBFBD>
|
||||
roll_init(node->right_status_r);
|
||||
fuzzy_calculate_self(seg, data, offset, blocksize);
|
||||
effective_length = size - node->left_len;
|
||||
node->left_offset = offset + node->left_len;
|
||||
}
|
||||
else
|
||||
{
|
||||
//<2F><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ǰ<EFBFBD><C7B0>Ƭ<EFBFBD><C6AC>ȡ<EFBFBD><C8A1>ǰ<EFBFBD><C7B0>Ƭ<EFBFBD><C6AC><EFBFBD>ұ߽<D2B1><DFBD><EFBFBD><EFBFBD>м<EFBFBD>״ֵ̬<CCAC><D6B5><EFBFBD>м<EFBFBD><D0BC><EFBFBD>
|
||||
|
||||
fuzzy_calculate_self_with_prev(prev_seg, seg, data, blocksize);
|
||||
effective_length = size + ((fuzzy_node *)(prev_seg->data))->right_len;
|
||||
node->left_offset = offset - ((fuzzy_node *)(prev_seg->data))->right_len;
|
||||
}
|
||||
|
||||
/* <20><><EFBFBD><EFBFBD><EFBFBD>к<EFBFBD><D0BA><EFBFBD>Ƭ,<2C><><EFBFBD><EFBFBD><EFBFBD>Լ<EFBFBD><D4BC><EFBFBD><EFBFBD><EFBFBD><EFBFBD>Ľ<EFBFBD><C4BD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>Ҫ<EFBFBD><D2AA>Ƭ,<2C><><EFBFBD>ĺ<DEB8><C4BA><EFBFBD><EFBFBD>ķ<EFBFBD>Ƭ */
|
||||
if(next_seg != NULL)
|
||||
{
|
||||
//<2F><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ں<EFBFBD><DABA><EFBFBD>Ƭ<EFBFBD><C6AC><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>Ƭ<EFBFBD><C6AC><EFBFBD>ұ߽<D2B1><DFBD><EFBFBD><EFBFBD>м<EFBFBD>״ֵ̬ȡ<D6B5><C8A1><EFBFBD><EFBFBD><EFBFBD>ͺ<EFBFBD><CDBA><EFBFBD>Ƭ<EFBFBD><C6AC><EFBFBD><EFBFBD><EFBFBD>߽<EFBFBD><DFBD><EFBFBD><EFBFBD>м<EFBFBD>״̬<D7B4><CCAC><EFBFBD>м<EFBFBD><D0BC><EFBFBD>
|
||||
fuzzy_modify_next(seg, next_seg, blocksize);
|
||||
|
||||
effective_length += ((fuzzy_node *)(next_seg->data))->left_len;
|
||||
node->right_offset = offset + size + ((fuzzy_node *)(next_seg->data))->left_len;
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
effective_length -= node->right_len;
|
||||
node->right_offset = offset + (size - (node->right_len));
|
||||
}
|
||||
return effective_length;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
 * Hash a segment that has no contiguous predecessor.
 *
 * Walks the segment bytes through the node's rolling state. A slice boundary
 * is recorded once at least ROLLING_WINDOW bytes have been seen and
 * roll_sum() % blocksize == blocksize - 1. Each boundary appends one
 * character from the b64 table to the node's hash_result.
 *
 * On return the node holds:
 *   - slice_num: incremented once per boundary found;
 *   - left_len: bytes up to and including the first boundary (the whole
 *     segment when no boundary exists);
 *   - right_len: bytes after the last boundary (0 when no boundary exists);
 *   - right_status_shash: the running hash of the trailing, unsliced bytes;
 *   - hash_result: newly allocated NUL-terminated string;
 *   - left_data: newly allocated copy of the first left_len bytes.
 *
 * @param seg        segment being hashed; seg->data is a fuzzy_node
 * @param data       bytes of the segment
 * @param offset     not used by this function
 * @param blocksize  modulus for the boundary test
 *
 * NOTE(review): allocation results are not checked, and any previous
 * hash_result/left_data pointers in the node are overwritten without free.
 */
void fuzzy_calculate_self(IVI_seg_t * seg, const char * data, unsigned long long offset, unsigned long long blocksize)
{
	fuzzy_node * self = (fuzzy_node *)(seg->data);
	struct roll_state * roll = self->right_status_r;
	unsigned long long nbytes = seg->right - seg->left + 1;
	char * digits = (char *)malloc(sizeof(char) * nbytes);
	unsigned long long ndigits = 0;
	unsigned long long cut_end = 0;    /* one past the index of the last boundary */
	unsigned long long pos;
	unsigned int piece_hash = HASH_INIT;
	unsigned int window_hash;

	(void)offset;

	for(pos = 0; pos < nbytes; pos++)
	{
		roll_hash(roll, data[pos]);
		window_hash = roll_sum(roll);
		piece_hash = sum_hash(data[pos], piece_hash);

		/* Not a boundary: window not yet full, or the trigger value is absent. */
		if(pos < ROLLING_WINDOW - 1 || window_hash % blocksize != blocksize - 1)
		{
			continue;
		}

		self->slice_num++;
		if(self->slice_num == 1)
		{
			self->left_len = pos + 1;
		}
		cut_end = pos + 1;

		/* Emit one digest character for the piece that just ended. */
		digits[ndigits++] = b64[piece_hash % 64];
		piece_hash = HASH_INIT;
	}

	if(self->slice_num != 0)
	{
		self->right_len = nbytes - cut_end;
	}
	else
	{
		/* No boundary at all: the whole segment is an unsliced left edge. */
		self->left_len = nbytes;
		self->right_len = 0;
	}
	self->right_status_shash = piece_hash;

	/* Publish the digest characters as a NUL-terminated string. */
	self->hash_result = (char *)malloc(sizeof(char) * (ndigits + 1));
	memcpy(self->hash_result, digits, ndigits);
	(self->hash_result)[ndigits] = '\0';

	/* Keep the raw left-edge bytes so they can be re-hashed later. */
	self->left_data = (char *)malloc(sizeof(char) * (self->left_len));
	memcpy(self->left_data, data, self->left_len);

	free(digits);
}
|
||||
|
||||
|
||||
/**
 * Sum the byte lengths of seg and its contiguous predecessors, walking
 * backwards until the total reaches ROLLING_WINDOW or the chain ends.
 *
 * @param seg  segment to start from (may be NULL, giving 0)
 * @return the accumulated length; the walk stops as soon as it is
 *         >= ROLLING_WINDOW, so the value is not the full chain length.
 */
unsigned long long get_prev_continous_length(IVI_seg_t * seg)
{
	unsigned long long total = 0;
	IVI_seg_t * cur;

	for(cur = seg; cur != NULL; cur = IVI_prev_continuous_seg(cur))
	{
		total += cur->right - cur->left + 1;
		if(total >= ROLLING_WINDOW)
		{
			break;
		}
	}
	return total;
}
|
||||
|
||||
/**
|
||||
* <20><><EFBFBD><EFBFBD>ǰ<EFBFBD>εı<CEB5><C4B1><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
||||
*/
|
||||
void fuzzy_calculate_self_with_prev(IVI_seg_t * prev_seg, IVI_seg_t * seg, const char * data, unsigned long long blocksize)
|
||||
{
|
||||
fuzzy_node * prev_node = (fuzzy_node *)(prev_seg->data);
|
||||
fuzzy_node * node = (fuzzy_node *)(seg->data);
|
||||
|
||||
/* ʹ<><CAB9>ǰ<EFBFBD>ε<EFBFBD>roll state */
|
||||
memcpy(node->right_status_r, prev_node->right_status_r, sizeof(struct roll_state));
|
||||
struct roll_state * rs = node->right_status_r;
|
||||
unsigned long long size = seg->right - seg->left + 1;
|
||||
unsigned int FNV_hash_value = prev_node->right_status_shash;
|
||||
|
||||
|
||||
char * FNV_hash = (char *)malloc(sizeof(char)*size);
|
||||
unsigned long long fnv_index = 0, i, last_slice_index;
|
||||
unsigned int roll_hash_value;
|
||||
unsigned long long prev_len = get_prev_continous_length(prev_seg);
|
||||
|
||||
for(i = 0; i < size; i++)
|
||||
{
|
||||
roll_hash(rs, data[i]);
|
||||
roll_hash_value = roll_sum(rs);
|
||||
FNV_hash_value = sum_hash(data[i], FNV_hash_value);
|
||||
if(i + prev_len >= ROLLING_WINDOW \
|
||||
&& roll_hash_value % blocksize == blocksize - 1)
|
||||
{
|
||||
node->slice_num ++;
|
||||
if(node->slice_num == 1)
|
||||
{
|
||||
node->left_len = i + 1;
|
||||
}
|
||||
|
||||
last_slice_index = i;
|
||||
/* <20><><EFBFBD><EFBFBD>FNV<4E><56>ֵ */
|
||||
FNV_hash[fnv_index ++] = b64[FNV_hash_value % 64];
|
||||
//printf("data[%lu]=%c, FNV_hash = %c\n", i, data[i], b64[FNV_hash_value % 64]);
|
||||
FNV_hash_value = HASH_INIT;
|
||||
}
|
||||
}
|
||||
|
||||
/* һƬ<D2BB><C6AC>û<EFBFBD><C3BB><EFBFBD>ҵ<EFBFBD> */
|
||||
if(node->slice_num == 0)
|
||||
{
|
||||
node->left_len = size;
|
||||
node->right_len = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
node->right_len = size - last_slice_index - 1;
|
||||
}
|
||||
node->right_status_shash = FNV_hash_value;
|
||||
|
||||
/* <20><><EFBFBD>ƽ<EFBFBD><C6BD><EFBFBD><EFBFBD><EFBFBD>hash_result<6C><74> */
|
||||
node->hash_result = (char *)malloc(sizeof(char) * (fnv_index + 1));
|
||||
memcpy(node->hash_result, FNV_hash, fnv_index);
|
||||
(node->hash_result)[fnv_index] = '\0';
|
||||
|
||||
node->left_data = (char *)malloc(sizeof(char) * (node->left_len));
|
||||
memcpy(node->left_data, data, node->left_len);
|
||||
|
||||
free(FNV_hash);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
 * Re-hash an already-hashed segment against a (new) contiguous predecessor.
 *
 * Works like fuzzy_calculate_self_with_prev, except that the node's existing
 * hash_result and left_data buffers are freed and replaced.
 *
 * @param prev_seg   contiguous predecessor; prev_seg->data is a fuzzy_node
 * @param seg        segment to re-hash; seg->data is a fuzzy_node
 * @param data       bytes of seg. Must not alias the node's current
 *                   left_data, because that buffer is freed before data is
 *                   copied into its replacement.
 * @param blocksize  modulus for the boundary test
 *
 * NOTE(review): slice_num is incremented, not reset; the visible caller only
 * invokes this for nodes whose slice_num is 0.
 * NOTE(review): allocation results are not checked.
 */
void fuzzy_modify_self_with_prev(IVI_seg_t * prev_seg, IVI_seg_t * seg, char * data, unsigned long long blocksize)
{
	fuzzy_node * before = (fuzzy_node *)(prev_seg->data);
	fuzzy_node * self = (fuzzy_node *)(seg->data);
	struct roll_state * roll = self->right_status_r;
	unsigned long long nbytes = seg->right - seg->left + 1;
	unsigned long long ndigits = 0;
	unsigned long long cut_end = 0;    /* one past the index of the last boundary */
	unsigned long long pos;
	unsigned long long seen_before;
	unsigned int piece_hash = before->right_status_shash;
	unsigned int window_hash;
	char * digits;

	/* Resume from the predecessor's rolling state. */
	memcpy(roll, before->right_status_r, sizeof(struct roll_state));

	digits = (char *)malloc(sizeof(char) * nbytes);
	seen_before = get_prev_continous_length(prev_seg);

	for(pos = 0; pos < nbytes; pos++)
	{
		roll_hash(roll, data[pos]);
		window_hash = roll_sum(roll);
		piece_hash = sum_hash(data[pos], piece_hash);

		if(pos + seen_before < ROLLING_WINDOW || window_hash % blocksize != blocksize- 1)
		{
			continue;
		}

		self->slice_num++;
		if(self->slice_num == 1)
		{
			self->left_len = pos + 1;
		}
		cut_end = pos + 1;

		/* Emit one digest character for the piece that just ended. */
		digits[ndigits++] = b64[piece_hash % 64];
		piece_hash = HASH_INIT;
	}

	if(self->slice_num != 0)
	{
		self->right_len = nbytes - cut_end;
	}
	else
	{
		/* Still no boundary: the whole segment stays an unsliced left edge. */
		self->left_len = nbytes;
		self->right_len = 0;
	}
	self->right_status_shash = piece_hash;

	/* Replace the old digest string. */
	free(self->hash_result);
	self->hash_result = (char *)malloc(sizeof(char) * (ndigits + 1));
	memcpy(self->hash_result, digits, ndigits);
	(self->hash_result)[ndigits] = '\0';

	/* Replace the saved left-edge bytes. */
	free(self->left_data);
	self->left_data = (char *)malloc(sizeof(char) * (self->left_len));
	memcpy(self->left_data, data, self->left_len);

	free(digits);
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>εı<CEB5><C4B1><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
||||
*/
|
||||
void fuzzy_modify_next(IVI_seg_t * seg, IVI_seg_t * next_seg, unsigned long long blocksize)
|
||||
{
|
||||
IVI_seg_t * tmp_curr_seg = seg;
|
||||
IVI_seg_t * tmp_next_seg = next_seg;
|
||||
while(tmp_next_seg != NULL)
|
||||
{
|
||||
fuzzy_node * tmp_next_node = (fuzzy_node *)(tmp_next_seg->data);
|
||||
if(tmp_next_node->slice_num != 0)
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
/* <20><>һ<EFBFBD><D2BB>û<EFBFBD>з<EFBFBD>Ƭ, <20><><EFBFBD><EFBFBD><EFBFBD>¼<EFBFBD><C2BC><EFBFBD> */
|
||||
|
||||
char * data = (char *)malloc(sizeof(char) * (tmp_next_node->left_len));
|
||||
memcpy(data, tmp_next_node->left_data, tmp_next_node->left_len);
|
||||
fuzzy_modify_self_with_prev(tmp_curr_seg, tmp_next_seg, data, blocksize);
|
||||
free(data);
|
||||
|
||||
tmp_curr_seg = tmp_next_seg;
|
||||
tmp_next_seg = IVI_next_continuous_seg(tmp_next_seg);
|
||||
}
|
||||
|
||||
unsigned long long prev_len = get_prev_continous_length(tmp_curr_seg);
|
||||
/* tmp_next_seg<65><67><EFBFBD><EFBFBD><EFBFBD>з<EFBFBD>Ƭ<EFBFBD><C6AC> */
|
||||
if(tmp_next_seg != NULL)
|
||||
{
|
||||
fuzzy_node * tmp_curr_node = (fuzzy_node *)(tmp_curr_seg->data);
|
||||
fuzzy_node * tmp_next_node = (fuzzy_node *)(tmp_next_seg->data);
|
||||
|
||||
unsigned long long size = tmp_next_node->left_len;
|
||||
|
||||
char * FNV_hash = (char *)malloc(sizeof(char)*size);
|
||||
unsigned long long fnv_index = 0, i;
|
||||
unsigned int roll_hash_value;
|
||||
|
||||
struct roll_state rs;
|
||||
memcpy(&rs, tmp_curr_node->right_status_r, sizeof(struct roll_state));
|
||||
char * data = tmp_next_node->left_data;
|
||||
unsigned int FNV_hash_value = tmp_curr_node->right_status_shash;
|
||||
for(i = 0; i < size; i++)
|
||||
{
|
||||
roll_hash(&rs, data[i]);
|
||||
roll_hash_value = roll_sum(&rs);
|
||||
FNV_hash_value = sum_hash(data[i], FNV_hash_value);
|
||||
|
||||
if((i + prev_len >= ROLLING_WINDOW) \
|
||||
&& roll_hash_value % blocksize == blocksize - 1)
|
||||
{
|
||||
tmp_next_node->slice_num ++;
|
||||
FNV_hash[fnv_index ++] = b64[FNV_hash_value % 64];
|
||||
//printf("data[%lu]=%c, FNV_hash = %c\n", i, data[i], b64[FNV_hash_value % 64]);
|
||||
FNV_hash_value = HASH_INIT;
|
||||
|
||||
if(fnv_index == 1)
|
||||
{
|
||||
tmp_next_node->left_len = i + 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
tmp_next_node->slice_num --;
|
||||
|
||||
|
||||
/* <20><><EFBFBD>ƽ<EFBFBD><C6BD><EFBFBD><EFBFBD><EFBFBD>hash_result<6C><74> */
|
||||
unsigned long long old_len = strlen(tmp_next_node->hash_result);
|
||||
if(old_len == 1)
|
||||
{
|
||||
free(tmp_next_node->hash_result);
|
||||
tmp_next_node->hash_result = (char *)malloc(sizeof(char) * (fnv_index + 1));
|
||||
memcpy(tmp_next_node->hash_result, FNV_hash, fnv_index);
|
||||
(tmp_next_node->hash_result)[fnv_index] = '\0';
|
||||
}
|
||||
else
|
||||
{
|
||||
unsigned long long new_len = old_len - 1 + fnv_index;
|
||||
char tmp[old_len - 1];
|
||||
char * old_hash = (tmp_next_node->hash_result) + 1;
|
||||
memcpy(tmp, old_hash, old_len - 1);
|
||||
free(tmp_next_node->hash_result);
|
||||
tmp_next_node->hash_result = (char *)malloc(sizeof(char) * (new_len + 1));
|
||||
memset(tmp_next_node->hash_result, '\0', (new_len + 1));
|
||||
memcpy(tmp_next_node->hash_result, FNV_hash, fnv_index);
|
||||
strncat(tmp_next_node->hash_result, tmp, old_len - 1);
|
||||
(tmp_next_node->hash_result)[new_len] = '\0';
|
||||
}
|
||||
free(FNV_hash);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* ȡ<><C8A1><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>hash_resultֵ<74><D6B5><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ƴ<EFBFBD>ӣ<EFBFBD><D3A3>γ<EFBFBD><CEB3><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>result<6C><74><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>abc[1:100]def[200:300]<5D><><EFBFBD>ָ<EFBFBD>ʽ
|
||||
*/
|
||||
int fuzzy_digest(fuzzy_handle_t * handle, char * result, unsigned int size)
|
||||
{
|
||||
final_result * temp = (final_result *)malloc(sizeof(final_result));
|
||||
temp->head = result;
|
||||
temp->size = size;
|
||||
temp->offset = 0;
|
||||
temp->first_FNV_offset = 0;
|
||||
temp->last_FNV_offset = 0;
|
||||
//final_result * temp = (final_result *)malloc(sizeof(final_result));
|
||||
//temp->offset = 0;
|
||||
IVI_traverse(((fuzzy_handle_inner_t *)handle)->ivi, fuzzy_hash_merge_new, (void *) temp);
|
||||
result[size - 1] = '\0';
|
||||
//memcpy(result, temp->result, size);
|
||||
free(temp);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/**
 * Traversal callback: append one segment's digest text to the output buffer.
 *
 * Every segment contributes its hash_result. A segment with no contiguous
 * successor ends a run, so it is additionally followed by
 * "[first:right]", where first is the left offset of the run's first segment
 * and right is this segment's right offset.
 *
 * Writing is bounded by the accumulator's size: once the buffer is full,
 * further text is dropped and offset stays equal to size.
 *
 * @param seg        segment being visited; seg->data is a fuzzy_node
 * @param user_para  final_result accumulator (head, size, offset,
 *                   first_FNV_offset, last_FNV_offset)
 */
void fuzzy_hash_merge_new(IVI_seg_t * seg, void * user_para)
{
	final_result * tmp = (final_result *)user_para;
	fuzzy_node * node = (fuzzy_node *)(seg->data);
	IVI_seg_t * prev_seg = IVI_prev_continuous_seg(seg);
	IVI_seg_t * next_seg = IVI_next_continuous_seg(seg);
	char range[64];    /* "[%llu:%llu]" needs at most 44 bytes */
	const char * piece[2];
	size_t piece_len[2];
	int k;

	if(node->slice_num != 0)
	{
		tmp->last_FNV_offset = seg->right - node->right_len;
	}

	if(prev_seg == NULL)
	{
		/* This segment starts a new contiguous run. */
		tmp->first_FNV_offset = seg->left;
		if(next_seg == NULL)
		{
			tmp->last_FNV_offset = seg->right - node->right_len;
		}
	}

	/* Only the last segment of a run carries the "[first:right]" suffix. */
	range[0] = '\0';
	if(next_seg == NULL)
	{
		snprintf(range, sizeof(range), "[%llu:%llu]",
			(unsigned long long)tmp->first_FNV_offset,
			(unsigned long long)seg->right);
	}

	/*
	 * The digest is copied straight from the node rather than staged in a
	 * fixed-size stack buffer with sprintf, which could overflow when
	 * hash_result is longer than that buffer.
	 */
	piece[0] = (node->hash_result != NULL) ? node->hash_result : "";
	piece[1] = range;
	piece_len[0] = strlen(piece[0]);
	piece_len[1] = strlen(piece[1]);

	for(k = 0; k < 2; k++)
	{
		size_t room = (tmp->offset < tmp->size) ? (size_t)(tmp->size - tmp->offset) : 0;
		size_t n = (piece_len[k] < room) ? piece_len[k] : room;

		if(n != 0)
		{
			memcpy(tmp->head, piece[k], n);
		}
		tmp->head += n;
		tmp->offset += n;
	}
	return;
}
|
||||
|
||||
|
||||
/**
|
||||
* <20><><EFBFBD><EFBFBD>fuzzy_hash<73>ĸ<EFBFBD><C4B8>ֳ<EFBFBD><D6B3><EFBFBD>
|
||||
*/
|
||||
unsigned long long fuzzy_status(fuzzy_handle_t * handle, int type)
|
||||
{
|
||||
unsigned long long length;
|
||||
fuzzy_handle_inner_t * _handle = (fuzzy_handle_inner_t *)(handle);
|
||||
switch(type)
|
||||
{
|
||||
case TOTAL_LENGTH: //<2F>Ѿ<EFBFBD><D1BE><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>hashֵ<68><D6B5>ȫ<EFBFBD><C8AB><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
||||
{
|
||||
length = IVI_seg_length(_handle->ivi);
|
||||
break;
|
||||
}
|
||||
case EFFECTIVE_LENGTH: //<2F><><EFBFBD><EFBFBD><EFBFBD>ڼ<EFBFBD><DABC><EFBFBD>hashֵ<68><D6B5><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>Ч<EFBFBD><D0A7><EFBFBD><EFBFBD>
|
||||
{
|
||||
length = _handle->effective_length;
|
||||
break;
|
||||
}
|
||||
case HASH_LENGTH: //<2F><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ϣ<EFBFBD><CFA3><EFBFBD><EFBFBD><EFBFBD>ij<EFBFBD><C4B3><EFBFBD>
|
||||
{
|
||||
final_length tmp_length;
|
||||
tmp_length.hash_length = 0;
|
||||
tmp_length.first_FNV_offset = 0;
|
||||
tmp_length.last_FNV_offset = 0;
|
||||
IVI_traverse(_handle->ivi, fuzzy_hash_length, (void *)&tmp_length);
|
||||
length = tmp_length.hash_length + 1;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
return length;
|
||||
}
|
||||
|
||||
|
||||
/**
 * Traversal callback: add the length of one segment's digest text to the
 * accumulator, without producing the text.
 *
 * The counted text is the segment's hash_result, plus "[first:right]" when
 * the segment has no contiguous successor (first is the left offset of the
 * run's first segment, right is this segment's right offset).
 *
 * @param seg        segment being visited; seg->data is a fuzzy_node
 * @param user_para  final_length accumulator (hash_length, first_FNV_offset,
 *                   last_FNV_offset)
 */
void fuzzy_hash_length(IVI_seg_t * seg, void * user_para)
{
	final_length * tmp = (final_length *)user_para;
	fuzzy_node * node = (fuzzy_node *)(seg->data);
	IVI_seg_t * prev_seg = IVI_prev_continuous_seg(seg);
	IVI_seg_t * next_seg = IVI_next_continuous_seg(seg);

	if(node->slice_num != 0)
	{
		tmp->last_FNV_offset = seg->right - node->right_len;
	}

	if(prev_seg == NULL)
	{
		/* This segment starts a new contiguous run. */
		tmp->first_FNV_offset = seg->left;
		if(next_seg == NULL)
		{
			tmp->last_FNV_offset = seg->right - node->right_len;
		}
	}

	/*
	 * The digest length is taken directly from the node rather than by
	 * sprintf-ing it into a fixed-size stack buffer, which could overflow
	 * when hash_result is longer than that buffer.
	 */
	if(node->hash_result != NULL)
	{
		tmp->hash_length += strlen(node->hash_result);
	}

	/* Only the last segment of a run carries the "[first:right]" suffix. */
	if(next_seg == NULL)
	{
		char range[64];    /* "[%llu:%llu]" needs at most 44 bytes */

		snprintf(range, sizeof(range), "[%llu:%llu]",
			(unsigned long long)tmp->first_FNV_offset,
			(unsigned long long)seg->right);
		tmp->hash_length += strlen(range);
	}
	return;
}
|
||||
|
||||
Reference in New Issue
Block a user