1、调整 SFH 的函数名和源文件名,原有使用 SFH 的用户会受到影响;2、digest_gen 由遍历目录改为处理单个文件,并将结果输出到屏幕,便于通过命令行调用。
This commit is contained in:
@@ -25,22 +25,22 @@ extern "C" {
|
||||
#define EFFECTIVE_LENGTH 1
|
||||
#define HASH_LENGTH 2
|
||||
|
||||
// typedef fuzzy_handle_t void*;
|
||||
// typedef sfh_instance_t void*;
|
||||
typedef struct
|
||||
{
|
||||
}fuzzy_handle_t;
|
||||
}sfh_instance_t;
|
||||
|
||||
/**
|
||||
* create a fuzzy hash handle and return it.
|
||||
* @return [handle]
|
||||
*/
|
||||
fuzzy_handle_t * fuzzy_create_handle(unsigned long long origin_len);
|
||||
sfh_instance_t * SFH_instance(unsigned long long origin_len);
|
||||
|
||||
/**
|
||||
* destroy context by a fuzzy hash handle.
|
||||
* @param handle [handle]
|
||||
*/
|
||||
void fuzzy_destroy_handle(fuzzy_handle_t * handle);
|
||||
void SFH_release(sfh_instance_t * handle);
|
||||
|
||||
/**
|
||||
* Feed the function your data.
|
||||
@@ -51,7 +51,7 @@ void fuzzy_destroy_handle(fuzzy_handle_t * handle);
|
||||
* @param offset [offset]
|
||||
* @return [return effective data length in current feed]
|
||||
*/
|
||||
unsigned int fuzzy_feed(fuzzy_handle_t * handle, const char* data, unsigned int size, unsigned long long offset);
|
||||
unsigned int SFH_feed(sfh_instance_t * handle, const char* data, unsigned int size, unsigned long long offset);
|
||||
|
||||
/**
|
||||
* Obtain the fuzzy hash values.
|
||||
@@ -62,7 +62,7 @@ unsigned int fuzzy_feed(fuzzy_handle_t * handle, const char* data, unsigned int
|
||||
* @param size [@result size]
|
||||
* @return [return zero on success, non-zero on error]
|
||||
*/
|
||||
int fuzzy_digest(fuzzy_handle_t * handle, char* result, unsigned int size);
|
||||
int SFH_digest(sfh_instance_t * handle, char* result, unsigned int size);
|
||||
|
||||
/**
|
||||
* Obtain certain length of fuzzy hash status.
|
||||
@@ -74,7 +74,7 @@ int fuzzy_digest(fuzzy_handle_t * handle, char* result, unsigned int size);
|
||||
* HASH_LENGTH:Hash result length.
|
||||
* @return [length value]
|
||||
*/
|
||||
unsigned long long fuzzy_status(fuzzy_handle_t * handle, int type);
|
||||
unsigned long long SFH_status(sfh_instance_t * handle, int type);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
@@ -1500,7 +1500,7 @@ stream_para_t Maat_stream_scan_digest_start(Maat_feather_t feather,int table_id,
|
||||
{
|
||||
struct _Maat_feather_t* _feather=(_Maat_feather_t*)feather;
|
||||
struct _Maat_scanner_t* scanner=NULL;
|
||||
fuzzy_handle_t * tmp_fuzzy_handle=NULL;
|
||||
sfh_instance_t * tmp_fuzzy_handle=NULL;
|
||||
struct _Maat_table_info_t *p_table=NULL;
|
||||
p_table=acqurie_table(_feather, table_id, TABLE_TYPE_DIGEST);
|
||||
if(p_table==NULL)
|
||||
@@ -1508,7 +1508,7 @@ stream_para_t Maat_stream_scan_digest_start(Maat_feather_t feather,int table_id,
|
||||
_feather->scan_err_cnt++;
|
||||
return NULL;
|
||||
}
|
||||
tmp_fuzzy_handle=fuzzy_create_handle(total_len);
|
||||
tmp_fuzzy_handle=SFH_instance(total_len);
|
||||
if(tmp_fuzzy_handle==NULL)
|
||||
{
|
||||
_feather->scan_err_cnt++;
|
||||
@@ -1584,7 +1584,7 @@ int Maat_stream_scan_digest(stream_para_t * stream_para, const char * data, int
|
||||
}
|
||||
aligment_int64_array_add(sp->feather->thread_call_cnt, sp->thread_num, 1);
|
||||
pthread_mutex_lock(&(sp->fuzzy_mutex));
|
||||
sp->acc_scan_len+=fuzzy_feed(sp->fuzzy_hash_handle, data, (unsigned int)data_len,offset);
|
||||
sp->acc_scan_len+=SFH_feed(sp->fuzzy_hash_handle, data, (unsigned int)data_len,offset);
|
||||
pthread_mutex_unlock(&(sp->fuzzy_mutex));
|
||||
do_query=REACH_QUERY_THRESH(sp->total_len, sp->acc_scan_len, sp->query_point,8);
|
||||
if(do_query==0)
|
||||
@@ -1592,7 +1592,7 @@ int Maat_stream_scan_digest(stream_para_t * stream_para, const char * data, int
|
||||
goto fast_out;
|
||||
}
|
||||
pthread_mutex_lock(&(sp->fuzzy_mutex));
|
||||
digest_len=fuzzy_status(sp->fuzzy_hash_handle, HASH_LENGTH);
|
||||
digest_len=SFH_status(sp->fuzzy_hash_handle, HASH_LENGTH);
|
||||
pthread_mutex_unlock(&(sp->fuzzy_mutex));
|
||||
if(digest_len==0)
|
||||
{
|
||||
@@ -1600,7 +1600,7 @@ int Maat_stream_scan_digest(stream_para_t * stream_para, const char * data, int
|
||||
}
|
||||
digest_buff=(char*)malloc(sizeof(char)*digest_len);
|
||||
pthread_mutex_lock(&(sp->fuzzy_mutex));
|
||||
fuzzy_digest(sp->fuzzy_hash_handle,digest_buff, digest_len);
|
||||
SFH_digest(sp->fuzzy_hash_handle,digest_buff, digest_len);
|
||||
pthread_mutex_unlock(&(sp->fuzzy_mutex));
|
||||
|
||||
if(GIE_handle!=NULL)
|
||||
@@ -1661,7 +1661,7 @@ void Maat_stream_scan_digest_end(stream_para_t* stream_para)
|
||||
DEC_SCANNER_REF(scanner, sp->thread_num);
|
||||
}
|
||||
}
|
||||
fuzzy_destroy_handle(sp->fuzzy_hash_handle);
|
||||
SFH_release(sp->fuzzy_hash_handle);
|
||||
pthread_mutex_destroy(&(sp->fuzzy_mutex));
|
||||
assert(sp->last_cache==NULL);
|
||||
assert(sp->scan_buff==NULL);
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
#include "rulescan.h"
|
||||
#include "hiredis.h"
|
||||
|
||||
#include "mesa_fuzzy.h"
|
||||
#include "stream_fuzzy_hash.h"
|
||||
#include "gram_index_engine.h"
|
||||
#include "aligment_int64.h"
|
||||
#include <pthread.h>
|
||||
@@ -314,7 +314,7 @@ struct _stream_para_t
|
||||
void* rs_stream_para;
|
||||
long acc_scan_len;
|
||||
unsigned long long total_len;
|
||||
fuzzy_handle_t *fuzzy_hash_handle;
|
||||
sfh_instance_t *fuzzy_hash_handle;
|
||||
pthread_mutex_t fuzzy_mutex;
|
||||
unsigned char query_point[8];
|
||||
};
|
||||
|
||||
@@ -19,7 +19,7 @@ LIBMAAT = libmaatframe.a
|
||||
LIBMAAT_SO = libmaatframe.so
|
||||
|
||||
OBJS=config_monitor.o Maat_rule.o Maat_api.o Maat_command.o Maat_stat.o UniversalBoolMatch.o dynamic_array.o\
|
||||
cJSON.o json2iris.o map_str2int.o interval_index.o gram_index_engine.o mesa_fuzzy.o rbtree.o
|
||||
cJSON.o json2iris.o map_str2int.o interval_index.o gram_index_engine.o stream_fuzzy_hash.o rbtree.o
|
||||
.c.o:
|
||||
$(CC) -c $(CFLAGS) -I. $(H_DIR) $<
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
#include<zt_hash.h>
|
||||
#include<interval_index.h>
|
||||
#include<mesa_fuzzy.h>
|
||||
#include "zt_hash.h"
|
||||
#include "interval_index.h"
|
||||
#include "stream_fuzzy_hash.h"
|
||||
|
||||
#ifndef __SFH_INTERNAL_H_INCLUDE_
|
||||
#define __SFH_INTERNAL_H_INCLUDE_
|
||||
@@ -106,6 +106,5 @@ unsigned int segment_overlap(fuzzy_handle_inner_t * handle, unsigned int size, u
|
||||
void sfh_tune_callback(IVI_seg_t * seg, void * user_para);
|
||||
void sfh_output_callback(IVI_seg_t * seg, void * user_para);
|
||||
void fuzzy_hash_length(IVI_seg_t * seg, void * user_para);
|
||||
unsigned long long fuzzy_status(fuzzy_handle_t * handle, int type);
|
||||
#endif
|
||||
|
||||
|
||||
@@ -72,7 +72,7 @@ static inline unsigned int sum_hash(unsigned char c, unsigned int h)
|
||||
/**
|
||||
* 创建handle
|
||||
*/
|
||||
fuzzy_handle_t * fuzzy_create_handle(unsigned long long origin_len)
|
||||
sfh_instance_t * SFH_instance(unsigned long long origin_len)
|
||||
{
|
||||
fuzzy_handle_inner_t * handle = NULL;
|
||||
unsigned long long tmp_blksize = 0;
|
||||
@@ -94,7 +94,7 @@ fuzzy_handle_t * fuzzy_create_handle(unsigned long long origin_len)
|
||||
//handle->blocksize=tmp_blksize;
|
||||
handle->blocksize = 3;
|
||||
handle->do_tune=1;
|
||||
return (fuzzy_handle_t *)handle;
|
||||
return (sfh_instance_t *)handle;
|
||||
}
|
||||
|
||||
|
||||
@@ -110,7 +110,7 @@ void fuzzy_node_free(IVI_seg_t * seg, void * usr_para)
|
||||
}
|
||||
|
||||
|
||||
void fuzzy_destroy_handle(fuzzy_handle_t * handle)
|
||||
void SFH_release(sfh_instance_t * handle)
|
||||
{
|
||||
IVI_destroy(((fuzzy_handle_inner_t *)handle)->ivi, fuzzy_node_free, (void *)handle);
|
||||
((fuzzy_handle_inner_t *)handle)->fuzzy_node_memory -= sizeof(fuzzy_handle_inner_t);
|
||||
@@ -118,7 +118,7 @@ void fuzzy_destroy_handle(fuzzy_handle_t * handle)
|
||||
return;
|
||||
}
|
||||
|
||||
unsigned int fuzzy_feed(fuzzy_handle_t * handle, const char * data, unsigned int size, unsigned long long offset)
|
||||
unsigned int SFH_feed(sfh_instance_t * handle, const char * data, unsigned int size, unsigned long long offset)
|
||||
{
|
||||
fuzzy_handle_inner_t * _handle=(fuzzy_handle_inner_t *)handle;
|
||||
if(data == NULL || size == 0)
|
||||
@@ -150,7 +150,7 @@ unsigned int fuzzy_feed(fuzzy_handle_t * handle, const char * data, unsigned int
|
||||
//printf("blocksize after:%llu\n", _handle->blocksize);
|
||||
}
|
||||
#if 0
|
||||
fuzzy_digest(handle,result, sizeof(result));
|
||||
SFH_digest(handle,result, sizeof(result));
|
||||
printf("%llu %s\n",offset,result);
|
||||
#endif
|
||||
return length;
|
||||
@@ -575,7 +575,7 @@ int sfh_merge_seg(fuzzy_handle_inner_t * _handle, sfh_seg_t * p, sfh_seg_t * n,u
|
||||
/**
|
||||
* 取出区间链表里面的hash_result值,并进行拼接,形成最后的result输出,并且满足abc[1:100]def[200:300]这种格式
|
||||
*/
|
||||
int fuzzy_digest(fuzzy_handle_t * handle, char * hash_buffer, unsigned int size)
|
||||
int SFH_digest(sfh_instance_t * handle, char * hash_buffer, unsigned int size)
|
||||
{
|
||||
fuzzy_handle_inner_t* _handle=(fuzzy_handle_inner_t *)handle;
|
||||
unsigned int estimate_len=_handle->s_state_cnt+IVI_seg_cnt(_handle->ivi)*24+1;
|
||||
@@ -666,7 +666,7 @@ void sfh_output_callback(IVI_seg_t * seg, void * user_para)
|
||||
/**
|
||||
* 计算fuzzy_hash的各种长度
|
||||
*/
|
||||
unsigned long long fuzzy_status(fuzzy_handle_t * handle, int type)
|
||||
unsigned long long SFH_status(sfh_instance_t * handle, int type)
|
||||
{
|
||||
unsigned long long length;
|
||||
fuzzy_handle_inner_t * _handle = (fuzzy_handle_inner_t *)(handle);
|
||||
@@ -6,7 +6,7 @@
|
||||
#include <sys/stat.h>
|
||||
#include <time.h>
|
||||
#include <math.h>
|
||||
#include "mesa_fuzzy.h"
|
||||
#include "stream_fuzzy_hash.h"
|
||||
|
||||
void* entropy_start(void)
|
||||
{
|
||||
@@ -40,96 +40,60 @@ double entropy_stop(void* handle)
|
||||
free(handle);
|
||||
return (-sum);
|
||||
}
|
||||
|
||||
void dir_digest(int argc, char * argv[])
|
||||
void hash_file(const char* path)
|
||||
{
|
||||
|
||||
if(argc != 2)
|
||||
{
|
||||
printf("uasge: ./digest_gen [Dir]\n");
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
DIR * dir;
|
||||
struct dirent * file;
|
||||
char * dir_path = argv[1];
|
||||
char read_buff[1024*4];
|
||||
unsigned long long read_size=0,feed_offset=0;
|
||||
dir = opendir(dir_path);
|
||||
chdir(dir_path);
|
||||
int ret =0;
|
||||
unsigned int file_id = 1;
|
||||
unsigned long hash_length=0,file_effective_length=0;
|
||||
FILE * result_fp = NULL,*fp=NULL;
|
||||
struct stat digest_fstat;
|
||||
char * digest_result_buff=NULL;
|
||||
const char* result_file="./digest_result.txt";
|
||||
result_fp = fopen(result_file,"a");
|
||||
char read_buff[1024*4];
|
||||
void * entropy_handle=NULL;
|
||||
double file_entropy=0.0;
|
||||
if(NULL == result_fp)
|
||||
{
|
||||
printf("open file failed!");
|
||||
exit(-1);
|
||||
}
|
||||
while((file = readdir(dir)) != NULL)
|
||||
{
|
||||
if(!strcmp(file->d_name, ".") ||!strcmp(file->d_name, ".."))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
ret=stat(file->d_name,&digest_fstat);
|
||||
if(ret!=0)
|
||||
{
|
||||
printf("fstat %s error.\n",file->d_name);
|
||||
continue;
|
||||
}
|
||||
|
||||
off_t file_size = digest_fstat.st_size;
|
||||
fp = fopen(file->d_name, "r");
|
||||
int hash_length;
|
||||
char * digest_result_buff=NULL;
|
||||
struct stat digest_fstat;
|
||||
FILE* fp;
|
||||
stat(path,&digest_fstat);
|
||||
fp = fopen(path, "r");
|
||||
if(NULL == fp)
|
||||
{
|
||||
printf("Can't open file %s\n", file->d_name);
|
||||
continue;
|
||||
printf("Open %s failed\n", path);
|
||||
return;
|
||||
}
|
||||
read_size=0;
|
||||
feed_offset=0;
|
||||
fuzzy_handle_t * fhandle = fuzzy_create_handle((unsigned long long)file_size);
|
||||
sfh_instance_t * fhandle = SFH_instance(0);
|
||||
entropy_handle=entropy_start();
|
||||
while(0==feof(fp))
|
||||
{
|
||||
read_size=fread(read_buff,1,sizeof(read_buff),fp);
|
||||
fuzzy_feed(fhandle,read_buff,read_size,feed_offset);
|
||||
SFH_feed(fhandle,read_buff,read_size,feed_offset);
|
||||
feed_offset+=read_size;
|
||||
entropy_feed(entropy_handle,(const unsigned char*) read_buff, read_size);
|
||||
}
|
||||
file_entropy=entropy_stop(entropy_handle);
|
||||
hash_length = fuzzy_status(fhandle, HASH_LENGTH);
|
||||
file_effective_length = fuzzy_status(fhandle, EFFECTIVE_LENGTH);
|
||||
hash_length = SFH_status(fhandle, HASH_LENGTH);
|
||||
digest_result_buff= (char *)malloc(sizeof(char) * (hash_length));
|
||||
if(fuzzy_digest(fhandle, digest_result_buff, hash_length) < 0)
|
||||
{
|
||||
printf("error\n");
|
||||
continue;
|
||||
}
|
||||
fprintf(result_fp, "%u\t%s\t%llu\t%lu\t%lf\n", file_id, file->d_name,file_size, hash_length,file_entropy);
|
||||
fprintf(result_fp, "%s\n", digest_result_buff);
|
||||
printf("%u %s\n", file_id,file->d_name);
|
||||
file_id++;
|
||||
fuzzy_destroy_handle(fhandle);
|
||||
fclose(fp);
|
||||
SFH_digest(fhandle, digest_result_buff, hash_length);
|
||||
printf("%s %u %lf %s\n",path,digest_fstat.st_size,file_entropy,digest_result_buff);
|
||||
SFH_release(fhandle);
|
||||
free(digest_result_buff);
|
||||
fclose(fp);
|
||||
}
|
||||
fclose(result_fp);
|
||||
closedir(dir);
|
||||
printf("write result to %s\n", result_file);
|
||||
|
||||
}
|
||||
|
||||
|
||||
int main(int argc, char * argv[])
|
||||
{
|
||||
dir_digest(argc, argv);
|
||||
//overlap_test(argc, argv);
|
||||
char path[256];
|
||||
if(argc == 2)
|
||||
{
|
||||
hash_file(argv[1]);
|
||||
}
|
||||
else if(NULL!=fgets(path,sizeof(path),stdin))
|
||||
{
|
||||
hash_file(path);
|
||||
}
|
||||
else
|
||||
{
|
||||
printf("SFH uasge: ./digest_gen [Dir]\n");
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user