1、调整SFH的函数名和源文件名,原有使用sfh的用户会受到影响;2、digest_gen由目录遍历,改为单文件并在屏幕输出结果,便于通过命令行调用。

This commit is contained in:
zhengchao
2017-07-08 19:23:17 +08:00
parent 5ba84a69f1
commit 6ffc3e3ded
7 changed files with 78 additions and 115 deletions

View File

@@ -25,22 +25,22 @@ extern "C" {
#define EFFECTIVE_LENGTH 1 #define EFFECTIVE_LENGTH 1
#define HASH_LENGTH 2 #define HASH_LENGTH 2
// typedef fuzzy_handle_t void*; // typedef sfh_instance_t void*;
typedef struct typedef struct
{ {
}fuzzy_handle_t; }sfh_instance_t;
/** /**
* create a fuzzy hash handle and return it. * create a fuzzy hash handle and return it.
* @return [handle] * @return [handle]
*/ */
fuzzy_handle_t * fuzzy_create_handle(unsigned long long origin_len); sfh_instance_t * SFH_instance(unsigned long long origin_len);
/** /**
* destroy context by a fuzzy hash handle. * destroy context by a fuzzy hash handle.
* @param handle [handle] * @param handle [handle]
*/ */
void fuzzy_destroy_handle(fuzzy_handle_t * handle); void SFH_release(sfh_instance_t * handle);
/** /**
* Feed the function your data. * Feed the function your data.
@@ -51,7 +51,7 @@ void fuzzy_destroy_handle(fuzzy_handle_t * handle);
* @param offset [offset] * @param offset [offset]
* @return [return effective data length in current feed] * @return [return effective data length in current feed]
*/ */
unsigned int fuzzy_feed(fuzzy_handle_t * handle, const char* data, unsigned int size, unsigned long long offset); unsigned int SFH_feed(sfh_instance_t * handle, const char* data, unsigned int size, unsigned long long offset);
/** /**
* Obtain the fuzzy hash values. * Obtain the fuzzy hash values.
@@ -62,7 +62,7 @@ unsigned int fuzzy_feed(fuzzy_handle_t * handle, const char* data, unsigned int
* @param size [@result size] * @param size [@result size]
* @return [return zero on success, non-zero on error] * @return [return zero on success, non-zero on error]
*/ */
int fuzzy_digest(fuzzy_handle_t * handle, char* result, unsigned int size); int SFH_digest(sfh_instance_t * handle, char* result, unsigned int size);
/** /**
* Obtain certain length of fuzzy hash status. * Obtain certain length of fuzzy hash status.
@@ -74,7 +74,7 @@ int fuzzy_digest(fuzzy_handle_t * handle, char* result, unsigned int size);
* HASH_LENGTH:Hash result length. * HASH_LENGTH:Hash result length.
* @return [length value] * @return [length value]
*/ */
unsigned long long fuzzy_status(fuzzy_handle_t * handle, int type); unsigned long long SFH_status(sfh_instance_t * handle, int type);
#ifdef __cplusplus #ifdef __cplusplus

View File

@@ -1500,7 +1500,7 @@ stream_para_t Maat_stream_scan_digest_start(Maat_feather_t feather,int table_id,
{ {
struct _Maat_feather_t* _feather=(_Maat_feather_t*)feather; struct _Maat_feather_t* _feather=(_Maat_feather_t*)feather;
struct _Maat_scanner_t* scanner=NULL; struct _Maat_scanner_t* scanner=NULL;
fuzzy_handle_t * tmp_fuzzy_handle=NULL; sfh_instance_t * tmp_fuzzy_handle=NULL;
struct _Maat_table_info_t *p_table=NULL; struct _Maat_table_info_t *p_table=NULL;
p_table=acqurie_table(_feather, table_id, TABLE_TYPE_DIGEST); p_table=acqurie_table(_feather, table_id, TABLE_TYPE_DIGEST);
if(p_table==NULL) if(p_table==NULL)
@@ -1508,7 +1508,7 @@ stream_para_t Maat_stream_scan_digest_start(Maat_feather_t feather,int table_id,
_feather->scan_err_cnt++; _feather->scan_err_cnt++;
return NULL; return NULL;
} }
tmp_fuzzy_handle=fuzzy_create_handle(total_len); tmp_fuzzy_handle=SFH_instance(total_len);
if(tmp_fuzzy_handle==NULL) if(tmp_fuzzy_handle==NULL)
{ {
_feather->scan_err_cnt++; _feather->scan_err_cnt++;
@@ -1584,7 +1584,7 @@ int Maat_stream_scan_digest(stream_para_t * stream_para, const char * data, int
} }
aligment_int64_array_add(sp->feather->thread_call_cnt, sp->thread_num, 1); aligment_int64_array_add(sp->feather->thread_call_cnt, sp->thread_num, 1);
pthread_mutex_lock(&(sp->fuzzy_mutex)); pthread_mutex_lock(&(sp->fuzzy_mutex));
sp->acc_scan_len+=fuzzy_feed(sp->fuzzy_hash_handle, data, (unsigned int)data_len,offset); sp->acc_scan_len+=SFH_feed(sp->fuzzy_hash_handle, data, (unsigned int)data_len,offset);
pthread_mutex_unlock(&(sp->fuzzy_mutex)); pthread_mutex_unlock(&(sp->fuzzy_mutex));
do_query=REACH_QUERY_THRESH(sp->total_len, sp->acc_scan_len, sp->query_point,8); do_query=REACH_QUERY_THRESH(sp->total_len, sp->acc_scan_len, sp->query_point,8);
if(do_query==0) if(do_query==0)
@@ -1592,7 +1592,7 @@ int Maat_stream_scan_digest(stream_para_t * stream_para, const char * data, int
goto fast_out; goto fast_out;
} }
pthread_mutex_lock(&(sp->fuzzy_mutex)); pthread_mutex_lock(&(sp->fuzzy_mutex));
digest_len=fuzzy_status(sp->fuzzy_hash_handle, HASH_LENGTH); digest_len=SFH_status(sp->fuzzy_hash_handle, HASH_LENGTH);
pthread_mutex_unlock(&(sp->fuzzy_mutex)); pthread_mutex_unlock(&(sp->fuzzy_mutex));
if(digest_len==0) if(digest_len==0)
{ {
@@ -1600,7 +1600,7 @@ int Maat_stream_scan_digest(stream_para_t * stream_para, const char * data, int
} }
digest_buff=(char*)malloc(sizeof(char)*digest_len); digest_buff=(char*)malloc(sizeof(char)*digest_len);
pthread_mutex_lock(&(sp->fuzzy_mutex)); pthread_mutex_lock(&(sp->fuzzy_mutex));
fuzzy_digest(sp->fuzzy_hash_handle,digest_buff, digest_len); SFH_digest(sp->fuzzy_hash_handle,digest_buff, digest_len);
pthread_mutex_unlock(&(sp->fuzzy_mutex)); pthread_mutex_unlock(&(sp->fuzzy_mutex));
if(GIE_handle!=NULL) if(GIE_handle!=NULL)
@@ -1661,7 +1661,7 @@ void Maat_stream_scan_digest_end(stream_para_t* stream_para)
DEC_SCANNER_REF(scanner, sp->thread_num); DEC_SCANNER_REF(scanner, sp->thread_num);
} }
} }
fuzzy_destroy_handle(sp->fuzzy_hash_handle); SFH_release(sp->fuzzy_hash_handle);
pthread_mutex_destroy(&(sp->fuzzy_mutex)); pthread_mutex_destroy(&(sp->fuzzy_mutex));
assert(sp->last_cache==NULL); assert(sp->last_cache==NULL);
assert(sp->scan_buff==NULL); assert(sp->scan_buff==NULL);

View File

@@ -12,7 +12,7 @@
#include "rulescan.h" #include "rulescan.h"
#include "hiredis.h" #include "hiredis.h"
#include "mesa_fuzzy.h" #include "stream_fuzzy_hash.h"
#include "gram_index_engine.h" #include "gram_index_engine.h"
#include "aligment_int64.h" #include "aligment_int64.h"
#include <pthread.h> #include <pthread.h>
@@ -314,7 +314,7 @@ struct _stream_para_t
void* rs_stream_para; void* rs_stream_para;
long acc_scan_len; long acc_scan_len;
unsigned long long total_len; unsigned long long total_len;
fuzzy_handle_t *fuzzy_hash_handle; sfh_instance_t *fuzzy_hash_handle;
pthread_mutex_t fuzzy_mutex; pthread_mutex_t fuzzy_mutex;
unsigned char query_point[8]; unsigned char query_point[8];
}; };

View File

@@ -19,7 +19,7 @@ LIBMAAT = libmaatframe.a
LIBMAAT_SO = libmaatframe.so LIBMAAT_SO = libmaatframe.so
OBJS=config_monitor.o Maat_rule.o Maat_api.o Maat_command.o Maat_stat.o UniversalBoolMatch.o dynamic_array.o\ OBJS=config_monitor.o Maat_rule.o Maat_api.o Maat_command.o Maat_stat.o UniversalBoolMatch.o dynamic_array.o\
cJSON.o json2iris.o map_str2int.o interval_index.o gram_index_engine.o mesa_fuzzy.o rbtree.o cJSON.o json2iris.o map_str2int.o interval_index.o gram_index_engine.o stream_fuzzy_hash.o rbtree.o
.c.o: .c.o:
$(CC) -c $(CFLAGS) -I. $(H_DIR) $< $(CC) -c $(CFLAGS) -I. $(H_DIR) $<

View File

@@ -1,6 +1,6 @@
#include<zt_hash.h> #include "zt_hash.h"
#include<interval_index.h> #include "interval_index.h"
#include<mesa_fuzzy.h> #include "stream_fuzzy_hash.h"
#ifndef __SFH_INTERNAL_H_INCLUDE_ #ifndef __SFH_INTERNAL_H_INCLUDE_
#define __SFH_INTERNAL_H_INCLUDE_ #define __SFH_INTERNAL_H_INCLUDE_
@@ -106,6 +106,5 @@ unsigned int segment_overlap(fuzzy_handle_inner_t * handle, unsigned int size, u
void sfh_tune_callback(IVI_seg_t * seg, void * user_para); void sfh_tune_callback(IVI_seg_t * seg, void * user_para);
void sfh_output_callback(IVI_seg_t * seg, void * user_para); void sfh_output_callback(IVI_seg_t * seg, void * user_para);
void fuzzy_hash_length(IVI_seg_t * seg, void * user_para); void fuzzy_hash_length(IVI_seg_t * seg, void * user_para);
unsigned long long fuzzy_status(fuzzy_handle_t * handle, int type);
#endif #endif

View File

@@ -72,7 +72,7 @@ static inline unsigned int sum_hash(unsigned char c, unsigned int h)
/** /**
* handle * handle
*/ */
fuzzy_handle_t * fuzzy_create_handle(unsigned long long origin_len) sfh_instance_t * SFH_instance(unsigned long long origin_len)
{ {
fuzzy_handle_inner_t * handle = NULL; fuzzy_handle_inner_t * handle = NULL;
unsigned long long tmp_blksize = 0; unsigned long long tmp_blksize = 0;
@@ -94,7 +94,7 @@ fuzzy_handle_t * fuzzy_create_handle(unsigned long long origin_len)
//handle->blocksize=tmp_blksize; //handle->blocksize=tmp_blksize;
handle->blocksize = 3; handle->blocksize = 3;
handle->do_tune=1; handle->do_tune=1;
return (fuzzy_handle_t *)handle; return (sfh_instance_t *)handle;
} }
@@ -110,7 +110,7 @@ void fuzzy_node_free(IVI_seg_t * seg, void * usr_para)
} }
void fuzzy_destroy_handle(fuzzy_handle_t * handle) void SFH_release(sfh_instance_t * handle)
{ {
IVI_destroy(((fuzzy_handle_inner_t *)handle)->ivi, fuzzy_node_free, (void *)handle); IVI_destroy(((fuzzy_handle_inner_t *)handle)->ivi, fuzzy_node_free, (void *)handle);
((fuzzy_handle_inner_t *)handle)->fuzzy_node_memory -= sizeof(fuzzy_handle_inner_t); ((fuzzy_handle_inner_t *)handle)->fuzzy_node_memory -= sizeof(fuzzy_handle_inner_t);
@@ -118,7 +118,7 @@ void fuzzy_destroy_handle(fuzzy_handle_t * handle)
return; return;
} }
unsigned int fuzzy_feed(fuzzy_handle_t * handle, const char * data, unsigned int size, unsigned long long offset) unsigned int SFH_feed(sfh_instance_t * handle, const char * data, unsigned int size, unsigned long long offset)
{ {
fuzzy_handle_inner_t * _handle=(fuzzy_handle_inner_t *)handle; fuzzy_handle_inner_t * _handle=(fuzzy_handle_inner_t *)handle;
if(data == NULL || size == 0) if(data == NULL || size == 0)
@@ -150,7 +150,7 @@ unsigned int fuzzy_feed(fuzzy_handle_t * handle, const char * data, unsigned int
//printf("blocksize after:%llu\n", _handle->blocksize); //printf("blocksize after:%llu\n", _handle->blocksize);
} }
#if 0 #if 0
fuzzy_digest(handle,result, sizeof(result)); SFH_digest(handle,result, sizeof(result));
printf("%llu %s\n",offset,result); printf("%llu %s\n",offset,result);
#endif #endif
return length; return length;
@@ -575,7 +575,7 @@ int sfh_merge_seg(fuzzy_handle_inner_t * _handle, sfh_seg_t * p, sfh_seg_t * n,u
/** /**
* hash_result值result输出abc[1:100]def[200:300] * hash_result值result输出abc[1:100]def[200:300]
*/ */
int fuzzy_digest(fuzzy_handle_t * handle, char * hash_buffer, unsigned int size) int SFH_digest(sfh_instance_t * handle, char * hash_buffer, unsigned int size)
{ {
fuzzy_handle_inner_t* _handle=(fuzzy_handle_inner_t *)handle; fuzzy_handle_inner_t* _handle=(fuzzy_handle_inner_t *)handle;
unsigned int estimate_len=_handle->s_state_cnt+IVI_seg_cnt(_handle->ivi)*24+1; unsigned int estimate_len=_handle->s_state_cnt+IVI_seg_cnt(_handle->ivi)*24+1;
@@ -666,7 +666,7 @@ void sfh_output_callback(IVI_seg_t * seg, void * user_para)
/** /**
* fuzzy_hash的各种长度 * fuzzy_hash的各种长度
*/ */
unsigned long long fuzzy_status(fuzzy_handle_t * handle, int type) unsigned long long SFH_status(sfh_instance_t * handle, int type)
{ {
unsigned long long length; unsigned long long length;
fuzzy_handle_inner_t * _handle = (fuzzy_handle_inner_t *)(handle); fuzzy_handle_inner_t * _handle = (fuzzy_handle_inner_t *)(handle);

View File

@@ -1,12 +1,12 @@
#include<stdio.h> #include <stdio.h>
#include<stdlib.h> #include <stdlib.h>
#include<string.h> #include <string.h>
#include<unistd.h> #include <unistd.h>
#include<dirent.h> #include <dirent.h>
#include<sys/stat.h> #include <sys/stat.h>
#include<time.h> #include <time.h>
#include<math.h> #include <math.h>
#include "mesa_fuzzy.h" #include "stream_fuzzy_hash.h"
void* entropy_start(void) void* entropy_start(void)
{ {
@@ -40,96 +40,60 @@ double entropy_stop(void* handle)
free(handle); free(handle);
return (-sum); return (-sum);
} }
void hash_file(const char* path)
void dir_digest(int argc, char * argv[])
{ {
if(argc != 2)
{
printf("uasge: ./digest_gen [Dir]\n");
exit(-1);
}
DIR * dir;
struct dirent * file;
char * dir_path = argv[1];
char read_buff[1024*4];
unsigned long long read_size=0,feed_offset=0; unsigned long long read_size=0,feed_offset=0;
dir = opendir(dir_path); char read_buff[1024*4];
chdir(dir_path);
int ret =0;
unsigned int file_id = 1;
unsigned long hash_length=0,file_effective_length=0;
FILE * result_fp = NULL,*fp=NULL;
struct stat digest_fstat;
char * digest_result_buff=NULL;
const char* result_file="./digest_result.txt";
result_fp = fopen(result_file,"a");
void * entropy_handle=NULL; void * entropy_handle=NULL;
double file_entropy=0.0; double file_entropy=0.0;
if(NULL == result_fp) int hash_length;
char * digest_result_buff=NULL;
struct stat digest_fstat;
FILE* fp;
stat(path,&digest_fstat);
fp = fopen(path, "r");
if(NULL == fp)
{ {
printf("open file failed!"); printf("Open %s failed\n", path);
exit(-1); return;
} }
while((file = readdir(dir)) != NULL) read_size=0;
feed_offset=0;
sfh_instance_t * fhandle = SFH_instance(0);
entropy_handle=entropy_start();
while(0==feof(fp))
{ {
if(!strcmp(file->d_name, ".") ||!strcmp(file->d_name, "..")) read_size=fread(read_buff,1,sizeof(read_buff),fp);
{ SFH_feed(fhandle,read_buff,read_size,feed_offset);
continue; feed_offset+=read_size;
} entropy_feed(entropy_handle,(const unsigned char*) read_buff, read_size);
ret=stat(file->d_name,&digest_fstat);
if(ret!=0)
{
printf("fstat %s error.\n",file->d_name);
continue;
}
off_t file_size = digest_fstat.st_size;
fp = fopen(file->d_name, "r");
if(NULL == fp)
{
printf("Can't open file %s\n", file->d_name);
continue;
}
read_size=0;
feed_offset=0;
fuzzy_handle_t * fhandle = fuzzy_create_handle((unsigned long long)file_size);
entropy_handle=entropy_start();
while(0==feof(fp))
{
read_size=fread(read_buff,1,sizeof(read_buff),fp);
fuzzy_feed(fhandle,read_buff,read_size,feed_offset);
feed_offset+=read_size;
entropy_feed(entropy_handle,(const unsigned char*) read_buff, read_size);
}
file_entropy=entropy_stop(entropy_handle);
hash_length = fuzzy_status(fhandle, HASH_LENGTH);
file_effective_length = fuzzy_status(fhandle, EFFECTIVE_LENGTH);
digest_result_buff= (char *)malloc(sizeof(char) * (hash_length));
if(fuzzy_digest(fhandle, digest_result_buff, hash_length) < 0)
{
printf("error\n");
continue;
}
fprintf(result_fp, "%u\t%s\t%llu\t%lu\t%lf\n", file_id, file->d_name,file_size, hash_length,file_entropy);
fprintf(result_fp, "%s\n", digest_result_buff);
printf("%u %s\n", file_id,file->d_name);
file_id++;
fuzzy_destroy_handle(fhandle);
fclose(fp);
free(digest_result_buff);
} }
fclose(result_fp); file_entropy=entropy_stop(entropy_handle);
closedir(dir); hash_length = SFH_status(fhandle, HASH_LENGTH);
printf("write result to %s\n", result_file); digest_result_buff= (char *)malloc(sizeof(char) * (hash_length));
SFH_digest(fhandle, digest_result_buff, hash_length);
printf("%s %u %lf %s\n",path,digest_fstat.st_size,file_entropy,digest_result_buff);
SFH_release(fhandle);
free(digest_result_buff);
fclose(fp);
} }
/**
 * Entry point of digest_gen.
 *
 * With exactly one command-line argument, that argument is the file to hash.
 * Otherwise one line is read from stdin and used as the file path, which
 * allows piping a path into the tool. Paths read from stdin are limited to
 * the size of the local buffer (255 characters).
 *
 * @return 0 after a file was handed to hash_file(), -1 when no path was given
 */
int main(int argc, char * argv[])
{
	char path[256];
	if(argc == 2)
	{
		hash_file(argv[1]);
		return 0;
	}
	if(NULL!=fgets(path,sizeof(path),stdin))
	{
		/* fgets() keeps the line terminator; strip it or the open will fail. */
		path[strcspn(path, "\r\n")]='\0';
		if(path[0]!='\0')
		{
			hash_file(path);
			return 0;
		}
	}
	printf("SFH usage: ./digest_gen [FILE]\n");
	return -1;
}