增加新的table_type:文件摘要,根据摘要识别大体积文件。

This commit is contained in:
zhengchao
2015-11-09 16:07:50 +08:00
parent dbaaffd26a
commit 3606a1b1e6
4 changed files with 145 additions and 1 deletions

View File

@@ -21,7 +21,7 @@
#include "rulescan.h" #include "rulescan.h"
#include "UniversalBoolMatch.h" #include "UniversalBoolMatch.h"
int MAAT_FRAME_VERSION_1_2_20151103 int MAAT_FRAME_VERSION_1_2_20151103=1
const char *maat_module="MAAT Frame"; const char *maat_module="MAAT Frame";
const char* CHARSET_STRING[]={"CHARSET_NONE","GBK","BIG5","UNICODE","UTF-8"}; const char* CHARSET_STRING[]={"CHARSET_NONE","GBK","BIG5","UNICODE","UTF-8"};

View File

@@ -54,6 +54,7 @@ enum MAAT_TABLE_TYPE
TABLE_TYPE_COMPILE, TABLE_TYPE_COMPILE,
TABLE_TYPE_PLUGIN, TABLE_TYPE_PLUGIN,
TABLE_TYPE_INTVAL, TABLE_TYPE_INTVAL,
TABLE_TYPE_DIGEST,
TABLE_TYPE_GROUP TABLE_TYPE_GROUP
}; };

View File

@@ -0,0 +1,59 @@
#ifndef _GREAT_INDEX_ENGINE_
#define _GREAT_INDEX_ENGINE_
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Operation codes.
 * NOTE(review): presumably the values accepted by the `opration` argument of
 * GIE_update() (insert vs. delete) -- confirm against the implementation.
 */
#define GIE_INSERT_OPT 0
#define GIE_DELETE_OPT 1
/*
 * Opaque handle type used by the GIE_* functions.
 *
 * Declared as an incomplete type instead of a struct with no members: an
 * empty struct is not valid ISO C (it is a GNU extension whose size is 0 in
 * C but 1 in C++), and the functions in this header only ever take or return
 * a pointer to it.
 */
typedef struct GIE_handle GIE_handle_t;
/*
 * One digest record; GIE_update() receives an array of pointers to these.
 * NOTE(review): the field meanings below are inferred from the names only --
 * confirm against the implementation.
 */
typedef struct
{
    unsigned            id;      /* record identifier */
    unsigned long long  orilen;  /* presumably the length of the original data */
    char               *fh;      /* presumably the fuzzy-hash string */
    void               *tag;     /* presumably opaque user data */
} fuzzy_digest_t;
/*
 * One query hit; GIE_query() fills a caller-supplied array of these.
 * NOTE(review): the field meanings below are inferred from the names only --
 * confirm against the implementation.
 */
typedef struct
{
    unsigned            id;                /* identifier of the matched record */
    int                 confidence_level;  /* match confidence; scale not visible here */
    unsigned long long  orilen;            /* presumably the length of the original data */
    void               *tag;               /* presumably opaque user data */
} result_t;
/*
 * Parameters handed to GIE_create().
 * NOTE(review): units and valid ranges of these fields are not visible from
 * this header -- confirm against the implementation.
 */
typedef struct
{
    unsigned long long  precision;
    int                 confidence_level_threshold;
    double              query_accuracy;
} GIE_create_para_t;
/*
 * Takes a parameter block and returns a pointer to a GIE_handle_t.
 * NOTE(review): presumably builds a new index configured by `usrpara`; the
 * failure return value and ownership of `usrpara` are not visible from this
 * header -- confirm against the implementation.
 */
GIE_handle_t *GIE_create(GIE_create_para_t *usrpara);
/*
 * Takes a handle, an array of digest pointers, a count and an operation code.
 * NOTE(review): `opration` is presumably GIE_INSERT_OPT or GIE_DELETE_OPT and
 * `size` presumably the number of entries in `digests`; the meaning of the
 * int return value is not visible here -- confirm all three.
 */
int GIE_update(GIE_handle_t *handle,
               fuzzy_digest_t **digests,
               int size,
               int opration);
/*
 * Takes a handle, an original length, a fuzzy string with its size, and a
 * caller-supplied result array with its capacity.
 * NOTE(review): presumably looks `fuzzy_string` up in the index and writes up
 * to `size` hits into `results`; whether the return value is a hit count or a
 * status code is not visible here -- confirm against the implementation.
 */
int GIE_query(GIE_handle_t *handle,
              unsigned long long orilen,
              char *fuzzy_string,
              int strsize,
              result_t *results,
              int size);
/*
 * Takes a handle and returns nothing.
 * NOTE(review): presumably releases the handle obtained from GIE_create() --
 * confirm. The spelling "destory" is the exported symbol name and is kept
 * as-is so existing callers keep linking.
 */
void GIE_destory(GIE_handle_t *handle);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,84 @@
#ifndef _MESA_FUZZY_
#define _MESA_FUZZY_
/*
* Copyright (C) MESA 2015
*
* These functions allow a programmer to compute the fuzzy hashes
* (also called the context-triggered piecewise hashes) of
* buffer[s] of text.
*
* See also:
* ssdeep, and
* Identifying almost identical files using context triggered piecewise hashing
*
*/
#include <stdint.h>
#include <stdio.h>
#ifdef __cplusplus
extern "C" {
#endif
/* Length selectors for the `type` argument of fuzzy_status(). */
/* Total length of the data fed; overlapped data is not counted twice. */
#define TOTAL_LENGTH 0
/* Length of the data that was involved in the hash calculation. */
#define EFFECTIVE_LENGTH 1
/* Length of the hash result. */
#define HASH_LENGTH 2
/*
 * Opaque fuzzy-hash context type used by the fuzzy_* functions.
 *
 * Declared as an incomplete type instead of a struct with no members: an
 * empty struct is not valid ISO C (it is a GNU extension whose size is 0 in
 * C but 1 in C++), and the functions in this header only ever take or return
 * a pointer to it.
 */
typedef struct fuzzy_handle fuzzy_handle_t;
/**
 * Create a new fuzzy hash handle.
 * @return [the created handle]
 */
fuzzy_handle_t *fuzzy_create_handle(void);
/**
 * Destroy the context that belongs to a fuzzy hash handle.
 * @param handle [handle whose context is destroyed]
 */
void fuzzy_destroy_handle(fuzzy_handle_t *handle);
/**
 * Feed data into the hash.
 * Call this function several times if the data arrives in several parts.
 * @param handle [handle]
 * @param data   [data that you want to fuzzy_hash]
 * @param size   [size of @data]
 * @param offset [offset]
 *               NOTE(review): presumably the position of this part within the
 *               whole data stream -- confirm against the implementation.
 * @return [effective data length]
 *
 * The size and return types are spelled `unsigned int` rather than `uint`:
 * `uint` is not a standard C type and is not provided by the headers this
 * file includes, so the header would not compile on its own.
 */
unsigned int fuzzy_feed(fuzzy_handle_t *handle, const char *data,
                        unsigned int size, unsigned long offset);
/**
 * Obtain the fuzzy hash value.
 * @param handle [handle]
 * @param result [buffer that receives the fuzzy hash]
 *               The hash carries offsets in square brackets, separated by a
 *               colon, e.g. abc[1:100]def[200:300]
 * @param size   [size of @result]
 * @return [zero on success, non-zero on error]
 *
 * `size` is spelled `unsigned int` rather than `uint`: `uint` is not a
 * standard C type and is not provided by the headers this file includes, so
 * the header would not compile on its own.
 */
int fuzzy_digest(fuzzy_handle_t *handle, char *result, unsigned int size);
/**
 * Report one of the length values associated with a fuzzy hash handle.
 * @param handle [handle]
 * @param type   [which length to report]
 *               TOTAL_LENGTH: total length of the data you have fed;
 *                 overlapped data is NOT counted twice.
 *               EFFECTIVE_LENGTH: length of the data that was involved in
 *                 the calculation of the hash.
 *               HASH_LENGTH: length of the hash result.
 * @return [the requested length value]
 */
unsigned long fuzzy_status(fuzzy_handle_t *handle, int type);
#ifdef __cplusplus
}
#endif
#endif