增加新的table_type:文件摘要,根据摘要识别大体积文件。
This commit is contained in:
@@ -21,7 +21,7 @@
|
|||||||
#include "rulescan.h"
|
#include "rulescan.h"
|
||||||
#include "UniversalBoolMatch.h"
|
#include "UniversalBoolMatch.h"
|
||||||
|
|
||||||
int MAAT_FRAME_VERSION_1_2_20151103
|
int MAAT_FRAME_VERSION_1_2_20151103=1
|
||||||
const char *maat_module="MAAT Frame";
|
const char *maat_module="MAAT Frame";
|
||||||
const char* CHARSET_STRING[]={"CHARSET_NONE","GBK","BIG5","UNICODE","UTF-8"};
|
const char* CHARSET_STRING[]={"CHARSET_NONE","GBK","BIG5","UNICODE","UTF-8"};
|
||||||
|
|
||||||
|
|||||||
@@ -54,6 +54,7 @@ enum MAAT_TABLE_TYPE
|
|||||||
TABLE_TYPE_COMPILE,
|
TABLE_TYPE_COMPILE,
|
||||||
TABLE_TYPE_PLUGIN,
|
TABLE_TYPE_PLUGIN,
|
||||||
TABLE_TYPE_INTVAL,
|
TABLE_TYPE_INTVAL,
|
||||||
|
TABLE_TYPE_DIGEST,
|
||||||
TABLE_TYPE_GROUP
|
TABLE_TYPE_GROUP
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|||||||
59
src/inc_internal/great_index_engine.h
Normal file
59
src/inc_internal/great_index_engine.h
Normal file
@@ -0,0 +1,59 @@
|
|||||||
|
#ifndef _GREAT_INDEX_ENGINE_
|
||||||
|
#define _GREAT_INDEX_ENGINE_
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
 * Operation selectors for the index engine.
 * NOTE(review): presumably these are the values accepted by the last
 * argument of GIE_update() -- confirm against the implementation.
 */
#define GIE_INSERT_OPT 0
#define GIE_DELETE_OPT 1
|
||||||
|
|
||||||
|
/*
 * Handle type of the index engine. Every function declared in this header
 * takes or returns it by pointer only.
 *
 * The struct is declared without members here.
 * NOTE(review): a member-less struct is a compiler extension in C and has a
 * different size in C++; presumably the implementation casts the pointer to
 * its own private type -- confirm before relying on sizeof(GIE_handle_t).
 */
typedef struct
{
	/* data */
} GIE_handle_t;
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * One digest entry; GIE_update() receives an array of pointers to these.
 * NOTE(review): field meanings below are inferred from their names only --
 * confirm against the implementation.
 */
typedef struct
{
	unsigned int id;           /* entry identifier */
	unsigned long long orilen; /* presumably the original (un-hashed) data length */
	char *fh;                  /* presumably the fuzzy-hash string; ownership not visible here */
	void *tag;                 /* opaque caller pointer; result_t carries a field of the same name */
} fuzzy_digest_t;
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * One query hit; GIE_query() takes a caller-provided array of these.
 * NOTE(review): field meanings below are inferred from their names only --
 * confirm against the implementation.
 */
typedef struct
{
	unsigned int id;           /* identifier; fuzzy_digest_t has a field of the same name */
	int confidence_level;      /* match confidence; scale/range not visible here */
	unsigned long long orilen; /* presumably the original length of the matched entry */
	void *tag;                 /* opaque pointer; fuzzy_digest_t has a field of the same name */
} result_t;
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Creation parameters passed by pointer to GIE_create().
 * NOTE(review): units and valid ranges of these fields are not visible in
 * this header -- confirm against the implementation.
 */
typedef struct
{
	unsigned long long precision;
	int confidence_level_threshold; /* presumably compared against result_t.confidence_level */
	double query_accuracy;
} GIE_create_para_t;
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Takes a pointer to the creation parameters and returns a handle pointer.
 * NOTE(review): the failure return (presumably NULL) and whether usrpara is
 * retained after the call are not visible here -- confirm. The handle is
 * presumably released with GIE_destory().
 */
GIE_handle_t * GIE_create(GIE_create_para_t * usrpara);
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Applies an operation to the index for an array of digest pointers.
 *
 * handle    - index-engine handle.
 * digests   - array of pointers to fuzzy_digest_t entries.
 * size      - NOTE(review): presumably the number of entries in digests -- confirm.
 * operation - NOTE(review): presumably GIE_INSERT_OPT or GIE_DELETE_OPT -- confirm.
 *
 * Return value semantics are not visible in this header.
 */
int GIE_update(GIE_handle_t *handle, fuzzy_digest_t **digests, int size, int operation);
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Queries the index with a fuzzy-hash string and a length, writing into a
 * caller-provided result array.
 *
 * handle       - index-engine handle.
 * orilen       - NOTE(review): presumably the original length of the data being looked up -- confirm.
 * fuzzy_string - query string; strsize is presumably its length in bytes -- confirm.
 * results      - caller-provided array of result_t.
 * size         - NOTE(review): presumably the capacity of results -- confirm.
 *
 * NOTE(review): the return value is presumably the number of results
 * written or a negative error -- not visible here, confirm.
 */
int GIE_query(GIE_handle_t * handle, unsigned long long orilen, char * fuzzy_string, int strsize, result_t * results, int size);
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Takes an index-engine handle and returns nothing.
 * NOTE(review): presumably releases the handle obtained from GIE_create() --
 * confirm. The name is spelled "destory" (sic); it is part of the public
 * interface, so it is kept as-is for existing callers.
 */
void GIE_destory(GIE_handle_t * handle);
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
||||||
84
src/inc_internal/mesa_fuzzy.h
Normal file
84
src/inc_internal/mesa_fuzzy.h
Normal file
@@ -0,0 +1,84 @@
|
|||||||
|
#ifndef _MESA_FUZZY_
|
||||||
|
#define _MESA_FUZZY_
|
||||||
|
|
||||||
|
/*
 * Copyright (C) MESA 2015
 *
 * These functions allow a programmer to compute the fuzzy hashes
 * (also called context-triggered piecewise hashes) of
 * buffer[s] of text.
 *
 * See also:
 * ssdeep, and
 * "Identifying almost identical files using context triggered piecewise hashing"
 *
 */
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
 * Length selectors for the `type` argument of fuzzy_status():
 *   TOTAL_LENGTH     - total length of data fed so far; overlapped data is
 *                      not counted twice.
 *   EFFECTIVE_LENGTH - length of data involved in the hash calculation.
 *   HASH_LENGTH      - length of the hash result.
 */
#define TOTAL_LENGTH 0
#define EFFECTIVE_LENGTH 1
#define HASH_LENGTH 2
|
||||||
|
|
||||||
|
/*
 * Handle type of the fuzzy-hash context. Every function declared in this
 * header takes or returns it by pointer only.
 *
 * The struct is declared without members here.
 * NOTE(review): a member-less struct is a compiler extension in C and has a
 * different size in C++; presumably the implementation casts the pointer to
 * its own private type -- confirm before relying on sizeof(fuzzy_handle_t).
 */
typedef struct
{
} fuzzy_handle_t;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* create a fuzzy hash handle and return it.
|
||||||
|
* @return [handle]
|
||||||
|
*/
|
||||||
|
fuzzy_handle_t * fuzzy_create_handle(void);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* destroy context by a fuzzy hash handle.
|
||||||
|
* @param handle [handle]
|
||||||
|
*/
|
||||||
|
void fuzzy_destroy_handle(fuzzy_handle_t * handle);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Feed the function your data.
|
||||||
|
* Call this function several times, if you have several parts of data to feed.
|
||||||
|
* @param handle [handle]
|
||||||
|
* @param data [data that you want to fuzzy_hash]
|
||||||
|
* @param size [data size]
|
||||||
|
* @param offset [offset]
|
||||||
|
* @return [return effective data length]
|
||||||
|
*/
|
||||||
|
uint fuzzy_feed(fuzzy_handle_t * handle, const char* data, uint size, unsigned long offset);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Obtain the fuzzy hash values.
|
||||||
|
* @param handle [handle]
|
||||||
|
* @param result [fuzzy hash result]
|
||||||
|
* Fuzzy hash result with offsets(in the square brackets, with colon splitted).
|
||||||
|
* eg. abc[1:100]def[200:300]
|
||||||
|
* @param size [@result size]
|
||||||
|
* @return [return zero on success, non-zero on error]
|
||||||
|
*/
|
||||||
|
int fuzzy_digest(fuzzy_handle_t * handle, char* result, uint size);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Obtain certain length of fuzzy hash status.
|
||||||
|
* @param handle [handle]
|
||||||
|
* @param type [length type]
|
||||||
|
* TOTAL_LENGTH:Total length of data you have fed.
|
||||||
|
* Overlapped data will NOT count for 2 times.
|
||||||
|
* EFFECTIVE_LENGTH:Length of data that involved in the calculation of hash.
|
||||||
|
* HASH_LENGTH:Hash result length.
|
||||||
|
* @return [length value]
|
||||||
|
*/
|
||||||
|
unsigned long fuzzy_status(fuzzy_handle_t * handle, int type);
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
||||||
Reference in New Issue
Block a user