合并新版本mesa_fuzzy(SFH)到maat,更节省内存。
This commit is contained in:
@@ -1,63 +0,0 @@
|
||||
#ifndef _GREAT_INDEX_ENGINE_
|
||||
#define _GREAT_INDEX_ENGINE_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define GIE_INSERT_OPT 0
|
||||
#define GIE_DELETE_OPT 1
|
||||
|
||||
typedef struct
|
||||
{
|
||||
/* data */
|
||||
}GIE_handle_t;
|
||||
|
||||
|
||||
typedef struct
|
||||
{
|
||||
unsigned int id;
|
||||
short operation;//GIE_INSERT_OPT or GIE_DELETE_OPT.if operation is GIE_DELETE_OPT, only id is needed;
|
||||
short cfds_lvl;
|
||||
unsigned long long origin_len;
|
||||
char * fuzzy_hash;
|
||||
void * tag;
|
||||
}GIE_digest_t;
|
||||
|
||||
|
||||
typedef struct
|
||||
{
|
||||
unsigned int id;
|
||||
short cfds_lvl;
|
||||
unsigned long long origin_len;
|
||||
void * tag;
|
||||
}GIE_result_t;
|
||||
|
||||
|
||||
typedef struct
|
||||
{
|
||||
unsigned long long index_interval;
|
||||
// int confidence_level_threshold;
|
||||
double query_accuracy;
|
||||
}GIE_create_para_t;
|
||||
|
||||
|
||||
GIE_handle_t * GIE_create(const GIE_create_para_t * para);
|
||||
|
||||
|
||||
int GIE_update(GIE_handle_t * handle, GIE_digest_t ** digests, int size);
|
||||
|
||||
//return actual matched result count
|
||||
//return 0 when matched nothing;
|
||||
//return -1 when error occurs;
|
||||
int GIE_query(GIE_handle_t * handle, unsigned long long origin_len, const char * fuzzy_string, GIE_result_t * results, int result_size);
|
||||
|
||||
|
||||
void GIE_destory(GIE_handle_t * handle);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -1,298 +0,0 @@
|
||||
/************************************************************************
|
||||
* InterVal Index interface
|
||||
* NOTE that:
|
||||
* (1) There are no overlapping intervals in InterVal Index;
|
||||
* (2) Each interval is closed;
|
||||
* (3) The interval supports rollback.
|
||||
*
|
||||
* author: zhengchao@iie.ac.cn tangqi@iie.ac.cn
|
||||
* last modify time: 2015-08-29
|
||||
*************************************************************************/
|
||||
|
||||
#ifndef _INTERVAL_INDEX_H_
|
||||
#define _INTERVAL_INDEX_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"{
|
||||
#endif
|
||||
|
||||
#include "queue.h"
|
||||
|
||||
#define SIZE_8
|
||||
|
||||
#ifdef SIZE_8
|
||||
typedef unsigned long long OFFSET_TYPE;
|
||||
typedef signed long long S_OFFSET_TYPE;
|
||||
#else
|
||||
typedef unsigned int OFFSET_TYPE;
|
||||
typedef signed int S_OFFSET_TYPE;
|
||||
#endif
|
||||
|
||||
|
||||
typedef struct{
|
||||
}IVI_t;
|
||||
|
||||
|
||||
/**
|
||||
* structure of segment
|
||||
**/
|
||||
typedef struct __IVI_seg_t{
|
||||
OFFSET_TYPE left;
|
||||
OFFSET_TYPE right;
|
||||
void * data;
|
||||
}IVI_seg_t;
|
||||
|
||||
|
||||
typedef void IVI_callback_t(IVI_seg_t * seg, void * usr_para);
|
||||
|
||||
/**
|
||||
* Deal with rollback
|
||||
* Refering to the approach of Linux's kernel to solute tcp seq rollback
|
||||
**/
|
||||
static inline int before(OFFSET_TYPE off1, OFFSET_TYPE off2)
|
||||
{
|
||||
return (S_OFFSET_TYPE)(off1 - off2) < 0;
|
||||
}
|
||||
#define after(off2, off1) before(off1, off2)
|
||||
|
||||
static inline int continuous(OFFSET_TYPE prev, OFFSET_TYPE next)
|
||||
{
|
||||
return ((next - prev) == 1);
|
||||
}
|
||||
|
||||
|
||||
IVI_seg_t * IVI_prev_continuous_seg(IVI_seg_t * seg);
|
||||
IVI_seg_t * IVI_next_continuous_seg(IVI_seg_t * seg);
|
||||
|
||||
|
||||
/**
|
||||
* Relation of two segments
|
||||
**/
|
||||
typedef enum __Relation_t{
|
||||
LEFT_NO_OVERLAP = 1, // |___A___|
|
||||
// |___B___|
|
||||
|
||||
LEFT_OVERLAP, // |___A___|
|
||||
// |___B___|
|
||||
|
||||
CONTAINED, // |___A___|
|
||||
// |_____B_____|
|
||||
|
||||
CONTAIN, // |_____A_____|
|
||||
// |___B___|
|
||||
|
||||
RIGHT_OVERLAP, // |___A___|
|
||||
// |___B___|
|
||||
|
||||
RIGHT_NO_OVERLAP, // |___A___|
|
||||
// |___B___|
|
||||
|
||||
ERROR
|
||||
}Relation_t;
|
||||
|
||||
|
||||
/**
|
||||
* Name:
|
||||
* IVI_relative_position
|
||||
* Description:
|
||||
* Get relative position of given two interval segments
|
||||
* Params:
|
||||
* seg1: Subject of relation
|
||||
* seg2: Object of relation
|
||||
* Relation:
|
||||
* On success, return the relation of two segments with enum;
|
||||
* Else, return ERROR in enum;
|
||||
**/
|
||||
Relation_t IVI_relative_position(IVI_seg_t * seg1, IVI_seg_t * seg2);
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Name:
|
||||
* IVI_create
|
||||
* Description:
|
||||
* Create an InterVal Index
|
||||
* Params:
|
||||
* void
|
||||
* Return:
|
||||
* Return a handler of this InterVal Index
|
||||
**/
|
||||
IVI_t * IVI_create(void);
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Name:
|
||||
* IVI_destroy
|
||||
* Description:
|
||||
* Destroy a given InterVal Index's handler
|
||||
* Params:
|
||||
* handler: The InterVal Index you want to destroy
|
||||
* cb: Callback function for user to free data in segement
|
||||
* usr_para: User parameter
|
||||
* Return:
|
||||
* void
|
||||
**/
|
||||
void IVI_destroy(IVI_t * handler, IVI_callback_t cb, void * usr_para);
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Name:
|
||||
* IVI_seg_malloc
|
||||
* Description:
|
||||
* Malloc a segment with given parameters
|
||||
* Params:
|
||||
* left: Left point of segment
|
||||
* right: Right point of segment
|
||||
* data: User data
|
||||
* Return:
|
||||
* Return a pointer of segment structure.
|
||||
**/
|
||||
IVI_seg_t * IVI_seg_malloc(OFFSET_TYPE left, OFFSET_TYPE right, void * data);
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Name:
|
||||
* IVI_seg_free
|
||||
* Description:
|
||||
* Free the memory of given segment
|
||||
* Params:
|
||||
* seg: The segment that you want to free
|
||||
* cb: Callback function for user to free *data in seg
|
||||
* usr_para: User parameter for cb
|
||||
* Return:
|
||||
* void
|
||||
**/
|
||||
void IVI_seg_free(IVI_seg_t * seg, IVI_callback_t cb, void * usr_para);
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Name:
|
||||
* IVI_insert
|
||||
* Description:
|
||||
* Insert a segment to an InterVal Index handler,and the segment
|
||||
* MUST not be overlapped with others in handler.
|
||||
* Params:
|
||||
* handler: The handler of InterVal Index created by IVI_create
|
||||
* seg: A segment that user wants to add. It MUST be created
|
||||
* by IVI_seg_malloc.
|
||||
* Return:
|
||||
* On success, 0 is returned;
|
||||
* Else when overlapp occures or error occures, -1 is returned.
|
||||
**/
|
||||
int IVI_insert(IVI_t * handler, IVI_seg_t * seg);
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Name:
|
||||
* IVI_remove
|
||||
* Description:
|
||||
* Remove a given segment from given InterVal Index handler.
|
||||
* Params:
|
||||
* handler: The handler of InterVal Index created by IVI_create
|
||||
* seg: A segment that user wants to delete. It MUST be created
|
||||
* by IVI_seg_malloc.
|
||||
* Return:
|
||||
* On success, 0 is returned;
|
||||
* Else when overlapp occures, -1 is returned.
|
||||
**/
|
||||
int IVI_remove(IVI_t * handler, IVI_seg_t * seg);
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Name:
|
||||
* IVI_query
|
||||
* Description:
|
||||
* Query from given InterVal Index and get the number of segments
|
||||
* which are overlapped with given interval, and store those segments
|
||||
* in the last parameter.
|
||||
* Params:
|
||||
* handler: The handler of interval index created by IVI_create
|
||||
* left: Left point of given interval
|
||||
* right: Right point of given interval
|
||||
* segs: An address of a segment pointer array to store those segments which
|
||||
* are overlapped with given interval. NOTE that user should not malloc
|
||||
* the array, and segs need to be freed by user. The element of *segs
|
||||
* MUST not be freed by user.
|
||||
* Return:
|
||||
* Return the number of segments which are overlapped with given interval
|
||||
**/
|
||||
int IVI_query(IVI_t * handler, OFFSET_TYPE left, OFFSET_TYPE right, IVI_seg_t *** segs);
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Name:
|
||||
* IVI_query_continuous
|
||||
* Description:
|
||||
* Query from interval index handler and get the number of continous segments
|
||||
* which are overlapped with given interval.
|
||||
* Params:
|
||||
* handler: The handler of InterVal Index created by IVI_create.
|
||||
* left: Left point of given interval
|
||||
* right: Right point of given interval
|
||||
* segs: An address of a segment pointer array to store those segments which
|
||||
* are overlapped with given interval. NOTE that user should not malloc
|
||||
* the array, and segs need to be freed by user. The element of *segs
|
||||
* MUST not be freed by user.
|
||||
* Return:
|
||||
* Return the number of continous segments which are overlapped with given interval
|
||||
**/
|
||||
int IVI_query_continuous(IVI_t * handler, OFFSET_TYPE left, OFFSET_TYPE right, IVI_seg_t *** segs);
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Name:
|
||||
* IVI_seg_cnt
|
||||
* Description:
|
||||
* Get the count of segments in given interval index handler
|
||||
* Params:
|
||||
* handler: The handler of InterVal Index created by IVI_create.
|
||||
* Return:
|
||||
* Return the count of segments in given interval index handler
|
||||
**/
|
||||
int IVI_seg_cnt(IVI_t * handler);
|
||||
|
||||
|
||||
/**
|
||||
* Name:
|
||||
* IVI_seg_len
|
||||
* Description:
|
||||
* Get the length of whole segments in given interval index handler
|
||||
* Params:
|
||||
* handler: The handler of InterVal Index created by IVI_create.
|
||||
* Return:
|
||||
* Return the length of whole segments in given interval index handler
|
||||
**/
|
||||
OFFSET_TYPE IVI_seg_length(IVI_t * handler);
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Name:
|
||||
* IVI_traverse
|
||||
* Description:
|
||||
* Traverse given InterVal Index and execute given callback function
|
||||
* one time for each seg in InterVal Index.
|
||||
* Params:
|
||||
* handler: The handler of InterVal Index created by IVI_create.
|
||||
* IVI_callback_t: Callback function for user to define.
|
||||
* usr_para: Parameter user want to pass to callback function.
|
||||
* Return:
|
||||
* void
|
||||
**/
|
||||
void IVI_traverse(IVI_t * handler, IVI_callback_t cb, void * usr_para);
|
||||
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _INTERVAL_INDEX_H_ */
|
||||
@@ -1,85 +0,0 @@
|
||||
#ifndef _MESA_FUZZY_
|
||||
#define _MESA_FUZZY_
|
||||
|
||||
/*
|
||||
* Copyright (C) MESA 2015
|
||||
*
|
||||
* These functions allow a programmer to compute the fuzzy hashes
|
||||
* (also called the context-triggered piecewise hashes) of
|
||||
* buffer[s] of text.
|
||||
*
|
||||
* See also:
|
||||
* ssdeep, and
|
||||
* Identifying almost identical files using context triggered piecewise hashing
|
||||
*
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define TOTAL_LENGTH 0
|
||||
#define EFFECTIVE_LENGTH 1
|
||||
#define HASH_LENGTH 2
|
||||
|
||||
// typedef fuzzy_handle_t void*;
|
||||
typedef struct
|
||||
{
|
||||
}fuzzy_handle_t;
|
||||
|
||||
/**
|
||||
* create a fuzzy hash handle and return it.
|
||||
* @return [handle]
|
||||
*/
|
||||
fuzzy_handle_t * fuzzy_create_handle(unsigned long long origin_len);
|
||||
|
||||
/**
|
||||
* destroy context by a fuzzy hash handle.
|
||||
* @param handle [handle]
|
||||
*/
|
||||
void fuzzy_destroy_handle(fuzzy_handle_t * handle);
|
||||
|
||||
/**
|
||||
* Feed the function your data.
|
||||
* Call this function several times, if you have several parts of data to feed.
|
||||
* @param handle [handle]
|
||||
* @param data [data that you want to fuzzy_hash]
|
||||
* @param size [data size]
|
||||
* @param offset [offset]
|
||||
* @return [return effective data length in current feed]
|
||||
*/
|
||||
unsigned int fuzzy_feed(fuzzy_handle_t * handle, const char* data, unsigned int size, unsigned long long offset);
|
||||
|
||||
/**
|
||||
* Obtain the fuzzy hash values.
|
||||
* @param handle [handle]
|
||||
* @param result [fuzzy hash result]
|
||||
* Fuzzy hash result with offsets(in the square brackets, with colon splitted).
|
||||
* eg. abc[1:100]def[200:300]
|
||||
* @param size [@result size]
|
||||
* @return [return zero on success, non-zero on error]
|
||||
*/
|
||||
int fuzzy_digest(fuzzy_handle_t * handle, char* result, unsigned int size);
|
||||
|
||||
/**
|
||||
* Obtain certain length of fuzzy hash status.
|
||||
* @param handle [handle]
|
||||
* @param type [length type]
|
||||
* TOTAL_LENGTH:Total length of data you have fed.
|
||||
* Overlapped data will NOT count for 2 times.
|
||||
* EFFECTIVE_LENGTH:Length of data that involved in the calculation of hash.
|
||||
* HASH_LENGTH:Hash result length.
|
||||
* @return [length value]
|
||||
*/
|
||||
unsigned long long fuzzy_status(fuzzy_handle_t * handle, int type);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user