增加基础组件、rulescan、maat的自动部署

This commit is contained in:
Lu Qiuwen
2019-06-18 21:24:42 +08:00
commit 3cab0a9c99
20 changed files with 1103 additions and 0 deletions

View File

@@ -0,0 +1,191 @@
#ifndef H_MAAT_COMMAND_H_INCLUDE
#define H_MAAT_COMMAND_H_INCLUDE
#ifndef __cplusplus
#error("This file should be compiled with C++ compiler")
#endif
#include "Maat_rule.h"
/* Operation selector passed as the `op` argument of the Maat_cmd* calls. */
enum MAAT_OPERATION
{
    MAAT_OP_DEL           = 0,
    MAAT_OP_ADD           = 1,
    MAAT_OP_RENEW_TIMEOUT = 2   /* rule expire time is changed to now + cmd->expire_after */
};
/* Parent kind stored in Maat_group_t::parent_type. */
enum MAAT_GROUP_RELATION
{
    PARENT_TYPE_COMPILE = 0,
    PARENT_TYPE_GROUP   = 1
};
/* Region kind stored in Maat_region_t::region_type. */
enum MAAT_REGION_TYPE
{
    REGION_EXPR       = 0,
    REGION_IP         = 1,
    REGION_INTERVAL   = 2,
    REGION_DIGEST     = 3,
    REGION_SIMILARITY = 4
};
/* Expression kind stored in Maat_rgn_str_t::expr_type. */
enum MAAT_EXPR_TYPE
{
    EXPR_TYPE_STRING = 0,
    EXPR_TYPE_AND    = 1,
    EXPR_TYPE_REGEX  = 2,
    EXPR_TYPE_OFFSET = 3
};
/* Match method stored in Maat_rgn_str_t::match_method. */
enum MAAT_MATCH_METHOD
{
    MATCH_METHOD_SUB      = 0,
    MATCH_METHOD_RIGHT    = 1,
    MATCH_METHOD_LEFT     = 2,
    MATCH_METHOD_COMPLETE = 3
};
/* Keyword case/encoding selector stored in Maat_rgn_str_t::hex_bin. */
enum MAAT_CASE_TYPE
{
    UNCASE_PLAIN = 0,
    CASE_HEXBIN  = 1,
    CASE_PLAIN   = 2
};
/* Address family stored in Maat_rgn_addr_t::addr_type; the numeric values are 4 and 6. */
enum MAAT_ADDR_TYPE
{
    ADDR_TYPE_IPv4 = 4,
    ADDR_TYPE_IPv6 = 6
};
/* Direction selector stored in Maat_rgn_addr_t::direction. */
enum MAAT_ADDR_DIRECTION
{
    ADDR_DIR_DOUBLE = 0,
    ADDR_DIR_SINGLE = 1
};
/* String-expression region payload; used as the expr_rule member of Maat_region_t. */
struct Maat_rgn_str_t
{
    const char *keywords;
    const char *district;                   /* optional for expr_plus, otherwise set to NULL */
    enum MAAT_EXPR_TYPE    expr_type;
    enum MAAT_MATCH_METHOD match_method;
    enum MAAT_CASE_TYPE    hex_bin;
};
/*
 * Address region payload; used as the ip_rule member of Maat_region_t.
 * IP addresses and their masks are carried as C strings, ports and their
 * masks as unsigned shorts.
 */
struct Maat_rgn_addr_t
{
    enum MAAT_ADDR_TYPE addr_type;

    const char *src_ip;
    const char *mask_src_ip;
    const char *dst_ip;
    const char *mask_dst_ip;

    unsigned short src_port;
    unsigned short mask_src_port;
    unsigned short dst_port;
    unsigned short mask_dst_port;
    unsigned short protocol;

    enum MAAT_ADDR_DIRECTION direction;
};
/* Interval region payload (interval_rule member of Maat_region_t): lower and upper boundary. */
struct Maat_rgn_intv_t
{
    unsigned int low_boundary;
    unsigned int up_boundary;
};
/*
 * Digest region payload; used as the digest_rule member of Maat_region_t.
 * The field name `orgin_len` (sic) is part of the public interface.
 */
struct Maat_rgn_digest_t
{
    unsigned long long orgin_len;
    const char        *digest_string;
    short              confidence_degree;
};
/* Similarity region payload; used as the similarity_rule member of Maat_region_t. */
struct Maat_rgn_sim_t
{
    char  *target;
    short  threshold;   /* 1~100 */
};
/*
 * One region: a table name, an id, a region_type tag and an anonymous union
 * holding one payload struct per region kind.
 */
struct Maat_region_t
{
    const char *table_name;

    /* If MAAT_OPT_CMD_AUTO_NUMBERING==1, maat assigns one; otherwise the user must supply a unique number. */
    int region_id;

    enum MAAT_REGION_TYPE region_type;

    union
    {
        struct Maat_rgn_str_t    expr_rule;
        struct Maat_rgn_addr_t   ip_rule;
        struct Maat_rgn_intv_t   interval_rule;
        struct Maat_rgn_digest_t digest_rule;
        struct Maat_rgn_sim_t    similarity_rule;
    };
};
/* One group: its identity, its parent reference and an array of region_num regions. */
struct Maat_group_t
{
    const char *table_name;

    /* If MAAT_OPT_CMD_AUTO_NUMBERING==1, maat assigns one; otherwise the user must assign a unique number. */
    int group_id;

    int parent_id;
    int not_flag;
    enum MAAT_GROUP_RELATION parent_type;

    int region_num;
    struct Maat_region_t *regions;
};
/*
 * Command object.
 * This struct MUST be allocated by Maat_create_cmd() and released by Maat_free_cmd().
 */
struct Maat_cmd_t
{
    struct Maat_rule_t   compile;       /* for MAAT_OP_DEL, only compile.config_id is necessary */
    int                  group_num;     /* for MAAT_OP_DEL, set to 0 */
    int                  expire_after;  /* expires after this many seconds; set to 0 for never timeout */
    int                  label_id;      /* >0: indexed and queried by Maat_cmd_select; =0: not indexed */
    struct Maat_group_t *groups;        /* add regions with Maat_add_region2cmd */
};
/* One table line, as taken by Maat_cmd_set_line() / Maat_cmd_set_lines(). */
struct Maat_line_t
{
    const char *table_name;
    const char *table_line;
    int         rule_id;        /* for MAAT_OP_DEL, only rule_id and table_name are necessary */
    int         label_id;
    int         expire_after;   /* expires after this many seconds; set to 0 for never timeout */
};
//Command construction helpers.
//A Maat_cmd_t must be obtained from Maat_create_cmd() and released with Maat_free_cmd().
struct Maat_cmd_t* Maat_create_cmd(const struct Maat_rule_t* rule, int group_num);
//NOTE(review): semantics of val/size are not documented here; see enum MAAT_RULE_OPT for the option keys.
int Maat_cmd_set_opt(struct Maat_cmd_t* cmd, enum MAAT_RULE_OPT type, const char* val, int size);
//input: which_group is in the range 0~group_num (NOTE(review): confirm whether the upper bound is inclusive).
//input: region can be freed by the caller after it has been added.
void Maat_add_region2cmd(struct Maat_cmd_t* cmd,int which_group,const struct Maat_region_t* region);
void Maat_free_cmd(struct Maat_cmd_t* cmd);
//NOTE(review): return value and output format are not documented in this header.
int Maat_format_cmd(struct Maat_cmd_t* cmd, char* buffer, int size);
//Input strings of REGION_EXPR and REGION_SIMILARITY need to be escaped.
char* Maat_str_escape(char* dst,int size,const char*src);
//A deletion may fail when synchronization with Redis is not yet complete.
//To make sure the delete command is executed, the user should try again after MAAT_OPT_SCANDIR_INTERVAL_MS ms.
//Returns the number of successfully updated rules.
//The following functions are NOT thread safe.
int Maat_cmd(Maat_feather_t feather,struct Maat_cmd_t* cmd,enum MAAT_OPERATION op);
//Pipeline model.
//NOTE(review): presumably appended commands are sent by Maat_cmd_commit() - confirm against the implementation.
int Maat_cmd_append(Maat_feather_t feather,struct Maat_cmd_t* cmd,enum MAAT_OPERATION op);
//Returns the number of successfully updated rules.
//Returns -1 on failure.
int Maat_cmd_commit(Maat_feather_t feather);
//NOTE(review): return value is not documented in this header.
int Maat_cmd_set_group(Maat_feather_t feather, int group_id, const struct Maat_region_t* region, enum MAAT_OPERATION op);
//Returns the number of successfully updated rules.
//Returns -1 on failure.
int Maat_cmd_set_line(Maat_feather_t feather,const struct Maat_line_t* line_rule, enum MAAT_OPERATION op);
//Array variant: line_rule points to line_num pointers to Maat_line_t.
int Maat_cmd_set_lines(Maat_feather_t feather,const struct Maat_line_t** line_rule, int line_num ,enum MAAT_OPERATION op);
//NOTE(review): key/value semantics and return value are not documented in this header.
int Maat_cmd_set_file(Maat_feather_t feather,const char* key, const char* value, size_t size, enum MAAT_OPERATION op);
//Returns the value of key after the increment.
//If the key does not exist, it is set to 0 before performing the operation.
long long Maat_cmd_incrby(Maat_feather_t feather,const char* key, int increment);
/* (table name, rule id) pair; arrays of these are handled by Maat_cmd_key_select() / Maat_cmd_key_free(). */
struct Maat_cmd_key
{
    char *table_name;
    int   rule_id;
};
//Key selection helpers and raw command setters.
//NOTE(review): none of these functions is documented in this header; the notes below only restate
//what the signatures and the Maat_cmd_t::label_id comment show.
//Releases a key array; number is the element count (presumably the value returned by Maat_cmd_key_select - confirm).
void Maat_cmd_key_free(struct Maat_cmd_key**keys, int number);
//Selects keys by label_id and hands the result back through *keys.
int Maat_cmd_key_select(Maat_feather_t feather, int label_id, struct Maat_cmd_key** keys);
//Queries rules indexed by label_id (label_id > 0 is indexed, see Maat_cmd_t); output_ids has room for size entries.
int Maat_cmd_select(Maat_feather_t feather, int label_id, int * output_ids, unsigned int size);
int Maat_cmd_flushDB(Maat_feather_t feather);
int Maat_command_raw_set_compile(Maat_feather_t feather, enum MAAT_OPERATION op, const struct Maat_rule_t* compile, const char* table_name, const char * huge_service_defined, int group_num);
int Maat_command_raw_set_region(Maat_feather_t feather, enum MAAT_OPERATION op, const struct Maat_region_t* region, int group_id);
int Maat_command_raw_set_group(Maat_feather_t feather, enum MAAT_OPERATION op, const struct Maat_group_t* group);
int Maat_cmd_get_new_group_id(Maat_feather_t feather);
int Maat_cmd_get_new_region_id(Maat_feather_t feather);
#endif

View File

@@ -0,0 +1,291 @@
/*
*****************Maat Deep Packet Inspection Policy Framework********
* Maat is the Goddess of truth and justice in ancient Egyptian concept.
* Her feather was the measure that determined whether the souls (considered
* to reside in the heart) of the departed would reach the paradise of afterlife
* successfully.
* Author: zhengchao@iie.ac.cn, MESA
* Version 2018-12-07 Plugin Extra Data.
* NOTE: MUST compile with G++
* All rights reserved by Institute of Information Engineering, Chinese Academy of Sciences 2014~2018
*********************************************************
*/
#ifndef H_MAAT_RULE_H_INCLUDE
#define H_MAAT_RULE_H_INCLUDE
#ifndef __cplusplus
#error("This file should be compiled with C++ compiler")
#endif
#include <MESA/stream.h>
// Character set selector taken by the string scan functions.
enum MAAT_CHARSET
{
    CHARSET_NONE                  = 0,
    CHARSET_GBK                   = 1,
    CHARSET_BIG5                  = 2,
    CHARSET_UNICODE               = 3,
    CHARSET_UTF8                  = 4,
    CHARSET_BIN                   = 5,
    // Unicode Escape format, prefix backslash-u hex, e.g. "\u627;"
    CHARSET_UNICODE_ASCII_ESC     = 6,
    // Unicode Escape format, prefix backslash-u with 4 bytes aligned, e.g. "\u0627"
    CHARSET_UNICODE_ASCII_ALIGNED = 7,
    // SGML Numeric character reference, decimal base, e.g. "&#1575;"
    CHARSET_UNICODE_NCR_DEC       = 8,
    // SGML Numeric character reference, hexadecimal base, e.g. "&#x627;"
    CHARSET_UNICODE_NCR_HEX       = 9,
    // URL encode with GB2312, e.g. the chinese word "china" was encoded to %D6%D0%B9%FA
    CHARSET_URL_ENCODE_GB2312     = 10,
    // URL encode with UTF8, e.g. the chinese word "china" was encoded to %E4%B8%AD%E5%9B%BD
    CHARSET_URL_ENCODE_UTF8       = 11
};
/* Rule action codes. NOTE(review): presumably the values carried in Maat_rule_t::action - confirm. */
enum MAAT_ACTION
{
    MAAT_ACTION_BLOCK = 0,
    MAAT_ACTION_MONIT = 1,
    MAAT_ACTION_WHITE = 2
};
/* Position kind stored in sub_item_pos_t::ruletype. */
enum MAAT_POS_TYPE
{
    MAAT_POSTYPE_EXPR  = 0,
    MAAT_POSTYPE_REGEX = 1
};
/* Handle types used throughout the API; each one is a plain void pointer. */
typedef void *scan_status_t;
typedef void *stream_para_t;
typedef void *Maat_feather_t;

/* MAX_SERVICE_DEFINE_LEN is the capacity of Maat_rule_t::service_defined. */
#define MAX_SERVICE_DEFINE_LEN      128
#define MAX_HUGE_SERVICE_DEFINE_LEN (1024 * 4)
/*
 * Rule descriptor used as scan result element and as the `compile` member of Maat_cmd_t.
 * service_defined is a fixed buffer of MAX_SERVICE_DEFINE_LEN bytes; serv_def_len is
 * stored next to it (NOTE(review): presumably the used length of that buffer - confirm).
 */
struct Maat_rule_t
{
    int config_id;
    int service_id;

    char do_log;
    char do_blacklist;
    char action;
    char reserved;

    int  serv_def_len;
    char service_defined[MAX_SERVICE_DEFINE_LEN];
};
/* Values of the update_type argument handed to Maat_start_callback_t. */
#define MAAT_RULE_UPDATE_TYPE_FULL 1
#define MAAT_RULE_UPDATE_TYPE_INC  2

/* Callback signatures registered through Maat_table_callback_register(). */
typedef void Maat_start_callback_t(int update_type, void *u_para);
typedef void Maat_update_callback_t(int table_id, const char *table_line, void *u_para);
typedef void Maat_finish_callback_t(void *u_para);
/* -------------------- HITTING DETAIL DESCRIPTION BEGIN -------------------- */
/* Array capacities used by the hit-detail structures declared below. */
#define MAAT_MAX_HIT_RULE_NUM    8
#define MAAT_MAX_EXPR_ITEM_NUM   8
#define MAAT_MAX_HIT_POS_NUM     8
#define MAAT_MAX_REGEX_GROUP_NUM 8
/*
 * NOTE: position buffers such as hitting_regex_pos and hit_pos are ONLY valid
 * before the next scan or Maat_stream_scan_string_end.
 */
struct regex_pos_t
{
    int         group_num;
    int         hitting_regex_len;
    const char *hitting_regex_pos;
    int         grouping_len[MAAT_MAX_REGEX_GROUP_NUM];
    const char *grouping_pos[MAAT_MAX_REGEX_GROUP_NUM];
};
/* Position of one sub-string hit: a length and a pointer. */
struct str_pos_t
{
    int         hit_len;
    const char *hit_pos;
};
/*
 * Hit positions of one sub item: a ruletype tag, a hit counter and an anonymous
 * union of regex / sub-string position arrays (MAAT_MAX_HIT_POS_NUM entries each).
 */
struct sub_item_pos_t
{
    enum MAAT_POS_TYPE ruletype;
    int                hit_cnt;

    union
    {
        struct regex_pos_t regex_pos[MAAT_MAX_HIT_POS_NUM];
        struct str_pos_t   substr_pos[MAAT_MAX_HIT_POS_NUM];
    };
};
/* Hit positions of one region: up to MAAT_MAX_EXPR_ITEM_NUM sub items. */
struct Maat_region_pos_t
{
    int                   region_id;
    int                   sub_item_num;
    struct sub_item_pos_t sub_item_pos[MAAT_MAX_EXPR_ITEM_NUM];
};
/* Hit detail of one config: up to MAAT_MAX_HIT_RULE_NUM region position records. */
struct Maat_hit_detail_t
{
    int                      config_id;        /* set <0 if half hit */
    int                      hit_region_cnt;
    struct Maat_region_pos_t region_pos[MAAT_MAX_HIT_RULE_NUM];
};
//--------------------HITTING DETAIL DESCRIPTION END
//Deprecated interface, kept for compatibility.
Maat_feather_t Maat_summon_feather(int max_thread_num,
const char* table_info_path,
const char* ful_cfg_dir,
const char* inc_cfg_dir,
void*logger);//MESA_handle_logger
//Deprecated interface, kept for compatibility.
Maat_feather_t Maat_summon_feather_json(int max_thread_num,
const char* table_info_path,
const char* json_rule,
void* logger);
//Current creation interface: build a feather, then start it with Maat_initiate_feather().
//NOTE(review): presumably options are applied with Maat_set_feather_opt() between the two calls - confirm.
Maat_feather_t Maat_feather(int max_thread_num,const char* table_info_path,void* logger);
int Maat_initiate_feather(Maat_feather_t feather);
// Option keys for Maat_set_feather_opt(); each note states the expected VALUE and SIZE arguments.
enum MAAT_INIT_OPT
{
    // VALUE is integer, SIZE=sizeof(int). DEFAULT: 1,000 milliseconds.
    MAAT_OPT_SCANDIR_INTERVAL_MS   = 1,
    // VALUE is integer, SIZE=sizeof(int). DEFAULT: 60,000 milliseconds.
    // (The INVERVAL spelling is part of the public name.)
    MAAT_OPT_EFFECT_INVERVAL_MS    = 2,
    // VALUE is a const char*, MUST end with '\0', SIZE=strlen(string)+1. DEFAULT: no default.
    MAAT_OPT_FULL_CFG_DIR          = 3,
    // VALUE is a const char*, MUST end with '\0', SIZE=strlen(string)+1. DEFAULT: no default.
    MAAT_OPT_INC_CFG_DIR           = 4,
    // VALUE is a const char*, MUST end with '\0', SIZE=strlen(string)+1. DEFAULT: no default.
    MAAT_OPT_JSON_FILE_PATH        = 5,
    // VALUE is NULL, SIZE is 0. MAAT_OPT_STAT_FILE_PATH must be set. Default: stat OFF.
    MAAT_OPT_STAT_ON               = 6,
    // VALUE is NULL, SIZE is 0. MAAT_OPT_STAT_FILE_PATH must be set. Default: stat OFF.
    MAAT_OPT_PERF_ON               = 7,
    // VALUE is a const char*, MUST end with '\0', SIZE=strlen(string)+1. DEFAULT: no default.
    MAAT_OPT_STAT_FILE_PATH        = 8,
    // VALUE is integer *, SIZE=sizeof(int).
    // 0: do not return any detail; 1: return hit pos, not including regex grouping;
    // 2: return hit pos and regex grouping pos. DEFAULT: 0.
    MAAT_OPT_SCAN_DETAIL           = 9,
    // VALUE is a const char*, MUST end with '\0', SIZE=strlen(string)+1, no more than 11 bytes.
    // DEFAULT: MAAT_$tableinfo_path$.
    MAAT_OPT_INSTANCE_NAME         = 10,
    // VALUE is a const char*, MUST end with '\0', SIZE=strlen(string)+1. No DEFAULT.
    MAAT_OPT_DECRYPT_KEY           = 11,
    // VALUE is a const char*, MUST end with '\0', SIZE=strlen(string)+1. No DEFAULT.
    MAAT_OPT_REDIS_IP              = 12,
    // VALUE is an unsigned short or a signed int, host order,
    // SIZE=sizeof(unsigned short) or sizeof(int). No DEFAULT.
    MAAT_OPT_REDIS_PORT            = 13,
    // VALUE is integer *, 0~15, SIZE=sizeof(int). DEFAULT: 0.
    MAAT_OPT_REDIS_INDEX           = 14,
    // VALUE is integer *, 1 or 0, SIZE=sizeof(int). DEFAULT: 1.
    MAAT_OPT_CMD_AUTO_NUMBERING    = 15,
    // VALUE is NULL, SIZE is 0. Default: deferred initialization OFF.
    MAAT_OPT_DEFERRED_LOAD         = 16,
    // VALUE is NULL, SIZE is 0. Default: cumulative update ON.
    MAAT_OPT_CUMULATIVE_UPDATE_OFF = 17,
    // VALUE is a long long, SIZE=sizeof(long long). Default: load the latest.
    // Only valid in redis mode, and may fail if the version is too old.
    // This option also disables background update.
    MAAT_OPT_LOAD_VERSION_FROM     = 18,
    // VALUE is integer, SIZE=sizeof(int). 1: enabled, 0: disabled.
    // DEFAULT: background update is enabled. Runtime setting is allowed.
    MAAT_OPT_ENABLE_UPDATE         = 19,
    // VALUE is a const char*, MUST end with '\0', SIZE=strlen(string)+1. Format is JSON, e.g.
    // {"tags":[{"tag":"location","value":"Beijing/ChaoYang/Huayan/22A"},{"tag":"isp","value":"telecom"}]}
    MAAT_OPT_ACCEPT_TAGS           = 20,
    // VALUE is a const char*, MUST end with '\0', SIZE=strlen(string)+1.
    // Specifies a local directory to store foreign content. Default: [table_info_path]_files
    MAAT_OPT_FOREIGN_CONT_DIR      = 21,
    // VALUE is integer *, SIZE=sizeof(int). Greater than 0: delete after VALUE seconds;
    // 0: delete foreign content right after the notification callbacks;
    // less than 0: NEVER delete. Default: 0.
    MAAT_OPT_FOREIGN_CONT_LINGER   = 22
};
//Sets one option on a feather; type is a MAAT_INIT_OPT key and value/size follow the note on that key.
//Returns -1 on failure, 0 on success.
int Maat_set_feather_opt(Maat_feather_t feather,enum MAAT_INIT_OPT type,const void* value,int size);
// State keys for Maat_read_state().
enum MAAT_STATE_OPT
{
    // Get the current maat version; if maat is in update progress, the updating version is returned.
    // VALUE is long long, SIZE=sizeof(long long).
    MAAT_STATE_VERSION             = 1,
    // Query at Maat_finish_callback_t to determine whether this table is the last one to update.
    // VALUE is integer, SIZE=sizeof(int), 1: yes, 0: no.
    MAAT_STATE_LAST_UPDATING_TABLE = 2,
    // NOTE(review): VALUE/SIZE for this key are not documented in this header.
    MAAT_STATE_IN_UPDATING         = 3
};
//Reads one state item; type is a MAAT_STATE_OPT key and value/size follow the note on that key.
//NOTE(review): return value is not documented in this header.
int Maat_read_state(Maat_feather_t feather, enum MAAT_STATE_OPT type, void* value, int size);
//NOTE(review): undocumented here; presumably destroys the feather - confirm against the implementation.
void Maat_burn_feather(Maat_feather_t feather);
//Returns table_id (>=0) on success, otherwise returns -1.
int Maat_table_register(Maat_feather_t feather,const char* table_name);
//Returns 1 on success, otherwise returns -1 in case of an invalid table_id or when the number of registered functions exceeds 32.
int Maat_table_callback_register(Maat_feather_t feather,short table_id,
Maat_start_callback_t *start,//MAAT_RULE_UPDATE_TYPE_*,u_para
Maat_update_callback_t *update,//table line ,u_para
Maat_finish_callback_t *finish,//u_para
void* u_para);
// Option keys for Maat_set_scan_status().
enum MAAT_SCAN_OPT
{
    // VALUE is a const char*, SIZE=strlen(string). DEFAULT: no default.
    MAAT_SET_SCAN_DISTRICT    = 1,
    // VALUE is NULL, SIZE=0. This option indicates that the following scan is the
    // last region of the current scan combination.
    MAAT_SET_SCAN_LAST_REGION = 2
};
//Returns 0 on success, -1 on failure.
int Maat_set_scan_status(Maat_feather_t feather,scan_status_t* mid,enum MAAT_SCAN_OPT type,const void* value,int size);
//Scan functions.
//Return the number of hit rules, -1 when an error occurs, -2 when the current region is hit.
//mid MUST be set to NULL before the first call.
//result is a caller-provided array with room for rule_num entries.
int Maat_scan_intval(Maat_feather_t feather,int table_id
,unsigned int intval
,struct Maat_rule_t*result,int rule_num
,scan_status_t *mid,int thread_num);
int Maat_scan_addr(Maat_feather_t feather,int table_id
,struct ipaddr* addr
,struct Maat_rule_t*result,int rule_num
,scan_status_t *mid,int thread_num);
int Maat_scan_proto_addr(Maat_feather_t feather,int table_id
,struct ipaddr* addr,unsigned short int proto
,struct Maat_rule_t*result,int rule_num
,scan_status_t *mid,int thread_num);
int Maat_full_scan_string(Maat_feather_t feather,int table_id
,enum MAAT_CHARSET charset,const char* data,int data_len
,struct Maat_rule_t*result,int* found_pos,int rule_num
,scan_status_t* mid,int thread_num);
//hit_detail may be NULL if the caller is not interested in it.
int Maat_full_scan_string_detail(Maat_feather_t feather,int table_id
,enum MAAT_CHARSET charset,const char* data,int data_len
,struct Maat_rule_t*result,int rule_num,struct Maat_hit_detail_t *hit_detail,int detail_num
,int* detail_ret,scan_status_t* mid,int thread_num);
//Stream string scan: start, scan (possibly several times), end.
stream_para_t Maat_stream_scan_string_start(Maat_feather_t feather,int table_id,int thread_num);
int Maat_stream_scan_string(stream_para_t* stream_para
,enum MAAT_CHARSET charset,const char* data,int data_len
,struct Maat_rule_t*result,int* found_pos,int rule_num
,scan_status_t* mid);
//hit_detail may be NULL if the caller is not interested in it.
int Maat_stream_scan_string_detail(stream_para_t* stream_para
,enum MAAT_CHARSET charset,const char* data,int data_len
,struct Maat_rule_t*result,int rule_num,struct Maat_hit_detail_t *hit_detail,int detail_num
,int* detail_ret,scan_status_t* mid);
void Maat_stream_scan_string_end(stream_para_t* stream_para);
//Stream digest scan: start, scan (possibly several times), end.
stream_para_t Maat_stream_scan_digest_start(Maat_feather_t feather,int table_id,unsigned long long total_len,int thread_num);
int Maat_stream_scan_digest(stream_para_t* stream_para
,const char* data,int data_len,unsigned long long offset
,struct Maat_rule_t*result,int rule_num
,scan_status_t* mid);
void Maat_stream_scan_digest_end(stream_para_t* stream_para);
int Maat_similar_scan_string(Maat_feather_t feather,int table_id
,const char* data,int data_len
,struct Maat_rule_t*result,int rule_num
,scan_status_t* mid,int thread_num);
//NOTE(review): undocumented here; presumably releases the scan status held in *mid - confirm.
void Maat_clean_status(scan_status_t* mid);
//Extra data attached to a compile rule; the payload is an untyped pointer.
typedef void* MAAT_RULE_EX_DATA;
// The idx parameter is the index: this will be the same value returned by Maat_rule_get_ex_new_index() when the functions were initially registered.
// Finally, the argl and argp parameters are the values originally passed to the same corresponding parameters when Maat_rule_get_ex_new_index() was called.
typedef void Maat_rule_EX_new_func_t(int idx, const struct Maat_rule_t* rule, const char* srv_def_large,
MAAT_RULE_EX_DATA* ad, long argl, void *argp);
typedef void Maat_rule_EX_free_func_t(int idx, const struct Maat_rule_t* rule, const char* srv_def_large,
MAAT_RULE_EX_DATA* ad, long argl, void *argp);
typedef void Maat_rule_EX_dup_func_t(int idx, MAAT_RULE_EX_DATA *to, MAAT_RULE_EX_DATA *from, long argl, void *argp);
//Registers the new/free/dup callbacks for a compile table; the returned index is the idx passed back to the callbacks.
//NOTE(review): the error return value is not documented in this header.
int Maat_rule_get_ex_new_index(Maat_feather_t feather, const char* compile_table_name,
Maat_rule_EX_new_func_t* new_func,
Maat_rule_EX_free_func_t* free_func,
Maat_rule_EX_dup_func_t* dup_func,
long argl, void *argp);
//The returned data is duplicated by the dup_func of Maat_rule_get_ex_new_index; the caller is responsible for freeing it.
MAAT_RULE_EX_DATA Maat_rule_get_ex_data(Maat_feather_t feather, const struct Maat_rule_t* rule, int idx);
//Helper function for parsing a space or tab separated line.
//Nth_column: columns are numbered from 1.
//column_offset / column_len: output parameters (NOTE(review): presumably offset and length of the column within line - confirm).
//Returns 0 on success.
int Maat_helper_read_column(const char* line, int Nth_column, size_t *column_offset, size_t *column_len);
//The following functions are similar to Maat_rule_get_ex_data, except that they are effective on plugin tables.
typedef void* MAAT_PLUGIN_EX_DATA;
typedef void Maat_plugin_EX_new_func_t(int table_id, const char* key, const char* table_line, MAAT_PLUGIN_EX_DATA* ad, long argl, void *argp);
typedef void Maat_plugin_EX_free_func_t(int table_id, MAAT_PLUGIN_EX_DATA* ad, long argl, void *argp);
typedef void Maat_plugin_EX_dup_func_t(int table_id, MAAT_PLUGIN_EX_DATA *to, MAAT_PLUGIN_EX_DATA *from, long argl, void *argp);
typedef int Maat_plugin_EX_key2index_func_t(const char* key);
//Registers the new/free/dup/key2index callbacks for the plugin table identified by table_id.
//NOTE(review): the return value is not documented in this header.
int Maat_plugin_EX_register(Maat_feather_t feather, int table_id,
Maat_plugin_EX_new_func_t* new_func,
Maat_plugin_EX_free_func_t* free_func,
Maat_plugin_EX_dup_func_t* dup_func,
Maat_plugin_EX_key2index_func_t* key2index_func,
long argl, void *argp);
//The data is duplicated by the dup_func of Maat_plugin_EX_register; the caller is responsible for freeing it.
MAAT_PLUGIN_EX_DATA Maat_plugin_get_EX_data(Maat_feather_t feather, int table_id, const char* key);
/* Option keys for Maat_read_rule() and Maat_cmd_set_opt(). */
enum MAAT_RULE_OPT
{
    MAAT_RULE_SERV_DEFINE = 0   /* VALUE is a char* buffer, SIZE = buffer size */
};
//Reads one item of a rule; type is a MAAT_RULE_OPT key and value/size follow the note on that key.
//NOTE(review): the return value is not documented in this header.
int Maat_read_rule(Maat_feather_t feather, const struct Maat_rule_t* rule, enum MAAT_RULE_OPT type, void* value, int size);
#endif // H_MAAT_RULE_H_INCLUDE

View File

@@ -0,0 +1,72 @@
#ifndef _GRAM_INDEX_ENGINE_
#define _GRAM_INDEX_ENGINE_
#ifdef __cplusplus
extern "C" {
#endif
/* Update operation stored in GIE_digest_t::operation. */
enum GIE_operation
{
    GIE_INSERT_OPT = 0,
    GIE_DELETE_OPT = 1
};
/* Input format stored in GIE_create_para_t::format. */
enum GIE_INPUT_FORMAT
{
    GIE_INPUT_FORMAT_PLAIN = 0,
    GIE_INPUT_FORMAT_SFH   = 1
};
/*
 * Handle type returned by GIE_create() and taken by the other GIE_* calls.
 * The struct body is empty in this public header.
 * NOTE(review): an empty struct is not valid ISO C (GCC/Clang accept it as an
 * extension) - confirm that every C consumer of this header builds with such a compiler.
 */
typedef struct
{
/* data */
}GIE_handle_t;
/* One update record handed to GIE_update(). */
typedef struct
{
    unsigned int       id;
    unsigned int       sfh_length;  /* size of fuzzy_hash */
    enum GIE_operation operation;   /* GIE_INSERT_OPT or GIE_DELETE_OPT; for GIE_DELETE_OPT only id is needed */
    short              cfds_lvl;
    char              *sfh;
    void              *tag;
} GIE_digest_t;
/* One match record written by GIE_query(). */
typedef struct
{
    unsigned int  id;
    short         cfds_lvl;
    void         *tag;
} GIE_result_t;
/* Creation parameters handed to GIE_create(). */
typedef struct
{
    unsigned int gram_value;
    /* unsigned int htable_num;  (field is commented out in this header) */
    unsigned int position_accuracy;

    /*
     * If format == GIE_INPUT_FORMAT_SFH, the input string is an SFH string;
     * otherwise, if format == GIE_INPUT_FORMAT_PLAIN, the input string is a common string.
     */
    enum GIE_INPUT_FORMAT format;

    /* If ED_reexamine == 1, calculate the edit distance to verify the final result. */
    int ED_reexamine;
} GIE_create_para_t;
//Creates an engine handle from the given parameters.
//NOTE(review): the failure return value is not documented in this header.
GIE_handle_t * GIE_create(const GIE_create_para_t * para);
//Applies size update records (digests is an array of pointers to GIE_digest_t).
//NOTE(review): the return value is not documented in this header.
int GIE_update(GIE_handle_t * handle, GIE_digest_t ** digests, int size);
//Returns the actual number of matched results;
//returns 0 when nothing matched;
//returns -1 when an error occurs.
int GIE_query(GIE_handle_t * handle, const char * data, int data_len, GIE_result_t * results, int result_size);
//Releases a handle. The spelling "destory" is part of the public name.
void GIE_destory(GIE_handle_t * handle);
//NOTE(review): the range and meaning of the returned similarity value are not documented in this header.
//The spelling "similiarity" is part of the public names.
int GIE_string_similiarity(const char *str1, int len1, const char *str2, int len2);
int GIE_sfh_similiarity(const char *sfh1, int len1, const char *sfh2, int len2);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,78 @@
#ifndef _STREAM_FUZZY_HASH_
#define _STREAM_FUZZY_HASH_
/*
* Copyright (C) MESA 2015
*
*/
#include <stdint.h>
#include <stdio.h>
#ifdef __cplusplus
extern "C" {
#endif
/* Values for the `type` argument of SFH_status(). */
#define TOTAL_LENGTH     0
#define EFFECTIVE_LENGTH 1
#define HASH_LENGTH      2
// Handle type returned by SFH_instance() and taken by the other SFH_* calls.
// The struct body is empty in this public header.
// NOTE(review): an empty struct is not valid ISO C (GCC/Clang accept it as an
// extension) - confirm that every C consumer of this header builds with such a compiler.
// Earlier alternative left disabled by the author: typedef sfh_instance_t void*;
typedef struct
{
}sfh_instance_t;
/**
* Create a fuzzy hash handle and return it.
* @param origin_len [NOTE(review): undocumented in this header; presumably the
* total length of the data to be hashed - confirm]
* @return [handle]
*/
sfh_instance_t * SFH_instance(unsigned long long origin_len);
/**
* Destroy the context behind a fuzzy hash handle.
* @param handle [handle]
*/
void SFH_release(sfh_instance_t * handle);
/**
* Feed the function your data.
* Call this function several times if you have several parts of data to feed.
* @param handle [handle]
* @param data [data that you want to fuzzy_hash]
* @param size [data size]
* @param offset [offset]
* @return [effective data length in the current feed]
*/
unsigned int SFH_feed(sfh_instance_t * handle, const char* data, unsigned int size, unsigned long long offset);
/**
* Obtain the fuzzy hash values.
* @param handle [handle]
* @param result [fuzzy hash result]
* Fuzzy hash result with offsets (in square brackets, separated by a colon).
* e.g. abc[1:100]def[200:300]
* @param size [size of the result buffer]
* @return [zero on success, non-zero on error]
*/
int SFH_digest(sfh_instance_t * handle, char* result, unsigned int size);
/**
* Obtain a certain length of the fuzzy hash status.
* @param handle [handle]
* @param type [length type]
* TOTAL_LENGTH: Total length of the data you have fed.
* Overlapped data will NOT be counted twice.
* EFFECTIVE_LENGTH: Length of the data involved in the calculation of the hash.
* HASH_LENGTH: Hash result length.
* @return [length value]
*/
unsigned long long SFH_status(sfh_instance_t * handle, int type);
#ifdef __cplusplus
}
#endif
#endif