[FEATURE]expr_matcher support dual engine(hyperscan & rulescan) & benchmark

This commit is contained in:
liuwentan
2023-08-10 16:10:50 +08:00
parent fb0cb5405d
commit 42f4480271
30 changed files with 4598 additions and 1284 deletions

View File

@@ -37,6 +37,7 @@ struct maat_hit_path {
};
struct maat_hit_group {
long long item_id;
long long group_id;
int vtable_id;
};
@@ -59,6 +60,11 @@ enum maat_list_type {
MAAT_LIST_TYPE_INC
};
enum maat_expr_engine {
MAAT_EXPR_ENGINE_HS = 0, //default engine(hyperscan)
MAAT_EXPR_ENGINE_RS //rulescan
};
struct ip_addr {
int ip_type; //4: IPv4, 6: IPv6
union {
@@ -144,6 +150,8 @@ int maat_options_set_redis(struct maat_options *opts, const char *redis_ip,
int maat_options_set_stat_file(struct maat_options *opts, const char *stat_filename);
int maat_options_set_expr_engine(struct maat_options *opts, enum maat_expr_engine engine);
/* maat_instance API */
struct maat *maat_new(struct maat_options *opts, const char *table_info_path);
void maat_free(struct maat *instance);

View File

@@ -7,8 +7,9 @@ include_directories(${PROJECT_SOURCE_DIR}/src/inc_internal)
add_subdirectory(ip_matcher/IntervalIndex)
add_library(adapter-static adapter_hs/adapter_hs.cpp bool_matcher/bool_matcher.cpp
add_library(adapter-static bool_matcher/bool_matcher.cpp expr_matcher/expr_matcher.cpp
expr_matcher/adapter_hs/adapter_hs.cpp expr_matcher/adapter_rs/adapter_rs.cpp
fqdn_engine/fqdn_engine.cpp ip_matcher/ip_matcher.cpp ip_matcher/ipv4_match.cpp
ip_matcher/ipv6_match.cpp flag_matcher/flag_matcher.cpp interval_matcher/cgranges.c
interval_matcher/interval_matcher.cpp)
target_link_libraries(adapter-static hyperscan_static hyperscan_runtime_static interval_index_static)
target_link_libraries(adapter-static hyperscan_static hyperscan_runtime_static rulescan_static interval_index_static)

View File

@@ -1,134 +0,0 @@
/*
**********************************************************************************************
* File: adapter_hs.h
* Description: wrapper for raw hyperscan
* Authors: Liu WenTan <liuwentan@geedgenetworks.com>
* Date: 2022-10-31
* Copyright: (c) 2018-2022 Geedge Networks, Inc. All rights reserved.
***********************************************************************************************
*/
#ifndef _ADAPTER_HS_H_
#define _ADAPTER_HS_H_
#ifdef __cplusplus
extern "C"
{
#endif
#include <stddef.h>
#include <stdint.h>
#include "log/log.h"
#define MAX_EXPR_PATTERN_NUM 8
struct adapter_hs;
/* match method */
enum hs_match_mode {
HS_MATCH_MODE_INVALID = -1,
HS_MATCH_MODE_EXACTLY = 1, /* scan data must match pattern exactly */
HS_MATCH_MODE_PREFIX, /* pattern must in the head of scan_data */
HS_MATCH_MODE_SUFFIX, /* pattern must in the end of scan_data */
HS_MATCH_MODE_SUB /* pattern must in the range[l_offset, r_offset] of scan_data */
};
enum hs_pattern_type {
HS_PATTERN_TYPE_STR = 0, /* pure literal string */
HS_PATTERN_TYPE_REG /* regex expression */
};
enum hs_case_sensitive {
HS_CASE_SENSITIVE = 0,
HS_CASE_INSENSITIVE
};
struct hs_scan_result {
long long rule_id;
void *user_tag;
};
struct hs_pattern {
enum hs_case_sensitive case_sensitive;
enum hs_match_mode match_mode;
enum hs_pattern_type pattern_type;
int is_hexbin; /* 1(yes) 0(no) */
/*
* just match in scan_data's range of [start_offset, end_offset], -1 means no limits
* for example:
* [-1, end_offset] means the pattern must be within scan_data's [0 ~ end_offset]
* [start_offset, -1] means the pattern must be within scan_data's [start_offset ~ data_end]
*/
int start_offset;
int end_offset;
/* start pointer of pattern */
char *pat;
/* pattern length */
size_t pat_len;
};
/* logic AND expression, such as (pattern1 & pattern2) */
struct expr_rule {
long long expr_id;
size_t n_patterns;
struct hs_pattern patterns[MAX_EXPR_PATTERN_NUM];
void *user_tag;
};
int adapter_hs_verify_regex_expression(const char *regex_expr,
struct log_handle *logger);
/**
* @brief new adapter_hs instance
*
* @param rules: logic AND expression's array
* @param n_rule: the number of logic AND expression's array
* @param nr_worker_threads: the number of scan threads which will call adapter_hs_scan()
*
* @retval the pointer to adapter_hs instance
*/
struct adapter_hs *adapter_hs_new(struct expr_rule *rules, size_t n_rule,
size_t n_worker_thread, struct log_handle *logger);
/**
* @brief scan input data to match logic AND expression, return all matched expr_id
*
* @param instance: adapter_hs instance obtained by adapter_hs_new()
* @param thread_id: the thread_id of caller
* @param data: data to be scanned
* @param data_len: the length of data to be scanned
* @param results: the array of expr_id
* @param n_results: number of elements in array of expr_id
*/
int adapter_hs_scan(struct adapter_hs *hs_instance, int thread_id,
const char *data, size_t data_len,
struct hs_scan_result *results,
size_t n_result, size_t *n_hit_result);
/**
* @brief destroy adapter_hs instance
*
* @param instance: adapter_hs instance obtained by adapter_hs_new()
*/
void adapter_hs_free(struct adapter_hs *instance);
struct adapter_hs_stream;
/**
* @brief open adapter_hs stream after adapter_hs instance initialized for stream scan
*
*/
struct adapter_hs_stream *adapter_hs_stream_open(struct adapter_hs *hs_instance, int thread_id);
int adapter_hs_scan_stream(struct adapter_hs_stream *stream, const char *data, size_t data_len,
struct hs_scan_result *results, size_t n_result, size_t *n_hit_result);
void adapter_hs_stream_close(struct adapter_hs_stream *stream);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -1,10 +1,10 @@
/*
**********************************************************************************************
* File: adapter_hs.cpp
* File: adapter_hs.c
* Description:
* Authors: Liu WenTan <liuwentan@geedgenetworks.com>
* Authors: Liu wentan <liuwentan@geedgenetworks.com>
* Date: 2022-10-31
* Copyright: (c) 2018-2022 Geedge Networks, Inc. All rights reserved.
* Copyright: (c) Since 2022 Geedge Networks, Ltd. All rights reserved.
***********************************************************************************************
*/
@@ -20,9 +20,8 @@
#include "uthash/utarray.h"
#include "uthash/uthash.h"
#include "maat_utils.h"
#include "../bool_matcher/bool_matcher.h"
#include "../../bool_matcher/bool_matcher.h"
#define MAX_OFFSET_NUM 1024
#define MAX_HIT_PATTERN_NUM 512
pid_t hs_gettid()
@@ -41,6 +40,7 @@ static const char *hs_module_name_str(const char *name)
#define MODULE_ADAPTER_HS hs_module_name_str("maat.adapter_hs")
struct adpt_hs_compile_data {
enum expr_pattern_type pat_type;
unsigned int *ids;
unsigned int *flags;
char **patterns;
@@ -56,7 +56,6 @@ struct adapter_hs_scratch {
struct adapter_hs_stream {
int thread_id;
size_t n_expr;
hs_stream_t *literal_stream;
hs_stream_t *regex_stream;
struct adapter_hs_runtime *ref_hs_rt;
@@ -91,7 +90,7 @@ struct pattern_offset {
struct pattern_attribute {
long long pattern_id;
enum hs_match_mode match_mode;
enum expr_match_mode match_mode;
struct pattern_offset offset;
};
@@ -137,12 +136,12 @@ static int _hs_alloc_scratch(hs_database_t *db, hs_scratch_t **scratches,
static int adpt_hs_alloc_scratch(struct adapter_hs_runtime *hs_rt,
size_t n_worker_thread,
enum hs_pattern_type pattern_type,
enum expr_pattern_type pattern_type,
struct log_handle *logger)
{
int ret = 0;
if (pattern_type == HS_PATTERN_TYPE_STR) {
if (pattern_type == EXPR_PATTERN_TYPE_STR) {
hs_rt->scratch->literal_scratches = ALLOC(hs_scratch_t *, n_worker_thread);
ret = _hs_alloc_scratch(hs_rt->literal_db, hs_rt->scratch->literal_scratches,
n_worker_thread, logger);
@@ -200,7 +199,7 @@ static int adpt_hs_build_database(struct adapter_hs_runtime *hs_rt,
if (regex_cd != NULL) {
err = hs_compile_multi((const char *const *)regex_cd->patterns,
regex_cd->flags, regex_cd->ids, regex_cd->n_patterns,
HS_MODE_STREAM | HS_MODE_SOM_HORIZON_SMALL,
HS_MODE_STREAM,
NULL, &hs_rt->regex_db, &compile_err);
if (err != HS_SUCCESS) {
if (compile_err) {
@@ -215,9 +214,11 @@ static int adpt_hs_build_database(struct adapter_hs_runtime *hs_rt,
return 0;
}
static struct adpt_hs_compile_data *adpt_hs_compile_data_new(size_t n_patterns)
static struct adpt_hs_compile_data *
adpt_hs_compile_data_new(enum expr_pattern_type pat_type, size_t n_patterns)
{
struct adpt_hs_compile_data *hs_cd = ALLOC(struct adpt_hs_compile_data, 1);
hs_cd->pat_type = pat_type;
hs_cd->patterns = ALLOC(char *, n_patterns);
hs_cd->pattern_lens = ALLOC(size_t, n_patterns);
hs_cd->n_patterns = n_patterns;
@@ -263,8 +264,11 @@ static void populate_compile_data(struct adpt_hs_compile_data *compile_data,
compile_data->ids[index] = pattern_id;
/* set flags */
if (compile_data->pat_type == EXPR_PATTERN_TYPE_STR) {
compile_data->flags[index] |= HS_FLAG_SOM_LEFTMOST;
if (case_sensitive == HS_CASE_INSENSITIVE) {
}
if (case_sensitive == EXPR_CASE_INSENSITIVE) {
compile_data->flags[index] |= HS_FLAG_CASELESS;
}
@@ -295,14 +299,14 @@ static struct bool_expr *bool_exprs_new(struct expr_rule *rules, size_t n_rule,
pattern_attr[pattern_index].pattern_id = pattern_index;
pattern_attr[pattern_index].match_mode = rules[i].patterns[j].match_mode;
if (pattern_attr[pattern_index].match_mode == HS_MATCH_MODE_SUB ||
pattern_attr[pattern_index].match_mode == HS_MATCH_MODE_EXACTLY) {
if (pattern_attr[pattern_index].match_mode == EXPR_MATCH_MODE_SUB ||
pattern_attr[pattern_index].match_mode == EXPR_MATCH_MODE_EXACTLY) {
pattern_attr[pattern_index].offset.start = rules[i].patterns[j].start_offset;
pattern_attr[pattern_index].offset.end = rules[i].patterns[j].end_offset;
}
/* literal pattern */
if (rules[i].patterns[j].pattern_type == HS_PATTERN_TYPE_STR) {
if (rules[i].patterns[j].type == EXPR_PATTERN_TYPE_STR) {
populate_compile_data(literal_cd, literal_index, pattern_index,
rules[i].patterns[j].pat, rules[i].patterns[j].pat_len,
rules[i].patterns[j].case_sensitive);
@@ -321,7 +325,7 @@ static struct bool_expr *bool_exprs_new(struct expr_rule *rules, size_t n_rule,
bool_exprs[i].expr_id = rules[i].expr_id;
bool_exprs[i].item_num = rules[i].n_patterns;
bool_exprs[i].user_tag = rules[i].user_tag;
bool_exprs[i].user_tag = rules[i].tag;
}
*n_pattern = pattern_index;
@@ -345,81 +349,43 @@ static int verify_regex_expression(const char *regex_str, struct log_handle *log
FREE(info);
hs_free_compile_error(error);
return -1;
return 0;
}
if (info != NULL) {
FREE(info);
}
return 0;
return 1;
}
int adapter_hs_verify_regex_expression(const char *regex_expr, struct log_handle *logger)
{
if (NULL == regex_expr) {
return -1;
return 0;
}
return verify_regex_expression(regex_expr, logger);
}
struct adapter_hs *adapter_hs_new(struct expr_rule *rules, size_t n_rule,
void *adapter_hs_new(struct expr_rule *rules, size_t n_rule,
size_t n_literal_pattern, size_t n_regex_pattern,
size_t n_worker_thread, struct log_handle *logger)
{
if (0 == n_worker_thread || NULL == rules || 0 == n_rule) {
log_error(logger, MODULE_ADAPTER_HS,
"[%s:%d] input parameters illegal!", __FUNCTION__, __LINE__);
return NULL;
}
/* get the sum of pattern */
size_t i = 0, j = 0;
size_t literal_pattern_num = 0;
size_t regex_pattern_num = 0;
for (i = 0; i < n_rule; i++) {
if (rules[i].n_patterns > MAX_EXPR_PATTERN_NUM) {
log_error(logger, MODULE_ADAPTER_HS,
"[%s:%d] the number of patterns in one expression "
"should less than %d", __FUNCTION__, __LINE__,
MAX_EXPR_PATTERN_NUM);
return NULL;
}
for (j = 0; j < rules[i].n_patterns; j++) {
/* pat_len should not 0 */
if (0 == rules[i].patterns[j].pat_len) {
log_error(logger, MODULE_ADAPTER_HS,
"[%s:%d] expr pattern length should not 0",
__FUNCTION__, __LINE__);
return NULL;
}
if (rules[i].patterns[j].pattern_type == HS_PATTERN_TYPE_STR) {
literal_pattern_num++;
} else {
regex_pattern_num++;
}
}
}
if (0 == literal_pattern_num && 0 == regex_pattern_num) {
log_error(logger, MODULE_ADAPTER_HS,
"[%s:%d] exprs has no valid pattern", __FUNCTION__, __LINE__);
return NULL;
}
size_t i = 0;
struct adpt_hs_compile_data *literal_cd = NULL;
struct adpt_hs_compile_data *regex_cd = NULL;
if (literal_pattern_num > 0) {
literal_cd = adpt_hs_compile_data_new(literal_pattern_num);
if (n_literal_pattern > 0) {
literal_cd = adpt_hs_compile_data_new(EXPR_PATTERN_TYPE_STR, n_literal_pattern);
}
if (regex_pattern_num > 0) {
regex_cd = adpt_hs_compile_data_new(regex_pattern_num);
if (n_regex_pattern > 0) {
regex_cd = adpt_hs_compile_data_new(EXPR_PATTERN_TYPE_REG, n_regex_pattern);
}
size_t pattern_cnt = literal_pattern_num + regex_pattern_num;
size_t pattern_cnt = n_literal_pattern + n_regex_pattern;
struct adapter_hs *hs_inst = ALLOC(struct adapter_hs, 1);
hs_inst->hs_attr = ALLOC(struct pattern_attribute, pattern_cnt);
hs_inst->logger = logger;
@@ -478,21 +444,21 @@ struct adapter_hs *adapter_hs_new(struct expr_rule *rules, size_t n_rule,
n_worker_thread);
for (i = 0; i < n_worker_thread; i++) {
hs_inst->hs_rt->scratch->bool_match_buffs[i] = ALLOC(struct bool_expr_match,
hs_inst->n_expr);
MAX_HIT_EXPR_NUM);
}
/* literal and regex scratch can't reuse */
if (literal_pattern_num > 0) {
if (n_literal_pattern > 0) {
ret = adpt_hs_alloc_scratch(hs_inst->hs_rt, n_worker_thread,
HS_PATTERN_TYPE_STR, logger);
EXPR_PATTERN_TYPE_STR, logger);
if (ret < 0) {
goto error;
}
}
if (regex_pattern_num > 0) {
if (n_regex_pattern > 0) {
ret = adpt_hs_alloc_scratch(hs_inst->hs_rt, n_worker_thread,
HS_PATTERN_TYPE_REG, logger);
EXPR_PATTERN_TYPE_REG, logger);
if (ret < 0) {
goto error;
}
@@ -500,7 +466,7 @@ struct adapter_hs *adapter_hs_new(struct expr_rule *rules, size_t n_rule,
hs_inst->hs_rt->streams = ALLOC(struct adapter_hs_stream *, n_worker_thread);
for (i = 0; i < n_worker_thread; i++) {
hs_inst->hs_rt->streams[i] = adapter_hs_stream_open(hs_inst, i);
hs_inst->hs_rt->streams[i] = (struct adapter_hs_stream *)adapter_hs_stream_open(hs_inst, i);
}
return hs_inst;
@@ -509,13 +475,15 @@ error:
return NULL;
}
void adapter_hs_free(struct adapter_hs *hs_inst)
void adapter_hs_free(void *hs_instance)
{
if (NULL == hs_inst) {
if (NULL == hs_instance) {
return;
}
struct adapter_hs *hs_inst = (struct adapter_hs *)hs_instance;
size_t i = 0;
if (hs_inst->hs_rt != NULL) {
if (hs_inst->hs_rt->literal_db != NULL) {
hs_free_database(hs_inst->hs_rt->literal_db);
@@ -625,12 +593,12 @@ static int matched_event_cb(unsigned int id, unsigned long long from,
int ret = 0;
struct pattern_attribute pat_attr = matched_pat->ref_hs_attr[id];
switch (pat_attr.match_mode) {
case HS_MATCH_MODE_EXACTLY:
case EXPR_MATCH_MODE_EXACTLY:
if (0 == from && matched_pat->scan_data_len == to) {
ret = 1;
}
break;
case HS_MATCH_MODE_SUB:
case EXPR_MATCH_MODE_SUB:
if (pat_attr.offset.start == -1 &&
pat_attr.offset.end == -1) {
ret = 1;
@@ -656,12 +624,12 @@ static int matched_event_cb(unsigned int id, unsigned long long from,
ret = 1;
}
break;
case HS_MATCH_MODE_PREFIX:
case EXPR_MATCH_MODE_PREFIX:
if (0 == from) {
ret = 1;
}
break;
case HS_MATCH_MODE_SUFFIX:
case EXPR_MATCH_MODE_SUFFIX:
if (to == matched_pat->scan_data_len) {
ret = 1;
}
@@ -678,43 +646,42 @@ static int matched_event_cb(unsigned int id, unsigned long long from,
return 0;
}
UT_icd ut_pattern_id_icd = {sizeof(unsigned long long), NULL, NULL, NULL};
struct adapter_hs_stream *
adapter_hs_stream_open(struct adapter_hs *hs_instance, int thread_id)
UT_icd ut_hs_pattern_id_icd = {sizeof(unsigned long long), NULL, NULL, NULL};
void *adapter_hs_stream_open(void *hs_instance, int thread_id)
{
if (NULL == hs_instance || thread_id < 0) {
return NULL;
}
struct adapter_hs *hs_inst = (struct adapter_hs *)hs_instance;
struct adapter_hs_stream *hs_stream = ALLOC(struct adapter_hs_stream, 1);
hs_error_t err;
hs_stream->logger = hs_instance->logger;
hs_stream->logger = hs_inst->logger;
hs_stream->thread_id = thread_id;
hs_stream->n_expr = hs_instance->n_expr;
hs_stream->ref_hs_rt = hs_instance->hs_rt;
hs_stream->ref_hs_rt = hs_inst->hs_rt;
hs_stream->matched_pat = ALLOC(struct matched_pattern, 1);
hs_stream->matched_pat->ref_hs_attr = hs_instance->hs_attr;
hs_stream->matched_pat->n_patterns = hs_instance->n_patterns;
utarray_new(hs_stream->matched_pat->pattern_ids, &ut_pattern_id_icd);
hs_stream->matched_pat->ref_hs_attr = hs_inst->hs_attr;
hs_stream->matched_pat->n_patterns = hs_inst->n_patterns;
utarray_new(hs_stream->matched_pat->pattern_ids, &ut_hs_pattern_id_icd);
utarray_reserve(hs_stream->matched_pat->pattern_ids, MAX_HIT_PATTERN_NUM);
int err_count = 0;
if (hs_instance->hs_rt->literal_db != NULL) {
err = hs_open_stream(hs_instance->hs_rt->literal_db, 0,
if (hs_inst->hs_rt->literal_db != NULL) {
err = hs_open_stream(hs_inst->hs_rt->literal_db, 0,
&hs_stream->literal_stream);
if (err != HS_SUCCESS) {
log_error(hs_instance->logger, MODULE_ADAPTER_HS,
log_error(hs_inst->logger, MODULE_ADAPTER_HS,
"hs_open_stream failed, hs err:%d", err);
err_count++;
}
}
if (hs_instance->hs_rt->regex_db != NULL) {
err = hs_open_stream(hs_instance->hs_rt->regex_db, 0,
if (hs_inst->hs_rt->regex_db != NULL) {
err = hs_open_stream(hs_inst->hs_rt->regex_db, 0,
&hs_stream->regex_stream);
if (err != HS_SUCCESS) {
log_error(hs_instance->logger, MODULE_ADAPTER_HS,
log_error(hs_inst->logger, MODULE_ADAPTER_HS,
"hs_open_stream failed, hs err:%d", err);
err_count++;
}
@@ -740,36 +707,37 @@ error:
return NULL;
}
void adapter_hs_stream_close(struct adapter_hs_stream *hs_stream)
void adapter_hs_stream_close(void *hs_stream)
{
if (NULL == hs_stream) {
return;
}
if (hs_stream->ref_hs_rt != NULL) {
if (hs_stream->literal_stream != NULL) {
hs_close_stream(hs_stream->literal_stream, NULL, NULL, NULL);
hs_stream->literal_stream = NULL;
struct adapter_hs_stream *stream = (struct adapter_hs_stream *)hs_stream;
if (stream->ref_hs_rt != NULL) {
if (stream->literal_stream != NULL) {
hs_close_stream(stream->literal_stream, NULL, NULL, NULL);
stream->literal_stream = NULL;
}
if (hs_stream->regex_stream != NULL) {
hs_close_stream(hs_stream->regex_stream, NULL, NULL, NULL);
hs_stream->regex_stream = NULL;
if (stream->regex_stream != NULL) {
hs_close_stream(stream->regex_stream, NULL, NULL, NULL);
stream->regex_stream = NULL;
}
}
/* hs_stream->hs_rt point to hs_instance->hs_rt which will call free
/* stream->hs_rt point to hs_instance->hs_rt which will call free
same as hs_attr */
hs_stream->ref_hs_rt = NULL;
hs_stream->matched_pat->ref_hs_attr = NULL;
stream->ref_hs_rt = NULL;
stream->matched_pat->ref_hs_attr = NULL;
if (hs_stream->matched_pat->pattern_ids != NULL) {
utarray_free(hs_stream->matched_pat->pattern_ids);
hs_stream->matched_pat->pattern_ids = NULL;
if (stream->matched_pat->pattern_ids != NULL) {
utarray_free(stream->matched_pat->pattern_ids);
stream->matched_pat->pattern_ids = NULL;
}
FREE(hs_stream->matched_pat);
FREE(hs_stream);
FREE(stream->matched_pat);
FREE(stream);
}
static void adapter_hs_stream_reset(struct adapter_hs_stream *hs_stream)
@@ -794,9 +762,9 @@ static void adapter_hs_stream_reset(struct adapter_hs_stream *hs_stream)
utarray_clear(hs_stream->matched_pat->pattern_ids);
}
int adapter_hs_scan_stream(struct adapter_hs_stream *hs_stream, const char *data,
size_t data_len, struct hs_scan_result *results,
size_t n_result, size_t *n_hit_result)
int adapter_hs_scan_stream(void *hs_stream, const char *data, size_t data_len,
struct expr_scan_result *results, size_t n_result,
size_t *n_hit_result)
{
hs_error_t err;
@@ -816,36 +784,37 @@ int adapter_hs_scan_stream(struct adapter_hs_stream *hs_stream, const char *data
*/
int err_count = 0;
int thread_id = hs_stream->thread_id;
struct adapter_hs_scratch *scratch = hs_stream->ref_hs_rt->scratch;
hs_stream->matched_pat->scan_data_len = data_len;
struct adapter_hs_stream *stream = (struct adapter_hs_stream *)hs_stream;
int thread_id = stream->thread_id;
struct adapter_hs_scratch *scratch = stream->ref_hs_rt->scratch;
stream->matched_pat->scan_data_len = data_len;
int err_scratch_flag = 0;
if (hs_stream->literal_stream != NULL) {
if (stream->literal_stream != NULL) {
if (scratch->literal_scratches != NULL) {
err = hs_scan_stream(hs_stream->literal_stream, data, data_len,
err = hs_scan_stream(stream->literal_stream, data, data_len,
0, scratch->literal_scratches[thread_id],
matched_event_cb, hs_stream->matched_pat);
matched_event_cb, stream->matched_pat);
if (err != HS_SUCCESS) {
err_count++;
}
} else {
log_error(hs_stream->logger, MODULE_ADAPTER_HS,
log_error(stream->logger, MODULE_ADAPTER_HS,
"literal_scratches is null, thread_id:%d", thread_id);
err_scratch_flag++;
}
}
if (hs_stream->regex_stream != NULL) {
if (stream->regex_stream != NULL) {
if (scratch->regex_scratches != NULL) {
err = hs_scan_stream(hs_stream->regex_stream, data, data_len,
err = hs_scan_stream(stream->regex_stream, data, data_len,
0, scratch->regex_scratches[thread_id],
matched_event_cb, hs_stream->matched_pat);
matched_event_cb, stream->matched_pat);
if (err != HS_SUCCESS) {
err_count++;
}
} else {
log_error(hs_stream->logger, MODULE_ADAPTER_HS,
log_error(stream->logger, MODULE_ADAPTER_HS,
"regex_scratches is null, thread_id:%d", thread_id);
err_scratch_flag++;
}
@@ -859,7 +828,7 @@ int adapter_hs_scan_stream(struct adapter_hs_stream *hs_stream, const char *data
return -1;
}
size_t n_pattern_id = utarray_len(hs_stream->matched_pat->pattern_ids);
size_t n_pattern_id = utarray_len(stream->matched_pat->pattern_ids);
if (0 == n_pattern_id) {
*n_hit_result = 0;
return 0;
@@ -868,13 +837,13 @@ int adapter_hs_scan_stream(struct adapter_hs_stream *hs_stream, const char *data
unsigned long long pattern_ids[n_pattern_id];
for (size_t i = 0; i < n_pattern_id; i++) {
pattern_ids[i] = *(unsigned long long *)utarray_eltptr(hs_stream->matched_pat->pattern_ids, i);
pattern_ids[i] = *(unsigned long long *)utarray_eltptr(stream->matched_pat->pattern_ids, i);
}
int ret = 0;
struct bool_expr_match *bool_matcher_results = scratch->bool_match_buffs[thread_id];
int bool_matcher_ret = bool_matcher_match(hs_stream->ref_hs_rt->bm, pattern_ids, n_pattern_id,
bool_matcher_results, hs_stream->n_expr);
int bool_matcher_ret = bool_matcher_match(stream->ref_hs_rt->bm, pattern_ids, n_pattern_id,
bool_matcher_results, MAX_HIT_EXPR_NUM);
if (bool_matcher_ret < 0) {
ret = -1;
goto next;
@@ -891,22 +860,21 @@ int adapter_hs_scan_stream(struct adapter_hs_stream *hs_stream, const char *data
*n_hit_result = bool_matcher_ret;
next:
utarray_clear(hs_stream->matched_pat->pattern_ids);
utarray_clear(stream->matched_pat->pattern_ids);
return ret;
}
int adapter_hs_scan(struct adapter_hs *hs_instance, int thread_id,
const char *data, size_t data_len,
struct hs_scan_result *results,
size_t n_result, size_t *n_hit_result)
int adapter_hs_scan(void *hs_instance, int thread_id, const char *data, size_t data_len,
struct expr_scan_result *results, size_t n_result, size_t *n_hit_result)
{
if (NULL == hs_instance || NULL == data || (0 == data_len) ||
NULL == results || 0 == n_result || NULL == n_hit_result) {
return -1;
}
struct adapter_hs_stream *hs_stream = hs_instance->hs_rt->streams[thread_id];
struct adapter_hs *hs_inst = (struct adapter_hs *)hs_instance;
struct adapter_hs_stream *hs_stream = hs_inst->hs_rt->streams[thread_id];
assert(hs_stream != NULL);
adapter_hs_stream_reset(hs_stream);

View File

@@ -0,0 +1,75 @@
/*
**********************************************************************************************
* File: adapter_hs.h
* Description:
* Authors: Liu wentan <liuwentan@geedgenetworks.com>
* Date: 2022-10-31
* Copyright: (c) Since 2022 Geedge Networks, Ltd. All rights reserved.
***********************************************************************************************
*/
#ifndef _ADAPTER_HS_H_
#define _ADAPTER_HS_H_
#ifdef __cplusplus
extern "C"
{
#endif
#include <stddef.h>
#include <stdint.h>
#include "log/log.h"
#include "../expr_matcher.h"
/**
 * @brief check whether a regex expression can be used by the hyperscan adapter
 *
 * @param regex_expr: NUL-terminated regex expression to verify
 * @param logger: log handle handed to the verification routine
 *
 * @retval 1 if the expression is accepted, 0 if regex_expr is NULL or the
 *         expression is rejected
 */
int adapter_hs_verify_regex_expression(const char *regex_expr, struct log_handle *logger);
/**
 * @brief new adapter_hs instance
 *
 * @param rules: logic AND expression's array
 * @param n_rule: the number of logic AND expression's array
 * @param n_literal_pattern: total number of literal (pure string) patterns in rules;
 *                           sizes the literal compile data, 0 means no literal database
 * @param n_regex_pattern: total number of regex patterns in rules;
 *                         sizes the regex compile data, 0 means no regex database
 * @param n_worker_thread: the number of scan threads which will call adapter_hs_scan()
 * @param logger: log handle stored in the instance and used for diagnostics
 *
 * @retval the pointer to adapter_hs instance (returned as void *), NULL on failure
 */
void *adapter_hs_new(struct expr_rule *rules, size_t n_rule,
size_t n_literal_pattern, size_t n_regex_pattern,
size_t n_worker_thread, struct log_handle *logger);
/**
 * @brief scan input data to match logic AND expression, return all matched expr_id
 *
 * @param hs_instance: adapter_hs instance obtained by adapter_hs_new()
 * @param thread_id: the thread_id of caller; selects the per-thread stream of the instance
 * @param data: data to be scanned
 * @param data_len: the length of data to be scanned
 * @param results: caller-provided array that receives the scan results
 * @param n_result: number of elements in the results array
 * @param n_hit_result: output, receives the number of hit results
 *
 * @retval -1 if hs_instance, data, results or n_hit_result is NULL, or if
 *         data_len or n_result is 0
 */
int adapter_hs_scan(void *hs_instance, int thread_id, const char *data, size_t data_len,
struct expr_scan_result *results, size_t n_result, size_t *n_hit_result);
/**
 * @brief destroy adapter_hs instance
 *
 * @param instance: adapter_hs instance obtained by adapter_hs_new();
 *                  a NULL pointer is ignored
 */
void adapter_hs_free(void *instance);
/**
 * @brief open adapter_hs stream after adapter_hs instance initialized for stream scan
 *
 * @param hs_instance: adapter_hs instance obtained by adapter_hs_new()
 * @param thread_id: the thread_id of caller, must not be negative
 *
 * @retval the pointer to the stream (returned as void *); NULL if hs_instance is
 *         NULL, thread_id is negative, or opening the stream failed
 */
void *adapter_hs_stream_open(void *hs_instance, int thread_id);
/**
 * @brief scan one chunk of data on a stream obtained by adapter_hs_stream_open()
 *
 * @param stream: stream obtained by adapter_hs_stream_open()
 * @param data: data to be scanned
 * @param data_len: the length of data to be scanned
 * @param results: caller-provided array that receives the scan results
 * @param n_result: number of elements in the results array
 * @param n_hit_result: output, receives the number of hit results (0 when nothing hit)
 */
int adapter_hs_scan_stream(void *stream, const char *data, size_t data_len,
struct expr_scan_result *results, size_t n_result,
size_t *n_hit_result);
/**
 * @brief close a stream obtained by adapter_hs_stream_open(); NULL is ignored
 */
void adapter_hs_stream_close(void *stream);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,708 @@
/*
**********************************************************************************************
* File: adapter_rs.cpp
* Description:
* Authors: Liu wentan <liuwentan@geedgenetworks.com>
* Date: 2022-10-31
* Copyright: (c) Since 2022 Geedge Networks, Ltd. All rights reserved.
***********************************************************************************************
*/
#include <stdint.h>
#include <stdio.h>
#include <stddef.h>
#include <assert.h>
#include <unistd.h>
#include <sys/syscall.h>
#include "rulescan.h"
#include "adapter_rs.h"
#include "uthash/utarray.h"
#include "uthash/uthash.h"
#include "maat_utils.h"
#include "../../bool_matcher/bool_matcher.h"
#define MAX_HIT_PATTERN_NUM 512
/* Return the kernel thread id of the calling thread via the raw gettid system call. */
pid_t rs_gettid()
{
    const pid_t tid = (pid_t)syscall(SYS_gettid);

    return tid;
}
/*
 * Format a log module tag as "<name>(<tid>)", where <tid> is the calling
 * thread's id from rs_gettid().
 *
 * The text lives in a 64-byte thread-local buffer: it is overwritten by the
 * next call made on the same thread, and longer output is truncated by
 * snprintf().
 */
static const char *rs_module_name_str(const char *name)
{
    static __thread char tag_buf[64];
    const pid_t tid = rs_gettid();

    snprintf(tag_buf, sizeof(tag_buf), "%s(%d)", name, tid);

    return tag_buf;
}
#define MODULE_ADAPTER_RS rs_module_name_str("maat.adapter_rs")
/* Patterns gathered for one rulescan compile call (built separately for literals and regexes). */
struct adpt_rs_compile_data {
/* array of n_patterns entries; each entry is filled by populate_compile_data() */
struct scan_pattern *patterns;
/* number of entries allocated in patterns */
size_t n_patterns;
};
/* State of one rulescan stream scan; adapter_rs_new() opens one per worker thread. */
struct adapter_rs_stream {
/* presumably the worker thread index given to adapter_rs_stream_open() - confirm there */
int thread_id;
size_t offset; /* current stream offset */
/* stream handle for literal patterns */
rs_stream_t *literal_stream;
/* stream handle for regex patterns */
rs_stream_t *regex_stream;
/* runtime of the owning instance; the ref_ prefix suggests it is not owned here - confirm */
struct adapter_rs_runtime *ref_rs_rt;
struct log_handle *logger;
};
/* adapter_rs runtime: compiled databases plus the per-thread scan resources */
struct adapter_rs_runtime {
/* database produced by rs_compile_lit() */
rs_database_t *literal_db;
/* database produced by rs_compile_regex() */
rs_database_t *regex_db;
/* each per-thread buffer holds MAX_HIT_EXPR_NUM entries */
struct bool_expr_match **bool_match_buffs; /* per thread */
struct adapter_rs_stream **streams; /* per thread */
struct matched_pattern **matched_pats; /* per thread */
/* bool matcher built from the rules by bool_matcher_new() */
struct bool_matcher *bm;
};
/* adapter_rs instance: what adapter_rs_new() returns (as void *) */
struct adapter_rs {
size_t n_worker_thread;
/* number of rules the instance was built from */
size_t n_expr;
/* total number of patterns counted while building the bool expressions */
size_t n_patterns;
struct adapter_rs_runtime *rs_rt;
/* one attribute per pattern, indexed by the pattern id assigned in bool_exprs_new() */
struct pattern_attribute *rs_attr;
struct log_handle *logger;
};
/* Offset range copied from a rule pattern's start_offset / end_offset. */
struct pattern_offset {
long long start;
long long end;
};
/* Per-pattern attributes recorded by bool_exprs_new(), one entry per pattern. */
struct pattern_attribute {
/* equals the entry's own index in the attribute array */
long long pattern_id;
enum expr_match_mode match_mode;
/* only filled in for EXPR_MATCH_MODE_SUB and EXPR_MATCH_MODE_EXACTLY patterns */
struct pattern_offset offset;
/* length of the pattern in bytes (the rule pattern's pat_len) */
size_t pattern_len;
};
/* Per-thread collection of pattern ids; adapter_rs_new() allocates one per worker thread. */
struct matched_pattern {
/* array of unsigned long long elements (see ut_rs_pattern_id_icd) */
UT_array *pattern_ids;
/* copy of the instance's n_patterns */
size_t n_patterns;
/* points at the instance's rs_attr array; not a separate allocation */
struct pattern_attribute *ref_rs_attr;
};
/**
 * @brief check whether a regex expression is accepted by rulescan
 *
 * @param regex_expr: NUL-terminated regex expression to verify
 * @param logger: log handle used to report an illegal expression
 *
 * @retval the value returned by rs_verify_regex(); 0 means the expression is
 *         illegal. 0 is also returned for a NULL regex_expr, matching
 *         adapter_hs_verify_regex_expression().
 */
int adapter_rs_verify_regex_expression(const char *regex_expr,
                                       struct log_handle *logger)
{
    /* a NULL expression must reach neither rs_verify_regex() nor the "%s" below */
    if (NULL == regex_expr) {
        return 0;
    }

    int ret = rs_verify_regex(regex_expr);
    if (0 == ret) {
        log_error(logger, MODULE_ADAPTER_RS,
                  "[%s:%d] illegal regex expression: \"%s\"",
                  __FUNCTION__, __LINE__, regex_expr);
    }

    return ret;
}
/**
 * @brief compile the literal and the regex rulescan databases of a runtime
 *
 * A NULL compile-data pointer means "no patterns of that kind" and the
 * corresponding database is left untouched.
 *
 * @param rs_rt: runtime whose literal_db / regex_db receive the compiled databases
 * @param n_worker_thread: forwarded to rs_compile_regex()
 * @param literal_cd: literal patterns, or NULL
 * @param regex_cd: regex patterns, or NULL
 * @param logger: log handle used to report compile errors
 *
 * @retval 0(success) -1(rs_rt is NULL or a compile call failed)
 */
static int adpt_rs_build_database(struct adapter_rs_runtime *rs_rt,
                                  size_t n_worker_thread,
                                  struct adpt_rs_compile_data *literal_cd,
                                  struct adpt_rs_compile_data *regex_cd,
                                  struct log_handle *logger)
{
    if (NULL == rs_rt) {
        return -1;
    }

    if (literal_cd != NULL) {
        int lit_rc = rs_compile_lit(literal_cd->patterns, literal_cd->n_patterns,
                                    &rs_rt->literal_db);
        if (lit_rc < 0) {
            log_error(logger, MODULE_ADAPTER_RS, "[%s:%d] compile error",
                      __FUNCTION__, __LINE__);
            return -1;
        }
    }

    if (NULL == regex_cd) {
        return 0;
    }

    /* filled in by rs_compile_regex(); the value is not consumed here */
    size_t failed_cnt = 0;
    int regex_rc = rs_compile_regex(regex_cd->patterns, regex_cd->n_patterns,
                                    n_worker_thread, &rs_rt->regex_db, &failed_cnt);
    if (regex_rc < 0) {
        log_error(logger, MODULE_ADAPTER_RS, "[%s:%d] compile error",
                  __FUNCTION__, __LINE__);
        return -1;
    }

    return 0;
}
/*
 * Allocate a compile-data container with room for n_patterns patterns.
 * Release it with adpt_rs_compile_data_free().
 */
static struct adpt_rs_compile_data *adpt_rs_compile_data_new(size_t n_patterns)
{
    struct adpt_rs_compile_data *cd = ALLOC(struct adpt_rs_compile_data, 1);

    cd->n_patterns = n_patterns;
    cd->patterns = ALLOC(struct scan_pattern, n_patterns);

    return cd;
}
static void adpt_rs_compile_data_free(struct adpt_rs_compile_data *rs_cd)
{
if (NULL == rs_cd) {
return;
}
if (rs_cd->patterns != NULL) {
for (size_t i = 0; i < rs_cd->n_patterns; i++) {
if (rs_cd->patterns[i].pattern != NULL) {
FREE(rs_cd->patterns[i].pattern);
}
}
FREE(rs_cd->patterns);
}
FREE(rs_cd);
}
static void populate_compile_data(struct adpt_rs_compile_data *compile_data,
size_t index, long long pattern_id, char *pat,
size_t pat_len, int case_sensitive)
{
compile_data->patterns[index].id = pattern_id;
compile_data->patterns[index].case_sensitive = case_sensitive;
compile_data->patterns[index].pattern = ALLOC(char, pat_len + 1);
memcpy(compile_data->patterns[index].pattern, pat, pat_len);
compile_data->patterns[index].pattern_len = pat_len;
}
static struct bool_expr *bool_exprs_new(struct expr_rule *rules, size_t n_rule,
struct pattern_attribute *pattern_attr,
struct adpt_rs_compile_data *literal_cd,
struct adpt_rs_compile_data *regex_cd,
size_t *n_pattern)
{
long long pattern_idx = 0;
size_t literal_idx = 0;
size_t regex_idx = 0;
struct bool_expr *bool_exprs = ALLOC(struct bool_expr, n_rule);
/* populate adpt_rs_compile_data and bool_expr */
for (size_t i = 0; i < n_rule; i++) {
for (size_t j = 0; j < rules[i].n_patterns; j++) {
pattern_attr[pattern_idx].pattern_id = pattern_idx;
pattern_attr[pattern_idx].match_mode = rules[i].patterns[j].match_mode;
pattern_attr[pattern_idx].pattern_len = rules[i].patterns[j].pat_len;
if (pattern_attr[pattern_idx].match_mode == EXPR_MATCH_MODE_SUB ||
pattern_attr[pattern_idx].match_mode == EXPR_MATCH_MODE_EXACTLY) {
pattern_attr[pattern_idx].offset.start = rules[i].patterns[j].start_offset;
pattern_attr[pattern_idx].offset.end = rules[i].patterns[j].end_offset;
}
/* literal pattern */
if (rules[i].patterns[j].type == EXPR_PATTERN_TYPE_STR) {
populate_compile_data(literal_cd, literal_idx, pattern_idx,
rules[i].patterns[j].pat, rules[i].patterns[j].pat_len,
rules[i].patterns[j].case_sensitive);
literal_idx++;
} else {
/* regex pattern */
populate_compile_data(regex_cd, regex_idx, pattern_idx,
rules[i].patterns[j].pat, rules[i].patterns[j].pat_len,
rules[i].patterns[j].case_sensitive);
regex_idx++;
}
bool_exprs[i].items[j].item_id = pattern_idx++;
bool_exprs[i].items[j].not_flag = 0;
}
bool_exprs[i].expr_id = rules[i].expr_id;
bool_exprs[i].item_num = rules[i].n_patterns;
bool_exprs[i].user_tag = rules[i].tag;
}
*n_pattern = pattern_idx;
return bool_exprs;
}
/* utarray element descriptor for pattern ids: unsigned long long elements, no init/copy/dtor hooks. */
UT_icd ut_rs_pattern_id_icd = {sizeof(unsigned long long), NULL, NULL, NULL};
void *adapter_rs_new(struct expr_rule *rules, size_t n_rule,
size_t n_literal_pattern, size_t n_regex_pattern,
size_t n_worker_thread, struct log_handle *logger)
{
/* get the sum of pattern */
size_t i = 0;
struct adpt_rs_compile_data *literal_cd = NULL;
struct adpt_rs_compile_data *regex_cd = NULL;
if (n_literal_pattern > 0) {
literal_cd = adpt_rs_compile_data_new(n_literal_pattern);
}
if (n_regex_pattern > 0) {
regex_cd = adpt_rs_compile_data_new(n_regex_pattern);
}
size_t pattern_cnt = n_literal_pattern + n_regex_pattern;
struct adapter_rs *rs_inst = ALLOC(struct adapter_rs, 1);
rs_inst->rs_attr = ALLOC(struct pattern_attribute, pattern_cnt);
rs_inst->logger = logger;
rs_inst->n_worker_thread = n_worker_thread;
rs_inst->n_expr = n_rule;
struct bool_expr *bool_exprs = bool_exprs_new(rules, n_rule, rs_inst->rs_attr,
literal_cd, regex_cd, &pattern_cnt);
if (NULL == bool_exprs) {
return NULL;
}
rs_inst->n_patterns = pattern_cnt;
/* create bool matcher */
size_t mem_size = 0;
int rs_ret = 0;
rs_inst->rs_rt = ALLOC(struct adapter_rs_runtime, 1);
//rs_rt->bm
rs_inst->rs_rt->bm = bool_matcher_new(bool_exprs, n_rule, &mem_size);
if (rs_inst->rs_rt->bm != NULL) {
log_info(logger, MODULE_ADAPTER_RS,
"Adapter_rs module: build bool matcher of %zu expressions"
" with %zu bytes memory", n_rule, mem_size);
} else {
log_error(logger, MODULE_ADAPTER_RS,
"[%s:%d] Adapter_rs module: build bool matcher failed",
__FUNCTION__, __LINE__);
rs_ret = -1;
}
FREE(bool_exprs);
/* build rs database rs_rt->literal_db & rs_rt->regex_db */
int ret = adpt_rs_build_database(rs_inst->rs_rt, n_worker_thread,
literal_cd, regex_cd, logger);
if (ret < 0) {
rs_ret = -1;
}
if (literal_cd != NULL) {
adpt_rs_compile_data_free(literal_cd);
}
if (regex_cd != NULL) {
adpt_rs_compile_data_free(regex_cd);
}
if (rs_ret < 0) {
goto error;
}
/* alloc scratch */
rs_inst->rs_rt->bool_match_buffs = ALLOC(struct bool_expr_match *, n_worker_thread);
for (i = 0; i < n_worker_thread; i++) {
rs_inst->rs_rt->bool_match_buffs[i] = ALLOC(struct bool_expr_match, MAX_HIT_EXPR_NUM);
}
rs_inst->rs_rt->streams = ALLOC(struct adapter_rs_stream *, n_worker_thread);
for (i = 0; i < n_worker_thread; i++) {
rs_inst->rs_rt->streams[i] = (struct adapter_rs_stream *)adapter_rs_stream_open(rs_inst, i);
}
rs_inst->rs_rt->matched_pats = ALLOC(struct matched_pattern *, n_worker_thread);
for (i = 0; i < n_worker_thread; i++) {
rs_inst->rs_rt->matched_pats[i] = ALLOC(struct matched_pattern, 1);
rs_inst->rs_rt->matched_pats[i]->ref_rs_attr = rs_inst->rs_attr;
rs_inst->rs_rt->matched_pats[i]->n_patterns = rs_inst->n_patterns;
utarray_new(rs_inst->rs_rt->matched_pats[i]->pattern_ids, &ut_rs_pattern_id_icd);
utarray_reserve(rs_inst->rs_rt->matched_pats[i]->pattern_ids, MAX_HIT_PATTERN_NUM);
}
return rs_inst;
error:
adapter_rs_free(rs_inst);
return NULL;
}
void adapter_rs_free(void *rs_instance)
{
if (NULL == rs_instance) {
return;
}
size_t i = 0;
struct adapter_rs *rs_inst = (struct adapter_rs *)rs_instance;
if (rs_inst->rs_rt != NULL) {
if (rs_inst->rs_rt->literal_db != NULL) {
rs_free_database(rs_inst->rs_rt->literal_db);
rs_inst->rs_rt->literal_db = NULL;
}
if (rs_inst->rs_rt->regex_db != NULL) {
rs_free_database(rs_inst->rs_rt->regex_db);
rs_inst->rs_rt->regex_db = NULL;
}
if (rs_inst->rs_rt->bool_match_buffs != NULL) {
for (i = 0; i < rs_inst->n_worker_thread; i++) {
if (rs_inst->rs_rt->bool_match_buffs[i] != NULL) {
FREE(rs_inst->rs_rt->bool_match_buffs[i]);
}
}
FREE(rs_inst->rs_rt->bool_match_buffs);
}
if (rs_inst->rs_rt->bm != NULL) {
bool_matcher_free(rs_inst->rs_rt->bm);
rs_inst->rs_rt->bm = NULL;
}
if (rs_inst->rs_rt->streams != NULL) {
for (i = 0; i < rs_inst->n_worker_thread; i++) {
if (rs_inst->rs_rt->streams[i] != NULL) {
adapter_rs_stream_close(rs_inst->rs_rt->streams[i]);
rs_inst->rs_rt->streams[i] = NULL;
}
}
FREE(rs_inst->rs_rt->streams);
}
if (rs_inst->rs_rt->matched_pats != NULL) {
for (i = 0; i < rs_inst->n_worker_thread; i++) {
if (rs_inst->rs_rt->matched_pats[i] != NULL) {
utarray_free(rs_inst->rs_rt->matched_pats[i]->pattern_ids);
FREE(rs_inst->rs_rt->matched_pats[i]);
}
}
FREE(rs_inst->rs_rt->matched_pats);
}
FREE(rs_inst->rs_rt);
}
if (rs_inst->rs_attr != NULL) {
FREE(rs_inst->rs_attr);
}
FREE(rs_inst);
}
/**
 * Three-way comparator for unsigned long long pattern ids, used to sort and
 * search the matched pattern id array.
 *
 * @param a, b: pointers to the two ids to compare
 * @retval -1 if *a < *b, 0 if equal, 1 if *a > *b
 */
static inline int compare_pattern_id(const void *a, const void *b)
{
    unsigned long long lhs = *(const unsigned long long *)a;
    unsigned long long rhs = *(const unsigned long long *)b;

    /* compare directly instead of subtracting: an unsigned difference wraps
       and yields the wrong sign once the ids are far enough apart */
    return (lhs > rhs) - (lhs < rhs);
}
/**
* @param id: pattern id
*/
static int matched_event_cb(unsigned int id, int pos_offset, int from, int to,
size_t data_len, void *ctx)
{
// put id in set
unsigned long long pattern_id = id;
struct matched_pattern *matched_pat = (struct matched_pattern *)ctx;
if (pattern_id > matched_pat->n_patterns || id < 0) {
return 0;
}
if (utarray_len(matched_pat->pattern_ids) >= MAX_HIT_PATTERN_NUM) {
return 0;
}
// duplicate pattern_id
if (utarray_find(matched_pat->pattern_ids, &pattern_id, compare_pattern_id)) {
return 0;
}
int ret = 0;
struct pattern_attribute pat_attr = matched_pat->ref_rs_attr[id];
switch (pat_attr.match_mode) {
case EXPR_MATCH_MODE_EXACTLY:
if (0 == (from + pos_offset) && (int)data_len == (to + pos_offset)) {
ret = 1;
}
break;
case EXPR_MATCH_MODE_SUB:
if (pat_attr.offset.start == -1 &&
pat_attr.offset.end == -1) {
ret = 1;
break;
}
if (pat_attr.offset.start == -1) {
if ((long long)(to + pos_offset - 1) <= pat_attr.offset.end) {
ret = 1;
break;
}
}
if (pat_attr.offset.end == -1) {
if ((long long)(from + pos_offset) >= pat_attr.offset.start) {
ret = 1;
break;
}
}
if ((long long)(from + pos_offset) >= pat_attr.offset.start &&
(long long)(to + pos_offset - 1) <= pat_attr.offset.end) {
ret = 1;
}
break;
case EXPR_MATCH_MODE_PREFIX:
if (0 == (from + pos_offset)) {
ret = 1;
}
break;
case EXPR_MATCH_MODE_SUFFIX:
if ((to + pos_offset) == (int)data_len) {
ret = 1;
}
break;
default:
break;
}
if (1 == ret) {
utarray_push_back(matched_pat->pattern_ids, &pattern_id);
utarray_sort(matched_pat->pattern_ids, compare_pattern_id);
}
return 0;
}
/**
 * @brief open a stream handle bound to one worker thread
 *
 * One underlying stream is opened for each database (literal / regex) that
 * exists in the instance.
 * NOTE(review): the meaning of the literal arguments 0 and 128 passed to
 * rs_open_stream() is not visible here - confirm against the rulescan API.
 *
 * @param rs_instance: instance obtained by adapter_rs_new()
 * @param thread_id: worker thread index, must be in [0, n_worker_thread)
 *
 * @retval the stream handle (release with adapter_rs_stream_close()), or NULL
 *         on invalid arguments or when a stream could not be opened
 */
void *adapter_rs_stream_open(void *rs_instance, int thread_id)
{
    if (NULL == rs_instance || thread_id < 0) {
        return NULL;
    }

    struct adapter_rs *rs_inst = (struct adapter_rs *)rs_instance;

    /* thread_id later indexes the per-thread arrays of the runtime, so it must
       stay below the number of worker threads the instance was built for */
    if (NULL == rs_inst->rs_rt || (size_t)thread_id >= rs_inst->n_worker_thread) {
        return NULL;
    }

    struct adapter_rs_stream *rs_stream = ALLOC(struct adapter_rs_stream, 1);
    rs_stream->logger = rs_inst->logger;
    rs_stream->thread_id = thread_id;
    rs_stream->ref_rs_rt = rs_inst->rs_rt;

    int err_count = 0;

    if (rs_inst->rs_rt->literal_db != NULL) {
        rs_stream->literal_stream = rs_open_stream(rs_inst->rs_rt->literal_db, 0, 128);
        if (NULL == rs_stream->literal_stream) {
            log_error(rs_inst->logger, MODULE_ADAPTER_RS, "rs_open_stream failed");
            err_count++;
        }
    }

    if (rs_inst->rs_rt->regex_db != NULL) {
        rs_stream->regex_stream = rs_open_stream(rs_inst->rs_rt->regex_db, 0, 128);
        if (NULL == rs_stream->regex_stream) {
            log_error(rs_inst->logger, MODULE_ADAPTER_RS, "rs_open_stream failed");
            err_count++;
        }
    }

    if (0 == err_count) {
        return rs_stream;
    }

    /* at least one open failed: close whatever was opened and give up */
    if (rs_stream->literal_stream != NULL) {
        rs_close_stream(rs_stream->literal_stream);
        rs_stream->literal_stream = NULL;
    }

    if (rs_stream->regex_stream != NULL) {
        rs_close_stream(rs_stream->regex_stream);
        rs_stream->regex_stream = NULL;
    }

    FREE(rs_stream);
    return NULL;
}
/**
 * @brief close a stream handle obtained by adapter_rs_stream_open()
 *
 * The underlying literal/regex streams are closed only while the handle still
 * references a runtime. The runtime itself is not released here; it belongs to
 * the adapter_rs instance.
 *
 * @param rs_stream: stream handle, NULL is ignored
 */
void adapter_rs_stream_close(void *rs_stream)
{
    struct adapter_rs_stream *handle = (struct adapter_rs_stream *)rs_stream;

    if (handle == NULL) {
        return;
    }

    if (handle->ref_rs_rt != NULL) {
        if (handle->literal_stream != NULL) {
            rs_close_stream(handle->literal_stream);
            handle->literal_stream = NULL;
        }

        if (handle->regex_stream != NULL) {
            rs_close_stream(handle->regex_stream);
            handle->regex_stream = NULL;
        }
    }

    /* borrowed pointer: only drop the reference, the instance frees it */
    handle->ref_rs_rt = NULL;
    FREE(handle);
}
int adapter_rs_scan_stream(void *rs_stream, const char *data, size_t data_len,
struct expr_scan_result *results, size_t n_result,
size_t *n_hit_result)
{
if (NULL == rs_stream || NULL == data || 0 == data_len ||
NULL == results || 0 == n_result || NULL == n_hit_result) {
return -1;
}
int ret = 0, err_count = 0;
struct adapter_rs_stream *stream = (struct adapter_rs_stream *)rs_stream;
int thread_id = stream->thread_id;
struct adapter_rs_runtime *rs_rt = stream->ref_rs_rt;
struct matched_pattern *matched_pat = rs_rt->matched_pats[thread_id];
if (stream->literal_stream != NULL) {
ret = rs_scan_stream(stream->literal_stream, data, data_len,
matched_event_cb, matched_pat);
if (ret < 0) {
err_count++;
}
}
if (stream->regex_stream != NULL) {
ret = rs_scan_stream(stream->regex_stream, data, data_len,
matched_event_cb, matched_pat);
if (ret < 0) {
err_count++;
}
}
if (err_count == 2) {
return -1;
}
size_t n_pattern_id = utarray_len(matched_pat->pattern_ids);
if (0 == n_pattern_id) {
*n_hit_result = 0;
return 0;
}
unsigned long long pattern_ids[n_pattern_id];
for (size_t i = 0; i < n_pattern_id; i++) {
pattern_ids[i] = *(unsigned long long *)utarray_eltptr(matched_pat->pattern_ids, i);
}
struct bool_expr_match *bool_matcher_results = rs_rt->bool_match_buffs[thread_id];
int bool_matcher_ret = bool_matcher_match(rs_rt->bm, pattern_ids, n_pattern_id,
bool_matcher_results, MAX_HIT_EXPR_NUM);
if (bool_matcher_ret < 0) {
ret = -1;
goto next;
}
if (bool_matcher_ret > (int)n_result) {
bool_matcher_ret = n_result;
}
for (int index = 0; index < bool_matcher_ret; index++) {
results[index].rule_id = bool_matcher_results[index].expr_id;
results[index].user_tag = bool_matcher_results[index].user_tag;
}
*n_hit_result = bool_matcher_ret;
next:
utarray_clear(matched_pat->pattern_ids);
return ret;
}
int adapter_rs_scan(void *rs_instance, int thread_id, const char *data, size_t data_len,
struct expr_scan_result *results, size_t n_result, size_t *n_hit_result)
{
if (NULL == rs_instance || NULL == data || (0 == data_len) ||
NULL == results || 0 == n_result || NULL == n_hit_result) {
return -1;
}
int ret = 0, err_count = 0;
struct adapter_rs *rs_inst = (struct adapter_rs *)rs_instance;
struct adapter_rs_runtime *rs_rt = rs_inst->rs_rt;
struct matched_pattern *matched_pat = rs_rt->matched_pats[thread_id];
if (rs_rt->literal_db != NULL) {
ret = rs_scan(rs_rt->literal_db, thread_id, data, data_len,
0, matched_event_cb, matched_pat);
if (ret < 0) {
err_count++;
}
}
if (rs_rt->regex_db != NULL) {
ret = rs_scan(rs_rt->regex_db, thread_id, data, data_len,
0, matched_event_cb, matched_pat);
if (ret < 0) {
err_count++;
}
}
if (err_count == 2) {
return -1;
}
size_t n_pattern_id = utarray_len(matched_pat->pattern_ids);
if (0 == n_pattern_id) {
*n_hit_result = 0;
return 0;
}
unsigned long long pattern_ids[n_pattern_id];
for (size_t i = 0; i < n_pattern_id; i++) {
pattern_ids[i] = *(unsigned long long *)utarray_eltptr(matched_pat->pattern_ids, i);
}
struct bool_expr_match *bool_matcher_results = rs_rt->bool_match_buffs[thread_id];
int bool_matcher_ret = bool_matcher_match(rs_rt->bm, pattern_ids, n_pattern_id,
bool_matcher_results, MAX_HIT_EXPR_NUM);
if (bool_matcher_ret < 0) {
ret = -1;
goto next;
}
if (bool_matcher_ret > (int)n_result) {
bool_matcher_ret = n_result;
}
for (int index = 0; index < bool_matcher_ret; index++) {
results[index].rule_id = bool_matcher_results[index].expr_id;
results[index].user_tag = bool_matcher_results[index].user_tag;
}
*n_hit_result = bool_matcher_ret;
next:
utarray_clear(matched_pat->pattern_ids);
return ret;
}

View File

@@ -0,0 +1,78 @@
/*
**********************************************************************************************
* File: adapter_rs.h
* Description:
* Authors: Liu wentan <liuwentan@geedgenetworks.com>
* Date: 2023-06-30
* Copyright: (c) Since 2022 Geedge Networks, Ltd. All rights reserved.
***********************************************************************************************
*/
#ifndef _ADAPTER_RS_H_
#define _ADAPTER_RS_H_
#ifdef __cplusplus
extern "C"
{
#endif
#include <stddef.h>
#include "log/log.h"
#include "../expr_matcher.h"
/**
 * @brief check a regex expression with the rulescan engine
 *
 * @param regex_expr: regex expression to check
 * @param logger: log handle
 *
 * NOTE(review): the return value convention is defined by the implementation,
 * which is not visible from this header - confirm before relying on it.
 */
int adapter_rs_verify_regex_expression(const char *regex_expr,
struct log_handle *logger);
/**
 * @brief new adapter_rs instance
 *
 * @param rules: logic AND expression's array
 * @param n_rule: the number of elements in rules
 * @param n_literal_pattern: the number of literal patterns contained in rules
 * @param n_regex_pattern: the number of regex patterns contained in rules
 * @param n_worker_thread: the number of scan threads which will call adapter_rs_scan()
 * @param logger: log handle
 *
 * @retval the pointer to adapter_rs instance, NULL on failure;
 *         release it with adapter_rs_free()
 */
void *adapter_rs_new(struct expr_rule *rules, size_t n_rule,
size_t n_literal_pattern, size_t n_regex_pattern,
size_t n_worker_thread, struct log_handle *logger);
/* free an instance obtained by adapter_rs_new(); NULL is ignored */
void adapter_rs_free(void *rs_instance);
/**
 * @brief scan input data to match logic AND expression, return all matched expr_id
 *
 * @param rs_instance: adapter_rs instance obtained by adapter_rs_new()
 * @param thread_id: the thread_id of caller
 * @param scan_data: data to be scanned
 * @param data_len: the length of data to be scanned, must not be 0
 * @param result_array: the array to store hit expr_id which allocated by caller
 * @param n_result_array: number of elements in array of expr_id
 * @param n_hit_results: output, number of entries written to result_array
 *
 * @retval 0 on success, a negative value on invalid arguments or failure
 */
int adapter_rs_scan(void *rs_instance, int thread_id,
const char *scan_data, size_t data_len,
struct expr_scan_result *result_array,
size_t n_result_array, size_t *n_hit_results);
/**
 * @brief open a stream handle for one scan thread
 *
 * @param rs_instance: adapter_rs instance obtained by adapter_rs_new()
 * @param thread_id: the thread_id of caller, must not be negative
 *
 * @retval the stream handle, NULL on failure;
 *         release it with adapter_rs_stream_close()
 */
void *adapter_rs_stream_open(void *rs_instance, int thread_id);
/**
 * @brief scan one piece of data on a stream handle, return all matched expr_id
 *
 * @param rs_stream: stream handle obtained by adapter_rs_stream_open()
 * @param scan_data: data to be scanned
 * @param data_len: the length of data to be scanned, must not be 0
 * @param result_array: the array to store hit expr_id which allocated by caller
 * @param n_result_array: number of elements in array of expr_id
 * @param n_hit_results: output, number of entries written to result_array
 *
 * @retval 0 on success, a negative value on invalid arguments or failure
 */
int adapter_rs_scan_stream(void *rs_stream, const char *scan_data,
size_t data_len, struct expr_scan_result *result_array,
size_t n_result_array, size_t *n_hit_results);
/**
 * @brief close a stream handle obtained by adapter_rs_stream_open()
 *
 * @param rs_stream: stream handle, NULL is ignored
 */
void adapter_rs_stream_close(void *rs_stream);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,235 @@
/*
**********************************************************************************************
* File: expr_matcher.cpp
* Description:
* Authors: Liu wentan <liuwentan@geedgenetworks.com>
* Date: 2023-06-30
* Copyright: (c) Since 2023 Geedge Networks, Ltd. All rights reserved.
***********************************************************************************************
*/
#include <unistd.h>
#include <assert.h>
#include <sys/syscall.h>
#include "log/log.h"
#include "expr_matcher.h"
#include "maat_utils.h"
#include "adapter_hs/adapter_hs.h"
#include "adapter_rs/adapter_rs.h"
/* id of the calling thread, obtained through the SYS_gettid system call */
pid_t expr_matcher_gettid()
{
    pid_t caller_tid = syscall(SYS_gettid);

    return caller_tid;
}
/**
 * Format "<name>(<tid>)" into a thread-local buffer and return it.
 * The returned string is overwritten by the next call made on the same thread.
 */
static const char *expr_matcher_module_name_str(const char *name)
{
    static __thread char tagged_name[64];

    snprintf(tagged_name, sizeof(tagged_name), "%s(%d)", name, expr_matcher_gettid());

    return tagged_name;
}

/* module tag used for every log line emitted by expr_matcher */
#define MODULE_EXPR_MATCHER expr_matcher_module_name_str("maat.expr_matcher")
/* engine independent matcher handle returned by expr_matcher_new() */
struct expr_matcher {
/* selects the entry of expr_engine_ops used for every call on this matcher */
enum expr_engine_type engine_type;
/* opaque engine instance returned by the selected engine_new() */
void *engine;
/* log handle given to expr_matcher_new() */
struct log_handle *logger;
};
/* engine independent stream handle returned by expr_matcher_stream_open() */
struct expr_matcher_stream {
/* engine type copied from the matcher the stream was opened on */
enum expr_engine_type engine_type;
/* opaque stream handle returned by the selected engine_stream_open() */
void *handle;
};
/* function table every expression engine adapter has to provide */
struct expr_engine_operations {
/* engine this table belongs to */
enum expr_engine_type type;
/* build an engine instance from the rules; NULL is treated as failure */
void *(*engine_new)(struct expr_rule *rules, size_t n_rule,
size_t n_literal_pattern, size_t n_regex_pattern,
size_t n_worker_thread, struct log_handle *logger);
/* release an instance returned by engine_new */
void (*engine_free)(void *engine);
/* scan a block of data with the instance */
int (*engine_scan)(void *engine, int thread_id, const char *scan_data,
size_t data_len, struct expr_scan_result *result_array,
size_t n_result_array, size_t *n_hit_result);
/* open a stream on the instance; NULL is treated as failure */
void *(*engine_stream_open)(void *engine, int thread_id);
/* close a stream returned by engine_stream_open */
void (*engine_stream_close)(void *stream);
/* scan a piece of data on an opened stream */
int (*engine_scan_stream)(void *stream, const char *scan_data, size_t data_len,
struct expr_scan_result *result_array, size_t n_result_array,
size_t *n_hit_result);
};
/*
 * Adapter table, indexed directly by enum expr_engine_type: the position of
 * each entry must therefore match the numeric value of its .type.
 */
struct expr_engine_operations expr_engine_ops[EXPR_ENGINE_TYPE_MAX] = {
{
.type = EXPR_ENGINE_TYPE_HS,
.engine_new = adapter_hs_new,
.engine_free = adapter_hs_free,
.engine_scan = adapter_hs_scan,
.engine_stream_open = adapter_hs_stream_open,
.engine_stream_close = adapter_hs_stream_close,
.engine_scan_stream = adapter_hs_scan_stream
},
{
.type = EXPR_ENGINE_TYPE_RS,
.engine_new = adapter_rs_new,
.engine_free = adapter_rs_free,
.engine_scan = adapter_rs_scan,
.engine_stream_open = adapter_rs_stream_open,
.engine_stream_close = adapter_rs_stream_close,
.engine_scan_stream = adapter_rs_scan_stream
}
};
/**
 * Check a regex expression with both engines.
 *
 * The hyperscan verifier runs first; when it returns 0 that 0 is returned
 * right away, otherwise the result of the rulescan verifier is returned.
 */
int expr_matcher_verify_regex_expression(const char *regex_expr,
                                         struct log_handle *logger)
{
    if (0 == adapter_hs_verify_regex_expression(regex_expr, logger)) {
        return 0;
    }

    return adapter_rs_verify_regex_expression(regex_expr, logger);
}
/**
 * @brief new expr matcher instance
 *
 * Validates the rules, counts literal and regex patterns and hands the rules
 * to the selected engine.
 *
 * @param rules: logic AND expression's array
 * @param n_rule: the number of elements in rules
 * @param engine_type: EXPR_ENGINE_TYPE_HS or EXPR_ENGINE_TYPE_RS
 * @param n_worker_thread: the number of scan threads
 * @param logger: log handle
 *
 * @retval the matcher (release with expr_matcher_free()), NULL on invalid
 *         arguments, invalid rules or engine build failure
 */
struct expr_matcher *
expr_matcher_new(struct expr_rule *rules, size_t n_rule, enum expr_engine_type engine_type,
                 size_t n_worker_thread, struct log_handle *logger)
{
    /* report argument problems separately, so the log names the real cause
       instead of always blaming the engine type */
    if (NULL == rules || 0 == n_rule || 0 == n_worker_thread) {
        log_error(logger, MODULE_EXPR_MATCHER,
                  "[%s:%d] rules is NULL or n_rule/n_worker_thread is 0",
                  __FUNCTION__, __LINE__);
        return NULL;
    }

    if (engine_type != EXPR_ENGINE_TYPE_HS && engine_type != EXPR_ENGINE_TYPE_RS) {
        log_error(logger, MODULE_EXPR_MATCHER, "[%s:%d]engine type:%d is illegal",
                  __FUNCTION__, __LINE__, engine_type);
        return NULL;
    }

    size_t i = 0, j = 0;
    size_t literal_pat_num = 0;
    size_t regex_pat_num = 0;

    for (i = 0; i < n_rule; i++) {
        if (rules[i].n_patterns > MAX_EXPR_PATTERN_NUM) {
            log_error(logger, MODULE_EXPR_MATCHER,
                      "[%s:%d] the number of patterns in one expression should not"
                      " exceed %d", __FUNCTION__, __LINE__, MAX_EXPR_PATTERN_NUM);
            return NULL;
        }

        for (j = 0; j < rules[i].n_patterns; j++) {
            /* pat_len should not 0 */
            if (0 == rules[i].patterns[j].pat_len) {
                log_error(logger, MODULE_EXPR_MATCHER,
                          "[%s:%d] expr pattern length should not 0",
                          __FUNCTION__, __LINE__);
                return NULL;
            }

            if (rules[i].patterns[j].type == EXPR_PATTERN_TYPE_STR) {
                literal_pat_num++;
            } else {
                regex_pat_num++;
            }
        }
    }

    if (0 == literal_pat_num && 0 == regex_pat_num) {
        log_error(logger, MODULE_EXPR_MATCHER,
                  "[%s:%d] exprs has no valid pattern", __FUNCTION__, __LINE__);
        return NULL;
    }

    void *engine = expr_engine_ops[engine_type].engine_new(rules, n_rule, literal_pat_num,
                                                           regex_pat_num, n_worker_thread,
                                                           logger);
    if (NULL == engine) {
        log_error(logger, MODULE_EXPR_MATCHER,
                  "[%s:%d]expr_matcher engine_new failed.", __FUNCTION__, __LINE__);
        return NULL;
    }

    struct expr_matcher *matcher = ALLOC(struct expr_matcher, 1);
    matcher->engine_type = engine_type;
    matcher->engine = engine;
    matcher->logger = logger;

    return matcher;
}
void expr_matcher_free(struct expr_matcher *matcher)
{
if (NULL == matcher) {
return;
}
if (matcher->engine != NULL) {
expr_engine_ops[matcher->engine_type].engine_free(matcher->engine);
matcher->engine = NULL;
}
FREE(matcher);
}
int expr_matcher_match(struct expr_matcher *matcher, int thread_id, const char *scan_data,
size_t data_len, struct expr_scan_result *result_array,
size_t n_result_array, size_t *n_hit_results)
{
if (NULL == matcher || thread_id < 0 || NULL == scan_data || 0 == data_len
|| NULL == result_array || 0 == n_result_array || NULL == n_hit_results) {
return -1;
}
return expr_engine_ops[matcher->engine_type].engine_scan(matcher->engine, thread_id,
scan_data, data_len, result_array,
n_result_array, n_hit_results);
}
struct expr_matcher_stream *
expr_matcher_stream_open(struct expr_matcher *matcher, int thread_id)
{
if (NULL == matcher || thread_id < 0) {
return NULL;
}
void *s_handle = expr_engine_ops[matcher->engine_type].engine_stream_open(matcher->engine,
thread_id);
if (NULL == s_handle) {
log_error(matcher->logger, MODULE_EXPR_MATCHER,
"[%s:%d] expr_matcher engine_stream_open failed.",
__FUNCTION__, __LINE__);
return NULL;
}
struct expr_matcher_stream *stream = ALLOC(struct expr_matcher_stream, 1);
stream->engine_type = matcher->engine_type;
stream->handle = s_handle;
return stream;
}
int expr_matcher_stream_match(struct expr_matcher_stream *stream, const char *scan_data,
size_t data_len, struct expr_scan_result *result_array,
size_t n_result_array, size_t *n_hit_results)
{
if (NULL == stream || NULL == scan_data || 0 == data_len || NULL == result_array
|| 0 == n_result_array || NULL == n_hit_results) {
return -1;
}
return expr_engine_ops[stream->engine_type].engine_scan_stream(stream->handle, scan_data,
data_len, result_array,
n_result_array, n_hit_results);
}
void expr_matcher_stream_close(struct expr_matcher_stream *stream)
{
if (NULL == stream) {
return;
}
if (stream->handle != NULL) {
expr_engine_ops[stream->engine_type].engine_stream_close(stream->handle);
stream->handle = NULL;
}
FREE(stream);
}

View File

@@ -0,0 +1,134 @@
/*
**********************************************************************************************
* File: expr_matcher.h
* Description:
* Authors: Liu wentan <liuwentan@geedgenetworks.com>
* Date: 2023-06-30
* Copyright: (c) Since 2023 Geedge Networks, Ltd. All rights reserved.
***********************************************************************************************
*/
#ifndef _EXPR_MATCHER_H_
#define _EXPR_MATCHER_H_
#ifdef __cplusplus
extern "C"
{
#endif
#include <stddef.h>
#include "log/log.h"
#define MAX_EXPR_PATTERN_NUM 8 /* each AND expression consists of at most MAX_EXPR_PATTERN_NUM patterns */
/* upper bound on the expressions reported by one bool matcher lookup */
#define MAX_HIT_EXPR_NUM 1024
/* available expression engines; the values index the engine operation table */
enum expr_engine_type {
EXPR_ENGINE_TYPE_HS = 0, /* default engine */
EXPR_ENGINE_TYPE_RS,
EXPR_ENGINE_TYPE_MAX /* number of engines, not a valid engine itself */
};
/* how the pattern text of an expr_pattern is interpreted */
enum expr_pattern_type {
EXPR_PATTERN_TYPE_STR = 0, /* pure literal string */
EXPR_PATTERN_TYPE_REG = 1, /* regex expression */
};
/* case handling of a pattern */
enum expr_case_sensitive {
EXPR_CASE_INSENSITIVE = 0,
EXPR_CASE_SENSITIVE
};
/* position constraint a match has to satisfy to count as a hit */
enum expr_match_mode {
EXPR_MATCH_MODE_INVALID = -1,
EXPR_MATCH_MODE_EXACTLY = 1, /* scan data must match pattern exactly */
EXPR_MATCH_MODE_PREFIX, /* pattern must be at the head of scan_data */
EXPR_MATCH_MODE_SUFFIX, /* pattern must be at the tail of scan_data */
EXPR_MATCH_MODE_SUB /* pattern must be within the range [start_offset, end_offset] of scan_data */
};
/* one pattern of an AND expression */
struct expr_pattern {
enum expr_pattern_type type;
enum expr_match_mode match_mode;
enum expr_case_sensitive case_sensitive;
/*
 * only match within scan_data's range [start_offset, end_offset], -1 means no limit
 * for example:
 * [-1, end_offset] means the pattern must be within scan_data's [0 ~ end_offset]
 * [start_offset, -1] means the pattern must be within scan_data's [start_offset ~ data_end]
 */
int start_offset;
int end_offset;
/* pattern text and its length in bytes; the length must not be 0 */
char *pat;
size_t pat_len;
};
/* one hit AND expression reported by a scan */
struct expr_scan_result {
long long rule_id; /* expr_id of the hit expression */
void *user_tag; /* tag of the hit expression, as given in expr_rule */
};
/* logic AND expression, such as (pattern1 & pattern2) */
struct expr_rule {
long long expr_id; /* AND expression ID */
size_t n_patterns; /* number of used entries in patterns, at most MAX_EXPR_PATTERN_NUM */
struct expr_pattern patterns[MAX_EXPR_PATTERN_NUM];
void *tag; /* user defined data, return with hit result */
};
/**
 * @brief check a regex expression with the hyperscan and the rulescan engine
 *
 * @param regex_expr: regex expression to check
 * @param logger: log handle
 *
 * @retval 0 when the hyperscan check returns 0, otherwise the result of the
 *         rulescan check
 */
int expr_matcher_verify_regex_expression(const char *regex_expr,
struct log_handle *logger);
/**
 * @brief new expr matcher instance
 *
 * @param rules: logic AND expression's array
 * @param n_rule: the number of elements in rules
 * @param type: the engine used for matching (EXPR_ENGINE_TYPE_HS or EXPR_ENGINE_TYPE_RS)
 * @param n_worker_thread: the number of scan threads which will call expr_matcher_match()
 * @param logger: log handle
 *
 * @retval the matcher instance, NULL on failure; release it with expr_matcher_free()
 */
struct expr_matcher *
expr_matcher_new(struct expr_rule *rules, size_t n_rule, enum expr_engine_type type,
size_t n_worker_thread, struct log_handle *logger);
/* free a matcher obtained by expr_matcher_new(); NULL is ignored */
void expr_matcher_free(struct expr_matcher *matcher);
/**
 * @brief scan input data to match logic AND expression, return all matched expr_id
 *
 * @param matcher: expr_matcher instance obtained by expr_matcher_new()
 * @param thread_id: the thread_id of caller, must not be negative
 * @param scan_data: data to be scanned
 * @param data_len: the length of data to be scanned, must not be 0
 * @param result_array: the array to store hit expr_id which allocated by caller
 * @param n_result_array: number of elements in array of expr_id
 * @param n_hit_results: output of the engine scan, passed through unchanged
 *
 * @retval -1 on invalid arguments, otherwise the return value of the engine scan
 */
int expr_matcher_match(struct expr_matcher *matcher, int thread_id, const char *scan_data,
size_t data_len, struct expr_scan_result *result_array,
size_t n_result_array, size_t *n_hit_results);
/**
 * @brief open a stream on the matcher for one scan thread
 *
 * @param matcher: expr_matcher instance obtained by expr_matcher_new()
 * @param thread_id: the thread_id of caller, must not be negative
 *
 * @retval the stream, NULL on failure; release it with expr_matcher_stream_close()
 */
struct expr_matcher_stream *
expr_matcher_stream_open(struct expr_matcher *matcher, int thread_id);
/**
 * @brief scan one piece of data on a stream, return all matched expr_id
 *
 * @param stream: stream obtained by expr_matcher_stream_open()
 * @param scan_data: data to be scanned
 * @param data_len: the length of data to be scanned, must not be 0
 * @param result_array: the array to store hit expr_id which allocated by caller
 * @param n_result_array: number of elements in array of expr_id
 * @param n_hit_results: output of the engine stream scan, passed through unchanged
 *
 * @retval -1 on invalid arguments, otherwise the return value of the engine stream scan
 */
int expr_matcher_stream_match(struct expr_matcher_stream *stream, const char *scan_data,
size_t data_len, struct expr_scan_result *result_array,
size_t n_result_array, size_t *n_hit_results);
/**
 * @brief close a stream obtained by expr_matcher_stream_open()
 *
 * @param stream: stream to close, NULL is ignored
 */
void expr_matcher_stream_close(struct expr_matcher_stream *stream);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -23,13 +23,15 @@ include_directories(/opt/MESA/include/MESA/)
include_directories(${PROJECT_SOURCE_DIR}/include/)
include_directories(${PROJECT_SOURCE_DIR}/deps/)
include_directories(${PROJECT_SOURCE_DIR}/scanner)
include_directories(${PROJECT_SOURCE_DIR}/scanner/adapter_hs)
include_directories(${PROJECT_SOURCE_DIR}/scanner/fqdn_engine)
include_directories(${PROJECT_SOURCE_DIR}/scanner/bool_matcher)
include_directories(${PROJECT_SOURCE_DIR}/scanner/ip_matcher)
include_directories(${PROJECT_SOURCE_DIR}/scanner/flag_matcher)
include_directories(${PROJECT_SOURCE_DIR}/scanner/interval_matcher)
include_directories(${PROJECT_SOURCE_DIR}/src/inc_internal)
include_directories(${PROJECT_SOURCE_DIR}/scanner/expr_matcher)
include_directories(${PROJECT_SOURCE_DIR}/scanner/expr_matcher/adapter_hs)
include_directories(${PROJECT_SOURCE_DIR}/scanner/expr_matcher/adapter_rs)
# Static Library Output
add_library(maat_frame_static STATIC ${MAAT_SRC} ${LIB_SOURCE_FILES})

View File

@@ -52,14 +52,13 @@ long long expr_runtime_get_version(void *expr_runtime);
int expr_runtime_scan(struct expr_runtime *expr_rt, int thread_id, const char *data,
size_t data_len, int vtable_id, struct maat_state *state);
struct adapter_hs_stream *expr_runtime_stream_open(struct expr_runtime *expr_rt, int thread_id);
struct expr_matcher_stream *expr_runtime_stream_open(struct expr_runtime *expr_rt, int thread_id);
int expr_runtime_stream_scan(struct expr_runtime *expr_rt, struct adapter_hs_stream *s_handle,
const char *data, size_t data_len,
int vtable_id, struct maat_state *state);
int expr_runtime_stream_scan(struct expr_runtime *expr_rt, struct expr_matcher_stream *s_handle,
const char *data, size_t data_len, int vtable_id, struct maat_state *state);
void expr_runtime_stream_close(struct expr_runtime *expr_rt, int thread_id,
struct adapter_hs_stream *s_handle);
struct expr_matcher_stream *stream);
int expr_runtime_set_scan_district(struct expr_runtime *expr_rt, const char *district,
size_t district_len, long long *district_id);

View File

@@ -128,6 +128,7 @@ struct maat_options {
int rule_effect_interval_ms;
int rule_update_checking_interval_ms;
enum maat_expr_engine expr_engine;
enum data_source input_mode;
union {
struct source_iris_ctx iris_ctx;

View File

@@ -19,6 +19,7 @@ extern "C"
#include <stddef.h>
#include <cJSON/cJSON.h>
#include "maat.h"
#include "log/log.h"
#include "maat_garbage_collection.h"
@@ -47,7 +48,8 @@ struct table_manager;
struct table_manager *
table_manager_create(const char *table_info_path, const char *accept_tags,
struct maat_garbage_bin *garbage_bin, struct log_handle *logger);
enum maat_expr_engine expr_engine, struct maat_garbage_bin *garbage_bin,
struct log_handle *logger);
int table_manager_runtime_create(struct table_manager *tbl_mgr, size_t max_thread_num,
struct maat_garbage_bin *garbage_bin);
@@ -67,6 +69,8 @@ int table_manager_get_group2group_table_id(struct table_manager *tbl_mgr);
int table_manager_get_valid_column(struct table_manager *tbl_mgr, int table_id);
enum maat_expr_engine table_manager_get_expr_engine(struct table_manager *tbl_mgr);
size_t table_manager_accept_tags_count(struct table_manager *tbl_mgr);
int table_manager_accept_tags_match(struct table_manager *tbl_mgr, const char *tags);

View File

@@ -51,7 +51,7 @@ enum district_flag {
struct maat_stream {
struct maat *ref_maat_inst;
struct adapter_hs_stream *handle; //each physical table open one stream
struct expr_matcher_stream *handle; //each physical table open one stream
long long last_full_version;
long long expr_rt_version;
struct log_handle *logger;
@@ -70,6 +70,7 @@ struct maat_options* maat_options_new(void)
options->rule_update_checking_interval_ms = 1 * 1000;
options->gc_timeout_ms = 10 * 1000;
options->input_mode = DATA_SOURCE_NONE;
options->expr_engine = MAAT_EXPR_ENGINE_HS;
options->log_level = 0;
return options;
@@ -254,6 +255,19 @@ int maat_options_set_stat_file(struct maat_options *opts, const char *stat_filen
return 0;
}
/**
 * Select the expression engine stored in the options.
 *
 * @retval 0 on success, -1 when opts is NULL or expr_engine is neither
 *         MAAT_EXPR_ENGINE_HS nor MAAT_EXPR_ENGINE_RS
 */
int maat_options_set_expr_engine(struct maat_options *opts,
                                 enum maat_expr_engine expr_engine)
{
    if (NULL == opts) {
        return -1;
    }

    switch (expr_engine) {
    case MAAT_EXPR_ENGINE_HS:
    case MAAT_EXPR_ENGINE_RS:
        opts->expr_engine = expr_engine;
        return 0;
    default:
        return -1;
    }
}
int maat_options_set_logger(struct maat_options *opts, const char *log_path,
enum log_level level)
{
@@ -357,7 +371,8 @@ struct maat *maat_new(struct maat_options *opts, const char *table_info_path)
pthread_mutex_init(&(maat_inst->background_update_mutex), NULL);
maat_inst->tbl_mgr = table_manager_create(table_info_path, maat_inst->opts.accept_tags,
maat_inst->garbage_bin, maat_inst->logger);
maat_inst->opts.expr_engine, maat_inst->garbage_bin,
maat_inst->logger);
if (NULL == maat_inst->tbl_mgr) {
goto failed;
}
@@ -410,12 +425,7 @@ int maat_helper_verify_regex_expression(const char *regex_expr)
return 0;
}
int ret = adapter_hs_verify_regex_expression(regex_expr, NULL);
if (ret < 0) {
return 0;
} else {
return 1;
}
return expr_matcher_verify_regex_expression(regex_expr, NULL);
}
int maat_get_table_id(struct maat *maat_inst, const char *table_name)
@@ -1168,9 +1178,7 @@ int maat_scan_flag(struct maat *maat_inst, int table_id,
return MAAT_SCAN_ERR;
}
maat_runtime_ref_inc(maat_rt, state->thread_id);
alignment_int64_array_add(maat_inst->stat->thread_call_cnt, state->thread_id, 1);
int hit_group_cnt = flag_scan(maat_inst->tbl_mgr, state->thread_id, flag,
phy_table_id, vtable_id, state);
if (hit_group_cnt < 0) {
@@ -1178,6 +1186,8 @@ int maat_scan_flag(struct maat *maat_inst, int table_id,
return MAAT_SCAN_ERR;
}
maat_runtime_ref_inc(maat_rt, state->thread_id);
size_t sum_hit_compile_cnt = 0;
if (hit_group_cnt > 0 || scan_status_should_compile_NOT(state)) {
sum_hit_compile_cnt = group_to_compile(maat_inst, results, n_result, state);
@@ -1257,9 +1267,7 @@ int maat_scan_integer(struct maat *maat_inst, int table_id,
return MAAT_SCAN_ERR;
}
maat_runtime_ref_inc(maat_rt, state->thread_id);
alignment_int64_array_add(maat_inst->stat->thread_call_cnt, state->thread_id, 1);
int hit_group_cnt = interval_scan(maat_inst->tbl_mgr, state->thread_id, integer,
phy_table_id, vtable_id, state);
if (hit_group_cnt < 0) {
@@ -1267,6 +1275,8 @@ int maat_scan_integer(struct maat *maat_inst, int table_id,
return MAAT_SCAN_ERR;
}
maat_runtime_ref_inc(maat_rt, state->thread_id);
size_t sum_hit_compile_cnt = 0;
if (hit_group_cnt > 0 || scan_status_should_compile_NOT(state)) {
sum_hit_compile_cnt = group_to_compile(maat_inst, results, n_result, state);
@@ -1346,9 +1356,7 @@ int maat_scan_ipv4(struct maat *maat_inst, int table_id, uint32_t ip_addr,
return MAAT_SCAN_ERR;
}
maat_runtime_ref_inc(maat_rt, state->thread_id);
alignment_int64_array_add(maat_inst->stat->thread_call_cnt, state->thread_id, 1);
int hit_group_cnt = ipv4_scan(maat_inst->tbl_mgr, state->thread_id, ip_addr,
port, protocol, phy_table_id, vtable_id, state);
if (hit_group_cnt < 0) {
@@ -1356,6 +1364,8 @@ int maat_scan_ipv4(struct maat *maat_inst, int table_id, uint32_t ip_addr,
return MAAT_SCAN_ERR;
}
maat_runtime_ref_inc(maat_rt, state->thread_id);
size_t sum_hit_compile_cnt = 0;
if (hit_group_cnt > 0 || scan_status_should_compile_NOT(state)) {
sum_hit_compile_cnt = group_to_compile(maat_inst, results, n_result, state);
@@ -1436,9 +1446,7 @@ int maat_scan_ipv6(struct maat *maat_inst, int table_id,
return MAAT_SCAN_ERR;
}
maat_runtime_ref_inc(maat_rt, state->thread_id);
alignment_int64_array_add(maat_inst->stat->thread_call_cnt, state->thread_id, 1);
int hit_group_cnt = ipv6_scan(maat_inst->tbl_mgr, state->thread_id, ip_addr,
port, protocol, phy_table_id, vtable_id, state);
if (hit_group_cnt < 0) {
@@ -1446,6 +1454,8 @@ int maat_scan_ipv6(struct maat *maat_inst, int table_id,
return MAAT_SCAN_ERR;
}
maat_runtime_ref_inc(maat_rt, state->thread_id);
size_t sum_hit_compile_cnt = 0;
if (hit_group_cnt > 0 || scan_status_should_compile_NOT(state)) {
sum_hit_compile_cnt = group_to_compile(maat_inst, results, n_result, state);
@@ -1525,9 +1535,7 @@ int maat_scan_string(struct maat *maat_inst, int table_id, const char *data,
return MAAT_SCAN_ERR;
}
maat_runtime_ref_inc(maat_rt, state->thread_id);
alignment_int64_array_add(maat_inst->stat->thread_call_cnt, state->thread_id, 1);
int hit_group_cnt = string_scan(maat_inst->tbl_mgr, state->thread_id, data,
data_len, phy_table_id, vtable_id, state);
if (hit_group_cnt < 0) {
@@ -1535,6 +1543,8 @@ int maat_scan_string(struct maat *maat_inst, int table_id, const char *data,
return MAAT_SCAN_ERR;
}
maat_runtime_ref_inc(maat_rt, state->thread_id);
size_t sum_hit_compile_cnt = 0;
if (hit_group_cnt > 0 || scan_status_should_compile_NOT(state)) {
sum_hit_compile_cnt = group_to_compile(maat_inst, results, n_result, state);
@@ -1609,7 +1619,7 @@ struct maat_stream *maat_stream_new(struct maat *maat_inst, int table_id,
stream->expr_rt_version = expr_runtime_get_version(expr_rt);
maat_runtime_ref_inc(maat_inst->maat_rt, state->thread_id);
struct adapter_hs_stream *handle = expr_runtime_stream_open((struct expr_runtime *)expr_rt,
struct expr_matcher_stream *handle = expr_runtime_stream_open((struct expr_runtime *)expr_rt,
state->thread_id);
if (NULL == handle) {
goto error;

View File

@@ -140,6 +140,7 @@ struct maat_internal_hit_path {
struct maat_compile_state {
uint8_t this_scan_hit_item_flag;
uint8_t not_clause_hit_flag;
uint8_t inc_hit_path_flag;
int Nth_scan;
time_t compile_rt_version;
@@ -987,10 +988,13 @@ static inline int compare_hit_group(const void *pa, const void *pb)
struct maat_hit_group *la=(struct maat_hit_group *)pa;
struct maat_hit_group *lb=(struct maat_hit_group *)pb;
long long ret = la->group_id - lb->group_id;
long long ret = la->item_id - lb->item_id;
if (ret == 0) {
ret = la->group_id - lb->group_id;
if (ret == 0) {
ret = la->vtable_id - lb->vtable_id;
}
}
return ret;
}
@@ -1289,7 +1293,7 @@ static int maat_remove_group_from_compile(struct rcu_hash_table *hash_tbl,
if (NULL == compile) {
log_error(logger, MODULE_COMPILE,
"[%s:%d] Remove group_id:%lld from compile_id:%lld failed, compile"
" is not exisited.", __FUNCTION__, __LINE__, g2c_item->group_id,
" is not existed.", __FUNCTION__, __LINE__, g2c_item->group_id,
compile_id);
return -1;
} else {
@@ -1345,7 +1349,7 @@ static int maat_remove_group_from_compile(struct rcu_hash_table *hash_tbl,
} else {
log_error(logger, MODULE_COMPILE,
"[%s:%d] Remove group_id:%lld from compile_id:%lld failed, "
"compile is not exisited.", __FUNCTION__, __LINE__,
"compile is not existed.", __FUNCTION__, __LINE__,
g2c_item->group_id, compile_id);
return -1;
}
@@ -1376,6 +1380,7 @@ void maat_compile_state_reset(struct maat_compile_state *compile_state)
compile_state->compile_rt_version = 0;
compile_state->this_scan_hit_item_flag = 0;
compile_state->not_clause_hit_flag = 0;
compile_state->inc_hit_path_flag = 0;
utarray_clear(compile_state->internal_hit_paths);
utarray_clear(compile_state->internal_inc_hit_paths);
@@ -1541,10 +1546,14 @@ static void maat_compile_state_update_hit_path(struct maat_compile_state *compil
if (compile_state->Nth_scan != Nth_scan) {
assert(compile_state->this_scan_hit_item_flag == 0);
compile_state->Nth_scan = Nth_scan;
utarray_clear(compile_state->internal_inc_hit_paths);
utarray_clear(compile_state->this_scan_hit_clauses);
}
if (1 == compile_state->inc_hit_path_flag) {
compile_state->inc_hit_path_flag = 0;
utarray_clear(compile_state->internal_inc_hit_paths);
}
maat_compile_hit_path_add(compile_state->internal_inc_hit_paths, item_id, group_id,
vtable_id, Nth_scan, Nth_item_result);
@@ -2125,6 +2134,7 @@ size_t maat_compile_state_get_hit_groups(struct maat_compile_state *compile_stat
tmp_hit_path = compile_state->internal_hit_paths;
} else if (type == MAAT_LIST_TYPE_INC) {
tmp_hit_path = compile_state->internal_inc_hit_paths;
compile_state->inc_hit_path_flag = 1;
}
for (i = 0; i < utarray_len(tmp_hit_path); i++) {
@@ -2139,6 +2149,7 @@ size_t maat_compile_state_get_hit_groups(struct maat_compile_state *compile_stat
for (size_t idx = 0; idx < super_group_cnt; idx++) {
struct maat_hit_group hit_group;
hit_group.item_id = internal_path->item_id;
hit_group.group_id = super_group_ids[idx];
hit_group.vtable_id = internal_path->vtable_id;
if (utarray_find(all_hit_groups, &hit_group, compare_hit_group)) {

View File

@@ -60,7 +60,7 @@ struct expr_item {
long long group_id;
char keywords[MAX_KEYWORDS_STR];
enum expr_type expr_type;
enum hs_match_mode match_mode;
enum expr_match_mode match_mode;
int is_hexbin;
int is_case_sensitive;
void *user_data;
@@ -68,7 +68,7 @@ struct expr_item {
};
struct expr_runtime {
struct adapter_hs *hs;
struct expr_matcher *matcher;
struct rcu_hash_table *item_hash; // <item_id, struct expr_item>
long long version; //expr_rt version
@@ -79,6 +79,7 @@ struct expr_runtime {
struct log_handle *logger;
struct maat_garbage_bin *ref_garbage_bin;
enum maat_expr_engine expr_engine;
int district_num;
struct maat_kv_store *district_map;
struct maat_kv_store *tmp_district_map;
@@ -114,22 +115,22 @@ static enum expr_type int_to_expr_type(int expr_type)
return type;
}
static enum hs_match_mode int_to_match_mode(int match_method)
static enum expr_match_mode int_to_match_mode(int match_method)
{
enum hs_match_mode mode = HS_MATCH_MODE_INVALID;
enum expr_match_mode mode = EXPR_MATCH_MODE_INVALID;
switch (match_method) {
case 0:
mode = HS_MATCH_MODE_SUB;
mode = EXPR_MATCH_MODE_SUB;
break;
case 1:
mode = HS_MATCH_MODE_SUFFIX;
mode = EXPR_MATCH_MODE_SUFFIX;
break;
case 2:
mode = HS_MATCH_MODE_PREFIX;
mode = EXPR_MATCH_MODE_PREFIX;
break;
case 3:
mode = HS_MATCH_MODE_EXACTLY;
mode = EXPR_MATCH_MODE_EXACTLY;
break;
default:
break;
@@ -234,8 +235,8 @@ expr_item_new(struct expr_schema *expr_schema, const char *table_name,
__FUNCTION__, __LINE__, table_name, line);
goto error;
} else if (expr_item->expr_type == EXPR_TYPE_REGEX) {
ret = adapter_hs_verify_regex_expression(expr_item->keywords, expr_rt->logger);
if (ret < 0) {
ret = expr_matcher_verify_regex_expression(expr_item->keywords, expr_rt->logger);
if (0 == ret) {
log_error(expr_rt->logger, MODULE_EXPR,
"[%s:%d] expr table:<%s> regex expression(item_id:%lld):%s illegal,"
" will be dropped", __FUNCTION__, __LINE__, table_name,
@@ -277,7 +278,7 @@ expr_item_new(struct expr_schema *expr_schema, const char *table_name,
match_method_type = atoi(line + column_offset);
expr_item->match_mode = int_to_match_mode(match_method_type);
if (expr_item->match_mode == HS_MATCH_MODE_INVALID) {
if (expr_item->match_mode == EXPR_MATCH_MODE_INVALID) {
log_error(expr_rt->logger, MODULE_EXPR,
"[%s:%d] expr table:<%s> has invalid match_method in line:%s",
__FUNCTION__, __LINE__, table_name, line);
@@ -472,12 +473,14 @@ void *expr_runtime_new(void *expr_schema, size_t max_thread_num,
return NULL;
}
struct expr_schema *schema = (struct expr_schema *)expr_schema;
struct expr_runtime *expr_rt = ALLOC(struct expr_runtime, 1);
expr_rt->item_hash = rcu_hash_new(expr_item_free_cb, NULL, 0);
expr_rt->n_worker_thread = max_thread_num;
expr_rt->ref_garbage_bin = garbage_bin;
expr_rt->logger = logger;
expr_rt->expr_engine = table_manager_get_expr_engine(schema->ref_tbl_mgr);
expr_rt->district_map = maat_kv_store_new();
expr_rt->hit_cnt = alignment_int64_array_alloc(max_thread_num);
@@ -495,9 +498,9 @@ void expr_runtime_free(void *expr_runtime)
}
struct expr_runtime *expr_rt = (struct expr_runtime *)expr_runtime;
if (expr_rt->hs != NULL) {
adapter_hs_free(expr_rt->hs);
expr_rt->hs = NULL;
if (expr_rt->matcher != NULL) {
expr_matcher_free(expr_rt->matcher);
expr_rt->matcher = NULL;
}
if (expr_rt->item_hash != NULL) {
@@ -558,18 +561,18 @@ static int expr_runtime_update_row(struct expr_runtime *expr_rt, char *key,
return 0;
}
static enum hs_pattern_type expr_type2pattern_type(enum expr_type expr_type)
static enum expr_pattern_type expr_type2pattern_type(enum expr_type expr_type)
{
enum hs_pattern_type pattern_type;
enum expr_pattern_type pattern_type = EXPR_PATTERN_TYPE_STR;
switch (expr_type) {
case EXPR_TYPE_STRING:
case EXPR_TYPE_AND:
case EXPR_TYPE_OFFSET:
pattern_type = HS_PATTERN_TYPE_STR;
pattern_type = EXPR_PATTERN_TYPE_STR;
break;
case EXPR_TYPE_REGEX:
pattern_type = HS_PATTERN_TYPE_REG;
pattern_type = EXPR_PATTERN_TYPE_REG;
break;
default:
break;
@@ -686,12 +689,12 @@ static int expr_item_to_expr_rule(struct expr_item *expr_item,
}
sub_expr_cnt = i;
break;
case EXPR_TYPE_STRING:
case EXPR_TYPE_STRING: //AND/OFFSET/STRING type expression use \b to represent blank(' ')
sub_expr_cnt = 1;
sub_key_array[0] = expr_item->keywords;
sub_key_array[0] = str_unescape(sub_key_array[0]);
break;
case EXPR_TYPE_REGEX:
case EXPR_TYPE_REGEX: //only regex type expression use \s to represent blank(' ')
sub_expr_cnt = 1;
sub_key_array[0] = expr_item->keywords;
break;
@@ -710,15 +713,15 @@ static int expr_item_to_expr_rule(struct expr_item *expr_item,
if (TRUE == expr_item->is_case_sensitive) {
// case-sensitive
expr_rule->patterns[i].case_sensitive = HS_CASE_SENSITIVE;
expr_rule->patterns[i].case_sensitive = EXPR_CASE_SENSITIVE;
} else {
expr_rule->patterns[i].case_sensitive = HS_CASE_INSENSITIVE;
expr_rule->patterns[i].case_sensitive = EXPR_CASE_INSENSITIVE;
}
expr_rule->patterns[i].pattern_type = expr_type2pattern_type(expr_item->expr_type);
expr_rule->patterns[i].type = expr_type2pattern_type(expr_item->expr_type);
if (TRUE == expr_item->is_hexbin &&
expr_rule->patterns[i].pattern_type != HS_PATTERN_TYPE_REG) {
expr_rule->patterns[i].type != EXPR_PATTERN_TYPE_REG) {
region_str_len = strlen(sub_key_array[i]) * 8 + 1;
region_string = ALLOC(char, region_str_len);
region_str_len = hex2bin(sub_key_array[i], strlen(sub_key_array[i]),
@@ -738,13 +741,13 @@ static int expr_item_to_expr_rule(struct expr_item *expr_item,
}
expr_rule->patterns[i].match_mode = expr_item->match_mode;
if (expr_rule->patterns[i].match_mode == HS_MATCH_MODE_SUB) {
if (expr_rule->patterns[i].match_mode == EXPR_MATCH_MODE_SUB) {
expr_rule->patterns[i].start_offset = key_left_offset[i];
expr_rule->patterns[i].end_offset = key_right_offset[i];
}
}
expr_rule->expr_id = expr_item->item_id;
expr_rule->user_tag = expr_item->user_data;
expr_rule->tag = expr_item->user_data;
expr_rule->n_patterns = sub_expr_cnt;
return 0;
@@ -810,10 +813,10 @@ int expr_runtime_update(void *expr_runtime, void *expr_schema,
return 0;
}
static void garbage_adapter_hs_free(void *adapter_hs, void *arg)
static void garbage_expr_matcher_free(void *expr_matcher, void *arg)
{
struct adapter_hs *hs = (struct adapter_hs *)adapter_hs;
adapter_hs_free(hs);
struct expr_matcher *matcher = (struct expr_matcher *)expr_matcher;
expr_matcher_free(matcher);
}
int expr_runtime_commit(void *expr_runtime, const char *table_name,
@@ -864,38 +867,42 @@ int expr_runtime_commit(void *expr_runtime, const char *table_name,
}
}
struct adapter_hs *new_adapter_hs = NULL;
struct adapter_hs *old_adapter_hs = NULL;
struct expr_matcher *new_matcher = NULL;
struct expr_matcher *old_matcher = NULL;
if (rule_cnt > 0) {
new_adapter_hs = adapter_hs_new(rules, real_rule_cnt, expr_rt->n_worker_thread,
expr_rt->logger);
if (NULL == new_adapter_hs) {
enum expr_engine_type engine_type = EXPR_ENGINE_TYPE_HS;
if (expr_rt->expr_engine == MAAT_EXPR_ENGINE_RS) {
engine_type = EXPR_ENGINE_TYPE_RS;
}
new_matcher = expr_matcher_new(rules, real_rule_cnt, engine_type,
expr_rt->n_worker_thread, expr_rt->logger);
if (NULL == new_matcher) {
log_error(expr_rt->logger, MODULE_EXPR,
"[%s:%d] table[%s] rebuild adapter_hs engine failed when update"
"[%s:%d] table[%s] rebuild expr_matcher failed when update"
" %zu expr rules", __FUNCTION__, __LINE__, table_name, real_rule_cnt);
ret = -1;
} else {
log_info(expr_rt->logger, MODULE_EXPR,
"table[%s] has %zu rules, commit %zu expr rules(regex rules:%zu) "
"and rebuild adapter_hs completed, version:%lld", table_name, rule_cnt,
real_rule_cnt, real_regex_rule_cnt, maat_rt_version);
}
}
old_adapter_hs = expr_rt->hs;
expr_rt->hs = new_adapter_hs;
old_matcher = expr_rt->matcher;
expr_rt->matcher = new_matcher;
rcu_hash_commit(expr_rt->item_hash);
if (old_adapter_hs != NULL) {
maat_garbage_bagging(expr_rt->ref_garbage_bin, old_adapter_hs, NULL,
garbage_adapter_hs_free);
if (old_matcher != NULL) {
maat_garbage_bagging(expr_rt->ref_garbage_bin, old_matcher, NULL, garbage_expr_matcher_free);
}
expr_rt->rule_num = real_rule_cnt;
expr_rt->regex_rule_num = real_regex_rule_cnt;
expr_rt->version = maat_rt_version;
log_info(expr_rt->logger, MODULE_EXPR,
"table[%s] has %zu rules, commit %zu expr rules(regex rules:%zu) "
"and rebuild adapter_hs completed, version:%lld", table_name, rule_cnt,
real_rule_cnt, real_regex_rule_cnt, expr_rt->version);
if (rules != NULL) {
for (i = 0; i < rule_cnt; i++) {
expr_rule_reset(&rules[i]);
@@ -949,15 +956,14 @@ int expr_runtime_scan(struct expr_runtime *expr_rt, int thread_id,
return 0;
}
if (NULL == expr_rt->hs) {
if (NULL == expr_rt->matcher) {
return 0;
}
size_t n_hit_item = 0;
struct hs_scan_result hit_results[MAX_SCANNER_HIT_ITEM_NUM];
int ret = adapter_hs_scan(expr_rt->hs, thread_id, data, data_len,
hit_results, MAX_SCANNER_HIT_ITEM_NUM,
&n_hit_item);
struct expr_scan_result hit_results[MAX_SCANNER_HIT_ITEM_NUM];
int ret = expr_matcher_match(expr_rt->matcher, thread_id, data, data_len,
hit_results, MAX_SCANNER_HIT_ITEM_NUM, &n_hit_item);
if (ret < 0) {
return -1;
}
@@ -1000,14 +1006,15 @@ int expr_runtime_scan(struct expr_runtime *expr_rt, int thread_id,
return real_hit_item_cnt;
}
struct adapter_hs_stream *
struct expr_matcher_stream *
expr_runtime_stream_open(struct expr_runtime *expr_rt, int thread_id)
{
if (NULL == expr_rt || thread_id < 0) {
return NULL;
}
struct adapter_hs_stream *stream = adapter_hs_stream_open(expr_rt->hs, thread_id);
struct expr_matcher_stream *stream = expr_matcher_stream_open(expr_rt->matcher,
thread_id);
if (NULL == stream) {
return NULL;
}
@@ -1016,7 +1023,7 @@ expr_runtime_stream_open(struct expr_runtime *expr_rt, int thread_id)
}
int expr_runtime_stream_scan(struct expr_runtime *expr_rt,
struct adapter_hs_stream *s_handle,
struct expr_matcher_stream *s_handle,
const char *data, size_t data_len,
int vtable_id, struct maat_state *state)
{
@@ -1026,9 +1033,9 @@ int expr_runtime_stream_scan(struct expr_runtime *expr_rt,
}
size_t n_hit_item = 0;
struct hs_scan_result hit_results[MAX_SCANNER_HIT_ITEM_NUM];
struct expr_scan_result hit_results[MAX_SCANNER_HIT_ITEM_NUM];
int ret = adapter_hs_scan_stream(s_handle, data, data_len, hit_results,
int ret = expr_matcher_stream_match(s_handle, data, data_len, hit_results,
MAX_SCANNER_HIT_ITEM_NUM, &n_hit_item);
if (ret < 0) {
return -1;
@@ -1067,13 +1074,13 @@ int expr_runtime_stream_scan(struct expr_runtime *expr_rt,
}
void expr_runtime_stream_close(struct expr_runtime *expr_rt, int thread_id,
struct adapter_hs_stream *s_handle)
struct expr_matcher_stream *stream)
{
if (NULL == expr_rt || thread_id < 0 || NULL == s_handle) {
if (NULL == expr_rt || thread_id < 0 || NULL == stream) {
return;
}
adapter_hs_stream_close(s_handle);
expr_matcher_stream_close(stream);
}
void expr_runtime_hit_inc(struct expr_runtime *expr_rt, int thread_id)

View File

@@ -47,6 +47,7 @@ struct table_manager {
struct rule_tag *accept_tags;
size_t n_accept_tag;
enum maat_expr_engine expr_engine;
int default_compile_table_id;
int g2g_table_id;
struct maat_kv_store *tablename2id_map;
@@ -661,7 +662,8 @@ static int register_tablename2id(cJSON *json, struct maat_kv_store *tablename2id
struct table_manager *
table_manager_create(const char *table_info_path, const char *accept_tags,
struct maat_garbage_bin *garbage_bin, struct log_handle *logger)
enum maat_expr_engine expr_engine, struct maat_garbage_bin *garbage_bin,
struct log_handle *logger)
{
if (NULL == table_info_path) {
return NULL;
@@ -702,6 +704,7 @@ table_manager_create(const char *table_info_path, const char *accept_tags,
tbl_mgr->n_accept_tag = parse_accept_tag(accept_tags, &tbl_mgr->accept_tags, logger);
tbl_mgr->logger = logger;
tbl_mgr->tablename2id_map = maat_kv_store_new();
tbl_mgr->expr_engine = expr_engine;
tbl_mgr->ref_garbage_bin = garbage_bin;
for (int i = 0; i < json_array_size; i++) {
@@ -1018,6 +1021,15 @@ int table_manager_get_valid_column(struct table_manager *tbl_mgr, int table_id)
return tbl_mgr->tbl[table_id]->valid_column;
}
/*
 * Return the expression engine stored in the table manager.
 * A NULL manager yields MAAT_EXPR_ENGINE_HS.
 */
enum maat_expr_engine table_manager_get_expr_engine(struct table_manager *tbl_mgr)
{
    return (tbl_mgr != NULL) ? tbl_mgr->expr_engine : MAAT_EXPR_ENGINE_HS;
}
size_t table_manager_accept_tags_count(struct table_manager *tbl_mgr)
{
if (NULL == tbl_mgr) {

View File

@@ -3,7 +3,9 @@ include_directories(${PROJECT_SOURCE_DIR}/include)
include_directories(${PROJECT_SOURCE_DIR}/src/inc_internal)
include_directories(${PROJECT_SOURCE_DIR}/deps)
include_directories(${PROJECT_SOURCE_DIR}/scanner)
include_directories(${PROJECT_SOURCE_DIR}/scanner/adapter_hs)
include_directories(${PROJECT_SOURCE_DIR}/scanner/expr_matcher)
include_directories(${PROJECT_SOURCE_DIR}/scanner/expr_matcher/adapter_hs)
include_directories(${PROJECT_SOURCE_DIR}/scanner/expr_matcher/adapter_rs)
include_directories(${PROJECT_SOURCE_DIR}/scanner/ip_matcher)
include_directories(${PROJECT_SOURCE_DIR}/scanner/bool_matcher)
@@ -19,8 +21,8 @@ target_link_libraries(maat_framework_gtest maat_frame_static gtest_static)
add_executable(maat_framework_perf_gtest maat_framework_perf_gtest.cpp)
target_link_libraries(maat_framework_perf_gtest maat_frame_static gtest_static)
add_executable(adapter_hs_gtest adapter_hs_gtest.cpp)
target_link_libraries(adapter_hs_gtest maat_frame_static gtest_static)
add_executable(expr_matcher_gtest expr_matcher_gtest.cpp)
target_link_libraries(expr_matcher_gtest maat_frame_static gtest_static)
add_executable(ip_matcher_gtest ip_matcher_gtest.cpp)
target_link_libraries(ip_matcher_gtest maat_frame_static gtest_static)
@@ -32,6 +34,7 @@ add_executable(maat_ex_data_gtest maat_ex_data_gtest.cpp)
target_link_libraries(maat_ex_data_gtest maat_frame_static gtest_static)
add_subdirectory(group_exclude)
add_subdirectory(benchmark)
file(COPY table_info.conf DESTINATION ./)
file(COPY tsg_table_info.conf DESTINATION ./)
@@ -39,13 +42,13 @@ file(COPY file_test_tableinfo.conf DESTINATION ./)
file(COPY literal_expr.conf DESTINATION ./)
file(COPY regex_expr.conf DESTINATION ./)
file(COPY maat_json.json DESTINATION ./)
file(COPY maat_json.json DESTINATION ../tools/)
file(COPY ntcrule DESTINATION ./)
file(COPY tsgrule DESTINATION ./)
file(COPY testdata DESTINATION ./)
file(COPY test_streamfiles DESTINATION ./)
file(COPY json_update DESTINATION ./)
file(COPY group_exclude DESTINATION ./)
file(COPY benchmark DESTINATION ./)
include(GoogleTest)
gtest_discover_tests(maat_framework_gtest)

View File

@@ -1,730 +0,0 @@
#include <gtest/gtest.h>
#include "log/log.h"
#include "adapter_hs.h"
#include "maat_utils.h"
#include "cJSON/cJSON.h"
struct log_handle *g_logger = NULL;
/*
 * Map a match-method name from the JSON fixture ("sub", "exactly",
 * "prefix", "suffix") to the corresponding hs_match_mode value.
 * Any other name trips assert(0); when assertions are compiled out the
 * function returns HS_MATCH_MODE_INVALID.
 */
enum hs_match_mode match_method_to_match_mode(const char *method)
{
    if (0 == strcmp(method, "sub")) {
        return HS_MATCH_MODE_SUB;
    }
    if (0 == strcmp(method, "exactly")) {
        return HS_MATCH_MODE_EXACTLY;
    }
    if (0 == strcmp(method, "prefix")) {
        return HS_MATCH_MODE_PREFIX;
    }
    if (0 == strcmp(method, "suffix")) {
        return HS_MATCH_MODE_SUFFIX;
    }

    /* unknown method name: fixture error */
    assert(0);
    return HS_MATCH_MODE_INVALID;
}
enum hs_case_sensitive case_sensitive_str_to_enum(const char *str)
{
enum hs_case_sensitive case_sensitive = HS_CASE_SENSITIVE;
if (strcmp(str, "yes") == 0) {
case_sensitive = HS_CASE_SENSITIVE;
} else if (strcmp(str, "no") == 0) {
case_sensitive = HS_CASE_INSENSITIVE;
} else {
assert(0);
}
return case_sensitive;
}
/*
 * Convert the fixture's "is_hexbin" string to an int flag:
 * exactly "yes" gives 1, every other string gives 0.
 */
int is_hexbin_str_to_int(const char *str)
{
    return (0 == strcmp(str, "yes")) ? 1 : 0;
}
/*
 * Convert one hexadecimal digit character ('0'-'9', 'a'-'f', 'A'-'F')
 * to its numeric value 0..15.
 * Input is not validated: a non-hex character yields an unspecified
 * (but deterministic) value, exactly as before.
 */
static int convertHextoint(char srctmp)
{
    /* <ctype.h> functions take an int that must be representable as
     * unsigned char (or EOF); a negative plain char is undefined. */
    unsigned char ch = (unsigned char)srctmp;

    if (isdigit(ch)) {
        return ch - '0';
    } else {
        char temp = (char)toupper(ch);
        temp = (char)(temp - 'A' + 10);
        return temp;
    }
}

/*
 * Decode a hexadecimal string into raw bytes.
 *
 * hex      input characters, two per output byte
 * hex_len  number of characters in hex; a trailing unpaired digit is
 *          ignored rather than decoded together with whatever follows
 * binary   output buffer
 * size     capacity of binary in bytes
 *
 * Returns the number of bytes written to binary (at most size).
 * A terminating '\0' is appended only when the buffer still has room for
 * it, so the function never writes past binary[size - 1].
 */
static size_t hex2bin(char *hex, int hex_len, char *binary, size_t size)
{
    size_t resultlen = 0;

    /* consume only complete digit pairs and stop when the buffer is full */
    for (int i = 0; i + 1 < hex_len && resultlen < size; i += 2, resultlen++) {
        int high = convertHextoint(hex[i]);
        int low = convertHextoint(hex[i + 1]);
        binary[resultlen] = (char)(high * 16 + low);
    }

    /* the original unconditionally wrote binary[resultlen], which is one
     * past the end when the decoded data fills the buffer exactly */
    if (resultlen < size) {
        binary[resultlen] = '\0';
    }

    return resultlen;
}
enum hs_pattern_type pattern_type_str_to_enum(const char *str)
{
enum hs_pattern_type pattern_type;
if (strcmp(str, "regex") == 0) {
pattern_type = HS_PATTERN_TYPE_REG;
} else if (strcmp(str, "literal") == 0) {
pattern_type = HS_PATTERN_TYPE_STR;
} else {
assert(0);
}
return pattern_type;
}
/*
 * Load expression rules from a JSON fixture file into exprs[].
 *
 * The file must contain an "expr_rules" array. Each element may carry
 * "expr_id" and "pattern_num" numbers and must carry a "patterns" array
 * whose elements are read for "pattern_type", "match_method",
 * "case_sensitive", "is_hexbin", "pattern" and (for sub matches) "offset".
 *
 * filename  path of the JSON file
 * exprs     output array; no bounds check is done here, so the caller must
 *           supply enough elements for every rule in the file
 * n_expr    receives the number of rules, written only on success
 *
 * Returns -1 when the file cannot be loaded, the JSON cannot be parsed,
 * "expr_rules" is missing, or a rule has no "patterns" array; otherwise
 * returns the (non-negative) result of load_file_to_memory().
 * Each patterns[j].pat is heap-allocated here and must be released by the
 * caller (expr_array_free() does this with free()).
 */
int parse_config_file(const char *filename, struct expr_rule exprs[], size_t *n_expr)
{
    unsigned char *json_buff = NULL;
    size_t json_buff_size = 0;

    int ret = load_file_to_memory(filename, &json_buff, &json_buff_size);
    if (ret < 0) {
        printf("load file:%s to memory failed.\n", filename);
        return -1;
    }

    size_t rule_cnt = 0;
    cJSON *rules_obj = NULL;
    cJSON *root = cJSON_Parse((const char *)json_buff);
    if (NULL == root) {
        printf("Error before: %-200.200s\n", cJSON_GetErrorPtr());
        ret = -1;
        goto next;
    }

    rules_obj = cJSON_GetObjectItem(root, "expr_rules");
    if (NULL == rules_obj) {
        printf("Error before: %-200.200s\n", cJSON_GetErrorPtr());
        ret = -1;
        goto next;
    }

    rule_cnt = cJSON_GetArraySize(rules_obj);
    for (size_t i = 0; i < rule_cnt; i++) {
        cJSON *expr_obj = cJSON_GetArrayItem(rules_obj, i);
        cJSON *tmp_item = cJSON_GetObjectItem(expr_obj, "expr_id");
        if (tmp_item != NULL && tmp_item->type == cJSON_Number) {
            exprs[i].expr_id = tmp_item->valueint;
        }

        tmp_item = cJSON_GetObjectItem(expr_obj, "pattern_num");
        if (tmp_item != NULL && tmp_item->type == cJSON_Number) {
            exprs[i].n_patterns = tmp_item->valueint;
        }

        tmp_item = cJSON_GetObjectItem(expr_obj, "patterns");
        if (NULL == tmp_item || tmp_item->type != cJSON_Array) {
            printf("json has no patterns array.\n");
            ret = -1;
            goto next;
        }

        size_t pattern_cnt = cJSON_GetArraySize(tmp_item);
        for (size_t j = 0; j < pattern_cnt; j++) {
            cJSON *pat_item = cJSON_GetArrayItem(tmp_item, j);
            cJSON *item = cJSON_GetObjectItem(pat_item, "pattern_type");
            if (item != NULL && item->type == cJSON_String) {
                exprs[i].patterns[j].pattern_type = pattern_type_str_to_enum(item->valuestring);
            }

            item = cJSON_GetObjectItem(pat_item, "match_method");
            if (item != NULL && item->type == cJSON_String) {
                exprs[i].patterns[j].match_mode = match_method_to_match_mode(item->valuestring);
            }

            item = cJSON_GetObjectItem(pat_item, "case_sensitive");
            if (item != NULL && item->type == cJSON_String) {
                exprs[i].patterns[j].case_sensitive = case_sensitive_str_to_enum(item->valuestring);
            }

            item = cJSON_GetObjectItem(pat_item, "is_hexbin");
            if (item != NULL && item->type == cJSON_String) {
                exprs[i].patterns[j].is_hexbin = is_hexbin_str_to_int(item->valuestring);
            }

            item = cJSON_GetObjectItem(pat_item, "pattern");
            if (item != NULL && item->type == cJSON_String) {
                exprs[i].patterns[j].pat = ALLOC(char, strlen(item->valuestring) + 1);

                if (exprs[i].patterns[j].is_hexbin == 1) {
                    /* decode the hex text into a scratch buffer, then copy
                     * the decoded bytes into the pattern */
                    size_t pat_str_len = strlen(item->valuestring) + 1;
                    char *pat_str = ALLOC(char, pat_str_len);
                    pat_str_len = hex2bin(item->valuestring, strlen(item->valuestring),
                                          pat_str, pat_str_len);
                    memcpy(exprs[i].patterns[j].pat, pat_str, pat_str_len);
                    free(pat_str);
                    exprs[i].patterns[j].pat_len = pat_str_len;
                } else {
                    memcpy(exprs[i].patterns[j].pat, item->valuestring,
                           strlen(item->valuestring));
                    exprs[i].patterns[j].pat_len = strlen(item->valuestring);
                }
            }

            /* Offsets depend on THIS pattern's match mode. The previous
             * code tested exprs[i].patterns->match_mode, i.e. always the
             * first pattern of the rule. */
            if (exprs[i].patterns[j].match_mode == HS_MATCH_MODE_SUB) {
                item = cJSON_GetObjectItem(pat_item, "offset");
                if (item != NULL && item->type == cJSON_String) {
                    int key_left_offset = -1;
                    int key_right_offset = -1;
                    sscanf(item->valuestring, "%d~%d", &key_left_offset, &key_right_offset);
                    if (key_left_offset < -1 || key_right_offset < -1) {
                        printf("Error: offset should not less than -1, left_offset:%d, right_offset:%d\n",
                               key_left_offset, key_right_offset);
                    }
                    exprs[i].patterns[j].start_offset = key_left_offset;
                    exprs[i].patterns[j].end_offset = key_right_offset;
                } else {
                    /* no "offset" string: -1 is stored for both ends */
                    exprs[i].patterns[j].start_offset = -1;
                    exprs[i].patterns[j].end_offset = -1;
                }
            }

            if (exprs[i].patterns[j].match_mode == HS_MATCH_MODE_EXACTLY) {
                exprs[i].patterns[j].start_offset = 0;
                exprs[i].patterns[j].end_offset = exprs[i].patterns[j].pat_len - 1;
            }
        }
        /* the actual array length overrides any "pattern_num" value */
        exprs[i].n_patterns = pattern_cnt;
    }

    *n_expr = rule_cnt;

next:
    cJSON_Delete(root);
    FREE(json_buff);
    return ret;
}
void expr_array_free(struct expr_rule rules[], size_t n_rule)
{
for (size_t i = 0; i < n_rule; i++) {
for (size_t j = 0; j < rules[i].n_patterns; j++) {
if (rules[i].patterns[j].pat != NULL) {
free(rules[i].patterns[j].pat);
rules[i].patterns[j].pat = NULL;
}
}
}
}
/*
 * adapter_hs_new() must return NULL for each of the invalid inputs below.
 */
TEST(adapter_hs_init, invalid_input_parameter)
{
    /* zero-initialised like the other tests, so adapter_hs_new() never
     * sees indeterminate rule contents */
    struct expr_rule rules[64] = {0};
    size_t n_rule = 0;

    /* NULL rule array */
    struct adapter_hs *hs_instance = adapter_hs_new(NULL, 0, 1, g_logger);
    EXPECT_TRUE(hs_instance == NULL);

    /* valid array pointer but zero rules */
    hs_instance = adapter_hs_new(rules, n_rule, 1, g_logger);
    EXPECT_TRUE(hs_instance == NULL);

    /* one rule claiming 10 patterns
     * NOTE(review): presumably above the per-rule pattern limit - confirm */
    n_rule = 1;
    rules[0].expr_id = 101;
    rules[0].n_patterns = 10;
    hs_instance = adapter_hs_new(rules, n_rule, 1, g_logger);
    EXPECT_TRUE(hs_instance == NULL);

    /* one rule claiming 1 pattern whose pattern data is all zero */
    memset(rules, 0, sizeof(rules));
    n_rule = 1;
    rules[0].expr_id = 101;
    rules[0].n_patterns = 1;
    hs_instance = adapter_hs_new(rules, n_rule, 1, g_logger);
    EXPECT_TRUE(hs_instance == NULL);
}
/*
 * Scans "hello aaa" shifted right by 0..3 bytes and expects rule 101 to
 * hit only for shifts of 1 and 2.
 * NOTE(review): the offset window itself lives in literal_expr.conf -
 * confirm against the fixture.
 */
TEST(adapter_hs_scan, literal_sub_has_normal_offset)
{
    struct expr_rule rules[64] = {0};
    size_t n_rule = 0;

    int ret = parse_config_file("./literal_expr.conf", rules, &n_rule);
    EXPECT_EQ(ret, 0);

    struct adapter_hs *hs_instance = adapter_hs_new(rules, n_rule, 1, g_logger);
    /* release the parsed rules first so a fatal assertion cannot leak them */
    expr_array_free(rules, n_rule);
    /* scanning is meaningless without a matcher: abort instead of continuing */
    ASSERT_TRUE(hs_instance != NULL);

    char scan_data1[64] = "hello aaa";
    struct hs_scan_result result[64] = {0};
    size_t n_result = 0;
    ret = adapter_hs_scan(hs_instance, 0, scan_data1, strlen(scan_data1), result, 64, &n_result);
    EXPECT_EQ(ret, 0);
    EXPECT_EQ(n_result, 0);

    char scan_data2[64] = "Ahello aaa";
    memset(result, 0, sizeof(result));
    n_result = 0;
    ret = adapter_hs_scan(hs_instance, 0, scan_data2, strlen(scan_data2), result, 64, &n_result);
    EXPECT_EQ(ret, 0);
    EXPECT_EQ(n_result, 1);
    EXPECT_EQ(result[0].rule_id, 101);

    char scan_data3[64] = "Aahello aaa";
    memset(result, 0, sizeof(result));
    n_result = 0;
    ret = adapter_hs_scan(hs_instance, 0, scan_data3, strlen(scan_data3), result, 64, &n_result);
    EXPECT_EQ(ret, 0);
    EXPECT_EQ(n_result, 1);
    EXPECT_EQ(result[0].rule_id, 101);

    char scan_data4[64] = "Aaahello aaa";
    memset(result, 0, sizeof(result));
    n_result = 0;
    ret = adapter_hs_scan(hs_instance, 0, scan_data4, strlen(scan_data4), result, 64, &n_result);
    EXPECT_EQ(ret, 0);
    EXPECT_EQ(n_result, 0);

    adapter_hs_free(hs_instance);
    hs_instance = NULL;
}
/*
 * Scans "hello bbb" shifted right by 0..3 bytes and expects rule 102 to
 * hit for shifts 0, 1 and 2 but not 3.
 * NOTE(review): the offset window itself lives in literal_expr.conf -
 * confirm against the fixture.
 */
TEST(adapter_hs_scan, literal_sub_has_left_unlimit_offset)
{
    struct expr_rule rules[64] = {0};
    size_t n_rule = 0;

    int ret = parse_config_file("./literal_expr.conf", rules, &n_rule);
    EXPECT_EQ(ret, 0);

    struct adapter_hs *hs_instance = adapter_hs_new(rules, n_rule, 1, g_logger);
    /* release the parsed rules first so a fatal assertion cannot leak them */
    expr_array_free(rules, n_rule);
    /* scanning is meaningless without a matcher: abort instead of continuing */
    ASSERT_TRUE(hs_instance != NULL);

    char scan_data1[64] = "hello bbb";
    struct hs_scan_result result[64] = {0};
    size_t n_result = 0;
    ret = adapter_hs_scan(hs_instance, 0, scan_data1, strlen(scan_data1), result, 64, &n_result);
    EXPECT_EQ(ret, 0);
    EXPECT_EQ(n_result, 1);
    EXPECT_EQ(result[0].rule_id, 102);

    char scan_data2[64] = "Ahello bbb";
    memset(result, 0, sizeof(result));
    n_result = 0;
    ret = adapter_hs_scan(hs_instance, 0, scan_data2, strlen(scan_data2), result, 64, &n_result);
    EXPECT_EQ(ret, 0);
    EXPECT_EQ(n_result, 1);
    EXPECT_EQ(result[0].rule_id, 102);

    char scan_data3[64] = "Aahello bbb";
    memset(result, 0, sizeof(result));
    n_result = 0;
    ret = adapter_hs_scan(hs_instance, 0, scan_data3, strlen(scan_data3), result, 64, &n_result);
    EXPECT_EQ(ret, 0);
    EXPECT_EQ(n_result, 1);
    EXPECT_EQ(result[0].rule_id, 102);

    char scan_data4[64] = "Aaahello bbb";
    memset(result, 0, sizeof(result));
    n_result = 0;
    ret = adapter_hs_scan(hs_instance, 0, scan_data4, strlen(scan_data4), result, 64, &n_result);
    EXPECT_EQ(ret, 0);
    EXPECT_EQ(n_result, 0);

    adapter_hs_free(hs_instance);
    hs_instance = NULL;
}
/*
 * Scans "hello ccc" behind prefixes of 0, 4, 5 and 6 bytes and expects
 * rule 103 to hit only once the prefix is at least 5 bytes long, with or
 * without trailing data.
 * NOTE(review): the offset window itself lives in literal_expr.conf -
 * confirm against the fixture.
 */
TEST(adapter_hs_scan, literal_sub_has_right_unlimit_offset)
{
    struct expr_rule rules[64] = {0};
    size_t n_rule = 0;

    int ret = parse_config_file("./literal_expr.conf", rules, &n_rule);
    EXPECT_EQ(ret, 0);

    struct adapter_hs *hs_instance = adapter_hs_new(rules, n_rule, 1, g_logger);
    /* release the parsed rules first so a fatal assertion cannot leak them */
    expr_array_free(rules, n_rule);
    /* scanning is meaningless without a matcher: abort instead of continuing */
    ASSERT_TRUE(hs_instance != NULL);

    char scan_data1[64] = "hello ccc";
    struct hs_scan_result result[64] = {0};
    size_t n_result = 0;
    ret = adapter_hs_scan(hs_instance, 0, scan_data1, strlen(scan_data1), result, 64, &n_result);
    EXPECT_EQ(ret, 0);
    EXPECT_EQ(n_result, 0);

    char scan_data2[64] = "1234hello ccc";
    memset(result, 0, sizeof(result));
    n_result = 0;
    ret = adapter_hs_scan(hs_instance, 0, scan_data2, strlen(scan_data2), result, 64, &n_result);
    EXPECT_EQ(ret, 0);
    EXPECT_EQ(n_result, 0);

    char scan_data3[64] = "12345hello ccc";
    memset(result, 0, sizeof(result));
    n_result = 0;
    ret = adapter_hs_scan(hs_instance, 0, scan_data3, strlen(scan_data3), result, 64, &n_result);
    EXPECT_EQ(ret, 0);
    EXPECT_EQ(n_result, 1);
    EXPECT_EQ(result[0].rule_id, 103);

    char scan_data4[64] = "12345hello cccAaBb";
    memset(result, 0, sizeof(result));
    n_result = 0;
    ret = adapter_hs_scan(hs_instance, 0, scan_data4, strlen(scan_data4), result, 64, &n_result);
    EXPECT_EQ(ret, 0);
    EXPECT_EQ(n_result, 1);
    EXPECT_EQ(result[0].rule_id, 103);

    char scan_data5[64] = "123456hello cccAaBb";
    memset(result, 0, sizeof(result));
    n_result = 0;
    ret = adapter_hs_scan(hs_instance, 0, scan_data5, strlen(scan_data5), result, 64, &n_result);
    EXPECT_EQ(ret, 0);
    EXPECT_EQ(n_result, 1);
    EXPECT_EQ(result[0].rule_id, 103);

    adapter_hs_free(hs_instance);
    hs_instance = NULL;
}
/*
 * Expects rule 104 to hit "hello ddd" wherever it appears in the input
 * (bare, with a prefix, with prefix and suffix) and to miss "helloddd".
 */
TEST(adapter_hs_scan, literal_sub_with_no_offset)
{
    struct expr_rule rules[64] = {0};
    size_t n_rule = 0;

    int ret = parse_config_file("./literal_expr.conf", rules, &n_rule);
    EXPECT_EQ(ret, 0);

    struct adapter_hs *hs_instance = adapter_hs_new(rules, n_rule, 1, g_logger);
    /* release the parsed rules first so a fatal assertion cannot leak them */
    expr_array_free(rules, n_rule);
    /* scanning is meaningless without a matcher: abort instead of continuing */
    ASSERT_TRUE(hs_instance != NULL);

    char scan_data1[64] = "hello ddd";
    struct hs_scan_result result[64] = {0};
    size_t n_result = 0;
    ret = adapter_hs_scan(hs_instance, 0, scan_data1, strlen(scan_data1), result, 64, &n_result);
    EXPECT_EQ(ret, 0);
    EXPECT_EQ(n_result, 1);
    EXPECT_EQ(result[0].rule_id, 104);

    char scan_data2[64] = "123hello ddd";
    memset(result, 0, sizeof(result));
    n_result = 0;
    ret = adapter_hs_scan(hs_instance, 0, scan_data2, strlen(scan_data2), result, 64, &n_result);
    EXPECT_EQ(ret, 0);
    EXPECT_EQ(n_result, 1);
    EXPECT_EQ(result[0].rule_id, 104);

    char scan_data3[64] = "123hello ddd456";
    memset(result, 0, sizeof(result));
    n_result = 0;
    ret = adapter_hs_scan(hs_instance, 0, scan_data3, strlen(scan_data3), result, 64, &n_result);
    EXPECT_EQ(ret, 0);
    EXPECT_EQ(n_result, 1);
    EXPECT_EQ(result[0].rule_id, 104);

    char scan_data4[64] = "helloddd";
    memset(result, 0, sizeof(result));
    n_result = 0;
    ret = adapter_hs_scan(hs_instance, 0, scan_data4, strlen(scan_data4), result, 64, &n_result);
    EXPECT_EQ(ret, 0);
    EXPECT_EQ(n_result, 0);

    adapter_hs_free(hs_instance);
    hs_instance = NULL;
}
/*
 * Expects rule 105 to hit only when the input is exactly "hello eee":
 * an extra leading or trailing byte must produce no hit.
 */
TEST(adapter_hs_scan, literal_exactly)
{
    struct expr_rule rules[64] = {0};
    size_t n_rule = 0;

    int ret = parse_config_file("./literal_expr.conf", rules, &n_rule);
    EXPECT_EQ(ret, 0);

    struct adapter_hs *hs_instance = adapter_hs_new(rules, n_rule, 1, g_logger);
    /* release the parsed rules first so a fatal assertion cannot leak them */
    expr_array_free(rules, n_rule);
    /* scanning is meaningless without a matcher: abort instead of continuing */
    ASSERT_TRUE(hs_instance != NULL);

    char scan_data1[64] = "hello eee";
    struct hs_scan_result result[64] = {0};
    size_t n_result = 0;
    ret = adapter_hs_scan(hs_instance, 0, scan_data1, strlen(scan_data1), result, 64, &n_result);
    EXPECT_EQ(ret, 0);
    EXPECT_EQ(n_result, 1);
    EXPECT_EQ(result[0].rule_id, 105);

    char scan_data2[64] = "Ahello eee";
    memset(result, 0, sizeof(result));
    n_result = 0;
    ret = adapter_hs_scan(hs_instance, 0, scan_data2, strlen(scan_data2), result, 64, &n_result);
    EXPECT_EQ(ret, 0);
    EXPECT_EQ(n_result, 0);

    char scan_data3[64] = "hello eeeB";
    memset(result, 0, sizeof(result));
    n_result = 0;
    ret = adapter_hs_scan(hs_instance, 0, scan_data3, strlen(scan_data3), result, 64, &n_result);
    EXPECT_EQ(ret, 0);
    EXPECT_EQ(n_result, 0);

    adapter_hs_free(hs_instance);
    hs_instance = NULL;
}
/*
 * Expects rule 106 to hit only when the input starts with "hello fff";
 * trailing data is allowed, leading data is not.
 */
TEST(adapter_hs_scan, literal_prefix)
{
    struct expr_rule rules[64] = {0};
    size_t n_rule = 0;

    int ret = parse_config_file("./literal_expr.conf", rules, &n_rule);
    EXPECT_EQ(ret, 0);

    struct adapter_hs *hs_instance = adapter_hs_new(rules, n_rule, 1, g_logger);
    /* release the parsed rules first so a fatal assertion cannot leak them */
    expr_array_free(rules, n_rule);
    /* scanning is meaningless without a matcher: abort instead of continuing */
    ASSERT_TRUE(hs_instance != NULL);

    char scan_data1[64] = "hello fff";
    struct hs_scan_result result[64] = {0};
    size_t n_result = 0;
    ret = adapter_hs_scan(hs_instance, 0, scan_data1, strlen(scan_data1), result, 64, &n_result);
    EXPECT_EQ(ret, 0);
    EXPECT_EQ(n_result, 1);
    EXPECT_EQ(result[0].rule_id, 106);

    char scan_data2[64] = "Ahello fff";
    memset(result, 0, sizeof(result));
    n_result = 0;
    ret = adapter_hs_scan(hs_instance, 0, scan_data2, strlen(scan_data2), result, 64, &n_result);
    EXPECT_EQ(ret, 0);
    EXPECT_EQ(n_result, 0);

    char scan_data3[64] = "Ahello fffBCD";
    memset(result, 0, sizeof(result));
    n_result = 0;
    ret = adapter_hs_scan(hs_instance, 0, scan_data3, strlen(scan_data3), result, 64, &n_result);
    EXPECT_EQ(ret, 0);
    EXPECT_EQ(n_result, 0);

    char scan_data4[64] = "hello fffBCD";
    memset(result, 0, sizeof(result));
    n_result = 0;
    ret = adapter_hs_scan(hs_instance, 0, scan_data4, strlen(scan_data4), result, 64, &n_result);
    EXPECT_EQ(ret, 0);
    EXPECT_EQ(n_result, 1);
    EXPECT_EQ(result[0].rule_id, 106);

    adapter_hs_free(hs_instance);
    hs_instance = NULL;
}
// Checks the rule reported as id 107 against four inputs: data that ends with
// "hello ggg" is expected to hit (with or without leading bytes), while data that
// has extra bytes after it is expected to miss.
TEST(adapter_hs_scan, literal_suffix)
{
    struct expr_rule rules[64] = {0};
    size_t n_rule = 0;

    int ret = parse_config_file("./literal_expr.conf", rules, &n_rule);
    EXPECT_EQ(ret, 0);

    struct adapter_hs *hs_instance = adapter_hs_new(rules, n_rule, 1, g_logger);
    // The parsed rules are not used after construction; they are released before the
    // fatal assertion so that an early return cannot leak them.
    expr_array_free(rules, n_rule);
    // Fatal on purpose: none of the scans below is meaningful without an instance.
    ASSERT_TRUE(hs_instance != NULL);

    // exact text -> hit
    char scan_data1[64] = "hello ggg";
    struct hs_scan_result result[64] = {0};
    size_t n_result = 0;
    ret = adapter_hs_scan(hs_instance, 0, scan_data1, strlen(scan_data1), result, 64, &n_result);
    EXPECT_EQ(ret, 0);
    EXPECT_EQ(n_result, 1);
    EXPECT_EQ(result[0].rule_id, 107);

    // leading bytes only -> hit
    char scan_data2[64] = "ABChello ggg";
    memset(result, 0, sizeof(result));
    n_result = 0;
    ret = adapter_hs_scan(hs_instance, 0, scan_data2, strlen(scan_data2), result, 64, &n_result);
    EXPECT_EQ(ret, 0);
    EXPECT_EQ(n_result, 1);
    EXPECT_EQ(result[0].rule_id, 107);

    // leading and trailing bytes -> miss
    char scan_data3[64] = "ABChello gggDEF";
    memset(result, 0, sizeof(result));
    n_result = 0;
    ret = adapter_hs_scan(hs_instance, 0, scan_data3, strlen(scan_data3), result, 64, &n_result);
    EXPECT_EQ(ret, 0);
    EXPECT_EQ(n_result, 0);

    // trailing bytes only -> miss
    char scan_data4[64] = "hello gggDEF";
    memset(result, 0, sizeof(result));
    n_result = 0;
    ret = adapter_hs_scan(hs_instance, 0, scan_data4, strlen(scan_data4), result, 64, &n_result);
    EXPECT_EQ(ret, 0);
    EXPECT_EQ(n_result, 0);

    adapter_hs_free(hs_instance);
    hs_instance = NULL;
}
// Checks the rule reported as id 108: "Content-Type: /html" is expected to hit it,
// while " html" is expected to produce no result.
// NOTE(review): the test name suggests rule 108 is a hex-encoded sub-string pattern in
// literal_expr.conf - confirm against the config file.
TEST(adapter_hs_scan, literal_sub_with_hexbin)
{
    struct expr_rule rules[64] = {0};
    size_t n_rule = 0;

    int ret = parse_config_file("./literal_expr.conf", rules, &n_rule);
    EXPECT_EQ(ret, 0);

    struct adapter_hs *hs_instance = adapter_hs_new(rules, n_rule, 1, g_logger);
    // The parsed rules are not used after construction; they are released before the
    // fatal assertion so that an early return cannot leak them.
    expr_array_free(rules, n_rule);
    // Fatal on purpose: none of the scans below is meaningful without an instance.
    ASSERT_TRUE(hs_instance != NULL);

    char scan_data1[64] = "Content-Type: /html";
    struct hs_scan_result result[64] = {0};
    size_t n_result = 0;
    ret = adapter_hs_scan(hs_instance, 0, scan_data1, strlen(scan_data1), result, 64, &n_result);
    EXPECT_EQ(ret, 0);
    EXPECT_EQ(n_result, 1);
    EXPECT_EQ(result[0].rule_id, 108);

    char scan_data2[64] = " html";
    memset(result, 0, sizeof(result));
    n_result = 0;
    ret = adapter_hs_scan(hs_instance, 0, scan_data2, strlen(scan_data2), result, 64, &n_result);
    EXPECT_EQ(ret, 0);
    EXPECT_EQ(n_result, 0);

    adapter_hs_free(hs_instance);
    hs_instance = NULL;
}
// Scans a buffer containing multi-byte (non-ASCII) text and expects exactly one
// result, reported as rule id 110.
TEST(adapter_hs_scan, literal_with_chinese)
{
    struct expr_rule rules[64] = {0};
    size_t n_rule = 0;

    int ret = parse_config_file("./literal_expr.conf", rules, &n_rule);
    EXPECT_EQ(ret, 0);

    struct adapter_hs *hs_instance = adapter_hs_new(rules, n_rule, 1, g_logger);
    // The parsed rules are not used after construction; they are released before the
    // fatal assertion so that an early return cannot leak them.
    expr_array_free(rules, n_rule);
    // Fatal on purpose: the scan below is not meaningful without an instance.
    ASSERT_TRUE(hs_instance != NULL);

    char data0[64] = "#中国 你好";
    struct hs_scan_result result0[64] = {0};
    size_t n_result0 = 0;
    ret = adapter_hs_scan(hs_instance, 0, data0, strlen(data0), result0, 64, &n_result0);
    EXPECT_EQ(ret, 0);
    EXPECT_EQ(n_result0, 1);
    EXPECT_EQ(result0[0].rule_id, 110);

    adapter_hs_free(hs_instance);
    hs_instance = NULL;
}
// Scans "onetoday,anothertoday" and expects exactly one result, reported as rule id 112.
// NOTE(review): judging by the test name, rule 112 presumably holds the same pattern
// twice with different offset ranges - confirm against literal_expr.conf.
TEST(adapter_hs_scan, same_pattern_different_offset)
{
    struct expr_rule rules[64] = {0};
    size_t n_rule = 0;

    int ret = parse_config_file("./literal_expr.conf", rules, &n_rule);
    EXPECT_EQ(ret, 0);

    struct adapter_hs *hs_instance = adapter_hs_new(rules, n_rule, 1, g_logger);
    // The parsed rules are not used after construction; they are released before the
    // fatal assertion so that an early return cannot leak them.
    expr_array_free(rules, n_rule);
    // Fatal on purpose: the scan below is not meaningful without an instance.
    ASSERT_TRUE(hs_instance != NULL);

    char data[64] = "onetoday,anothertoday";
    struct hs_scan_result result[64] = {0};
    size_t n_result = 0;
    ret = adapter_hs_scan(hs_instance, 0, data, strlen(data), result, 64, &n_result);
    EXPECT_EQ(ret, 0);
    EXPECT_EQ(n_result, 1);
    EXPECT_EQ(result[0].rule_id, 112);

    adapter_hs_free(hs_instance);
    hs_instance = NULL;
}
// Scans a multi-sentence buffer and expects exactly one result, reported as rule id 113.
TEST(adapter_hs_scan, long_scan_data)
{
    struct expr_rule rules[64] = {0};
    size_t n_rule = 0;

    int ret = parse_config_file("./literal_expr.conf", rules, &n_rule);
    EXPECT_EQ(ret, 0);

    struct adapter_hs *hs_instance = adapter_hs_new(rules, n_rule, 1, g_logger);
    // The parsed rules are not used after construction; they are released before the
    // fatal assertion so that an early return cannot leak them.
    expr_array_free(rules, n_rule);
    // Fatal on purpose: the scan below is not meaningful without an instance.
    ASSERT_TRUE(hs_instance != NULL);

    // The literal is continued with backslash-newline; whatever precedes the text on a
    // continuation line becomes part of the string, so those lines are left untouched.
    const char* scan_data = "A directed path in a directed graph is a finite or infinite\
sequence of edges which joins a sequence of distinct vertices, but with the added restriction\
that the edges be all directed in the same direction.";
    struct hs_scan_result result[64] = {0};
    size_t n_result = 0;
    ret = adapter_hs_scan(hs_instance, 0, scan_data, strlen(scan_data), result, 64, &n_result);
    EXPECT_EQ(ret, 0);
    EXPECT_EQ(n_result, 1);
    EXPECT_EQ(result[0].rule_id, 113);

    adapter_hs_free(hs_instance);
    hs_instance = NULL;
}
// Loads ./regex_expr.conf and hands every pattern of every rule to
// adapter_hs_verify_regex_expression(). Only the parse result is asserted; the
// verifier's own result is not inspected here.
TEST(adapter_hs_scan, regex_expression_check)
{
    size_t n_rule = 0;
    struct expr_rule rules[64] = {0};

    int ret = parse_config_file("./regex_expr.conf", rules, &n_rule);
    EXPECT_EQ(ret, 0);

    for (size_t rule_idx = 0; rule_idx < n_rule; rule_idx++) {
        struct expr_rule *rule = &rules[rule_idx];

        for (size_t pat_idx = 0; pat_idx < rule->n_patterns; pat_idx++) {
            adapter_hs_verify_regex_expression(rule->patterns[pat_idx].pat, g_logger);
        }
    }

    expr_array_free(rules, n_rule);
}
// Test entry point: initialises GoogleTest, opens the log handle shared by all tests,
// runs every registered test and returns the GoogleTest result as the exit status.
int main(int argc, char **argv)
{
    ::testing::InitGoogleTest(&argc, argv);

    g_logger = log_handle_create("./adapter_hs_gtest.log", 0);

    int exit_status = RUN_ALL_TESTS();

    log_handle_destroy(g_logger);

    return exit_status;
}

1330
test/expr_matcher_gtest.cpp Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -5,6 +5,7 @@
"pattern_num": 1,
"patterns": [
{
"pattern_type": "literal",
"match_method": "sub",
"case_sensitive": "yes",
"is_hexbin": "no",
@@ -18,6 +19,7 @@
"pattern_num": 1,
"patterns": [
{
"pattern_type": "literal",
"match_method": "sub",
"case_sensitive": "yes",
"is_hexbin": "no",
@@ -31,6 +33,7 @@
"pattern_num": 1,
"patterns": [
{
"pattern_type": "literal",
"match_method": "sub",
"case_sensitive": "yes",
"is_hexbin": "no",
@@ -44,6 +47,7 @@
"pattern_num": 1,
"patterns": [
{
"pattern_type": "literal",
"match_method": "sub",
"case_sensitive": "yes",
"is_hexbin": "no",
@@ -56,6 +60,7 @@
"pattern_num": 1,
"patterns": [
{
"pattern_type": "literal",
"match_method": "exactly",
"case_sensitive": "yes",
"is_hexbin": "no",
@@ -68,6 +73,7 @@
"pattern_num": 1,
"patterns": [
{
"pattern_type": "literal",
"match_method": "prefix",
"case_sensitive": "yes",
"is_hexbin": "no",
@@ -80,6 +86,7 @@
"pattern_num": 1,
"patterns": [
{
"pattern_type": "literal",
"match_method": "suffix",
"case_sensitive": "yes",
"is_hexbin": "no",
@@ -92,6 +99,7 @@
"pattern_num": 1,
"patterns": [
{
"pattern_type": "literal",
"match_method": "sub",
"case_sensitive": "yes",
"is_hexbin": "yes",
@@ -105,12 +113,14 @@
"pattern_num": 2,
"patterns": [
{
"pattern_type": "literal",
"match_method": "sub",
"case_sensitive": "yes",
"is_hexbin": "no",
"pattern": "multi"
},
{
"pattern_type": "literal",
"match_method": "sub",
"case_sensitive": "yes",
"is_hexbin": "no",
@@ -123,6 +133,7 @@
"pattern_num": 1,
"patterns": [
{
"pattern_type": "literal",
"match_method": "sub",
"case_sensitive": "yes",
"is_hexbin": "no",
@@ -135,6 +146,7 @@
"pattern_num": 1,
"patterns": [
{
"pattern_type": "literal",
"match_method": "sub",
"case_sensitive": "yes",
"is_hexbin": "no",
@@ -147,6 +159,7 @@
"pattern_num": 2,
"patterns": [
{
"pattern_type": "literal",
"match_method": "sub",
"case_sensitive": "yes",
"is_hexbin": "no",
@@ -154,6 +167,7 @@
"offset": "3~7"
},
{
"pattern_type": "literal",
"match_method": "sub",
"case_sensitive": "yes",
"is_hexbin": "no",
@@ -167,12 +181,26 @@
"pattern_num": 1,
"patterns": [
{
"pattern_type": "literal",
"match_method": "sub",
"case_sensitive": "yes",
"is_hexbin": "no",
"pattern": "a finite or infinite"
}
]
},
{
"expr_id": 114,
"pattern_num": 1,
"patterns": [
{
"pattern_type": "regex",
"match_method": "sub",
"case_sensitive": "yes",
"is_hexbin": "no",
"pattern": "query=(.*)"
}
]
}
]
}

File diff suppressed because it is too large Load Diff

View File

@@ -55,7 +55,6 @@ int make_serial_rule(const char *table_name, const char *line, void *u_para)
char *buff = ALLOC(char, strlen(line) + 1);
memcpy(buff, line, strlen(line) + 1);
while (buff[strlen(buff) - 1] == '\n' || buff[strlen(buff) - 1] == '\t') {
buff[strlen(buff) - 1] = '\0';
}
@@ -257,7 +256,53 @@ static int ip_table_set_line(struct maat *maat_inst, const char *table_name, enu
return maat_cmd_set_line(maat_inst, &line_rule);
}
void test_add_expr_command(struct maat *maat_inst, const char *table_name,
// Builds one tab-separated line "item_id, group_id, low_bound, up_bound, op" for an
// integer-interval table and submits it through maat_cmd_set_line().
//
// Returns 0 when the table name cannot be resolved or the line cannot be formatted;
// otherwise returns whatever maat_cmd_set_line() returns.
static int integer_table_set_line(struct maat *maat_inst, const char *table_name,
                                  enum maat_operation op, long long item_id,
                                  long long group_id, int low_bound, int up_bound,
                                  int expire_after)
{
    char table_line[1024] = {0};

    int table_id = maat_get_table_id(maat_inst, table_name);
    if (table_id < 0) {
        return 0;
    }

    // Bounded formatting: a failed or truncated line is reported instead of being sent.
    int n_written = snprintf(table_line, sizeof(table_line), "%lld\t%lld\t%d\t%d\t%d",
                             item_id, group_id, low_bound, up_bound, (int)op);
    if (n_written < 0 || (size_t)n_written >= sizeof(table_line)) {
        return 0;
    }

    // Zeroed first so that any member not assigned below has a defined value.
    struct maat_cmd_line line_rule;
    memset(&line_rule, 0, sizeof(line_rule));
    line_rule.rule_id = item_id;
    line_rule.table_line = table_line;
    line_rule.table_name = table_name;
    line_rule.expire_after = expire_after;

    return maat_cmd_set_line(maat_inst, &line_rule);
}
// Builds one tab-separated line "item_id, group_id, flag, flag_mask, op" for a flag
// table and submits it through maat_cmd_set_line().
//
// Returns 0 when the table name cannot be resolved or the line cannot be formatted;
// otherwise returns whatever maat_cmd_set_line() returns.
static int flag_table_set_line(struct maat *maat_inst, const char *table_name,
                               enum maat_operation op, long long item_id,
                               long long group_id, long long flag,
                               long long flag_mask, int expire_after)
{
    char table_line[1024] = {0};

    int table_id = maat_get_table_id(maat_inst, table_name);
    if (table_id < 0) {
        return 0;
    }

    // Bounded formatting: a failed or truncated line is reported instead of being sent.
    int n_written = snprintf(table_line, sizeof(table_line), "%lld\t%lld\t%lld\t%lld\t%d",
                             item_id, group_id, flag, flag_mask, (int)op);
    if (n_written < 0 || (size_t)n_written >= sizeof(table_line)) {
        return 0;
    }

    // Zeroed first so that any member not assigned below has a defined value.
    struct maat_cmd_line line_rule;
    memset(&line_rule, 0, sizeof(line_rule));
    line_rule.rule_id = item_id;
    line_rule.table_line = table_line;
    line_rule.table_name = table_name;
    line_rule.expire_after = expire_after;

    return maat_cmd_set_line(maat_inst, &line_rule);
}
static void test_add_expr_command(struct maat *maat_inst, const char *table_name,
const char *keywords)
{
long long compile_id = maat_cmd_incrby(maat_inst, "TEST_SEQ", 1);
@@ -271,11 +316,11 @@ void test_add_expr_command(struct maat *maat_inst, const char *table_name,
long long item_id = maat_cmd_incrby(maat_inst, "SEQUENCE_REGION", 1);
ret = expr_table_set_line(maat_inst, table_name, MAAT_OP_ADD, item_id, group_id,
keywords, NULL, 1, 0, 0, 0);
keywords, "null", 1, 0, 0, 0);
EXPECT_EQ(ret, 1);
}
void test_add_ip_command(struct maat *maat_inst, const char *table_name,
static void test_add_ip_command(struct maat *maat_inst, const char *table_name,
const char *ip, uint16_t port)
{
long long compile_id = maat_cmd_incrby(maat_inst, "TEST_SEQ", 1);
@@ -293,6 +338,42 @@ void test_add_ip_command(struct maat *maat_inst, const char *table_name,
EXPECT_EQ(ret, 1);
}
// Adds one integer-interval item [low_bound, up_bound] to table_name, together with a
// fresh compile rule and a fresh group linking to it. Each of the three writes is
// expected to return 1.
static void test_add_integer_command(struct maat *maat_inst, const char *table_name,
                                     int low_bound, int up_bound)
{
    int ret = 0;

    // 1) compile rule
    long long new_compile_id = maat_cmd_incrby(maat_inst, "TEST_SEQ", 1);
    ret = compile_table_set_line(maat_inst, "COMPILE", MAAT_OP_ADD, new_compile_id,
                                 "null", 1, 0);
    EXPECT_EQ(ret, 1);

    // 2) group -> compile relation
    long long new_group_id = maat_cmd_incrby(maat_inst, "SEQUENCE_GROUP", 1);
    ret = group2compile_table_set_line(maat_inst, "GROUP2COMPILE", MAAT_OP_ADD,
                                       new_group_id, new_compile_id, 0, "null", 1, 0);
    EXPECT_EQ(ret, 1);

    // 3) the interval item itself
    long long new_item_id = maat_cmd_incrby(maat_inst, "SEQUENCE_REGION", 1);
    ret = integer_table_set_line(maat_inst, table_name, MAAT_OP_ADD, new_item_id,
                                 new_group_id, low_bound, up_bound, 0);
    EXPECT_EQ(ret, 1);
}
// Adds one flag item (flag, flag_mask) to table_name, together with a fresh compile
// rule and a fresh group linking to it. Each of the three writes is expected to
// return 1.
static void test_add_flag_command(struct maat *maat_inst, const char *table_name,
                                  long long flag, long long flag_mask)
{
    int ret = 0;

    // 1) compile rule
    long long new_compile_id = maat_cmd_incrby(maat_inst, "TEST_SEQ", 1);
    ret = compile_table_set_line(maat_inst, "COMPILE", MAAT_OP_ADD, new_compile_id,
                                 "null", 1, 0);
    EXPECT_EQ(ret, 1);

    // 2) group -> compile relation
    long long new_group_id = maat_cmd_incrby(maat_inst, "SEQUENCE_GROUP", 1);
    ret = group2compile_table_set_line(maat_inst, "GROUP2COMPILE", MAAT_OP_ADD,
                                       new_group_id, new_compile_id, 0, "null", 1, 0);
    EXPECT_EQ(ret, 1);

    // 3) the flag item itself
    long long new_item_id = maat_cmd_incrby(maat_inst, "SEQUENCE_REGION", 1);
    ret = flag_table_set_line(maat_inst, table_name, MAAT_OP_ADD, new_item_id,
                              new_group_id, flag, flag_mask, 0);
    EXPECT_EQ(ret, 1);
}
class MaatPerfStringScan : public testing::Test
{
protected:
@@ -345,7 +426,7 @@ void *perf_string_scan_thread(void *arg)
struct maat *maat_inst = param->maat_inst;
const char *table_name = param->table_name;
struct timespec start, end;
const char *scan_data = "String TEST should hit";
const char *scan_data = "today and yesterday should hit";
long long results[ARRAY_SIZE] = {0};
int hit_times = 0;
size_t n_hit_result = 0;
@@ -393,13 +474,67 @@ void *perf_string_update_thread(void *arg)
return is_all_hit;
}
void *perf_regex_scan_thread(void *arg)
{
struct thread_param *param = (struct thread_param *)arg;
struct maat *maat_inst = param->maat_inst;
const char *table_name = param->table_name;
struct timespec start, end;
const char *scan_data = "http://www.cyberessays.com/search_results.php?action=search&query=username,abckkk,1234567";
long long results[ARRAY_SIZE] = {0};
int hit_times = 0;
size_t n_hit_result = 0;
struct maat_state *state = maat_state_new(maat_inst, param->thread_id);
int table_id = maat_get_table_id(maat_inst, table_name);
clock_gettime(CLOCK_MONOTONIC, &start);
for (int i = 0; i < param->test_count; i++) {
int ret = maat_scan_string(maat_inst, table_id, scan_data, strlen(scan_data),
results, ARRAY_SIZE, &n_hit_result, state);
if (ret == MAAT_SCAN_HIT) {
hit_times++;
}
maat_state_reset(state);
}
clock_gettime(CLOCK_MONOTONIC, &end);
param->time_elapse_ms = (end.tv_sec - start.tv_sec) * 1000 + (end.tv_nsec - start.tv_nsec) / 1000000;
int *is_all_hit = ALLOC(int, 1);
*is_all_hit = (hit_times == param->test_count ? 1 : 0);
log_info(param->logger, MODULE_FRAMEWORK_PERF_GTEST,
"thread_id:%d regex_scan time_elapse:%lldms hit_times:%d",
param->thread_id, param->time_elapse_ms, hit_times);
return is_all_hit;
}
// pthread entry point: adds CMD_EXPR_NUM randomly generated keywords to
// param->table_name, one per second.
//
// arg:     struct thread_param * describing this worker.
// returns: heap-allocated int (caller frees), always 1.
void *perf_regex_update_thread(void *arg)
{
    const int CMD_EXPR_NUM = 10;
    struct thread_param *tp = (struct thread_param *)arg;
    struct maat *inst = tp->maat_inst;
    const char *target_table = tp->table_name;
    char keyword_buf[128];

    int added = 0;
    while (added < CMD_EXPR_NUM) {
        random_keyword_generate(keyword_buf, sizeof(keyword_buf));
        test_add_expr_command(inst, target_table, keyword_buf);
        sleep(1);
        added++;
    }

    int *is_all_hit = ALLOC(int, 1);
    *is_all_hit = 1;

    return is_all_hit;
}
void *perf_ip_scan_thread(void *arg)
{
struct thread_param *param = (struct thread_param *)arg;
struct maat *maat_inst = param->maat_inst;
const char *table_name = param->table_name;
struct timespec start, end;
char ip_str[32] = "10.0.7.100";
char ip_str[32] = "10.0.0.1";
uint32_t ip_addr;
uint16_t port = htons(65530);
@@ -462,8 +597,113 @@ void *perf_ip_update_thread(void *arg)
return is_all_hit;
}
TEST_F(MaatPerfStringScan, MultiThread) {
const char *table_name = "KEYWORDS_TABLE";
void *perf_integer_scan_thread(void *arg)
{
struct thread_param *param = (struct thread_param *)arg;
struct maat *maat_inst = param->maat_inst;
const char *table_name = param->table_name;
struct timespec start, end;
int hit_times = 0;
long long results[ARRAY_SIZE] = {0};
size_t n_hit_result = 0;
struct maat_state *state = maat_state_new(maat_inst, param->thread_id);
int table_id = maat_get_table_id(maat_inst, table_name);
clock_gettime(CLOCK_MONOTONIC, &start);
for (int i = 0; i < param->test_count; i++) {
int ret = maat_scan_integer(maat_inst, table_id, 3000, results,
ARRAY_SIZE, &n_hit_result, state);
if (ret == MAAT_SCAN_HIT) {
hit_times++;
}
maat_state_reset(state);
}
clock_gettime(CLOCK_MONOTONIC, &end);
param->time_elapse_ms = (end.tv_sec - start.tv_sec) * 1000 +
(end.tv_nsec - start.tv_nsec) / 1000000;
int *is_all_hit = ALLOC(int, 1);
*is_all_hit = (hit_times == param->test_count ? 1 : 0);
log_info(param->logger, MODULE_FRAMEWORK_PERF_GTEST,
"thread_id:%d integer_scan time_elapse:%lldms hit_times:%d",
param->thread_id, param->time_elapse_ms, hit_times);
return is_all_hit;
}
// pthread entry point: adds CMD_EXPR_NUM single-value intervals (3001, 3002, ...) to
// param->table_name, one per second.
//
// arg:     struct thread_param * describing this worker.
// returns: heap-allocated int (caller frees), always 1.
void *perf_integer_update_thread(void *arg)
{
    const int CMD_EXPR_NUM = 10;
    struct thread_param *tp = (struct thread_param *)arg;
    struct maat *inst = tp->maat_inst;
    const char *target_table = tp->table_name;

    int added = 0;
    while (added < CMD_EXPR_NUM) {
        // low and high bound are equal, so each item covers exactly one value
        test_add_integer_command(inst, target_table, 3001+added, 3001+added);
        sleep(1);
        added++;
    }

    int *is_all_hit = ALLOC(int, 1);
    *is_all_hit = 1;

    return is_all_hit;
}
void *perf_flag_scan_thread(void *arg)
{
struct thread_param *param = (struct thread_param *)arg;
struct maat *maat_inst = param->maat_inst;
const char *table_name = param->table_name;
struct timespec start, end;
int hit_times = 0;
long long results[ARRAY_SIZE] = {0};
size_t n_hit_result = 0;
long long scan_data = 15;
struct maat_state *state = maat_state_new(maat_inst, param->thread_id);
int table_id = maat_get_table_id(maat_inst, table_name);
clock_gettime(CLOCK_MONOTONIC, &start);
for (int i = 0; i < param->test_count; i++) {
int ret = maat_scan_flag(maat_inst, table_id, scan_data, results,
ARRAY_SIZE, &n_hit_result, state);
if (ret == MAAT_SCAN_HIT) {
hit_times++;
}
maat_state_reset(state);
}
clock_gettime(CLOCK_MONOTONIC, &end);
param->time_elapse_ms = (end.tv_sec - start.tv_sec) * 1000 +
(end.tv_nsec - start.tv_nsec) / 1000000;
int *is_all_hit = ALLOC(int, 1);
*is_all_hit = (hit_times == param->test_count ? 1 : 0);
log_info(param->logger, MODULE_FRAMEWORK_PERF_GTEST,
"thread_id:%d flag_scan time_elapse:%lldms hit_times:%d",
param->thread_id, param->time_elapse_ms, hit_times);
return is_all_hit;
}
// pthread entry point: adds CMD_EXPR_NUM flag items (flag 0, 1, 2, ... with mask 15)
// to param->table_name, one per second.
//
// arg:     struct thread_param * describing this worker.
// returns: heap-allocated int (caller frees), always 1.
void *perf_flag_update_thread(void *arg)
{
    const int CMD_EXPR_NUM = 10;
    struct thread_param *tp = (struct thread_param *)arg;
    struct maat *inst = tp->maat_inst;
    const char *target_table = tp->table_name;

    int flag_value = 0;
    while (flag_value < CMD_EXPR_NUM) {
        test_add_flag_command(inst, target_table, flag_value, 15);
        sleep(1);
        flag_value++;
    }

    int *is_all_hit = ALLOC(int, 1);
    *is_all_hit = 1;

    return is_all_hit;
}
TEST_F(MaatPerfStringScan, LiteralMultiThread) {
const char *table_name = "EXPR_LITERAL_PERF_CONFIG";
struct maat *maat_inst = MaatPerfStringScan::_shared_maat_inst;
int table_id = maat_get_table_id(maat_inst, table_name);
@@ -503,12 +743,58 @@ TEST_F(MaatPerfStringScan, MultiThread) {
free(is_all_hit);
}
scan_per_second = scan_count * 1000 / time_elapse_ms;
//EXPECT_GT(scan_per_second, 800 * 1000);
log_info(maat_inst->logger, MODULE_FRAMEWORK_PERF_GTEST,
"StringScan match rate on %d-threads speed %lld lookups/s/thread",
PERF_THREAD_NUM, scan_per_second);
}
// Runs PERF_THREAD_NUM regex scan workers plus one update worker against
// EXPR_REGEX_PERF_CONFIG, expects every worker to report success and logs the
// resulting scan rate.
TEST_F(MaatPerfStringScan, RegexMultiThread) {
    const char *table_name = "EXPR_REGEX_PERF_CONFIG";
    struct maat *maat_inst = MaatPerfStringScan::_shared_maat_inst;

    int table_id = maat_get_table_id(maat_inst, table_name);
    ASSERT_GT(table_id, 0);

    // the extra slot (index PERF_THREAD_NUM) is the update worker
    pthread_t threads[PERF_THREAD_NUM + 1];
    struct thread_param thread_params[PERF_THREAD_NUM + 1];
    int i = 0;
    int *is_all_hit = NULL;

    for (i = 0; i < PERF_THREAD_NUM + 1; i++) {
        thread_params[i].maat_inst = maat_inst;
        thread_params[i].thread_id = i;
        thread_params[i].table_name = table_name;
        thread_params[i].test_count = PERF_SCAN_COUNT;
        thread_params[i].time_elapse_ms = 0;
        thread_params[i].logger = logger;

        if (i < PERF_THREAD_NUM) {
            pthread_create(&threads[i], NULL, perf_regex_scan_thread, thread_params+i);
        } else {
            // the update worker performs no scans, so it must not count towards the rate
            thread_params[i].test_count = 0;
            pthread_create(&threads[i], NULL, perf_regex_update_thread, thread_params+i);
        }
    }

    long long time_elapse_ms = 0;
    long long scan_count = 0;
    long long scan_per_second = 0;

    for (i = 0; i < PERF_THREAD_NUM + 1; i++) {
        pthread_join(threads[i], (void **)&is_all_hit);
        time_elapse_ms += thread_params[i].time_elapse_ms;
        scan_count += thread_params[i].test_count;
        EXPECT_EQ(*is_all_hit, 1);
        free(is_all_hit);
    }

    // Both sums run over all workers, so the quotient is a per-thread rate. The guard
    // avoids an integer division by zero when the workers finish in under 1 ms.
    if (time_elapse_ms > 0) {
        scan_per_second = scan_count * 1000 / time_elapse_ms;
    }

    log_info(maat_inst->logger, MODULE_FRAMEWORK_PERF_GTEST,
             "RegexScan match rate on %d-threads speed %lld lookups/s/thread",
             PERF_THREAD_NUM, scan_per_second);
}
class MaatPerfStreamScan : public testing::Test
{
protected:
@@ -561,34 +847,31 @@ void *perf_stream_scan_thread(void *arg)
struct maat *maat_inst = param->maat_inst;
const char *table_name = param->table_name;
struct timespec start, end;
const char *scan_data = "http://www.cyberessays.com/search_results.php?action=search&query=yulingjing,abckkk,1234567";
const char *scan_data = "http://www.cyberessays.com/search_results.php?today and yesterday";
long long results[ARRAY_SIZE] = {0};
int ret = 0, hit_times = 0;
size_t n_hit_result = 0;
struct maat_state *state_array[ARRAY_SIZE];
struct maat_stream *sp[ARRAY_SIZE];
int table_id = maat_get_table_id(maat_inst, table_name);
struct maat_state *state = maat_state_new(maat_inst, param->thread_id);
struct maat_stream *sp = maat_stream_new(maat_inst, table_id, state);
clock_gettime(CLOCK_MONOTONIC, &start);
for (int i = 0; i < param->test_count; i++) {
for (int j = 0; j < ARRAY_SIZE; j++) {
state_array[j] = maat_state_new(maat_inst, param->thread_id);
sp[j] = maat_stream_new(maat_inst, table_id, state_array[j]);
ret = maat_stream_scan(sp[j], scan_data, strlen(scan_data), results, ARRAY_SIZE,
&n_hit_result, state_array[j]);
ret = maat_stream_scan(sp, scan_data, strlen(scan_data), results, ARRAY_SIZE,
&n_hit_result, state);
if (ret == MAAT_SCAN_HIT) {
hit_times++;
}
maat_stream_free(sp[j]);
maat_state_free(state_array[j]);
}
maat_state_reset(state);
}
clock_gettime(CLOCK_MONOTONIC, &end);
maat_stream_free(sp);
maat_state_free(state);
param->time_elapse_ms = (end.tv_sec - start.tv_sec) * 1000 + (end.tv_nsec - start.tv_nsec) / 1000000;
int *is_all_hit = ALLOC(int, 1);
*is_all_hit = ((hit_times == param->test_count*ARRAY_SIZE) ? 1 : 0);
*is_all_hit = ((hit_times == param->test_count) ? 1 : 0);
log_info(param->logger, MODULE_FRAMEWORK_PERF_GTEST,
"thread_id:%d stream_scan time_elapse:%lldms hit_times:%d",
@@ -596,64 +879,40 @@ void *perf_stream_scan_thread(void *arg)
return is_all_hit;
}
// pthread entry point: adds CMD_EXPR_NUM randomly generated keywords to
// param->table_name, one per second.
//
// arg:     struct thread_param * describing this worker.
// returns: heap-allocated int (caller frees), always 1.
void *perf_stream_update_thread(void *arg)
{
    const int CMD_EXPR_NUM = 10;
    struct thread_param *tp = (struct thread_param *)arg;
    struct maat *inst = tp->maat_inst;
    const char *target_table = tp->table_name;
    char keyword_buf[128];

    int added = 0;
    while (added < CMD_EXPR_NUM) {
        random_keyword_generate(keyword_buf, sizeof(keyword_buf));
        test_add_expr_command(inst, target_table, keyword_buf);
        sleep(1);
        added++;
    }

    int *is_all_hit = ALLOC(int, 1);
    *is_all_hit = 1;

    return is_all_hit;
}
TEST_F(MaatPerfStreamScan, MultiThread) {
const char *table_name = "HTTP_URL";
const char *table_name = "EXPR_LITERAL_PERF_CONFIG";
struct maat *maat_inst = MaatPerfStreamScan::_shared_maat_inst;
int table_id = maat_get_table_id(maat_inst, table_name);
ASSERT_GT(table_id, 0);
pthread_t threads[PERF_THREAD_NUM + 1];
struct thread_param thread_params[PERF_THREAD_NUM + 1];
pthread_t threads[PERF_THREAD_NUM];
struct thread_param thread_params[PERF_THREAD_NUM];
int i = 0;
int *is_all_hit = NULL;
for (i = 0; i < PERF_THREAD_NUM + 1; i++) {
for (i = 0; i < PERF_THREAD_NUM; i++) {
thread_params[i].maat_inst = maat_inst;
thread_params[i].thread_id = i;
thread_params[i].table_name = table_name;
thread_params[i].test_count = PERF_SCAN_COUNT / 10;
thread_params[i].test_count = PERF_SCAN_COUNT;
thread_params[i].time_elapse_ms = 0;
thread_params[i].logger = logger;
if (i < PERF_THREAD_NUM) {
pthread_create(&threads[i], NULL, perf_stream_scan_thread, thread_params+i);
} else {
thread_params[i].test_count = 0;
pthread_create(&threads[i], NULL, perf_stream_update_thread, thread_params+i);
}
}
long long time_elapse_ms = 0;
long long scan_count = 0;
long long scan_per_second = 0;
for (i = 0; i < PERF_THREAD_NUM + 1; i++) {
for (i = 0; i < PERF_THREAD_NUM; i++) {
pthread_join(threads[i], (void **)&is_all_hit);
time_elapse_ms += thread_params[i].time_elapse_ms;
scan_count += thread_params[i].test_count;
//maybe expr_runtime rebuild in stream_scan, so should not expect is_all_hit always 1
//EXPECT_EQ(*is_all_hit, 1);
*is_all_hit = 0;
EXPECT_EQ(*is_all_hit, 1);
free(is_all_hit);
}
scan_per_second = scan_count * 1000 / time_elapse_ms;
@@ -711,7 +970,7 @@ struct log_handle *MaatPerfIPScan::logger;
TEST_F(MaatPerfIPScan, MultiThread)
{
const char *table_name = "IP_PLUS_CONFIG";
const char *table_name = "IP_PERF_CONFIG";
struct maat *maat_inst = MaatPerfIPScan::_shared_maat_inst;
int table_id = maat_get_table_id(maat_inst, table_name);
@@ -757,6 +1016,192 @@ TEST_F(MaatPerfIPScan, MultiThread)
PERF_THREAD_NUM, scan_per_second);
}
// Fixture for the integer-scan performance tests. One maat instance and one log
// handle are created per test case and shared by all tests of the fixture.
class MaatPerfIntegerScan : public testing::Test
{
protected:
    // Writes the test configuration to the local redis and creates the shared maat
    // instance from it. Failures are logged only; they do not abort the set-up.
    static void SetUpTestCase() {
        const char *accept_tags = "{\"tags\":[{\"tag\":\"location\",\"value\":\"北京/朝阳/华严北里/甲22号\"},"
                                  "{\"tag\":\"isp\",\"value\":\"移动\"},{\"tag\":\"location\",\"value\":\"Astana\"}]}";
        char redis_ip[64] = "127.0.0.1";
        int redis_port = 6379;
        int redis_db = 0;

        logger = log_handle_create("./maat_framework_perf_gtest.log", 0);
        int ret = write_config_to_redis(redis_ip, redis_port, redis_db, logger);
        if (ret < 0) {
            log_error(logger, MODULE_FRAMEWORK_PERF_GTEST,
                      "[%s:%d] write config to redis failed.", __FUNCTION__, __LINE__);
        }

        struct maat_options *opts = maat_options_new();
        maat_options_set_stat_file(opts, "./stat.log");
        maat_options_set_perf_on(opts);
        maat_options_set_redis(opts, redis_ip, redis_port, redis_db);
        maat_options_set_logger(opts, "./maat_framework_perf_gtest.log", LOG_LEVEL_INFO);
        maat_options_set_accept_tags(opts, accept_tags);
        maat_options_set_caller_thread_number(opts, 5);

        _shared_maat_inst = maat_new(opts, table_info_path);
        maat_options_free(opts);
        if (NULL == _shared_maat_inst) {
            // the message names this fixture (it used to name a different one)
            log_error(logger, MODULE_FRAMEWORK_PERF_GTEST,
                      "[%s:%d] create maat instance in MaatPerfIntegerScan failed.",
                      __FUNCTION__, __LINE__);
        }
    }

    // Releases the shared maat instance and the log handle.
    static void TearDownTestCase() {
        maat_free(_shared_maat_inst);
        log_handle_destroy(logger);
    }

    static struct log_handle *logger;       // log handle shared by the fixture's tests
    static struct maat *_shared_maat_inst;  // maat instance shared by the fixture's tests
};
struct maat *MaatPerfIntegerScan::_shared_maat_inst;
struct log_handle *MaatPerfIntegerScan::logger;
// Runs PERF_THREAD_NUM integer scan workers plus one update worker against
// INTEGER_PERF_CONFIG, expects every worker to report success and logs the
// resulting scan rate.
TEST_F(MaatPerfIntegerScan, MultiThread) {
    const char *table_name = "INTEGER_PERF_CONFIG";
    struct maat *maat_inst = MaatPerfIntegerScan::_shared_maat_inst;

    int table_id = maat_get_table_id(maat_inst, table_name);
    ASSERT_GT(table_id, 0);

    // the extra slot (index PERF_THREAD_NUM) is the update worker
    pthread_t threads[PERF_THREAD_NUM + 1];
    struct thread_param thread_params[PERF_THREAD_NUM + 1];
    int i = 0;
    int *is_all_hit = NULL;

    for (i = 0; i < PERF_THREAD_NUM + 1; i++) {
        thread_params[i].maat_inst = maat_inst;
        thread_params[i].thread_id = i;
        thread_params[i].table_name = table_name;
        thread_params[i].test_count = PERF_SCAN_COUNT;
        thread_params[i].time_elapse_ms = 0;
        thread_params[i].logger = logger;

        if (i < PERF_THREAD_NUM) {
            pthread_create(&threads[i], NULL, perf_integer_scan_thread, thread_params+i);
        } else {
            // the update worker performs no scans, so it must not count towards the rate
            thread_params[i].test_count = 0;
            pthread_create(&threads[i], NULL, perf_integer_update_thread, thread_params+i);
        }
    }

    long long time_elapse_ms = 0;
    long long scan_count = 0;
    long long scan_per_second = 0;

    for (i = 0; i < PERF_THREAD_NUM + 1; i++) {
        pthread_join(threads[i], (void **)&is_all_hit);
        time_elapse_ms += thread_params[i].time_elapse_ms;
        scan_count += thread_params[i].test_count;
        EXPECT_EQ(*is_all_hit, 1);
        free(is_all_hit);
    }

    // Both sums run over all workers, so the quotient is a per-thread rate. The guard
    // avoids an integer division by zero when the workers finish in under 1 ms.
    if (time_elapse_ms > 0) {
        scan_per_second = scan_count * 1000 / time_elapse_ms;
    }

    log_info(maat_inst->logger, MODULE_FRAMEWORK_PERF_GTEST,
             "IntegerScan match rate on %d-threads speed %lld lookups/s/thread",
             PERF_THREAD_NUM, scan_per_second);
}
// Fixture for the flag-scan performance tests. One maat instance and one log handle
// are created per test case and shared by all tests of the fixture.
class MaatPerfFlagScan : public testing::Test
{
protected:
    // Writes the test configuration to the local redis and creates the shared maat
    // instance from it. Failures are logged only; they do not abort the set-up.
    static void SetUpTestCase() {
        const char *accept_tags = "{\"tags\":[{\"tag\":\"location\",\"value\":\"北京/朝阳/华严北里/甲22号\"},"
                                  "{\"tag\":\"isp\",\"value\":\"移动\"},{\"tag\":\"location\",\"value\":\"Astana\"}]}";
        char redis_ip[64] = "127.0.0.1";
        int redis_port = 6379;
        int redis_db = 0;

        logger = log_handle_create("./maat_framework_perf_gtest.log", 0);
        int ret = write_config_to_redis(redis_ip, redis_port, redis_db, logger);
        if (ret < 0) {
            log_error(logger, MODULE_FRAMEWORK_PERF_GTEST,
                      "[%s:%d] write config to redis failed.", __FUNCTION__, __LINE__);
        }

        struct maat_options *opts = maat_options_new();
        maat_options_set_stat_file(opts, "./stat.log");
        maat_options_set_perf_on(opts);
        maat_options_set_redis(opts, redis_ip, redis_port, redis_db);
        maat_options_set_logger(opts, "./maat_framework_perf_gtest.log", LOG_LEVEL_INFO);
        maat_options_set_accept_tags(opts, accept_tags);
        maat_options_set_caller_thread_number(opts, 5);

        _shared_maat_inst = maat_new(opts, table_info_path);
        maat_options_free(opts);
        if (NULL == _shared_maat_inst) {
            // the message carries the exact fixture name so the log can be searched for it
            log_error(logger, MODULE_FRAMEWORK_PERF_GTEST,
                      "[%s:%d] create maat instance in MaatPerfFlagScan failed.",
                      __FUNCTION__, __LINE__);
        }
    }

    // Releases the shared maat instance and the log handle.
    static void TearDownTestCase() {
        maat_free(_shared_maat_inst);
        log_handle_destroy(logger);
    }

    static struct log_handle *logger;       // log handle shared by the fixture's tests
    static struct maat *_shared_maat_inst;  // maat instance shared by the fixture's tests
};
struct maat *MaatPerfFlagScan::_shared_maat_inst;
struct log_handle *MaatPerfFlagScan::logger;
// Runs PERF_THREAD_NUM flag scan workers plus one update worker against
// FLAG_PERF_CONFIG, expects every worker to report success and logs the resulting
// scan rate.
TEST_F(MaatPerfFlagScan, MultiThread) {
    const char *table_name = "FLAG_PERF_CONFIG";
    struct maat *maat_inst = MaatPerfFlagScan::_shared_maat_inst;

    int table_id = maat_get_table_id(maat_inst, table_name);
    ASSERT_GT(table_id, 0);

    // the extra slot (index PERF_THREAD_NUM) is the update worker
    pthread_t threads[PERF_THREAD_NUM + 1];
    struct thread_param thread_params[PERF_THREAD_NUM + 1];
    int i = 0;
    int *is_all_hit = NULL;

    for (i = 0; i < PERF_THREAD_NUM + 1; i++) {
        thread_params[i].maat_inst = maat_inst;
        thread_params[i].thread_id = i;
        thread_params[i].table_name = table_name;
        thread_params[i].test_count = PERF_SCAN_COUNT;
        thread_params[i].time_elapse_ms = 0;
        thread_params[i].logger = logger;

        if (i < PERF_THREAD_NUM) {
            pthread_create(&threads[i], NULL, perf_flag_scan_thread, thread_params+i);
        } else {
            // the update worker performs no scans, so it must not count towards the rate
            thread_params[i].test_count = 0;
            pthread_create(&threads[i], NULL, perf_flag_update_thread, thread_params+i);
        }
    }

    long long time_elapse_ms = 0;
    long long scan_count = 0;
    long long scan_per_second = 0;

    for (i = 0; i < PERF_THREAD_NUM + 1; i++) {
        pthread_join(threads[i], (void **)&is_all_hit);
        time_elapse_ms += thread_params[i].time_elapse_ms;
        scan_count += thread_params[i].test_count;
        EXPECT_EQ(*is_all_hit, 1);
        free(is_all_hit);
    }

    // Both sums run over all workers, so the quotient is a per-thread rate. The guard
    // avoids an integer division by zero when the workers finish in under 1 ms.
    if (time_elapse_ms > 0) {
        scan_per_second = scan_count * 1000 / time_elapse_ms;
    }

    log_info(maat_inst->logger, MODULE_FRAMEWORK_PERF_GTEST,
             "FlagScan match rate on %d-threads speed %lld lookups/s/thread",
             PERF_THREAD_NUM, scan_per_second);
}
class MaatPerfFQDNPluginScan : public testing::Test
{
protected:

View File

@@ -495,5 +495,75 @@
"table_name":"HTTP_URL_FILTER",
"table_type":"virtual",
"physical_table": "HTTP_URL"
},
{
"table_id":42,
"table_name":"IP_PERF_CONFIG",
"table_type":"ip_plus",
"valid_column":11,
"custom": {
"item_id":1,
"group_id":2,
"addr_type":3,
"addr_format":4,
"ip1":5,
"ip2":6,
"port_format":7,
"port1":8,
"port2":9,
"protocol":10
}
},
{
"table_id":43,
"table_name":"INTEGER_PERF_CONFIG",
"table_type":"intval",
"valid_column":5,
"custom": {
"item_id":1,
"group_id":2,
"low_bound":3,
"up_bound":4
}
},
{
"table_id":44,
"table_name":"EXPR_LITERAL_PERF_CONFIG",
"table_type":"expr",
"valid_column":7,
"custom": {
"item_id":1,
"group_id":2,
"keywords":3,
"expr_type":4,
"match_method":5,
"is_hexbin":6
}
},
{
"table_id":45,
"table_name":"EXPR_REGEX_PERF_CONFIG",
"table_type":"expr",
"valid_column":7,
"custom": {
"item_id":1,
"group_id":2,
"keywords":3,
"expr_type":4,
"match_method":5,
"is_hexbin":6
}
},
{
"table_id":46,
"table_name":"FLAG_PERF_CONFIG",
"table_type":"flag",
"valid_column":5,
"custom": {
"item_id":1,
"group_id":2,
"flag":3,
"flag_mask":4
}
}
]

28
vendor/CMakeLists.txt vendored
View File

@@ -58,6 +58,34 @@ add_dependencies(hyperscan_runtime_static hyperscan)
set_property(TARGET hyperscan_runtime_static PROPERTY IMPORTED_LOCATION ${VENDOR_BUILD}/lib64/libhs_runtime.a)
set_property(TARGET hyperscan_runtime_static PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${VENDOR_BUILD}/include)
# pcre-8.45
# Built from the bundled tarball with its own ./configure + make, inside the unpacked
# source tree (BUILD_IN_SOURCE), and installed under ${VENDOR_BUILD}.
ExternalProject_Add(pcre PREFIX pcre
URL ${CMAKE_CURRENT_SOURCE_DIR}/pcre-8.45.tar.gz
CONFIGURE_COMMAND ./configure --prefix=${VENDOR_BUILD}
BUILD_COMMAND make
INSTALL_COMMAND make install
BUILD_IN_SOURCE 1)
# Exposes the project's INSTALL_DIR as a CMake variable.
ExternalProject_Get_Property(pcre INSTALL_DIR)
# Create the include directory at configure time, before anything has been installed.
file(MAKE_DIRECTORY ${VENDOR_BUILD}/include)
# rulescan 3.0.1
# Built from the bundled tarball with CMake, installed under ${VENDOR_BUILD} and
# compiled with -fPIC for both C and C++.
ExternalProject_Add(rulescan PREFIX rulescan
URL ${CMAKE_CURRENT_SOURCE_DIR}/rulescan-3.0.1.tar.gz
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${VENDOR_BUILD} -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DCMAKE_C_FLAGS="-fPIC" -DCMAKE_CXX_FLAGS="-fPIC")
# Exposes the project's INSTALL_DIR as a CMake variable.
ExternalProject_Get_Property(rulescan INSTALL_DIR)
# Create the include directory at configure time, before anything has been installed.
file(MAKE_DIRECTORY ${VENDOR_BUILD}/include)
# Merge librulescan.a and libpcre.a into a single archive, librs.a, so that consumers
# only have to link one library.
add_custom_command(OUTPUT ${VENDOR_BUILD}/lib/librs.a
COMMAND ar crsT ${VENDOR_BUILD}/lib/librs.a ${VENDOR_BUILD}/lib/libpcre.a ${VENDOR_BUILD}/lib/librulescan.a
DEPENDS pcre rulescan)
add_custom_target(_merge ALL DEPENDS ${VENDOR_BUILD}/lib/librs.a)
add_library(rulescan_static STATIC IMPORTED GLOBAL)
# An imported target carries no build rule of its own: without this dependency a
# target linking rulescan_static may be linked before librs.a has been produced.
add_dependencies(rulescan_static _merge)
set_property(TARGET rulescan_static PROPERTY IMPORTED_LOCATION ${VENDOR_BUILD}/lib/librs.a)
# Same convention as the hyperscan imported targets: publish the vendor include dir.
set_property(TARGET rulescan_static PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${VENDOR_BUILD}/include)
# hiredis-1.1.0
ExternalProject_Add(hiredis PREFIX hiredis
URL ${CMAKE_CURRENT_SOURCE_DIR}/hiredis-1.1.0.tar.gz

BIN
vendor/pcre-8.45.tar.gz vendored Normal file

Binary file not shown.

BIN
vendor/rulescan-3.0.1.tar.gz vendored Normal file

Binary file not shown.