diff --git a/include/maat.h b/include/maat.h index 7af3bf1..8beaf67 100644 --- a/include/maat.h +++ b/include/maat.h @@ -58,7 +58,8 @@ enum maat_update_type { enum maat_expr_engine { MAAT_EXPR_ENGINE_HS = 0, //hyperscan(default engine) - MAAT_EXPR_ENGINE_RS //rulescan + MAAT_EXPR_ENGINE_RS, //rulescan + MAAT_EXPR_ENGINE_AUTO }; struct ip_addr { diff --git a/scanner/expr_matcher/adapter_hs/adapter_hs.cpp b/scanner/expr_matcher/adapter_hs/adapter_hs.cpp index c5ebfa9..fc0a2b5 100644 --- a/scanner/expr_matcher/adapter_hs/adapter_hs.cpp +++ b/scanner/expr_matcher/adapter_hs/adapter_hs.cpp @@ -17,13 +17,10 @@ #include #include "adapter_hs.h" -#include "uthash/utarray.h" #include "uthash/uthash.h" #include "maat_utils.h" #include "../../bool_matcher/bool_matcher.h" -#define MAX_HIT_PATTERN_NUM 1024 - pid_t hs_gettid() { return syscall(SYS_gettid); @@ -39,7 +36,7 @@ static const char *hs_module_name_str(const char *name) #define MODULE_ADAPTER_HS hs_module_name_str("maat.adapter_hs") -struct adpt_hs_compile_data { +struct hs_compile_data { enum expr_pattern_type pat_type; unsigned int *ids; unsigned int *flags; @@ -48,61 +45,44 @@ struct adpt_hs_compile_data { unsigned int n_patterns; }; -struct adapter_hs_scratch { - hs_scratch_t **literal_scratches; - hs_scratch_t **regex_scratches; - struct bool_expr_match **bool_match_buffs; -}; - -struct adapter_hs_stream { +struct hs_lit_stream { int thread_id; - hs_stream_t *literal_stream; - hs_stream_t *regex_stream; - struct adapter_hs_runtime *ref_hs_rt; + hs_stream_t *hs_stream; + struct hs_lit_engine *ref_hs_rt; struct matched_pattern *matched_pat; struct log_handle *logger; }; -/* adapter_hs runtime */ -struct adapter_hs_runtime { - hs_database_t *literal_db; - hs_database_t *regex_db; - - struct adapter_hs_scratch *scratch; - struct adapter_hs_stream **streams; - struct bool_matcher *bm; -}; - -/* adapter_hs instance */ -struct adapter_hs { - size_t n_worker_thread; - size_t n_expr; - size_t n_patterns; - struct 
adapter_hs_runtime *hs_rt; - struct pattern_attribute *hs_attr; +struct hs_regex_stream { + int thread_id; + hs_stream_t *hs_stream; + struct hs_regex_engine *ref_hs_rt; + struct matched_pattern *matched_pat; struct log_handle *logger; }; -struct pattern_offset { - long long start; - long long end; +/* hs literal runtime */ +struct hs_lit_engine { + size_t n_thread; + hs_database_t *hs_db; + hs_scratch_t **hs_scratches; + struct hs_lit_stream **streams; + struct pattern_attribute *ref_pat_attr; + struct log_handle *logger; }; -struct pattern_attribute { - long long pattern_id; - enum expr_match_mode match_mode; - struct pattern_offset offset; +/* hs regex runtime */ +struct hs_regex_engine { + size_t n_thread; + hs_database_t *hs_db; + hs_scratch_t **hs_scratches; + struct hs_regex_stream **streams; + struct pattern_attribute *ref_pat_attr; + struct log_handle *logger; }; -struct matched_pattern { - UT_array *pattern_ids; - size_t n_patterns; - struct pattern_attribute *ref_hs_attr; - size_t scan_data_len; -}; - -static int _hs_alloc_scratch(hs_database_t *db, hs_scratch_t **scratches, - size_t n_worker_thread, struct log_handle *logger) +static int hs_alloc_scratches(hs_database_t *db, hs_scratch_t **scratches, + size_t n_worker_thread, struct log_handle *logger) { size_t scratch_size = 0; @@ -134,205 +114,6 @@ static int _hs_alloc_scratch(hs_database_t *db, hs_scratch_t **scratches, return 0; } -static int adpt_hs_alloc_scratch(struct adapter_hs_runtime *hs_rt, - size_t n_worker_thread, - enum expr_pattern_type pattern_type, - struct log_handle *logger) -{ - int ret = 0; - - if (pattern_type == EXPR_PATTERN_TYPE_STR) { - hs_rt->scratch->literal_scratches = ALLOC(hs_scratch_t *, n_worker_thread); - ret = _hs_alloc_scratch(hs_rt->literal_db, hs_rt->scratch->literal_scratches, - n_worker_thread, logger); - if (ret < 0) { - FREE(hs_rt->scratch->literal_scratches); - return -1; - } - } else { - hs_rt->scratch->regex_scratches = ALLOC(hs_scratch_t *, n_worker_thread); 
- ret = _hs_alloc_scratch(hs_rt->regex_db, hs_rt->scratch->regex_scratches, - n_worker_thread, logger); - if (ret < 0) { - FREE(hs_rt->scratch->regex_scratches); - return -1; - } - } - - return 0; -} - -/** - * @brief build hs block database for literal string and regex expression respectively - * - * @retval 0(success) -1(failed) -*/ -static int adpt_hs_build_database(struct adapter_hs_runtime *hs_rt, - struct adpt_hs_compile_data *literal_cd, - struct adpt_hs_compile_data *regex_cd, - struct log_handle *logger) -{ - hs_error_t err; - hs_compile_error_t *compile_err = NULL; - - if (NULL == hs_rt || (NULL == literal_cd && NULL == regex_cd)) { - return -1; - } - - if (literal_cd != NULL) { - err = hs_compile_lit_multi((const char *const *)literal_cd->patterns, - literal_cd->flags,literal_cd->ids, - literal_cd->pattern_lens, literal_cd->n_patterns, - HS_MODE_STREAM | HS_MODE_SOM_HORIZON_SMALL, NULL, - &hs_rt->literal_db, &compile_err); - if (err != HS_SUCCESS) { - if (compile_err) { - log_fatal(logger, MODULE_ADAPTER_HS, "[%s:%d] compile error: %s", - __FUNCTION__, __LINE__, compile_err->message); - } - - hs_free_compile_error(compile_err); - return -1; - } - } - - if (regex_cd != NULL) { - err = hs_compile_multi((const char *const *)regex_cd->patterns, - regex_cd->flags, regex_cd->ids, regex_cd->n_patterns, - HS_MODE_STREAM, - NULL, &hs_rt->regex_db, &compile_err); - if (err != HS_SUCCESS) { - if (compile_err) { - log_fatal(logger, MODULE_ADAPTER_HS, "[%s:%d] compile error: %s", - __FUNCTION__, __LINE__, compile_err->message); - } - hs_free_compile_error(compile_err); - return -1; - } - } - - return 0; -} - -static struct adpt_hs_compile_data * -adpt_hs_compile_data_new(enum expr_pattern_type pat_type, size_t n_patterns) -{ - struct adpt_hs_compile_data *hs_cd = ALLOC(struct adpt_hs_compile_data, 1); - hs_cd->pat_type = pat_type; - hs_cd->patterns = ALLOC(char *, n_patterns); - hs_cd->pattern_lens = ALLOC(size_t, n_patterns); - hs_cd->n_patterns = n_patterns; - 
hs_cd->ids = ALLOC(unsigned int, n_patterns); - hs_cd->flags = ALLOC(unsigned int, n_patterns); - - return hs_cd; -} - -static void adpt_hs_compile_data_free(struct adpt_hs_compile_data *hs_cd) -{ - if (NULL == hs_cd) { - return; - } - - if (hs_cd->patterns != NULL) { - for (size_t i = 0; i < hs_cd->n_patterns; i++) { - FREE(hs_cd->patterns[i]); - } - - FREE(hs_cd->patterns); - } - - if (hs_cd->pattern_lens != NULL) { - FREE(hs_cd->pattern_lens); - } - - if (hs_cd->ids != NULL) { - FREE(hs_cd->ids); - } - - if (hs_cd->flags != NULL) { - FREE(hs_cd->flags); - } - - FREE(hs_cd); -} - -static void populate_compile_data(struct adpt_hs_compile_data *compile_data, - int index, int pattern_id, char *pat, - size_t pat_len, int case_sensitive) -{ - compile_data->ids[index] = pattern_id; - - /* set flags */ - if (compile_data->pat_type == EXPR_PATTERN_TYPE_STR) { - compile_data->flags[index] |= HS_FLAG_SOM_LEFTMOST; - } - - if (case_sensitive == EXPR_CASE_INSENSITIVE) { - compile_data->flags[index] |= HS_FLAG_CASELESS; - } - - compile_data->pattern_lens[index] = pat_len; - compile_data->patterns[index] = ALLOC(char, pat_len + 1); - memcpy(compile_data->patterns[index], pat, pat_len); -} - -static struct bool_expr *bool_exprs_new(struct expr_rule *rules, size_t n_rule, - struct pattern_attribute *pattern_attr, - struct adpt_hs_compile_data *literal_cd, - struct adpt_hs_compile_data *regex_cd, - size_t *n_pattern) -{ - uint32_t pattern_index = 0; - uint32_t literal_index = 0; - uint32_t regex_index = 0; - - struct bool_expr *bool_exprs = ALLOC(struct bool_expr, n_rule); - if (NULL == bool_exprs) { - return NULL; - } - - /* populate adpt_hs_compile_data and bool_expr */ - for (size_t i = 0; i < n_rule; i++) { - - for (size_t j = 0; j < rules[i].n_patterns; j++) { - pattern_attr[pattern_index].pattern_id = pattern_index; - pattern_attr[pattern_index].match_mode = rules[i].patterns[j].match_mode; - - if (pattern_attr[pattern_index].match_mode == EXPR_MATCH_MODE_SUB || - 
pattern_attr[pattern_index].match_mode == EXPR_MATCH_MODE_EXACTLY) { - pattern_attr[pattern_index].offset.start = rules[i].patterns[j].start_offset; - pattern_attr[pattern_index].offset.end = rules[i].patterns[j].end_offset; - } - - /* literal pattern */ - if (rules[i].patterns[j].type == EXPR_PATTERN_TYPE_STR) { - populate_compile_data(literal_cd, literal_index, pattern_index, - rules[i].patterns[j].pat, rules[i].patterns[j].pat_len, - rules[i].patterns[j].case_sensitive); - literal_index++; - } else { - /* regex pattern */ - populate_compile_data(regex_cd, regex_index, pattern_index, - rules[i].patterns[j].pat, rules[i].patterns[j].pat_len, - rules[i].patterns[j].case_sensitive); - regex_index++; - } - - bool_exprs[i].items[j].item_id = pattern_index++; - bool_exprs[i].items[j].not_flag = 0; - } - - bool_exprs[i].expr_id = rules[i].expr_id; - bool_exprs[i].item_num = rules[i].n_patterns; - bool_exprs[i].user_tag = rules[i].tag; - } - - *n_pattern = pattern_index; - - return bool_exprs; -} - static int verify_regex_expression(const char *regex_str, struct log_handle *logger) { hs_expr_info_t *info = NULL; @@ -359,7 +140,7 @@ static int verify_regex_expression(const char *regex_str, struct log_handle *log return 1; } -int adapter_hs_verify_regex_expression(const char *regex_expr, struct log_handle *logger) +int hs_verify_regex_expression(const char *regex_expr, struct log_handle *logger) { if (NULL == regex_expr) { return 0; @@ -368,204 +149,72 @@ int adapter_hs_verify_regex_expression(const char *regex_expr, struct log_handle return verify_regex_expression(regex_expr, logger); } -void *adapter_hs_new(struct expr_rule *rules, size_t n_rule, - size_t n_literal_pattern, size_t n_regex_pattern, - size_t n_worker_thread, struct log_handle *logger) +void hs_lit_engine_free(void *hs_lit_engine) { - /* get the sum of pattern */ - size_t i = 0; - struct adpt_hs_compile_data *literal_cd = NULL; - struct adpt_hs_compile_data *regex_cd = NULL; - - if (n_literal_pattern > 0) { 
- literal_cd = adpt_hs_compile_data_new(EXPR_PATTERN_TYPE_STR, n_literal_pattern); - } - - if (n_regex_pattern > 0) { - regex_cd = adpt_hs_compile_data_new(EXPR_PATTERN_TYPE_REG, n_regex_pattern); - } - - size_t pattern_cnt = n_literal_pattern + n_regex_pattern; - struct adapter_hs *hs_inst = ALLOC(struct adapter_hs, 1); - hs_inst->hs_attr = ALLOC(struct pattern_attribute, pattern_cnt); - hs_inst->logger = logger; - hs_inst->n_worker_thread = n_worker_thread; - hs_inst->n_expr = n_rule; - - struct bool_expr *bool_exprs = bool_exprs_new(rules, n_rule, hs_inst->hs_attr, - literal_cd, regex_cd, &pattern_cnt); - if (NULL == bool_exprs) { - return NULL; - } - hs_inst->n_patterns = pattern_cnt; - - /* create bool matcher */ - size_t mem_size = 0; - int hs_ret = 0; - - hs_inst->hs_rt = ALLOC(struct adapter_hs_runtime, 1); - - //hs_rt->bm - hs_inst->hs_rt->bm = bool_matcher_new(bool_exprs, n_rule, &mem_size); - if (hs_inst->hs_rt->bm != NULL) { - log_info(logger, MODULE_ADAPTER_HS, - "Adapter_hs module: build bool matcher of %zu expressions" - " with %zu bytes memory", n_rule, mem_size); - } else { - log_fatal(logger, MODULE_ADAPTER_HS, - "[%s:%d] Adapter_hs module: build bool matcher failed", - __FUNCTION__, __LINE__); - - hs_ret = -1; - } - FREE(bool_exprs); - - /* build hs database hs_rt->literal_db & hs_rt->regex_db */ - int ret = adpt_hs_build_database(hs_inst->hs_rt, literal_cd, regex_cd, logger); - if (ret < 0) { - hs_ret = -1; - } - - if (literal_cd != NULL) { - adpt_hs_compile_data_free(literal_cd); - literal_cd = NULL; - } - - if (regex_cd != NULL) { - adpt_hs_compile_data_free(regex_cd); - regex_cd = NULL; - } - - if (hs_ret < 0) { - goto error; - } - - /* alloc scratch */ - hs_inst->hs_rt->scratch = ALLOC(struct adapter_hs_scratch, 1); - hs_inst->hs_rt->scratch->bool_match_buffs = ALLOC(struct bool_expr_match *, - n_worker_thread); - for (i = 0; i < n_worker_thread; i++) { - hs_inst->hs_rt->scratch->bool_match_buffs[i] = ALLOC(struct bool_expr_match, - 
MAX_HIT_EXPR_NUM); - } - - /* literal and regex scratch can't reuse */ - if (n_literal_pattern > 0) { - ret = adpt_hs_alloc_scratch(hs_inst->hs_rt, n_worker_thread, - EXPR_PATTERN_TYPE_STR, logger); - if (ret < 0) { - goto error; - } - } - - if (n_regex_pattern > 0) { - ret = adpt_hs_alloc_scratch(hs_inst->hs_rt, n_worker_thread, - EXPR_PATTERN_TYPE_REG, logger); - if (ret < 0) { - goto error; - } - } - - hs_inst->hs_rt->streams = ALLOC(struct adapter_hs_stream *, n_worker_thread); - for (i = 0; i < n_worker_thread; i++) { - hs_inst->hs_rt->streams[i] = (struct adapter_hs_stream *)adapter_hs_stream_open(hs_inst, i); - } - - return hs_inst; -error: - adapter_hs_free(hs_inst); - return NULL; -} - -void adapter_hs_free(void *hs_instance) -{ - if (NULL == hs_instance) { + if (NULL == hs_lit_engine) { return; } - struct adapter_hs *hs_inst = (struct adapter_hs *)hs_instance; + struct hs_lit_engine *hs_lit_inst = (struct hs_lit_engine *)hs_lit_engine; size_t i = 0; - if (hs_inst->hs_rt != NULL) { - if (hs_inst->hs_rt->literal_db != NULL) { - hs_free_database(hs_inst->hs_rt->literal_db); - hs_inst->hs_rt->literal_db = NULL; - } - - if (hs_inst->hs_rt->regex_db != NULL) { - hs_free_database(hs_inst->hs_rt->regex_db); - hs_inst->hs_rt->regex_db = NULL; - } - - if (hs_inst->hs_rt->scratch != NULL) { - if (hs_inst->hs_rt->scratch->literal_scratches != NULL) { - for (i = 0; i < hs_inst->n_worker_thread; i++) { - if (hs_inst->hs_rt->scratch->literal_scratches[i] != NULL) { - hs_free_scratch(hs_inst->hs_rt->scratch->literal_scratches[i]); - hs_inst->hs_rt->scratch->literal_scratches[i] = NULL; - } - } - FREE(hs_inst->hs_rt->scratch->literal_scratches); - } - - if (hs_inst->hs_rt->scratch->regex_scratches != NULL) { - for (i = 0; i < hs_inst->n_worker_thread; i++) { - if (hs_inst->hs_rt->scratch->regex_scratches[i] != NULL) { - hs_free_scratch(hs_inst->hs_rt->scratch->regex_scratches[i]); - hs_inst->hs_rt->scratch->regex_scratches[i] = NULL; - } - } - 
FREE(hs_inst->hs_rt->scratch->regex_scratches); - } - - if (hs_inst->hs_rt->scratch->bool_match_buffs != NULL) { - for (i = 0; i < hs_inst->n_worker_thread; i++) { - if (hs_inst->hs_rt->scratch->bool_match_buffs[i] != NULL) { - FREE(hs_inst->hs_rt->scratch->bool_match_buffs[i]); - } - } - - FREE(hs_inst->hs_rt->scratch->bool_match_buffs); - } - - FREE(hs_inst->hs_rt->scratch); - } - - if (hs_inst->hs_rt->bm != NULL) { - bool_matcher_free(hs_inst->hs_rt->bm); - hs_inst->hs_rt->bm = NULL; - } - - if (hs_inst->hs_rt->streams != NULL) { - for (i = 0; i < hs_inst->n_worker_thread; i++) { - if (hs_inst->hs_rt->streams[i] != NULL) { - adapter_hs_stream_close(hs_inst->hs_rt->streams[i]); - hs_inst->hs_rt->streams[i] = NULL; - } - } - FREE(hs_inst->hs_rt->streams); - } - - FREE(hs_inst->hs_rt); + if (hs_lit_inst->hs_db != NULL) { + hs_free_database(hs_lit_inst->hs_db); + hs_lit_inst->hs_db = NULL; } - if (hs_inst->hs_attr != NULL) { - FREE(hs_inst->hs_attr); + if (hs_lit_inst->hs_scratches != NULL) { + for (i = 0; i < hs_lit_inst->n_thread; i++) { + if (hs_lit_inst->hs_scratches[i] != NULL) { + hs_free_scratch(hs_lit_inst->hs_scratches[i]); + hs_lit_inst->hs_scratches[i] = NULL; + } + } + FREE(hs_lit_inst->hs_scratches); } - FREE(hs_inst); + if (hs_lit_inst->streams != NULL) { + for (i = 0; i < hs_lit_inst->n_thread; i++) { + if (hs_lit_inst->streams[i] != NULL) { + hs_lit_stream_close(hs_lit_inst->streams[i]); + hs_lit_inst->streams[i] = NULL; + } + } + FREE(hs_lit_inst->streams); + } + + FREE(hs_lit_inst); } -static inline int compare_pattern_id(const void *a, const void *b) +void *hs_lit_engine_new(struct expr_rule *rules, size_t n_rule, + struct pattern_attribute *pat_attr, + void *hs_lit_db, size_t n_thread, + struct log_handle *logger) { - long long ret = *(const unsigned long long *)a - *(const unsigned long long *)b; - if (ret == 0) { - return 0; - } else if(ret < 0) { - return -1; - } else { - return 1; - } + struct hs_lit_engine *hs_lit_inst = ALLOC(struct 
hs_lit_engine, 1); + + hs_lit_inst->n_thread = n_thread; + hs_lit_inst->hs_db = (hs_database_t *)hs_lit_db; + hs_lit_inst->logger = logger; + hs_lit_inst->ref_pat_attr = pat_attr; + hs_lit_inst->hs_scratches = ALLOC(hs_scratch_t *, n_thread); + int ret = hs_alloc_scratches((hs_database_t *)hs_lit_db, hs_lit_inst->hs_scratches, + n_thread, logger); + if (ret < 0) { + log_fatal(logger, MODULE_ADAPTER_HS, + "[%s:%d]alloc scratches for hs lit runtime failed.", + __FUNCTION__, __LINE__); + FREE(hs_lit_inst->hs_scratches); + FREE(hs_lit_inst); + return NULL; + } + + hs_lit_inst->streams = ALLOC(struct hs_lit_stream *, n_thread); + for (size_t i = 0; i < n_thread; i++) { + hs_lit_inst->streams[i] = (struct hs_lit_stream *)hs_lit_stream_open(hs_lit_inst, i); + } + + return hs_lit_inst; } /** @@ -579,16 +228,12 @@ static int matched_event_cb(unsigned int id, unsigned long long from, unsigned long long pattern_id = id; struct matched_pattern *matched_pat = (struct matched_pattern *)ctx; - if (id > matched_pat->n_patterns || id < 0) { - return 0; - } - if (utarray_len(matched_pat->pattern_ids) >= MAX_HIT_PATTERN_NUM) { return 0; } int ret = 0; - struct pattern_attribute pat_attr = matched_pat->ref_hs_attr[id]; + struct pattern_attribute pat_attr = matched_pat->ref_pat_attr[id]; switch (pat_attr.match_mode) { case EXPR_MATCH_MODE_EXACTLY: if (0 == from && matched_pat->scan_data_len == to) { @@ -643,89 +288,62 @@ static int matched_event_cb(unsigned int id, unsigned long long from, } UT_icd ut_hs_pattern_id_icd = {sizeof(unsigned long long), NULL, NULL, NULL}; -void *adapter_hs_stream_open(void *hs_instance, int thread_id) +void *hs_lit_stream_open(void *hs_lit_engine, int thread_id) { - if (NULL == hs_instance || thread_id < 0) { + if (NULL == hs_lit_engine || thread_id < 0) { return NULL; } - struct adapter_hs *hs_inst = (struct adapter_hs *)hs_instance; - struct adapter_hs_stream *hs_stream = ALLOC(struct adapter_hs_stream, 1); + struct hs_lit_engine *hs_lit_inst = (struct 
hs_lit_engine *)hs_lit_engine; + struct hs_lit_stream *lit_stream = ALLOC(struct hs_lit_stream, 1); hs_error_t err; - hs_stream->logger = hs_inst->logger; - hs_stream->thread_id = thread_id; - hs_stream->ref_hs_rt = hs_inst->hs_rt; - hs_stream->matched_pat = ALLOC(struct matched_pattern, 1); - hs_stream->matched_pat->ref_hs_attr = hs_inst->hs_attr; - hs_stream->matched_pat->n_patterns = hs_inst->n_patterns; - utarray_new(hs_stream->matched_pat->pattern_ids, &ut_hs_pattern_id_icd); - utarray_reserve(hs_stream->matched_pat->pattern_ids, MAX_HIT_PATTERN_NUM); + lit_stream->logger = hs_lit_inst->logger; + lit_stream->thread_id = thread_id; + lit_stream->ref_hs_rt = hs_lit_inst; + lit_stream->matched_pat = ALLOC(struct matched_pattern, 1); + lit_stream->matched_pat->ref_pat_attr = hs_lit_inst->ref_pat_attr; + utarray_new(lit_stream->matched_pat->pattern_ids, &ut_hs_pattern_id_icd); + utarray_reserve(lit_stream->matched_pat->pattern_ids, MAX_HIT_PATTERN_NUM); - int err_count = 0; - if (hs_inst->hs_rt->literal_db != NULL) { - err = hs_open_stream(hs_inst->hs_rt->literal_db, 0, - &hs_stream->literal_stream); + if (hs_lit_inst->hs_db != NULL) { + err = hs_open_stream(hs_lit_inst->hs_db, 0, &lit_stream->hs_stream); if (err != HS_SUCCESS) { - log_fatal(hs_inst->logger, MODULE_ADAPTER_HS, + log_fatal(hs_lit_inst->logger, MODULE_ADAPTER_HS, "hs_open_stream failed, hs err:%d", err); - err_count++; + goto error; } } - if (hs_inst->hs_rt->regex_db != NULL) { - err = hs_open_stream(hs_inst->hs_rt->regex_db, 0, - &hs_stream->regex_stream); - if (err != HS_SUCCESS) { - log_fatal(hs_inst->logger, MODULE_ADAPTER_HS, - "hs_open_stream failed, hs err:%d", err); - err_count++; - } - } - - if (err_count > 0) { - goto error; - } - - return hs_stream; + return lit_stream; error: - if (hs_stream->literal_stream != NULL) { - hs_close_stream(hs_stream->literal_stream, NULL, NULL, NULL); - hs_stream->literal_stream = NULL; + if (lit_stream->hs_stream != NULL) { + 
hs_close_stream(lit_stream->hs_stream, NULL, NULL, NULL); + lit_stream->hs_stream = NULL; } - if (hs_stream->regex_stream != NULL) { - hs_close_stream(hs_stream->regex_stream, NULL, NULL, NULL); - hs_stream->regex_stream = NULL; - } - - FREE(hs_stream); + FREE(lit_stream); return NULL; } -void adapter_hs_stream_close(void *hs_stream) +void hs_lit_stream_close(void *hs_lit_stream) { - if (NULL == hs_stream) { + if (NULL == hs_lit_stream) { return; } - struct adapter_hs_stream *stream = (struct adapter_hs_stream *)hs_stream; + struct hs_lit_stream *stream = (struct hs_lit_stream *)hs_lit_stream; if (stream->ref_hs_rt != NULL) { - if (stream->literal_stream != NULL) { - hs_close_stream(stream->literal_stream, NULL, NULL, NULL); - stream->literal_stream = NULL; - } - - if (stream->regex_stream != NULL) { - hs_close_stream(stream->regex_stream, NULL, NULL, NULL); - stream->regex_stream = NULL; + if (stream->hs_stream != NULL) { + hs_close_stream(stream->hs_stream, NULL, NULL, NULL); + stream->hs_stream = NULL; } } /* stream->hs_rt point to hs_instance->hs_rt which will call free same as hs_attr */ stream->ref_hs_rt = NULL; - stream->matched_pat->ref_hs_attr = NULL; + stream->matched_pat->ref_pat_attr = NULL; if (stream->matched_pat->pattern_ids != NULL) { utarray_free(stream->matched_pat->pattern_ids); @@ -736,84 +354,63 @@ void adapter_hs_stream_close(void *hs_stream) FREE(stream); } -static void adapter_hs_stream_reset(struct adapter_hs_stream *hs_stream) +static void hs_lit_stream_reset(struct hs_lit_stream *hs_lit_stream) { - if (NULL == hs_stream) { + if (NULL == hs_lit_stream) { return; } - struct adapter_hs_scratch *scratch = hs_stream->ref_hs_rt->scratch; - if (hs_stream->literal_stream != NULL) { - hs_reset_stream(hs_stream->literal_stream, 0, - scratch->literal_scratches[hs_stream->thread_id], - matched_event_cb, hs_stream->matched_pat); + hs_scratch_t **scratches = hs_lit_stream->ref_hs_rt->hs_scratches; + if (hs_lit_stream->hs_stream != NULL) { + 
hs_reset_stream(hs_lit_stream->hs_stream, 0, + scratches[hs_lit_stream->thread_id], + matched_event_cb, hs_lit_stream->matched_pat); } - - if (hs_stream->regex_stream != NULL) { - hs_reset_stream(hs_stream->regex_stream, 0, - scratch->regex_scratches[hs_stream->thread_id], - matched_event_cb, hs_stream->matched_pat); - } - - utarray_clear(hs_stream->matched_pat->pattern_ids); } -int adapter_hs_scan_match(struct bool_matcher *bm, UT_array *pattern_ids, - struct bool_expr_match *match_buff, size_t buff_size, - struct expr_scan_result *results, size_t n_result, - size_t *n_hit_result) +static void hs_regex_stream_reset(struct hs_regex_stream *hs_regex_stream) { - size_t n_pattern_id = utarray_len(pattern_ids); - if (0 == n_pattern_id) { - *n_hit_result = 0; + if (NULL == hs_regex_stream) { + return; + } + + hs_scratch_t **scratches = hs_regex_stream->ref_hs_rt->hs_scratches; + if (hs_regex_stream->hs_stream != NULL) { + hs_reset_stream(hs_regex_stream->hs_stream, 0, + scratches[hs_regex_stream->thread_id], + matched_event_cb, hs_regex_stream->matched_pat); + } +} + +static int gather_hit_pattern_id(struct matched_pattern *matched_pat, + unsigned long long *pattern_id_array, + size_t array_size, size_t *n_pattern_id) +{ + size_t pattern_id_cnt = utarray_len(matched_pat->pattern_ids); + if (0 == pattern_id_cnt) { + *n_pattern_id = 0; return 0; } - utarray_sort(pattern_ids, compare_pattern_id); - - unsigned long long prev_pattern_id = 0xFFFFFFFFFFFFFFFF; - unsigned long long tmp_pattern_id = 0; - size_t n_unique_pattern_id = 0; - unsigned long long unique_pattern_ids[n_pattern_id]; - - for (size_t i = 0; i < n_pattern_id; i++) { - tmp_pattern_id = *(unsigned long long *)utarray_eltptr(pattern_ids, i); - if (tmp_pattern_id != prev_pattern_id) { - unique_pattern_ids[n_unique_pattern_id++] = tmp_pattern_id; - prev_pattern_id = tmp_pattern_id; - } + size_t array_index = 0; + for (size_t i = 0; i < pattern_id_cnt && array_index < array_size; i++) { + 
pattern_id_array[array_index++] = *(unsigned long long *)utarray_eltptr(matched_pat->pattern_ids, i); } - int bool_matcher_ret = bool_matcher_match(bm, unique_pattern_ids, - n_unique_pattern_id, - match_buff, buff_size); - if (bool_matcher_ret < 0) { - goto next; - } + *n_pattern_id = array_index; + utarray_clear(matched_pat->pattern_ids); - if (bool_matcher_ret > (int)n_result) { - bool_matcher_ret = n_result; - } - - for (int index = 0; index < bool_matcher_ret; index++) { - results[index].rule_id = match_buff[index].expr_id; - results[index].user_tag = match_buff[index].user_tag; - } - *n_hit_result = bool_matcher_ret; - -next: - utarray_clear(pattern_ids); - return bool_matcher_ret; + return 0; } -int adapter_hs_scan_stream(void *hs_stream, const char *data, size_t data_len, - struct expr_scan_result *results, size_t n_result, - size_t *n_hit_result) +int hs_lit_stream_scan(void *hs_lit_stream, const char *data, size_t data_len, + unsigned long long *pattern_id_array, size_t array_size, + size_t *n_pattern_id) { hs_error_t err; - if (NULL == hs_stream || NULL == data || 0 == data_len || - NULL == results || 0 == n_result || NULL == n_hit_result) { + if (NULL == hs_lit_stream || NULL == data || 0 == data_len || + NULL == pattern_id_array || 0 == array_size || NULL == n_pattern_id) { return -1; } @@ -826,69 +423,355 @@ int adapter_hs_scan_stream(void *hs_stream, const char *data, size_t data_len, hs_scan_stream will return `HS_SCAN_TERMINATED`. This case has not been demonstrated in pcapscan, as its callback always returns 0. 
*/ + struct hs_lit_stream *lit_stream = (struct hs_lit_stream *)hs_lit_stream; + int thread_id = lit_stream->thread_id; + hs_scratch_t **scratches = lit_stream->ref_hs_rt->hs_scratches; + lit_stream->matched_pat->scan_data_len = data_len; - int err_count = 0; - struct adapter_hs_stream *stream = (struct adapter_hs_stream *)hs_stream; - int thread_id = stream->thread_id; - struct adapter_hs_scratch *scratch = stream->ref_hs_rt->scratch; - stream->matched_pat->scan_data_len = data_len; - - int err_scratch_flag = 0; - if (stream->literal_stream != NULL) { - if (scratch->literal_scratches != NULL) { - err = hs_scan_stream(stream->literal_stream, data, data_len, - 0, scratch->literal_scratches[thread_id], - matched_event_cb, stream->matched_pat); + if (lit_stream->hs_stream != NULL) { + if (scratches != NULL) { + err = hs_scan_stream(lit_stream->hs_stream, data, data_len, + 0, scratches[thread_id], matched_event_cb, + lit_stream->matched_pat); if (err != HS_SUCCESS) { - err_count++; + return -1; } } else { - log_fatal(stream->logger, MODULE_ADAPTER_HS, - "literal_scratches is null, thread_id:%d", thread_id); - err_scratch_flag++; + log_fatal(lit_stream->logger, MODULE_ADAPTER_HS, + "literal scratches is null, thread_id:%d", thread_id); + return -1; } } - if (stream->regex_stream != NULL) { - if (scratch->regex_scratches != NULL) { - err = hs_scan_stream(stream->regex_stream, data, data_len, - 0, scratch->regex_scratches[thread_id], - matched_event_cb, stream->matched_pat); - if (err != HS_SUCCESS) { - err_count++; - } - } else { - log_fatal(stream->logger, MODULE_ADAPTER_HS, - "regex_scratches is null, thread_id:%d", thread_id); - err_scratch_flag++; - } - } - - if (err_count == 2) { - return -1; - } - - if (err_scratch_flag != 0) { - return -1; - } - - return adapter_hs_scan_match(stream->ref_hs_rt->bm, stream->matched_pat->pattern_ids, - scratch->bool_match_buffs[thread_id], MAX_HIT_EXPR_NUM, - results, n_result, n_hit_result); + return 
gather_hit_pattern_id(lit_stream->matched_pat, pattern_id_array, + array_size, n_pattern_id); } -int adapter_hs_scan(void *hs_instance, int thread_id, const char *data, size_t data_len, - struct expr_scan_result *results, size_t n_result, size_t *n_hit_result) +int hs_lit_engine_scan(void *hs_lit_engine, int thread_id, + const char *data, size_t data_len, + unsigned long long *pattern_id_array, + size_t array_size, size_t *n_pattern_id) { - if (NULL == hs_instance || NULL == data || (0 == data_len) || - NULL == results || 0 == n_result || NULL == n_hit_result) { + if (NULL == hs_lit_engine || NULL == data || 0 == data_len) { return -1; } - struct adapter_hs *hs_inst = (struct adapter_hs *)hs_instance; - struct adapter_hs_stream *hs_stream = hs_inst->hs_rt->streams[thread_id]; - assert(hs_stream != NULL); + struct hs_lit_engine *hs_lit_inst = (struct hs_lit_engine *)hs_lit_engine; + struct hs_lit_stream *hs_lit_stream = hs_lit_inst->streams[thread_id]; + assert(hs_lit_stream != NULL); - adapter_hs_stream_reset(hs_stream); - return adapter_hs_scan_stream(hs_stream, data, data_len, results, n_result, n_hit_result); + hs_lit_stream_reset(hs_lit_stream); + return hs_lit_stream_scan(hs_lit_stream, data, data_len, pattern_id_array, + array_size, n_pattern_id); +} + +void hs_regex_engine_free(void *hs_regex_engine) +{ + if (NULL == hs_regex_engine) { + return; + } + + struct hs_regex_engine *hs_regex_inst = (struct hs_regex_engine *)hs_regex_engine; + size_t i = 0; + + if (hs_regex_inst->hs_db != NULL) { + hs_free_database(hs_regex_inst->hs_db); + hs_regex_inst->hs_db = NULL; + } + + if (hs_regex_inst->hs_scratches != NULL) { + for (i = 0; i < hs_regex_inst->n_thread; i++) { + if (hs_regex_inst->hs_scratches[i] != NULL) { + hs_free_scratch(hs_regex_inst->hs_scratches[i]); + hs_regex_inst->hs_scratches[i] = NULL; + } + } + FREE(hs_regex_inst->hs_scratches); + } + + if (hs_regex_inst->streams != NULL) { + for (i = 0; i < hs_regex_inst->n_thread; i++) { + if 
(hs_regex_inst->streams[i] != NULL) { + hs_regex_stream_close(hs_regex_inst->streams[i]); + hs_regex_inst->streams[i] = NULL; + } + } + FREE(hs_regex_inst->streams); + } + + FREE(hs_regex_inst); +} + +void *hs_regex_engine_new(struct expr_rule *rules, size_t n_rule, + struct pattern_attribute *pat_attr, + void *hs_regex_db, size_t n_thread, + struct log_handle *logger) +{ + struct hs_regex_engine *hs_regex_inst = ALLOC(struct hs_regex_engine, 1); + + hs_regex_inst->n_thread = n_thread; + hs_regex_inst->hs_db = (hs_database_t *)hs_regex_db; + hs_regex_inst->ref_pat_attr = pat_attr; + hs_regex_inst->logger = logger; + hs_regex_inst->hs_scratches = ALLOC(hs_scratch_t *, n_thread); + + int ret = hs_alloc_scratches((hs_database_t *)hs_regex_db, + hs_regex_inst->hs_scratches, + n_thread, logger); + if (ret < 0) { + log_fatal(logger, MODULE_ADAPTER_HS, + "[%s:%d]alloc scratches for hs regex runtime failed.", + __FUNCTION__, __LINE__); + FREE(hs_regex_inst->hs_scratches); + FREE(hs_regex_inst); + return NULL; + } + + hs_regex_inst->streams = ALLOC(struct hs_regex_stream *, n_thread); + for (size_t i = 0; i < n_thread; i++) { + hs_regex_inst->streams[i] = (struct hs_regex_stream *)hs_regex_stream_open(hs_regex_inst, i); + } + + return hs_regex_inst; +} + +int hs_regex_engine_scan(void *hs_regex_engine, int thread_id, + const char *data, size_t data_len, + unsigned long long *pattern_id_array, + size_t array_size, size_t *n_pattern_id) +{ + if (NULL == hs_regex_engine || NULL == data || 0 == data_len) { + return -1; + } + + struct hs_regex_engine *hs_regex_inst = (struct hs_regex_engine *)hs_regex_engine; + struct hs_regex_stream *hs_regex_stream = hs_regex_inst->streams[thread_id]; + assert(hs_regex_stream != NULL); + + hs_regex_stream_reset(hs_regex_stream); + return hs_regex_stream_scan(hs_regex_stream, data, data_len, pattern_id_array, + array_size, n_pattern_id); +} + +void hs_regex_stream_close(void *hs_regex_stream) +{ + if (NULL == hs_regex_stream) { + return; + } + 
+ + struct hs_regex_stream *stream = (struct hs_regex_stream *)hs_regex_stream; + if (stream->ref_hs_rt != NULL) { + if (stream->hs_stream != NULL) { + hs_close_stream(stream->hs_stream, NULL, NULL, NULL); + stream->hs_stream = NULL; + } + } + + /* stream->hs_rt point to hs_instance->hs_rt which will call free + same as hs_attr */ + stream->ref_hs_rt = NULL; + stream->matched_pat->ref_pat_attr = NULL; + + if (stream->matched_pat->pattern_ids != NULL) { + utarray_free(stream->matched_pat->pattern_ids); + stream->matched_pat->pattern_ids = NULL; + } + + FREE(stream->matched_pat); + FREE(stream); +} + +void *hs_regex_stream_open(void *hs_regex_engine, int thread_id) +{ + if (NULL == hs_regex_engine || thread_id < 0) { + return NULL; + } + + struct hs_regex_engine *hs_regex_inst = (struct hs_regex_engine *)hs_regex_engine; + struct hs_regex_stream *regex_stream = ALLOC(struct hs_regex_stream, 1); + hs_error_t err; + + regex_stream->logger = hs_regex_inst->logger; + regex_stream->thread_id = thread_id; + regex_stream->ref_hs_rt = hs_regex_inst; + regex_stream->matched_pat = ALLOC(struct matched_pattern, 1); + regex_stream->matched_pat->ref_pat_attr = hs_regex_inst->ref_pat_attr; + utarray_new(regex_stream->matched_pat->pattern_ids, &ut_hs_pattern_id_icd); + utarray_reserve(regex_stream->matched_pat->pattern_ids, MAX_HIT_PATTERN_NUM); + + if (hs_regex_inst->hs_db != NULL) { + err = hs_open_stream(hs_regex_inst->hs_db, 0, &regex_stream->hs_stream); + if (err != HS_SUCCESS) { + log_fatal(hs_regex_inst->logger, MODULE_ADAPTER_HS, + "hs_open_stream failed, hs err:%d", err); + goto error; + } + } + + return regex_stream; +error: + hs_regex_stream_close(regex_stream); + return NULL; +} + +int hs_regex_stream_scan(void *hs_regex_stream, const char *data, size_t data_len, + unsigned long long *pattern_id_array, size_t array_size, + size_t *n_pattern_id) +{ + hs_error_t err; + + if (NULL == hs_regex_stream || NULL == data || 0 == data_len) { + return -1; + } + + /* + In streaming 
mode, a non-zero return from the user-specified event-handler + function has consequences for the rest of that stream's lifetime: when a + non-zero return occurs, it signals that no more of the stream should be + scanned. Consequently if the user makes a subsequent call to + `hs_scan_stream` on a stream whose processing was terminated in this way, + hs_scan_stream will return `HS_SCAN_TERMINATED`. This case has not been + demonstrated in pcapscan, as its callback always returns 0. + */ + struct hs_regex_stream *regex_stream = (struct hs_regex_stream *)hs_regex_stream; + int thread_id = regex_stream->thread_id; + hs_scratch_t **scratches = regex_stream->ref_hs_rt->hs_scratches; + regex_stream->matched_pat->scan_data_len = data_len; + + if (regex_stream->hs_stream != NULL) { + if (scratches != NULL) { + err = hs_scan_stream(regex_stream->hs_stream, data, data_len, + 0, scratches[thread_id], matched_event_cb, + regex_stream->matched_pat); + if (err != HS_SUCCESS) { + return -1; + } + } else { + log_fatal(regex_stream->logger, MODULE_ADAPTER_HS, + "literal scratches is null, thread_id:%d", thread_id); + return -1; + } + } + + return gather_hit_pattern_id(regex_stream->matched_pat, pattern_id_array, + array_size, n_pattern_id); +} + +void *hs_compile_data_new(enum expr_pattern_type pat_type, size_t n_patterns) +{ + struct hs_compile_data *hs_cd = ALLOC(struct hs_compile_data, 1); + + hs_cd->pat_type = pat_type; + hs_cd->patterns = ALLOC(char *, n_patterns); + hs_cd->pattern_lens = ALLOC(size_t, n_patterns); + hs_cd->n_patterns = n_patterns; + hs_cd->ids = ALLOC(unsigned int, n_patterns); + hs_cd->flags = ALLOC(unsigned int, n_patterns); + + return hs_cd; +} + +void hs_compile_data_free(void *compile_data) +{ + if (NULL == compile_data) { + return; + } + + struct hs_compile_data *hs_cd = (struct hs_compile_data *)compile_data; + if (hs_cd->patterns != NULL) { + for (size_t i = 0; i < hs_cd->n_patterns; i++) { + FREE(hs_cd->patterns[i]); + } + + FREE(hs_cd->patterns); + } 
+ + if (hs_cd->pattern_lens != NULL) { + FREE(hs_cd->pattern_lens); + } + + if (hs_cd->ids != NULL) { + FREE(hs_cd->ids); + } + + if (hs_cd->flags != NULL) { + FREE(hs_cd->flags); + } + + FREE(hs_cd); +} + +void hs_populate_compile_data(void *compile_data, size_t index, int pattern_id, + char *pat, size_t pat_len, int case_sensitive) +{ + struct hs_compile_data *hs_cd = (struct hs_compile_data *)compile_data; + + hs_cd->ids[index] = pattern_id; + + /* set flags */ + if (hs_cd->pat_type == EXPR_PATTERN_TYPE_STR) { + hs_cd->flags[index] |= HS_FLAG_SOM_LEFTMOST; + } + + if (case_sensitive == EXPR_CASE_INSENSITIVE) { + hs_cd->flags[index] |= HS_FLAG_CASELESS; + } + + hs_cd->pattern_lens[index] = pat_len; + hs_cd->patterns[index] = ALLOC(char, pat_len + 1); + memcpy(hs_cd->patterns[index], pat, pat_len); +} + +int hs_build_lit_db(void **hs_lit_db, void *compile_data, struct log_handle *logger) +{ + if (NULL == hs_lit_db || NULL == compile_data) { + return -1; + } + + struct hs_compile_data *lit_cd = (struct hs_compile_data *)compile_data; + hs_compile_error_t *compile_err = NULL; + + if (lit_cd != NULL) { + hs_error_t err = hs_compile_lit_multi((const char *const *)lit_cd->patterns, + lit_cd->flags,lit_cd->ids, lit_cd->pattern_lens, + lit_cd->n_patterns, + HS_MODE_STREAM | HS_MODE_SOM_HORIZON_SMALL, + NULL, (hs_database_t **)hs_lit_db, &compile_err); + if (err != HS_SUCCESS) { + if (compile_err) { + log_error(logger, MODULE_ADAPTER_HS, "[%s:%d] compile error: %s", + __FUNCTION__, __LINE__, compile_err->message); + } + + hs_free_compile_error(compile_err); + return -1; + } + } + + return 0; +} + +int hs_build_regex_db(void **hs_regex_db, void *compile_data, struct log_handle *logger) +{ + if (NULL == hs_regex_db || NULL == compile_data) { + return -1; + } + + struct hs_compile_data *regex_cd = (struct hs_compile_data *)compile_data; + hs_compile_error_t *compile_err = NULL; + + hs_error_t err = hs_compile_multi((const char *const *)regex_cd->patterns, + regex_cd->flags, 
regex_cd->ids, regex_cd->n_patterns, + HS_MODE_STREAM, NULL, (hs_database_t **)hs_regex_db, + &compile_err); + if (err != HS_SUCCESS) { + if (compile_err) { + log_error(logger, MODULE_ADAPTER_HS, "[%s:%d] compile error: %s", + __FUNCTION__, __LINE__, compile_err->message); + } + hs_free_compile_error(compile_err); + return -1; + } + + return 0; } \ No newline at end of file diff --git a/scanner/expr_matcher/adapter_hs/adapter_hs.h b/scanner/expr_matcher/adapter_hs/adapter_hs.h index c9de7d2..aaeca91 100644 --- a/scanner/expr_matcher/adapter_hs/adapter_hs.h +++ b/scanner/expr_matcher/adapter_hs/adapter_hs.h @@ -21,52 +21,82 @@ extern "C" #include "log/log.h" #include "../expr_matcher.h" +#include "../expr_matcher_inc.h" + +int hs_verify_regex_expression(const char *regex_expr, struct log_handle *logger); -int adapter_hs_verify_regex_expression(const char *regex_expr, struct log_handle *logger); /** - * @brief new adapter_hs instance + * @brief new adapter_hs literal instance * * @param rules: logic AND expression's array * @param n_rule: the number of logic AND expression's array * @param nr_worker_threads: the number of scan threads which will call adapter_hs_scan() * - * @retval the pointer to adapter_hs instance + * @retval the pointer to adapter_hs literal instance */ -void *adapter_hs_new(struct expr_rule *rules, size_t n_rule, - size_t n_literal_pattern, size_t n_regex_pattern, - size_t n_worker_thread, struct log_handle *logger); +void *hs_lit_engine_new(struct expr_rule *rules, size_t n_rule, + struct pattern_attribute *pat_attr, + void *hs_lit_db, size_t n_thread, + struct log_handle *logger); + +void *hs_regex_engine_new(struct expr_rule *rules, size_t n_rule, + struct pattern_attribute *pat_attr, + void *hs_regex_db, size_t n_thread, + struct log_handle *logger); + +void hs_lit_engine_free(void *hs_lit_engine); + +void hs_regex_engine_free(void *hs_regex_engine); /** * @brief scan input data to match logic AND expression, return all matched expr_id * - * 
@param instance: adapter_hs instance obtained by adapter_hs_new() + * @param hs_lit: adapter_hs literal instance obtained by adapter_hs_lit_new() * @param thread_id: the thread_id of caller * @param data: data to be scanned * @param data_len: the length of data to be scanned * @param results: the array of expr_id * @param n_results: number of elements in array of expr_id */ -int adapter_hs_scan(void *hs_instance, int thread_id, const char *data, size_t data_len, - struct expr_scan_result *results, size_t n_result, size_t *n_hit_result); +int hs_lit_engine_scan(void *hs_lit_engine, int thread_id, + const char *data, size_t data_len, + unsigned long long *pattern_id_array, + size_t array_size, size_t *n_pattern_id); + +int hs_regex_engine_scan(void *hs_regex_engine, int thread_id, + const char *data, size_t data_len, + unsigned long long *pattern_id_array, + size_t array_size, size_t *n_pattern_id); + +void *hs_lit_stream_open(void *hs_lit_engine, int thread_id); + +void *hs_regex_stream_open(void *hs_regex_engine, int thread_id); + +void hs_lit_stream_close(void *hs_lit_stream); + +void hs_regex_stream_close(void *hs_regex_stream); + +int hs_lit_stream_scan(void *hs_lit_stream, const char *data, size_t data_len, + unsigned long long *pattern_id_array, size_t array_size, + size_t *n_pattern_id); + +int hs_regex_stream_scan(void *hs_regex_stream, const char *data, size_t data_len, + unsigned long long *pattern_id_array, size_t array_size, + size_t *n_pattern_id); /** - * @brief destroy adapter_hs instance - * - * @param instance: adapter_hs instance obtained by adapter_hs_new() + * @brief build database */ -void adapter_hs_free(void *instance); +void *hs_compile_data_new(enum expr_pattern_type pat_type, size_t n_patterns); -/** - * @brief open adapter_hs stream after adapter_hs instance initialized for stream scan - * -*/ -void *adapter_hs_stream_open(void *hs_instance, int thread_id); +void hs_compile_data_free(void *compile_data); -int adapter_hs_scan_stream(void 
*stream, const char *data, size_t data_len, - struct expr_scan_result *results, size_t n_result, - size_t *n_hit_result); +void hs_populate_compile_data(void *compile_data, size_t index, int pattern_id, + char *pat, size_t pat_len, int case_sensitive); -void adapter_hs_stream_close(void *stream); +int hs_build_lit_db(void **hs_lit_db, void *compile_data, struct log_handle *logger); + +int hs_build_regex_db(void **hs_regex_db, void *compile_data, struct log_handle *logger); #ifdef __cplusplus } diff --git a/scanner/expr_matcher/adapter_rs/adapter_rs.cpp b/scanner/expr_matcher/adapter_rs/adapter_rs.cpp index 369c385..dbc6880 100644 --- a/scanner/expr_matcher/adapter_rs/adapter_rs.cpp +++ b/scanner/expr_matcher/adapter_rs/adapter_rs.cpp @@ -17,13 +17,10 @@ #include "rulescan.h" #include "adapter_rs.h" -#include "uthash/utarray.h" #include "uthash/uthash.h" #include "maat_utils.h" #include "../../bool_matcher/bool_matcher.h" -#define MAX_HIT_PATTERN_NUM 1024 - pid_t rs_gettid() { return syscall(SYS_gettid); @@ -39,62 +36,48 @@ static const char *rs_module_name_str(const char *name) #define MODULE_ADAPTER_RS rs_module_name_str("maat.adapter_rs") -struct adpt_rs_compile_data { +struct rs_compile_data { struct scan_pattern *patterns; size_t n_patterns; }; -struct adapter_rs_stream { +struct rs_lit_stream { int thread_id; size_t offset; /* current stream offset */ - rs_stream_t *literal_stream; - rs_stream_t *regex_stream; - struct adapter_rs_runtime *ref_rs_rt; - + rs_stream_t *rs_stream; + struct rs_lit_engine *ref_rs_rt; + struct matched_pattern *matched_pat; struct log_handle *logger; }; -/* adapter_rs runtime */ -struct adapter_rs_runtime { - rs_database_t *literal_db; - rs_database_t *regex_db; - - struct bool_expr_match **bool_match_buffs; /* per thread */ - struct adapter_rs_stream **streams; /* per thread */ - struct matched_pattern **matched_pats; /* per thread */ - struct bool_matcher *bm; -}; - -/* adapter_rs instance */ -struct adapter_rs { - size_t 
n_worker_thread; - size_t n_expr; - size_t n_patterns; - struct adapter_rs_runtime *rs_rt; - struct pattern_attribute *rs_attr; +struct rs_regex_stream { + int thread_id; + size_t offset; /* current stream offset */ + rs_stream_t *rs_stream; + struct rs_regex_engine *ref_rs_rt; + struct matched_pattern *matched_pat; struct log_handle *logger; }; -struct pattern_offset { - long long start; - long long end; +/* adapter_rs literal runtime */ +struct rs_lit_engine { + size_t n_thread; + rs_database_t *rs_db; + struct rs_lit_stream **streams; /* per thread */ + struct pattern_attribute *ref_pat_attr; + struct log_handle *logger; }; -struct pattern_attribute { - long long pattern_id; - enum expr_match_mode match_mode; - struct pattern_offset offset; - size_t pattern_len; +/* adapter_rs regex runtime */ +struct rs_regex_engine { + size_t n_thread; + rs_database_t *rs_db; + struct rs_regex_stream **streams; /* per thread */ + struct pattern_attribute *ref_pat_attr; + struct log_handle *logger; }; -struct matched_pattern { - UT_array *pattern_ids; - size_t n_patterns; - struct pattern_attribute *ref_rs_attr; -}; - -int adapter_rs_verify_regex_expression(const char *regex_expr, - struct log_handle *logger) +int rs_verify_regex_expression(const char *regex_expr, struct log_handle *logger) { int ret = rs_verify_regex(regex_expr); if (ret == 0) { @@ -110,20 +93,16 @@ int adapter_rs_verify_regex_expression(const char *regex_expr, * * @retval 0(success) -1(failed) */ -static int adpt_rs_build_database(struct adapter_rs_runtime *rs_rt, - size_t n_worker_thread, - struct adpt_rs_compile_data *literal_cd, - struct adpt_rs_compile_data *regex_cd, - struct log_handle *logger) +int rs_build_lit_db(void **rs_lit_db, void *compile_data, struct log_handle *logger) { - if (NULL == rs_rt) { + if (NULL == rs_lit_db) { return -1; } - int ret = 0; - if (literal_cd != NULL) { - ret = rs_compile_lit(literal_cd->patterns, literal_cd->n_patterns, - &rs_rt->literal_db); + struct rs_compile_data 
*lit_cd = (struct rs_compile_data *)compile_data; + if (lit_cd != NULL) { + int ret = rs_compile_lit(lit_cd->patterns, lit_cd->n_patterns, + (rs_database_t **)rs_lit_db); if (ret < 0) { log_fatal(logger, MODULE_ADAPTER_RS, "[%s:%d] compile error", __FUNCTION__, __LINE__); @@ -131,13 +110,25 @@ static int adpt_rs_build_database(struct adapter_rs_runtime *rs_rt, } } + return 0; +} + +int rs_build_regex_db(void **rs_regex_db, size_t n_thread, void *compile_data, + struct log_handle *logger) +{ + if (NULL == rs_regex_db) { + return -1; + } + + struct rs_compile_data *regex_cd = (struct rs_compile_data *)compile_data; if (regex_cd != NULL) { size_t n_failed_pats = 0; - ret = rs_compile_regex(regex_cd->patterns, regex_cd->n_patterns, - n_worker_thread, &rs_rt->regex_db, &n_failed_pats); + int ret = rs_compile_regex(regex_cd->patterns, regex_cd->n_patterns, + n_thread, (rs_database_t **)rs_regex_db, + &n_failed_pats); if (ret < 0) { - log_fatal(logger, MODULE_ADAPTER_RS, "[%s:%d] compile error", - __FUNCTION__, __LINE__); + log_fatal(logger, MODULE_ADAPTER_RS, "[%s:%d] compile error", + __FUNCTION__, __LINE__); return -1; } } @@ -145,21 +136,22 @@ static int adpt_rs_build_database(struct adapter_rs_runtime *rs_rt, return 0; } -static struct adpt_rs_compile_data *adpt_rs_compile_data_new(size_t n_patterns) +void *rs_compile_data_new(enum expr_pattern_type pat_type, size_t n_patterns) { - struct adpt_rs_compile_data *rs_cd = ALLOC(struct adpt_rs_compile_data, 1); + struct rs_compile_data *rs_cd = ALLOC(struct rs_compile_data, 1); rs_cd->patterns = ALLOC(struct scan_pattern, n_patterns); rs_cd->n_patterns = n_patterns; return rs_cd; } -static void adpt_rs_compile_data_free(struct adpt_rs_compile_data *rs_cd) +void rs_compile_data_free(void *compile_data) { - if (NULL == rs_cd) { + if (NULL == compile_data) { return; } + struct rs_compile_data *rs_cd = (struct rs_compile_data *)compile_data; if (rs_cd->patterns != NULL) { for (size_t i = 0; i < rs_cd->n_patterns; i++) { if 
(rs_cd->patterns[i].pattern != NULL) { @@ -173,247 +165,16 @@ static void adpt_rs_compile_data_free(struct adpt_rs_compile_data *rs_cd) FREE(rs_cd); } -static void populate_compile_data(struct adpt_rs_compile_data *compile_data, - size_t index, long long pattern_id, char *pat, - size_t pat_len, int case_sensitive) +void rs_populate_compile_data(void *compile_data, size_t index, int pattern_id, + char *pat, size_t pat_len, int case_sensitive) { - compile_data->patterns[index].id = pattern_id; - compile_data->patterns[index].case_sensitive = case_sensitive; - compile_data->patterns[index].pattern = ALLOC(char, pat_len + 1); - memcpy(compile_data->patterns[index].pattern, pat, pat_len); - compile_data->patterns[index].pattern_len = pat_len; -} + struct rs_compile_data *rs_cd = (struct rs_compile_data *)compile_data; -static struct bool_expr *bool_exprs_new(struct expr_rule *rules, size_t n_rule, - struct pattern_attribute *pattern_attr, - struct adpt_rs_compile_data *literal_cd, - struct adpt_rs_compile_data *regex_cd, - size_t *n_pattern) -{ - long long pattern_idx = 0; - size_t literal_idx = 0; - size_t regex_idx = 0; - - struct bool_expr *bool_exprs = ALLOC(struct bool_expr, n_rule); - - /* populate adpt_rs_compile_data and bool_expr */ - for (size_t i = 0; i < n_rule; i++) { - - for (size_t j = 0; j < rules[i].n_patterns; j++) { - pattern_attr[pattern_idx].pattern_id = pattern_idx; - pattern_attr[pattern_idx].match_mode = rules[i].patterns[j].match_mode; - pattern_attr[pattern_idx].pattern_len = rules[i].patterns[j].pat_len; - - if (pattern_attr[pattern_idx].match_mode == EXPR_MATCH_MODE_SUB || - pattern_attr[pattern_idx].match_mode == EXPR_MATCH_MODE_EXACTLY) { - pattern_attr[pattern_idx].offset.start = rules[i].patterns[j].start_offset; - pattern_attr[pattern_idx].offset.end = rules[i].patterns[j].end_offset; - } - - /* literal pattern */ - if (rules[i].patterns[j].type == EXPR_PATTERN_TYPE_STR) { - populate_compile_data(literal_cd, literal_idx, pattern_idx, - 
rules[i].patterns[j].pat, rules[i].patterns[j].pat_len, - rules[i].patterns[j].case_sensitive); - literal_idx++; - } else { - /* regex pattern */ - populate_compile_data(regex_cd, regex_idx, pattern_idx, - rules[i].patterns[j].pat, rules[i].patterns[j].pat_len, - rules[i].patterns[j].case_sensitive); - regex_idx++; - } - - bool_exprs[i].items[j].item_id = pattern_idx++; - bool_exprs[i].items[j].not_flag = 0; - } - - bool_exprs[i].expr_id = rules[i].expr_id; - bool_exprs[i].item_num = rules[i].n_patterns; - bool_exprs[i].user_tag = rules[i].tag; - } - - *n_pattern = pattern_idx; - - return bool_exprs; -} - -UT_icd ut_rs_pattern_id_icd = {sizeof(unsigned long long), NULL, NULL, NULL}; -void *adapter_rs_new(struct expr_rule *rules, size_t n_rule, - size_t n_literal_pattern, size_t n_regex_pattern, - size_t n_worker_thread, struct log_handle *logger) -{ - /* get the sum of pattern */ - size_t i = 0; - struct adpt_rs_compile_data *literal_cd = NULL; - struct adpt_rs_compile_data *regex_cd = NULL; - - if (n_literal_pattern > 0) { - literal_cd = adpt_rs_compile_data_new(n_literal_pattern); - } - - if (n_regex_pattern > 0) { - regex_cd = adpt_rs_compile_data_new(n_regex_pattern); - } - - size_t pattern_cnt = n_literal_pattern + n_regex_pattern; - struct adapter_rs *rs_inst = ALLOC(struct adapter_rs, 1); - rs_inst->rs_attr = ALLOC(struct pattern_attribute, pattern_cnt); - rs_inst->logger = logger; - rs_inst->n_worker_thread = n_worker_thread; - rs_inst->n_expr = n_rule; - - struct bool_expr *bool_exprs = bool_exprs_new(rules, n_rule, rs_inst->rs_attr, - literal_cd, regex_cd, &pattern_cnt); - if (NULL == bool_exprs) { - return NULL; - } - rs_inst->n_patterns = pattern_cnt; - - /* create bool matcher */ - size_t mem_size = 0; - int rs_ret = 0; - - rs_inst->rs_rt = ALLOC(struct adapter_rs_runtime, 1); - - //rs_rt->bm - rs_inst->rs_rt->bm = bool_matcher_new(bool_exprs, n_rule, &mem_size); - if (rs_inst->rs_rt->bm != NULL) { - log_info(logger, MODULE_ADAPTER_RS, - "Adapter_rs 
module: build bool matcher of %zu expressions" - " with %zu bytes memory", n_rule, mem_size); - } else { - log_fatal(logger, MODULE_ADAPTER_RS, - "[%s:%d] Adapter_rs module: build bool matcher failed", - __FUNCTION__, __LINE__); - - rs_ret = -1; - } - FREE(bool_exprs); - - /* build rs database rs_rt->literal_db & rs_rt->regex_db */ - int ret = adpt_rs_build_database(rs_inst->rs_rt, n_worker_thread, - literal_cd, regex_cd, logger); - if (ret < 0) { - rs_ret = -1; - } - - if (literal_cd != NULL) { - adpt_rs_compile_data_free(literal_cd); - literal_cd = NULL; - } - - if (regex_cd != NULL) { - adpt_rs_compile_data_free(regex_cd); - regex_cd = NULL; - } - - if (rs_ret < 0) { - goto error; - } - - /* alloc scratch */ - rs_inst->rs_rt->bool_match_buffs = ALLOC(struct bool_expr_match *, n_worker_thread); - for (i = 0; i < n_worker_thread; i++) { - rs_inst->rs_rt->bool_match_buffs[i] = ALLOC(struct bool_expr_match, MAX_HIT_EXPR_NUM); - } - - rs_inst->rs_rt->streams = ALLOC(struct adapter_rs_stream *, n_worker_thread); - for (i = 0; i < n_worker_thread; i++) { - rs_inst->rs_rt->streams[i] = (struct adapter_rs_stream *)adapter_rs_stream_open(rs_inst, i); - } - - rs_inst->rs_rt->matched_pats = ALLOC(struct matched_pattern *, n_worker_thread); - for (i = 0; i < n_worker_thread; i++) { - rs_inst->rs_rt->matched_pats[i] = ALLOC(struct matched_pattern, 1); - rs_inst->rs_rt->matched_pats[i]->ref_rs_attr = rs_inst->rs_attr; - rs_inst->rs_rt->matched_pats[i]->n_patterns = rs_inst->n_patterns; - utarray_new(rs_inst->rs_rt->matched_pats[i]->pattern_ids, &ut_rs_pattern_id_icd); - utarray_reserve(rs_inst->rs_rt->matched_pats[i]->pattern_ids, MAX_HIT_PATTERN_NUM); - } - - return rs_inst; -error: - adapter_rs_free(rs_inst); - return NULL; -} - -void adapter_rs_free(void *rs_instance) -{ - if (NULL == rs_instance) { - return; - } - - size_t i = 0; - struct adapter_rs *rs_inst = (struct adapter_rs *)rs_instance; - - if (rs_inst->rs_rt != NULL) { - if (rs_inst->rs_rt->literal_db != NULL) { - 
rs_free_database(rs_inst->rs_rt->literal_db); - rs_inst->rs_rt->literal_db = NULL; - } - - if (rs_inst->rs_rt->regex_db != NULL) { - rs_free_database(rs_inst->rs_rt->regex_db); - rs_inst->rs_rt->regex_db = NULL; - } - - if (rs_inst->rs_rt->bool_match_buffs != NULL) { - for (i = 0; i < rs_inst->n_worker_thread; i++) { - if (rs_inst->rs_rt->bool_match_buffs[i] != NULL) { - FREE(rs_inst->rs_rt->bool_match_buffs[i]); - } - } - - FREE(rs_inst->rs_rt->bool_match_buffs); - } - - if (rs_inst->rs_rt->bm != NULL) { - bool_matcher_free(rs_inst->rs_rt->bm); - rs_inst->rs_rt->bm = NULL; - } - - if (rs_inst->rs_rt->streams != NULL) { - for (i = 0; i < rs_inst->n_worker_thread; i++) { - if (rs_inst->rs_rt->streams[i] != NULL) { - adapter_rs_stream_close(rs_inst->rs_rt->streams[i]); - rs_inst->rs_rt->streams[i] = NULL; - } - } - FREE(rs_inst->rs_rt->streams); - } - - if (rs_inst->rs_rt->matched_pats != NULL) { - for (i = 0; i < rs_inst->n_worker_thread; i++) { - if (rs_inst->rs_rt->matched_pats[i] != NULL) { - utarray_free(rs_inst->rs_rt->matched_pats[i]->pattern_ids); - rs_inst->rs_rt->matched_pats[i]->pattern_ids = NULL; - FREE(rs_inst->rs_rt->matched_pats[i]); - } - } - FREE(rs_inst->rs_rt->matched_pats); - } - - FREE(rs_inst->rs_rt); - } - - if (rs_inst->rs_attr != NULL) { - FREE(rs_inst->rs_attr); - } - - FREE(rs_inst); -} - -static inline int compare_pattern_id(const void *a, const void *b) -{ - long long ret = *(const unsigned long long *)a - *(const unsigned long long *)b; - if (ret == 0) { - return 0; - } else if(ret < 0) { - return -1; - } else { - return 1; - } + rs_cd->patterns[index].id = pattern_id; + rs_cd->patterns[index].case_sensitive = case_sensitive; + rs_cd->patterns[index].pattern = ALLOC(char, pat_len + 1); + memcpy(rs_cd->patterns[index].pattern, pat, pat_len); + rs_cd->patterns[index].pattern_len = pat_len; } /** @@ -426,16 +187,12 @@ static int matched_event_cb(unsigned int id, int pos_offset, int from, int to, unsigned long long pattern_id = id; struct 
matched_pattern *matched_pat = (struct matched_pattern *)ctx; - if (pattern_id > matched_pat->n_patterns || id < 0) { - return 0; - } - if (utarray_len(matched_pat->pattern_ids) >= MAX_HIT_PATTERN_NUM) { return 0; } int ret = 0; - struct pattern_attribute pat_attr = matched_pat->ref_rs_attr[id]; + struct pattern_attribute pat_attr = matched_pat->ref_pat_attr[id]; switch (pat_attr.match_mode) { case EXPR_MATCH_MODE_EXACTLY: @@ -490,205 +247,329 @@ static int matched_event_cb(unsigned int id, int pos_offset, int from, int to, return 0; } -void *adapter_rs_stream_open(void *rs_instance, int thread_id) +static int gather_hit_pattern_id(struct matched_pattern *matched_pat, + unsigned long long *pattern_id_array, + size_t array_size, size_t *n_pattern_id) { - if (NULL == rs_instance || thread_id < 0) { - return NULL; - } - - struct adapter_rs *rs_inst = (struct adapter_rs *)rs_instance; - struct adapter_rs_stream *rs_stream = ALLOC(struct adapter_rs_stream, 1); - - rs_stream->logger = rs_inst->logger; - rs_stream->thread_id = thread_id; - rs_stream->ref_rs_rt = rs_inst->rs_rt; - - int err_count = 0; - if (rs_inst->rs_rt->literal_db != NULL) { - rs_stream->literal_stream = rs_open_stream(rs_inst->rs_rt->literal_db, 0, 128); - if (NULL == rs_stream->literal_stream) { - log_fatal(rs_inst->logger, MODULE_ADAPTER_RS, "rs_open_stream failed"); - err_count++; - } + size_t pattern_id_cnt = utarray_len(matched_pat->pattern_ids); + if (0 == pattern_id_cnt) { + *n_pattern_id = 0; + return 0; } - if (rs_inst->rs_rt->regex_db != NULL) { - rs_stream->regex_stream = rs_open_stream(rs_inst->rs_rt->regex_db, 0, 128); - if (NULL == rs_stream->regex_stream) { - log_fatal(rs_inst->logger, MODULE_ADAPTER_RS, "rs_open_stream failed"); - err_count++; - } + size_t array_index = 0; + for (size_t i = 0; i < pattern_id_cnt && array_index < array_size; i++) { + pattern_id_array[array_index++] = *(unsigned long long *)utarray_eltptr(matched_pat->pattern_ids, i); } - if (err_count > 0) { - goto 
error; - } + *n_pattern_id = array_index; + utarray_clear(matched_pat->pattern_ids); - return rs_stream; -error: - if (rs_stream->literal_stream != NULL) { - rs_close_stream(rs_stream->literal_stream); - rs_stream->literal_stream = NULL; - } - - if (rs_stream->regex_stream != NULL) { - rs_close_stream(rs_stream->regex_stream); - rs_stream->regex_stream = NULL; - } - - FREE(rs_stream); - return NULL; + return 0; } -void adapter_rs_stream_close(void *rs_stream) +void rs_lit_engine_free(void *rs_lit_engine) { - if (NULL == rs_stream) { + if (NULL == rs_lit_engine) { return; } - struct adapter_rs_stream *stream = (struct adapter_rs_stream *)rs_stream; - if (stream->ref_rs_rt != NULL) { - if (stream->literal_stream != NULL) { - rs_close_stream(stream->literal_stream); - stream->literal_stream = NULL; + struct rs_lit_engine *rs_lit_inst = (struct rs_lit_engine *)rs_lit_engine; + + if (rs_lit_inst->rs_db != NULL) { + rs_free_database(rs_lit_inst->rs_db); + rs_lit_inst->rs_db = NULL; + } + + if (rs_lit_inst->streams != NULL) { + for (size_t i = 0; i < rs_lit_inst->n_thread; i++) { + if (rs_lit_inst->streams[i] != NULL) { + rs_lit_stream_close(rs_lit_inst->streams[i]); + rs_lit_inst->streams[i] = NULL; + } } - - if (stream->regex_stream != NULL) { - rs_close_stream(stream->regex_stream); - stream->regex_stream = NULL; + FREE(rs_lit_inst->streams); + } + + FREE(rs_lit_inst); +} + +UT_icd ut_rs_pattern_id_icd = {sizeof(unsigned long long), NULL, NULL, NULL}; +void *rs_lit_engine_new(struct expr_rule *rules, size_t n_rule, + struct pattern_attribute *pat_attr, + void *rs_lit_db, size_t n_thread, + struct log_handle *logger) +{ + struct rs_lit_engine *rs_lit_inst = ALLOC(struct rs_lit_engine, 1); + + rs_lit_inst->n_thread = n_thread; + rs_lit_inst->rs_db = (rs_database_t *)rs_lit_db; + rs_lit_inst->ref_pat_attr = pat_attr; + rs_lit_inst->logger = logger; + rs_lit_inst->streams = ALLOC(struct rs_lit_stream *, n_thread); + + for (size_t i = 0; i < n_thread; i++) { + 
rs_lit_inst->streams[i] = (struct rs_lit_stream *)rs_lit_stream_open(rs_lit_inst, i); + } + + return rs_lit_inst; +} + +int rs_lit_engine_scan(void *rs_lit_engine, int thread_id, + const char *data, size_t data_len, + unsigned long long *pattern_id_array, + size_t array_size, size_t *n_pattern_id) +{ + if (NULL == rs_lit_engine || NULL == data || (0 == data_len) || + NULL == pattern_id_array || 0 == array_size || NULL == n_pattern_id) { + return -1; + } + + struct rs_lit_engine *rs_lit_inst = (struct rs_lit_engine *)rs_lit_engine; + struct rs_lit_stream *rs_lit_stream = rs_lit_inst->streams[thread_id]; + assert(rs_lit_stream != NULL); + + if (rs_lit_inst->rs_db != NULL) { + int ret = rs_scan(rs_lit_inst->rs_db, thread_id, data, data_len, + 0, matched_event_cb, rs_lit_stream->matched_pat); + if (ret < 0) { + return -1; + } + } + + return gather_hit_pattern_id(rs_lit_stream->matched_pat, pattern_id_array, + array_size, n_pattern_id); +} + +void *rs_lit_stream_open(void *rs_lit_engine, int thread_id) +{ + if (NULL == rs_lit_engine || thread_id < 0) { + return NULL; + } + + struct rs_lit_engine *rs_lit_inst = (struct rs_lit_engine *)rs_lit_engine; + struct rs_lit_stream *lit_stream = ALLOC(struct rs_lit_stream, 1); + + lit_stream->logger = rs_lit_inst->logger; + lit_stream->thread_id = thread_id; + lit_stream->ref_rs_rt = rs_lit_inst; + lit_stream->matched_pat = ALLOC(struct matched_pattern, 1); + lit_stream->matched_pat->ref_pat_attr = rs_lit_inst->ref_pat_attr; + utarray_new(lit_stream->matched_pat->pattern_ids, &ut_rs_pattern_id_icd); + utarray_reserve(lit_stream->matched_pat->pattern_ids, MAX_HIT_PATTERN_NUM); + + if (rs_lit_inst->rs_db != NULL) { + lit_stream->rs_stream = rs_open_stream(rs_lit_inst->rs_db, 0, 128); + if (NULL == lit_stream->rs_stream) { + log_fatal(rs_lit_inst->logger, MODULE_ADAPTER_RS, "rs_open_stream failed"); + FREE(lit_stream); + return NULL; + } + } + + return lit_stream; +} + +void rs_lit_stream_close(void *rs_lit_stream) +{ + if (NULL == 
rs_lit_stream) { + return; + } + + struct rs_lit_stream *lit_stream = (struct rs_lit_stream *)rs_lit_stream; + if (lit_stream->ref_rs_rt != NULL) { + if (lit_stream->rs_stream != NULL) { + rs_close_stream(lit_stream->rs_stream); + lit_stream->rs_stream = NULL; } } /* rs_stream->rs_rt point to rs_instance->rs_rt which will call free same as rs_attr */ - stream->ref_rs_rt = NULL; - FREE(stream); + lit_stream->ref_rs_rt = NULL; + lit_stream->matched_pat->ref_pat_attr = NULL; + + if (lit_stream->matched_pat->pattern_ids != NULL) { + utarray_free(lit_stream->matched_pat->pattern_ids); + lit_stream->matched_pat->pattern_ids = NULL; + } + + FREE(lit_stream->matched_pat); + FREE(lit_stream); } -int adapter_rs_scan_match(struct bool_matcher *bm, UT_array *pattern_ids, - struct bool_expr_match *match_buff, size_t buff_size, - struct expr_scan_result *results, size_t n_result, - size_t *n_hit_result) +int rs_lit_stream_scan(void *rs_lit_stream, const char *data, size_t data_len, + unsigned long long *pattern_id_array, size_t array_size, + size_t *n_pattern_id) { - size_t n_pattern_id = utarray_len(pattern_ids); - if (0 == n_pattern_id) { - *n_hit_result = 0; - return 0; - } - - utarray_sort(pattern_ids, compare_pattern_id); - - unsigned long long prev_pattern_id = 0xFFFFFFFFFFFFFFFF; - unsigned long long tmp_pattern_id = 0; - size_t n_unique_pattern_id = 0; - unsigned long long unique_pattern_ids[n_pattern_id]; - - for (size_t i = 0; i < n_pattern_id; i++) { - tmp_pattern_id = *(unsigned long long *)utarray_eltptr(pattern_ids, i); - if (tmp_pattern_id != prev_pattern_id) { - unique_pattern_ids[n_unique_pattern_id++] = tmp_pattern_id; - prev_pattern_id = tmp_pattern_id; - } - } - - int bool_matcher_ret = bool_matcher_match(bm, unique_pattern_ids, - n_unique_pattern_id, - match_buff, buff_size); - if (bool_matcher_ret < 0) { - goto next; - } - - if (bool_matcher_ret > (int)n_result) { - bool_matcher_ret = n_result; - } - - for (int index = 0; index < bool_matcher_ret; index++) 
{ - results[index].rule_id = match_buff[index].expr_id; - results[index].user_tag = match_buff[index].user_tag; - } - *n_hit_result = bool_matcher_ret; - -next: - utarray_clear(pattern_ids); - return bool_matcher_ret; -} - -int adapter_rs_scan_stream(void *rs_stream, const char *data, size_t data_len, - struct expr_scan_result *results, size_t n_result, - size_t *n_hit_result) -{ - if (NULL == rs_stream || NULL == data || 0 == data_len || - NULL == results || 0 == n_result || NULL == n_hit_result) { + if (NULL == rs_lit_stream || NULL == data || 0 == data_len || + NULL == pattern_id_array || 0 == array_size || NULL == n_pattern_id) { return -1; } - int ret = 0, err_count = 0; - struct adapter_rs_stream *stream = (struct adapter_rs_stream *)rs_stream; - int thread_id = stream->thread_id; - struct adapter_rs_runtime *rs_rt = stream->ref_rs_rt; - struct matched_pattern *matched_pat = rs_rt->matched_pats[thread_id]; + struct rs_lit_stream *lit_stream = (struct rs_lit_stream *)rs_lit_stream; - if (stream->literal_stream != NULL) { - ret = rs_scan_stream(stream->literal_stream, data, data_len, - matched_event_cb, matched_pat); + if (lit_stream->rs_stream != NULL) { + int ret = rs_scan_stream(lit_stream->rs_stream, data, data_len, + matched_event_cb, lit_stream->matched_pat); if (ret < 0) { - err_count++; + return -1; } } - if (stream->regex_stream != NULL) { - ret = rs_scan_stream(stream->regex_stream, data, data_len, - matched_event_cb, matched_pat); - if (ret < 0) { - err_count++; - } - } - - if (err_count == 2) { - return -1; - } - - return adapter_rs_scan_match(rs_rt->bm, matched_pat->pattern_ids, - rs_rt->bool_match_buffs[thread_id], - MAX_HIT_EXPR_NUM, results, n_result, - n_hit_result); + return gather_hit_pattern_id(lit_stream->matched_pat, pattern_id_array, + array_size, n_pattern_id); } -int adapter_rs_scan(void *rs_instance, int thread_id, const char *data, size_t data_len, - struct expr_scan_result *results, size_t n_result, size_t *n_hit_result) +void 
rs_regex_engine_free(void *rs_regex_engine) { - if (NULL == rs_instance || NULL == data || (0 == data_len) || - NULL == results || 0 == n_result || NULL == n_hit_result) { + if (NULL == rs_regex_engine) { + return; + } + + struct rs_regex_engine *rs_regex_inst = (struct rs_regex_engine *)rs_regex_engine; + + if (rs_regex_inst->rs_db != NULL) { + rs_free_database(rs_regex_inst->rs_db); + rs_regex_inst->rs_db = NULL; + } + + if (rs_regex_inst->streams != NULL) { + for (size_t i = 0; i < rs_regex_inst->n_thread; i++) { + if (rs_regex_inst->streams[i] != NULL) { + rs_regex_stream_close(rs_regex_inst->streams[i]); + rs_regex_inst->streams[i] = NULL; + } + } + + FREE(rs_regex_inst->streams); + } + + FREE(rs_regex_inst); +} + +void *rs_regex_engine_new(struct expr_rule *rules, size_t n_rule, + struct pattern_attribute *pat_attr, + void *rs_regex_db, size_t n_thread, + struct log_handle *logger) +{ + struct rs_regex_engine *rs_regex_inst = ALLOC(struct rs_regex_engine, 1); + + rs_regex_inst->n_thread = n_thread; + rs_regex_inst->rs_db = (rs_database_t *)rs_regex_db; + rs_regex_inst->ref_pat_attr = pat_attr; + rs_regex_inst->logger = logger; + rs_regex_inst->streams = ALLOC(struct rs_regex_stream *, n_thread); + + for (size_t i = 0; i < n_thread; i++) { + rs_regex_inst->streams[i] = (struct rs_regex_stream *)rs_regex_stream_open(rs_regex_inst, i); + } + + return rs_regex_inst; +} + +int rs_regex_engine_scan(void *rs_regex_engine, int thread_id, + const char *data, size_t data_len, + unsigned long long *pattern_id_array, + size_t array_size, size_t *n_pattern_id) +{ + if (NULL == rs_regex_engine || NULL == data || (0 == data_len) || + NULL == pattern_id_array || 0 == array_size || NULL == n_pattern_id) { return -1; } - int ret = 0, err_count = 0; - struct adapter_rs *rs_inst = (struct adapter_rs *)rs_instance; - struct adapter_rs_runtime *rs_rt = rs_inst->rs_rt; - struct matched_pattern *matched_pat = rs_rt->matched_pats[thread_id]; + struct rs_regex_engine *rs_regex_inst = 
(struct rs_regex_engine *)rs_regex_engine; + struct rs_regex_stream *rs_regex_stream = rs_regex_inst->streams[thread_id]; + assert(rs_regex_stream != NULL); - if (rs_rt->literal_db != NULL) { - ret = rs_scan(rs_rt->literal_db, thread_id, data, data_len, - 0, matched_event_cb, matched_pat); + if (rs_regex_inst->rs_db != NULL) { + int ret = rs_scan(rs_regex_inst->rs_db, thread_id, data, data_len, + 0, matched_event_cb, rs_regex_stream->matched_pat); if (ret < 0) { - err_count++; + return -1; } } + + return gather_hit_pattern_id(rs_regex_stream->matched_pat, pattern_id_array, + array_size, n_pattern_id); +} + +void *rs_regex_stream_open(void *rs_regex_engine, int thread_id) +{ + if (NULL == rs_regex_engine || thread_id < 0) { + return NULL; + } + + struct rs_regex_engine *rs_regex_inst = (struct rs_regex_engine *)rs_regex_engine; + struct rs_regex_stream *regex_stream = ALLOC(struct rs_regex_stream, 1); + + regex_stream->logger = rs_regex_inst->logger; + regex_stream->thread_id = thread_id; + regex_stream->ref_rs_rt = rs_regex_inst; + regex_stream->matched_pat = ALLOC(struct matched_pattern, 1); + regex_stream->matched_pat->ref_pat_attr = rs_regex_inst->ref_pat_attr; + utarray_new(regex_stream->matched_pat->pattern_ids, &ut_rs_pattern_id_icd); + utarray_reserve(regex_stream->matched_pat->pattern_ids, MAX_HIT_PATTERN_NUM); + + if (rs_regex_inst->rs_db != NULL) { + regex_stream->rs_stream = rs_open_stream(rs_regex_inst->rs_db, 0, 128); + if (NULL == regex_stream->rs_stream) { + log_fatal(rs_regex_inst->logger, MODULE_ADAPTER_RS, "rs_open_stream failed"); + FREE(regex_stream); + return NULL; + } + } + + return regex_stream; +} + +void rs_regex_stream_close(void *rs_regex_stream) +{ + if (NULL == rs_regex_stream) { + return; + } + + struct rs_regex_stream *regex_stream = (struct rs_regex_stream *)rs_regex_stream; + if (regex_stream->ref_rs_rt != NULL) { + if (regex_stream->rs_stream != NULL) { + rs_close_stream(regex_stream->rs_stream); + regex_stream->rs_stream = NULL; + 
} + } + + /* rs_stream->rs_rt point to rs_instance->rs_rt which will call free + same as rs_attr */ + regex_stream->ref_rs_rt = NULL; + regex_stream->matched_pat->ref_pat_attr = NULL; + + if (regex_stream->matched_pat->pattern_ids != NULL) { + utarray_free(regex_stream->matched_pat->pattern_ids); + regex_stream->matched_pat->pattern_ids = NULL; + } + + FREE(regex_stream->matched_pat); + FREE(regex_stream); +} + +int rs_regex_stream_scan(void *rs_regex_stream, const char *data, size_t data_len, + unsigned long long *pattern_id_array, size_t array_size, + size_t *n_pattern_id) +{ + if (NULL == rs_regex_stream || NULL == data || 0 == data_len || + NULL == pattern_id_array || 0 == array_size || NULL == n_pattern_id) { + return -1; + } - if (rs_rt->regex_db != NULL) { - ret = rs_scan(rs_rt->regex_db, thread_id, data, data_len, - 0, matched_event_cb, matched_pat); + struct rs_regex_stream *regex_stream = (struct rs_regex_stream *)rs_regex_stream; + + if (regex_stream->rs_stream != NULL) { + int ret = rs_scan_stream(regex_stream->rs_stream, data, data_len, + matched_event_cb, regex_stream->matched_pat); if (ret < 0) { - err_count++; + return -1; } } - if (err_count == 2) { - return -1; - } - - return adapter_rs_scan_match(rs_rt->bm, matched_pat->pattern_ids, - rs_rt->bool_match_buffs[thread_id], - MAX_HIT_EXPR_NUM, results, n_result, - n_hit_result); + return gather_hit_pattern_id(regex_stream->matched_pat, pattern_id_array, + array_size, n_pattern_id); } \ No newline at end of file diff --git a/scanner/expr_matcher/adapter_rs/adapter_rs.h b/scanner/expr_matcher/adapter_rs/adapter_rs.h index c43e553..31ba83a 100644 --- a/scanner/expr_matcher/adapter_rs/adapter_rs.h +++ b/scanner/expr_matcher/adapter_rs/adapter_rs.h @@ -21,55 +21,89 @@ extern "C" #include "log/log.h" #include "../expr_matcher.h" +#include "../expr_matcher_inc.h" -int adapter_rs_verify_regex_expression(const char *regex_expr, - struct log_handle *logger); +int rs_verify_regex_expression(const char 
*regex_expr, struct log_handle *logger); /** - * @brief new adapter_rs instance + * @brief new adapter_rs literal instance * * @param rules: logic AND expression's array * @param n_rule: the number of logic AND expression's array - * @param n_worker_threads: the number of scan threads which will call adapter_rs_scan() + * @param n_thread: the number of scan threads which will call adapter_rs_lit_scan() * - * @retval the pointer to adapter_rs instance + * @retval the pointer to adapter_rs literal instance */ -void *adapter_rs_new(struct expr_rule *rules, size_t n_rule, - size_t n_literal_pattern, size_t n_regex_pattern, - size_t n_worker_thread, struct log_handle *logger); +void *rs_lit_engine_new(struct expr_rule *rules, size_t n_rule, + struct pattern_attribute *pat_attr, + void *rs_lit_db, size_t n_thread, + struct log_handle *logger); -void adapter_rs_free(void *rs_instance); +void *rs_regex_engine_new(struct expr_rule *rules, size_t n_rule, + struct pattern_attribute *pat_attr, + void *rs_regex_db, size_t n_thread, + struct log_handle *logger); + +void rs_lit_engine_free(void *rs_lit_engine); + +void rs_regex_engine_free(void *rs_regex_engine); /** * @brief scan input data to match logic AND expression, return all matched expr_id * - * @param rs_instance: adapter_rs instance obtained by adapter_rs_new() - * @param thread_id: the thread_id of caller - * @param scan_data: data to be scanned - * @param data_len: the length of data to be scanned - * @param result_array: the array to store hit expr_id which allocated by caller - * @param n_result_array: number of elements in array of expr_id + * @param adapter_rs_lit: adapter_rs literal instance obtained by adapter_rs_lit_new() + * @param thread_id: the thread_id of caller + * @param scan_data: data to be scanned + * @param data_len: the length of data to be scanned + * @param result_array: the array to store hit expr_id which allocated by caller + * @param n_result_array: number of elements in array of expr_id */ 
-int adapter_rs_scan(void *rs_instance, int thread_id, - const char *scan_data, size_t data_len, - struct expr_scan_result *result_array, - size_t n_result_array, size_t *n_hit_results); +int rs_lit_engine_scan(void *rs_lit_engine, int thread_id, + const char *scan_data, size_t data_len, + unsigned long long *pattern_id_array, + size_t array_size, size_t *n_pattern_id); + +int rs_regex_engine_scan(void *rs_lit_engine, int thread_id, + const char *scan_data, size_t data_len, + unsigned long long *pattern_id_array, + size_t array_size, size_t *n_pattern_id); /** - * @brief + * @brief open stream for adapter_rs literal instance */ -void *adapter_rs_stream_open(void *rs_instance, int thread_id); +void *rs_lit_stream_open(void *rs_lit_engine, int thread_id); + +void *rs_regex_stream_open(void *rs_regex_engine, int thread_id); + +void rs_lit_stream_close(void *rs_lit_stream); + +void rs_regex_stream_close(void *rs_regex_stream); /** - * @brief + * @brief scan stream by adapter_rs literal stream */ -int adapter_rs_scan_stream(void *rs_stream, const char *scan_data, - size_t data_len, struct expr_scan_result *result_array, - size_t n_result_array, size_t *n_hit_results); +int rs_lit_stream_scan(void *rs_lit_stream, const char *scan_data, size_t data_len, + unsigned long long *pattern_id_array, size_t array_size, + size_t *n_pattern_id); + +int rs_regex_stream_scan(void *rs_regex_stream, const char *scan_data, size_t data_len, + unsigned long long *pattern_id_array, size_t array_size, + size_t *n_pattern_id); + /** - * @brief - */ -void adapter_rs_stream_close(void *rs_stream); + * @brief build database +*/ +void *rs_compile_data_new(enum expr_pattern_type pat_type, size_t n_patterns); + +void rs_compile_data_free(void *compile_data); + +void rs_populate_compile_data(void *compile_data, size_t index, int pattern_id, + char *pat, size_t pat_len, int case_sensitive); + +int rs_build_lit_db(void **rs_lit_db, void *compile_data, struct log_handle *logger); + +int 
rs_build_regex_db(void **rs_regex_db, size_t n_thread, void *compile_data, + struct log_handle *logger); #ifdef __cplusplus } diff --git a/scanner/expr_matcher/expr_matcher.cpp b/scanner/expr_matcher/expr_matcher.cpp index 64fbe97..16ec4ee 100644 --- a/scanner/expr_matcher/expr_matcher.cpp +++ b/scanner/expr_matcher/expr_matcher.cpp @@ -13,8 +13,9 @@ #include #include "log/log.h" -#include "expr_matcher.h" #include "maat_utils.h" +#include "../bool_matcher/bool_matcher.h" +#include "expr_matcher_inc.h" #include "adapter_hs/adapter_hs.h" #include "adapter_rs/adapter_rs.h" @@ -34,125 +35,200 @@ static const char *expr_matcher_module_name_str(const char *name) #define MODULE_EXPR_MATCHER expr_matcher_module_name_str("maat.expr_matcher") struct expr_matcher { + size_t n_thread; enum expr_engine_type engine_type; - void *engine; + void *lit_runtime; + void *regex_runtime; + struct pattern_attribute *pat_attr; + struct bool_matcher *bm; + struct bool_expr_match **bool_match_buffs; struct log_handle *logger; }; struct expr_matcher_stream { + int thread_id; enum expr_engine_type engine_type; - void *handle; + void *lit_stream; + void *regex_stream; + struct expr_matcher *ref_matcher; }; -struct expr_engine_operations { +struct db_operations { enum expr_engine_type type; - void *(*engine_new)(struct expr_rule *rules, size_t n_rule, - size_t n_literal_pattern, size_t n_regex_pattern, - size_t n_worker_thread, struct log_handle *logger); - void (*engine_free)(void *engine); - int (*engine_scan)(void *engine, int thread_id, const char *scan_data, - size_t data_len, struct expr_scan_result *result_array, - size_t n_result_array, size_t *n_hit_result); - void *(*engine_stream_open)(void *engine, int thread_id); - void (*engine_stream_close)(void *stream); - int (*engine_scan_stream)(void *stream, const char *scan_data, size_t data_len, - struct expr_scan_result *result_array, size_t n_result_array, - size_t *n_hit_result); + void *(*compile_data_new)(enum expr_pattern_type 
pat_type, size_t n_pattern); + void (*compile_data_free)(void *compile_data); + void (*populate_compile_data)(void *compile_data, size_t index, int pattern_id, + char *pat, size_t pat_len, int case_sensitive); + int (*build_db)(void **lit_db, void *compile_data, struct log_handle *logger); }; -struct expr_engine_operations expr_engine_ops[EXPR_ENGINE_TYPE_MAX] = { +struct db_operations db_ops[EXPR_ENGINE_TYPE_AUTO] = { { .type = EXPR_ENGINE_TYPE_HS, - .engine_new = adapter_hs_new, - .engine_free = adapter_hs_free, - .engine_scan = adapter_hs_scan, - .engine_stream_open = adapter_hs_stream_open, - .engine_stream_close = adapter_hs_stream_close, - .engine_scan_stream = adapter_hs_scan_stream + .compile_data_new = hs_compile_data_new, + .compile_data_free = hs_compile_data_free, + .populate_compile_data = hs_populate_compile_data, + .build_db = hs_build_lit_db }, { .type = EXPR_ENGINE_TYPE_RS, - .engine_new = adapter_rs_new, - .engine_free = adapter_rs_free, - .engine_scan = adapter_rs_scan, - .engine_stream_open = adapter_rs_stream_open, - .engine_stream_close = adapter_rs_stream_close, - .engine_scan_stream = adapter_rs_scan_stream + .compile_data_new = rs_compile_data_new, + .compile_data_free = rs_compile_data_free, + .populate_compile_data = rs_populate_compile_data, + .build_db = rs_build_lit_db + } +}; + +struct engine_operations { + enum expr_engine_type type; + void *(*engine_new)(struct expr_rule *rules, size_t n_rule, + struct pattern_attribute *pat_attr, + void *hs_lit_db, size_t n_thread, + struct log_handle *logger); + + void (*engine_free)(void *engine); + + int (*engine_scan)(void *engine, int thread_id, + const char *data, size_t data_len, + unsigned long long *pattern_id_array, + size_t array_size, size_t *n_pattern_id); + + void *(*stream_open)(void *engine, int thread_id); + + void (*stream_close)(void *stream); + + int (*scan_stream)(void *stream, const char *data, size_t data_len, + unsigned long long *pattern_id_array, size_t array_size, + 
size_t *n_pattern_id); +}; + +struct engine_operations engine_ops[EXPR_ENGINE_TYPE_AUTO] = { + { + .type = EXPR_ENGINE_TYPE_HS, + .engine_new = hs_lit_engine_new, + .engine_free = hs_lit_engine_free, + .engine_scan = hs_lit_engine_scan, + .stream_open = hs_lit_stream_open, + .stream_close = hs_lit_stream_close, + .scan_stream = hs_lit_stream_scan + }, + { + .type = EXPR_ENGINE_TYPE_RS, + .engine_new = rs_lit_engine_new, + .engine_free = rs_lit_engine_free, + .engine_scan = rs_lit_engine_scan, + .stream_open = rs_lit_stream_open, + .stream_close = rs_lit_stream_close, + .scan_stream = rs_lit_stream_scan } }; int expr_matcher_verify_regex_expression(const char *regex_expr, struct log_handle *logger) { - int ret = adapter_hs_verify_regex_expression(regex_expr, logger); + int ret = hs_verify_regex_expression(regex_expr, logger); if (ret == 0) { return 0; } - return adapter_rs_verify_regex_expression(regex_expr, logger); + return rs_verify_regex_expression(regex_expr, logger); } -struct expr_matcher * -expr_matcher_new(struct expr_rule *rules, size_t n_rule, enum expr_engine_type engine_type, - size_t n_worker_thread, struct log_handle *logger) +static int expr_rule_pattern_count(struct expr_rule *rules, size_t n_rule, + size_t *n_lit_pat, size_t *n_regex_pat, + struct log_handle *logger) { - if (NULL == rules || 0 == n_rule || 0 == n_worker_thread || - (engine_type != EXPR_ENGINE_TYPE_HS && engine_type != EXPR_ENGINE_TYPE_RS)) { - log_fatal(logger, MODULE_EXPR_MATCHER, "[%s:%d]engine type:%d is illegal", - __FUNCTION__, __LINE__, engine_type); - return NULL; - } - - size_t i = 0, j = 0; - size_t literal_pat_num = 0; + size_t lit_pat_num = 0; size_t regex_pat_num = 0; - for (i = 0; i < n_rule; i++) { + for (size_t i = 0; i < n_rule; i++) { if (rules[i].n_patterns > MAX_EXPR_PATTERN_NUM) { - log_fatal(logger, MODULE_EXPR_MATCHER, - "[%s:%d] the number of patterns in one expression should less than" - " %d", __FUNCTION__, __LINE__, MAX_EXPR_PATTERN_NUM); - return NULL; + 
log_fatal(logger, MODULE_EXPR_MATCHER, + "[%s:%d] the number of patterns in expr_rule(rule_id:%lld)" + " should less than %d", __FUNCTION__, __LINE__, + rules[i].expr_id, MAX_EXPR_PATTERN_NUM); + return -1; } - for (j = 0; j < rules[i].n_patterns; j++) { + for (size_t j = 0; j < rules[i].n_patterns; j++) { /* pat_len should not 0 */ if (0 == rules[i].patterns[j].pat_len) { log_fatal(logger, MODULE_EXPR_MATCHER, "[%s:%d] expr pattern length should not 0", __FUNCTION__, __LINE__); - return NULL; + return -1; } if (rules[i].patterns[j].type == EXPR_PATTERN_TYPE_STR) { - literal_pat_num++; + lit_pat_num++; } else { regex_pat_num++; } } } - if (0 == literal_pat_num && 0 == regex_pat_num) { + if (0 == lit_pat_num && 0 == regex_pat_num) { log_fatal(logger, MODULE_EXPR_MATCHER, - "[%s:%d] exprs has no valid pattern", __FUNCTION__, __LINE__); - return NULL; + "[%s:%d] exprs has no valid pattern", + __FUNCTION__, __LINE__); + return -1; } - void *engine = expr_engine_ops[engine_type].engine_new(rules, n_rule, literal_pat_num, - regex_pat_num, n_worker_thread, - logger); - if (NULL == engine) { - log_fatal(logger, MODULE_EXPR_MATCHER, - "[%s:%d]expr_matcher engine_new failed.", __FUNCTION__, __LINE__); - return NULL; + *n_lit_pat = lit_pat_num; + *n_regex_pat = regex_pat_num; + + return 0; +} + +static struct bool_expr *bool_exprs_new(struct expr_rule *rules, size_t n_rule, + enum expr_engine_type engine_type, + struct pattern_attribute *pat_attr, + void *lit_compile_data, void *regex_compile_data) +{ + uint32_t pattern_index = 0; + uint32_t literal_index = 0; + uint32_t regex_index = 0; + + struct bool_expr *bool_exprs = ALLOC(struct bool_expr, n_rule); + + /* populate adpt_hs_compile_data and bool_expr */ + for (size_t i = 0; i < n_rule; i++) { + + for (size_t j = 0; j < rules[i].n_patterns; j++) { + pat_attr[pattern_index].pattern_id = pattern_index; + pat_attr[pattern_index].match_mode = rules[i].patterns[j].match_mode; + + if (pat_attr[pattern_index].match_mode == 
EXPR_MATCH_MODE_SUB || + pat_attr[pattern_index].match_mode == EXPR_MATCH_MODE_EXACTLY) { + pat_attr[pattern_index].offset.start = rules[i].patterns[j].start_offset; + pat_attr[pattern_index].offset.end = rules[i].patterns[j].end_offset; + } + + /* literal pattern */ + if (rules[i].patterns[j].type == EXPR_PATTERN_TYPE_STR) { + db_ops[engine_type].populate_compile_data(lit_compile_data, literal_index, + pattern_index, rules[i].patterns[j].pat, + rules[i].patterns[j].pat_len, + rules[i].patterns[j].case_sensitive); + literal_index++; + } else { + /* regex pattern */ + hs_populate_compile_data(regex_compile_data, regex_index, pattern_index, + rules[i].patterns[j].pat, rules[i].patterns[j].pat_len, + rules[i].patterns[j].case_sensitive); + regex_index++; + } + + bool_exprs[i].items[j].item_id = pattern_index++; + bool_exprs[i].items[j].not_flag = 0; + } + + bool_exprs[i].expr_id = rules[i].expr_id; + bool_exprs[i].item_num = rules[i].n_patterns; + bool_exprs[i].user_tag = rules[i].tag; } - struct expr_matcher *matcher = ALLOC(struct expr_matcher, 1); - matcher->engine_type = engine_type; - matcher->engine = engine; - matcher->logger = logger; - - return matcher; + return bool_exprs; } void expr_matcher_free(struct expr_matcher *matcher) @@ -161,26 +237,257 @@ void expr_matcher_free(struct expr_matcher *matcher) return; } - if (matcher->engine != NULL) { - expr_engine_ops[matcher->engine_type].engine_free(matcher->engine); - matcher->engine = NULL; + if (matcher->lit_runtime != NULL) { + engine_ops[matcher->engine_type].engine_free(matcher->lit_runtime); + matcher->lit_runtime = NULL; } + if (matcher->regex_runtime != NULL) { + hs_regex_engine_free(matcher->regex_runtime); + matcher->regex_runtime = NULL; + } + + if (matcher->bm != NULL) { + bool_matcher_free(matcher->bm); + matcher->bm = NULL; + } + + if (matcher->bool_match_buffs != NULL) { + for (size_t i = 0; i < matcher->n_thread; i++) { + if (matcher->bool_match_buffs[i] != NULL) { + 
FREE(matcher->bool_match_buffs[i]); + } + } + + FREE(matcher->bool_match_buffs); + } + + if (matcher->pat_attr != NULL) { + FREE(matcher->pat_attr); + } + FREE(matcher); } -int expr_matcher_match(struct expr_matcher *matcher, int thread_id, const char *scan_data, - size_t data_len, struct expr_scan_result *result_array, - size_t n_result_array, size_t *n_hit_results) +struct expr_matcher *expr_matcher_new(struct expr_rule *rules, size_t n_rule, + enum expr_engine_type engine_type, + size_t n_thread, struct log_handle *logger) { - if (NULL == matcher || thread_id < 0 || NULL == scan_data || 0 == data_len - || NULL == result_array || 0 == n_result_array || NULL == n_hit_results) { + if (NULL == rules || 0 == n_rule || 0 == n_thread || + (engine_type != EXPR_ENGINE_TYPE_HS && + engine_type != EXPR_ENGINE_TYPE_RS)) { + log_fatal(logger, MODULE_EXPR_MATCHER, + "[%s:%d]engine type:%d is illegal", + __FUNCTION__, __LINE__, engine_type); + return NULL; + } + + size_t lit_pat_cnt = 0; + size_t regex_pat_cnt = 0; + size_t pat_cnt = 0; + + int ret = expr_rule_pattern_count(rules, n_rule, &lit_pat_cnt, + &regex_pat_cnt, logger); + if (ret < 0) { + return NULL; + } + + pat_cnt = lit_pat_cnt + regex_pat_cnt; + void *lit_compile_data = NULL; + void *regex_compile_data = NULL; + + if (lit_pat_cnt > 0) { + lit_compile_data = db_ops[engine_type].compile_data_new(EXPR_PATTERN_TYPE_STR, + lit_pat_cnt); + } + + if (regex_pat_cnt > 0) { + regex_compile_data = hs_compile_data_new(EXPR_PATTERN_TYPE_REG, regex_pat_cnt); + } + + struct pattern_attribute *pat_attr = ALLOC(struct pattern_attribute, pat_cnt); + struct bool_expr *bool_exprs = bool_exprs_new(rules, n_rule, engine_type, + pat_attr, lit_compile_data, + regex_compile_data); + size_t mem_size = 0; + int bm_ret = 0; + struct expr_matcher *matcher = ALLOC(struct expr_matcher, 1); + + matcher->n_thread = n_thread; + matcher->pat_attr = pat_attr; + matcher->engine_type = engine_type; + matcher->logger = logger; + matcher->bm = 
bool_matcher_new(bool_exprs, n_rule, &mem_size); + if (matcher->bm != NULL) { + log_info(logger, MODULE_EXPR_MATCHER, + "expr_matcher module: build bool matcher of %zu expressions" + " with %zu bytes memory", n_rule, mem_size); + } else { + log_fatal(logger, MODULE_EXPR_MATCHER, + "[%s:%d] expr_matcher module: build bool matcher failed", + __FUNCTION__, __LINE__); + bm_ret = -1; + } + FREE(bool_exprs); + + matcher->bool_match_buffs = ALLOC(struct bool_expr_match *, n_thread); + for (size_t i = 0; i < n_thread; i++) { + matcher->bool_match_buffs[i] = ALLOC(struct bool_expr_match, MAX_HIT_PATTERN_NUM); + } + + void *lit_db = NULL; + if (lit_compile_data != NULL) { + ret = db_ops[engine_type].build_db(&lit_db, lit_compile_data, logger); + if (ret < 0) { + bm_ret = -1; + } + db_ops[engine_type].compile_data_free(lit_compile_data); + } + + if (lit_db != NULL) { + matcher->lit_runtime = engine_ops[engine_type].engine_new(rules, n_rule, pat_attr, + lit_db, n_thread, logger); + if (NULL == matcher->lit_runtime) { + log_fatal(logger, MODULE_EXPR_MATCHER, + "[%s:%d]expr_matcher new lit runtime failed.", + __FUNCTION__, __LINE__); + bm_ret = -1; + } + } + + + void *regex_db = NULL; + if (regex_compile_data != NULL) { + ret = hs_build_regex_db(&regex_db, regex_compile_data, logger); + if (ret < 0) { + bm_ret = -1; + } + hs_compile_data_free(regex_compile_data); + } + + if (regex_db != NULL) { + matcher->regex_runtime = hs_regex_engine_new(rules, n_rule, pat_attr, + regex_db, n_thread, logger); + if (NULL == matcher->regex_runtime) { + log_fatal(logger, MODULE_EXPR_MATCHER, + "[%s:%d]expr_matcher new regex runtime failed.", + __FUNCTION__, __LINE__); + bm_ret = -1; + } + } + + if (bm_ret < 0) { + goto error; + } + + return matcher; +error: + expr_matcher_free(matcher); + return NULL; +} + +static inline int compare_pattern_id(const void *a, const void *b) +{ + long long ret = *(const unsigned long long *)a - *(const unsigned long long *)b; + if (ret == 0) { + return 0; + } else if 
(ret < 0) { + return -1; + } else { + return 1; + } +} + +static int expr_matcher_bool_matcher_match(struct bool_matcher *bm, struct bool_expr_match *match_buff, + size_t buff_size, unsigned long long *hit_pattern_ids, + size_t n_hit_pattern, struct expr_scan_result *result_array, + size_t array_size, size_t *n_hit_result) +{ + + unsigned long long prev_pat_id = 0xFFFFFFFFFFFFFFFF; + unsigned long long tmp_pat_id = 0; + unsigned long long unique_pat_ids[n_hit_pattern]; + size_t n_unique_pat_id = 0; + + qsort(hit_pattern_ids, n_hit_pattern, sizeof(unsigned long long *), compare_pattern_id); + + for (size_t i = 0; i < n_hit_pattern; i++) { + tmp_pat_id = hit_pattern_ids[i]; + if (tmp_pat_id != prev_pat_id) { + unique_pat_ids[n_unique_pat_id++] = tmp_pat_id; + prev_pat_id = tmp_pat_id; + } + } + + int bool_matcher_ret = bool_matcher_match(bm, unique_pat_ids, n_unique_pat_id, + match_buff, MAX_HIT_PATTERN_NUM); + if (bool_matcher_ret < 0) { + goto next; + } + + if (bool_matcher_ret > (int)array_size) { + bool_matcher_ret = array_size; + } + + for (int index = 0; index < bool_matcher_ret; index++) { + result_array[index].rule_id = match_buff[index].expr_id; + result_array[index].user_tag = match_buff[index].user_tag; + } + *n_hit_result = bool_matcher_ret; + +next: + return bool_matcher_ret; +} + +int expr_matcher_match(struct expr_matcher *matcher, int thread_id, + const char *data, size_t data_len, + struct expr_scan_result *result_array, + size_t array_size, size_t *n_hit_result) +{ + if (NULL == matcher || thread_id < 0 || NULL == data || 0 == data_len + || NULL == result_array || 0 == array_size || NULL == n_hit_result) { return -1; } - return expr_engine_ops[matcher->engine_type].engine_scan(matcher->engine, thread_id, - scan_data, data_len, result_array, - n_result_array, n_hit_results); + int err_count = 0; + unsigned long long lit_pattern_ids[MAX_HIT_PATTERN_NUM]; + unsigned long long regex_pattern_ids[MAX_HIT_PATTERN_NUM]; + size_t n_lit_pattern = 0; + size_t 
n_regex_pattern = 0; + size_t n_pattern = 0; + + int ret = engine_ops[matcher->engine_type].engine_scan(matcher->lit_runtime, thread_id, + data, data_len, lit_pattern_ids, + MAX_HIT_PATTERN_NUM, &n_lit_pattern); + if (ret < 0) { + err_count++; + } + + ret = hs_regex_engine_scan(matcher->regex_runtime, thread_id, data, data_len, + regex_pattern_ids, MAX_HIT_PATTERN_NUM, &n_regex_pattern); + if (ret < 0) { + err_count++; + } + + if (err_count == 2) { + return -1; + } + + n_pattern = n_lit_pattern + n_regex_pattern; + if (n_pattern > MAX_HIT_PATTERN_NUM) { + n_pattern = MAX_HIT_PATTERN_NUM; + } + + size_t j = 0; + for (size_t i = n_lit_pattern; i < n_pattern; i++, j++) { + lit_pattern_ids[i] = regex_pattern_ids[j]; + } + + struct bool_expr_match *match_buff = matcher->bool_match_buffs[thread_id]; + + return expr_matcher_bool_matcher_match(matcher->bm, match_buff, MAX_HIT_PATTERN_NUM, + lit_pattern_ids, n_pattern, result_array, + array_size, n_hit_result); } struct expr_matcher_stream * @@ -190,34 +497,89 @@ expr_matcher_stream_open(struct expr_matcher *matcher, int thread_id) return NULL; } - void *s_handle = expr_engine_ops[matcher->engine_type].engine_stream_open(matcher->engine, - thread_id); - if (NULL == s_handle) { + size_t err_count = 0; + void *lit_stream = engine_ops[matcher->engine_type].stream_open(matcher->lit_runtime, + thread_id); + if (NULL == lit_stream && matcher->lit_runtime != NULL) { log_fatal(matcher->logger, MODULE_EXPR_MATCHER, - "[%s:%d] expr_matcher engine_stream_open failed.", + "[%s:%d] expr_matcher open lit engine stream failed.", __FUNCTION__, __LINE__); + err_count++; + } + + void *regex_stream = hs_regex_stream_open(matcher->regex_runtime, thread_id); + if (NULL == regex_stream && matcher->regex_runtime != NULL) { + engine_ops[matcher->engine_type].stream_close(lit_stream); + log_fatal(matcher->logger, MODULE_EXPR_MATCHER, + "[%s:%d] expr_matcher open regex engine stream failed.", + __FUNCTION__, __LINE__); + err_count++; + } + + if 
(err_count == 2) { return NULL; } struct expr_matcher_stream *stream = ALLOC(struct expr_matcher_stream, 1); stream->engine_type = matcher->engine_type; - stream->handle = s_handle; - + stream->thread_id = thread_id; + stream->lit_stream = lit_stream; + stream->regex_stream = regex_stream; + stream->ref_matcher = matcher; + return stream; } -int expr_matcher_stream_match(struct expr_matcher_stream *stream, const char *scan_data, - size_t data_len, struct expr_scan_result *result_array, - size_t n_result_array, size_t *n_hit_results) +int expr_matcher_stream_match(struct expr_matcher_stream *stream, + const char *data, size_t data_len, + struct expr_scan_result *result_array, + size_t array_size, size_t *n_hit_result) { - if (NULL == stream || NULL == scan_data || 0 == data_len || NULL == result_array - || 0 == n_result_array || NULL == n_hit_results) { + if (NULL == stream || NULL == data || 0 == data_len || NULL == result_array + || 0 == array_size || NULL == n_hit_result) { return -1; } - return expr_engine_ops[stream->engine_type].engine_scan_stream(stream->handle, scan_data, - data_len, result_array, - n_result_array, n_hit_results); + int err_count = 0; + unsigned long long lit_pattern_ids[MAX_HIT_PATTERN_NUM]; + unsigned long long regex_pattern_ids[MAX_HIT_PATTERN_NUM]; + size_t n_lit_pattern = 0; + size_t n_regex_pattern = 0; + size_t n_pattern = 0; + + int ret = engine_ops[stream->engine_type].scan_stream(stream->lit_stream, data, data_len, + lit_pattern_ids, MAX_HIT_PATTERN_NUM, + &n_lit_pattern); + if (ret < 0) { + err_count++; + } + + ret = hs_regex_stream_scan(stream->regex_stream, data, data_len, regex_pattern_ids, + MAX_HIT_PATTERN_NUM, &n_regex_pattern); + if (ret < 0) { + err_count++; + } + + if (err_count == 2) { + return -1; + } + + n_pattern = n_lit_pattern + n_regex_pattern; + if (n_pattern > MAX_HIT_PATTERN_NUM) { + n_pattern = MAX_HIT_PATTERN_NUM; + } + + size_t j = 0; + for (size_t i = n_lit_pattern; i < n_pattern; i++, j++) { + 
lit_pattern_ids[i] = regex_pattern_ids[j]; + } + + struct expr_matcher *matcher = stream->ref_matcher; + struct bool_expr_match *match_buff = matcher->bool_match_buffs[stream->thread_id]; + + return expr_matcher_bool_matcher_match(matcher->bm, match_buff, MAX_HIT_PATTERN_NUM, + lit_pattern_ids, n_pattern, result_array, + array_size, n_hit_result); } void expr_matcher_stream_close(struct expr_matcher_stream *stream) @@ -226,10 +588,15 @@ void expr_matcher_stream_close(struct expr_matcher_stream *stream) return; } - if (stream->handle != NULL) { - expr_engine_ops[stream->engine_type].engine_stream_close(stream->handle); - stream->handle = NULL; + if (stream->lit_stream != NULL) { + engine_ops[stream->engine_type].stream_close(stream->lit_stream); + stream->lit_stream = NULL; } + if (stream->regex_stream != NULL) { + hs_regex_stream_close(stream->regex_stream); + stream->regex_stream = NULL; + } + FREE(stream); } \ No newline at end of file diff --git a/scanner/expr_matcher/expr_matcher.h b/scanner/expr_matcher/expr_matcher.h index fb61854..75dbe94 100644 --- a/scanner/expr_matcher/expr_matcher.h +++ b/scanner/expr_matcher/expr_matcher.h @@ -21,12 +21,11 @@ extern "C" #include "log/log.h" #define MAX_EXPR_PATTERN_NUM 8 /* 每条与表达式最多由MAX_EXPR_ITEM_NUM个规则组成 */ -#define MAX_HIT_EXPR_NUM 1024 enum expr_engine_type { - EXPR_ENGINE_TYPE_HS = 0, /* default engine */ + EXPR_ENGINE_TYPE_HS = 0, EXPR_ENGINE_TYPE_RS, - EXPR_ENGINE_TYPE_MAX + EXPR_ENGINE_TYPE_AUTO }; enum expr_pattern_type { @@ -73,7 +72,7 @@ struct expr_scan_result { /* logic AND expression, such as (rule1 & rule2) */ struct expr_rule { long long expr_id; /* AND expression ID */ - size_t n_patterns; + size_t n_patterns; struct expr_pattern patterns[MAX_EXPR_PATTERN_NUM]; void *tag; /* user defined data, return with hit result */ }; @@ -89,25 +88,26 @@ int expr_matcher_verify_regex_expression(const char *regex_expr, * @param n_worker_threads: the number of scan threads which will call adapter_rs_scan() * */ -struct 
expr_matcher * -expr_matcher_new(struct expr_rule *rules, size_t n_rule, enum expr_engine_type type, - size_t n_worker_thread, struct log_handle *logger); +struct expr_matcher *expr_matcher_new(struct expr_rule *rules, size_t n_rule, + enum expr_engine_type type, size_t n_thread, + struct log_handle *logger); void expr_matcher_free(struct expr_matcher *matcher); /** * @brief scan input data to match logic AND expression, return all matched expr_id * - * @param matcher: expr_matcher instance obtained by expr_matcher_new() - * @param thread_id: the thread_id of caller - * @param scan_data: data to be scanned - * @param data_len: the length of data to be scanned - * @param result_array: the array to store hit expr_id which allocated by caller + * @param matcher: expr_matcher instance obtained by expr_matcher_new() + * @param thread_id: the thread_id of caller + * @param scan_data: data to be scanned + * @param data_len: the length of data to be scanned + * @param result_array: the array to store hit expr_id which allocated by caller * @param n_result_array: number of elements in array of expr_id */ -int expr_matcher_match(struct expr_matcher *matcher, int thread_id, const char *scan_data, - size_t data_len, struct expr_scan_result *result_array, - size_t n_result_array, size_t *n_hit_results); +int expr_matcher_match(struct expr_matcher *matcher, int thread_id, + const char *data, size_t data_len, + struct expr_scan_result *result_array, + size_t array_size, size_t *n_hit_result); /** * @brief @@ -118,9 +118,10 @@ expr_matcher_stream_open(struct expr_matcher *matcher, int thread_id); /** * @brief */ -int expr_matcher_stream_match(struct expr_matcher_stream *stream, const char *scan_data, - size_t data_len, struct expr_scan_result *result_array, - size_t n_result_array, size_t *n_hit_results); +int expr_matcher_stream_match(struct expr_matcher_stream *stream, + const char *data, size_t data_len, + struct expr_scan_result *result_array, + size_t array_size, size_t 
*n_hit_result); /** * @brief diff --git a/scanner/expr_matcher/expr_matcher_inc.h b/scanner/expr_matcher/expr_matcher_inc.h new file mode 100644 index 0000000..57782ed --- /dev/null +++ b/scanner/expr_matcher/expr_matcher_inc.h @@ -0,0 +1,47 @@ +/* +********************************************************************************************** +* File: expr_matcher_inc.h +* Description: for expr matcher internal use only +* Authors: Liu wentan +* Date: 2023-06-30 +* Copyright: (c) Since 2023 Geedge Networks, Ltd. All rights reserved. +*********************************************************************************************** +*/ + +#ifndef _EXPR_MATCHER_INC_H_ +#define _EXPR_MATCHER_INC_H_ + +#ifdef __cplusplus +extern "C" +{ +#endif + +#include +#include "uthash/utarray.h" +#include "expr_matcher.h" + +#define MAX_HIT_PATTERN_NUM 1024 + +struct pattern_offset { + long long start; + long long end; +}; + +struct pattern_attribute { + long long pattern_id; + enum expr_match_mode match_mode; + struct pattern_offset offset; + size_t pattern_len; +}; + +struct matched_pattern { + UT_array *pattern_ids; + struct pattern_attribute *ref_pat_attr; + size_t scan_data_len; +}; + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/maat_api.c b/src/maat_api.c index 923f5c0..5c17e96 100644 --- a/src/maat_api.c +++ b/src/maat_api.c @@ -76,7 +76,7 @@ struct maat_options* maat_options_new(void) options->rule_update_checking_interval_ms = 1 * 1000; options->gc_timeout_ms = 10 * 1000; options->input_mode = DATA_SOURCE_NONE; - options->expr_engine = MAAT_EXPR_ENGINE_HS; + options->expr_engine = MAAT_EXPR_ENGINE_AUTO; options->log_level = 0; return options; @@ -265,7 +265,9 @@ int maat_options_set_expr_engine(struct maat_options *opts, enum maat_expr_engine expr_engine) { if (NULL == opts || - (expr_engine != MAAT_EXPR_ENGINE_HS && expr_engine != MAAT_EXPR_ENGINE_RS)) { + (expr_engine != MAAT_EXPR_ENGINE_HS && + expr_engine != MAAT_EXPR_ENGINE_RS && + expr_engine != 
MAAT_EXPR_ENGINE_AUTO)) { return -1; } diff --git a/src/maat_expr.c b/src/maat_expr.c index 2da71a2..db1592d 100644 --- a/src/maat_expr.c +++ b/src/maat_expr.c @@ -18,6 +18,7 @@ #include "maat_kv.h" #include "maat_limits.h" #include "rcu_hash.h" +#include "maat.h" #include "maat_rule.h" #include "maat_compile.h" #include "maat_group.h" @@ -26,6 +27,12 @@ #define MODULE_EXPR module_name_str("maat.expr") +/* + If expr_engine_type == MAAT_EXPR_ENGINE_AUTO, and the pattern number less than 50K, + expr_engine_type = MAAT_EXPR_ENGINE_HS; Otherwise expr_engine_type = MAAT_EXPR_ENGINE_RS +*/ +#define ENGINE_TYPE_SWITCH_THRESHOLD 50000 + struct expr_schema { int item_id_column; int group_id_column; @@ -35,7 +42,7 @@ struct expr_schema { int match_method_column; int is_hexbin_column; int table_id; - int expr_engine; + enum maat_expr_engine engine_type; struct table_manager *ref_tbl_mgr; }; @@ -80,7 +87,7 @@ struct expr_runtime { struct log_handle *logger; struct maat_garbage_bin *ref_garbage_bin; - enum maat_expr_engine expr_engine; + enum expr_engine_type engine_type; int district_num; struct maat_kv_store *district_map; struct maat_kv_store *tmp_district_map; @@ -328,7 +335,7 @@ void *expr_schema_new(cJSON *json, struct table_manager *tbl_mgr, { char table_type[NAME_MAX] = {0}; struct expr_schema *expr_schema = ALLOC(struct expr_schema, 1); - expr_schema->expr_engine = EXPR_ENGINE_TYPE_MAX; + expr_schema->engine_type = MAAT_EXPR_ENGINE_AUTO; cJSON *custom_item = NULL; cJSON *item = cJSON_GetObjectItem(json, "table_id"); @@ -348,9 +355,9 @@ void *expr_schema_new(cJSON *json, struct table_manager *tbl_mgr, item = cJSON_GetObjectItem(json, "expr_engine"); if (item != NULL && item->type == cJSON_String) { if (strcmp(item->valuestring, "hyperscan") == 0) { - expr_schema->expr_engine = EXPR_ENGINE_TYPE_HS; + expr_schema->engine_type = MAAT_EXPR_ENGINE_HS; } else if (strcmp(item->valuestring, "rulescan") == 0) { - expr_schema->expr_engine = EXPR_ENGINE_TYPE_RS; + 
expr_schema->engine_type = MAAT_EXPR_ENGINE_RS; } else { log_fatal(logger, MODULE_EXPR, "[%s:%d] expr table:<%s> schema has invalid expr_engine", @@ -499,12 +506,7 @@ void *expr_runtime_new(void *expr_schema, size_t max_thread_num, expr_rt->ref_garbage_bin = garbage_bin; expr_rt->logger = logger; expr_rt->district_map = maat_kv_store_new(); - - if (schema->expr_engine != EXPR_ENGINE_TYPE_MAX) { - expr_rt->expr_engine = schema->expr_engine; - } else { - expr_rt->expr_engine = table_manager_get_expr_engine(schema->ref_tbl_mgr); - } + expr_rt->engine_type = schema->engine_type; expr_rt->hit_cnt = alignment_int64_array_alloc(max_thread_num); expr_rt->scan_cnt = alignment_int64_array_alloc(max_thread_num); @@ -842,6 +844,18 @@ static void garbage_expr_matcher_free(void *expr_matcher, void *arg) expr_matcher_free(matcher); } +const char *expr_engine_int2str(enum expr_engine_type type) +{ + switch (type) { + case EXPR_ENGINE_TYPE_HS: + return "hyperscan"; + case EXPR_ENGINE_TYPE_RS: + return "rulescan"; + default: + return "unknown"; + } +} + int expr_runtime_commit(void *expr_runtime, const char *table_name, long long maat_rt_version) { @@ -867,6 +881,7 @@ int expr_runtime_commit(void *expr_runtime, const char *table_name, int ret = 0; size_t i = 0; size_t real_rule_cnt = 0; + size_t real_lit_rule_cnt = 0; size_t real_regex_rule_cnt = 0; struct expr_rule *rules = NULL; void **ex_data_array = NULL; @@ -886,25 +901,30 @@ int expr_runtime_commit(void *expr_runtime, const char *table_name, if (expr_item->expr_type == EXPR_TYPE_REGEX) { real_regex_rule_cnt++; + } else { + real_lit_rule_cnt++; } } } + if (expr_rt->engine_type == EXPR_ENGINE_TYPE_AUTO) { + if (real_lit_rule_cnt <= ENGINE_TYPE_SWITCH_THRESHOLD) { + expr_rt->engine_type = EXPR_ENGINE_TYPE_HS; + } else { + expr_rt->engine_type = EXPR_ENGINE_TYPE_RS; + } + } + struct expr_matcher *new_matcher = NULL; struct expr_matcher *old_matcher = NULL; if (rule_cnt > 0) { - enum expr_engine_type engine_type = 
EXPR_ENGINE_TYPE_HS; - if (expr_rt->expr_engine == MAAT_EXPR_ENGINE_RS) { - engine_type = EXPR_ENGINE_TYPE_RS; - } - struct timespec start, end; clock_gettime(CLOCK_MONOTONIC, &start); - new_matcher = expr_matcher_new(rules, real_rule_cnt, engine_type, + new_matcher = expr_matcher_new(rules, real_rule_cnt, expr_rt->engine_type, expr_rt->n_worker_thread, expr_rt->logger); clock_gettime(CLOCK_MONOTONIC, &end); - long long time_elapse_ms = (end.tv_sec - start.tv_sec) * 1000 + + long long time_elapse_ms = (end.tv_sec - start.tv_sec) * 1000 + (end.tv_nsec - start.tv_nsec) / 1000000; if (NULL == new_matcher) { @@ -914,10 +934,10 @@ int expr_runtime_commit(void *expr_runtime, const char *table_name, ret = -1; } else { log_info(expr_rt->logger, MODULE_EXPR, - "table[%s] has %zu rules, commit %zu expr rules(regex rules:%zu) " - "and rebuild expr_matcher(%s) completed, version:%lld, consume:%lldms", table_name, rule_cnt, - real_rule_cnt, real_regex_rule_cnt, engine_type == EXPR_ENGINE_TYPE_HS ? "hyperscan" : "rulescan", - maat_rt_version, time_elapse_ms); + "table[%s] has %zu rules, commit %zu expr rules(literal_rules:%zu regex_rules:%zu)" + " and rebuild expr_matcher(%s) completed, version:%lld, consume:%lldms", + table_name, rule_cnt, real_rule_cnt, real_lit_rule_cnt, real_regex_rule_cnt, + expr_engine_int2str(expr_rt->engine_type), maat_rt_version, time_elapse_ms); } } diff --git a/src/maat_table.c b/src/maat_table.c index ea27b77..d6d7286 100644 --- a/src/maat_table.c +++ b/src/maat_table.c @@ -28,6 +28,7 @@ #include "maat_fqdn_plugin.h" #include "maat_interval.h" #include "maat_virtual.h" +#include "expr_matcher/expr_matcher.h" #define MODULE_TABLE module_name_str("maat.table") @@ -48,7 +49,7 @@ struct table_manager { struct rule_tag *accept_tags; size_t n_accept_tag; - enum maat_expr_engine expr_engine; + enum expr_engine_type engine_type; int default_compile_table_id; int g2g_table_id; struct maat_kv_store *tbl_name2id_map; @@ -750,7 +751,7 @@ int 
maat_default_compile_table_id(cJSON *json, struct log_handle *logger) struct table_manager * table_manager_create(const char *table_info_path, const char *accept_tags, - enum maat_expr_engine expr_engine, struct maat_garbage_bin *garbage_bin, + enum maat_expr_engine engine_type, struct maat_garbage_bin *garbage_bin, struct log_handle *logger) { if (NULL == table_info_path) { @@ -793,7 +794,7 @@ table_manager_create(const char *table_info_path, const char *accept_tags, tbl_mgr->logger = logger; tbl_mgr->tbl_name2id_map = maat_kv_store_new(); tbl_mgr->conj_tbl_name2id_map = maat_kv_store_new(); - tbl_mgr->expr_engine = expr_engine; + tbl_mgr->engine_type = engine_type; tbl_mgr->ref_garbage_bin = garbage_bin; ret = register_tbl_name2id(tbl_mgr->tbl_name2id_map, root, table_info_path, logger); @@ -1137,10 +1138,10 @@ int table_manager_get_valid_column(struct table_manager *tbl_mgr, int table_id) enum maat_expr_engine table_manager_get_expr_engine(struct table_manager *tbl_mgr) { if (NULL == tbl_mgr) { - return MAAT_EXPR_ENGINE_HS; + return EXPR_ENGINE_TYPE_HS; } - return tbl_mgr->expr_engine; + return tbl_mgr->engine_type; } size_t table_manager_accept_tags_count(struct table_manager *tbl_mgr) diff --git a/test/expr_matcher_gtest.cpp b/test/expr_matcher_gtest.cpp index bb70306..1f58fc8 100644 --- a/test/expr_matcher_gtest.cpp +++ b/test/expr_matcher_gtest.cpp @@ -1320,7 +1320,7 @@ int main(int argc, char **argv) { int ret = 0; ::testing::InitGoogleTest(&argc, argv); - g_logger = log_handle_create("./adapter_hs_gtest.log", 0); + g_logger = log_handle_create("./expr_matcher_gtest.log", 0); ret = RUN_ALL_TESTS(); diff --git a/test/maat_framework_gtest.cpp b/test/maat_framework_gtest.cpp index 0e1b797..54b5cdb 100644 --- a/test/maat_framework_gtest.cpp +++ b/test/maat_framework_gtest.cpp @@ -787,7 +787,7 @@ protected: maat_options_set_logger(opts, "./maat_framework_gtest.log", LOG_LEVEL_INFO); maat_options_set_accept_tags(opts, accept_tags); 
maat_options_set_hit_path_enabled(opts); - //maat_options_set_expr_engine(opts, MAAT_EXPR_ENGINE_HS); //default + maat_options_set_expr_engine(opts, MAAT_EXPR_ENGINE_HS); _shared_maat_inst = maat_new(opts, table_info_path); maat_options_free(opts);