681 lines
23 KiB
C++
681 lines
23 KiB
C++
/*
**********************************************************************************************
*   File: expr_matcher.cpp
*   Description:
*   Authors: Liu wentan <liuwentan@geedgenetworks.com>
*   Date:    2023-06-30
*   Copyright: (c) Since 2023 Geedge Networks, Ltd. All rights reserved.
***********************************************************************************************
*/
|
|
|
|
#include <unistd.h>
|
|
#include <assert.h>
|
|
#include <sys/syscall.h>
|
|
|
|
#include "log/log.h"
|
|
#include "bloom/bloom.h"
|
|
#include "maat_utils.h"
|
|
#include "../bool_matcher/bool_matcher.h"
|
|
#include "expr_matcher_inc.h"
|
|
#include "adapter_hs/adapter_hs.h"
|
|
#include "adapter_rs/adapter_rs.h"
|
|
|
|
/* Return the id of the calling thread as reported by the gettid system call. */
pid_t expr_matcher_gettid()
{
    const long tid = syscall(SYS_gettid);

    return (pid_t)tid;
}
|
|
|
|
static const char *expr_matcher_module_name_str(const char *name)
|
|
{
|
|
static __thread char module[64];
|
|
snprintf(module, sizeof(module), "%s(%d)", name, expr_matcher_gettid());
|
|
|
|
return module;
|
|
}
|
|
|
|
/* Log module tag used by this file; each use calls the helper, which appends the calling thread's id. */
#define MODULE_EXPR_MATCHER expr_matcher_module_name_str("maat.expr_matcher")
|
|
|
|
struct expr_matcher {
|
|
size_t n_thread;
|
|
enum expr_engine_type engine_type;
|
|
void *lit_runtime;
|
|
void *regex_runtime;
|
|
struct pattern_attribute *pat_attr;
|
|
struct bool_matcher *bm;
|
|
struct bool_expr_match **bool_match_buffs;
|
|
struct log_handle *logger;
|
|
struct bool_expr *bool_exprs;
|
|
};
|
|
|
|
struct expr_matcher_stream {
|
|
int thread_id;
|
|
enum expr_engine_type engine_type;
|
|
void *lit_stream;
|
|
void *regex_stream;
|
|
UT_array *all_hit_lit_pattern_ids;
|
|
struct expr_matcher *ref_matcher;
|
|
};
|
|
|
|
struct db_operations {
|
|
enum expr_engine_type type;
|
|
void *(*compile_data_new)(enum expr_pattern_type pat_type, size_t n_pattern);
|
|
void (*compile_data_free)(void *compile_data);
|
|
void (*populate_compile_data)(void *compile_data, size_t index, int pattern_id,
|
|
char *pat, size_t pat_len, int case_sensitive);
|
|
int (*build_db)(void **lit_db, void *compile_data, struct log_handle *logger);
|
|
};
|
|
|
|
/* utarray element descriptor for pattern ids: unsigned long long elements; the three callback slots are NULL. */
UT_icd ut_pattern_id_icd = {sizeof(unsigned long long), NULL, NULL, NULL};
|
|
|
|
struct db_operations db_ops[EXPR_ENGINE_TYPE_AUTO] = {
|
|
{
|
|
.type = EXPR_ENGINE_TYPE_HS,
|
|
.compile_data_new = hs_compile_data_new,
|
|
.compile_data_free = hs_compile_data_free,
|
|
.populate_compile_data = hs_populate_compile_data,
|
|
.build_db = hs_build_lit_db
|
|
},
|
|
{
|
|
.type = EXPR_ENGINE_TYPE_RS,
|
|
.compile_data_new = rs_compile_data_new,
|
|
.compile_data_free = rs_compile_data_free,
|
|
.populate_compile_data = rs_populate_compile_data,
|
|
.build_db = rs_build_lit_db
|
|
}
|
|
};
|
|
|
|
struct engine_operations {
|
|
enum expr_engine_type type;
|
|
void *(*engine_new)(struct expr_rule *rules, size_t n_rule,
|
|
struct pattern_attribute *pat_attr,
|
|
void *hs_lit_db, size_t n_thread,
|
|
struct log_handle *logger);
|
|
|
|
void (*engine_free)(void *engine);
|
|
|
|
int (*engine_scan)(void *engine, int thread_id,
|
|
const char *data, size_t data_len,
|
|
unsigned long long *pattern_id_array,
|
|
size_t array_size, size_t *n_pattern_id);
|
|
|
|
void *(*stream_open)(void *engine, int thread_id);
|
|
|
|
void (*stream_close)(void *stream);
|
|
|
|
int (*scan_stream)(void *stream, const char *data, size_t data_len,
|
|
unsigned long long *pattern_id_array, size_t array_size,
|
|
size_t *n_pattern_id);
|
|
};
|
|
|
|
struct engine_operations engine_ops[EXPR_ENGINE_TYPE_AUTO] = {
|
|
{
|
|
.type = EXPR_ENGINE_TYPE_HS,
|
|
.engine_new = hs_lit_engine_new,
|
|
.engine_free = hs_lit_engine_free,
|
|
.engine_scan = hs_lit_engine_scan,
|
|
.stream_open = hs_lit_stream_open,
|
|
.stream_close = hs_lit_stream_close,
|
|
.scan_stream = hs_lit_stream_scan
|
|
},
|
|
{
|
|
.type = EXPR_ENGINE_TYPE_RS,
|
|
.engine_new = rs_lit_engine_new,
|
|
.engine_free = rs_lit_engine_free,
|
|
.engine_scan = rs_lit_engine_scan,
|
|
.stream_open = rs_lit_stream_open,
|
|
.stream_close = rs_lit_stream_close,
|
|
.scan_stream = rs_lit_stream_scan
|
|
}
|
|
};
|
|
|
|
int expr_matcher_verify_regex_expression(const char *regex_expr,
|
|
struct log_handle *logger)
|
|
{
|
|
int ret = hs_verify_regex_expression(regex_expr, logger);
|
|
if (ret == 0) {
|
|
return 0;
|
|
}
|
|
|
|
return rs_verify_regex_expression(regex_expr, logger);
|
|
}
|
|
|
|
static int expr_rule_pattern_count(struct expr_rule *rules, size_t n_rule,
|
|
size_t *n_lit_pat, size_t *n_regex_pat,
|
|
struct log_handle *logger)
|
|
{
|
|
size_t lit_pat_num = 0;
|
|
size_t regex_pat_num = 0;
|
|
|
|
for (size_t i = 0; i < n_rule; i++) {
|
|
if (rules[i].n_patterns > MAX_EXPR_PATTERN_NUM) {
|
|
char uuid_str[37];
|
|
uuid_unparse(rules[i].expr_uuid, uuid_str);
|
|
log_fatal(logger, MODULE_EXPR_MATCHER,
|
|
"[%s:%d] the number of patterns in expr_rule(rule_id:%s)"
|
|
" should less than %d", __FUNCTION__, __LINE__,
|
|
uuid_str, MAX_EXPR_PATTERN_NUM);
|
|
return -1;
|
|
}
|
|
|
|
for (size_t j = 0; j < rules[i].n_patterns; j++) {
|
|
/* pat_len should not 0 */
|
|
if (0 == rules[i].patterns[j].pat_len) {
|
|
char uuid_str[37];
|
|
uuid_unparse(rules[i].expr_uuid, uuid_str);
|
|
log_fatal(logger, MODULE_EXPR_MATCHER,
|
|
"[%s:%d] expr rule %s pattern length should not 0",
|
|
__FUNCTION__, __LINE__, uuid_str);
|
|
return -1;
|
|
}
|
|
|
|
if (rules[i].patterns[j].type == EXPR_PATTERN_TYPE_STR) {
|
|
lit_pat_num++;
|
|
} else {
|
|
regex_pat_num++;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (0 == lit_pat_num && 0 == regex_pat_num) {
|
|
log_fatal(logger, MODULE_EXPR_MATCHER,
|
|
"[%s:%d] exprs has no valid pattern",
|
|
__FUNCTION__, __LINE__);
|
|
return -1;
|
|
}
|
|
|
|
*n_lit_pat = lit_pat_num;
|
|
*n_regex_pat = regex_pat_num;
|
|
|
|
return 0;
|
|
}
|
|
|
|
static struct bool_expr *bool_exprs_new(struct expr_rule *rules, size_t n_rule,
|
|
enum expr_engine_type engine_type,
|
|
struct pattern_attribute *pat_attr,
|
|
void *lit_compile_data, void *regex_compile_data)
|
|
{
|
|
uint32_t pattern_index = 0;
|
|
uint32_t literal_index = 0;
|
|
uint32_t regex_index = 0;
|
|
|
|
struct bool_expr *bool_exprs = ALLOC(struct bool_expr, n_rule);
|
|
|
|
/* populate adpt_hs_compile_data and bool_expr */
|
|
for (size_t i = 0; i < n_rule; i++) {
|
|
|
|
for (size_t j = 0; j < rules[i].n_patterns; j++) {
|
|
pat_attr[pattern_index].pattern_id = pattern_index;
|
|
pat_attr[pattern_index].match_mode = rules[i].patterns[j].match_mode;
|
|
|
|
if (pat_attr[pattern_index].match_mode == EXPR_MATCH_MODE_SUB ||
|
|
pat_attr[pattern_index].match_mode == EXPR_MATCH_MODE_EXACTLY) {
|
|
pat_attr[pattern_index].offset.start = rules[i].patterns[j].start_offset;
|
|
pat_attr[pattern_index].offset.end = rules[i].patterns[j].end_offset;
|
|
}
|
|
|
|
/* literal pattern */
|
|
if (rules[i].patterns[j].type == EXPR_PATTERN_TYPE_STR) {
|
|
db_ops[engine_type].populate_compile_data(lit_compile_data, literal_index,
|
|
pattern_index, rules[i].patterns[j].pat,
|
|
rules[i].patterns[j].pat_len,
|
|
rules[i].patterns[j].case_sensitive);
|
|
literal_index++;
|
|
} else {
|
|
/* regex pattern */
|
|
hs_populate_compile_data(regex_compile_data, regex_index, pattern_index,
|
|
rules[i].patterns[j].pat, rules[i].patterns[j].pat_len,
|
|
rules[i].patterns[j].case_sensitive);
|
|
regex_index++;
|
|
}
|
|
|
|
bool_exprs[i].items[j].item_id = pattern_index++;
|
|
bool_exprs[i].items[j].negate_option = 0;
|
|
}
|
|
|
|
uuid_copy(bool_exprs[i].expr_uuid, rules[i].expr_uuid);
|
|
bool_exprs[i].item_num = rules[i].n_patterns;
|
|
bool_exprs[i].user_tag = &(bool_exprs[i]);
|
|
}
|
|
|
|
return bool_exprs;
|
|
}
|
|
|
|
void expr_matcher_free(struct expr_matcher *matcher)
|
|
{
|
|
if (NULL == matcher) {
|
|
return;
|
|
}
|
|
|
|
if (matcher->lit_runtime != NULL) {
|
|
engine_ops[matcher->engine_type].engine_free(matcher->lit_runtime);
|
|
matcher->lit_runtime = NULL;
|
|
}
|
|
|
|
if (matcher->regex_runtime != NULL) {
|
|
hs_regex_engine_free(matcher->regex_runtime);
|
|
matcher->regex_runtime = NULL;
|
|
}
|
|
|
|
if (matcher->bm != NULL) {
|
|
bool_matcher_free(matcher->bm);
|
|
matcher->bm = NULL;
|
|
}
|
|
|
|
if (matcher->bool_match_buffs != NULL) {
|
|
for (size_t i = 0; i < matcher->n_thread; i++) {
|
|
if (matcher->bool_match_buffs[i] != NULL) {
|
|
FREE(matcher->bool_match_buffs[i]);
|
|
}
|
|
}
|
|
|
|
FREE(matcher->bool_match_buffs);
|
|
}
|
|
|
|
if (matcher->pat_attr != NULL) {
|
|
FREE(matcher->pat_attr);
|
|
}
|
|
|
|
if (matcher->bool_exprs != NULL) {
|
|
FREE(matcher->bool_exprs);
|
|
}
|
|
|
|
FREE(matcher);
|
|
}
|
|
|
|
struct expr_matcher *
|
|
expr_matcher_new(struct expr_rule *rules, size_t n_rule,
|
|
enum expr_engine_type engine_type,
|
|
size_t n_thread, struct log_handle *logger)
|
|
{
|
|
if (NULL == rules || 0 == n_rule || 0 == n_thread ||
|
|
(engine_type != EXPR_ENGINE_TYPE_HS &&
|
|
engine_type != EXPR_ENGINE_TYPE_RS)) {
|
|
log_fatal(logger, MODULE_EXPR_MATCHER,
|
|
"[%s:%d]engine type:%d is illegal",
|
|
__FUNCTION__, __LINE__, engine_type);
|
|
return NULL;
|
|
}
|
|
|
|
size_t lit_pat_cnt = 0;
|
|
size_t regex_pat_cnt = 0;
|
|
size_t pat_cnt = 0;
|
|
|
|
int ret = expr_rule_pattern_count(rules, n_rule, &lit_pat_cnt,
|
|
®ex_pat_cnt, logger);
|
|
if (ret < 0) {
|
|
return NULL;
|
|
}
|
|
|
|
pat_cnt = lit_pat_cnt + regex_pat_cnt;
|
|
void *lit_compile_data = NULL;
|
|
void *regex_compile_data = NULL;
|
|
|
|
if (lit_pat_cnt > 0) {
|
|
lit_compile_data = db_ops[engine_type].compile_data_new(EXPR_PATTERN_TYPE_STR,
|
|
lit_pat_cnt);
|
|
}
|
|
|
|
if (regex_pat_cnt > 0) {
|
|
regex_compile_data = hs_compile_data_new(EXPR_PATTERN_TYPE_REG, regex_pat_cnt);
|
|
}
|
|
|
|
struct pattern_attribute *pat_attr = ALLOC(struct pattern_attribute, pat_cnt);
|
|
struct bool_expr *bool_exprs = bool_exprs_new(rules, n_rule, engine_type,
|
|
pat_attr, lit_compile_data,
|
|
regex_compile_data);
|
|
size_t mem_size = 0;
|
|
int bm_ret = 0;
|
|
struct expr_matcher *matcher = ALLOC(struct expr_matcher, 1);
|
|
|
|
matcher->n_thread = n_thread;
|
|
matcher->pat_attr = pat_attr;
|
|
matcher->engine_type = engine_type;
|
|
matcher->logger = logger;
|
|
matcher->bool_exprs = bool_exprs;
|
|
matcher->bm = bool_matcher_new(bool_exprs, n_rule, &mem_size);
|
|
if (matcher->bm != NULL) {
|
|
log_info(logger, MODULE_EXPR_MATCHER,
|
|
"expr_matcher module: build bool matcher of %zu expressions"
|
|
" with %zu bytes memory", n_rule, mem_size);
|
|
} else {
|
|
log_fatal(logger, MODULE_EXPR_MATCHER,
|
|
"[%s:%d] expr_matcher module: build bool matcher failed",
|
|
__FUNCTION__, __LINE__);
|
|
bm_ret = -1;
|
|
}
|
|
|
|
matcher->bool_match_buffs = ALLOC(struct bool_expr_match *, n_thread);
|
|
for (size_t i = 0; i < n_thread; i++) {
|
|
matcher->bool_match_buffs[i] = ALLOC(struct bool_expr_match, MAX_HIT_PATTERN_NUM);
|
|
}
|
|
|
|
void *lit_db = NULL;
|
|
if (lit_compile_data != NULL) {
|
|
ret = db_ops[engine_type].build_db(&lit_db, lit_compile_data, logger);
|
|
if (ret < 0) {
|
|
bm_ret = -1;
|
|
}
|
|
db_ops[engine_type].compile_data_free(lit_compile_data);
|
|
}
|
|
|
|
if (lit_db != NULL) {
|
|
matcher->lit_runtime = engine_ops[engine_type].engine_new(rules, n_rule, pat_attr,
|
|
lit_db, n_thread, logger);
|
|
if (NULL == matcher->lit_runtime) {
|
|
log_fatal(logger, MODULE_EXPR_MATCHER,
|
|
"[%s:%d]expr_matcher new lit runtime failed.",
|
|
__FUNCTION__, __LINE__);
|
|
bm_ret = -1;
|
|
}
|
|
}
|
|
|
|
|
|
void *regex_db = NULL;
|
|
if (regex_compile_data != NULL) {
|
|
ret = hs_build_regex_db(®ex_db, regex_compile_data, logger);
|
|
if (ret < 0) {
|
|
bm_ret = -1;
|
|
}
|
|
hs_compile_data_free(regex_compile_data);
|
|
}
|
|
|
|
if (regex_db != NULL) {
|
|
matcher->regex_runtime = hs_regex_engine_new(rules, n_rule, pat_attr,
|
|
regex_db, n_thread, logger);
|
|
if (NULL == matcher->regex_runtime) {
|
|
log_fatal(logger, MODULE_EXPR_MATCHER,
|
|
"[%s:%d]expr_matcher new regex runtime failed.",
|
|
__FUNCTION__, __LINE__);
|
|
bm_ret = -1;
|
|
}
|
|
}
|
|
|
|
if (bm_ret < 0) {
|
|
goto error;
|
|
}
|
|
|
|
return matcher;
|
|
error:
|
|
expr_matcher_free(matcher);
|
|
return NULL;
|
|
}
|
|
|
|
/*
 * Three-way comparator for pattern ids, usable with qsort()/bsearch()-style
 * callers.
 *
 * Both arguments point to an unsigned long long. The values are compared
 * directly rather than subtracted: an unsigned difference converted to a
 * signed type has the wrong sign when the ids are 2^63 or more apart.
 *
 * @return -1 when *a < *b, 0 when equal, 1 when *a > *b.
 */
static inline int compare_pattern_id(const void *a, const void *b)
{
    const unsigned long long lhs = *(const unsigned long long *)a;
    const unsigned long long rhs = *(const unsigned long long *)b;

    if (lhs < rhs) {
        return -1;
    }

    return (lhs > rhs) ? 1 : 0;
}
|
|
|
|
static int expr_matcher_bool_matcher_match(struct bool_matcher *bm, struct bool_expr_match *match_buff,
|
|
size_t buff_size, unsigned long long *hit_pattern_ids,
|
|
size_t n_hit_pattern, size_t *n_hit_result)
|
|
{
|
|
|
|
unsigned long long prev_pat_id = 0xFFFFFFFFFFFFFFFF;
|
|
unsigned long long tmp_pat_id = 0;
|
|
unsigned long long unique_pat_ids[n_hit_pattern];
|
|
size_t n_unique_pat_id = 0;
|
|
|
|
qsort(hit_pattern_ids, n_hit_pattern, sizeof(unsigned long long), compare_pattern_id);
|
|
|
|
for (size_t i = 0; i < n_hit_pattern; i++) {
|
|
tmp_pat_id = hit_pattern_ids[i];
|
|
if (tmp_pat_id != prev_pat_id) {
|
|
unique_pat_ids[n_unique_pat_id++] = tmp_pat_id;
|
|
prev_pat_id = tmp_pat_id;
|
|
}
|
|
}
|
|
|
|
int bool_matcher_ret = bool_matcher_match(bm, unique_pat_ids, n_unique_pat_id,
|
|
match_buff, MAX_HIT_PATTERN_NUM);
|
|
if (bool_matcher_ret < 0) {
|
|
goto next;
|
|
}
|
|
|
|
if (bool_matcher_ret > (int)buff_size) {
|
|
bool_matcher_ret = buff_size;
|
|
}
|
|
|
|
*n_hit_result = bool_matcher_ret;
|
|
|
|
next:
|
|
return bool_matcher_ret;
|
|
}
|
|
|
|
int expr_matcher_match(struct expr_matcher *matcher, int thread_id,
|
|
const char *data, size_t data_len,
|
|
uuid_t *result_array,
|
|
size_t array_size, size_t *n_hit_result,
|
|
size_t *n_hit_pattern)
|
|
{
|
|
if (NULL == matcher || thread_id < 0 || NULL == data || 0 == data_len
|
|
|| NULL == result_array || 0 == array_size || NULL == n_hit_result) {
|
|
return -1;
|
|
}
|
|
|
|
int err_count = 0;
|
|
unsigned long long lit_pat_ids[MAX_HIT_PATTERN_NUM];
|
|
unsigned long long regex_pat_ids[MAX_HIT_PATTERN_NUM];
|
|
size_t lit_pat_cnt = 0;
|
|
size_t regex_pat_cnt = 0;
|
|
size_t pat_cnt = 0;
|
|
|
|
int ret = engine_ops[matcher->engine_type].engine_scan(matcher->lit_runtime, thread_id,
|
|
data, data_len, lit_pat_ids,
|
|
MAX_HIT_PATTERN_NUM, &lit_pat_cnt);
|
|
if (ret < 0) {
|
|
err_count++;
|
|
}
|
|
|
|
ret = hs_regex_engine_scan(matcher->regex_runtime, thread_id, data, data_len,
|
|
regex_pat_ids, MAX_HIT_PATTERN_NUM, ®ex_pat_cnt);
|
|
if (ret < 0) {
|
|
err_count++;
|
|
}
|
|
|
|
if (err_count == 2) {
|
|
return -1;
|
|
}
|
|
|
|
pat_cnt = lit_pat_cnt + regex_pat_cnt;
|
|
*n_hit_pattern = pat_cnt;
|
|
|
|
if (pat_cnt > MAX_HIT_PATTERN_NUM) {
|
|
pat_cnt = MAX_HIT_PATTERN_NUM;
|
|
}
|
|
|
|
size_t j = 0;
|
|
for (size_t i = lit_pat_cnt; i < pat_cnt; i++, j++) {
|
|
lit_pat_ids[i] = regex_pat_ids[j];
|
|
}
|
|
|
|
struct bool_expr_match *match_buff = matcher->bool_match_buffs[thread_id];
|
|
|
|
ret = expr_matcher_bool_matcher_match(matcher->bm, match_buff, MAX_HIT_PATTERN_NUM,
|
|
lit_pat_ids, pat_cnt, n_hit_result);
|
|
for (size_t i = 0; i < *n_hit_result && i < array_size; i++) {
|
|
uuid_copy(result_array[i], match_buff[i].expr_uuid);
|
|
}
|
|
if (*n_hit_result > array_size) {
|
|
*n_hit_result = array_size;
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
struct expr_matcher_stream *
|
|
expr_matcher_stream_open(struct expr_matcher *matcher, int thread_id)
|
|
{
|
|
if (NULL == matcher || thread_id < 0) {
|
|
return NULL;
|
|
}
|
|
|
|
size_t err_count = 0;
|
|
void *lit_stream = engine_ops[matcher->engine_type].stream_open(matcher->lit_runtime,
|
|
thread_id);
|
|
if (NULL == lit_stream && matcher->lit_runtime != NULL) {
|
|
log_fatal(matcher->logger, MODULE_EXPR_MATCHER,
|
|
"[%s:%d] expr_matcher open lit engine stream failed.",
|
|
__FUNCTION__, __LINE__);
|
|
err_count++;
|
|
}
|
|
|
|
void *regex_stream = hs_regex_stream_open(matcher->regex_runtime, thread_id);
|
|
if (NULL == regex_stream && matcher->regex_runtime != NULL) {
|
|
engine_ops[matcher->engine_type].stream_close(lit_stream);
|
|
log_fatal(matcher->logger, MODULE_EXPR_MATCHER,
|
|
"[%s:%d] expr_matcher open regex engine stream failed.",
|
|
__FUNCTION__, __LINE__);
|
|
err_count++;
|
|
}
|
|
|
|
if (err_count == 2) {
|
|
return NULL;
|
|
}
|
|
|
|
struct expr_matcher_stream *stream = ALLOC(struct expr_matcher_stream, 1);
|
|
stream->engine_type = matcher->engine_type;
|
|
stream->thread_id = thread_id;
|
|
stream->lit_stream = lit_stream;
|
|
stream->regex_stream = regex_stream;
|
|
stream->ref_matcher = matcher;
|
|
utarray_new(stream->all_hit_lit_pattern_ids, &ut_pattern_id_icd);
|
|
|
|
return stream;
|
|
}
|
|
|
|
static int expr_has_pattern_id_in_array(struct bool_expr *expr, unsigned long long *pat_ids, size_t n_pat)
|
|
{
|
|
for (size_t i = 0; i < expr->item_num; i++) {
|
|
for (size_t j = 0; j < n_pat; j++) {
|
|
if (expr->items[i].item_id == pat_ids[j]) {
|
|
return 1;
|
|
}
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
int expr_matcher_stream_match(struct expr_matcher_stream *stream,
|
|
const char *data, size_t data_len,
|
|
uuid_t *result_array,
|
|
size_t array_size, size_t *n_hit_result,
|
|
size_t *n_hit_pattern)
|
|
{
|
|
if (NULL == stream || NULL == data || 0 == data_len ||
|
|
NULL == result_array || 0 == array_size ||
|
|
NULL == n_hit_result) {
|
|
return -1;
|
|
}
|
|
|
|
int err_count = 0;
|
|
unsigned long long lit_pat_ids[MAX_HIT_PATTERN_NUM];
|
|
unsigned long long regex_pat_ids[MAX_HIT_PATTERN_NUM];
|
|
size_t lit_pat_cnt = 0;
|
|
size_t regex_pat_cnt = 0;
|
|
size_t all_hit_pat_cnt = 0;
|
|
|
|
int ret = engine_ops[stream->engine_type].scan_stream(stream->lit_stream, data, data_len,
|
|
lit_pat_ids, MAX_HIT_PATTERN_NUM,
|
|
&lit_pat_cnt);
|
|
if (ret < 0) {
|
|
err_count++;
|
|
}
|
|
|
|
ret = hs_regex_stream_scan(stream->regex_stream, data, data_len, regex_pat_ids,
|
|
MAX_HIT_PATTERN_NUM, ®ex_pat_cnt);
|
|
if (ret < 0) {
|
|
err_count++;
|
|
}
|
|
|
|
if (err_count == 2) {
|
|
return -1;
|
|
}
|
|
|
|
*n_hit_pattern = lit_pat_cnt + regex_pat_cnt;
|
|
|
|
/*
|
|
1.some expr items may contain multi patterns such as "aaa&bbb", so we need to keep all hit patterns to ensure no expr item is missed by scanning multi times.
|
|
2.while thinking of maat api function maat_state_get_direct_hit_objects, bool_matcher(all_hit_patterns) will return all expr items every time, while this scan
|
|
may not hit some of items, so we need to check them.
|
|
*/
|
|
|
|
//1. add lit pattern ids to all_hit_lit_pattern_ids, and remove duplicate
|
|
for (size_t i = 0; i < lit_pat_cnt; i++) {
|
|
if (utarray_find(stream->all_hit_lit_pattern_ids, &lit_pat_ids[i], compare_pattern_id) == NULL) {
|
|
utarray_push_back(stream->all_hit_lit_pattern_ids, &lit_pat_ids[i]);
|
|
utarray_sort(stream->all_hit_lit_pattern_ids, compare_pattern_id);
|
|
}
|
|
}
|
|
|
|
//2. find expr item uuid by all hit lit pattern ids with bool_matcher
|
|
size_t all_hit_lit_pat_cnt = utarray_len(stream->all_hit_lit_pattern_ids);
|
|
unsigned long long all_hit_pat_ids[MAX_HIT_PATTERN_NUM];
|
|
|
|
all_hit_pat_cnt = all_hit_lit_pat_cnt + regex_pat_cnt;
|
|
if (all_hit_pat_cnt > MAX_HIT_PATTERN_NUM) {
|
|
all_hit_pat_cnt = MAX_HIT_PATTERN_NUM;
|
|
}
|
|
|
|
for (size_t i = 0; i < all_hit_lit_pat_cnt; i++) {
|
|
all_hit_pat_ids[i] = *(unsigned long long *)utarray_eltptr(stream->all_hit_lit_pattern_ids, i);
|
|
}
|
|
|
|
for (size_t i = all_hit_lit_pat_cnt, j = 0; i < all_hit_pat_cnt; i++, j++) {
|
|
all_hit_pat_ids[i] = regex_pat_ids[j];
|
|
}
|
|
|
|
struct expr_matcher *matcher = stream->ref_matcher;
|
|
struct bool_expr_match *match_buff = matcher->bool_match_buffs[stream->thread_id];
|
|
size_t n_hit_expr = 0;
|
|
|
|
ret = expr_matcher_bool_matcher_match(matcher->bm, match_buff, MAX_HIT_PATTERN_NUM,
|
|
all_hit_pat_ids, all_hit_pat_cnt, &n_hit_expr);
|
|
|
|
//3. check the result of bool_matcher
|
|
*n_hit_result = 0;
|
|
for (size_t i = 0; i < n_hit_expr; i++) {
|
|
struct bool_expr *expr = (struct bool_expr *)match_buff[i].user_tag;
|
|
if (expr_has_pattern_id_in_array(expr, lit_pat_ids, lit_pat_cnt) ||
|
|
expr_has_pattern_id_in_array(expr, regex_pat_ids, regex_pat_cnt)) {
|
|
uuid_copy(result_array[*n_hit_result], expr->expr_uuid);
|
|
(*n_hit_result)++;
|
|
|
|
if (*n_hit_result >= array_size) {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
void expr_matcher_stream_close(struct expr_matcher_stream *stream)
|
|
{
|
|
if (NULL == stream) {
|
|
return;
|
|
}
|
|
|
|
if (stream->lit_stream != NULL) {
|
|
engine_ops[stream->engine_type].stream_close(stream->lit_stream);
|
|
stream->lit_stream = NULL;
|
|
}
|
|
|
|
if (stream->regex_stream != NULL) {
|
|
hs_regex_stream_close(stream->regex_stream);
|
|
stream->regex_stream = NULL;
|
|
}
|
|
|
|
if (stream->all_hit_lit_pattern_ids != NULL) {
|
|
utarray_free(stream->all_hit_lit_pattern_ids);
|
|
}
|
|
|
|
FREE(stream);
|
|
} |