[FEATURE]expr_matcher support dual engine(hyperscan & rulescan) & benchmark
This commit is contained in:
882
scanner/expr_matcher/adapter_hs/adapter_hs.cpp
Normal file
882
scanner/expr_matcher/adapter_hs/adapter_hs.cpp
Normal file
@@ -0,0 +1,882 @@
|
||||
/*
|
||||
**********************************************************************************************
|
||||
* File: adapter_hs.cpp
|
||||
* Description:
|
||||
* Authors: Liu wentan <liuwentan@geedgenetworks.com>
|
||||
* Date: 2022-10-31
|
||||
* Copyright: (c) Since 2022 Geedge Networks, Ltd. All rights reserved.
|
||||
***********************************************************************************************
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stddef.h>
|
||||
#include <hs/hs.h>
|
||||
#include <assert.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/syscall.h>
|
||||
|
||||
#include "adapter_hs.h"
|
||||
#include "uthash/utarray.h"
|
||||
#include "uthash/uthash.h"
|
||||
#include "maat_utils.h"
|
||||
#include "../../bool_matcher/bool_matcher.h"
|
||||
|
||||
#define MAX_HIT_PATTERN_NUM 512
|
||||
|
||||
/* Return the id of the calling thread as reported by the gettid system call. */
pid_t hs_gettid()
{
    const long tid = syscall(SYS_gettid);

    return tid;
}
|
||||
|
||||
static const char *hs_module_name_str(const char *name)
|
||||
{
|
||||
static __thread char module[64];
|
||||
snprintf(module, sizeof(module), "%s(%d)", name, hs_gettid());
|
||||
|
||||
return module;
|
||||
}
|
||||
|
||||
#define MODULE_ADAPTER_HS hs_module_name_str("maat.adapter_hs")
|
||||
|
||||
struct adpt_hs_compile_data {
|
||||
enum expr_pattern_type pat_type;
|
||||
unsigned int *ids;
|
||||
unsigned int *flags;
|
||||
char **patterns;
|
||||
size_t *pattern_lens;
|
||||
unsigned int n_patterns;
|
||||
};
|
||||
|
||||
struct adapter_hs_scratch {
|
||||
hs_scratch_t **literal_scratches;
|
||||
hs_scratch_t **regex_scratches;
|
||||
struct bool_expr_match **bool_match_buffs;
|
||||
};
|
||||
|
||||
struct adapter_hs_stream {
|
||||
int thread_id;
|
||||
hs_stream_t *literal_stream;
|
||||
hs_stream_t *regex_stream;
|
||||
struct adapter_hs_runtime *ref_hs_rt;
|
||||
struct matched_pattern *matched_pat;
|
||||
struct log_handle *logger;
|
||||
};
|
||||
|
||||
/* adapter_hs runtime */
|
||||
struct adapter_hs_runtime {
|
||||
hs_database_t *literal_db;
|
||||
hs_database_t *regex_db;
|
||||
|
||||
struct adapter_hs_scratch *scratch;
|
||||
struct adapter_hs_stream **streams;
|
||||
struct bool_matcher *bm;
|
||||
};
|
||||
|
||||
/* adapter_hs instance: the handle returned by adapter_hs_new() */
struct adapter_hs {
    size_t n_worker_thread;            /* number of scan threads */
    size_t n_expr;                     /* number of rules given to adapter_hs_new() */
    size_t n_patterns;                 /* number of patterns across all rules */
    struct adapter_hs_runtime *hs_rt;
    struct pattern_attribute *hs_attr; /* attributes indexed by pattern id */
    struct log_handle *logger;
};
|
||||
|
||||
/* Offset window a match must fall into; the scan callback treats -1 as "no bound". */
struct pattern_offset {
    long long start; /* lowest allowed start offset of a match */
    long long end;   /* highest allowed offset of the last matched byte */
};
|
||||
|
||||
struct pattern_attribute {
|
||||
long long pattern_id;
|
||||
enum expr_match_mode match_mode;
|
||||
struct pattern_offset offset;
|
||||
};
|
||||
|
||||
struct matched_pattern {
|
||||
UT_array *pattern_ids;
|
||||
size_t n_patterns;
|
||||
struct pattern_attribute *ref_hs_attr;
|
||||
size_t scan_data_len;
|
||||
};
|
||||
|
||||
/*
 * Fill scratches[0 .. n_worker_thread) for database db: slot 0 is created
 * with hs_alloc_scratch() and every further slot is a clone of slot 0.
 *
 * @retval 0(success) -1(failed, the reason is logged)
 */
static int _hs_alloc_scratch(hs_database_t *db, hs_scratch_t **scratches,
                             size_t n_worker_thread, struct log_handle *logger)
{
    size_t scratch_size = 0;
    hs_error_t err = hs_alloc_scratch(db, &scratches[0]);

    if (err != HS_SUCCESS) {
        log_error(logger, MODULE_ADAPTER_HS,
                  "[%s:%d] Unable to allocate scratch space. Exiting.",
                  __FUNCTION__, __LINE__);
        return -1;
    }

    size_t slot = 1;
    while (slot < n_worker_thread) {
        err = hs_clone_scratch(scratches[0], &scratches[slot]);
        if (err != HS_SUCCESS) {
            log_error(logger, MODULE_ADAPTER_HS,
                      "[%s:%d] Unable to clone scratch",
                      __FUNCTION__, __LINE__);
            return -1;
        }

        /* only the status is checked; the reported size itself is not used */
        err = hs_scratch_size(scratches[slot], &scratch_size);
        if (err != HS_SUCCESS) {
            log_error(logger, MODULE_ADAPTER_HS,
                      "[%s:%d] Unable to query scratch size",
                      __FUNCTION__, __LINE__);
            return -1;
        }

        slot++;
    }

    return 0;
}
|
||||
|
||||
static int adpt_hs_alloc_scratch(struct adapter_hs_runtime *hs_rt,
|
||||
size_t n_worker_thread,
|
||||
enum expr_pattern_type pattern_type,
|
||||
struct log_handle *logger)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
if (pattern_type == EXPR_PATTERN_TYPE_STR) {
|
||||
hs_rt->scratch->literal_scratches = ALLOC(hs_scratch_t *, n_worker_thread);
|
||||
ret = _hs_alloc_scratch(hs_rt->literal_db, hs_rt->scratch->literal_scratches,
|
||||
n_worker_thread, logger);
|
||||
if (ret < 0) {
|
||||
FREE(hs_rt->scratch->literal_scratches);
|
||||
return -1;
|
||||
}
|
||||
} else {
|
||||
hs_rt->scratch->regex_scratches = ALLOC(hs_scratch_t *, n_worker_thread);
|
||||
ret = _hs_alloc_scratch(hs_rt->regex_db, hs_rt->scratch->regex_scratches,
|
||||
n_worker_thread, logger);
|
||||
if (ret < 0) {
|
||||
FREE(hs_rt->scratch->regex_scratches);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief build hs block database for literal string and regex expression respectively
|
||||
*
|
||||
* @retval 0(success) -1(failed)
|
||||
*/
|
||||
static int adpt_hs_build_database(struct adapter_hs_runtime *hs_rt,
|
||||
struct adpt_hs_compile_data *literal_cd,
|
||||
struct adpt_hs_compile_data *regex_cd,
|
||||
struct log_handle *logger)
|
||||
{
|
||||
hs_error_t err;
|
||||
hs_compile_error_t *compile_err = NULL;
|
||||
|
||||
if (NULL == hs_rt || (NULL == literal_cd && NULL == regex_cd)) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (literal_cd != NULL) {
|
||||
err = hs_compile_lit_multi((const char *const *)literal_cd->patterns,
|
||||
literal_cd->flags,literal_cd->ids,
|
||||
literal_cd->pattern_lens, literal_cd->n_patterns,
|
||||
HS_MODE_STREAM | HS_MODE_SOM_HORIZON_SMALL, NULL,
|
||||
&hs_rt->literal_db, &compile_err);
|
||||
if (err != HS_SUCCESS) {
|
||||
if (compile_err) {
|
||||
log_error(logger, MODULE_ADAPTER_HS, "[%s:%d] compile error: %s",
|
||||
__FUNCTION__, __LINE__, compile_err->message);
|
||||
}
|
||||
|
||||
hs_free_compile_error(compile_err);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
if (regex_cd != NULL) {
|
||||
err = hs_compile_multi((const char *const *)regex_cd->patterns,
|
||||
regex_cd->flags, regex_cd->ids, regex_cd->n_patterns,
|
||||
HS_MODE_STREAM,
|
||||
NULL, &hs_rt->regex_db, &compile_err);
|
||||
if (err != HS_SUCCESS) {
|
||||
if (compile_err) {
|
||||
log_error(logger, MODULE_ADAPTER_HS, "[%s:%d] compile error: %s",
|
||||
__FUNCTION__, __LINE__, compile_err->message);
|
||||
}
|
||||
hs_free_compile_error(compile_err);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct adpt_hs_compile_data *
|
||||
adpt_hs_compile_data_new(enum expr_pattern_type pat_type, size_t n_patterns)
|
||||
{
|
||||
struct adpt_hs_compile_data *hs_cd = ALLOC(struct adpt_hs_compile_data, 1);
|
||||
hs_cd->pat_type = pat_type;
|
||||
hs_cd->patterns = ALLOC(char *, n_patterns);
|
||||
hs_cd->pattern_lens = ALLOC(size_t, n_patterns);
|
||||
hs_cd->n_patterns = n_patterns;
|
||||
hs_cd->ids = ALLOC(unsigned int, n_patterns);
|
||||
hs_cd->flags = ALLOC(unsigned int, n_patterns);
|
||||
|
||||
return hs_cd;
|
||||
}
|
||||
|
||||
static void adpt_hs_compile_data_free(struct adpt_hs_compile_data *hs_cd)
|
||||
{
|
||||
if (NULL == hs_cd) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (hs_cd->patterns != NULL) {
|
||||
for (size_t i = 0; i < hs_cd->n_patterns; i++) {
|
||||
FREE(hs_cd->patterns[i]);
|
||||
}
|
||||
|
||||
FREE(hs_cd->patterns);
|
||||
}
|
||||
|
||||
if (hs_cd->pattern_lens != NULL) {
|
||||
FREE(hs_cd->pattern_lens);
|
||||
}
|
||||
|
||||
if (hs_cd->ids != NULL) {
|
||||
FREE(hs_cd->ids);
|
||||
}
|
||||
|
||||
if (hs_cd->flags != NULL) {
|
||||
FREE(hs_cd->flags);
|
||||
}
|
||||
|
||||
FREE(hs_cd);
|
||||
}
|
||||
|
||||
static void populate_compile_data(struct adpt_hs_compile_data *compile_data,
|
||||
int index, int pattern_id, char *pat,
|
||||
size_t pat_len, int case_sensitive)
|
||||
{
|
||||
compile_data->ids[index] = pattern_id;
|
||||
|
||||
/* set flags */
|
||||
if (compile_data->pat_type == EXPR_PATTERN_TYPE_STR) {
|
||||
compile_data->flags[index] |= HS_FLAG_SOM_LEFTMOST;
|
||||
}
|
||||
|
||||
if (case_sensitive == EXPR_CASE_INSENSITIVE) {
|
||||
compile_data->flags[index] |= HS_FLAG_CASELESS;
|
||||
}
|
||||
|
||||
compile_data->pattern_lens[index] = pat_len;
|
||||
compile_data->patterns[index] = ALLOC(char, pat_len + 1);
|
||||
memcpy(compile_data->patterns[index], pat, pat_len);
|
||||
}
|
||||
|
||||
static struct bool_expr *bool_exprs_new(struct expr_rule *rules, size_t n_rule,
|
||||
struct pattern_attribute *pattern_attr,
|
||||
struct adpt_hs_compile_data *literal_cd,
|
||||
struct adpt_hs_compile_data *regex_cd,
|
||||
size_t *n_pattern)
|
||||
{
|
||||
uint32_t pattern_index = 0;
|
||||
uint32_t literal_index = 0;
|
||||
uint32_t regex_index = 0;
|
||||
|
||||
struct bool_expr *bool_exprs = ALLOC(struct bool_expr, n_rule);
|
||||
if (NULL == bool_exprs) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* populate adpt_hs_compile_data and bool_expr */
|
||||
for (size_t i = 0; i < n_rule; i++) {
|
||||
|
||||
for (size_t j = 0; j < rules[i].n_patterns; j++) {
|
||||
pattern_attr[pattern_index].pattern_id = pattern_index;
|
||||
pattern_attr[pattern_index].match_mode = rules[i].patterns[j].match_mode;
|
||||
|
||||
if (pattern_attr[pattern_index].match_mode == EXPR_MATCH_MODE_SUB ||
|
||||
pattern_attr[pattern_index].match_mode == EXPR_MATCH_MODE_EXACTLY) {
|
||||
pattern_attr[pattern_index].offset.start = rules[i].patterns[j].start_offset;
|
||||
pattern_attr[pattern_index].offset.end = rules[i].patterns[j].end_offset;
|
||||
}
|
||||
|
||||
/* literal pattern */
|
||||
if (rules[i].patterns[j].type == EXPR_PATTERN_TYPE_STR) {
|
||||
populate_compile_data(literal_cd, literal_index, pattern_index,
|
||||
rules[i].patterns[j].pat, rules[i].patterns[j].pat_len,
|
||||
rules[i].patterns[j].case_sensitive);
|
||||
literal_index++;
|
||||
} else {
|
||||
/* regex pattern */
|
||||
populate_compile_data(regex_cd, regex_index, pattern_index,
|
||||
rules[i].patterns[j].pat, rules[i].patterns[j].pat_len,
|
||||
rules[i].patterns[j].case_sensitive);
|
||||
regex_index++;
|
||||
}
|
||||
|
||||
bool_exprs[i].items[j].item_id = pattern_index++;
|
||||
bool_exprs[i].items[j].not_flag = 0;
|
||||
}
|
||||
|
||||
bool_exprs[i].expr_id = rules[i].expr_id;
|
||||
bool_exprs[i].item_num = rules[i].n_patterns;
|
||||
bool_exprs[i].user_tag = rules[i].tag;
|
||||
}
|
||||
|
||||
*n_pattern = pattern_index;
|
||||
|
||||
return bool_exprs;
|
||||
}
|
||||
|
||||
static int verify_regex_expression(const char *regex_str, struct log_handle *logger)
|
||||
{
|
||||
hs_expr_info_t *info = NULL;
|
||||
hs_compile_error_t *error = NULL;
|
||||
|
||||
hs_error_t err = hs_expression_info(regex_str, HS_FLAG_CASELESS, &info, &error);
|
||||
if (err != HS_SUCCESS) {
|
||||
// Expression will fail compilation and report error elsewhere.
|
||||
if (logger != NULL) {
|
||||
log_error(logger, MODULE_ADAPTER_HS,
|
||||
"[%s:%d] illegal regex expression: \"%s\": %s",
|
||||
__FUNCTION__, __LINE__, regex_str, error->message);
|
||||
}
|
||||
|
||||
FREE(info);
|
||||
hs_free_compile_error(error);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (info != NULL) {
|
||||
FREE(info);
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
int adapter_hs_verify_regex_expression(const char *regex_expr, struct log_handle *logger)
|
||||
{
|
||||
if (NULL == regex_expr) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
return verify_regex_expression(regex_expr, logger);
|
||||
}
|
||||
|
||||
void *adapter_hs_new(struct expr_rule *rules, size_t n_rule,
|
||||
size_t n_literal_pattern, size_t n_regex_pattern,
|
||||
size_t n_worker_thread, struct log_handle *logger)
|
||||
{
|
||||
/* get the sum of pattern */
|
||||
size_t i = 0;
|
||||
struct adpt_hs_compile_data *literal_cd = NULL;
|
||||
struct adpt_hs_compile_data *regex_cd = NULL;
|
||||
|
||||
if (n_literal_pattern > 0) {
|
||||
literal_cd = adpt_hs_compile_data_new(EXPR_PATTERN_TYPE_STR, n_literal_pattern);
|
||||
}
|
||||
|
||||
if (n_regex_pattern > 0) {
|
||||
regex_cd = adpt_hs_compile_data_new(EXPR_PATTERN_TYPE_REG, n_regex_pattern);
|
||||
}
|
||||
|
||||
size_t pattern_cnt = n_literal_pattern + n_regex_pattern;
|
||||
struct adapter_hs *hs_inst = ALLOC(struct adapter_hs, 1);
|
||||
hs_inst->hs_attr = ALLOC(struct pattern_attribute, pattern_cnt);
|
||||
hs_inst->logger = logger;
|
||||
hs_inst->n_worker_thread = n_worker_thread;
|
||||
hs_inst->n_expr = n_rule;
|
||||
|
||||
struct bool_expr *bool_exprs = bool_exprs_new(rules, n_rule, hs_inst->hs_attr,
|
||||
literal_cd, regex_cd, &pattern_cnt);
|
||||
if (NULL == bool_exprs) {
|
||||
return NULL;
|
||||
}
|
||||
hs_inst->n_patterns = pattern_cnt;
|
||||
|
||||
/* create bool matcher */
|
||||
size_t mem_size = 0;
|
||||
int hs_ret = 0;
|
||||
|
||||
hs_inst->hs_rt = ALLOC(struct adapter_hs_runtime, 1);
|
||||
|
||||
//hs_rt->bm
|
||||
hs_inst->hs_rt->bm = bool_matcher_new(bool_exprs, n_rule, &mem_size);
|
||||
if (hs_inst->hs_rt->bm != NULL) {
|
||||
log_info(logger, MODULE_ADAPTER_HS,
|
||||
"Adapter_hs module: build bool matcher of %zu expressions"
|
||||
" with %zu bytes memory", n_rule, mem_size);
|
||||
} else {
|
||||
log_error(logger, MODULE_ADAPTER_HS,
|
||||
"[%s:%d] Adapter_hs module: build bool matcher failed",
|
||||
__FUNCTION__, __LINE__);
|
||||
|
||||
hs_ret = -1;
|
||||
}
|
||||
FREE(bool_exprs);
|
||||
|
||||
/* build hs database hs_rt->literal_db & hs_rt->regex_db */
|
||||
int ret = adpt_hs_build_database(hs_inst->hs_rt, literal_cd, regex_cd, logger);
|
||||
if (ret < 0) {
|
||||
hs_ret = -1;
|
||||
}
|
||||
|
||||
if (literal_cd != NULL) {
|
||||
adpt_hs_compile_data_free(literal_cd);
|
||||
}
|
||||
|
||||
if (regex_cd != NULL) {
|
||||
adpt_hs_compile_data_free(regex_cd);
|
||||
}
|
||||
|
||||
if (hs_ret < 0) {
|
||||
goto error;
|
||||
}
|
||||
|
||||
/* alloc scratch */
|
||||
hs_inst->hs_rt->scratch = ALLOC(struct adapter_hs_scratch, 1);
|
||||
hs_inst->hs_rt->scratch->bool_match_buffs = ALLOC(struct bool_expr_match *,
|
||||
n_worker_thread);
|
||||
for (i = 0; i < n_worker_thread; i++) {
|
||||
hs_inst->hs_rt->scratch->bool_match_buffs[i] = ALLOC(struct bool_expr_match,
|
||||
MAX_HIT_EXPR_NUM);
|
||||
}
|
||||
|
||||
/* literal and regex scratch can't reuse */
|
||||
if (n_literal_pattern > 0) {
|
||||
ret = adpt_hs_alloc_scratch(hs_inst->hs_rt, n_worker_thread,
|
||||
EXPR_PATTERN_TYPE_STR, logger);
|
||||
if (ret < 0) {
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
|
||||
if (n_regex_pattern > 0) {
|
||||
ret = adpt_hs_alloc_scratch(hs_inst->hs_rt, n_worker_thread,
|
||||
EXPR_PATTERN_TYPE_REG, logger);
|
||||
if (ret < 0) {
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
|
||||
hs_inst->hs_rt->streams = ALLOC(struct adapter_hs_stream *, n_worker_thread);
|
||||
for (i = 0; i < n_worker_thread; i++) {
|
||||
hs_inst->hs_rt->streams[i] = (struct adapter_hs_stream *)adapter_hs_stream_open(hs_inst, i);
|
||||
}
|
||||
|
||||
return hs_inst;
|
||||
error:
|
||||
adapter_hs_free(hs_inst);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void adapter_hs_free(void *hs_instance)
|
||||
{
|
||||
if (NULL == hs_instance) {
|
||||
return;
|
||||
}
|
||||
|
||||
struct adapter_hs *hs_inst = (struct adapter_hs *)hs_instance;
|
||||
size_t i = 0;
|
||||
|
||||
if (hs_inst->hs_rt != NULL) {
|
||||
if (hs_inst->hs_rt->literal_db != NULL) {
|
||||
hs_free_database(hs_inst->hs_rt->literal_db);
|
||||
hs_inst->hs_rt->literal_db = NULL;
|
||||
}
|
||||
|
||||
if (hs_inst->hs_rt->regex_db != NULL) {
|
||||
hs_free_database(hs_inst->hs_rt->regex_db);
|
||||
hs_inst->hs_rt->regex_db = NULL;
|
||||
}
|
||||
|
||||
if (hs_inst->hs_rt->scratch != NULL) {
|
||||
if (hs_inst->hs_rt->scratch->literal_scratches != NULL) {
|
||||
for (i = 0; i < hs_inst->n_worker_thread; i++) {
|
||||
if (hs_inst->hs_rt->scratch->literal_scratches[i] != NULL) {
|
||||
hs_free_scratch(hs_inst->hs_rt->scratch->literal_scratches[i]);
|
||||
hs_inst->hs_rt->scratch->literal_scratches[i] = NULL;
|
||||
}
|
||||
}
|
||||
FREE(hs_inst->hs_rt->scratch->literal_scratches);
|
||||
}
|
||||
|
||||
if (hs_inst->hs_rt->scratch->regex_scratches != NULL) {
|
||||
for (i = 0; i < hs_inst->n_worker_thread; i++) {
|
||||
if (hs_inst->hs_rt->scratch->regex_scratches[i] != NULL) {
|
||||
hs_free_scratch(hs_inst->hs_rt->scratch->regex_scratches[i]);
|
||||
hs_inst->hs_rt->scratch->regex_scratches[i] = NULL;
|
||||
}
|
||||
}
|
||||
FREE(hs_inst->hs_rt->scratch->regex_scratches);
|
||||
}
|
||||
|
||||
if (hs_inst->hs_rt->scratch->bool_match_buffs != NULL) {
|
||||
for (i = 0; i < hs_inst->n_worker_thread; i++) {
|
||||
if (hs_inst->hs_rt->scratch->bool_match_buffs[i] != NULL) {
|
||||
FREE(hs_inst->hs_rt->scratch->bool_match_buffs[i]);
|
||||
}
|
||||
}
|
||||
|
||||
FREE(hs_inst->hs_rt->scratch->bool_match_buffs);
|
||||
}
|
||||
|
||||
FREE(hs_inst->hs_rt->scratch);
|
||||
}
|
||||
|
||||
if (hs_inst->hs_rt->bm != NULL) {
|
||||
bool_matcher_free(hs_inst->hs_rt->bm);
|
||||
hs_inst->hs_rt->bm = NULL;
|
||||
}
|
||||
|
||||
if (hs_inst->hs_rt->streams != NULL) {
|
||||
for (i = 0; i < hs_inst->n_worker_thread; i++) {
|
||||
if (hs_inst->hs_rt->streams[i] != NULL) {
|
||||
adapter_hs_stream_close(hs_inst->hs_rt->streams[i]);
|
||||
hs_inst->hs_rt->streams[i] = NULL;
|
||||
}
|
||||
}
|
||||
FREE(hs_inst->hs_rt->streams);
|
||||
}
|
||||
|
||||
FREE(hs_inst->hs_rt);
|
||||
}
|
||||
|
||||
if (hs_inst->hs_attr != NULL) {
|
||||
FREE(hs_inst->hs_attr);
|
||||
}
|
||||
|
||||
FREE(hs_inst);
|
||||
}
|
||||
|
||||
/*
 * Three-way comparison of two pattern ids stored as unsigned long long,
 * usable as a sort / search comparator.
 *
 * The ids are compared directly instead of inspecting the sign of their
 * unsigned difference: that difference wraps around, so ids further apart
 * than LLONG_MAX used to be ordered the wrong way.
 *
 * @retval -1(a < b) 0(a == b) 1(a > b)
 */
static inline int compare_pattern_id(const void *a, const void *b)
{
    const unsigned long long lhs = *(const unsigned long long *)a;
    const unsigned long long rhs = *(const unsigned long long *)b;

    if (lhs < rhs) {
        return -1;
    }

    return (lhs > rhs) ? 1 : 0;
}
|
||||
|
||||
/**
|
||||
* @param id: pattern id
|
||||
*/
|
||||
static int matched_event_cb(unsigned int id, unsigned long long from,
|
||||
unsigned long long to, unsigned int flags,
|
||||
void *ctx)
|
||||
{
|
||||
// put id in set
|
||||
unsigned long long pattern_id = id;
|
||||
struct matched_pattern *matched_pat = (struct matched_pattern *)ctx;
|
||||
|
||||
if (id > matched_pat->n_patterns || id < 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (utarray_len(matched_pat->pattern_ids) >= MAX_HIT_PATTERN_NUM) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// duplicate pattern_id
|
||||
if (utarray_find(matched_pat->pattern_ids, &pattern_id, compare_pattern_id)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
int ret = 0;
|
||||
struct pattern_attribute pat_attr = matched_pat->ref_hs_attr[id];
|
||||
switch (pat_attr.match_mode) {
|
||||
case EXPR_MATCH_MODE_EXACTLY:
|
||||
if (0 == from && matched_pat->scan_data_len == to) {
|
||||
ret = 1;
|
||||
}
|
||||
break;
|
||||
case EXPR_MATCH_MODE_SUB:
|
||||
if (pat_attr.offset.start == -1 &&
|
||||
pat_attr.offset.end == -1) {
|
||||
ret = 1;
|
||||
break;
|
||||
}
|
||||
|
||||
if (pat_attr.offset.start == -1) {
|
||||
if ((long long)(to - 1) <= pat_attr.offset.end) {
|
||||
ret = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (pat_attr.offset.end == -1) {
|
||||
if ((long long)from >= pat_attr.offset.start) {
|
||||
ret = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if ((long long)from >= pat_attr.offset.start &&
|
||||
(long long)(to - 1) <= pat_attr.offset.end) {
|
||||
ret = 1;
|
||||
}
|
||||
break;
|
||||
case EXPR_MATCH_MODE_PREFIX:
|
||||
if (0 == from) {
|
||||
ret = 1;
|
||||
}
|
||||
break;
|
||||
case EXPR_MATCH_MODE_SUFFIX:
|
||||
if (to == matched_pat->scan_data_len) {
|
||||
ret = 1;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (1 == ret) {
|
||||
utarray_push_back(matched_pat->pattern_ids, &pattern_id);
|
||||
utarray_sort(matched_pat->pattern_ids, compare_pattern_id);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
UT_icd ut_hs_pattern_id_icd = {sizeof(unsigned long long), NULL, NULL, NULL};
|
||||
void *adapter_hs_stream_open(void *hs_instance, int thread_id)
|
||||
{
|
||||
if (NULL == hs_instance || thread_id < 0) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
struct adapter_hs *hs_inst = (struct adapter_hs *)hs_instance;
|
||||
struct adapter_hs_stream *hs_stream = ALLOC(struct adapter_hs_stream, 1);
|
||||
hs_error_t err;
|
||||
|
||||
hs_stream->logger = hs_inst->logger;
|
||||
hs_stream->thread_id = thread_id;
|
||||
hs_stream->ref_hs_rt = hs_inst->hs_rt;
|
||||
hs_stream->matched_pat = ALLOC(struct matched_pattern, 1);
|
||||
hs_stream->matched_pat->ref_hs_attr = hs_inst->hs_attr;
|
||||
hs_stream->matched_pat->n_patterns = hs_inst->n_patterns;
|
||||
utarray_new(hs_stream->matched_pat->pattern_ids, &ut_hs_pattern_id_icd);
|
||||
utarray_reserve(hs_stream->matched_pat->pattern_ids, MAX_HIT_PATTERN_NUM);
|
||||
|
||||
int err_count = 0;
|
||||
if (hs_inst->hs_rt->literal_db != NULL) {
|
||||
err = hs_open_stream(hs_inst->hs_rt->literal_db, 0,
|
||||
&hs_stream->literal_stream);
|
||||
if (err != HS_SUCCESS) {
|
||||
log_error(hs_inst->logger, MODULE_ADAPTER_HS,
|
||||
"hs_open_stream failed, hs err:%d", err);
|
||||
err_count++;
|
||||
}
|
||||
}
|
||||
|
||||
if (hs_inst->hs_rt->regex_db != NULL) {
|
||||
err = hs_open_stream(hs_inst->hs_rt->regex_db, 0,
|
||||
&hs_stream->regex_stream);
|
||||
if (err != HS_SUCCESS) {
|
||||
log_error(hs_inst->logger, MODULE_ADAPTER_HS,
|
||||
"hs_open_stream failed, hs err:%d", err);
|
||||
err_count++;
|
||||
}
|
||||
}
|
||||
|
||||
if (err_count > 0) {
|
||||
goto error;
|
||||
}
|
||||
|
||||
return hs_stream;
|
||||
error:
|
||||
if (hs_stream->literal_stream != NULL) {
|
||||
hs_close_stream(hs_stream->literal_stream, NULL, NULL, NULL);
|
||||
hs_stream->literal_stream = NULL;
|
||||
}
|
||||
|
||||
if (hs_stream->regex_stream != NULL) {
|
||||
hs_close_stream(hs_stream->regex_stream, NULL, NULL, NULL);
|
||||
hs_stream->regex_stream = NULL;
|
||||
}
|
||||
|
||||
FREE(hs_stream);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void adapter_hs_stream_close(void *hs_stream)
|
||||
{
|
||||
if (NULL == hs_stream) {
|
||||
return;
|
||||
}
|
||||
|
||||
struct adapter_hs_stream *stream = (struct adapter_hs_stream *)hs_stream;
|
||||
if (stream->ref_hs_rt != NULL) {
|
||||
if (stream->literal_stream != NULL) {
|
||||
hs_close_stream(stream->literal_stream, NULL, NULL, NULL);
|
||||
stream->literal_stream = NULL;
|
||||
}
|
||||
|
||||
if (stream->regex_stream != NULL) {
|
||||
hs_close_stream(stream->regex_stream, NULL, NULL, NULL);
|
||||
stream->regex_stream = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/* stream->hs_rt point to hs_instance->hs_rt which will call free
|
||||
same as hs_attr */
|
||||
stream->ref_hs_rt = NULL;
|
||||
stream->matched_pat->ref_hs_attr = NULL;
|
||||
|
||||
if (stream->matched_pat->pattern_ids != NULL) {
|
||||
utarray_free(stream->matched_pat->pattern_ids);
|
||||
stream->matched_pat->pattern_ids = NULL;
|
||||
}
|
||||
|
||||
FREE(stream->matched_pat);
|
||||
FREE(stream);
|
||||
}
|
||||
|
||||
static void adapter_hs_stream_reset(struct adapter_hs_stream *hs_stream)
|
||||
{
|
||||
if (NULL == hs_stream) {
|
||||
return;
|
||||
}
|
||||
|
||||
struct adapter_hs_scratch *scratch = hs_stream->ref_hs_rt->scratch;
|
||||
if (hs_stream->literal_stream != NULL) {
|
||||
hs_reset_stream(hs_stream->literal_stream, 0,
|
||||
scratch->literal_scratches[hs_stream->thread_id],
|
||||
matched_event_cb, hs_stream->matched_pat);
|
||||
}
|
||||
|
||||
if (hs_stream->regex_stream != NULL) {
|
||||
hs_reset_stream(hs_stream->regex_stream, 0,
|
||||
scratch->regex_scratches[hs_stream->thread_id],
|
||||
matched_event_cb, hs_stream->matched_pat);
|
||||
}
|
||||
|
||||
utarray_clear(hs_stream->matched_pat->pattern_ids);
|
||||
}
|
||||
|
||||
int adapter_hs_scan_stream(void *hs_stream, const char *data, size_t data_len,
|
||||
struct expr_scan_result *results, size_t n_result,
|
||||
size_t *n_hit_result)
|
||||
{
|
||||
hs_error_t err;
|
||||
|
||||
if (NULL == hs_stream || NULL == data || 0 == data_len ||
|
||||
NULL == results || 0 == n_result || NULL == n_hit_result) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
/*
|
||||
In streaming mode, a non-zero return from the user-specified event-handler
|
||||
function has consequences for the rest of that stream's lifetime: when a
|
||||
non-zero return occurs, it signals that no more of the stream should be
|
||||
scanned. Consequently if the user makes a subsequent call to
|
||||
`hs_scan_stream` on a stream whose processing was terminated in this way,
|
||||
hs_scan_stream will return `HS_SCAN_TERMINATED`. This case has not been
|
||||
demonstrated in pcapscan, as its callback always returns 0.
|
||||
*/
|
||||
|
||||
int err_count = 0;
|
||||
struct adapter_hs_stream *stream = (struct adapter_hs_stream *)hs_stream;
|
||||
int thread_id = stream->thread_id;
|
||||
struct adapter_hs_scratch *scratch = stream->ref_hs_rt->scratch;
|
||||
stream->matched_pat->scan_data_len = data_len;
|
||||
|
||||
int err_scratch_flag = 0;
|
||||
if (stream->literal_stream != NULL) {
|
||||
if (scratch->literal_scratches != NULL) {
|
||||
err = hs_scan_stream(stream->literal_stream, data, data_len,
|
||||
0, scratch->literal_scratches[thread_id],
|
||||
matched_event_cb, stream->matched_pat);
|
||||
if (err != HS_SUCCESS) {
|
||||
err_count++;
|
||||
}
|
||||
} else {
|
||||
log_error(stream->logger, MODULE_ADAPTER_HS,
|
||||
"literal_scratches is null, thread_id:%d", thread_id);
|
||||
err_scratch_flag++;
|
||||
}
|
||||
}
|
||||
|
||||
if (stream->regex_stream != NULL) {
|
||||
if (scratch->regex_scratches != NULL) {
|
||||
err = hs_scan_stream(stream->regex_stream, data, data_len,
|
||||
0, scratch->regex_scratches[thread_id],
|
||||
matched_event_cb, stream->matched_pat);
|
||||
if (err != HS_SUCCESS) {
|
||||
err_count++;
|
||||
}
|
||||
} else {
|
||||
log_error(stream->logger, MODULE_ADAPTER_HS,
|
||||
"regex_scratches is null, thread_id:%d", thread_id);
|
||||
err_scratch_flag++;
|
||||
}
|
||||
}
|
||||
|
||||
if (err_count == 2) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (err_scratch_flag != 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
size_t n_pattern_id = utarray_len(stream->matched_pat->pattern_ids);
|
||||
if (0 == n_pattern_id) {
|
||||
*n_hit_result = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
unsigned long long pattern_ids[n_pattern_id];
|
||||
|
||||
for (size_t i = 0; i < n_pattern_id; i++) {
|
||||
pattern_ids[i] = *(unsigned long long *)utarray_eltptr(stream->matched_pat->pattern_ids, i);
|
||||
}
|
||||
|
||||
int ret = 0;
|
||||
struct bool_expr_match *bool_matcher_results = scratch->bool_match_buffs[thread_id];
|
||||
int bool_matcher_ret = bool_matcher_match(stream->ref_hs_rt->bm, pattern_ids, n_pattern_id,
|
||||
bool_matcher_results, MAX_HIT_EXPR_NUM);
|
||||
if (bool_matcher_ret < 0) {
|
||||
ret = -1;
|
||||
goto next;
|
||||
}
|
||||
|
||||
if (bool_matcher_ret > (int)n_result) {
|
||||
bool_matcher_ret = n_result;
|
||||
}
|
||||
|
||||
for (int index = 0; index < bool_matcher_ret; index++) {
|
||||
results[index].rule_id = bool_matcher_results[index].expr_id;
|
||||
results[index].user_tag = bool_matcher_results[index].user_tag;
|
||||
}
|
||||
*n_hit_result = bool_matcher_ret;
|
||||
|
||||
next:
|
||||
utarray_clear(stream->matched_pat->pattern_ids);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int adapter_hs_scan(void *hs_instance, int thread_id, const char *data, size_t data_len,
|
||||
struct expr_scan_result *results, size_t n_result, size_t *n_hit_result)
|
||||
{
|
||||
if (NULL == hs_instance || NULL == data || (0 == data_len) ||
|
||||
NULL == results || 0 == n_result || NULL == n_hit_result) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
struct adapter_hs *hs_inst = (struct adapter_hs *)hs_instance;
|
||||
struct adapter_hs_stream *hs_stream = hs_inst->hs_rt->streams[thread_id];
|
||||
assert(hs_stream != NULL);
|
||||
|
||||
adapter_hs_stream_reset(hs_stream);
|
||||
return adapter_hs_scan_stream(hs_stream, data, data_len, results, n_result, n_hit_result);
|
||||
}
|
||||
75
scanner/expr_matcher/adapter_hs/adapter_hs.h
Normal file
75
scanner/expr_matcher/adapter_hs/adapter_hs.h
Normal file
@@ -0,0 +1,75 @@
|
||||
/*
|
||||
**********************************************************************************************
|
||||
* File: adapter_hs.h
|
||||
* Description:
|
||||
* Authors: Liu wentan <liuwentan@geedgenetworks.com>
|
||||
* Date: 2022-10-31
|
||||
* Copyright: (c) Since 2022 Geedge Networks, Ltd. All rights reserved.
|
||||
***********************************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef _ADAPTER_HS_H_
|
||||
#define _ADAPTER_HS_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "log/log.h"
|
||||
#include "../expr_matcher.h"
|
||||
|
||||
int adapter_hs_verify_regex_expression(const char *regex_expr, struct log_handle *logger);
|
||||
/**
|
||||
* @brief new adapter_hs instance
|
||||
*
|
||||
* @param rules: logic AND expression's array
|
||||
* @param n_rule: the number of logic AND expression's array
|
||||
* @param n_worker_thread: the number of scan threads which will call adapter_hs_scan()
|
||||
*
|
||||
* @retval the pointer to adapter_hs instance
|
||||
*/
|
||||
void *adapter_hs_new(struct expr_rule *rules, size_t n_rule,
|
||||
size_t n_literal_pattern, size_t n_regex_pattern,
|
||||
size_t n_worker_thread, struct log_handle *logger);
|
||||
|
||||
/**
|
||||
* @brief scan input data to match logic AND expression, return all matched expr_id
|
||||
*
|
||||
* @param hs_instance: adapter_hs instance obtained by adapter_hs_new()
|
||||
* @param thread_id: the thread_id of caller
|
||||
* @param data: data to be scanned
|
||||
* @param data_len: the length of data to be scanned
|
||||
* @param results: the array of expr_id
|
||||
* @param n_result: number of elements in array of expr_id
|
||||
*/
|
||||
int adapter_hs_scan(void *hs_instance, int thread_id, const char *data, size_t data_len,
|
||||
struct expr_scan_result *results, size_t n_result, size_t *n_hit_result);
|
||||
|
||||
/**
|
||||
* @brief destroy adapter_hs instance
|
||||
*
|
||||
* @param instance: adapter_hs instance obtained by adapter_hs_new()
|
||||
*/
|
||||
void adapter_hs_free(void *instance);
|
||||
|
||||
/**
|
||||
* @brief open adapter_hs stream after adapter_hs instance initialized for stream scan
|
||||
*
|
||||
*/
|
||||
void *adapter_hs_stream_open(void *hs_instance, int thread_id);
|
||||
|
||||
int adapter_hs_scan_stream(void *stream, const char *data, size_t data_len,
|
||||
struct expr_scan_result *results, size_t n_result,
|
||||
size_t *n_hit_result);
|
||||
|
||||
void adapter_hs_stream_close(void *stream);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
708
scanner/expr_matcher/adapter_rs/adapter_rs.cpp
Normal file
708
scanner/expr_matcher/adapter_rs/adapter_rs.cpp
Normal file
@@ -0,0 +1,708 @@
|
||||
/*
|
||||
**********************************************************************************************
|
||||
* File: adapter_rs.cpp
|
||||
* Description:
|
||||
* Authors: Liu wentan <liuwentan@geedgenetworks.com>
|
||||
* Date: 2022-10-31
|
||||
* Copyright: (c) Since 2022 Geedge Networks, Ltd. All rights reserved.
|
||||
***********************************************************************************************
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stddef.h>
|
||||
#include <assert.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/syscall.h>
|
||||
|
||||
#include "rulescan.h"
|
||||
#include "adapter_rs.h"
|
||||
#include "uthash/utarray.h"
|
||||
#include "uthash/uthash.h"
|
||||
#include "maat_utils.h"
|
||||
#include "../../bool_matcher/bool_matcher.h"
|
||||
|
||||
#define MAX_HIT_PATTERN_NUM 512
|
||||
|
||||
/* Return the kernel thread id of the caller, obtained through the raw SYS_gettid syscall. */
pid_t rs_gettid()
{
    pid_t tid = syscall(SYS_gettid);

    return tid;
}
|
||||
|
||||
static const char *rs_module_name_str(const char *name)
|
||||
{
|
||||
static __thread char module[64];
|
||||
snprintf(module, sizeof(module), "%s(%d)", name, rs_gettid());
|
||||
|
||||
return module;
|
||||
}
|
||||
|
||||
#define MODULE_ADAPTER_RS rs_module_name_str("maat.adapter_rs")
|
||||
|
||||
/* patterns of one kind (literal or regex) collected for a rulescan compile call */
struct adpt_rs_compile_data {
    struct scan_pattern *patterns; /* array of n_patterns entries */
    size_t n_patterns;             /* capacity requested at allocation time */
};
|
||||
|
||||
struct adapter_rs_stream {
|
||||
int thread_id;
|
||||
size_t offset; /* current stream offset */
|
||||
rs_stream_t *literal_stream;
|
||||
rs_stream_t *regex_stream;
|
||||
struct adapter_rs_runtime *ref_rs_rt;
|
||||
|
||||
struct log_handle *logger;
|
||||
};
|
||||
|
||||
/* adapter_rs runtime */
|
||||
struct adapter_rs_runtime {
|
||||
rs_database_t *literal_db;
|
||||
rs_database_t *regex_db;
|
||||
|
||||
struct bool_expr_match **bool_match_buffs; /* per thread */
|
||||
struct adapter_rs_stream **streams; /* per thread */
|
||||
struct matched_pattern **matched_pats; /* per thread */
|
||||
struct bool_matcher *bm;
|
||||
};
|
||||
|
||||
/* adapter_rs instance */
|
||||
struct adapter_rs {
    size_t n_worker_thread;            /* length of every per-thread array in rs_rt */
    size_t n_expr;                     /* number of rules given to adapter_rs_new() */
    size_t n_patterns;                 /* number of entries filled in rs_attr */
    struct adapter_rs_runtime *rs_rt;
    struct pattern_attribute *rs_attr; /* indexed by pattern id */
    struct log_handle *logger;
};
|
||||
|
||||
/* offset window of a pattern; matched_event_cb() treats -1 as "no bound" */
struct pattern_offset {
    long long start; /* lowest accepted match start */
    long long end;   /* highest accepted position of the last matched byte */
};
|
||||
|
||||
struct pattern_attribute {
|
||||
long long pattern_id;
|
||||
enum expr_match_mode match_mode;
|
||||
struct pattern_offset offset;
|
||||
size_t pattern_len;
|
||||
};
|
||||
|
||||
struct matched_pattern {
|
||||
UT_array *pattern_ids;
|
||||
size_t n_patterns;
|
||||
struct pattern_attribute *ref_rs_attr;
|
||||
};
|
||||
|
||||
int adapter_rs_verify_regex_expression(const char *regex_expr,
|
||||
struct log_handle *logger)
|
||||
{
|
||||
int ret = rs_verify_regex(regex_expr);
|
||||
if (ret == 0) {
|
||||
log_error(logger, MODULE_ADAPTER_RS,
|
||||
"[%s:%d] illegal regex expression: \"%s\"",
|
||||
__FUNCTION__, __LINE__, regex_expr);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
/**
|
||||
* @brief build rs database for literal string and regex expression respectively
|
||||
*
|
||||
* @retval 0(success) -1(failed)
|
||||
*/
|
||||
static int adpt_rs_build_database(struct adapter_rs_runtime *rs_rt,
|
||||
size_t n_worker_thread,
|
||||
struct adpt_rs_compile_data *literal_cd,
|
||||
struct adpt_rs_compile_data *regex_cd,
|
||||
struct log_handle *logger)
|
||||
{
|
||||
if (NULL == rs_rt) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
int ret = 0;
|
||||
if (literal_cd != NULL) {
|
||||
ret = rs_compile_lit(literal_cd->patterns, literal_cd->n_patterns,
|
||||
&rs_rt->literal_db);
|
||||
if (ret < 0) {
|
||||
log_error(logger, MODULE_ADAPTER_RS, "[%s:%d] compile error",
|
||||
__FUNCTION__, __LINE__);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
if (regex_cd != NULL) {
|
||||
size_t n_failed_pats = 0;
|
||||
ret = rs_compile_regex(regex_cd->patterns, regex_cd->n_patterns,
|
||||
n_worker_thread, &rs_rt->regex_db, &n_failed_pats);
|
||||
if (ret < 0) {
|
||||
log_error(logger, MODULE_ADAPTER_RS, "[%s:%d] compile error",
|
||||
__FUNCTION__, __LINE__);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct adpt_rs_compile_data *adpt_rs_compile_data_new(size_t n_patterns)
|
||||
{
|
||||
struct adpt_rs_compile_data *rs_cd = ALLOC(struct adpt_rs_compile_data, 1);
|
||||
rs_cd->patterns = ALLOC(struct scan_pattern, n_patterns);
|
||||
rs_cd->n_patterns = n_patterns;
|
||||
|
||||
return rs_cd;
|
||||
}
|
||||
|
||||
static void adpt_rs_compile_data_free(struct adpt_rs_compile_data *rs_cd)
|
||||
{
|
||||
if (NULL == rs_cd) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (rs_cd->patterns != NULL) {
|
||||
for (size_t i = 0; i < rs_cd->n_patterns; i++) {
|
||||
if (rs_cd->patterns[i].pattern != NULL) {
|
||||
FREE(rs_cd->patterns[i].pattern);
|
||||
}
|
||||
}
|
||||
|
||||
FREE(rs_cd->patterns);
|
||||
}
|
||||
|
||||
FREE(rs_cd);
|
||||
}
|
||||
|
||||
static void populate_compile_data(struct adpt_rs_compile_data *compile_data,
|
||||
size_t index, long long pattern_id, char *pat,
|
||||
size_t pat_len, int case_sensitive)
|
||||
{
|
||||
compile_data->patterns[index].id = pattern_id;
|
||||
compile_data->patterns[index].case_sensitive = case_sensitive;
|
||||
compile_data->patterns[index].pattern = ALLOC(char, pat_len + 1);
|
||||
memcpy(compile_data->patterns[index].pattern, pat, pat_len);
|
||||
compile_data->patterns[index].pattern_len = pat_len;
|
||||
}
|
||||
|
||||
static struct bool_expr *bool_exprs_new(struct expr_rule *rules, size_t n_rule,
|
||||
struct pattern_attribute *pattern_attr,
|
||||
struct adpt_rs_compile_data *literal_cd,
|
||||
struct adpt_rs_compile_data *regex_cd,
|
||||
size_t *n_pattern)
|
||||
{
|
||||
long long pattern_idx = 0;
|
||||
size_t literal_idx = 0;
|
||||
size_t regex_idx = 0;
|
||||
|
||||
struct bool_expr *bool_exprs = ALLOC(struct bool_expr, n_rule);
|
||||
|
||||
/* populate adpt_rs_compile_data and bool_expr */
|
||||
for (size_t i = 0; i < n_rule; i++) {
|
||||
|
||||
for (size_t j = 0; j < rules[i].n_patterns; j++) {
|
||||
pattern_attr[pattern_idx].pattern_id = pattern_idx;
|
||||
pattern_attr[pattern_idx].match_mode = rules[i].patterns[j].match_mode;
|
||||
pattern_attr[pattern_idx].pattern_len = rules[i].patterns[j].pat_len;
|
||||
|
||||
if (pattern_attr[pattern_idx].match_mode == EXPR_MATCH_MODE_SUB ||
|
||||
pattern_attr[pattern_idx].match_mode == EXPR_MATCH_MODE_EXACTLY) {
|
||||
pattern_attr[pattern_idx].offset.start = rules[i].patterns[j].start_offset;
|
||||
pattern_attr[pattern_idx].offset.end = rules[i].patterns[j].end_offset;
|
||||
}
|
||||
|
||||
/* literal pattern */
|
||||
if (rules[i].patterns[j].type == EXPR_PATTERN_TYPE_STR) {
|
||||
populate_compile_data(literal_cd, literal_idx, pattern_idx,
|
||||
rules[i].patterns[j].pat, rules[i].patterns[j].pat_len,
|
||||
rules[i].patterns[j].case_sensitive);
|
||||
literal_idx++;
|
||||
} else {
|
||||
/* regex pattern */
|
||||
populate_compile_data(regex_cd, regex_idx, pattern_idx,
|
||||
rules[i].patterns[j].pat, rules[i].patterns[j].pat_len,
|
||||
rules[i].patterns[j].case_sensitive);
|
||||
regex_idx++;
|
||||
}
|
||||
|
||||
bool_exprs[i].items[j].item_id = pattern_idx++;
|
||||
bool_exprs[i].items[j].not_flag = 0;
|
||||
}
|
||||
|
||||
bool_exprs[i].expr_id = rules[i].expr_id;
|
||||
bool_exprs[i].item_num = rules[i].n_patterns;
|
||||
bool_exprs[i].user_tag = rules[i].tag;
|
||||
}
|
||||
|
||||
*n_pattern = pattern_idx;
|
||||
|
||||
return bool_exprs;
|
||||
}
|
||||
|
||||
UT_icd ut_rs_pattern_id_icd = {sizeof(unsigned long long), NULL, NULL, NULL};
|
||||
void *adapter_rs_new(struct expr_rule *rules, size_t n_rule,
|
||||
size_t n_literal_pattern, size_t n_regex_pattern,
|
||||
size_t n_worker_thread, struct log_handle *logger)
|
||||
{
|
||||
/* get the sum of pattern */
|
||||
size_t i = 0;
|
||||
struct adpt_rs_compile_data *literal_cd = NULL;
|
||||
struct adpt_rs_compile_data *regex_cd = NULL;
|
||||
|
||||
if (n_literal_pattern > 0) {
|
||||
literal_cd = adpt_rs_compile_data_new(n_literal_pattern);
|
||||
}
|
||||
|
||||
if (n_regex_pattern > 0) {
|
||||
regex_cd = adpt_rs_compile_data_new(n_regex_pattern);
|
||||
}
|
||||
|
||||
size_t pattern_cnt = n_literal_pattern + n_regex_pattern;
|
||||
struct adapter_rs *rs_inst = ALLOC(struct adapter_rs, 1);
|
||||
rs_inst->rs_attr = ALLOC(struct pattern_attribute, pattern_cnt);
|
||||
rs_inst->logger = logger;
|
||||
rs_inst->n_worker_thread = n_worker_thread;
|
||||
rs_inst->n_expr = n_rule;
|
||||
|
||||
struct bool_expr *bool_exprs = bool_exprs_new(rules, n_rule, rs_inst->rs_attr,
|
||||
literal_cd, regex_cd, &pattern_cnt);
|
||||
if (NULL == bool_exprs) {
|
||||
return NULL;
|
||||
}
|
||||
rs_inst->n_patterns = pattern_cnt;
|
||||
|
||||
/* create bool matcher */
|
||||
size_t mem_size = 0;
|
||||
int rs_ret = 0;
|
||||
|
||||
rs_inst->rs_rt = ALLOC(struct adapter_rs_runtime, 1);
|
||||
|
||||
//rs_rt->bm
|
||||
rs_inst->rs_rt->bm = bool_matcher_new(bool_exprs, n_rule, &mem_size);
|
||||
if (rs_inst->rs_rt->bm != NULL) {
|
||||
log_info(logger, MODULE_ADAPTER_RS,
|
||||
"Adapter_rs module: build bool matcher of %zu expressions"
|
||||
" with %zu bytes memory", n_rule, mem_size);
|
||||
} else {
|
||||
log_error(logger, MODULE_ADAPTER_RS,
|
||||
"[%s:%d] Adapter_rs module: build bool matcher failed",
|
||||
__FUNCTION__, __LINE__);
|
||||
|
||||
rs_ret = -1;
|
||||
}
|
||||
FREE(bool_exprs);
|
||||
|
||||
/* build rs database rs_rt->literal_db & rs_rt->regex_db */
|
||||
int ret = adpt_rs_build_database(rs_inst->rs_rt, n_worker_thread,
|
||||
literal_cd, regex_cd, logger);
|
||||
if (ret < 0) {
|
||||
rs_ret = -1;
|
||||
}
|
||||
|
||||
if (literal_cd != NULL) {
|
||||
adpt_rs_compile_data_free(literal_cd);
|
||||
}
|
||||
|
||||
if (regex_cd != NULL) {
|
||||
adpt_rs_compile_data_free(regex_cd);
|
||||
}
|
||||
|
||||
if (rs_ret < 0) {
|
||||
goto error;
|
||||
}
|
||||
|
||||
/* alloc scratch */
|
||||
rs_inst->rs_rt->bool_match_buffs = ALLOC(struct bool_expr_match *, n_worker_thread);
|
||||
for (i = 0; i < n_worker_thread; i++) {
|
||||
rs_inst->rs_rt->bool_match_buffs[i] = ALLOC(struct bool_expr_match, MAX_HIT_EXPR_NUM);
|
||||
}
|
||||
|
||||
rs_inst->rs_rt->streams = ALLOC(struct adapter_rs_stream *, n_worker_thread);
|
||||
for (i = 0; i < n_worker_thread; i++) {
|
||||
rs_inst->rs_rt->streams[i] = (struct adapter_rs_stream *)adapter_rs_stream_open(rs_inst, i);
|
||||
}
|
||||
|
||||
rs_inst->rs_rt->matched_pats = ALLOC(struct matched_pattern *, n_worker_thread);
|
||||
for (i = 0; i < n_worker_thread; i++) {
|
||||
rs_inst->rs_rt->matched_pats[i] = ALLOC(struct matched_pattern, 1);
|
||||
rs_inst->rs_rt->matched_pats[i]->ref_rs_attr = rs_inst->rs_attr;
|
||||
rs_inst->rs_rt->matched_pats[i]->n_patterns = rs_inst->n_patterns;
|
||||
utarray_new(rs_inst->rs_rt->matched_pats[i]->pattern_ids, &ut_rs_pattern_id_icd);
|
||||
utarray_reserve(rs_inst->rs_rt->matched_pats[i]->pattern_ids, MAX_HIT_PATTERN_NUM);
|
||||
}
|
||||
|
||||
return rs_inst;
|
||||
error:
|
||||
adapter_rs_free(rs_inst);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void adapter_rs_free(void *rs_instance)
|
||||
{
|
||||
if (NULL == rs_instance) {
|
||||
return;
|
||||
}
|
||||
|
||||
size_t i = 0;
|
||||
struct adapter_rs *rs_inst = (struct adapter_rs *)rs_instance;
|
||||
|
||||
if (rs_inst->rs_rt != NULL) {
|
||||
if (rs_inst->rs_rt->literal_db != NULL) {
|
||||
rs_free_database(rs_inst->rs_rt->literal_db);
|
||||
rs_inst->rs_rt->literal_db = NULL;
|
||||
}
|
||||
|
||||
if (rs_inst->rs_rt->regex_db != NULL) {
|
||||
rs_free_database(rs_inst->rs_rt->regex_db);
|
||||
rs_inst->rs_rt->regex_db = NULL;
|
||||
}
|
||||
|
||||
if (rs_inst->rs_rt->bool_match_buffs != NULL) {
|
||||
for (i = 0; i < rs_inst->n_worker_thread; i++) {
|
||||
if (rs_inst->rs_rt->bool_match_buffs[i] != NULL) {
|
||||
FREE(rs_inst->rs_rt->bool_match_buffs[i]);
|
||||
}
|
||||
}
|
||||
|
||||
FREE(rs_inst->rs_rt->bool_match_buffs);
|
||||
}
|
||||
|
||||
if (rs_inst->rs_rt->bm != NULL) {
|
||||
bool_matcher_free(rs_inst->rs_rt->bm);
|
||||
rs_inst->rs_rt->bm = NULL;
|
||||
}
|
||||
|
||||
if (rs_inst->rs_rt->streams != NULL) {
|
||||
for (i = 0; i < rs_inst->n_worker_thread; i++) {
|
||||
if (rs_inst->rs_rt->streams[i] != NULL) {
|
||||
adapter_rs_stream_close(rs_inst->rs_rt->streams[i]);
|
||||
rs_inst->rs_rt->streams[i] = NULL;
|
||||
}
|
||||
}
|
||||
FREE(rs_inst->rs_rt->streams);
|
||||
}
|
||||
|
||||
if (rs_inst->rs_rt->matched_pats != NULL) {
|
||||
for (i = 0; i < rs_inst->n_worker_thread; i++) {
|
||||
if (rs_inst->rs_rt->matched_pats[i] != NULL) {
|
||||
utarray_free(rs_inst->rs_rt->matched_pats[i]->pattern_ids);
|
||||
FREE(rs_inst->rs_rt->matched_pats[i]);
|
||||
}
|
||||
}
|
||||
FREE(rs_inst->rs_rt->matched_pats);
|
||||
}
|
||||
|
||||
FREE(rs_inst->rs_rt);
|
||||
}
|
||||
|
||||
if (rs_inst->rs_attr != NULL) {
|
||||
FREE(rs_inst->rs_attr);
|
||||
}
|
||||
|
||||
FREE(rs_inst);
|
||||
}
|
||||
|
||||
/*
 * Three-way comparison of two unsigned long long pattern ids, used both for
 * sorting the hit list and for searching it.
 *
 * The values are compared directly: subtracting them wraps around for large
 * differences and would report the wrong order.
 *
 * @retval -1 (*a < *b), 0 (equal), 1 (*a > *b)
 */
static inline int compare_pattern_id(const void *a, const void *b)
{
    unsigned long long lhs = *(const unsigned long long *)a;
    unsigned long long rhs = *(const unsigned long long *)b;

    return (lhs > rhs) - (lhs < rhs);
}
|
||||
|
||||
/**
|
||||
* @param id: pattern id
|
||||
*/
|
||||
static int matched_event_cb(unsigned int id, int pos_offset, int from, int to,
|
||||
size_t data_len, void *ctx)
|
||||
{
|
||||
// put id in set
|
||||
unsigned long long pattern_id = id;
|
||||
struct matched_pattern *matched_pat = (struct matched_pattern *)ctx;
|
||||
|
||||
if (pattern_id > matched_pat->n_patterns || id < 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (utarray_len(matched_pat->pattern_ids) >= MAX_HIT_PATTERN_NUM) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// duplicate pattern_id
|
||||
if (utarray_find(matched_pat->pattern_ids, &pattern_id, compare_pattern_id)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
int ret = 0;
|
||||
struct pattern_attribute pat_attr = matched_pat->ref_rs_attr[id];
|
||||
|
||||
switch (pat_attr.match_mode) {
|
||||
case EXPR_MATCH_MODE_EXACTLY:
|
||||
if (0 == (from + pos_offset) && (int)data_len == (to + pos_offset)) {
|
||||
ret = 1;
|
||||
}
|
||||
break;
|
||||
case EXPR_MATCH_MODE_SUB:
|
||||
if (pat_attr.offset.start == -1 &&
|
||||
pat_attr.offset.end == -1) {
|
||||
ret = 1;
|
||||
break;
|
||||
}
|
||||
|
||||
if (pat_attr.offset.start == -1) {
|
||||
if ((long long)(to + pos_offset - 1) <= pat_attr.offset.end) {
|
||||
ret = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (pat_attr.offset.end == -1) {
|
||||
if ((long long)(from + pos_offset) >= pat_attr.offset.start) {
|
||||
ret = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if ((long long)(from + pos_offset) >= pat_attr.offset.start &&
|
||||
(long long)(to + pos_offset - 1) <= pat_attr.offset.end) {
|
||||
ret = 1;
|
||||
}
|
||||
break;
|
||||
case EXPR_MATCH_MODE_PREFIX:
|
||||
if (0 == (from + pos_offset)) {
|
||||
ret = 1;
|
||||
}
|
||||
break;
|
||||
case EXPR_MATCH_MODE_SUFFIX:
|
||||
if ((to + pos_offset) == (int)data_len) {
|
||||
ret = 1;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (1 == ret) {
|
||||
utarray_push_back(matched_pat->pattern_ids, &pattern_id);
|
||||
utarray_sort(matched_pat->pattern_ids, compare_pattern_id);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void *adapter_rs_stream_open(void *rs_instance, int thread_id)
|
||||
{
|
||||
if (NULL == rs_instance || thread_id < 0) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
struct adapter_rs *rs_inst = (struct adapter_rs *)rs_instance;
|
||||
struct adapter_rs_stream *rs_stream = ALLOC(struct adapter_rs_stream, 1);
|
||||
|
||||
rs_stream->logger = rs_inst->logger;
|
||||
rs_stream->thread_id = thread_id;
|
||||
rs_stream->ref_rs_rt = rs_inst->rs_rt;
|
||||
|
||||
int err_count = 0;
|
||||
if (rs_inst->rs_rt->literal_db != NULL) {
|
||||
rs_stream->literal_stream = rs_open_stream(rs_inst->rs_rt->literal_db, 0, 128);
|
||||
if (NULL == rs_stream->literal_stream) {
|
||||
log_error(rs_inst->logger, MODULE_ADAPTER_RS, "rs_open_stream failed");
|
||||
err_count++;
|
||||
}
|
||||
}
|
||||
|
||||
if (rs_inst->rs_rt->regex_db != NULL) {
|
||||
rs_stream->regex_stream = rs_open_stream(rs_inst->rs_rt->regex_db, 0, 128);
|
||||
if (NULL == rs_stream->regex_stream) {
|
||||
log_error(rs_inst->logger, MODULE_ADAPTER_RS, "rs_open_stream failed");
|
||||
err_count++;
|
||||
}
|
||||
}
|
||||
|
||||
if (err_count > 0) {
|
||||
goto error;
|
||||
}
|
||||
|
||||
return rs_stream;
|
||||
error:
|
||||
if (rs_stream->literal_stream != NULL) {
|
||||
rs_close_stream(rs_stream->literal_stream);
|
||||
rs_stream->literal_stream = NULL;
|
||||
}
|
||||
|
||||
if (rs_stream->regex_stream != NULL) {
|
||||
rs_close_stream(rs_stream->regex_stream);
|
||||
rs_stream->regex_stream = NULL;
|
||||
}
|
||||
|
||||
FREE(rs_stream);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void adapter_rs_stream_close(void *rs_stream)
|
||||
{
|
||||
if (NULL == rs_stream) {
|
||||
return;
|
||||
}
|
||||
|
||||
struct adapter_rs_stream *stream = (struct adapter_rs_stream *)rs_stream;
|
||||
if (stream->ref_rs_rt != NULL) {
|
||||
if (stream->literal_stream != NULL) {
|
||||
rs_close_stream(stream->literal_stream);
|
||||
stream->literal_stream = NULL;
|
||||
}
|
||||
|
||||
if (stream->regex_stream != NULL) {
|
||||
rs_close_stream(stream->regex_stream);
|
||||
stream->regex_stream = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/* rs_stream->rs_rt point to rs_instance->rs_rt which will call free
|
||||
same as rs_attr */
|
||||
stream->ref_rs_rt = NULL;
|
||||
FREE(stream);
|
||||
}
|
||||
|
||||
int adapter_rs_scan_stream(void *rs_stream, const char *data, size_t data_len,
|
||||
struct expr_scan_result *results, size_t n_result,
|
||||
size_t *n_hit_result)
|
||||
{
|
||||
if (NULL == rs_stream || NULL == data || 0 == data_len ||
|
||||
NULL == results || 0 == n_result || NULL == n_hit_result) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
int ret = 0, err_count = 0;
|
||||
struct adapter_rs_stream *stream = (struct adapter_rs_stream *)rs_stream;
|
||||
int thread_id = stream->thread_id;
|
||||
struct adapter_rs_runtime *rs_rt = stream->ref_rs_rt;
|
||||
struct matched_pattern *matched_pat = rs_rt->matched_pats[thread_id];
|
||||
|
||||
if (stream->literal_stream != NULL) {
|
||||
ret = rs_scan_stream(stream->literal_stream, data, data_len,
|
||||
matched_event_cb, matched_pat);
|
||||
if (ret < 0) {
|
||||
err_count++;
|
||||
}
|
||||
}
|
||||
|
||||
if (stream->regex_stream != NULL) {
|
||||
ret = rs_scan_stream(stream->regex_stream, data, data_len,
|
||||
matched_event_cb, matched_pat);
|
||||
if (ret < 0) {
|
||||
err_count++;
|
||||
}
|
||||
}
|
||||
|
||||
if (err_count == 2) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
size_t n_pattern_id = utarray_len(matched_pat->pattern_ids);
|
||||
if (0 == n_pattern_id) {
|
||||
*n_hit_result = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
unsigned long long pattern_ids[n_pattern_id];
|
||||
|
||||
for (size_t i = 0; i < n_pattern_id; i++) {
|
||||
pattern_ids[i] = *(unsigned long long *)utarray_eltptr(matched_pat->pattern_ids, i);
|
||||
}
|
||||
|
||||
struct bool_expr_match *bool_matcher_results = rs_rt->bool_match_buffs[thread_id];
|
||||
int bool_matcher_ret = bool_matcher_match(rs_rt->bm, pattern_ids, n_pattern_id,
|
||||
bool_matcher_results, MAX_HIT_EXPR_NUM);
|
||||
if (bool_matcher_ret < 0) {
|
||||
ret = -1;
|
||||
goto next;
|
||||
}
|
||||
|
||||
if (bool_matcher_ret > (int)n_result) {
|
||||
bool_matcher_ret = n_result;
|
||||
}
|
||||
|
||||
for (int index = 0; index < bool_matcher_ret; index++) {
|
||||
results[index].rule_id = bool_matcher_results[index].expr_id;
|
||||
results[index].user_tag = bool_matcher_results[index].user_tag;
|
||||
}
|
||||
*n_hit_result = bool_matcher_ret;
|
||||
|
||||
next:
|
||||
utarray_clear(matched_pat->pattern_ids);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int adapter_rs_scan(void *rs_instance, int thread_id, const char *data, size_t data_len,
|
||||
struct expr_scan_result *results, size_t n_result, size_t *n_hit_result)
|
||||
{
|
||||
if (NULL == rs_instance || NULL == data || (0 == data_len) ||
|
||||
NULL == results || 0 == n_result || NULL == n_hit_result) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
int ret = 0, err_count = 0;
|
||||
struct adapter_rs *rs_inst = (struct adapter_rs *)rs_instance;
|
||||
struct adapter_rs_runtime *rs_rt = rs_inst->rs_rt;
|
||||
struct matched_pattern *matched_pat = rs_rt->matched_pats[thread_id];
|
||||
|
||||
if (rs_rt->literal_db != NULL) {
|
||||
ret = rs_scan(rs_rt->literal_db, thread_id, data, data_len,
|
||||
0, matched_event_cb, matched_pat);
|
||||
if (ret < 0) {
|
||||
err_count++;
|
||||
}
|
||||
}
|
||||
|
||||
if (rs_rt->regex_db != NULL) {
|
||||
ret = rs_scan(rs_rt->regex_db, thread_id, data, data_len,
|
||||
0, matched_event_cb, matched_pat);
|
||||
if (ret < 0) {
|
||||
err_count++;
|
||||
}
|
||||
}
|
||||
|
||||
if (err_count == 2) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
size_t n_pattern_id = utarray_len(matched_pat->pattern_ids);
|
||||
if (0 == n_pattern_id) {
|
||||
*n_hit_result = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
unsigned long long pattern_ids[n_pattern_id];
|
||||
for (size_t i = 0; i < n_pattern_id; i++) {
|
||||
pattern_ids[i] = *(unsigned long long *)utarray_eltptr(matched_pat->pattern_ids, i);
|
||||
}
|
||||
|
||||
struct bool_expr_match *bool_matcher_results = rs_rt->bool_match_buffs[thread_id];
|
||||
int bool_matcher_ret = bool_matcher_match(rs_rt->bm, pattern_ids, n_pattern_id,
|
||||
bool_matcher_results, MAX_HIT_EXPR_NUM);
|
||||
if (bool_matcher_ret < 0) {
|
||||
ret = -1;
|
||||
goto next;
|
||||
}
|
||||
|
||||
if (bool_matcher_ret > (int)n_result) {
|
||||
bool_matcher_ret = n_result;
|
||||
}
|
||||
|
||||
for (int index = 0; index < bool_matcher_ret; index++) {
|
||||
results[index].rule_id = bool_matcher_results[index].expr_id;
|
||||
results[index].user_tag = bool_matcher_results[index].user_tag;
|
||||
}
|
||||
*n_hit_result = bool_matcher_ret;
|
||||
|
||||
next:
|
||||
utarray_clear(matched_pat->pattern_ids);
|
||||
|
||||
return ret;
|
||||
}
|
||||
78
scanner/expr_matcher/adapter_rs/adapter_rs.h
Normal file
78
scanner/expr_matcher/adapter_rs/adapter_rs.h
Normal file
@@ -0,0 +1,78 @@
|
||||
/*
|
||||
**********************************************************************************************
|
||||
* File: adapter_rs.h
|
||||
* Description:
|
||||
* Authors: Liu wentan <liuwentan@geedgenetworks.com>
|
||||
* Date: 2023-06-30
|
||||
* Copyright: (c) Since 2022 Geedge Networks, Ltd. All rights reserved.
|
||||
***********************************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef _ADAPTER_RS_H_
|
||||
#define _ADAPTER_RS_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include "log/log.h"
|
||||
|
||||
#include "../expr_matcher.h"
|
||||
|
||||
int adapter_rs_verify_regex_expression(const char *regex_expr,
|
||||
struct log_handle *logger);
|
||||
|
||||
/**
|
||||
* @brief new adapter_rs instance
|
||||
*
|
||||
* @param rules: logic AND expression's array
|
||||
* @param n_rule: the number of logic AND expression's array
|
||||
* @param n_worker_thread: the number of scan threads which will call adapter_rs_scan()
|
||||
*
|
||||
* @retval the pointer to adapter_rs instance
|
||||
*/
|
||||
void *adapter_rs_new(struct expr_rule *rules, size_t n_rule,
|
||||
size_t n_literal_pattern, size_t n_regex_pattern,
|
||||
size_t n_worker_thread, struct log_handle *logger);
|
||||
|
||||
void adapter_rs_free(void *rs_instance);
|
||||
|
||||
/**
|
||||
* @brief scan input data to match logic AND expression, return all matched expr_id
|
||||
*
|
||||
* @param rs_instance: adapter_rs instance obtained by adapter_rs_new()
|
||||
* @param thread_id: the thread_id of caller
|
||||
* @param scan_data: data to be scanned
|
||||
* @param data_len: the length of data to be scanned
|
||||
* @param result_array: the array to store hit expr_id which allocated by caller
|
||||
* @param n_result_array: number of elements in array of expr_id
|
||||
*/
|
||||
int adapter_rs_scan(void *rs_instance, int thread_id,
|
||||
const char *scan_data, size_t data_len,
|
||||
struct expr_scan_result *result_array,
|
||||
size_t n_result_array, size_t *n_hit_results);
|
||||
|
||||
/**
|
||||
* @brief open a stream-scan handle on rs_instance for the given thread_id
|
||||
*/
|
||||
void *adapter_rs_stream_open(void *rs_instance, int thread_id);
|
||||
|
||||
/**
|
||||
* @brief scan one chunk of data on an opened stream and report the hit expressions
|
||||
*/
|
||||
int adapter_rs_scan_stream(void *rs_stream, const char *scan_data,
|
||||
size_t data_len, struct expr_scan_result *result_array,
|
||||
size_t n_result_array, size_t *n_hit_results);
|
||||
/**
|
||||
* @brief close a stream handle obtained by adapter_rs_stream_open() and free it
|
||||
*/
|
||||
void adapter_rs_stream_close(void *rs_stream);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
235
scanner/expr_matcher/expr_matcher.cpp
Normal file
235
scanner/expr_matcher/expr_matcher.cpp
Normal file
@@ -0,0 +1,235 @@
|
||||
/*
|
||||
**********************************************************************************************
|
||||
* File: expr_matcher.cpp
|
||||
* Description:
|
||||
* Authors: Liu wentan <liuwentan@geedgenetworks.com>
|
||||
* Date: 2023-06-30
|
||||
* Copyright: (c) Since 2023 Geedge Networks, Ltd. All rights reserved.
|
||||
***********************************************************************************************
|
||||
*/
|
||||
|
||||
#include <unistd.h>
|
||||
#include <assert.h>
|
||||
#include <sys/syscall.h>
|
||||
|
||||
#include "log/log.h"
|
||||
#include "expr_matcher.h"
|
||||
#include "maat_utils.h"
|
||||
#include "adapter_hs/adapter_hs.h"
|
||||
#include "adapter_rs/adapter_rs.h"
|
||||
|
||||
/* Return the kernel thread id of the caller, obtained through the raw SYS_gettid syscall. */
pid_t expr_matcher_gettid()
{
    pid_t tid = syscall(SYS_gettid);

    return tid;
}
|
||||
|
||||
static const char *expr_matcher_module_name_str(const char *name)
|
||||
{
|
||||
static __thread char module[64];
|
||||
snprintf(module, sizeof(module), "%s(%d)", name, expr_matcher_gettid());
|
||||
|
||||
return module;
|
||||
}
|
||||
|
||||
#define MODULE_EXPR_MATCHER expr_matcher_module_name_str("maat.expr_matcher")
|
||||
|
||||
struct expr_matcher {
|
||||
enum expr_engine_type engine_type;
|
||||
void *engine;
|
||||
struct log_handle *logger;
|
||||
};
|
||||
|
||||
struct expr_matcher_stream {
|
||||
enum expr_engine_type engine_type;
|
||||
void *handle;
|
||||
};
|
||||
|
||||
struct expr_engine_operations {
|
||||
enum expr_engine_type type;
|
||||
void *(*engine_new)(struct expr_rule *rules, size_t n_rule,
|
||||
size_t n_literal_pattern, size_t n_regex_pattern,
|
||||
size_t n_worker_thread, struct log_handle *logger);
|
||||
void (*engine_free)(void *engine);
|
||||
int (*engine_scan)(void *engine, int thread_id, const char *scan_data,
|
||||
size_t data_len, struct expr_scan_result *result_array,
|
||||
size_t n_result_array, size_t *n_hit_result);
|
||||
void *(*engine_stream_open)(void *engine, int thread_id);
|
||||
void (*engine_stream_close)(void *stream);
|
||||
int (*engine_scan_stream)(void *stream, const char *scan_data, size_t data_len,
|
||||
struct expr_scan_result *result_array, size_t n_result_array,
|
||||
size_t *n_hit_result);
|
||||
};
|
||||
|
||||
struct expr_engine_operations expr_engine_ops[EXPR_ENGINE_TYPE_MAX] = {
|
||||
{
|
||||
.type = EXPR_ENGINE_TYPE_HS,
|
||||
.engine_new = adapter_hs_new,
|
||||
.engine_free = adapter_hs_free,
|
||||
.engine_scan = adapter_hs_scan,
|
||||
.engine_stream_open = adapter_hs_stream_open,
|
||||
.engine_stream_close = adapter_hs_stream_close,
|
||||
.engine_scan_stream = adapter_hs_scan_stream
|
||||
},
|
||||
{
|
||||
.type = EXPR_ENGINE_TYPE_RS,
|
||||
.engine_new = adapter_rs_new,
|
||||
.engine_free = adapter_rs_free,
|
||||
.engine_scan = adapter_rs_scan,
|
||||
.engine_stream_open = adapter_rs_stream_open,
|
||||
.engine_stream_close = adapter_rs_stream_close,
|
||||
.engine_scan_stream = adapter_rs_scan_stream
|
||||
}
|
||||
};
|
||||
|
||||
int expr_matcher_verify_regex_expression(const char *regex_expr,
|
||||
struct log_handle *logger)
|
||||
{
|
||||
int ret = adapter_hs_verify_regex_expression(regex_expr, logger);
|
||||
if (ret == 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
return adapter_rs_verify_regex_expression(regex_expr, logger);
|
||||
}
|
||||
|
||||
struct expr_matcher *
|
||||
expr_matcher_new(struct expr_rule *rules, size_t n_rule, enum expr_engine_type engine_type,
|
||||
size_t n_worker_thread, struct log_handle *logger)
|
||||
{
|
||||
if (NULL == rules || 0 == n_rule || 0 == n_worker_thread ||
|
||||
(engine_type != EXPR_ENGINE_TYPE_HS && engine_type != EXPR_ENGINE_TYPE_RS)) {
|
||||
log_error(logger, MODULE_EXPR_MATCHER, "[%s:%d]engine type:%d is illegal",
|
||||
__FUNCTION__, __LINE__, engine_type);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
size_t i = 0, j = 0;
|
||||
size_t literal_pat_num = 0;
|
||||
size_t regex_pat_num = 0;
|
||||
|
||||
for (i = 0; i < n_rule; i++) {
|
||||
if (rules[i].n_patterns > MAX_EXPR_PATTERN_NUM) {
|
||||
log_error(logger, MODULE_EXPR_MATCHER,
|
||||
"[%s:%d] the number of patterns in one expression should less than"
|
||||
" %d", __FUNCTION__, __LINE__, MAX_EXPR_PATTERN_NUM);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
for (j = 0; j < rules[i].n_patterns; j++) {
|
||||
/* pat_len should not 0 */
|
||||
if (0 == rules[i].patterns[j].pat_len) {
|
||||
log_error(logger, MODULE_EXPR_MATCHER,
|
||||
"[%s:%d] expr pattern length should not 0",
|
||||
__FUNCTION__, __LINE__);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (rules[i].patterns[j].type == EXPR_PATTERN_TYPE_STR) {
|
||||
literal_pat_num++;
|
||||
} else {
|
||||
regex_pat_num++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (0 == literal_pat_num && 0 == regex_pat_num) {
|
||||
log_error(logger, MODULE_EXPR_MATCHER,
|
||||
"[%s:%d] exprs has no valid pattern", __FUNCTION__, __LINE__);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void *engine = expr_engine_ops[engine_type].engine_new(rules, n_rule, literal_pat_num,
|
||||
regex_pat_num, n_worker_thread,
|
||||
logger);
|
||||
if (NULL == engine) {
|
||||
log_error(logger, MODULE_EXPR_MATCHER,
|
||||
"[%s:%d]expr_matcher engine_new failed.", __FUNCTION__, __LINE__);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
struct expr_matcher *matcher = ALLOC(struct expr_matcher, 1);
|
||||
matcher->engine_type = engine_type;
|
||||
matcher->engine = engine;
|
||||
matcher->logger = logger;
|
||||
|
||||
return matcher;
|
||||
}
|
||||
|
||||
void expr_matcher_free(struct expr_matcher *matcher)
|
||||
{
|
||||
if (NULL == matcher) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (matcher->engine != NULL) {
|
||||
expr_engine_ops[matcher->engine_type].engine_free(matcher->engine);
|
||||
matcher->engine = NULL;
|
||||
}
|
||||
|
||||
FREE(matcher);
|
||||
}
|
||||
|
||||
int expr_matcher_match(struct expr_matcher *matcher, int thread_id, const char *scan_data,
|
||||
size_t data_len, struct expr_scan_result *result_array,
|
||||
size_t n_result_array, size_t *n_hit_results)
|
||||
{
|
||||
if (NULL == matcher || thread_id < 0 || NULL == scan_data || 0 == data_len
|
||||
|| NULL == result_array || 0 == n_result_array || NULL == n_hit_results) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
return expr_engine_ops[matcher->engine_type].engine_scan(matcher->engine, thread_id,
|
||||
scan_data, data_len, result_array,
|
||||
n_result_array, n_hit_results);
|
||||
}
|
||||
|
||||
struct expr_matcher_stream *
|
||||
expr_matcher_stream_open(struct expr_matcher *matcher, int thread_id)
|
||||
{
|
||||
if (NULL == matcher || thread_id < 0) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void *s_handle = expr_engine_ops[matcher->engine_type].engine_stream_open(matcher->engine,
|
||||
thread_id);
|
||||
if (NULL == s_handle) {
|
||||
log_error(matcher->logger, MODULE_EXPR_MATCHER,
|
||||
"[%s:%d] expr_matcher engine_stream_open failed.",
|
||||
__FUNCTION__, __LINE__);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
struct expr_matcher_stream *stream = ALLOC(struct expr_matcher_stream, 1);
|
||||
stream->engine_type = matcher->engine_type;
|
||||
stream->handle = s_handle;
|
||||
|
||||
return stream;
|
||||
}
|
||||
|
||||
int expr_matcher_stream_match(struct expr_matcher_stream *stream, const char *scan_data,
|
||||
size_t data_len, struct expr_scan_result *result_array,
|
||||
size_t n_result_array, size_t *n_hit_results)
|
||||
{
|
||||
if (NULL == stream || NULL == scan_data || 0 == data_len || NULL == result_array
|
||||
|| 0 == n_result_array || NULL == n_hit_results) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
return expr_engine_ops[stream->engine_type].engine_scan_stream(stream->handle, scan_data,
|
||||
data_len, result_array,
|
||||
n_result_array, n_hit_results);
|
||||
}
|
||||
|
||||
void expr_matcher_stream_close(struct expr_matcher_stream *stream)
|
||||
{
|
||||
if (NULL == stream) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (stream->handle != NULL) {
|
||||
expr_engine_ops[stream->engine_type].engine_stream_close(stream->handle);
|
||||
stream->handle = NULL;
|
||||
}
|
||||
|
||||
FREE(stream);
|
||||
}
|
||||
134
scanner/expr_matcher/expr_matcher.h
Normal file
134
scanner/expr_matcher/expr_matcher.h
Normal file
@@ -0,0 +1,134 @@
|
||||
/*
|
||||
**********************************************************************************************
|
||||
* File: expr_matcher.h
|
||||
* Description:
|
||||
* Authors: Liu wentan <liuwentan@geedgenetworks.com>
|
||||
* Date: 2023-06-30
|
||||
* Copyright: (c) Since 2023 Geedge Networks, Ltd. All rights reserved.
|
||||
***********************************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef _EXPR_MATCHER_H_
|
||||
#define _EXPR_MATCHER_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include "log/log.h"
|
||||
|
||||
#define MAX_EXPR_PATTERN_NUM 8 /* each AND expression consists of at most MAX_EXPR_PATTERN_NUM patterns */
|
||||
#define MAX_HIT_EXPR_NUM 1024
|
||||
|
||||
/* Matching backends an expr_matcher can be created with. */
enum expr_engine_type {
    /* hyperscan backend, the default engine */
    EXPR_ENGINE_TYPE_HS = 0,
    /* rulescan backend */
    EXPR_ENGINE_TYPE_RS = 1,
    /* number of engine types listed above */
    EXPR_ENGINE_TYPE_MAX
};
|
||||
|
||||
/* How the text of a pattern is to be interpreted. */
enum expr_pattern_type {
    /* pure literal string */
    EXPR_PATTERN_TYPE_STR = 0,
    /* regular expression */
    EXPR_PATTERN_TYPE_REG = 1
};
|
||||
|
||||
/* Case handling requested for a pattern. */
enum expr_case_sensitive {
    EXPR_CASE_INSENSITIVE = 0,
    EXPR_CASE_SENSITIVE = 1
};
|
||||
|
||||
/* Where in the scanned data a pattern has to be found. */
enum expr_match_mode {
    EXPR_MATCH_MODE_INVALID = -1,
    /* scan data must match the pattern exactly */
    EXPR_MATCH_MODE_EXACTLY = 1,
    /* pattern must be at the head of scan_data */
    EXPR_MATCH_MODE_PREFIX = 2,
    /* pattern must be at the tail of scan_data */
    EXPR_MATCH_MODE_SUFFIX = 3,
    /* pattern must be inside the range [l_offset, r_offset] of scan_data */
    EXPR_MATCH_MODE_SUB = 4
};
|
||||
|
||||
struct expr_pattern {
|
||||
enum expr_pattern_type type;
|
||||
enum expr_match_mode match_mode;
|
||||
enum expr_case_sensitive case_sensitive;
|
||||
|
||||
/*
|
||||
* just match in scan_data's range of [start_offset, end_offset], -1 means no limits
|
||||
* for example:
|
||||
* [-1, end_offset] means the pattern must in scan_data's [0 ~ start_offset]
|
||||
* [start_offset, -1] means the pattern must in scan_data's [start_offset ~ data_end]
|
||||
*/
|
||||
int start_offset;
|
||||
int end_offset;
|
||||
|
||||
char *pat;
|
||||
size_t pat_len;
|
||||
};
|
||||
|
||||
/* One entry of the result array filled by a scan. */
struct expr_scan_result {
    /* NOTE(review): presumably the expr_id of the expression that hit - confirm in the engines */
    long long rule_id;
    /* NOTE(review): presumably the tag of the hit expr_rule - confirm in the engines */
    void *user_tag;
};
|
||||
|
||||
/* logic AND expression, such as (rule1 & rule2) */
|
||||
struct expr_rule {
|
||||
long long expr_id; /* AND expression ID */
|
||||
size_t n_patterns;
|
||||
struct expr_pattern patterns[MAX_EXPR_PATTERN_NUM];
|
||||
void *tag; /* user defined data, return with hit result */
|
||||
};
|
||||
|
||||
/**
 * @brief verify a regex expression
 *
 * NOTE(review): the implementation is not visible from this header; the meaning
 * of the return value should be confirmed there.
 *
 * @param regex_expr: the regex expression to verify
 * @param logger: log handle passed to the implementation
 */
int
expr_matcher_verify_regex_expression(const char *regex_expr, struct log_handle *logger);
|
||||
|
||||
/**
 * @brief new expr matcher instance
 *
 * @param rules: array of logic AND expressions
 * @param n_rule: the number of elements in rules
 * @param type: the engine to use, see enum expr_engine_type
 * @param n_worker_thread: the number of scan threads which will scan with the
 *                         returned matcher
 * @param logger: log handle stored in the matcher and used for error reporting
 *
 * @return the new matcher instance, or NULL on failure
 */
struct expr_matcher *expr_matcher_new(struct expr_rule *rules, size_t n_rule,
                                      enum expr_engine_type type, size_t n_worker_thread,
                                      struct log_handle *logger);
|
||||
|
||||
/**
 * @brief free an expr matcher instance; a NULL matcher is ignored
 *
 * @param matcher: instance to release; when it holds an engine, the engine is
 *                 released first through the engine_free callback of its engine type
 */
void expr_matcher_free(struct expr_matcher *matcher);
|
||||
|
||||
/**
 * @brief scan input data to match logic AND expressions, return all matched expr_id
 *
 * @param matcher: expr_matcher instance obtained by expr_matcher_new()
 * @param thread_id: the thread_id of caller, must not be negative
 * @param scan_data: data to be scanned
 * @param data_len: the length of data to be scanned, must not be 0
 * @param result_array: the array to store hit results, allocated by caller
 * @param n_result_array: number of elements in result_array, must not be 0
 * @param n_hit_results: output, must not be NULL; presumably receives the number
 *                       of hits stored in result_array (set by the engine)
 *
 * @return -1 if an argument is invalid, otherwise the value returned by the
 *         engine's scan callback
 */
int expr_matcher_match(struct expr_matcher *matcher, int thread_id,
                       const char *scan_data, size_t data_len,
                       struct expr_scan_result *result_array, size_t n_result_array,
                       size_t *n_hit_results);
|
||||
|
||||
/**
 * @brief open a scanning stream on a matcher for one thread
 *
 * @param matcher: expr_matcher instance obtained by expr_matcher_new()
 * @param thread_id: the thread_id of caller, must not be negative
 *
 * @return the stream, to be released with expr_matcher_stream_close(); NULL if
 *         an argument is invalid or the engine fails to open its stream
 */
struct expr_matcher_stream *expr_matcher_stream_open(struct expr_matcher *matcher,
                                                     int thread_id);
|
||||
|
||||
/**
 * @brief scan input data on a stream opened by expr_matcher_stream_open()
 *
 * @param stream: stream obtained by expr_matcher_stream_open()
 * @param scan_data: data to be scanned
 * @param data_len: the length of data to be scanned, must not be 0
 * @param result_array: the array to store hit results, allocated by caller
 * @param n_result_array: number of elements in result_array, must not be 0
 * @param n_hit_results: output, must not be NULL; presumably receives the number
 *                       of hits stored in result_array (set by the engine)
 *
 * @return -1 if an argument is invalid, otherwise the value returned by the
 *         engine's stream scan callback
 */
int expr_matcher_stream_match(struct expr_matcher_stream *stream,
                              const char *scan_data, size_t data_len,
                              struct expr_scan_result *result_array,
                              size_t n_result_array, size_t *n_hit_results);
|
||||
|
||||
/**
 * @brief close a stream opened by expr_matcher_stream_open() and free it
 *
 * @param stream: stream to close; a NULL stream is ignored
 */
void
expr_matcher_stream_close(struct expr_matcher_stream *stream);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
Reference in New Issue
Block a user