This repository has been archived on 2025-09-14. You can view files and clone it, but cannot push or open issues or pull requests.
Files
tango-maat/scanner/expr_matcher/adapter_rs/adapter_rs.cpp

718 lines
22 KiB
C++
Raw Normal View History

/*
**********************************************************************************************
* File: adapter_rs.cpp
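* Description: Expression-matcher adapter for the rulescan (rs) engine; compiles literal and regex patterns and resolves pattern hits into expressions through bool_matcher.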
* Authors: Liu wentan <liuwentan@geedgenetworks.com>
* Date: 2022-10-31
* Copyright: (c) Since 2022 Geedge Networks, Ltd. All rights reserved.
***********************************************************************************************
*/
#include <stdint.h>
#include <stdio.h>
#include <stddef.h>
#include <assert.h>
#include <unistd.h>
#include <sys/syscall.h>
#include "rulescan.h"
#include "adapter_rs.h"
#include "uthash/utarray.h"
#include "uthash/uthash.h"
#include "maat_utils.h"
#include "../../bool_matcher/bool_matcher.h"
/* Cap on the number of distinct pattern ids collected during one scan; also
 * the capacity reserved up front for every per-thread hit array. */
#define MAX_HIT_PATTERN_NUM 512
/**
 * @brief Ask the kernel for the id of the calling thread via SYS_gettid.
 *
 * @retval the result of syscall(SYS_gettid), converted to pid_t
 */
pid_t rs_gettid()
{
    const long raw_tid = syscall(SYS_gettid);

    return (pid_t)raw_tid;
}
/**
 * @brief Format the log module tag "<name>(<tid>)" for the calling thread.
 *
 * The text lives in a 64-byte thread-local buffer that is rewritten on every
 * call, so the returned pointer is only meaningful until the same thread
 * calls this function again. snprintf truncates tags that do not fit.
 *
 * @param name module name printed in front of the thread id
 *
 * @retval pointer to the thread-local buffer holding the tag
 */
static const char *rs_module_name_str(const char *name)
{
    static __thread char tag_buf[64];
    const pid_t tid = rs_gettid();

    snprintf(tag_buf, sizeof(tag_buf), "%s(%d)", name, tid);

    return tag_buf;
}

/* Module tag passed to the log_* calls of this file. */
#define MODULE_ADAPTER_RS rs_module_name_str("maat.adapter_rs")
/* Batch of patterns handed to one rulescan compile call (literal or regex). */
struct adpt_rs_compile_data {
/* array of n_patterns entries; each entry's pattern buffer is allocated by populate_compile_data() */
struct scan_pattern *patterns;
/* number of entries allocated in patterns */
size_t n_patterns;
};
/* Stream-mode scan handle bound to one worker thread. */
struct adapter_rs_stream {
/* worker thread index; selects the per-thread hit collector and bool-match buffer of the runtime */
int thread_id;
/* NOTE(review): no function visible in this file reads or updates this field */
size_t offset; /* current stream offset */
/* rulescan stream on the literal database; only opened when the runtime has a literal database */
rs_stream_t *literal_stream;
/* rulescan stream on the regex database; only opened when the runtime has a regex database */
rs_stream_t *regex_stream;
/* borrowed runtime of the owning instance; adapter_rs_stream_close() does not free it */
struct adapter_rs_runtime *ref_rs_rt;
/* logger copied from the owning instance */
struct log_handle *logger;
};
/* adapter_rs runtime: compiled databases, the bool matcher and per-thread scan state */
struct adapter_rs_runtime {
/* database compiled from the literal patterns; only built when literal patterns were supplied */
rs_database_t *literal_db;
/* database compiled from the regex patterns; only built when regex patterns were supplied */
rs_database_t *regex_db;
/* one array of MAX_HIT_EXPR_NUM bool_expr_match entries per worker thread */
struct bool_expr_match **bool_match_buffs; /* per thread */
/* one stream per worker thread, opened by adapter_rs_new() */
struct adapter_rs_stream **streams; /* per thread */
/* one hit collector per worker thread, filled by matched_event_cb() */
struct matched_pattern **matched_pats; /* per thread */
/* bool matcher built over all expressions; maps hit pattern ids to expressions */
struct bool_matcher *bm;
};
/* adapter_rs instance */
struct adapter_rs {
/* number of worker threads; length of every per-thread array in rs_rt */
size_t n_worker_thread;
/* number of expression rules the instance was built from */
size_t n_expr;
/* number of patterns actually converted, as reported by bool_exprs_new() */
size_t n_patterns;
/* runtime state owned by the instance, released by adapter_rs_free() */
struct adapter_rs_runtime *rs_rt;
/* per-pattern attribute table indexed by pattern id; owned by the instance */
struct pattern_attribute *rs_attr;
/* logger supplied by the caller of adapter_rs_new() */
struct log_handle *logger;
};
struct pattern_offset {
long long start;
long long end;
};
/* Per-pattern attributes consulted by matched_event_cb() to accept or reject a raw hit. */
struct pattern_attribute {
/* id assigned by bool_exprs_new(); equals this entry's index in the attribute table */
long long pattern_id;
/* match mode copied from the rule's pattern */
enum expr_match_mode match_mode;
/* only filled in by bool_exprs_new() for EXPR_MATCH_MODE_SUB and EXPR_MATCH_MODE_EXACTLY */
struct pattern_offset offset;
/* pattern length copied from the rule's pattern */
size_t pattern_len;
};
/* Per-thread collector of the pattern ids accepted during one scan. */
struct matched_pattern {
/* unsigned long long ids; matched_event_cb() re-sorts the array after every insertion */
UT_array *pattern_ids;
/* number of patterns of the owning instance; used to range-check incoming ids */
size_t n_patterns;
/* borrowed pointer to the instance's attribute table, indexed by pattern id */
struct pattern_attribute *ref_rs_attr;
};
/**
 * @brief Run rs_verify_regex() on an expression and log it when rejected.
 *
 * @param regex_expr expression text to verify
 * @param logger     log handle that receives the error message
 *
 * @retval the value returned by rs_verify_regex(); 0 is handled as "illegal"
 */
int adapter_rs_verify_regex_expression(const char *regex_expr,
                                       struct log_handle *logger)
{
    const int verdict = rs_verify_regex(regex_expr);

    if (0 != verdict) {
        return verdict;
    }

    log_error(logger, MODULE_ADAPTER_RS,
              "[%s:%d] illegal regex expression: \"%s\"",
              __FUNCTION__, __LINE__, regex_expr);

    return verdict;
}
/**
 * @brief compile the literal and the regex pattern batch into the rulescan
 *        databases of the runtime
 *
 * @param rs_rt           runtime receiving literal_db / regex_db
 * @param n_worker_thread worker thread count, forwarded to rs_compile_regex()
 * @param literal_cd      literal patterns, NULL skips the literal database
 * @param regex_cd        regex patterns, NULL skips the regex database
 * @param logger          log handle used to report compile errors
 *
 * @retval 0(success) -1(failed)
 */
static int adpt_rs_build_database(struct adapter_rs_runtime *rs_rt,
                                  size_t n_worker_thread,
                                  struct adpt_rs_compile_data *literal_cd,
                                  struct adpt_rs_compile_data *regex_cd,
                                  struct log_handle *logger)
{
    int status = 0;

    if (NULL == rs_rt) {
        return -1;
    }

    if (literal_cd != NULL) {
        status = rs_compile_lit(literal_cd->patterns, literal_cd->n_patterns,
                                &rs_rt->literal_db);
        if (status < 0) {
            log_error(logger, MODULE_ADAPTER_RS, "[%s:%d] compile error",
                      __FUNCTION__, __LINE__);
            return -1;
        }
    }

    if (NULL == regex_cd) {
        return 0;
    }

    /* written by rs_compile_regex(); the value is not consulted afterwards */
    size_t n_failed_pats = 0;

    status = rs_compile_regex(regex_cd->patterns, regex_cd->n_patterns,
                              n_worker_thread, &rs_rt->regex_db, &n_failed_pats);
    if (status < 0) {
        log_error(logger, MODULE_ADAPTER_RS, "[%s:%d] compile error",
                  __FUNCTION__, __LINE__);
        return -1;
    }

    return 0;
}
/**
 * @brief Allocate a compile batch with room for n_patterns pattern entries.
 *
 * Allocation results are not checked here.
 *
 * @param n_patterns number of scan_pattern entries to allocate
 *
 * @retval the new batch; release it with adpt_rs_compile_data_free()
 */
static struct adpt_rs_compile_data *adpt_rs_compile_data_new(size_t n_patterns)
{
    struct adpt_rs_compile_data *batch = ALLOC(struct adpt_rs_compile_data, 1);

    batch->n_patterns = n_patterns;
    batch->patterns = ALLOC(struct scan_pattern, n_patterns);

    return batch;
}
static void adpt_rs_compile_data_free(struct adpt_rs_compile_data *rs_cd)
{
if (NULL == rs_cd) {
return;
}
if (rs_cd->patterns != NULL) {
for (size_t i = 0; i < rs_cd->n_patterns; i++) {
if (rs_cd->patterns[i].pattern != NULL) {
FREE(rs_cd->patterns[i].pattern);
}
}
FREE(rs_cd->patterns);
}
FREE(rs_cd);
}
static void populate_compile_data(struct adpt_rs_compile_data *compile_data,
size_t index, long long pattern_id, char *pat,
size_t pat_len, int case_sensitive)
{
compile_data->patterns[index].id = pattern_id;
compile_data->patterns[index].case_sensitive = case_sensitive;
compile_data->patterns[index].pattern = ALLOC(char, pat_len + 1);
memcpy(compile_data->patterns[index].pattern, pat, pat_len);
compile_data->patterns[index].pattern_len = pat_len;
}
static struct bool_expr *bool_exprs_new(struct expr_rule *rules, size_t n_rule,
struct pattern_attribute *pattern_attr,
struct adpt_rs_compile_data *literal_cd,
struct adpt_rs_compile_data *regex_cd,
size_t *n_pattern)
{
long long pattern_idx = 0;
size_t literal_idx = 0;
size_t regex_idx = 0;
struct bool_expr *bool_exprs = ALLOC(struct bool_expr, n_rule);
/* populate adpt_hs_compile_data and bool_expr */
for (size_t i = 0; i < n_rule; i++) {
for (size_t j = 0; j < rules[i].n_patterns; j++) {
pattern_attr[pattern_idx].pattern_id = pattern_idx;
pattern_attr[pattern_idx].match_mode = rules[i].patterns[j].match_mode;
pattern_attr[pattern_idx].pattern_len = rules[i].patterns[j].pat_len;
if (pattern_attr[pattern_idx].match_mode == EXPR_MATCH_MODE_SUB ||
pattern_attr[pattern_idx].match_mode == EXPR_MATCH_MODE_EXACTLY) {
pattern_attr[pattern_idx].offset.start = rules[i].patterns[j].start_offset;
pattern_attr[pattern_idx].offset.end = rules[i].patterns[j].end_offset;
}
/* literal pattern */
if (rules[i].patterns[j].type == EXPR_PATTERN_TYPE_STR) {
populate_compile_data(literal_cd, literal_idx, pattern_idx,
rules[i].patterns[j].pat, rules[i].patterns[j].pat_len,
rules[i].patterns[j].case_sensitive);
literal_idx++;
} else {
/* regex pattern */
populate_compile_data(regex_cd, regex_idx, pattern_idx,
rules[i].patterns[j].pat, rules[i].patterns[j].pat_len,
rules[i].patterns[j].case_sensitive);
regex_idx++;
}
bool_exprs[i].items[j].item_id = pattern_idx++;
bool_exprs[i].items[j].not_flag = 0;
}
bool_exprs[i].expr_id = rules[i].expr_id;
bool_exprs[i].item_num = rules[i].n_patterns;
bool_exprs[i].user_tag = rules[i].tag;
}
*n_pattern = pattern_idx;
return bool_exprs;
}
/* utarray element descriptor for the hit arrays: elements are plain
 * unsigned long long values and the three hook slots are left NULL. */
UT_icd ut_rs_pattern_id_icd = {sizeof(unsigned long long), NULL, NULL, NULL};
/**
 * @brief Build an adapter_rs instance from a set of expression rules.
 *
 * Steps: collect the patterns of all rules, build the bool matcher over the
 * expressions, compile the literal and regex databases, then allocate the
 * per-thread buffers, streams and hit collectors.
 *
 * @param rules             expression rules
 * @param n_rule            number of entries in rules
 * @param n_literal_pattern number of literal patterns contained in rules
 * @param n_regex_pattern   number of regex patterns contained in rules
 * @param n_worker_thread   number of worker threads that will scan
 * @param logger            log handle kept by the instance
 *
 * @retval the new instance (release with adapter_rs_free()), or NULL when the
 *         rules could not be converted, the bool matcher could not be built
 *         or a database failed to compile
 */
void *adapter_rs_new(struct expr_rule *rules, size_t n_rule,
                     size_t n_literal_pattern, size_t n_regex_pattern,
                     size_t n_worker_thread, struct log_handle *logger)
{
    size_t i = 0;
    struct adpt_rs_compile_data *literal_cd = NULL;
    struct adpt_rs_compile_data *regex_cd = NULL;

    if (n_literal_pattern > 0) {
        literal_cd = adpt_rs_compile_data_new(n_literal_pattern);
    }

    if (n_regex_pattern > 0) {
        regex_cd = adpt_rs_compile_data_new(n_regex_pattern);
    }

    /* capacity of the attribute table; bool_exprs_new() reports the number
       of patterns actually converted through the same variable */
    size_t pattern_cnt = n_literal_pattern + n_regex_pattern;

    struct adapter_rs *rs_inst = ALLOC(struct adapter_rs, 1);
    rs_inst->rs_attr = ALLOC(struct pattern_attribute, pattern_cnt);
    rs_inst->logger = logger;
    rs_inst->n_worker_thread = n_worker_thread;
    rs_inst->n_expr = n_rule;

    struct bool_expr *bool_exprs = bool_exprs_new(rules, n_rule, rs_inst->rs_attr,
                                                  literal_cd, regex_cd, &pattern_cnt);
    if (NULL == bool_exprs) {
        /* Only the compile batches, the attribute table and the bare instance
           exist at this point; release them instead of leaking them. */
        adpt_rs_compile_data_free(literal_cd);
        adpt_rs_compile_data_free(regex_cd);
        FREE(rs_inst->rs_attr);
        FREE(rs_inst);
        return NULL;
    }
    rs_inst->n_patterns = pattern_cnt;

    /* create bool matcher */
    size_t mem_size = 0;
    int rs_ret = 0;

    rs_inst->rs_rt = ALLOC(struct adapter_rs_runtime, 1);

    rs_inst->rs_rt->bm = bool_matcher_new(bool_exprs, n_rule, &mem_size);
    if (rs_inst->rs_rt->bm != NULL) {
        log_info(logger, MODULE_ADAPTER_RS,
                 "Adapter_hs module: build bool matcher of %zu expressions"
                 " with %zu bytes memory", n_rule, mem_size);
    } else {
        log_error(logger, MODULE_ADAPTER_RS,
                  "[%s:%d] Adapter_hs module: build bool matcher failed",
                  __FUNCTION__, __LINE__);
        rs_ret = -1;
    }
    FREE(bool_exprs);

    /* build the rulescan databases rs_rt->literal_db & rs_rt->regex_db */
    int ret = adpt_rs_build_database(rs_inst->rs_rt, n_worker_thread,
                                     literal_cd, regex_cd, logger);
    if (ret < 0) {
        rs_ret = -1;
    }

    /* the compile batches are released whether or not compilation succeeded */
    adpt_rs_compile_data_free(literal_cd);
    adpt_rs_compile_data_free(regex_cd);

    if (rs_ret < 0) {
        goto error;
    }

    /* per-thread output buffers of the bool matcher */
    rs_inst->rs_rt->bool_match_buffs = ALLOC(struct bool_expr_match *, n_worker_thread);
    for (i = 0; i < n_worker_thread; i++) {
        rs_inst->rs_rt->bool_match_buffs[i] = ALLOC(struct bool_expr_match, MAX_HIT_EXPR_NUM);
    }

    /* one stream per worker thread; a failed open leaves a NULL slot */
    rs_inst->rs_rt->streams = ALLOC(struct adapter_rs_stream *, n_worker_thread);
    for (i = 0; i < n_worker_thread; i++) {
        rs_inst->rs_rt->streams[i] =
            (struct adapter_rs_stream *)adapter_rs_stream_open(rs_inst, (int)i);
    }

    /* per-thread collectors filled by the match callback */
    rs_inst->rs_rt->matched_pats = ALLOC(struct matched_pattern *, n_worker_thread);
    for (i = 0; i < n_worker_thread; i++) {
        rs_inst->rs_rt->matched_pats[i] = ALLOC(struct matched_pattern, 1);
        rs_inst->rs_rt->matched_pats[i]->ref_rs_attr = rs_inst->rs_attr;
        rs_inst->rs_rt->matched_pats[i]->n_patterns = rs_inst->n_patterns;
        utarray_new(rs_inst->rs_rt->matched_pats[i]->pattern_ids, &ut_rs_pattern_id_icd);
        utarray_reserve(rs_inst->rs_rt->matched_pats[i]->pattern_ids, MAX_HIT_PATTERN_NUM);
    }

    return rs_inst;

error:
    adapter_rs_free(rs_inst);
    return NULL;
}
void adapter_rs_free(void *rs_instance)
{
if (NULL == rs_instance) {
return;
}
size_t i = 0;
struct adapter_rs *rs_inst = (struct adapter_rs *)rs_instance;
if (rs_inst->rs_rt != NULL) {
if (rs_inst->rs_rt->literal_db != NULL) {
rs_free_database(rs_inst->rs_rt->literal_db);
rs_inst->rs_rt->literal_db = NULL;
}
if (rs_inst->rs_rt->regex_db != NULL) {
rs_free_database(rs_inst->rs_rt->regex_db);
rs_inst->rs_rt->regex_db = NULL;
}
if (rs_inst->rs_rt->bool_match_buffs != NULL) {
for (i = 0; i < rs_inst->n_worker_thread; i++) {
if (rs_inst->rs_rt->bool_match_buffs[i] != NULL) {
FREE(rs_inst->rs_rt->bool_match_buffs[i]);
}
}
FREE(rs_inst->rs_rt->bool_match_buffs);
}
if (rs_inst->rs_rt->bm != NULL) {
bool_matcher_free(rs_inst->rs_rt->bm);
rs_inst->rs_rt->bm = NULL;
}
if (rs_inst->rs_rt->streams != NULL) {
for (i = 0; i < rs_inst->n_worker_thread; i++) {
if (rs_inst->rs_rt->streams[i] != NULL) {
adapter_rs_stream_close(rs_inst->rs_rt->streams[i]);
rs_inst->rs_rt->streams[i] = NULL;
}
}
FREE(rs_inst->rs_rt->streams);
}
if (rs_inst->rs_rt->matched_pats != NULL) {
for (i = 0; i < rs_inst->n_worker_thread; i++) {
if (rs_inst->rs_rt->matched_pats[i] != NULL) {
utarray_free(rs_inst->rs_rt->matched_pats[i]->pattern_ids);
FREE(rs_inst->rs_rt->matched_pats[i]);
}
}
FREE(rs_inst->rs_rt->matched_pats);
}
FREE(rs_inst->rs_rt);
}
if (rs_inst->rs_attr != NULL) {
FREE(rs_inst->rs_attr);
}
FREE(rs_inst);
}
/**
 * @brief Three-way comparison of two pattern ids for qsort/bsearch.
 *
 * The ids are compared directly. Subtracting them and looking at the sign
 * gives the wrong order once two ids differ by more than LLONG_MAX, because
 * the unsigned difference wraps when converted to a signed value.
 *
 * @param a points to an unsigned long long id
 * @param b points to an unsigned long long id
 *
 * @retval -1 when *a < *b, 0 when equal, 1 when *a > *b
 */
static inline int compare_pattern_id(const void *a, const void *b)
{
    const unsigned long long lhs = *(const unsigned long long *)a;
    const unsigned long long rhs = *(const unsigned long long *)b;

    return (lhs > rhs) - (lhs < rhs);
}
/**
* @param id: pattern id
*/
static int matched_event_cb(unsigned int id, int pos_offset, int from, int to,
size_t data_len, void *ctx)
{
// put id in set
unsigned long long pattern_id = id;
struct matched_pattern *matched_pat = (struct matched_pattern *)ctx;
if (pattern_id > matched_pat->n_patterns || id < 0) {
return 0;
}
if (utarray_len(matched_pat->pattern_ids) >= MAX_HIT_PATTERN_NUM) {
return 0;
}
// duplicate pattern_id
if (utarray_find(matched_pat->pattern_ids, &pattern_id, compare_pattern_id)) {
return 0;
}
int ret = 0;
struct pattern_attribute pat_attr = matched_pat->ref_rs_attr[id];
switch (pat_attr.match_mode) {
case EXPR_MATCH_MODE_EXACTLY:
if (0 == (from + pos_offset) && (int)data_len == (to + pos_offset)) {
ret = 1;
}
break;
case EXPR_MATCH_MODE_SUB:
if (pat_attr.offset.start == -1 &&
pat_attr.offset.end == -1) {
ret = 1;
break;
}
if (pat_attr.offset.start == -1) {
if ((long long)(to + pos_offset - 1) <= pat_attr.offset.end) {
ret = 1;
break;
}
}
if (pat_attr.offset.end == -1) {
if ((long long)(from + pos_offset) >= pat_attr.offset.start) {
ret = 1;
break;
}
}
if ((long long)(from + pos_offset) >= pat_attr.offset.start &&
(long long)(to + pos_offset - 1) <= pat_attr.offset.end) {
ret = 1;
}
break;
case EXPR_MATCH_MODE_PREFIX:
if (0 == (from + pos_offset)) {
ret = 1;
}
break;
case EXPR_MATCH_MODE_SUFFIX:
if ((to + pos_offset) == (int)data_len) {
ret = 1;
}
break;
default:
break;
}
if (1 == ret) {
utarray_push_back(matched_pat->pattern_ids, &pattern_id);
utarray_sort(matched_pat->pattern_ids, compare_pattern_id);
}
return 0;
}
void *adapter_rs_stream_open(void *rs_instance, int thread_id)
{
if (NULL == rs_instance || thread_id < 0) {
return NULL;
}
struct adapter_rs *rs_inst = (struct adapter_rs *)rs_instance;
struct adapter_rs_stream *rs_stream = ALLOC(struct adapter_rs_stream, 1);
rs_stream->logger = rs_inst->logger;
rs_stream->thread_id = thread_id;
rs_stream->ref_rs_rt = rs_inst->rs_rt;
int err_count = 0;
if (rs_inst->rs_rt->literal_db != NULL) {
rs_stream->literal_stream = rs_open_stream(rs_inst->rs_rt->literal_db, 0, 128);
if (NULL == rs_stream->literal_stream) {
log_error(rs_inst->logger, MODULE_ADAPTER_RS, "rs_open_stream failed");
err_count++;
}
}
if (rs_inst->rs_rt->regex_db != NULL) {
rs_stream->regex_stream = rs_open_stream(rs_inst->rs_rt->regex_db, 0, 128);
if (NULL == rs_stream->regex_stream) {
log_error(rs_inst->logger, MODULE_ADAPTER_RS, "rs_open_stream failed");
err_count++;
}
}
if (err_count > 0) {
goto error;
}
return rs_stream;
error:
if (rs_stream->literal_stream != NULL) {
rs_close_stream(rs_stream->literal_stream);
rs_stream->literal_stream = NULL;
}
if (rs_stream->regex_stream != NULL) {
rs_close_stream(rs_stream->regex_stream);
rs_stream->regex_stream = NULL;
}
FREE(rs_stream);
return NULL;
}
/**
 * @brief Close a stream handle and release it.
 *
 * The rulescan streams are only closed while the handle still refers to a
 * runtime. The runtime itself is borrowed and is not released here.
 *
 * @param rs_stream handle returned by adapter_rs_stream_open(); NULL is ignored
 */
void adapter_rs_stream_close(void *rs_stream)
{
    struct adapter_rs_stream *stream = (struct adapter_rs_stream *)rs_stream;

    if (NULL == stream) {
        return;
    }

    const int has_runtime = (stream->ref_rs_rt != NULL);

    if (has_runtime && stream->literal_stream != NULL) {
        rs_close_stream(stream->literal_stream);
        stream->literal_stream = NULL;
    }

    if (has_runtime && stream->regex_stream != NULL) {
        rs_close_stream(stream->regex_stream);
        stream->regex_stream = NULL;
    }

    /* the runtime belongs to the instance, which frees it together with
       the attribute table; only the reference is dropped here */
    stream->ref_rs_rt = NULL;
    FREE(stream);
}
int adapter_rs_scan_stream(void *rs_stream, const char *data, size_t data_len,
struct expr_scan_result *results, size_t n_result,
size_t *n_hit_result)
{
if (NULL == rs_stream || NULL == data || 0 == data_len ||
NULL == results || 0 == n_result || NULL == n_hit_result) {
return -1;
}
/*
In streaming mode, a non-zero return from the user-specified event-handler
function has consequences for the rest of that stream's lifetime: when a
non-zero return occurs, it signals that no more of the stream should be
scanned. Consequently if the user makes a subsequent call to
`hs_scan_stream` on a stream whose processing was terminated in this way,
hs_scan_stream will return `HS_SCAN_TERMINATED`. This case has not been
demonstrated in pcapscan, as its callback always returns 0.
*/
int ret = 0, err_count = 0;
struct adapter_rs_stream *stream = (struct adapter_rs_stream *)rs_stream;
int thread_id = stream->thread_id;
struct adapter_rs_runtime *rs_rt = stream->ref_rs_rt;
struct matched_pattern *matched_pat = rs_rt->matched_pats[thread_id];
if (stream->literal_stream != NULL) {
ret = rs_scan_stream(stream->literal_stream, data, data_len,
matched_event_cb, matched_pat);
if (ret < 0) {
err_count++;
}
}
if (stream->regex_stream != NULL) {
ret = rs_scan_stream(stream->regex_stream, data, data_len,
matched_event_cb, matched_pat);
if (ret < 0) {
err_count++;
}
}
if (err_count == 2) {
return -1;
}
size_t n_pattern_id = utarray_len(matched_pat->pattern_ids);
if (0 == n_pattern_id) {
*n_hit_result = 0;
return 0;
}
unsigned long long pattern_ids[n_pattern_id];
for (size_t i = 0; i < n_pattern_id; i++) {
pattern_ids[i] = *(unsigned long long *)utarray_eltptr(matched_pat->pattern_ids, i);
}
struct bool_expr_match *bool_matcher_results = rs_rt->bool_match_buffs[thread_id];
int bool_matcher_ret = bool_matcher_match(rs_rt->bm, pattern_ids, n_pattern_id,
bool_matcher_results, MAX_HIT_EXPR_NUM);
if (bool_matcher_ret < 0) {
ret = -1;
goto next;
}
if (bool_matcher_ret > (int)n_result) {
bool_matcher_ret = n_result;
}
for (int index = 0; index < bool_matcher_ret; index++) {
results[index].rule_id = bool_matcher_results[index].expr_id;
results[index].user_tag = bool_matcher_results[index].user_tag;
}
*n_hit_result = bool_matcher_ret;
next:
utarray_clear(matched_pat->pattern_ids);
return ret;
}
int adapter_rs_scan(void *rs_instance, int thread_id, const char *data, size_t data_len,
struct expr_scan_result *results, size_t n_result, size_t *n_hit_result)
{
if (NULL == rs_instance || NULL == data || (0 == data_len) ||
NULL == results || 0 == n_result || NULL == n_hit_result) {
return -1;
}
int ret = 0, err_count = 0;
struct adapter_rs *rs_inst = (struct adapter_rs *)rs_instance;
struct adapter_rs_runtime *rs_rt = rs_inst->rs_rt;
struct matched_pattern *matched_pat = rs_rt->matched_pats[thread_id];
if (rs_rt->literal_db != NULL) {
ret = rs_scan(rs_rt->literal_db, thread_id, data, data_len,
0, matched_event_cb, matched_pat);
if (ret < 0) {
err_count++;
}
}
if (rs_rt->regex_db != NULL) {
ret = rs_scan(rs_rt->regex_db, thread_id, data, data_len,
0, matched_event_cb, matched_pat);
if (ret < 0) {
err_count++;
}
}
if (err_count == 2) {
return -1;
}
size_t n_pattern_id = utarray_len(matched_pat->pattern_ids);
if (0 == n_pattern_id) {
*n_hit_result = 0;
return 0;
}
unsigned long long pattern_ids[n_pattern_id];
for (size_t i = 0; i < n_pattern_id; i++) {
pattern_ids[i] = *(unsigned long long *)utarray_eltptr(matched_pat->pattern_ids, i);
}
struct bool_expr_match *bool_matcher_results = rs_rt->bool_match_buffs[thread_id];
int bool_matcher_ret = bool_matcher_match(rs_rt->bm, pattern_ids, n_pattern_id,
bool_matcher_results, MAX_HIT_EXPR_NUM);
if (bool_matcher_ret < 0) {
ret = -1;
goto next;
}
if (bool_matcher_ret > (int)n_result) {
bool_matcher_ret = n_result;
}
for (int index = 0; index < bool_matcher_ret; index++) {
results[index].rule_id = bool_matcher_results[index].expr_id;
results[index].user_tag = bool_matcher_results[index].user_tag;
}
*n_hit_result = bool_matcher_ret;
next:
utarray_clear(matched_pat->pattern_ids);
return ret;
}