2022-11-17 05:05:35 +08:00
|
|
|
/*
|
|
|
|
|
**********************************************************************************************
|
|
|
|
|
* File: adapter_hs.cpp
|
|
|
|
|
* Description:
|
|
|
|
|
* Authors: Liu WenTan <liuwentan@geedgenetworks.com>
|
|
|
|
|
* Date: 2022-10-31
|
|
|
|
|
* Copyright: (c) 2018-2022 Geedge Networks, Inc. All rights reserved.
|
|
|
|
|
***********************************************************************************************
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
#include <stdint.h>
|
|
|
|
|
#include <stdio.h>
|
|
|
|
|
#include <stddef.h>
|
|
|
|
|
#include <hs/hs.h>
|
2023-02-09 22:13:15 +08:00
|
|
|
#include <assert.h>
|
|
|
|
|
#include <unistd.h>
|
|
|
|
|
#include <sys/syscall.h>
|
2022-11-17 05:05:35 +08:00
|
|
|
|
|
|
|
|
#include "adapter_hs.h"
|
|
|
|
|
#include "uthash/utarray.h"
|
|
|
|
|
#include "uthash/uthash.h"
|
2023-01-30 21:59:35 +08:00
|
|
|
#include "maat_utils.h"
|
2023-03-01 09:32:36 +08:00
|
|
|
#include "../bool_matcher/bool_matcher.h"
|
2022-11-17 05:05:35 +08:00
|
|
|
|
2023-03-22 11:10:00 +08:00
|
|
|
/* Initial capacity (in entries) of a matched pattern's offset array;
 * matched_event_cb doubles the capacity when the array is full. */
#define MAX_OFFSET_NUM 1024
|
|
|
|
|
|
2023-02-09 22:13:15 +08:00
|
|
|
/* Return the id of the calling thread as reported by the gettid system call. */
pid_t hs_gettid()
{
    pid_t tid = syscall(SYS_gettid);

    return tid;
}
|
|
|
|
|
|
|
|
|
|
/*
 * Format the log module tag "<name>(<tid>)" for the calling thread.
 *
 * The text lives in a thread-local buffer that is overwritten by every call,
 * so the returned pointer is only meaningful until the next call made on the
 * same thread. snprintf truncates text that does not fit the buffer.
 */
static const char *hs_module_name_str(const char *name)
{
    static __thread char tag_buf[64];

    (void)snprintf(tag_buf, sizeof(tag_buf), "%s(%d)", name, hs_gettid());

    return tag_buf;
}

/* Module tag handed to the log_* calls of this file. */
#define MODULE_ADAPTER_HS hs_module_name_str("maat.adapter_hs")
|
2023-01-30 21:59:35 +08:00
|
|
|
|
2022-11-17 05:05:35 +08:00
|
|
|
/*
 * Parallel arrays describing the patterns of one Hyperscan database.
 * Entry i of every array belongs to the same pattern; the arrays are handed
 * to hs_compile_lit_multi() / hs_compile_multi() by adpt_hs_build_database().
 */
struct adpt_hs_compile_data {
    unsigned int *ids;        /* pattern id reported back on a match */
    unsigned int *flags;      /* HS_FLAG_* bits of each pattern */
    char **patterns;          /* heap copies of the pattern bytes */
    size_t *pattern_lens;     /* length of each pattern in bytes */
    unsigned int n_patterns;  /* number of entries in each array */
};
|
|
|
|
|
|
|
|
|
|
/* adapter_hs runtime */
|
|
|
|
|
struct adapter_hs_runtime {
|
|
|
|
|
hs_database_t *literal_db;
|
|
|
|
|
hs_database_t *regex_db;
|
|
|
|
|
|
2023-03-22 11:10:00 +08:00
|
|
|
hs_scratch_t **literal_scratchs;
|
|
|
|
|
hs_scratch_t **regex_scratchs;
|
|
|
|
|
size_t literal_scratch_size;
|
|
|
|
|
size_t regex_scratch_size;
|
2022-11-17 05:05:35 +08:00
|
|
|
|
|
|
|
|
struct bool_matcher *bm;
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
/*
 * adapter_hs instance: created by adapter_hs_new(), destroyed by
 * adapter_hs_free().
 */
struct adapter_hs {
    size_t n_worker_thread;                    /* number of scratches per database */
    size_t n_expr;                             /* number of expressions passed to adapter_hs_new() */
    size_t n_patterns;                         /* total pattern count reported by bool_exprs_new() */
    struct adapter_hs_runtime *hs_rt;          /* owned runtime (databases, scratches, bool matcher) */
    struct hs_tag *tag_map;                    /* uthash head of the per-expression tags */
    struct pattern_attribute *pat_attr_by_str; /* NOTE(review): not used in the code visible here */
    struct pattern_attribute *pat_attr_by_id;  /* NOTE(review): not used in the code visible here */
    struct log_handle *logger;                 /* borrowed log handle; not freed by adapter_hs_free() */
};
|
|
|
|
|
|
2023-03-22 11:10:00 +08:00
|
|
|
/* One match position recorded by matched_event_cb(). */
struct matched_offset {
    unsigned long long start_offset;  /* the callback's `from` value */
    unsigned long long end_offset;    /* the callback's `to` value minus one */
};
|
|
|
|
|
|
2023-02-09 22:13:15 +08:00
|
|
|
struct matched_pattern {
|
2023-02-15 11:53:46 +08:00
|
|
|
unsigned long long pattern_id;
|
2023-03-22 11:10:00 +08:00
|
|
|
struct matched_offset *offsets;
|
|
|
|
|
size_t offset_cnt;
|
|
|
|
|
size_t offset_size;
|
2023-02-09 22:13:15 +08:00
|
|
|
UT_hash_handle hh;
|
|
|
|
|
};
|
|
|
|
|
|
2023-02-15 11:53:46 +08:00
|
|
|
/* Context object handed to matched_event_cb(): head of the matched-pattern hash. */
struct matched_pattern_container {
    struct matched_pattern *pat_hash;  /* uthash head, keyed by pattern_id */
};
|
|
|
|
|
|
2023-03-17 11:32:13 +08:00
|
|
|
struct adapter_hs_stream {
|
|
|
|
|
int thread_id;
|
|
|
|
|
size_t n_expr;
|
|
|
|
|
size_t n_patterns;
|
|
|
|
|
hs_stream_t *literal_stream;
|
|
|
|
|
hs_stream_t *regex_stream;
|
|
|
|
|
struct adapter_hs_runtime *hs_rt;
|
|
|
|
|
struct matched_pattern_container matched_pat_container;
|
|
|
|
|
};
|
|
|
|
|
|
2023-02-15 11:53:46 +08:00
|
|
|
struct pattern_attribute {
|
2023-03-22 11:10:00 +08:00
|
|
|
unsigned long long bool_expr_id;
|
2023-02-15 11:53:46 +08:00
|
|
|
unsigned long long pattern_id;
|
|
|
|
|
enum hs_match_mode match_mode;
|
2023-03-22 11:10:00 +08:00
|
|
|
int start_offset;
|
|
|
|
|
int end_offset;
|
2023-02-09 22:13:15 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
struct hs_tag {
|
2023-03-15 11:36:54 +08:00
|
|
|
char *key;
|
|
|
|
|
size_t key_len;
|
|
|
|
|
|
2023-02-15 11:53:46 +08:00
|
|
|
size_t n_pat_attr;
|
|
|
|
|
struct pattern_attribute *pat_attr;
|
2023-02-09 22:13:15 +08:00
|
|
|
void *user_tag;
|
2023-03-15 11:36:54 +08:00
|
|
|
UT_hash_handle hh;
|
2023-02-09 22:13:15 +08:00
|
|
|
};
|
|
|
|
|
|
2023-03-22 11:10:00 +08:00
|
|
|
/**
 * @brief fill scratchs[0 .. n_worker_thread-1] with scratch space for db
 *
 * scratchs[0] is allocated with hs_alloc_scratch(); every other slot is a
 * clone of it. On failure every scratch created by this call is released and
 * its slot is reset to NULL, so the caller only has to free the array itself.
 *
 * @param db              database the scratch space is allocated for
 * @param scratchs        caller-owned array with at least n_worker_thread slots
 * @param n_worker_thread number of slots to fill, must be > 0
 * @param logger          log handle used for error reporting
 *
 * @retval 0(success) -1(failed)
 */
int _hs_alloc_scratch(hs_database_t *db, hs_scratch_t **scratchs, size_t n_worker_thread,
                      struct log_handle *logger)
{
    size_t scratch_size = 0;
    size_t n_created = 0; /* slots [0, n_created) hold a scratch made by this call */
    int ret = 0;

    if (NULL == scratchs || 0 == n_worker_thread) {
        log_error(logger, MODULE_ADAPTER_HS, "[%s:%d] input parameters illegal!",
                  __FUNCTION__, __LINE__);
        return -1;
    }

    if (hs_alloc_scratch(db, &scratchs[0]) != HS_SUCCESS) {
        log_error(logger, MODULE_ADAPTER_HS,
                  "[%s:%d] Unable to allocate scratch space. Exiting.",
                  __FUNCTION__, __LINE__);
        return -1;
    }
    n_created = 1;

    for (size_t i = 1; i < n_worker_thread; i++) {
        hs_error_t err = hs_clone_scratch(scratchs[0], &scratchs[i]);
        if (err != HS_SUCCESS) {
            log_error(logger, MODULE_ADAPTER_HS,
                      "[%s:%d] Unable to clone scratch", __FUNCTION__, __LINE__);
            ret = -1;
            break;
        }
        n_created = i + 1;

        /* the size itself is not used; the query only validates the clone */
        err = hs_scratch_size(scratchs[i], &scratch_size);
        if (err != HS_SUCCESS) {
            log_error(logger, MODULE_ADAPTER_HS,
                      "[%s:%d] Unable to query scratch size", __FUNCTION__, __LINE__);
            ret = -1;
            break;
        }
    }

    if (ret < 0) {
        /* undo the partial work so nothing leaks when the caller drops the array */
        for (size_t i = 0; i < n_created; i++) {
            hs_free_scratch(scratchs[i]);
            scratchs[i] = NULL;
        }
    }

    return ret;
}
|
|
|
|
|
|
|
|
|
|
static int adpt_hs_alloc_scratch(struct adapter_hs_runtime *hs_rt, size_t n_worker_thread,
|
|
|
|
|
enum hs_pattern_type pattern_type, struct log_handle *logger)
|
|
|
|
|
{
|
|
|
|
|
int ret = 0;
|
|
|
|
|
|
|
|
|
|
if (pattern_type == HS_PATTERN_TYPE_STR) {
|
|
|
|
|
hs_rt->literal_scratchs = ALLOC(hs_scratch_t *, n_worker_thread);
|
|
|
|
|
ret = _hs_alloc_scratch(hs_rt->literal_db, hs_rt->literal_scratchs, n_worker_thread, logger);
|
|
|
|
|
if (ret < 0) {
|
|
|
|
|
FREE(hs_rt->literal_scratchs);
|
|
|
|
|
return -1;
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
hs_rt->regex_scratchs = ALLOC(hs_scratch_t *, n_worker_thread);
|
|
|
|
|
ret = _hs_alloc_scratch(hs_rt->regex_db, hs_rt->regex_scratchs, n_worker_thread, logger);
|
|
|
|
|
if (ret < 0) {
|
|
|
|
|
FREE(hs_rt->regex_scratchs);
|
2022-11-17 05:05:35 +08:00
|
|
|
return -1;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* @brief build hs block database for literal string and regex expression respectively
|
|
|
|
|
*
|
|
|
|
|
* @retval 0(success) -1(failed)
|
|
|
|
|
*/
|
2023-01-30 21:59:35 +08:00
|
|
|
static int adpt_hs_build_database(struct adapter_hs_runtime *hs_rt,
|
2023-03-22 11:10:00 +08:00
|
|
|
struct adpt_hs_compile_data *literal_cd,
|
|
|
|
|
struct adpt_hs_compile_data *regex_cd,
|
2023-02-15 11:53:46 +08:00
|
|
|
struct log_handle *logger)
|
2022-11-17 05:05:35 +08:00
|
|
|
{
|
|
|
|
|
hs_error_t err;
|
|
|
|
|
hs_compile_error_t *compile_err = NULL;
|
|
|
|
|
|
2023-03-22 11:10:00 +08:00
|
|
|
if (NULL == hs_rt || (NULL == literal_cd && NULL == regex_cd)) {
|
2022-11-17 05:05:35 +08:00
|
|
|
return -1;
|
|
|
|
|
}
|
|
|
|
|
|
2023-03-22 11:10:00 +08:00
|
|
|
if (literal_cd != NULL) {
|
|
|
|
|
err = hs_compile_lit_multi((const char *const *)literal_cd->patterns, literal_cd->flags,
|
|
|
|
|
literal_cd->ids, literal_cd->pattern_lens, literal_cd->n_patterns,
|
2023-03-17 17:28:52 +08:00
|
|
|
HS_MODE_STREAM, NULL, &hs_rt->literal_db, &compile_err);
|
2022-11-17 05:05:35 +08:00
|
|
|
if (err != HS_SUCCESS) {
|
|
|
|
|
if (compile_err) {
|
2023-03-02 14:52:31 +08:00
|
|
|
log_error(logger, MODULE_ADAPTER_HS, "[%s:%d] compile error: %s",
|
|
|
|
|
__FUNCTION__, __LINE__, compile_err->message);
|
2022-11-17 05:05:35 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
hs_free_compile_error(compile_err);
|
2023-02-15 11:53:46 +08:00
|
|
|
return -1;
|
2022-11-17 05:05:35 +08:00
|
|
|
}
|
2023-03-22 11:10:00 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (regex_cd != NULL) {
|
|
|
|
|
err = hs_compile_multi((const char *const *)regex_cd->patterns, regex_cd->flags, regex_cd->ids,
|
|
|
|
|
regex_cd->n_patterns, HS_MODE_STREAM | HS_MODE_SOM_HORIZON_SMALL, NULL,
|
|
|
|
|
&hs_rt->regex_db, &compile_err);
|
2022-11-17 05:05:35 +08:00
|
|
|
if (err != HS_SUCCESS) {
|
|
|
|
|
if (compile_err) {
|
2023-03-02 14:52:31 +08:00
|
|
|
log_error(logger, MODULE_ADAPTER_HS, "[%s:%d] compile error: %s",
|
|
|
|
|
__FUNCTION__, __LINE__, compile_err->message);
|
2022-11-17 05:05:35 +08:00
|
|
|
}
|
|
|
|
|
hs_free_compile_error(compile_err);
|
2023-02-15 11:53:46 +08:00
|
|
|
return -1;
|
2022-11-17 05:05:35 +08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2023-02-15 11:53:46 +08:00
|
|
|
return 0;
|
2022-11-17 05:05:35 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
struct adpt_hs_compile_data *adpt_hs_compile_data_new(size_t n_patterns)
|
|
|
|
|
{
|
|
|
|
|
struct adpt_hs_compile_data *hs_cd = ALLOC(struct adpt_hs_compile_data, 1);
|
|
|
|
|
hs_cd->patterns = ALLOC(char *, n_patterns);
|
|
|
|
|
hs_cd->pattern_lens = ALLOC(size_t, n_patterns);
|
2023-03-22 11:10:00 +08:00
|
|
|
hs_cd->n_patterns = n_patterns;
|
2022-11-17 05:05:35 +08:00
|
|
|
hs_cd->ids = ALLOC(unsigned int, n_patterns);
|
|
|
|
|
hs_cd->flags = ALLOC(unsigned int, n_patterns);
|
|
|
|
|
|
|
|
|
|
return hs_cd;
|
|
|
|
|
}
|
|
|
|
|
|
2023-03-22 11:10:00 +08:00
|
|
|
void adpt_hs_compile_data_free(struct adpt_hs_compile_data *hs_cd)
|
2022-11-17 05:05:35 +08:00
|
|
|
{
|
|
|
|
|
if (NULL == hs_cd) {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (hs_cd->patterns != NULL) {
|
2023-03-22 11:10:00 +08:00
|
|
|
for (size_t i = 0; i < hs_cd->n_patterns; i++) {
|
2022-12-05 23:21:18 +08:00
|
|
|
FREE(hs_cd->patterns[i]);
|
2022-11-17 05:05:35 +08:00
|
|
|
}
|
|
|
|
|
|
2022-12-05 23:21:18 +08:00
|
|
|
FREE(hs_cd->patterns);
|
|
|
|
|
FREE(hs_cd->pattern_lens);
|
|
|
|
|
FREE(hs_cd->ids);
|
|
|
|
|
FREE(hs_cd->flags);
|
2022-11-17 05:05:35 +08:00
|
|
|
}
|
|
|
|
|
|
2022-12-05 23:21:18 +08:00
|
|
|
FREE(hs_cd);
|
2022-11-17 05:05:35 +08:00
|
|
|
}
|
|
|
|
|
|
2023-03-15 11:36:54 +08:00
|
|
|
struct hs_tag *hs_tag_new(long long expr_id, size_t n_pattern)
|
|
|
|
|
{
|
|
|
|
|
struct hs_tag *tag = ALLOC(struct hs_tag, 1);
|
|
|
|
|
|
|
|
|
|
tag->key = ALLOC(char, sizeof(long long));
|
|
|
|
|
memcpy(tag->key, (char *)&expr_id, sizeof(long long));
|
|
|
|
|
tag->key_len = sizeof(long long);
|
|
|
|
|
tag->pat_attr = ALLOC(struct pattern_attribute, n_pattern);
|
|
|
|
|
tag->n_pat_attr = n_pattern;
|
|
|
|
|
|
|
|
|
|
return tag;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void hs_tag_free(struct hs_tag *tag)
|
|
|
|
|
{
|
|
|
|
|
if (NULL == tag) {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (tag->key != NULL) {
|
|
|
|
|
FREE(tag->key);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (tag->pat_attr != NULL) {
|
|
|
|
|
FREE(tag->pat_attr);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
FREE(tag);
|
|
|
|
|
}
|
|
|
|
|
|
2023-03-22 11:10:00 +08:00
|
|
|
void populate_compile_data(struct adpt_hs_compile_data *compile_data, int index, int pattern_id,
|
|
|
|
|
char *pat, size_t pat_len, int case_sensitive)
|
|
|
|
|
{
|
|
|
|
|
compile_data->ids[index] = pattern_id;
|
|
|
|
|
|
|
|
|
|
/* set flags */
|
|
|
|
|
compile_data->flags[index] |= HS_FLAG_SOM_LEFTMOST;
|
|
|
|
|
if (case_sensitive == HS_CASE_INSESITIVE) {
|
|
|
|
|
compile_data->flags[index] |= HS_FLAG_CASELESS;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
compile_data->pattern_lens[index] = pat_len;
|
|
|
|
|
compile_data->patterns[index] = ALLOC(char, pat_len + 1);
|
|
|
|
|
memcpy(compile_data->patterns[index], pat, pat_len);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
struct bool_expr *bool_exprs_new(struct hs_expr *exprs, size_t n_expr, struct hs_tag **tag_hash,
|
|
|
|
|
struct adpt_hs_compile_data *literal_cd, struct adpt_hs_compile_data *regex_cd,
|
|
|
|
|
size_t *n_pattern)
|
|
|
|
|
{
|
|
|
|
|
uint32_t pattern_index = 0;
|
|
|
|
|
uint32_t literal_index = 0;
|
|
|
|
|
uint32_t regex_index = 0;
|
|
|
|
|
|
|
|
|
|
struct bool_expr *bool_exprs = ALLOC(struct bool_expr, n_expr);
|
|
|
|
|
if (NULL == bool_exprs) {
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* populate adpt_hs_compile_data and bool_expr */
|
|
|
|
|
for (size_t i = 0; i < n_expr; i++) {
|
|
|
|
|
struct hs_tag *hs_tag = hs_tag_new(exprs[i].expr_id, exprs[i].n_patterns);
|
|
|
|
|
hs_tag->user_tag = exprs[i].user_tag;
|
|
|
|
|
|
|
|
|
|
for (size_t j = 0; j < exprs[i].n_patterns; j++) {
|
|
|
|
|
hs_tag->pat_attr[j].pattern_id = pattern_index;
|
|
|
|
|
hs_tag->pat_attr[j].match_mode = exprs[i].patterns[j].match_mode;
|
|
|
|
|
if (exprs[i].patterns[j].match_mode == HS_MATCH_MODE_SUB) {
|
|
|
|
|
hs_tag->pat_attr[j].start_offset = exprs[i].patterns[j].start_offset;
|
|
|
|
|
hs_tag->pat_attr[j].end_offset = exprs[i].patterns[j].end_offset;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* literal pattern */
|
|
|
|
|
if (exprs[i].patterns[j].pattern_type == HS_PATTERN_TYPE_STR) {
|
|
|
|
|
populate_compile_data(literal_cd, literal_index, pattern_index,
|
|
|
|
|
exprs[i].patterns[j].pat, exprs[i].patterns[j].pat_len,
|
|
|
|
|
exprs[i].patterns[j].case_sensitive);
|
|
|
|
|
literal_index++;
|
|
|
|
|
} else {
|
|
|
|
|
/* regex pattern */
|
|
|
|
|
populate_compile_data(regex_cd, regex_index, pattern_index,
|
|
|
|
|
exprs[i].patterns[j].pat, exprs[i].patterns[j].pat_len,
|
|
|
|
|
exprs[i].patterns[j].case_sensitive);
|
|
|
|
|
regex_index++;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool_exprs[i].items[j].item_id = pattern_index++;
|
|
|
|
|
bool_exprs[i].items[j].not_flag = 0;
|
|
|
|
|
// printf("item_id:%llu, pat:%s pat_len:%zu\n",
|
|
|
|
|
// bool_exprs[i].items[j].item_id, exprs[i].patterns[j].pat, exprs[i].patterns[j].pat_len);
|
|
|
|
|
}
|
|
|
|
|
//printf("expr_id:%lld item_num:%zu\n", exprs[i].expr_id, exprs[i].n_patterns);
|
|
|
|
|
bool_exprs[i].expr_id = exprs[i].expr_id;
|
|
|
|
|
bool_exprs[i].item_num = exprs[i].n_patterns;
|
|
|
|
|
bool_exprs[i].user_tag = hs_tag;
|
|
|
|
|
HASH_ADD_KEYPTR(hh, *tag_hash, hs_tag->key, hs_tag->key_len, hs_tag);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
*n_pattern = pattern_index;
|
|
|
|
|
|
|
|
|
|
return bool_exprs;
|
|
|
|
|
}
|
|
|
|
|
|
2023-03-22 11:23:21 +08:00
|
|
|
struct adapter_hs *adapter_hs_new(size_t n_worker_thread,
|
|
|
|
|
struct hs_expr *exprs, size_t n_expr,
|
|
|
|
|
struct log_handle *logger)
|
2022-11-17 05:05:35 +08:00
|
|
|
{
|
2023-03-22 11:10:00 +08:00
|
|
|
if (0 == n_worker_thread || NULL == exprs || 0 == n_expr) {
|
2023-03-02 14:52:31 +08:00
|
|
|
log_error(logger, MODULE_ADAPTER_HS, "[%s:%d] input parameters illegal!",
|
|
|
|
|
__FUNCTION__, __LINE__);
|
2022-11-17 05:05:35 +08:00
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* get the sum of pattern */
|
2023-03-22 11:10:00 +08:00
|
|
|
size_t literal_pattern_num = 0;
|
|
|
|
|
size_t regex_pattern_num = 0;
|
2023-02-15 11:53:46 +08:00
|
|
|
for (size_t i = 0; i < n_expr; i++) {
|
|
|
|
|
if (exprs[i].n_patterns > MAX_EXPR_PATTERN_NUM) {
|
2023-01-30 21:59:35 +08:00
|
|
|
log_error(logger, MODULE_ADAPTER_HS,
|
2023-03-02 14:52:31 +08:00
|
|
|
"[%s:%d] the number of patterns in one expression should less than %d",
|
|
|
|
|
__FUNCTION__, __LINE__, MAX_EXPR_PATTERN_NUM);
|
2022-11-17 05:05:35 +08:00
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
2023-02-15 11:53:46 +08:00
|
|
|
for (size_t j = 0; j < exprs[i].n_patterns; j++) {
|
2023-03-22 11:10:00 +08:00
|
|
|
/* pat_len should not 0 */
|
2023-02-15 11:53:46 +08:00
|
|
|
if (0 == exprs[i].patterns[j].pat_len) {
|
2023-03-22 11:10:00 +08:00
|
|
|
log_error(logger, MODULE_ADAPTER_HS,
|
2023-03-02 14:52:31 +08:00
|
|
|
"[%s:%d] expr pattern length should not 0", __FUNCTION__, __LINE__);
|
2023-02-09 22:13:15 +08:00
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
2023-03-22 11:10:00 +08:00
|
|
|
if (exprs[i].patterns[j].pattern_type == HS_PATTERN_TYPE_STR) {
|
|
|
|
|
literal_pattern_num++;
|
|
|
|
|
} else {
|
|
|
|
|
regex_pattern_num++;
|
|
|
|
|
}
|
2022-11-17 05:05:35 +08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2023-03-22 11:10:00 +08:00
|
|
|
if (0 == literal_pattern_num && 0 == regex_pattern_num) {
|
|
|
|
|
log_error(logger, MODULE_ADAPTER_HS, "[%s:%d] exprs has no valid pattern",
|
2023-03-02 14:52:31 +08:00
|
|
|
__FUNCTION__, __LINE__);
|
2023-02-15 11:53:46 +08:00
|
|
|
return NULL;
|
2022-11-17 05:05:35 +08:00
|
|
|
}
|
|
|
|
|
|
2023-03-22 11:10:00 +08:00
|
|
|
struct adpt_hs_compile_data *literal_cd = NULL;
|
|
|
|
|
struct adpt_hs_compile_data *regex_cd = NULL;
|
|
|
|
|
if (literal_pattern_num > 0) {
|
|
|
|
|
literal_cd = adpt_hs_compile_data_new(literal_pattern_num);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (regex_pattern_num > 0) {
|
|
|
|
|
regex_cd = adpt_hs_compile_data_new(regex_pattern_num);
|
|
|
|
|
}
|
2023-02-15 11:53:46 +08:00
|
|
|
|
2023-03-22 11:10:00 +08:00
|
|
|
size_t pattern_cnt = 0;
|
2023-03-15 11:36:54 +08:00
|
|
|
struct adapter_hs *hs_instance = ALLOC(struct adapter_hs, 1);
|
|
|
|
|
hs_instance->tag_map = NULL;
|
2023-03-17 11:32:13 +08:00
|
|
|
hs_instance->logger = logger;
|
2023-02-03 17:28:14 +08:00
|
|
|
hs_instance->n_worker_thread = n_worker_thread;
|
2023-02-15 11:53:46 +08:00
|
|
|
hs_instance->n_expr = n_expr;
|
2022-11-17 05:05:35 +08:00
|
|
|
|
2023-03-22 11:10:00 +08:00
|
|
|
struct bool_expr *bool_exprs = bool_exprs_new(exprs, n_expr, &hs_instance->tag_map,
|
|
|
|
|
literal_cd, regex_cd, &pattern_cnt);
|
|
|
|
|
if (NULL == bool_exprs) {
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
hs_instance->n_patterns = pattern_cnt;
|
|
|
|
|
|
2023-02-15 11:53:46 +08:00
|
|
|
//mytest
|
2023-03-17 11:32:13 +08:00
|
|
|
// for (size_t i = 0; i < n_expr; i++) {
|
2023-03-17 17:28:52 +08:00
|
|
|
// {
|
|
|
|
|
// printf("<before bool_matcher_new> exprs[%zu] expr_id:%llu, item_num:%zu\n",
|
|
|
|
|
// i, bool_exprs[i].expr_id, bool_exprs[i].item_num);
|
|
|
|
|
// printf("item_id: ");
|
|
|
|
|
// for (size_t j = 0; j < bool_exprs[i].item_num; j++)
|
|
|
|
|
// {
|
|
|
|
|
// printf("%llu ", bool_exprs[i].items[j].item_id);
|
|
|
|
|
// }
|
|
|
|
|
// }
|
2023-03-22 11:10:00 +08:00
|
|
|
// printf("\n");
|
2023-02-15 11:53:46 +08:00
|
|
|
// }
|
2023-03-22 11:10:00 +08:00
|
|
|
|
2023-03-15 11:36:54 +08:00
|
|
|
|
2022-11-17 05:05:35 +08:00
|
|
|
/* create bool matcher */
|
2023-03-22 11:10:00 +08:00
|
|
|
size_t mem_size = 0;
|
|
|
|
|
int hs_ret = 0;
|
|
|
|
|
hs_instance->hs_rt = ALLOC(struct adapter_hs_runtime, 1);
|
2023-02-15 11:53:46 +08:00
|
|
|
hs_instance->hs_rt->bm = bool_matcher_new(bool_exprs, n_expr, &mem_size);
|
2022-11-17 05:05:35 +08:00
|
|
|
if (hs_instance->hs_rt->bm != NULL) {
|
2023-01-30 21:59:35 +08:00
|
|
|
log_info(logger, MODULE_ADAPTER_HS,
|
|
|
|
|
"Adapter_hs module: build bool matcher of %zu expressions with %zu bytes memory",
|
2023-02-15 11:53:46 +08:00
|
|
|
n_expr, mem_size);
|
2022-11-17 05:05:35 +08:00
|
|
|
} else {
|
2023-03-02 14:52:31 +08:00
|
|
|
log_error(logger, MODULE_ADAPTER_HS, "[%s:%d] Adapter_hs module: build bool matcher failed",
|
|
|
|
|
__FUNCTION__, __LINE__);
|
2023-03-22 11:10:00 +08:00
|
|
|
|
|
|
|
|
hs_ret = -1;
|
2022-11-17 05:05:35 +08:00
|
|
|
}
|
2023-02-15 11:53:46 +08:00
|
|
|
FREE(bool_exprs);
|
2022-11-17 05:05:35 +08:00
|
|
|
|
|
|
|
|
/* build hs database */
|
2023-03-22 11:10:00 +08:00
|
|
|
int ret = adpt_hs_build_database(hs_instance->hs_rt, literal_cd, regex_cd, logger);
|
2022-11-17 05:05:35 +08:00
|
|
|
if (ret < 0) {
|
2023-03-22 11:10:00 +08:00
|
|
|
hs_ret = -1;
|
2022-11-17 05:05:35 +08:00
|
|
|
}
|
|
|
|
|
|
2023-03-22 11:10:00 +08:00
|
|
|
if (literal_cd != NULL) {
|
|
|
|
|
adpt_hs_compile_data_free(literal_cd);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (regex_cd != NULL) {
|
|
|
|
|
adpt_hs_compile_data_free(regex_cd);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (hs_ret < 0) {
|
2022-11-17 05:05:35 +08:00
|
|
|
goto error;
|
|
|
|
|
}
|
|
|
|
|
|
2023-03-22 11:10:00 +08:00
|
|
|
/* literal and regex scratch can't reuse */
|
|
|
|
|
if (literal_pattern_num > 0) {
|
|
|
|
|
ret = adpt_hs_alloc_scratch(hs_instance->hs_rt, n_worker_thread, HS_PATTERN_TYPE_STR, logger);
|
|
|
|
|
if (ret < 0) {
|
|
|
|
|
goto error;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (regex_pattern_num > 0) {
|
|
|
|
|
ret = adpt_hs_alloc_scratch(hs_instance->hs_rt, n_worker_thread, HS_PATTERN_TYPE_REG, logger);
|
|
|
|
|
if (ret < 0) {
|
|
|
|
|
goto error;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2022-11-17 05:05:35 +08:00
|
|
|
return hs_instance;
|
|
|
|
|
error:
|
2023-03-22 11:23:21 +08:00
|
|
|
adapter_hs_free(hs_instance);
|
2022-11-17 05:05:35 +08:00
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
2023-03-22 11:23:21 +08:00
|
|
|
void adapter_hs_free(struct adapter_hs *hs_instance)
|
2022-11-17 05:05:35 +08:00
|
|
|
{
|
|
|
|
|
if (NULL == hs_instance) {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (hs_instance->hs_rt != NULL) {
|
|
|
|
|
if (hs_instance->hs_rt->literal_db != NULL) {
|
|
|
|
|
hs_free_database(hs_instance->hs_rt->literal_db);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (hs_instance->hs_rt->regex_db != NULL) {
|
|
|
|
|
hs_free_database(hs_instance->hs_rt->regex_db);
|
|
|
|
|
}
|
|
|
|
|
|
2023-03-22 11:10:00 +08:00
|
|
|
if (hs_instance->hs_rt->literal_scratchs != NULL) {
|
|
|
|
|
for (size_t i = 0; i < hs_instance->n_worker_thread; i++) {
|
|
|
|
|
if (hs_instance->hs_rt->literal_scratchs[i] != NULL) {
|
|
|
|
|
hs_free_scratch(hs_instance->hs_rt->literal_scratchs[i]);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
FREE(hs_instance->hs_rt->literal_scratchs);
|
|
|
|
|
|
|
|
|
|
if (hs_instance->hs_rt->regex_scratchs != NULL) {
|
2023-02-03 17:28:14 +08:00
|
|
|
for (size_t i = 0; i < hs_instance->n_worker_thread; i++) {
|
2023-03-22 11:10:00 +08:00
|
|
|
if (hs_instance->hs_rt->regex_scratchs[i] != NULL) {
|
|
|
|
|
hs_free_scratch(hs_instance->hs_rt->regex_scratchs[i]);
|
2022-11-17 05:05:35 +08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
2023-03-22 11:10:00 +08:00
|
|
|
FREE(hs_instance->hs_rt->regex_scratchs);
|
2022-11-17 05:05:35 +08:00
|
|
|
|
|
|
|
|
if (hs_instance->hs_rt->bm != NULL) {
|
|
|
|
|
bool_matcher_free(hs_instance->hs_rt->bm);
|
|
|
|
|
}
|
|
|
|
|
|
2022-12-05 23:21:18 +08:00
|
|
|
FREE(hs_instance->hs_rt);
|
2022-11-17 05:05:35 +08:00
|
|
|
}
|
|
|
|
|
|
2023-03-15 11:36:54 +08:00
|
|
|
if (hs_instance->tag_map != NULL) {
|
|
|
|
|
struct hs_tag *tag = NULL, *tmp_tag = NULL;
|
|
|
|
|
HASH_ITER(hh, hs_instance->tag_map, tag, tmp_tag) {
|
|
|
|
|
HASH_DEL(hs_instance->tag_map, tag);
|
|
|
|
|
hs_tag_free(tag);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2022-12-05 23:21:18 +08:00
|
|
|
FREE(hs_instance);
|
2022-11-17 05:05:35 +08:00
|
|
|
}
|
|
|
|
|
|
2023-03-22 11:10:00 +08:00
|
|
|
int find_same_pattern_offset(struct matched_pattern *matched_pat, unsigned long long from,
|
|
|
|
|
unsigned long long to)
|
2022-11-17 05:05:35 +08:00
|
|
|
{
|
2023-03-22 11:10:00 +08:00
|
|
|
for (size_t i = 0; i < matched_pat->offset_cnt; i++) {
|
|
|
|
|
if (matched_pat->offsets[i].start_offset == from &&
|
|
|
|
|
matched_pat->offsets[i].end_offset == to - 1) {
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
}
|
2022-11-17 05:05:35 +08:00
|
|
|
|
2023-03-22 11:10:00 +08:00
|
|
|
return -1;
|
2022-11-17 05:05:35 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* @param id: pattern id
|
|
|
|
|
*/
|
|
|
|
|
int matched_event_cb(unsigned int id, unsigned long long from,
|
2023-02-09 22:13:15 +08:00
|
|
|
unsigned long long to, unsigned int flags,
|
|
|
|
|
void *ctx) {
|
2022-11-17 05:05:35 +08:00
|
|
|
// put id in set
|
2023-02-15 11:53:46 +08:00
|
|
|
struct matched_pattern_container *matched_pat_container = (struct matched_pattern_container *)ctx;
|
|
|
|
|
unsigned long long pattern_id = id;
|
2023-02-09 22:13:15 +08:00
|
|
|
|
2023-03-22 11:10:00 +08:00
|
|
|
struct matched_pattern *matched_pat = NULL;
|
|
|
|
|
HASH_FIND(hh, matched_pat_container->pat_hash, &pattern_id, sizeof(unsigned long long), matched_pat);
|
|
|
|
|
if (matched_pat != NULL) {
|
|
|
|
|
// same pattern_id, offset maybe different
|
|
|
|
|
int ret = find_same_pattern_offset(matched_pat, from, to);
|
|
|
|
|
if (ret < 0) { /* different offset */
|
|
|
|
|
// TODO: use realloc
|
|
|
|
|
if (matched_pat->offset_cnt >= matched_pat->offset_size) {
|
|
|
|
|
matched_pat->offset_size *= 2;
|
|
|
|
|
matched_pat->offsets = (struct matched_offset *)realloc(matched_pat->offsets,
|
|
|
|
|
matched_pat->offset_size*sizeof(struct matched_offset));
|
|
|
|
|
}
|
|
|
|
|
matched_pat->offsets[matched_pat->offset_cnt].start_offset = from;
|
|
|
|
|
matched_pat->offsets[matched_pat->offset_cnt].end_offset = to - 1;
|
|
|
|
|
matched_pat->offset_cnt++;
|
|
|
|
|
}
|
2023-03-17 17:28:52 +08:00
|
|
|
return 0;
|
2023-03-22 11:10:00 +08:00
|
|
|
} else {
|
|
|
|
|
// different pattern_id
|
|
|
|
|
struct matched_pattern *matched_pat = ALLOC(struct matched_pattern, 1);
|
|
|
|
|
matched_pat->pattern_id = pattern_id;
|
|
|
|
|
matched_pat->offsets = ALLOC(struct matched_offset, MAX_OFFSET_NUM);
|
|
|
|
|
matched_pat->offset_size = MAX_OFFSET_NUM;
|
|
|
|
|
matched_pat->offsets[matched_pat->offset_cnt].start_offset = from;
|
|
|
|
|
matched_pat->offsets[matched_pat->offset_cnt].end_offset = to - 1;
|
|
|
|
|
matched_pat->offset_cnt++;
|
2023-02-09 22:13:15 +08:00
|
|
|
|
2023-03-22 11:10:00 +08:00
|
|
|
HASH_ADD(hh, matched_pat_container->pat_hash, pattern_id, sizeof(unsigned long long), matched_pat);
|
|
|
|
|
}
|
2023-02-15 11:53:46 +08:00
|
|
|
|
2022-11-17 05:05:35 +08:00
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
2023-02-15 11:53:46 +08:00
|
|
|
int is_real_matched_pattern(struct matched_pattern *matched_pat, enum hs_match_mode match_mode,
|
2023-03-22 11:10:00 +08:00
|
|
|
size_t data_len, int attr_start_offset, int attr_end_offset)
|
2023-02-15 11:53:46 +08:00
|
|
|
{
|
|
|
|
|
if (match_mode == HS_MATCH_MODE_EXACTLY) {
|
2023-03-22 11:10:00 +08:00
|
|
|
for (size_t i = 0; i < matched_pat->offset_cnt; i++) {
|
|
|
|
|
if (matched_pat->offsets[i].start_offset == 0 &&
|
|
|
|
|
matched_pat->offsets[i].end_offset == data_len - 1) {
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
2023-02-15 11:53:46 +08:00
|
|
|
}
|
|
|
|
|
} else if (match_mode == HS_MATCH_MODE_PREFIX) {
|
2023-03-22 11:10:00 +08:00
|
|
|
for (size_t i = 0; i < matched_pat->offset_cnt; i++) {
|
|
|
|
|
if (matched_pat->offsets[i].start_offset == 0) {
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
2023-02-15 11:53:46 +08:00
|
|
|
}
|
|
|
|
|
} else if (match_mode == HS_MATCH_MODE_SUFFIX) {
|
2023-03-22 11:10:00 +08:00
|
|
|
for (size_t i = 0; i < matched_pat->offset_cnt; i++) {
|
|
|
|
|
if (matched_pat->offsets[i].end_offset == data_len - 1) {
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
2023-02-15 11:53:46 +08:00
|
|
|
}
|
|
|
|
|
} else if (match_mode == HS_MATCH_MODE_SUB) {
|
2023-03-22 11:10:00 +08:00
|
|
|
if (attr_start_offset == -1) {
|
|
|
|
|
attr_start_offset = 0;
|
2023-02-15 11:53:46 +08:00
|
|
|
}
|
|
|
|
|
|
2023-03-22 11:10:00 +08:00
|
|
|
if (attr_end_offset == -1) {
|
|
|
|
|
attr_end_offset = (int)data_len - 1;
|
2023-02-15 11:53:46 +08:00
|
|
|
}
|
|
|
|
|
|
2023-03-22 11:10:00 +08:00
|
|
|
for (size_t i = 0; i < matched_pat->offset_cnt; i++) {
|
|
|
|
|
if (matched_pat->offsets[i].start_offset >= (unsigned long long)attr_start_offset &&
|
|
|
|
|
matched_pat->offsets[i].end_offset <= (unsigned long long)attr_end_offset) {
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
2023-02-15 11:53:46 +08:00
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
assert(0);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return -1;
|
|
|
|
|
}
|
|
|
|
|
|
2023-02-16 11:13:23 +08:00
|
|
|
int hs_tag_validate(struct hs_tag *hs_tag, struct matched_pattern_container *matched_pat_container,
|
|
|
|
|
size_t data_len)
|
|
|
|
|
{
|
|
|
|
|
/* check if real matched pattern, because pattern match_mode is different */
|
|
|
|
|
for (size_t i = 0; i < hs_tag->n_pat_attr; i++) {
|
2023-03-22 11:10:00 +08:00
|
|
|
struct matched_pattern *matched_pat = NULL;
|
2023-02-22 15:08:52 +08:00
|
|
|
unsigned long long pattern_id = hs_tag->pat_attr[i].pattern_id;
|
2023-03-22 11:10:00 +08:00
|
|
|
HASH_FIND(hh, matched_pat_container->pat_hash, &pattern_id, sizeof(unsigned long long), matched_pat);
|
|
|
|
|
if (matched_pat) {
|
|
|
|
|
int matched_ret = is_real_matched_pattern(matched_pat, hs_tag->pat_attr[i].match_mode,
|
|
|
|
|
data_len, hs_tag->pat_attr[i].start_offset,
|
|
|
|
|
hs_tag->pat_attr[i].end_offset);
|
2023-02-16 11:13:23 +08:00
|
|
|
if (matched_ret < 0) {
|
|
|
|
|
return -1;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
2022-11-17 05:05:35 +08:00
|
|
|
struct adapter_hs_stream *adapter_hs_stream_open(struct adapter_hs *hs_instance, int thread_id)
|
|
|
|
|
{
|
2023-02-27 10:07:37 +08:00
|
|
|
if (NULL == hs_instance || thread_id < 0) {
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
2022-11-17 05:05:35 +08:00
|
|
|
struct adapter_hs_stream *hs_stream = ALLOC(struct adapter_hs_stream, 1);
|
|
|
|
|
hs_error_t err;
|
|
|
|
|
|
|
|
|
|
hs_stream->thread_id = thread_id;
|
|
|
|
|
hs_stream->n_expr = hs_instance->n_expr;
|
|
|
|
|
hs_stream->n_patterns = hs_instance->n_patterns;
|
|
|
|
|
hs_stream->hs_rt = hs_instance->hs_rt;
|
|
|
|
|
|
2023-03-22 11:10:00 +08:00
|
|
|
int err_count = 0;
|
2022-11-17 05:05:35 +08:00
|
|
|
if (hs_instance->hs_rt->literal_db != NULL) {
|
|
|
|
|
err = hs_open_stream(hs_instance->hs_rt->literal_db, 0, &hs_stream->literal_stream);
|
|
|
|
|
if (err != HS_SUCCESS) {
|
2023-03-17 11:32:13 +08:00
|
|
|
log_error(hs_instance->logger, MODULE_ADAPTER_HS, "hs_open_stream failed, hs err:%d", err);
|
2023-03-22 11:10:00 +08:00
|
|
|
err_count++;
|
2022-11-17 05:05:35 +08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (hs_instance->hs_rt->regex_db != NULL) {
|
|
|
|
|
err = hs_open_stream(hs_instance->hs_rt->regex_db, 0, &hs_stream->regex_stream);
|
|
|
|
|
if (err != HS_SUCCESS) {
|
2023-03-17 11:32:13 +08:00
|
|
|
log_error(hs_instance->logger, MODULE_ADAPTER_HS, "hs_open_stream failed, hs err:%d", err);
|
2023-03-22 11:10:00 +08:00
|
|
|
err_count++;
|
2022-11-17 05:05:35 +08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2023-03-22 11:10:00 +08:00
|
|
|
if (err_count > 0) {
|
|
|
|
|
goto error;
|
|
|
|
|
}
|
|
|
|
|
|
2022-11-17 05:05:35 +08:00
|
|
|
return hs_stream;
|
2023-03-22 11:10:00 +08:00
|
|
|
error:
|
|
|
|
|
//TODO: hs_stream->hs_rt->scratchs[thread_id] may be free twice
|
|
|
|
|
if (hs_stream->literal_stream != NULL) {
|
|
|
|
|
hs_close_stream(hs_stream->literal_stream, NULL, NULL, NULL);
|
|
|
|
|
hs_stream->literal_stream = NULL;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (hs_stream->regex_stream != NULL) {
|
|
|
|
|
hs_close_stream(hs_stream->regex_stream, NULL, NULL, NULL);
|
|
|
|
|
hs_stream->regex_stream = NULL;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
FREE(hs_stream);
|
|
|
|
|
return NULL;
|
2022-11-17 05:05:35 +08:00
|
|
|
}
|
|
|
|
|
|
2023-03-17 17:28:52 +08:00
|
|
|
void adapter_hs_stream_close(struct adapter_hs_stream *hs_stream)
|
|
|
|
|
{
|
|
|
|
|
if (NULL == hs_stream) {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (hs_stream->hs_rt != NULL) {
|
|
|
|
|
if (hs_stream->literal_stream != NULL) {
|
2023-03-22 11:10:00 +08:00
|
|
|
hs_close_stream(hs_stream->literal_stream, NULL, NULL, NULL);
|
2023-03-17 17:28:52 +08:00
|
|
|
hs_stream->literal_stream = NULL;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (hs_stream->regex_stream != NULL) {
|
2023-03-22 11:10:00 +08:00
|
|
|
hs_close_stream(hs_stream->regex_stream, NULL, NULL, NULL);
|
2023-03-17 17:28:52 +08:00
|
|
|
hs_stream->regex_stream = NULL;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (hs_stream->matched_pat_container.pat_hash != NULL) {
|
|
|
|
|
struct matched_pattern *pattern = NULL, *tmp_pattern = NULL;
|
|
|
|
|
HASH_ITER(hh, hs_stream->matched_pat_container.pat_hash, pattern, tmp_pattern) {
|
|
|
|
|
HASH_DELETE(hh, hs_stream->matched_pat_container.pat_hash, pattern);
|
|
|
|
|
FREE(pattern);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* hs_stream->hs_rt point to hs_instance->hs_rt which will call free */
|
|
|
|
|
hs_stream->hs_rt = NULL;
|
|
|
|
|
FREE(hs_stream);
|
|
|
|
|
}
|
|
|
|
|
|
2023-03-22 11:10:00 +08:00
|
|
|
/*
 * qsort()-style comparator for unsigned long long values.
 *
 * p1, p2: pointers to the two unsigned long long elements to compare.
 * Returns 1 if *p1 > *p2, -1 if *p1 < *p2, and 0 if they are equal.
 *
 * The elements are read through const-qualified pointers so the const
 * qualifier of the arguments is not cast away.
 */
static int cmp_ull_p(const void *p1, const void *p2)
{
    const unsigned long long lhs = *(const unsigned long long *)p1;
    const unsigned long long rhs = *(const unsigned long long *)p2;

    /* Comparison-based form: cannot overflow the way a subtraction would. */
    return (lhs > rhs) - (lhs < rhs);
}
|
|
|
|
|
|
2023-02-07 11:25:31 +08:00
|
|
|
int adapter_hs_scan_stream(struct adapter_hs_stream *hs_stream, const char *data, size_t data_len,
|
|
|
|
|
struct hs_scan_result *results, size_t n_result, size_t *n_hit_result)
|
2022-11-17 05:05:35 +08:00
|
|
|
{
|
|
|
|
|
hs_error_t err;
|
|
|
|
|
|
2023-02-27 10:07:37 +08:00
|
|
|
if (NULL == hs_stream || NULL == data || 0 == data_len ||
|
|
|
|
|
NULL == results || 0 == n_result || NULL == n_hit_result) {
|
|
|
|
|
return -1;
|
|
|
|
|
}
|
|
|
|
|
|
2023-03-17 11:32:13 +08:00
|
|
|
/*
|
|
|
|
|
In streaming mode, a non-zero return from the user-specified event-handler
|
|
|
|
|
function has consequences for the rest of that stream's lifetime: when a
|
|
|
|
|
non-zero return occurs, it signals that no more of the stream should be
|
|
|
|
|
scanned. Consequently if the user makes a subsequent call to
|
|
|
|
|
`hs_scan_stream` on a stream whose processing was terminated in this way,
|
|
|
|
|
hs_scan_stream will return `HS_SCAN_TERMINATED`. This case has not been
|
|
|
|
|
demonstrated in pcapscan, as its callback always returns 0.
|
|
|
|
|
*/
|
|
|
|
|
|
2023-03-16 09:55:35 +08:00
|
|
|
int err_count = 0;
|
2022-11-17 05:05:35 +08:00
|
|
|
int thread_id = hs_stream->thread_id;
|
|
|
|
|
if (hs_stream->literal_stream != NULL) {
|
2023-02-03 17:28:14 +08:00
|
|
|
err = hs_scan_stream(hs_stream->literal_stream, data, data_len,
|
2023-03-22 11:10:00 +08:00
|
|
|
0, hs_stream->hs_rt->literal_scratchs[thread_id],
|
2023-03-17 11:32:13 +08:00
|
|
|
matched_event_cb, &hs_stream->matched_pat_container);
|
2023-03-17 17:28:52 +08:00
|
|
|
if (err != HS_SUCCESS) {
|
2023-03-16 09:55:35 +08:00
|
|
|
err_count++;
|
2022-11-17 05:05:35 +08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (hs_stream->regex_stream != NULL) {
|
2023-02-03 17:28:14 +08:00
|
|
|
err = hs_scan_stream(hs_stream->regex_stream, data, data_len,
|
2023-03-22 11:10:00 +08:00
|
|
|
0, hs_stream->hs_rt->regex_scratchs[thread_id],
|
2023-03-17 11:32:13 +08:00
|
|
|
matched_event_cb, &hs_stream->matched_pat_container);
|
2023-03-17 17:28:52 +08:00
|
|
|
if (err != HS_SUCCESS) {
|
2023-03-16 09:55:35 +08:00
|
|
|
err_count++;
|
2022-11-17 05:05:35 +08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2023-03-22 11:10:00 +08:00
|
|
|
if (err_count == 2) {
|
2023-03-16 09:55:35 +08:00
|
|
|
return -1;
|
|
|
|
|
}
|
|
|
|
|
|
2023-03-22 11:10:00 +08:00
|
|
|
size_t n_item = HASH_COUNT(hs_stream->matched_pat_container.pat_hash);
|
|
|
|
|
if (0 == n_item) {
|
2023-03-17 11:32:13 +08:00
|
|
|
*n_hit_result = 0;
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
2023-03-23 11:57:17 +08:00
|
|
|
if (n_item > MAX_SCANNER_HIT_PATTERN_NUM) {
|
|
|
|
|
n_item = MAX_SCANNER_HIT_PATTERN_NUM;
|
2023-03-22 11:10:00 +08:00
|
|
|
}
|
|
|
|
|
|
2023-03-23 11:57:17 +08:00
|
|
|
unsigned long long item_ids[MAX_SCANNER_HIT_PATTERN_NUM];
|
|
|
|
|
memset(item_ids, 0, sizeof(unsigned long long) * MAX_SCANNER_HIT_PATTERN_NUM);
|
2023-03-22 11:10:00 +08:00
|
|
|
|
|
|
|
|
int i = 0;
|
|
|
|
|
struct matched_pattern *pat = NULL, *tmp_pat = NULL;
|
|
|
|
|
HASH_ITER(hh, hs_stream->matched_pat_container.pat_hash, pat, tmp_pat) {
|
2023-03-23 11:57:17 +08:00
|
|
|
if (i >= MAX_SCANNER_HIT_PATTERN_NUM) {
|
|
|
|
|
break;
|
|
|
|
|
}
|
2023-03-22 11:10:00 +08:00
|
|
|
item_ids[i++] = pat->pattern_id;
|
2022-11-17 05:05:35 +08:00
|
|
|
}
|
2023-03-22 11:10:00 +08:00
|
|
|
qsort(item_ids, n_item, sizeof(unsigned long long), cmp_ull_p);
|
2022-11-17 05:05:35 +08:00
|
|
|
|
2023-02-07 11:25:31 +08:00
|
|
|
int ret = 0;
|
2023-03-17 17:28:52 +08:00
|
|
|
int real_matched_index = 0;
|
|
|
|
|
struct hs_tag *hs_tag = NULL;
|
|
|
|
|
struct bool_expr_match *bool_matcher_results = ALLOC(struct bool_expr_match, hs_stream->n_expr);
|
2023-03-22 11:10:00 +08:00
|
|
|
int bool_matcher_ret = bool_matcher_match(hs_stream->hs_rt->bm, item_ids, n_item,
|
2023-03-15 11:36:54 +08:00
|
|
|
bool_matcher_results, hs_stream->n_expr);
|
2023-02-07 11:25:31 +08:00
|
|
|
if (bool_matcher_ret < 0) {
|
|
|
|
|
ret = -1;
|
|
|
|
|
goto next;
|
2022-11-17 05:05:35 +08:00
|
|
|
}
|
|
|
|
|
|
2023-02-09 22:13:15 +08:00
|
|
|
if (bool_matcher_ret > (int)n_result) {
|
2023-02-07 11:25:31 +08:00
|
|
|
bool_matcher_ret = n_result;
|
|
|
|
|
}
|
2022-11-17 05:05:35 +08:00
|
|
|
|
2023-03-17 17:28:52 +08:00
|
|
|
for (int index = 0; index < bool_matcher_ret; index++) {
|
|
|
|
|
hs_tag = (struct hs_tag *)bool_matcher_results[index].user_tag;
|
|
|
|
|
|
|
|
|
|
int tag_ret = hs_tag_validate(hs_tag, &hs_stream->matched_pat_container, data_len);
|
|
|
|
|
if (tag_ret < 0) {
|
|
|
|
|
//bool_matcher_results[index] is invalid hit, continue
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
results[real_matched_index].item_id = bool_matcher_results[index].expr_id;
|
|
|
|
|
results[real_matched_index].user_tag = hs_tag->user_tag;
|
|
|
|
|
real_matched_index++;
|
2023-02-07 11:25:31 +08:00
|
|
|
}
|
2023-03-17 17:28:52 +08:00
|
|
|
*n_hit_result = real_matched_index;
|
2023-02-07 11:25:31 +08:00
|
|
|
next:
|
|
|
|
|
FREE(bool_matcher_results);
|
2023-03-17 11:32:13 +08:00
|
|
|
|
|
|
|
|
struct matched_pattern *pattern = NULL, *tmp_pattern = NULL;
|
|
|
|
|
HASH_ITER(hh, hs_stream->matched_pat_container.pat_hash, pattern, tmp_pattern) {
|
|
|
|
|
HASH_DELETE(hh, hs_stream->matched_pat_container.pat_hash, pattern);
|
|
|
|
|
FREE(pattern);
|
|
|
|
|
}
|
|
|
|
|
|
2023-02-07 11:25:31 +08:00
|
|
|
return ret;
|
2022-11-17 05:05:35 +08:00
|
|
|
}
|
|
|
|
|
|
2023-03-17 17:28:52 +08:00
|
|
|
/*
 * One-shot scan of a single buffer.
 *
 * Opens a temporary stream on hs_instance for thread_id, scans `data` on it
 * with adapter_hs_scan_stream(), then closes the stream again.
 *
 * Returns -1 when any argument is invalid (NULL pointer, empty data or zero
 * result capacity); otherwise returns whatever adapter_hs_scan_stream()
 * returned. The stream pointer is passed on unchecked, so a failed open is
 * reported through the scan call's own return value.
 */
int adapter_hs_scan(struct adapter_hs *hs_instance, int thread_id,
                    const char *data, size_t data_len,
                    struct hs_scan_result *results,
                    size_t n_result, size_t *n_hit_result)
{
    int args_ok = (hs_instance != NULL) && (data != NULL) && (data_len != 0) &&
                  (results != NULL) && (n_result != 0) && (n_hit_result != NULL);
    if (!args_ok) {
        return -1;
    }

    struct adapter_hs_stream *one_shot = adapter_hs_stream_open(hs_instance, thread_id);
    int scan_ret = adapter_hs_scan_stream(one_shot, data, data_len,
                                          results, n_result, n_hit_result);
    adapter_hs_stream_close(one_shot);

    return scan_ret;
}
|