2023-01-30 21:59:35 +08:00
|
|
|
/*
|
|
|
|
|
**********************************************************************************************
|
2023-05-04 17:10:19 +08:00
|
|
|
* File: maat_expr.c
|
2023-01-30 21:59:35 +08:00
|
|
|
* Description:
|
|
|
|
|
* Authors: Liu WenTan <liuwentan@geedgenetworks.com>
|
|
|
|
|
* Date: 2022-10-31
|
2023-05-04 17:10:19 +08:00
|
|
|
* Copyright: (c) Since 2022 Geedge Networks, Ltd. All rights reserved.
|
2023-01-30 21:59:35 +08:00
|
|
|
***********************************************************************************************
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
#include <string.h>
|
|
|
|
|
#include <assert.h>
|
2023-02-09 22:13:15 +08:00
|
|
|
#include <ctype.h>
|
2023-01-30 21:59:35 +08:00
|
|
|
|
|
|
|
|
#include "maat_expr.h"
|
|
|
|
|
#include "adapter_hs.h"
|
|
|
|
|
#include "maat_utils.h"
|
|
|
|
|
#include "maat_kv.h"
|
|
|
|
|
#include "maat_limits.h"
|
|
|
|
|
#include "rcu_hash.h"
|
2023-11-24 11:05:52 +08:00
|
|
|
#include "maat.h"
|
2024-08-21 08:39:28 +00:00
|
|
|
#include "maat_core.h"
|
2023-01-31 20:39:53 +08:00
|
|
|
#include "maat_compile.h"
|
2023-02-03 17:28:14 +08:00
|
|
|
#include "maat_group.h"
|
|
|
|
|
#include "alignment.h"
|
2023-01-30 21:59:35 +08:00
|
|
|
#include "maat_garbage_collection.h"
|
|
|
|
|
|
|
|
|
|
#define MODULE_EXPR module_name_str("maat.expr")
|
|
|
|
|
|
2023-11-24 11:05:52 +08:00
|
|
|
/*
   If expr_engine_type == MAAT_EXPR_ENGINE_AUTO and the number of patterns is less than 50K,
   expr_engine_type = MAAT_EXPR_ENGINE_HS; otherwise expr_engine_type = MAAT_EXPR_ENGINE_RS.
*/
|
|
|
|
|
#define ENGINE_TYPE_SWITCH_THRESHOLD 50000
|
|
|
|
|
|
2023-01-30 21:59:35 +08:00
|
|
|
struct expr_schema {
|
|
|
|
|
int item_id_column;
|
|
|
|
|
int group_id_column;
|
|
|
|
|
int district_column;
|
|
|
|
|
int keywords_column;
|
2024-08-19 11:04:17 +00:00
|
|
|
int expr_type_column;
|
2023-11-20 18:50:11 +08:00
|
|
|
int table_id;
|
2023-11-24 11:05:52 +08:00
|
|
|
enum maat_expr_engine engine_type;
|
2023-02-03 17:28:14 +08:00
|
|
|
struct table_manager *ref_tbl_mgr;
|
2023-01-30 21:59:35 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
/*
 * Expression kind of an item. int_to_expr_type() currently produces only
 * EXPR_TYPE_AND, EXPR_TYPE_REGEX and EXPR_TYPE_INVALID.
 */
enum expr_type {
    EXPR_TYPE_INVALID = -1, /* unknown value in the expr_type column */
    EXPR_TYPE_STRING  = 0,
    EXPR_TYPE_AND     = 1,  /* '&'-separated sub-patterns */
    EXPR_TYPE_REGEX   = 2,  /* keywords are one regular expression */
    EXPR_TYPE_OFFSET  = 3,
    EXPR_TYPE_MAX     = 4   /* number of valid types; keep last */
};
|
|
|
|
|
|
|
|
|
|
/*
 * Match position selector.
 * NOTE(review): not referenced in the visible part of this file; the names
 * suggest substring / suffix / prefix / whole-string matching - confirm.
 */
enum match_method {
    MATCH_METHOD_SUB      = 0,
    MATCH_METHOD_RIGHT    = 1,
    MATCH_METHOD_LEFT     = 2,
    MATCH_METHOD_COMPLETE = 3,
    MATCH_METHOD_MAX      = 4  /* number of methods; keep last */
};
|
|
|
|
|
|
|
|
|
|
struct expr_item {
|
2023-02-22 15:22:41 +08:00
|
|
|
long long item_id;
|
|
|
|
|
long long group_id;
|
2023-08-11 17:06:22 +08:00
|
|
|
char keywords[MAX_KEYWORDS_STR_LEN + 1];
|
2023-01-30 21:59:35 +08:00
|
|
|
enum expr_type expr_type;
|
2023-05-07 23:09:33 +08:00
|
|
|
void *user_data;
|
2023-06-09 16:44:47 +08:00
|
|
|
int district_id;
|
2023-01-30 21:59:35 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
struct expr_runtime {
|
2023-08-10 16:10:50 +08:00
|
|
|
struct expr_matcher *matcher;
|
2023-06-12 18:22:01 +08:00
|
|
|
struct rcu_hash_table *item_hash; // <item_id, struct expr_item>
|
2023-01-30 21:59:35 +08:00
|
|
|
|
2023-04-13 14:56:35 +08:00
|
|
|
long long version; //expr_rt version
|
2023-04-12 19:20:05 +08:00
|
|
|
long long rule_num;
|
2023-05-07 23:09:33 +08:00
|
|
|
long long regex_rule_num;
|
2023-04-20 15:34:56 +08:00
|
|
|
size_t n_worker_thread;
|
2023-05-07 23:09:33 +08:00
|
|
|
|
2023-01-30 21:59:35 +08:00
|
|
|
struct log_handle *logger;
|
2023-05-07 23:09:33 +08:00
|
|
|
struct maat_garbage_bin *ref_garbage_bin;
|
|
|
|
|
|
2023-11-24 11:05:52 +08:00
|
|
|
enum expr_engine_type engine_type;
|
2023-04-04 15:59:34 +08:00
|
|
|
int district_num;
|
|
|
|
|
struct maat_kv_store *district_map;
|
|
|
|
|
struct maat_kv_store *tmp_district_map;
|
2023-01-30 21:59:35 +08:00
|
|
|
|
2023-12-27 12:04:15 +08:00
|
|
|
long long *scan_times;
|
2023-04-20 15:34:56 +08:00
|
|
|
long long *scan_cpu_time;
|
|
|
|
|
long long *scan_bytes;
|
2023-12-27 12:04:15 +08:00
|
|
|
|
|
|
|
|
long long *hit_times;
|
|
|
|
|
long long *hit_item_num;
|
|
|
|
|
long long *hit_pattern_num;
|
|
|
|
|
|
|
|
|
|
long long update_err_cnt;
|
2023-01-30 21:59:35 +08:00
|
|
|
};
|
|
|
|
|
|
2023-12-27 12:04:15 +08:00
|
|
|
/* Pairs a matcher stream handle with the runtime it is associated with. */
struct expr_runtime_stream {
    struct expr_runtime *ref_expr_rt;   /* associated runtime */
    struct expr_matcher_stream *handle; /* matcher stream handle */
};
|
|
|
|
|
|
|
|
|
|
static enum expr_type int_to_expr_type(int expr_type) {
|
2023-02-16 11:13:23 +08:00
|
|
|
enum expr_type type = EXPR_TYPE_INVALID;
|
2023-01-30 21:59:35 +08:00
|
|
|
|
|
|
|
|
switch (expr_type) {
|
|
|
|
|
case 0:
|
|
|
|
|
case 1:
|
2024-08-19 11:04:17 +00:00
|
|
|
case 3:
|
2023-01-30 21:59:35 +08:00
|
|
|
type = EXPR_TYPE_AND;
|
|
|
|
|
break;
|
|
|
|
|
case 2:
|
|
|
|
|
type = EXPR_TYPE_REGEX;
|
|
|
|
|
break;
|
|
|
|
|
default:
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return type;
|
|
|
|
|
}
|
|
|
|
|
|
2023-06-20 07:00:49 +00:00
|
|
|
static int expr_runtime_get_district_id(struct expr_runtime *expr_rt,
|
|
|
|
|
const char *district)
|
2023-04-04 15:59:34 +08:00
|
|
|
{
|
|
|
|
|
long long district_id = DISTRICT_ANY;
|
|
|
|
|
|
2023-10-30 08:00:49 +00:00
|
|
|
int map_ret = maat_kv_read(expr_rt->district_map, district, &district_id, 1);
|
2023-04-04 15:59:34 +08:00
|
|
|
if (map_ret < 0) {
|
|
|
|
|
if (NULL == expr_rt->tmp_district_map) {
|
|
|
|
|
expr_rt->tmp_district_map = maat_kv_store_duplicate(expr_rt->district_map);
|
|
|
|
|
}
|
|
|
|
|
|
2023-10-30 08:00:49 +00:00
|
|
|
map_ret = maat_kv_read(expr_rt->tmp_district_map, district, &district_id, 1);
|
2023-04-04 15:59:34 +08:00
|
|
|
if (map_ret < 0) {
|
|
|
|
|
district_id = expr_rt->district_num;
|
|
|
|
|
maat_kv_register(expr_rt->tmp_district_map, district, district_id);
|
|
|
|
|
expr_rt->district_num++;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2023-06-09 16:44:47 +08:00
|
|
|
return (int)district_id;
|
2023-04-04 15:59:34 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
int expr_runtime_set_scan_district(struct expr_runtime *expr_rt, const char *district,
|
|
|
|
|
size_t district_len, long long *district_id)
|
|
|
|
|
{
|
|
|
|
|
if (NULL == expr_rt || NULL == district || 0 == district_len) {
|
|
|
|
|
return -1;
|
|
|
|
|
}
|
|
|
|
|
|
2023-10-30 08:00:49 +00:00
|
|
|
return maat_kv_read_unNull(expr_rt->district_map, district, district_len,
|
|
|
|
|
district_id, 1);
|
2023-04-04 15:59:34 +08:00
|
|
|
}
|
|
|
|
|
|
2023-06-20 07:00:49 +00:00
|
|
|
static struct expr_item *
|
|
|
|
|
expr_item_new(struct expr_schema *expr_schema, const char *table_name,
|
|
|
|
|
const char *line, struct expr_runtime *expr_rt)
|
2023-01-30 21:59:35 +08:00
|
|
|
{
|
|
|
|
|
size_t column_offset = 0;
|
|
|
|
|
size_t column_len = 0;
|
|
|
|
|
int expr_type = -1;
|
2023-02-03 17:28:14 +08:00
|
|
|
enum table_type table_type = TABLE_TYPE_INVALID;
|
2023-01-30 21:59:35 +08:00
|
|
|
struct expr_item *expr_item = ALLOC(struct expr_item, 1);
|
|
|
|
|
|
2023-05-30 16:16:18 +08:00
|
|
|
int ret = get_column_pos(line, expr_schema->item_id_column, &column_offset,
|
|
|
|
|
&column_len);
|
2023-01-30 21:59:35 +08:00
|
|
|
if (ret < 0) {
|
2023-11-10 08:26:48 +00:00
|
|
|
log_fatal(expr_rt->logger, MODULE_EXPR,
|
2023-05-30 16:16:18 +08:00
|
|
|
"[%s:%d] expr table:<%s> has no item_id in line:%s",
|
|
|
|
|
__FUNCTION__, __LINE__, table_name, line);
|
2023-01-30 21:59:35 +08:00
|
|
|
goto error;
|
|
|
|
|
}
|
2023-02-22 15:08:52 +08:00
|
|
|
expr_item->item_id = atoll(line + column_offset);
|
2023-01-30 21:59:35 +08:00
|
|
|
|
2023-05-30 16:16:18 +08:00
|
|
|
ret = get_column_pos(line, expr_schema->group_id_column, &column_offset,
|
|
|
|
|
&column_len);
|
2023-01-30 21:59:35 +08:00
|
|
|
if (ret < 0) {
|
2023-11-10 08:26:48 +00:00
|
|
|
log_fatal(expr_rt->logger, MODULE_EXPR,
|
2023-05-30 16:16:18 +08:00
|
|
|
"[%s:%d] expr table:<%s> has no group_id in line:%s",
|
|
|
|
|
__FUNCTION__, __LINE__, table_name, line);
|
2023-01-30 21:59:35 +08:00
|
|
|
goto error;
|
|
|
|
|
}
|
2023-02-22 15:08:52 +08:00
|
|
|
expr_item->group_id = atoll(line + column_offset);
|
2023-01-30 21:59:35 +08:00
|
|
|
|
2023-05-09 17:45:43 +08:00
|
|
|
ret = get_column_pos(line, expr_schema->keywords_column, &column_offset, &column_len);
|
|
|
|
|
if (ret < 0) {
|
2023-11-10 08:26:48 +00:00
|
|
|
log_fatal(expr_rt->logger, MODULE_EXPR,
|
2023-05-30 16:16:18 +08:00
|
|
|
"[%s:%d] expr table:<%s> has no keywords in line:%s",
|
|
|
|
|
__FUNCTION__, __LINE__, table_name, line);
|
2023-05-09 17:45:43 +08:00
|
|
|
goto error;
|
|
|
|
|
}
|
|
|
|
|
|
2023-08-11 17:06:22 +08:00
|
|
|
if (column_len > MAX_KEYWORDS_STR_LEN) {
|
2023-11-10 08:26:48 +00:00
|
|
|
log_fatal(expr_rt->logger, MODULE_EXPR,
|
2023-05-30 16:16:18 +08:00
|
|
|
"[%s:%d] expr table:<%s> keywords length too long in line:%s",
|
|
|
|
|
__FUNCTION__, __LINE__, table_name, line);
|
2023-05-09 17:45:43 +08:00
|
|
|
goto error;
|
|
|
|
|
}
|
|
|
|
|
memcpy(expr_item->keywords, (line + column_offset), column_len);
|
|
|
|
|
|
|
|
|
|
ret = get_column_pos(line, expr_schema->expr_type_column, &column_offset, &column_len);
|
|
|
|
|
if (ret < 0) {
|
2024-08-19 11:04:17 +00:00
|
|
|
log_fatal(expr_rt->logger, MODULE_EXPR,
|
2023-05-30 16:16:18 +08:00
|
|
|
"[%s:%d] expr table:<%s> has no expr_type in line:%s",
|
|
|
|
|
__FUNCTION__, __LINE__, table_name, line);
|
2023-05-09 17:45:43 +08:00
|
|
|
goto error;
|
|
|
|
|
}
|
2024-08-19 11:04:17 +00:00
|
|
|
|
2023-05-09 17:45:43 +08:00
|
|
|
expr_type = atoi(line + column_offset);
|
|
|
|
|
expr_item->expr_type = int_to_expr_type(expr_type);
|
|
|
|
|
if (expr_item->expr_type == EXPR_TYPE_INVALID) {
|
2024-08-19 11:04:17 +00:00
|
|
|
log_fatal(expr_rt->logger, MODULE_EXPR,
|
2023-05-30 16:16:18 +08:00
|
|
|
"[%s:%d] expr table:<%s> has invalid expr_type in line:%s",
|
|
|
|
|
__FUNCTION__, __LINE__, table_name, line);
|
2023-05-09 17:45:43 +08:00
|
|
|
goto error;
|
|
|
|
|
} else if (expr_item->expr_type == EXPR_TYPE_REGEX) {
|
2023-08-10 16:10:50 +08:00
|
|
|
ret = expr_matcher_verify_regex_expression(expr_item->keywords, expr_rt->logger);
|
|
|
|
|
if (0 == ret) {
|
2024-08-19 11:04:17 +00:00
|
|
|
log_fatal(expr_rt->logger, MODULE_EXPR,
|
2023-05-30 16:16:18 +08:00
|
|
|
"[%s:%d] expr table:<%s> regex expression(item_id:%lld):%s illegal,"
|
|
|
|
|
" will be dropped", __FUNCTION__, __LINE__, table_name,
|
|
|
|
|
expr_item->item_id, expr_item->keywords);
|
2023-05-09 17:45:43 +08:00
|
|
|
goto error;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2023-02-07 11:25:31 +08:00
|
|
|
table_type = table_manager_get_table_type(expr_schema->ref_tbl_mgr, expr_schema->table_id);
|
2023-02-03 17:28:14 +08:00
|
|
|
if (table_type == TABLE_TYPE_EXPR_PLUS) {
|
2023-01-30 21:59:35 +08:00
|
|
|
ret = get_column_pos(line, expr_schema->district_column, &column_offset, &column_len);
|
|
|
|
|
if (ret < 0) {
|
2023-02-03 17:28:14 +08:00
|
|
|
goto error;
|
2023-01-30 21:59:35 +08:00
|
|
|
}
|
|
|
|
|
|
2023-08-11 17:06:22 +08:00
|
|
|
if (column_len > MAX_DISTRICT_STR_LEN) {
|
2023-11-10 08:26:48 +00:00
|
|
|
log_fatal(expr_rt->logger, MODULE_EXPR,
|
2023-08-11 17:06:22 +08:00
|
|
|
"[%s:%d] expr table:<%s> district length exceed maximum:%d"
|
|
|
|
|
" in line:%s", __FUNCTION__, __LINE__, table_name,
|
|
|
|
|
MAX_DISTRICT_STR_LEN, line);
|
2023-02-03 17:28:14 +08:00
|
|
|
goto error;
|
2023-01-30 21:59:35 +08:00
|
|
|
}
|
2023-02-03 17:28:14 +08:00
|
|
|
|
2023-08-11 17:06:22 +08:00
|
|
|
char district[MAX_DISTRICT_STR_LEN + 1] = {0};
|
2023-02-03 17:28:14 +08:00
|
|
|
memcpy(district, (line + column_offset), column_len);
|
|
|
|
|
assert(strlen(district) > 0);
|
|
|
|
|
str_unescape(district);
|
2023-04-04 15:59:34 +08:00
|
|
|
expr_item->district_id = expr_runtime_get_district_id(expr_rt, district);
|
2023-02-23 19:08:26 +08:00
|
|
|
} else {
|
|
|
|
|
expr_item->district_id = DISTRICT_ANY;
|
2023-01-30 21:59:35 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return expr_item;
|
|
|
|
|
error:
|
|
|
|
|
FREE(expr_item);
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
2023-02-03 17:28:14 +08:00
|
|
|
/*
 * Build an expr_schema from a table's JSON description.
 *
 * Required keys: "table_id" (number), "table_type" (string) and a "custom"
 * object holding numeric "item_id", "group_id", "keywords" and "expr_type"
 * column numbers; "district" is additionally required when table_type is
 * "expr_plus". The optional "expr_engine" string must be "hyperscan" or
 * "rulescan"; when it is absent the engine stays MAAT_EXPR_ENGINE_AUTO.
 *
 * Returns the new schema (release with expr_schema_free()), or NULL after
 * logging when a key is missing or invalid. tbl_mgr is stored, not copied.
 */
void *expr_schema_new(cJSON *json, struct table_manager *tbl_mgr,
                      const char *table_name, struct log_handle *logger)
{
    int is_expr_plus = 0;
    struct expr_schema *expr_schema = ALLOC(struct expr_schema, 1);
    expr_schema->engine_type = MAAT_EXPR_ENGINE_AUTO;

    cJSON *custom_item = NULL;
    cJSON *item = cJSON_GetObjectItem(json, "table_id");
    if (item != NULL && item->type == cJSON_Number) {
        expr_schema->table_id = item->valueint;
    } else {
        log_fatal(logger, MODULE_EXPR,
                  "[%s:%d] expr table:<%s> schema has no table_id column",
                  __FUNCTION__, __LINE__, table_name);
        goto error;
    }

    /* table_type already validate in maat_table_new() */
    item = cJSON_GetObjectItem(json, "table_type");
    if (NULL == item || item->type != cJSON_String || NULL == item->valuestring) {
        log_fatal(logger, MODULE_EXPR,
                  "[%s:%d] expr table:<%s> schema has no table_type column",
                  __FUNCTION__, __LINE__, table_name);
        goto error;
    }
    /* Compare in place: copying the string into a fixed-size stack buffer
     * would overflow for a table_type longer than that buffer. */
    is_expr_plus = (strcmp(item->valuestring, "expr_plus") == 0);

    item = cJSON_GetObjectItem(json, "expr_engine");
    if (item != NULL && item->type == cJSON_String) {
        if (strcmp(item->valuestring, "hyperscan") == 0) {
            expr_schema->engine_type = MAAT_EXPR_ENGINE_HS;
        } else if (strcmp(item->valuestring, "rulescan") == 0) {
            expr_schema->engine_type = MAAT_EXPR_ENGINE_RS;
        } else {
            log_fatal(logger, MODULE_EXPR,
                      "[%s:%d] expr table:<%s> schema has invalid expr_engine",
                      __FUNCTION__, __LINE__, table_name);
            goto error;
        }
    }

    item = cJSON_GetObjectItem(json, "custom");
    if (item == NULL || item->type != cJSON_Object) {
        log_fatal(logger, MODULE_EXPR,
                  "[%s:%d] expr table:<%s> schema has no custom column",
                  __FUNCTION__, __LINE__, table_name);
        goto error;
    }

    custom_item = cJSON_GetObjectItem(item, "item_id");
    if (custom_item != NULL && custom_item->type == cJSON_Number) {
        expr_schema->item_id_column = custom_item->valueint;
    } else {
        log_fatal(logger, MODULE_EXPR,
                  "[%s:%d] expr table:<%s> schema has no item_id column",
                  __FUNCTION__, __LINE__, table_name);
        goto error;
    }

    custom_item = cJSON_GetObjectItem(item, "group_id");
    if (custom_item != NULL && custom_item->type == cJSON_Number) {
        expr_schema->group_id_column = custom_item->valueint;
    } else {
        log_fatal(logger, MODULE_EXPR,
                  "[%s:%d] expr table:<%s> schema has no group_id column",
                  __FUNCTION__, __LINE__, table_name);
        goto error;
    }

    custom_item = cJSON_GetObjectItem(item, "keywords");
    if (custom_item != NULL && custom_item->type == cJSON_Number) {
        expr_schema->keywords_column = custom_item->valueint;
    } else {
        log_fatal(logger, MODULE_EXPR,
                  "[%s:%d] expr table:<%s> schema has no keywords column",
                  __FUNCTION__, __LINE__, table_name);
        goto error;
    }

    /* expr_plus has district */
    if (is_expr_plus) {
        custom_item = cJSON_GetObjectItem(item, "district");
        if (custom_item != NULL && custom_item->type == cJSON_Number) {
            expr_schema->district_column = custom_item->valueint;
        } else {
            log_fatal(logger, MODULE_EXPR,
                      "[%s:%d] expr_plus table:<%s> schema has no district column",
                      __FUNCTION__, __LINE__, table_name);
            goto error;
        }
    }

    custom_item = cJSON_GetObjectItem(item, "expr_type");
    if (custom_item != NULL && custom_item->type == cJSON_Number) {
        expr_schema->expr_type_column = custom_item->valueint;
    } else {
        log_fatal(logger, MODULE_EXPR,
                  "[%s:%d] expr table:<%s> schema has no expr_type column",
                  __FUNCTION__, __LINE__, table_name);
        goto error;
    }

    expr_schema->ref_tbl_mgr = tbl_mgr;

    return expr_schema;
error:
    FREE(expr_schema);
    return NULL;
}
|
|
|
|
|
|
|
|
|
|
/* Release a schema obtained from expr_schema_new(). */
void expr_schema_free(void *expr_schema)
{
    FREE(expr_schema);
}
|
|
|
|
|
|
2023-06-20 07:00:49 +00:00
|
|
|
static void expr_rule_reset(struct expr_rule *rule)
|
2023-01-31 20:39:53 +08:00
|
|
|
{
|
2023-05-07 23:09:33 +08:00
|
|
|
if (NULL == rule) {
|
2023-01-31 20:39:53 +08:00
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
2023-05-07 23:09:33 +08:00
|
|
|
for (size_t i = 0; i < rule->n_patterns; i++) {
|
|
|
|
|
FREE(rule->patterns[i].pat);
|
2023-01-31 20:39:53 +08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2023-06-20 07:00:49 +00:00
|
|
|
static void expr_item_free(struct expr_item *item)
|
2023-01-31 20:39:53 +08:00
|
|
|
{
|
2023-05-23 03:23:39 +00:00
|
|
|
if (NULL == item) {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
2023-06-21 18:36:20 +08:00
|
|
|
if (item->user_data != NULL) {
|
|
|
|
|
FREE(item->user_data);
|
2023-05-07 23:09:33 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
FREE(item);
|
2023-01-31 20:39:53 +08:00
|
|
|
}
|
|
|
|
|
|
2023-06-20 07:00:49 +00:00
|
|
|
/*
 * Value destructor registered with rcu_hash_new() for item_hash.
 * user_ctx is not used; data is the struct expr_item to release.
 */
static void expr_item_free_cb(void *user_ctx, void *data)
{
    expr_item_free((struct expr_item *)data);
}
|
|
|
|
|
|
2023-04-20 15:34:56 +08:00
|
|
|
void *expr_runtime_new(void *expr_schema, size_t max_thread_num,
|
2023-04-13 14:56:35 +08:00
|
|
|
struct maat_garbage_bin *garbage_bin,
|
2023-01-30 21:59:35 +08:00
|
|
|
struct log_handle *logger)
|
|
|
|
|
{
|
|
|
|
|
if (NULL == expr_schema) {
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
2023-08-10 16:10:50 +08:00
|
|
|
struct expr_schema *schema = (struct expr_schema *)expr_schema;
|
2023-01-30 21:59:35 +08:00
|
|
|
struct expr_runtime *expr_rt = ALLOC(struct expr_runtime, 1);
|
|
|
|
|
|
2023-06-19 09:44:25 +00:00
|
|
|
expr_rt->item_hash = rcu_hash_new(expr_item_free_cb, NULL, 0);
|
2023-01-31 20:39:53 +08:00
|
|
|
expr_rt->n_worker_thread = max_thread_num;
|
2023-01-30 21:59:35 +08:00
|
|
|
expr_rt->ref_garbage_bin = garbage_bin;
|
|
|
|
|
expr_rt->logger = logger;
|
2023-04-04 15:59:34 +08:00
|
|
|
expr_rt->district_map = maat_kv_store_new();
|
2023-11-24 15:36:27 +08:00
|
|
|
if (schema->engine_type == MAAT_EXPR_ENGINE_AUTO) {
|
|
|
|
|
expr_rt->engine_type = table_manager_get_expr_engine(schema->ref_tbl_mgr);
|
|
|
|
|
} else {
|
|
|
|
|
expr_rt->engine_type = schema->engine_type;
|
|
|
|
|
}
|
|
|
|
|
|
2023-12-27 12:04:15 +08:00
|
|
|
expr_rt->scan_times = alignment_int64_array_alloc(max_thread_num);
|
2023-04-20 15:34:56 +08:00
|
|
|
expr_rt->scan_bytes = alignment_int64_array_alloc(max_thread_num);
|
2023-04-24 19:18:12 +08:00
|
|
|
expr_rt->scan_cpu_time = alignment_int64_array_alloc(max_thread_num);
|
2023-01-30 21:59:35 +08:00
|
|
|
|
2023-12-27 12:04:15 +08:00
|
|
|
expr_rt->hit_times = alignment_int64_array_alloc(max_thread_num);
|
|
|
|
|
expr_rt->hit_item_num = alignment_int64_array_alloc(max_thread_num);
|
|
|
|
|
expr_rt->hit_pattern_num = alignment_int64_array_alloc(max_thread_num);
|
|
|
|
|
|
2023-01-30 21:59:35 +08:00
|
|
|
return expr_rt;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void expr_runtime_free(void *expr_runtime)
|
|
|
|
|
{
|
|
|
|
|
if (NULL == expr_runtime) {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
struct expr_runtime *expr_rt = (struct expr_runtime *)expr_runtime;
|
2023-08-10 16:10:50 +08:00
|
|
|
if (expr_rt->matcher != NULL) {
|
|
|
|
|
expr_matcher_free(expr_rt->matcher);
|
|
|
|
|
expr_rt->matcher = NULL;
|
2023-01-30 21:59:35 +08:00
|
|
|
}
|
|
|
|
|
|
2023-05-09 17:45:43 +08:00
|
|
|
if (expr_rt->item_hash != NULL) {
|
|
|
|
|
rcu_hash_free(expr_rt->item_hash);
|
|
|
|
|
expr_rt->item_hash = NULL;
|
2023-01-30 21:59:35 +08:00
|
|
|
}
|
2023-02-03 17:28:14 +08:00
|
|
|
|
2023-04-04 15:59:34 +08:00
|
|
|
assert(expr_rt->tmp_district_map == NULL);
|
|
|
|
|
|
|
|
|
|
if (expr_rt->district_map != NULL) {
|
|
|
|
|
maat_kv_store_free(expr_rt->district_map);
|
|
|
|
|
expr_rt->district_map = NULL;
|
|
|
|
|
}
|
|
|
|
|
|
2023-12-27 12:04:15 +08:00
|
|
|
if (expr_rt->scan_times != NULL) {
|
|
|
|
|
alignment_int64_array_free(expr_rt->scan_times);
|
|
|
|
|
expr_rt->scan_times = NULL;
|
2023-04-20 15:34:56 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (expr_rt->scan_cpu_time != NULL) {
|
|
|
|
|
alignment_int64_array_free(expr_rt->scan_cpu_time);
|
|
|
|
|
expr_rt->scan_cpu_time = NULL;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (expr_rt->scan_bytes != NULL) {
|
|
|
|
|
alignment_int64_array_free(expr_rt->scan_bytes);
|
|
|
|
|
expr_rt->scan_bytes = NULL;
|
2023-02-03 17:28:14 +08:00
|
|
|
}
|
|
|
|
|
|
2023-12-27 12:04:15 +08:00
|
|
|
if (expr_rt->hit_times != NULL) {
|
|
|
|
|
alignment_int64_array_free(expr_rt->hit_times);
|
|
|
|
|
expr_rt->hit_times = NULL;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (expr_rt->hit_item_num != NULL) {
|
|
|
|
|
alignment_int64_array_free(expr_rt->hit_item_num);
|
|
|
|
|
expr_rt->hit_item_num = NULL;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (expr_rt->hit_pattern_num != NULL) {
|
|
|
|
|
alignment_int64_array_free(expr_rt->hit_pattern_num);
|
|
|
|
|
expr_rt->hit_pattern_num = NULL;
|
|
|
|
|
}
|
|
|
|
|
|
2023-01-30 21:59:35 +08:00
|
|
|
FREE(expr_rt);
|
|
|
|
|
}
|
|
|
|
|
|
2023-06-20 07:00:49 +00:00
|
|
|
static int expr_runtime_update_row(struct expr_runtime *expr_rt, char *key,
|
|
|
|
|
size_t key_len, struct expr_item *item,
|
|
|
|
|
int is_valid)
|
2023-01-30 21:59:35 +08:00
|
|
|
{
|
2023-03-15 13:30:39 +08:00
|
|
|
int ret = -1;
|
2023-01-30 21:59:35 +08:00
|
|
|
|
|
|
|
|
if (0 == is_valid) {
|
|
|
|
|
//delete
|
2023-05-09 17:45:43 +08:00
|
|
|
rcu_hash_del(expr_rt->item_hash, key, key_len);
|
2023-01-30 21:59:35 +08:00
|
|
|
} else {
|
|
|
|
|
//add
|
2023-05-09 17:45:43 +08:00
|
|
|
ret = rcu_hash_add(expr_rt->item_hash, key, key_len, (void *)item);
|
2023-03-15 13:30:39 +08:00
|
|
|
if (ret < 0) {
|
2024-04-24 08:16:57 +00:00
|
|
|
log_debug(expr_rt->logger, MODULE_EXPR,
|
2023-05-09 17:45:43 +08:00
|
|
|
"[%s:%d] expr item(item_id:%lld) add to item_hash failed",
|
2023-05-07 23:09:33 +08:00
|
|
|
__FUNCTION__, __LINE__, item->item_id);
|
2023-03-15 11:36:54 +08:00
|
|
|
return -1;
|
|
|
|
|
}
|
2023-01-30 21:59:35 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
2023-02-16 11:13:23 +08:00
|
|
|
/*
 * Convert one hexadecimal digit character to its value.
 *
 * '0'..'9' map to 0..9; 'a'..'f' and 'A'..'F' map to 10..15.
 * The input is not validated: a non-hex character produces a meaningless
 * number, so callers must pass hex digits only.
 */
static int convertHextoint(char srctmp)
{
    /* <ctype.h> functions require a value representable as unsigned char
     * (or EOF); a plain char may be negative, which is undefined behavior. */
    unsigned char ch = (unsigned char)srctmp;

    if (isdigit(ch)) {
        return ch - '0';
    }

    return toupper(ch) - 'A' + 10;
}
|
|
|
|
|
|
2023-02-16 11:13:23 +08:00
|
|
|
/*
 * Decode a hexadecimal string into raw bytes.
 *
 * hex      hex digits; not validated
 * hex_len  number of characters of hex to consume
 * binary   output buffer; must hold at least size + 1 bytes because a
 *          terminating '\0' is written after the decoded data
 * size     maximum number of decoded bytes to produce
 *
 * Returns the number of decoded bytes. A trailing unpaired digit (odd
 * hex_len) is ignored.
 */
static size_t hex2bin(char *hex, int hex_len, char *binary, size_t size)
{
    size_t resultlen = 0;

    /* Consume whole digit pairs only: the i + 1 bound keeps the low-nibble
     * read inside hex[0 .. hex_len) when hex_len is odd. */
    for (int i = 0; i + 1 < hex_len && resultlen < size; i += 2) {
        int high = convertHextoint(hex[i]);
        int low = convertHextoint(hex[i + 1]);

        binary[resultlen++] = (char)(high * 16 + low);
    }

    binary[resultlen] = '\0';

    return resultlen;
}
|
|
|
|
|
|
2024-08-19 11:04:17 +00:00
|
|
|
static int expr_keywords_to_expr_pattern(char *keywords, struct expr_pattern *pattern, struct log_handle *logger)
|
|
|
|
|
{
|
|
|
|
|
char *ctrl_str = NULL;
|
|
|
|
|
char *expr_str = NULL;
|
|
|
|
|
int case_ctrl_flag = 0;
|
|
|
|
|
|
|
|
|
|
pattern->match_mode = EXPR_MATCH_MODE_SUB;
|
|
|
|
|
pattern->case_sensitive = EXPR_CASE_INSENSITIVE;
|
|
|
|
|
/* -1 means offset no limit, As long as the pattern appears in the scan data, it will hit */
|
|
|
|
|
pattern->start_offset = -1;
|
|
|
|
|
pattern->end_offset = -1;
|
|
|
|
|
|
|
|
|
|
if (keywords[0] == '(') {
|
|
|
|
|
ctrl_str = keywords + 1;
|
|
|
|
|
char *ctrl_str_end = strchr(ctrl_str, ')');
|
|
|
|
|
if (NULL == ctrl_str_end) {
|
|
|
|
|
return -1;
|
|
|
|
|
}
|
|
|
|
|
ctrl_str_end[0] = '\0';
|
|
|
|
|
expr_str = ctrl_str_end + 1;
|
|
|
|
|
} else {
|
|
|
|
|
expr_str = keywords;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (ctrl_str != NULL) {
|
|
|
|
|
char case_switch[8] = {0};
|
|
|
|
|
char *nocase_str = strstr(ctrl_str, "nocase");
|
|
|
|
|
if (nocase_str) {
|
|
|
|
|
case_ctrl_flag = 1;
|
|
|
|
|
sscanf(nocase_str, "nocase=%s", case_switch);
|
|
|
|
|
if (strcmp(case_switch, "off") == 0) {
|
|
|
|
|
pattern->case_sensitive = EXPR_CASE_SENSITIVE;
|
|
|
|
|
} else {
|
|
|
|
|
pattern->case_sensitive = EXPR_CASE_INSENSITIVE;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
char *offset_str = strstr(ctrl_str, "offset");
|
|
|
|
|
char *depth_str = strstr(ctrl_str, "depth");
|
|
|
|
|
if (offset_str && depth_str) {
|
|
|
|
|
sscanf(offset_str, "offset=%d", &pattern->start_offset);
|
|
|
|
|
sscanf(depth_str, "depth=%d", &pattern->end_offset);
|
|
|
|
|
pattern->match_mode = EXPR_MATCH_MODE_SUB;
|
|
|
|
|
|
|
|
|
|
if (pattern->start_offset < 0 || pattern->end_offset <= 0 || (pattern->start_offset > pattern->end_offset)) {
|
|
|
|
|
return -1;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (expr_str[0] == '^') {
|
|
|
|
|
pattern->match_mode = EXPR_MATCH_MODE_PREFIX;
|
|
|
|
|
expr_str++;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
char *expr_suffix = strchr_esc(expr_str, '$');
|
|
|
|
|
if (expr_suffix != NULL) {
|
|
|
|
|
expr_suffix[0] = '\0';
|
|
|
|
|
if (pattern->match_mode == EXPR_MATCH_MODE_PREFIX) {
|
|
|
|
|
pattern->match_mode = EXPR_MATCH_MODE_EXACTLY;
|
|
|
|
|
} else {
|
|
|
|
|
pattern->match_mode = EXPR_MATCH_MODE_SUFFIX;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
char *hex_str_start = strchr_esc(expr_str, '|');
|
|
|
|
|
char *tmp_start_str = expr_str;
|
|
|
|
|
char *tmp_end_str = NULL;
|
|
|
|
|
char tmp_keywords[MAX_KEYWORDS_STR_LEN + 1] = {0};
|
|
|
|
|
size_t pattern_len = 0;
|
|
|
|
|
|
|
|
|
|
if (hex_str_start && !case_ctrl_flag) {
|
|
|
|
|
pattern->case_sensitive = EXPR_CASE_SENSITIVE;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
while (hex_str_start != NULL) {
|
|
|
|
|
hex_str_start[0] = '\0';
|
|
|
|
|
hex_str_start++;
|
|
|
|
|
|
|
|
|
|
tmp_end_str = strchr_esc(hex_str_start, '|');
|
|
|
|
|
if (tmp_end_str == NULL) {
|
|
|
|
|
return -1;
|
|
|
|
|
}
|
|
|
|
|
tmp_end_str[0] = '\0';
|
|
|
|
|
tmp_end_str++;
|
|
|
|
|
|
|
|
|
|
size_t region_str_len = strlen(hex_str_start) * 8;
|
|
|
|
|
char *region_string = ALLOC(char, region_str_len + 1);
|
|
|
|
|
region_str_len = hex2bin(hex_str_start, strlen(hex_str_start), region_string, region_str_len);
|
|
|
|
|
|
|
|
|
|
tmp_start_str = str_unescape(tmp_start_str);
|
|
|
|
|
snprintf(tmp_keywords + pattern_len, MAX_KEYWORDS_STR_LEN - pattern_len, "%s%s", tmp_start_str, region_string);
|
|
|
|
|
pattern_len = strlen(tmp_keywords);
|
|
|
|
|
|
|
|
|
|
if (region_string != NULL) {
|
|
|
|
|
FREE(region_string);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
tmp_start_str = tmp_end_str;
|
|
|
|
|
hex_str_start = strchr_esc(tmp_start_str, '|');
|
|
|
|
|
}
|
|
|
|
|
if (tmp_end_str != NULL && tmp_end_str[0] != '\0') {
|
|
|
|
|
tmp_end_str = str_unescape(tmp_end_str);
|
|
|
|
|
snprintf(tmp_keywords + pattern_len, MAX_KEYWORDS_STR_LEN - pattern_len, "%s%s", tmp_start_str, tmp_end_str);
|
|
|
|
|
pattern_len = strlen(tmp_keywords);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (pattern_len == 0) {
|
|
|
|
|
expr_str = str_unescape(expr_str);
|
|
|
|
|
pattern->pat_len = strlen(expr_str);
|
|
|
|
|
pattern->pat = ALLOC(char, pattern->pat_len + 1);
|
|
|
|
|
memcpy(pattern->pat, expr_str, pattern->pat_len);
|
|
|
|
|
|
|
|
|
|
} else {
|
|
|
|
|
pattern->pat = ALLOC(char, pattern_len + 1);
|
|
|
|
|
memcpy(pattern->pat, tmp_keywords, pattern_len);
|
|
|
|
|
pattern->pat_len = pattern_len;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
2023-01-31 20:39:53 +08:00
|
|
|
#define MAAT_MAX_EXPR_ITEM_NUM 8
|
2023-06-20 07:00:49 +00:00
|
|
|
static int expr_item_to_expr_rule(struct expr_item *expr_item,
|
|
|
|
|
struct expr_rule *expr_rule,
|
|
|
|
|
struct log_handle *logger)
|
2023-01-30 21:59:35 +08:00
|
|
|
{
|
|
|
|
|
size_t i = 0;
|
|
|
|
|
size_t sub_expr_cnt = 0;
|
|
|
|
|
char *pos = NULL;
|
2023-02-09 22:13:15 +08:00
|
|
|
char *tmp = NULL;
|
2023-01-30 21:59:35 +08:00
|
|
|
char *saveptr = NULL;
|
2024-04-23 02:33:49 +00:00
|
|
|
char tmp_keywords[MAX_KEYWORDS_STR_LEN + 1];
|
2023-02-16 11:13:23 +08:00
|
|
|
|
2024-04-23 02:33:49 +00:00
|
|
|
memcpy(tmp_keywords, expr_item->keywords, MAX_KEYWORDS_STR_LEN + 1);
|
|
|
|
|
|
2023-01-30 21:59:35 +08:00
|
|
|
switch (expr_item->expr_type) {
|
|
|
|
|
case EXPR_TYPE_AND:
|
2024-04-23 02:33:49 +00:00
|
|
|
for (i = 0, pos = tmp_keywords; ; i++, pos = NULL) {
|
2023-02-09 22:13:15 +08:00
|
|
|
tmp = strtok_r_esc(pos, '&', &saveptr);
|
2023-01-30 21:59:35 +08:00
|
|
|
if (NULL == tmp) {
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (i >= MAAT_MAX_EXPR_ITEM_NUM) {
|
2023-11-10 08:26:48 +00:00
|
|
|
log_fatal(logger, MODULE_EXPR,
|
2024-08-19 11:04:17 +00:00
|
|
|
"[%s:%d]abandon config expr_item(item_id:%d) "
|
|
|
|
|
"too many patterns", __FUNCTION__, __LINE__,
|
|
|
|
|
expr_item->item_id);
|
2023-05-07 23:09:33 +08:00
|
|
|
return -1;
|
2023-01-30 21:59:35 +08:00
|
|
|
}
|
|
|
|
|
|
2024-08-19 11:04:17 +00:00
|
|
|
if (expr_keywords_to_expr_pattern(tmp, &expr_rule->patterns[i], logger) < 0) {
|
2023-11-10 08:26:48 +00:00
|
|
|
log_fatal(logger, MODULE_EXPR,
|
2024-08-19 11:04:17 +00:00
|
|
|
"[%s:%d]abandon config expr_item(item_id:%d) "
|
|
|
|
|
"has invalid pattern %s", __FUNCTION__, __LINE__,
|
|
|
|
|
expr_item->item_id, tmp);
|
2023-05-07 23:09:33 +08:00
|
|
|
return -1;
|
2023-02-09 22:13:15 +08:00
|
|
|
}
|
2024-08-19 11:04:17 +00:00
|
|
|
expr_rule->patterns[i].type = EXPR_PATTERN_TYPE_STR;
|
2023-02-09 22:13:15 +08:00
|
|
|
}
|
|
|
|
|
sub_expr_cnt = i;
|
|
|
|
|
break;
|
2024-08-19 11:04:17 +00:00
|
|
|
case EXPR_TYPE_REGEX:
|
2023-05-10 13:33:50 +08:00
|
|
|
sub_expr_cnt = 1;
|
2024-08-19 11:04:17 +00:00
|
|
|
size_t pat_len = strlen(tmp_keywords);
|
|
|
|
|
expr_rule->patterns[0].pat = ALLOC(char, pat_len + 1);
|
|
|
|
|
memcpy(expr_rule->patterns[0].pat, tmp_keywords, pat_len);
|
|
|
|
|
expr_rule->patterns[0].pat_len = pat_len;
|
|
|
|
|
expr_rule->patterns[0].type = EXPR_PATTERN_TYPE_REG;
|
|
|
|
|
expr_rule->patterns[0].match_mode = EXPR_MATCH_MODE_SUB;
|
|
|
|
|
expr_rule->patterns[0].case_sensitive = EXPR_CASE_INSENSITIVE;
|
|
|
|
|
expr_rule->patterns[0].start_offset = -1;
|
|
|
|
|
expr_rule->patterns[0].end_offset = -1;
|
2023-05-10 13:33:50 +08:00
|
|
|
break;
|
2023-01-30 21:59:35 +08:00
|
|
|
default:
|
2023-11-10 08:26:48 +00:00
|
|
|
log_fatal(logger, MODULE_EXPR,
|
2024-08-19 11:04:17 +00:00
|
|
|
"[%s:%d]abandon config expr_item(item_id:%lld) has "
|
|
|
|
|
"invalid expr type=%d", __FUNCTION__, __LINE__,
|
|
|
|
|
expr_item->item_id, expr_item->expr_type);
|
2023-05-07 23:09:33 +08:00
|
|
|
return -1;
|
2023-01-30 21:59:35 +08:00
|
|
|
}
|
|
|
|
|
|
2023-02-15 11:53:46 +08:00
|
|
|
expr_rule->expr_id = expr_item->item_id;
|
2023-08-10 16:10:50 +08:00
|
|
|
expr_rule->tag = expr_item->user_data;
|
2023-01-30 21:59:35 +08:00
|
|
|
expr_rule->n_patterns = sub_expr_cnt;
|
|
|
|
|
|
2023-05-07 23:09:33 +08:00
|
|
|
return 0;
|
2023-01-30 21:59:35 +08:00
|
|
|
}
|
|
|
|
|
|
2023-02-03 17:28:14 +08:00
|
|
|
int expr_runtime_update(void *expr_runtime, void *expr_schema,
|
2023-03-29 22:25:14 +08:00
|
|
|
const char *table_name, const char *line,
|
|
|
|
|
int valid_column)
|
2023-01-30 21:59:35 +08:00
|
|
|
{
|
2023-02-07 11:25:31 +08:00
|
|
|
if (NULL == expr_runtime || NULL == expr_schema ||
|
|
|
|
|
NULL == line) {
|
2023-01-30 21:59:35 +08:00
|
|
|
return -1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
struct expr_schema *schema = (struct expr_schema *)expr_schema;
|
|
|
|
|
struct expr_runtime *expr_rt = (struct expr_runtime *)expr_runtime;
|
|
|
|
|
|
2023-02-22 15:08:52 +08:00
|
|
|
long long item_id = get_column_value(line, schema->item_id_column);
|
|
|
|
|
if (item_id < 0) {
|
2023-11-10 08:26:48 +00:00
|
|
|
log_fatal(expr_rt->logger, MODULE_EXPR,
|
2023-05-30 16:16:18 +08:00
|
|
|
"[%s:%d] expr table:<%s> has no item_id(column seq:%d)"
|
|
|
|
|
" in table_line:%s", __FUNCTION__, __LINE__, table_name,
|
|
|
|
|
schema->item_id_column, line);
|
2023-04-20 15:34:56 +08:00
|
|
|
expr_rt->update_err_cnt++;
|
2023-02-22 15:08:52 +08:00
|
|
|
return -1;
|
|
|
|
|
}
|
|
|
|
|
|
2023-01-30 21:59:35 +08:00
|
|
|
int is_valid = get_column_value(line, valid_column);
|
|
|
|
|
if (is_valid < 0) {
|
2023-11-10 08:26:48 +00:00
|
|
|
log_fatal(expr_rt->logger, MODULE_EXPR,
|
2023-05-30 16:16:18 +08:00
|
|
|
"[%s:%d] expr table:<%s> has no is_valid(column seq:%d)"
|
|
|
|
|
" in table_line:%s", __FUNCTION__, __LINE__, table_name,
|
|
|
|
|
valid_column, line);
|
2023-04-20 15:34:56 +08:00
|
|
|
expr_rt->update_err_cnt++;
|
2023-01-30 21:59:35 +08:00
|
|
|
return -1;
|
2023-05-07 23:09:33 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
struct expr_item *expr_item = NULL;
|
|
|
|
|
if (1 == is_valid) {
|
2023-01-30 21:59:35 +08:00
|
|
|
//add
|
2023-05-30 16:16:18 +08:00
|
|
|
expr_item = expr_item_new(schema, table_name, line, expr_rt);
|
2023-01-30 21:59:35 +08:00
|
|
|
if (NULL == expr_item) {
|
2023-04-20 15:34:56 +08:00
|
|
|
expr_rt->update_err_cnt++;
|
2023-01-30 21:59:35 +08:00
|
|
|
return -1;
|
|
|
|
|
}
|
|
|
|
|
|
2023-06-14 09:10:16 +00:00
|
|
|
int *item_district_id = ALLOC(int, 1);
|
|
|
|
|
*item_district_id = expr_item->district_id;
|
|
|
|
|
|
|
|
|
|
expr_item->user_data = item_district_id;
|
2023-01-30 21:59:35 +08:00
|
|
|
}
|
|
|
|
|
|
2023-05-07 23:09:33 +08:00
|
|
|
int ret = expr_runtime_update_row(expr_rt, (char *)&item_id, sizeof(long long),
|
|
|
|
|
expr_item, is_valid);
|
2023-01-30 21:59:35 +08:00
|
|
|
if (ret < 0) {
|
2023-05-07 23:09:33 +08:00
|
|
|
if (expr_item != NULL) {
|
|
|
|
|
expr_item_free(expr_item);
|
2023-01-30 21:59:35 +08:00
|
|
|
}
|
2024-04-24 08:16:57 +00:00
|
|
|
//don't return failed, ignore the case of adding duplicate keys
|
2023-01-30 21:59:35 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
2023-08-10 16:10:50 +08:00
|
|
|
/*
 * Garbage-bin callback: releases an expr_matcher via expr_matcher_free().
 * The second argument is not used.
 */
static void garbage_expr_matcher_free(void *expr_matcher, void *arg)
{
    (void)arg;
    expr_matcher_free((struct expr_matcher *)expr_matcher);
}
|
|
|
|
|
|
2023-11-24 11:05:52 +08:00
|
|
|
const char *expr_engine_int2str(enum expr_engine_type type)
|
|
|
|
|
{
|
|
|
|
|
switch (type) {
|
|
|
|
|
case EXPR_ENGINE_TYPE_HS:
|
|
|
|
|
return "hyperscan";
|
|
|
|
|
case EXPR_ENGINE_TYPE_RS:
|
|
|
|
|
return "rulescan";
|
|
|
|
|
default:
|
|
|
|
|
return "unknown";
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2023-05-30 16:16:18 +08:00
|
|
|
int expr_runtime_commit(void *expr_runtime, const char *table_name,
|
|
|
|
|
long long maat_rt_version)
|
2023-01-30 21:59:35 +08:00
|
|
|
{
|
|
|
|
|
if (NULL == expr_runtime) {
|
|
|
|
|
return -1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
struct expr_runtime *expr_rt = (struct expr_runtime *)expr_runtime;
|
|
|
|
|
|
2023-05-09 17:45:43 +08:00
|
|
|
int updating_flag = rcu_hash_is_updating(expr_rt->item_hash);
|
2023-03-15 11:36:54 +08:00
|
|
|
if (0 == updating_flag) {
|
2023-01-30 21:59:35 +08:00
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
2023-04-04 15:59:34 +08:00
|
|
|
if (expr_rt->tmp_district_map != NULL) {
|
|
|
|
|
struct maat_kv_store *tmp_map = expr_rt->district_map;
|
|
|
|
|
expr_rt->district_map = expr_rt->tmp_district_map;
|
|
|
|
|
expr_rt->tmp_district_map = NULL;
|
|
|
|
|
maat_garbage_bagging(expr_rt->ref_garbage_bin, tmp_map, NULL,
|
|
|
|
|
garbage_maat_kv_store_free);
|
|
|
|
|
}
|
|
|
|
|
|
2023-05-07 23:09:33 +08:00
|
|
|
int ret = 0;
|
|
|
|
|
size_t i = 0;
|
|
|
|
|
size_t real_rule_cnt = 0;
|
2023-11-24 11:05:52 +08:00
|
|
|
size_t real_lit_rule_cnt = 0;
|
2023-05-31 09:13:14 +00:00
|
|
|
size_t real_regex_rule_cnt = 0;
|
2023-05-07 23:09:33 +08:00
|
|
|
struct expr_rule *rules = NULL;
|
2023-03-15 11:36:54 +08:00
|
|
|
void **ex_data_array = NULL;
|
2024-05-08 03:43:55 +00:00
|
|
|
enum expr_engine_type engine_type;
|
2023-05-07 23:09:33 +08:00
|
|
|
|
2023-05-09 17:45:43 +08:00
|
|
|
size_t rule_cnt = rcu_updating_hash_list(expr_rt->item_hash, &ex_data_array);
|
2023-03-15 11:36:54 +08:00
|
|
|
if (rule_cnt > 0) {
|
2023-05-07 23:09:33 +08:00
|
|
|
rules = ALLOC(struct expr_rule, rule_cnt);
|
|
|
|
|
for (i = 0; i < rule_cnt; i++) {
|
|
|
|
|
struct expr_item *expr_item = (struct expr_item *)ex_data_array[i];
|
2023-05-31 09:13:14 +00:00
|
|
|
struct expr_rule tmp_rule = {0};
|
2024-04-23 02:33:49 +00:00
|
|
|
|
2023-05-07 23:09:33 +08:00
|
|
|
ret = expr_item_to_expr_rule(expr_item, &tmp_rule, expr_rt->logger);
|
|
|
|
|
if (ret < 0) {
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
2023-05-31 09:13:14 +00:00
|
|
|
rules[real_rule_cnt++] = tmp_rule;
|
|
|
|
|
|
|
|
|
|
if (expr_item->expr_type == EXPR_TYPE_REGEX) {
|
|
|
|
|
real_regex_rule_cnt++;
|
2023-11-24 11:05:52 +08:00
|
|
|
} else {
|
|
|
|
|
real_lit_rule_cnt++;
|
2023-05-31 09:13:14 +00:00
|
|
|
}
|
2023-03-15 11:36:54 +08:00
|
|
|
}
|
2023-01-30 21:59:35 +08:00
|
|
|
}
|
|
|
|
|
|
2023-11-24 11:05:52 +08:00
|
|
|
if (expr_rt->engine_type == EXPR_ENGINE_TYPE_AUTO) {
|
|
|
|
|
if (real_lit_rule_cnt <= ENGINE_TYPE_SWITCH_THRESHOLD) {
|
2024-05-08 03:43:55 +00:00
|
|
|
engine_type = EXPR_ENGINE_TYPE_HS;
|
2023-11-24 11:05:52 +08:00
|
|
|
} else {
|
2024-05-08 03:43:55 +00:00
|
|
|
engine_type = EXPR_ENGINE_TYPE_RS;
|
2023-11-24 11:05:52 +08:00
|
|
|
}
|
2024-05-08 03:43:55 +00:00
|
|
|
} else {
|
|
|
|
|
engine_type = expr_rt->engine_type;
|
2023-11-24 11:05:52 +08:00
|
|
|
}
|
|
|
|
|
|
2023-08-10 16:10:50 +08:00
|
|
|
struct expr_matcher *new_matcher = NULL;
|
|
|
|
|
struct expr_matcher *old_matcher = NULL;
|
2023-03-27 15:52:47 +08:00
|
|
|
|
|
|
|
|
if (rule_cnt > 0) {
|
2023-10-11 06:53:03 +00:00
|
|
|
struct timespec start, end;
|
|
|
|
|
clock_gettime(CLOCK_MONOTONIC, &start);
|
2024-05-08 03:43:55 +00:00
|
|
|
new_matcher = expr_matcher_new(rules, real_rule_cnt, engine_type,
|
2023-08-10 16:10:50 +08:00
|
|
|
expr_rt->n_worker_thread, expr_rt->logger);
|
2023-10-11 06:53:03 +00:00
|
|
|
clock_gettime(CLOCK_MONOTONIC, &end);
|
2023-11-24 11:05:52 +08:00
|
|
|
long long time_elapse_ms = (end.tv_sec - start.tv_sec) * 1000 +
|
2023-10-11 06:53:03 +00:00
|
|
|
(end.tv_nsec - start.tv_nsec) / 1000000;
|
|
|
|
|
|
2023-08-10 16:10:50 +08:00
|
|
|
if (NULL == new_matcher) {
|
2023-11-10 08:26:48 +00:00
|
|
|
log_fatal(expr_rt->logger, MODULE_EXPR,
|
2023-08-10 16:10:50 +08:00
|
|
|
"[%s:%d] table[%s] rebuild expr_matcher failed when update"
|
2023-05-31 09:13:14 +00:00
|
|
|
" %zu expr rules", __FUNCTION__, __LINE__, table_name, real_rule_cnt);
|
2023-03-27 15:52:47 +08:00
|
|
|
ret = -1;
|
2023-08-10 16:10:50 +08:00
|
|
|
} else {
|
|
|
|
|
log_info(expr_rt->logger, MODULE_EXPR,
|
2023-11-24 11:05:52 +08:00
|
|
|
"table[%s] has %zu rules, commit %zu expr rules(literal_rules:%zu regex_rules:%zu)"
|
|
|
|
|
" and rebuild expr_matcher(%s) completed, version:%lld, consume:%lldms",
|
|
|
|
|
table_name, rule_cnt, real_rule_cnt, real_lit_rule_cnt, real_regex_rule_cnt,
|
2024-05-08 03:43:55 +00:00
|
|
|
expr_engine_int2str(engine_type), maat_rt_version, time_elapse_ms);
|
2023-03-27 15:52:47 +08:00
|
|
|
}
|
2023-01-30 21:59:35 +08:00
|
|
|
}
|
|
|
|
|
|
2023-08-10 16:10:50 +08:00
|
|
|
old_matcher = expr_rt->matcher;
|
|
|
|
|
expr_rt->matcher = new_matcher;
|
2023-05-09 17:45:43 +08:00
|
|
|
rcu_hash_commit(expr_rt->item_hash);
|
2023-05-07 23:09:33 +08:00
|
|
|
|
2023-08-10 16:10:50 +08:00
|
|
|
if (old_matcher != NULL) {
|
|
|
|
|
maat_garbage_bagging(expr_rt->ref_garbage_bin, old_matcher, NULL, garbage_expr_matcher_free);
|
2023-03-15 11:36:54 +08:00
|
|
|
}
|
2023-05-07 23:09:33 +08:00
|
|
|
|
2023-05-31 09:13:14 +00:00
|
|
|
expr_rt->rule_num = real_rule_cnt;
|
|
|
|
|
expr_rt->regex_rule_num = real_regex_rule_cnt;
|
2023-04-13 14:56:35 +08:00
|
|
|
expr_rt->version = maat_rt_version;
|
|
|
|
|
|
2023-03-15 11:36:54 +08:00
|
|
|
if (rules != NULL) {
|
2023-05-07 23:09:33 +08:00
|
|
|
for (i = 0; i < rule_cnt; i++) {
|
|
|
|
|
expr_rule_reset(&rules[i]);
|
|
|
|
|
}
|
|
|
|
|
FREE(rules);
|
2023-03-15 11:36:54 +08:00
|
|
|
}
|
2023-01-30 21:59:35 +08:00
|
|
|
|
2023-03-15 11:36:54 +08:00
|
|
|
if (ex_data_array != NULL) {
|
|
|
|
|
FREE(ex_data_array);
|
|
|
|
|
}
|
2023-01-30 21:59:35 +08:00
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
|
}
|
|
|
|
|
|
2023-04-12 19:20:05 +08:00
|
|
|
long long expr_runtime_rule_count(void *expr_runtime)
|
|
|
|
|
{
|
|
|
|
|
if (NULL == expr_runtime) {
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
struct expr_runtime *expr_rt = (struct expr_runtime *)expr_runtime;
|
|
|
|
|
return expr_rt->rule_num;
|
|
|
|
|
}
|
|
|
|
|
|
2023-04-20 15:34:56 +08:00
|
|
|
long long expr_runtime_regex_rule_count(void *expr_runtime)
|
|
|
|
|
{
|
|
|
|
|
if (NULL == expr_runtime) {
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
struct expr_runtime *expr_rt = (struct expr_runtime *)expr_runtime;
|
|
|
|
|
return expr_rt->regex_rule_num;
|
|
|
|
|
}
|
|
|
|
|
|
2023-04-13 14:56:35 +08:00
|
|
|
long long expr_runtime_get_version(void *expr_runtime)
|
|
|
|
|
{
|
|
|
|
|
if (NULL == expr_runtime) {
|
|
|
|
|
return -1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
struct expr_runtime *expr_rt = (struct expr_runtime *)expr_runtime;
|
|
|
|
|
return expr_rt->version;
|
|
|
|
|
}
|
|
|
|
|
|
2023-05-30 16:16:18 +08:00
|
|
|
int expr_runtime_scan(struct expr_runtime *expr_rt, int thread_id,
|
2023-10-24 21:19:33 +08:00
|
|
|
const char *data, size_t data_len,
|
2023-10-24 08:21:41 +00:00
|
|
|
int vtable_id, struct maat_state *state)
|
2023-01-30 21:59:35 +08:00
|
|
|
{
|
2024-04-11 16:16:04 +08:00
|
|
|
//clear compile_state->last_hit_group
|
|
|
|
|
if (state != NULL && state->compile_state != NULL) {
|
|
|
|
|
compile_state_clear_last_hit_group(state->compile_state);
|
|
|
|
|
}
|
|
|
|
|
|
2023-03-06 10:45:36 +08:00
|
|
|
if (0 == expr_rt->rule_num) {
|
|
|
|
|
//empty expr table
|
|
|
|
|
return 0;
|
2023-02-27 10:07:37 +08:00
|
|
|
}
|
|
|
|
|
|
2023-08-10 16:10:50 +08:00
|
|
|
if (NULL == expr_rt->matcher) {
|
2023-06-20 17:34:46 +08:00
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
2023-01-30 21:59:35 +08:00
|
|
|
size_t n_hit_item = 0;
|
2023-12-27 12:04:15 +08:00
|
|
|
size_t n_hit_pattern = 0;
|
2023-11-10 08:26:48 +00:00
|
|
|
struct expr_scan_result hit_results[MAX_HIT_ITEM_NUM];
|
2023-08-10 16:10:50 +08:00
|
|
|
int ret = expr_matcher_match(expr_rt->matcher, thread_id, data, data_len,
|
2023-12-27 12:04:15 +08:00
|
|
|
hit_results, MAX_HIT_ITEM_NUM, &n_hit_item,
|
|
|
|
|
&n_hit_pattern);
|
2023-01-30 21:59:35 +08:00
|
|
|
if (ret < 0) {
|
|
|
|
|
return -1;
|
|
|
|
|
}
|
|
|
|
|
|
2023-12-27 12:04:15 +08:00
|
|
|
if (n_hit_pattern > 0) {
|
|
|
|
|
alignment_int64_array_add(expr_rt->hit_pattern_num, state->thread_id,
|
|
|
|
|
n_hit_pattern);
|
|
|
|
|
}
|
|
|
|
|
|
2023-10-18 03:32:53 +00:00
|
|
|
struct maat_item hit_maat_items[n_hit_item];
|
2023-12-27 12:04:15 +08:00
|
|
|
size_t real_hit_item_num = 0;
|
2023-10-18 03:32:53 +00:00
|
|
|
|
2023-02-03 17:28:14 +08:00
|
|
|
if (0 == n_hit_item) {
|
2023-10-18 03:32:53 +00:00
|
|
|
goto next;
|
2023-01-30 21:59:35 +08:00
|
|
|
}
|
|
|
|
|
|
2023-02-07 11:25:31 +08:00
|
|
|
for (size_t i = 0; i < n_hit_item; i++) {
|
2023-06-14 09:10:16 +00:00
|
|
|
int tag_district_id = *(int *)(hit_results[i].user_tag);
|
|
|
|
|
if (tag_district_id == state->district_id || tag_district_id == DISTRICT_ANY) {
|
2023-05-07 23:09:33 +08:00
|
|
|
long long item_id = hit_results[i].rule_id;
|
2023-06-14 09:10:16 +00:00
|
|
|
struct expr_item *expr_item = (struct expr_item *)rcu_hash_find(expr_rt->item_hash,
|
|
|
|
|
(char *)&item_id,
|
|
|
|
|
sizeof(long long));
|
2023-05-07 23:09:33 +08:00
|
|
|
if (!expr_item) {
|
|
|
|
|
// item config has been deleted
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
2023-12-27 12:04:15 +08:00
|
|
|
hit_maat_items[real_hit_item_num].item_id = item_id;
|
|
|
|
|
hit_maat_items[real_hit_item_num].group_id = expr_item->group_id;
|
|
|
|
|
real_hit_item_num++;
|
2023-02-07 11:25:31 +08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2024-04-02 14:29:34 +08:00
|
|
|
if (real_hit_item_num > 0) {
|
|
|
|
|
alignment_int64_array_add(expr_rt->hit_item_num, state->thread_id,
|
|
|
|
|
real_hit_item_num);
|
|
|
|
|
}
|
|
|
|
|
|
2023-10-18 03:32:53 +00:00
|
|
|
next:
|
2023-11-10 08:26:48 +00:00
|
|
|
if (NULL == state->compile_state) {
|
|
|
|
|
state->compile_state = compile_state_new();
|
|
|
|
|
alignment_int64_array_add(state->maat_inst->stat->compile_state_cnt,
|
|
|
|
|
state->thread_id, 1);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return compile_state_update(state->compile_state, state->maat_inst, vtable_id,
|
2023-12-27 12:04:15 +08:00
|
|
|
state->compile_table_id, state->Nth_scan,
|
|
|
|
|
hit_maat_items, real_hit_item_num);
|
2023-01-30 21:59:35 +08:00
|
|
|
}
|
|
|
|
|
|
2023-12-27 12:04:15 +08:00
|
|
|
struct expr_runtime_stream *
|
2023-05-30 16:16:18 +08:00
|
|
|
expr_runtime_stream_open(struct expr_runtime *expr_rt, int thread_id)
|
2023-01-30 21:59:35 +08:00
|
|
|
{
|
2023-02-27 10:07:37 +08:00
|
|
|
if (NULL == expr_rt || thread_id < 0) {
|
2023-03-22 11:10:00 +08:00
|
|
|
return NULL;
|
2023-02-27 10:07:37 +08:00
|
|
|
}
|
|
|
|
|
|
2023-12-27 12:04:15 +08:00
|
|
|
struct expr_runtime_stream *expr_rt_stream = ALLOC(struct expr_runtime_stream, 1);
|
|
|
|
|
|
|
|
|
|
expr_rt_stream->ref_expr_rt = expr_rt;
|
|
|
|
|
expr_rt_stream->handle = expr_matcher_stream_open(expr_rt->matcher, thread_id);
|
|
|
|
|
if (NULL == expr_rt_stream->handle) {
|
|
|
|
|
FREE(expr_rt_stream);
|
2023-07-18 18:54:34 +08:00
|
|
|
return NULL;
|
|
|
|
|
}
|
2023-04-20 15:34:56 +08:00
|
|
|
|
2023-12-27 12:04:15 +08:00
|
|
|
return expr_rt_stream;
|
2023-01-30 21:59:35 +08:00
|
|
|
}
|
|
|
|
|
|
2023-12-27 12:04:15 +08:00
|
|
|
int expr_runtime_stream_scan(struct expr_runtime_stream *expr_rt_stream,
|
2023-05-30 16:16:18 +08:00
|
|
|
const char *data, size_t data_len,
|
2023-10-24 21:19:33 +08:00
|
|
|
int vtable_id, struct maat_state *state)
|
2023-01-30 21:59:35 +08:00
|
|
|
{
|
2023-12-27 12:04:15 +08:00
|
|
|
struct expr_runtime *expr_rt = expr_rt_stream->ref_expr_rt;
|
2024-04-11 16:16:04 +08:00
|
|
|
|
|
|
|
|
//clear compile_state->last_hit_group
|
|
|
|
|
if (state != NULL && state->compile_state != NULL) {
|
|
|
|
|
compile_state_clear_last_hit_group(state->compile_state);
|
|
|
|
|
}
|
|
|
|
|
|
2023-03-06 10:45:36 +08:00
|
|
|
if (0 == expr_rt->rule_num) {
|
|
|
|
|
//empty expr table
|
|
|
|
|
return 0;
|
2023-01-30 21:59:35 +08:00
|
|
|
}
|
|
|
|
|
|
2024-04-11 16:16:04 +08:00
|
|
|
if (NULL == expr_rt_stream->handle) {
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
2023-02-07 11:25:31 +08:00
|
|
|
size_t n_hit_item = 0;
|
2023-12-27 12:04:15 +08:00
|
|
|
size_t n_hit_pattern = 0;
|
2023-11-10 08:26:48 +00:00
|
|
|
struct expr_scan_result hit_results[MAX_HIT_ITEM_NUM];
|
2023-04-04 15:59:34 +08:00
|
|
|
|
2023-12-27 12:04:15 +08:00
|
|
|
int ret = expr_matcher_stream_match(expr_rt_stream->handle, data, data_len, hit_results,
|
|
|
|
|
MAX_HIT_ITEM_NUM, &n_hit_item, &n_hit_pattern);
|
2023-02-07 11:25:31 +08:00
|
|
|
if (ret < 0) {
|
|
|
|
|
return -1;
|
|
|
|
|
}
|
|
|
|
|
|
2023-12-27 12:04:15 +08:00
|
|
|
if (n_hit_pattern > 0) {
|
|
|
|
|
alignment_int64_array_add(expr_rt->hit_pattern_num, state->thread_id,
|
|
|
|
|
n_hit_pattern);
|
|
|
|
|
}
|
|
|
|
|
|
2023-06-14 09:10:16 +00:00
|
|
|
struct maat_item hit_maat_items[n_hit_item];
|
2023-05-30 16:16:18 +08:00
|
|
|
struct expr_item *expr_item = NULL;
|
2023-05-07 23:09:33 +08:00
|
|
|
size_t real_hit_item_cnt = 0;
|
2023-02-22 15:08:52 +08:00
|
|
|
|
2023-10-18 03:32:53 +00:00
|
|
|
if (0 == n_hit_item) {
|
|
|
|
|
goto next;
|
|
|
|
|
}
|
|
|
|
|
|
2023-02-07 11:25:31 +08:00
|
|
|
for (size_t i = 0; i < n_hit_item; i++) {
|
2023-05-07 23:09:33 +08:00
|
|
|
long long item_id = hit_results[i].rule_id;
|
2023-05-30 16:16:18 +08:00
|
|
|
expr_item = (struct expr_item *)rcu_hash_find(expr_rt->item_hash,
|
|
|
|
|
(char *)&item_id,
|
|
|
|
|
sizeof(long long));
|
2023-05-07 23:09:33 +08:00
|
|
|
if (!expr_item) {
|
|
|
|
|
// item config has been deleted
|
|
|
|
|
continue;
|
|
|
|
|
}
|
2023-02-07 11:25:31 +08:00
|
|
|
|
2023-05-07 23:09:33 +08:00
|
|
|
hit_maat_items[real_hit_item_cnt].item_id = item_id;
|
|
|
|
|
hit_maat_items[real_hit_item_cnt].group_id = expr_item->group_id;
|
|
|
|
|
real_hit_item_cnt++;
|
2023-02-07 11:25:31 +08:00
|
|
|
}
|
|
|
|
|
|
2024-04-02 14:29:34 +08:00
|
|
|
if (real_hit_item_cnt > 0) {
|
|
|
|
|
alignment_int64_array_add(expr_rt->hit_item_num, state->thread_id,
|
|
|
|
|
real_hit_item_cnt);
|
|
|
|
|
}
|
|
|
|
|
|
2023-10-18 03:32:53 +00:00
|
|
|
next:
|
2023-11-10 08:26:48 +00:00
|
|
|
if (NULL == state->compile_state) {
|
|
|
|
|
state->compile_state = compile_state_new();
|
|
|
|
|
alignment_int64_array_add(state->maat_inst->stat->compile_state_cnt,
|
|
|
|
|
state->thread_id, 1);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return compile_state_update(state->compile_state, state->maat_inst, vtable_id,
|
2023-12-27 12:04:15 +08:00
|
|
|
state->compile_table_id, state->Nth_scan,
|
2023-11-10 08:26:48 +00:00
|
|
|
hit_maat_items, real_hit_item_cnt);
|
2023-01-30 21:59:35 +08:00
|
|
|
}
|
|
|
|
|
|
2023-12-27 12:04:15 +08:00
|
|
|
void expr_runtime_stream_close(struct expr_runtime_stream *expr_rt_stream)
|
2023-01-30 21:59:35 +08:00
|
|
|
{
|
2023-12-27 12:04:15 +08:00
|
|
|
if (NULL == expr_rt_stream) {
|
2023-07-18 18:54:34 +08:00
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
2023-12-27 12:04:15 +08:00
|
|
|
expr_rt_stream->ref_expr_rt = NULL;
|
|
|
|
|
if (expr_rt_stream->handle != NULL) {
|
|
|
|
|
expr_matcher_stream_close(expr_rt_stream->handle);
|
2023-04-20 15:34:56 +08:00
|
|
|
}
|
|
|
|
|
|
2023-12-27 12:04:15 +08:00
|
|
|
FREE(expr_rt_stream);
|
2023-02-03 17:28:14 +08:00
|
|
|
}
|
|
|
|
|
|
2023-04-20 15:34:56 +08:00
|
|
|
void expr_runtime_perf_stat(struct expr_runtime *expr_rt, size_t scan_len,
|
|
|
|
|
struct timespec *start, struct timespec *end,
|
|
|
|
|
int thread_id)
|
2023-02-03 17:28:14 +08:00
|
|
|
{
|
2023-04-20 15:34:56 +08:00
|
|
|
if (NULL == expr_rt || thread_id < 0) {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (start != NULL && end != NULL) {
|
2023-05-30 16:16:18 +08:00
|
|
|
long long consume_time = (end->tv_sec - start->tv_sec) * 1000000000 +
|
|
|
|
|
(end->tv_nsec - start->tv_nsec);
|
2023-04-20 15:34:56 +08:00
|
|
|
alignment_int64_array_add(expr_rt->scan_cpu_time, thread_id, consume_time);
|
|
|
|
|
}
|
2023-02-22 15:22:41 +08:00
|
|
|
}
|
2023-04-20 15:34:56 +08:00
|
|
|
|
2024-04-02 14:29:34 +08:00
|
|
|
void expr_runtime_scan_bytes_add(struct expr_runtime *expr_rt, int thread_id,
|
|
|
|
|
size_t scan_len)
|
|
|
|
|
{
|
|
|
|
|
if (NULL == expr_rt || thread_id < 0 || 0 == scan_len) {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
alignment_int64_array_add(expr_rt->scan_bytes, thread_id, scan_len);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
long long expr_runtime_scan_bytes(void *expr_runtime)
|
|
|
|
|
{
|
|
|
|
|
if (NULL == expr_runtime) {
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
struct expr_runtime *expr_rt = (struct expr_runtime *)expr_runtime;
|
|
|
|
|
long long sum = alignment_int64_array_sum(expr_rt->scan_bytes,
|
|
|
|
|
expr_rt->n_worker_thread);
|
|
|
|
|
alignment_int64_array_reset(expr_rt->scan_bytes, expr_rt->n_worker_thread);
|
|
|
|
|
|
|
|
|
|
return sum;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void expr_runtime_scan_times_inc(struct expr_runtime *expr_rt, int thread_id)
|
|
|
|
|
{
|
|
|
|
|
if (NULL == expr_rt || thread_id < 0) {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
alignment_int64_array_add(expr_rt->scan_times, thread_id, 1);
|
|
|
|
|
}
|
|
|
|
|
|
2023-12-27 12:04:15 +08:00
|
|
|
long long expr_runtime_scan_times(void *expr_runtime)
|
2023-04-20 15:34:56 +08:00
|
|
|
{
|
|
|
|
|
if (NULL == expr_runtime) {
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
struct expr_runtime *expr_rt = (struct expr_runtime *)expr_runtime;
|
2023-12-27 12:04:15 +08:00
|
|
|
long long sum = alignment_int64_array_sum(expr_rt->scan_times,
|
2023-04-20 15:34:56 +08:00
|
|
|
expr_rt->n_worker_thread);
|
2023-12-27 12:04:15 +08:00
|
|
|
alignment_int64_array_reset(expr_rt->scan_times, expr_rt->n_worker_thread);
|
2023-04-20 15:34:56 +08:00
|
|
|
|
|
|
|
|
return sum;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
long long expr_runtime_scan_cpu_time(void *expr_runtime)
|
|
|
|
|
{
|
|
|
|
|
if (NULL == expr_runtime) {
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
struct expr_runtime *expr_rt = (struct expr_runtime *)expr_runtime;
|
|
|
|
|
long long sum = alignment_int64_array_sum(expr_rt->scan_cpu_time,
|
|
|
|
|
expr_rt->n_worker_thread);
|
|
|
|
|
alignment_int64_array_reset(expr_rt->scan_cpu_time, expr_rt->n_worker_thread);
|
|
|
|
|
|
|
|
|
|
return sum;
|
|
|
|
|
}
|
|
|
|
|
|
2024-04-02 14:29:34 +08:00
|
|
|
void expr_runtime_stream_scan_times_inc(struct expr_runtime_stream *expr_rt_stream,
|
|
|
|
|
int thread_id)
|
|
|
|
|
{
|
|
|
|
|
if (NULL == expr_rt_stream || thread_id < 0) {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
struct expr_runtime *expr_rt = expr_rt_stream->ref_expr_rt;
|
|
|
|
|
alignment_int64_array_add(expr_rt->scan_times, thread_id, 1);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void expr_runtime_stream_scan_bytes_add(struct expr_runtime_stream *expr_rt_stream,
|
|
|
|
|
int thread_id, size_t scan_len)
|
|
|
|
|
{
|
|
|
|
|
if (NULL == expr_rt_stream || thread_id < 0) {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
struct expr_runtime *expr_rt = expr_rt_stream->ref_expr_rt;
|
|
|
|
|
alignment_int64_array_add(expr_rt->scan_bytes, thread_id, scan_len);
|
|
|
|
|
}
|
|
|
|
|
|
2023-12-27 12:04:15 +08:00
|
|
|
void expr_runtime_hit_times_inc(struct expr_runtime *expr_rt, int thread_id)
|
|
|
|
|
{
|
|
|
|
|
if (NULL == expr_rt || thread_id < 0) {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
alignment_int64_array_add(expr_rt->hit_times, thread_id, 1);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void expr_runtime_stream_hit_times_inc(struct expr_runtime_stream *expr_rt_stream,
|
|
|
|
|
int thread_id)
|
|
|
|
|
{
|
|
|
|
|
if (NULL == expr_rt_stream || thread_id < 0) {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
struct expr_runtime *expr_rt = expr_rt_stream->ref_expr_rt;
|
|
|
|
|
alignment_int64_array_add(expr_rt->hit_times, thread_id, 1);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
long long expr_runtime_hit_times(void *expr_runtime)
|
|
|
|
|
{
|
|
|
|
|
if (NULL == expr_runtime) {
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
struct expr_runtime *expr_rt = (struct expr_runtime *)expr_runtime;
|
|
|
|
|
long long sum = alignment_int64_array_sum(expr_rt->hit_times,
|
|
|
|
|
expr_rt->n_worker_thread);
|
|
|
|
|
alignment_int64_array_reset(expr_rt->hit_times,
|
|
|
|
|
expr_rt->n_worker_thread);
|
|
|
|
|
|
|
|
|
|
return sum;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
long long expr_runtime_hit_item_num(void *expr_runtime)
|
|
|
|
|
{
|
|
|
|
|
if (NULL == expr_runtime) {
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
struct expr_runtime *expr_rt = (struct expr_runtime *)expr_runtime;
|
|
|
|
|
long long sum = alignment_int64_array_sum(expr_rt->hit_item_num,
|
|
|
|
|
expr_rt->n_worker_thread);
|
|
|
|
|
alignment_int64_array_reset(expr_rt->hit_item_num, expr_rt->n_worker_thread);
|
|
|
|
|
|
|
|
|
|
return sum;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
long long expr_runtime_hit_pattern_num(void *expr_runtime)
|
2023-04-20 15:34:56 +08:00
|
|
|
{
|
|
|
|
|
if (NULL == expr_runtime) {
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
struct expr_runtime *expr_rt = (struct expr_runtime *)expr_runtime;
|
2023-12-27 12:04:15 +08:00
|
|
|
long long sum = alignment_int64_array_sum(expr_rt->hit_pattern_num,
|
2023-04-20 15:34:56 +08:00
|
|
|
expr_rt->n_worker_thread);
|
2023-12-27 12:04:15 +08:00
|
|
|
alignment_int64_array_reset(expr_rt->hit_pattern_num,
|
|
|
|
|
expr_rt->n_worker_thread);
|
2023-04-20 15:34:56 +08:00
|
|
|
|
|
|
|
|
return sum;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
long long expr_runtime_update_err_count(void *expr_runtime)
|
|
|
|
|
{
|
|
|
|
|
if (NULL == expr_runtime) {
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
struct expr_runtime *expr_rt = (struct expr_runtime *)expr_runtime;
|
|
|
|
|
return expr_rt->update_err_cnt;
|
2023-08-09 19:22:09 +08:00
|
|
|
}
|