Store history pattern IDs in expr_matcher after the hs/rs stream scan completes, instead of storing them during the hs/rs scan

This commit is contained in:
root
2024-10-28 10:44:22 +00:00
parent abd00a9aab
commit 9d72c83e9f
8 changed files with 319 additions and 228 deletions

View File

@@ -46,7 +46,7 @@ struct rs_lit_stream {
size_t offset; /* current stream offset */
rs_stream_t *rs_stream;
struct rs_lit_engine *ref_rs_rt;
struct matched_pattern *matched_pat;
struct matched_pattern *ref_matched_pat;
struct log_handle *logger;
};
@@ -55,7 +55,7 @@ struct rs_regex_stream {
size_t offset; /* current stream offset */
rs_stream_t *rs_stream;
struct rs_regex_engine *ref_rs_rt;
struct matched_pattern *matched_pat;
struct matched_pattern *ref_matched_pat;
struct log_handle *logger;
};
@@ -67,6 +67,7 @@ struct rs_lit_engine {
struct rs_lit_stream **per_thread_scratch_streams; /* per thread */
struct pattern_attribute *ref_pat_attr;
struct log_handle *logger;
struct matched_pattern **matched_pat;
};
/* adapter_rs regex runtime */
@@ -77,6 +78,7 @@ struct rs_regex_engine {
struct rs_regex_stream **streams; /* per thread */
struct pattern_attribute *ref_pat_attr;
struct log_handle *logger;
struct matched_pattern **matched_pat;
};
int rs_verify_regex_expression(const char *regex_expr, struct log_handle *logger)
@@ -189,11 +191,10 @@ static int matched_event_cb(unsigned int id, int pos_offset, int from, int to,
unsigned long long pattern_id = id;
struct matched_pattern *matched_pat = (struct matched_pattern *)ctx;
unsigned long long *tmp_pat_id = NULL;
if (utarray_len(matched_pat->pattern_ids) < (MAX_HIT_PATTERN_NUM / 10)) {
for (size_t i = 0; i < utarray_len(matched_pat->pattern_ids); i++) {
tmp_pat_id = (unsigned long long *)utarray_eltptr(matched_pat->pattern_ids, i);
if (*tmp_pat_id == pattern_id) {
size_t n_pat_id = *(matched_pat->n_pattern_id);
if (n_pat_id < (MAX_HIT_PATTERN_NUM / 10)) {
for (size_t i = 0; i < n_pat_id; i++) {
if (matched_pat->pattern_ids[i] == pattern_id) {
return 0;
}
}
@@ -206,7 +207,7 @@ static int matched_event_cb(unsigned int id, int pos_offset, int from, int to,
sizeof(unsigned long long));
}
if (utarray_len(matched_pat->pattern_ids) >= MAX_HIT_PATTERN_NUM) {
if (n_pat_id >= MAX_HIT_PATTERN_NUM) {
return 0;
}
@@ -260,32 +261,13 @@ static int matched_event_cb(unsigned int id, int pos_offset, int from, int to,
}
if (1 == ret) {
utarray_push_back(matched_pat->pattern_ids, &pattern_id);
matched_pat->pattern_ids[n_pat_id] = pattern_id;
*(matched_pat->n_pattern_id) = n_pat_id + 1;
}
return 0;
}
/*
 * Copy the pattern ids held in matched_pat->pattern_ids into the caller's
 * pattern_id_array, writing at most array_size entries.
 *
 * The number of ids actually written is stored in *n_pattern_id
 * (0 when nothing was recorded). Always returns 0.
 */
static int gather_hit_pattern_id(struct matched_pattern *matched_pat,
                                 unsigned long long *pattern_id_array,
                                 size_t array_size, size_t *n_pattern_id)
{
    size_t n_hit = utarray_len(matched_pat->pattern_ids);
    /* never write past the end of the caller's buffer */
    size_t n_copy = (n_hit < array_size) ? n_hit : array_size;

    for (size_t idx = 0; idx < n_copy; idx++) {
        unsigned long long *hit_id =
            (unsigned long long *)utarray_eltptr(matched_pat->pattern_ids, idx);
        pattern_id_array[idx] = *hit_id;
    }

    *n_pattern_id = n_copy;
    return 0;
}
void rs_lit_engine_free(void *rs_lit_engine)
{
if (NULL == rs_lit_engine) {
@@ -319,6 +301,15 @@ void rs_lit_engine_free(void *rs_lit_engine)
FREE(rs_lit_inst->per_thread_scratch_streams);
}
if (rs_lit_inst->matched_pat != NULL) {
for (size_t i = 0; i < rs_lit_inst->n_thread; i++) {
if (rs_lit_inst->matched_pat[i] != NULL) {
FREE(rs_lit_inst->matched_pat[i]);
}
}
FREE(rs_lit_inst->matched_pat);
}
FREE(rs_lit_inst);
}
@@ -341,6 +332,13 @@ void *rs_lit_engine_new(struct expr_rule *rules, size_t n_rule,
bloom_init2(rs_lit_inst->blooms[i], 1024, 0.001);
}
rs_lit_inst->matched_pat = ALLOC(struct matched_pattern *, n_thread);
for (size_t i = 0; i < n_thread; i++) {
rs_lit_inst->matched_pat[i] = ALLOC(struct matched_pattern, 1);
rs_lit_inst->matched_pat[i]->ref_bloom = rs_lit_inst->blooms[i];
rs_lit_inst->matched_pat[i]->ref_pat_attr = pat_attr;
}
rs_lit_inst->per_thread_scratch_streams = ALLOC(struct rs_lit_stream *, n_thread);
for (size_t i = 0; i < n_thread; i++) {
rs_lit_inst->per_thread_scratch_streams[i] = (struct rs_lit_stream *)rs_lit_stream_open(rs_lit_inst, i);
@@ -363,19 +361,21 @@ int rs_lit_engine_scan(void *rs_lit_engine, int thread_id,
struct rs_lit_stream *rs_lit_stream = rs_lit_inst->per_thread_scratch_streams[thread_id];
assert(rs_lit_stream != NULL);
utarray_clear(rs_lit_stream->matched_pat->pattern_ids);
bloom_reset(rs_lit_stream->matched_pat->ref_bloom);
rs_lit_stream->ref_matched_pat->pattern_ids = pattern_id_array;
rs_lit_stream->ref_matched_pat->n_pattern_id = n_pattern_id;
rs_lit_stream->ref_matched_pat->pattern_ids_size = array_size;
if (rs_lit_inst->rs_db != NULL) {
int ret = rs_scan(rs_lit_inst->rs_db, thread_id, data, data_len,
0, matched_event_cb, rs_lit_stream->matched_pat);
0, matched_event_cb, rs_lit_stream->ref_matched_pat);
if (ret < 0) {
return -1;
}
}
return gather_hit_pattern_id(rs_lit_stream->matched_pat, pattern_id_array,
array_size, n_pattern_id);
bloom_reset(rs_lit_stream->ref_matched_pat->ref_bloom);
return 0;
}
void *rs_lit_stream_open(void *rs_lit_engine, int thread_id)
@@ -390,11 +390,7 @@ void *rs_lit_stream_open(void *rs_lit_engine, int thread_id)
lit_stream->logger = rs_lit_inst->logger;
lit_stream->thread_id = thread_id;
lit_stream->ref_rs_rt = rs_lit_inst;
lit_stream->matched_pat = ALLOC(struct matched_pattern, 1);
lit_stream->matched_pat->ref_bloom = rs_lit_inst->blooms[thread_id];
lit_stream->matched_pat->ref_pat_attr = rs_lit_inst->ref_pat_attr;
utarray_new(lit_stream->matched_pat->pattern_ids, &ut_rs_pattern_id_icd);
utarray_reserve(lit_stream->matched_pat->pattern_ids, MAX_HIT_PATTERN_NUM);
lit_stream->ref_matched_pat = rs_lit_inst->matched_pat[thread_id];
if (rs_lit_inst->rs_db != NULL) {
lit_stream->rs_stream = rs_open_stream(rs_lit_inst->rs_db, 0, 128);
@@ -425,15 +421,8 @@ void rs_lit_stream_close(void *rs_lit_stream)
/* rs_stream->rs_rt point to rs_instance->rs_rt which will call free
same as rs_attr */
lit_stream->ref_rs_rt = NULL;
lit_stream->matched_pat->ref_bloom = NULL;
lit_stream->matched_pat->ref_pat_attr = NULL;
lit_stream->ref_matched_pat = NULL;
if (lit_stream->matched_pat->pattern_ids != NULL) {
utarray_free(lit_stream->matched_pat->pattern_ids);
lit_stream->matched_pat->pattern_ids = NULL;
}
FREE(lit_stream->matched_pat);
FREE(lit_stream);
}
@@ -448,16 +437,22 @@ int rs_lit_stream_scan(void *rs_lit_stream, const char *data, size_t data_len,
struct rs_lit_stream *lit_stream = (struct rs_lit_stream *)rs_lit_stream;
lit_stream->ref_matched_pat->pattern_ids = pattern_id_array;
lit_stream->ref_matched_pat->n_pattern_id = n_pattern_id;
lit_stream->ref_matched_pat->pattern_ids_size = array_size;
if (lit_stream->rs_stream != NULL) {
int ret = rs_scan_stream(lit_stream->rs_stream, data, data_len,
matched_event_cb, lit_stream->matched_pat);
matched_event_cb, lit_stream->ref_matched_pat);
if (ret < 0) {
return -1;
}
}
return gather_hit_pattern_id(lit_stream->matched_pat, pattern_id_array,
array_size, n_pattern_id);
bloom_reset(lit_stream->ref_matched_pat->ref_bloom);
return 0;
}
void rs_regex_engine_free(void *rs_regex_engine)
@@ -494,6 +489,15 @@ void rs_regex_engine_free(void *rs_regex_engine)
FREE(rs_regex_inst->streams);
}
if (rs_regex_inst->matched_pat != NULL) {
for (size_t i = 0; i < rs_regex_inst->n_thread; i++) {
if (rs_regex_inst->matched_pat[i] != NULL) {
FREE(rs_regex_inst->matched_pat[i]);
}
}
FREE(rs_regex_inst->matched_pat);
}
FREE(rs_regex_inst);
}
@@ -515,6 +519,13 @@ void *rs_regex_engine_new(struct expr_rule *rules, size_t n_rule,
bloom_init2(rs_regex_inst->blooms[i], 1024, 0.001);
}
rs_regex_inst->matched_pat = ALLOC(struct matched_pattern *, n_thread);
for (size_t i = 0; i < n_thread; i++) {
rs_regex_inst->matched_pat[i] = ALLOC(struct matched_pattern, 1);
rs_regex_inst->matched_pat[i]->ref_bloom = rs_regex_inst->blooms[i];
rs_regex_inst->matched_pat[i]->ref_pat_attr = pat_attr;
}
rs_regex_inst->streams = ALLOC(struct rs_regex_stream *, n_thread);
for (size_t i = 0; i < n_thread; i++) {
rs_regex_inst->streams[i] = (struct rs_regex_stream *)rs_regex_stream_open(rs_regex_inst, i);
@@ -537,19 +548,21 @@ int rs_regex_engine_scan(void *rs_regex_engine, int thread_id,
struct rs_regex_stream *rs_regex_stream = rs_regex_inst->streams[thread_id];
assert(rs_regex_stream != NULL);
utarray_clear(rs_regex_stream->matched_pat->pattern_ids);
bloom_reset(rs_regex_stream->matched_pat->ref_bloom);
rs_regex_stream->ref_matched_pat->pattern_ids = pattern_id_array;
rs_regex_stream->ref_matched_pat->n_pattern_id = n_pattern_id;
rs_regex_stream->ref_matched_pat->pattern_ids_size = array_size;
if (rs_regex_inst->rs_db != NULL) {
int ret = rs_scan(rs_regex_inst->rs_db, thread_id, data, data_len,
0, matched_event_cb, rs_regex_stream->matched_pat);
0, matched_event_cb, rs_regex_stream->ref_matched_pat);
if (ret < 0) {
return -1;
}
}
return gather_hit_pattern_id(rs_regex_stream->matched_pat, pattern_id_array,
array_size, n_pattern_id);
bloom_reset(rs_regex_stream->ref_matched_pat->ref_bloom);
return 0;
}
void *rs_regex_stream_open(void *rs_regex_engine, int thread_id)
@@ -564,11 +577,7 @@ void *rs_regex_stream_open(void *rs_regex_engine, int thread_id)
regex_stream->logger = rs_regex_inst->logger;
regex_stream->thread_id = thread_id;
regex_stream->ref_rs_rt = rs_regex_inst;
regex_stream->matched_pat = ALLOC(struct matched_pattern, 1);
regex_stream->matched_pat->ref_bloom = rs_regex_inst->blooms[thread_id];
regex_stream->matched_pat->ref_pat_attr = rs_regex_inst->ref_pat_attr;
utarray_new(regex_stream->matched_pat->pattern_ids, &ut_rs_pattern_id_icd);
utarray_reserve(regex_stream->matched_pat->pattern_ids, MAX_HIT_PATTERN_NUM);
regex_stream->ref_matched_pat = rs_regex_inst->matched_pat[thread_id];
if (rs_regex_inst->rs_db != NULL) {
regex_stream->rs_stream = rs_open_stream(rs_regex_inst->rs_db, 0, 128);
@@ -599,15 +608,8 @@ void rs_regex_stream_close(void *rs_regex_stream)
/* rs_stream->rs_rt point to rs_instance->rs_rt which will call free
same as rs_attr */
regex_stream->ref_rs_rt = NULL;
regex_stream->matched_pat->ref_bloom = NULL;
regex_stream->matched_pat->ref_pat_attr = NULL;
regex_stream->ref_matched_pat = NULL;
if (regex_stream->matched_pat->pattern_ids != NULL) {
utarray_free(regex_stream->matched_pat->pattern_ids);
regex_stream->matched_pat->pattern_ids = NULL;
}
FREE(regex_stream->matched_pat);
FREE(regex_stream);
}
@@ -622,14 +624,19 @@ int rs_regex_stream_scan(void *rs_regex_stream, const char *data, size_t data_le
struct rs_regex_stream *regex_stream = (struct rs_regex_stream *)rs_regex_stream;
regex_stream->ref_matched_pat->pattern_ids = pattern_id_array;
regex_stream->ref_matched_pat->n_pattern_id = n_pattern_id;
regex_stream->ref_matched_pat->pattern_ids_size = array_size;
if (regex_stream->rs_stream != NULL) {
int ret = rs_scan_stream(regex_stream->rs_stream, data, data_len,
matched_event_cb, regex_stream->matched_pat);
matched_event_cb, regex_stream->ref_matched_pat);
if (ret < 0) {
return -1;
}
}
return gather_hit_pattern_id(regex_stream->matched_pat, pattern_id_array,
array_size, n_pattern_id);
bloom_reset(regex_stream->ref_matched_pat->ref_bloom);
return 0;
}