From 9d72c83e9fd499f5246b6fc35bffd182e0bd9ebb Mon Sep 17 00:00:00 2001 From: root Date: Mon, 28 Oct 2024 10:44:22 +0000 Subject: [PATCH] store history pattern ids at expr_matcher after hs/rs stream scan, instead of storing them during hs/rs scan --- .../expr_matcher/adapter_hs/adapter_hs.cpp | 137 ++++++++-------- .../expr_matcher/adapter_rs/adapter_rs.cpp | 149 +++++++++--------- scanner/expr_matcher/expr_matcher.cpp | 117 +++++++++++--- scanner/expr_matcher/expr_matcher.h | 8 +- scanner/expr_matcher/expr_matcher_inc.h | 4 +- src/maat_expr.c | 8 +- test/expr_matcher_gtest.cpp | 58 +++---- test/maat_framework_gtest.cpp | 66 +++++--- 8 files changed, 319 insertions(+), 228 deletions(-) diff --git a/scanner/expr_matcher/adapter_hs/adapter_hs.cpp b/scanner/expr_matcher/adapter_hs/adapter_hs.cpp index 4bfd6c4..bfd88a2 100644 --- a/scanner/expr_matcher/adapter_hs/adapter_hs.cpp +++ b/scanner/expr_matcher/adapter_hs/adapter_hs.cpp @@ -50,7 +50,7 @@ struct hs_lit_stream { int thread_id; hs_stream_t *hs_stream; struct hs_lit_engine *ref_hs_rt; - struct matched_pattern *matched_pat; + struct matched_pattern *ref_matched_pat; struct log_handle *logger; }; @@ -58,7 +58,7 @@ struct hs_regex_stream { int thread_id; hs_stream_t *hs_stream; struct hs_regex_engine *ref_hs_rt; - struct matched_pattern *matched_pat; + struct matched_pattern *ref_matched_pat; struct log_handle *logger; }; @@ -69,6 +69,7 @@ struct hs_lit_engine { hs_scratch_t **hs_scratches; struct bloom **blooms; struct hs_lit_stream **streams; + struct matched_pattern **matched_pat; struct pattern_attribute *ref_pat_attr; struct log_handle *logger; }; @@ -80,6 +81,7 @@ struct hs_regex_engine { hs_scratch_t **hs_scratches; struct bloom **blooms; struct hs_regex_stream **streams; + struct matched_pattern **matched_pat; struct pattern_attribute *ref_pat_attr; struct log_handle *logger; }; @@ -196,6 +198,15 @@ void hs_lit_engine_free(void *hs_lit_engine) FREE(hs_lit_inst->streams); } + if (hs_lit_inst->matched_pat != 
NULL) { + for (i = 0; i < hs_lit_inst->n_thread; i++) { + if (hs_lit_inst->matched_pat[i] != NULL) { + FREE(hs_lit_inst->matched_pat[i]); + } + } + FREE(hs_lit_inst->matched_pat); + } + FREE(hs_lit_inst); } @@ -216,6 +227,13 @@ void *hs_lit_engine_new(struct expr_rule *rules, size_t n_rule, bloom_init2(hs_lit_inst->blooms[i], 1024, 0.001); } + hs_lit_inst->matched_pat = ALLOC(struct matched_pattern *, n_thread); + for (size_t i = 0; i < n_thread; i++) { + hs_lit_inst->matched_pat[i] = ALLOC(struct matched_pattern, 1); + hs_lit_inst->matched_pat[i]->ref_bloom = hs_lit_inst->blooms[i]; + hs_lit_inst->matched_pat[i]->ref_pat_attr = pat_attr; + } + hs_lit_inst->hs_scratches = ALLOC(hs_scratch_t *, n_thread); int ret = hs_alloc_scratches((hs_database_t *)hs_lit_db, hs_lit_inst->hs_scratches, n_thread, logger); @@ -247,11 +265,10 @@ static int matched_event_cb(unsigned int id, unsigned long long from, unsigned long long pattern_id = id; struct matched_pattern *matched_pat = (struct matched_pattern *)ctx; - unsigned long long *tmp_pat_id = NULL; - if (utarray_len(matched_pat->pattern_ids) < (MAX_HIT_PATTERN_NUM / 10)) { - for (size_t i = 0; i < utarray_len(matched_pat->pattern_ids); i++) { - tmp_pat_id = (unsigned long long *)utarray_eltptr(matched_pat->pattern_ids, i); - if (*tmp_pat_id == pattern_id) { + size_t n_pat_id = *(matched_pat->n_pattern_id); + if (n_pat_id < (MAX_HIT_PATTERN_NUM / 10)) { + for (size_t i = 0; i < n_pat_id; i++) { + if (matched_pat->pattern_ids[i] == pattern_id) { return 0; } } @@ -264,7 +281,7 @@ static int matched_event_cb(unsigned int id, unsigned long long from, sizeof(unsigned long long)); } - if (utarray_len(matched_pat->pattern_ids) >= MAX_HIT_PATTERN_NUM) { + if (n_pat_id >= MAX_HIT_PATTERN_NUM || n_pat_id >= matched_pat->pattern_ids_size) { return 0; } @@ -317,7 +334,8 @@ static int matched_event_cb(unsigned int id, unsigned long long from, } if (1 == ret) { - utarray_push_back(matched_pat->pattern_ids, &pattern_id); + 
matched_pat->pattern_ids[n_pat_id] = pattern_id; + *(matched_pat->n_pattern_id) = n_pat_id + 1; } return 0; @@ -337,11 +355,7 @@ void *hs_lit_stream_open(void *hs_lit_engine, int thread_id) lit_stream->logger = hs_lit_inst->logger; lit_stream->thread_id = thread_id; lit_stream->ref_hs_rt = hs_lit_inst; - lit_stream->matched_pat = ALLOC(struct matched_pattern, 1); - lit_stream->matched_pat->ref_bloom = hs_lit_inst->blooms[thread_id]; - lit_stream->matched_pat->ref_pat_attr = hs_lit_inst->ref_pat_attr; - utarray_new(lit_stream->matched_pat->pattern_ids, &ut_hs_pattern_id_icd); - utarray_reserve(lit_stream->matched_pat->pattern_ids, MAX_HIT_PATTERN_NUM); + lit_stream->ref_matched_pat = hs_lit_inst->matched_pat[thread_id]; if (hs_lit_inst->hs_db != NULL) { err = hs_open_stream(hs_lit_inst->hs_db, 0, &lit_stream->hs_stream); @@ -380,15 +394,7 @@ void hs_lit_stream_close(void *hs_lit_stream) /* stream->hs_rt point to hs_instance->hs_rt which will call free same as hs_attr */ stream->ref_hs_rt = NULL; - stream->matched_pat->ref_bloom = NULL; - stream->matched_pat->ref_pat_attr = NULL; - - if (stream->matched_pat->pattern_ids != NULL) { - utarray_free(stream->matched_pat->pattern_ids); - stream->matched_pat->pattern_ids = NULL; - } - - FREE(stream->matched_pat); + stream->ref_matched_pat = NULL; FREE(stream); } @@ -402,11 +408,8 @@ static void hs_lit_stream_reset(struct hs_lit_stream *hs_lit_stream) if (hs_lit_stream->hs_stream != NULL) { hs_reset_stream(hs_lit_stream->hs_stream, 0, scratches[hs_lit_stream->thread_id], - matched_event_cb, hs_lit_stream->matched_pat); + NULL, NULL); } - - utarray_clear(hs_lit_stream->matched_pat->pattern_ids); - bloom_reset(hs_lit_stream->matched_pat->ref_bloom); } static void hs_regex_stream_reset(struct hs_regex_stream *hs_regex_stream) @@ -419,31 +422,8 @@ static void hs_regex_stream_reset(struct hs_regex_stream *hs_regex_stream) if (hs_regex_stream->hs_stream != NULL) { hs_reset_stream(hs_regex_stream->hs_stream, 0, 
scratches[hs_regex_stream->thread_id], - matched_event_cb, hs_regex_stream->matched_pat); + NULL, NULL); } - - utarray_clear(hs_regex_stream->matched_pat->pattern_ids); - bloom_reset(hs_regex_stream->matched_pat->ref_bloom); -} - -static int gather_hit_pattern_id(struct matched_pattern *matched_pat, - unsigned long long *pattern_id_array, - size_t array_size, size_t *n_pattern_id) -{ - size_t pattern_id_cnt = utarray_len(matched_pat->pattern_ids); - if (0 == pattern_id_cnt) { - *n_pattern_id = 0; - return 0; - } - - size_t array_index = 0; - for (size_t i = 0; i < pattern_id_cnt && array_index < array_size; i++) { - pattern_id_array[array_index++] = *(unsigned long long *)utarray_eltptr(matched_pat->pattern_ids, i); - } - - *n_pattern_id = array_index; - - return 0; } int hs_lit_stream_scan(void *hs_lit_stream, const char *data, size_t data_len, @@ -469,13 +449,16 @@ int hs_lit_stream_scan(void *hs_lit_stream, const char *data, size_t data_len, struct hs_lit_stream *lit_stream = (struct hs_lit_stream *)hs_lit_stream; int thread_id = lit_stream->thread_id; hs_scratch_t **scratches = lit_stream->ref_hs_rt->hs_scratches; - lit_stream->matched_pat->scan_data_len = data_len; + lit_stream->ref_matched_pat->scan_data_len = data_len; + lit_stream->ref_matched_pat->pattern_ids = pattern_id_array; + lit_stream->ref_matched_pat->pattern_ids_size = array_size; + lit_stream->ref_matched_pat->n_pattern_id = n_pattern_id; if (lit_stream->hs_stream != NULL) { if (scratches != NULL) { err = hs_scan_stream(lit_stream->hs_stream, data, data_len, 0, scratches[thread_id], matched_event_cb, - lit_stream->matched_pat); + lit_stream->ref_matched_pat); if (err != HS_SUCCESS) { return -1; } @@ -486,8 +469,9 @@ int hs_lit_stream_scan(void *hs_lit_stream, const char *data, size_t data_len, } } - return gather_hit_pattern_id(lit_stream->matched_pat, pattern_id_array, - array_size, n_pattern_id); + bloom_reset(lit_stream->ref_matched_pat->ref_bloom); + + return 0; } int hs_lit_engine_scan(void 
*hs_lit_engine, int thread_id, @@ -552,6 +536,15 @@ void hs_regex_engine_free(void *hs_regex_engine) FREE(hs_regex_inst->streams); } + if (hs_regex_inst->matched_pat != NULL) { + for (i = 0; i < hs_regex_inst->n_thread; i++) { + if (hs_regex_inst->matched_pat[i] != NULL) { + FREE(hs_regex_inst->matched_pat[i]); + } + } + FREE(hs_regex_inst->matched_pat); + } + FREE(hs_regex_inst); } @@ -572,6 +565,13 @@ void *hs_regex_engine_new(struct expr_rule *rules, size_t n_rule, bloom_init2(hs_regex_inst->blooms[i], 1024, 0.001); } + hs_regex_inst->matched_pat = ALLOC(struct matched_pattern *, n_thread); + for (size_t i = 0; i < n_thread; i++) { + hs_regex_inst->matched_pat[i] = ALLOC(struct matched_pattern, 1); + hs_regex_inst->matched_pat[i]->ref_bloom = hs_regex_inst->blooms[i]; + hs_regex_inst->matched_pat[i]->ref_pat_attr = pat_attr; + } + hs_regex_inst->hs_scratches = ALLOC(hs_scratch_t *, n_thread); int ret = hs_alloc_scratches((hs_database_t *)hs_regex_db, hs_regex_inst->hs_scratches, @@ -628,15 +628,8 @@ void hs_regex_stream_close(void *hs_regex_stream) /* stream->hs_rt point to hs_instance->hs_rt which will call free same as hs_attr */ stream->ref_hs_rt = NULL; - stream->matched_pat->ref_bloom = NULL; - stream->matched_pat->ref_pat_attr = NULL; + stream->ref_matched_pat = NULL; - if (stream->matched_pat->pattern_ids != NULL) { - utarray_free(stream->matched_pat->pattern_ids); - stream->matched_pat->pattern_ids = NULL; - } - - FREE(stream->matched_pat); FREE(stream); } @@ -653,11 +646,7 @@ void *hs_regex_stream_open(void *hs_regex_engine, int thread_id) regex_stream->logger = hs_regex_inst->logger; regex_stream->thread_id = thread_id; regex_stream->ref_hs_rt = hs_regex_inst; - regex_stream->matched_pat = ALLOC(struct matched_pattern, 1); - regex_stream->matched_pat->ref_bloom = hs_regex_inst->blooms[thread_id]; - regex_stream->matched_pat->ref_pat_attr = hs_regex_inst->ref_pat_attr; - utarray_new(regex_stream->matched_pat->pattern_ids, &ut_hs_pattern_id_icd); - 
utarray_reserve(regex_stream->matched_pat->pattern_ids, MAX_HIT_PATTERN_NUM); + regex_stream->ref_matched_pat = hs_regex_inst->matched_pat[thread_id]; if (hs_regex_inst->hs_db != NULL) { err = hs_open_stream(hs_regex_inst->hs_db, 0, &regex_stream->hs_stream); @@ -696,13 +685,16 @@ int hs_regex_stream_scan(void *hs_regex_stream, const char *data, size_t data_le struct hs_regex_stream *regex_stream = (struct hs_regex_stream *)hs_regex_stream; int thread_id = regex_stream->thread_id; hs_scratch_t **scratches = regex_stream->ref_hs_rt->hs_scratches; - regex_stream->matched_pat->scan_data_len = data_len; + regex_stream->ref_matched_pat->scan_data_len = data_len; + regex_stream->ref_matched_pat->pattern_ids = pattern_id_array; + regex_stream->ref_matched_pat->pattern_ids_size = array_size; + regex_stream->ref_matched_pat->n_pattern_id = n_pattern_id; if (regex_stream->hs_stream != NULL) { if (scratches != NULL) { err = hs_scan_stream(regex_stream->hs_stream, data, data_len, 0, scratches[thread_id], matched_event_cb, - regex_stream->matched_pat); + regex_stream->ref_matched_pat); if (err != HS_SUCCESS) { return -1; } @@ -713,8 +705,9 @@ int hs_regex_stream_scan(void *hs_regex_stream, const char *data, size_t data_le } } - return gather_hit_pattern_id(regex_stream->matched_pat, pattern_id_array, - array_size, n_pattern_id); + bloom_reset(regex_stream->ref_matched_pat->ref_bloom); + + return 0; } void *hs_compile_data_new(enum expr_pattern_type pat_type, size_t n_patterns) diff --git a/scanner/expr_matcher/adapter_rs/adapter_rs.cpp b/scanner/expr_matcher/adapter_rs/adapter_rs.cpp index 76e0c80..c85fb0a 100644 --- a/scanner/expr_matcher/adapter_rs/adapter_rs.cpp +++ b/scanner/expr_matcher/adapter_rs/adapter_rs.cpp @@ -46,7 +46,7 @@ struct rs_lit_stream { size_t offset; /* current stream offset */ rs_stream_t *rs_stream; struct rs_lit_engine *ref_rs_rt; - struct matched_pattern *matched_pat; + struct matched_pattern *ref_matched_pat; struct log_handle *logger; }; @@ -55,7 +55,7 
@@ struct rs_regex_stream { size_t offset; /* current stream offset */ rs_stream_t *rs_stream; struct rs_regex_engine *ref_rs_rt; - struct matched_pattern *matched_pat; + struct matched_pattern *ref_matched_pat; struct log_handle *logger; }; @@ -67,6 +67,7 @@ struct rs_lit_engine { struct rs_lit_stream **per_thread_scratch_streams; /* per thread */ struct pattern_attribute *ref_pat_attr; struct log_handle *logger; + struct matched_pattern **matched_pat; }; /* adapter_rs regex runtime */ @@ -77,6 +78,7 @@ struct rs_regex_engine { struct rs_regex_stream **streams; /* per thread */ struct pattern_attribute *ref_pat_attr; struct log_handle *logger; + struct matched_pattern **matched_pat; }; int rs_verify_regex_expression(const char *regex_expr, struct log_handle *logger) @@ -189,11 +191,10 @@ static int matched_event_cb(unsigned int id, int pos_offset, int from, int to, unsigned long long pattern_id = id; struct matched_pattern *matched_pat = (struct matched_pattern *)ctx; - unsigned long long *tmp_pat_id = NULL; - if (utarray_len(matched_pat->pattern_ids) < (MAX_HIT_PATTERN_NUM / 10)) { - for (size_t i = 0; i < utarray_len(matched_pat->pattern_ids); i++) { - tmp_pat_id = (unsigned long long *)utarray_eltptr(matched_pat->pattern_ids, i); - if (*tmp_pat_id == pattern_id) { + size_t n_pat_id = *(matched_pat->n_pattern_id); + if (n_pat_id < (MAX_HIT_PATTERN_NUM / 10)) { + for (size_t i = 0; i < n_pat_id; i++) { + if (matched_pat->pattern_ids[i] == pattern_id) { return 0; } } @@ -206,7 +207,7 @@ static int matched_event_cb(unsigned int id, int pos_offset, int from, int to, sizeof(unsigned long long)); } - if (utarray_len(matched_pat->pattern_ids) >= MAX_HIT_PATTERN_NUM) { + if (n_pat_id >= MAX_HIT_PATTERN_NUM) { return 0; } @@ -260,32 +261,13 @@ static int matched_event_cb(unsigned int id, int pos_offset, int from, int to, } if (1 == ret) { - utarray_push_back(matched_pat->pattern_ids, &pattern_id); + matched_pat->pattern_ids[n_pat_id] = pattern_id; + 
*(matched_pat->n_pattern_id) = n_pat_id + 1; } return 0; } -static int gather_hit_pattern_id(struct matched_pattern *matched_pat, - unsigned long long *pattern_id_array, - size_t array_size, size_t *n_pattern_id) -{ - size_t pattern_id_cnt = utarray_len(matched_pat->pattern_ids); - if (0 == pattern_id_cnt) { - *n_pattern_id = 0; - return 0; - } - - size_t array_index = 0; - for (size_t i = 0; i < pattern_id_cnt && array_index < array_size; i++) { - pattern_id_array[array_index++] = *(unsigned long long *)utarray_eltptr(matched_pat->pattern_ids, i); - } - - *n_pattern_id = array_index; - - return 0; -} - void rs_lit_engine_free(void *rs_lit_engine) { if (NULL == rs_lit_engine) { @@ -319,6 +301,15 @@ void rs_lit_engine_free(void *rs_lit_engine) FREE(rs_lit_inst->per_thread_scratch_streams); } + if (rs_lit_inst->matched_pat != NULL) { + for (size_t i = 0; i < rs_lit_inst->n_thread; i++) { + if (rs_lit_inst->matched_pat[i] != NULL) { + FREE(rs_lit_inst->matched_pat[i]); + } + } + FREE(rs_lit_inst->matched_pat); + } + FREE(rs_lit_inst); } @@ -341,6 +332,13 @@ void *rs_lit_engine_new(struct expr_rule *rules, size_t n_rule, bloom_init2(rs_lit_inst->blooms[i], 1024, 0.001); } + rs_lit_inst->matched_pat = ALLOC(struct matched_pattern *, n_thread); + for (size_t i = 0; i < n_thread; i++) { + rs_lit_inst->matched_pat[i] = ALLOC(struct matched_pattern, 1); + rs_lit_inst->matched_pat[i]->ref_bloom = rs_lit_inst->blooms[i]; + rs_lit_inst->matched_pat[i]->ref_pat_attr = pat_attr; + } + rs_lit_inst->per_thread_scratch_streams = ALLOC(struct rs_lit_stream *, n_thread); for (size_t i = 0; i < n_thread; i++) { rs_lit_inst->per_thread_scratch_streams[i] = (struct rs_lit_stream *)rs_lit_stream_open(rs_lit_inst, i); @@ -363,19 +361,21 @@ int rs_lit_engine_scan(void *rs_lit_engine, int thread_id, struct rs_lit_stream *rs_lit_stream = rs_lit_inst->per_thread_scratch_streams[thread_id]; assert(rs_lit_stream != NULL); - utarray_clear(rs_lit_stream->matched_pat->pattern_ids); - 
bloom_reset(rs_lit_stream->matched_pat->ref_bloom); + rs_lit_stream->ref_matched_pat->pattern_ids = pattern_id_array; + rs_lit_stream->ref_matched_pat->n_pattern_id = n_pattern_id; + rs_lit_stream->ref_matched_pat->pattern_ids_size = array_size; if (rs_lit_inst->rs_db != NULL) { int ret = rs_scan(rs_lit_inst->rs_db, thread_id, data, data_len, - 0, matched_event_cb, rs_lit_stream->matched_pat); + 0, matched_event_cb, rs_lit_stream->ref_matched_pat); if (ret < 0) { return -1; } } - return gather_hit_pattern_id(rs_lit_stream->matched_pat, pattern_id_array, - array_size, n_pattern_id); + bloom_reset(rs_lit_stream->ref_matched_pat->ref_bloom); + + return 0; } void *rs_lit_stream_open(void *rs_lit_engine, int thread_id) @@ -390,11 +390,7 @@ void *rs_lit_stream_open(void *rs_lit_engine, int thread_id) lit_stream->logger = rs_lit_inst->logger; lit_stream->thread_id = thread_id; lit_stream->ref_rs_rt = rs_lit_inst; - lit_stream->matched_pat = ALLOC(struct matched_pattern, 1); - lit_stream->matched_pat->ref_bloom = rs_lit_inst->blooms[thread_id]; - lit_stream->matched_pat->ref_pat_attr = rs_lit_inst->ref_pat_attr; - utarray_new(lit_stream->matched_pat->pattern_ids, &ut_rs_pattern_id_icd); - utarray_reserve(lit_stream->matched_pat->pattern_ids, MAX_HIT_PATTERN_NUM); + lit_stream->ref_matched_pat = rs_lit_inst->matched_pat[thread_id]; if (rs_lit_inst->rs_db != NULL) { lit_stream->rs_stream = rs_open_stream(rs_lit_inst->rs_db, 0, 128); @@ -425,15 +421,8 @@ void rs_lit_stream_close(void *rs_lit_stream) /* rs_stream->rs_rt point to rs_instance->rs_rt which will call free same as rs_attr */ lit_stream->ref_rs_rt = NULL; - lit_stream->matched_pat->ref_bloom = NULL; - lit_stream->matched_pat->ref_pat_attr = NULL; + lit_stream->ref_matched_pat = NULL; - if (lit_stream->matched_pat->pattern_ids != NULL) { - utarray_free(lit_stream->matched_pat->pattern_ids); - lit_stream->matched_pat->pattern_ids = NULL; - } - - FREE(lit_stream->matched_pat); FREE(lit_stream); } @@ -448,16 +437,22 @@ 
int rs_lit_stream_scan(void *rs_lit_stream, const char *data, size_t data_len, struct rs_lit_stream *lit_stream = (struct rs_lit_stream *)rs_lit_stream; + lit_stream->ref_matched_pat->pattern_ids = pattern_id_array; + lit_stream->ref_matched_pat->n_pattern_id = n_pattern_id; + lit_stream->ref_matched_pat->pattern_ids_size = array_size; + if (lit_stream->rs_stream != NULL) { int ret = rs_scan_stream(lit_stream->rs_stream, data, data_len, - matched_event_cb, lit_stream->matched_pat); + matched_event_cb, lit_stream->ref_matched_pat); if (ret < 0) { return -1; } } - return gather_hit_pattern_id(lit_stream->matched_pat, pattern_id_array, - array_size, n_pattern_id); + + bloom_reset(lit_stream->ref_matched_pat->ref_bloom); + + return 0; } void rs_regex_engine_free(void *rs_regex_engine) @@ -494,6 +489,15 @@ void rs_regex_engine_free(void *rs_regex_engine) FREE(rs_regex_inst->streams); } + if (rs_regex_inst->matched_pat != NULL) { + for (size_t i = 0; i < rs_regex_inst->n_thread; i++) { + if (rs_regex_inst->matched_pat[i] != NULL) { + FREE(rs_regex_inst->matched_pat[i]); + } + } + FREE(rs_regex_inst->matched_pat); + } + FREE(rs_regex_inst); } @@ -515,6 +519,13 @@ void *rs_regex_engine_new(struct expr_rule *rules, size_t n_rule, bloom_init2(rs_regex_inst->blooms[i], 1024, 0.001); } + rs_regex_inst->matched_pat = ALLOC(struct matched_pattern *, n_thread); + for (size_t i = 0; i < n_thread; i++) { + rs_regex_inst->matched_pat[i] = ALLOC(struct matched_pattern, 1); + rs_regex_inst->matched_pat[i]->ref_bloom = rs_regex_inst->blooms[i]; + rs_regex_inst->matched_pat[i]->ref_pat_attr = pat_attr; + } + rs_regex_inst->streams = ALLOC(struct rs_regex_stream *, n_thread); for (size_t i = 0; i < n_thread; i++) { rs_regex_inst->streams[i] = (struct rs_regex_stream *)rs_regex_stream_open(rs_regex_inst, i); @@ -537,19 +548,21 @@ int rs_regex_engine_scan(void *rs_regex_engine, int thread_id, struct rs_regex_stream *rs_regex_stream = rs_regex_inst->streams[thread_id]; 
assert(rs_regex_stream != NULL); - utarray_clear(rs_regex_stream->matched_pat->pattern_ids); - bloom_reset(rs_regex_stream->matched_pat->ref_bloom); + rs_regex_stream->ref_matched_pat->pattern_ids = pattern_id_array; + rs_regex_stream->ref_matched_pat->n_pattern_id = n_pattern_id; + rs_regex_stream->ref_matched_pat->pattern_ids_size = array_size; if (rs_regex_inst->rs_db != NULL) { int ret = rs_scan(rs_regex_inst->rs_db, thread_id, data, data_len, - 0, matched_event_cb, rs_regex_stream->matched_pat); + 0, matched_event_cb, rs_regex_stream->ref_matched_pat); if (ret < 0) { return -1; } } - return gather_hit_pattern_id(rs_regex_stream->matched_pat, pattern_id_array, - array_size, n_pattern_id); + bloom_reset(rs_regex_stream->ref_matched_pat->ref_bloom); + + return 0; } void *rs_regex_stream_open(void *rs_regex_engine, int thread_id) @@ -564,11 +577,7 @@ void *rs_regex_stream_open(void *rs_regex_engine, int thread_id) regex_stream->logger = rs_regex_inst->logger; regex_stream->thread_id = thread_id; regex_stream->ref_rs_rt = rs_regex_inst; - regex_stream->matched_pat = ALLOC(struct matched_pattern, 1); - regex_stream->matched_pat->ref_bloom = rs_regex_inst->blooms[thread_id]; - regex_stream->matched_pat->ref_pat_attr = rs_regex_inst->ref_pat_attr; - utarray_new(regex_stream->matched_pat->pattern_ids, &ut_rs_pattern_id_icd); - utarray_reserve(regex_stream->matched_pat->pattern_ids, MAX_HIT_PATTERN_NUM); + regex_stream->ref_matched_pat = rs_regex_inst->matched_pat[thread_id]; if (rs_regex_inst->rs_db != NULL) { regex_stream->rs_stream = rs_open_stream(rs_regex_inst->rs_db, 0, 128); @@ -599,15 +608,8 @@ void rs_regex_stream_close(void *rs_regex_stream) /* rs_stream->rs_rt point to rs_instance->rs_rt which will call free same as rs_attr */ regex_stream->ref_rs_rt = NULL; - regex_stream->matched_pat->ref_bloom = NULL; - regex_stream->matched_pat->ref_pat_attr = NULL; + regex_stream->ref_matched_pat = NULL; - if (regex_stream->matched_pat->pattern_ids != NULL) { - 
utarray_free(regex_stream->matched_pat->pattern_ids); - regex_stream->matched_pat->pattern_ids = NULL; - } - - FREE(regex_stream->matched_pat); FREE(regex_stream); } @@ -622,14 +624,19 @@ int rs_regex_stream_scan(void *rs_regex_stream, const char *data, size_t data_le struct rs_regex_stream *regex_stream = (struct rs_regex_stream *)rs_regex_stream; + regex_stream->ref_matched_pat->pattern_ids = pattern_id_array; + regex_stream->ref_matched_pat->n_pattern_id = n_pattern_id; + regex_stream->ref_matched_pat->pattern_ids_size = array_size; + if (regex_stream->rs_stream != NULL) { int ret = rs_scan_stream(regex_stream->rs_stream, data, data_len, - matched_event_cb, regex_stream->matched_pat); + matched_event_cb, regex_stream->ref_matched_pat); if (ret < 0) { return -1; } } - return gather_hit_pattern_id(regex_stream->matched_pat, pattern_id_array, - array_size, n_pattern_id); + bloom_reset(regex_stream->ref_matched_pat->ref_bloom); + + return 0; } \ No newline at end of file diff --git a/scanner/expr_matcher/expr_matcher.cpp b/scanner/expr_matcher/expr_matcher.cpp index ee78fb7..eba443a 100644 --- a/scanner/expr_matcher/expr_matcher.cpp +++ b/scanner/expr_matcher/expr_matcher.cpp @@ -44,6 +44,7 @@ struct expr_matcher { struct bool_matcher *bm; struct bool_expr_match **bool_match_buffs; struct log_handle *logger; + struct bool_expr *bool_exprs; }; struct expr_matcher_stream { @@ -51,6 +52,7 @@ struct expr_matcher_stream { enum expr_engine_type engine_type; void *lit_stream; void *regex_stream; + UT_array *all_hit_lit_pattern_ids; struct expr_matcher *ref_matcher; }; @@ -63,6 +65,8 @@ struct db_operations { int (*build_db)(void **lit_db, void *compile_data, struct log_handle *logger); }; +UT_icd ut_pattern_id_icd = {sizeof(unsigned long long), NULL, NULL, NULL}; + struct db_operations db_ops[EXPR_ENGINE_TYPE_AUTO] = { { .type = EXPR_ENGINE_TYPE_HS, @@ -230,6 +234,7 @@ static struct bool_expr *bool_exprs_new(struct expr_rule *rules, size_t n_rule, 
uuid_copy(bool_exprs[i].expr_uuid, rules[i].expr_uuid); bool_exprs[i].item_num = rules[i].n_patterns; + bool_exprs[i].user_tag = &(bool_exprs[i]); } return bool_exprs; @@ -270,6 +275,10 @@ void expr_matcher_free(struct expr_matcher *matcher) FREE(matcher->pat_attr); } + if (matcher->bool_exprs != NULL) { + FREE(matcher->bool_exprs); + } + FREE(matcher); } @@ -322,6 +331,7 @@ expr_matcher_new(struct expr_rule *rules, size_t n_rule, matcher->pat_attr = pat_attr; matcher->engine_type = engine_type; matcher->logger = logger; + matcher->bool_exprs = bool_exprs; matcher->bm = bool_matcher_new(bool_exprs, n_rule, &mem_size); if (matcher->bm != NULL) { log_info(logger, MODULE_EXPR_MATCHER, @@ -333,7 +343,6 @@ expr_matcher_new(struct expr_rule *rules, size_t n_rule, __FUNCTION__, __LINE__); bm_ret = -1; } - FREE(bool_exprs); matcher->bool_match_buffs = ALLOC(struct bool_expr_match *, n_thread); for (size_t i = 0; i < n_thread; i++) { @@ -405,8 +414,7 @@ static inline int compare_pattern_id(const void *a, const void *b) static int expr_matcher_bool_matcher_match(struct bool_matcher *bm, struct bool_expr_match *match_buff, size_t buff_size, unsigned long long *hit_pattern_ids, - size_t n_hit_pattern, struct expr_scan_result *result_array, - size_t array_size, size_t *n_hit_result) + size_t n_hit_pattern, size_t *n_hit_result) { unsigned long long prev_pat_id = 0xFFFFFFFFFFFFFFFF; @@ -430,13 +438,10 @@ static int expr_matcher_bool_matcher_match(struct bool_matcher *bm, struct bool_ goto next; } - if (bool_matcher_ret > (int)array_size) { - bool_matcher_ret = array_size; + if (bool_matcher_ret > (int)buff_size) { + bool_matcher_ret = buff_size; } - for (int index = 0; index < bool_matcher_ret; index++) { - uuid_copy(result_array[index].rule_uuid, match_buff[index].expr_uuid); - } *n_hit_result = bool_matcher_ret; next: @@ -445,7 +450,7 @@ next: int expr_matcher_match(struct expr_matcher *matcher, int thread_id, const char *data, size_t data_len, - struct expr_scan_result 
*result_array, + uuid_t *result_array, size_t array_size, size_t *n_hit_result, size_t *n_hit_pattern) { @@ -492,9 +497,16 @@ int expr_matcher_match(struct expr_matcher *matcher, int thread_id, struct bool_expr_match *match_buff = matcher->bool_match_buffs[thread_id]; - return expr_matcher_bool_matcher_match(matcher->bm, match_buff, MAX_HIT_PATTERN_NUM, - lit_pat_ids, pat_cnt, result_array, - array_size, n_hit_result); + ret = expr_matcher_bool_matcher_match(matcher->bm, match_buff, MAX_HIT_PATTERN_NUM, + lit_pat_ids, pat_cnt, n_hit_result); + for (size_t i = 0; i < *n_hit_result && i < array_size; i++) { + uuid_copy(result_array[i], match_buff[i].expr_uuid); + } + if (*n_hit_result > array_size) { + *n_hit_result = array_size; + } + + return ret; } struct expr_matcher_stream * @@ -533,13 +545,27 @@ expr_matcher_stream_open(struct expr_matcher *matcher, int thread_id) stream->lit_stream = lit_stream; stream->regex_stream = regex_stream; stream->ref_matcher = matcher; + utarray_new(stream->all_hit_lit_pattern_ids, &ut_pattern_id_icd); return stream; } +static int expr_has_pattern_id_in_array(struct bool_expr *expr, unsigned long long *pat_ids, size_t n_pat) +{ + for (size_t i = 0; i < expr->item_num; i++) { + for (size_t j = 0; j < n_pat; j++) { + if (expr->items[i].item_id == pat_ids[j]) { + return 1; + } + } + } + + return 0; +} + int expr_matcher_stream_match(struct expr_matcher_stream *stream, const char *data, size_t data_len, - struct expr_scan_result *result_array, + uuid_t *result_array, size_t array_size, size_t *n_hit_result, size_t *n_hit_pattern) { @@ -554,7 +580,7 @@ int expr_matcher_stream_match(struct expr_matcher_stream *stream, unsigned long long regex_pat_ids[MAX_HIT_PATTERN_NUM]; size_t lit_pat_cnt = 0; size_t regex_pat_cnt = 0; - size_t pat_cnt = 0; + size_t all_hit_pat_cnt = 0; int ret = engine_ops[stream->engine_type].scan_stream(stream->lit_stream, data, data_len, lit_pat_ids, MAX_HIT_PATTERN_NUM, @@ -573,23 +599,62 @@ int 
expr_matcher_stream_match(struct expr_matcher_stream *stream, return -1; } - pat_cnt = lit_pat_cnt + regex_pat_cnt; - *n_hit_pattern = pat_cnt; - if (pat_cnt > MAX_HIT_PATTERN_NUM) { - pat_cnt = MAX_HIT_PATTERN_NUM; + *n_hit_pattern = lit_pat_cnt + regex_pat_cnt; + + /* + 1. Some expr items contain multiple patterns, such as "aaa&bbb", so we need to keep all hit patterns to ensure no expr item is missed when scanning multiple times. + 2. Regarding the maat API function maat_state_get_direct_hit_objects: bool_matcher(all_hit_patterns) will return all expr items every time, while this scan + may not have hit some of those items, so we need to check them. + */ + + //1. add lit pattern ids to all_hit_lit_pattern_ids, skipping duplicates + for (size_t i = 0; i < lit_pat_cnt; i++) { + if (utarray_find(stream->all_hit_lit_pattern_ids, &lit_pat_ids[i], compare_pattern_id) == NULL) { + utarray_push_back(stream->all_hit_lit_pattern_ids, &lit_pat_ids[i]); + utarray_sort(stream->all_hit_lit_pattern_ids, compare_pattern_id); + } } - size_t j = 0; - for (size_t i = lit_pat_cnt; i < pat_cnt; i++, j++) { - lit_pat_ids[i] = regex_pat_ids[j]; + //2. 
find expr item uuid by all hit lit pattern ids with bool_matcher + size_t all_hit_lit_pat_cnt = utarray_len(stream->all_hit_lit_pattern_ids); + unsigned long long all_hit_pat_ids[MAX_HIT_PATTERN_NUM]; + + all_hit_pat_cnt = all_hit_lit_pat_cnt + regex_pat_cnt; + if (all_hit_pat_cnt > MAX_HIT_PATTERN_NUM) { + all_hit_pat_cnt = MAX_HIT_PATTERN_NUM; + } + + for (size_t i = 0; i < all_hit_lit_pat_cnt; i++) { + all_hit_pat_ids[i] = *(unsigned long long *)utarray_eltptr(stream->all_hit_lit_pattern_ids, i); + } + + for (size_t i = all_hit_lit_pat_cnt, j = 0; i < all_hit_pat_cnt; i++, j++) { + all_hit_pat_ids[i] = regex_pat_ids[j]; } struct expr_matcher *matcher = stream->ref_matcher; struct bool_expr_match *match_buff = matcher->bool_match_buffs[stream->thread_id]; + size_t n_hit_expr = 0; - return expr_matcher_bool_matcher_match(matcher->bm, match_buff, MAX_HIT_PATTERN_NUM, - lit_pat_ids, pat_cnt, result_array, array_size, - n_hit_result); + ret = expr_matcher_bool_matcher_match(matcher->bm, match_buff, MAX_HIT_PATTERN_NUM, + all_hit_pat_ids, all_hit_pat_cnt, &n_hit_expr); + + //3. 
check the result of bool_matcher + *n_hit_result = 0; + for (size_t i = 0; i < n_hit_expr; i++) { + struct bool_expr *expr = (struct bool_expr *)match_buff[i].user_tag; + if (expr_has_pattern_id_in_array(expr, lit_pat_ids, lit_pat_cnt) || + expr_has_pattern_id_in_array(expr, regex_pat_ids, regex_pat_cnt)) { + uuid_copy(result_array[*n_hit_result], expr->expr_uuid); + (*n_hit_result)++; + + if (*n_hit_result >= array_size) { + break; + } + } + } + + return ret; } void expr_matcher_stream_close(struct expr_matcher_stream *stream) @@ -608,5 +673,9 @@ void expr_matcher_stream_close(struct expr_matcher_stream *stream) stream->regex_stream = NULL; } + if (stream->all_hit_lit_pattern_ids != NULL) { + utarray_free(stream->all_hit_lit_pattern_ids); + } + FREE(stream); } \ No newline at end of file diff --git a/scanner/expr_matcher/expr_matcher.h b/scanner/expr_matcher/expr_matcher.h index 16ba9b2..0e5ec4b 100644 --- a/scanner/expr_matcher/expr_matcher.h +++ b/scanner/expr_matcher/expr_matcher.h @@ -65,10 +65,6 @@ struct expr_pattern { size_t pat_len; }; -struct expr_scan_result { - uuid_t rule_uuid; -}; - /* logic AND expression, such as (rule1 & rule2) */ struct expr_rule { uuid_t expr_uuid; /* AND expression ID */ @@ -106,7 +102,7 @@ void expr_matcher_free(struct expr_matcher *matcher); */ int expr_matcher_match(struct expr_matcher *matcher, int thread_id, const char *data, size_t data_len, - struct expr_scan_result *result_array, + uuid_t *result_array, size_t array_size, size_t *n_hit_result, size_t *n_hit_pattern); @@ -121,7 +117,7 @@ expr_matcher_stream_open(struct expr_matcher *matcher, int thread_id); */ int expr_matcher_stream_match(struct expr_matcher_stream *stream, const char *data, size_t data_len, - struct expr_scan_result *result_array, + uuid_t *result_array, size_t array_size, size_t *n_hit_result, size_t *n_hit_pattern); diff --git a/scanner/expr_matcher/expr_matcher_inc.h b/scanner/expr_matcher/expr_matcher_inc.h index c575508..3fdbd6b 100644 --- 
a/scanner/expr_matcher/expr_matcher_inc.h +++ b/scanner/expr_matcher/expr_matcher_inc.h @@ -36,7 +36,9 @@ struct pattern_attribute { }; struct matched_pattern { - UT_array *pattern_ids; + unsigned long long *pattern_ids; + size_t *n_pattern_id; + size_t pattern_ids_size; struct bloom *ref_bloom; struct pattern_attribute *ref_pat_attr; size_t scan_data_len; diff --git a/src/maat_expr.c b/src/maat_expr.c index 51807d8..03c8788 100644 --- a/src/maat_expr.c +++ b/src/maat_expr.c @@ -832,7 +832,7 @@ int expr_runtime_scan(struct expr_runtime *expr_rt, int thread_id, size_t n_hit_item = 0; size_t n_hit_pattern = 0; - struct expr_scan_result hit_results[MAX_HIT_ITEM_NUM]; + uuid_t hit_results[MAX_HIT_ITEM_NUM]; int ret = expr_matcher_match(expr_rt->matcher, thread_id, data, data_len, hit_results, MAX_HIT_ITEM_NUM, &n_hit_item, &n_hit_pattern); @@ -854,7 +854,7 @@ int expr_runtime_scan(struct expr_runtime *expr_rt, int thread_id, for (size_t i = 0; i < n_hit_item; i++) { struct expr_item *expr_item = (struct expr_item *)rcu_hash_find(expr_rt->item_hash, - (char *)&hit_results[i].rule_uuid, + (char *)&hit_results[i], sizeof(uuid_t)); if (!expr_item) { // item config has been deleted @@ -924,7 +924,7 @@ int expr_runtime_stream_scan(struct expr_runtime_stream *expr_rt_stream, size_t n_hit_item = 0; size_t n_hit_pattern = 0; - struct expr_scan_result hit_results[MAX_HIT_ITEM_NUM]; + uuid_t hit_results[MAX_HIT_ITEM_NUM]; int ret = expr_matcher_stream_match(expr_rt_stream->handle, data, data_len, hit_results, MAX_HIT_ITEM_NUM, &n_hit_item, &n_hit_pattern); @@ -947,7 +947,7 @@ int expr_runtime_stream_scan(struct expr_runtime_stream *expr_rt_stream, for (size_t i = 0; i < n_hit_item; i++) { expr_item = (struct expr_item *)rcu_hash_find(expr_rt->item_hash, - (char *)&hit_results[i].rule_uuid, + (char *)&hit_results[i], sizeof(uuid_t)); if (!expr_item) { // item config has been deleted diff --git a/test/expr_matcher_gtest.cpp b/test/expr_matcher_gtest.cpp index 4a51732..75f67a3 
100644 --- a/test/expr_matcher_gtest.cpp +++ b/test/expr_matcher_gtest.cpp @@ -303,7 +303,7 @@ TEST(hs_expr_matcher_match, literal_sub_has_normal_offset) expr_array_free(rules, n_rule); char scan_data1[64] = "hello aaa"; - struct expr_scan_result result[64] = {0}; + uuid_t result[64] = {0}; size_t n_result = 0; size_t n_hit_pattern = 0; @@ -360,7 +360,7 @@ TEST(rs_expr_matcher_match, literal_sub_has_normal_offset) expr_array_free(rules, n_rule); char scan_data1[64] = "hello aaa"; - struct expr_scan_result result[64] = {0}; + uuid_t result[64] = {0}; size_t n_result = 0; size_t n_hit_pattern = 0; @@ -417,7 +417,7 @@ TEST(hs_expr_matcher_match, literal_sub_has_left_unlimit_offset) expr_array_free(rules, n_rule); char scan_data1[64] = "hello bbb"; - struct expr_scan_result result[64] = {0}; + uuid_t result[64] = {0}; size_t n_result = 0; size_t n_hit_pattern = 0; @@ -475,7 +475,7 @@ TEST(rs_expr_matcher_match, literal_sub_has_left_unlimit_offset) expr_array_free(rules, n_rule); char scan_data1[64] = "hello bbb"; - struct expr_scan_result result[64] = {0}; + uuid_t result[64] = {0}; size_t n_result = 0; size_t n_hit_pattern = 0; @@ -533,7 +533,7 @@ TEST(hs_expr_matcher_match, literal_sub_has_right_unlimit_offset) expr_array_free(rules, n_rule); char scan_data1[64] = "hello ccc"; - struct expr_scan_result result[64] = {0}; + uuid_t result[64] = {0}; size_t n_result = 0; size_t n_hit_pattern = 0; @@ -600,7 +600,7 @@ TEST(rs_expr_matcher_match, literal_sub_has_right_unlimit_offset) expr_array_free(rules, n_rule); char scan_data1[64] = "hello ccc"; - struct expr_scan_result result[64] = {0}; + uuid_t result[64] = {0}; size_t n_result = 0; size_t n_hit_pattern = 0; @@ -667,7 +667,7 @@ TEST(hs_expr_matcher_match, literal_sub_with_no_offset) expr_array_free(rules, n_rule); char scan_data1[64] = "hello ddd"; - struct expr_scan_result result[64] = {0}; + uuid_t result[64] = {0}; size_t n_result = 0; size_t n_hit_pattern = 0; @@ -725,7 +725,7 @@ TEST(rs_expr_matcher_match, 
literal_sub_with_no_offset) expr_array_free(rules, n_rule); char scan_data1[64] = "hello ddd"; - struct expr_scan_result result[64] = {0}; + uuid_t result[64] = {0}; size_t n_result = 0; size_t n_hit_pattern = 0; @@ -783,7 +783,7 @@ TEST(hs_expr_matcher_match, literal_exactly) expr_array_free(rules, n_rule); char scan_data1[64] = "hello eee"; - struct expr_scan_result result[64] = {0}; + uuid_t result[64] = {0}; size_t n_result = 0; size_t n_hit_pattern = 0; @@ -831,7 +831,7 @@ TEST(rs_expr_matcher_match, literal_exactly) expr_array_free(rules, n_rule); char scan_data1[64] = "hello eee"; - struct expr_scan_result result[64] = {0}; + uuid_t result[64] = {0}; size_t n_result = 0; size_t n_hit_pattern = 0; @@ -880,7 +880,7 @@ TEST(hs_expr_matcher_match, literal_prefix) expr_array_free(rules, n_rule); char scan_data1[64] = "hello fff"; - struct expr_scan_result result[64] = {0}; + uuid_t result[64] = {0}; size_t n_result = 0; size_t n_hit_pattern = 0; @@ -940,7 +940,7 @@ TEST(rs_expr_matcher_match, literal_prefix) expr_array_free(rules, n_rule); char scan_data1[64] = "hello fff"; - struct expr_scan_result result[64] = {0}; + uuid_t result[64] = {0}; size_t n_result = 0; size_t n_hit_pattern = 0; @@ -1000,7 +1000,7 @@ TEST(hs_expr_matcher_match, literal_suffix) expr_array_free(rules, n_rule); char scan_data1[64] = "hello ggg"; - struct expr_scan_result result[64] = {0}; + uuid_t result[64] = {0}; size_t n_result = 0; size_t n_hit_pattern = 0; @@ -1060,7 +1060,7 @@ TEST(rs_expr_matcher_match, literal_suffix) expr_array_free(rules, n_rule); char scan_data1[64] = "hello ggg"; - struct expr_scan_result result[64] = {0}; + uuid_t result[64] = {0}; size_t n_result = 0; size_t n_hit_pattern = 0; @@ -1120,7 +1120,7 @@ TEST(hs_expr_matcher_match, literal_sub_with_hex) expr_array_free(rules, n_rule); char scan_data1[64] = "Content-Type: /html"; - struct expr_scan_result result[64] = {0}; + uuid_t result[64] = {0}; size_t n_result = 0; size_t n_hit_pattern = 0; @@ -1159,7 +1159,7 
@@ TEST(rs_expr_matcher_match, literal_sub_with_hex) expr_array_free(rules, n_rule); char scan_data1[64] = "Content-Type: /html"; - struct expr_scan_result result[64] = {0}; + uuid_t result[64] = {0}; size_t n_result = 0; size_t n_hit_pattern = 0; @@ -1198,7 +1198,7 @@ TEST(hs_expr_matcher_match, literal_with_chinese) expr_array_free(rules, n_rule); char data0[64] = "#中国 你好"; - struct expr_scan_result result0[64] = {0}; + uuid_t result0[64] = {0}; size_t n_result0 = 0; size_t n_hit_pattern = 0; @@ -1227,7 +1227,7 @@ TEST(rs_expr_matcher_match, literal_with_chinese) expr_array_free(rules, n_rule); char data0[64] = "#中国 你好"; - struct expr_scan_result result0[64] = {0}; + uuid_t result0[64] = {0}; size_t n_result0 = 0; size_t n_hit_pattern = 0; @@ -1256,7 +1256,7 @@ TEST(hs_expr_matcher_match, same_pattern_different_offset) expr_array_free(rules, n_rule); char data[64] = "onetoday,anothertoday"; - struct expr_scan_result result[64] = {0}; + uuid_t result[64] = {0}; size_t n_result = 0; size_t n_hit_pattern = 0; @@ -1285,7 +1285,7 @@ TEST(rs_expr_matcher_match, same_pattern_different_offset) expr_array_free(rules, n_rule); char data[64] = "onetoday,anothertoday"; - struct expr_scan_result result[64] = {0}; + uuid_t result[64] = {0}; size_t n_result = 0; size_t n_hit_pattern = 0; @@ -1316,7 +1316,7 @@ TEST(hs_expr_matcher_match, long_scan_data) const char* scan_data = "A directed path in a directed graph is a finite or infinite\ sequence of edges which joins a sequence of distinct vertices, but with the added restriction\ that the edges be all directed in the same direction."; - struct expr_scan_result result[64] = {0}; + uuid_t result[64] = {0}; size_t n_result = 0; size_t n_hit_pattern = 0; @@ -1347,7 +1347,7 @@ TEST(rs_expr_matcher_match, long_scan_data) const char* scan_data = "A directed path in a directed graph is a finite or infinite\ sequence of edges which joins a sequence of distinct vertices, but with the added restriction\ that the edges be all directed in 
the same direction."; - struct expr_scan_result result[64] = {0}; + uuid_t result[64] = {0}; size_t n_result = 0; size_t n_hit_pattern = 0; @@ -1396,7 +1396,7 @@ TEST(hs_expr_matcher_stream, basic) const char *scan_data2 = " or infinite sequence of edges which joins a " "sequence of distinct vertices"; - struct expr_scan_result result[64] = {0}; + uuid_t result[64] = {0}; size_t n_hit_result = 0; size_t n_hit_pattern = 0; int thread_id = 0; @@ -1440,7 +1440,7 @@ TEST(rs_expr_matcher_stream, basic) const char *scan_data2 = " or infinite sequence of edges which joins a " "sequence of distinct vertices"; - struct expr_scan_result result[64] = {0}; + uuid_t result[64] = {0}; size_t n_hit_result = 0; size_t n_hit_pattern = 0; int thread_id = 0; @@ -1485,7 +1485,7 @@ TEST(hs_expr_matcher, regex_basic) const char *scan_data1 = "http://www.cyberessays.com/search_results.php?" "action=search&query=username,abckkk,1234567"; //const char *scan_data2 = "8rain"; - struct expr_scan_result result[64] = {0}; + uuid_t result[64] = {0}; size_t n_result = 0; size_t n_hit_pattern = 0; @@ -1518,7 +1518,7 @@ TEST(rs_expr_matcher, regex_basic) const char *scan_data1 = "http://www.cyberessays.com/search_results.php?" 
"action=search&query=username,abckkk,1234567"; - struct expr_scan_result result[64] = {0}; + uuid_t result[64] = {0}; size_t n_result = 0; size_t n_hit_pattern = 0; @@ -1547,7 +1547,7 @@ TEST(hs_expr_matcher, regex_unicode) expr_array_free(rules, n_rule); const char *scan_data = "string contains É"; - struct expr_scan_result result[64] = {0}; + uuid_t result[64] = {0}; size_t n_result = 0; size_t n_hit_pattern = 0; @@ -1575,7 +1575,7 @@ TEST(rs_expr_matcher, regex_unicode) expr_array_free(rules, n_rule); const char *scan_data = "string contains É"; - struct expr_scan_result result[64] = {0}; + uuid_t result[64] = {0}; size_t n_result = 0; size_t n_hit_pattern = 0; @@ -1604,7 +1604,7 @@ TEST(hs_expr_matcher, hit_pattern_num) const char *scan_data1 = "string has one two"; const char *scan_data2 = "string has one two three"; - struct expr_scan_result result[64] = {0}; + uuid_t result[64] = {0}; size_t n_result = 0; size_t n_hit_pattern = 0; diff --git a/test/maat_framework_gtest.cpp b/test/maat_framework_gtest.cpp index 17ee7b2..cd9f447 100644 --- a/test/maat_framework_gtest.cpp +++ b/test/maat_framework_gtest.cpp @@ -1241,14 +1241,14 @@ TEST_F(HsStringScan, StreamHitDirectObject) { int ret; struct maat *maat_inst = HsStringScan::_shared_maat_inst; struct maat_state *state = maat_state_new(maat_inst, thread_id); - const char *table_name = "HTTP_URL"; - const char *attribute_name = "HTTP_URL"; + const char *table_name_url = "HTTP_URL"; + const char *attribute_name_url = "HTTP_URL"; const char *scan_data1 = "www.3300av.com"; const char *scan_data2 = "sdadhuadhasdgufgh;sdfhjaufhiwebfiusdafhaos;dhfaluhjweh"; memset(results, 0, sizeof(results)); - struct maat_stream *sp = maat_stream_new(maat_inst, table_name, attribute_name, state); + struct maat_stream *sp = maat_stream_new(maat_inst, table_name_url, attribute_name_url, state); ASSERT_TRUE(sp != NULL); ret = maat_stream_scan(sp, scan_data1, strlen(scan_data1), results, @@ -1265,7 +1265,7 @@ TEST_F(HsStringScan, 
StreamHitDirectObject) { uuid_unparse(object_array[0].object_uuid, uuid_str); EXPECT_STREQ(uuid_str, "00000000-0000-0000-0000-000000000112"); - ret = maat_scan_not_logic(maat_inst, table_name, attribute_name, results, ARRAY_SIZE, + ret = maat_scan_not_logic(maat_inst, table_name_url, attribute_name_url, results, ARRAY_SIZE, &n_hit_result, state); EXPECT_EQ(ret, MAAT_SCAN_OK); @@ -1274,10 +1274,46 @@ TEST_F(HsStringScan, StreamHitDirectObject) { EXPECT_EQ(ret, MAAT_SCAN_OK); ret = maat_state_get_direct_hit_objects(state, object_array, ARRAY_SIZE); EXPECT_EQ(ret, 0); - maat_stream_free(sp); + maat_state_reset(state); + const char *attribute_name_sig = "HTTP_SIGNATURE"; + const char *table_name_sig = "HTTP_SIGNATURE"; + const char *scan_data3 = "abckkk"; + const char *scan_data4 = "123"; + sp = maat_stream_new(maat_inst, table_name_sig, attribute_name_sig, state); + ASSERT_TRUE(sp != NULL); + + ret = maat_stream_scan(sp, scan_data3, strlen(scan_data3), results, + ARRAY_SIZE, &n_hit_result, state); + EXPECT_EQ(ret, MAAT_SCAN_OK); + + ret = maat_stream_scan(sp, scan_data4, strlen(scan_data4), results, + ARRAY_SIZE, &n_hit_result, state); + EXPECT_EQ(ret, MAAT_SCAN_HIT); + uuid_unparse(results[0], uuid_str); + EXPECT_STREQ(uuid_str, "00000000-0000-0000-0000-000000000128"); + + ret = maat_state_get_direct_hit_objects(state, object_array, ARRAY_SIZE); + EXPECT_EQ(ret, 1); + uuid_unparse(object_array[0].object_uuid, uuid_str); + EXPECT_STREQ(uuid_str, "00000000-0000-0000-0000-000000000107"); + + ret = maat_scan_not_logic(maat_inst, table_name_sig, attribute_name_sig, results, ARRAY_SIZE, + &n_hit_result, state); + EXPECT_EQ(ret, MAAT_SCAN_OK); + + ret = maat_stream_scan(sp, scan_data4, strlen(scan_data4), results, + ARRAY_SIZE, &n_hit_result, state); + EXPECT_EQ(ret, MAAT_SCAN_HALF_HIT);//rule has been hit before + + ret = maat_state_get_direct_hit_objects(state, object_array, ARRAY_SIZE); + EXPECT_EQ(ret, 1); + uuid_unparse(object_array[0].object_uuid, uuid_str); + 
EXPECT_STREQ(uuid_str, "00000000-0000-0000-0000-000000000107"); + + maat_stream_free(sp); maat_state_free(state); state = NULL; } @@ -8775,7 +8811,7 @@ TEST_F(MaatCmd, HitObject) { memset(hit_objects, 0, sizeof(hit_objects)); n_hit_object = maat_state_get_direct_hit_object_cnt(state); maat_state_get_direct_hit_objects(state, hit_objects, n_hit_object); - EXPECT_EQ(n_hit_object, 2); + EXPECT_EQ(n_hit_object, 1); uuid_unparse(hit_objects[0].item_uuid, uuid_str); EXPECT_STREQ(uuid_str, item5_uuid_str); @@ -8783,15 +8819,9 @@ TEST_F(MaatCmd, HitObject) { EXPECT_STREQ(uuid_str, object1_uuid_str); EXPECT_STREQ(hit_objects[0].attribute_name, keywords_attr_name); - uuid_unparse(hit_objects[1].item_uuid, uuid_str); - EXPECT_STREQ(uuid_str, item4_uuid_str); - uuid_unparse(hit_objects[1].object_uuid, uuid_str); - EXPECT_STREQ(uuid_str, object4_uuid_str); - EXPECT_STREQ(hit_objects[1].attribute_name, keywords_attr_name); - n_last_hit_object = maat_state_get_last_hit_object_cnt(state); maat_state_get_last_hit_objects(state, last_hit_objects, 128); - EXPECT_EQ(n_last_hit_object, 3); + EXPECT_EQ(n_last_hit_object, 2); uuid_unparse(last_hit_objects[0].item_uuid, uuid_str); EXPECT_STREQ(uuid_str, item5_uuid_str); @@ -8799,16 +8829,10 @@ TEST_F(MaatCmd, HitObject) { EXPECT_STREQ(uuid_str, object1_uuid_str); EXPECT_STREQ(last_hit_objects[0].attribute_name, keywords_attr_name); - uuid_unparse(last_hit_objects[1].item_uuid, uuid_str); - EXPECT_STREQ(uuid_str, item4_uuid_str); + EXPECT_TRUE(uuid_is_null(last_hit_objects[1].item_uuid)); uuid_unparse(last_hit_objects[1].object_uuid, uuid_str); - EXPECT_STREQ(uuid_str, object4_uuid_str); - EXPECT_STREQ(last_hit_objects[1].attribute_name, keywords_attr_name); - - EXPECT_TRUE(uuid_is_null(last_hit_objects[2].item_uuid)); - uuid_unparse(last_hit_objects[2].object_uuid, uuid_str); EXPECT_STREQ(uuid_str, object11_uuid_str); - EXPECT_STREQ(last_hit_objects[2].attribute_name, keywords_attr_name); + EXPECT_STREQ(last_hit_objects[1].attribute_name, 
keywords_attr_name); maat_stream_free(stream); maat_state_free(state);