From 2210aeef63faae9181d5a34d7d4e26da3559cbbc Mon Sep 17 00:00:00 2001 From: liuwentan Date: Mon, 25 Sep 2023 14:35:24 +0800 Subject: [PATCH] [OPTIMIZE]filter duplicate pattern id for bool_matcher_match --- .../expr_matcher/adapter_hs/adapter_hs.cpp | 17 ++++++++--- .../expr_matcher/adapter_rs/adapter_rs.cpp | 30 ++++++++++++++----- 2 files changed, 36 insertions(+), 11 deletions(-) diff --git a/scanner/expr_matcher/adapter_hs/adapter_hs.cpp b/scanner/expr_matcher/adapter_hs/adapter_hs.cpp index 11df9cf..d341c8d 100644 --- a/scanner/expr_matcher/adapter_hs/adapter_hs.cpp +++ b/scanner/expr_matcher/adapter_hs/adapter_hs.cpp @@ -830,15 +830,24 @@ int adapter_hs_scan_stream(void *hs_stream, const char *data, size_t data_len, utarray_sort(stream->matched_pat->pattern_ids, compare_pattern_id); - unsigned long long pattern_ids[n_pattern_id]; + unsigned long long prev_pattern_id = 0xFFFFFFFFFFFFFFFF; + unsigned long long tmp_pattern_id = 0; + size_t n_unique_pattern_id = 0; + unsigned long long unique_pattern_ids[n_pattern_id]; + for (size_t i = 0; i < n_pattern_id; i++) { - pattern_ids[i] = *(unsigned long long *)utarray_eltptr(stream->matched_pat->pattern_ids, i); + tmp_pattern_id = *(unsigned long long *)utarray_eltptr(stream->matched_pat->pattern_ids, i); + if (tmp_pattern_id != prev_pattern_id) { + unique_pattern_ids[n_unique_pattern_id++] = tmp_pattern_id; + prev_pattern_id = tmp_pattern_id; + } } int ret = 0; struct bool_expr_match *bool_matcher_results = scratch->bool_match_buffs[thread_id]; - int bool_matcher_ret = bool_matcher_match(stream->ref_hs_rt->bm, pattern_ids, n_pattern_id, - bool_matcher_results, MAX_HIT_EXPR_NUM); + int bool_matcher_ret = bool_matcher_match(stream->ref_hs_rt->bm, unique_pattern_ids, + n_unique_pattern_id, bool_matcher_results, + MAX_HIT_EXPR_NUM); if (bool_matcher_ret < 0) { ret = -1; goto next; diff --git a/scanner/expr_matcher/adapter_rs/adapter_rs.cpp b/scanner/expr_matcher/adapter_rs/adapter_rs.cpp index 1489dca..42c47b2 100644 --- a/scanner/expr_matcher/adapter_rs/adapter_rs.cpp +++ b/scanner/expr_matcher/adapter_rs/adapter_rs.cpp @@ -605,13 +605,21 @@ int adapter_rs_scan_stream(void *rs_stream, const char *data, size_t data_len, utarray_sort(matched_pat->pattern_ids, compare_pattern_id); - unsigned long long pattern_ids[n_pattern_id]; + unsigned long long prev_pattern_id = 0xFFFFFFFFFFFFFFFF; + unsigned long long tmp_pattern_id = 0; + size_t n_unique_pattern_id = 0; + unsigned long long unique_pattern_ids[n_pattern_id]; + for (size_t i = 0; i < n_pattern_id; i++) { - pattern_ids[i] = *(unsigned long long *)utarray_eltptr(matched_pat->pattern_ids, i); - } + tmp_pattern_id = *(unsigned long long *)utarray_eltptr(matched_pat->pattern_ids, i); + if (tmp_pattern_id != prev_pattern_id) { + unique_pattern_ids[n_unique_pattern_id++] = tmp_pattern_id; + prev_pattern_id = tmp_pattern_id; + } + } struct bool_expr_match *bool_matcher_results = rs_rt->bool_match_buffs[thread_id]; - int bool_matcher_ret = bool_matcher_match(rs_rt->bm, pattern_ids, n_pattern_id, + int bool_matcher_ret = bool_matcher_match(rs_rt->bm, unique_pattern_ids, n_unique_pattern_id, bool_matcher_results, MAX_HIT_EXPR_NUM); if (bool_matcher_ret < 0) { ret = -1; @@ -675,13 +683,21 @@ int adapter_rs_scan(void *rs_instance, int thread_id, const char *data, size_t d utarray_sort(matched_pat->pattern_ids, compare_pattern_id); - unsigned long long pattern_ids[n_pattern_id]; + unsigned long long prev_pattern_id = 0xFFFFFFFFFFFFFFFF; + unsigned long long tmp_pattern_id = 0; + size_t n_unique_pattern_id = 0; + unsigned long long unique_pattern_ids[n_pattern_id]; + for (size_t i = 0; i < n_pattern_id; i++) { - pattern_ids[i] = *(unsigned long long *)utarray_eltptr(matched_pat->pattern_ids, i); + tmp_pattern_id = *(unsigned long long *)utarray_eltptr(matched_pat->pattern_ids, i); + if (tmp_pattern_id != prev_pattern_id) { + unique_pattern_ids[n_unique_pattern_id++] = tmp_pattern_id; + prev_pattern_id = tmp_pattern_id; + } } struct bool_expr_match *bool_matcher_results = rs_rt->bool_match_buffs[thread_id]; - int bool_matcher_ret = bool_matcher_match(rs_rt->bm, pattern_ids, n_pattern_id, + int bool_matcher_ret = bool_matcher_match(rs_rt->bm, unique_pattern_ids, n_unique_pattern_id, bool_matcher_results, MAX_HIT_EXPR_NUM); if (bool_matcher_ret < 0) { ret = -1;