From e9ffca839277f9c179cbcdf6450a0d9032064837 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=96=87=E5=9D=9B?= Date: Thu, 3 Aug 2023 08:48:12 +0000 Subject: [PATCH] [PATCH]add regex benchmark --- .../expr_matcher/adapter_hs/adapter_hs.cpp | 16 +- src/json2iris.c | 2 +- src/maat_compile.c | 4 +- src/maat_config_monitor.c | 2 +- src/maat_group.c | 6 +- src/maat_redis_monitor.c | 2 +- test/benchmark/benchmark_hs_gtest.cpp | 914 +- test/benchmark/benchmark_rs_gtest.cpp | 322 +- test/benchmark/benchmark_table_info.conf | 140 + test/benchmark/regex_rules/REGEX_100 | 101 + test/benchmark/regex_rules/REGEX_10K | 10001 +++++++++++ test/benchmark/regex_rules/REGEX_15K | 15001 ++++++++++++++++ test/benchmark/regex_rules/REGEX_1K | 1001 ++ test/benchmark/regex_rules/REGEX_200 | 201 + test/benchmark/regex_rules/REGEX_2K | 2001 +++ test/benchmark/regex_rules/REGEX_300 | 301 + test/benchmark/regex_rules/REGEX_3K | 3001 ++++ test/benchmark/regex_rules/REGEX_500 | 501 + test/benchmark/regex_rules/REGEX_5K | 5001 ++++++ test/maat_framework_gtest.cpp | 2 +- 20 files changed, 38504 insertions(+), 16 deletions(-) create mode 100644 test/benchmark/regex_rules/REGEX_100 create mode 100644 test/benchmark/regex_rules/REGEX_10K create mode 100644 test/benchmark/regex_rules/REGEX_15K create mode 100644 test/benchmark/regex_rules/REGEX_1K create mode 100644 test/benchmark/regex_rules/REGEX_200 create mode 100644 test/benchmark/regex_rules/REGEX_2K create mode 100644 test/benchmark/regex_rules/REGEX_300 create mode 100644 test/benchmark/regex_rules/REGEX_3K create mode 100644 test/benchmark/regex_rules/REGEX_500 create mode 100644 test/benchmark/regex_rules/REGEX_5K diff --git a/scanner/expr_matcher/adapter_hs/adapter_hs.cpp b/scanner/expr_matcher/adapter_hs/adapter_hs.cpp index 44846ef..4d57dcf 100644 --- a/scanner/expr_matcher/adapter_hs/adapter_hs.cpp +++ b/scanner/expr_matcher/adapter_hs/adapter_hs.cpp @@ -40,6 +40,7 @@ static const char *hs_module_name_str(const char *name) #define MODULE_ADAPTER_HS hs_module_name_str("maat.adapter_hs") struct adpt_hs_compile_data { + enum expr_pattern_type pat_type; unsigned int *ids; unsigned int *flags; char **patterns; @@ -198,7 +199,7 @@ static int adpt_hs_build_database(struct adapter_hs_runtime *hs_rt, if (regex_cd != NULL) { err = hs_compile_multi((const char *const *)regex_cd->patterns, regex_cd->flags, regex_cd->ids, regex_cd->n_patterns, - HS_MODE_STREAM | HS_MODE_SOM_HORIZON_SMALL, + HS_MODE_STREAM, NULL, &hs_rt->regex_db, &compile_err); if (err != HS_SUCCESS) { if (compile_err) { @@ -213,9 +214,11 @@ static int adpt_hs_build_database(struct adapter_hs_runtime *hs_rt, return 0; } -static struct adpt_hs_compile_data *adpt_hs_compile_data_new(size_t n_patterns) +static struct adpt_hs_compile_data * +adpt_hs_compile_data_new(enum expr_pattern_type pat_type, size_t n_patterns) { struct adpt_hs_compile_data *hs_cd = ALLOC(struct adpt_hs_compile_data, 1); + hs_cd->pat_type = pat_type; hs_cd->patterns = ALLOC(char *, n_patterns); hs_cd->pattern_lens = ALLOC(size_t, n_patterns); hs_cd->n_patterns = n_patterns; @@ -261,7 +264,10 @@ static void populate_compile_data(struct adpt_hs_compile_data *compile_data, compile_data->ids[index] = pattern_id; /* set flags */ - compile_data->flags[index] |= HS_FLAG_SOM_LEFTMOST; + if (compile_data->pat_type == EXPR_PATTERN_TYPE_STR) { + compile_data->flags[index] |= HS_FLAG_SOM_LEFTMOST; + } + if (case_sensitive == EXPR_CASE_INSENSITIVE) { compile_data->flags[index] |= HS_FLAG_CASELESS; } @@ -372,11 +378,11 @@ void *adapter_hs_new(struct expr_rule *rules, size_t n_rule, struct adpt_hs_compile_data *regex_cd = NULL; if (n_literal_pattern > 0) { - literal_cd = adpt_hs_compile_data_new(n_literal_pattern); + literal_cd = adpt_hs_compile_data_new(EXPR_PATTERN_TYPE_STR, n_literal_pattern); } if (n_regex_pattern > 0) { - regex_cd = adpt_hs_compile_data_new(n_regex_pattern); + regex_cd = adpt_hs_compile_data_new(EXPR_PATTERN_TYPE_REG, n_regex_pattern); } size_t pattern_cnt = n_literal_pattern + n_regex_pattern; diff --git a/src/json2iris.c b/src/json2iris.c index 59d93c6..876698b 100644 --- a/src/json2iris.c +++ b/src/json2iris.c @@ -821,7 +821,7 @@ static int write_group_rule(cJSON *group_json, int parent_id, if (NULL == region_json && NULL == sub_groups) { log_info(logger, MODULE_JSON2IRIS, "[%s:%d] A group of compile rule %d has neither regions, " - "sub groups, nor refered another exisited group", + "sub groups, nor refered another existed group", __FUNCTION__, __LINE__, tracking_compile_id); } } diff --git a/src/maat_compile.c b/src/maat_compile.c index 574b6d2..9ec95ae 100644 --- a/src/maat_compile.c +++ b/src/maat_compile.c @@ -1293,7 +1293,7 @@ static int maat_remove_group_from_compile(struct rcu_hash_table *hash_tbl, if (NULL == compile) { log_error(logger, MODULE_COMPILE, "[%s:%d] Remove group_id:%lld from compile_id:%lld failed, compile" - " is not exisited.", __FUNCTION__, __LINE__, g2c_item->group_id, + " is not existed.", __FUNCTION__, __LINE__, g2c_item->group_id, compile_id); return -1; } else { @@ -1349,7 +1349,7 @@ static int maat_remove_group_from_compile(struct rcu_hash_table *hash_tbl, } else { log_error(logger, MODULE_COMPILE, "[%s:%d] Remove group_id:%lld from compile_id:%lld failed, " - "compile is not exisited.", __FUNCTION__, __LINE__, + "compile is not existed.", __FUNCTION__, __LINE__, g2c_item->group_id, compile_id); return -1; } diff --git a/src/maat_config_monitor.c b/src/maat_config_monitor.c index 8fda6f6..1ac467b 100644 --- a/src/maat_config_monitor.c +++ b/src/maat_config_monitor.c @@ -55,7 +55,7 @@ static int cm_read_cfg_index_file(const char *path, struct cm_table_info_t *idx, ret = stat(idx[i].cfg_path, &file_info); if (ret != 0) { - log_error(logger, MODULE_CONFIG_MONITOR, "%s of %s not exisit", + log_error(logger, MODULE_CONFIG_MONITOR, "%s of %s not exist", idx[i].cfg_path, path); fclose(fp); return -1; diff --git a/src/maat_group.c b/src/maat_group.c index 5777b7d..db9ec44 100644 --- a/src/maat_group.c +++ b/src/maat_group.c @@ -582,7 +582,7 @@ static int group_topology_add_group_to_group(struct maat_group_topology *group_t //No duplicated edges between two groups. if (edge_id > 0) { log_error(group_topo->logger, MODULE_GROUP, - "[%s:%d] Add group %d to group %d failed, relation already exisited.", + "[%s:%d] Add group %d to group %d failed, relation already existed.", __FUNCTION__, __LINE__, group->group_id, super_group->group_id); ret = -1; } else { @@ -608,7 +608,7 @@ static int group_topology_del_group_from_group(struct maat_group_topology *group struct maat_group *group = group_topology_find_group(group_topo, group_id); if (NULL == group) { log_error(group_topo->logger, MODULE_GROUP, - "[%s:%d] Del group %d from group %d failed, group %d not exisited.", + "[%s:%d] Del group %d from group %d failed, group %d not existed.", __FUNCTION__, __LINE__, group_id, super_group_id, group_id); return -1; } @@ -616,7 +616,7 @@ static int group_topology_del_group_from_group(struct maat_group_topology *group struct maat_group *super_group = group_topology_find_group(group_topo, super_group_id); if (NULL == super_group) { log_error(group_topo->logger, MODULE_GROUP, - "[%s:%d] Del group %d from group %d failed, superior group %d not exisited.", + "[%s:%d] Del group %d from group %d failed, superior group %d not existed.", __FUNCTION__, __LINE__, group_id, super_group_id, super_group_id); return -1; } diff --git a/src/maat_redis_monitor.c b/src/maat_redis_monitor.c index b288e07..758721e 100644 --- a/src/maat_redis_monitor.c +++ b/src/maat_redis_monitor.c @@ -1138,7 +1138,7 @@ static void exec_serial_rule(redisContext *c, const char *transaction_list, (*cnt)++; append_cmd_cnt++; - //NX: Don't update already exisiting elements. Always add new elements. + //NX: Don't update already existing elements. Always add new elements. redisAppendCommand(c, "RPUSH %s DEL,%s,%lld", transaction_list, s_rule[i].table_name, diff --git a/test/benchmark/benchmark_hs_gtest.cpp b/test/benchmark/benchmark_hs_gtest.cpp index 8aea023..d805b8a 100644 --- a/test/benchmark/benchmark_hs_gtest.cpp +++ b/test/benchmark/benchmark_hs_gtest.cpp @@ -165,6 +165,69 @@ void generate_group2compile_sample(const char *table_name, int sample_count) fclose(fp); } +void *perf_regex_scan_thread(void *arg) +{ + struct thread_param *param = (struct thread_param *)arg; + struct maat *maat_inst = param->maat_inst; + const char *table_name = param->table_name; + struct timespec start, end; + const char *scan_data = "Maat was the goddness of harmony, justice, and truth in ancient Egyptian." + "Request from User-Agent: Chrome, will go to yyy.abc.net"; + long long results[ARRAY_SIZE] = {0}; + int hit_times = 0; + size_t n_hit_result = 0; + struct maat_state *state = maat_state_new(maat_inst, param->thread_id); + + int table_id = maat_get_table_id(maat_inst, table_name); + + clock_gettime(CLOCK_MONOTONIC, &start); + for (int i = 0; i < param->test_count; i++) { + int ret = maat_scan_string(maat_inst, table_id, scan_data, strlen(scan_data), + results, ARRAY_SIZE, &n_hit_result, state); + if (ret == MAAT_SCAN_HIT) { + hit_times++; + } + maat_state_reset(state); + } + clock_gettime(CLOCK_MONOTONIC, &end); + + param->time_elapse_ms = (end.tv_sec - start.tv_sec) * 1000 + + (end.tv_nsec - start.tv_nsec) / 1000000; + int *is_all_hit = ALLOC(int, 1); + *is_all_hit = (hit_times == param->test_count ? 1 : 0); + log_info(param->logger, MODULE_BENCHMARK_GTEST, + "thread_id:%d rule_count:%d regex_scan time_elapse:%lldms hit_times:%d", + param->thread_id, param->rule_count, param->time_elapse_ms, hit_times); + + return is_all_hit; +} + +void *perf_regex_update_thread(void *arg) +{ + struct thread_param *param = (struct thread_param *)arg; + const char *table_name = param->table_name; + const int CMD_EXPR_NUM = 10; + char keyword_buf[128]; + + for (int i = 0; i < CMD_EXPR_NUM; i++) { + random_keyword_generate(keyword_buf, sizeof(keyword_buf)); + FILE *fp = fopen(table_name, "a+"); + if (NULL == fp) { + continue; + } + + fprintf(fp, "%d\t%d\t%s\t1\t0\t0\t1\n", 2000001+i, 2000001+i, keyword_buf); + fclose(fp); + + sleep(1); + } + + int *is_all_hit = ALLOC(int, 1); + *is_all_hit = 1; + + return is_all_hit; +} + void *perf_literal_scan_thread(void *arg) { struct thread_param *param = (struct thread_param *)arg; @@ -463,12 +526,861 @@ void *perf_flag_update_thread(void *arg) return is_all_hit; } +class Regex100BenchmarkGTest : public ::testing::Test +{ +protected: + static void SetUpTestCase() { + logger = log_handle_create("./benchmark_hs_gtest.log", 0); + + FILE *fp = fopen("full_config_index.0000001", "w+"); + if (NULL == fp) { + log_error(g_logger, "open file %s failed", "full_config_index.0000001"); + return; + } + fprintf(fp, "REGEX_100\t100\t./regex_rules/REGEX_100\n"); + fprintf(fp, "COMPILE_PERF\t10\t./COMPILE_PERF\n"); + fprintf(fp, "GROUP2COMPILE_PERF\t10\t./GROUP2COMPILE_PERF\n"); + fclose(fp); + + struct maat_options *opts = maat_options_new(); + maat_options_set_logger(opts, "./benchmark_hs_gtest.log", LOG_LEVEL_INFO); + maat_options_set_iris(opts, "./", "./"); + maat_options_set_rule_effect_interval_ms(opts, 1000); + maat_options_set_caller_thread_number(opts, PERF_THREAD_NUM); + _shared_maat_inst = maat_new(opts, table_info_path); + maat_options_free(opts); + } + + static void TearDownTestCase() { + maat_free(_shared_maat_inst); + log_handle_destroy(logger); + } + + static struct log_handle *logger; + static struct maat *_shared_maat_inst; +}; + +struct maat *Regex100BenchmarkGTest::_shared_maat_inst; +struct log_handle *Regex100BenchmarkGTest::logger; + +TEST_F(Regex100BenchmarkGTest, LiteralScan) { + const char *table_name = "REGEX_100"; + struct maat *maat_inst = Regex100BenchmarkGTest::_shared_maat_inst; + + int table_id = maat_get_table_id(maat_inst, table_name); + ASSERT_GT(table_id, 0); + + pthread_t threads[PERF_THREAD_NUM + 1]; + struct thread_param thread_params[PERF_THREAD_NUM + 1]; + int i = 0; + int *is_all_hit = NULL; + + for (i = 0; i < PERF_THREAD_NUM + 1; i++) { + thread_params[i].maat_inst = maat_inst; + thread_params[i].thread_id = i; + thread_params[i].table_name = table_name; + thread_params[i].test_count = MAX_SCAN_COUNT; + thread_params[i].rule_count = 100; + thread_params[i].time_elapse_ms = 0; + thread_params[i].logger = logger; + + if (i < PERF_THREAD_NUM) { + pthread_create(&threads[i], NULL, perf_regex_scan_thread, thread_params+i); + } else { + thread_params[i].test_count = 0; + pthread_create(&threads[i], NULL, perf_regex_update_thread, thread_params+i); + } + } + + long long time_elapse_ms = 0; + long long scan_count = 0; + long long scan_per_second = 0; + for (i = 0; i < PERF_THREAD_NUM + 1; i++) { + pthread_join(threads[i], (void **)&is_all_hit); + time_elapse_ms += thread_params[i].time_elapse_ms; + scan_count += thread_params[i].test_count; + + EXPECT_EQ(*is_all_hit, 1); + *is_all_hit = 0; + free(is_all_hit); + } + scan_per_second = scan_count * 1000 / time_elapse_ms; + + log_info(maat_inst->logger, MODULE_BENCHMARK_GTEST, + "Regex100Scan match rate on %d-threads speed %lld lookups/s/thread", + PERF_THREAD_NUM, scan_per_second); +} + +class Regex200BenchmarkGTest : public ::testing::Test +{ +protected: + static void SetUpTestCase() { + logger = log_handle_create("./benchmark_hs_gtest.log", 0); + + FILE *fp = fopen("full_config_index.0000001", "w+"); + if (NULL == fp) { + log_error(g_logger, "open file %s failed", "full_config_index.0000001"); + return; + } + fprintf(fp, "REGEX_200\t200\t./regex_rules/REGEX_200\n"); + fprintf(fp, "COMPILE_PERF\t10\t./COMPILE_PERF\n"); + fprintf(fp, "GROUP2COMPILE_PERF\t10\t./GROUP2COMPILE_PERF\n"); + fclose(fp); + + struct maat_options *opts = maat_options_new(); + maat_options_set_logger(opts, "./benchmark_hs_gtest.log", LOG_LEVEL_INFO); + maat_options_set_iris(opts, "./", "./"); + maat_options_set_rule_effect_interval_ms(opts, 1000); + maat_options_set_caller_thread_number(opts, PERF_THREAD_NUM); + _shared_maat_inst = maat_new(opts, table_info_path); + maat_options_free(opts); + } + + static void TearDownTestCase() { + maat_free(_shared_maat_inst); + log_handle_destroy(logger); + } + + static struct log_handle *logger; + static struct maat *_shared_maat_inst; +}; + +struct maat *Regex200BenchmarkGTest::_shared_maat_inst; +struct log_handle *Regex200BenchmarkGTest::logger; + +TEST_F(Regex200BenchmarkGTest, LiteralScan) { + const char *table_name = "REGEX_200"; + struct maat *maat_inst = Regex200BenchmarkGTest::_shared_maat_inst; + + int table_id = maat_get_table_id(maat_inst, table_name); + ASSERT_GT(table_id, 0); + + pthread_t threads[PERF_THREAD_NUM + 1]; + struct thread_param thread_params[PERF_THREAD_NUM + 1]; + int i = 0; + int *is_all_hit = NULL; + + for (i = 0; i < PERF_THREAD_NUM + 1; i++) { + thread_params[i].maat_inst = maat_inst; + thread_params[i].thread_id = i; + thread_params[i].table_name = table_name; + thread_params[i].test_count = MAX_SCAN_COUNT; + thread_params[i].rule_count = 200; + thread_params[i].time_elapse_ms = 0; + thread_params[i].logger = logger; + + if (i < PERF_THREAD_NUM) { + pthread_create(&threads[i], NULL, perf_regex_scan_thread, thread_params+i); + } else { + thread_params[i].test_count = 0; + pthread_create(&threads[i], NULL, perf_regex_update_thread, thread_params+i); + } + } + + long long time_elapse_ms = 0; + long long scan_count = 0; + long long scan_per_second = 0; + for (i = 0; i < PERF_THREAD_NUM + 1; i++) { + pthread_join(threads[i], (void **)&is_all_hit); + time_elapse_ms += thread_params[i].time_elapse_ms; + scan_count += thread_params[i].test_count; + + EXPECT_EQ(*is_all_hit, 1); + *is_all_hit = 0; + free(is_all_hit); + } + scan_per_second = scan_count * 1000 / time_elapse_ms; + + log_info(maat_inst->logger, MODULE_BENCHMARK_GTEST, + "Regex200Scan match rate on %d-threads speed %lld lookups/s/thread", + PERF_THREAD_NUM, scan_per_second); +} + +class Regex300BenchmarkGTest : public ::testing::Test +{ +protected: + static void SetUpTestCase() { + logger = log_handle_create("./benchmark_hs_gtest.log", 0); + + FILE *fp = fopen("full_config_index.0000001", "w+"); + if (NULL == fp) { + log_error(g_logger, "open file %s failed", "full_config_index.0000001"); + return; + } + fprintf(fp, "REGEX_300\t300\t./regex_rules/REGEX_300\n"); + fprintf(fp, "COMPILE_PERF\t10\t./COMPILE_PERF\n"); + fprintf(fp, "GROUP2COMPILE_PERF\t10\t./GROUP2COMPILE_PERF\n"); + fclose(fp); + + struct maat_options *opts = maat_options_new(); + maat_options_set_logger(opts, "./benchmark_hs_gtest.log", LOG_LEVEL_INFO); + maat_options_set_iris(opts, "./", "./"); + maat_options_set_rule_effect_interval_ms(opts, 1000); + maat_options_set_caller_thread_number(opts, PERF_THREAD_NUM); + _shared_maat_inst = maat_new(opts, table_info_path); + maat_options_free(opts); + } + + static void TearDownTestCase() { + maat_free(_shared_maat_inst); + log_handle_destroy(logger); + } + + static struct log_handle *logger; + static struct maat *_shared_maat_inst; +}; + +struct maat *Regex300BenchmarkGTest::_shared_maat_inst; +struct log_handle *Regex300BenchmarkGTest::logger; + +TEST_F(Regex300BenchmarkGTest, LiteralScan) { + const char *table_name = "REGEX_300"; + struct maat *maat_inst = Regex300BenchmarkGTest::_shared_maat_inst; + + int table_id = maat_get_table_id(maat_inst, table_name); + ASSERT_GT(table_id, 0); + + pthread_t threads[PERF_THREAD_NUM + 1]; + struct thread_param thread_params[PERF_THREAD_NUM + 1]; + int i = 0; + int *is_all_hit = NULL; + + for (i = 0; i < PERF_THREAD_NUM + 1; i++) { + thread_params[i].maat_inst = maat_inst; + thread_params[i].thread_id = i; + thread_params[i].table_name = table_name; + thread_params[i].test_count = MAX_SCAN_COUNT; + thread_params[i].rule_count = 300; + thread_params[i].time_elapse_ms = 0; + thread_params[i].logger = logger; + + if (i < PERF_THREAD_NUM) { + pthread_create(&threads[i], NULL, perf_regex_scan_thread, thread_params+i); + } else { + thread_params[i].test_count = 0; + pthread_create(&threads[i], NULL, perf_regex_update_thread, thread_params+i); + } + } + + long long time_elapse_ms = 0; + long long scan_count = 0; + long long scan_per_second = 0; + for (i = 0; i < PERF_THREAD_NUM + 1; i++) { + pthread_join(threads[i], (void **)&is_all_hit); + time_elapse_ms += thread_params[i].time_elapse_ms; + scan_count += thread_params[i].test_count; + + EXPECT_EQ(*is_all_hit, 1); + *is_all_hit = 0; + free(is_all_hit); + } + scan_per_second = scan_count * 1000 / time_elapse_ms; + + log_info(maat_inst->logger, MODULE_BENCHMARK_GTEST, + "Regex300Scan match rate on %d-threads speed %lld lookups/s/thread", + PERF_THREAD_NUM, scan_per_second); +} + +class Regex500BenchmarkGTest : public ::testing::Test +{ +protected: + static void SetUpTestCase() { + logger = log_handle_create("./benchmark_hs_gtest.log", 0); + + FILE *fp = fopen("full_config_index.0000001", "w+"); + if (NULL == fp) { + log_error(g_logger, "open file %s failed", "full_config_index.0000001"); + return; + } + fprintf(fp, "REGEX_500\t500\t./regex_rules/REGEX_500\n"); + fprintf(fp, "COMPILE_PERF\t10\t./COMPILE_PERF\n"); + fprintf(fp, "GROUP2COMPILE_PERF\t10\t./GROUP2COMPILE_PERF\n"); + fclose(fp); + + struct maat_options *opts = maat_options_new(); + maat_options_set_logger(opts, "./benchmark_hs_gtest.log", LOG_LEVEL_INFO); + maat_options_set_iris(opts, "./", "./"); + maat_options_set_rule_effect_interval_ms(opts, 1000); + maat_options_set_caller_thread_number(opts, PERF_THREAD_NUM); + _shared_maat_inst = maat_new(opts, table_info_path); + maat_options_free(opts); + } + + static void TearDownTestCase() { + maat_free(_shared_maat_inst); + log_handle_destroy(logger); + } + + static struct log_handle *logger; + static struct maat *_shared_maat_inst; +}; + +struct maat *Regex500BenchmarkGTest::_shared_maat_inst; +struct log_handle *Regex500BenchmarkGTest::logger; + +TEST_F(Regex500BenchmarkGTest, LiteralScan) { + const char *table_name = "REGEX_500"; + struct maat *maat_inst = Regex500BenchmarkGTest::_shared_maat_inst; + + int table_id = maat_get_table_id(maat_inst, table_name); + ASSERT_GT(table_id, 0); + + pthread_t threads[PERF_THREAD_NUM + 1]; + struct thread_param thread_params[PERF_THREAD_NUM + 1]; + int i = 0; + int *is_all_hit = NULL; + + for (i = 0; i < PERF_THREAD_NUM + 1; i++) { + thread_params[i].maat_inst = maat_inst; + thread_params[i].thread_id = i; + thread_params[i].table_name = table_name; + thread_params[i].test_count = MAX_SCAN_COUNT; + thread_params[i].rule_count = 500; + thread_params[i].time_elapse_ms = 0; + thread_params[i].logger = logger; + + if (i < PERF_THREAD_NUM) { + pthread_create(&threads[i], NULL, perf_regex_scan_thread, thread_params+i); + } else { + thread_params[i].test_count = 0; + pthread_create(&threads[i], NULL, perf_regex_update_thread, thread_params+i); + } + } + + long long time_elapse_ms = 0; + long long scan_count = 0; + long long scan_per_second = 0; + for (i = 0; i < PERF_THREAD_NUM + 1; i++) { + pthread_join(threads[i], (void **)&is_all_hit); + time_elapse_ms += thread_params[i].time_elapse_ms; + scan_count += thread_params[i].test_count; + + EXPECT_EQ(*is_all_hit, 1); + *is_all_hit = 0; + free(is_all_hit); + } + scan_per_second = scan_count * 1000 / time_elapse_ms; + + log_info(maat_inst->logger, MODULE_BENCHMARK_GTEST, + "Regex500Scan match rate on %d-threads speed %lld lookups/s/thread", + PERF_THREAD_NUM, scan_per_second); +} + +class Regex1KBenchmarkGTest : public ::testing::Test +{ +protected: + static void SetUpTestCase() { + logger = log_handle_create("./benchmark_hs_gtest.log", 0); + + FILE *fp = fopen("full_config_index.0000001", "w+"); + if (NULL == fp) { + log_error(g_logger, "open file %s failed", "full_config_index.0000001"); + return; + } + fprintf(fp, "REGEX_1K\t1000\t./regex_rules/REGEX_1K\n"); + fprintf(fp, "COMPILE_PERF\t10\t./COMPILE_PERF\n"); + fprintf(fp, "GROUP2COMPILE_PERF\t10\t./GROUP2COMPILE_PERF\n"); + fclose(fp); + + struct maat_options *opts = maat_options_new(); + maat_options_set_logger(opts, "./benchmark_hs_gtest.log", LOG_LEVEL_INFO); + maat_options_set_iris(opts, "./", "./"); + maat_options_set_rule_effect_interval_ms(opts, 1000); + maat_options_set_caller_thread_number(opts, PERF_THREAD_NUM); + _shared_maat_inst = maat_new(opts, table_info_path); + maat_options_free(opts); + } + + static void TearDownTestCase() { + maat_free(_shared_maat_inst); + log_handle_destroy(logger); + } + + static struct log_handle *logger; + static struct maat *_shared_maat_inst; +}; + +struct maat *Regex1KBenchmarkGTest::_shared_maat_inst; +struct log_handle *Regex1KBenchmarkGTest::logger; + +TEST_F(Regex1KBenchmarkGTest, LiteralScan) { + const char *table_name = "REGEX_1K"; + struct maat *maat_inst = Regex1KBenchmarkGTest::_shared_maat_inst; + + int table_id = maat_get_table_id(maat_inst, table_name); + ASSERT_GT(table_id, 0); + + pthread_t threads[PERF_THREAD_NUM + 1]; + struct thread_param thread_params[PERF_THREAD_NUM + 1]; + int i = 0; + int *is_all_hit = NULL; + + for (i = 0; i < PERF_THREAD_NUM + 1; i++) { + thread_params[i].maat_inst = maat_inst; + thread_params[i].thread_id = i; + thread_params[i].table_name = table_name; + thread_params[i].test_count = MAX_SCAN_COUNT; + thread_params[i].rule_count = 1000; + thread_params[i].time_elapse_ms = 0; + thread_params[i].logger = logger; + + if (i < PERF_THREAD_NUM) { + pthread_create(&threads[i], NULL, perf_regex_scan_thread, thread_params+i); + } else { + thread_params[i].test_count = 0; + pthread_create(&threads[i], NULL, perf_regex_update_thread, thread_params+i); + } + } + + long long time_elapse_ms = 0; + long long scan_count = 0; + long long scan_per_second = 0; + for (i = 0; i < PERF_THREAD_NUM + 1; i++) { + pthread_join(threads[i], (void **)&is_all_hit); + time_elapse_ms += thread_params[i].time_elapse_ms; + scan_count += thread_params[i].test_count; + + EXPECT_EQ(*is_all_hit, 1); + *is_all_hit = 0; + free(is_all_hit); + } + scan_per_second = scan_count * 1000 / time_elapse_ms; + + log_info(maat_inst->logger, MODULE_BENCHMARK_GTEST, + "Regex1KScan match rate on %d-threads speed %lld lookups/s/thread", + PERF_THREAD_NUM, scan_per_second); +} + +class Regex2KBenchmarkGTest : public ::testing::Test +{ +protected: + static void SetUpTestCase() { + logger = log_handle_create("./benchmark_hs_gtest.log", 0); + + FILE *fp = fopen("full_config_index.0000001", "w+"); + if (NULL == fp) { + log_error(g_logger, "open file %s failed", "full_config_index.0000001"); + return; + } + fprintf(fp, "REGEX_2K\t2000\t./regex_rules/REGEX_2K\n"); + fprintf(fp, "COMPILE_PERF\t10\t./COMPILE_PERF\n"); + fprintf(fp, "GROUP2COMPILE_PERF\t10\t./GROUP2COMPILE_PERF\n"); + fclose(fp); + + struct maat_options *opts = maat_options_new(); + maat_options_set_logger(opts, "./benchmark_hs_gtest.log", LOG_LEVEL_INFO); + maat_options_set_iris(opts, "./", "./"); + maat_options_set_rule_effect_interval_ms(opts, 1000); + maat_options_set_caller_thread_number(opts, PERF_THREAD_NUM); + _shared_maat_inst = maat_new(opts, table_info_path); + maat_options_free(opts); + } + + static void TearDownTestCase() { + maat_free(_shared_maat_inst); + log_handle_destroy(logger); + } + + static struct log_handle *logger; + static struct maat *_shared_maat_inst; +}; + +struct maat *Regex2KBenchmarkGTest::_shared_maat_inst; +struct log_handle *Regex2KBenchmarkGTest::logger; + +TEST_F(Regex2KBenchmarkGTest, LiteralScan) { + const char *table_name = "REGEX_2K"; + struct maat *maat_inst = Regex2KBenchmarkGTest::_shared_maat_inst; + + int table_id = maat_get_table_id(maat_inst, table_name); + ASSERT_GT(table_id, 0); + + pthread_t threads[PERF_THREAD_NUM + 1]; + struct thread_param thread_params[PERF_THREAD_NUM + 1]; + int i = 0; + int *is_all_hit = NULL; + + for (i = 0; i < PERF_THREAD_NUM + 1; i++) { + thread_params[i].maat_inst = maat_inst; + thread_params[i].thread_id = i; + thread_params[i].table_name = table_name; + thread_params[i].test_count = MAX_SCAN_COUNT; + thread_params[i].rule_count = 2000; + thread_params[i].time_elapse_ms = 0; + thread_params[i].logger = logger; + + if (i < PERF_THREAD_NUM) { + pthread_create(&threads[i], NULL, perf_regex_scan_thread, thread_params+i); + } else { + thread_params[i].test_count = 0; + pthread_create(&threads[i], NULL, perf_regex_update_thread, thread_params+i); + } + } + + long long time_elapse_ms = 0; + long long scan_count = 0; + long long scan_per_second = 0; + for (i = 0; i < PERF_THREAD_NUM + 1; i++) { + pthread_join(threads[i], (void **)&is_all_hit); + time_elapse_ms += thread_params[i].time_elapse_ms; + scan_count += thread_params[i].test_count; + + EXPECT_EQ(*is_all_hit, 1); + *is_all_hit = 0; + free(is_all_hit); + } + scan_per_second = scan_count * 1000 / time_elapse_ms; + + log_info(maat_inst->logger, MODULE_BENCHMARK_GTEST, + "Regex2KScan match rate on %d-threads speed %lld lookups/s/thread", + PERF_THREAD_NUM, scan_per_second); +} + +class Regex3KBenchmarkGTest : public ::testing::Test +{ +protected: + static void SetUpTestCase() { + logger = log_handle_create("./benchmark_hs_gtest.log", 0); + + FILE *fp = fopen("full_config_index.0000001", "w+"); + if (NULL == fp) { + log_error(g_logger, "open file %s failed", "full_config_index.0000001"); + return; + } + fprintf(fp, "REGEX_3K\t3000\t./regex_rules/REGEX_3K\n"); + fprintf(fp, "COMPILE_PERF\t10\t./COMPILE_PERF\n"); + fprintf(fp, "GROUP2COMPILE_PERF\t10\t./GROUP2COMPILE_PERF\n"); + fclose(fp); + + struct maat_options *opts = maat_options_new(); + maat_options_set_logger(opts, "./benchmark_hs_gtest.log", LOG_LEVEL_INFO); + maat_options_set_iris(opts, "./", "./"); + maat_options_set_rule_effect_interval_ms(opts, 1000); + maat_options_set_caller_thread_number(opts, PERF_THREAD_NUM); + _shared_maat_inst = maat_new(opts, table_info_path); + maat_options_free(opts); + } + + static void TearDownTestCase() { + maat_free(_shared_maat_inst); + log_handle_destroy(logger); + } + + static struct log_handle *logger; + static struct maat *_shared_maat_inst; +}; + +struct maat *Regex3KBenchmarkGTest::_shared_maat_inst; +struct log_handle *Regex3KBenchmarkGTest::logger; + +TEST_F(Regex3KBenchmarkGTest, LiteralScan) { + const char *table_name = "REGEX_3K"; + struct maat *maat_inst = Regex3KBenchmarkGTest::_shared_maat_inst; + + int table_id = maat_get_table_id(maat_inst, table_name); + ASSERT_GT(table_id, 0); + + pthread_t threads[PERF_THREAD_NUM + 1]; + struct thread_param thread_params[PERF_THREAD_NUM + 1]; + int i = 0; + int *is_all_hit = NULL; + + for (i = 0; i < PERF_THREAD_NUM + 1; i++) { + thread_params[i].maat_inst = maat_inst; + thread_params[i].thread_id = i; + thread_params[i].table_name = table_name; + thread_params[i].test_count = MAX_SCAN_COUNT; + thread_params[i].rule_count = 3000; + thread_params[i].time_elapse_ms = 0; + thread_params[i].logger = logger; + + if (i < PERF_THREAD_NUM) { + pthread_create(&threads[i], NULL, perf_regex_scan_thread, thread_params+i); + } else { + thread_params[i].test_count = 0; + pthread_create(&threads[i], NULL, perf_regex_update_thread, thread_params+i); + } + } + + long long time_elapse_ms = 0; + long long scan_count = 0; + long long scan_per_second = 0; + for (i = 0; i < PERF_THREAD_NUM + 1; i++) { + pthread_join(threads[i], (void **)&is_all_hit); + time_elapse_ms += thread_params[i].time_elapse_ms; + scan_count += thread_params[i].test_count; + + EXPECT_EQ(*is_all_hit, 1); + *is_all_hit = 0; + free(is_all_hit); + } + scan_per_second = scan_count * 1000 / time_elapse_ms; + + log_info(maat_inst->logger, MODULE_BENCHMARK_GTEST, + "Regex3KScan match rate on %d-threads speed %lld lookups/s/thread", + PERF_THREAD_NUM, scan_per_second); +} + +class Regex5KBenchmarkGTest : public ::testing::Test +{ +protected: + static void SetUpTestCase() { + logger = log_handle_create("./benchmark_hs_gtest.log", 0); + + FILE *fp = fopen("full_config_index.0000001", "w+"); + if (NULL == fp) { + log_error(g_logger, "open file %s failed", "full_config_index.0000001"); + return; + } + fprintf(fp, "REGEX_5K\t5000\t./regex_rules/REGEX_5K\n"); + fprintf(fp, "COMPILE_PERF\t10\t./COMPILE_PERF\n"); + fprintf(fp, "GROUP2COMPILE_PERF\t10\t./GROUP2COMPILE_PERF\n"); + fclose(fp); + + struct maat_options *opts = maat_options_new(); + maat_options_set_logger(opts, "./benchmark_hs_gtest.log", LOG_LEVEL_INFO); + maat_options_set_iris(opts, "./", "./"); + maat_options_set_rule_effect_interval_ms(opts, 1000); + maat_options_set_caller_thread_number(opts, PERF_THREAD_NUM); + _shared_maat_inst = maat_new(opts, table_info_path); + maat_options_free(opts); + } + + static void TearDownTestCase() { + maat_free(_shared_maat_inst); + log_handle_destroy(logger); + } + + static struct log_handle *logger; + static struct maat *_shared_maat_inst; +}; + +struct maat *Regex5KBenchmarkGTest::_shared_maat_inst; +struct log_handle *Regex5KBenchmarkGTest::logger; + +TEST_F(Regex5KBenchmarkGTest, LiteralScan) { + const char *table_name = "REGEX_5K"; + struct maat *maat_inst = Regex5KBenchmarkGTest::_shared_maat_inst; + + int table_id = maat_get_table_id(maat_inst, table_name); + ASSERT_GT(table_id, 0); + + pthread_t threads[PERF_THREAD_NUM + 1]; + struct thread_param thread_params[PERF_THREAD_NUM + 1]; + int i = 0; + int *is_all_hit = NULL; + + for (i = 0; i < PERF_THREAD_NUM + 1; i++) { + thread_params[i].maat_inst = maat_inst; + thread_params[i].thread_id = i; + thread_params[i].table_name = table_name; + thread_params[i].test_count = MAX_SCAN_COUNT; + thread_params[i].rule_count = 5000; + thread_params[i].time_elapse_ms = 0; + thread_params[i].logger = logger; + + if (i < PERF_THREAD_NUM) { + pthread_create(&threads[i], NULL, perf_regex_scan_thread, thread_params+i); + } else { + thread_params[i].test_count = 0; + pthread_create(&threads[i], NULL, perf_regex_update_thread, thread_params+i); + } + } + + long long time_elapse_ms = 0; + long long scan_count = 0; + long long scan_per_second = 0; + for (i = 0; i < PERF_THREAD_NUM + 1; i++) { + pthread_join(threads[i], (void **)&is_all_hit); + time_elapse_ms += thread_params[i].time_elapse_ms; + scan_count += thread_params[i].test_count; + + EXPECT_EQ(*is_all_hit, 1); + *is_all_hit = 0; + free(is_all_hit); + } + scan_per_second = scan_count * 1000 / time_elapse_ms; + + log_info(maat_inst->logger, MODULE_BENCHMARK_GTEST, + "Regex5KScan match rate on %d-threads speed %lld lookups/s/thread", + PERF_THREAD_NUM, scan_per_second); +} + +class Regex10KBenchmarkGTest : public ::testing::Test +{ +protected: + static void SetUpTestCase() { + logger = log_handle_create("./benchmark_hs_gtest.log", 0); + + FILE *fp = fopen("full_config_index.0000001", "w+"); + if (NULL == fp) { + log_error(g_logger, "open file %s failed", "full_config_index.0000001"); + return; + } + fprintf(fp, "REGEX_10K\t10000\t./regex_rules/REGEX_10K\n"); + fprintf(fp, "COMPILE_PERF\t10\t./COMPILE_PERF\n"); + fprintf(fp, "GROUP2COMPILE_PERF\t10\t./GROUP2COMPILE_PERF\n"); + fclose(fp); + + struct maat_options *opts = maat_options_new(); + maat_options_set_logger(opts, "./benchmark_hs_gtest.log", LOG_LEVEL_INFO); + maat_options_set_iris(opts, "./", "./"); + maat_options_set_rule_effect_interval_ms(opts, 1000); + maat_options_set_caller_thread_number(opts, PERF_THREAD_NUM); + _shared_maat_inst = maat_new(opts, table_info_path); + maat_options_free(opts); + } + + static void TearDownTestCase() { + maat_free(_shared_maat_inst); + log_handle_destroy(logger); + } + + static struct log_handle *logger; + static struct maat *_shared_maat_inst; +}; + +struct maat *Regex10KBenchmarkGTest::_shared_maat_inst; +struct log_handle *Regex10KBenchmarkGTest::logger; + +TEST_F(Regex10KBenchmarkGTest, LiteralScan) { + const char *table_name = "REGEX_10K"; + struct maat *maat_inst = Regex10KBenchmarkGTest::_shared_maat_inst; + + int table_id = maat_get_table_id(maat_inst, table_name); + ASSERT_GT(table_id, 0); + + pthread_t threads[PERF_THREAD_NUM + 1]; + struct thread_param thread_params[PERF_THREAD_NUM + 1]; + int i = 0; + int *is_all_hit = NULL; + + for (i = 0; i < PERF_THREAD_NUM + 1; i++) { + thread_params[i].maat_inst = maat_inst; + thread_params[i].thread_id = i; + thread_params[i].table_name = table_name; + thread_params[i].test_count = MAX_SCAN_COUNT; + thread_params[i].rule_count = 10000; + thread_params[i].time_elapse_ms = 0; + thread_params[i].logger = logger; + + if (i < PERF_THREAD_NUM) { + pthread_create(&threads[i], NULL, perf_regex_scan_thread, thread_params+i); + } else { + thread_params[i].test_count = 0; + pthread_create(&threads[i], NULL, perf_regex_update_thread, thread_params+i); + } + } + + long long time_elapse_ms = 0; + long long scan_count = 0; + long long scan_per_second = 0; + for (i = 0; i < PERF_THREAD_NUM + 1; i++) { + pthread_join(threads[i], (void **)&is_all_hit); + time_elapse_ms += thread_params[i].time_elapse_ms; + scan_count += thread_params[i].test_count; + + EXPECT_EQ(*is_all_hit, 1); + *is_all_hit = 0; + free(is_all_hit); + } + scan_per_second = scan_count * 1000 / time_elapse_ms; + + log_info(maat_inst->logger, MODULE_BENCHMARK_GTEST, + "Regex10KScan match rate on %d-threads speed %lld lookups/s/thread", + PERF_THREAD_NUM, scan_per_second); +} + +class Regex15KBenchmarkGTest : public ::testing::Test +{ +protected: + static void SetUpTestCase() { + logger = log_handle_create("./benchmark_hs_gtest.log", 0); + + FILE *fp = fopen("full_config_index.0000001", "w+"); + if (NULL == fp) { + log_error(g_logger, "open file %s failed", "full_config_index.0000001"); + return; + } + fprintf(fp, "REGEX_15K\t15000\t./regex_rules/REGEX_15K\n"); + fprintf(fp, "COMPILE_PERF\t10\t./COMPILE_PERF\n"); + fprintf(fp, "GROUP2COMPILE_PERF\t10\t./GROUP2COMPILE_PERF\n"); + fclose(fp); + + struct maat_options *opts = maat_options_new(); + maat_options_set_logger(opts, "./benchmark_hs_gtest.log", LOG_LEVEL_INFO); + maat_options_set_iris(opts, "./", "./"); + maat_options_set_rule_effect_interval_ms(opts, 1000); + maat_options_set_caller_thread_number(opts, PERF_THREAD_NUM); + _shared_maat_inst = maat_new(opts, table_info_path); + maat_options_free(opts); + } + + static void TearDownTestCase() { + maat_free(_shared_maat_inst); + log_handle_destroy(logger); + } + + static struct log_handle *logger; + static struct maat *_shared_maat_inst; +}; + +struct maat *Regex15KBenchmarkGTest::_shared_maat_inst; +struct log_handle *Regex15KBenchmarkGTest::logger; + +TEST_F(Regex15KBenchmarkGTest, LiteralScan) { + const char *table_name = "REGEX_15K"; + struct maat *maat_inst = Regex15KBenchmarkGTest::_shared_maat_inst; + + int table_id = maat_get_table_id(maat_inst, table_name); + ASSERT_GT(table_id, 0); + + pthread_t threads[PERF_THREAD_NUM + 1]; + struct thread_param thread_params[PERF_THREAD_NUM + 1]; + int i = 0; + int *is_all_hit = NULL; + + for (i = 0; i < PERF_THREAD_NUM + 1; i++) { + thread_params[i].maat_inst = maat_inst; + thread_params[i].thread_id = i; + thread_params[i].table_name = table_name; + thread_params[i].test_count = MAX_SCAN_COUNT; + thread_params[i].rule_count = 15000; + thread_params[i].time_elapse_ms = 0; + thread_params[i].logger = logger; + + if (i < PERF_THREAD_NUM) { + pthread_create(&threads[i], NULL, perf_regex_scan_thread, thread_params+i); + } else { + thread_params[i].test_count = 0; + pthread_create(&threads[i], NULL, perf_regex_update_thread, thread_params+i); + } + } + + long long time_elapse_ms = 0; + long long scan_count = 0; + long long scan_per_second = 0; + for (i = 0; i < PERF_THREAD_NUM + 1; i++) { + pthread_join(threads[i], (void **)&is_all_hit); + time_elapse_ms += thread_params[i].time_elapse_ms; + scan_count += thread_params[i].test_count; + + EXPECT_EQ(*is_all_hit, 1); + *is_all_hit = 0; + free(is_all_hit); + } + scan_per_second = scan_count * 1000 / time_elapse_ms; + + log_info(maat_inst->logger, MODULE_BENCHMARK_GTEST, + "Regex15KScan match rate on %d-threads speed %lld lookups/s/thread", + PERF_THREAD_NUM, scan_per_second); +} + class Expr1KBenchmarkGTest : public ::testing::Test { protected: static void SetUpTestCase() { logger = log_handle_create("./benchmark_hs_gtest.log", 0); - ///printf("Start to generate test sample......\n"); generate_expr_sample("EXPR_LITERAL_1K", 1000); FILE *fp = fopen("full_config_index.0000001", "w+"); diff --git a/test/benchmark/benchmark_rs_gtest.cpp b/test/benchmark/benchmark_rs_gtest.cpp index 8f5e3c7..5c8bc00 100644 --- a/test/benchmark/benchmark_rs_gtest.cpp +++ b/test/benchmark/benchmark_rs_gtest.cpp @@ -463,12 +463,332 @@ void *perf_flag_update_thread(void *arg) return is_all_hit; } +void *perf_regex_scan_thread(void *arg) +{ + struct thread_param *param = (struct thread_param *)arg; + struct maat *maat_inst = param->maat_inst; + const char *table_name = param->table_name; + struct timespec start, end; + const char *scan_data = "Maat was the goddness of harmony, justice, and truth in ancient Egyptian." + "Request from User-Agent: Chrome, will go to yyy.abc.net"; + long long results[ARRAY_SIZE] = {0}; + int hit_times = 0; + size_t n_hit_result = 0; + struct maat_state *state = maat_state_new(maat_inst, param->thread_id); + + int table_id = maat_get_table_id(maat_inst, table_name); + + clock_gettime(CLOCK_MONOTONIC, &start); + for (int i = 0; i < param->test_count; i++) { + int ret = maat_scan_string(maat_inst, table_id, scan_data, strlen(scan_data), + results, ARRAY_SIZE, &n_hit_result, state); + if (ret == MAAT_SCAN_HIT) { + hit_times++; + } + maat_state_reset(state); + } + clock_gettime(CLOCK_MONOTONIC, &end); + + param->time_elapse_ms = (end.tv_sec - start.tv_sec) * 1000 + + (end.tv_nsec - start.tv_nsec) / 1000000; + int *is_all_hit = ALLOC(int, 1); + *is_all_hit = (hit_times == param->test_count ? 1 : 0); + log_info(param->logger, MODULE_BENCHMARK_GTEST, + "thread_id:%d rule_count:%d regex_scan time_elapse:%lldms hit_times:%d", + param->thread_id, param->rule_count, param->time_elapse_ms, hit_times); + + return is_all_hit; +} + +void *perf_regex_update_thread(void *arg) +{ + struct thread_param *param = (struct thread_param *)arg; + const char *table_name = param->table_name; + const int CMD_EXPR_NUM = 10; + char keyword_buf[128]; + + for (int i = 0; i < CMD_EXPR_NUM; i++) { + random_keyword_generate(keyword_buf, sizeof(keyword_buf)); + FILE *fp = fopen(table_name, "a+"); + if (NULL == fp) { + continue; + } + + fprintf(fp, "%d\t%d\t%s\t1\t0\t0\t1\n", 2000001+i, 2000001+i, keyword_buf); + fclose(fp); + + sleep(1); + } + + int *is_all_hit = ALLOC(int, 1); + *is_all_hit = 1; + + return is_all_hit; +} + +class Regex100BenchmarkGTest : public ::testing::Test +{ +protected: + static void SetUpTestCase() { + logger = log_handle_create("./benchmark_rs_gtest.log", 0); + + FILE *fp = fopen("full_config_index.0000001", "w+"); + if (NULL == fp) { + log_error(g_logger, "open file %s failed", "full_config_index.0000001"); + return; + } + fprintf(fp, "REGEX_100\t100\t./regex_rules/REGEX_100\n"); + fprintf(fp, "COMPILE_PERF\t10\t./COMPILE_PERF\n"); + fprintf(fp, "GROUP2COMPILE_PERF\t10\t./GROUP2COMPILE_PERF\n"); + fclose(fp); + + struct maat_options *opts = maat_options_new(); + maat_options_set_logger(opts, "./benchmark_rs_gtest.log", LOG_LEVEL_INFO); + maat_options_set_iris(opts, "./", "./"); + maat_options_set_rule_effect_interval_ms(opts, 1000); + maat_options_set_caller_thread_number(opts, PERF_THREAD_NUM); + maat_options_set_expr_engine(opts, MAAT_EXPR_ENGINE_RS); + _shared_maat_inst = maat_new(opts, table_info_path); + maat_options_free(opts); + } + + static void TearDownTestCase() { + maat_free(_shared_maat_inst); + log_handle_destroy(logger); + } + + static struct log_handle *logger; + static struct maat *_shared_maat_inst; +}; + +struct maat *Regex100BenchmarkGTest::_shared_maat_inst; +struct log_handle *Regex100BenchmarkGTest::logger; + +TEST_F(Regex100BenchmarkGTest, LiteralScan) { + const char *table_name = "REGEX_100"; + struct maat *maat_inst = Regex100BenchmarkGTest::_shared_maat_inst; + + int table_id = maat_get_table_id(maat_inst, table_name); + ASSERT_GT(table_id, 0); + + pthread_t threads[PERF_THREAD_NUM + 1]; + struct thread_param thread_params[PERF_THREAD_NUM + 1]; + int i = 0; + int *is_all_hit = NULL; + + for (i = 0; i < PERF_THREAD_NUM + 1; i++) { + thread_params[i].maat_inst = maat_inst; + thread_params[i].thread_id = i; + thread_params[i].table_name = table_name; + thread_params[i].test_count = MAX_SCAN_COUNT; + thread_params[i].rule_count = 100; + thread_params[i].time_elapse_ms = 0; + thread_params[i].logger = logger; + + if (i < PERF_THREAD_NUM) { + pthread_create(&threads[i], NULL, perf_regex_scan_thread, thread_params+i); + } else { + thread_params[i].test_count = 0; + pthread_create(&threads[i], NULL, perf_regex_update_thread, thread_params+i); + } + } + + long long time_elapse_ms = 0; + long long scan_count = 0; + long long scan_per_second = 0; + for (i = 0; i < PERF_THREAD_NUM + 1; i++) { + pthread_join(threads[i], (void **)&is_all_hit); + time_elapse_ms += thread_params[i].time_elapse_ms; + scan_count += thread_params[i].test_count; + + EXPECT_EQ(*is_all_hit, 1); + *is_all_hit = 0; + free(is_all_hit); + } + scan_per_second = scan_count * 1000 / time_elapse_ms; + + log_info(maat_inst->logger, MODULE_BENCHMARK_GTEST, + "Regex100Scan match rate on %d-threads speed %lld lookups/s/thread", + PERF_THREAD_NUM, scan_per_second); +} + +class Regex200BenchmarkGTest : public ::testing::Test +{ +protected: + static void SetUpTestCase() { + logger = log_handle_create("./benchmark_rs_gtest.log", 0); + + FILE *fp = fopen("full_config_index.0000001", "w+"); + if (NULL == fp) { + log_error(g_logger, "open file %s failed", "full_config_index.0000001"); + return; + } + fprintf(fp, "REGEX_200\t200\t./regex_rules/REGEX_200\n"); + fprintf(fp, "COMPILE_PERF\t10\t./COMPILE_PERF\n"); + fprintf(fp, "GROUP2COMPILE_PERF\t10\t./GROUP2COMPILE_PERF\n"); + fclose(fp); + + struct maat_options *opts = maat_options_new(); + maat_options_set_logger(opts, "./benchmark_rs_gtest.log", LOG_LEVEL_INFO); + maat_options_set_iris(opts, "./", "./"); + maat_options_set_rule_effect_interval_ms(opts, 1000); + maat_options_set_caller_thread_number(opts, PERF_THREAD_NUM); + maat_options_set_expr_engine(opts, MAAT_EXPR_ENGINE_RS); + _shared_maat_inst = maat_new(opts, table_info_path); + maat_options_free(opts); + } + + static void TearDownTestCase() { + maat_free(_shared_maat_inst); + log_handle_destroy(logger); + } + + static struct log_handle *logger; + static struct maat *_shared_maat_inst; +}; + +struct maat *Regex200BenchmarkGTest::_shared_maat_inst; +struct log_handle *Regex200BenchmarkGTest::logger; + +TEST_F(Regex200BenchmarkGTest, LiteralScan) { + const char *table_name = "REGEX_200"; + struct maat *maat_inst = Regex200BenchmarkGTest::_shared_maat_inst; + + int table_id = maat_get_table_id(maat_inst, table_name); + ASSERT_GT(table_id, 0); + + pthread_t threads[PERF_THREAD_NUM + 1]; + struct thread_param thread_params[PERF_THREAD_NUM + 1]; + int i = 0; + int *is_all_hit = NULL; + + for (i = 0; i < PERF_THREAD_NUM + 1; i++) { + thread_params[i].maat_inst = maat_inst; + thread_params[i].thread_id = i; + thread_params[i].table_name = table_name; + thread_params[i].test_count = MAX_SCAN_COUNT; + thread_params[i].rule_count = 200; + thread_params[i].time_elapse_ms = 0; + thread_params[i].logger = logger; + + if (i < PERF_THREAD_NUM) { + pthread_create(&threads[i], NULL, perf_regex_scan_thread, thread_params+i); + } else { + thread_params[i].test_count = 0; + pthread_create(&threads[i], NULL, perf_regex_update_thread, thread_params+i); + } + } + + long long time_elapse_ms = 0; + long long scan_count = 0; + long long scan_per_second = 0; + for (i = 0; i < PERF_THREAD_NUM + 1; i++) { + pthread_join(threads[i], (void **)&is_all_hit); + time_elapse_ms += thread_params[i].time_elapse_ms; + scan_count += thread_params[i].test_count; + + EXPECT_EQ(*is_all_hit, 1); + *is_all_hit = 0; + free(is_all_hit); + } + scan_per_second = scan_count * 1000 / time_elapse_ms; + + log_info(maat_inst->logger, MODULE_BENCHMARK_GTEST, + "Regex200Scan match rate on %d-threads speed %lld lookups/s/thread", + PERF_THREAD_NUM, scan_per_second); +} + +class Regex300BenchmarkGTest : public ::testing::Test +{ +protected: + static void SetUpTestCase() { + logger = log_handle_create("./benchmark_rs_gtest.log", 0); + + FILE *fp = fopen("full_config_index.0000001", "w+"); + if (NULL == fp) { + log_error(g_logger, "open file %s failed", "full_config_index.0000001"); + return; + } + fprintf(fp, "REGEX_300\t300\t./regex_rules/REGEX_300\n"); + fprintf(fp, "COMPILE_PERF\t10\t./COMPILE_PERF\n"); + fprintf(fp, "GROUP2COMPILE_PERF\t10\t./GROUP2COMPILE_PERF\n"); + fclose(fp); + + struct maat_options *opts = maat_options_new(); + maat_options_set_logger(opts, "./benchmark_rs_gtest.log", LOG_LEVEL_INFO); + maat_options_set_iris(opts, "./", "./"); + maat_options_set_rule_effect_interval_ms(opts, 1000); + maat_options_set_caller_thread_number(opts, PERF_THREAD_NUM); + maat_options_set_expr_engine(opts, MAAT_EXPR_ENGINE_RS); + _shared_maat_inst = maat_new(opts, table_info_path); + maat_options_free(opts); + } + + static void TearDownTestCase() { + maat_free(_shared_maat_inst); + log_handle_destroy(logger); + } + + static struct log_handle *logger; + static struct maat *_shared_maat_inst; +}; + +struct maat *Regex300BenchmarkGTest::_shared_maat_inst; +struct log_handle *Regex300BenchmarkGTest::logger; + +TEST_F(Regex300BenchmarkGTest, LiteralScan) { + const char *table_name = "REGEX_300"; + struct maat *maat_inst = Regex300BenchmarkGTest::_shared_maat_inst; + + int table_id = maat_get_table_id(maat_inst, table_name); + ASSERT_GT(table_id, 0); + + pthread_t threads[PERF_THREAD_NUM + 1]; + struct thread_param thread_params[PERF_THREAD_NUM + 1]; + int i = 0; + int *is_all_hit = NULL; + + for (i = 0; i < PERF_THREAD_NUM + 1; i++) { + thread_params[i].maat_inst = maat_inst; + thread_params[i].thread_id = i; + thread_params[i].table_name = table_name; + thread_params[i].test_count = MAX_SCAN_COUNT; + thread_params[i].rule_count = 300; + thread_params[i].time_elapse_ms = 0; + thread_params[i].logger = logger; + + if (i < PERF_THREAD_NUM) { + pthread_create(&threads[i], NULL, perf_regex_scan_thread, thread_params+i); + } else { + thread_params[i].test_count = 0; + pthread_create(&threads[i], NULL, perf_regex_update_thread, thread_params+i); + } + } + + long long time_elapse_ms = 0; + long long scan_count = 0; + long long scan_per_second = 0; + for (i = 0; i < PERF_THREAD_NUM + 1; i++) { + pthread_join(threads[i], (void **)&is_all_hit); + time_elapse_ms += thread_params[i].time_elapse_ms; + scan_count += thread_params[i].test_count; + + EXPECT_EQ(*is_all_hit, 1); + *is_all_hit = 0; + free(is_all_hit); + } + scan_per_second = scan_count * 1000 / time_elapse_ms; + + log_info(maat_inst->logger, MODULE_BENCHMARK_GTEST, + "Regex300Scan match rate on %d-threads speed %lld lookups/s/thread", + PERF_THREAD_NUM, scan_per_second); +} + class Expr1KBenchmarkGTest : public ::testing::Test { protected: static void SetUpTestCase() { logger = log_handle_create("./benchmark_rs_gtest.log", 0); - ///printf("Start to generate test sample......\n"); generate_expr_sample("EXPR_LITERAL_1K", 1000); FILE *fp = fopen("full_config_index.0000001", "w+"); diff --git a/test/benchmark/benchmark_table_info.conf b/test/benchmark/benchmark_table_info.conf index b01e64b..16630fa 100644 --- a/test/benchmark/benchmark_table_info.conf +++ b/test/benchmark/benchmark_table_info.conf @@ -575,5 +575,145 @@ "virtual_table_name":5, "clause_index":6 } + }, + { + "table_id":41, + "table_name":"REGEX_100", + "table_type":"expr", + "valid_column":7, + "custom": { + "item_id":1, + "group_id":2, + "keywords":3, + "expr_type":4, + "match_method":5, + "is_hexbin":6 + } + }, + { + "table_id":42, + "table_name":"REGEX_200", + "table_type":"expr", + "valid_column":7, + "custom": { + "item_id":1, + "group_id":2, + "keywords":3, + "expr_type":4, + "match_method":5, + "is_hexbin":6 + } + }, + { + "table_id":43, + "table_name":"REGEX_300", + "table_type":"expr", + "valid_column":7, + "custom": { + "item_id":1, + "group_id":2, + "keywords":3, + "expr_type":4, + "match_method":5, + "is_hexbin":6 + } + }, + { + "table_id":44, + "table_name":"REGEX_500", + "table_type":"expr", + "valid_column":7, + "custom": { + "item_id":1, + "group_id":2, + "keywords":3, + "expr_type":4, + "match_method":5, + "is_hexbin":6 + } + }, + { + "table_id":45, + "table_name":"REGEX_1K", + "table_type":"expr", + "valid_column":7, + "custom": { + "item_id":1, + "group_id":2, + "keywords":3, + "expr_type":4, + "match_method":5, + "is_hexbin":6 + } + }, + { + "table_id":46, + "table_name":"REGEX_2K", + "table_type":"expr", + "valid_column":7, + "custom": { + "item_id":1, + "group_id":2, + "keywords":3, + "expr_type":4, + "match_method":5, + "is_hexbin":6 + } + }, + { + "table_id":47, + "table_name":"REGEX_3K", + "table_type":"expr", + "valid_column":7, + "custom": { + "item_id":1, + "group_id":2, + "keywords":3, + "expr_type":4, + "match_method":5, + "is_hexbin":6 + } + }, + { + "table_id":48, + "table_name":"REGEX_5K", + "table_type":"expr", + "valid_column":7, + "custom": { + "item_id":1, + "group_id":2, + "keywords":3, + "expr_type":4, + "match_method":5, + "is_hexbin":6 + } + }, + { + "table_id":49, + "table_name":"REGEX_10K", + "table_type":"expr", + "valid_column":7, + "custom": { + "item_id":1, + "group_id":2, + "keywords":3, + "expr_type":4, + "match_method":5, + "is_hexbin":6 + } + }, + { + "table_id":50, + "table_name":"REGEX_15K", + "table_type":"expr", + "valid_column":7, + "custom": { + "item_id":1, + "group_id":2, + "keywords":3, + "expr_type":4, + "match_method":5, + "is_hexbin":6 + } } ] \ No newline at end of file diff --git a/test/benchmark/regex_rules/REGEX_100 b/test/benchmark/regex_rules/REGEX_100 new file mode 100644 index 0000000..ad7ccf0 --- /dev/null +++ b/test/benchmark/regex_rules/REGEX_100 @@ -0,0 +1,101 @@ +100 +1 1 User-Agent:\s.*.abc.net 2 0 0 1 +2 2 (#\d{1,2}){20} 2 0 0 1 +3 3 (((DestFile|encryptPass)\x3D[^\x26]{50})|((BaseDN|SearchFilter)\x3D[^\x26]{128})) 2 0 0 1 +4 4 (((c|l)pi\x00.{1}(-\d|0)\x21)|(columns\x00.{1}(-\d|0)\x21)|(page-(right|left|top|bottom)\x00.{1}(-\d|0|([3-9]\d{5}|24\d{4}|236\d{3}|23593\d{1}|23592[2-9])\x21))) 2 0 0 1 +5 5 (,\d{1,3}){20} 2 0 0 1 +6 6 (3BFFE033-BF43-11d5-A271-00A024A51325|iNotes6\.iNotes6|E008A543-CEFB-4559-912F-C27C2B89F13B|dwa7\.dwa7) 2 0 0 1 +7 7 (?P[A-Z\d_]+)\.DataURL\s*=\s*(\x22[^\x22]{128}|\x27[^\x27]{128}) 2 0 0 1 +8 8 (Context|Action)\x3D[^\x26\x3b]{1024} 2 0 0 1 +9 9 (DisableSandboxAndDrop|ConfusedClass|FieldAccessVerifierExpl)\.class 2 0 0 1 +10 10 (INSERT|UPDATE)\s*[\s\w]*((mysql\.)?func)[^\r\n]+values\s*\([^\)]+\x2c[\x22\x27][^\x22\x27]*\x2f 2 0 0 1 +11 11 (Image|Doc) 2 0 0 1 +12 12 (OvAcceptLang|Accept-Language)\s*[\x3D\x3A]\s*[^\n]{69} 2 0 0 1 +13 13 (OvJavaScript|OvTitleFrame|OvHelpWindow|OvMap|OvSession|OvJavaLocale|OvOSLocale|OvWebSession)\s*\x3D[^\x3B\x2C]{1024} 2 0 0 1 +14 14 (Set|Check)\x10Properties 2 0 0 1 +15 15 (USER|PASS)[^\x80-\xff]*[\x80-\xff] 2 0 0 1 +16 16 ([sp]key|csk)=[^\r\n\x26]+(script|onclick|onload|onmouseover|html|[\x22\x27\x3c\x3e\x28\x29]) 2 0 0 1 +17 17 (\x22|\x27)daap\x3a\x2f\x2f[^\x22\x27]*\x3a[^\x22\x27\x2f]{256} 2 0 0 1 +18 18 (\x22|\x27)itms\x3a\x2f\x2f[^\x22\x27]*\x3a[^\x22\x27\x2f]{256} 2 0 0 1 +19 19 (\x22|\x27)itmss\x3a\x2f\x2f[^\x22\x27]*\x3a[^\x22\x27\x2f]{256} 2 0 0 1 +20 20 (\x22|\x27)itpc\x3a\x2f\x2f[^\x22\x27]*\x3a[^\x22\x27\x2f]{256} 2 0 0 1 +21 21 (\x22|\x27)pcast\x3a\x2f\x2f[^\x22\x27]*\x3a[^\x22\x27\x2f]{256} 2 0 0 1 +22 22 (\x25(n|t|d)\x20){85} 2 0 0 1 +23 23 (\x3F|\x26)[^\x3D]*(\x27|%27)[^\x3D]*(\x3C|%3c)script(\x3E|%3e) 2 0 0 1 +24 24 (\x40\x09.{19}|\x41\x0b.{23})[\xf0-\xff].{8}\x01\x00[\x00\x01\x02\x04\x08\x10\x18\x20]\x00 2 0 0 1 +25 25 (^|&)SelectedID=[^&]+?(\x3B|%3B) 2 0 0 1 +26 26 (^|&)paths(%5b|\x5b)(%5d|\x5d)=[^&]*?(%2e|\x2e){2}(%2f|\x2f) 2 0 0 1 +27 27 (^|&)selectedLocale=[^&]+?([\x22\x27]|%22|%27) 2 0 0 1 +28 28 (^|[\x3b\x7b\x7d]|%3b|%7b|%7d)O(%3a|\x3a)(\x2b|%2b)?[0-9]+?(%3a|\x3a)(%22|\x22) 2 0 0 1 +29 29 (action|setup)=[a-z]{1,4} 2 0 0 1 +30 30 (arg=[^\x26]*?OVwSelection[^\x26]*?\x26.*?sel=[^\s\x26]{1023}|sel=[^\x26]{1023,}\x26.*?arg=[^\s\x26]*?OVwSelection) 2 0 0 1 +31 31 (caption,\x22\x5c\x5c\x5c|\x22\x5cn\x5cn\x5cn\x22\x20\x2b\x20str) 2 0 0 1 +32 32 (data_select1|nameParams|schdParams|text1|schd_select1)=[^\x26]{512} 2 0 0 1 +33 33 (displayWidth[\x2b\x20]\d[^\x2b\s\n]{128}) 2 0 0 1 +34 34 (filename|type)=[^\x26]*?\x2E\x2E 2 0 0 1 +35 35 (ora_osb_bgcookie|rbtool)=[^\x20\x26\x3b]{1} 2 0 0 1 +36 36 (sIda\/sId|urua\/uru)[abcd]\.classPK 2 0 0 1 +37 37 .{20}[\x01\x02]\x00\x03\x00.*?\x5c\x00\x5c\x00 2 0 0 1 +38 38 5FDC81917DE08A41A6AC(E9B8ECA1EE.8|.98ECB1EEA8E) 2 0 0 1 +39 39 (\x2e\x2e\x5c|%2E%2E%5C){2}[^<]+? 2 0 0 1 +40 40 [^<]+?(\x3B|%3B) 2 0 0 1 +41 41 <[^>]*?style\s*[>=].{1,1024}margin\s*\x3a\s*[^\x3b\x7d]*?-(\d{4}|1[0-9][1-9]|[2-9]\d\d)[ce][mx].*?[\x7b\x3b] 2 0 0 1 +42 42 <\?(php)?.{1,256}define\s*\x28\s*str_repeat\s*\x28\s*[\x22\x27][^\x22\x27]+[\x22\x27]\s*\x2c\s*\x24argv 2 0 0 1 +43 43 <\s*object[^>]*?data\s*\x3A[^,>]*?base64 2 0 0 1 +44 44 <\s*valitem[^>]*\s(value|name)\s*=\s*([\x22\x27])[^\x22\x27]{104} 2 0 0 1 +45 45 <\x21DOCTYPE\s+[^>]*?SYSTEM[^>]*?>.*?\x2EparseError 2 0 0 1 +46 46 <\x21ENTITY[^>]+SYSTEM[^>]+http\x3A\x2F\x2F[^>\s]+http\x3A\x2F\x2F 2 0 0 1 +47 47 ]+(archive|src)\s*?=\s*?(\x22|\x27|)\s*?(\d{5}\.jar|[^>]+\/\d{5}\.jar) 2 0 0 1 +48 48 ]+?dir\s*?=\s*?[\x22\x27]\s*?rtl\s*?[\x22\x27].*?(&#?x?[a-z\d]{2,4}\x3b){100} 2 0 0 1 +49 49 ]*?height\x3d\s*[\x22\x27]?\s*[0-9]{6} 2 0 0 1 +50 50 ]*?for\s*=\s*[\x22\x27]?.*?event\s*=\s*[\x22\x27]?onpropertychange[\x22\x27]?[^>]*?> 2 0 0 1 +51 51 ]*src\s*=\s*[\x22\x27][^\x22\x27]*\.json[\x22\x27][^>]*language=vbs 2 0 0 1 +52 52 CRiMEPACK [\d\.]+ 2 0 0 1 +53 53 \s*.*?\s* 2 0 0 1 +55 55 AdminServlet.*(userid|adminurl)[^\x26\x20\x0a]*