提高expr_plus和interval_plus表的扫描性能

This commit is contained in:
郑超
2021-07-15 03:59:09 +00:00
committed by 刘学利
parent 502a6e3420
commit 6d5a42fb4a
7 changed files with 1196 additions and 61 deletions

View File

@@ -175,53 +175,55 @@ size_t Maat_rule_sort_by_evaluation_order(Maat_feather_t feather, struct Maat_ru
// Bundle handed from the scan_region_hit_wraper_build_with_* builders to
// region_compile(): the regions hit by one scan plus the scan context.
//
// NOTE(review): this listing appears to interleave two revisions of the struct.
// The offset-described fields (elem_array, elem_size, n_elem, user_data_offset,
// expr_id_offset) are only written by builder lines that go through
// `region_hit->`, while hit_regions/n_hit_region are written by the lines that
// go through `wraper->`. Confirm which set is current before relying on either.
struct scan_region_hit_wraper
{
// Copied from the builders' Nth_scan argument.
int Nth_scan;
// Base address of a raw result array (scan_result_t or GIE_result_t elements).
void* elem_array;
// Region objects collected by the builders; region_compile() reads entries [0, n_hit_region).
struct Maat_region_inner* hit_regions[MAX_SCANNER_HIT_NUM];
// Number of valid entries in hit_regions.
size_t n_hit_region;
// Set to NULL by both builders; region_compile() tests it for non-NULL.
int* virtual_table_ids;
// sizeof one element of elem_array.
size_t elem_size;
// Number of elements in elem_array.
size_t n_elem;
// offsetof the `tag` member inside one element of elem_array.
size_t user_data_offset;
// offsetof the expression id member (expr_id / id) inside one element of elem_array.
size_t expr_id_offset;
// Copied from the builders' virtual_table_id argument.
int virtual_table_id;
// Copied from the builders' is_last_region argument.
int is_last_region;
};
void scan_region_hit_wraper_build_with_rulescan(struct scan_region_hit_wraper* region_hit, scan_result_t* rulescan_rslt, size_t n_rslt, int is_last_region, int virtual_table_id, int Nth_scan)
void scan_region_hit_wraper_build_with_rulescan(struct scan_region_hit_wraper* wraper, scan_result_t* rulescan_rslt, size_t n_rslt, int district_id, int is_last_region, int virtual_table_id, int Nth_scan)
{
memset(region_hit, 0, sizeof(struct scan_region_hit_wraper));
region_hit->elem_array=rulescan_rslt;
region_hit->n_elem=n_rslt;
region_hit->elem_size=sizeof(scan_result_t);
region_hit->expr_id_offset=offsetof(scan_result_t, expr_id);
region_hit->user_data_offset=offsetof(scan_result_t, tag);
region_hit->is_last_region=is_last_region;
region_hit->virtual_table_id=virtual_table_id;
region_hit->Nth_scan=Nth_scan;
region_hit->virtual_table_ids=NULL;
size_t i=0;
struct Maat_region_inner* region=NULL;
memset(wraper, 0, sizeof(struct scan_region_hit_wraper));
for(i=0; i< n_rslt; i++)
{
region=(struct Maat_region_inner*)(rulescan_rslt[i].tag);
if(region->district_id==district_id||district_id==-1)
{
wraper->hit_regions[wraper->n_hit_region]=region;
wraper->n_hit_region++;
}
}
wraper->is_last_region=is_last_region;
wraper->virtual_table_id=virtual_table_id;
wraper->Nth_scan=Nth_scan;
wraper->virtual_table_ids=NULL;
return;
}
void scan_region_hit_wraper_build_with_GIE(struct scan_region_hit_wraper* region_hit, GIE_result_t* GIE_rslt, size_t n_rslt, int is_last_region, int virtual_table_id, int Nth_scan)
void scan_region_hit_wraper_build_with_GIE(struct scan_region_hit_wraper* wraper, GIE_result_t* GIE_rslt, size_t n_rslt, int is_last_region, int virtual_table_id, int Nth_scan)
{
memset(region_hit, 0, sizeof(struct scan_region_hit_wraper));
region_hit->elem_array=GIE_rslt;
region_hit->n_elem=n_rslt;
region_hit->elem_size=sizeof(GIE_result_t);
region_hit->expr_id_offset=offsetof(GIE_result_t, id);
region_hit->user_data_offset=offsetof(GIE_result_t, tag);
region_hit->is_last_region=is_last_region;
region_hit->virtual_table_id=virtual_table_id;
region_hit->Nth_scan=Nth_scan;
region_hit->virtual_table_ids=NULL;
size_t i=0;
memset(wraper, 0, sizeof(struct scan_region_hit_wraper));
for(i=0; i< n_rslt; i++)
{
wraper->hit_regions[wraper->n_hit_region]=(struct Maat_region_inner*)(GIE_rslt[i].tag);
}
wraper->n_hit_region=n_rslt;
wraper->is_last_region=is_last_region;
wraper->virtual_table_id=virtual_table_id;
wraper->Nth_scan=Nth_scan;
wraper->virtual_table_ids=NULL;
return;
}
int region_compile(_Maat_feather_t*feather, struct Maat_hierarchy_compile_mid* compile_mid, const struct scan_region_hit_wraper* region_hit_wraper, struct Maat_rule_t* result, int size,int thread_num)
{
int is_last_region=region_hit_wraper->is_last_region;
void* region_hit=region_hit_wraper->elem_array;
size_t region_type_size=region_hit_wraper->elem_size;
size_t user_data_offset=region_hit_wraper->user_data_offset;
size_t region_hit_num=region_hit_wraper->n_elem;
size_t region_hit_num=region_hit_wraper->n_hit_region;
int scan_ret=0;
int i=0;
@@ -234,7 +236,7 @@ int region_compile(_Maat_feather_t*feather, struct Maat_hierarchy_compile_mid* c
for(i=0; (size_t)i<region_hit_num;i++)
{
region=*(struct Maat_region_inner**)((char*)region_hit+region_type_size*i+user_data_offset);
region=region_hit_wraper->hit_regions[i];
assert(region->magic_num==REGION_RULE_MAGIC);
if(region_hit_wraper->virtual_table_ids)
{
@@ -277,26 +279,6 @@ int region_compile(_Maat_feather_t*feather, struct Maat_hierarchy_compile_mid* c
}
int match_district(struct _OUTER_scan_status_t *_mid, scan_result_t *region_hit, int region_hit_num)
{
struct Maat_region_inner* region=NULL;
int i=0, j=0;
for(i=0; i<region_hit_num; i++)
{
region=(struct Maat_region_inner*)(region_hit[i].tag);
if(region->district_id==_mid->district_id)
{
if(j!=i)
{
memcpy(region_hit+j, region_hit+i, sizeof(scan_result_t));
}
j++;
}
}
return j;
}
int fill_regex_pos(struct regex_pos_t *regex_pos,int size,rule_result_t *rs_result,const char* buff)
{
int i=0,j=0;
@@ -1354,6 +1336,7 @@ int Maat_full_scan_string_detail(Maat_feather_t feather,int table_id
,int* detail_ret,scan_status_t* mid,int thread_num)
{
int region_ret=0,compile_ret=0,hit_region_cnt=0;
int district_id=-1;
unsigned int sub_type=0;
int virtual_table_id=0;
struct _Maat_feather_t* _feather=(_Maat_feather_t*)feather;
@@ -1439,7 +1422,7 @@ int Maat_full_scan_string_detail(Maat_feather_t feather,int table_id
}
if(hit_region_cnt>0&&p_table->table_type==TABLE_TYPE_EXPR_PLUS)
{
hit_region_cnt=match_district(_mid, region_result, hit_region_cnt);
district_id=_mid->district_id;
}
if(hit_region_cnt>0 || scan_status_should_compile_NOT(_mid))
{
@@ -1449,7 +1432,7 @@ int Maat_full_scan_string_detail(Maat_feather_t feather,int table_id
}
_mid=grab_mid(mid, _feather, thread_num, 1);
struct scan_region_hit_wraper region_hit_wraper;
scan_region_hit_wraper_build_with_rulescan(&region_hit_wraper, region_result, hit_region_cnt,
scan_region_hit_wraper_build_with_rulescan(&region_hit_wraper, region_result, hit_region_cnt, district_id,
_mid->is_last_region, virtual_table_id, _mid->scan_cnt);
compile_ret=region_compile(_feather, _mid->compile_mid,
&region_hit_wraper,
@@ -1503,6 +1486,7 @@ int Maat_scan_intval(Maat_feather_t feather,int table_id
,scan_status_t *mid,int thread_num)
{
int region_ret=0,compile_ret=0;
int district_id=-1;
struct _OUTER_scan_status_t* _mid=NULL;
scan_data_t intval_scan_data;
scan_result_t *region_result=NULL;
@@ -1554,7 +1538,7 @@ int Maat_scan_intval(Maat_feather_t feather,int table_id
region_ret=rulescan_search(my_scanner->region, thread_num, &intval_scan_data, region_result, MAX_SCANNER_HIT_NUM);
if(region_ret>0&&p_table->table_type==TABLE_TYPE_INTERVAL_PLUS)
{
region_ret=match_district(_mid, region_result, region_ret);
district_id=_mid->district_id;
}
if(region_ret<0)
{
@@ -1570,7 +1554,7 @@ int Maat_scan_intval(Maat_feather_t feather,int table_id
}
_mid=grab_mid(mid, _feather, thread_num, 1);
struct scan_region_hit_wraper region_hit_wraper;
scan_region_hit_wraper_build_with_rulescan(&region_hit_wraper, region_result, region_ret,
scan_region_hit_wraper_build_with_rulescan(&region_hit_wraper, region_result, region_ret, district_id,
_mid->is_last_region, virtual_table_id, _mid->scan_cnt);
compile_ret=region_compile(_feather,_mid->compile_mid,
&region_hit_wraper,
@@ -1885,7 +1869,7 @@ int Maat_scan_proto_addr(Maat_feather_t feather,int table_id
if(region_hit_cnt>0 || scan_status_should_compile_NOT(_mid) )
{
_mid=grab_mid(mid, _feather, thread_num, 1);
scan_region_hit_wraper_build_with_rulescan(&region_hit_wraper, region_result, region_hit_cnt,
scan_region_hit_wraper_build_with_rulescan(&region_hit_wraper, region_result, region_hit_cnt, -1,
_mid->is_last_region, virtual_table_id, _mid->scan_cnt);
if(table_type==TABLE_TYPE_COMPOSITION)
{
@@ -1999,6 +1983,7 @@ int Maat_stream_scan_string_detail(stream_para_t* stream_para
int sub_type=0;
int region_ret=0,hit_region_cnt=0,compile_ret=0;
int district_id=-1;
struct _OUTER_scan_status_t* _mid=NULL;
scan_result_t *region_result;
scan_data_t region_scan_data;
@@ -2103,7 +2088,7 @@ int Maat_stream_scan_string_detail(stream_para_t* stream_para
}
if(hit_region_cnt>0&&sp->p_real_table->table_type==TABLE_TYPE_EXPR_PLUS)
{
hit_region_cnt=match_district(_mid, region_result, hit_region_cnt);
district_id=_mid->district_id;
}
if(hit_region_cnt>0 || scan_status_should_compile_NOT(_mid))
{
@@ -2113,7 +2098,7 @@ int Maat_stream_scan_string_detail(stream_para_t* stream_para
}
_mid=grab_mid(mid, sp->feather,sp->thread_num, 1);
struct scan_region_hit_wraper region_hit_wraper;
scan_region_hit_wraper_build_with_rulescan(&region_hit_wraper, region_result, hit_region_cnt,
scan_region_hit_wraper_build_with_rulescan(&region_hit_wraper, region_result, hit_region_cnt, district_id,
_mid->is_last_region, sp->virtual_table_id, _mid->scan_cnt);
compile_ret=region_compile(sp->feather, _mid->compile_mid,

View File

@@ -57,7 +57,7 @@ extern "C"
}
#endif
int MAAT_FRAME_VERSION_3_2_2_20210629=1;
int MAAT_FRAME_VERSION_3_2_3_20210714=1;
int is_valid_table_name(const char* str)
{

View File

@@ -11,6 +11,7 @@ add_executable(perf_test_maatframe perf_test_maatframe.cpp)
target_link_libraries(perf_test_maatframe maat_frame_shared gtest)
configure_file(table_info.conf table_info.conf COPYONLY)
configure_file(tsg_tableinfo.conf tsg_tableinfo.conf COPYONLY)
configure_file(file_test_tableinfo.conf file_test_tableinfo.conf COPYONLY)
configure_file(maat_json.json maat_json.json COPYONLY)
configure_file(reset_redis4maat.sh reset_redis4maat.sh COPYONLY)
@@ -20,4 +21,5 @@ file(COPY testdata DESTINATION ./)
file(COPY testdata_uni2ascii DESTINATION ./)
file(COPY test_streamfiles DESTINATION ./)
file(COPY ntcrule DESTINATION ./)
file(COPY tsgrule DESTINATION ./)
file(COPY json_update DESTINATION ./)

View File

@@ -271,7 +271,7 @@ protected:
logger=MESA_create_runtime_log_handle("maat_perf_test.log",0);
_shared_feather=Maat_feather(g_iThreadNum, table_info_path, logger);
Maat_set_feather_opt(_shared_feather,MAAT_OPT_INSTANCE_NAME,"perf", strlen("perf")+1);
Maat_set_feather_opt(_shared_feather,MAAT_OPT_INSTANCE_NAME,"cmdperf", strlen("cmdperf")+1);
Maat_set_feather_opt(_shared_feather, MAAT_OPT_REDIS_IP, test_maat_redis_ip, strlen(test_maat_redis_ip)+1);
Maat_set_feather_opt(_shared_feather, MAAT_OPT_REDIS_PORT, &test_maat_redis_port, sizeof(test_maat_redis_port));
Maat_set_feather_opt(_shared_feather, MAAT_OPT_SCANDIR_INTERVAL_MS,&scan_interval_ms, sizeof(scan_interval_ms));
@@ -575,6 +575,150 @@ TEST_F(MaatCMDPerfTest, UpdateFQDNPlugin)
return;
}
// Thread count shared by the file-based perf tests: passed to Maat_feather() in
// MaatFilePerfTest::SetUpTestCase and used as the number of pthreads in the IPPlugin test.
int global_thread_num=4;
// Test fixture that creates one Maat feather, configured to read rules from
// ./tsgrule/full/index, and shares it across every test of the test case.
class MaatFilePerfTest : public testing::Test
{
protected:
    // Creates the log handle and the shared feather, applies its options, then initiates it.
    static void SetUpTestCase()
    {
        const char* table_info="./tsg_tableinfo.conf";
        const char* rule_folder="./tsgrule/full/index";
        const char* instance_name="files";
        int scan_interval_ms=500;
        int effective_interval_ms=0;

        logger=MESA_create_runtime_log_handle("file_perf_test.log",0);
        _shared_feather_f=Maat_feather(global_thread_num, table_info, logger);

        Maat_set_feather_opt(_shared_feather_f, MAAT_OPT_INSTANCE_NAME, instance_name, strlen(instance_name)+1);
        // The same folder serves as both the full and the incremental config directory.
        Maat_set_feather_opt(_shared_feather_f, MAAT_OPT_FULL_CFG_DIR, rule_folder, strlen(rule_folder)+1);
        Maat_set_feather_opt(_shared_feather_f, MAAT_OPT_INC_CFG_DIR, rule_folder, strlen(rule_folder)+1);
        Maat_set_feather_opt(_shared_feather_f, MAAT_OPT_SCANDIR_INTERVAL_MS, &scan_interval_ms, sizeof(scan_interval_ms));
        // Set a short interval for testing.
        Maat_set_feather_opt(_shared_feather_f, MAAT_OPT_EFFECT_INVERVAL_MS, &effective_interval_ms, sizeof(effective_interval_ms));

        Maat_initiate_feather(_shared_feather_f);
    }

    // Releases the shared feather first, then the log handle it was created with.
    static void TearDownTestCase()
    {
        Maat_burn_feather(_shared_feather_f);
        MESA_destroy_runtime_log_handle(logger);
    }

    // Expensive resources shared by all tests of this test case.
    static Maat_feather_t _shared_feather_f;
    static void *logger;
};
// Out-of-class definitions of the static members declared in MaatFilePerfTest.
Maat_feather_t MaatFilePerfTest::_shared_feather_f;
void* MaatFilePerfTest::logger;
// Per-row EX data attached to the ip_plugin table by the perf_ip_plugin_EX_* callbacks.
struct perf_ip_plugin_ud
{
// Parsed with atoi() from column 1 of the table line in perf_ip_plugin_EX_new_cb.
int rule_id;
// Starts at 1 in the new callback, atomically incremented by the dup callback and
// decremented by the free callback; the object is freed when it reaches 0.
int ref_cnt;
};
void perf_ip_plugin_EX_new_cb(int table_id, const char* key, const char* table_line, MAAT_PLUGIN_EX_DATA* ad, long argl, void *argp)
{
int *counter=(int *)argp, ret=0;
size_t column_offset=0, column_len=0;
struct perf_ip_plugin_ud* ud=(struct perf_ip_plugin_ud*)calloc(sizeof(struct perf_ip_plugin_ud), 1);
ret=Maat_helper_read_column(table_line, 1, &column_offset, &column_len);
EXPECT_EQ(ret, 0);
ud->rule_id=atoi(table_line+column_offset);
ret=Maat_helper_read_column(table_line, 5, &column_offset, &column_len);
EXPECT_EQ(ret, 0);
ud->ref_cnt=1;
*ad=ud;
(*counter)++;
return;
}
// EX-data "free" callback: drops one reference from the object behind *ad.
// When the atomic decrement brings ref_cnt to 0 the object is freed and *ad is
// reset to NULL; otherwise *ad is left untouched. table_id, argl and argp are unused.
void perf_ip_plugin_EX_free_cb(int table_id, MAAT_PLUGIN_EX_DATA* ad, long argl, void *argp)
{
    struct perf_ip_plugin_ud* ud=(struct perf_ip_plugin_ud*)(*ad);
    const int remaining=__sync_sub_and_fetch(&ud->ref_cnt, 1);
    if(remaining!=0)
    {
        return;
    }
    free(ud);
    *ad=NULL;
}
// EX-data "dup" callback: shares the object instead of copying it. The
// reference count of *from is atomically incremented and the same pointer is
// stored into *to. table_id, argl and argp are unused.
void perf_ip_plugin_EX_dup_cb(int table_id, MAAT_PLUGIN_EX_DATA *to, MAAT_PLUGIN_EX_DATA *from, long argl, void *argp)
{
    struct perf_ip_plugin_ud* shared=(struct perf_ip_plugin_ud*)(*from);
    (void)__sync_add_and_fetch(&shared->ref_cnt, 1);
    *to=shared;
}
// Thread body for the IPPlugin perf test.
//
// arg is the Maat feather. The thread looks up 191.70.72.1 in
// TSG_IP_LOCATION_BUILT_IN one million times, releasing every returned EX data
// reference through perf_ip_plugin_EX_free_cb after each lookup.
// Returns a malloc'ed int owned by the joiner: 1 when every lookup returned at
// least one result, 0 otherwise.
static void* ip_plugin_get_thread(void* arg)
{
    const int test_times=1000*1000;
    Maat_feather_t feather=(Maat_feather_t)arg;
    int table_id=Maat_table_register(feather, "TSG_IP_LOCATION_BUILT_IN");

    struct perf_ip_plugin_ud* result[4];
    struct ip_address ip;
    ip.ip_type=4;
    inet_pton(AF_INET, "191.70.72.1", &(ip.ipv4));

    int hit_times=0;
    for(int round=0; round<test_times; round++)
    {
        int n_found=Maat_ip_plugin_get_EX_data(feather, table_id, &ip, (void**)result, 4);
        if(n_found>0)
        {
            hit_times++;
        }
        // Give back the reference taken for each returned object.
        for(int k=0; k<n_found; k++)
        {
            perf_ip_plugin_EX_free_cb(table_id, (void**)&(result[k]), 0, NULL);
        }
    }

    int* is_all_hit=(int*)malloc(sizeof(int));
    if(hit_times==test_times)
    {
        *is_all_hit=1;
    }
    else
    {
        *is_all_hit=0;
    }
    return is_all_hit;
}
// Registers the EX-data callbacks on TSG_IP_LOCATION_BUILT_IN, then runs
// global_thread_num lookup threads concurrently and expects each of them to
// report that every lookup hit.
TEST_F(MaatFilePerfTest, IPPlugin)
{
    Maat_feather_t feather=MaatFilePerfTest::_shared_feather_f;
    int ret=0, i=0;
    int n_created=0;
    int table_id=0, ip_plugin_ex_data_counter=0;
    const char* table_name="TSG_IP_LOCATION_BUILT_IN";

    table_id=Maat_table_register(feather, table_name);
    ASSERT_GT(table_id, 0);
    ret=Maat_ip_plugin_EX_register(feather, table_id,
                perf_ip_plugin_EX_new_cb,
                perf_ip_plugin_EX_free_cb,
                perf_ip_plugin_EX_dup_cb,
                0, &ip_plugin_ex_data_counter);
    ASSERT_TRUE(ret>=0);

    pthread_t threads[global_thread_num];
    for(i=0; i<global_thread_num; i++)
    {
        // Only successfully created threads are recorded, so the join loop
        // below never touches an uninitialized pthread_t.
        ret=pthread_create(&(threads[n_created]), NULL, ip_plugin_get_thread, feather);
        EXPECT_EQ(ret, 0);
        if(ret==0)
        {
            n_created++;
        }
    }
    for(i=0; i<n_created; i++)
    {
        // Join through a real void* rather than casting int** to void**.
        void* thread_ret=NULL;
        ret=pthread_join(threads[i], &thread_ret);
        EXPECT_EQ(ret, 0);
        if(ret!=0)
        {
            continue;
        }
        int* is_all_hit=(int*)thread_ret;
        EXPECT_TRUE(is_all_hit!=NULL);
        if(is_all_hit!=NULL)
        {
            EXPECT_EQ(*is_all_hit, 1);
            free(is_all_hit);
        }
    }
    return;
}
int main(int argc, char ** argv)
{

1
test/tsg_tableinfo.conf Normal file
View File

@@ -0,0 +1 @@
42 TSG_IP_LOCATION_BUILT_IN ip_plugin {"row_id":1,"ip_type":3,"start_ip":4,"end_ip":5,"valid":18,"estimate_size":4194304}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,2 @@
TSG_IP_LOCATION_BUILT_IN 1000 ./tsgrule/TSG_IP_LOCATION_BUILT_IN.head_1k