From 81a827d1624f2f5d51ada7cc3184db260be06e70 Mon Sep 17 00:00:00 2001 From: zhengchao Date: Sun, 14 Jun 2020 20:52:14 +0800 Subject: [PATCH] =?UTF-8?q?=E5=9F=BA=E7=A1=80=E6=89=AB=E6=8F=8F=E5=8A=9F?= =?UTF-8?q?=E8=83=BD=E7=9A=84=E6=B5=8B=E8=AF=95=E7=94=A8=E4=BE=8B=E9=80=9A?= =?UTF-8?q?=E8=BF=87=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/entry/Maat_hierarchy.cpp | 51 +++++++++++++-------- src/entry/Maat_rule.cpp | 87 +++++++++++++++++------------------- src/entry/json2iris.cpp | 5 ++- src/version.map | 2 +- test/maat_json.json | 11 +++-- test/table_info.conf | 69 ++++++++++++++-------------- test/test_maatframe.cpp | 3 +- 7 files changed, 123 insertions(+), 105 deletions(-) diff --git a/src/entry/Maat_hierarchy.cpp b/src/entry/Maat_hierarchy.cpp index a521593..2748725 100644 --- a/src/entry/Maat_hierarchy.cpp +++ b/src/entry/Maat_hierarchy.cpp @@ -26,7 +26,8 @@ struct Maat_hierarchy_group igraph_integer_t vertex_id; int group_id; int ref_by_compile_cnt; - int ref_by_group_cnt; + int ref_by_superior_group_cnt; + int ref_by_subordinate_group_cnt; int ref_by_region_cnt; int top_group_cnt; @@ -187,7 +188,12 @@ int compare_literal_id(const void *pa, const void *pb) { struct Maat_hierarchy_literal_id *la=(struct Maat_hierarchy_literal_id *)pa; struct Maat_hierarchy_literal_id *lb=(struct Maat_hierarchy_literal_id *)pb; - return TO_LITERAL_ID(la->virtual_table_id, la->group_id)-TO_LITERAL_ID(lb->virtual_table_id, lb->group_id); + int ret=la->virtual_table_id-lb->virtual_table_id; + if(ret==0) + { + ret=la->group_id-lb->group_id; + } + return ret; } @@ -453,7 +459,7 @@ static void Maat_hierarchy_group_free(struct Maat_hierarchy* hier, struct Maat_h { igraph_vector_t v; char buff[4096]; - assert(group->ref_by_compile_cnt==0&&group->ref_by_group_cnt==0); + assert(group->ref_by_compile_cnt==0&&group->ref_by_superior_group_cnt==0); igraph_vector_init(&v, 8); igraph_neighbors(&hier->group_graph, &v, group->vertex_id, IGRAPH_ALL); if(igraph_vector_size(&v)>0) @@ -496,7 +502,6 @@ int Maat_hierarchy_add_group_to_compile(struct Maat_hierarchy* hier, int group_i literal=Maat_hierarchy_literal_new(hier, group_id, vt_id); } ret=Maat_hierarchy_literal_join_clause(literal, not_flag, Nth_clause, compile_id); - pthread_rwlock_unlock(&hier->rwlock); if(ret<0) { MESA_handle_runtime_log(hier->logger, RLOG_LV_FATAL, module_maat_hierarchy, @@ -582,7 +587,8 @@ int Maat_hierarchy_add_group_to_group(struct Maat_hierarchy* hier, int group_id, else { igraph_add_edge(&hier->group_graph, group->vertex_id, superior_group->vertex_id); - group->ref_by_group_cnt++; + group->ref_by_superior_group_cnt++; + superior_group->ref_by_subordinate_group_cnt++; ret=0; } pthread_rwlock_unlock(&hier->rwlock); @@ -636,7 +642,8 @@ int Maat_hierarchy_remove_group_from_group(struct Maat_hierarchy* hier, int grou return -1; } - group->ref_by_group_cnt--; + group->ref_by_superior_group_cnt--; + superior_group->ref_by_subordinate_group_cnt--; return 0; } @@ -823,27 +830,32 @@ static int Maat_hierarchy_build_top_groups(struct Maat_hierarchy* hier) "Sub group cycle detected!"); return -1; } - + hier->group_graph_vcount=igraph_vcount(&hier->group_graph); + igraph_vector_init(&(hier->dfs_vids), hier->group_graph_vcount); HASH_ITER(hh_group_id, hier->hash_group_by_id, group, tmp) { top_group_cnt=0; temp_group_ids=NULL; //Orphan, Not reference by any one, free it. - if(group->ref_by_compile_cnt==0 && group->ref_by_group_cnt==0 && group->ref_by_region_cnt==0) + if(group->ref_by_compile_cnt==0 + && group->ref_by_superior_group_cnt==0 + && group->ref_by_subordinate_group_cnt==0 + && group->ref_by_region_cnt==0) { pthread_rwlock_wrlock(&hier->rwlock); free(group->top_group_ids); + group->top_group_ids=NULL; Maat_hierarchy_group_free(hier, group); pthread_rwlock_unlock(&hier->rwlock); continue; } //A group is need to build top groups when it has regions and referenced by superior groups or compiles. - if(group->ref_by_region_cnt>0 && (group->ref_by_compile_cnt>0 || group->ref_by_group_cnt>0)) + if(group->ref_by_region_cnt>0 && (group->ref_by_compile_cnt>0 || group->ref_by_superior_group_cnt>0)) { - if(group->ref_by_group_cnt==0) + if(group->ref_by_superior_group_cnt==0) { //fast path, group is only referenced by compile rules. top_group_cnt=1; @@ -885,7 +897,8 @@ static int Maat_hierarchy_build_top_groups(struct Maat_hierarchy* hier) free(temp_group_ids); temp_group_ids=NULL; - } + } + igraph_vector_destroy(&hier->dfs_vids); return 0; } @@ -912,7 +925,7 @@ struct Maat_hierarchy_compile_mid struct Maat_hierarchy* ref_hier; int thread_num; int Nth_scan; - size_t this_scan_region_hits; + size_t this_scan_region_hit_cnt; int not_clause_hitted_flag; size_t hit_path_cnt; struct hit_path_q hit_path_qhead; @@ -1042,9 +1055,10 @@ void Maat_hierarchy_compile_mid_udpate(struct Maat_hierarchy_compile_mid* mid, i struct Maat_hierarchy* hier=mid->ref_hier; if(mid->Nth_scan!=Nth_scan) { - assert(mid->this_scan_region_hits==0); + assert(mid->this_scan_region_hit_cnt==0); mid->Nth_scan=Nth_scan; } + mid->this_scan_region_hit_cnt++; pthread_rwlock_rdlock(&hier->rwlock); HASH_FIND_INT(hier->hash_region_by_id, ®ion_id, region); group=region->ref_parent_group; @@ -1099,7 +1113,7 @@ static size_t Maat_hierarchy_compile_mid_update_by_compile(struct Maat_hierarchy { size_t r_in_c_cnt=0, this_scan_hit_region_cnt=0; struct Maat_hierarchy_hit_path* p=NULL, *q=NULL; - struct Maat_hierarchy_literal_id literal_id={0,0}, *l=NULL; + struct Maat_hierarchy_literal_id literal_id={0, 0}, *l=NULL; struct Maat_hit_path_t condition; size_t n_exsited_path=0; @@ -1152,8 +1166,7 @@ static size_t Maat_hierarchy_compile_mid_update_by_compile(struct Maat_hierarchy } } - assert(this_scan_hit_region_cnt==mid->this_scan_region_hits); - mid->this_scan_region_hits=0; + assert(this_scan_hit_region_cnt>=mid->this_scan_region_hit_cnt); p = TAILQ_FIRST(&new_path_qhead); while(p != NULL) @@ -1172,13 +1185,13 @@ int Maat_hierarchy_region_compile(struct Maat_hierarchy_compile_mid* mid, int is struct Maat_hierarchy* hier=mid->ref_hier; struct Maat_hierarchy_compile* compile_array[ud_array_sz]; - size_t r_in_c_cnt=0, this_scan_region_hits=mid->this_scan_region_hits; + size_t r_in_c_cnt=0, this_scan_region_hits=mid->this_scan_region_hit_cnt; size_t ud_result_cnt=0; if(!hier->bm) { + mid->this_scan_region_hit_cnt=0; return 0; } - pthread_rwlock_rdlock(&hier->rwlock); bool_match_ret=bool_matcher_match(hier->bm, mid->thread_num, mid->all_hit_clause_array, mid->all_hit_clause_cnt, @@ -1201,6 +1214,8 @@ int Maat_hierarchy_region_compile(struct Maat_hierarchy_compile_mid* mid, int is } } pthread_rwlock_unlock(&hier->rwlock); + + mid->this_scan_region_hit_cnt=0; return ud_result_cnt; } diff --git a/src/entry/Maat_rule.cpp b/src/entry/Maat_rule.cpp index 399cc4b..55cbd78 100644 --- a/src/entry/Maat_rule.cpp +++ b/src/entry/Maat_rule.cpp @@ -720,7 +720,7 @@ void Maat_region_inner_free(struct Maat_region_inner* region) void Maat_region_inner_cancel_last_expr_id(struct Maat_region_inner* region) { - assert(region->expr_id_cnt==region->expr_id_ub-region->expr_id_lb); + assert(region->expr_id_cnt==region->expr_id_ub-region->expr_id_lb+1); region->expr_id_ub--; region->expr_id_cnt--; return; @@ -1086,6 +1086,7 @@ int add_expr_rule(struct Maat_table_schema* table,struct db_str_rule_t* db_rule, switch(db_rule->expr_type) { case EXPR_TYPE_AND: + case EXPR_TYPE_REGEX: for(i=0,p=db_rule->keywords;;i++,p=NULL) { if(i>=MAAT_MAX_EXPR_ITEM_NUM) @@ -1099,7 +1100,14 @@ int add_expr_rule(struct Maat_table_schema* table,struct db_str_rule_t* db_rule, { break; } - sub_key_array[i]=str_unescape(sub_key_array[i]); + if(db_rule->expr_type==EXPR_TYPE_REGEX) + { + sub_key_array[i]=str_unescape_and(sub_key_array[i]);//regex remain use str_unescape_and + } + else + { + sub_key_array[i]=str_unescape(sub_key_array[i]); + } } sub_expr_cnt=i; break; @@ -1136,28 +1144,6 @@ int add_expr_rule(struct Maat_table_schema* table,struct db_str_rule_t* db_rule, } sub_expr_cnt=i; break; - case EXPR_TYPE_REGEX://it's easy,no need to charset convert - expr_id=scanner->exprid_generator++; - op_expr=create_op_expr(expr_id - ,0 - ,u_para - ,table->table_id); - for(i=0,p=db_rule->keywords;;i++,p=NULL) - { - if(i>=MAAT_MAX_EXPR_ITEM_NUM) - { - MESA_handle_runtime_log(logger,RLOG_LV_FATAL,maat_module , - "Table %s region cfg %d too many expr.",table->table_name[table->updating_name],db_rule->region_id); - return -1; - } - sub_key_array[i]=strtok_r_esc(p,'&',&saveptr); - if(sub_key_array[i]==NULL) - { - break; - } - sub_key_array[i]=str_unescape_and(sub_key_array[i]);//regex remain use str_unescape_and - } - break; case EXPR_TYPE_STRING: sub_expr_cnt=1; sub_key_array[0]=db_rule->keywords; @@ -1185,8 +1171,8 @@ int add_expr_rule(struct Maat_table_schema* table,struct db_str_rule_t* db_rule, u_para=NULL; return -1; } - - if(db_rule->is_hexbin==FALSE) + + if(db_rule->is_hexbin==FALSE && db_rule->expr_type!=EXPR_TYPE_REGEX) { for(j=0;jexpr_type==EXPR_TYPE_REGEX) - { - p_rule->rule_type=RULETYPE_REG; - } op_expr_add_rule(op_expr, p_rule); free(region_string); region_string=NULL; @@ -1274,7 +1256,7 @@ int add_expr_rule(struct Maat_table_schema* table,struct db_str_rule_t* db_rule, } } - else + else //For hexbin and regex, no need to do charset conversion. { expr_id=scanner->exprid_generator++; Maat_region_inner_add_expr_id(u_para, expr_id); @@ -1285,20 +1267,35 @@ int add_expr_rule(struct Maat_table_schema* table,struct db_str_rule_t* db_rule, ); for(k=0;ktable_id,dst_charset,expr_desc->do_charset_merge), - db_rule->match_method, - db_rule->is_case_sensitive, - region_string, - region_str_len, - key_left_offset[k], - key_right_offset[k]); + if(db_rule->expr_type==EXPR_TYPE_REGEX) + { + p_rule=create_rs_str_rule(make_sub_type(table->table_id,dst_charset,expr_desc->do_charset_merge), + db_rule->match_method, + db_rule->is_case_sensitive, + sub_key_array[k], + strlen(sub_key_array[k]), + key_left_offset[k], + key_right_offset[k]); + p_rule->rule_type=RULETYPE_REG; + } + else + { + region_str_len=strlen(sub_key_array[k])+1; + region_string=ALLOC(char, region_str_len); + region_str_len=hex2bin(sub_key_array[k], strlen(sub_key_array[k]), region_string, region_str_len); + + p_rule=create_rs_str_rule(make_sub_type(table->table_id,dst_charset,expr_desc->do_charset_merge), + db_rule->match_method, + db_rule->is_case_sensitive, + region_string, + region_str_len, + key_left_offset[k], + key_right_offset[k]); + + free(region_string); + region_string=NULL; + } op_expr_add_rule(op_expr, p_rule); - free(region_string); - region_string=NULL; } MESA_lqueue_join_tail(scanner->region_update_q,&op_expr, sizeof(void*)); } @@ -1444,7 +1441,7 @@ void update_group2compile_rule(struct Maat_table_schema* table, const char* tabl &(db_g2c_rule.not_flag), virtual_table_name, &(db_g2c_rule.Nth_clause)); - if(ret!=5) + if(ret!=6) { MESA_handle_runtime_log(logger,RLOG_LV_INFO,maat_module , "update error, invalid format of group2compile table %s:%s", diff --git a/src/entry/json2iris.cpp b/src/entry/json2iris.cpp index acae5b6..d876dc3 100644 --- a/src/entry/json2iris.cpp +++ b/src/entry/json2iris.cpp @@ -193,7 +193,7 @@ int set_iris_descriptor(const char* json_file,cJSON *json, const char* encrypt_k iris_cfg->compile_table=query_table_info(iris_cfg, compile_tn, TABLE_TYPE_COMPILE); iris_cfg->group2compile_table=query_table_info(iris_cfg, group2compile_tn, TABLE_TYPE_GROUP2COMPILE); - iris_cfg->group2group_table=query_table_info(iris_cfg, group2compile_tn, TABLE_TYPE_GROUP2GROUP); + iris_cfg->group2group_table=query_table_info(iris_cfg, group2group_tn, TABLE_TYPE_GROUP2GROUP); if(encrypt_key && encrypt_algo) { @@ -1200,11 +1200,12 @@ int write_iris(cJSON *json, struct iris_description_t *p_iris, void* logger) i=1; cJSON_ArrayForEach(group_obj, group_array) { - ret=write_group_rule(group_obj, PARENT_TYPE_COMPILE, compile_id, compile_id, i, p_iris, logger); + ret=write_group_rule(group_obj, compile_id, PARENT_TYPE_COMPILE, compile_id, i, p_iris, logger); if(ret<0) { return -1; } + i++; } } ret=write_index_file(p_iris, logger); diff --git a/src/version.map b/src/version.map index 4f4c55d..b19179f 100644 --- a/src/version.map +++ b/src/version.map @@ -1,6 +1,6 @@ VERS_3.0{ global: - extern "C++" { + extern "C" { *MAAT_FRAME_VERSION_*; *Maat_*; *SFH_*; diff --git a/test/maat_json.json b/test/maat_json.json index 02e8e9c..184bf33 100644 --- a/test/maat_json.json +++ b/test/maat_json.json @@ -1,6 +1,7 @@ { "compile_table": "COMPILE", - "group_table": "GROUP", + "group2compile_table": "GROUP2COMPILE", + "group2group_table": "GROUP2GROUP", "groups": [ { "group_name": "ASN1234", @@ -1749,9 +1750,11 @@ "is_valid": "yes", "groups": [ { - "group_name":"ipv4_composition.session", - "virtual_table":"COMPOSITION_IP_SESSION", - "not_flag":0 + "group_name":"ipv4_composition.session", + "virtual_table":"COMPOSITION_IP_SESSION", + "not_flag":0, + "nth_clause":1 + } ] }, diff --git a/test/table_info.conf b/test/table_info.conf index f7f24c8..0d2a17e 100644 --- a/test/table_info.conf +++ b/test/table_info.conf @@ -18,37 +18,38 @@ #For expr/expr_plus Table #id name type src_charset dst_charset do_merge cross_cache 0 COMPILE compile escape -- -1 GROUP group -- -2 HTTP_URL expr UTF8 GBK/BIG5/UNICODE/UTF8/url_encode_gb2312/url_encode_utf8 yes 128 -2 HTTP_HOST expr UTF8 GBK/BIG5/UNICODE/UTF8/url_encode_gb2312/url_encode_utf8 yes 128 -3 KEYWORDS_TABLE expr UTF8 GBK/BIG5/UNICODE/UTF8/unicode_ascii_esc/unicode_ascii_aligned/unicode_ncr_dec/unicode_ncr_hex/windows-1251 yes 0 -4 IP_CONFIG ip -- -5 CONTENT_SIZE intval -- -6 QD_ENTRY_INFO plugin 4 -- -7 FILE_DIGEST digest -- -8 HTTP_SIGNATURE expr_plus GBK GBK yes 0 -9 SIM_URL similar -- -10 IMAGE_FP expr UTF8 UTF8 yes 128 -11 TEST_EFFECTIVE_RANGE_TABLE plugin {"valid":4,"tag":5} -- -12 TEST_FOREIGN_KEY plugin {"valid":4,"foreign":[6,8],"tag":3} -- -13 COMPILE_ALIAS compile escape -- -14 TEST_PLUGIN_EXDATA_TABLE plugin {"key":2,"valid":4,"tag":5,"estimate_size":1024} -- -15 IR_INTERCEPT_IP plugin {"valid":14,"tag":18} -16 APP_PAYLOAD expr_plus UTF8 UTF8 yes 0 -17 TROJAN_PAYLOAD expr UTF8 UTF8 yes 0 -18 MAIL_ADDR expr UTF8 UTF8 yes 0 -19 IP_PLUS_CONFIG ip_plus -- -20 HTTP_RESPONSE_KEYWORDS virtual KEYWORDS_TABLE -- -21 HTTP_REQUEST_HEADER virtual HTTP_SIGNATURE -- -22 HTTP_RESPONSE_HEADER virtual HTTP_SIGNATURE -- -23 VIRTUAL_IP_PLUS_TABLE virtual IP_PLUS_CONFIG -- -23 VIRTUAL_IP_PLUS_SOURCE virtual IP_PLUS_CONFIG -- -23 VIRTUAL_IP_PLUS_DESTINATION virtual IP_PLUS_CONFIG -- -24 COMPOSITION_IP_SOURCE virtual IP_PLUS_CONFIG -- -25 COMPOSITION_IP_DESTINATION virtual IP_PLUS_CONFIG -- -26 COMPOSITION_IP_SESSION virtual IP_PLUS_CONFIG -- -27 COMPOSITION_IP composition {"source":"COMPOSITION_IP_SOURCE","destination":"COMPOSITION_IP_DESTINATION","session":"COMPOSITION_IP_SESSION"} -28 TEST_IP_PLUGIN_WITH_EXDATA ip_plugin {"row_id":1,"ip_type":2,"start_ip":3,"end_ip":4,"valid":6} -- -29 AS_NUMBER expr UTF8 UTF8 yes 0 -30 SOURCE_IP_ASN virtual AS_NUMBER -- -31 DESTINATION_IP_ASN virtual AS_NUMBER -- \ No newline at end of file +1 GROUP2COMPILE group2compile -- +2 GROUP2GROUP group2group -- +3 HTTP_URL expr UTF8 GBK/BIG5/UNICODE/UTF8/url_encode_gb2312/url_encode_utf8 yes 128 +3 HTTP_HOST expr UTF8 GBK/BIG5/UNICODE/UTF8/url_encode_gb2312/url_encode_utf8 yes 128 +4 KEYWORDS_TABLE expr UTF8 GBK/BIG5/UNICODE/UTF8/unicode_ascii_esc/unicode_ascii_aligned/unicode_ncr_dec/unicode_ncr_hex/windows-1251 yes 0 +5 IP_CONFIG ip -- +6 CONTENT_SIZE intval -- +7 QD_ENTRY_INFO plugin 4 -- +8 FILE_DIGEST digest -- +9 HTTP_SIGNATURE expr_plus GBK GBK yes 0 +10 SIM_URL similar -- +11 IMAGE_FP expr UTF8 UTF8 yes 128 +12 TEST_EFFECTIVE_RANGE_TABLE plugin {"valid":4,"tag":5} -- +13 TEST_FOREIGN_KEY plugin {"valid":4,"foreign":[6,8],"tag":3} -- +14 COMPILE_ALIAS compile escape -- +15 TEST_PLUGIN_EXDATA_TABLE plugin {"key":2,"valid":4,"tag":5,"estimate_size":1024} -- +16 IR_INTERCEPT_IP plugin {"valid":14,"tag":18} +17 APP_PAYLOAD expr_plus UTF8 UTF8 yes 0 +18 TROJAN_PAYLOAD expr UTF8 UTF8 yes 0 +19 MAIL_ADDR expr UTF8 UTF8 yes 0 +20 IP_PLUS_CONFIG ip_plus -- +21 HTTP_RESPONSE_KEYWORDS virtual KEYWORDS_TABLE -- +22 HTTP_REQUEST_HEADER virtual HTTP_SIGNATURE -- +23 HTTP_RESPONSE_HEADER virtual HTTP_SIGNATURE -- +24 VIRTUAL_IP_PLUS_TABLE virtual IP_PLUS_CONFIG -- +24 VIRTUAL_IP_PLUS_SOURCE virtual IP_PLUS_CONFIG -- +24 VIRTUAL_IP_PLUS_DESTINATION virtual IP_PLUS_CONFIG -- +25 COMPOSITION_IP_SOURCE virtual IP_PLUS_CONFIG -- +26 COMPOSITION_IP_DESTINATION virtual IP_PLUS_CONFIG -- +27 COMPOSITION_IP_SESSION virtual IP_PLUS_CONFIG -- +28 COMPOSITION_IP composition {"source":"COMPOSITION_IP_SOURCE","destination":"COMPOSITION_IP_DESTINATION","session":"COMPOSITION_IP_SESSION"} +29 TEST_IP_PLUGIN_WITH_EXDATA ip_plugin {"row_id":1,"ip_type":2,"start_ip":3,"end_ip":4,"valid":6} -- +30 AS_NUMBER expr UTF8 UTF8 yes 0 +31 SOURCE_IP_ASN virtual AS_NUMBER -- +32 DESTINATION_IP_ASN virtual AS_NUMBER -- \ No newline at end of file diff --git a/test/test_maatframe.cpp b/test/test_maatframe.cpp index 8be1d90..a3de88b 100644 --- a/test/test_maatframe.cpp +++ b/test/test_maatframe.cpp @@ -318,7 +318,7 @@ TEST(StringScan, Full) int found_pos[4]; const char* table_name="HTTP_URL"; scan_status_t mid=NULL; - const char* scan_data="http://www.cyberessays.com/search_results.php?action=search&query=yulingjing,abckkk,1234567"; + const char* scan_data="http://www.cyberessays.com/search_results.php?action=search&query=username,abckkk,1234567"; table_id=Maat_table_register(g_feather,table_name); ASSERT_GT(table_id, 0); @@ -326,6 +326,7 @@ TEST(StringScan, Full) result,found_pos, 4, &mid, 0); EXPECT_GE(ret, 1); + EXPECT_EQ(result[0].config_id, 125); Maat_clean_status(&mid); }