/*
 * Patch summary (text ahead of the first "diff --git" header is preamble).
 *
 * Files touched:
 *   plugin/business/pangu-http/src/edit_element.cpp
 *   plugin/business/pangu-http/src/test_edit_element.cpp
 *
 * edit_element.cpp
 *   - construct_cjson_by_treatment(): the four early exits taken when *node or
 *     a_element->string differs (strcasecmp) from start_indicator now return -2
 *     instead of 0, and the final return changes from 0 to 1.
 *   - cjson_dump_array() / cjson_dump_object(): when *depth == 0 the result of
 *     construct_cjson_by_treatment() is now stored in xret (it was discarded
 *     before). The unwind branch is taken for xret == 1 or xret == -2 and
 *     returns xret instead of the literal 1.
 *   - parse_json_output_unformatted() / format_json_file_type(): match == -2
 *     now jumps to `finish` just like match == 0.
 *   - format_input_html(): "need_check" is attached to the first node of type
 *     XML_ELEMENT_NODE found by walking ->next from doc->children->next,
 *     instead of to doc->children->next itself.
 *
 * test_edit_element.cpp
 *   - Adds Libxml_Whole_Remove_Google_Test, Cjson_Inside_Mark_NO_Match,
 *     Cjson_Inside_Mark_Match and LibXML_Inside_Mark_Match.
 *
 * NOTE(review): the new loop `while(node->type != XML_ELEMENT_NODE)` has no
 *   NULL test; if no element sibling exists, node becomes NULL and the next
 *   iteration dereferences it. Consider `node != NULL && ...` plus a guard
 *   before xmlNewProp().
 * NOTE(review): Libxml_Whole_Remove_Google_Test passes input_len, which is
 *   initialised to 0 and never assigned, as the input length; input_sz holds
 *   st_size. Presumably input_sz was intended - confirm.
 * NOTE(review): the same test ignores the results of stat() and fread(); the
 *   `if(fp==NULL) return;` after ASSERT_FALSE looks redundant if the assertion
 *   is fatal - confirm against GoogleTest semantics.
 * NOTE(review): tests call strstr(output, ...) after a non-fatal EXPECT_TRUE;
 *   output starts as NULL, so if simple_edit_element() leaves it untouched the
 *   strstr call receives NULL - verify.
 * NOTE(review): this copy of the patch has its whitespace collapsed, and the
 *   HTML literal in LibXML_Inside_Mark_Match contains raw line breaks and no
 *   markup carrying "f530mmz5"; it looks damaged in transit - compare with the
 *   repository before applying.
 */
diff --git a/plugin/business/pangu-http/src/edit_element.cpp b/plugin/business/pangu-http/src/edit_element.cpp index ed686e2..988ba6e 100644 --- a/plugin/business/pangu-http/src/edit_element.cpp +++ b/plugin/business/pangu-http/src/edit_element.cpp @@ -100,14 +100,14 @@ int construct_cjson_by_treatment(cJSON *a_element, char **node, int *step, int * { if(*node != NULL && strcasecmp(*node, start_indicator) != 0) { - return 0; + return -2; } } if(a_element->type == cJSON_Array) { if(a_element->string != NULL && strcasecmp(a_element->string, start_indicator)) { - return 0; + return -2; } } } @@ -135,14 +135,14 @@ int construct_cjson_by_treatment(cJSON *a_element, char **node, int *step, int * { if(*node != NULL && strcasecmp(*node, start_indicator) != 0) { - return 0; + return -2; } } if(a_element->type == cJSON_Array) { if(a_element->string != NULL && strcasecmp(a_element->string, start_indicator)) { - return 0; + return -2; } } } @@ -157,7 +157,7 @@ int construct_cjson_by_treatment(cJSON *a_element, char **node, int *step, int * } } - return 0; + return 1; } int construct_html_by_treatment(const struct edit_element_rule * rules, xmlNodePtr node, xmlNodePtr *parent_array, size_t *n_parent, int *match) @@ -271,15 +271,15 @@ int cjson_dump_array(cJSON *a, int *depth, int *step, int *step_array_level, cha } if(*depth == 0) { - construct_cjson_by_treatment(a_element, node, step, step_array_level, rules); + xret = construct_cjson_by_treatment(a_element, node, step, step_array_level, rules); } - if(xret == 1) + if(xret == 1 || xret == -2) { *step = (*step >= 2047) ? 
2047 : *step; step_array_level[*step] = array_cnt; *node = a_element->string; *depth = *depth -1; - return 1; + return xret; } array_cnt++; a_element = a_element->next; @@ -302,13 +302,13 @@ int cjson_dump_object(cJSON *a, int *depth, int *step, int *step_array_level, ch } if(*depth == 0) { - construct_cjson_by_treatment(a_element, node, step, step_array_level, rules); + xret = construct_cjson_by_treatment(a_element, node, step, step_array_level, rules); } - if(xret == 1) + if(xret == 1 || xret == -2) { *node = a_element->string; *depth = *depth -1; - return 1; + return xret; } } return xret; @@ -526,7 +526,7 @@ size_t parse_json_output_unformatted(const char * in, size_t in_sz, const struct } } - if(match == 0) + if(match == 0 || match == -2) { goto finish; } @@ -587,7 +587,7 @@ size_t format_json_file_type(const char * in, size_t in_sz, const struct edit_el match_num--; } - if(match == 0) + if(match == 0 || match == -2) { goto finish; } @@ -751,7 +751,12 @@ size_t format_input_html(const char * in, size_t in_sz, const struct edit_elemen { if(doc->children != NULL && doc->children->next != NULL) { - xmlNewProp(doc->children->next, (const xmlChar *)"need_check", (const xmlChar *)"true"); + xmlNodePtr node = doc->children->next; + while(node->type != XML_ELEMENT_NODE) + { + node = node->next; + } + xmlNewProp(node, (const xmlChar *)"need_check", (const xmlChar *)"true"); } else if(doc->children != NULL) { diff --git a/plugin/business/pangu-http/src/test_edit_element.cpp b/plugin/business/pangu-http/src/test_edit_element.cpp index 07b2eb8..5893846 100644 --- a/plugin/business/pangu-http/src/test_edit_element.cpp +++ b/plugin/business/pangu-http/src/test_edit_element.cpp @@ -134,6 +134,95 @@ TEST(EditElement, Libxml_Inside_Remove_Facebook) free(output); } +TEST(EditElement, Libxml_Whole_Remove_Google_Test) +{ + char* output=NULL; + size_t output_sz=0,input_len=0; + + const char* filename="./test_data/google_search_gtest_cnblog.html"; + + FILE* fp=NULL; + struct stat 
file_info; + stat(filename, &file_info); + size_t input_sz=file_info.st_size; + + fp=fopen(filename,"r"); + ASSERT_FALSE(fp==NULL); + if(fp==NULL) + { + return; + } + char* input=(char*)malloc(input_sz); + fread(input,1,input_sz,fp); + fclose(fp); + + const char *user_region = "{\"rules\":[{\"anchor_element\":{\"search_scope\":\"whole_file\",\"start_indicator\":\"g\",\"contained_keyword\":\"thrift-service-framework/src/test/java/com/cnblogs/yjmyzz at master\"},\ + \"target_element\":{\"target_distance_from_matching\":4,\"element_treatment\":\"remove\"}}]}"; + + simple_edit_element(user_region, input, input_len, &output, &output_sz, 0); + EXPECT_TRUE(output_sz>0); + + EXPECT_TRUE(NULL==strstr(output, "thrift-service-framework/src/test/java/com/cnblogs/yjmyzz at master")); + free(input); + free(output); +} + +TEST(EditElement, Cjson_Inside_Mark_NO_Match) +{ + char* output=NULL; + size_t output_sz=0; + + const char *input ="{\"A\":\"B\",\"C\":[\"D\", \"E\"],\"F\":{\"G\":{\"H\":\"I\"}},\"J\":{\"K\":[{\"L\":\"M\"}],\"N\":\"Q\"}}\n\n{\"data\":{\"user\":{\"id\":\"1\",\"units\":\ + {\"edges\":[{\"eligible\":true,\"story\":{\"message\":{\"text\":\"Firefox 3.6 with System\\uff0c\\u7f16\\u8f91\"}}}]}}}}"; + + const char *user_region = "{\"rules\":[{\"anchor_element\":{\"search_scope\":\"inside_element\",\"start_indicator\":\"error_message\",\"contained_keyword\":\"Firefox 3.6 with System\"},\ + \"target_element\":{\"target_distance_from_matching\":1,\"element_treatment\":\"mark\"}}]}"; + + simple_edit_element(user_region, input, strlen(input), &output, &output_sz, 0); + EXPECT_TRUE(output_sz==0); + + free(output); +} + +TEST(EditElement, Cjson_Inside_Mark_Match) +{ + char* output=NULL; + size_t output_sz=0; + + const char *input ="{\"A\":\"B\",\"C\":[\"D\", \"E\"],\"F\":{\"G\":{\"H\":\"I\"}},\"J\":{\"K\":[{\"L\":\"M\"}],\"N\":\"Q\"}}\n\n{\"data\":{\"user\":{\"id\":\"1\",\"units\":\ + {\"edges\":[{\"eligible\":true,\"story\":{\"message\":{\"text\":\"Firefox 3.6 with 
System\\uff0c\\u7f16\\u8f91\"}}}]}}}}"; + + const char *user_region = "{\"rules\":[{\"anchor_element\":{\"search_scope\":\"inside_element\",\"start_indicator\":\"message\",\"contained_keyword\":\"Firefox 3.6 with System\"},\ + \"target_element\":{\"target_distance_from_matching\":1,\"element_treatment\":\"mark\"}}]}"; + + simple_edit_element(user_region, input, strlen(input), &output, &output_sz, 0); + EXPECT_TRUE(output_sz>0); + + EXPECT_TRUE(NULL!=strstr(output, "need_filter")); + EXPECT_TRUE(NULL!=strstr(output, "need_check")); + + free(output); +} + +TEST(EditElement, LibXML_Inside_Mark_Match) +{ + char* output=NULL; + size_t output_sz=0; + + const char *input = "\n\n\n\ +\nFacebook\n\n\n
Firefox 3.6 with System
\n"; + + const char *user_region = "{\"rules\":[{\"anchor_element\":{\"search_scope\":\"inside_element\",\"start_indicator\":\"f530mmz5\",\"contained_keyword\":\"Firefox 3.6 with System\"},\ + \"target_element\":{\"target_distance_from_matching\":1,\"element_treatment\":\"mark\"}}]}"; + + simple_edit_element(user_region, input, strlen(input), &output, &output_sz, 0); + EXPECT_TRUE(output_sz>0); + + EXPECT_TRUE(NULL!=strstr(output, "need_filter")); + EXPECT_TRUE(NULL!=strstr(output, "need_check")); + free(output); +} + int main(int argc, char ** argv) {