TSG-8935 修复JSON数组格式MARK标记问题

增加元素编辑自测试用例,文件名和变量变更
This commit is contained in:
fengweihao
2021-12-17 16:39:49 +08:00
parent 3e3dedadcd
commit 84ce78c1ce
5 changed files with 308 additions and 50 deletions

View File

@@ -1,4 +1,4 @@
add_library(pangu-http src/pangu_logger.cpp src/pangu_http.cpp src/pattern_replace.cpp src/pangu_web_cache.cpp src/pangu_element_edit.cpp)
add_library(pangu-http src/pangu_logger.cpp src/pangu_http.cpp src/pattern_replace.cpp src/pangu_web_cache.cpp src/edit_element.cpp)
target_link_libraries(pangu-http PUBLIC common http tango-cache-client)
target_link_libraries(pangu-http PUBLIC rdkafka ctemplate-static cjson pcre2-static ratelimiter-static libdablooms pthread)
target_link_libraries(pangu-http PUBLIC maatframe)
@@ -8,5 +8,8 @@ add_executable(test_pattern_replace src/test_pattern_replace.cpp src/pattern_rep
target_link_libraries(test_pattern_replace common gtest pcre2-static)
file(COPY test_data DESTINATION ./)
add_executable(test_edit_element src/test_edit_element.cpp src/edit_element.cpp)
target_link_libraries(test_edit_element common gtest pcre2-static libxml2-static z)
add_executable(replace_tool src/replace_tool.cpp src/pattern_replace.cpp)
target_link_libraries(replace_tool common pcre2-static)

View File

@@ -10,11 +10,11 @@
#include <libxml/HTMLparser.h>
#include <cjson/cJSON.h>
#include "pangu_element_edit.h"
#include "edit_element.h"
int cjson_element_foreach(cJSON *a, int *depth, int *step, int *step_level, char **node, const struct element_rule * rules, int *match_num, int loop);
static void html_node_list(const struct element_rule * rules, xmlNodePtr node, xmlNodePtr *parent_array, size_t *n_parent, int *match);
size_t parse_json_output_unformatted(const char * in, size_t in_sz, const struct element_rule * rules, char** out);
int cjson_element_foreach(cJSON *a, int *depth, int *step, int *step_level, char **node, const struct edit_element_rule * rules, int *match_num, int loop);
static void html_node_list(const struct edit_element_rule * rules, xmlNodePtr node, xmlNodePtr *parent_array, size_t *n_parent, int *match);
size_t parse_json_output_unformatted(const char * in, size_t in_sz, const struct edit_element_rule * rules, char** out);
enum search_scope scope_name_to_id(const char * name)
{
@@ -52,7 +52,7 @@ int match_start_indicator(xmlNodePtr parent, char * start_indicator)
return 0;
}
int construct_cjson_by_treatment(cJSON *a_element, char **node, int *step, int *step_level, const struct element_rule * rules)
int construct_cjson_by_treatment(cJSON *a_element, char **node, int *step, int *step_array_level, const struct edit_element_rule * rules)
{
const char *element_treatment=rules->element_treatment;
char * start_indicator = rules->start_indicator;
@@ -81,6 +81,15 @@ int construct_cjson_by_treatment(cJSON *a_element, char **node, int *step, int *
{
cJSON_AddBoolToObject(a_element, "need_filter", true);
}
if(a_element->type == cJSON_Array)
{
cJSON *object = NULL;
object = cJSON_GetArrayItem(a_element, step_array_level[*step]);
if(object != NULL)
{
cJSON_AddBoolToObject(object, "need_filter", true);
}
}
}
if(element_treatment != NULL && !strcasecmp(element_treatment, "remove"))
@@ -109,14 +118,14 @@ int construct_cjson_by_treatment(cJSON *a_element, char **node, int *step, int *
}
if(a_element->type == cJSON_Array)
{
cJSON_DeleteItemFromArray(a_element, step_level[*step]);
cJSON_DeleteItemFromArray(a_element, step_array_level[*step]);
}
}
return 0;
}
int construct_html_by_treatment(const struct element_rule * rules, xmlNodePtr node, xmlNodePtr *parent_array, size_t *n_parent, int *match)
int construct_html_by_treatment(const struct edit_element_rule * rules, xmlNodePtr node, xmlNodePtr *parent_array, size_t *n_parent, int *match)
{
int k=0;
char *new_out=NULL;
@@ -210,7 +219,7 @@ int construct_html_by_treatment(const struct element_rule * rules, xmlNodePtr no
return 0;
}
int cjson_dump_array(cJSON *a, int *depth, int *step, int *step_level, char **node, const struct element_rule * rules, int *match_num, int loop)
int cjson_dump_array(cJSON *a, int *depth, int *step, int *step_array_level, char **node, const struct edit_element_rule * rules, int *match_num, int loop)
{
int xret=0, array_cnt=0;
@@ -218,18 +227,19 @@ int cjson_dump_array(cJSON *a, int *depth, int *step, int *step_level, char **no
*step= *step + 1;
for (; (a_element != NULL);)
{
xret = cjson_element_foreach(a_element, depth, step, step_level, node, rules, match_num, loop);
xret = cjson_element_foreach(a_element, depth, step, step_array_level, node, rules, match_num, loop);
if(xret == -1)
{
return -1;
}
if(*depth == 0)
{
construct_cjson_by_treatment(a_element, node, step, step_level, rules);
construct_cjson_by_treatment(a_element, node, step, step_array_level, rules);
}
if(xret == 1)
{
step_level[*step] = array_cnt;
*step = (*step >= 2047) ? 2047 : *step;
step_array_level[*step] = array_cnt;
*node = a_element->string;
*depth = *depth -1;
return 1;
@@ -241,21 +251,21 @@ int cjson_dump_array(cJSON *a, int *depth, int *step, int *step_level, char **no
return xret;
}
int cjson_dump_object(cJSON *a, int *depth, int *step, int *step_level, char **node, const struct element_rule * rules, int *match_num, int loop)
int cjson_dump_object(cJSON *a, int *depth, int *step, int *step_array_level, char **node, const struct edit_element_rule * rules, int *match_num, int loop)
{
int xret=0;
cJSON *a_element=NULL;
cJSON_ArrayForEach(a_element, a)
{
xret = cjson_element_foreach(a_element, depth, step, step_level, node, rules, match_num, loop);
xret = cjson_element_foreach(a_element, depth, step, step_array_level, node, rules, match_num, loop);
if(xret == -1)
{
return -1;
}
if(*depth == 0)
{
construct_cjson_by_treatment(a_element, node, step, step_level, rules);
construct_cjson_by_treatment(a_element, node, step, step_array_level, rules);
}
if(xret == 1)
{
@@ -267,7 +277,7 @@ int cjson_dump_object(cJSON *a, int *depth, int *step, int *step_level, char **n
return xret;
}
int cjson_dump_string(cJSON *a, int *depth, const struct element_rule * rules, int *match_num, int loop)
int cjson_dump_string(cJSON *a, int *depth, const struct edit_element_rule * rules, int *match_num, int loop)
{
int xret=0;
@@ -296,7 +306,7 @@ finish:
return xret;
}
int cjson_element_foreach(cJSON *a, int *depth, int *step, int *step_level, char **node, const struct element_rule * rules, int *match_num, int loop)
int cjson_element_foreach(cJSON *a, int *depth, int *step, int *step_array_level, char **node, const struct edit_element_rule * rules, int *match_num, int loop)
{
if ((a == NULL) || cJSON_IsInvalid(a))
{
@@ -310,10 +320,10 @@ int cjson_element_foreach(cJSON *a, int *depth, int *step, int *step_level, char
return cjson_dump_string(a, depth, rules, match_num, loop);
case cJSON_Array:
return cjson_dump_array(a, depth, step, step_level, node, rules, match_num, loop);
return cjson_dump_array(a, depth, step, step_array_level, node, rules, match_num, loop);
case cJSON_Object:
return cjson_dump_object(a, depth, step, step_level, node, rules, match_num, loop);
return cjson_dump_object(a, depth, step, step_array_level, node, rules, match_num, loop);
case cJSON_Number:
case cJSON_False:
@@ -335,7 +345,7 @@ static void html_namespace_list(xmlNsPtr ns)
}
}
static void html_attr_list(const struct element_rule * rules, xmlAttrPtr attr, xmlNodePtr *parent_array, size_t *n_parent, int *match)
static void html_attr_list(const struct edit_element_rule * rules, xmlAttrPtr attr, xmlNodePtr *parent_array, size_t *n_parent, int *match)
{
while (attr != NULL)
{
@@ -348,7 +358,7 @@ static void html_attr_list(const struct element_rule * rules, xmlAttrPtr attr, x
}
}
static void html_dump_one_node(const struct element_rule * rules, xmlNodePtr node, xmlNodePtr *parent_array, size_t *n_parent, int *match)
static void html_dump_one_node(const struct edit_element_rule * rules, xmlNodePtr node, xmlNodePtr *parent_array, size_t *n_parent, int *match)
{
switch (node->type)
{
@@ -387,7 +397,7 @@ static void html_dump_one_node(const struct element_rule * rules, xmlNodePtr nod
}
}
static void html_dump_node(const struct element_rule * rules, xmlNodePtr node, xmlNodePtr *parent_array, size_t *n_parent, int *match)
static void html_dump_node(const struct edit_element_rule * rules, xmlNodePtr node, xmlNodePtr *parent_array, size_t *n_parent, int *match)
{
html_dump_one_node(rules, node, parent_array, n_parent, match);
if ((node->type != XML_NAMESPACE_DECL) && (node->children != NULL) && (node->type != XML_ENTITY_REF_NODE))
@@ -396,7 +406,7 @@ static void html_dump_node(const struct element_rule * rules, xmlNodePtr node, x
}
}
static void html_node_list(const struct element_rule * rules, xmlNodePtr node, xmlNodePtr *parent_array, size_t *n_parent, int *match)
static void html_node_list(const struct edit_element_rule * rules, xmlNodePtr node, xmlNodePtr *parent_array, size_t *n_parent, int *match)
{
while (node != NULL)
{
@@ -405,7 +415,7 @@ static void html_node_list(const struct element_rule * rules, xmlNodePtr node, x
}
}
static void html_element_foreach(const struct element_rule * rules, xmlDocPtr doc, xmlNodePtr *parent_array, size_t *n_parent, int *match)
static void html_element_foreach(const struct edit_element_rule * rules, xmlDocPtr doc, xmlNodePtr *parent_array, size_t *n_parent, int *match)
{
if (((doc->type == XML_DOCUMENT_NODE) || (doc->type == XML_HTML_DOCUMENT_NODE)) && (doc->children != NULL))
{
@@ -413,7 +423,7 @@ static void html_element_foreach(const struct element_rule * rules, xmlDocPtr do
}
}
size_t parse_json_output_unformatted(const char * in, size_t in_sz, const struct element_rule * rules, char** out)
size_t parse_json_output_unformatted(const char * in, size_t in_sz, const struct edit_element_rule * rules, char** out)
{
int match_num_peer=0;
int step=0, depth=0, match_num=0,i=0, match=0;
@@ -422,7 +432,7 @@ size_t parse_json_output_unformatted(const char * in, size_t in_sz, const struct
size_t outlen=0;
char *element_treatment=NULL;
int step_level[2048] = {0};
int step_array_level[2048] = {0};
interator = cJSON_Parse(in);
if(interator==NULL)
@@ -433,15 +443,15 @@ size_t parse_json_output_unformatted(const char * in, size_t in_sz, const struct
depth = -1;
element_treatment=rules->element_treatment;
cjson_element_foreach(interator, &depth, &step, step_level, &node, rules, &match_num, 0);
cjson_element_foreach(interator, &depth, &step, step_array_level, &node, rules, &match_num, 0);
match_num_peer = match_num;
for(i=0; i< match_num_peer; i++)
{
depth = (rules->distane_from_matching + 1);
step=0; node=NULL; match_num=0;
memset(step_level, 0, sizeof(step_level));
memset(step_array_level, 0, sizeof(step_array_level));
match |= cjson_element_foreach(interator, &depth, &step, step_level, &node, rules, &match_num, i);
match |= cjson_element_foreach(interator, &depth, &step, step_array_level, &node, rules, &match_num, i);
if(!strcasecmp(element_treatment, "remove") && match == 1 && node != NULL && depth == 0)
{
@@ -455,6 +465,15 @@ size_t parse_json_output_unformatted(const char * in, size_t in_sz, const struct
{
cJSON_AddBoolToObject(interator, "need_check", true);
}
if(interator->type==cJSON_Array)
{
cJSON *child = interator->child;
for (; (child != NULL);)
{
cJSON_AddBoolToObject(child, "need_check", true);
child = child->next;
}
}
}
new_out = cJSON_PrintUnformatted(interator);
@@ -470,7 +489,7 @@ finish:
return outlen;
}
size_t format_json_file_type(const char * in, size_t in_sz, const struct element_rule * rules, char** out)
size_t format_json_file_type(const char * in, size_t in_sz, const struct edit_element_rule * rules, char** out)
{
int match_num_peer=0;
int step=0, depth=0, match=0, i=0;
@@ -479,7 +498,7 @@ size_t format_json_file_type(const char * in, size_t in_sz, const struct element
size_t outlen=0; int match_num=0;
char *element_treatment=NULL;
int step_level[2048] = {0};
int step_array_level[2048] = {0};
char*new_in = ALLOC(char, in_sz+1);
memcpy(new_in, in, in_sz);
@@ -495,15 +514,15 @@ size_t format_json_file_type(const char * in, size_t in_sz, const struct element
/* When matched nodes contain one another, a single deletion does not clear
   every match from the cJSON tree, so the traversal is repeated once per match. */
cjson_element_foreach(interator, &depth, &step, step_level, &node, rules, &match_num, 0);
cjson_element_foreach(interator, &depth, &step, step_array_level, &node, rules, &match_num, 0);
match_num_peer = match_num;
for(i=0; i< match_num_peer; i++)
{
depth = (rules->distane_from_matching + 1);
step=0; node=NULL; match_num=0;
memset(step_level, 0, sizeof(step_level));
memset(step_array_level, 0, sizeof(step_array_level));
match |= cjson_element_foreach(interator, &depth, &step, step_level, &node, rules, &match_num, i);
match |= cjson_element_foreach(interator, &depth, &step, step_array_level, &node, rules, &match_num, i);
if(!strcasecmp(element_treatment, "remove") && match == 1 && node != NULL && depth == 0)
{
@@ -524,9 +543,19 @@ size_t format_json_file_type(const char * in, size_t in_sz, const struct element
{
cJSON_AddBoolToObject(interator, "need_check", true);
}
if(interator->type==cJSON_Array)
{
cJSON *child = interator->child;
for (; (child != NULL);)
{
cJSON_AddBoolToObject(child, "need_check", true);
child = child->next;
}
}
}
new_out = cJSON_Print(interator);
new_out = cJSON_PrintUnformatted(interator);
if(new_out!=NULL)
{
*out = new_out;
@@ -540,7 +569,7 @@ finish:
return outlen;
}
size_t format_multidelete_json_type(const char * in, size_t in_sz, const struct element_rule * rules, char** out)
size_t format_multidelete_json_type(const char * in, size_t in_sz, const struct edit_element_rule * rules, char** out)
{
char *new_out=NULL, *pre_out=NULL;
char * tmp = ALLOC(char, in_sz+1);
@@ -596,7 +625,7 @@ size_t construct_format_html(htmlDocPtr doc, char**out)
goto finish;
}
saveCtxtPtr = xmlSaveToBuffer(out_buffer, "UTF-8", XML_SAVE_NO_DECL);
saveCtxtPtr = xmlSaveToBuffer(out_buffer, "UTF-8", XML_SAVE_NO_DECL | XML_SAVE_AS_HTML);
if (xmlSaveDoc(saveCtxtPtr, doc) < 0)
{
goto finish;
@@ -622,15 +651,14 @@ finish:
return outlen;
}
size_t format_input_html(const char * in, size_t in_sz, const struct element_rule * rules, char** out)
size_t format_input_html(const char * in, size_t in_sz, const struct edit_element_rule * rules, char** out)
{
size_t outlen=0, n_parent=0, n_parent_peer=0;
int match=0, i=0;
htmlDocPtr doc = NULL;
const char *element_treatment=NULL;
xmlNodePtr parent_array[16];
int options = XML_PARSE_NOERROR;
int options = XML_PARSE_NOERROR | HTML_PARSE_NODEFDTD;
doc = htmlReadMemory(in, in_sz, NULL, NULL, options);
if (doc == NULL)
@@ -669,6 +697,10 @@ size_t format_input_html(const char * in, size_t in_sz, const struct element_rul
{
xmlNewProp(doc->children->next, (const xmlChar *)"need_check", (const xmlChar *)"true");
}
else if(doc->children != NULL)
{
xmlNewProp(doc->children, (const xmlChar *)"need_check", (const xmlChar *)"true");
}
}
outlen = construct_format_html(doc, out);
@@ -685,7 +717,7 @@ finish:
return outlen;
}
size_t format_html_file_type(const char * interator, size_t interator_sz, const struct element_rule *rule, char **new_out)
size_t format_html_file_type(const char * interator, size_t interator_sz, const struct edit_element_rule *rule, char **new_out)
{
size_t output_size=0;
@@ -701,7 +733,7 @@ size_t format_html_file_type(const char * interator, size_t interator_sz, const
return output_size;
}
size_t parse_string(const char * interator, size_t interator_sz, const struct element_rule *rule, char **new_out, int options)
size_t parse_string(const char * interator, size_t interator_sz, const struct edit_element_rule *rule, char **new_out, int options)
{
size_t output_size=0;
@@ -717,9 +749,9 @@ size_t parse_string(const char * interator, size_t interator_sz, const struct el
return output_size;
}
size_t execute_edit_element_rule(const char * in, size_t in_sz, const struct element_rule *rules, size_t n_rule, char** out, int options)
size_t execute_edit_element_rule(const char * in, size_t in_sz, const struct edit_element_rule *rules, size_t n_rule, char** out, int options)
{
const struct element_rule * todo = rules;
const struct edit_element_rule * todo = rules;
size_t i = 0, interator_sz=0, pre_out_sz=0;
const char * interator = NULL;
char* new_out = NULL, * pre_out = NULL;
@@ -762,3 +794,82 @@ size_t execute_edit_element_rule(const char * in, size_t in_sz, const struct ele
}
}
size_t __attribute__((__unused__))
format_edit_element_rule(struct edit_element_rule *edit_element, const char *user_region, size_t n_edit_element)
{
size_t idx=0;
cJSON *json=NULL, *rules=NULL, *item=NULL, *sub_item=NULL;
json=cJSON_Parse(user_region);
if(json !=NULL )
{
rules = cJSON_GetObjectItem(json, "rules");
if(rules == NULL)
{
goto finish;
}
idx = 0;
for (item = rules->child; item != NULL; item = item->next)
{
sub_item=cJSON_GetObjectItem(item,"anchor_element");
if(sub_item != NULL && sub_item->type ==cJSON_Object)
{
char * search_scope = cJSON_GetObjectItem(sub_item , "search_scope")->valuestring;
if (search_scope == NULL) break;
edit_element[idx].scope = scope_name_to_id(search_scope);
if (edit_element[idx].scope == KScopeMax)
{
break;
}
if(edit_element[idx].scope == kScopeInside)
{
edit_element[idx].start_indicator = tfe_strdup(cJSON_GetObjectItem(sub_item , "start_indicator")->valuestring);
}
edit_element[idx].contained_keyword = tfe_strdup(cJSON_GetObjectItem(sub_item,"contained_keyword")->valuestring);
}
sub_item=cJSON_GetObjectItem(item,"target_element");
if(sub_item != NULL && sub_item->type ==cJSON_Object)
{
edit_element[idx].distane_from_matching = cJSON_GetObjectItem(sub_item , "target_distance_from_matching")->valueint;
edit_element[idx].element_treatment = tfe_strdup(cJSON_GetObjectItem(sub_item,"element_treatment")->valuestring);
}
if (idx == n_edit_element)
{
break;
}
idx++;
}
}
finish:
if (json) cJSON_Delete(json);
return idx;
}
/*
 * One-shot helper: parse the rules in `user_region`, apply them to `input`
 * and release the parsed rules again.
 *
 * user_region  JSON rule description, handed to format_edit_element_rule().
 * input        document to edit.
 * in_sz        length of `input` in bytes; 0 means "measure it with strlen()",
 *              which keeps callers that relied on the previous behaviour
 *              (length always taken from strlen) working.
 * output       receives the buffer produced by execute_edit_element_rule().
 * output_sz    receives the value returned by execute_edit_element_rule().
 * options      forwarded unchanged to execute_edit_element_rule().
 */
void simple_edit_element(const char *user_region, const char* input, size_t in_sz, char** output, size_t *output_sz, int options)
{
	size_t n_got_rule=0, i=0, input_len=0;
	struct edit_element_rule rules[16];

	if(input == NULL)
	{
		/* Nothing to edit; avoid calling strlen() on a NULL pointer. */
		*output_sz=0;
		return;
	}

	memset(rules, 0, sizeof(rules));
	input_len = (in_sz > 0) ? in_sz : strlen(input);

	n_got_rule=format_edit_element_rule(rules, user_region, sizeof(rules)/sizeof(rules[0]));
	*output_sz=execute_edit_element_rule(input, input_len, rules, n_got_rule, output, options);

	/* Release the strings duplicated while parsing the rules. */
	for(i=0; i<n_got_rule; i++)
	{
		if(rules[i].start_indicator!=NULL)
		{
			FREE(&(rules[i].start_indicator));
		}
		FREE(&(rules[i].element_treatment));
		FREE(&(rules[i].contained_keyword));
	}
	return;
}

View File

@@ -8,7 +8,7 @@ enum search_scope
KScopeMax
};
struct element_rule
struct edit_element_rule
{
enum search_scope scope;
int distane_from_matching;
@@ -17,6 +17,8 @@ struct element_rule
char * contained_keyword;
};
size_t execute_edit_element_rule(const char * in, size_t in_sz, const struct element_rule *rules, size_t n_rule, char** out, int options);
size_t execute_edit_element_rule(const char * in, size_t in_sz, const struct edit_element_rule *rules, size_t n_rule, char** out, int options);
enum search_scope scope_name_to_id(const char * name);
void simple_edit_element(const char *user_region, const char* input, size_t in_sz, char** output, size_t *output_sz, int options);

View File

@@ -1,5 +1,5 @@
#include "pangu_logger.h"
#include "pangu_element_edit.h"
#include "edit_element.h"
#include "pattern_replace.h"
#include "pangu_web_cache.h"
@@ -126,7 +126,7 @@ struct policy_action_param
struct replace_rule *repl_rule;
size_t e_rule;
struct element_rule *elem_rule;
struct edit_element_rule *elem_rule;
pthread_mutex_t lock;
};
@@ -486,7 +486,7 @@ void policy_action_param_new(int idx, const struct Maat_rule_t* rule, const char
break;
}
rule_id = 0;
param->elem_rule = ALLOC(struct element_rule, MAX_EDIT_ZONE_NUM);
param->elem_rule = ALLOC(struct edit_element_rule, MAX_EDIT_ZONE_NUM);
for (item = rules->child; item != NULL; item = item->next)
{
sub_item=cJSON_GetObjectItem(item,"anchor_element");
@@ -995,7 +995,7 @@ struct insert_ctx
struct edit_element_ctx
{
struct element_rule *item;
struct edit_element_rule *item;
size_t n_item;
struct tfe_http_half * editing;
struct evbuffer *http_body;

View File

@@ -0,0 +1,142 @@
#include <tfe_utils.h>
#include "edit_element.h"
#include <sys/types.h>
#include <sys/ioctl.h>
#include <string.h>
#include <stdio.h>
#include <gtest/gtest.h>
/* HTML input, whole_file scope, "remove" at distance 0: the keyword must no
 * longer appear in the edited document. */
TEST(EditElement, Libxml_Whole_Remove_Index01)
{
	char *output = NULL;
	size_t output_sz = 0;
	const char *input = "<html>\n\n<head>\n<title>index</title>\n</head>\n\n<body>\n<p>body</p>\n<p>hello world</p>\n<a href=\"edit.html\">跳转</a>\n<script onerror=\"mimgError()\" src=\"https://mimg.127.net/p/freemail/lib/polyfill/es5-polyfill.js\"></script>\n</body>\n\n</html>\n";
	const char *user_region =
		"{\"rules\":[{\"anchor_element\":{\"search_scope\":\"whole_file\",\"contained_keyword\":\"hello world\"},"
		"\"target_element\":{\"target_distance_from_matching\":0,\"element_treatment\":\"remove\"}}]}";

	simple_edit_element(user_region, input, strlen(input), &output, &output_sz, 0);

	/* Fatal check: a NULL output must not reach printf("%s") / strstr below. */
	ASSERT_TRUE(output != NULL);
	EXPECT_TRUE(output_sz>0);
	printf("output = %s\n", output);
	EXPECT_TRUE(NULL==strstr(output, "hello world"));

	free(output);
}
/* HTML input, whole_file scope, "mark" at distance 0: the output is compared
 * byte-for-byte against a document carrying need_check / need_filter
 * attributes. */
TEST(EditElement, Libxml_Whole_Mark_Index01)
{
	char *output = NULL;
	size_t output_sz = 0;
	const char *input = "<html>\n\n<head>\n<title>index</title>\n</head>\n\n<body>\n<p>body</p>\n<p>hello world</p>\n<a href=\"edit.html\">跳转</a>\n<script onerror=\"mimgError()\" src=\"https://mimg.127.net/p/freemail/lib/polyfill/es5-polyfill.js\"></script>\n</body>\n\n</html>\n";
	const char *user_region =
		"{\"rules\":[{\"anchor_element\":{\"search_scope\":\"whole_file\",\"contained_keyword\":\"hello world\"},"
		"\"target_element\":{\"target_distance_from_matching\":0,\"element_treatment\":\"mark\"}}]}";
	const char *expect_output = "<html need_check=\"true\">\n\n<head><meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\">\n<title>index</title>\n</head>\n\n<body>\n<p>body</p>\n<p need_filter=\"true\">hello world</p>\n<a href=\"edit.html\">跳转</a>\n<script onerror=\"mimgError()\" src=\"https://mimg.127.net/p/freemail/lib/polyfill/es5-polyfill.js\"></script>\n</body>\n\n</html>\n";

	simple_edit_element(user_region, input, strlen(input), &output, &output_sz, 0);

	/* Fatal check: a NULL output must not reach printf("%s") or the string
	 * comparison below. */
	ASSERT_TRUE(output != NULL);
	EXPECT_TRUE(output_sz>0);
	printf("output = %s\n", output);
	/* EXPECT_STREQ prints both strings on mismatch, unlike a bare strcmp. */
	EXPECT_STREQ(expect_output, output);

	free(output);
}
/* HTML input, inside_element scope with start indicator "LC20lb", "remove" at
 * distance 0: the keyword must no longer appear in the edited document. */
TEST(EditElement, Libxml_Inside_Remove_Index01)
{
	char *output = NULL;
	size_t output_sz = 0;
	const char *input =
		"<html>\n\n<head>\n<title>index</title>\n</head>\n\n<body>\n<p>body</p>\n<p class=\"LC20lb\">hello world</p>\n<a href=\"edit.html\">"
		"跳转</a>\n<script onerror=\"mimgError()\" src=\"https://mimg.127.net/p/freemail/lib/polyfill/es5-polyfill.js\"></script>\n</body>\n\n</html>\n";
	const char *user_region =
		"{\"rules\":[{\"anchor_element\":{\"search_scope\":\"inside_element\",\"start_indicator\":\"LC20lb\",\"contained_keyword\":\"hello world\"},"
		"\"target_element\":{\"target_distance_from_matching\":0,\"element_treatment\":\"remove\"}}]}";

	simple_edit_element(user_region, input, strlen(input), &output, &output_sz, 0);

	/* Fatal check: a NULL output must not reach printf("%s") / strstr below. */
	ASSERT_TRUE(output != NULL);
	EXPECT_TRUE(output_sz>0);
	printf("output = %s\n", output);
	EXPECT_TRUE(NULL==strstr(output, "hello world"));

	free(output);
}
/* JSON input, whole_file scope, "remove" at distance 2: the keyword
 * "192.168.50.37" must no longer appear in the edited document. */
TEST(EditElement, Cjson_Whole_Remove_Simple)
{
	char *output = NULL;
	size_t output_sz = 0;
	const char *input = "{\"testkey\":\"value\",\"verifyList\":[{\"policyType\":\"tsg_security\",\"verifySession\":{\"attributes\":[{\"attributeType\":\"ip\",\"attributeValue\":{\"ip\":\"192.168.50.37\"}},{\"attributeType\":\"ip\",\"attributeValue\":{\"ip\":\"2.5.6.7\"}},{\"attributeType\":\"ip\",\"attributeValue\":{\"ip\":\"192.168.50.37\"}}]}}]}";
	const char *user_region =
		"{\"rules\":[{\"anchor_element\":{\"search_scope\":\"whole_file\",\"contained_keyword\":\"192.168.50.37\"},"
		"\"target_element\":{\"target_distance_from_matching\":2,\"element_treatment\":\"remove\"}}]}";

	simple_edit_element(user_region, input, strlen(input), &output, &output_sz, 0);

	/* Fatal check: a NULL output must not reach printf("%s") / strstr below. */
	ASSERT_TRUE(output != NULL);
	EXPECT_TRUE(output_sz>0);
	printf("output = %s\n", output);
	EXPECT_TRUE(NULL==strstr(output, "192.168.50.37"));

	free(output);
}
/* JSON input, whole_file scope, "mark" at distance 2: the output is compared
 * byte-for-byte against a document carrying need_filter / need_check
 * members. */
TEST(EditElement, Cjson_Whole_mark_Simple)
{
	char *output = NULL;
	size_t output_sz = 0;
	const char *input = "{\"testkey\":\"value\",\"verifyList\":[{\"policyType\":\"tsg_security\",\"verifySession\":{\"attributes\":[{\"attributeType\":\"ip\",\"attributeValue\":{\"ip\":\"192.168.50.37\"}},{\"attributeType\":\"ip\",\"attributeValue\":{\"ip\":\"2.5.6.7\"}},{\"attributeType\":\"ip\",\"attributeValue\":{\"ip\":\"192.168.50.37\"}}]}}]}";
	const char *user_region =
		"{\"rules\":[{\"anchor_element\":{\"search_scope\":\"whole_file\",\"contained_keyword\":\"192.168.50.37\"},"
		"\"target_element\":{\"target_distance_from_matching\":2,\"element_treatment\":\"mark\"}}]}";
	const char *expect_output = "{\"testkey\":\"value\",\"verifyList\":[{\"policyType\":\"tsg_security\",\"verifySession\":{\"attributes\":[{\"attributeType\":\"ip\",\"attributeValue\":{\"ip\":\"192.168.50.37\"},\"need_filter\":true},{\"attributeType\":\"ip\",\"attributeValue\":{\"ip\":\"2.5.6.7\"}},{\"attributeType\":\"ip\",\"attributeValue\":{\"ip\":\"192.168.50.37\"},\"need_filter\":true}]}}],\"need_check\":true}";

	simple_edit_element(user_region, input, strlen(input), &output, &output_sz, 0);

	/* Fatal check: a NULL output must not reach printf("%s") or the string
	 * comparison below. */
	ASSERT_TRUE(output != NULL);
	EXPECT_TRUE(output_sz>0);
	printf("output = %s\n", output);
	/* EXPECT_STREQ prints both strings on mismatch, unlike a bare strcmp. */
	EXPECT_STREQ(expect_output, output);

	free(output);
}
/* HTML read from ./test_data/facebook_index.html, inside_element scope,
 * "remove" at distance 5: the start indicator string must no longer appear
 * in the edited document. */
TEST(EditElement, Libxml_Whole_Remove_Facebook)
{
	char *output = NULL;
	size_t output_sz = 0;
	const char *filename = "./test_data/facebook_index.html";
	struct stat file_info;

	/* Without a successful stat() the file size below would be garbage. */
	ASSERT_EQ(0, stat(filename, &file_info));
	size_t input_sz = (size_t)file_info.st_size;

	FILE *fp = fopen(filename, "r");
	ASSERT_FALSE(fp==NULL);

	/* One extra byte for the terminator: the document is handed on as a
	 * C string, so it must be NUL-terminated. */
	char *input = (char *)malloc(input_sz + 1);
	size_t input_len = 0;
	if(input != NULL)
	{
		input_len = fread(input, 1, input_sz, fp);
		input[input_len] = '\0';
	}
	fclose(fp);
	ASSERT_TRUE(input != NULL);
	EXPECT_EQ(input_sz, input_len);

	const char *user_region =
		"{\"rules\":[{\"anchor_element\":{\"search_scope\":\"inside_element\",\"start_indicator\":\"_2t-a _4pmj _2t-d\",\"contained_keyword\":\"Facebook\"},"
		"\"target_element\":{\"target_distance_from_matching\":5,\"element_treatment\":\"remove\"}}]}";

	/* Pass the number of bytes actually read, not a zero placeholder. */
	simple_edit_element(user_region, input, input_len, &output, &output_sz, 0);
	free(input);

	/* Fatal check: a NULL output must not reach strstr below. */
	ASSERT_TRUE(output != NULL);
	EXPECT_TRUE(output_sz>0);
	EXPECT_TRUE(NULL==strstr(output, "_2t-a _4pmj _2t-d"));

	free(output);
}
/* Test-binary entry point: hands the command line to GoogleTest, then runs
 * every registered test and reports the result as the exit status. */
int main(int argc, char ** argv)
{
	::testing::InitGoogleTest(&argc, argv);

	const int exit_status = RUN_ALL_TESTS();
	return exit_status;
}