TSG-8935 修复JSON数组格式MARK标记问题
增加元素编辑自测试用例,文件名和变量变更
This commit is contained in:
@@ -10,11 +10,11 @@
|
||||
#include <libxml/HTMLparser.h>
|
||||
#include <cjson/cJSON.h>
|
||||
|
||||
#include "pangu_element_edit.h"
|
||||
#include "edit_element.h"
|
||||
|
||||
int cjson_element_foreach(cJSON *a, int *depth, int *step, int *step_level, char **node, const struct element_rule * rules, int *match_num, int loop);
|
||||
static void html_node_list(const struct element_rule * rules, xmlNodePtr node, xmlNodePtr *parent_array, size_t *n_parent, int *match);
|
||||
size_t parse_json_output_unformatted(const char * in, size_t in_sz, const struct element_rule * rules, char** out);
|
||||
int cjson_element_foreach(cJSON *a, int *depth, int *step, int *step_level, char **node, const struct edit_element_rule * rules, int *match_num, int loop);
|
||||
static void html_node_list(const struct edit_element_rule * rules, xmlNodePtr node, xmlNodePtr *parent_array, size_t *n_parent, int *match);
|
||||
size_t parse_json_output_unformatted(const char * in, size_t in_sz, const struct edit_element_rule * rules, char** out);
|
||||
|
||||
enum search_scope scope_name_to_id(const char * name)
|
||||
{
|
||||
@@ -52,7 +52,7 @@ int match_start_indicator(xmlNodePtr parent, char * start_indicator)
|
||||
return 0;
|
||||
}
|
||||
|
||||
int construct_cjson_by_treatment(cJSON *a_element, char **node, int *step, int *step_level, const struct element_rule * rules)
|
||||
int construct_cjson_by_treatment(cJSON *a_element, char **node, int *step, int *step_array_level, const struct edit_element_rule * rules)
|
||||
{
|
||||
const char *element_treatment=rules->element_treatment;
|
||||
char * start_indicator = rules->start_indicator;
|
||||
@@ -81,6 +81,15 @@ int construct_cjson_by_treatment(cJSON *a_element, char **node, int *step, int *
|
||||
{
|
||||
cJSON_AddBoolToObject(a_element, "need_filter", true);
|
||||
}
|
||||
if(a_element->type == cJSON_Array)
|
||||
{
|
||||
cJSON *object = NULL;
|
||||
object = cJSON_GetArrayItem(a_element, step_array_level[*step]);
|
||||
if(object != NULL)
|
||||
{
|
||||
cJSON_AddBoolToObject(object, "need_filter", true);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(element_treatment != NULL && !strcasecmp(element_treatment, "remove"))
|
||||
@@ -109,14 +118,14 @@ int construct_cjson_by_treatment(cJSON *a_element, char **node, int *step, int *
|
||||
}
|
||||
if(a_element->type == cJSON_Array)
|
||||
{
|
||||
cJSON_DeleteItemFromArray(a_element, step_level[*step]);
|
||||
cJSON_DeleteItemFromArray(a_element, step_array_level[*step]);
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int construct_html_by_treatment(const struct element_rule * rules, xmlNodePtr node, xmlNodePtr *parent_array, size_t *n_parent, int *match)
|
||||
int construct_html_by_treatment(const struct edit_element_rule * rules, xmlNodePtr node, xmlNodePtr *parent_array, size_t *n_parent, int *match)
|
||||
{
|
||||
int k=0;
|
||||
char *new_out=NULL;
|
||||
@@ -210,7 +219,7 @@ int construct_html_by_treatment(const struct element_rule * rules, xmlNodePtr no
|
||||
return 0;
|
||||
}
|
||||
|
||||
int cjson_dump_array(cJSON *a, int *depth, int *step, int *step_level, char **node, const struct element_rule * rules, int *match_num, int loop)
|
||||
int cjson_dump_array(cJSON *a, int *depth, int *step, int *step_array_level, char **node, const struct edit_element_rule * rules, int *match_num, int loop)
|
||||
{
|
||||
int xret=0, array_cnt=0;
|
||||
|
||||
@@ -218,18 +227,19 @@ int cjson_dump_array(cJSON *a, int *depth, int *step, int *step_level, char **no
|
||||
*step= *step + 1;
|
||||
for (; (a_element != NULL);)
|
||||
{
|
||||
xret = cjson_element_foreach(a_element, depth, step, step_level, node, rules, match_num, loop);
|
||||
xret = cjson_element_foreach(a_element, depth, step, step_array_level, node, rules, match_num, loop);
|
||||
if(xret == -1)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
if(*depth == 0)
|
||||
{
|
||||
construct_cjson_by_treatment(a_element, node, step, step_level, rules);
|
||||
construct_cjson_by_treatment(a_element, node, step, step_array_level, rules);
|
||||
}
|
||||
if(xret == 1)
|
||||
{
|
||||
step_level[*step] = array_cnt;
|
||||
*step = (*step >= 2047) ? 2047 : *step;
|
||||
step_array_level[*step] = array_cnt;
|
||||
*node = a_element->string;
|
||||
*depth = *depth -1;
|
||||
return 1;
|
||||
@@ -241,21 +251,21 @@ int cjson_dump_array(cJSON *a, int *depth, int *step, int *step_level, char **no
|
||||
return xret;
|
||||
}
|
||||
|
||||
int cjson_dump_object(cJSON *a, int *depth, int *step, int *step_level, char **node, const struct element_rule * rules, int *match_num, int loop)
|
||||
int cjson_dump_object(cJSON *a, int *depth, int *step, int *step_array_level, char **node, const struct edit_element_rule * rules, int *match_num, int loop)
|
||||
{
|
||||
int xret=0;
|
||||
cJSON *a_element=NULL;
|
||||
|
||||
cJSON_ArrayForEach(a_element, a)
|
||||
{
|
||||
xret = cjson_element_foreach(a_element, depth, step, step_level, node, rules, match_num, loop);
|
||||
xret = cjson_element_foreach(a_element, depth, step, step_array_level, node, rules, match_num, loop);
|
||||
if(xret == -1)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
if(*depth == 0)
|
||||
{
|
||||
construct_cjson_by_treatment(a_element, node, step, step_level, rules);
|
||||
construct_cjson_by_treatment(a_element, node, step, step_array_level, rules);
|
||||
}
|
||||
if(xret == 1)
|
||||
{
|
||||
@@ -267,7 +277,7 @@ int cjson_dump_object(cJSON *a, int *depth, int *step, int *step_level, char **n
|
||||
return xret;
|
||||
}
|
||||
|
||||
int cjson_dump_string(cJSON *a, int *depth, const struct element_rule * rules, int *match_num, int loop)
|
||||
int cjson_dump_string(cJSON *a, int *depth, const struct edit_element_rule * rules, int *match_num, int loop)
|
||||
{
|
||||
int xret=0;
|
||||
|
||||
@@ -296,7 +306,7 @@ finish:
|
||||
return xret;
|
||||
}
|
||||
|
||||
int cjson_element_foreach(cJSON *a, int *depth, int *step, int *step_level, char **node, const struct element_rule * rules, int *match_num, int loop)
|
||||
int cjson_element_foreach(cJSON *a, int *depth, int *step, int *step_array_level, char **node, const struct edit_element_rule * rules, int *match_num, int loop)
|
||||
{
|
||||
if ((a == NULL) || cJSON_IsInvalid(a))
|
||||
{
|
||||
@@ -310,10 +320,10 @@ int cjson_element_foreach(cJSON *a, int *depth, int *step, int *step_level, char
|
||||
return cjson_dump_string(a, depth, rules, match_num, loop);
|
||||
|
||||
case cJSON_Array:
|
||||
return cjson_dump_array(a, depth, step, step_level, node, rules, match_num, loop);
|
||||
return cjson_dump_array(a, depth, step, step_array_level, node, rules, match_num, loop);
|
||||
|
||||
case cJSON_Object:
|
||||
return cjson_dump_object(a, depth, step, step_level, node, rules, match_num, loop);
|
||||
return cjson_dump_object(a, depth, step, step_array_level, node, rules, match_num, loop);
|
||||
|
||||
case cJSON_Number:
|
||||
case cJSON_False:
|
||||
@@ -335,7 +345,7 @@ static void html_namespace_list(xmlNsPtr ns)
|
||||
}
|
||||
}
|
||||
|
||||
static void html_attr_list(const struct element_rule * rules, xmlAttrPtr attr, xmlNodePtr *parent_array, size_t *n_parent, int *match)
|
||||
static void html_attr_list(const struct edit_element_rule * rules, xmlAttrPtr attr, xmlNodePtr *parent_array, size_t *n_parent, int *match)
|
||||
{
|
||||
while (attr != NULL)
|
||||
{
|
||||
@@ -348,7 +358,7 @@ static void html_attr_list(const struct element_rule * rules, xmlAttrPtr attr, x
|
||||
}
|
||||
}
|
||||
|
||||
static void html_dump_one_node(const struct element_rule * rules, xmlNodePtr node, xmlNodePtr *parent_array, size_t *n_parent, int *match)
|
||||
static void html_dump_one_node(const struct edit_element_rule * rules, xmlNodePtr node, xmlNodePtr *parent_array, size_t *n_parent, int *match)
|
||||
{
|
||||
switch (node->type)
|
||||
{
|
||||
@@ -387,7 +397,7 @@ static void html_dump_one_node(const struct element_rule * rules, xmlNodePtr nod
|
||||
}
|
||||
}
|
||||
|
||||
static void html_dump_node(const struct element_rule * rules, xmlNodePtr node, xmlNodePtr *parent_array, size_t *n_parent, int *match)
|
||||
static void html_dump_node(const struct edit_element_rule * rules, xmlNodePtr node, xmlNodePtr *parent_array, size_t *n_parent, int *match)
|
||||
{
|
||||
html_dump_one_node(rules, node, parent_array, n_parent, match);
|
||||
if ((node->type != XML_NAMESPACE_DECL) && (node->children != NULL) && (node->type != XML_ENTITY_REF_NODE))
|
||||
@@ -396,7 +406,7 @@ static void html_dump_node(const struct element_rule * rules, xmlNodePtr node, x
|
||||
}
|
||||
}
|
||||
|
||||
static void html_node_list(const struct element_rule * rules, xmlNodePtr node, xmlNodePtr *parent_array, size_t *n_parent, int *match)
|
||||
static void html_node_list(const struct edit_element_rule * rules, xmlNodePtr node, xmlNodePtr *parent_array, size_t *n_parent, int *match)
|
||||
{
|
||||
while (node != NULL)
|
||||
{
|
||||
@@ -405,7 +415,7 @@ static void html_node_list(const struct element_rule * rules, xmlNodePtr node, x
|
||||
}
|
||||
}
|
||||
|
||||
static void html_element_foreach(const struct element_rule * rules, xmlDocPtr doc, xmlNodePtr *parent_array, size_t *n_parent, int *match)
|
||||
static void html_element_foreach(const struct edit_element_rule * rules, xmlDocPtr doc, xmlNodePtr *parent_array, size_t *n_parent, int *match)
|
||||
{
|
||||
if (((doc->type == XML_DOCUMENT_NODE) || (doc->type == XML_HTML_DOCUMENT_NODE)) && (doc->children != NULL))
|
||||
{
|
||||
@@ -413,7 +423,7 @@ static void html_element_foreach(const struct element_rule * rules, xmlDocPtr do
|
||||
}
|
||||
}
|
||||
|
||||
size_t parse_json_output_unformatted(const char * in, size_t in_sz, const struct element_rule * rules, char** out)
|
||||
size_t parse_json_output_unformatted(const char * in, size_t in_sz, const struct edit_element_rule * rules, char** out)
|
||||
{
|
||||
int match_num_peer=0;
|
||||
int step=0, depth=0, match_num=0,i=0, match=0;
|
||||
@@ -422,7 +432,7 @@ size_t parse_json_output_unformatted(const char * in, size_t in_sz, const struct
|
||||
size_t outlen=0;
|
||||
char *element_treatment=NULL;
|
||||
|
||||
int step_level[2048] = {0};
|
||||
int step_array_level[2048] = {0};
|
||||
|
||||
interator = cJSON_Parse(in);
|
||||
if(interator==NULL)
|
||||
@@ -433,15 +443,15 @@ size_t parse_json_output_unformatted(const char * in, size_t in_sz, const struct
|
||||
depth = -1;
|
||||
element_treatment=rules->element_treatment;
|
||||
|
||||
cjson_element_foreach(interator, &depth, &step, step_level, &node, rules, &match_num, 0);
|
||||
cjson_element_foreach(interator, &depth, &step, step_array_level, &node, rules, &match_num, 0);
|
||||
match_num_peer = match_num;
|
||||
for(i=0; i< match_num_peer; i++)
|
||||
{
|
||||
depth = (rules->distane_from_matching + 1);
|
||||
step=0; node=NULL; match_num=0;
|
||||
memset(step_level, 0, sizeof(step_level));
|
||||
memset(step_array_level, 0, sizeof(step_array_level));
|
||||
|
||||
match |= cjson_element_foreach(interator, &depth, &step, step_level, &node, rules, &match_num, i);
|
||||
match |= cjson_element_foreach(interator, &depth, &step, step_array_level, &node, rules, &match_num, i);
|
||||
|
||||
if(!strcasecmp(element_treatment, "remove") && match == 1 && node != NULL && depth == 0)
|
||||
{
|
||||
@@ -455,6 +465,15 @@ size_t parse_json_output_unformatted(const char * in, size_t in_sz, const struct
|
||||
{
|
||||
cJSON_AddBoolToObject(interator, "need_check", true);
|
||||
}
|
||||
if(interator->type==cJSON_Array)
|
||||
{
|
||||
cJSON *child = interator->child;
|
||||
for (; (child != NULL);)
|
||||
{
|
||||
cJSON_AddBoolToObject(child, "need_check", true);
|
||||
child = child->next;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
new_out = cJSON_PrintUnformatted(interator);
|
||||
@@ -470,7 +489,7 @@ finish:
|
||||
return outlen;
|
||||
}
|
||||
|
||||
size_t format_json_file_type(const char * in, size_t in_sz, const struct element_rule * rules, char** out)
|
||||
size_t format_json_file_type(const char * in, size_t in_sz, const struct edit_element_rule * rules, char** out)
|
||||
{
|
||||
int match_num_peer=0;
|
||||
int step=0, depth=0, match=0, i=0;
|
||||
@@ -479,7 +498,7 @@ size_t format_json_file_type(const char * in, size_t in_sz, const struct element
|
||||
size_t outlen=0; int match_num=0;
|
||||
char *element_treatment=NULL;
|
||||
|
||||
int step_level[2048] = {0};
|
||||
int step_array_level[2048] = {0};
|
||||
|
||||
char*new_in = ALLOC(char, in_sz+1);
|
||||
memcpy(new_in, in, in_sz);
|
||||
@@ -495,15 +514,15 @@ size_t format_json_file_type(const char * in, size_t in_sz, const struct element
|
||||
|
||||
/* When matched nodes have an inclusion (nesting) relation, the cJSON tree is still not NULL after a deletion,
|
||||
 * so the deletion is repeated over multiple loops. */
|
||||
cjson_element_foreach(interator, &depth, &step, step_level, &node, rules, &match_num, 0);
|
||||
cjson_element_foreach(interator, &depth, &step, step_array_level, &node, rules, &match_num, 0);
|
||||
match_num_peer = match_num;
|
||||
for(i=0; i< match_num_peer; i++)
|
||||
{
|
||||
depth = (rules->distane_from_matching + 1);
|
||||
step=0; node=NULL; match_num=0;
|
||||
memset(step_level, 0, sizeof(step_level));
|
||||
memset(step_array_level, 0, sizeof(step_array_level));
|
||||
|
||||
match |= cjson_element_foreach(interator, &depth, &step, step_level, &node, rules, &match_num, i);
|
||||
match |= cjson_element_foreach(interator, &depth, &step, step_array_level, &node, rules, &match_num, i);
|
||||
|
||||
if(!strcasecmp(element_treatment, "remove") && match == 1 && node != NULL && depth == 0)
|
||||
{
|
||||
@@ -524,9 +543,19 @@ size_t format_json_file_type(const char * in, size_t in_sz, const struct element
|
||||
{
|
||||
cJSON_AddBoolToObject(interator, "need_check", true);
|
||||
}
|
||||
|
||||
if(interator->type==cJSON_Array)
|
||||
{
|
||||
cJSON *child = interator->child;
|
||||
for (; (child != NULL);)
|
||||
{
|
||||
cJSON_AddBoolToObject(child, "need_check", true);
|
||||
child = child->next;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
new_out = cJSON_Print(interator);
|
||||
new_out = cJSON_PrintUnformatted(interator);
|
||||
if(new_out!=NULL)
|
||||
{
|
||||
*out = new_out;
|
||||
@@ -540,7 +569,7 @@ finish:
|
||||
return outlen;
|
||||
}
|
||||
|
||||
size_t format_multidelete_json_type(const char * in, size_t in_sz, const struct element_rule * rules, char** out)
|
||||
size_t format_multidelete_json_type(const char * in, size_t in_sz, const struct edit_element_rule * rules, char** out)
|
||||
{
|
||||
char *new_out=NULL, *pre_out=NULL;
|
||||
char * tmp = ALLOC(char, in_sz+1);
|
||||
@@ -596,7 +625,7 @@ size_t construct_format_html(htmlDocPtr doc, char**out)
|
||||
goto finish;
|
||||
}
|
||||
|
||||
saveCtxtPtr = xmlSaveToBuffer(out_buffer, "UTF-8", XML_SAVE_NO_DECL);
|
||||
saveCtxtPtr = xmlSaveToBuffer(out_buffer, "UTF-8", XML_SAVE_NO_DECL | XML_SAVE_AS_HTML);
|
||||
if (xmlSaveDoc(saveCtxtPtr, doc) < 0)
|
||||
{
|
||||
goto finish;
|
||||
@@ -622,15 +651,14 @@ finish:
|
||||
return outlen;
|
||||
}
|
||||
|
||||
size_t format_input_html(const char * in, size_t in_sz, const struct element_rule * rules, char** out)
|
||||
size_t format_input_html(const char * in, size_t in_sz, const struct edit_element_rule * rules, char** out)
|
||||
{
|
||||
size_t outlen=0, n_parent=0, n_parent_peer=0;
|
||||
int match=0, i=0;
|
||||
htmlDocPtr doc = NULL;
|
||||
const char *element_treatment=NULL;
|
||||
xmlNodePtr parent_array[16];
|
||||
|
||||
int options = XML_PARSE_NOERROR;
|
||||
int options = XML_PARSE_NOERROR | HTML_PARSE_NODEFDTD;
|
||||
|
||||
doc = htmlReadMemory(in, in_sz, NULL, NULL, options);
|
||||
if (doc == NULL)
|
||||
@@ -669,6 +697,10 @@ size_t format_input_html(const char * in, size_t in_sz, const struct element_rul
|
||||
{
|
||||
xmlNewProp(doc->children->next, (const xmlChar *)"need_check", (const xmlChar *)"true");
|
||||
}
|
||||
else if(doc->children != NULL)
|
||||
{
|
||||
xmlNewProp(doc->children, (const xmlChar *)"need_check", (const xmlChar *)"true");
|
||||
}
|
||||
}
|
||||
|
||||
outlen = construct_format_html(doc, out);
|
||||
@@ -685,7 +717,7 @@ finish:
|
||||
return outlen;
|
||||
}
|
||||
|
||||
size_t format_html_file_type(const char * interator, size_t interator_sz, const struct element_rule *rule, char **new_out)
|
||||
size_t format_html_file_type(const char * interator, size_t interator_sz, const struct edit_element_rule *rule, char **new_out)
|
||||
{
|
||||
size_t output_size=0;
|
||||
|
||||
@@ -701,7 +733,7 @@ size_t format_html_file_type(const char * interator, size_t interator_sz, const
|
||||
return output_size;
|
||||
}
|
||||
|
||||
size_t parse_string(const char * interator, size_t interator_sz, const struct element_rule *rule, char **new_out, int options)
|
||||
size_t parse_string(const char * interator, size_t interator_sz, const struct edit_element_rule *rule, char **new_out, int options)
|
||||
{
|
||||
size_t output_size=0;
|
||||
|
||||
@@ -717,9 +749,9 @@ size_t parse_string(const char * interator, size_t interator_sz, const struct el
|
||||
return output_size;
|
||||
}
|
||||
|
||||
size_t execute_edit_element_rule(const char * in, size_t in_sz, const struct element_rule *rules, size_t n_rule, char** out, int options)
|
||||
size_t execute_edit_element_rule(const char * in, size_t in_sz, const struct edit_element_rule *rules, size_t n_rule, char** out, int options)
|
||||
{
|
||||
const struct element_rule * todo = rules;
|
||||
const struct edit_element_rule * todo = rules;
|
||||
size_t i = 0, interator_sz=0, pre_out_sz=0;
|
||||
const char * interator = NULL;
|
||||
char* new_out = NULL, * pre_out = NULL;
|
||||
@@ -762,3 +794,82 @@ size_t execute_edit_element_rule(const char * in, size_t in_sz, const struct ele
|
||||
}
|
||||
}
|
||||
|
||||
size_t __attribute__((__unused__))
|
||||
format_edit_element_rule(struct edit_element_rule *edit_element, const char *user_region, size_t n_edit_element)
|
||||
{
|
||||
size_t idx=0;
|
||||
cJSON *json=NULL, *rules=NULL, *item=NULL, *sub_item=NULL;
|
||||
|
||||
json=cJSON_Parse(user_region);
|
||||
if(json !=NULL )
|
||||
{
|
||||
rules = cJSON_GetObjectItem(json, "rules");
|
||||
if(rules == NULL)
|
||||
{
|
||||
goto finish;
|
||||
}
|
||||
|
||||
idx = 0;
|
||||
for (item = rules->child; item != NULL; item = item->next)
|
||||
{
|
||||
sub_item=cJSON_GetObjectItem(item,"anchor_element");
|
||||
if(sub_item != NULL && sub_item->type ==cJSON_Object)
|
||||
{
|
||||
char * search_scope = cJSON_GetObjectItem(sub_item , "search_scope")->valuestring;
|
||||
if (search_scope == NULL) break;
|
||||
|
||||
edit_element[idx].scope = scope_name_to_id(search_scope);
|
||||
if (edit_element[idx].scope == KScopeMax)
|
||||
{
|
||||
break;
|
||||
}
|
||||
if(edit_element[idx].scope == kScopeInside)
|
||||
{
|
||||
edit_element[idx].start_indicator = tfe_strdup(cJSON_GetObjectItem(sub_item , "start_indicator")->valuestring);
|
||||
}
|
||||
edit_element[idx].contained_keyword = tfe_strdup(cJSON_GetObjectItem(sub_item,"contained_keyword")->valuestring);
|
||||
}
|
||||
|
||||
sub_item=cJSON_GetObjectItem(item,"target_element");
|
||||
if(sub_item != NULL && sub_item->type ==cJSON_Object)
|
||||
{
|
||||
edit_element[idx].distane_from_matching = cJSON_GetObjectItem(sub_item , "target_distance_from_matching")->valueint;
|
||||
edit_element[idx].element_treatment = tfe_strdup(cJSON_GetObjectItem(sub_item,"element_treatment")->valuestring);
|
||||
}
|
||||
|
||||
if (idx == n_edit_element)
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
idx++;
|
||||
}
|
||||
}
|
||||
|
||||
finish:
|
||||
if (json) cJSON_Delete(json);
|
||||
return idx;
|
||||
}
|
||||
|
||||
void simple_edit_element(const char *user_region, const char* input, size_t in_sz, char** output, size_t *output_sz, int options)
|
||||
{
|
||||
size_t n_got_rule=0, i=0;
|
||||
struct edit_element_rule rules[16];
|
||||
memset(rules, 0, sizeof(struct edit_element_rule)*16);
|
||||
|
||||
n_got_rule=format_edit_element_rule(rules, user_region, sizeof(rules)/sizeof(rules[0]));
|
||||
*output_sz=execute_edit_element_rule(input, strlen(input), rules, n_got_rule, output, options);
|
||||
for(i=0; i<n_got_rule; i++)
|
||||
{
|
||||
if(rules[i].start_indicator!=NULL)
|
||||
{
|
||||
FREE(&(rules[i].start_indicator));
|
||||
}
|
||||
|
||||
FREE(&(rules[i].element_treatment));
|
||||
FREE(&(rules[i].contained_keyword));
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@ enum search_scope
|
||||
KScopeMax
|
||||
};
|
||||
|
||||
struct element_rule
|
||||
struct edit_element_rule
|
||||
{
|
||||
enum search_scope scope;
|
||||
int distane_from_matching;
|
||||
@@ -17,6 +17,8 @@ struct element_rule
|
||||
char * contained_keyword;
|
||||
};
|
||||
|
||||
size_t execute_edit_element_rule(const char * in, size_t in_sz, const struct element_rule *rules, size_t n_rule, char** out, int options);
|
||||
size_t execute_edit_element_rule(const char * in, size_t in_sz, const struct edit_element_rule *rules, size_t n_rule, char** out, int options);
|
||||
enum search_scope scope_name_to_id(const char * name);
|
||||
|
||||
void simple_edit_element(const char *user_region, const char* input, size_t in_sz, char** output, size_t *output_sz, int options);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#include "pangu_logger.h"
|
||||
#include "pangu_element_edit.h"
|
||||
#include "edit_element.h"
|
||||
#include "pattern_replace.h"
|
||||
#include "pangu_web_cache.h"
|
||||
|
||||
@@ -126,7 +126,7 @@ struct policy_action_param
|
||||
struct replace_rule *repl_rule;
|
||||
|
||||
size_t e_rule;
|
||||
struct element_rule *elem_rule;
|
||||
struct edit_element_rule *elem_rule;
|
||||
|
||||
pthread_mutex_t lock;
|
||||
};
|
||||
@@ -486,7 +486,7 @@ void policy_action_param_new(int idx, const struct Maat_rule_t* rule, const char
|
||||
break;
|
||||
}
|
||||
rule_id = 0;
|
||||
param->elem_rule = ALLOC(struct element_rule, MAX_EDIT_ZONE_NUM);
|
||||
param->elem_rule = ALLOC(struct edit_element_rule, MAX_EDIT_ZONE_NUM);
|
||||
for (item = rules->child; item != NULL; item = item->next)
|
||||
{
|
||||
sub_item=cJSON_GetObjectItem(item,"anchor_element");
|
||||
@@ -995,7 +995,7 @@ struct insert_ctx
|
||||
|
||||
struct edit_element_ctx
|
||||
{
|
||||
struct element_rule *item;
|
||||
struct edit_element_rule *item;
|
||||
size_t n_item;
|
||||
struct tfe_http_half * editing;
|
||||
struct evbuffer *http_body;
|
||||
|
||||
142
plugin/business/pangu-http/src/test_edit_element.cpp
Normal file
142
plugin/business/pangu-http/src/test_edit_element.cpp
Normal file
@@ -0,0 +1,142 @@
|
||||
#include <tfe_utils.h>
|
||||
#include "edit_element.h"
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
/* HTML, whole-file scope: the element containing "hello world" must be removed. */
TEST(EditElement, Libxml_Whole_Remove_Index01)
{
    char* output=NULL;
    size_t output_sz=0;

    const char *input = "<html>\n\n<head>\n<title>index</title>\n</head>\n\n<body>\n<p>body</p>\n<p>hello world</p>\n<a href=\"edit.html\">跳转</a>\n<script onerror=\"mimgError()\" src=\"https://mimg.127.net/p/freemail/lib/polyfill/es5-polyfill.js\"></script>\n</body>\n\n</html>\n";

    const char *user_region = "{\"rules\":[{\"anchor_element\":{\"search_scope\":\"whole_file\",\"contained_keyword\":\"hello world\"},"
                              "\"target_element\":{\"target_distance_from_matching\":0,\"element_treatment\":\"remove\"}}]}";

    simple_edit_element(user_region, input, strlen(input), &output, &output_sz, 0);
    EXPECT_TRUE(output_sz>0);
    /* Stop here on failure: printf("%s") and strstr() must not see NULL. */
    ASSERT_TRUE(output!=NULL);

    printf("output = %s\n", output);
    EXPECT_TRUE(NULL==strstr(output, "hello world"));
    free(output);
}
|
||||
|
||||
/* HTML, whole-file scope: the matching element is marked with need_filter
 * and the document root with need_check; the output is compared verbatim. */
TEST(EditElement, Libxml_Whole_Mark_Index01)
{
    char* output=NULL;
    size_t output_sz=0;

    const char *input = "<html>\n\n<head>\n<title>index</title>\n</head>\n\n<body>\n<p>body</p>\n<p>hello world</p>\n<a href=\"edit.html\">跳转</a>\n<script onerror=\"mimgError()\" src=\"https://mimg.127.net/p/freemail/lib/polyfill/es5-polyfill.js\"></script>\n</body>\n\n</html>\n";

    const char *user_region = "{\"rules\":[{\"anchor_element\":{\"search_scope\":\"whole_file\",\"contained_keyword\":\"hello world\"},"
                              "\"target_element\":{\"target_distance_from_matching\":0,\"element_treatment\":\"mark\"}}]}";

    simple_edit_element(user_region, input, strlen(input), &output, &output_sz, 0);
    EXPECT_TRUE(output_sz>0);
    /* Stop here on failure: printf("%s") and the comparison must not see NULL. */
    ASSERT_TRUE(output!=NULL);

    const char *expect_output = "<html need_check=\"true\">\n\n<head><meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\">\n<title>index</title>\n</head>\n\n<body>\n<p>body</p>\n<p need_filter=\"true\">hello world</p>\n<a href=\"edit.html\">跳转</a>\n<script onerror=\"mimgError()\" src=\"https://mimg.127.net/p/freemail/lib/polyfill/es5-polyfill.js\"></script>\n</body>\n\n</html>\n";

    printf("output = %s\n", output);
    /* EXPECT_STREQ prints both strings when they differ. */
    EXPECT_STREQ(expect_output, output);
    free(output);
}
|
||||
|
||||
/* HTML, inside-element scope: the element tagged by the start indicator
 * "LC20lb" and containing "hello world" must be removed. */
TEST(EditElement, Libxml_Inside_Remove_Index01)
{
    char* output=NULL;
    size_t output_sz=0;

    const char *input = "<html>\n\n<head>\n<title>index</title>\n</head>\n\n<body>\n<p>body</p>\n<p class=\"LC20lb\">hello world</p>\n<a href=\"edit.html\">"
                        "跳转</a>\n<script onerror=\"mimgError()\" src=\"https://mimg.127.net/p/freemail/lib/polyfill/es5-polyfill.js\"></script>\n</body>\n\n</html>\n";

    const char *user_region = "{\"rules\":[{\"anchor_element\":{\"search_scope\":\"inside_element\",\"start_indicator\":\"LC20lb\",\"contained_keyword\":\"hello world\"},"
                              "\"target_element\":{\"target_distance_from_matching\":0,\"element_treatment\":\"remove\"}}]}";

    simple_edit_element(user_region, input, strlen(input), &output, &output_sz, 0);
    EXPECT_TRUE(output_sz>0);
    /* Stop here on failure: printf("%s") and strstr() must not see NULL. */
    ASSERT_TRUE(output!=NULL);

    printf("output = %s\n", output);
    EXPECT_TRUE(NULL==strstr(output, "hello world"));
    free(output);
}
|
||||
|
||||
/* JSON, whole-file scope: every array entry two levels above a value
 * containing "192.168.50.37" must be removed. */
TEST(EditElement, Cjson_Whole_Remove_Simple)
{
    char* output=NULL;
    size_t output_sz=0;

    const char *input = "{\"testkey\":\"value\",\"verifyList\":[{\"policyType\":\"tsg_security\",\"verifySession\":{\"attributes\":[{\"attributeType\":\"ip\",\"attributeValue\":{\"ip\":\"192.168.50.37\"}},{\"attributeType\":\"ip\",\"attributeValue\":{\"ip\":\"2.5.6.7\"}},{\"attributeType\":\"ip\",\"attributeValue\":{\"ip\":\"192.168.50.37\"}}]}}]}";

    const char *user_region = "{\"rules\":[{\"anchor_element\":{\"search_scope\":\"whole_file\",\"contained_keyword\":\"192.168.50.37\"},"
                              "\"target_element\":{\"target_distance_from_matching\":2,\"element_treatment\":\"remove\"}}]}";

    simple_edit_element(user_region, input, strlen(input), &output, &output_sz, 0);
    EXPECT_TRUE(output_sz>0);
    /* Stop here on failure: printf("%s") and strstr() must not see NULL. */
    ASSERT_TRUE(output!=NULL);

    printf("output = %s\n", output);
    EXPECT_TRUE(NULL==strstr(output, "192.168.50.37"));
    free(output);
}
|
||||
|
||||
/* JSON, whole-file scope: matching array entries get "need_filter" and the
 * root object gets "need_check"; the output is compared verbatim. */
TEST(EditElement, Cjson_Whole_mark_Simple)
{
    char* output=NULL;
    size_t output_sz=0;

    const char *input = "{\"testkey\":\"value\",\"verifyList\":[{\"policyType\":\"tsg_security\",\"verifySession\":{\"attributes\":[{\"attributeType\":\"ip\",\"attributeValue\":{\"ip\":\"192.168.50.37\"}},{\"attributeType\":\"ip\",\"attributeValue\":{\"ip\":\"2.5.6.7\"}},{\"attributeType\":\"ip\",\"attributeValue\":{\"ip\":\"192.168.50.37\"}}]}}]}";

    const char *user_region = "{\"rules\":[{\"anchor_element\":{\"search_scope\":\"whole_file\",\"contained_keyword\":\"192.168.50.37\"},"
                              "\"target_element\":{\"target_distance_from_matching\":2,\"element_treatment\":\"mark\"}}]}";

    simple_edit_element(user_region, input, strlen(input), &output, &output_sz, 0);
    EXPECT_TRUE(output_sz>0);
    /* Stop here on failure: printf("%s") and the comparison must not see NULL. */
    ASSERT_TRUE(output!=NULL);

    const char *expect_output = "{\"testkey\":\"value\",\"verifyList\":[{\"policyType\":\"tsg_security\",\"verifySession\":{\"attributes\":[{\"attributeType\":\"ip\",\"attributeValue\":{\"ip\":\"192.168.50.37\"},\"need_filter\":true},{\"attributeType\":\"ip\",\"attributeValue\":{\"ip\":\"2.5.6.7\"}},{\"attributeType\":\"ip\",\"attributeValue\":{\"ip\":\"192.168.50.37\"},\"need_filter\":true}]}}],\"need_check\":true}";
    printf("output = %s\n", output);
    /* EXPECT_STREQ prints both strings when they differ. */
    EXPECT_STREQ(expect_output, output);
    free(output);
}
|
||||
|
||||
/* HTML read from ./test_data/facebook_index.html, inside-element scope: the
 * ancestor five levels above the element tagged "_2t-a _4pmj _2t-d" and
 * containing "Facebook" must be removed. */
TEST(EditElement, Libxml_Whole_Remove_Facebook)
{
    char* output=NULL;
    size_t output_sz=0;

    const char* filename="./test_data/facebook_index.html";

    struct stat file_info;
    ASSERT_EQ(0, stat(filename, &file_info));
    size_t input_sz=file_info.st_size;

    FILE* fp=fopen(filename,"rb");
    ASSERT_TRUE(fp!=NULL);

    /* One extra byte for the terminating NUL: simple_edit_element() may
     * measure the buffer with strlen(). */
    char* input=(char*)malloc(input_sz+1);
    if(input==NULL)
    {
        fclose(fp);
        FAIL() << "out of memory reading " << filename;
    }
    size_t n_read=fread(input,1,input_sz,fp);
    fclose(fp);
    input[n_read]='\0';
    EXPECT_EQ(input_sz, n_read);

    const char *user_region = "{\"rules\":[{\"anchor_element\":{\"search_scope\":\"inside_element\",\"start_indicator\":\"_2t-a _4pmj _2t-d\",\"contained_keyword\":\"Facebook\"},"
                              "\"target_element\":{\"target_distance_from_matching\":5,\"element_treatment\":\"remove\"}}]}";

    /* Pass the number of bytes actually read, not an unset length variable. */
    simple_edit_element(user_region, input, n_read, &output, &output_sz, 0);
    EXPECT_TRUE(output_sz>0);
    EXPECT_TRUE(output!=NULL);

    if(output!=NULL)
    {
        EXPECT_TRUE(NULL==strstr(output, "_2t-a _4pmj _2t-d"));
    }
    free(output);
    free(input);
}
|
||||
|
||||
int main(int argc, char ** argv)
|
||||
{
|
||||
|
||||
::testing::InitGoogleTest(&argc, argv);
|
||||
return RUN_ALL_TESTS();
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user