TSG-8932 JSON评论数组层无法完全删除问题

TSG-8888 JSON重复内容Mark标记问题
TSG-8895 field_stat输出edit_elem_num统计
This commit is contained in:
fengweihao
2021-12-10 13:32:40 +08:00
parent 99f92f7777
commit 2fac4e9162
2 changed files with 98 additions and 34 deletions

View File

@@ -12,7 +12,7 @@
#include "pangu_element_edit.h"
int cjson_element_foreach(cJSON *a, int *depth, int *step, char **node, const struct element_rule * rules, int *match_num);
int cjson_element_foreach(cJSON *a, int *depth, int *step, int *step_level, char **node, const struct element_rule * rules, int *match_num, int loop);
static void html_node_list(const struct element_rule * rules, xmlNodePtr node, xmlNodePtr *parent_array, size_t *n_parent, int *match);
size_t parse_json_output_unformatted(const char * in, size_t in_sz, const struct element_rule * rules, char** out);
@@ -52,7 +52,7 @@ int match_start_indicator(xmlNodePtr parent, char * start_indicator)
return 0;
}
int construct_cjson_by_treatment(cJSON *a_element, char **node, int *step, const struct element_rule * rules)
int construct_cjson_by_treatment(cJSON *a_element, char **node, int *step, int *step_level, const struct element_rule * rules)
{
const char *element_treatment=rules->element_treatment;
char * start_indicator = rules->start_indicator;
@@ -61,24 +61,47 @@ int construct_cjson_by_treatment(cJSON *a_element, char **node, int *step, const
{
if (rules->scope == kScopeInside)
{
if(a_element->string != NULL && strcasecmp(a_element->string, start_indicator) != 0)
if(a_element->type == cJSON_Object)
{
if(*node != NULL && strcasecmp(*node, start_indicator) != 0)
{
return 0;
}
}
if(a_element->type == cJSON_Array)
{
if(a_element->string != NULL && strcasecmp(a_element->string, start_indicator))
{
return 0;
}
}
}
if(a_element->type==cJSON_Object)
{
cJSON_AddBoolToObject(a_element, "need_filter", true);
}
}
if(element_treatment != NULL && !strcasecmp(element_treatment, "remove"))
{
if (rules->scope == kScopeInside)
{
if(a_element->string != NULL && strcasecmp(a_element->string, start_indicator) != 0)
if(a_element->type == cJSON_Object)
{
if(*node != NULL && strcasecmp(*node, start_indicator) != 0)
{
return 0;
}
}
if(a_element->type == cJSON_Array)
{
if(a_element->string != NULL && strcasecmp(a_element->string, start_indicator))
{
return 0;
}
}
}
if(a_element->type == cJSON_Object && *node != NULL)
{
@@ -86,7 +109,7 @@ int construct_cjson_by_treatment(cJSON *a_element, char **node, int *step, const
}
if(a_element->type == cJSON_Array)
{
cJSON_DeleteItemFromArray(a_element, *step);
cJSON_DeleteItemFromArray(a_element, step_level[*step]);
}
}
@@ -187,50 +210,52 @@ int construct_html_by_treatment(const struct element_rule * rules, xmlNodePtr no
return 0;
}
int cjson_dump_array(cJSON *a, int *depth, int *step, char **node, const struct element_rule * rules, int *match_num)
int cjson_dump_array(cJSON *a, int *depth, int *step, int *step_level, char **node, const struct element_rule * rules, int *match_num, int loop)
{
int xret=0;
int xret=0, array_cnt=0;
cJSON *a_element = a->child;
*step= *step + 1;
for (; (a_element != NULL);)
{
xret = cjson_element_foreach(a_element, depth, step, node, rules, match_num);
xret = cjson_element_foreach(a_element, depth, step, step_level, node, rules, match_num, loop);
if(xret == -1)
{
return -1;
}
if(*depth == 0)
{
construct_cjson_by_treatment(a_element, node, step, rules);
construct_cjson_by_treatment(a_element, node, step, step_level, rules);
}
if(xret == 1)
{
step_level[*step] = array_cnt;
*node = a_element->string;
*depth = *depth -1;
return 1;
}
*step= *step + 1;
array_cnt++;
a_element = a_element->next;
}
return xret;
}
int cjson_dump_object(cJSON *a, int *depth, int *step, char **node, const struct element_rule * rules, int *match_num)
int cjson_dump_object(cJSON *a, int *depth, int *step, int *step_level, char **node, const struct element_rule * rules, int *match_num, int loop)
{
int xret=0;
cJSON *a_element=NULL;
cJSON_ArrayForEach(a_element, a)
{
xret = cjson_element_foreach(a_element, depth, step, node, rules, match_num);
xret = cjson_element_foreach(a_element, depth, step, step_level, node, rules, match_num, loop);
if(xret == -1)
{
return -1;
}
if(*depth == 0)
{
construct_cjson_by_treatment(a_element, node, step, rules);
construct_cjson_by_treatment(a_element, node, step, step_level, rules);
}
if(xret == 1)
{
@@ -242,24 +267,36 @@ int cjson_dump_object(cJSON *a, int *depth, int *step, char **node, const struct
return xret;
}
int cjson_dump_string(cJSON *a, int *depth, const struct element_rule * rules, int *match_num)
int cjson_dump_string(cJSON *a, int *depth, const struct element_rule * rules, int *match_num, int loop)
{
int xret=0;
if((a->valuestring != NULL) && strcasestr(a->valuestring, rules->contained_keyword))
{
if(*depth != -1)
{
if(!strcasecmp(rules->element_treatment, "mark"))
{
if(*match_num == loop)
{
xret = 1;
goto finish;
}
}
else
{
xret = 1;
goto finish;
}
}
*match_num = *match_num + 1;
}
finish:
return xret;
}
int cjson_element_foreach(cJSON *a, int *depth, int *step, char **node, const struct element_rule * rules, int *match_num)
int cjson_element_foreach(cJSON *a, int *depth, int *step, int *step_level, char **node, const struct element_rule * rules, int *match_num, int loop)
{
if ((a == NULL) || cJSON_IsInvalid(a))
{
@@ -270,13 +307,13 @@ int cjson_element_foreach(cJSON *a, int *depth, int *step, char **node, const st
{
case cJSON_String:
case cJSON_Raw:
return cjson_dump_string(a, depth, rules, match_num);
return cjson_dump_string(a, depth, rules, match_num, loop);
case cJSON_Array:
return cjson_dump_array(a, depth, step, node, rules, match_num);
return cjson_dump_array(a, depth, step, step_level, node, rules, match_num, loop);
case cJSON_Object:
return cjson_dump_object(a, depth, step, node, rules, match_num);
return cjson_dump_object(a, depth, step, step_level, node, rules, match_num, loop);
case cJSON_Number:
case cJSON_False:
@@ -378,12 +415,15 @@ static void html_element_foreach(const struct element_rule * rules, xmlDocPtr do
size_t parse_json_output_unformatted(const char * in, size_t in_sz, const struct element_rule * rules, char** out)
{
int match_num_peer=0;
int step=0, depth=0, match_num=0,i=0, match=0;
cJSON* interator=NULL;
char* new_out = NULL, *node=NULL;
size_t outlen=0;
char *element_treatment=NULL;
int step_level[2048] = {0};
interator = cJSON_Parse(in);
if(interator==NULL)
{
@@ -393,12 +433,15 @@ size_t parse_json_output_unformatted(const char * in, size_t in_sz, const struct
depth = -1;
element_treatment=rules->element_treatment;
cjson_element_foreach(interator, &depth, &step, &node, rules, &match_num);
for(i=0; i< match_num; i++)
cjson_element_foreach(interator, &depth, &step, step_level, &node, rules, &match_num, 0);
match_num_peer = match_num;
for(i=0; i< match_num_peer; i++)
{
depth = (rules->distane_from_matching + 1);
step=0; node=NULL;
match |= cjson_element_foreach(interator, &depth, &step, &node, rules, &match_num);
step=0; node=NULL; match_num=0;
memset(step_level, 0, sizeof(step_level));
match |= cjson_element_foreach(interator, &depth, &step, step_level, &node, rules, &match_num, i);
if(!strcasecmp(element_treatment, "remove") && match == 1 && node != NULL && depth == 0)
{
@@ -407,9 +450,12 @@ size_t parse_json_output_unformatted(const char * in, size_t in_sz, const struct
}
if(match==1 && element_treatment != NULL && !strcasecmp(element_treatment, "mark"))
{
if(interator->type==cJSON_Object)
{
cJSON_AddBoolToObject(interator, "need_check", true);
}
}
new_out = cJSON_PrintUnformatted(interator);
if(new_out!=NULL)
@@ -426,12 +472,15 @@ finish:
size_t format_json_file_type(const char * in, size_t in_sz, const struct element_rule * rules, char** out)
{
int match_num_peer=0;
int step=0, depth=0, match=0, i=0;
cJSON* interator=NULL;
char* new_out = NULL, *node=NULL;
size_t outlen=0; int match_num=0;
char *element_treatment=NULL;
int step_level[2048] = {0};
char*new_in = ALLOC(char, in_sz+1);
memcpy(new_in, in, in_sz);
@@ -444,17 +493,24 @@ size_t format_json_file_type(const char * in, size_t in_sz, const struct element
depth = -1;
element_treatment=rules->element_treatment;
cjson_element_foreach(interator, &depth, &step, &node, rules, &match_num);
for(i=0; i< match_num; i++)
/* When matched nodes are nested inside one another, the cJSON nodes are
not NULL after a deletion, so the deletion is repeated over multiple passes. */
cjson_element_foreach(interator, &depth, &step, step_level, &node, rules, &match_num, 0);
match_num_peer = match_num;
for(i=0; i< match_num_peer; i++)
{
depth = (rules->distane_from_matching + 1);
step=0; node=NULL;
match |= cjson_element_foreach(interator, &depth, &step, &node, rules, &match_num);
step=0; node=NULL; match_num=0;
memset(step_level, 0, sizeof(step_level));
match |= cjson_element_foreach(interator, &depth, &step, step_level, &node, rules, &match_num, i);
if(!strcasecmp(element_treatment, "remove") && match == 1 && node != NULL && depth == 0)
{
cJSON_DeleteItemFromObject(interator, node);
}
match_num--;
}
if(match == 0)
@@ -463,9 +519,12 @@ size_t format_json_file_type(const char * in, size_t in_sz, const struct element
}
if(element_treatment != NULL && !strcasecmp(element_treatment, "mark"))
{
if(interator->type==cJSON_Object)
{
cJSON_AddBoolToObject(interator, "need_check", true);
}
}
new_out = cJSON_Print(interator);
if(new_out!=NULL)
@@ -579,6 +638,8 @@ size_t format_input_html(const char * in, size_t in_sz, const struct element_rul
goto finish;
}
/* When matched nodes are nested inside one another, the libxml2 nodes are
not NULL after a deletion, so the deletion is repeated over multiple passes. */
html_element_foreach(rules, doc, parent_array, &n_parent, &match);
if(match != 1)
{

View File

@@ -83,6 +83,7 @@ enum pangu_http_stat
STAT_ACTION_HIJACK_SZ,
STAT_ACTION_INSERT,
STAT_ACTION_INSERT_SZ,
STAT_ACTION_EDIT_ELEMENT,
STAT_ACTION_WHITELSIT,
STAT_SUSPENDING,
__PG_STAT_MAX
@@ -187,6 +188,7 @@ static void pangu_http_stat_init(struct pangu_rt * pangu_runtime)
spec[STAT_ACTION_HIJACK_SZ]="hijk_bytes";
spec[STAT_ACTION_INSERT]="intcp_ins_num";
spec[STAT_ACTION_INSERT_SZ]="ins_bytes";
spec[STAT_ACTION_EDIT_ELEMENT]="edit_elem_num";
spec[STAT_ACTION_WHITELSIT]="intcp_allow_num";
spec[STAT_SUSPENDING]="suspending";
@@ -2145,6 +2147,7 @@ void http_element(const struct tfe_stream * stream, const struct tfe_http_sessio
if (rewrite_sz >0 )
{
tfe_http_half_append_body(edit_ctx->editing, rewrite_buff, rewrite_sz, 0);
ATOMIC_INC(&(g_pangu_rt->stat_val[STAT_ACTION_EDIT_ELEMENT]));
edit_ctx->actually_edited=1;
}
else