TSG-8932 JSON评论数组层无法完全删除问题

TSG-8888 JSON重复内容Mark标记问题
TSG-8895 field_stat输出edit_elem_num统计
This commit is contained in:
fengweihao
2021-12-10 13:32:40 +08:00
parent 99f92f7777
commit 2fac4e9162
2 changed files with 98 additions and 34 deletions

View File

@@ -12,7 +12,7 @@
#include "pangu_element_edit.h" #include "pangu_element_edit.h"
int cjson_element_foreach(cJSON *a, int *depth, int *step, char **node, const struct element_rule * rules, int *match_num); int cjson_element_foreach(cJSON *a, int *depth, int *step, int *step_level, char **node, const struct element_rule * rules, int *match_num, int loop);
static void html_node_list(const struct element_rule * rules, xmlNodePtr node, xmlNodePtr *parent_array, size_t *n_parent, int *match); static void html_node_list(const struct element_rule * rules, xmlNodePtr node, xmlNodePtr *parent_array, size_t *n_parent, int *match);
size_t parse_json_output_unformatted(const char * in, size_t in_sz, const struct element_rule * rules, char** out); size_t parse_json_output_unformatted(const char * in, size_t in_sz, const struct element_rule * rules, char** out);
@@ -52,7 +52,7 @@ int match_start_indicator(xmlNodePtr parent, char * start_indicator)
return 0; return 0;
} }
int construct_cjson_by_treatment(cJSON *a_element, char **node, int *step, const struct element_rule * rules) int construct_cjson_by_treatment(cJSON *a_element, char **node, int *step, int *step_level, const struct element_rule * rules)
{ {
const char *element_treatment=rules->element_treatment; const char *element_treatment=rules->element_treatment;
char * start_indicator = rules->start_indicator; char * start_indicator = rules->start_indicator;
@@ -61,24 +61,47 @@ int construct_cjson_by_treatment(cJSON *a_element, char **node, int *step, const
{ {
if (rules->scope == kScopeInside) if (rules->scope == kScopeInside)
{ {
if(a_element->string != NULL && strcasecmp(a_element->string, start_indicator) != 0) if(a_element->type == cJSON_Object)
{
if(*node != NULL && strcasecmp(*node, start_indicator) != 0)
{ {
return 0; return 0;
} }
} }
if(a_element->type == cJSON_Array)
{
if(a_element->string != NULL && strcasecmp(a_element->string, start_indicator))
{
return 0;
}
}
}
if(a_element->type==cJSON_Object)
{
cJSON_AddBoolToObject(a_element, "need_filter", true); cJSON_AddBoolToObject(a_element, "need_filter", true);
} }
}
if(element_treatment != NULL && !strcasecmp(element_treatment, "remove")) if(element_treatment != NULL && !strcasecmp(element_treatment, "remove"))
{ {
if (rules->scope == kScopeInside) if (rules->scope == kScopeInside)
{ {
if(a_element->string != NULL && strcasecmp(a_element->string, start_indicator) != 0) if(a_element->type == cJSON_Object)
{
if(*node != NULL && strcasecmp(*node, start_indicator) != 0)
{ {
return 0; return 0;
} }
} }
if(a_element->type == cJSON_Array)
{
if(a_element->string != NULL && strcasecmp(a_element->string, start_indicator))
{
return 0;
}
}
}
if(a_element->type == cJSON_Object && *node != NULL) if(a_element->type == cJSON_Object && *node != NULL)
{ {
@@ -86,7 +109,7 @@ int construct_cjson_by_treatment(cJSON *a_element, char **node, int *step, const
} }
if(a_element->type == cJSON_Array) if(a_element->type == cJSON_Array)
{ {
cJSON_DeleteItemFromArray(a_element, *step); cJSON_DeleteItemFromArray(a_element, step_level[*step]);
} }
} }
@@ -187,50 +210,52 @@ int construct_html_by_treatment(const struct element_rule * rules, xmlNodePtr no
return 0; return 0;
} }
int cjson_dump_array(cJSON *a, int *depth, int *step, char **node, const struct element_rule * rules, int *match_num) int cjson_dump_array(cJSON *a, int *depth, int *step, int *step_level, char **node, const struct element_rule * rules, int *match_num, int loop)
{ {
int xret=0; int xret=0, array_cnt=0;
cJSON *a_element = a->child; cJSON *a_element = a->child;
*step= *step + 1;
for (; (a_element != NULL);) for (; (a_element != NULL);)
{ {
xret = cjson_element_foreach(a_element, depth, step, node, rules, match_num); xret = cjson_element_foreach(a_element, depth, step, step_level, node, rules, match_num, loop);
if(xret == -1) if(xret == -1)
{ {
return -1; return -1;
} }
if(*depth == 0) if(*depth == 0)
{ {
construct_cjson_by_treatment(a_element, node, step, rules); construct_cjson_by_treatment(a_element, node, step, step_level, rules);
} }
if(xret == 1) if(xret == 1)
{ {
step_level[*step] = array_cnt;
*node = a_element->string; *node = a_element->string;
*depth = *depth -1; *depth = *depth -1;
return 1; return 1;
} }
*step= *step + 1; array_cnt++;
a_element = a_element->next; a_element = a_element->next;
} }
return xret; return xret;
} }
int cjson_dump_object(cJSON *a, int *depth, int *step, char **node, const struct element_rule * rules, int *match_num) int cjson_dump_object(cJSON *a, int *depth, int *step, int *step_level, char **node, const struct element_rule * rules, int *match_num, int loop)
{ {
int xret=0; int xret=0;
cJSON *a_element=NULL; cJSON *a_element=NULL;
cJSON_ArrayForEach(a_element, a) cJSON_ArrayForEach(a_element, a)
{ {
xret = cjson_element_foreach(a_element, depth, step, node, rules, match_num); xret = cjson_element_foreach(a_element, depth, step, step_level, node, rules, match_num, loop);
if(xret == -1) if(xret == -1)
{ {
return -1; return -1;
} }
if(*depth == 0) if(*depth == 0)
{ {
construct_cjson_by_treatment(a_element, node, step, rules); construct_cjson_by_treatment(a_element, node, step, step_level, rules);
} }
if(xret == 1) if(xret == 1)
{ {
@@ -242,24 +267,36 @@ int cjson_dump_object(cJSON *a, int *depth, int *step, char **node, const struct
return xret; return xret;
} }
int cjson_dump_string(cJSON *a, int *depth, const struct element_rule * rules, int *match_num) int cjson_dump_string(cJSON *a, int *depth, const struct element_rule * rules, int *match_num, int loop)
{ {
int xret=0; int xret=0;
if((a->valuestring != NULL) && strcasestr(a->valuestring, rules->contained_keyword)) if((a->valuestring != NULL) && strcasestr(a->valuestring, rules->contained_keyword))
{ {
if(*depth != -1) if(*depth != -1)
{
if(!strcasecmp(rules->element_treatment, "mark"))
{
if(*match_num == loop)
{ {
xret = 1; xret = 1;
goto finish; goto finish;
} }
}
else
{
xret = 1;
goto finish;
}
}
*match_num = *match_num + 1; *match_num = *match_num + 1;
} }
finish: finish:
return xret; return xret;
} }
int cjson_element_foreach(cJSON *a, int *depth, int *step, char **node, const struct element_rule * rules, int *match_num) int cjson_element_foreach(cJSON *a, int *depth, int *step, int *step_level, char **node, const struct element_rule * rules, int *match_num, int loop)
{ {
if ((a == NULL) || cJSON_IsInvalid(a)) if ((a == NULL) || cJSON_IsInvalid(a))
{ {
@@ -270,13 +307,13 @@ int cjson_element_foreach(cJSON *a, int *depth, int *step, char **node, const st
{ {
case cJSON_String: case cJSON_String:
case cJSON_Raw: case cJSON_Raw:
return cjson_dump_string(a, depth, rules, match_num); return cjson_dump_string(a, depth, rules, match_num, loop);
case cJSON_Array: case cJSON_Array:
return cjson_dump_array(a, depth, step, node, rules, match_num); return cjson_dump_array(a, depth, step, step_level, node, rules, match_num, loop);
case cJSON_Object: case cJSON_Object:
return cjson_dump_object(a, depth, step, node, rules, match_num); return cjson_dump_object(a, depth, step, step_level, node, rules, match_num, loop);
case cJSON_Number: case cJSON_Number:
case cJSON_False: case cJSON_False:
@@ -378,12 +415,15 @@ static void html_element_foreach(const struct element_rule * rules, xmlDocPtr do
size_t parse_json_output_unformatted(const char * in, size_t in_sz, const struct element_rule * rules, char** out) size_t parse_json_output_unformatted(const char * in, size_t in_sz, const struct element_rule * rules, char** out)
{ {
int match_num_peer=0;
int step=0, depth=0, match_num=0,i=0, match=0; int step=0, depth=0, match_num=0,i=0, match=0;
cJSON* interator=NULL; cJSON* interator=NULL;
char* new_out = NULL, *node=NULL; char* new_out = NULL, *node=NULL;
size_t outlen=0; size_t outlen=0;
char *element_treatment=NULL; char *element_treatment=NULL;
int step_level[2048] = {0};
interator = cJSON_Parse(in); interator = cJSON_Parse(in);
if(interator==NULL) if(interator==NULL)
{ {
@@ -393,12 +433,15 @@ size_t parse_json_output_unformatted(const char * in, size_t in_sz, const struct
depth = -1; depth = -1;
element_treatment=rules->element_treatment; element_treatment=rules->element_treatment;
cjson_element_foreach(interator, &depth, &step, &node, rules, &match_num); cjson_element_foreach(interator, &depth, &step, step_level, &node, rules, &match_num, 0);
for(i=0; i< match_num; i++) match_num_peer = match_num;
for(i=0; i< match_num_peer; i++)
{ {
depth = (rules->distane_from_matching + 1); depth = (rules->distane_from_matching + 1);
step=0; node=NULL; step=0; node=NULL; match_num=0;
match |= cjson_element_foreach(interator, &depth, &step, &node, rules, &match_num); memset(step_level, 0, sizeof(step_level));
match |= cjson_element_foreach(interator, &depth, &step, step_level, &node, rules, &match_num, i);
if(!strcasecmp(element_treatment, "remove") && match == 1 && node != NULL && depth == 0) if(!strcasecmp(element_treatment, "remove") && match == 1 && node != NULL && depth == 0)
{ {
@@ -407,9 +450,12 @@ size_t parse_json_output_unformatted(const char * in, size_t in_sz, const struct
} }
if(match==1 && element_treatment != NULL && !strcasecmp(element_treatment, "mark")) if(match==1 && element_treatment != NULL && !strcasecmp(element_treatment, "mark"))
{
if(interator->type==cJSON_Object)
{ {
cJSON_AddBoolToObject(interator, "need_check", true); cJSON_AddBoolToObject(interator, "need_check", true);
} }
}
new_out = cJSON_PrintUnformatted(interator); new_out = cJSON_PrintUnformatted(interator);
if(new_out!=NULL) if(new_out!=NULL)
@@ -426,12 +472,15 @@ finish:
size_t format_json_file_type(const char * in, size_t in_sz, const struct element_rule * rules, char** out) size_t format_json_file_type(const char * in, size_t in_sz, const struct element_rule * rules, char** out)
{ {
int match_num_peer=0;
int step=0, depth=0, match=0, i=0; int step=0, depth=0, match=0, i=0;
cJSON* interator=NULL; cJSON* interator=NULL;
char* new_out = NULL, *node=NULL; char* new_out = NULL, *node=NULL;
size_t outlen=0; int match_num=0; size_t outlen=0; int match_num=0;
char *element_treatment=NULL; char *element_treatment=NULL;
int step_level[2048] = {0};
char*new_in = ALLOC(char, in_sz+1); char*new_in = ALLOC(char, in_sz+1);
memcpy(new_in, in, in_sz); memcpy(new_in, in, in_sz);
@@ -444,17 +493,24 @@ size_t format_json_file_type(const char * in, size_t in_sz, const struct element
depth = -1; depth = -1;
element_treatment=rules->element_treatment; element_treatment=rules->element_treatment;
cjson_element_foreach(interator, &depth, &step, &node, rules, &match_num); /*When the node has inclusion relation, cjson is not null when deleted
for(i=0; i< match_num; i++) So multiple loops delete **/
cjson_element_foreach(interator, &depth, &step, step_level, &node, rules, &match_num, 0);
match_num_peer = match_num;
for(i=0; i< match_num_peer; i++)
{ {
depth = (rules->distane_from_matching + 1); depth = (rules->distane_from_matching + 1);
step=0; node=NULL; step=0; node=NULL; match_num=0;
match |= cjson_element_foreach(interator, &depth, &step, &node, rules, &match_num); memset(step_level, 0, sizeof(step_level));
match |= cjson_element_foreach(interator, &depth, &step, step_level, &node, rules, &match_num, i);
if(!strcasecmp(element_treatment, "remove") && match == 1 && node != NULL && depth == 0) if(!strcasecmp(element_treatment, "remove") && match == 1 && node != NULL && depth == 0)
{ {
cJSON_DeleteItemFromObject(interator, node); cJSON_DeleteItemFromObject(interator, node);
} }
match_num--;
} }
if(match == 0) if(match == 0)
@@ -463,9 +519,12 @@ size_t format_json_file_type(const char * in, size_t in_sz, const struct element
} }
if(element_treatment != NULL && !strcasecmp(element_treatment, "mark")) if(element_treatment != NULL && !strcasecmp(element_treatment, "mark"))
{
if(interator->type==cJSON_Object)
{ {
cJSON_AddBoolToObject(interator, "need_check", true); cJSON_AddBoolToObject(interator, "need_check", true);
} }
}
new_out = cJSON_Print(interator); new_out = cJSON_Print(interator);
if(new_out!=NULL) if(new_out!=NULL)
@@ -579,6 +638,8 @@ size_t format_input_html(const char * in, size_t in_sz, const struct element_rul
goto finish; goto finish;
} }
/* When nodes have an inclusion (nesting) relation, the libxml2 node is not NULL
after it is deleted, so the deletion is performed in multiple loops. */
html_element_foreach(rules, doc, parent_array, &n_parent, &match); html_element_foreach(rules, doc, parent_array, &n_parent, &match);
if(match != 1) if(match != 1)
{ {

View File

@@ -83,6 +83,7 @@ enum pangu_http_stat
STAT_ACTION_HIJACK_SZ, STAT_ACTION_HIJACK_SZ,
STAT_ACTION_INSERT, STAT_ACTION_INSERT,
STAT_ACTION_INSERT_SZ, STAT_ACTION_INSERT_SZ,
STAT_ACTION_EDIT_ELEMENT,
STAT_ACTION_WHITELSIT, STAT_ACTION_WHITELSIT,
STAT_SUSPENDING, STAT_SUSPENDING,
__PG_STAT_MAX __PG_STAT_MAX
@@ -187,6 +188,7 @@ static void pangu_http_stat_init(struct pangu_rt * pangu_runtime)
spec[STAT_ACTION_HIJACK_SZ]="hijk_bytes"; spec[STAT_ACTION_HIJACK_SZ]="hijk_bytes";
spec[STAT_ACTION_INSERT]="intcp_ins_num"; spec[STAT_ACTION_INSERT]="intcp_ins_num";
spec[STAT_ACTION_INSERT_SZ]="ins_bytes"; spec[STAT_ACTION_INSERT_SZ]="ins_bytes";
spec[STAT_ACTION_EDIT_ELEMENT]="edit_elem_num";
spec[STAT_ACTION_WHITELSIT]="intcp_allow_num"; spec[STAT_ACTION_WHITELSIT]="intcp_allow_num";
spec[STAT_SUSPENDING]="suspending"; spec[STAT_SUSPENDING]="suspending";
@@ -2145,6 +2147,7 @@ void http_element(const struct tfe_stream * stream, const struct tfe_http_sessio
if (rewrite_sz >0 ) if (rewrite_sz >0 )
{ {
tfe_http_half_append_body(edit_ctx->editing, rewrite_buff, rewrite_sz, 0); tfe_http_half_append_body(edit_ctx->editing, rewrite_buff, rewrite_sz, 0);
ATOMIC_INC(&(g_pangu_rt->stat_val[STAT_ACTION_EDIT_ELEMENT]));
edit_ctx->actually_edited=1; edit_ctx->actually_edited=1;
} }
else else