bugfix: 修复html嵌套json无法删除问题 (fix: JSON embedded inside HTML content could not be removed)

This commit is contained in:
fengweihao
2021-12-08 19:05:44 +08:00
parent 8705a6fa23
commit 99f92f7777
2 changed files with 43 additions and 7 deletions

View File

@@ -14,6 +14,7 @@
int cjson_element_foreach(cJSON *a, int *depth, int *step, char **node, const struct element_rule * rules, int *match_num); int cjson_element_foreach(cJSON *a, int *depth, int *step, char **node, const struct element_rule * rules, int *match_num);
static void html_node_list(const struct element_rule * rules, xmlNodePtr node, xmlNodePtr *parent_array, size_t *n_parent, int *match); static void html_node_list(const struct element_rule * rules, xmlNodePtr node, xmlNodePtr *parent_array, size_t *n_parent, int *match);
size_t parse_json_output_unformatted(const char * in, size_t in_sz, const struct element_rule * rules, char** out);
enum search_scope scope_name_to_id(const char * name) enum search_scope scope_name_to_id(const char * name)
{ {
@@ -54,18 +55,26 @@ int match_start_indicator(xmlNodePtr parent, char * start_indicator)
int construct_cjson_by_treatment(cJSON *a_element, char **node, int *step, const struct element_rule * rules) int construct_cjson_by_treatment(cJSON *a_element, char **node, int *step, const struct element_rule * rules)
{ {
const char *element_treatment=rules->element_treatment; const char *element_treatment=rules->element_treatment;
char * start_indicator = rules->start_indicator;
if(element_treatment != NULL && !strcasecmp(element_treatment, "mark")) if(element_treatment != NULL && !strcasecmp(element_treatment, "mark"))
{ {
if (rules->scope == kScopeInside)
{
if(a_element->string != NULL && strcasecmp(a_element->string, start_indicator) != 0)
{
return 0;
}
}
cJSON_AddBoolToObject(a_element, "need_filter", true); cJSON_AddBoolToObject(a_element, "need_filter", true);
} }
char * start_indicator = rules->start_indicator;
if(element_treatment != NULL && !strcasecmp(element_treatment, "remove")) if(element_treatment != NULL && !strcasecmp(element_treatment, "remove"))
{ {
if (rules->scope == kScopeInside) if (rules->scope == kScopeInside)
{ {
if(a_element->string != NULL && strcasecmp(a_element->string, start_indicator)) if(a_element->string != NULL && strcasecmp(a_element->string, start_indicator) != 0)
{ {
return 0; return 0;
} }
@@ -87,14 +96,37 @@ int construct_cjson_by_treatment(cJSON *a_element, char **node, int *step, const
int construct_html_by_treatment(const struct element_rule * rules, xmlNodePtr node, xmlNodePtr *parent_array, size_t *n_parent, int *match) int construct_html_by_treatment(const struct element_rule * rules, xmlNodePtr node, xmlNodePtr *parent_array, size_t *n_parent, int *match)
{ {
int k=0; int k=0;
char *new_out=NULL;
size_t output_size=0;
char * token = NULL, * saveptr = NULL;
if(strcasestr((char *)node->content, "var ytInitialData"))
{
token = strtok_r((char *)node->content, "=", &saveptr);
if(token != NULL && ((saveptr[0] == '{') || (saveptr[1] == '{')))
{
output_size = parse_json_output_unformatted(saveptr, strlen(saveptr), rules, &new_out);
if(output_size != 0 && new_out != NULL)
{
new_out[output_size] = ';';
FREE(&node->content);
node->content = (xmlChar*)new_out;
*match =1;
return 0;
}
}
}
if(strcasestr((char *)node->content, rules->contained_keyword) == NULL) if(strcasestr((char *)node->content, rules->contained_keyword) == NULL)
{ {
return 0; return 0;
} }
char * start_indicator = rules->start_indicator;
const char *element_treatment=rules->element_treatment; const char *element_treatment=rules->element_treatment;
int distane_from_matching = rules->distane_from_matching; int distane_from_matching = (rules->distane_from_matching + 1);
if(element_treatment != NULL && !strcasecmp(element_treatment, "mark")) if(element_treatment != NULL && !strcasecmp(element_treatment, "mark"))
{ {
@@ -104,6 +136,11 @@ int construct_html_by_treatment(const struct element_rule * rules, xmlNodePtr no
{ {
if(k == distane_from_matching) if(k == distane_from_matching)
{ {
if (rules->scope == kScopeInside && match_start_indicator(parent, start_indicator) == 0)
{
break;
}
xmlNewProp(parent, (const xmlChar *)"need_filter", (const xmlChar *)"true"); xmlNewProp(parent, (const xmlChar *)"need_filter", (const xmlChar *)"true");
*match =1; *match =1;
break; break;
@@ -113,7 +150,6 @@ int construct_html_by_treatment(const struct element_rule * rules, xmlNodePtr no
} }
} }
char * start_indicator = rules->start_indicator;
if(element_treatment != NULL && !strcasecmp(element_treatment, "remove")) if(element_treatment != NULL && !strcasecmp(element_treatment, "remove"))
{ {
xmlNodePtr parent = node->parent; xmlNodePtr parent = node->parent;
@@ -360,7 +396,7 @@ size_t parse_json_output_unformatted(const char * in, size_t in_sz, const struct
cjson_element_foreach(interator, &depth, &step, &node, rules, &match_num); cjson_element_foreach(interator, &depth, &step, &node, rules, &match_num);
for(i=0; i< match_num; i++) for(i=0; i< match_num; i++)
{ {
depth = rules->distane_from_matching; depth = (rules->distane_from_matching + 1);
step=0; node=NULL; step=0; node=NULL;
match |= cjson_element_foreach(interator, &depth, &step, &node, rules, &match_num); match |= cjson_element_foreach(interator, &depth, &step, &node, rules, &match_num);
@@ -411,7 +447,7 @@ size_t format_json_file_type(const char * in, size_t in_sz, const struct element
cjson_element_foreach(interator, &depth, &step, &node, rules, &match_num); cjson_element_foreach(interator, &depth, &step, &node, rules, &match_num);
for(i=0; i< match_num; i++) for(i=0; i< match_num; i++)
{ {
depth = rules->distane_from_matching; depth = (rules->distane_from_matching + 1);
step=0; node=NULL; step=0; node=NULL;
match |= cjson_element_foreach(interator, &depth, &step, &node, rules, &match_num); match |= cjson_element_foreach(interator, &depth, &step, &node, rules, &match_num);

View File

@@ -122,7 +122,7 @@ int pangu_send_log(struct pangu_logger* handle, const struct pangu_log* log_msg)
const char *app_proto[]= {"unkonw","http1", "http2"}; const char *app_proto[]= {"unkonw","http1", "http2"};
const char *manipulate_action_map[]= {"redirect","block","replace","hijack","insert","element_edit"}; const char *manipulate_action_map[]= {"redirect","block","replace","hijack","insert","edit_element"};
const char *panggu_action_map[__LG_ACTION_MAX]; const char *panggu_action_map[__LG_ACTION_MAX];
panggu_action_map[LG_ACTION_MONIT]="monitor"; panggu_action_map[LG_ACTION_MONIT]="monitor";