TSG-9422 TSG-9554 TSG-9470 修复元素编辑MARK问题, 代理支持4级或以上定位库
This commit is contained in:
@@ -5,6 +5,11 @@
|
||||
#include <stdlib.h>
|
||||
#include <assert.h>
|
||||
|
||||
#if 0
|
||||
#define PCRE2_CODE_UNIT_WIDTH 8
|
||||
#include <pcre2.h>
|
||||
#endif
|
||||
|
||||
#include <libxml/tree.h>
|
||||
#include <libxml/xmlsave.h>
|
||||
#include <libxml/HTMLparser.h>
|
||||
@@ -13,7 +18,7 @@
|
||||
#include "edit_element.h"
|
||||
|
||||
int cjson_element_foreach(cJSON *a, int *depth, int *step, int *step_level, char **node, const struct edit_element_rule * rules, int *match_num, int loop);
|
||||
/* Forward declaration: html_attr_list() and html_dump_node() call
 * html_node_list() before its definition further down. */
static void html_node_list(const struct edit_element_rule * rules, xmlNodePtr node, xmlNodePtr *parent_array, size_t *n_parent, int *match, size_t mark_tag);
|
||||
size_t parse_json_output_unformatted(const char * in, size_t in_sz, const struct edit_element_rule * rules, char** out);
|
||||
|
||||
enum search_scope scope_name_to_id(const char * name)
|
||||
@@ -52,6 +57,36 @@ int match_start_indicator(xmlNodePtr parent, char * start_indicator)
|
||||
return 0;
|
||||
}
|
||||
|
||||
#if 0
|
||||
int match_string(const char * in, size_t in_sz, char *contained_keyword)
|
||||
{
|
||||
assert(strlen(contained_keyword) != 0);
|
||||
|
||||
int error=0;
|
||||
PCRE2_SIZE erroffset=0;
|
||||
|
||||
const PCRE2_SPTR pattern = (PCRE2_SPTR)contained_keyword;
|
||||
uint32_t pcre2_options = PCRE2_UTF;
|
||||
|
||||
pcre2_code *re = pcre2_compile(pattern, strlen(contained_keyword), pcre2_options, &error, &erroffset, 0);
|
||||
if(!re)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
pcre2_match_data *match_data = pcre2_match_data_create_from_pattern(re, NULL);
|
||||
int rc = 0;
|
||||
const PCRE2_SPTR subject = (PCRE2_SPTR)in;
|
||||
|
||||
rc = pcre2_match(re, subject, in_sz, 0, 0, match_data, NULL);
|
||||
|
||||
pcre2_match_data_free(match_data);
|
||||
pcre2_code_free(re);
|
||||
|
||||
return rc;
|
||||
}
|
||||
#endif
|
||||
|
||||
int construct_cjson_by_treatment(cJSON *a_element, char **node, int *step, int *step_array_level, const struct edit_element_rule * rules)
|
||||
{
|
||||
const char *element_treatment=rules->element_treatment;
|
||||
@@ -151,6 +186,12 @@ int construct_html_by_treatment(const struct edit_element_rule * rules, xmlNodeP
|
||||
}
|
||||
}
|
||||
|
||||
#if 0
|
||||
if(match_string((char *)node->content, strlen((char *)node->content), rules->contained_keyword) < 0)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
if(strcasestr((char *)node->content, rules->contained_keyword) == NULL)
|
||||
{
|
||||
return 0;
|
||||
@@ -201,12 +242,8 @@ int construct_html_by_treatment(const struct edit_element_rule * rules, xmlNodeP
|
||||
break;
|
||||
}
|
||||
|
||||
if(*n_parent < 16)
|
||||
{
|
||||
parent_array[*n_parent] = parent;
|
||||
*n_parent = *n_parent+1;
|
||||
}
|
||||
|
||||
parent_array[0] = parent;
|
||||
*n_parent = *n_parent+1;
|
||||
*match =1;
|
||||
break;
|
||||
}
|
||||
@@ -281,6 +318,9 @@ int cjson_dump_string(cJSON *a, int *depth, const struct edit_element_rule * rul
|
||||
{
|
||||
int xret=0;
|
||||
|
||||
#if 0
|
||||
if((a->valuestring != NULL) && (match_string(a->valuestring, strlen(a->valuestring), rules->contained_keyword) > 0))
|
||||
#endif
|
||||
if((a->valuestring != NULL) && strcasestr(a->valuestring, rules->contained_keyword))
|
||||
{
|
||||
if(*depth != -1)
|
||||
@@ -345,20 +385,20 @@ static void html_namespace_list(xmlNsPtr ns)
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Walk an attribute list and scan the children of every attribute that
 * has any.
 *
 * rules, parent_array, n_parent, match and mark_tag are forwarded
 * unchanged to html_node_list().
 */
static void html_attr_list(const struct edit_element_rule * rules, xmlAttrPtr attr, xmlNodePtr *parent_array, size_t *n_parent, int *match, size_t mark_tag)
{
	while (attr != NULL)
	{
		if (attr->children != NULL)
		{
			html_node_list(rules, attr->children, parent_array, n_parent, match, mark_tag);
		}

		attr = attr->next;
	}
}
|
||||
|
||||
static void html_dump_one_node(const struct edit_element_rule * rules, xmlNodePtr node, xmlNodePtr *parent_array, size_t *n_parent, int *match)
|
||||
static void html_dump_one_node(const struct edit_element_rule * rules, xmlNodePtr node, xmlNodePtr *parent_array, size_t *n_parent, int *match, size_t mark_tag)
|
||||
{
|
||||
switch (node->type)
|
||||
{
|
||||
@@ -385,7 +425,7 @@ static void html_dump_one_node(const struct edit_element_rule * rules, xmlNodePt
|
||||
|
||||
if ((node->type == XML_ELEMENT_NODE) && (node->properties != NULL))
|
||||
{
|
||||
html_attr_list(rules, node->properties, parent_array, n_parent, match);
|
||||
html_attr_list(rules, node->properties, parent_array, n_parent, match, mark_tag);
|
||||
}
|
||||
|
||||
if (node->type != XML_ENTITY_REF_NODE)
|
||||
@@ -397,29 +437,39 @@ static void html_dump_one_node(const struct edit_element_rule * rules, xmlNodePt
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Process one node via html_dump_one_node(), then descend into its children.
 *
 * mark_tag selects the traversal mode: when it is 0 the walk stops as soon
 * as *match is 1 (children are not visited); when it is non-zero the walk
 * continues into the children regardless of *match.
 *
 * Children of namespace declarations and entity references are never visited.
 */
static void html_dump_node(const struct edit_element_rule * rules, xmlNodePtr node, xmlNodePtr *parent_array, size_t *n_parent, int *match, size_t mark_tag)
{
	html_dump_one_node(rules, node, parent_array, n_parent, match, mark_tag);
	if (*match == 1 && mark_tag == 0)
	{
		/* First-match mode: a match has been recorded, stop descending. */
		return;
	}

	if ((node->type != XML_NAMESPACE_DECL) && (node->children != NULL) && (node->type != XML_ENTITY_REF_NODE))
	{
		html_node_list(rules, node->children, parent_array, n_parent, match, mark_tag);
	}
}
|
||||
|
||||
/*
 * Visit node and each of its following siblings with html_dump_node().
 *
 * mark_tag selects the traversal mode: when it is 0 the sibling walk ends
 * as soon as *match is 1; when it is non-zero every sibling is visited.
 */
static void html_node_list(const struct edit_element_rule * rules, xmlNodePtr node, xmlNodePtr *parent_array, size_t *n_parent, int *match, size_t mark_tag)
{
	while (node != NULL)
	{
		html_dump_node(rules, node, parent_array, n_parent, match, mark_tag);
		if (*match == 1 && mark_tag == 0)
		{
			/* First-match mode: do not touch the remaining siblings. */
			break;
		}

		node = node->next;
	}
}
|
||||
|
||||
/*
 * Entry point of the document walk: scan the top-level children of doc.
 *
 * Nothing happens unless doc is an XML or HTML document node that has
 * children. All other arguments are forwarded unchanged to html_node_list();
 * see html_node_list() for the meaning of mark_tag.
 */
static void html_element_foreach(const struct edit_element_rule * rules, xmlDocPtr doc, xmlNodePtr *parent_array, size_t *n_parent, int *match, size_t mark_tag)
{
	if (((doc->type == XML_DOCUMENT_NODE) || (doc->type == XML_HTML_DOCUMENT_NODE)) && (doc->children != NULL))
	{
		html_node_list(rules, doc->children, parent_array, n_parent, match, mark_tag);
	}
}
|
||||
|
||||
@@ -476,6 +526,11 @@ size_t parse_json_output_unformatted(const char * in, size_t in_sz, const struct
|
||||
}
|
||||
}
|
||||
|
||||
if(match == 0)
|
||||
{
|
||||
goto finish;
|
||||
}
|
||||
|
||||
new_out = cJSON_PrintUnformatted(interator);
|
||||
if(new_out!=NULL)
|
||||
{
|
||||
@@ -666,9 +721,8 @@ size_t format_input_html(const char * in, size_t in_sz, const struct edit_elemen
|
||||
goto finish;
|
||||
}
|
||||
|
||||
/* When matched nodes are nested inside one another, libxml2 does not set the
|
||||
 * inner pointers to NULL when a node is deleted, so the nodes are deleted over multiple passes. */
|
||||
html_element_foreach(rules, doc, parent_array, &n_parent, &match);
|
||||
/* Deleting all matched nodes at once causes invalid reads under valgrind. */
|
||||
html_element_foreach(rules, doc, parent_array, &n_parent, &match, 1);
|
||||
if(match != 1)
|
||||
{
|
||||
goto finish;
|
||||
@@ -677,13 +731,15 @@ size_t format_input_html(const char * in, size_t in_sz, const struct edit_elemen
|
||||
n_parent_peer = n_parent;
|
||||
element_treatment=rules->element_treatment;
|
||||
|
||||
/* When matched nodes are nested inside one another, libxml2 does not set the
|
||||
 * inner pointers to NULL when a node is deleted, so the nodes are deleted over multiple passes. */
|
||||
if(element_treatment != NULL && !strcasecmp(element_treatment, "remove"))
|
||||
{
|
||||
for(i=0; i < (int)n_parent_peer; i++)
|
||||
{
|
||||
match =0; n_parent = 0;
|
||||
html_element_foreach(rules, doc, parent_array, &n_parent, &match);
|
||||
if(match == 1)
|
||||
html_element_foreach(rules, doc, parent_array, &n_parent, &match, 0);
|
||||
if(match == 1 && n_parent > 0)
|
||||
{
|
||||
xmlUnlinkNode(parent_array[0]);
|
||||
xmlFreeNode(parent_array[0]);
|
||||
@@ -721,7 +777,7 @@ size_t format_html_file_type(const char * interator, size_t interator_sz, const
|
||||
{
|
||||
size_t output_size=0;
|
||||
|
||||
if(interator[0] == '{')
|
||||
if((interator[0] == '{') || (interator[0] == '['))
|
||||
{
|
||||
output_size = format_multidelete_json_type(interator, interator_sz, rule, new_out);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user