2021-12-06 18:14:10 +08:00
|
|
|
#include <tfe_utils.h>
|
|
|
|
|
#include <string.h>
|
|
|
|
|
#include <sys/types.h>
|
|
|
|
|
#include <sys/stat.h>
|
|
|
|
|
#include <stdlib.h>
|
|
|
|
|
#include <assert.h>
|
|
|
|
|
|
2022-02-11 09:57:33 +08:00
|
|
|
#if 0
|
|
|
|
|
#define PCRE2_CODE_UNIT_WIDTH 8
|
|
|
|
|
#include <pcre2.h>
|
|
|
|
|
#endif
|
|
|
|
|
|
2021-12-06 18:14:10 +08:00
|
|
|
#include <libxml/tree.h>
|
|
|
|
|
#include <libxml/xmlsave.h>
|
|
|
|
|
#include <libxml/HTMLparser.h>
|
|
|
|
|
#include <cjson/cJSON.h>
|
|
|
|
|
|
2021-12-17 16:39:49 +08:00
|
|
|
#include "edit_element.h"
|
2021-12-06 18:14:10 +08:00
|
|
|
|
2021-12-17 16:39:49 +08:00
|
|
|
int cjson_element_foreach(cJSON *a, int *depth, int *step, int *step_level, char **node, const struct edit_element_rule * rules, int *match_num, int loop);
|
2022-02-11 09:57:33 +08:00
|
|
|
static void html_node_list(const struct edit_element_rule * rules, xmlNodePtr node, xmlNodePtr *parent_array, size_t *n_parent, int *match,size_t mark_tag);
|
2021-12-17 16:39:49 +08:00
|
|
|
size_t parse_json_output_unformatted(const char * in, size_t in_sz, const struct edit_element_rule * rules, char** out);
|
2021-12-06 18:14:10 +08:00
|
|
|
|
|
|
|
|
enum search_scope scope_name_to_id(const char * name)
|
|
|
|
|
{
|
|
|
|
|
const char * std_name[] = {"inside_element","whole_file"};
|
|
|
|
|
size_t i = 0;
|
|
|
|
|
for (i = 0; i < sizeof(std_name) / sizeof(const char *); i++)
|
|
|
|
|
{
|
|
|
|
|
if (0 == strcasecmp(name, std_name[i]))
|
|
|
|
|
{
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return (enum search_scope) i;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
 * Check whether the first attribute of `parent` carries the value
 * `start_indicator` (case-insensitive comparison).
 *
 * Only the first attribute and its first child node are inspected.
 *
 * Returns 1 on a match. Returns 0 when either argument is NULL, when the
 * node has no attributes, when the first attribute has no text content, or
 * when the value differs.
 */
int match_start_indicator(xmlNodePtr parent, char * start_indicator)
{
    struct _xmlAttr *first_attr = NULL;
    xmlNodePtr value_node = NULL;

    /* A missing node or indicator can never match; also keeps strcasecmp away from NULL. */
    if (parent == NULL || start_indicator == NULL)
    {
        return 0;
    }

    first_attr = parent->properties;
    if (first_attr == NULL)
    {
        return 0;
    }

    value_node = first_attr->children;
    if (value_node == NULL || value_node->content == NULL)
    {
        return 0;
    }

    return (strcasecmp((char *)value_node->content, start_indicator) == 0) ? 1 : 0;
}
|
|
|
|
|
|
2022-02-11 09:57:33 +08:00
|
|
|
#if 0
|
|
|
|
|
int match_string(const char * in, size_t in_sz, char *contained_keyword)
|
|
|
|
|
{
|
|
|
|
|
assert(strlen(contained_keyword) != 0);
|
|
|
|
|
|
|
|
|
|
int error=0;
|
|
|
|
|
PCRE2_SIZE erroffset=0;
|
|
|
|
|
|
|
|
|
|
const PCRE2_SPTR pattern = (PCRE2_SPTR)contained_keyword;
|
|
|
|
|
uint32_t pcre2_options = PCRE2_UTF;
|
|
|
|
|
|
|
|
|
|
pcre2_code *re = pcre2_compile(pattern, strlen(contained_keyword), pcre2_options, &error, &erroffset, 0);
|
|
|
|
|
if(!re)
|
|
|
|
|
{
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pcre2_match_data *match_data = pcre2_match_data_create_from_pattern(re, NULL);
|
|
|
|
|
int rc = 0;
|
|
|
|
|
const PCRE2_SPTR subject = (PCRE2_SPTR)in;
|
|
|
|
|
|
|
|
|
|
rc = pcre2_match(re, subject, in_sz, 0, 0, match_data, NULL);
|
|
|
|
|
|
|
|
|
|
pcre2_match_data_free(match_data);
|
|
|
|
|
pcre2_code_free(re);
|
|
|
|
|
|
|
|
|
|
return rc;
|
|
|
|
|
}
|
|
|
|
|
#endif
|
|
|
|
|
|
2021-12-17 16:39:49 +08:00
|
|
|
int construct_cjson_by_treatment(cJSON *a_element, char **node, int *step, int *step_array_level, const struct edit_element_rule * rules)
|
2021-12-06 18:14:10 +08:00
|
|
|
{
|
|
|
|
|
const char *element_treatment=rules->element_treatment;
|
2021-12-08 19:05:44 +08:00
|
|
|
char * start_indicator = rules->start_indicator;
|
2021-12-06 18:14:10 +08:00
|
|
|
|
2021-12-10 13:32:40 +08:00
|
|
|
if(element_treatment != NULL && !strcasecmp(element_treatment, "mark"))
|
2021-12-06 18:14:10 +08:00
|
|
|
{
|
2021-12-08 19:05:44 +08:00
|
|
|
if (rules->scope == kScopeInside)
|
|
|
|
|
{
|
2021-12-10 13:32:40 +08:00
|
|
|
if(a_element->type == cJSON_Object)
|
2021-12-08 19:05:44 +08:00
|
|
|
{
|
2021-12-10 13:32:40 +08:00
|
|
|
if(*node != NULL && strcasecmp(*node, start_indicator) != 0)
|
|
|
|
|
{
|
2022-03-11 10:26:56 +08:00
|
|
|
return -2;
|
2021-12-10 13:32:40 +08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if(a_element->type == cJSON_Array)
|
|
|
|
|
{
|
|
|
|
|
if(a_element->string != NULL && strcasecmp(a_element->string, start_indicator))
|
|
|
|
|
{
|
2022-03-11 10:26:56 +08:00
|
|
|
return -2;
|
2021-12-10 13:32:40 +08:00
|
|
|
}
|
2021-12-08 19:05:44 +08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2021-12-10 13:32:40 +08:00
|
|
|
if(a_element->type==cJSON_Object)
|
|
|
|
|
{
|
|
|
|
|
cJSON_AddBoolToObject(a_element, "need_filter", true);
|
|
|
|
|
}
|
2021-12-17 16:39:49 +08:00
|
|
|
if(a_element->type == cJSON_Array)
|
|
|
|
|
{
|
|
|
|
|
cJSON *object = NULL;
|
|
|
|
|
object = cJSON_GetArrayItem(a_element, step_array_level[*step]);
|
|
|
|
|
if(object != NULL)
|
|
|
|
|
{
|
|
|
|
|
cJSON_AddBoolToObject(object, "need_filter", true);
|
|
|
|
|
}
|
|
|
|
|
}
|
2021-12-06 18:14:10 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if(element_treatment != NULL && !strcasecmp(element_treatment, "remove"))
|
|
|
|
|
{
|
|
|
|
|
if (rules->scope == kScopeInside)
|
|
|
|
|
{
|
2021-12-10 13:32:40 +08:00
|
|
|
if(a_element->type == cJSON_Object)
|
2021-12-06 18:14:10 +08:00
|
|
|
{
|
2021-12-10 13:32:40 +08:00
|
|
|
if(*node != NULL && strcasecmp(*node, start_indicator) != 0)
|
|
|
|
|
{
|
2022-03-11 10:26:56 +08:00
|
|
|
return -2;
|
2021-12-10 13:32:40 +08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if(a_element->type == cJSON_Array)
|
|
|
|
|
{
|
|
|
|
|
if(a_element->string != NULL && strcasecmp(a_element->string, start_indicator))
|
|
|
|
|
{
|
2022-03-11 10:26:56 +08:00
|
|
|
return -2;
|
2021-12-10 13:32:40 +08:00
|
|
|
}
|
2021-12-06 18:14:10 +08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if(a_element->type == cJSON_Object && *node != NULL)
|
|
|
|
|
{
|
|
|
|
|
cJSON_DeleteItemFromObject(a_element, *node);
|
|
|
|
|
}
|
|
|
|
|
if(a_element->type == cJSON_Array)
|
|
|
|
|
{
|
2021-12-17 16:39:49 +08:00
|
|
|
cJSON_DeleteItemFromArray(a_element, step_array_level[*step]);
|
2021-12-06 18:14:10 +08:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2022-03-11 10:26:56 +08:00
|
|
|
return 1;
|
2021-12-06 18:14:10 +08:00
|
|
|
}
|
|
|
|
|
|
2021-12-17 16:39:49 +08:00
|
|
|
/*
 * Walk up from `node` to the ancestor that sits
 * (rules->distane_from_matching + 1) levels above it.
 *
 * Returns that ancestor, or NULL when the tree is not deep enough or when the
 * rule uses the inside scope and the ancestor does not carry the start
 * indicator.
 */
static xmlNodePtr html_find_target_ancestor(const struct edit_element_rule *rules, xmlNodePtr node)
{
    const int wanted_level = rules->distane_from_matching + 1;
    int level = 1;
    xmlNodePtr ancestor = NULL;

    for (ancestor = node->parent; ancestor != NULL; ancestor = ancestor->parent, level++)
    {
        if (level != wanted_level)
        {
            continue;
        }
        if (rules->scope == kScopeInside && match_start_indicator(ancestor, rules->start_indicator) == 0)
        {
            return NULL;
        }
        return ancestor;
    }

    return NULL;
}

/*
 * Apply one edit rule to a content-bearing node (callers in this file only
 * pass nodes whose content is non-NULL).
 *
 * 1. If the content contains "var ytInitialData", the text after the first
 *    '=' is run through parse_json_output_unformatted(). When that produces
 *    output, the node content is replaced by the edited JSON followed by ';'
 *    and *match is set to 1.
 *    NOTE(review): the text before '=' is not kept in the replacement, as in
 *    the previous implementation - confirm this is intended.
 * 2. Otherwise, if the content contains rules->contained_keyword, the target
 *    ancestor is located and either gets a need_filter="true" attribute
 *    ("mark") or is stored in parent_array[0] with *n_parent incremented
 *    ("remove"; an ancestor without a parent is never selected). *match is
 *    set to 1 in both cases.
 *
 * The node content is never modified unless a replacement is installed.
 * Always returns 0.
 */
int construct_html_by_treatment(const struct edit_element_rule * rules, xmlNodePtr node, xmlNodePtr *parent_array, size_t *n_parent, int *match)
{
    const char *content = (const char *)node->content;
    const char *element_treatment = rules->element_treatment;
    xmlNodePtr target = NULL;

    if (strcasestr(content, "var ytInitialData"))
    {
        /* Locate the JSON without writing into the node text (strtok_r would cut it at '='). */
        const char *assign = strchr(content, '=');
        const char *json = (assign != NULL) ? assign + 1 : NULL;

        /* The JSON may start right after '=' or one character later (e.g. after a space). */
        if (json != NULL && (json[0] == '{' || (json[0] != '\0' && json[1] == '{')))
        {
            char *new_out = NULL;
            size_t output_size = parse_json_output_unformatted(json, strlen(json), rules, &new_out);

            if (output_size != 0 && new_out != NULL)
            {
                /* Make room for the trailing ';' and the terminator. */
                char *terminated = realloc(new_out, output_size + 2);
                if (terminated != NULL)
                {
                    terminated[output_size] = ';';
                    terminated[output_size + 1] = '\0';

                    FREE(&node->content);
                    node->content = (xmlChar *)terminated;
                    *match = 1;
                    return 0;
                }
                free(new_out);
            }
        }
    }

    if (rules->contained_keyword == NULL || strcasestr(content, rules->contained_keyword) == NULL)
    {
        return 0;
    }
    if (element_treatment == NULL)
    {
        return 0;
    }

    if (!strcasecmp(element_treatment, "mark"))
    {
        target = html_find_target_ancestor(rules, node);
        if (target != NULL)
        {
            xmlNewProp(target, (const xmlChar *)"need_filter", (const xmlChar *)"true");
            *match = 1;
        }
    }
    else if (!strcasecmp(element_treatment, "remove"))
    {
        target = html_find_target_ancestor(rules, node);
        /* An ancestor without a parent is the top of the tree and is left alone. */
        if (target != NULL && target->parent != NULL)
        {
            parent_array[0] = target;
            *n_parent = *n_parent + 1;
            *match = 1;
        }
    }

    return 0;
}
|
|
|
|
|
|
2021-12-17 16:39:49 +08:00
|
|
|
/*
 * Visit every item of the cJSON array `a`.
 *
 * *step is incremented once per array visited and is used as an index into
 * step_array_level. It is capped at STEP_LEVEL_LAST before any use so that
 * reads and writes stay inside the table (callers in this file pass an
 * int[2048]).
 *
 * For each item the subtree is traversed with cjson_element_foreach(); when
 * *depth is 0 afterwards the rule treatment is applied to the item. As soon
 * as a result of 1 or -2 is seen, the item's index is stored in
 * step_array_level[*step], *node is set to the item's key, *depth is
 * decremented and the result is returned.
 *
 * Returns -1 on traversal error, otherwise the last result obtained (0 for an
 * empty array).
 */
int cjson_dump_array(cJSON *a, int *depth, int *step, int *step_array_level, char **node, const struct edit_element_rule * rules, int *match_num, int loop)
{
    enum { STEP_LEVEL_LAST = 2047 };
    int xret = 0, array_cnt = 0;
    cJSON *a_element = a->child;

    *step = *step + 1;
    /* Cap before the value is used as an index anywhere below. */
    if (*step > STEP_LEVEL_LAST)
    {
        *step = STEP_LEVEL_LAST;
    }

    for (; a_element != NULL; a_element = a_element->next, array_cnt++)
    {
        xret = cjson_element_foreach(a_element, depth, step, step_array_level, node, rules, match_num, loop);
        if (xret == -1)
        {
            return -1;
        }

        if (*depth == 0)
        {
            xret = construct_cjson_by_treatment(a_element, node, step, step_array_level, rules);
        }

        if (xret == 1 || xret == -2)
        {
            step_array_level[*step] = array_cnt;
            *node = a_element->string;
            *depth = *depth - 1;
            return xret;
        }
    }

    return xret;
}
|
|
|
|
|
|
2021-12-17 16:39:49 +08:00
|
|
|
/*
 * Visit every member of the cJSON object `a`.
 *
 * Each member is traversed with cjson_element_foreach(); when *depth is 0
 * afterwards the rule treatment is applied to the member. On a result of 1
 * or -2 the member's key is stored in *node, *depth is decremented and the
 * result is returned immediately.
 *
 * Returns -1 on traversal error, otherwise the last result obtained (0 for an
 * empty object).
 */
int cjson_dump_object(cJSON *a, int *depth, int *step, int *step_array_level, char **node, const struct edit_element_rule * rules, int *match_num, int loop)
{
    int status = 0;
    cJSON *member = NULL;

    cJSON_ArrayForEach(member, a)
    {
        status = cjson_element_foreach(member, depth, step, step_array_level, node, rules, match_num, loop);
        if (status == -1)
        {
            break;
        }

        if (*depth == 0)
        {
            status = construct_cjson_by_treatment(member, node, step, step_array_level, rules);
        }

        if (status == 1 || status == -2)
        {
            *node = member->string;
            *depth = *depth - 1;
            break;
        }
    }

    return status;
}
|
|
|
|
|
|
2021-12-17 16:39:49 +08:00
|
|
|
/*
 * Test a string (or raw) cJSON value against rules->contained_keyword
 * (case-insensitive substring search).
 *
 * When the value does not contain the keyword, or either is NULL, nothing
 * happens and 0 is returned. On a hit:
 *   - with *depth == -1 the hit is only counted (*match_num is incremented)
 *     and 0 is returned;
 *   - otherwise, for the "mark" treatment 1 is returned only when
 *     *match_num equals `loop`; other hits are counted and 0 is returned;
 *   - for any other (or missing) treatment 1 is returned right away.
 */
int cjson_dump_string(cJSON *a, int *depth, const struct edit_element_rule * rules, int *match_num, int loop)
{
    /* Guard both operands: strcasestr must not see NULL. */
    if (a->valuestring == NULL || rules->contained_keyword == NULL)
    {
        return 0;
    }
    if (strcasestr(a->valuestring, rules->contained_keyword) == NULL)
    {
        return 0;
    }

    if (*depth != -1)
    {
        const int is_mark = (rules->element_treatment != NULL)
                            && (strcasecmp(rules->element_treatment, "mark") == 0);

        if (!is_mark || *match_num == loop)
        {
            return 1;
        }
    }

    *match_num = *match_num + 1;
    return 0;
}
|
|
|
|
|
|
2021-12-17 16:39:49 +08:00
|
|
|
/*
 * Dispatch on the type of cJSON item `a` and hand it to the matching
 * cjson_dump_* routine.
 *
 * Strings and raw values go to cjson_dump_string(), arrays to
 * cjson_dump_array(), objects to cjson_dump_object(). Numbers, booleans and
 * null yield 0. A NULL or invalid item, or an unknown type, yields -1.
 */
int cjson_element_foreach(cJSON *a, int *depth, int *step, int *step_array_level, char **node, const struct edit_element_rule * rules, int *match_num, int loop)
{
    int result = 0;

    if ((a == NULL) || cJSON_IsInvalid(a))
    {
        return -1;
    }

    switch (a->type & 0xFF)
    {
        case cJSON_String:
        case cJSON_Raw:
            result = cjson_dump_string(a, depth, rules, match_num, loop);
            break;

        case cJSON_Array:
            result = cjson_dump_array(a, depth, step, step_array_level, node, rules, match_num, loop);
            break;

        case cJSON_Object:
            result = cjson_dump_object(a, depth, step, step_array_level, node, rules, match_num, loop);
            break;

        case cJSON_Number:
        case cJSON_False:
        case cJSON_True:
        case cJSON_NULL:
            result = 0;
            break;

        default:
            result = -1;
            break;
    }

    return result;
}
|
|
|
|
|
|
|
|
|
|
/*
 * Walk a namespace declaration list. No action is taken for any entry; the
 * function only iterates to the end of the list.
 */
static void html_namespace_list(xmlNsPtr ns)
{
    xmlNsPtr cursor;

    for (cursor = ns; cursor != NULL; cursor = cursor->next)
    {
        /* nothing to do per namespace */
    }
}
|
|
|
|
|
|
2022-02-11 09:57:33 +08:00
|
|
|
/*
 * Walk an attribute list and run html_node_list() over the child nodes of
 * every attribute that has any.
 */
static void html_attr_list(const struct edit_element_rule * rules, xmlAttrPtr attr, xmlNodePtr *parent_array, size_t *n_parent, int *match, size_t mark_tag)
{
    xmlAttrPtr current;

    for (current = attr; current != NULL; current = current->next)
    {
        if (current->children == NULL)
        {
            continue;
        }
        html_node_list(rules, current->children, parent_array, n_parent, match, mark_tag);
    }
}
|
|
|
|
|
|
2022-02-11 09:57:33 +08:00
|
|
|
/*
 * Process a single node (without descending into its children).
 *
 * Node types outside the list below are ignored. For element nodes the
 * namespace definitions and attributes are walked. For every other accepted
 * type except entity references, a non-NULL content is handed to
 * construct_html_by_treatment().
 */
static void html_dump_one_node(const struct edit_element_rule * rules, xmlNodePtr node, xmlNodePtr *parent_array, size_t *n_parent, int *match, size_t mark_tag)
{
    switch (node->type)
    {
        case XML_ELEMENT_NODE:
        case XML_ELEMENT_DECL:
        case XML_CDATA_SECTION_NODE:
        case XML_ENTITY_REF_NODE:
        case XML_ENTITY_NODE:
        case XML_PI_NODE:
        case XML_COMMENT_NODE:
        case XML_DOCUMENT_TYPE_NODE:
        case XML_DOCUMENT_FRAG_NODE:
        case XML_NOTATION_NODE:
        case XML_TEXT_NODE:
            break;

        default:
            return;
    }

    if (node->type == XML_ELEMENT_NODE)
    {
        if (node->nsDef != NULL)
        {
            html_namespace_list(node->nsDef);
        }
        if (node->properties != NULL)
        {
            html_attr_list(rules, node->properties, parent_array, n_parent, match, mark_tag);
        }
        /* Element nodes never take the content path below. */
        return;
    }

    if (node->type == XML_ENTITY_REF_NODE || node->content == NULL)
    {
        return;
    }

    construct_html_by_treatment(rules, node, parent_array, n_parent, match);
}
|
|
|
|
|
|
2022-02-11 09:57:33 +08:00
|
|
|
/*
 * Process `node` and then its children.
 *
 * When mark_tag is 0 and a match has been recorded (*match == 1) the
 * children are not visited. Children of namespace declarations and entity
 * references are never visited.
 */
static void html_dump_node(const struct edit_element_rule * rules, xmlNodePtr node, xmlNodePtr *parent_array, size_t *n_parent, int *match, size_t mark_tag)
{
    html_dump_one_node(rules, node, parent_array, n_parent, match, mark_tag);

    const int stop_at_first_match = (mark_tag == 0);
    if (stop_at_first_match && *match == 1)
    {
        return;
    }

    if (node->type == XML_NAMESPACE_DECL || node->type == XML_ENTITY_REF_NODE)
    {
        return;
    }

    if (node->children != NULL)
    {
        html_node_list(rules, node->children, parent_array, n_parent, match, mark_tag);
    }
}
|
|
|
|
|
|
2022-02-11 09:57:33 +08:00
|
|
|
/*
 * Process `node` and each of its following siblings in order.
 *
 * When mark_tag is 0 the walk stops right after the first sibling that
 * leaves *match set to 1.
 */
static void html_node_list(const struct edit_element_rule * rules, xmlNodePtr node, xmlNodePtr *parent_array, size_t *n_parent, int *match, size_t mark_tag)
{
    xmlNodePtr sibling;

    for (sibling = node; sibling != NULL; sibling = sibling->next)
    {
        html_dump_node(rules, sibling, parent_array, n_parent, match, mark_tag);

        if (mark_tag == 0 && *match == 1)
        {
            break;
        }
    }
}
|
|
|
|
|
|
2022-02-11 09:57:33 +08:00
|
|
|
/*
 * Entry point of the HTML tree walk: traverse all top-level nodes of `doc`.
 * Nothing happens unless `doc` is an XML or HTML document node with children.
 */
static void html_element_foreach(const struct edit_element_rule * rules, xmlDocPtr doc, xmlNodePtr *parent_array, size_t *n_parent, int *match, size_t mark_tag)
{
    const int is_document = (doc->type == XML_DOCUMENT_NODE) || (doc->type == XML_HTML_DOCUMENT_NODE);

    if (!is_document || doc->children == NULL)
    {
        return;
    }

    html_node_list(rules, doc->children, parent_array, n_parent, match, mark_tag);
}
|
|
|
|
|
|
2021-12-17 16:39:49 +08:00
|
|
|
size_t parse_json_output_unformatted(const char * in, size_t in_sz, const struct edit_element_rule * rules, char** out)
|
2021-12-06 18:14:10 +08:00
|
|
|
{
|
2021-12-10 13:32:40 +08:00
|
|
|
int match_num_peer=0;
|
2021-12-06 18:14:10 +08:00
|
|
|
int step=0, depth=0, match_num=0,i=0, match=0;
|
|
|
|
|
cJSON* interator=NULL;
|
|
|
|
|
char* new_out = NULL, *node=NULL;
|
|
|
|
|
size_t outlen=0;
|
|
|
|
|
char *element_treatment=NULL;
|
|
|
|
|
|
2021-12-17 16:39:49 +08:00
|
|
|
int step_array_level[2048] = {0};
|
2021-12-10 13:32:40 +08:00
|
|
|
|
2021-12-06 18:14:10 +08:00
|
|
|
interator = cJSON_Parse(in);
|
|
|
|
|
if(interator==NULL)
|
|
|
|
|
{
|
|
|
|
|
goto finish;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
depth = -1;
|
|
|
|
|
element_treatment=rules->element_treatment;
|
|
|
|
|
|
2021-12-17 16:39:49 +08:00
|
|
|
cjson_element_foreach(interator, &depth, &step, step_array_level, &node, rules, &match_num, 0);
|
2021-12-10 13:32:40 +08:00
|
|
|
match_num_peer = match_num;
|
|
|
|
|
for(i=0; i< match_num_peer; i++)
|
2021-12-06 18:14:10 +08:00
|
|
|
{
|
2021-12-08 19:05:44 +08:00
|
|
|
depth = (rules->distane_from_matching + 1);
|
2021-12-10 13:32:40 +08:00
|
|
|
step=0; node=NULL; match_num=0;
|
2021-12-17 16:39:49 +08:00
|
|
|
memset(step_array_level, 0, sizeof(step_array_level));
|
2021-12-10 13:32:40 +08:00
|
|
|
|
2021-12-17 16:39:49 +08:00
|
|
|
match |= cjson_element_foreach(interator, &depth, &step, step_array_level, &node, rules, &match_num, i);
|
2021-12-06 18:14:10 +08:00
|
|
|
|
|
|
|
|
if(!strcasecmp(element_treatment, "remove") && match == 1 && node != NULL && depth == 0)
|
|
|
|
|
{
|
|
|
|
|
cJSON_DeleteItemFromObject(interator, node);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if(match==1 && element_treatment != NULL && !strcasecmp(element_treatment, "mark"))
|
|
|
|
|
{
|
2021-12-10 13:32:40 +08:00
|
|
|
if(interator->type==cJSON_Object)
|
|
|
|
|
{
|
|
|
|
|
cJSON_AddBoolToObject(interator, "need_check", true);
|
|
|
|
|
}
|
2021-12-17 16:39:49 +08:00
|
|
|
if(interator->type==cJSON_Array)
|
|
|
|
|
{
|
|
|
|
|
cJSON *child = interator->child;
|
|
|
|
|
for (; (child != NULL);)
|
|
|
|
|
{
|
|
|
|
|
cJSON_AddBoolToObject(child, "need_check", true);
|
|
|
|
|
child = child->next;
|
|
|
|
|
}
|
|
|
|
|
}
|
2021-12-06 18:14:10 +08:00
|
|
|
}
|
|
|
|
|
|
2022-03-11 10:26:56 +08:00
|
|
|
if(match == 0 || match == -2)
|
2022-02-11 09:57:33 +08:00
|
|
|
{
|
|
|
|
|
goto finish;
|
|
|
|
|
}
|
|
|
|
|
|
2021-12-06 18:14:10 +08:00
|
|
|
new_out = cJSON_PrintUnformatted(interator);
|
|
|
|
|
if(new_out!=NULL)
|
|
|
|
|
{
|
|
|
|
|
*out = new_out;
|
|
|
|
|
outlen = strlen(new_out);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
finish:
|
|
|
|
|
if(interator != NULL)
|
|
|
|
|
cJSON_Delete(interator);
|
|
|
|
|
return outlen;
|
|
|
|
|
}
|
|
|
|
|
|
2021-12-17 16:39:49 +08:00
|
|
|
size_t format_json_file_type(const char * in, size_t in_sz, const struct edit_element_rule * rules, char** out)
|
2021-12-06 18:14:10 +08:00
|
|
|
{
|
2021-12-10 13:32:40 +08:00
|
|
|
int match_num_peer=0;
|
2021-12-06 18:14:10 +08:00
|
|
|
int step=0, depth=0, match=0, i=0;
|
|
|
|
|
cJSON* interator=NULL;
|
|
|
|
|
char* new_out = NULL, *node=NULL;
|
|
|
|
|
size_t outlen=0; int match_num=0;
|
|
|
|
|
char *element_treatment=NULL;
|
|
|
|
|
|
2021-12-17 16:39:49 +08:00
|
|
|
int step_array_level[2048] = {0};
|
2021-12-10 13:32:40 +08:00
|
|
|
|
2021-12-06 18:14:10 +08:00
|
|
|
char*new_in = ALLOC(char, in_sz+1);
|
|
|
|
|
memcpy(new_in, in, in_sz);
|
|
|
|
|
|
|
|
|
|
interator = cJSON_Parse(new_in);
|
|
|
|
|
if(interator==NULL)
|
|
|
|
|
{
|
|
|
|
|
goto finish;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
depth = -1;
|
|
|
|
|
element_treatment=rules->element_treatment;
|
|
|
|
|
|
2021-12-10 13:32:40 +08:00
|
|
|
/*When the node has inclusion relation, cjson is not null when deleted
|
|
|
|
|
So multiple loops delete **/
|
2021-12-17 16:39:49 +08:00
|
|
|
cjson_element_foreach(interator, &depth, &step, step_array_level, &node, rules, &match_num, 0);
|
2021-12-10 13:32:40 +08:00
|
|
|
match_num_peer = match_num;
|
|
|
|
|
for(i=0; i< match_num_peer; i++)
|
2021-12-06 18:14:10 +08:00
|
|
|
{
|
2021-12-08 19:05:44 +08:00
|
|
|
depth = (rules->distane_from_matching + 1);
|
2021-12-10 13:32:40 +08:00
|
|
|
step=0; node=NULL; match_num=0;
|
2021-12-17 16:39:49 +08:00
|
|
|
memset(step_array_level, 0, sizeof(step_array_level));
|
2021-12-10 13:32:40 +08:00
|
|
|
|
2021-12-17 16:39:49 +08:00
|
|
|
match |= cjson_element_foreach(interator, &depth, &step, step_array_level, &node, rules, &match_num, i);
|
2021-12-06 18:14:10 +08:00
|
|
|
|
|
|
|
|
if(!strcasecmp(element_treatment, "remove") && match == 1 && node != NULL && depth == 0)
|
|
|
|
|
{
|
|
|
|
|
cJSON_DeleteItemFromObject(interator, node);
|
|
|
|
|
}
|
2021-12-10 13:32:40 +08:00
|
|
|
|
|
|
|
|
match_num--;
|
2021-12-06 18:14:10 +08:00
|
|
|
}
|
|
|
|
|
|
2022-03-11 10:26:56 +08:00
|
|
|
if(match == 0 || match == -2)
|
2021-12-06 18:14:10 +08:00
|
|
|
{
|
|
|
|
|
goto finish;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if(element_treatment != NULL && !strcasecmp(element_treatment, "mark"))
|
|
|
|
|
{
|
2021-12-10 13:32:40 +08:00
|
|
|
if(interator->type==cJSON_Object)
|
|
|
|
|
{
|
|
|
|
|
cJSON_AddBoolToObject(interator, "need_check", true);
|
|
|
|
|
}
|
2021-12-17 16:39:49 +08:00
|
|
|
|
|
|
|
|
if(interator->type==cJSON_Array)
|
|
|
|
|
{
|
|
|
|
|
cJSON *child = interator->child;
|
|
|
|
|
for (; (child != NULL);)
|
|
|
|
|
{
|
|
|
|
|
cJSON_AddBoolToObject(child, "need_check", true);
|
|
|
|
|
child = child->next;
|
|
|
|
|
}
|
|
|
|
|
}
|
2021-12-06 18:14:10 +08:00
|
|
|
}
|
|
|
|
|
|
2021-12-17 16:39:49 +08:00
|
|
|
new_out = cJSON_PrintUnformatted(interator);
|
2021-12-06 18:14:10 +08:00
|
|
|
if(new_out!=NULL)
|
|
|
|
|
{
|
|
|
|
|
*out = new_out;
|
|
|
|
|
outlen = strlen(*out);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
finish:
|
|
|
|
|
if(interator != NULL)
|
|
|
|
|
cJSON_Delete(interator);
|
|
|
|
|
FREE(&new_in);
|
|
|
|
|
return outlen;
|
|
|
|
|
}
|
|
|
|
|
|
2021-12-17 16:39:49 +08:00
|
|
|
/*
 * Apply one edit rule to newline-separated JSON text (one document per line).
 *
 * Each non-empty line is passed to parse_json_output_unformatted(). Lines for
 * which that call produces output are appended to the result, separated by
 * "\r\n"; lines that produce no output are not copied to the result.
 * NOTE(review): dropping unmatched lines mirrors the previous behaviour -
 * confirm that callers expect it.
 *
 * On success *out receives a NUL-terminated heap buffer (caller frees) and
 * its length is returned. When no line produced output, or memory runs out,
 * 0 is returned and *out is left untouched.
 */
size_t format_multidelete_json_type(const char * in, size_t in_sz, const struct edit_element_rule * rules, char** out)
{
    char *lines = NULL, *line = NULL, *saveptr = NULL;
    char *result = NULL;
    size_t result_len = 0, result_cap = 0;

    /* strtok_r writes into its input, so work on a terminated private copy. */
    lines = calloc(1, in_sz + 1);
    if (lines == NULL)
    {
        return 0;
    }
    memcpy(lines, in, in_sz);

    for (line = strtok_r(lines, "\n", &saveptr); line != NULL; line = strtok_r(NULL, "\n", &saveptr))
    {
        char *edited = NULL;
        size_t edited_len = parse_json_output_unformatted(line, strlen(line), rules, &edited);
        size_t needed = 0;

        if (edited_len == 0 || edited == NULL)
        {
            free(edited);
            continue;
        }

        /* Room for the text, the "\r\n" separator and a terminator. */
        needed = result_len + edited_len + 2 + 1;
        if (needed > result_cap)
        {
            size_t new_cap = (result_cap != 0) ? result_cap : (in_sz + in_sz / 3 + 3);
            char *grown = NULL;

            while (new_cap < needed)
            {
                new_cap *= 2;
            }
            grown = realloc(result, new_cap);
            if (grown == NULL)
            {
                free(edited);
                free(result);
                free(lines);
                return 0;
            }
            result = grown;
            result_cap = new_cap;
        }

        memcpy(result + result_len, edited, edited_len);
        result_len += edited_len;
        memcpy(result + result_len, "\r\n", 2);
        result_len += 2;

        free(edited);
    }

    free(lines);

    if (result_len == 0)
    {
        /* Nothing was produced: no buffer is handed out. */
        return 0;
    }

    /* Drop the separator that follows the last line. */
    result_len -= 2;
    result[result_len] = '\0';

    *out = result;
    return result_len;
}
|
|
|
|
|
|
|
|
|
|
/*
 * Serialize `doc` as HTML (UTF-8, no XML declaration) into a newly allocated
 * string.
 *
 * On success *out receives the buffer (allocated with ALLOC; caller frees)
 * and the text length is returned. On any failure 0 is returned and *out is
 * left untouched.
 */
size_t construct_format_html(htmlDocPtr doc, char**out)
{
    size_t outlen = 0;
    long save_rc = 0;
    const xmlChar *serialized = NULL;
    xmlSaveCtxtPtr save_ctx = NULL;
    xmlBufferPtr out_buffer = NULL;

    out_buffer = xmlBufferCreate();
    if (out_buffer == NULL)
    {
        return 0;
    }

    save_ctx = xmlSaveToBuffer(out_buffer, "UTF-8", XML_SAVE_NO_DECL | XML_SAVE_AS_HTML);
    if (save_ctx == NULL)
    {
        goto finish;
    }

    save_rc = xmlSaveDoc(save_ctx, doc);
    /* Close on every path so the save context is released even when saving failed. */
    xmlSaveClose(save_ctx);
    if (save_rc < 0)
    {
        goto finish;
    }

    serialized = xmlBufferContent(out_buffer);
    if (serialized != NULL)
    {
        size_t serialized_len = strlen((const char *)serialized);
        char *copy = ALLOC(char, serialized_len + 1);

        if (copy != NULL)
        {
            memcpy(copy, (const char *)serialized, serialized_len);
            copy[serialized_len] = '\0';

            *out = copy;
            outlen = serialized_len;
        }
    }

finish:
    xmlBufferFree(out_buffer);
    return outlen;
}
|
|
|
|
|
|
2021-12-17 16:39:49 +08:00
|
|
|
size_t format_input_html(const char * in, size_t in_sz, const struct edit_element_rule * rules, char** out)
|
2021-12-06 18:14:10 +08:00
|
|
|
{
|
|
|
|
|
size_t outlen=0, n_parent=0, n_parent_peer=0;
|
|
|
|
|
int match=0, i=0;
|
|
|
|
|
htmlDocPtr doc = NULL;
|
|
|
|
|
const char *element_treatment=NULL;
|
|
|
|
|
xmlNodePtr parent_array[16];
|
2021-12-17 16:39:49 +08:00
|
|
|
int options = XML_PARSE_NOERROR | HTML_PARSE_NODEFDTD;
|
2021-12-06 18:14:10 +08:00
|
|
|
|
|
|
|
|
doc = htmlReadMemory(in, in_sz, NULL, NULL, options);
|
|
|
|
|
if (doc == NULL)
|
|
|
|
|
{
|
|
|
|
|
goto finish;
|
|
|
|
|
}
|
|
|
|
|
|
2022-02-11 09:57:33 +08:00
|
|
|
/*Delete all at once, valgrind is invalid read **/
|
|
|
|
|
html_element_foreach(rules, doc, parent_array, &n_parent, &match, 1);
|
2021-12-06 18:14:10 +08:00
|
|
|
if(match != 1)
|
|
|
|
|
{
|
|
|
|
|
goto finish;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
n_parent_peer = n_parent;
|
|
|
|
|
element_treatment=rules->element_treatment;
|
|
|
|
|
|
2022-02-11 09:57:33 +08:00
|
|
|
/*When the node has inclusion relation, libxml2 is not null when deleted
|
|
|
|
|
So multiple loops delete **/
|
2021-12-06 18:14:10 +08:00
|
|
|
if(element_treatment != NULL && !strcasecmp(element_treatment, "remove"))
|
|
|
|
|
{
|
|
|
|
|
for(i=0; i < (int)n_parent_peer; i++)
|
|
|
|
|
{
|
|
|
|
|
match =0; n_parent = 0;
|
2022-02-11 09:57:33 +08:00
|
|
|
html_element_foreach(rules, doc, parent_array, &n_parent, &match, 0);
|
|
|
|
|
if(match == 1 && n_parent > 0)
|
2021-12-06 18:14:10 +08:00
|
|
|
{
|
|
|
|
|
xmlUnlinkNode(parent_array[0]);
|
|
|
|
|
xmlFreeNode(parent_array[0]);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if(element_treatment != NULL && !strcasecmp(element_treatment, "mark"))
|
|
|
|
|
{
|
|
|
|
|
if(doc->children != NULL && doc->children->next != NULL)
|
|
|
|
|
{
|
2022-03-11 10:26:56 +08:00
|
|
|
xmlNodePtr node = doc->children->next;
|
|
|
|
|
while(node->type != XML_ELEMENT_NODE)
|
|
|
|
|
{
|
|
|
|
|
node = node->next;
|
|
|
|
|
}
|
|
|
|
|
xmlNewProp(node, (const xmlChar *)"need_check", (const xmlChar *)"true");
|
2021-12-06 18:14:10 +08:00
|
|
|
}
|
2021-12-17 16:39:49 +08:00
|
|
|
else if(doc->children != NULL)
|
|
|
|
|
{
|
|
|
|
|
xmlNewProp(doc->children, (const xmlChar *)"need_check", (const xmlChar *)"true");
|
|
|
|
|
}
|
2021-12-06 18:14:10 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
outlen = construct_format_html(doc, out);
|
|
|
|
|
if(outlen<=0)
|
|
|
|
|
{
|
|
|
|
|
outlen=0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
finish:
|
|
|
|
|
if(doc!=NULL)
|
|
|
|
|
{
|
|
|
|
|
xmlFreeDoc(doc);
|
|
|
|
|
}
|
|
|
|
|
return outlen;
|
|
|
|
|
}
|
|
|
|
|
|
2021-12-17 16:39:49 +08:00
|
|
|
/*
 * Route non-JSON-mode input by its first byte: text starting with '{' or '['
 * is handled as line-separated JSON, anything else as HTML.
 * Returns whatever the selected handler returns.
 */
size_t format_html_file_type(const char * interator, size_t interator_sz, const struct edit_element_rule *rule, char **new_out)
{
    const char first = interator[0];

    if (first == '{' || first == '[')
    {
        return format_multidelete_json_type(interator, interator_sz, rule, new_out);
    }

    return format_input_html(interator, interator_sz, rule, new_out);
}
|
|
|
|
|
|
2021-12-17 16:39:49 +08:00
|
|
|
/*
 * Apply one rule to the input: a non-zero `options` selects the JSON handler,
 * zero selects the HTML handler. Returns the handler's result.
 */
size_t parse_string(const char * interator, size_t interator_sz, const struct edit_element_rule *rule, char **new_out, int options)
{
    return options
        ? format_json_file_type(interator, interator_sz, rule, new_out)
        : format_html_file_type(interator, interator_sz, rule, new_out);
}
|
|
|
|
|
|
2021-12-17 16:39:49 +08:00
|
|
|
size_t execute_edit_element_rule(const char * in, size_t in_sz, const struct edit_element_rule *rules, size_t n_rule, char** out, int options)
|
2021-12-06 18:14:10 +08:00
|
|
|
{
|
2021-12-17 16:39:49 +08:00
|
|
|
const struct edit_element_rule * todo = rules;
|
2021-12-06 18:14:10 +08:00
|
|
|
size_t i = 0, interator_sz=0, pre_out_sz=0;
|
|
|
|
|
const char * interator = NULL;
|
|
|
|
|
char* new_out = NULL, * pre_out = NULL;
|
|
|
|
|
size_t output_size=0;
|
|
|
|
|
if (in_sz == 0 || in==NULL)
|
|
|
|
|
{
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
interator = in;
|
|
|
|
|
interator_sz = in_sz;
|
|
|
|
|
for (i = 0; i < n_rule; i++)
|
|
|
|
|
{
|
|
|
|
|
output_size = parse_string(interator, interator_sz, &(todo[i]), &new_out, options);
|
|
|
|
|
if (output_size == 0)
|
|
|
|
|
{
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
if (pre_out != NULL)
|
|
|
|
|
{
|
|
|
|
|
free(pre_out);
|
|
|
|
|
pre_out = NULL;
|
|
|
|
|
}
|
|
|
|
|
pre_out = new_out;
|
|
|
|
|
pre_out_sz = output_size;
|
|
|
|
|
|
|
|
|
|
interator = new_out;
|
|
|
|
|
interator_sz = output_size;
|
|
|
|
|
|
|
|
|
|
new_out=NULL;
|
|
|
|
|
output_size=0;
|
|
|
|
|
}
|
|
|
|
|
if(pre_out_sz>0)
|
|
|
|
|
{
|
|
|
|
|
*out=pre_out;
|
|
|
|
|
return pre_out_sz;
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2021-12-17 16:39:49 +08:00
|
|
|
size_t __attribute__((__unused__))
|
|
|
|
|
format_edit_element_rule(struct edit_element_rule *edit_element, const char *user_region, size_t n_edit_element)
|
|
|
|
|
{
|
|
|
|
|
size_t idx=0;
|
|
|
|
|
cJSON *json=NULL, *rules=NULL, *item=NULL, *sub_item=NULL;
|
|
|
|
|
|
|
|
|
|
json=cJSON_Parse(user_region);
|
|
|
|
|
if(json !=NULL )
|
|
|
|
|
{
|
|
|
|
|
rules = cJSON_GetObjectItem(json, "rules");
|
|
|
|
|
if(rules == NULL)
|
|
|
|
|
{
|
|
|
|
|
goto finish;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
idx = 0;
|
|
|
|
|
for (item = rules->child; item != NULL; item = item->next)
|
|
|
|
|
{
|
|
|
|
|
sub_item=cJSON_GetObjectItem(item,"anchor_element");
|
|
|
|
|
if(sub_item != NULL && sub_item->type ==cJSON_Object)
|
|
|
|
|
{
|
|
|
|
|
char * search_scope = cJSON_GetObjectItem(sub_item , "search_scope")->valuestring;
|
|
|
|
|
if (search_scope == NULL) break;
|
|
|
|
|
|
|
|
|
|
edit_element[idx].scope = scope_name_to_id(search_scope);
|
|
|
|
|
if (edit_element[idx].scope == KScopeMax)
|
|
|
|
|
{
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
if(edit_element[idx].scope == kScopeInside)
|
|
|
|
|
{
|
|
|
|
|
edit_element[idx].start_indicator = tfe_strdup(cJSON_GetObjectItem(sub_item , "start_indicator")->valuestring);
|
|
|
|
|
}
|
|
|
|
|
edit_element[idx].contained_keyword = tfe_strdup(cJSON_GetObjectItem(sub_item,"contained_keyword")->valuestring);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
sub_item=cJSON_GetObjectItem(item,"target_element");
|
|
|
|
|
if(sub_item != NULL && sub_item->type ==cJSON_Object)
|
|
|
|
|
{
|
|
|
|
|
edit_element[idx].distane_from_matching = cJSON_GetObjectItem(sub_item , "target_distance_from_matching")->valueint;
|
|
|
|
|
edit_element[idx].element_treatment = tfe_strdup(cJSON_GetObjectItem(sub_item,"element_treatment")->valuestring);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (idx == n_edit_element)
|
|
|
|
|
{
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
idx++;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
finish:
|
|
|
|
|
if (json) cJSON_Delete(json);
|
|
|
|
|
return idx;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void simple_edit_element(const char *user_region, const char* input, size_t in_sz, char** output, size_t *output_sz, int options)
|
|
|
|
|
{
|
|
|
|
|
size_t n_got_rule=0, i=0;
|
|
|
|
|
struct edit_element_rule rules[16];
|
|
|
|
|
memset(rules, 0, sizeof(struct edit_element_rule)*16);
|
|
|
|
|
|
|
|
|
|
n_got_rule=format_edit_element_rule(rules, user_region, sizeof(rules)/sizeof(rules[0]));
|
|
|
|
|
*output_sz=execute_edit_element_rule(input, strlen(input), rules, n_got_rule, output, options);
|
|
|
|
|
for(i=0; i<n_got_rule; i++)
|
|
|
|
|
{
|
|
|
|
|
if(rules[i].start_indicator!=NULL)
|
|
|
|
|
{
|
|
|
|
|
FREE(&(rules[i].start_indicator));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
FREE(&(rules[i].element_treatment));
|
|
|
|
|
FREE(&(rules[i].contained_keyword));
|
|
|
|
|
}
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|