TSG-8550 Proxy Policy支持对网页中的元素进行编辑 Element Editing
This commit is contained in:
@@ -1,7 +1,8 @@
|
||||
add_library(pangu-http src/pangu_logger.cpp src/pangu_http.cpp src/pattern_replace.cpp src/pangu_web_cache.cpp)
|
||||
add_library(pangu-http src/pangu_logger.cpp src/pangu_http.cpp src/pattern_replace.cpp src/pangu_web_cache.cpp src/pangu_element_edit.cpp)
|
||||
target_link_libraries(pangu-http PUBLIC common http tango-cache-client)
|
||||
target_link_libraries(pangu-http PUBLIC rdkafka ctemplate-static cjson pcre2-static ratelimiter-static libdablooms pthread)
|
||||
target_link_libraries(pangu-http PUBLIC maatframe)
|
||||
target_link_libraries(pangu-http PUBLIC libxml2-static z)
|
||||
|
||||
add_executable(test_pattern_replace src/test_pattern_replace.cpp src/pattern_replace.cpp)
|
||||
target_link_libraries(test_pattern_replace common gtest pcre2-static)
|
||||
|
||||
667
plugin/business/pangu-http/src/pangu_element_edit.cpp
Normal file
667
plugin/business/pangu-http/src/pangu_element_edit.cpp
Normal file
@@ -0,0 +1,667 @@
|
||||
#include <tfe_utils.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <stdlib.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include <libxml/tree.h>
|
||||
#include <libxml/xmlsave.h>
|
||||
#include <libxml/HTMLparser.h>
|
||||
#include <cjson/cJSON.h>
|
||||
|
||||
#include "pangu_element_edit.h"
|
||||
|
||||
int cjson_element_foreach(cJSON *a, int *depth, int *step, char **node, const struct element_rule * rules, int *match_num);
|
||||
static void html_node_list(const struct element_rule * rules, xmlNodePtr node, xmlNodePtr *parent_array, size_t *n_parent, int *match);
|
||||
|
||||
enum search_scope scope_name_to_id(const char * name)
|
||||
{
|
||||
const char * std_name[] = {"inside_element","whole_file"};
|
||||
size_t i = 0;
|
||||
for (i = 0; i < sizeof(std_name) / sizeof(const char *); i++)
|
||||
{
|
||||
if (0 == strcasecmp(name, std_name[i]))
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
return (enum search_scope) i;
|
||||
}
|
||||
|
||||
/**
 * Check whether an element "starts with" the configured indicator.
 *
 * Only the value of the element's FIRST attribute is examined: the content
 * of that attribute's first child node is compared case-insensitively with
 * start_indicator.
 *
 * @param parent           candidate node reached by walking up from a match.
 * @param start_indicator  expected attribute value; may be NULL.
 * @return 1 on match, 0 otherwise (including NULL arguments and non-element
 *         nodes).
 */
int match_start_indicator(xmlNodePtr parent, char * start_indicator)
{
    if (parent == NULL || start_indicator == NULL)
    {
        return 0;
    }

    /* The ancestor walk in this file can hand over attribute or document
     * nodes cast to xmlNodePtr. Only element nodes carry a `properties`
     * list, so anything else must be rejected before touching that field. */
    if (parent->type != XML_ELEMENT_NODE)
    {
        return 0;
    }

    xmlAttrPtr first_attr = parent->properties;
    if (first_attr == NULL || first_attr->children == NULL || first_attr->children->content == NULL)
    {
        return 0;
    }

    return (strcasecmp((const char *)first_attr->children->content, start_indicator) == 0) ? 1 : 0;
}
|
||||
|
||||
/**
 * Apply the rule's treatment to a JSON item selected by the tree walk.
 *
 * - "mark":   adds the boolean member "need_filter": true to a_element.
 * - "remove": deletes the member named *node when a_element is an object,
 *             or the entry at index *step when a_element is an array. With
 *             scope kScopeInside the removal is only performed when
 *             a_element's own key equals start_indicator
 *             (case-insensitive); items without a key are not filtered.
 *
 * Any other (or missing) treatment is a no-op.
 *
 * @return always 0.
 */
int construct_cjson_by_treatment(cJSON *a_element, char **node, int *step, const struct element_rule * rules)
{
    const char *element_treatment = rules->element_treatment;
    if (element_treatment == NULL)
    {
        return 0;
    }

    if (!strcasecmp(element_treatment, "mark"))
    {
        cJSON_AddBoolToObject(a_element, "need_filter", true);
        return 0;
    }

    if (strcasecmp(element_treatment, "remove") != 0)
    {
        return 0;
    }

    if (rules->scope == kScopeInside)
    {
        const char *start_indicator = rules->start_indicator;

        /* A keyed item only qualifies when its key equals the indicator.
         * A missing indicator can never match, and must not reach
         * strcasecmp(). */
        if (a_element->string != NULL &&
            (start_indicator == NULL || strcasecmp(a_element->string, start_indicator) != 0))
        {
            return 0;
        }
    }

    /* Mask the flag bits the same way cjson_element_foreach() does. */
    const int item_type = a_element->type & 0xFF;

    if (item_type == cJSON_Object && *node != NULL)
    {
        cJSON_DeleteItemFromObject(a_element, *node);
    }
    if (item_type == cJSON_Array)
    {
        cJSON_DeleteItemFromArray(a_element, *step);
    }

    return 0;
}
|
||||
|
||||
int construct_html_by_treatment(const struct element_rule * rules, xmlNodePtr node, xmlNodePtr *parent_array, size_t *n_parent, int *match)
|
||||
{
|
||||
int k=0;
|
||||
|
||||
if(strcasestr((char *)node->content, rules->contained_keyword) == NULL)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
const char *element_treatment=rules->element_treatment;
|
||||
int distane_from_matching = rules->distane_from_matching;
|
||||
|
||||
if(element_treatment != NULL && !strcasecmp(element_treatment, "mark"))
|
||||
{
|
||||
xmlNodePtr parent = node->parent;
|
||||
k++;
|
||||
while (parent != NULL)
|
||||
{
|
||||
if(k == distane_from_matching)
|
||||
{
|
||||
xmlNewProp(parent, (const xmlChar *)"need_filter", (const xmlChar *)"true");
|
||||
*match =1;
|
||||
break;
|
||||
}
|
||||
k++;
|
||||
parent = parent->parent;
|
||||
}
|
||||
}
|
||||
|
||||
char * start_indicator = rules->start_indicator;
|
||||
if(element_treatment != NULL && !strcasecmp(element_treatment, "remove"))
|
||||
{
|
||||
xmlNodePtr parent = node->parent;
|
||||
k++;
|
||||
while (parent != NULL)
|
||||
{
|
||||
if(k == distane_from_matching)
|
||||
{
|
||||
if (rules->scope == kScopeInside && match_start_indicator(parent, start_indicator) == 0)
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
/*This is the top floor, Don't deal with**/
|
||||
if(parent->parent == NULL)
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
if(*n_parent < 16)
|
||||
{
|
||||
parent_array[*n_parent] = parent;
|
||||
*n_parent = *n_parent+1;
|
||||
}
|
||||
|
||||
*match =1;
|
||||
break;
|
||||
}
|
||||
|
||||
k++;
|
||||
parent = parent->parent;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
 * Walk the entries of a JSON array.
 *
 * Each entry is handed to cjson_element_foreach(). While *depth is 0 the
 * treatment is applied to the entry just visited. When the recursion
 * reports a hit (1), the entry's key is stored in *node, *depth is
 * decremented and the hit is propagated to the caller. *step is advanced
 * once per entry that did not produce a hit.
 *
 * @return -1 on an invalid item, 1 on a hit, otherwise the result of the
 *         last entry visited (0 for an empty array).
 */
int cjson_dump_array(cJSON *a, int *depth, int *step, char **node, const struct element_rule * rules, int *match_num)
{
    int rc = 0;
    cJSON *entry = a->child;

    while (entry != NULL)
    {
        rc = cjson_element_foreach(entry, depth, step, node, rules, match_num);
        if (rc == -1)
        {
            return -1;
        }

        if (*depth == 0)
        {
            construct_cjson_by_treatment(entry, node, step, rules);
        }

        if (rc == 1)
        {
            *node = entry->string;
            *depth -= 1;
            return 1;
        }

        *step += 1;
        entry = entry->next;
    }

    return rc;
}
|
||||
|
||||
/**
 * Walk the members of a JSON object.
 *
 * Same protocol as the array walker, except that *step is not advanced:
 * every member is visited recursively, the treatment is applied while
 * *depth is 0, and a hit (1) records the member's key in *node, decrements
 * *depth and is returned immediately.
 *
 * @return -1 on an invalid item, 1 on a hit, otherwise the result of the
 *         last member visited (0 for an empty object).
 */
int cjson_dump_object(cJSON *a, int *depth, int *step, char **node, const struct element_rule * rules, int *match_num)
{
    int rc = 0;
    cJSON *member = NULL;

    for (member = (a != NULL) ? a->child : NULL; member != NULL; member = member->next)
    {
        rc = cjson_element_foreach(member, depth, step, node, rules, match_num);
        if (rc == -1)
        {
            return -1;
        }

        if (*depth == 0)
        {
            construct_cjson_by_treatment(member, node, step, rules);
        }

        if (rc == 1)
        {
            *node = member->string;
            *depth -= 1;
            return 1;
        }
    }

    return rc;
}
|
||||
|
||||
/**
 * Test one JSON string (or raw) item against the rule's keyword.
 *
 * The walk runs in two modes, selected by *depth:
 *  - *depth == -1 (counting pass): every item containing the keyword
 *    increments *match_num and 0 is returned so the walk continues.
 *  - any other value (locating pass): the first item containing the keyword
 *    returns 1 so the callers can unwind to the target ancestor.
 *
 * The keyword comparison is case-insensitive. Items without a string value
 * and rules without a keyword never match.
 *
 * @return 1 when a hit must be propagated, 0 otherwise.
 */
int cjson_dump_string(cJSON *a, int *depth, const struct element_rule * rules, int *match_num)
{
    /* strcasestr() must not see a NULL haystack or needle. */
    if (a->valuestring == NULL || rules->contained_keyword == NULL)
    {
        return 0;
    }

    if (strcasestr(a->valuestring, rules->contained_keyword) == NULL)
    {
        return 0;
    }

    if (*depth != -1)
    {
        return 1;
    }

    *match_num = *match_num + 1;
    return 0;
}
|
||||
|
||||
/**
 * Dispatch one JSON item to the handler for its type.
 *
 * Strings and raw items go to the keyword test, arrays and objects are
 * walked recursively, and numbers / booleans / null are ignored.
 *
 * @return -1 for a NULL, invalid or unknown item; otherwise the handler's
 *         result (0 for ignored scalar types).
 */
int cjson_element_foreach(cJSON *a, int *depth, int *step, char **node, const struct element_rule * rules, int *match_num)
{
    int rc = -1;

    if (a == NULL || cJSON_IsInvalid(a))
    {
        return rc;
    }

    /* Only the low byte carries the item type; higher bits are flags. */
    switch (a->type & 0xFF)
    {
        case cJSON_String:
        case cJSON_Raw:
            rc = cjson_dump_string(a, depth, rules, match_num);
            break;

        case cJSON_Array:
            rc = cjson_dump_array(a, depth, step, node, rules, match_num);
            break;

        case cJSON_Object:
            rc = cjson_dump_object(a, depth, step, node, rules, match_num);
            break;

        case cJSON_Number:
        case cJSON_False:
        case cJSON_True:
        case cJSON_NULL:
            rc = 0;
            break;

        default:
            rc = -1;
            break;
    }

    return rc;
}
|
||||
|
||||
/* Iterates over a namespace list without acting on the entries; the
 * traversal itself has no effect on the document or on the match state. */
static void html_namespace_list(xmlNsPtr ns)
{
    xmlNsPtr cur;

    for (cur = ns; cur != NULL; cur = cur->next)
    {
        /* intentionally empty */
    }
}
|
||||
|
||||
static void html_attr_list(const struct element_rule * rules, xmlAttrPtr attr, xmlNodePtr *parent_array, size_t *n_parent, int *match)
|
||||
{
|
||||
while (attr != NULL)
|
||||
{
|
||||
if (attr->children != NULL)
|
||||
{
|
||||
html_node_list(rules, attr->children, parent_array, n_parent, match);
|
||||
}
|
||||
|
||||
attr = attr->next;
|
||||
}
|
||||
}
|
||||
|
||||
static void html_dump_one_node(const struct element_rule * rules, xmlNodePtr node, xmlNodePtr *parent_array, size_t *n_parent, int *match)
|
||||
{
|
||||
switch (node->type)
|
||||
{
|
||||
case XML_ELEMENT_NODE:
|
||||
case XML_ELEMENT_DECL:
|
||||
case XML_CDATA_SECTION_NODE:
|
||||
case XML_ENTITY_REF_NODE:
|
||||
case XML_ENTITY_NODE:
|
||||
case XML_PI_NODE:
|
||||
case XML_COMMENT_NODE:
|
||||
case XML_DOCUMENT_TYPE_NODE:
|
||||
case XML_DOCUMENT_FRAG_NODE:
|
||||
case XML_NOTATION_NODE:
|
||||
case XML_TEXT_NODE:
|
||||
break;
|
||||
|
||||
default:
|
||||
return;
|
||||
}
|
||||
if ((node->type == XML_ELEMENT_NODE) && (node->nsDef != NULL))
|
||||
{
|
||||
html_namespace_list(node->nsDef);
|
||||
}
|
||||
|
||||
if ((node->type == XML_ELEMENT_NODE) && (node->properties != NULL))
|
||||
{
|
||||
html_attr_list(rules, node->properties, parent_array, n_parent, match);
|
||||
}
|
||||
|
||||
if (node->type != XML_ENTITY_REF_NODE)
|
||||
{
|
||||
if ((node->type != XML_ELEMENT_NODE) && (node->content != NULL))
|
||||
{
|
||||
construct_html_by_treatment(rules, node, parent_array, n_parent, match);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void html_dump_node(const struct element_rule * rules, xmlNodePtr node, xmlNodePtr *parent_array, size_t *n_parent, int *match)
|
||||
{
|
||||
html_dump_one_node(rules, node, parent_array, n_parent, match);
|
||||
if ((node->type != XML_NAMESPACE_DECL) && (node->children != NULL) && (node->type != XML_ENTITY_REF_NODE))
|
||||
{
|
||||
html_node_list(rules, node->children, parent_array, n_parent, match);
|
||||
}
|
||||
}
|
||||
|
||||
static void html_node_list(const struct element_rule * rules, xmlNodePtr node, xmlNodePtr *parent_array, size_t *n_parent, int *match)
|
||||
{
|
||||
while (node != NULL)
|
||||
{
|
||||
html_dump_node(rules, node, parent_array, n_parent, match);
|
||||
node = node->next;
|
||||
}
|
||||
}
|
||||
|
||||
static void html_element_foreach(const struct element_rule * rules, xmlDocPtr doc, xmlNodePtr *parent_array, size_t *n_parent, int *match)
|
||||
{
|
||||
if (((doc->type == XML_DOCUMENT_NODE) || (doc->type == XML_HTML_DOCUMENT_NODE)) && (doc->children != NULL))
|
||||
{
|
||||
html_node_list(rules, doc->children, parent_array, n_parent, match);
|
||||
}
|
||||
}
|
||||
|
||||
size_t parse_json_output_unformatted(const char * in, size_t in_sz, const struct element_rule * rules, char** out)
|
||||
{
|
||||
int step=0, depth=0, match_num=0,i=0, match=0;
|
||||
cJSON* interator=NULL;
|
||||
char* new_out = NULL, *node=NULL;
|
||||
size_t outlen=0;
|
||||
char *element_treatment=NULL;
|
||||
|
||||
interator = cJSON_Parse(in);
|
||||
if(interator==NULL)
|
||||
{
|
||||
goto finish;
|
||||
}
|
||||
|
||||
depth = -1;
|
||||
element_treatment=rules->element_treatment;
|
||||
|
||||
cjson_element_foreach(interator, &depth, &step, &node, rules, &match_num);
|
||||
for(i=0; i< match_num; i++)
|
||||
{
|
||||
depth = rules->distane_from_matching;
|
||||
step=0; node=NULL;
|
||||
match |= cjson_element_foreach(interator, &depth, &step, &node, rules, &match_num);
|
||||
|
||||
if(!strcasecmp(element_treatment, "remove") && match == 1 && node != NULL && depth == 0)
|
||||
{
|
||||
cJSON_DeleteItemFromObject(interator, node);
|
||||
}
|
||||
}
|
||||
|
||||
if(match==1 && element_treatment != NULL && !strcasecmp(element_treatment, "mark"))
|
||||
{
|
||||
cJSON_AddBoolToObject(interator, "need_check", true);
|
||||
}
|
||||
|
||||
new_out = cJSON_PrintUnformatted(interator);
|
||||
if(new_out!=NULL)
|
||||
{
|
||||
*out = new_out;
|
||||
outlen = strlen(new_out);
|
||||
}
|
||||
|
||||
finish:
|
||||
if(interator != NULL)
|
||||
cJSON_Delete(interator);
|
||||
return outlen;
|
||||
}
|
||||
|
||||
size_t format_json_file_type(const char * in, size_t in_sz, const struct element_rule * rules, char** out)
|
||||
{
|
||||
int step=0, depth=0, match=0, i=0;
|
||||
cJSON* interator=NULL;
|
||||
char* new_out = NULL, *node=NULL;
|
||||
size_t outlen=0; int match_num=0;
|
||||
char *element_treatment=NULL;
|
||||
|
||||
char*new_in = ALLOC(char, in_sz+1);
|
||||
memcpy(new_in, in, in_sz);
|
||||
|
||||
interator = cJSON_Parse(new_in);
|
||||
if(interator==NULL)
|
||||
{
|
||||
goto finish;
|
||||
}
|
||||
|
||||
depth = -1;
|
||||
element_treatment=rules->element_treatment;
|
||||
|
||||
cjson_element_foreach(interator, &depth, &step, &node, rules, &match_num);
|
||||
for(i=0; i< match_num; i++)
|
||||
{
|
||||
depth = rules->distane_from_matching;
|
||||
step=0; node=NULL;
|
||||
match |= cjson_element_foreach(interator, &depth, &step, &node, rules, &match_num);
|
||||
|
||||
if(!strcasecmp(element_treatment, "remove") && match == 1 && node != NULL && depth == 0)
|
||||
{
|
||||
cJSON_DeleteItemFromObject(interator, node);
|
||||
}
|
||||
}
|
||||
|
||||
if(match == 0)
|
||||
{
|
||||
goto finish;
|
||||
}
|
||||
|
||||
if(element_treatment != NULL && !strcasecmp(element_treatment, "mark"))
|
||||
{
|
||||
cJSON_AddBoolToObject(interator, "need_check", true);
|
||||
}
|
||||
|
||||
new_out = cJSON_Print(interator);
|
||||
if(new_out!=NULL)
|
||||
{
|
||||
*out = new_out;
|
||||
outlen = strlen(*out);
|
||||
}
|
||||
|
||||
finish:
|
||||
if(interator != NULL)
|
||||
cJSON_Delete(interator);
|
||||
FREE(&new_in);
|
||||
return outlen;
|
||||
}
|
||||
|
||||
size_t format_multidelete_json_type(const char * in, size_t in_sz, const struct element_rule * rules, char** out)
|
||||
{
|
||||
char *new_out=NULL, *pre_out=NULL;
|
||||
char * tmp = ALLOC(char, in_sz+1);
|
||||
char * token = NULL, * sub_token = NULL, * saveptr = NULL;
|
||||
size_t output_size = 0;
|
||||
|
||||
size_t new_out_len=0;
|
||||
/**Follow-up optimization */
|
||||
new_out = ALLOC(char, in_sz+in_sz/3);
|
||||
|
||||
memcpy(tmp, in, in_sz);
|
||||
|
||||
for (token = tmp;; token = NULL)
|
||||
{
|
||||
sub_token = strtok_r(token, "\n", &saveptr);
|
||||
if (sub_token == NULL)
|
||||
{
|
||||
new_out[new_out_len-2]='\0';
|
||||
break;
|
||||
}
|
||||
output_size = parse_json_output_unformatted(sub_token, strlen(sub_token), rules, &pre_out);
|
||||
if(output_size>0 && pre_out!=NULL)
|
||||
{
|
||||
memcpy(new_out+new_out_len, pre_out, strlen(pre_out));
|
||||
new_out_len += strlen(pre_out);
|
||||
memcpy(new_out+new_out_len, "\r\n", 2);
|
||||
new_out_len +=2;
|
||||
FREE(&pre_out);
|
||||
}
|
||||
}
|
||||
|
||||
if(new_out)
|
||||
{
|
||||
*out = new_out;
|
||||
output_size = strlen(new_out);
|
||||
}
|
||||
|
||||
free(tmp);
|
||||
tmp = NULL;
|
||||
return output_size;
|
||||
}
|
||||
|
||||
/**
 * Serialize a document to a newly allocated, NUL-terminated string.
 *
 * The document is saved as UTF-8 without an XML declaration into a
 * temporary libxml2 buffer, whose content is then copied out.
 *
 * @param doc  document to serialize.
 * @param out  receives the copy (allocated with ALLOC) on success.
 * @return length of *out, or 0 on failure; *out is untouched on failure.
 */
size_t construct_format_html(htmlDocPtr doc, char**out)
{
    size_t outlen = 0;
    xmlSaveCtxtPtr save_ctx = NULL;

    xmlBufferPtr out_buffer = xmlBufferCreate();
    if (out_buffer == NULL)
    {
        return 0;
    }

    save_ctx = xmlSaveToBuffer(out_buffer, "UTF-8", XML_SAVE_NO_DECL);
    if (save_ctx != NULL)
    {
        long saved = xmlSaveDoc(save_ctx, doc);

        /* Close on every path: this flushes pending output into out_buffer
         * and releases the context, which would otherwise leak when saving
         * fails. */
        xmlSaveClose(save_ctx);
        save_ctx = NULL;

        if (saved >= 0)
        {
            const xmlChar *content = xmlBufferContent(out_buffer);
            if (content != NULL)
            {
                size_t content_len = strlen((const char *)content);
                char *copy = ALLOC(char, content_len + 1);
                if (copy != NULL)
                {
                    memcpy(copy, content, content_len);
                    copy[content_len] = '\0';
                    *out = copy;
                    outlen = content_len;
                }
            }
        }
    }

    xmlBufferFree(out_buffer);
    return outlen;
}
|
||||
|
||||
size_t format_input_html(const char * in, size_t in_sz, const struct element_rule * rules, char** out)
|
||||
{
|
||||
size_t outlen=0, n_parent=0, n_parent_peer=0;
|
||||
int match=0, i=0;
|
||||
htmlDocPtr doc = NULL;
|
||||
const char *element_treatment=NULL;
|
||||
xmlNodePtr parent_array[16];
|
||||
|
||||
int options = XML_PARSE_NOERROR;
|
||||
|
||||
doc = htmlReadMemory(in, in_sz, NULL, NULL, options);
|
||||
if (doc == NULL)
|
||||
{
|
||||
goto finish;
|
||||
}
|
||||
|
||||
html_element_foreach(rules, doc, parent_array, &n_parent, &match);
|
||||
if(match != 1)
|
||||
{
|
||||
goto finish;
|
||||
}
|
||||
|
||||
n_parent_peer = n_parent;
|
||||
element_treatment=rules->element_treatment;
|
||||
|
||||
if(element_treatment != NULL && !strcasecmp(element_treatment, "remove"))
|
||||
{
|
||||
for(i=0; i < (int)n_parent_peer; i++)
|
||||
{
|
||||
match =0; n_parent = 0;
|
||||
html_element_foreach(rules, doc, parent_array, &n_parent, &match);
|
||||
if(match == 1)
|
||||
{
|
||||
xmlUnlinkNode(parent_array[0]);
|
||||
xmlFreeNode(parent_array[0]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(element_treatment != NULL && !strcasecmp(element_treatment, "mark"))
|
||||
{
|
||||
if(doc->children != NULL && doc->children->next != NULL)
|
||||
{
|
||||
xmlNewProp(doc->children->next, (const xmlChar *)"need_check", (const xmlChar *)"true");
|
||||
}
|
||||
}
|
||||
|
||||
outlen = construct_format_html(doc, out);
|
||||
if(outlen<=0)
|
||||
{
|
||||
outlen=0;
|
||||
}
|
||||
|
||||
finish:
|
||||
if(doc!=NULL)
|
||||
{
|
||||
xmlFreeDoc(doc);
|
||||
}
|
||||
return outlen;
|
||||
}
|
||||
|
||||
size_t format_html_file_type(const char * interator, size_t interator_sz, const struct element_rule *rule, char **new_out)
|
||||
{
|
||||
size_t output_size=0;
|
||||
|
||||
if(interator[0] == '{')
|
||||
{
|
||||
output_size = format_multidelete_json_type(interator, interator_sz, rule, new_out);
|
||||
}
|
||||
else
|
||||
{
|
||||
output_size = format_input_html(interator, interator_sz, rule, new_out);
|
||||
}
|
||||
|
||||
return output_size;
|
||||
}
|
||||
|
||||
size_t parse_string(const char * interator, size_t interator_sz, const struct element_rule *rule, char **new_out, int options)
|
||||
{
|
||||
size_t output_size=0;
|
||||
|
||||
if(options)
|
||||
{
|
||||
output_size = format_json_file_type(interator, interator_sz, rule, new_out);
|
||||
}
|
||||
else
|
||||
{
|
||||
output_size = format_html_file_type(interator, interator_sz, rule, new_out);
|
||||
}
|
||||
|
||||
return output_size;
|
||||
}
|
||||
|
||||
size_t execute_edit_element_rule(const char * in, size_t in_sz, const struct element_rule *rules, size_t n_rule, char** out, int options)
|
||||
{
|
||||
const struct element_rule * todo = rules;
|
||||
size_t i = 0, interator_sz=0, pre_out_sz=0;
|
||||
const char * interator = NULL;
|
||||
char* new_out = NULL, * pre_out = NULL;
|
||||
size_t output_size=0;
|
||||
if (in_sz == 0 || in==NULL)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
interator = in;
|
||||
interator_sz = in_sz;
|
||||
for (i = 0; i < n_rule; i++)
|
||||
{
|
||||
output_size = parse_string(interator, interator_sz, &(todo[i]), &new_out, options);
|
||||
if (output_size == 0)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
if (pre_out != NULL)
|
||||
{
|
||||
free(pre_out);
|
||||
pre_out = NULL;
|
||||
}
|
||||
pre_out = new_out;
|
||||
pre_out_sz = output_size;
|
||||
|
||||
interator = new_out;
|
||||
interator_sz = output_size;
|
||||
|
||||
new_out=NULL;
|
||||
output_size=0;
|
||||
}
|
||||
if(pre_out_sz>0)
|
||||
{
|
||||
*out=pre_out;
|
||||
return pre_out_sz;
|
||||
}
|
||||
else
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
22
plugin/business/pangu-http/src/pangu_element_edit.h
Normal file
22
plugin/business/pangu-http/src/pangu_element_edit.h
Normal file
@@ -0,0 +1,22 @@
|
||||
#pragma once
|
||||
#include <stddef.h>
|
||||
|
||||
/* Where a rule may pick its target element. The first two values double as
 * indexes into the name table used by scope_name_to_id(). */
enum search_scope
{
    kScopeInside = 0,   /* "inside_element" */
    kScopeWhole  = 1,   /* "whole_file" */
    KScopeMax    = 2    /* number of scopes; also returned for unknown names */
};
|
||||
|
||||
struct element_rule
|
||||
{
|
||||
enum search_scope scope;
|
||||
int distane_from_matching;
|
||||
char * start_indicator;
|
||||
char *element_treatment;
|
||||
char * contained_keyword;
|
||||
};
|
||||
|
||||
size_t execute_edit_element_rule(const char * in, size_t in_sz, const struct element_rule *rules, size_t n_rule, char** out, int options);
|
||||
enum search_scope scope_name_to_id(const char * name);
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
#include "pangu_logger.h"
|
||||
#include "pangu_element_edit.h"
|
||||
#include "pattern_replace.h"
|
||||
#include "pangu_web_cache.h"
|
||||
|
||||
@@ -53,6 +54,7 @@ enum manipulate_action
|
||||
MA_ACTION_REPLACE,
|
||||
MA_ACTION_HIJACK,
|
||||
MA_ACTION_INSERT,
|
||||
MA_ACTION_ELEMENT,
|
||||
__MA_ACTION_MAX
|
||||
};
|
||||
|
||||
@@ -120,7 +122,11 @@ struct policy_action_param
|
||||
int status_code;
|
||||
|
||||
size_t n_rule;
|
||||
struct replace_rule *rule;
|
||||
struct replace_rule *repl_rule;
|
||||
|
||||
size_t e_rule;
|
||||
struct element_rule *elem_rule;
|
||||
|
||||
pthread_mutex_t lock;
|
||||
};
|
||||
|
||||
@@ -308,6 +314,7 @@ static enum manipulate_action manipulate_action_str2idx(const char *action_str)
|
||||
clue_action_map[MA_ACTION_REPLACE]= "replace";
|
||||
clue_action_map[MA_ACTION_HIJACK]= "hijack";
|
||||
clue_action_map[MA_ACTION_INSERT]= "insert";
|
||||
clue_action_map[MA_ACTION_ELEMENT] = "edit_element";
|
||||
|
||||
size_t i = 0;
|
||||
|
||||
@@ -334,7 +341,7 @@ void policy_action_param_new(int idx, const struct Maat_rule_t* rule, const char
|
||||
return;
|
||||
}
|
||||
int rule_id;
|
||||
cJSON *json=NULL, *rules=NULL, *item=NULL;
|
||||
cJSON *json=NULL, *rules=NULL, *item=NULL, *sub_item=NULL;
|
||||
json=cJSON_Parse(srv_def_large);
|
||||
if(json==NULL)
|
||||
{
|
||||
@@ -419,19 +426,19 @@ void policy_action_param_new(int idx, const struct Maat_rule_t* rule, const char
|
||||
break;
|
||||
}
|
||||
rule_id = 0;
|
||||
param->rule = ALLOC(struct replace_rule, MAX_EDIT_ZONE_NUM);
|
||||
param->repl_rule = ALLOC(struct replace_rule, MAX_EDIT_ZONE_NUM);
|
||||
for (item = rules->child; item != NULL; item = item->next)
|
||||
{
|
||||
char * search = cJSON_GetObjectItem(item , "search_in")->valuestring;
|
||||
if (search == NULL) break;
|
||||
|
||||
param->rule[rule_id].zone = zone_name_to_id(search);
|
||||
if (param->rule[rule_id].zone == kZoneMax)
|
||||
param->repl_rule[rule_id].zone = zone_name_to_id(search);
|
||||
if (param->repl_rule[rule_id].zone == kZoneMax)
|
||||
{
|
||||
break;
|
||||
}
|
||||
param->rule[rule_id].find = tfe_strdup(cJSON_GetObjectItem(item , "find")->valuestring);
|
||||
param->rule[rule_id].replace_with = tfe_strdup(cJSON_GetObjectItem(item , "replace_with")->valuestring);
|
||||
param->repl_rule[rule_id].find = tfe_strdup(cJSON_GetObjectItem(item , "find")->valuestring);
|
||||
param->repl_rule[rule_id].replace_with = tfe_strdup(cJSON_GetObjectItem(item , "replace_with")->valuestring);
|
||||
rule_id++;
|
||||
}
|
||||
param->n_rule = rule_id;
|
||||
@@ -470,6 +477,44 @@ void policy_action_param_new(int idx, const struct Maat_rule_t* rule, const char
|
||||
param->enforcement_ratio = 1;
|
||||
}
|
||||
break;
|
||||
case MA_ACTION_ELEMENT:
|
||||
rules = cJSON_GetObjectItem(json, "rules");
|
||||
if(rules == NULL)
|
||||
{
|
||||
break;
|
||||
}
|
||||
rule_id = 0;
|
||||
param->elem_rule = ALLOC(struct element_rule, MAX_EDIT_ZONE_NUM);
|
||||
for (item = rules->child; item != NULL; item = item->next)
|
||||
{
|
||||
sub_item=cJSON_GetObjectItem(item,"anchor_element");
|
||||
if(sub_item != NULL && sub_item->type ==cJSON_Object)
|
||||
{
|
||||
char * search_scope = cJSON_GetObjectItem(sub_item , "search_scope")->valuestring;
|
||||
if (search_scope == NULL) break;
|
||||
|
||||
param->elem_rule[rule_id].scope = scope_name_to_id(search_scope);
|
||||
if (param->elem_rule[rule_id].scope == KScopeMax)
|
||||
{
|
||||
break;
|
||||
}
|
||||
if(param->elem_rule[rule_id].scope == kScopeInside)
|
||||
{
|
||||
param->elem_rule[rule_id].start_indicator = tfe_strdup(cJSON_GetObjectItem(sub_item , "start_indicator")->valuestring);
|
||||
}
|
||||
param->elem_rule[rule_id].contained_keyword = tfe_strdup(cJSON_GetObjectItem(sub_item,"contained_keyword")->valuestring);
|
||||
}
|
||||
|
||||
sub_item=cJSON_GetObjectItem(item,"target_element");
|
||||
if(sub_item != NULL && sub_item->type ==cJSON_Object)
|
||||
{
|
||||
param->elem_rule[rule_id].distane_from_matching = cJSON_GetObjectItem(sub_item , "target_distance_from_matching")->valueint;
|
||||
param->elem_rule[rule_id].element_treatment = tfe_strdup(cJSON_GetObjectItem(sub_item,"element_treatment")->valuestring);
|
||||
}
|
||||
rule_id++;
|
||||
}
|
||||
param->e_rule = rule_id;
|
||||
break;
|
||||
default: assert(0);
|
||||
break;
|
||||
}
|
||||
@@ -497,10 +542,19 @@ void policy_action_param_free_cb(int table_id, const struct Maat_rule_t* rule, c
|
||||
}
|
||||
pthread_mutex_unlock(&(param->lock));
|
||||
pthread_mutex_destroy(&(param->lock));
|
||||
for(i=0; i<param->e_rule; i++)
|
||||
{
|
||||
if(param->elem_rule[i].start_indicator!=NULL)
|
||||
{
|
||||
FREE(&(param->elem_rule[i].start_indicator));
|
||||
}
|
||||
FREE(&(param->elem_rule[i].element_treatment));
|
||||
FREE(&(param->elem_rule[i].contained_keyword));
|
||||
}
|
||||
for(i=0; i<param->n_rule; i++)
|
||||
{
|
||||
FREE(&(param->rule[i].find));
|
||||
FREE(&(param->rule[i].replace_with));
|
||||
FREE(&(param->repl_rule[i].find));
|
||||
FREE(&(param->repl_rule[i].replace_with));
|
||||
}
|
||||
|
||||
if (param->message)
|
||||
@@ -933,6 +987,15 @@ struct insert_ctx
|
||||
int actually_inserted;
|
||||
};
|
||||
|
||||
/* Per-session state of the element-edit action. */
struct edit_element_ctx
{
    /* Rules to apply and their count; taken from the policy parameter. */
    struct element_rule *item;
    size_t n_item;
    /* Response half being written in place of the original response. */
    struct tfe_http_half *editing;
    /* Accumulates the response body until it is complete. */
    struct evbuffer *http_body;
    /* Set to 1 once an edited body has been sent. */
    int actually_edited;
};
|
||||
|
||||
struct ip_data_ctx
|
||||
{
|
||||
char *asn_client;
|
||||
@@ -960,6 +1023,7 @@ struct pangu_http_ctx
|
||||
int manipulate_replaced;
|
||||
struct replace_ctx * rep_ctx;
|
||||
struct insert_ctx * ins_ctx;
|
||||
struct edit_element_ctx * edit_ctx;
|
||||
struct ip_data_ctx ip_ctx;
|
||||
|
||||
int (* resumed_cb)(const struct tfe_stream * stream,
|
||||
@@ -1007,6 +1071,17 @@ void http_ins_ctx_free(struct insert_ctx* ins_ctx)
|
||||
return;
|
||||
}
|
||||
|
||||
void http_element_ctx_free(struct edit_element_ctx *edit_ctx)
|
||||
{
|
||||
if (edit_ctx->http_body)
|
||||
{
|
||||
evbuffer_free(edit_ctx->http_body);
|
||||
edit_ctx->http_body = NULL;
|
||||
}
|
||||
FREE(&edit_ctx);
|
||||
return;
|
||||
}
|
||||
|
||||
void http_ip_ctx_free(struct ip_data_ctx *ip_ctx)
|
||||
{
|
||||
if(ip_ctx->asn_client)
|
||||
@@ -1042,6 +1117,11 @@ static void pangu_http_ctx_free(struct pangu_http_ctx * ctx)
|
||||
http_ins_ctx_free(ctx->ins_ctx);
|
||||
ctx->ins_ctx = NULL;
|
||||
}
|
||||
if(ctx->edit_ctx)
|
||||
{
|
||||
http_element_ctx_free(ctx->edit_ctx);
|
||||
ctx->edit_ctx = NULL;
|
||||
}
|
||||
|
||||
http_ip_ctx_free(&ctx->ip_ctx);
|
||||
ctx->manipulate_replaced=0;
|
||||
@@ -1351,7 +1431,7 @@ void http_replace(const struct tfe_stream * stream, const struct tfe_http_sessio
|
||||
{
|
||||
struct policy_action_param *param = ctx->param;
|
||||
ctx->rep_ctx = rep_ctx = ALLOC(struct replace_ctx, 1);
|
||||
rep_ctx->rule = param->rule;
|
||||
rep_ctx->rule = param->repl_rule;
|
||||
rep_ctx->n_rule = param->n_rule;
|
||||
}
|
||||
else
|
||||
@@ -1447,8 +1527,7 @@ void http_replace(const struct tfe_stream * stream, const struct tfe_http_sessio
|
||||
{
|
||||
options = 1;
|
||||
}
|
||||
rewrite_sz = execute_replace_rule(__http_body, __http_body_len, r_zone,
|
||||
rep_ctx->rule, rep_ctx->n_rule, &rewrite_buff, options);
|
||||
rewrite_sz = execute_replace_rule(__http_body, __http_body_len, r_zone, rep_ctx->rule, rep_ctx->n_rule, &rewrite_buff, options);
|
||||
|
||||
if (rewrite_sz >0 )
|
||||
{
|
||||
@@ -1878,7 +1957,7 @@ static void http_insert(const struct tfe_stream * stream, const struct tfe_http_
|
||||
}
|
||||
else
|
||||
{
|
||||
TFE_STREAM_LOG_INFO(stream, "Can only setup replace on REQ/RESP headers, detached.");
|
||||
TFE_STREAM_LOG_INFO(stream, "Can only setup insert on REQ/RESP headers, detached.");
|
||||
ctx->action = PG_ACTION_NONE;
|
||||
tfe_http_session_detach(session); return;
|
||||
}
|
||||
@@ -1974,6 +2053,124 @@ static void http_insert(const struct tfe_stream * stream, const struct tfe_http_
|
||||
return;
|
||||
}
|
||||
|
||||
void http_element(const struct tfe_stream * stream, const struct tfe_http_session * session, enum tfe_http_event events,
|
||||
const unsigned char * body_frag, size_t frag_size, struct pangu_http_ctx * ctx)
|
||||
{
|
||||
struct tfe_http_session * to_write_sess = NULL;
|
||||
char * rewrite_buff = NULL;
|
||||
size_t rewrite_sz = 0;
|
||||
|
||||
if (tfe_http_in_request(events))
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
to_write_sess = tfe_http_session_allow_write(session);
|
||||
if (to_write_sess == NULL)
|
||||
{
|
||||
TFE_STREAM_LOG_INFO(stream, "tfe_http_session_allow_write() %s failed.", session->req->req_spec.uri);
|
||||
ctx->action = PG_ACTION_NONE;
|
||||
tfe_http_session_detach(session); return;
|
||||
}
|
||||
|
||||
struct edit_element_ctx * edit_ctx = ctx->edit_ctx;
|
||||
if (ctx->edit_ctx == NULL)
|
||||
{
|
||||
if (events & EV_HTTP_RESP_HDR)
|
||||
{
|
||||
struct policy_action_param *param = ctx->param;
|
||||
ctx->edit_ctx = edit_ctx = ALLOC(struct edit_element_ctx, 1);
|
||||
edit_ctx->item = param->elem_rule;
|
||||
edit_ctx->n_item = param->e_rule;
|
||||
}
|
||||
else
|
||||
{
|
||||
TFE_STREAM_LOG_INFO(stream, "Can only setup editing on RESP headers, detached.");
|
||||
ctx->action = PG_ACTION_NONE;
|
||||
tfe_http_session_detach(session); return;
|
||||
}
|
||||
}
|
||||
|
||||
struct tfe_http_half * in_resp_half = session->resp;
|
||||
struct tfe_http_resp_spec * in_resp_spec = &in_resp_half->resp_spec;
|
||||
|
||||
if (events & EV_HTTP_RESP_HDR)
|
||||
{
|
||||
edit_ctx->editing= tfe_http_session_response_create(to_write_sess, in_resp_spec->resp_code);
|
||||
tfe_http_session_response_set(to_write_sess, edit_ctx->editing);
|
||||
struct tfe_http_half * in_half = in_resp_half;
|
||||
|
||||
struct http_field_name in_header_field{};
|
||||
const char * in_header_value = NULL;
|
||||
void * iterator = NULL;
|
||||
|
||||
while (true)
|
||||
{
|
||||
if ((in_header_value = tfe_http_field_iterate(in_half, &iterator, &in_header_field)) == NULL)
|
||||
{
|
||||
break;
|
||||
}
|
||||
tfe_http_field_write(edit_ctx->editing, &in_header_field, in_header_value);
|
||||
}
|
||||
}
|
||||
|
||||
if (events & EV_HTTP_RESP_BODY_BEGIN)
|
||||
{
|
||||
assert(edit_ctx->http_body == NULL);
|
||||
edit_ctx->http_body = evbuffer_new();
|
||||
}
|
||||
|
||||
if (events & EV_HTTP_RESP_BODY_CONT)
|
||||
{
|
||||
evbuffer_add(edit_ctx->http_body, body_frag, frag_size);
|
||||
}
|
||||
|
||||
if (events & EV_HTTP_RESP_BODY_END)
|
||||
{
|
||||
char * __http_body = (char *) evbuffer_pullup(edit_ctx->http_body, -1);
|
||||
size_t __http_body_len = evbuffer_get_length(edit_ctx->http_body);
|
||||
|
||||
rewrite_buff = NULL;
|
||||
rewrite_sz = 0;
|
||||
|
||||
if(in_resp_spec->content_type != NULL && strcasestr(in_resp_spec->content_type, "text/html"))
|
||||
{
|
||||
rewrite_sz = execute_edit_element_rule(__http_body, __http_body_len, edit_ctx->item, edit_ctx->n_item, &rewrite_buff, 0);
|
||||
}
|
||||
if(in_resp_spec->content_type != NULL && strcasestr(in_resp_spec->content_type, "json"))
|
||||
{
|
||||
rewrite_sz = execute_edit_element_rule(__http_body, __http_body_len, edit_ctx->item, edit_ctx->n_item, &rewrite_buff, 1);
|
||||
}
|
||||
|
||||
if (rewrite_sz >0 )
|
||||
{
|
||||
tfe_http_half_append_body(edit_ctx->editing, rewrite_buff, rewrite_sz, 0);
|
||||
edit_ctx->actually_edited=1;
|
||||
}
|
||||
else
|
||||
{
|
||||
tfe_http_half_append_body(edit_ctx->editing, __http_body, __http_body_len, 0);
|
||||
}
|
||||
|
||||
if (rewrite_buff != NULL)
|
||||
{
|
||||
FREE(&rewrite_buff);
|
||||
}
|
||||
|
||||
if (edit_ctx->http_body != NULL)
|
||||
{
|
||||
evbuffer_free(edit_ctx->http_body);
|
||||
edit_ctx->http_body = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
if (events & EV_HTTP_RESP_END)
|
||||
{
|
||||
tfe_http_half_append_body(edit_ctx->editing, NULL, 0, 0);
|
||||
edit_ctx->editing = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
static void http_manipulate(const struct tfe_stream * stream, const struct tfe_http_session * session,
|
||||
enum tfe_http_event events, const unsigned char * body_frag, size_t frag_size, struct pangu_http_ctx * ctx)
|
||||
{
|
||||
@@ -2003,6 +2200,9 @@ static void http_manipulate(const struct tfe_stream * stream, const struct tfe_h
|
||||
case MA_ACTION_INSERT:
|
||||
http_insert(stream, session, events, body_frag, frag_size, ctx);
|
||||
break;
|
||||
case MA_ACTION_ELEMENT:
|
||||
http_element(stream, session, events, body_frag, frag_size, ctx);
|
||||
break;
|
||||
default: assert(0);
|
||||
break;
|
||||
}
|
||||
@@ -2512,6 +2712,21 @@ static inline int ctx_actually_inserted(struct pangu_http_ctx * ctx)
|
||||
}
|
||||
}
|
||||
|
||||
static inline int ctx_actually_edited(struct pangu_http_ctx * ctx)
|
||||
{
|
||||
|
||||
if(ctx->action == PG_ACTION_MANIPULATE &&
|
||||
ctx->param->action == MA_ACTION_ELEMENT && ctx->edit_ctx != NULL &&
|
||||
ctx->edit_ctx->actually_edited==1)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
static inline int ctx_actually_manipulate(struct pangu_http_ctx * ctx)
|
||||
{
|
||||
if(ctx->action == PG_ACTION_MANIPULATE &&
|
||||
@@ -2570,12 +2785,9 @@ void pangu_on_http_end(const struct tfe_stream * stream,
|
||||
}
|
||||
|
||||
if(ctx->action != PG_ACTION_NONE &&
|
||||
(((ctx_actually_replaced(ctx)) ||
|
||||
(ctx_actually_inserted(ctx)) ||
|
||||
(ctx_actually_manipulate(ctx))) ||
|
||||
(ctx->action == PG_ACTION_MONIT ||
|
||||
ctx->action == PG_ACTION_REJECT ||
|
||||
ctx->action == PG_ACTION_WHITELIST)))
|
||||
(((ctx_actually_replaced(ctx)) || (ctx_actually_inserted(ctx)) || (ctx_actually_edited(ctx)) ||
|
||||
(ctx_actually_manipulate(ctx))) || (ctx->action == PG_ACTION_MONIT ||
|
||||
ctx->action == PG_ACTION_REJECT || ctx->action == PG_ACTION_WHITELIST)))
|
||||
{
|
||||
ret=pangu_send_log(g_pangu_rt->send_logger, &log_msg);
|
||||
ATOMIC_ADD(&(g_pangu_rt->stat_val[STAT_LOG_NUM]), ret);
|
||||
|
||||
@@ -122,7 +122,7 @@ int pangu_send_log(struct pangu_logger* handle, const struct pangu_log* log_msg)
|
||||
|
||||
const char *app_proto[]= {"unkonw","http1", "http2"};
|
||||
|
||||
const char *manipulate_action_map[]= {"redirect","block","replace","hijack","insert"};
|
||||
const char *manipulate_action_map[]= {"redirect","block","replace","hijack","insert","element_edit"};
|
||||
|
||||
const char *panggu_action_map[__LG_ACTION_MAX];
|
||||
panggu_action_map[LG_ACTION_MONIT]="monitor";
|
||||
|
||||
Reference in New Issue
Block a user