使用 pcre2 替代 glib 中的 regex 进行替换。
This commit is contained in:
@@ -1,4 +1,5 @@
|
||||
#include "pangu_logger.h"
|
||||
#include "pattern_replace.h"
|
||||
|
||||
#include <tfe_stream.h>
|
||||
#include <tfe_utils.h>
|
||||
@@ -19,11 +20,10 @@
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <regex.h>
|
||||
|
||||
|
||||
#define MAX_SCAN_RESULT 16
|
||||
#define MAX_EDIT_ZONE_NUM 64
|
||||
#define MAX_EDIT_MATCHES 16
|
||||
|
||||
enum pangu_action//Bigger action number is prior.
|
||||
{
|
||||
@@ -221,21 +221,7 @@ static void _wrap_non_std_field_write(struct tfe_http_half * half, const char* f
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
/*
 * Identifies which part of an HTTP transaction a replace rule applies to.
 * Values are consecutive from zero; kZoneMax is one past the last usable
 * zone and doubles as the "not a zone" marker.
 */
enum replace_zone
{
	kZoneRequestUri     = 0,
	kZoneRequestHeaders = 1,
	kZoneRequestBody    = 2,
	kZoneResponseHeader = 3,
	kZoneResponseBody   = 4,
	kZoneMax            = 5
};
|
||||
struct replace_rule
|
||||
{
|
||||
enum replace_zone zone;
|
||||
char * find;
|
||||
char * replace_with;
|
||||
};
|
||||
|
||||
struct replace_ctx
|
||||
{
|
||||
struct replace_rule * rule;
|
||||
@@ -365,10 +351,10 @@ static enum pangu_action decide_ctrl_action(const struct Maat_rule_t * hit_rules
|
||||
|
||||
return prior_action;
|
||||
}
|
||||
//https://github.com/AndiDittrich/HttpErrorPages
|
||||
//HTML template is downloaded from https://github.com/AndiDittrich/HttpErrorPages
|
||||
static void html_generate(int cfg_id, int status_code, char ** page_buff, size_t * page_size)
|
||||
{
|
||||
ctemplate::TemplateDictionary dict("pg_page_dict");
|
||||
ctemplate::TemplateDictionary dict("pg_page_dict"); //dict is automatically finalized after function returned.
|
||||
dict.SetIntValue("cfg_id", cfg_id);
|
||||
std::string output;
|
||||
ctemplate::Template * tpl = NULL;
|
||||
@@ -385,7 +371,6 @@ static void html_generate(int cfg_id, int status_code, char ** page_buff, size_t
|
||||
}
|
||||
|
||||
tpl->Expand(&output, &dict);
|
||||
//todo: do I need to delete dict?
|
||||
*page_size = output.length();
|
||||
*page_buff = ALLOC(char, *page_size);
|
||||
memcpy(*page_buff, output.c_str(), *page_size);
|
||||
@@ -409,244 +394,6 @@ static int is_http_request(enum tfe_http_event events)
|
||||
}
|
||||
}
|
||||
|
||||
enum replace_zone zone_name_to_id(const char * name)
|
||||
{
|
||||
const char * std_name[] = {"http_req_uri",
|
||||
"http_req_header",
|
||||
"http_req_body",
|
||||
"http_resp_header",
|
||||
"http_resp_body",
|
||||
"http_resp_body"};
|
||||
size_t i = 0;
|
||||
for (i = 0; i < sizeof(std_name) / sizeof(const char *); i++)
|
||||
{
|
||||
if (0 == strcasecmp(name, std_name[i]))
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
return (enum replace_zone) i;
|
||||
}
|
||||
/*
 * Find the first occurrence of delim in s that is not escaped by a backslash.
 *
 * A backslash causes the character following it to be skipped, so "\/" never
 * matches '/' and a backslash itself is never reported as the delimiter.
 *
 * @param s      NUL-terminated string to scan; may be NULL.
 * @param delim  delimiter character to look for.
 * @return       pointer to the delimiter inside s, or NULL when s is NULL or
 *               no unescaped delimiter exists.
 */
static char * strchr_esc(char * s, const char delim)
{
	if (s == NULL)
	{
		return NULL;
	}

	for (char * cur = s; *cur != '\0'; cur++)
	{
		if (*cur == '\\')
		{
			if (cur[1] == '\0')
			{
				/* Dangling backslash at the end of the string: there is
				 * nothing to escape. Stop here so the scan never steps over
				 * the terminating NUL. */
				break;
			}
			cur++; /* skip the escaped character; the loop increment moves past it */
			continue;
		}
		if (*cur == delim)
		{
			return cur;
		}
	}
	return NULL;
}
|
||||
static char * strtok_r_esc(char * s, const char delim, char ** save_ptr)
|
||||
{
|
||||
char * token;
|
||||
|
||||
if (s == NULL) s = *save_ptr;
|
||||
|
||||
/* Scan leading delimiters. */
|
||||
token = strchr_esc(s, delim);
|
||||
if (token == NULL)
|
||||
{
|
||||
*save_ptr = token;
|
||||
return s;
|
||||
}
|
||||
/* Find the end of the token. */
|
||||
*token = '\0';
|
||||
token++;
|
||||
*save_ptr = token;
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
size_t format_replace_rule(const char * exec_para, struct replace_rule * replace, size_t n_replace)
|
||||
{
|
||||
char * tmp = ALLOC(char, strlen(exec_para) + 1);
|
||||
char * token = NULL, * sub_token = NULL, * saveptr = NULL, * saveptr2 = NULL;
|
||||
size_t idx = 0;
|
||||
|
||||
const char * str_zone = "zone=";
|
||||
const char * str_subs = "substitute=";
|
||||
memcpy(tmp, exec_para, strlen(exec_para));
|
||||
|
||||
for (token = tmp;; token = NULL)
|
||||
{
|
||||
sub_token = strtok_r(token, ";", &saveptr);
|
||||
if (sub_token == NULL) break;
|
||||
|
||||
if (0 == strncasecmp(sub_token, str_zone, strlen(str_zone)))
|
||||
{
|
||||
replace[idx].zone = zone_name_to_id(sub_token + strlen(str_zone));
|
||||
if (replace[idx].zone == kZoneMax)
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
sub_token = strtok_r(NULL, ";", &saveptr);
|
||||
if (0 == strncasecmp(sub_token, str_subs, strlen(str_subs)))
|
||||
{
|
||||
sub_token += strlen(str_subs) + 1;
|
||||
replace[idx].find = tfe_strdup(strtok_r_esc(sub_token, '/', &saveptr2));
|
||||
replace[idx].replace_with = tfe_strdup(strtok_r_esc(NULL, '/', &saveptr2));
|
||||
|
||||
idx++;
|
||||
if (idx == n_replace)
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
free(tmp);
|
||||
tmp = NULL;
|
||||
return idx;
|
||||
}
|
||||
|
||||
size_t select_replace_rule(enum replace_zone zone, const struct replace_rule * replace, size_t n_replace,
|
||||
const struct replace_rule ** selected, size_t n_selected)
|
||||
{
|
||||
size_t i = 0, j = 0;
|
||||
for (i = 0; i < n_replace && j < n_selected; i++)
|
||||
{
|
||||
if (replace[i].zone == zone)
|
||||
{
|
||||
selected[j] = replace + i;
|
||||
j++;
|
||||
}
|
||||
}
|
||||
return j;
|
||||
}
|
||||
|
||||
static struct evbuffer * replace_string(const char * in, const struct replace_rule * zone)
|
||||
{
|
||||
//Reference to https://www.lemoda.net/c/unix-regex/
|
||||
// Regular Expression test: https://regex101.com/
|
||||
regex_t reg;
|
||||
int status = 0, is_replaced = 0;
|
||||
struct evbuffer * out = NULL;
|
||||
size_t in_sz = strlen(in);
|
||||
|
||||
size_t replace_len = strlen(zone->replace_with);
|
||||
|
||||
assert(strlen(zone->find) != 0);
|
||||
status = regcomp(®, zone->find, REG_EXTENDED | REG_NEWLINE);
|
||||
if (status != 0)
|
||||
{
|
||||
char error_message[TFE_STRING_MAX];
|
||||
regerror(status, ®, error_message, sizeof(error_message));
|
||||
TFE_LOG_ERROR(g_pangu_rt->local_logger, "Regex error compiling '%s': %s\n",
|
||||
zone->find, error_message);
|
||||
regfree(®);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* "p" is a pointer into the string which points to the end of the previous match. */
|
||||
const char * p = in;
|
||||
/* "pre_sub_expr_end" is a pointer into the string which points to the end of the previous sub expression match. */
|
||||
const char * pre_sub_expr_end = NULL;
|
||||
|
||||
/* "N_matches" is the maximum number of matches allowed. */
|
||||
const int n_matches = MAX_EDIT_MATCHES;
|
||||
/* "M" contains the matches found. */
|
||||
regmatch_t m[n_matches];
|
||||
int i = 0;
|
||||
|
||||
while (1)
|
||||
{
|
||||
int nomatch = regexec(®, p, n_matches, m, 0);
|
||||
if (nomatch)
|
||||
{
|
||||
break;
|
||||
}
|
||||
if (is_replaced == 0)
|
||||
{
|
||||
out = evbuffer_new();
|
||||
is_replaced = 1;
|
||||
}
|
||||
assert(m[0].rm_so != -1);
|
||||
pre_sub_expr_end = p;
|
||||
if (m[1].rm_so == -1)//no sub expr, replace the entire expr.
|
||||
{
|
||||
evbuffer_add(out, pre_sub_expr_end, m[0].rm_so - (pre_sub_expr_end - p));
|
||||
evbuffer_add(out, zone->replace_with, replace_len);
|
||||
pre_sub_expr_end = p + m[0].rm_eo;
|
||||
}
|
||||
else //have sub expr, replace the sub expr.
|
||||
{
|
||||
for (i = 1, pre_sub_expr_end = p; i < n_matches; i++)
|
||||
{
|
||||
if (m[i].rm_so == -1)
|
||||
{
|
||||
break;
|
||||
}
|
||||
evbuffer_add(out, pre_sub_expr_end, m[i].rm_so - (pre_sub_expr_end - p));
|
||||
evbuffer_add(out, zone->replace_with, replace_len);
|
||||
pre_sub_expr_end = p + m[i].rm_eo;
|
||||
}
|
||||
}
|
||||
p += m[0].rm_eo;
|
||||
}
|
||||
|
||||
if (is_replaced)
|
||||
{
|
||||
evbuffer_add(out, pre_sub_expr_end, in_sz - (pre_sub_expr_end - p));
|
||||
}
|
||||
|
||||
regfree(®);
|
||||
return out;
|
||||
}
|
||||
|
||||
struct evbuffer * execute_replace_rule(const char * in, size_t in_sz,
|
||||
enum replace_zone zone, const struct replace_rule * rules, size_t n_rule)
|
||||
{
|
||||
const struct replace_rule * todo[MAX_EDIT_ZONE_NUM];
|
||||
size_t n_todo = 0, i = 0;
|
||||
struct evbuffer * out = NULL;
|
||||
const char * interator = NULL;
|
||||
struct evbuffer * new_out = NULL, * pre_out = NULL;
|
||||
if (in == 0)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
//Do not process buffer that contains '\0'.
|
||||
if (0 != memchr(in, '\0', in_sz))
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
n_todo = select_replace_rule(zone, rules, n_rule, todo, MAX_EDIT_ZONE_NUM);
|
||||
interator = in;
|
||||
for (i = 0; i < n_todo; i++)
|
||||
{
|
||||
new_out = replace_string(interator, todo[i]);
|
||||
if (new_out != NULL)
|
||||
{
|
||||
pre_out = out;
|
||||
out = new_out;
|
||||
interator = (char *) evbuffer_pullup(out, -1);
|
||||
|
||||
if (pre_out != NULL)
|
||||
{
|
||||
evbuffer_free(pre_out);
|
||||
pre_out = NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
void http_replace(const struct tfe_stream * stream, const struct tfe_http_session * session,
|
||||
enum tfe_http_event events, const unsigned char * body_frag, size_t frag_size, struct pangu_http_ctx * ctx)
|
||||
|
||||
Reference in New Issue
Block a user