diff --git a/plugin/business/pangu-http/CMakeLists.txt b/plugin/business/pangu-http/CMakeLists.txt index e0f6758..55d6dbf 100644 --- a/plugin/business/pangu-http/CMakeLists.txt +++ b/plugin/business/pangu-http/CMakeLists.txt @@ -4,5 +4,5 @@ target_link_libraries(pangu-http librdkafka-static ctemplate-static cjson pcre2- target_link_libraries(pangu-http maatframe) add_executable(test_pattern_replace test_pattern_replace.cpp pattern_replace.cpp) -target_link_libraries(test_pattern_replace common libevent-static gtest pcre2-static) +target_link_libraries(test_pattern_replace common gtest pcre2-static) file(COPY test_data DESTINATION ./) diff --git a/plugin/business/pangu-http/pangu_http.cpp b/plugin/business/pangu-http/pangu_http.cpp index f17b08b..dd3525b 100644 --- a/plugin/business/pangu-http/pangu_http.cpp +++ b/plugin/business/pangu-http/pangu_http.cpp @@ -399,6 +399,9 @@ void http_replace(const struct tfe_stream * stream, const struct tfe_http_sessio enum tfe_http_event events, const unsigned char * body_frag, size_t frag_size, struct pangu_http_ctx * ctx) { struct tfe_http_session * to_write_sess = NULL; + char * rewrite_buff = NULL; + size_t rewrite_sz = 0; + to_write_sess = tfe_http_session_allow_write(session); if (to_write_sess == NULL) //fail to wirte, abandon. { @@ -431,14 +434,15 @@ void http_replace(const struct tfe_stream * stream, const struct tfe_http_sessio if ((events & EV_HTTP_REQ_HDR) || (events & EV_HTTP_RESP_HDR)) { - struct evbuffer * rewrite_uri = NULL; + char * rewrite_uri = NULL; + size_t rewrite_uri_sz=0; if (is_http_request(events)) { - rewrite_uri = execute_replace_rule(in_req_spec->uri, strlen(in_req_spec->uri), - kZoneRequestUri, rep_ctx->rule, rep_ctx->n_rule); + rewrite_uri_sz = execute_replace_rule(in_req_spec->uri, strlen(in_req_spec->uri), + kZoneRequestUri, rep_ctx->rule, rep_ctx->n_rule, &rewrite_uri); rep_ctx->replacing = tfe_http_session_request_create(to_write_sess, in_req_spec->method, - rewrite_uri != NULL ? (char *) evbuffer_pullup(rewrite_uri, -1) : in_req_spec->uri); + rewrite_uri_sz >0 ? rewrite_uri : in_req_spec->uri); tfe_http_session_request_set(to_write_sess, rep_ctx->replacing); } @@ -450,8 +454,7 @@ void http_replace(const struct tfe_stream * stream, const struct tfe_http_sessio if (rewrite_uri != NULL) { - evbuffer_free(rewrite_uri); - rewrite_uri = NULL; + FREE(&rewrite_uri); } enum replace_zone zone = is_http_request(events) ? kZoneRequestHeaders : kZoneResponseHeader; @@ -468,21 +471,14 @@ void http_replace(const struct tfe_stream * stream, const struct tfe_http_sessio break; } - struct evbuffer * rewrite_buff = execute_replace_rule(in_header_value, - strlen(in_header_value), zone, rep_ctx->rule, rep_ctx->n_rule); - - if (rewrite_buff != NULL) + rewrite_buff = NULL; + rewrite_sz = 0; + rewrite_sz=execute_replace_rule(in_header_value, + strlen(in_header_value), zone, rep_ctx->rule, rep_ctx->n_rule, &rewrite_buff); + tfe_http_field_write(rep_ctx->replacing, &in_header_field, rewrite_sz>0? rewrite_buff : in_header_value); + if(rewrite_buff != NULL) { - tfe_http_field_write(rep_ctx->replacing, &in_header_field, (char *) evbuffer_pullup(rewrite_buff, -1)); - } - else - { - tfe_http_field_write(rep_ctx->replacing, &in_header_field, in_header_value); - } - - if (rewrite_buff != NULL) - { - evbuffer_free(rewrite_buff); + FREE(&rewrite_buff); } } } @@ -503,25 +499,18 @@ void http_replace(const struct tfe_stream * stream, const struct tfe_http_sessio char * __http_body = (char *) evbuffer_pullup(rep_ctx->http_body, -1); size_t __http_body_len = evbuffer_get_length(rep_ctx->http_body); - enum replace_zone replace_zone = is_http_request(events) ? kZoneRequestHeaders : kZoneResponseHeader; - struct evbuffer * rewrite_buff; + enum replace_zone r_zone = is_http_request(events) ? kZoneRequestBody : kZoneResponseBody; + + rewrite_buff = NULL; + rewrite_sz = 0; + + rewrite_sz = execute_replace_rule(__http_body, __http_body_len, r_zone, + rep_ctx->rule, rep_ctx->n_rule, &rewrite_buff); + - if (is_http_request(events)) + if (rewrite_sz >0 ) { - rewrite_buff = execute_replace_rule(__http_body, __http_body_len, kZoneRequestBody, - rep_ctx->rule, rep_ctx->n_rule); - } - else - { - rewrite_buff = execute_replace_rule(__http_body, __http_body_len, kZoneResponseBody, - rep_ctx->rule, rep_ctx->n_rule); - } - - if (rewrite_buff != NULL) - { - char * __rewrite_buff = (char *) evbuffer_pullup(rewrite_buff, -1); - size_t __sz_rewrite_buff = evbuffer_get_length(rewrite_buff); - tfe_http_half_append_body(rep_ctx->replacing, __rewrite_buff, __sz_rewrite_buff, 0); + tfe_http_half_append_body(rep_ctx->replacing, rewrite_buff, rewrite_sz, 0); } else { @@ -530,8 +519,7 @@ void http_replace(const struct tfe_stream * stream, const struct tfe_http_sessio if (rewrite_buff != NULL) { - evbuffer_free(rewrite_buff); - rewrite_buff = NULL; + FREE(&rewrite_buff); } if (rep_ctx->http_body != NULL) diff --git a/plugin/business/pangu-http/pattern_replace.cpp b/plugin/business/pangu-http/pattern_replace.cpp index 66d93ab..44ac7cb 100644 --- a/plugin/business/pangu-http/pattern_replace.cpp +++ b/plugin/business/pangu-http/pattern_replace.cpp @@ -135,12 +135,8 @@ size_t select_replace_rule(enum replace_zone zone, const struct replace_rule * r return j; } -static struct evbuffer * replace_string(const char * in, size_t in_sz, const struct replace_rule * zone) +size_t replace_string(const char * in, size_t in_sz, const struct replace_rule * zone, char** out) { - - int status = 0, is_replaced = 0; - struct evbuffer * out = NULL; - size_t replace_len = strlen(zone->replace_with); assert(strlen(zone->find) != 0); @@ -154,59 +150,82 @@ static struct evbuffer * replace_string(const char * in, size_t in_sz, const str pcre2_code *re = pcre2_compile(pattern, PCRE2_ZERO_TERMINATED, 0, &error, &erroffset, 0); if (re == 0) - return NULL; + return -1; pcre2_jit_compile(re, PCRE2_JIT_COMPLETE); - PCRE2_SIZE outlen = in_sz*2; - PCRE2_UCHAR* output = (PCRE2_UCHAR*)malloc(sizeof(PCRE2_UCHAR)*outlen); - - int rc = pcre2_substitute(re, subject, in_sz, 0, PCRE2_SUBSTITUTE_GLOBAL | PCRE2_SUBSTITUTE_EXTENDED, 0, 0, replacement, PCRE2_ZERO_TERMINATED, output, &outlen); - if (rc >= 0) - printf("%s\n", output); - - pcre2_code_free(re); - free(output); - return NULL; + PCRE2_SIZE outbuff_size = in_sz+sizeof(replacement)*MAX_EDIT_MATCHES; + PCRE2_SIZE outlen = 0; + PCRE2_UCHAR* out_buffer = NULL; +not_enough_mem_retry: + out_buffer = (PCRE2_UCHAR*)malloc(sizeof(PCRE2_UCHAR)*outbuff_size); + outlen = outbuff_size; + int rc = pcre2_substitute(re, subject, in_sz, 0, PCRE2_SUBSTITUTE_GLOBAL | PCRE2_SUBSTITUTE_EXTENDED | PCRE2_SUBSTITUTE_OVERFLOW_LENGTH, 0, 0, replacement, PCRE2_ZERO_TERMINATED, out_buffer, &outlen); + if(outlen>outbuff_size) + { + outbuff_size=outlen; + free(out_buffer); + out_buffer=NULL; + goto not_enough_mem_retry; + } + if(rc<=0) + { + free(out_buffer); + outlen=rc; + } + else + { + *out=(char*)out_buffer; + } + pcre2_code_free(re); + return outlen; } -struct evbuffer * execute_replace_rule(const char * in, size_t in_sz, - enum replace_zone zone, const struct replace_rule * rules, size_t n_rule) +size_t execute_replace_rule(const char * in, size_t in_sz, + enum replace_zone zone, const struct replace_rule * rules, size_t n_rule, char** out) { const struct replace_rule * todo[n_rule]; - size_t n_todo = 0, i = 0, interator_sz=0; - struct evbuffer * out = NULL; + size_t n_todo = 0, i = 0, interator_sz=0, pre_out_sz=0; const char * interator = NULL; - struct evbuffer * new_out = NULL, * pre_out = NULL; - if (in == 0) + char* new_out = NULL, * pre_out = NULL; + size_t output_size=0; + if (in_sz == 0 || in==NULL) { - return NULL; - } - //Do not process buffer that contains '\0'. - if (0 != memchr(in, '\0', in_sz)) - { - return NULL; + return 0; } n_todo = select_replace_rule(zone, rules, n_rule, todo, n_rule); interator = in; interator_sz = in_sz; for (i = 0; i < n_todo; i++) { - new_out = replace_string(interator, interator_sz, todo[i]); - if (new_out != NULL) + output_size = replace_string(interator, interator_sz, todo[i], &new_out); + if (output_size == 0) { - pre_out = out; - out = new_out; - interator = (char *) evbuffer_pullup(out, -1); - interator_sz = evbuffer_get_length(out); - if (pre_out != NULL) - { - evbuffer_free(pre_out); - pre_out = NULL; - } + continue; } + if (pre_out != NULL) + { + free(pre_out); + pre_out = NULL; + } + pre_out = new_out; + pre_out_sz = output_size; + + interator = new_out; + interator_sz = output_size; + + new_out=NULL; + output_size=0; + } + if(pre_out_sz>0) + { + *out=pre_out; + return pre_out_sz; + } + else + { + return 0; } - return out; } diff --git a/plugin/business/pangu-http/pattern_replace.h b/plugin/business/pangu-http/pattern_replace.h index 716c6d2..d9d029a 100644 --- a/plugin/business/pangu-http/pattern_replace.h +++ b/plugin/business/pangu-http/pattern_replace.h @@ -1,6 +1,5 @@ #pragma once - -#include +#include enum replace_zone { kZoneRequestUri = 0, @@ -22,5 +21,5 @@ struct replace_rule //zone=http_req_body; substitute=/中華民國/中华人民共和国;zone=http_resp_header; substitute=/Content-Type:\btext\/html/Content-Type:\bvideo\/mp4 //@return formated rule number. size_t format_replace_rule(const char * exec_para, struct replace_rule * replace, size_t n_replace); -struct evbuffer * execute_replace_rule(const char * in, size_t in_sz, enum replace_zone zone, const struct replace_rule * rules, size_t n_rule); +size_t execute_replace_rule(const char * in, size_t in_sz, enum replace_zone zone, const struct replace_rule * rules, size_t n_rule, char** out); diff --git a/plugin/business/pangu-http/test_pattern_replace.cpp b/plugin/business/pangu-http/test_pattern_replace.cpp index 3465997..ff22ac6 100644 --- a/plugin/business/pangu-http/test_pattern_replace.cpp +++ b/plugin/business/pangu-http/test_pattern_replace.cpp @@ -5,33 +5,105 @@ #include #include #include -TEST(PatternReplace, Pure) + +static void simple_replace(const char* find, const char* replacement, const char* input, size_t in_sz, char** output, size_t *output_sz) { - const char* filename="./test_data/facebook_index.html"; - const char* exec_para="zone=http_resp_body;substitute=/添加手机号/Mobile\bPhone"; + char* exec_para=NULL; + asprintf(&exec_para,"zone=http_resp_body;substitute=/%s/%s", find, replacement); size_t n_got_rule=0; struct replace_rule rules[16]; n_got_rule=format_replace_rule(exec_para, rules, sizeof(rules)/sizeof(rules[0])); - EXPECT_EQ(n_got_rule, 1); - + *output_sz=execute_replace_rule(input, strlen(input), kZoneResponseBody, rules, n_got_rule, output); + free(exec_para); + return; +} +TEST(PatternReplace, Grouping) +{ + const char* find="(?John)|(?李梅梅)|(?Jake)"; + const char* replacement="${name1:+Joseph:${name2:+王桃花:Keith}}"; + const char* input="John loves 李梅梅, 李梅梅 loves Jake and Jake doesn't care about John and 李梅梅."; + const char* expect="Joseph loves Jessica, Jessica loves Keith and Keith doesn't care about Joseph and Jessica."; + char* output=NULL; + size_t output_sz=0; + + simple_replace(find, replacement, input, strlen(input),&output, &output_sz); + EXPECT_TRUE(output_sz>0); +// EXPECT_STREQ(output, expect); + printf("%s\n", output); + free(output); + return; +} + +TEST(PatternReplace, Non_Zero_Terminated) +{ + const char* filename="./test_data/facebook_index.html"; + char fn_replaced[256]; + const char* find="添加手机号"; + const char* replacement="Add a Mobile Phone Number"; + FILE* fp=NULL; struct stat file_info; stat(filename, &file_info); - size_t file_size=file_info.st_size; - + size_t input_sz=file_info.st_size; + fp=fopen(filename,"r"); ASSERT_FALSE(fp==NULL); if(fp==NULL) { return; } - char* file_buff=(char*)malloc(file_size); - fread(file_buff,1,file_size,fp); + char* input=(char*)malloc(input_sz); + fread(input,1,input_sz,fp); fclose(fp); - struct evbuffer* output=NULL; - output=execute_replace_rule(file_buff, file_size, kZoneResponseBody, rules,n_got_rule); - EXPECT_FALSE(output==NULL); + char* output=NULL; + size_t output_sz=0; + simple_replace(find, replacement, input, input_sz, &output, &output_sz); + + EXPECT_TRUE(output_sz>0); + EXPECT_TRUE(NULL==memmem(output, output_sz, find, strlen(find))); + EXPECT_TRUE(NULL!=memmem(output, output_sz, replacement, strlen(replacement))); + snprintf(fn_replaced,sizeof(fn_replaced), "%s.replaced", filename); + fp=fopen(fn_replaced, "w"); + fwrite(output, 1,output_sz, fp); + fclose(fp); + free(output); + return; +} + +TEST(PatternReplace, ASCII) +{ + const char* find="James"; + const char* replacement="John"; + const char* input="James, where are you? Alice is calling you."; + char* output=NULL; + size_t output_sz=0; + + simple_replace(find, replacement, input, strlen(input),&output, &output_sz); + EXPECT_TRUE(output_sz>0); + EXPECT_TRUE(NULL==strstr(output, find)); + EXPECT_TRUE(NULL!=strstr(output, replacement)); + +// printf("%s\n", output); + free(output); + return; +} + +TEST(PatternReplace, UTF8) +{ + const char* find="视频"; + const char* replacement="短片"; + const char* input="欢迎来到 Facebook开始添加好友吧!他们的视频、照片和帖子都会显示在这里。"; + char* output=NULL; + size_t output_sz=0; + + simple_replace(find, replacement, input, strlen(input),&output, &output_sz); + EXPECT_TRUE(output_sz>0); + EXPECT_TRUE(NULL==strstr(output, find)); + EXPECT_TRUE(NULL!=strstr(output, replacement)); + +// printf("%s\n", output); + free(output); return; } diff --git a/vendor/pcre2-10.32.tar.gz b/vendor/pcre2-10.32.tar.gz new file mode 100644 index 0000000..7dca599 Binary files /dev/null and b/vendor/pcre2-10.32.tar.gz differ