From 044d512184002cfaeaf12bd568e33d76beb99172 Mon Sep 17 00:00:00 2001 From: zhengchao Date: Mon, 3 Jun 2019 13:00:27 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E5=A4=8Dpcre2=5Fcompile=E4=B8=AD?= =?UTF-8?q?=E5=8F=82=E6=95=B0=E9=A1=BA=E5=BA=8F=E9=94=99=E8=AF=AF=EF=BC=8C?= =?UTF-8?q?=E6=94=AF=E6=8C=81utf8=E7=9A=84=E6=9B=BF=E6=8D=A2=EF=BC=8C?= =?UTF-8?q?=E4=BF=84=E8=AF=AD=E4=B8=8D=E5=8C=BA=E5=88=86=E5=A4=A7=E5=B0=8F?= =?UTF-8?q?=E5=86=99=E6=B5=8B=E8=AF=95=E9=80=9A=E8=BF=87=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../pangu-http/src/pattern_replace.cpp | 14 ++++-- .../pangu-http/src/test_pattern_replace.cpp | 48 ++++++++++++++++++- 2 files changed, 55 insertions(+), 7 deletions(-) diff --git a/plugin/business/pangu-http/src/pattern_replace.cpp b/plugin/business/pangu-http/src/pattern_replace.cpp index 95e3fed..667bae3 100644 --- a/plugin/business/pangu-http/src/pattern_replace.cpp +++ b/plugin/business/pangu-http/src/pattern_replace.cpp @@ -192,15 +192,15 @@ size_t replace_string(const char * in, size_t in_sz, const struct replace_rule * { assert(strlen(zone->find) != 0); - int error; - PCRE2_SIZE erroffset; + int error=0; + PCRE2_SIZE erroffset=0; const PCRE2_SPTR pattern = (PCRE2_SPTR)zone->find; const PCRE2_SPTR subject = (PCRE2_SPTR)in; const PCRE2_SPTR replacement = (PCRE2_SPTR)zone->replace_with; - pcre2_code *re = pcre2_compile(pattern, PCRE2_ZERO_TERMINATED, 0, &error, &erroffset, 0); - if (re == 0) + pcre2_code *re = pcre2_compile(pattern, strlen(zone->find), PCRE2_UTF, &error, &erroffset, 0); + if (!re) return -1; pcre2_jit_compile(re, PCRE2_JIT_COMPLETE); @@ -211,7 +211,11 @@ size_t replace_string(const char * in, size_t in_sz, const struct replace_rule * not_enough_mem_retry: out_buffer = (PCRE2_UCHAR*)malloc(sizeof(PCRE2_UCHAR)*outbuff_size); outlen = outbuff_size; - int rc = pcre2_substitute(re, subject, in_sz, 0, PCRE2_SUBSTITUTE_GLOBAL | PCRE2_SUBSTITUTE_EXTENDED | PCRE2_SUBSTITUTE_OVERFLOW_LENGTH, 0, 0, replacement, PCRE2_ZERO_TERMINATED, out_buffer, &outlen); + int rc = pcre2_substitute(re, subject, in_sz, 0, + PCRE2_SUBSTITUTE_GLOBAL | PCRE2_SUBSTITUTE_EXTENDED | PCRE2_SUBSTITUTE_OVERFLOW_LENGTH, + 0, 0, //pcre2_match_data *match_data, pcre2_match_context + replacement, strlen(zone->replace_with), + out_buffer, &outlen); if(outlen>outbuff_size) { outbuff_size=outlen; diff --git a/plugin/business/pangu-http/src/test_pattern_replace.cpp b/plugin/business/pangu-http/src/test_pattern_replace.cpp index 0672703..5017ee5 100644 --- a/plugin/business/pangu-http/src/test_pattern_replace.cpp +++ b/plugin/business/pangu-http/src/test_pattern_replace.cpp @@ -33,6 +33,10 @@ TEST(PatternReplace, Grouping2) simple_replace(find, replacement, input, strlen(input),&output, &output_sz); EXPECT_TRUE(output_sz>0); + EXPECT_TRUE(NULL==strstr(output, "中央空调")); + EXPECT_TRUE(NULL!=strstr(output, "中央Air conditioner")); + EXPECT_TRUE(NULL!=strstr(output, "家用立式空调")); + // EXPECT_STREQ(output, expect); printf("%s\n", output); free(output); @@ -112,6 +116,46 @@ TEST(PatternReplace, UTF8) free(output); return; } +TEST(PatternReplace, CaseInsensitive) +{ + const char* find="(?i)Abc(?-i)视频"; + const char* replacement="ABC短片"; + const char* input="欢迎来到 Facebook开始添加好友吧!他们的aBc视频、照片和帖子都会显示在这里。"; + char* output=NULL; + size_t output_sz=0; + + simple_replace(find, replacement, input, strlen(input),&output, &output_sz); + EXPECT_TRUE(output_sz>0); + EXPECT_TRUE(NULL==strstr(output, find)); + EXPECT_TRUE(NULL!=strstr(output, replacement)); + + printf("%s\n", output); + free(output); + return; +} + +TEST(PatternReplace, CaseInsensitiveRussian) +{ + const char* find="(?i)САМРУК-КАЗЫНА(?-i)"; + + const char* replacement="МММММММММ"; + const char* input="Как мы сообщали, 22 мая прошло заседание Совета по управлению Фондом национального \ + благосостояния \"Самрук-Казына\" под председательством Первого Президента Республики Казахстан – \ + Елбасы Нурсултана Назарбаева. Чуть более года назад была принята новая стратегия управления Фондом \ + на ближайшие десять лет, и председатель правления \"Самрук-Қазына\" Ахметжан Есимов докладывал Н. \ + Назарбаеву о том, как она выполняется. В распоряжении нашей редакции оказались некоторые детали,\ + о которых не сообщалось в СМИ ранее."; + char* output=NULL; + size_t output_sz=0; + + simple_replace(find, replacement, input, strlen(input),&output, &output_sz); + EXPECT_TRUE(output_sz>0); + EXPECT_TRUE(NULL==strstr(output, find)); + EXPECT_TRUE(NULL!=strstr(output, replacement)); + + free(output); + return; +} TEST(PatternInsert, CSS) { @@ -131,7 +175,7 @@ TEST(PatternInsert, CSS) free(input); } -TEST(PatternInsert, after_body) +TEST(PatternInsert, AfterBody) { const char* filename="./test_data/index_of__centos.html"; const char* custom = "var now=new Date();var year=now.getYear()+1900;var month=now.getMonth()+1;var date=now.getDate();var day=now.getDay();\ @@ -149,7 +193,7 @@ TEST(PatternInsert, after_body) free(output); } -TEST(PatternInsert, before_body) +TEST(PatternInsert, BeforeBody) { const char* filename="./test_data/index_of__centos.html"; const char* custom = "var now=new Date();var year=now.getYear()+1900;var month=now.getMonth()+1;var date=now.getDate();var day=now.getDay();\