修复 pcre2_compile 中参数顺序错误,支持 UTF-8 的替换,俄语不区分大小写测试通过。
This commit is contained in:
@@ -192,15 +192,15 @@ size_t replace_string(const char * in, size_t in_sz, const struct replace_rule *
|
|||||||
{
|
{
|
||||||
assert(strlen(zone->find) != 0);
|
assert(strlen(zone->find) != 0);
|
||||||
|
|
||||||
int error;
|
int error=0;
|
||||||
PCRE2_SIZE erroffset;
|
PCRE2_SIZE erroffset=0;
|
||||||
|
|
||||||
const PCRE2_SPTR pattern = (PCRE2_SPTR)zone->find;
|
const PCRE2_SPTR pattern = (PCRE2_SPTR)zone->find;
|
||||||
const PCRE2_SPTR subject = (PCRE2_SPTR)in;
|
const PCRE2_SPTR subject = (PCRE2_SPTR)in;
|
||||||
const PCRE2_SPTR replacement = (PCRE2_SPTR)zone->replace_with;
|
const PCRE2_SPTR replacement = (PCRE2_SPTR)zone->replace_with;
|
||||||
|
|
||||||
pcre2_code *re = pcre2_compile(pattern, PCRE2_ZERO_TERMINATED, 0, &error, &erroffset, 0);
|
pcre2_code *re = pcre2_compile(pattern, strlen(zone->find), PCRE2_UTF, &error, &erroffset, 0);
|
||||||
if (re == 0)
|
if (!re)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
pcre2_jit_compile(re, PCRE2_JIT_COMPLETE);
|
pcre2_jit_compile(re, PCRE2_JIT_COMPLETE);
|
||||||
@@ -211,7 +211,11 @@ size_t replace_string(const char * in, size_t in_sz, const struct replace_rule *
|
|||||||
not_enough_mem_retry:
|
not_enough_mem_retry:
|
||||||
out_buffer = (PCRE2_UCHAR*)malloc(sizeof(PCRE2_UCHAR)*outbuff_size);
|
out_buffer = (PCRE2_UCHAR*)malloc(sizeof(PCRE2_UCHAR)*outbuff_size);
|
||||||
outlen = outbuff_size;
|
outlen = outbuff_size;
|
||||||
int rc = pcre2_substitute(re, subject, in_sz, 0, PCRE2_SUBSTITUTE_GLOBAL | PCRE2_SUBSTITUTE_EXTENDED | PCRE2_SUBSTITUTE_OVERFLOW_LENGTH, 0, 0, replacement, PCRE2_ZERO_TERMINATED, out_buffer, &outlen);
|
int rc = pcre2_substitute(re, subject, in_sz, 0,
|
||||||
|
PCRE2_SUBSTITUTE_GLOBAL | PCRE2_SUBSTITUTE_EXTENDED | PCRE2_SUBSTITUTE_OVERFLOW_LENGTH,
|
||||||
|
0, 0, //pcre2_match_data *match_data, pcre2_match_context
|
||||||
|
replacement, strlen(zone->replace_with),
|
||||||
|
out_buffer, &outlen);
|
||||||
if(outlen>outbuff_size)
|
if(outlen>outbuff_size)
|
||||||
{
|
{
|
||||||
outbuff_size=outlen;
|
outbuff_size=outlen;
|
||||||
|
|||||||
@@ -33,6 +33,10 @@ TEST(PatternReplace, Grouping2)
|
|||||||
|
|
||||||
simple_replace(find, replacement, input, strlen(input),&output, &output_sz);
|
simple_replace(find, replacement, input, strlen(input),&output, &output_sz);
|
||||||
EXPECT_TRUE(output_sz>0);
|
EXPECT_TRUE(output_sz>0);
|
||||||
|
EXPECT_TRUE(NULL==strstr(output, "中央空调"));
|
||||||
|
EXPECT_TRUE(NULL!=strstr(output, "中央Air conditioner"));
|
||||||
|
EXPECT_TRUE(NULL!=strstr(output, "家用立式空调"));
|
||||||
|
|
||||||
// EXPECT_STREQ(output, expect);
|
// EXPECT_STREQ(output, expect);
|
||||||
printf("%s\n", output);
|
printf("%s\n", output);
|
||||||
free(output);
|
free(output);
|
||||||
@@ -112,6 +116,46 @@ TEST(PatternReplace, UTF8)
|
|||||||
free(output);
|
free(output);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
// Checks that an inline (?i)...(?-i) group matches ASCII letters in any case
// while the UTF-8 (Chinese) literal that follows it is still part of the match.
TEST(PatternReplace, CaseInsensitive)
{
    const char* find="(?i)Abc(?-i)视频";
    const char* replacement="ABC短片";
    const char* input="欢迎来到 Facebook开始添加好友吧!他们的aBc视频、照片和帖子都会显示在这里。";
    // The exact piece of `input` that the pattern is expected to match.
    const char* matched="aBc视频";
    char* output=NULL;
    size_t output_sz=0;

    simple_replace(find, replacement, input, strlen(input),&output, &output_sz);
    // `output` is still NULL if simple_replace() never set it; stop before
    // handing a null pointer to strstr()/printf().
    ASSERT_TRUE(NULL!=output);
    EXPECT_TRUE(output_sz>0);
    // Search for the matched text, not for `find`: the regex source never
    // occurs in the input, so that check would pass even with no replacement.
    EXPECT_TRUE(NULL==strstr(output, matched));
    EXPECT_TRUE(NULL!=strstr(output, replacement));

    printf("%s\n", output);
    free(output);
}
|
||||||
|
|
||||||
|
// Checks case-insensitive matching of Cyrillic text: an upper-case pattern
// must replace the mixed-case occurrence found in the input.
TEST(PatternReplace, CaseInsensitiveRussian)
{
    const char* find="(?i)САМРУК-КАЗЫНА(?-i)";

    const char* replacement="МММММММММ";
    const char* input="Как мы сообщали, 22 мая прошло заседание Совета по управлению Фондом национального \
благосостояния \"Самрук-Казына\" под председательством Первого Президента Республики Казахстан – \
Елбасы Нурсултана Назарбаева. Чуть более года назад была принята новая стратегия управления Фондом \
на ближайшие десять лет, и председатель правления \"Самрук-Қазына\" Ахметжан Есимов докладывал Н. \
Назарбаеву о том, как она выполняется. В распоряжении нашей редакции оказались некоторые детали,\
о которых не сообщалось в СМИ ранее.";
    // Mixed-case spelling in `input` that differs from the pattern only by case.
    const char* matched="Самрук-Казына";
    // Kazakh spelling in `input`: its Қ is a different letter from the
    // pattern's К, so this occurrence must be left as it is.
    const char* untouched="Самрук-Қазына";
    char* output=NULL;
    size_t output_sz=0;

    simple_replace(find, replacement, input, strlen(input),&output, &output_sz);
    // `output` is still NULL if simple_replace() never set it; stop before
    // handing a null pointer to strstr().
    ASSERT_TRUE(NULL!=output);
    EXPECT_TRUE(output_sz>0);
    // Search for the matched text, not for `find`: the regex source never
    // occurs in the input, so that check would pass even with no replacement.
    EXPECT_TRUE(NULL==strstr(output, matched));
    EXPECT_TRUE(NULL!=strstr(output, replacement));
    EXPECT_TRUE(NULL!=strstr(output, untouched));

    free(output);
}
|
||||||
|
|
||||||
TEST(PatternInsert, CSS)
|
TEST(PatternInsert, CSS)
|
||||||
{
|
{
|
||||||
@@ -131,7 +175,7 @@ TEST(PatternInsert, CSS)
|
|||||||
free(input);
|
free(input);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(PatternInsert, after_body)
|
TEST(PatternInsert, AfterBody)
|
||||||
{
|
{
|
||||||
const char* filename="./test_data/index_of__centos.html";
|
const char* filename="./test_data/index_of__centos.html";
|
||||||
const char* custom = "var now=new Date();var year=now.getYear()+1900;var month=now.getMonth()+1;var date=now.getDate();var day=now.getDay();\
|
const char* custom = "var now=new Date();var year=now.getYear()+1900;var month=now.getMonth()+1;var date=now.getDate();var day=now.getDay();\
|
||||||
@@ -149,7 +193,7 @@ TEST(PatternInsert, after_body)
|
|||||||
free(output);
|
free(output);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(PatternInsert, before_body)
|
TEST(PatternInsert, BeforeBody)
|
||||||
{
|
{
|
||||||
const char* filename="./test_data/index_of__centos.html";
|
const char* filename="./test_data/index_of__centos.html";
|
||||||
const char* custom = "var now=new Date();var year=now.getYear()+1900;var month=now.getMonth()+1;var date=now.getDate();var day=now.getDay();\
|
const char* custom = "var now=new Date();var year=now.getYear()+1900;var month=now.getMonth()+1;var date=now.getDate();var day=now.getDay();\
|
||||||
|
|||||||
Reference in New Issue
Block a user