TSG-23793 Fixed the issue where comments in the HTML page containing <head> or <body> tags would cause the inserted script to fail

This commit is contained in:
fengweihao
2024-11-20 10:03:12 +08:00
parent bbf084acad
commit ff436e3f26
4 changed files with 108 additions and 4 deletions

View File

@@ -35,6 +35,7 @@ struct insert_rule
size_t execute_replace_rule(const char * in, size_t in_sz, enum replace_zone zone, const struct replace_rule * rules, size_t n_rule, char** out, int options); size_t execute_replace_rule(const char * in, size_t in_sz, enum replace_zone zone, const struct replace_rule * rules, size_t n_rule, char** out, int options);
size_t execute_insert_rule(char * in, size_t in_sz, const struct insert_rule * rules, char** out); size_t execute_insert_rule(char * in, size_t in_sz, const struct insert_rule * rules, char** out);
size_t insert_string(char * in, size_t in_sz, const char *insert_on, const char *stype, const char *type, char** out); size_t insert_string(char * in, size_t in_sz, const char *insert_on, const char *stype, const char *type, char** out);
size_t simple_insert(char * in, size_t in_sz, const char *insert_on, const char *script, const char *type, char** out);
void simple_replace(const char* find, const char* replacement, const char* input, size_t in_sz, char** output, size_t *output_sz, int options); void simple_replace(const char* find, const char* replacement, const char* input, size_t in_sz, char** output, size_t *output_sz, int options);
enum replace_zone zone_name_to_id(const char * name); enum replace_zone zone_name_to_id(const char * name);

View File

@@ -260,6 +260,53 @@ static char *find_insert_position(char * in)
return insert_from; return insert_from;
} }
/*
 * Report whether the byte range [start, end) contains a literal "<head>" or
 * "<body>" tag.
 *
 * start: first byte of the range to inspect.
 * end:   one past the last byte of the range.
 *
 * Returns 1 when either tag lies entirely inside the range, 0 otherwise
 * (including a NULL, empty or inverted range).
 *
 * The range is scanned in place: no temporary copy is allocated and nothing
 * at or beyond `end` is read. As a plain C-string search would, the scan
 * stops at the first NUL byte found inside the range.
 */
int find_remove_position(char *start, char *end)
{
    static const char head_tag[] = "<head>";
    static const char body_tag[] = "<body>";
    /* Both tags have the same length, so one window size serves both. */
    const size_t tag_len = sizeof(head_tag) - 1;

    if (start == NULL || end == NULL || end <= start)
    {
        return 0;
    }

    size_t len = (size_t)(end - start);

    /* Clamp the searched length at an embedded NUL, if any. */
    const char *nul = (const char *)memchr(start, '\0', len);
    if (nul != NULL)
    {
        len = (size_t)(nul - start);
    }

    for (size_t i = 0; i + tag_len <= len; i++)
    {
        if (start[i] != '<')
        {
            continue;
        }
        if (memcmp(start + i, head_tag, tag_len) == 0 ||
            memcmp(start + i, body_tag, tag_len) == 0)
        {
            return 1;
        }
    }
    return 0;
}
/*
 * Remove, in place, every HTML comment ("<!--" ... "-->") that contains a
 * "<head>" or "<body>" tag from the NUL-terminated buffer `html`.
 *
 * Comments without either tag are kept and skipped over. Scanning stops at
 * the first comment opener that has no matching "-->"; the rest of the buffer
 * is left untouched. A NULL `html` is ignored.
 *
 * The buffer only ever shrinks: the text following a removed comment is
 * shifted down over it, terminator included.
 */
void remove_string_with_tags(char *html)
{
    if (html == NULL)
    {
        return;
    }

    char *search_from = html;
    char *start = NULL;

    while ((start = strstr(search_from, "<!--")) != NULL)
    {
        char *end = strstr(start, "-->");
        if (end == NULL)
        {
            /* Unterminated comment: nothing more can be removed safely. */
            break;
        }
        end += 3; /* step past the "-->" closer */

        /* find_remove_position() already inspects exactly [start, end), so
         * no further search of the remaining document is needed here. */
        if (find_remove_position(start, end))
        {
            /* +1 moves the terminating NUL along with the tail. The search
             * position is deliberately not advanced, so the next pass starts
             * again from the same place. */
            memmove(start, end, strlen(end) + 1);
        }
        else
        {
            /* Keep this comment and continue after it. */
            search_from = end;
        }
    }
}
size_t insert_string(char * in, size_t in_sz, const char *insert_on, const char *script, const char *type, char** out) size_t insert_string(char * in, size_t in_sz, const char *insert_on, const char *script, const char *type, char** out)
{ {
char *target=NULL; char *target=NULL;
@@ -329,7 +376,36 @@ size_t insert_string(char * in, size_t in_sz, const char *insert_on, const char
size_t execute_insert_rule(char * in, size_t in_sz, const struct insert_rule * rules, char** out) size_t execute_insert_rule(char * in, size_t in_sz, const struct insert_rule * rules, char** out)
{ {
return insert_string(in, in_sz, rules->position, rules->script, rules->type, out); size_t out_size=0;
if (in == NULL || in_sz < 0)
{
return 0;
}
char*new_in = ALLOC(char, in_sz+1);
memcpy(new_in, in, in_sz);
remove_string_with_tags(new_in);
out_size = insert_string(new_in, strlen(new_in), rules->position, rules->script, rules->type, out);
free(new_in);
new_in=NULL;
return out_size;
}
/*
 * Convenience wrapper: build a temporary insert_rule from plain strings and
 * run execute_insert_rule() with it.
 *
 * in, in_sz: input buffer and the number of bytes of it to process.
 * insert_on: stored as the rule's position.
 * script:    stored as the rule's script; its length becomes inject_sz.
 * type:      stored as the rule's type.
 * out:       forwarded to execute_insert_rule().
 *
 * Returns the value of execute_insert_rule(), or 0 when `script` is NULL
 * (its length could not be taken).
 */
size_t simple_insert(char * in, size_t in_sz, const char *insert_on, const char *script, const char *type, char** out)
{
    if (script == NULL)
    {
        return 0;
    }

    struct insert_rule rules;
    memset(&rules, 0, sizeof(rules));

    /* The rule only borrows these strings for the duration of the call. */
    rules.position = (char *)insert_on;
    rules.script = (char *)script;
    rules.type = (char *)type;
    rules.inject_sz = strlen(script);

    return execute_insert_rule(in, in_sz, &rules, out);
}
void simple_replace(const char* find, const char* replacement, const char* input, size_t in_sz, char** output, size_t *output_sz, int options) void simple_replace(const char* find, const char* replacement, const char* input, size_t in_sz, char** output, size_t *output_sz, int options)

View File

@@ -43,7 +43,6 @@ enum _log_action
#define get_time_ms(tv) ((long long)(tv.tv_sec) * 1000 + (long long)(tv.tv_usec) / 1000) #define get_time_ms(tv) ((long long)(tv.tv_sec) * 1000 + (long long)(tv.tv_usec) / 1000)
#ifdef __x86_64__ #ifdef __x86_64__
#include "uuid_v4_x86.h" #include "uuid_v4_x86.h"
#else #else

View File

@@ -7,7 +7,6 @@
#include <stdio.h> #include <stdio.h>
#include <gtest/gtest.h> #include <gtest/gtest.h>
TEST(PatternReplace, Grouping1) TEST(PatternReplace, Grouping1)
{ {
const char* find="(?<name1>John)|(?<name2>李梅梅)|(?<name3>Jake)"; const char* find="(?<name1>John)|(?<name2>李梅梅)|(?<name3>Jake)";
@@ -305,9 +304,38 @@ TEST(PatternInsert, BeforeBody)
free(output); free(output);
} }
// The page starts with a comment that itself holds <head> and <body> tags,
// followed by the real document. The script must still end up in the output.
TEST(PatternInsert, HtmlCommentsContain)
{
    char input[]="<!doctype html><!--<html><head><title></title><body></body></head></html>--><html><head><title></title><body></body></head></html>";
    const char* custom = "alert(\"Insert\");";
    char *output=NULL;
    // The length passed is 5 bytes shorter than the string, so the tail of
    // the buffer lies outside the declared input size.
    // NOTE(review): presumably intentional, to exercise in_sz handling - confirm.
    size_t output_sz=0, input_sz = strlen(input) - 5;

    output_sz = simple_insert(input, input_sz, "before-page-load", custom, "js", &output);

    EXPECT_TRUE(output_sz>0);
    // Stop here if no buffer came back: printf("%s") and strstr() below must
    // not be handed a NULL pointer.
    ASSERT_TRUE(NULL!=output);
    printf("output =%s\n", output);
    EXPECT_TRUE(NULL!=strstr(output, custom));

    free(output);
}
// Two comments surround the real <head>: the first holds neither <head> nor
// <body>, while the second does hold a <body> tag. The script must still end
// up in the output.
TEST(PatternInsert, HtmlNoCommentsContain)
{
    char input[]="<!doctype html><!--<html><title></title></html>--><html><head></head><!--<html><body></body></html>--><title></title><body></body></html>";
    const char* custom = "alert(\"Insert\");";
    char *output=NULL;
    size_t output_sz=0, input_sz = strlen(input);

    output_sz = simple_insert(input, input_sz, "before-page-load", custom, "js", &output);

    EXPECT_TRUE(output_sz>0);
    // Stop here if no buffer came back: printf("%s") and strstr() below must
    // not be handed a NULL pointer.
    ASSERT_TRUE(NULL!=output);
    printf("output =%s\n", output);
    EXPECT_TRUE(NULL!=strstr(output, custom));

    free(output);
}
int main(int argc, char ** argv) int main(int argc, char ** argv)
{ {
::testing::InitGoogleTest(&argc, argv); ::testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS(); return RUN_ALL_TESTS();
} }