perf: 性能优化

* io_uring使用buffer pool避免内存分配与释放
    * packet io thread与worker thread无锁访问cmsg
    * 为解密流量的fd设置默认的TTL
This commit is contained in:
luwenpeng
2023-07-14 19:38:18 +08:00
parent 2b00650d3e
commit c3b887f1c5
19 changed files with 935 additions and 939 deletions

View File

@@ -24,7 +24,6 @@
#include "tfe_cmsg.h"
#include "tfe_tcp_restore.h"
#include "tfe_stream.h"
#include "raw_socket.h"
#include "packet_construct.h"
#include "mpack.h"
#include "tap.h"
@@ -135,30 +134,6 @@ struct metadata
struct route_ctx route_ctx;
};
/*
 * TCP "Maximum Segment Size" option, laid out exactly as it appears in the
 * TCP header: kind = 2, length = 4, followed by the 16-bit MSS value.
 * The struct is packed so that it can be overlaid on a raw option buffer at
 * any byte offset.
 */
struct tcp_option_mss {
uint8_t kind; /* option kind, written as 2 by fake_tcp_handshake() */
uint8_t length; /* total option length in bytes, written as 4 */
uint16_t mss_value; /* MSS, stored in network byte order (htons) */
} __attribute__((__packed__));
/*
 * TCP "Window Scale" option as laid out in the TCP header: kind = 3,
 * length = 3, followed by the one-byte shift count.
 * Packed so that it can be overlaid on a raw option buffer at any offset.
 */
struct tcp_option_window_scale {
uint8_t kind; /* option kind, written as 3 by fake_tcp_handshake() */
uint8_t length; /* total option length in bytes, written as 3 */
uint8_t shift_count; /* window scale shift taken from the restored endpoint */
} __attribute__((__packed__));
/*
 * Two-byte TCP SACK option header as written by fake_tcp_handshake():
 * kind = 4, length = 2, no payload.
 * Packed so that it can be overlaid on a raw option buffer at any offset.
 */
struct tcp_option_sack {
uint8_t kind; /* option kind, written as 4 */
uint8_t length; /* total option length in bytes, written as 2 */
} __attribute__((__packed__));
/*
 * TCP "Timestamp" option as laid out in the TCP header: kind = 8,
 * length = 10, followed by the 32-bit TSval and TSecr fields.
 * Packed so that it can be overlaid on a raw option buffer at any offset.
 */
struct tcp_option_time_stamp {
uint8_t kind; /* option kind, written as 8 by fake_tcp_handshake() */
uint8_t length; /* total option length in bytes, written as 10 */
uint32_t tsval; /* sender timestamp value, network byte order (htonl) */
uint32_t tsecr; /* echoed peer timestamp, network byte order (htonl) */
} __attribute__((__packed__));
extern int tcp_policy_enforce(struct tcp_policy_enforcer *tcp_enforcer, struct tfe_cmsg *cmsg);
extern int tfe_proxy_fds_accept(struct tfe_proxy * ctx, int fd_downstream, int fd_upstream, int fd_fake_c, int fd_fake_s, struct tfe_cmsg * cmsg);
extern void chaining_policy_enforce(struct chaining_policy_enforcer *enforcer, struct tfe_cmsg *cmsg, uint64_t rule_id);
@@ -312,233 +287,6 @@ static int add_ether_proto(void *raw_data, uint16_t proto){
return 0;
}
static int fake_tcp_handshake(struct tfe_proxy *proxy, struct tcp_restore_info *restore_info)
{
char buffer[1500] = {0};
int length = 0;
char tcp_option_buffer_c[40] = {0};
char tcp_option_buffer_s[40] = {0};
char tcp_option_buffer_c2[40] = {0};
int tcp_option_length_c = 0;
int tcp_option_length_s = 0;
int tcp_option_length_c2 = 0;
const struct tcp_restore_endpoint *client = &restore_info->client;
const struct tcp_restore_endpoint *server = &restore_info->server;
struct raw_socket *raw_socket_c = raw_socket_create(proxy->traffic_steering_options.device_client, proxy->traffic_steering_options.so_mask_client);
struct raw_socket *raw_socket_s = raw_socket_create(proxy->traffic_steering_options.device_server, proxy->traffic_steering_options.so_mask_server);
if (raw_socket_c == NULL || raw_socket_s == NULL)
{
raw_socket_destory(raw_socket_c);
raw_socket_destory(raw_socket_s);
return -1;
}
uint32_t c_seq = client->seq - 1;
uint32_t s_seq = server->seq - 1;
/*
* Maximum segment size: Kind: 2, Length: 4
* +---------+---------+---------+
* | Kind=2 |Length=4 |mss.value|
* +---------+---------+---------+
* 1 1 2
*/
if (client->mss && server->mss)
{
struct tcp_option_mss *option_c = (struct tcp_option_mss *)(tcp_option_buffer_c + tcp_option_length_c);
option_c->kind = 2;
option_c->length = 4;
option_c->mss_value = htons(client->mss);
tcp_option_length_c += sizeof(struct tcp_option_mss);
struct tcp_option_mss *option_s = (struct tcp_option_mss *)(tcp_option_buffer_s + tcp_option_length_s);
option_s->kind = 2;
option_s->length = 4;
option_s->mss_value = htons(server->mss);
tcp_option_length_s += sizeof(struct tcp_option_mss);
}
/*
* Window Scale option: Kind: 3, Length: 3
* +---------+---------+---------+
* | Kind=3 |Length=3 |shift.cnt|
* +---------+---------+---------+
* 1 1 1
*/
if (client->wscale_perm && server->wscale_perm)
{
// padding
memset(tcp_option_buffer_c + tcp_option_length_c, 1, 1);
tcp_option_length_c += 1;
memset(tcp_option_buffer_s + tcp_option_length_s, 1, 1);
tcp_option_length_s += 1;
struct tcp_option_window_scale *option_c = (struct tcp_option_window_scale *)(tcp_option_buffer_c + tcp_option_length_c);
option_c->kind = 3;
option_c->length = 3;
option_c->shift_count = client->wscale;
tcp_option_length_c += sizeof(struct tcp_option_window_scale);
struct tcp_option_window_scale *option_s = (struct tcp_option_window_scale *)(tcp_option_buffer_s + tcp_option_length_s);
option_s->kind = 3;
option_s->length = 3;
option_s->shift_count = server->wscale;
tcp_option_length_s += sizeof(struct tcp_option_window_scale);
}
/*
* SACK option: Kind: 4, Length: 2
* +---------+---------+
* | Kind=4 |Length=2 |
* +---------+---------+
* 1 1
*/
if (client->sack_perm && server->sack_perm)
{
// padding
memset(tcp_option_buffer_c + tcp_option_length_c, 1, 2);
tcp_option_length_c += 2;
memset(tcp_option_buffer_s + tcp_option_length_s, 1, 2);
tcp_option_length_s += 2;
struct tcp_option_sack *option_c = (struct tcp_option_sack *)(tcp_option_buffer_c + tcp_option_length_c);
option_c->kind = 4;
option_c->length = 2;
tcp_option_length_c += sizeof(struct tcp_option_sack);
struct tcp_option_sack *option_s = (struct tcp_option_sack *)(tcp_option_buffer_s + tcp_option_length_s);
option_s->kind = 4;
option_s->length = 2;
tcp_option_length_s += sizeof(struct tcp_option_sack);
}
/*
* Time Stamp option: Kind: 8, Length: 10
* +---------+---------+-----+-----+
* | Kind=8 |Length=10|tsval|tsecr|
* +---------+---------+-----+-----+
* 1 1 4 4
*/
if (client->timestamp_perm && server->timestamp_perm)
{
// padding
memset(tcp_option_buffer_c + tcp_option_length_c, 1, 2);
tcp_option_length_c += 2;
memset(tcp_option_buffer_s + tcp_option_length_s, 1, 2);
tcp_option_length_s += 2;
memset(tcp_option_buffer_c2 + tcp_option_length_c2, 1, 2);
tcp_option_length_c2 += 2;
struct tcp_option_time_stamp *option_c = (struct tcp_option_time_stamp *)(tcp_option_buffer_c + tcp_option_length_c);
option_c->kind = 8;
option_c->length = 10;
option_c->tsval = htonl(client->ts_val);
option_c->tsecr = htonl(0);
tcp_option_length_c += sizeof(struct tcp_option_time_stamp);
struct tcp_option_time_stamp *option_s = (struct tcp_option_time_stamp *)(tcp_option_buffer_s + tcp_option_length_s);
option_s->kind = 8;
option_s->length = 10;
option_s->tsval = htonl(server->ts_val);
option_s->tsecr = htonl(client->ts_val);
tcp_option_length_s += sizeof(struct tcp_option_time_stamp);
struct tcp_option_time_stamp *option_c2 = (struct tcp_option_time_stamp *)(tcp_option_buffer_c2 + tcp_option_length_c2);
option_c2->kind = 8;
option_c2->length = 10;
option_c2->tsval = htonl(client->ts_val);
option_c2->tsecr = htonl(server->ts_val);
tcp_option_length_c2 += sizeof(struct tcp_option_time_stamp);
}
if (client->addr.ss_family == AF_INET6)
{
struct sockaddr_in6 *sk_client = (struct sockaddr_in6 *)&client->addr;
struct sockaddr_in6 *sk_server = (struct sockaddr_in6 *)&server->addr;
uint16_t port_client = sk_client->sin6_port;
uint16_t port_server = sk_server->sin6_port;
// C -> S
length = tcp_packet_v6_construct(
buffer, // buffer
&raw_socket_c->mac_addr, &raw_socket_s->mac_addr, 0, ETH_P_IPV6, // Ether
&sk_client->sin6_addr, &sk_server->sin6_addr, 55, // IPv6
port_client, port_server, c_seq, 0, TCP_SYN_FLAG, client->window, // TCP Header
tcp_option_buffer_c, tcp_option_length_c, // TCP Options
NULL, 0); // Payload
raw_socket_send(raw_socket_c, buffer, length);
c_seq += 1;
// S -> C
length = tcp_packet_v6_construct(
buffer, // buffer
&raw_socket_s->mac_addr, &raw_socket_c->mac_addr, 0, ETH_P_IPV6, // Ether
&sk_server->sin6_addr, &sk_client->sin6_addr, 65, // IPv6
port_server, port_client, s_seq, c_seq, TCP_SYN_FLAG | TCP_ACK_FLAG, server->window, // TCP Header
tcp_option_buffer_s, tcp_option_length_s, // TCP Options
NULL, 0); // Payload
raw_socket_send(raw_socket_s, buffer, length);
s_seq += 1;
// C -> S
length = tcp_packet_v6_construct(
buffer, // buffer
&raw_socket_c->mac_addr, &raw_socket_s->mac_addr, 0, ETH_P_IPV6, // Ether
&sk_client->sin6_addr, &sk_server->sin6_addr, 55, // IPv6
port_client, port_server, c_seq, s_seq, TCP_ACK_FLAG, client->window, // TCP Header
tcp_option_buffer_c2, tcp_option_length_c2, // TCP Options
NULL, 0); // Payload
raw_socket_send(raw_socket_c, buffer, length);
}
else
{
struct sockaddr_in *sk_client = (struct sockaddr_in *)&client->addr;
struct sockaddr_in *sk_server = (struct sockaddr_in *)&server->addr;
uint16_t port_client = sk_client->sin_port;
uint16_t port_server = sk_server->sin_port;
// C -> S
length = tcp_packet_v4_construct(
buffer, // buffer
&raw_socket_c->mac_addr, &raw_socket_s->mac_addr, 0, ETH_P_IP, // Ether
&sk_client->sin_addr, &sk_server->sin_addr, 0, 55, 0x11, // IPv4
port_client, port_server, c_seq, 0, TCP_SYN_FLAG, client->window, // TCP Header
tcp_option_buffer_c, tcp_option_length_c, // TCP Options
NULL, 0);
raw_socket_send(raw_socket_c, buffer, length);
c_seq += 1;
// S -> C
length = tcp_packet_v4_construct(
buffer, // buffer
&raw_socket_s->mac_addr, &raw_socket_c->mac_addr, 0, ETH_P_IP, // Ether
&sk_server->sin_addr,&sk_client->sin_addr, 0, 65, 0x12, // IPv4
port_server, port_client, s_seq, c_seq, TCP_SYN_FLAG | TCP_ACK_FLAG, server->window, // TCP Header
tcp_option_buffer_s, tcp_option_length_s, // TCP Options
NULL, 0);
raw_socket_send(raw_socket_s, buffer, length);
s_seq += 1;
// C -> S
length = tcp_packet_v4_construct(
buffer, // buffer
&raw_socket_c->mac_addr, &raw_socket_s->mac_addr, 0, ETH_P_IP, // Ether
&sk_client->sin_addr, &sk_server->sin_addr, 0, 55, 0x13, // IPv4
port_client, port_server, c_seq, s_seq, TCP_ACK_FLAG, client->window, // TCP Header
tcp_option_buffer_c2, tcp_option_length_c2, // TCP Options
NULL, 0);
raw_socket_send(raw_socket_c, buffer, length);
}
raw_socket_destory(raw_socket_c);
raw_socket_destory(raw_socket_s);
return 0;
}
static int overwrite_tcp_mss(struct tfe_cmsg *cmsg, struct tcp_restore_info *restore, uint64_t session_id, void *logger)
{
int ret = 0;
@@ -1185,6 +933,53 @@ static void set_passthrough_reason(struct tfe_cmsg *cmsg, char *reason)
tfe_cmsg_set_flag(cmsg, TFE_CMSG_FLAG_USER0);
}
/*
 * Common signature of the handshake frame builders used below
 * (tfe_tcp_restore_syn_packet, tfe_tcp_restore_synack_packet,
 * tfe_tcp_restore_ack_packet): write one Ethernet frame describing `info`
 * into `buffer` (capacity `size`) and return its length in bytes.
 */
typedef int tcp_handshake_fn(struct tcp_restore_info *info, struct ether_addr *client_mac, struct ether_addr *server_mac, char *buffer, int size);

/*
 * Inject a synthetic TCP three-way handshake (SYN, SYN-ACK, ACK) for a
 * restored session into the NF interface.
 *
 * thread          packet IO thread context; supplies the acceptor, packet_io,
 *                 statistics and logger handles.
 * info            restored TCP state handed to the frame builders.
 * session_id      session the three frames are tagged with.
 * c2s_is_e2i_dir  direction flag applied to the client->server frames (SYN and
 *                 ACK); the SYN-ACK gets the logical negation.
 *
 * All three frames are built and length-checked BEFORE any marsio buffer is
 * allocated, so a builder failure cannot leak buffers. If building or
 * allocation fails, an error is logged and nothing is sent.
 */
static void packet_io_send_fake_pkt(struct packet_io_thread_ctx *thread, struct tcp_restore_info *info, uint64_t session_id, int c2s_is_e2i_dir)
{
    enum
    {
        HANDSHAKE_PKT_NUM = 3,
        HANDSHAKE_PKT_MAX = 1500
    };
    /* Builders in wire order: index 0 = SYN, 1 = SYN-ACK, 2 = ACK. */
    static tcp_handshake_fn *const builders[HANDSHAKE_PKT_NUM] = {tfe_tcp_restore_syn_packet, tfe_tcp_restore_synack_packet, tfe_tcp_restore_ack_packet};

    struct acceptor_kni_v4 *acceptor_ctx = thread->ref_acceptor_ctx;
    struct packet_io *packet_io = thread->ref_io;
    struct packet_io_fs *packet_io_fs = thread->ret_fs_state;
    struct ether_addr *client_mac = (struct ether_addr *)&packet_io->config.tap_c_mac;
    struct ether_addr *server_mac = (struct ether_addr *)&packet_io->config.tap_s_mac;
    void *logger = thread->logger;

    char frames[HANDSHAKE_PKT_NUM][HANDSHAKE_PKT_MAX];
    int frame_len[HANDSHAKE_PKT_NUM];

    /* Step 1: build every frame and reject lengths that cannot be copied safely. */
    for (int i = 0; i < HANDSHAKE_PKT_NUM; i++)
    {
        frame_len[i] = builders[i](info, client_mac, server_mac, frames[i], (int)sizeof(frames[i]));
        if (frame_len[i] <= 0 || frame_len[i] > (int)sizeof(frames[i]))
        {
            TFE_LOG_ERROR(logger, "session %" PRIu64 " Failed at build fake handshake packet, index = %d, len = %d, thread_seq = %d",
                          session_id, i, frame_len[i], thread->thread_index);
            return;
        }
    }

    /* Step 2: allocate the transmit buffers; the other allocation sites in this file treat ret < 0 as failure. */
    marsio_buff_t *tx_buffs[HANDSHAKE_PKT_NUM];
    int alloc_ret = marsio_buff_malloc_global(packet_io->instance, tx_buffs, HANDSHAKE_PKT_NUM, 0, thread->thread_index);
    if (alloc_ret < 0)
    {
        TFE_LOG_ERROR(logger, "Failed at alloc marsio buffer, ret = %d, thread_seq = %d",
                      alloc_ret, thread->thread_index);
        return;
    }

    struct metadata meta = {0};
    meta.session_id = session_id;
    meta.is_decrypted = SET_TRAFFIC_IS_DECRYPTED(0);
    meta.is_ctrl_pkt = 0;
    meta.l7offset = 0;
    meta.sids.num = 2;
    meta.sids.elems[0] = acceptor_ctx->sce_sids;
    meta.sids.elems[1] = acceptor_ctx->proxy_sids;

    /* Step 3: copy each frame into its buffer and attach per-packet metadata. */
    for (int i = 0; i < HANDSHAKE_PKT_NUM; i++)
    {
        meta.raw_len = frame_len[i];
        /* NOTE(review): marsio_buff_append() is assumed to return a writable region of raw_len bytes; confirm it cannot return NULL here. */
        meta.raw_data = marsio_buff_append(tx_buffs[i], meta.raw_len);
        memcpy(meta.raw_data, frames[i], meta.raw_len);

        /* SYN (0) and ACK (2) travel client->server; the SYN-ACK (1) travels the opposite way. */
        meta.is_e2i_dir = (i == 1) ? !c2s_is_e2i_dir : c2s_is_e2i_dir;

        packet_io_set_metadata(tx_buffs[i], &meta, logger);
        throughput_metrics_inc(&packet_io_fs->decrypt_tx, 1, meta.raw_len);
    }

    marsio_send_burst_with_options(packet_io->dev_nf_interface.mr_path, thread->thread_index, tx_buffs, HANDSHAKE_PKT_NUM, MARSIO_SEND_OPT_REHASH);
}
// return 0 : success
// return -1 : error
static int handle_session_opening(struct metadata *meta, struct ctrl_pkt_parser *parser, int thread_seq, void *ctx)
@@ -1287,13 +1082,7 @@ static int handle_session_opening(struct metadata *meta, struct ctrl_pkt_parser
(STREAM_PROTO_SSL == (enum tfe_stream_proto)stream_protocol_in_char && thread->ref_proxy->traffic_steering_options.enable_steering_ssl) ||
enable_decrypted_traffic_steering == 1)
{
if (fake_tcp_handshake(thread->ref_proxy, &restore_info) == -1)
{
TFE_LOG_ERROR(logger, "%s: session %lu Failed at fake_tcp_handshake()", LOG_TAG_PKTIO, meta->session_id);
is_passthrough = 1;
set_passthrough_reason(parser->cmsg, reason_invalid_tcp_policy_param);
goto passthrough;
}
packet_io_send_fake_pkt(thread, &restore_info, meta->session_id, meta->is_e2i_dir);
fd_fake_c = tfe_tcp_restore_fd_create(&(restore_info.client), &(restore_info.server), thread->ref_proxy->traffic_steering_options.device_client, thread->ref_proxy->traffic_steering_options.so_mask_client);
if (fd_fake_c < 0)
@@ -1314,7 +1103,13 @@ static int handle_session_opening(struct metadata *meta, struct ctrl_pkt_parser
}
}
stream_common_direction = meta->is_e2i_dir ? 'I' : 'E';
tfe_cmsg_set(parser->cmsg, TFE_CMSG_COMMON_DIRECTION, (const unsigned char *)&stream_common_direction, sizeof(stream_common_direction));
snprintf(stream_traceid, 24, "%" PRIu64, meta->session_id);
tfe_cmsg_set(parser->cmsg, TFE_CMSG_STREAM_TRACE_ID, (const unsigned char *)stream_traceid, strlen(stream_traceid));
tfe_cmsg_dup(parser->cmsg);
// To avoid a race between the packet IO thread and the worker thread when accessing cmsg, the packet IO thread must finish setting cmsg before calling tfe_proxy_fds_accept().
if (tfe_proxy_fds_accept(thread->ref_proxy, fd_downstream, fd_upstream, fd_fake_c, fd_fake_s, parser->cmsg) < 0)
{
TFE_LOG_ERROR(logger, "%s: session %lu Failed at tfe_proxy_fds_accept()", LOG_TAG_PKTIO, meta->session_id);
@@ -1322,14 +1117,6 @@ static int handle_session_opening(struct metadata *meta, struct ctrl_pkt_parser
set_passthrough_reason(parser->cmsg, reason_invalid_tcp_policy_param);
goto passthrough;
}
// E -> I
if (meta->is_e2i_dir)
stream_common_direction = 'I';
// I -> E
else
stream_common_direction = 'E';
tfe_cmsg_set(parser->cmsg, TFE_CMSG_COMMON_DIRECTION, (const unsigned char *)&stream_common_direction, sizeof(stream_common_direction));
}
else if (parser->intercpet_data & (IS_SINGLE | IS_TUNNEL)) {
is_passthrough = 1;
@@ -1386,8 +1173,6 @@ passthrough:
route_ctx_copy(&s_ctx->raw_meta_e2i->route_ctx, &parser->ack_route_ctx);
}
snprintf(stream_traceid, 24, "%" PRIu64 , s_ctx->session_id);
tfe_cmsg_set(parser->cmsg, TFE_CMSG_STREAM_TRACE_ID, (const unsigned char *)stream_traceid, strlen(stream_traceid));
TFE_LOG_INFO(logger, "%s: session %lu %s active first", LOG_TAG_PKTIO, s_ctx->session_id, s_ctx->session_addr);
session_table_insert(thread->session_table, s_ctx->session_id, &(s_ctx->c2s_info.tuple4), s_ctx, session_value_free_cb);
@@ -1587,7 +1372,7 @@ static int handle_raw_packet_from_nf(struct packet_io *handle, marsio_buff_t *rx
add_ether_header(raw_data, packet_io->config.tap_c_mac, packet_io->config.tap_s_mac);
throughput_metrics_inc(&packet_io_fs->tap_s_pkt_tx, 1, raw_len);
if (packet_io->config.enable_iouring) {
io_uring_submit_write_entry(thread->tap_ctx->io_uring_s, raw_data, raw_len);
io_uring_write(thread->tap_ctx->io_uring_s, raw_data, raw_len);
}
else {
tap_write(thread->tap_ctx->tap_s, raw_data, raw_len, logger);
@@ -1598,7 +1383,7 @@ static int handle_raw_packet_from_nf(struct packet_io *handle, marsio_buff_t *rx
add_ether_header(raw_data, packet_io->config.tap_s_mac, packet_io->config.tap_c_mac);
throughput_metrics_inc(&packet_io_fs->tap_c_pkt_tx, 1, raw_len);
if (packet_io->config.enable_iouring) {
io_uring_submit_write_entry(thread->tap_ctx->io_uring_c, raw_data, raw_len);
io_uring_write(thread->tap_ctx->io_uring_c, raw_data, raw_len);
}
else {
tap_write(thread->tap_ctx->tap_c, raw_data, raw_len, logger);
@@ -1646,7 +1431,7 @@ static int handle_raw_packet_from_nf(struct packet_io *handle, marsio_buff_t *rx
add_ether_proto(packet_buff, ETH_P_IPV6);
if (packet_io->config.enable_iouring) {
io_uring_submit_write_entry(thread->tap_ctx->io_uring_fd, packet_buff, packet_len);
io_uring_write(thread->tap_ctx->io_uring_fd, packet_buff, packet_len);
}
else {
tap_write(thread->tap_ctx->tap_fd, packet_buff, packet_len, logger);
@@ -1659,7 +1444,7 @@ static int handle_raw_packet_from_nf(struct packet_io *handle, marsio_buff_t *rx
// send to tap0
add_ether_header(raw_data, packet_io->config.src_mac, packet_io->config.tap_mac);
if (packet_io->config.enable_iouring) {
io_uring_submit_write_entry(thread->tap_ctx->io_uring_fd, raw_data, raw_len);
io_uring_write(thread->tap_ctx->io_uring_fd, raw_data, raw_len);
}
else {
tap_write(thread->tap_ctx->tap_fd, raw_data, raw_len, logger);
@@ -1700,10 +1485,6 @@ void tfe_tap_ctx_destory(struct tap_ctx *handler)
tap_close(handler->tap_fd);
tap_close(handler->tap_c);
tap_close(handler->tap_s);
if (handler->buff) {
free(handler->buff);
handler->buff = NULL;
}
free(handler);
handler = NULL;
@@ -2015,7 +1796,7 @@ void handle_decryption_packet_from_tap(const char *data, int len, void *args)
struct session_ctx *s_ctx = (struct session_ctx *)node->val_data;
marsio_buff_t *tx_buffs[1];
int alloc_ret = marsio_buff_malloc_device(packet_io->dev_nf_interface.mr_dev, tx_buffs, 1, 0, thread->thread_index);
int alloc_ret = marsio_buff_malloc_global(packet_io->instance, tx_buffs, 1, 0, thread->thread_index);
if (alloc_ret < 0){
TFE_LOG_ERROR(logger, "Failed at alloc marsio buffer, ret = %d, thread_seq = %d",
alloc_ret, thread->thread_index);
@@ -2099,7 +1880,7 @@ void handle_raw_packet_from_tap(const char *data, int len, void *args)
struct session_ctx *s_ctx = (struct session_ctx *)node->val_data;
marsio_buff_t *tx_buffs[1];
int alloc_ret = marsio_buff_malloc_device(packet_io->dev_nf_interface.mr_dev, tx_buffs, 1, 0, thread->thread_index);
int alloc_ret = marsio_buff_malloc_global(packet_io->instance, tx_buffs, 1, 0, thread->thread_index);
if (alloc_ret < 0){
TFE_LOG_ERROR(logger, "Failed at alloc marsio buffer, ret = %d, thread_seq = %d",
alloc_ret, thread->thread_index);
@@ -2158,4 +1939,4 @@ void handle_raw_packet_from_tap(const char *data, int len, void *args)
add_ether_header(dst, src_mac, dst_mac);
throughput_metrics_inc(&packet_io_fs->raw_pkt_tx, 1, packet_len);
marsio_send_burst_with_options(packet_io->dev_nf_interface.mr_path, thread->thread_index, tx_buffs, 1, MARSIO_SEND_OPT_REHASH);
}
}