This repository has been archived on 2025-09-14. You can view files and clone it, but cannot push or open issues or pull requests.
Files
tango-tfe/platform/src/acceptor_kni_v3.cpp

1031 lines
33 KiB
C++
Raw Normal View History

#include <sys/prctl.h>
#include <unistd.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <linux/tcp.h>
#include <linux/netfilter.h> // for NF_ACCEPT
#include <libnetfilter_queue/libnetfilter_queue.h>
#include <tfe_utils.h>
#include <tfe_cmsg.h>
#include <proxy.h>
#include <tfe_pkt_util.h>
#include <tfe_tcp_restore.h>
#include <MESA/MESA_prof_load.h>
#include <watchdog_3rd_device.h>
#include <raw_socket.h>
#include <packet_construct.h>
#include <intercept_policy.h>
/*
 * TCP option kind searched for in the queued packet (see payload_handler_cb);
 * the 2-byte value of that option is used as the offset of the cmsg blob
 * inside the TCP payload.
 */
#define TCP_RESTORE_TCPOPT_KIND 88
/* Policy enforcers implemented outside this file; only their prototypes are visible here. */
extern int tcp_policy_enforce(struct tcp_policy_enforcer *tcp_enforcer, struct tfe_cmsg *cmsg);
/* NOTE(review): only referenced from commented-out code in this file. */
extern void chaining_policy_enforce(struct chaining_policy_enforcer *enforcer, struct tfe_cmsg *cmsg, uint64_t rule_id);
/*
 * State of the NFQUEUE based acceptor: the netfilter queue handles, the
 * libevent loop that reads the queue socket, and the settings loaded from
 * the "nfq" section of the profile in acceptor_kni_v3_create().
 */
struct acceptor_kni_v3
{
/* Owning proxy; restored connections are handed to it via tfe_proxy_fds_accept(). */
struct tfe_proxy *proxy;
/* Profile path the "nfq" settings were loaded from (pointer is stored, not copied). */
const char *profile;
/* "nfq"/"device" setting, passed to tfe_tcp_restore_fd_create() for the restored sockets. */
char device[IFNAMSIZ];
/* Library handle returned by nfq_open(). */
struct nfq_handle *h;
/* Queue handle returned by nfq_create_queue(). */
struct nfq_q_handle *qh;
/* Netlink socket of the queue, obtained with nfq_fd(). */
int fd_nfq_socket;
/* Event base dispatched by the acceptor thread. */
struct event_base *ev_base;
/* Persistent EV_READ event on fd_nfq_socket. */
struct event *ev_nfq_socket;
/* CLOCK_MONOTONIC timestamps taken at the start/end of payload_handler_cb(). */
struct timespec start;
struct timespec end;
/* Thread running acceptor_kni_v3_event_thread_entry(). */
pthread_t thread;
/* "nfq"/"queue_id", default 1. */
unsigned int queue_id;
/* "nfq"/"queue_maxlen", default 65535; passed to nfq_set_queue_maxlen(). */
unsigned int queue_maxlen;
/* "nfq"/"queue_rcvbufsiz", default 98302500; passed to nfnl_rcvbufsiz(). */
unsigned int queue_rcvbufsiz;
/* "nfq"/"queue_no_enobufs", default 1; when non-zero NETLINK_NO_ENOBUFS is set on the socket. */
unsigned int queue_no_enobufs;
};
/* NOTE(review): identical to the definition earlier in this file; the duplicate is redundant. */
#define TCP_RESTORE_TCPOPT_KIND 88
/*
 * One TLV record of the cmsg blob as it appears on the wire (packed).
 * `type` and `length` are converted with ntohs() by the parser, and the
 * parser treats `length` as covering the whole record, including these two
 * 16-bit fields. The union gives typed views of the value bytes that follow.
 */
struct tcp_restore_info_tlv
{
uint16_t type;
uint16_t length;
union {
uint8_t value_as_uint8[0];
uint16_t value_as_uint16[0];
uint32_t value_as_uint32[0];
unsigned char value_as_string[0];
};
} __attribute__((packed));
/*
 * Header at the start of the cmsg blob (packed): two magic bytes, then the
 * number of TLV records (converted with ntohs() by the parser, which rejects
 * values >= 256), followed directly by the records themselves.
 */
struct tcp_restore_info_header
{
uint8_t __magic__[2]; /* Must be 0x4d, 0x5a */
uint16_t nr_tlvs;
struct tcp_restore_info_tlv tlvs[0];
} __attribute__((packed));
static int tcp_restore_info_parse_from_cmsg(const char *data, unsigned int datalen, struct tcp_restore_info *out)
{
unsigned int tlv_iter;
unsigned int nr_tlvs;
struct tcp_restore_info_header *header = (struct tcp_restore_info_header *)data;
if (header->__magic__[0] != 0x4d || header->__magic__[1] != 0x5a)
{
TFE_LOG_ERROR(g_default_logger, "Failed at parser TCP options from cmsg, wrong magic");
goto invalid_format;
}
nr_tlvs = ntohs(header->nr_tlvs);
if (nr_tlvs >= 256)
{
TFE_LOG_ERROR(g_default_logger, "Failed at parser TCP options from cmsg, numbers of tlvs is larger than 256");
goto invalid_format;
}
if (datalen < sizeof(struct tcp_restore_info_header))
{
TFE_LOG_ERROR(g_default_logger, "Failed at parser TCP options from cmsg, length is shorter than tlv header");
goto invalid_format;
}
memcpy(out->cmsg, data, datalen);
out->cmsg_len = datalen;
datalen -= sizeof(struct tcp_restore_info_header);
data += sizeof(struct tcp_restore_info_header);
for (tlv_iter = 0; tlv_iter < nr_tlvs; tlv_iter++)
{
struct tcp_restore_info_tlv *tlv = (struct tcp_restore_info_tlv *)data;
uint16_t tlv_type = ntohs(tlv->type);
uint16_t tlv_length = ntohs(tlv->length);
unsigned int __length = tlv_length;
if (datalen < __length)
{
TFE_LOG_ERROR(g_default_logger, "Failed at parser TCP options from cmsg, left space is smaller than tlv's length, "
"datalen is %u, tlv's length is %u", datalen, __length);
goto invalid_format;
}
if (tlv_length < sizeof(uint16_t) * 2)
{
TFE_LOG_ERROR(g_default_logger, "Failed at parser TCP options from cmsg, invalid tlv length, should larger than sizeof(type) + sizeof(length)");
goto invalid_format;
}
tlv_length -= sizeof(uint16_t) * 2;
#define __CHECK_TLV_LENGTH(x) \
do \
{ \
if (x != tlv_length) \
{ \
TFE_LOG_ERROR(g_default_logger, "Failed at parser TCP options from cmsg, invalid tlv length, should be %u, actually is %u", \
(unsigned int)x, (unsigned int)tlv_length); \
goto invalid_format; \
} \
} while (0)
switch (tlv_type)
{
case TFE_CMSG_TCP_RESTORE_SEQ:
__CHECK_TLV_LENGTH(sizeof(uint32_t));
out->client.seq = ntohl(tlv->value_as_uint32[0]);
out->server.ack = ntohl(tlv->value_as_uint32[0]);
break;
case TFE_CMSG_TCP_RESTORE_ACK:
__CHECK_TLV_LENGTH(sizeof(uint32_t));
out->client.ack = ntohl(tlv->value_as_uint32[0]);
out->server.seq = ntohl(tlv->value_as_uint32[0]);
break;
case TFE_CMSG_TCP_RESTORE_TS_CLIENT:
__CHECK_TLV_LENGTH(sizeof(uint8_t));
out->client.timestamp_perm = !!(tlv->value_as_uint8[0]);
break;
case TFE_CMSG_TCP_RESTORE_TS_SERVER:
__CHECK_TLV_LENGTH(sizeof(uint8_t));
out->server.timestamp_perm = !!(tlv->value_as_uint8[0]);
break;
case TFE_CMSG_TCP_RESTORE_TS_CLIENT_VAL:
__CHECK_TLV_LENGTH(sizeof(uint32_t));
out->client.ts_val = ntohl(tlv->value_as_uint32[0]);
break;
case TFE_CMSG_TCP_RESTORE_TS_SERVER_VAL:
__CHECK_TLV_LENGTH(sizeof(uint32_t));
out->server.ts_val = ntohl(tlv->value_as_uint32[0]);
break;
case TFE_CMSG_TCP_RESTORE_WSACLE_CLIENT:
__CHECK_TLV_LENGTH(sizeof(uint8_t));
out->client.wscale_perm = true;
out->client.wscale = tlv->value_as_uint8[0];
break;
case TFE_CMSG_TCP_RESTORE_WSACLE_SERVER:
__CHECK_TLV_LENGTH(sizeof(uint8_t));
out->server.wscale_perm = true;
out->server.wscale = tlv->value_as_uint8[0];
break;
case TFE_CMSG_TCP_RESTORE_SACK_CLIENT:
__CHECK_TLV_LENGTH(sizeof(uint8_t));
out->client.sack_perm = true;
break;
case TFE_CMSG_TCP_RESTORE_SACK_SERVER:
__CHECK_TLV_LENGTH(sizeof(uint8_t));
out->server.sack_perm = true;
break;
case TFE_CMSG_TCP_RESTORE_MSS_CLIENT:
__CHECK_TLV_LENGTH(sizeof(uint16_t));
out->client.mss = ntohs(tlv->value_as_uint16[0]);
break;
case TFE_CMSG_TCP_RESTORE_MSS_SERVER:
__CHECK_TLV_LENGTH(sizeof(uint16_t));
out->server.mss = ntohs(tlv->value_as_uint16[0]);
break;
case TFE_CMSG_TCP_RESTORE_WINDOW_CLIENT:
__CHECK_TLV_LENGTH(sizeof(uint16_t));
out->client.window = ntohs(tlv->value_as_uint16[0]);
break;
case TFE_CMSG_TCP_RESTORE_WINDOW_SERVER:
__CHECK_TLV_LENGTH(sizeof(uint16_t));
out->server.window = ntohs(tlv->value_as_uint16[0]);
break;
case TFE_CMSG_TCP_RESTORE_INFO_PACKET_CUR_DIR:
__CHECK_TLV_LENGTH(sizeof(uint8_t));
out->cur_dir = (enum tcp_restore_pkt_dir)(tlv->value_as_uint8[0]);
default:
break;
}
data += __length;
datalen -= __length;
}
return 0;
invalid_format:
return -EINVAL;
}
/*
 * Fill the endpoint addresses of `out` from the IP/TCP headers of the packet.
 *
 * The packet's source address/port go to out->client and its destination to
 * out->server when out->cur_dir is PKT_DIR_NOT_SET or PKT_DIR_C2S; for any
 * other direction the two endpoints are swapped. Ports are copied as found in
 * the TCP header (no byte-order conversion). Packets that are neither IPv4
 * nor IPv6 leave `out` untouched.
 */
static void tcp_restore_info_parse_from_pkt(struct pkt_info *pktinfo, struct tcp_restore_info *out)
{
    if (pktinfo->addr_type == ADDR_TYPE_IPV4)
    {
        struct iphdr *ip4 = pktinfo->iphdr.v4;
        struct tcphdr *tcp = pktinfo->tcphdr;
        int src_is_client = (out->cur_dir == PKT_DIR_NOT_SET || out->cur_dir == PKT_DIR_C2S);

        /* Endpoint slot that receives the packet source / destination. */
        struct sockaddr_in *src_slot = src_is_client ? (struct sockaddr_in *)&out->client.addr
                                                     : (struct sockaddr_in *)&out->server.addr;
        struct sockaddr_in *dst_slot = src_is_client ? (struct sockaddr_in *)&out->server.addr
                                                     : (struct sockaddr_in *)&out->client.addr;

        src_slot->sin_family = AF_INET;
        src_slot->sin_addr.s_addr = ip4->saddr;
        src_slot->sin_port = tcp->source;

        dst_slot->sin_family = AF_INET;
        dst_slot->sin_addr.s_addr = ip4->daddr;
        dst_slot->sin_port = tcp->dest;
    }

    if (pktinfo->addr_type == ADDR_TYPE_IPV6)
    {
        struct ip6_hdr *ip6 = (struct ip6_hdr *)(pktinfo->iphdr.v6);
        struct tcphdr *tcp = pktinfo->tcphdr;
        int src_is_client = (out->cur_dir == PKT_DIR_NOT_SET || out->cur_dir == PKT_DIR_C2S);

        struct sockaddr_in6 *src_slot = src_is_client ? (struct sockaddr_in6 *)&out->client.addr
                                                      : (struct sockaddr_in6 *)&out->server.addr;
        struct sockaddr_in6 *dst_slot = src_is_client ? (struct sockaddr_in6 *)&out->server.addr
                                                      : (struct sockaddr_in6 *)&out->client.addr;

        src_slot->sin6_family = AF_INET6;
        src_slot->sin6_addr = ip6->ip6_src;
        src_slot->sin6_port = tcp->source;

        dst_slot->sin6_family = AF_INET6;
        dst_slot->sin6_addr = ip6->ip6_dst;
        dst_slot->sin6_port = tcp->dest;
    }
}
/* Wire layout of the TCP Maximum Segment Size option (kind 2, length 4) as written by fake_tcp_handshake(); mss_value is stored with htons(). */
struct tcp_option_mss {
uint8_t kind;
uint8_t length;
uint16_t mss_value;
} __attribute__((__packed__));
/* Wire layout of the TCP Window Scale option (kind 3, length 3) as written by fake_tcp_handshake(). */
struct tcp_option_window_scale {
uint8_t kind;
uint8_t length;
uint8_t shift_count;
} __attribute__((__packed__));
/* Wire layout of the TCP SACK option (kind 4, length 2) as written by fake_tcp_handshake(); it carries no value. */
struct tcp_option_sack {
uint8_t kind;
uint8_t length;
} __attribute__((__packed__));
/* Wire layout of the TCP Timestamp option (kind 8, length 10) as written by fake_tcp_handshake(); tsval/tsecr are stored with htonl(). */
struct tcp_option_time_stamp {
uint8_t kind;
uint8_t length;
uint32_t tsval;
uint32_t tsecr;
} __attribute__((__packed__));
/*
 * Emit a synthetic three-way handshake (SYN, SYN/ACK, ACK) for the restored
 * connection on the traffic steering devices.
 *
 * Two raw sockets are created from proxy->traffic_steering_options (client
 * and server device). Client-originated packets (SYN and final ACK) are sent
 * on the client socket, the SYN/ACK on the server socket. The TCP options of
 * the packets are rebuilt from restore_info; each option is only emitted
 * when BOTH endpoints carry it.
 *
 * Returns 0 after the three packets have been handed to raw_socket_send(),
 * or -1 when either raw socket could not be created. The results of
 * raw_socket_send() are not checked.
 */
static int fake_tcp_handshake(struct tfe_proxy *proxy, struct tcp_restore_info *restore_info)
{
char buffer[1500] = {0};
int length = 0;
/* Option bytes for the SYN (c), the SYN/ACK (s) and the final ACK (c2). */
char tcp_option_buffer_c[40] = {0};
char tcp_option_buffer_s[40] = {0};
char tcp_option_buffer_c2[40] = {0};
int tcp_option_length_c = 0;
int tcp_option_length_s = 0;
int tcp_option_length_c2 = 0;
const struct tcp_restore_endpoint *client = &restore_info->client;
const struct tcp_restore_endpoint *server = &restore_info->server;
struct raw_socket *raw_socket_c = raw_socket_create(proxy->traffic_steering_options.device_client, proxy->traffic_steering_options.so_mask_client);
struct raw_socket *raw_socket_s = raw_socket_create(proxy->traffic_steering_options.device_server, proxy->traffic_steering_options.so_mask_server);
if (raw_socket_c == NULL || raw_socket_s == NULL)
{
/* NOTE(review): one of these may be NULL here - confirm raw_socket_destory() accepts NULL. */
raw_socket_destory(raw_socket_c);
raw_socket_destory(raw_socket_s);
return -1;
}
/* Start one below the restored sequence numbers: each side's SYN advances its counter by one (see the "+= 1" below). */
uint32_t c_seq = client->seq - 1;
uint32_t s_seq = server->seq - 1;
/*
 * Maximum segment size: Kind: 2, Length: 4
 * +---------+---------+---------+
 * | Kind=2  |Length=4 |mss.value|
 * +---------+---------+---------+
 *      1         1         2
 */
if (client->mss && server->mss)
{
struct tcp_option_mss *option_c = (struct tcp_option_mss *)(tcp_option_buffer_c + tcp_option_length_c);
option_c->kind = 2;
option_c->length = 4;
option_c->mss_value = htons(client->mss);
tcp_option_length_c += sizeof(struct tcp_option_mss);
struct tcp_option_mss *option_s = (struct tcp_option_mss *)(tcp_option_buffer_s + tcp_option_length_s);
option_s->kind = 2;
option_s->length = 4;
option_s->mss_value = htons(server->mss);
tcp_option_length_s += sizeof(struct tcp_option_mss);
}
/*
 * Window Scale option: Kind: 3, Length: 3
 * +---------+---------+---------+
 * | Kind=3  |Length=3 |shift.cnt|
 * +---------+---------+---------+
 *      1         1         1
 */
if (client->wscale_perm && server->wscale_perm)
{
// padding: one byte of value 1 in front of the 3-byte option (1 + 3 = 4 bytes)
memset(tcp_option_buffer_c + tcp_option_length_c, 1, 1);
tcp_option_length_c += 1;
memset(tcp_option_buffer_s + tcp_option_length_s, 1, 1);
tcp_option_length_s += 1;
struct tcp_option_window_scale *option_c = (struct tcp_option_window_scale *)(tcp_option_buffer_c + tcp_option_length_c);
option_c->kind = 3;
option_c->length = 3;
option_c->shift_count = client->wscale;
tcp_option_length_c += sizeof(struct tcp_option_window_scale);
struct tcp_option_window_scale *option_s = (struct tcp_option_window_scale *)(tcp_option_buffer_s + tcp_option_length_s);
option_s->kind = 3;
option_s->length = 3;
option_s->shift_count = server->wscale;
tcp_option_length_s += sizeof(struct tcp_option_window_scale);
}
/*
 * SACK option: Kind: 4, Length: 2
 * +---------+---------+
 * | Kind=4  |Length=2 |
 * +---------+---------+
 *      1         1
 */
if (client->sack_perm && server->sack_perm)
{
// padding: two bytes of value 1 in front of the 2-byte option (2 + 2 = 4 bytes)
memset(tcp_option_buffer_c + tcp_option_length_c, 1, 2);
tcp_option_length_c += 2;
memset(tcp_option_buffer_s + tcp_option_length_s, 1, 2);
tcp_option_length_s += 2;
struct tcp_option_sack *option_c = (struct tcp_option_sack *)(tcp_option_buffer_c + tcp_option_length_c);
option_c->kind = 4;
option_c->length = 2;
tcp_option_length_c += sizeof(struct tcp_option_sack);
struct tcp_option_sack *option_s = (struct tcp_option_sack *)(tcp_option_buffer_s + tcp_option_length_s);
option_s->kind = 4;
option_s->length = 2;
tcp_option_length_s += sizeof(struct tcp_option_sack);
}
/*
 * Time Stamp option: Kind: 8, Length: 10
 * +---------+---------+-----+-----+
 * | Kind=8  |Length=10|tsval|tsecr|
 * +---------+---------+-----+-----+
 *      1         1       4     4
 */
if (client->timestamp_perm && server->timestamp_perm)
{
// padding: two bytes of value 1 in front of the 10-byte option (2 + 10 = 12 bytes)
memset(tcp_option_buffer_c + tcp_option_length_c, 1, 2);
tcp_option_length_c += 2;
memset(tcp_option_buffer_s + tcp_option_length_s, 1, 2);
tcp_option_length_s += 2;
/* The timestamp is the only option placed in the final ACK (c2). */
memset(tcp_option_buffer_c2 + tcp_option_length_c2, 1, 2);
tcp_option_length_c2 += 2;
/* SYN: client timestamp, nothing to echo yet. */
struct tcp_option_time_stamp *option_c = (struct tcp_option_time_stamp *)(tcp_option_buffer_c + tcp_option_length_c);
option_c->kind = 8;
option_c->length = 10;
option_c->tsval = htonl(client->ts_val);
option_c->tsecr = htonl(0);
tcp_option_length_c += sizeof(struct tcp_option_time_stamp);
/* SYN/ACK: server timestamp, echoing the client's. */
struct tcp_option_time_stamp *option_s = (struct tcp_option_time_stamp *)(tcp_option_buffer_s + tcp_option_length_s);
option_s->kind = 8;
option_s->length = 10;
option_s->tsval = htonl(server->ts_val);
option_s->tsecr = htonl(client->ts_val);
tcp_option_length_s += sizeof(struct tcp_option_time_stamp);
/* ACK: client timestamp, echoing the server's. */
struct tcp_option_time_stamp *option_c2 = (struct tcp_option_time_stamp *)(tcp_option_buffer_c2 + tcp_option_length_c2);
option_c2->kind = 8;
option_c2->length = 10;
option_c2->tsval = htonl(client->ts_val);
option_c2->tsecr = htonl(server->ts_val);
tcp_option_length_c2 += sizeof(struct tcp_option_time_stamp);
}
/* The address family of the client endpoint selects the IPv6 or IPv4 packet builder. */
if (client->addr.ss_family == AF_INET6)
{
struct sockaddr_in6 *sk_client = (struct sockaddr_in6 *)&client->addr;
struct sockaddr_in6 *sk_server = (struct sockaddr_in6 *)&server->addr;
uint16_t port_client = sk_client->sin6_port;
uint16_t port_server = sk_server->sin6_port;
// C -> S (SYN)
length = tcp_packet_v6_construct(
buffer, // buffer
&raw_socket_c->mac_addr, &raw_socket_s->mac_addr, 0, ETH_P_IPV6, // Ether
&sk_client->sin6_addr, &sk_server->sin6_addr, 55, // IPv6
port_client, port_server, c_seq, 0, TCP_SYN_FLAG, client->window, // TCP Header
tcp_option_buffer_c, tcp_option_length_c, // TCP Options
NULL, 0); // Payload
raw_socket_send(raw_socket_c, buffer, length);
c_seq += 1;
// S -> C (SYN/ACK)
length = tcp_packet_v6_construct(
buffer, // buffer
&raw_socket_s->mac_addr, &raw_socket_c->mac_addr, 0, ETH_P_IPV6, // Ether
&sk_server->sin6_addr, &sk_client->sin6_addr, 65, // IPv6
port_server, port_client, s_seq, c_seq, TCP_SYN_FLAG | TCP_ACK_FLAG, server->window, // TCP Header
tcp_option_buffer_s, tcp_option_length_s, // TCP Options
NULL, 0); // Payload
raw_socket_send(raw_socket_s, buffer, length);
s_seq += 1;
// C -> S (ACK)
length = tcp_packet_v6_construct(
buffer, // buffer
&raw_socket_c->mac_addr, &raw_socket_s->mac_addr, 0, ETH_P_IPV6, // Ether
&sk_client->sin6_addr, &sk_server->sin6_addr, 55, // IPv6
port_client, port_server, c_seq, s_seq, TCP_ACK_FLAG, client->window, // TCP Header
tcp_option_buffer_c2, tcp_option_length_c2, // TCP Options
NULL, 0); // Payload
raw_socket_send(raw_socket_c, buffer, length);
}
else
{
struct sockaddr_in *sk_client = (struct sockaddr_in *)&client->addr;
struct sockaddr_in *sk_server = (struct sockaddr_in *)&server->addr;
uint16_t port_client = sk_client->sin_port;
uint16_t port_server = sk_server->sin_port;
// C -> S (SYN)
length = tcp_packet_v4_construct(
buffer, // buffer
&raw_socket_c->mac_addr, &raw_socket_s->mac_addr, 0, ETH_P_IP, // Ether
&sk_client->sin_addr, &sk_server->sin_addr, 0, 55, 0x11, // IPv4
port_client, port_server, c_seq, 0, TCP_SYN_FLAG, client->window, // TCP Header
tcp_option_buffer_c, tcp_option_length_c, // TCP Options
NULL, 0);
raw_socket_send(raw_socket_c, buffer, length);
c_seq += 1;
// S -> C (SYN/ACK)
length = tcp_packet_v4_construct(
buffer, // buffer
&raw_socket_s->mac_addr, &raw_socket_c->mac_addr, 0, ETH_P_IP, // Ether
&sk_server->sin_addr,&sk_client->sin_addr, 0, 65, 0x12, // IPv4
port_server, port_client, s_seq, c_seq, TCP_SYN_FLAG | TCP_ACK_FLAG, server->window, // TCP Header
tcp_option_buffer_s, tcp_option_length_s, // TCP Options
NULL, 0);
raw_socket_send(raw_socket_s, buffer, length);
s_seq += 1;
// C -> S (ACK)
length = tcp_packet_v4_construct(
buffer, // buffer
&raw_socket_c->mac_addr, &raw_socket_s->mac_addr, 0, ETH_P_IP, // Ether
&sk_client->sin_addr, &sk_server->sin_addr, 0, 55, 0x13, // IPv4
port_client, port_server, c_seq, s_seq, TCP_ACK_FLAG, client->window, // TCP Header
tcp_option_buffer_c2, tcp_option_length_c2, // TCP Options
NULL, 0);
raw_socket_send(raw_socket_c, buffer, length);
}
raw_socket_destory(raw_socket_c);
raw_socket_destory(raw_socket_s);
return 0;
}
/*
 * Apply MSS overrides found in the cmsg to the restore information.
 *
 * Reads the downstream (client side) and upstream (server side) "enable" and
 * "value" entries from `cmsg`. When a side's enable flag is non-zero, that
 * side's mss in `restore` is replaced by the configured value.
 *
 * Returns 0 on success, -1 as soon as any of the four entries cannot be
 * fetched (in which case `restore` is left unmodified).
 */
static int overwrite_tcp_mss(struct tfe_cmsg *cmsg, struct tcp_restore_info *restore)
{
    uint16_t value_len = 0;
    int downstream_enable = 0;
    int downstream_mss = 0;
    int upstream_enable = 0;
    int upstream_mss = 0;
    int rc;

    /* Client side (downstream) settings. */
    rc = tfe_cmsg_get_value(cmsg, TFE_CMSG_DOWNSTREAM_TCP_MSS_ENABLE,
                            (unsigned char *)&downstream_enable, sizeof(downstream_enable), &value_len);
    if (rc < 0)
    {
        TFE_LOG_ERROR(g_default_logger, "failed at fetch client side tcp mss from cmsg: %s", strerror(-rc));
        return -1;
    }

    rc = tfe_cmsg_get_value(cmsg, TFE_CMSG_DOWNSTREAM_TCP_MSS_VALUE,
                            (unsigned char *)&downstream_mss, sizeof(downstream_mss), &value_len);
    if (rc < 0)
    {
        TFE_LOG_ERROR(g_default_logger, "failed at fetch client side tcp mss value from cmsg: %s", strerror(-rc));
        return -1;
    }

    /* Server side (upstream) settings. */
    rc = tfe_cmsg_get_value(cmsg, TFE_CMSG_UPSTREAM_TCP_MSS_ENABLE,
                            (unsigned char *)&upstream_enable, sizeof(upstream_enable), &value_len);
    if (rc < 0)
    {
        TFE_LOG_ERROR(g_default_logger, "failed at fetch server side tcp mss from cmsg: %s", strerror(-rc));
        return -1;
    }

    rc = tfe_cmsg_get_value(cmsg, TFE_CMSG_UPSTREAM_TCP_MSS_VALUE,
                            (unsigned char *)&upstream_mss, sizeof(upstream_mss), &value_len);
    if (rc < 0)
    {
        TFE_LOG_ERROR(g_default_logger, "failed at fetch server side tcp mss value from cmsg: %s", strerror(-rc));
        return -1;
    }

    /* Everything was fetched; apply whichever overrides are enabled. */
    if (downstream_enable)
    {
        restore->client.mss = downstream_mss;
    }
    if (upstream_enable)
    {
        restore->server.mss = upstream_mss;
    }

    return 0;
}
/*
 * NFQUEUE callback: turn one queued TCP packet into a pair of restored
 * sockets and hand them to the proxy.
 *
 * qh    : queue handle, used to issue the verdict
 * nfmsg : message object that contains the packet (unused)
 * nfa   : netlink packet data handle
 * data  : the struct acceptor_kni_v3 registered with nfq_create_queue()
 *
 * The packet must carry a TCP option of kind TCP_RESTORE_TCPOPT_KIND whose
 * 2-byte value is the offset of a cmsg blob inside the TCP payload. The blob
 * is parsed into a tcp_restore_info, cut off the packet (lengths and
 * checksums are recomputed), and two sockets (upstream / downstream) are
 * created from the restore info. When traffic steering applies, a fake
 * handshake is emitted and two additional sockets are created.
 *
 * The packet is always given an NF_ACCEPT verdict: with the trimmed payload
 * on success, unmodified on any failure. Sockets created before a failure
 * are closed. Returns the result of nfq_set_verdict().
 */
static int payload_handler_cb(struct nfq_q_handle *qh, struct nfgenmsg *nfmsg, struct nfq_data *nfa, void *data)
{
    /* All locals are declared up front so that "goto end" never crosses an initialization. */
    int id = 0;
    int ret = 0;
    int fd_downstream = 0;
    int fd_upstream = 0;
    int fd_fake_c = 0;
    int fd_fake_s = 0;
    int hit_tcpopt = 0;
    uint16_t cmsg_offset = 0;
    uint8_t restore_opt_len = 0;
    int raw_payload_len = 0;
    unsigned int cmsg_payload_len = 0;
    char *cmsg_payload = NULL;
    uint64_t jiffies_us = 0;
    unsigned char *raw_payload = NULL;
    struct iphdr *iphdr = NULL;
    struct tfe_cmsg *cmsg = NULL;
    struct pkt_info pktinfo;
    struct tcp_restore_info restore_info;
    uint8_t stream_protocol_in_char = 0;
    uint8_t enalbe_decrypted_traffic_steering = 0;
    uint16_t size = 0;
    // uint64_t chaining_rule_id = 0; // only use for acceptv4
    struct acceptor_kni_v3 *__ctx = (struct acceptor_kni_v3 *)data;
    struct nfqnl_msg_packet_hdr *ph = NULL;

    clock_gettime(CLOCK_MONOTONIC, &(__ctx->start));
    memset(&pktinfo, 0, sizeof(pktinfo));
    memset(&restore_info, 0, sizeof(restore_info));

    ph = nfq_get_msg_packet_hdr(nfa);
    if (ph == NULL)
    {
        TFE_LOG_ERROR(g_default_logger, "Failed at nfq_get_msg_packet_hdr(), result is NULL");
        goto end;
    }
    id = ntohl(ph->packet_id);

    raw_payload_len = nfq_get_payload(nfa, &raw_payload);
    /*
     * nfq_get_payload() reports failure with a negative value. It has to be
     * rejected before the unsigned comparison below, otherwise it would look
     * like a huge valid length and raw_payload would be dereferenced.
     */
    if (raw_payload_len < 0 || raw_payload == NULL)
    {
        TFE_LOG_ERROR(g_default_logger, "Failed at nfq_get_payload(), result is %d", raw_payload_len);
        goto end;
    }
    if ((unsigned int)raw_payload_len <= (MIN(sizeof(struct iphdr), sizeof(struct ip6_hdr)) + sizeof(struct tcphdr)))
    {
        TFE_LOG_ERROR(g_default_logger, "Failed at nfq_get_payload(), paylod len %d too small, less than %lu", raw_payload_len, (MIN(sizeof(struct iphdr), sizeof(struct ip6_hdr)) + sizeof(struct tcphdr)));
        tfe_hexdump2file(stderr, "Failed at parsing payload, payload len too small", raw_payload, (unsigned int)raw_payload_len);
        goto end;
    }

    /* The version nibble sits at the same place in IPv4 and IPv6 headers. */
    iphdr = (struct iphdr *)raw_payload;
    if (iphdr->version == 4)
    {
        if (iphdr->protocol == IPPROTO_TCP)
        {
            tfe_pkt_parse_ipv4_header(raw_payload, &pktinfo);
        }
        else
        {
            TFE_LOG_ERROR(g_default_logger, "Failed at parse IPv4 header, sub protocol not tcp");
            tfe_hexdump2file(stderr, "Failed at parsing IPv4 header, TCP no found", raw_payload, (unsigned int)raw_payload_len);
            goto end;
        }
    }
    else
    {
        tfe_pkt_parse_ipv6_header(raw_payload, &pktinfo);
        if (pktinfo.parse_failed)
        {
            TFE_LOG_ERROR(g_default_logger, "Failed at parse IPv6 header, sub protocol not tcp");
            tfe_hexdump2file(stderr, "Failed at parsing IPv6 header, TCP no found", raw_payload, (unsigned int)raw_payload_len);
            goto end;
        }
    }

    if (pktinfo.ip_totlen > raw_payload_len)
    {
        TFE_LOG_ERROR(g_default_logger, "Failed at parser IP header, invalid ip header totlen");
        tfe_hexdump2file(stderr, "Failed at parsing IP header, IP totlen too small", raw_payload, (unsigned int)raw_payload_len);
        goto end;
    }

    // check if there is a tcp options
    if (pktinfo.tcphdr_len <= sizeof(struct tcphdr))
    {
        TFE_LOG_ERROR(g_default_logger, "Failed at parser TCP header, TCP header len %d too small, less than %lu", pktinfo.tcphdr_len, sizeof(struct tcphdr));
        tfe_hexdump2file(stderr, "Failed at parsing TCP header, TCP header len too small", raw_payload, (unsigned int)raw_payload_len);
        goto end;
    }

    // Parse tcp options
    hit_tcpopt = tfe_pkt_find_tcp_option(TCP_RESTORE_TCPOPT_KIND, (char *)pktinfo.tcphdr, pktinfo.tcphdr_len - sizeof(struct tcphdr),
                                         &restore_opt_len, (char *)&cmsg_offset, sizeof(cmsg_offset));
    if (!hit_tcpopt || restore_opt_len != 2)
    {
        TFE_LOG_ERROR(g_default_logger, "Failed at parser TCP options, tcp option hit:%d, opt len:%d", hit_tcpopt, restore_opt_len);
        tfe_hexdump2file(stderr, "Failed at parsing TCP options, TCP options no found", raw_payload, (unsigned int)raw_payload_len);
        goto end;
    }

    cmsg_offset = ntohs(cmsg_offset);
    /*
     * The offset comes from the packet itself. If it points past the TCP
     * payload, the subtraction below would wrap around and the parser would
     * be given an enormous length.
     */
    if (cmsg_offset > pktinfo.data_len)
    {
        TFE_LOG_ERROR(g_default_logger, "Failed at parser TCP options, cmsg offset %u is larger than payload length", (unsigned int)cmsg_offset);
        tfe_hexdump2file(stderr, "Failed at parsing TCP options, cmsg offset out of range", raw_payload, (unsigned int)raw_payload_len);
        goto end;
    }
    cmsg_payload = (char *)(pktinfo.data + cmsg_offset);
    cmsg_payload_len = pktinfo.data_len - cmsg_offset;

    // Parse the information carried in cmsg and store it into restore_info
    ret = tcp_restore_info_parse_from_cmsg(cmsg_payload, cmsg_payload_len, &restore_info);
    if (ret < 0)
    {
        TFE_LOG_ERROR(g_default_logger, "Failed at parser TCP options from cmsg");
        goto end;
    }
    tcp_restore_info_parse_from_pkt(&pktinfo, &restore_info);

    // Remove cmsg from payload
    pktinfo.ip_totlen = pktinfo.ip_totlen - cmsg_payload_len;
    if (pktinfo.addr_type == ADDR_TYPE_IPV4)
    {
        pktinfo.iphdr.v4->tot_len = htons(pktinfo.ip_totlen);
        pktinfo.iphdr.v4->check = 0;
        pktinfo.iphdr.v4->check = tfe_pkt_checksum_ip((void *)pktinfo.iphdr.v4, pktinfo.iphdr_len);
        pktinfo.tcphdr->check = 0;
        pktinfo.tcphdr->check = tfe_pkt_checksum_tcp_v4((void *)pktinfo.tcphdr, pktinfo.ip_totlen - pktinfo.iphdr_len, pktinfo.iphdr.v4->saddr, pktinfo.iphdr.v4->daddr);
    }
    if (pktinfo.addr_type == ADDR_TYPE_IPV6)
    {
        pktinfo.iphdr.v6->ip6_ctlun.ip6_un1.ip6_un1_plen = 0;
        pktinfo.iphdr.v6->ip6_ctlun.ip6_un1.ip6_un1_plen = htons(pktinfo.ip_totlen - sizeof(struct ip6_hdr));
        // IPv6 header no checksum
        pktinfo.tcphdr->check = 0;
        pktinfo.tcphdr->check = tfe_pkt_checksum_tcp_v6((void *)pktinfo.tcphdr, pktinfo.ip_totlen - pktinfo.iphdr_len, pktinfo.iphdr.v6->ip6_src, pktinfo.iphdr.v6->ip6_dst);
    }

    if (tfe_cmsg_deserialize((const unsigned char *)restore_info.cmsg, restore_info.cmsg_len, &cmsg) < 0)
    {
        TFE_LOG_ERROR(g_default_logger, "failed at tfe_cmsg_deserialize()");
        goto end;
    }

    intercept_policy_enforce(__ctx->proxy->int_ply_enforcer, cmsg);
    tcp_policy_enforce(__ctx->proxy->tcp_ply_enforcer, cmsg);
    // chaining_policy_enforce(__ctx->proxy->chain_ply_enforcer, cmsg, chaining_rule_id);

    if (overwrite_tcp_mss(cmsg, &restore_info))
    {
        goto end;
    }
    tfe_tcp_restore_info_dump(&restore_info);

    // tcp repair C2S
    fd_upstream = tfe_tcp_restore_fd_create(&(restore_info.client), &(restore_info.server), __ctx->device, 0x65);
    if (fd_upstream < 0)
    {
        TFE_LOG_ERROR(g_default_logger, "Failed at tcp_restore_fd_create(UPSTREAM)");
        goto end;
    }

    // tcp repair S2C
    fd_downstream = tfe_tcp_restore_fd_create(&(restore_info.server), &(restore_info.client), __ctx->device, 0x65);
    if (fd_downstream < 0)
    {
        TFE_LOG_ERROR(g_default_logger, "Failed at tcp_restore_fd_create(DOWNSTREAM)");
        goto end;
    }

    /* The result is deliberately not checked: on failure the protocol stays 0. */
    tfe_cmsg_get_value(cmsg, TFE_CMSG_TCP_RESTORE_PROTOCOL, (unsigned char *)&stream_protocol_in_char, sizeof(stream_protocol_in_char), &size);
    // tfe_cmsg_get_value(cmsg, TFE_CMSG_TCP_DECRYPTED_TRAFFIC_STEERING, (unsigned char *)&enalbe_decrypted_traffic_steering, sizeof(enalbe_decrypted_traffic_steering), &size);
    if (steering_device_is_available() && (
            (STREAM_PROTO_PLAIN == (enum tfe_stream_proto)stream_protocol_in_char && __ctx->proxy->traffic_steering_options.enable_steering_http) ||
            (STREAM_PROTO_SSL == (enum tfe_stream_proto)stream_protocol_in_char && __ctx->proxy->traffic_steering_options.enable_steering_ssl) ||
            enalbe_decrypted_traffic_steering == 1))
    {
        if (fake_tcp_handshake(__ctx->proxy, &restore_info) == -1)
        {
            TFE_LOG_ERROR(g_default_logger, "Failed at fake_tcp_handshake()");
            goto end;
        }

        fd_fake_c = tfe_tcp_restore_fd_create(&(restore_info.client), &(restore_info.server), __ctx->proxy->traffic_steering_options.device_client, __ctx->proxy->traffic_steering_options.so_mask_client);
        if (fd_fake_c < 0)
        {
            TFE_LOG_ERROR(g_default_logger, "Failed at tcp_restore_fd_create(fd_fake_c)");
            goto end;
        }

        fd_fake_s = tfe_tcp_restore_fd_create(&(restore_info.server), &(restore_info.client), __ctx->proxy->traffic_steering_options.device_server, __ctx->proxy->traffic_steering_options.so_mask_server);
        if (fd_fake_s < 0)
        {
            TFE_LOG_ERROR(g_default_logger, "Failed at tcp_restore_fd_create(fd_fake_s)");
            goto end;
        }
    }

    if (tfe_proxy_fds_accept(__ctx->proxy, fd_downstream, fd_upstream, fd_fake_c, fd_fake_s, cmsg) < 0)
    {
        TFE_LOG_ERROR(g_default_logger, "Failed at tfe_proxy_fds_accept()");
        goto end;
    }
    TFE_PROXY_STAT_INCREASE(STAT_FD_OPEN_BY_KNI_ACCEPT, 2);

    clock_gettime(CLOCK_MONOTONIC, &(__ctx->end));
    jiffies_us = (__ctx->end.tv_sec - __ctx->start.tv_sec) * 1000 * 1000 + (__ctx->end.tv_nsec - __ctx->start.tv_nsec) / 1000;
    TFE_LOG_DEBUG(g_default_logger, "nfqueue tcp_restore=%p time=%ldus hw_protocol=0x%04x hook=%u id=%010u protocol=%s total_len=%d inject_len=%d iphdr_len=%d tcphdr_len=%d data_len=%d",
                  &restore_info, jiffies_us, ntohs(ph->hw_protocol), ph->hook, id, (pktinfo.addr_type == ADDR_TYPE_IPV4 ? "IPv4" : "IPv6"),
                  raw_payload_len, pktinfo.ip_totlen, pktinfo.iphdr_len, pktinfo.tcphdr_len, pktinfo.data_len);

    /*
     * NF_DROP   : discarded the packet
     * NF_ACCEPT : the packet passes, continue iterations
     * NF_QUEUE  : inject the packet into a different queue (the target queue number is in the high 16 bits of the verdict)
     * NF_REPEAT : iterate the same cycle once more
     * NF_STOP   : accept, but don't continue iterations
     */
    /* Accept the packet with the cmsg stripped: only the first ip_totlen bytes are re-injected. */
    return nfq_set_verdict(qh, id, NF_ACCEPT, pktinfo.ip_totlen, raw_payload);

end:
    /*
     * Failure path: release every socket created so far (0 means "never
     * created", negative means creation failed) and accept the packet
     * unmodified.
     * NOTE(review): `cmsg` is not released here when deserialization
     * succeeded but a later step failed - confirm the matching destroy
     * function in tfe_cmsg.h and call it.
     */
    if (fd_upstream > 0)
    {
        TFE_PROXY_STAT_INCREASE(STAT_FD_CLOSE_BY_KNI_ACCEPT_FAIL, 1);
        close(fd_upstream);
    }
    if (fd_downstream > 0)
    {
        TFE_PROXY_STAT_INCREASE(STAT_FD_CLOSE_BY_KNI_ACCEPT_FAIL, 1);
        close(fd_downstream);
    }
    /* The steering sockets are not counted by the open statistic above, so they are closed without touching the counters. */
    if (fd_fake_c > 0)
    {
        close(fd_fake_c);
    }
    if (fd_fake_s > 0)
    {
        close(fd_fake_s);
    }
    return nfq_set_verdict(qh, id, NF_ACCEPT, 0, NULL);
}
/*
 * libevent read callback for the nfqueue socket.
 *
 * Reads one datagram from `fd` and feeds it to nfq_handle_packet(), which in
 * turn invokes the queue callback registered with nfq_create_queue(). A
 * failed recv() is only logged; the event stays armed (EV_PERSIST).
 *
 * fd   : the nfqueue netlink socket
 * what : triggered event flags, expected to contain EV_READ
 * user : the struct acceptor_kni_v3 owning the socket
 */
void acceptor_kni_v3_event(evutil_socket_t fd, short what, void *user)
{
    struct acceptor_kni_v3 *acceptor = (struct acceptor_kni_v3 *)user;
    assert(acceptor != NULL && acceptor->thread == pthread_self());
    assert(what & EV_READ);

    char rxbuf[4096] __attribute__((aligned));
    int nbytes = recv(fd, rxbuf, sizeof(rxbuf), 0);

    if (nbytes < 0)
    {
        /* if your application is too slow to digest the packets that
         * are sent from kernel-space, the socket buffer that we use
         * to enqueue packets may fill up returning ENOBUFS. Depending
         * on your application, this error may be ignored. Please, see
         * the doxygen documentation of this library on how to improve
         * this situation.
         */
        if (errno == ENOBUFS)
        {
            TFE_LOG_ERROR(g_default_logger, "nfqueue losing packets!");
        }
        TFE_LOG_ERROR(g_default_logger, "Failed at recv() data from nfqueue, %d: %s", errno, strerror(errno));
        return;
    }

    TFE_LOG_DEBUG(g_default_logger, "nfqueue acceptor thread recv %d bytes form nfqueue fd %d", nbytes, fd);
    nfq_handle_packet(acceptor->h, rxbuf, nbytes);
}
/*
 * Entry point of the acceptor thread.
 *
 * Names the thread "tfe:acceptor-v3", optionally pins it to the first CPU of
 * the proxy's affinity mask, then runs the event loop of the acceptor.
 * Leaving the event loop is treated as fatal (DIE).
 *
 * args : the struct acceptor_kni_v3 created by acceptor_kni_v3_create()
 */
void *acceptor_kni_v3_event_thread_entry(void *args)
{
    struct acceptor_kni_v3 *acceptor = (struct acceptor_kni_v3 *)args;
    assert(acceptor != NULL && acceptor->thread == pthread_self());

    char name[16] = {0};
    snprintf(name, sizeof(name), "tfe:acceptor-v3");
    prctl(PR_SET_NAME, (unsigned long long)name, NULL, NULL, NULL);

    /* Human readable affinity description for the start-up log line. */
    char affinity_desc[32] = {0};
    if (acceptor->proxy->enable_cpu_affinity)
    {
        tfe_thread_set_affinity(acceptor->proxy->cpu_affinity_mask[0]);
        snprintf(affinity_desc, sizeof(affinity_desc), "affinity cpu%d", acceptor->proxy->cpu_affinity_mask[0]);
    }

    TFE_LOG_INFO(g_default_logger, "nfq acceptor thread %s is running.", acceptor->proxy->enable_cpu_affinity ? affinity_desc : "");

    event_base_dispatch(acceptor->ev_base);
    DIE("nfq acceptor thread is exited, abort.");
}
void acceptor_kni_v3_destroy(struct acceptor_kni_v3 *ctx)
{
if (ctx != NULL && ctx->qh != NULL)
{
nfq_destroy_queue(ctx->qh);
ctx->qh = NULL;
}
if (ctx != NULL && ctx->h != NULL)
{
nfq_close(ctx->h);
ctx->h = NULL;
}
if (ctx != NULL && ctx->ev_base != NULL)
{
event_base_free(ctx->ev_base);
ctx->ev_base = NULL;
}
if (ctx != NULL)
{
free(ctx);
ctx = NULL;
}
}
/*
 * Create the NFQUEUE acceptor and start its thread.
 *
 * proxy   : proxy that will receive the restored connections
 * profile : configuration file; the "nfq" section provides device, queue_id,
 *           queue_maxlen, queue_rcvbufsiz and queue_no_enobufs
 * logger  : unused here (g_default_logger is used for all messages)
 *
 * Opens the netfilter queue library, (re)binds it for AF_INET and AF_INET6,
 * creates the queue with payload_handler_cb() as callback in full packet
 * copy mode, registers the queue socket with a fresh event base and spawns
 * the thread that dispatches it.
 *
 * Returns the new acceptor, or NULL on failure (everything set up so far is
 * released through acceptor_kni_v3_destroy()).
 */
struct acceptor_kni_v3 *acceptor_kni_v3_create(struct tfe_proxy *proxy, const char *profile, void *logger)
{
    struct acceptor_kni_v3 *__ctx = ALLOC(struct acceptor_kni_v3, 1);
    int ret = 0;

    __ctx->proxy = proxy;
    __ctx->profile = profile;

    MESA_load_profile_string_def(profile, "nfq", "device", __ctx->device, sizeof(__ctx->device), "");
    MESA_load_profile_uint_def(profile, "nfq", "queue_id", &(__ctx->queue_id), 1);
    MESA_load_profile_uint_def(profile, "nfq", "queue_maxlen", &(__ctx->queue_maxlen), 65535);
    MESA_load_profile_uint_def(profile, "nfq", "queue_rcvbufsiz", &(__ctx->queue_rcvbufsiz), 98302500);
    MESA_load_profile_uint_def(profile, "nfq", "queue_no_enobufs", &(__ctx->queue_no_enobufs), 1);

    __ctx->h = nfq_open();
    if (!__ctx->h)
    {
        TFE_LOG_ERROR(g_default_logger, "Failed at nfq_open(), %d: %s", errno, strerror(errno));
        errno = 0;
        goto __errout;
    }

    /* Drop any existing binding for both address families before binding again. */
    if (nfq_unbind_pf(__ctx->h, AF_INET) < 0)
    {
        TFE_LOG_ERROR(g_default_logger, "Failed at nfq_unbind_pf(AF_INET), %d: %s", errno, strerror(errno));
        errno = 0;
        goto __errout;
    }

    if (nfq_unbind_pf(__ctx->h, AF_INET6) < 0)
    {
        TFE_LOG_ERROR(g_default_logger, "Failed at nfq_unbind_pf(AF_INET6), %d: %s", errno, strerror(errno));
        errno = 0;
        goto __errout;
    }

    if (nfq_bind_pf(__ctx->h, AF_INET) < 0)
    {
        TFE_LOG_ERROR(g_default_logger, "Failed at nfq_bind_pf(AF_INET), %d: %s", errno, strerror(errno));
        errno = 0;
        goto __errout;
    }

    if (nfq_bind_pf(__ctx->h, AF_INET6) < 0)
    {
        TFE_LOG_ERROR(g_default_logger, "Failed at nfq_bind_pf(AF_INET6), %d: %s", errno, strerror(errno));
        errno = 0;
        goto __errout;
    }

    __ctx->qh = nfq_create_queue(__ctx->h, __ctx->queue_id, &payload_handler_cb, __ctx);
    if (!__ctx->qh)
    {
        TFE_LOG_ERROR(g_default_logger, "Failed at nfq_create_queue(), %d: %s", errno, strerror(errno));
        errno = 0;
        goto __errout;
    }

    /*
     * NFQNL_COPY_NONE   - noop, do not use it
     * NFQNL_COPY_META   - copy only packet metadata
     * NFQNL_COPY_PACKET - copy entire packet
     */
    if (nfq_set_mode(__ctx->qh, NFQNL_COPY_PACKET, 0xffff) < 0)
    {
        TFE_LOG_ERROR(g_default_logger, "Failed at nfq_set_mode(NFQNL_COPY_PACKET), %d: %s", errno, strerror(errno));
        errno = 0;
        goto __errout;
    }

    if (nfq_set_queue_maxlen(__ctx->qh, __ctx->queue_maxlen) < 0)
    {
        TFE_LOG_ERROR(g_default_logger, "Failed at nfq_set_queue_maxlen(%u), %d: %s", __ctx->queue_maxlen, errno, strerror(errno));
        errno = 0;
        goto __errout;
    }

    nfnl_rcvbufsiz(nfq_nfnlh(__ctx->h), __ctx->queue_rcvbufsiz);
    __ctx->fd_nfq_socket = nfq_fd(__ctx->h);

    /*
     * set NETLINK_NO_ENOBUFS socket option to avoid receiving ENOBUFS errors (requires Linux kernel >= 2.6.30).
     * Don't send error about no buffer space available but drop the packets instead
     */
    if (__ctx->queue_no_enobufs)
    {
        if (setsockopt(__ctx->fd_nfq_socket, SOL_NETLINK, NETLINK_NO_ENOBUFS, &__ctx->queue_no_enobufs, sizeof(__ctx->queue_no_enobufs)) == -1)
        {
            TFE_LOG_ERROR(g_default_logger, "Failed at setsockopt(NETLINK_NO_ENOBUFS) for nfq fd, %d: %s", errno, strerror(errno));
            errno = 0;
            goto __errout;
        }
    }

    evutil_make_socket_nonblocking(__ctx->fd_nfq_socket);

    __ctx->ev_base = event_base_new();
    if (unlikely(__ctx->ev_base == NULL))
    {
        TFE_LOG_ERROR(g_default_logger, "Failed at event_base_new()");
        goto __errout;
    }

    __ctx->ev_nfq_socket = event_new(__ctx->ev_base, __ctx->fd_nfq_socket, EV_READ | EV_PERSIST, acceptor_kni_v3_event, __ctx);
    if (unlikely(__ctx->ev_nfq_socket == NULL))
    {
        TFE_LOG_ERROR(g_default_logger, "Failed at setup READ event for nfqueue socket");
        goto __errout;
    }

    ret = event_add(__ctx->ev_nfq_socket, NULL);
    if (unlikely(ret < 0))
    {
        TFE_LOG_ERROR(g_default_logger, "Failed at adding nfqueue socket event to evbase");
        goto __errout;
    }

    /*
     * pthread_create() reports failure by RETURNING a positive error number;
     * it never returns a negative value and does not set errno.
     */
    ret = pthread_create(&__ctx->thread, NULL, acceptor_kni_v3_event_thread_entry, (void *)__ctx);
    if (unlikely(ret != 0))
    {
        TFE_LOG_ERROR(g_default_logger, "Failed at creating event thread: %s", strerror(ret));
        goto __errout;
    }

    TFE_LOG_INFO(g_default_logger, "KNIv3 acceptor init successfully");
    return __ctx;

__errout:
    acceptor_kni_v3_destroy(__ctx);
    return NULL;
}