#include #include #include #include #include #include // for NF_ACCEPT #include #include #include #include #include #include #include #include #include #include #include #define TCP_RESTORE_TCPOPT_KIND 88 extern int tcp_policy_enforce(struct tcp_policy_enforcer *tcp_enforcer, struct tfe_cmsg *cmsg); extern void chaining_policy_enforce(struct chaining_policy_enforcer *enforcer, struct tfe_cmsg *cmsg, uint64_t rule_id); struct acceptor_kni_v3 { struct tfe_proxy *proxy; const char *profile; char device[IFNAMSIZ]; struct nfq_handle *h; struct nfq_q_handle *qh; int fd_nfq_socket; struct event_base *ev_base; struct event *ev_nfq_socket; struct timespec start; struct timespec end; pthread_t thread; unsigned int queue_id; unsigned int queue_maxlen; unsigned int queue_rcvbufsiz; unsigned int queue_no_enobufs; }; #define TCP_RESTORE_TCPOPT_KIND 88 struct tcp_restore_info_tlv { uint16_t type; uint16_t length; union { uint8_t value_as_uint8[0]; uint16_t value_as_uint16[0]; uint32_t value_as_uint32[0]; unsigned char value_as_string[0]; }; } __attribute__((packed)); struct tcp_restore_info_header { uint8_t __magic__[2]; /* Must be 0x4d, 0x5a */ uint16_t nr_tlvs; struct tcp_restore_info_tlv tlvs[0]; } __attribute__((packed)); static int tcp_restore_info_parse_from_cmsg(const char *data, unsigned int datalen, struct tcp_restore_info *out) { unsigned int tlv_iter; unsigned int nr_tlvs; struct tcp_restore_info_header *header = (struct tcp_restore_info_header *)data; if (header->__magic__[0] != 0x4d || header->__magic__[1] != 0x5a) { TFE_LOG_ERROR(g_default_logger, "Failed at parser TCP options from cmsg, wrong magic"); goto invalid_format; } nr_tlvs = ntohs(header->nr_tlvs); if (nr_tlvs >= 256) { TFE_LOG_ERROR(g_default_logger, "Failed at parser TCP options from cmsg, numbers of tlvs is larger than 256"); goto invalid_format; } if (datalen < sizeof(struct tcp_restore_info_header)) { TFE_LOG_ERROR(g_default_logger, "Failed at parser TCP options from cmsg, length is shorter than tlv header"); goto invalid_format; } memcpy(out->cmsg, data, datalen); out->cmsg_len = datalen; datalen -= sizeof(struct tcp_restore_info_header); data += sizeof(struct tcp_restore_info_header); for (tlv_iter = 0; tlv_iter < nr_tlvs; tlv_iter++) { struct tcp_restore_info_tlv *tlv = (struct tcp_restore_info_tlv *)data; uint16_t tlv_type = ntohs(tlv->type); uint16_t tlv_length = ntohs(tlv->length); unsigned int __length = tlv_length; if (datalen < __length) { TFE_LOG_ERROR(g_default_logger, "Failed at parser TCP options from cmsg, left space is smaller than tlv's length, " "datalen is %u, tlv's length is %u", datalen, __length); goto invalid_format; } if (tlv_length < sizeof(uint16_t) * 2) { TFE_LOG_ERROR(g_default_logger, "Failed at parser TCP options from cmsg, invalid tlv length, should larger than sizeof(type) + sizeof(length)"); goto invalid_format; } tlv_length -= sizeof(uint16_t) * 2; #define __CHECK_TLV_LENGTH(x) \ do \ { \ if (x != tlv_length) \ { \ TFE_LOG_ERROR(g_default_logger, "Failed at parser TCP options from cmsg, invalid tlv length, should be %u, actually is %u", \ (unsigned int)x, (unsigned int)tlv_length); \ goto invalid_format; \ } \ } while (0) switch (tlv_type) { case TFE_CMSG_TCP_RESTORE_SEQ: __CHECK_TLV_LENGTH(sizeof(uint32_t)); out->client.seq = ntohl(tlv->value_as_uint32[0]); out->server.ack = ntohl(tlv->value_as_uint32[0]); break; case TFE_CMSG_TCP_RESTORE_ACK: __CHECK_TLV_LENGTH(sizeof(uint32_t)); out->client.ack = ntohl(tlv->value_as_uint32[0]); out->server.seq = ntohl(tlv->value_as_uint32[0]); break; case TFE_CMSG_TCP_RESTORE_TS_CLIENT: __CHECK_TLV_LENGTH(sizeof(uint8_t)); out->client.timestamp_perm = !!(tlv->value_as_uint8[0]); break; case TFE_CMSG_TCP_RESTORE_TS_SERVER: __CHECK_TLV_LENGTH(sizeof(uint8_t)); out->server.timestamp_perm = !!(tlv->value_as_uint8[0]); break; case TFE_CMSG_TCP_RESTORE_TS_CLIENT_VAL: __CHECK_TLV_LENGTH(sizeof(uint32_t)); out->client.ts_val = ntohl(tlv->value_as_uint32[0]); break; case TFE_CMSG_TCP_RESTORE_TS_SERVER_VAL: __CHECK_TLV_LENGTH(sizeof(uint32_t)); out->server.ts_val = ntohl(tlv->value_as_uint32[0]); break; case TFE_CMSG_TCP_RESTORE_WSACLE_CLIENT: __CHECK_TLV_LENGTH(sizeof(uint8_t)); out->client.wscale_perm = true; out->client.wscale = tlv->value_as_uint8[0]; break; case TFE_CMSG_TCP_RESTORE_WSACLE_SERVER: __CHECK_TLV_LENGTH(sizeof(uint8_t)); out->server.wscale_perm = true; out->server.wscale = tlv->value_as_uint8[0]; break; case TFE_CMSG_TCP_RESTORE_SACK_CLIENT: __CHECK_TLV_LENGTH(sizeof(uint8_t)); out->client.sack_perm = true; break; case TFE_CMSG_TCP_RESTORE_SACK_SERVER: __CHECK_TLV_LENGTH(sizeof(uint8_t)); out->server.sack_perm = true; break; case TFE_CMSG_TCP_RESTORE_MSS_CLIENT: __CHECK_TLV_LENGTH(sizeof(uint16_t)); out->client.mss = ntohs(tlv->value_as_uint16[0]); break; case TFE_CMSG_TCP_RESTORE_MSS_SERVER: __CHECK_TLV_LENGTH(sizeof(uint16_t)); out->server.mss = ntohs(tlv->value_as_uint16[0]); break; case TFE_CMSG_TCP_RESTORE_WINDOW_CLIENT: __CHECK_TLV_LENGTH(sizeof(uint16_t)); out->client.window = ntohs(tlv->value_as_uint16[0]); break; case TFE_CMSG_TCP_RESTORE_WINDOW_SERVER: __CHECK_TLV_LENGTH(sizeof(uint16_t)); out->server.window = ntohs(tlv->value_as_uint16[0]); break; case TFE_CMSG_TCP_RESTORE_INFO_PACKET_CUR_DIR: __CHECK_TLV_LENGTH(sizeof(uint8_t)); out->cur_dir = (enum tcp_restore_pkt_dir)(tlv->value_as_uint8[0]); default: break; } data += __length; datalen -= __length; } return 0; invalid_format: return -EINVAL; } static void tcp_restore_info_parse_from_pkt(struct pkt_info *pktinfo, struct tcp_restore_info *out) { if (pktinfo->addr_type == ADDR_TYPE_IPV4) { struct iphdr *iphdr = pktinfo->iphdr.v4; struct tcphdr *tcphdr = pktinfo->tcphdr; struct sockaddr_in *in_addr_client; struct sockaddr_in *in_addr_server; if (out->cur_dir == PKT_DIR_NOT_SET || out->cur_dir == PKT_DIR_C2S) { in_addr_client = (struct sockaddr_in *)&out->client.addr; in_addr_server = (struct sockaddr_in *)&out->server.addr; } else { in_addr_client = (struct sockaddr_in *)&out->server.addr; in_addr_server = (struct sockaddr_in *)&out->client.addr; } in_addr_client->sin_family = AF_INET; in_addr_client->sin_addr.s_addr = iphdr->saddr; in_addr_client->sin_port = tcphdr->source; in_addr_server->sin_family = AF_INET; in_addr_server->sin_addr.s_addr = iphdr->daddr; in_addr_server->sin_port = tcphdr->dest; } if (pktinfo->addr_type == ADDR_TYPE_IPV6) { struct ip6_hdr *ipv6hdr = (struct ip6_hdr *)(pktinfo->iphdr.v6); struct tcphdr *tcphdr = pktinfo->tcphdr; struct sockaddr_in6 *in6_addr_client; struct sockaddr_in6 *in6_addr_server; if (out->cur_dir == PKT_DIR_NOT_SET || out->cur_dir == PKT_DIR_C2S) { in6_addr_client = (struct sockaddr_in6 *)&out->client.addr; in6_addr_server = (struct sockaddr_in6 *)&out->server.addr; } else { in6_addr_client = (struct sockaddr_in6 *)&out->server.addr; in6_addr_server = (struct sockaddr_in6 *)&out->client.addr; } in6_addr_client->sin6_family = AF_INET6; in6_addr_client->sin6_addr = ipv6hdr->ip6_src; in6_addr_client->sin6_port = tcphdr->source; in6_addr_server->sin6_family = AF_INET6; in6_addr_server->sin6_addr = ipv6hdr->ip6_dst; in6_addr_server->sin6_port = tcphdr->dest; } } struct tcp_option_mss { uint8_t kind; uint8_t length; uint16_t mss_value; } __attribute__((__packed__)); struct tcp_option_window_scale { uint8_t kind; uint8_t length; uint8_t shift_count; } __attribute__((__packed__)); struct tcp_option_sack { uint8_t kind; uint8_t length; } __attribute__((__packed__)); struct tcp_option_time_stamp { uint8_t kind; uint8_t length; uint32_t tsval; uint32_t tsecr; } __attribute__((__packed__)); static int fake_tcp_handshake(struct tfe_proxy *proxy, struct tcp_restore_info *restore_info) { char buffer[1500] = {0}; int length = 0; char tcp_option_buffer_c[40] = {0}; char tcp_option_buffer_s[40] = {0}; char tcp_option_buffer_c2[40] = {0}; int tcp_option_length_c = 0; int tcp_option_length_s = 0; int tcp_option_length_c2 = 0; const struct tcp_restore_endpoint *client = &restore_info->client; const struct tcp_restore_endpoint *server = &restore_info->server; struct raw_socket *raw_socket_c = raw_socket_create(proxy->traffic_steering_options.device_client, proxy->traffic_steering_options.so_mask_client); struct raw_socket *raw_socket_s = raw_socket_create(proxy->traffic_steering_options.device_server, proxy->traffic_steering_options.so_mask_server); if (raw_socket_c == NULL || raw_socket_s == NULL) { raw_socket_destory(raw_socket_c); raw_socket_destory(raw_socket_s); return -1; } uint32_t c_seq = client->seq - 1; uint32_t s_seq = server->seq - 1; /* * Maximum segment size: Kind: 2, Length: 4 * +---------+---------+---------+ * | Kind=2 |Length=4 |mss.value| * +---------+---------+---------+ * 1 1 2 */ if (client->mss && server->mss) { struct tcp_option_mss *option_c = (struct tcp_option_mss *)(tcp_option_buffer_c + tcp_option_length_c); option_c->kind = 2; option_c->length = 4; option_c->mss_value = htons(client->mss); tcp_option_length_c += sizeof(struct tcp_option_mss); struct tcp_option_mss *option_s = (struct tcp_option_mss *)(tcp_option_buffer_s + tcp_option_length_s); option_s->kind = 2; option_s->length = 4; option_s->mss_value = htons(server->mss); tcp_option_length_s += sizeof(struct tcp_option_mss); } /* * Window Scale option: Kind: 3, Length: 3 * +---------+---------+---------+ * | Kind=3 |Length=3 |shift.cnt| * +---------+---------+---------+ * 1 1 1 */ if (client->wscale_perm && server->wscale_perm) { // padding memset(tcp_option_buffer_c + tcp_option_length_c, 1, 1); tcp_option_length_c += 1; memset(tcp_option_buffer_s + tcp_option_length_s, 1, 1); tcp_option_length_s += 1; struct tcp_option_window_scale *option_c = (struct tcp_option_window_scale *)(tcp_option_buffer_c + tcp_option_length_c); option_c->kind = 3; option_c->length = 3; option_c->shift_count = client->wscale; tcp_option_length_c += sizeof(struct tcp_option_window_scale); struct tcp_option_window_scale *option_s = (struct tcp_option_window_scale *)(tcp_option_buffer_s + tcp_option_length_s); option_s->kind = 3; option_s->length = 3; option_s->shift_count = server->wscale; tcp_option_length_s += sizeof(struct tcp_option_window_scale); } /* * SACK option: Kind: 4, Length: 2 * +---------+---------+ * | Kind=4 |Length=2 | * +---------+---------+ * 1 1 */ if (client->sack_perm && server->sack_perm) { // padding memset(tcp_option_buffer_c + tcp_option_length_c, 1, 2); tcp_option_length_c += 2; memset(tcp_option_buffer_s + tcp_option_length_s, 1, 2); tcp_option_length_s += 2; struct tcp_option_sack *option_c = (struct tcp_option_sack *)(tcp_option_buffer_c + tcp_option_length_c); option_c->kind = 4; option_c->length = 2; tcp_option_length_c += sizeof(struct tcp_option_sack); struct tcp_option_sack *option_s = (struct tcp_option_sack *)(tcp_option_buffer_s + tcp_option_length_s); option_s->kind = 4; option_s->length = 2; tcp_option_length_s += sizeof(struct tcp_option_sack); } /* * Time Stamp option: Kind: 8, Length: 10 * +---------+---------+-----+-----+ * | Kind=8 |Length=10|tsval|tsecr| * +---------+---------+-----+-----+ * 1 1 4 4 */ if (client->timestamp_perm && server->timestamp_perm) { // padding memset(tcp_option_buffer_c + tcp_option_length_c, 1, 2); tcp_option_length_c += 2; memset(tcp_option_buffer_s + tcp_option_length_s, 1, 2); tcp_option_length_s += 2; memset(tcp_option_buffer_c2 + tcp_option_length_c2, 1, 2); tcp_option_length_c2 += 2; struct tcp_option_time_stamp *option_c = (struct tcp_option_time_stamp *)(tcp_option_buffer_c + tcp_option_length_c); option_c->kind = 8; option_c->length = 10; option_c->tsval = htonl(client->ts_val); option_c->tsecr = htonl(0); tcp_option_length_c += sizeof(struct tcp_option_time_stamp); struct tcp_option_time_stamp *option_s = (struct tcp_option_time_stamp *)(tcp_option_buffer_s + tcp_option_length_s); option_s->kind = 8; option_s->length = 10; option_s->tsval = htonl(server->ts_val); option_s->tsecr = htonl(client->ts_val); tcp_option_length_s += sizeof(struct tcp_option_time_stamp); struct tcp_option_time_stamp *option_c2 = (struct tcp_option_time_stamp *)(tcp_option_buffer_c2 + tcp_option_length_c2); option_c2->kind = 8; option_c2->length = 10; option_c2->tsval = htonl(client->ts_val); option_c2->tsecr = htonl(server->ts_val); tcp_option_length_c2 += sizeof(struct tcp_option_time_stamp); } if (client->addr.ss_family == AF_INET6) { struct sockaddr_in6 *sk_client = (struct sockaddr_in6 *)&client->addr; struct sockaddr_in6 *sk_server = (struct sockaddr_in6 *)&server->addr; uint16_t port_client = sk_client->sin6_port; uint16_t port_server = sk_server->sin6_port; // C -> S length = tcp_packet_v6_construct( buffer, // buffer &raw_socket_c->mac_addr, &raw_socket_s->mac_addr, 0, ETH_P_IPV6, // Ether &sk_client->sin6_addr, &sk_server->sin6_addr, 55, // IPv6 port_client, port_server, c_seq, 0, TCP_SYN_FLAG, client->window, // TCP Header tcp_option_buffer_c, tcp_option_length_c, // TCP Options NULL, 0); // Payload raw_socket_send(raw_socket_c, buffer, length); c_seq += 1; // S -> C length = tcp_packet_v6_construct( buffer, // buffer &raw_socket_s->mac_addr, &raw_socket_c->mac_addr, 0, ETH_P_IPV6, // Ether &sk_server->sin6_addr, &sk_client->sin6_addr, 65, // IPv6 port_server, port_client, s_seq, c_seq, TCP_SYN_FLAG | TCP_ACK_FLAG, server->window, // TCP Header tcp_option_buffer_s, tcp_option_length_s, // TCP Options NULL, 0); // Payload raw_socket_send(raw_socket_s, buffer, length); s_seq += 1; // C -> S length = tcp_packet_v6_construct( buffer, // buffer &raw_socket_c->mac_addr, &raw_socket_s->mac_addr, 0, ETH_P_IPV6, // Ether &sk_client->sin6_addr, &sk_server->sin6_addr, 55, // IPv6 port_client, port_server, c_seq, s_seq, TCP_ACK_FLAG, client->window, // TCP Header tcp_option_buffer_c2, tcp_option_length_c2, // TCP Options NULL, 0); // Payload raw_socket_send(raw_socket_c, buffer, length); } else { struct sockaddr_in *sk_client = (struct sockaddr_in *)&client->addr; struct sockaddr_in *sk_server = (struct sockaddr_in *)&server->addr; uint16_t port_client = sk_client->sin_port; uint16_t port_server = sk_server->sin_port; // C -> S length = tcp_packet_v4_construct( buffer, // buffer &raw_socket_c->mac_addr, &raw_socket_s->mac_addr, 0, ETH_P_IP, // Ether &sk_client->sin_addr, &sk_server->sin_addr, 0, 55, 0x11, // IPv4 port_client, port_server, c_seq, 0, TCP_SYN_FLAG, client->window, // TCP Header tcp_option_buffer_c, tcp_option_length_c, // TCP Options NULL, 0); raw_socket_send(raw_socket_c, buffer, length); c_seq += 1; // S -> C length = tcp_packet_v4_construct( buffer, // buffer &raw_socket_s->mac_addr, &raw_socket_c->mac_addr, 0, ETH_P_IP, // Ether &sk_server->sin_addr,&sk_client->sin_addr, 0, 65, 0x12, // IPv4 port_server, port_client, s_seq, c_seq, TCP_SYN_FLAG | TCP_ACK_FLAG, server->window, // TCP Header tcp_option_buffer_s, tcp_option_length_s, // TCP Options NULL, 0); raw_socket_send(raw_socket_s, buffer, length); s_seq += 1; // C -> S length = tcp_packet_v4_construct( buffer, // buffer &raw_socket_c->mac_addr, &raw_socket_s->mac_addr, 0, ETH_P_IP, // Ether &sk_client->sin_addr, &sk_server->sin_addr, 0, 55, 0x13, // IPv4 port_client, port_server, c_seq, s_seq, TCP_ACK_FLAG, client->window, // TCP Header tcp_option_buffer_c2, tcp_option_length_c2, // TCP Options NULL, 0); raw_socket_send(raw_socket_c, buffer, length); } raw_socket_destory(raw_socket_c); raw_socket_destory(raw_socket_s); return 0; } static int overwrite_tcp_mss(struct tfe_cmsg *cmsg, struct tcp_restore_info *restore) { int ret = 0; uint16_t size = 0; int server_side_mss_enable = 0; int server_side_mss_value = 0; int client_side_mss_enable = 0; int client_side_mss_value = 0; ret = tfe_cmsg_get_value(cmsg, TFE_CMSG_DOWNSTREAM_TCP_MSS_ENABLE, (unsigned char *)&client_side_mss_enable, sizeof(client_side_mss_enable), &size); if (ret < 0) { TFE_LOG_ERROR(g_default_logger, "failed at fetch client side tcp mss from cmsg: %s", strerror(-ret)); return -1; } ret = tfe_cmsg_get_value(cmsg, TFE_CMSG_DOWNSTREAM_TCP_MSS_VALUE, (unsigned char *)&client_side_mss_value, sizeof(client_side_mss_value), &size); if (ret < 0) { TFE_LOG_ERROR(g_default_logger, "failed at fetch client side tcp mss value from cmsg: %s", strerror(-ret)); return -1; } ret = tfe_cmsg_get_value(cmsg, TFE_CMSG_UPSTREAM_TCP_MSS_ENABLE, (unsigned char *)&server_side_mss_enable, sizeof(server_side_mss_enable), &size); if (ret < 0) { TFE_LOG_ERROR(g_default_logger, "failed at fetch server side tcp mss from cmsg: %s", strerror(-ret)); return -1; } ret = tfe_cmsg_get_value(cmsg, TFE_CMSG_UPSTREAM_TCP_MSS_VALUE, (unsigned char *)&server_side_mss_value, sizeof(server_side_mss_value), &size); if (ret < 0) { TFE_LOG_ERROR(g_default_logger, "failed at fetch server side tcp mss value from cmsg: %s", strerror(-ret)); return -1; } if (client_side_mss_enable) { restore->client.mss = client_side_mss_value; } if (server_side_mss_enable) { restore->server.mss = server_side_mss_value; } return 0; } /* * nfmsg : message objetc that contains the packet * nfad : Netlink packet data handle */ static int payload_handler_cb(struct nfq_q_handle *qh, struct nfgenmsg *nfmsg, struct nfq_data *nfa, void *data) { int id = 0; int ret = 0; int fd_downstream = 0; int fd_upstream = 0; int fd_fake_c = 0; int fd_fake_s = 0; int hit_tcpopt = 0; uint16_t cmsg_offset = 0; uint8_t restore_opt_len = 0; int raw_payload_len = 0; unsigned int cmsg_payload_len = 0; char *cmsg_payload = NULL; uint64_t jiffies_us = 0; unsigned char *raw_payload = NULL; struct iphdr *iphdr = NULL; struct tfe_cmsg *cmsg = NULL; struct pkt_info pktinfo; struct tcp_restore_info restore_info; uint8_t stream_protocol_in_char = 0; uint8_t enalbe_decrypted_traffic_steering = 0; uint16_t size = 0; // uint64_t chaining_rule_id = 0; // only use for acceptv4 struct acceptor_kni_v3 *__ctx = (struct acceptor_kni_v3 *)data; clock_gettime(CLOCK_MONOTONIC, &(__ctx->start)); memset(&pktinfo, 0, sizeof(pktinfo)); memset(&restore_info, 0, sizeof(restore_info)); struct nfqnl_msg_packet_hdr *ph = nfq_get_msg_packet_hdr(nfa); if (ph == NULL) { TFE_LOG_ERROR(g_default_logger, "Failed at nfq_get_msg_packet_hdr(), result is NULL"); goto end; } id = ntohl(ph->packet_id); raw_payload_len = nfq_get_payload(nfa, &raw_payload); if ((unsigned int)raw_payload_len <= (MIN(sizeof(struct iphdr), sizeof(struct ip6_hdr)) + sizeof(struct tcphdr))) { TFE_LOG_ERROR(g_default_logger, "Failed at nfq_get_payload(), paylod len %d too small, less than %lu", raw_payload_len, (MIN(sizeof(struct iphdr), sizeof(struct ip6_hdr)) + sizeof(struct tcphdr))); tfe_hexdump2file(stderr, "Failed at parsing payload, payload len too small", raw_payload, (unsigned int)raw_payload_len); goto end; } iphdr = (struct iphdr *)raw_payload; if (iphdr->version == 4) { if (iphdr->protocol == IPPROTO_TCP) { tfe_pkt_parse_ipv4_header(raw_payload, &pktinfo); } else { TFE_LOG_ERROR(g_default_logger, "Failed at parse IPv4 header, sub protocol not tcp"); tfe_hexdump2file(stderr, "Failed at parsing IPv4 header, TCP no found", raw_payload, (unsigned int)raw_payload_len); goto end; } } else { tfe_pkt_parse_ipv6_header(raw_payload, &pktinfo); if (pktinfo.parse_failed) { TFE_LOG_ERROR(g_default_logger, "Failed at parse IPv6 header, sub protocol not tcp"); tfe_hexdump2file(stderr, "Failed at parsing IPv6 header, TCP no found", raw_payload, (unsigned int)raw_payload_len); goto end; } } if (pktinfo.ip_totlen > raw_payload_len) { TFE_LOG_ERROR(g_default_logger, "Failed at parser IP header, invalid ip header totlen"); tfe_hexdump2file(stderr, "Failed at parsing IP header, IP totlen too small", raw_payload, (unsigned int)raw_payload_len); goto end; } // check if there is a tcp options if (pktinfo.tcphdr_len <= sizeof(struct tcphdr)) { TFE_LOG_ERROR(g_default_logger, "Failed at parser TCP header, TCP header len %d too small, less than %lu", pktinfo.tcphdr_len, sizeof(struct tcphdr)); tfe_hexdump2file(stderr, "Failed at parsing TCP header, TCP header len too small", raw_payload, (unsigned int)raw_payload_len); goto end; } // Parse tcp options hit_tcpopt = tfe_pkt_find_tcp_option(TCP_RESTORE_TCPOPT_KIND, (char *)pktinfo.tcphdr, pktinfo.tcphdr_len - sizeof(struct tcphdr), &restore_opt_len, (char *)&cmsg_offset, sizeof(cmsg_offset)); if (!hit_tcpopt || restore_opt_len != 2) { TFE_LOG_ERROR(g_default_logger, "Failed at parser TCP options, tcp option hit:%d, opt len:%d", hit_tcpopt, restore_opt_len); tfe_hexdump2file(stderr, "Failed at parsing TCP options, TCP options no found", raw_payload, (unsigned int)raw_payload_len); goto end; } cmsg_offset = ntohs(cmsg_offset); cmsg_payload = (char *)(pktinfo.data + cmsg_offset); cmsg_payload_len = pktinfo.data_len - cmsg_offset; // 从 cmsg 中解析信息存储到 restore_info 中 ret = tcp_restore_info_parse_from_cmsg(cmsg_payload, cmsg_payload_len, &restore_info); if (ret < 0) { TFE_LOG_ERROR(g_default_logger, "Failed at parser TCP options from cmsg"); goto end; } tcp_restore_info_parse_from_pkt(&pktinfo, &restore_info); // Remove cmsg from payload pktinfo.ip_totlen = pktinfo.ip_totlen - cmsg_payload_len; if (pktinfo.addr_type == ADDR_TYPE_IPV4) { pktinfo.iphdr.v4->tot_len = htons(pktinfo.ip_totlen); pktinfo.iphdr.v4->check = 0; pktinfo.iphdr.v4->check = tfe_pkt_checksum_ip((void*)pktinfo.iphdr.v4, pktinfo.iphdr_len); pktinfo.tcphdr->check = 0; pktinfo.tcphdr->check = tfe_pkt_checksum_tcp_v4((void*)pktinfo.tcphdr, pktinfo.ip_totlen - pktinfo.iphdr_len, pktinfo.iphdr.v4->saddr, pktinfo.iphdr.v4->daddr); } if (pktinfo.addr_type == ADDR_TYPE_IPV6) { pktinfo.iphdr.v6->ip6_ctlun.ip6_un1.ip6_un1_plen = 0; pktinfo.iphdr.v6->ip6_ctlun.ip6_un1.ip6_un1_plen = htons(pktinfo.ip_totlen - sizeof(struct ip6_hdr)); // IPv6 header no checksum pktinfo.tcphdr->check = 0; pktinfo.tcphdr->check = tfe_pkt_checksum_tcp_v6((void*)pktinfo.tcphdr, pktinfo.ip_totlen - pktinfo.iphdr_len, pktinfo.iphdr.v6->ip6_src, pktinfo.iphdr.v6->ip6_dst); } if (tfe_cmsg_deserialize((const unsigned char *)restore_info.cmsg, restore_info.cmsg_len, &cmsg) < 0) { TFE_LOG_ERROR(g_default_logger, "failed at tfe_cmsg_deserialize()"); goto end; } intercept_policy_enforce(__ctx->proxy->int_ply_enforcer, cmsg); tcp_policy_enforce(__ctx->proxy->tcp_ply_enforcer, cmsg); // chaining_policy_enforce(__ctx->proxy->chain_ply_enforcer, cmsg, chaining_rule_id); if (overwrite_tcp_mss(cmsg, &restore_info)) { goto end; } tfe_tcp_restore_info_dump(&restore_info); // tcp repair C2S fd_upstream = tfe_tcp_restore_fd_create(&(restore_info.client), &(restore_info.server), __ctx->device, 0x65); if (fd_upstream < 0) { TFE_LOG_ERROR(g_default_logger, "Failed at tcp_restore_fd_create(UPSTREAM)"); goto end; } // tcp repair S2C fd_downstream = tfe_tcp_restore_fd_create(&(restore_info.server), &(restore_info.client), __ctx->device, 0x65); if (fd_downstream < 0) { TFE_LOG_ERROR(g_default_logger, "Failed at tcp_restore_fd_create(DOWNSTREAM)"); goto end; } tfe_cmsg_get_value(cmsg, TFE_CMSG_TCP_RESTORE_PROTOCOL, (unsigned char *)&stream_protocol_in_char, sizeof(stream_protocol_in_char), &size); // tfe_cmsg_get_value(cmsg, TFE_CMSG_TCP_DECRYPTED_TRAFFIC_STEERING, (unsigned char *)&enalbe_decrypted_traffic_steering, sizeof(enalbe_decrypted_traffic_steering), &size); if (steering_device_is_available() && ( (STREAM_PROTO_PLAIN == (enum tfe_stream_proto)stream_protocol_in_char && __ctx->proxy->traffic_steering_options.enable_steering_http) || (STREAM_PROTO_SSL == (enum tfe_stream_proto)stream_protocol_in_char && __ctx->proxy->traffic_steering_options.enable_steering_ssl) || enalbe_decrypted_traffic_steering == 1)) { if (fake_tcp_handshake(__ctx->proxy, &restore_info) == -1) { TFE_LOG_ERROR(g_default_logger, "Failed at fake_tcp_handshake()"); goto end; } fd_fake_c = tfe_tcp_restore_fd_create(&(restore_info.client), &(restore_info.server), __ctx->proxy->traffic_steering_options.device_client, __ctx->proxy->traffic_steering_options.so_mask_client); if (fd_fake_c < 0) { TFE_LOG_ERROR(g_default_logger, "Failed at tcp_restore_fd_create(fd_fake_c)"); goto end; } fd_fake_s = tfe_tcp_restore_fd_create(&(restore_info.server), &(restore_info.client), __ctx->proxy->traffic_steering_options.device_server, __ctx->proxy->traffic_steering_options.so_mask_server); if (fd_fake_s < 0) { TFE_LOG_ERROR(g_default_logger, "Failed at tcp_restore_fd_create(fd_fake_s)"); goto end; } } if (tfe_proxy_fds_accept(__ctx->proxy, fd_downstream, fd_upstream, fd_fake_c, fd_fake_s, cmsg) < 0) { TFE_LOG_ERROR(g_default_logger, "Failed at tfe_proxy_fds_accept()"); goto end; } TFE_PROXY_STAT_INCREASE(STAT_FD_OPEN_BY_KNI_ACCEPT, 2); clock_gettime(CLOCK_MONOTONIC, &(__ctx->end)); jiffies_us = (__ctx->end.tv_sec - __ctx->start.tv_sec) * 1000 * 1000 + (__ctx->end.tv_nsec - __ctx->start.tv_nsec) / 1000; TFE_LOG_DEBUG(g_default_logger, "nfqueue tcp_restore=%p time=%ldus hw_protocol=0x%04x hook=%u id=%010u protocol=%s total_len=%d inject_len=%d iphdr_len=%d tcphdr_len=%d data_len=%d", &restore_info, jiffies_us, ntohs(ph->hw_protocol), ph->hook, id, (pktinfo.addr_type == ADDR_TYPE_IPV4 ? "IPv4" : "IPv6"), raw_payload_len, pktinfo.ip_totlen, pktinfo.iphdr_len, pktinfo.tcphdr_len, pktinfo.data_len); /* * NF_DROP : discarded the packet * NF_ACCEPT : the packet passes, continue iterations * NF_QUEUE : inject the packet into a different queue (the target queue number is in the high 16 bits of the verdict) * NF_REPEAT : iterate the same cycle once more * NF_STOP : accept, but don't continue iterations */ // nfq_set_verdict() // nfq_set_verdict2() // nfq_set_verdict_batch() // nfq_set_verdict_batch2() // nfq_set_verdict_mark() return nfq_set_verdict(qh, id, NF_ACCEPT, pktinfo.ip_totlen, raw_payload); end: if (fd_upstream > 0) { TFE_PROXY_STAT_INCREASE(STAT_FD_CLOSE_BY_KNI_ACCEPT_FAIL, 1); close(fd_upstream); } if (fd_downstream > 0) { TFE_PROXY_STAT_INCREASE(STAT_FD_CLOSE_BY_KNI_ACCEPT_FAIL, 1); close(fd_downstream); } return nfq_set_verdict(qh, id, NF_ACCEPT, 0, NULL); } void acceptor_kni_v3_event(evutil_socket_t fd, short what, void *user) { struct acceptor_kni_v3 *__ctx = (struct acceptor_kni_v3 *) user; assert(__ctx != NULL && __ctx->thread == pthread_self()); assert(what & EV_READ); char buf[4096] __attribute__ ((aligned)); int rv; rv = recv(fd, buf, sizeof(buf), 0); if (rv >= 0) { TFE_LOG_DEBUG(g_default_logger, "nfqueue acceptor thread recv %d bytes form nfqueue fd %d", rv, fd); nfq_handle_packet(__ctx->h, buf, rv); return; } else { /* if your application is too slow to digest the packets that * are sent from kernel-space, the socket buffer that we use * to enqueue packets may fill up returning ENOBUFS. Depending * on your application, this error may be ignored. Please, see * the doxygen documentation of this library on how to improve * this situation. */ if (errno == ENOBUFS) { TFE_LOG_ERROR(g_default_logger, "nfqueue losing packets!"); } TFE_LOG_ERROR(g_default_logger, "Failed at recv() data from nfqueue, %d: %s", errno, strerror(errno)); } } void *acceptor_kni_v3_event_thread_entry(void *args) { struct acceptor_kni_v3 *__ctx = (struct acceptor_kni_v3 *)args; assert(__ctx != NULL && __ctx->thread == pthread_self()); char thread_name[16] = { 0 }; snprintf(thread_name, sizeof(thread_name), "tfe:acceptor-v3"); prctl(PR_SET_NAME, (unsigned long long) thread_name, NULL, NULL, NULL); char affinity[32] = {0}; if (__ctx->proxy->enable_cpu_affinity) { tfe_thread_set_affinity(__ctx->proxy->cpu_affinity_mask[0]); snprintf(affinity, sizeof(affinity), "affinity cpu%d", __ctx->proxy->cpu_affinity_mask[0]); } TFE_LOG_INFO(g_default_logger, "nfq acceptor thread %s is running.", __ctx->proxy->enable_cpu_affinity ? affinity : ""); event_base_dispatch(__ctx->ev_base); DIE("nfq acceptor thread is exited, abort."); } void acceptor_kni_v3_destroy(struct acceptor_kni_v3 *ctx) { if (ctx != NULL && ctx->qh != NULL) { nfq_destroy_queue(ctx->qh); ctx->qh = NULL; } if (ctx != NULL && ctx->h != NULL) { nfq_close(ctx->h); ctx->h = NULL; } if (ctx != NULL && ctx->ev_base != NULL) { event_base_free(ctx->ev_base); ctx->ev_base = NULL; } if (ctx != NULL) { free(ctx); ctx = NULL; } } struct acceptor_kni_v3 *acceptor_kni_v3_create(struct tfe_proxy *proxy, const char *profile, void *logger) { struct acceptor_kni_v3 *__ctx = ALLOC(struct acceptor_kni_v3, 1); int ret = 0; __ctx->proxy = proxy; __ctx->profile = profile; MESA_load_profile_string_def(profile, "nfq", "device", __ctx->device, sizeof(__ctx->device), ""); MESA_load_profile_uint_def(profile, "nfq", "queue_id", &(__ctx->queue_id), 1); MESA_load_profile_uint_def(profile, "nfq", "queue_maxlen", &(__ctx->queue_maxlen), 65535); MESA_load_profile_uint_def(profile, "nfq", "queue_rcvbufsiz", &(__ctx->queue_rcvbufsiz), 98302500); MESA_load_profile_uint_def(profile, "nfq", "queue_no_enobufs", &(__ctx->queue_no_enobufs), 1); __ctx->h = nfq_open(); if (!__ctx->h) { TFE_LOG_ERROR(g_default_logger, "Failed at nfq_open(), %d: %s", errno, strerror(errno)); errno = 0; goto __errout; } if (nfq_unbind_pf(__ctx->h, AF_INET) < 0) { TFE_LOG_ERROR(g_default_logger, "Failed at nfq_unbind_pf(AF_INET), %d: %s", errno, strerror(errno)); errno = 0; goto __errout; } if (nfq_unbind_pf(__ctx->h, AF_INET6) < 0) { TFE_LOG_ERROR(g_default_logger, "Failed at nfq_unbind_pf(AF_INET6), %d: %s", errno, strerror(errno)); errno = 0; goto __errout; } if (nfq_bind_pf(__ctx->h, AF_INET) < 0) { TFE_LOG_ERROR(g_default_logger, "Failed at nfq_bind_pf(AF_INET), %d: %s", errno, strerror(errno)); errno = 0; goto __errout; } if (nfq_bind_pf(__ctx->h, AF_INET6) < 0) { TFE_LOG_ERROR(g_default_logger, "Failed at nfq_bind_pf(AF_INET6), %d: %s", errno, strerror(errno)); errno = 0; goto __errout; } __ctx->qh = nfq_create_queue(__ctx->h, __ctx->queue_id, &payload_handler_cb, __ctx); if (!__ctx->qh) { TFE_LOG_ERROR(g_default_logger, "Failed at nfq_create_queue(), %d: %s", errno, strerror(errno)); errno = 0; goto __errout; } /* * NFQNL_COPY_NONE - noop, do not use it * NFQNL_COPY_META - copy only packet metadata * NFQNL_COPY_PACKET - copy entire packet */ if (nfq_set_mode(__ctx->qh, NFQNL_COPY_PACKET, 0xffff) < 0) { TFE_LOG_ERROR(g_default_logger, "Failed at nfq_set_mode(NFQNL_COPY_PACKET), %d: %s", errno, strerror(errno)); errno = 0; goto __errout; } if (nfq_set_queue_maxlen(__ctx->qh, __ctx->queue_maxlen) < 0) { TFE_LOG_ERROR(g_default_logger, "Failed at nfq_set_queue_maxlen(%d), %d: %s", __ctx->queue_maxlen, errno, strerror(errno)); errno = 0; goto __errout; } nfnl_rcvbufsiz(nfq_nfnlh(__ctx->h), __ctx->queue_rcvbufsiz); __ctx->fd_nfq_socket = nfq_fd(__ctx->h); /* * set NETLINK_NO_ENOBUFS socket option to avoid receiving ENOBUFS errors (requires Linux kernel >= 2.6.30). * Don't send error about no buffer space available but drop the packets instead */ if (__ctx->queue_no_enobufs) { if (setsockopt(__ctx->fd_nfq_socket, SOL_NETLINK, NETLINK_NO_ENOBUFS, &__ctx->queue_no_enobufs, sizeof(__ctx->queue_no_enobufs)) == -1) { TFE_LOG_ERROR(g_default_logger, "Failed at setsockopt(NETLINK_NO_ENOBUFS) for nfq fd, %d: %s", errno, strerror(errno)); errno = 0; goto __errout; } } evutil_make_socket_nonblocking(__ctx->fd_nfq_socket); __ctx->ev_base = event_base_new(); if (unlikely(__ctx->ev_base == NULL)) { TFE_LOG_ERROR(g_default_logger, "Failed at event_base_new()"); goto __errout; } __ctx->ev_nfq_socket = event_new(__ctx->ev_base, __ctx->fd_nfq_socket, EV_READ | EV_PERSIST, acceptor_kni_v3_event, __ctx); if (unlikely(__ctx->ev_nfq_socket == NULL)) { TFE_LOG_ERROR(g_default_logger, "Failed at setup READ event for nfqueue socket"); goto __errout; } ret = event_add(__ctx->ev_nfq_socket, NULL); if (unlikely(ret < 0)) { TFE_LOG_ERROR(g_default_logger, "Failed at adding nfqueue socket event to evbase"); goto __errout; } ret = pthread_create(&__ctx->thread, NULL, acceptor_kni_v3_event_thread_entry, (void *) __ctx); if (unlikely(ret < 0)) { TFE_LOG_ERROR(g_default_logger, "Failed at creating event thread: %s", strerror(errno)); errno = 0; goto __errout; } TFE_LOG_INFO(g_default_logger, "KNIv3 acceptor init successfully"); return __ctx; __errout: acceptor_kni_v3_destroy(__ctx); return NULL; }