This repository has been archived on 2025-09-14. You can view files and clone it, but cannot push or open issues or pull requests.
Files
tango-tfe/bpf/bpf_tun_rss_steering.c

413 lines
14 KiB
C

#include <linux/in.h>
#include <linux/udp.h>
#include <linux/tcp.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/if_ether.h>
#include "libendian.h"
#include "crc32_hash.h"
#include "bpf_config_kernel.h"
#define IP6_EXTENSIONS_COUNT 11
/*
 * Per-packet scratch state shared by the parsing, hashing and debug-dump
 * helpers in this file. It is zero-initialised by the entry point, filled in
 * by parse_packet() and completed by select_rss_queue().
 */
struct packet
{
/* Set to 1 by parse_packet() when the EtherType is ETH_P_IP. */
__u8 is_ipv4;
/* Set to 1 by parse_packet() when the EtherType is ETH_P_IPV6. */
__u8 is_ipv6;
/* Set to 1 by parse_packet() when the L4 protocol is IPPROTO_UDP. */
__u8 is_udp;
/* Set to 1 by parse_packet() when the L4 protocol is IPPROTO_TCP. */
__u8 is_tcp;
/* Non-zero when the packet was classified as an IP fragment. */
__u8 is_fragmented;
/* L4 ports copied verbatim from the TCP/UDP header (the dump helpers apply bpf_ntohs() before printing). */
__u16 src_port;
__u16 dst_port;
/* IPv4 addresses copied verbatim from iphdr.saddr / iphdr.daddr. */
__u32 in4_src;
__u32 in4_dst;
/* IPv6 addresses copied from ipv6hdr.saddr / ipv6hdr.daddr. */
struct in6_addr in6_src;
struct in6_addr in6_dst;
/* crc32_hash() of the source / destination address (seed 0). */
__u32 src_addr_hash;
__u32 dst_addr_hash;
/* crc32_hash() of the source / destination port, seeded with the address hash; only written in TUPLE4 mode. */
__u32 src_port_hash;
__u32 dst_port_hash;
/* Final flow hash that is reduced modulo the queue count. */
__u32 last_hash;
/* Chosen queue index; select_rss_queue() resets it to -1 before choosing. */
int select_queue;
/* The socket buffer being steered. */
struct __sk_buff *skb;
};
/*
 * Debug helper: trace the IPv4 source/destination addresses stored in
 * *packet and then every byte of the IPv4 header *ip4 via bpf_printk().
 *
 * packet: parsed packet state (in4_src / in4_dst / skb are read).
 * ip4:    copy of the fixed IPv4 header to hex-dump (sizeof(*ip4) bytes).
 */
static inline void dump_ipv4_header(struct packet *packet, struct iphdr *ip4)
{
    /* Addresses are printed one byte at a time, lowest byte of the stored value first. */
    bpf_printk("tun_rss_steering: ipv4 %p src_addr ip[0-1]: %d.%d", packet->skb, (packet->in4_src) & 0xFF, (packet->in4_src >> 8) & 0xFF);
    bpf_printk("tun_rss_steering: ipv4 %p src_addr ip[2-3]: %d.%d", packet->skb, (packet->in4_src >> 16) & 0xFF, (packet->in4_src >> 24) & 0xFF);
    bpf_printk("tun_rss_steering: ipv4 %p dst_addr ip[0-1]: %d.%d", packet->skb, (packet->in4_dst) & 0xFF, (packet->in4_dst >> 8) & 0xFF);
    bpf_printk("tun_rss_steering: ipv4 %p dst_addr ip[2-3]: %d.%d", packet->skb, (packet->in4_dst >> 16) & 0xFF, (packet->in4_dst >> 24) & 0xFF);

    /*
     * Read the header as unsigned bytes: with a plain (possibly signed) char,
     * bytes >= 0x80 are sign-extended and show up as ffffffXX in the trace.
     */
    const unsigned char *bytes = (const unsigned char *)ip4;
    int len = sizeof(*ip4);

    for (int i = 0; i < len; i++)
    {
        /*
         * Plain "%x": bpf_trace_printk() historically accepts no flags or
         * field widths and rejects the whole format otherwise.
         */
        bpf_printk("tun_rss_steering: ipv4 %p header hex[%d]: %x", packet->skb, i, bytes[i]);
    }
}
/*
 * Debug helper: trace the IPv6 source/destination addresses stored in
 * *packet (as eight 16-bit groups each) and then every byte of the fixed
 * IPv6 header *ip6 via bpf_printk().
 *
 * packet: parsed packet state (in6_src / in6_dst / skb are read).
 * ip6:    copy of the fixed IPv6 header to hex-dump (sizeof(*ip6) bytes).
 */
static inline void dump_ipv6_packet(struct packet *packet, struct ipv6hdr *ip6)
{
    bpf_printk("tun_rss_steering: ipv6 %p src_addr ip[0-1]: %x:%x", packet->skb, bpf_ntohs(packet->in6_src.s6_addr16[0]), bpf_ntohs(packet->in6_src.s6_addr16[1]));
    bpf_printk("tun_rss_steering: ipv6 %p src_addr ip[2-3]: %x:%x", packet->skb, bpf_ntohs(packet->in6_src.s6_addr16[2]), bpf_ntohs(packet->in6_src.s6_addr16[3]));
    bpf_printk("tun_rss_steering: ipv6 %p src_addr ip[4-5]: %x:%x", packet->skb, bpf_ntohs(packet->in6_src.s6_addr16[4]), bpf_ntohs(packet->in6_src.s6_addr16[5]));
    bpf_printk("tun_rss_steering: ipv6 %p src_addr ip[6-7]: %x:%x", packet->skb, bpf_ntohs(packet->in6_src.s6_addr16[6]), bpf_ntohs(packet->in6_src.s6_addr16[7]));
    bpf_printk("tun_rss_steering: ipv6 %p dst_addr ip[0-1]: %x:%x", packet->skb, bpf_ntohs(packet->in6_dst.s6_addr16[0]), bpf_ntohs(packet->in6_dst.s6_addr16[1]));
    bpf_printk("tun_rss_steering: ipv6 %p dst_addr ip[2-3]: %x:%x", packet->skb, bpf_ntohs(packet->in6_dst.s6_addr16[2]), bpf_ntohs(packet->in6_dst.s6_addr16[3]));
    bpf_printk("tun_rss_steering: ipv6 %p dst_addr ip[4-5]: %x:%x", packet->skb, bpf_ntohs(packet->in6_dst.s6_addr16[4]), bpf_ntohs(packet->in6_dst.s6_addr16[5]));
    bpf_printk("tun_rss_steering: ipv6 %p dst_addr ip[6-7]: %x:%x", packet->skb, bpf_ntohs(packet->in6_dst.s6_addr16[6]), bpf_ntohs(packet->in6_dst.s6_addr16[7]));

    /*
     * Read the header as unsigned bytes: with a plain (possibly signed) char,
     * bytes >= 0x80 are sign-extended and show up as ffffffXX in the trace.
     */
    const unsigned char *bytes = (const unsigned char *)ip6;
    int len = sizeof(*ip6);

    for (int i = 0; i < len; i++)
    {
        /*
         * Plain "%x": bpf_trace_printk() historically accepts no flags or
         * field widths and rejects the whole format otherwise.
         */
        bpf_printk("tun_rss_steering: ipv6 %p header hex[%d]: %x", packet->skb, i, bytes[i]);
    }
}
/*
 * Debug helper: trace the parsed flow (addresses, ports), the intermediate
 * hashes and the selected queue of *packet via bpf_printk().
 *
 * packet: fully parsed and hashed packet state.
 * config: currently unused; kept so the signature matches the other helpers.
 *
 * The hash fields are __u32 and are therefore printed with %u; printing them
 * with %d would show values >= 2^31 as negative numbers. select_queue is a
 * signed int and keeps %d.
 */
static inline void dump_packet_info(struct packet *packet, struct bpf_config *config)
{
    if (packet->is_ipv4)
    {
        bpf_printk("tun_rss_steering ipv4 %p src_addr ip[0-1]: %d.%d", packet->skb, (packet->in4_src) & 0xFF, (packet->in4_src >> 8) & 0xFF);
        bpf_printk("tun_rss_steering ipv4 %p src_addr ip[2-3]: %d.%d", packet->skb, (packet->in4_src >> 16) & 0xFF, (packet->in4_src >> 24) & 0xFF);
        bpf_printk("tun_rss_steering ipv4 %p dst_addr ip[0-1]: %d.%d", packet->skb, (packet->in4_dst) & 0xFF, (packet->in4_dst >> 8) & 0xFF);
        bpf_printk("tun_rss_steering ipv4 %p dst_addr ip[2-3]: %d.%d", packet->skb, (packet->in4_dst >> 16) & 0xFF, (packet->in4_dst >> 24) & 0xFF);
        bpf_printk("tun_rss_steering ipv4 %p src_port: %d dst_port: %d", packet->skb, bpf_ntohs(packet->src_port), bpf_ntohs(packet->dst_port));
        bpf_printk("tun_rss_steering ipv4 %p src_addr_hash: %u dst_addr_hash: %u", packet->skb, packet->src_addr_hash, packet->dst_addr_hash);
        bpf_printk("tun_rss_steering ipv4 %p src_port_hash: %u dst_port_hash: %u", packet->skb, packet->src_port_hash, packet->dst_port_hash);
        bpf_printk("tun_rss_steering ipv4 %p last_hash: %u select_queue: %d", packet->skb, packet->last_hash, packet->select_queue);
    }

    if (packet->is_ipv6)
    {
        bpf_printk("tun_rss_steering ipv6 %p src_addr ip[0-1]: %x:%x", packet->skb, bpf_ntohs(packet->in6_src.s6_addr16[0]), bpf_ntohs(packet->in6_src.s6_addr16[1]));
        bpf_printk("tun_rss_steering ipv6 %p src_addr ip[2-3]: %x:%x", packet->skb, bpf_ntohs(packet->in6_src.s6_addr16[2]), bpf_ntohs(packet->in6_src.s6_addr16[3]));
        bpf_printk("tun_rss_steering ipv6 %p src_addr ip[4-5]: %x:%x", packet->skb, bpf_ntohs(packet->in6_src.s6_addr16[4]), bpf_ntohs(packet->in6_src.s6_addr16[5]));
        bpf_printk("tun_rss_steering ipv6 %p src_addr ip[6-7]: %x:%x", packet->skb, bpf_ntohs(packet->in6_src.s6_addr16[6]), bpf_ntohs(packet->in6_src.s6_addr16[7]));
        bpf_printk("tun_rss_steering ipv6 %p dst_addr ip[0-1]: %x:%x", packet->skb, bpf_ntohs(packet->in6_dst.s6_addr16[0]), bpf_ntohs(packet->in6_dst.s6_addr16[1]));
        bpf_printk("tun_rss_steering ipv6 %p dst_addr ip[2-3]: %x:%x", packet->skb, bpf_ntohs(packet->in6_dst.s6_addr16[2]), bpf_ntohs(packet->in6_dst.s6_addr16[3]));
        bpf_printk("tun_rss_steering ipv6 %p dst_addr ip[4-5]: %x:%x", packet->skb, bpf_ntohs(packet->in6_dst.s6_addr16[4]), bpf_ntohs(packet->in6_dst.s6_addr16[5]));
        bpf_printk("tun_rss_steering ipv6 %p dst_addr ip[6-7]: %x:%x", packet->skb, bpf_ntohs(packet->in6_dst.s6_addr16[6]), bpf_ntohs(packet->in6_dst.s6_addr16[7]));
        bpf_printk("tun_rss_steering ipv6 %p src_port: %d dst_port: %d", packet->skb, bpf_ntohs(packet->src_port), bpf_ntohs(packet->dst_port));
        bpf_printk("tun_rss_steering ipv6 %p src_addr_hash: %u dst_addr_hash: %u", packet->skb, packet->src_addr_hash, packet->dst_addr_hash);
        bpf_printk("tun_rss_steering ipv6 %p src_port_hash: %u dst_port_hash: %u", packet->skb, packet->src_port_hash, packet->dst_port_hash);
        bpf_printk("tun_rss_steering ipv6 %p last_hash: %u select_queue: %d", packet->skb, packet->last_hash, packet->select_queue);
    }
}
/*
 * Debug helper: when debug logging is enabled in *config, trace the three
 * configuration values (debug flag, queue count, hash mode) via bpf_printk().
 * Does nothing when debug logging is disabled.
 */
static inline void dump_config_info(struct bpf_config *config)
{
    /* Nothing to report unless debug logging was switched on. */
    if (!bpf_config_get_debug_log(config))
    {
        return;
    }

    bpf_printk("tun_rss_steering: config->bpf_debug_log %d", bpf_config_get_debug_log(config));
    bpf_printk("tun_rss_steering: config->bpf_queue_num %d", bpf_config_get_queue_num(config));
    bpf_printk("tun_rss_steering: config->bpf_hash_mode %d", bpf_config_get_hash_mode(config));
}
/*
 * Decide whether hdr_type is an IPv6 extension header this program walks.
 *
 * return 1: hdr_type is not one of the handled extension headers, so there
 *           is nothing (more) to process.
 * return 0: hdr_type is a handled extension header and must be processed.
 */
static inline int ipv6_extension_need_skip(__u8 hdr_type)
{
    /*
     * TODO
     * kni_ipv6_header_parse() only handles the following 4 IPv6 extension
     * headers:
     *   IPPROTO_AH
     *   IPPROTO_HOPOPTS
     *   IPPROTO_ROUTING
     *   IPPROTO_DSTOPTS
     *
     * In other words, IPv6 traffic that KNI hands back to TFE only carries
     * those 4 extension headers, and traffic that TFE re-injects into KNI
     * never contains any other one, so this BPF program only handles them.
     *
     * Because the program must support 4-tuple steering it has to know
     * whether an IPv6 packet is fragmented, hence IPPROTO_FRAGMENT is
     * handled here as well.
     */
    const int is_handled_extension = hdr_type == IPPROTO_AH ||
                                     hdr_type == IPPROTO_HOPOPTS ||
                                     hdr_type == IPPROTO_ROUTING ||
                                     hdr_type == IPPROTO_DSTOPTS ||
                                     hdr_type == IPPROTO_FRAGMENT;

    return is_handled_extension ? 0 : 1;
}
/*
 * Walk the chain of IPv6 extension headers that follows the fixed header.
 *
 * packet:      packet state; is_fragmented is set to 1 when a Fragment
 *              header is encountered, and packet->skb is read.
 * l4_protocol: in: the Next Header value of the header preceding *l4_offset;
 *              out: the first protocol that is not a handled extension header.
 * l4_offset:   in: offset (from the network header) of the first extension
 *              header; out: offset of the header identified by *l4_protocol.
 *
 * Returns 0 when the walk ended on a non-extension protocol, -1 when a header
 * could not be loaded or more than IP6_EXTENSIONS_COUNT headers are chained.
 */
static inline int parse_ipv6_extension(struct packet *packet, __u8 *l4_protocol, int *l4_offset)
{
    if (ipv6_extension_need_skip(*l4_protocol))
    {
        return 0;
    }

    struct ipv6_opt_hdr ext_hdr = {0};
    for (unsigned int i = 0; i < IP6_EXTENSIONS_COUNT; ++i)
    {
        if (bpf_skb_load_bytes_relative(packet->skb, *l4_offset, &ext_hdr, sizeof(ext_hdr), BPF_HDR_START_NET))
        {
            bpf_printk("tun_rss_steering: unable get ipv6 ext header");
            return -1;
        }

        /*
         * *l4_protocol still names the header that was just loaded; its
         * length encoding depends on the header type.
         */
        if (*l4_protocol == IPPROTO_FRAGMENT)
        {
            packet->is_fragmented = 1;
            /* The Fragment header is always 8 bytes; its 2nd byte is reserved, not a length. */
            *l4_offset += 8;
        }
        else if (*l4_protocol == IPPROTO_AH)
        {
            /* AH stores its length in 4-byte words minus 2 (RFC 4302). */
            *l4_offset += (ext_hdr.hdrlen + 2) * 4;
        }
        else
        {
            /* Hop-by-Hop, Routing and Destination Options: 8-byte units, not counting the first 8 bytes. */
            *l4_offset += (ext_hdr.hdrlen + 1) * 8;
        }

        *l4_protocol = ext_hdr.nexthdr;
        if (ipv6_extension_need_skip(ext_hdr.nexthdr))
        {
            return 0;
        }
    }

    /* Too many chained extension headers. */
    return -1;
}
/*
 * Parse the L3 and L4 headers of packet->skb and record the flow fields
 * (address family, addresses, L4 protocol, ports) in *packet.
 *
 * packet: packet state; packet->skb must be set by the caller.
 * config: only consulted for the debug-log flag.
 *
 * Returns 0 when an unfragmented IPv4/IPv6 TCP or UDP packet was parsed,
 * -1 otherwise (invalid arguments, unreadable header, unsupported L3/L4
 * protocol, malformed IPv4 header length, or a fragmented packet).
 */
static inline int parse_packet(struct packet *packet, struct bpf_config *config)
{
    /* The EtherType follows the two 6-byte MAC addresses of the Ethernet header. */
    int l3_offset = 12;
    int l4_offset = 0;
    __u8 l4_protocol = 0;
    __u16 l3_protocol = 0;

    if (!packet || !packet->skb)
    {
        bpf_printk("tun_rss_steering: invalid __sk_buff pointer");
        return -1;
    }

    if (bpf_skb_load_bytes_relative(packet->skb, l3_offset, &l3_protocol, sizeof(l3_protocol), BPF_HDR_START_MAC))
    {
        bpf_printk("tun_rss_steering: unable get l3 protocol");
        return -1;
    }

    if (bpf_ntohs(l3_protocol) == ETH_P_IP)
    {
        packet->is_ipv4 = 1;

        struct iphdr ip = {0};
        if (bpf_skb_load_bytes_relative(packet->skb, 0, &ip, sizeof(ip), BPF_HDR_START_NET))
        {
            bpf_printk("tun_rss_steering: unable get ipv4 header");
            return -1;
        }

        packet->in4_src = ip.saddr;
        packet->in4_dst = ip.daddr;

        /* IHL counts 32-bit words; anything below 5 would place L4 inside the IPv4 header. */
        if (ip.ihl < 5)
        {
            bpf_printk("tun_rss_steering: invalid ipv4 header length %d", ip.ihl);
            return -1;
        }

        /*
         * The frag_off portion of the header consists of:
         * +----+----+----+----------------------------------+
         * | RS | DF | MF | ...13 bits of fragment offset... |
         * +----+----+----+----------------------------------+
         * If "More fragments" or the offset is nonzero, then this is an IP fragment (RFC791).
         * 0x2000 is the MF bit and 0x1FFF the offset; the DF bit (0x4000)
         * says nothing about whether this packet is a fragment.
         */
        packet->is_fragmented = (bpf_ntohs(ip.frag_off) & (0x2000 | 0x1FFF)) != 0;

        l4_protocol = ip.protocol;
        l4_offset = ip.ihl * 4;

        if (packet->is_fragmented)
        {
            bpf_printk("tun_rss_steering: ipv4 %p is fragmented", packet->skb);
            if (bpf_config_get_debug_log(config))
            {
                dump_ipv4_header(packet, &ip);
            }
            return -1;
        }
    }
    else if (bpf_ntohs(l3_protocol) == ETH_P_IPV6)
    {
        packet->is_ipv6 = 1;

        struct ipv6hdr ip6 = {0};
        if (bpf_skb_load_bytes_relative(packet->skb, 0, &ip6, sizeof(ip6), BPF_HDR_START_NET))
        {
            bpf_printk("tun_rss_steering: unable get ipv6 header");
            return -1;
        }

        packet->in6_src = ip6.saddr;
        packet->in6_dst = ip6.daddr;

        l4_protocol = ip6.nexthdr;
        l4_offset = sizeof(ip6);

        /* Skip extension headers; this also flags packets carrying a Fragment header. */
        if (parse_ipv6_extension(packet, &l4_protocol, &l4_offset) == -1)
        {
            return -1;
        }

        if (packet->is_fragmented)
        {
            bpf_printk("tun_rss_steering: ipv6 %p is fragmented", packet->skb);
            if (bpf_config_get_debug_log(config))
            {
                dump_ipv6_packet(packet, &ip6);
            }
            return -1;
        }
    }
    else
    {
        bpf_printk("tun_rss_steering: invalid l3 protocol %d", bpf_ntohs(l3_protocol));
        return -1;
    }

    if (l4_protocol == IPPROTO_TCP)
    {
        packet->is_tcp = 1;

        struct tcphdr tcp = {0};
        if (bpf_skb_load_bytes_relative(packet->skb, l4_offset, &tcp, sizeof(tcp), BPF_HDR_START_NET))
        {
            bpf_printk("tun_rss_steering: unable get tcp header");
            return -1;
        }

        /* Ports are kept exactly as they appear in the header (no byte swap). */
        packet->src_port = tcp.source;
        packet->dst_port = tcp.dest;
    }
    else if (l4_protocol == IPPROTO_UDP)
    {
        packet->is_udp = 1;

        struct udphdr udp = {0};
        if (bpf_skb_load_bytes_relative(packet->skb, l4_offset, &udp, sizeof(udp), BPF_HDR_START_NET))
        {
            bpf_printk("tun_rss_steering: unable get udp header");
            return -1;
        }

        /* Ports are kept exactly as they appear in the header (no byte swap). */
        packet->src_port = udp.source;
        packet->dst_port = udp.dest;
    }
    else
    {
        bpf_printk("tun_rss_steering: invalid l4 protocol %d", l4_protocol);
        return -1;
    }

    return 0;
}
static inline void select_rss_queue(struct packet *packet, struct bpf_config *config)
{
packet->select_queue = -1;
if (packet->is_ipv4)
{
if (bpf_config_get_hash_mode(config) == BPF_HASH_MODE_TUPLE4)
{
packet->src_addr_hash = crc32_hash(&packet->in4_src, 4, 0);
packet->dst_addr_hash = crc32_hash(&packet->in4_dst, 4, 0);
packet->last_hash = packet->src_addr_hash ^ packet->dst_addr_hash;
packet->src_port_hash = crc32_hash(&packet->src_port, 2, packet->last_hash);
packet->dst_port_hash = crc32_hash(&packet->dst_port, 2, packet->last_hash);
packet->last_hash = packet->src_port_hash ^ packet->dst_port_hash;
packet->select_queue = packet->last_hash % bpf_config_get_queue_num(config);
}
else if (bpf_config_get_hash_mode(config) == BPF_HASH_MODE_TUPLE2)
{
packet->src_addr_hash = crc32_hash(&packet->in4_src, 4, 0);
packet->dst_addr_hash = crc32_hash(&packet->in4_dst, 4, 0);
packet->last_hash = packet->src_addr_hash ^ packet->dst_addr_hash;
packet->select_queue = packet->last_hash % bpf_config_get_queue_num(config);
}
}
if (packet->is_ipv6)
{
if (bpf_config_get_hash_mode(config) == BPF_HASH_MODE_TUPLE4)
{
packet->src_addr_hash = crc32_hash(&packet->in6_src, 16, 0);
packet->dst_addr_hash = crc32_hash(&packet->in6_dst, 16, 0);
packet->last_hash = packet->src_addr_hash ^ packet->dst_addr_hash;
packet->src_port_hash = crc32_hash(&packet->src_port, 2, packet->last_hash);
packet->dst_port_hash = crc32_hash(&packet->dst_port, 2, packet->last_hash);
packet->last_hash = packet->src_port_hash ^ packet->dst_port_hash;
packet->select_queue = packet->last_hash % bpf_config_get_queue_num(config);
}
else if (bpf_config_get_hash_mode(config) == BPF_HASH_MODE_TUPLE2)
{
packet->src_addr_hash = crc32_hash(&packet->in6_src, 16, 0);
packet->dst_addr_hash = crc32_hash(&packet->in6_dst, 16, 0);
packet->last_hash = packet->src_addr_hash ^ packet->dst_addr_hash;
packet->select_queue = packet->last_hash % bpf_config_get_queue_num(config);
}
}
}
/*
 * Program entry point: pick the queue for skb.
 *
 * Loads the configuration, validates the queue count and hash mode, parses
 * the packet and hashes its flow. Returns the selected queue index, or 0
 * when the configuration is invalid or the packet cannot be parsed.
 */
SEC("tun_rss_steering")
int bpf_tun_rss_steering(struct __sk_buff *skb)
{
    struct bpf_config config = {0};
    /* Zero-initialised, so every flag starts cleared; only skb needs setting. */
    struct packet packet = {0};

    packet.skb = skb;

    bpf_config_lookup_map(&config);
    dump_config_info(&config);

    if (bpf_config_get_queue_num(&config) <= 0)
    {
        bpf_printk("tun_rss_steering: invalid queue num %d", bpf_config_get_queue_num(&config));
        return 0;
    }

    if (!(bpf_config_get_hash_mode(&config) == BPF_HASH_MODE_TUPLE2 || bpf_config_get_hash_mode(&config) == BPF_HASH_MODE_TUPLE4))
    {
        bpf_printk("tun_rss_steering: invalid hash mode %d", bpf_config_get_hash_mode(&config));
        return 0;
    }

    /* Anything that cannot be parsed falls back to queue 0. */
    if (parse_packet(&packet, &config) == -1)
    {
        return 0;
    }

    select_rss_queue(&packet, &config);

    if (bpf_config_get_debug_log(&config))
    {
        dump_packet_info(&packet, &config);
    }

    return packet.select_queue;
}
/* License string of this program, placed in the "license" section via SEC(). */
char _license[] SEC("license") = "GPL";