This repository has been archived on 2025-09-14. You can view files and clone it, but cannot push or open issues or pull requests.
Files
tango-kni/bpf/bpf_tun_rss_steering.c

423 lines
13 KiB
C
Raw Normal View History

#include <linux/in.h>
#include <linux/bpf.h>
#include <linux/udp.h>
#include <linux/tcp.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/if_ether.h>
#include "rte_hash_crc.h"
#include "bpf_conf_kernel.h"
#include "bpf_helpers.h"
#include "libendian.h"
/* Upper bound on how many chained IPv6 extension headers bpf_pkt_parser_ext6() walks before giving up. */
#define IP6_EXTENSIONS_COUNT 11
/*
 * Per-packet state shared by the parser, the queue selector and the debug
 * logger: what was recognised in the packet, the extracted addresses/ports,
 * and the intermediate and final hash values.
 */
typedef struct bpf_pkt_s
{
/* Set to 1 by bpf_pkt_parser() when the EtherType is ETH_P_IP / ETH_P_IPV6. */
__u8 is_ipv4;
__u8 is_ipv6;
/* Set to 1 by bpf_pkt_parser() when the L4 protocol is UDP / TCP. */
__u8 is_udp;
__u8 is_tcp;
/* Non-zero when the packet was detected as an IPv4 or IPv6 fragment. */
__u8 is_fragmented;
/* L4 ports copied as-is from the TCP/UDP header; the debug log converts them with bpf_ntohs(). */
__u16 src_port;
__u16 dst_port;
/* IPv4 addresses copied as-is from iphdr.saddr / iphdr.daddr. */
__u32 in4_src;
__u32 in4_dst;
/* IPv6 addresses copied as-is from ipv6hdr.saddr / ipv6hdr.daddr. */
struct in6_addr in6_src;
struct in6_addr in6_dst;
/* rte_hash_crc() of the source / destination address (seed 0). */
__u32 src_addr_hash;
__u32 dst_addr_hash;
/* rte_hash_crc() of the source / destination port, seeded with the address hash (TUPLE4 mode only). */
__u32 src_port_hash;
__u32 dst_port_hash;
/* Combined hash that is reduced modulo the queue count. */
__u32 last_hash;
/* Chosen queue index, or -1 when bpf_pkt_select_queue() did not compute one. */
int select_queue;
/* Packet being examined; assigned by the program entry point before parsing. */
struct __sk_buff *skb;
} bpf_pkt_t;
/*
 * Tell whether an IPv6 "next header" value names an extension header that the
 * parser must step over before it can reach the L4 header.
 *
 * return 0: hdr_type is not one of the handled extension headers
 * return 1: hdr_type is AH, hop-by-hop, routing, destination options or fragment
 */
static inline int ip6_next_header_is_need_proc(__u8 hdr_type)
{
    /*
     * TODO (original note, partly lost; wording reconstructed):
     * kni_ipv6_header_parse() appears to handle four IPv6 extension headers:
     *     IPPROTO_AH
     *     IPPROTO_HOPOPTS
     *     IPPROTO_ROUTING
     *     IPPROTO_DSTOPTS
     *
     * The BPF side recognises those four plus IPPROTO_FRAGMENT.
     * NOTE(review): confirm the KNI/TFE IPv6 handling and this list stay in sync.
     */
    return hdr_type == IPPROTO_AH ||
           hdr_type == IPPROTO_HOPOPTS ||
           hdr_type == IPPROTO_ROUTING ||
           hdr_type == IPPROTO_DSTOPTS ||
           hdr_type == IPPROTO_FRAGMENT;
}
/*
 * Walk the chain of IPv6 extension headers until a non-extension (L4)
 * protocol is reached.
 *
 * pkt         : packet state; pkt->skb is read, pkt->is_fragmented is set to 1
 *               when a fragment header is encountered.
 * l4_protocol : in  - next-header value taken from the fixed IPv6 header;
 *               out - first next-header value that is not a handled extension.
 * l4_offset   : in  - offset (from the network header) of the first header
 *                     following the fixed IPv6 header;
 *               out - offset of the header identified by *l4_protocol.
 *
 * return 0 : chain fully walked (or there were no extension headers)
 * return -1: a header could not be loaded, or more than IP6_EXTENSIONS_COUNT
 *            extension headers are chained
 */
static inline int bpf_pkt_parser_ext6(bpf_pkt_t *pkt, __u8 *l4_protocol, int *l4_offset)
{
    if (!ip6_next_header_is_need_proc(*l4_protocol))
    {
        return 0;
    }

    struct ipv6_opt_hdr ext_hdr = {0};
    for (unsigned int i = 0; i < IP6_EXTENSIONS_COUNT; ++i)
    {
        /* Type of the header located at *l4_offset; it decides how the length is encoded. */
        __u8 cur_type = *l4_protocol;

        if (bpf_skb_load_bytes_relative(pkt->skb, *l4_offset, &ext_hdr, sizeof(ext_hdr), BPF_HDR_START_NET))
        {
            bpf_printk("bpf_tun_rss_steering unable get ipv6 ext header");
            return -1;
        }

        if (cur_type == IPPROTO_FRAGMENT)
        {
            pkt->is_fragmented = 1;
            /* The fragment header is always 8 bytes; its second byte is reserved, not a length. */
            *l4_offset += 8;
        }
        else if (cur_type == IPPROTO_AH)
        {
            /* AH encodes its length in 32-bit words minus 2 (unlike the other extension headers). */
            *l4_offset += (ext_hdr.hdrlen + 2) * 4;
        }
        else
        {
            /* Hop-by-hop / routing / destination options: length in 8-byte units, first unit not counted. */
            *l4_offset += (ext_hdr.hdrlen + 1) * 8;
        }

        *l4_protocol = ext_hdr.nexthdr;
        if (!ip6_next_header_is_need_proc(ext_hdr.nexthdr))
        {
            return 0;
        }
    }

    /* Too many chained extension headers. */
    return -1;
}
/*
 * Debug helper: print the IPv4 source/destination addresses stored in *pkt and
 * then every byte of the IPv4 header *ip4 in hex, one bpf_printk() per byte.
 * Called by the parser for fragmented IPv4 packets when debug logging is on.
 */
static inline void bpf_dump_ipv4_header(bpf_pkt_t *pkt, struct iphdr *ip4)
{
    bpf_printk("bpf_tun_rss_steering ipv4 %p fragmented, src_addr ip[0-1]: %d.%d", pkt->skb, (pkt->in4_src) & 0xFF, (pkt->in4_src >> 8) & 0xFF);
    bpf_printk("bpf_tun_rss_steering ipv4 %p fragmented, src_addr ip[2-3]: %d.%d", pkt->skb, (pkt->in4_src >> 16) & 0xFF, (pkt->in4_src >> 24) & 0xFF);
    bpf_printk("bpf_tun_rss_steering ipv4 %p fragmented, dst_addr ip[0-1]: %d.%d", pkt->skb, (pkt->in4_dst) & 0xFF, (pkt->in4_dst >> 8) & 0xFF);
    bpf_printk("bpf_tun_rss_steering ipv4 %p fragmented, dst_addr ip[2-3]: %d.%d", pkt->skb, (pkt->in4_dst >> 16) & 0xFF, (pkt->in4_dst >> 24) & 0xFF);

    /* Read the header as unsigned bytes so values >= 0x80 are not sign-extended to ffffffXX. */
    const __u8 *ptr = (const __u8 *)ip4;
    int len = sizeof(*ip4);
    for (int i = 0; i < len; i++)
    {
        /*
         * Plain "%x" prints the same as the former "%0x" (a 0 flag without a width
         * pads nothing). NOTE(review): older kernels' bpf_trace_printk() format
         * checker is believed to reject flag characters, so the bare form is used.
         */
        bpf_printk("bpf_tun_rss_steering ipv4 %p fragmented, dump header hex[%d]: %x", pkt->skb, i, ptr[i]);
    }
}
/*
 * Debug helper: print the first and last 16-bit groups pairs of the IPv6
 * source/destination addresses stored in *pkt and then every byte of the
 * fixed IPv6 header *ip6 in hex, one bpf_printk() per byte.
 * Called by the parser for fragmented IPv6 packets when debug logging is on.
 */
static inline void bpf_dump_ipv6_header(bpf_pkt_t *pkt, struct ipv6hdr *ip6)
{
    /* Only groups 0-1 and 6-7 are printed; the middle groups were disabled by the original author (reason not visible here). */
    bpf_printk("bpf_tun_rss_steering ipv6 %p fragmented, src_addr ip[0-1]: %x:%x", pkt->skb, bpf_ntohs(pkt->in6_src.s6_addr16[0]), bpf_ntohs(pkt->in6_src.s6_addr16[1]));
    // bpf_printk("bpf_tun_rss_steering ipv6 %p fragmented, src_addr ip[2-3]: %x:%x", pkt->skb, bpf_ntohs(pkt->in6_src.s6_addr16[2]), bpf_ntohs(pkt->in6_src.s6_addr16[3]));
    // bpf_printk("bpf_tun_rss_steering ipv6 %p fragmented, src_addr ip[4-5]: %x:%x", pkt->skb, bpf_ntohs(pkt->in6_src.s6_addr16[4]), bpf_ntohs(pkt->in6_src.s6_addr16[5]));
    bpf_printk("bpf_tun_rss_steering ipv6 %p fragmented, src_addr ip[6-7]: %x:%x", pkt->skb, bpf_ntohs(pkt->in6_src.s6_addr16[6]), bpf_ntohs(pkt->in6_src.s6_addr16[7]));
    bpf_printk("bpf_tun_rss_steering ipv6 %p fragmented, dst_addr ip[0-1]: %x:%x", pkt->skb, bpf_ntohs(pkt->in6_dst.s6_addr16[0]), bpf_ntohs(pkt->in6_dst.s6_addr16[1]));
    // bpf_printk("bpf_tun_rss_steering ipv6 %p fragmented, dst_addr ip[2-3]: %x:%x", pkt->skb, bpf_ntohs(pkt->in6_dst.s6_addr16[2]), bpf_ntohs(pkt->in6_dst.s6_addr16[3]));
    // bpf_printk("bpf_tun_rss_steering ipv6 %p fragmented, dst_addr ip[4-5]: %x:%x", pkt->skb, bpf_ntohs(pkt->in6_dst.s6_addr16[4]), bpf_ntohs(pkt->in6_dst.s6_addr16[5]));
    bpf_printk("bpf_tun_rss_steering ipv6 %p fragmented, dst_addr ip[6-7]: %x:%x", pkt->skb, bpf_ntohs(pkt->in6_dst.s6_addr16[6]), bpf_ntohs(pkt->in6_dst.s6_addr16[7]));

    /* Read the header as unsigned bytes so values >= 0x80 are not sign-extended to ffffffXX. */
    const __u8 *ptr = (const __u8 *)ip6;
    int len = sizeof(*ip6);
    for (int i = 0; i < len; i++)
    {
        /*
         * Plain "%x" prints the same as the former "%0x" (a 0 flag without a width
         * pads nothing). NOTE(review): older kernels' bpf_trace_printk() format
         * checker is believed to reject flag characters, so the bare form is used.
         */
        bpf_printk("bpf_tun_rss_steering ipv6 %p fragmented, dump header hex[%d]: %x", pkt->skb, i, ptr[i]);
    }
}
/*
 * Tell whether an IPv4 header belongs to a fragment.
 *
 * return 1: "More fragments" is set or the fragment offset is non-zero
 * return 0: the datagram is not fragmented
 *
 * The result is normalised to 0/1 on purpose: the raw masked value is 16 bits
 * wide in network byte order, and storing it in an 8-bit flag would drop the
 * byte that carries the low offset bits on little-endian hosts, hiding
 * trailing fragments with a small offset.
 */
static inline int ipv4_is_fragment(const struct iphdr *ip4)
{
    /* The frag_off portion of the header consists of:
     *
     * +----+----+----+----------------------------------+
     * | RS | DF | MF | ...13 bits of fragment offset... |
     * +----+----+----+----------------------------------+
     *
     * If "More fragments" or the offset is nonzero, then this is an IP fragment (RFC791).
     */
    return (ip4->frag_off & bpf_htons(0x3FFF)) != 0;
}
/*
 * Parse the L3 and L4 headers of pkt->skb and record the result in *pkt:
 * address family flag, addresses, L4 protocol flag and ports.
 *
 * pkt  : packet state; pkt->skb must already point at the packet.
 * conf : configuration; only consulted to decide whether fragmented headers
 *        are dumped to the trace log.
 *
 * return 0 : IPv4 or IPv6 packet carrying TCP or UDP, not fragmented
 * return -1: pkt/skb is NULL, a header could not be loaded, the IPv4 header
 *            length is invalid, the packet is a fragment, or the L3/L4
 *            protocol is not supported
 */
static inline int bpf_pkt_parser(bpf_pkt_t *pkt, bpf_conf_t *conf)
{
    /* Offset of the EtherType in an untagged Ethernet header (6-byte dst MAC + 6-byte src MAC). */
    int l3_offset = 12;
    int l4_offset = 0;
    __u8 l4_protocol = 0;
    __u16 l3_protocol = 0;

    if (!pkt || !pkt->skb)
    {
        bpf_printk("bpf_tun_rss_steering skb is null");
        return -1;
    }

    if (bpf_skb_load_bytes_relative(pkt->skb, l3_offset, &l3_protocol, sizeof(l3_protocol), BPF_HDR_START_MAC))
    {
        bpf_printk("bpf_tun_rss_steering unable get l3 protocol");
        return -1;
    }

    if (bpf_ntohs(l3_protocol) == ETH_P_IP)
    {
        pkt->is_ipv4 = 1;
        struct iphdr ip = {};
        if (bpf_skb_load_bytes_relative(pkt->skb, 0, &ip, sizeof(ip), BPF_HDR_START_NET))
        {
            bpf_printk("bpf_tun_rss_steering unable get ipv4 header");
            return -1;
        }

        /* ihl counts 32-bit words; anything below 5 would put the L4 offset inside the IPv4 header itself. */
        if (ip.ihl < 5)
        {
            bpf_printk("bpf_tun_rss_steering invalid ipv4 header length");
            return -1;
        }

        pkt->in4_src = ip.saddr;
        pkt->in4_dst = ip.daddr;
        /* is_fragmented is 8 bits wide: collapse the int result to 0/1 so no significant bits are truncated away. */
        pkt->is_fragmented = ipv4_is_fragment(&ip) ? 1 : 0;
        l4_protocol = ip.protocol;
        l4_offset = ip.ihl * 4;

        if (pkt->is_fragmented)
        {
            bpf_printk("bpf_tun_rss_steering ipv4 is fragmented");
            if (bpf_conf_get_debug_log(conf))
            {
                bpf_dump_ipv4_header(pkt, &ip);
            }
            return -1;
        }
    }
    else if (bpf_ntohs(l3_protocol) == ETH_P_IPV6)
    {
        pkt->is_ipv6 = 1;
        struct ipv6hdr ip6 = {};
        if (bpf_skb_load_bytes_relative(pkt->skb, 0, &ip6, sizeof(ip6), BPF_HDR_START_NET))
        {
            bpf_printk("bpf_tun_rss_steering unable get ipv6 header");
            return -1;
        }

        pkt->in6_src = ip6.saddr;
        pkt->in6_dst = ip6.daddr;
        l4_protocol = ip6.nexthdr;
        l4_offset = sizeof(ip6);

        /* Skip extension headers; this also flags fragments in pkt->is_fragmented. */
        if (bpf_pkt_parser_ext6(pkt, &l4_protocol, &l4_offset) == -1)
        {
            return -1;
        }

        if (pkt->is_fragmented)
        {
            bpf_printk("bpf_tun_rss_steering ipv6 is fragmented");
            if (bpf_conf_get_debug_log(conf))
            {
                bpf_dump_ipv6_header(pkt, &ip6);
            }
            return -1;
        }
    }
    else
    {
        /* Any other EtherType (including VLAN-tagged frames) ends up here. */
        bpf_printk("bpf_tun_rss_steering l3 protocol %d not support", bpf_ntohs(l3_protocol));
        return -1;
    }

    if (l4_protocol == IPPROTO_TCP)
    {
        pkt->is_tcp = 1;
        struct tcphdr tcp = {};
        if (bpf_skb_load_bytes_relative(pkt->skb, l4_offset, &tcp, sizeof(tcp), BPF_HDR_START_NET))
        {
            bpf_printk("bpf_tun_rss_steering unable get tcp header");
            return -1;
        }
        /* Ports stay in network byte order. */
        pkt->src_port = tcp.source;
        pkt->dst_port = tcp.dest;
    }
    else if (l4_protocol == IPPROTO_UDP)
    {
        pkt->is_udp = 1;
        struct udphdr udp = {};
        if (bpf_skb_load_bytes_relative(pkt->skb, l4_offset, &udp, sizeof(udp), BPF_HDR_START_NET))
        {
            bpf_printk("bpf_tun_rss_steering unable get udp header");
            return -1;
        }
        /* Ports stay in network byte order. */
        pkt->src_port = udp.source;
        pkt->dst_port = udp.dest;
    }
    else
    {
        bpf_printk("bpf_tun_rss_steering l4 protocol %d not support", l4_protocol);
        return -1;
    }

    return 0;
}
/*
 * Debug helper: print the parsed addresses, ports, intermediate hashes and the
 * selected queue of *pkt to the trace log.
 *
 * pkt  : fully parsed and hashed packet state.
 * conf : currently unused; kept so the signature matches the other helpers.
 *
 * The hash fields are __u32 and are printed with %u so that values with the
 * top bit set do not show up as negative numbers.
 */
static inline void bpf_pkt_debug_log(bpf_pkt_t *pkt, bpf_conf_t *conf)
{
    if (pkt->is_ipv4)
    {
        bpf_printk("bpf_tun_rss_steering ipv4 %p src_addr ip[0-1]: %d.%d", pkt->skb, (pkt->in4_src) & 0xFF, (pkt->in4_src >> 8) & 0xFF);
        bpf_printk("bpf_tun_rss_steering ipv4 %p src_addr ip[2-3]: %d.%d", pkt->skb, (pkt->in4_src >> 16) & 0xFF, (pkt->in4_src >> 24) & 0xFF);
        bpf_printk("bpf_tun_rss_steering ipv4 %p dst_addr ip[0-1]: %d.%d", pkt->skb, (pkt->in4_dst) & 0xFF, (pkt->in4_dst >> 8) & 0xFF);
        bpf_printk("bpf_tun_rss_steering ipv4 %p dst_addr ip[2-3]: %d.%d", pkt->skb, (pkt->in4_dst >> 16) & 0xFF, (pkt->in4_dst >> 24) & 0xFF);
        bpf_printk("bpf_tun_rss_steering ipv4 %p src_port: %d dst_port: %d", pkt->skb, bpf_ntohs(pkt->src_port), bpf_ntohs(pkt->dst_port));
        bpf_printk("bpf_tun_rss_steering ipv4 %p src_addr_hash: %u dst_addr_hash: %u", pkt->skb, pkt->src_addr_hash, pkt->dst_addr_hash);
        bpf_printk("bpf_tun_rss_steering ipv4 %p src_port_hash: %u dst_port_hash: %u", pkt->skb, pkt->src_port_hash, pkt->dst_port_hash);
        bpf_printk("bpf_tun_rss_steering ipv4 %p last_hash: %u select_queue: %d", pkt->skb, pkt->last_hash, pkt->select_queue);
    }

    if (pkt->is_ipv6)
    {
        /* Only groups 0-1 and 6-7 of each address are printed; the middle groups were disabled by the original author. */
        bpf_printk("bpf_tun_rss_steering ipv6 %p src_addr ip[0-1]: %x:%x", pkt->skb, bpf_ntohs(pkt->in6_src.s6_addr16[0]), bpf_ntohs(pkt->in6_src.s6_addr16[1]));
        // bpf_printk("bpf_tun_rss_steering ipv6 %p src_addr ip[2-3]: %x:%x", pkt->skb, bpf_ntohs(pkt->in6_src.s6_addr16[2]), bpf_ntohs(pkt->in6_src.s6_addr16[3]));
        // bpf_printk("bpf_tun_rss_steering ipv6 %p src_addr ip[4-5]: %x:%x", pkt->skb, bpf_ntohs(pkt->in6_src.s6_addr16[4]), bpf_ntohs(pkt->in6_src.s6_addr16[5]));
        bpf_printk("bpf_tun_rss_steering ipv6 %p src_addr ip[6-7]: %x:%x", pkt->skb, bpf_ntohs(pkt->in6_src.s6_addr16[6]), bpf_ntohs(pkt->in6_src.s6_addr16[7]));
        bpf_printk("bpf_tun_rss_steering ipv6 %p dst_addr ip[0-1]: %x:%x", pkt->skb, bpf_ntohs(pkt->in6_dst.s6_addr16[0]), bpf_ntohs(pkt->in6_dst.s6_addr16[1]));
        // bpf_printk("bpf_tun_rss_steering ipv6 %p dst_addr ip[2-3]: %x:%x", pkt->skb, bpf_ntohs(pkt->in6_dst.s6_addr16[2]), bpf_ntohs(pkt->in6_dst.s6_addr16[3]));
        // bpf_printk("bpf_tun_rss_steering ipv6 %p dst_addr ip[4-5]: %x:%x", pkt->skb, bpf_ntohs(pkt->in6_dst.s6_addr16[4]), bpf_ntohs(pkt->in6_dst.s6_addr16[5]));
        bpf_printk("bpf_tun_rss_steering ipv6 %p dst_addr ip[6-7]: %x:%x", pkt->skb, bpf_ntohs(pkt->in6_dst.s6_addr16[6]), bpf_ntohs(pkt->in6_dst.s6_addr16[7]));
        bpf_printk("bpf_tun_rss_steering ipv6 %p src_port: %d dst_port: %d", pkt->skb, bpf_ntohs(pkt->src_port), bpf_ntohs(pkt->dst_port));
        bpf_printk("bpf_tun_rss_steering ipv6 %p src_addr_hash: %u dst_addr_hash: %u", pkt->skb, pkt->src_addr_hash, pkt->dst_addr_hash);
        bpf_printk("bpf_tun_rss_steering ipv6 %p src_port_hash: %u dst_port_hash: %u", pkt->skb, pkt->src_port_hash, pkt->dst_port_hash);
        bpf_printk("bpf_tun_rss_steering ipv6 %p last_hash: %u select_queue: %d", pkt->skb, pkt->last_hash, pkt->select_queue);
    }
}
/*
 * Hash the parsed packet and derive the target queue.
 *
 * Both hash modes start with CRC(src addr) ^ CRC(dst addr). TUPLE4 then hashes
 * each port seeded with that value and XORs the two results. The final hash is
 * reduced modulo the configured queue count and stored in pkt->select_queue.
 * pkt->select_queue stays -1 when neither address family flag is set or the
 * hash mode is neither TUPLE2 nor TUPLE4.
 *
 * The queue count is used as a divisor and is not checked here; the program
 * entry point rejects counts <= 0 before calling this function.
 */
static inline void bpf_pkt_select_queue(bpf_pkt_t *pkt, bpf_conf_t *conf)
{
    pkt->select_queue = -1;

    if (pkt->is_ipv4)
    {
        int with_ports = bpf_conf_get_hash_mode(conf) == BPF_HASH_MODE_TUPLE4;

        if (with_ports || bpf_conf_get_hash_mode(conf) == BPF_HASH_MODE_TUPLE2)
        {
            pkt->src_addr_hash = rte_hash_crc(&pkt->in4_src, sizeof(pkt->in4_src), 0);
            pkt->dst_addr_hash = rte_hash_crc(&pkt->in4_dst, sizeof(pkt->in4_dst), 0);
            pkt->last_hash = pkt->src_addr_hash ^ pkt->dst_addr_hash;

            if (with_ports)
            {
                /* Fold the ports in, seeding each CRC with the address hash. */
                pkt->src_port_hash = rte_hash_crc(&pkt->src_port, sizeof(pkt->src_port), pkt->last_hash);
                pkt->dst_port_hash = rte_hash_crc(&pkt->dst_port, sizeof(pkt->dst_port), pkt->last_hash);
                pkt->last_hash = pkt->src_port_hash ^ pkt->dst_port_hash;
            }

            pkt->select_queue = pkt->last_hash % bpf_conf_get_queue_num(conf);
        }
    }

    if (pkt->is_ipv6)
    {
        int with_ports = bpf_conf_get_hash_mode(conf) == BPF_HASH_MODE_TUPLE4;

        if (with_ports || bpf_conf_get_hash_mode(conf) == BPF_HASH_MODE_TUPLE2)
        {
            pkt->src_addr_hash = rte_hash_crc(&pkt->in6_src, sizeof(pkt->in6_src), 0);
            pkt->dst_addr_hash = rte_hash_crc(&pkt->in6_dst, sizeof(pkt->in6_dst), 0);
            pkt->last_hash = pkt->src_addr_hash ^ pkt->dst_addr_hash;

            if (with_ports)
            {
                /* Fold the ports in, seeding each CRC with the address hash. */
                pkt->src_port_hash = rte_hash_crc(&pkt->src_port, sizeof(pkt->src_port), pkt->last_hash);
                pkt->dst_port_hash = rte_hash_crc(&pkt->dst_port, sizeof(pkt->dst_port), pkt->last_hash);
                pkt->last_hash = pkt->src_port_hash ^ pkt->dst_port_hash;
            }

            pkt->select_queue = pkt->last_hash % bpf_conf_get_queue_num(conf);
        }
    }
}
/*
 * Print the three configuration values (debug flag, queue count, hash mode)
 * to the trace log. Does nothing unless debug logging is enabled in *conf.
 */
static void bpf_conf_dump(bpf_conf_t *conf)
{
    if (!bpf_conf_get_debug_log(conf))
    {
        return;
    }

    bpf_printk("bpf_debug_log : %d", bpf_conf_get_debug_log(conf));
    bpf_printk("bpf_queue_num : %d", bpf_conf_get_queue_num(conf));
    bpf_printk("bpf_hash_mode : %d", bpf_conf_get_hash_mode(conf));
}
/*
 * Program entry point: load the configuration, validate it, parse the packet
 * and return the queue index computed from its hash.
 *
 * return pkt.select_queue when the packet was parsed and hashed;
 * return -1 when the configuration is invalid (queue count <= 0 or unknown
 * hash mode) or the packet could not be parsed.
 * NOTE(review): how the caller interprets -1 is not visible in this file.
 */
SEC("tun_rss_steering")
int bpf_tun_rss_steering(struct __sk_buff *skb)
{
    bpf_conf_t conf = {};
    bpf_pkt_t pkt = { .skb = skb };

    bpf_conf_lookup_map(&conf);
    bpf_conf_dump(&conf);

    if (bpf_conf_get_queue_num(&conf) <= 0)
    {
        bpf_printk("bpf_tun_rss_steering invalid queue num: %d", bpf_conf_get_queue_num(&conf));
        return -1;
    }

    /* Only the two known hash modes are accepted. */
    if (!(bpf_conf_get_hash_mode(&conf) == BPF_HASH_MODE_TUPLE2 || bpf_conf_get_hash_mode(&conf) == BPF_HASH_MODE_TUPLE4))
    {
        bpf_printk("bpf_tun_rss_steering invalid hash mode: %d", bpf_conf_get_hash_mode(&conf));
        return -1;
    }

    if (bpf_pkt_parser(&pkt, &conf) != -1)
    {
        bpf_pkt_select_queue(&pkt, &conf);
        if (bpf_conf_get_debug_log(&conf))
        {
            bpf_pkt_debug_log(&pkt, &conf);
        }
        return pkt.select_queue;
    }

    return -1;
}
/*
 * License string emitted through SEC("license").
 * NOTE(review): presumably must stay GPL-compatible so the kernel permits the
 * GPL-only helpers this program relies on (e.g. trace printing) - confirm.
 */
char _license[] SEC("license") = "GPL";