413 lines
14 KiB
C
413 lines
14 KiB
C
#include <linux/in.h>
|
|
#include <linux/udp.h>
|
|
#include <linux/tcp.h>
|
|
#include <linux/ip.h>
|
|
#include <linux/ipv6.h>
|
|
#include <linux/if_ether.h>
|
|
|
|
#include "libendian.h"
|
|
#include "crc32_hash.h"
|
|
#include "bpf_config_kernel.h"
|
|
|
|
#define IP6_EXTENSIONS_COUNT 11
|
|
|
|
/*
 * Per-packet scratch state shared by the parsing, hashing and logging
 * helpers of the tun_rss_steering program.
 */
struct packet
{
    /* L3 family flags, set by parse_packet() according to the EtherType. */
    __u8 is_ipv4;
    __u8 is_ipv6;

    /* L4 protocol flags, set by parse_packet(). */
    __u8 is_udp;
    __u8 is_tcp;

    /* Non-zero when the packet was recognised as an IP fragment. */
    __u8 is_fragmented;

    /* L4 ports copied verbatim from the TCP/UDP header (network byte order). */
    __u16 src_port;
    __u16 dst_port;

    /* IPv4 addresses copied verbatim from the IPv4 header (valid when is_ipv4). */
    __u32 in4_src;
    __u32 in4_dst;

    /* IPv6 addresses copied from the IPv6 header (valid when is_ipv6). */
    struct in6_addr in6_src;
    struct in6_addr in6_dst;

    /* Intermediate and final hash values written by select_rss_queue(). */
    __u32 src_addr_hash;
    __u32 dst_addr_hash;
    __u32 src_port_hash;
    __u32 dst_port_hash;
    __u32 last_hash;

    /* Queue index chosen by select_rss_queue(), or -1 when none was chosen. */
    int select_queue;

    /* Socket buffer the program was invoked on. */
    struct __sk_buff *skb;
};
|
|
|
|
static inline void dump_ipv4_header(struct packet *packet, struct iphdr *ip4)
|
|
{
|
|
bpf_printk("tun_rss_steering: ipv4 %p src_addr ip[0-1]: %d.%d", packet->skb, (packet->in4_src) & 0xFF, (packet->in4_src >> 8) & 0xFF);
|
|
bpf_printk("tun_rss_steering: ipv4 %p src_addr ip[2-3]: %d.%d", packet->skb, (packet->in4_src >> 16) & 0xFF, (packet->in4_src >> 24) & 0xFF);
|
|
|
|
bpf_printk("tun_rss_steering: ipv4 %p dst_addr ip[0-1]: %d.%d", packet->skb, (packet->in4_dst) & 0xFF, (packet->in4_dst >> 8) & 0xFF);
|
|
bpf_printk("tun_rss_steering: ipv4 %p dst_addr ip[2-3]: %d.%d", packet->skb, (packet->in4_dst >> 16) & 0xFF, (packet->in4_dst >> 24) & 0xFF);
|
|
|
|
char *ptr = (char *)ip4;
|
|
int len = sizeof(*ip4);
|
|
for (int i = 0; i < len; i++)
|
|
{
|
|
bpf_printk("tun_rss_steering: ipv4 %p header hex[%d]: %0x", packet->skb, i, ptr[i]);
|
|
}
|
|
}
|
|
|
|
static inline void dump_ipv6_packet(struct packet *packet, struct ipv6hdr *ip6)
|
|
{
|
|
bpf_printk("tun_rss_steering: ipv6 %p src_addr ip[0-1]: %x:%x", packet->skb, bpf_ntohs(packet->in6_src.s6_addr16[0]), bpf_ntohs(packet->in6_src.s6_addr16[1]));
|
|
bpf_printk("tun_rss_steering: ipv6 %p src_addr ip[2-3]: %x:%x", packet->skb, bpf_ntohs(packet->in6_src.s6_addr16[2]), bpf_ntohs(packet->in6_src.s6_addr16[3]));
|
|
bpf_printk("tun_rss_steering: ipv6 %p src_addr ip[4-5]: %x:%x", packet->skb, bpf_ntohs(packet->in6_src.s6_addr16[4]), bpf_ntohs(packet->in6_src.s6_addr16[5]));
|
|
bpf_printk("tun_rss_steering: ipv6 %p src_addr ip[6-7]: %x:%x", packet->skb, bpf_ntohs(packet->in6_src.s6_addr16[6]), bpf_ntohs(packet->in6_src.s6_addr16[7]));
|
|
|
|
bpf_printk("tun_rss_steering: ipv6 %p dst_addr ip[0-1]: %x:%x", packet->skb, bpf_ntohs(packet->in6_dst.s6_addr16[0]), bpf_ntohs(packet->in6_dst.s6_addr16[1]));
|
|
bpf_printk("tun_rss_steering: ipv6 %p dst_addr ip[2-3]: %x:%x", packet->skb, bpf_ntohs(packet->in6_dst.s6_addr16[2]), bpf_ntohs(packet->in6_dst.s6_addr16[3]));
|
|
bpf_printk("tun_rss_steering: ipv6 %p dst_addr ip[4-5]: %x:%x", packet->skb, bpf_ntohs(packet->in6_dst.s6_addr16[4]), bpf_ntohs(packet->in6_dst.s6_addr16[5]));
|
|
bpf_printk("tun_rss_steering: ipv6 %p dst_addr ip[6-7]: %x:%x", packet->skb, bpf_ntohs(packet->in6_dst.s6_addr16[6]), bpf_ntohs(packet->in6_dst.s6_addr16[7]));
|
|
|
|
char *ptr = (char *)ip6;
|
|
int len = sizeof(*ip6);
|
|
for (int i = 0; i < len; i++)
|
|
{
|
|
bpf_printk("tun_rss_steering: ipv6 %p header hex[%d]: %0x", packet->skb, i, ptr[i]);
|
|
}
|
|
}
|
|
|
|
static inline void dump_packet_info(struct packet *packet, struct bpf_config *config)
|
|
{
|
|
if (packet->is_ipv4)
|
|
{
|
|
bpf_printk("tun_rss_steering ipv4 %p src_addr ip[0-1]: %d.%d", packet->skb, (packet->in4_src) & 0xFF, (packet->in4_src >> 8) & 0xFF);
|
|
bpf_printk("tun_rss_steering ipv4 %p src_addr ip[2-3]: %d.%d", packet->skb, (packet->in4_src >> 16) & 0xFF, (packet->in4_src >> 24) & 0xFF);
|
|
|
|
bpf_printk("tun_rss_steering ipv4 %p dst_addr ip[0-1]: %d.%d", packet->skb, (packet->in4_dst) & 0xFF, (packet->in4_dst >> 8) & 0xFF);
|
|
bpf_printk("tun_rss_steering ipv4 %p dst_addr ip[2-3]: %d.%d", packet->skb, (packet->in4_dst >> 16) & 0xFF, (packet->in4_dst >> 24) & 0xFF);
|
|
|
|
bpf_printk("tun_rss_steering ipv4 %p src_port: %d dst_port: %d", packet->skb, bpf_ntohs(packet->src_port), bpf_ntohs(packet->dst_port));
|
|
|
|
bpf_printk("tun_rss_steering ipv4 %p src_addr_hash: %d dst_addr_hash: %d", packet->skb, packet->src_addr_hash, packet->dst_addr_hash);
|
|
bpf_printk("tun_rss_steering ipv4 %p src_port_hash: %d dst_port_hash: %d", packet->skb, packet->src_port_hash, packet->dst_port_hash);
|
|
bpf_printk("tun_rss_steering ipv4 %p last_hash: %d select_queue: %d", packet->skb, packet->last_hash, packet->select_queue);
|
|
}
|
|
|
|
if (packet->is_ipv6)
|
|
{
|
|
bpf_printk("tun_rss_steering ipv6 %p src_addr ip[0-1]: %x:%x", packet->skb, bpf_ntohs(packet->in6_src.s6_addr16[0]), bpf_ntohs(packet->in6_src.s6_addr16[1]));
|
|
bpf_printk("tun_rss_steering ipv6 %p src_addr ip[2-3]: %x:%x", packet->skb, bpf_ntohs(packet->in6_src.s6_addr16[2]), bpf_ntohs(packet->in6_src.s6_addr16[3]));
|
|
bpf_printk("tun_rss_steering ipv6 %p src_addr ip[4-5]: %x:%x", packet->skb, bpf_ntohs(packet->in6_src.s6_addr16[4]), bpf_ntohs(packet->in6_src.s6_addr16[5]));
|
|
bpf_printk("tun_rss_steering ipv6 %p src_addr ip[6-7]: %x:%x", packet->skb, bpf_ntohs(packet->in6_src.s6_addr16[6]), bpf_ntohs(packet->in6_src.s6_addr16[7]));
|
|
|
|
bpf_printk("tun_rss_steering ipv6 %p dst_addr ip[0-1]: %x:%x", packet->skb, bpf_ntohs(packet->in6_dst.s6_addr16[0]), bpf_ntohs(packet->in6_dst.s6_addr16[1]));
|
|
bpf_printk("tun_rss_steering ipv6 %p dst_addr ip[2-3]: %x:%x", packet->skb, bpf_ntohs(packet->in6_dst.s6_addr16[2]), bpf_ntohs(packet->in6_dst.s6_addr16[3]));
|
|
bpf_printk("tun_rss_steering ipv6 %p dst_addr ip[4-5]: %x:%x", packet->skb, bpf_ntohs(packet->in6_dst.s6_addr16[4]), bpf_ntohs(packet->in6_dst.s6_addr16[5]));
|
|
bpf_printk("tun_rss_steering ipv6 %p dst_addr ip[6-7]: %x:%x", packet->skb, bpf_ntohs(packet->in6_dst.s6_addr16[6]), bpf_ntohs(packet->in6_dst.s6_addr16[7]));
|
|
|
|
bpf_printk("tun_rss_steering ipv6 %p src_port: %d dst_port: %d", packet->skb, bpf_ntohs(packet->src_port), bpf_ntohs(packet->dst_port));
|
|
|
|
bpf_printk("tun_rss_steering ipv6 %p src_addr_hash: %d dst_addr_hash: %d", packet->skb, packet->src_addr_hash, packet->dst_addr_hash);
|
|
bpf_printk("tun_rss_steering ipv6 %p src_port_hash: %d dst_port_hash: %d", packet->skb, packet->src_port_hash, packet->dst_port_hash);
|
|
bpf_printk("tun_rss_steering ipv6 %p last_hash: %d select_queue: %d", packet->skb, packet->last_hash, packet->select_queue);
|
|
}
|
|
}
|
|
|
|
/*
 * Debug helper: log the debug-log flag, queue count and hash mode read from
 * `config`. Nothing is printed unless the debug-log flag is non-zero.
 */
static inline void dump_config_info(struct bpf_config *config)
{
    /* Logging is opt-in: stay silent when debug logging is disabled. */
    if (!bpf_config_get_debug_log(config))
    {
        return;
    }

    bpf_printk("tun_rss_steering: config->bpf_debug_log %d", bpf_config_get_debug_log(config));
    bpf_printk("tun_rss_steering: config->bpf_queue_num %d", bpf_config_get_queue_num(config));
    bpf_printk("tun_rss_steering: config->bpf_hash_mode %d", bpf_config_get_hash_mode(config));
}
|
|
|
|
/*
 * Tell whether `hdr_type` (an IPv6 "next header" value) is something other
 * than one of the extension headers this program walks through.
 *
 * return 1: not a handled extension header, no extension processing needed
 * return 0: a handled extension header, it must be processed
 */
static inline int ipv6_extension_need_skip(__u8 hdr_type)
{
    /*
     * TODO
     * kni_ipv6_header_parse() only handles the following 4 IPv6 extension
     * headers:
     *     IPPROTO_AH
     *     IPPROTO_HOPOPTS
     *     IPPROTO_ROUTING
     *     IPPROTO_DSTOPTS
     *
     * In other words, the IPv6 traffic that KNI sends back to TFE only
     * supports those 4 extension headers, and the IPv6 traffic that TFE
     * re-injects into KNI will not contain any other extension header, so
     * this BPF program only handles these 4 as well.
     *
     * Because the BPF program must support 4-tuple steering it has to know
     * whether an IPv6 packet is fragmented, hence IPPROTO_FRAGMENT is
     * handled here too.
     */
    const int is_handled_extension = hdr_type == IPPROTO_AH ||
                                     hdr_type == IPPROTO_HOPOPTS ||
                                     hdr_type == IPPROTO_ROUTING ||
                                     hdr_type == IPPROTO_DSTOPTS ||
                                     hdr_type == IPPROTO_FRAGMENT;

    return is_handled_extension ? 0 : 1;
}
|
|
|
|
static inline int parse_ipv6_extension(struct packet *packet, __u8 *l4_protocol, int *l4_offset)
|
|
{
|
|
if (ipv6_extension_need_skip(*l4_protocol))
|
|
{
|
|
return 0;
|
|
}
|
|
|
|
struct ipv6_opt_hdr ext_hdr = {0};
|
|
for (unsigned int i = 0; i < IP6_EXTENSIONS_COUNT; ++i)
|
|
{
|
|
if (bpf_skb_load_bytes_relative(packet->skb, *l4_offset, &ext_hdr, sizeof(ext_hdr), BPF_HDR_START_NET))
|
|
{
|
|
bpf_printk("tun_rss_steering: unable get ipv6 ext header");
|
|
return -1;
|
|
}
|
|
|
|
if (*l4_protocol == IPPROTO_FRAGMENT)
|
|
{
|
|
packet->is_fragmented = 1;
|
|
}
|
|
|
|
*l4_protocol = ext_hdr.nexthdr;
|
|
*l4_offset += (ext_hdr.hdrlen + 1) * 8;
|
|
|
|
if (ipv6_extension_need_skip(ext_hdr.nexthdr))
|
|
{
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
return -1;
|
|
}
|
|
|
|
static inline int parse_packet(struct packet *packet, struct bpf_config *config)
|
|
{
|
|
int l3_offset = 12;
|
|
int l4_offset = 0;
|
|
__u8 l4_protocol = 0;
|
|
__u16 l3_protocol = 0;
|
|
|
|
if (!packet || !packet->skb)
|
|
{
|
|
bpf_printk("tun_rss_steering: invalid __sk_buff pointer");
|
|
return -1;
|
|
}
|
|
|
|
if (bpf_skb_load_bytes_relative(packet->skb, l3_offset, &l3_protocol, sizeof(l3_protocol), BPF_HDR_START_MAC))
|
|
{
|
|
bpf_printk("tun_rss_steering: unable get l3 protocol");
|
|
return -1;
|
|
}
|
|
|
|
if (bpf_ntohs(l3_protocol) == ETH_P_IP)
|
|
{
|
|
packet->is_ipv4 = 1;
|
|
struct iphdr ip = {0};
|
|
if (bpf_skb_load_bytes_relative(packet->skb, 0, &ip, sizeof(ip), BPF_HDR_START_NET))
|
|
{
|
|
bpf_printk("tun_rss_steering: unable get ipv4 header");
|
|
return -1;
|
|
}
|
|
|
|
packet->in4_src = ip.saddr;
|
|
packet->in4_dst = ip.daddr;
|
|
|
|
/*
|
|
* The frag_off portion of the header consists of:
|
|
* +----+----+----+----------------------------------+
|
|
* | RS | DF | MF | ...13 bits of fragment offset... |
|
|
* +----+----+----+----------------------------------+
|
|
* If "More fragments" or the offset is nonzero, then this is an IP fragment (RFC791).
|
|
*/
|
|
packet->is_fragmented = !(bpf_ntohs(ip.frag_off) & 0x4000);
|
|
l4_protocol = ip.protocol;
|
|
l4_offset = ip.ihl * 4;
|
|
|
|
if (packet->is_fragmented)
|
|
{
|
|
bpf_printk("tun_rss_steering: ipv4 %p is fragmented", packet->skb);
|
|
if (bpf_config_get_debug_log(config))
|
|
{
|
|
dump_ipv4_header(packet, &ip);
|
|
}
|
|
return -1;
|
|
}
|
|
}
|
|
else if (bpf_ntohs(l3_protocol) == ETH_P_IPV6)
|
|
{
|
|
packet->is_ipv6 = 1;
|
|
struct ipv6hdr ip6 = {0};
|
|
|
|
if (bpf_skb_load_bytes_relative(packet->skb, 0, &ip6, sizeof(ip6), BPF_HDR_START_NET))
|
|
{
|
|
bpf_printk("tun_rss_steering: unable get ipv6 header");
|
|
return -1;
|
|
}
|
|
|
|
packet->in6_src = ip6.saddr;
|
|
packet->in6_dst = ip6.daddr;
|
|
|
|
l4_protocol = ip6.nexthdr;
|
|
l4_offset = sizeof(ip6);
|
|
|
|
if (parse_ipv6_extension(packet, &l4_protocol, &l4_offset) == -1)
|
|
{
|
|
return -1;
|
|
}
|
|
|
|
if (packet->is_fragmented)
|
|
{
|
|
bpf_printk("tun_rss_steering: ipv6 %p is fragmented", packet->skb);
|
|
if (bpf_config_get_debug_log(config))
|
|
{
|
|
dump_ipv6_packet(packet, &ip6);
|
|
}
|
|
return -1;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
bpf_printk("tun_rss_steering: invalid l3 protocol %d", bpf_ntohs(l3_protocol));
|
|
return -1;
|
|
}
|
|
|
|
if (l4_protocol == IPPROTO_TCP)
|
|
{
|
|
packet->is_tcp = 1;
|
|
struct tcphdr tcp = {0};
|
|
|
|
if (bpf_skb_load_bytes_relative(packet->skb, l4_offset, &tcp, sizeof(tcp), BPF_HDR_START_NET))
|
|
{
|
|
bpf_printk("tun_rss_steering: unable get tcp header");
|
|
return -1;
|
|
}
|
|
|
|
packet->src_port = tcp.source;
|
|
packet->dst_port = tcp.dest;
|
|
}
|
|
else if (l4_protocol == IPPROTO_UDP)
|
|
{
|
|
packet->is_udp = 1;
|
|
struct udphdr udp = {0};
|
|
|
|
if (bpf_skb_load_bytes_relative(packet->skb, l4_offset, &udp, sizeof(udp), BPF_HDR_START_NET))
|
|
{
|
|
bpf_printk("tun_rss_steering: unable get udp header");
|
|
return -1;
|
|
}
|
|
|
|
packet->src_port = udp.source;
|
|
packet->dst_port = udp.dest;
|
|
}
|
|
else
|
|
{
|
|
bpf_printk("tun_rss_steering: invalid l4 protocol %d", l4_protocol);
|
|
return -1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static inline void select_rss_queue(struct packet *packet, struct bpf_config *config)
|
|
{
|
|
packet->select_queue = -1;
|
|
|
|
if (packet->is_ipv4)
|
|
{
|
|
if (bpf_config_get_hash_mode(config) == BPF_HASH_MODE_TUPLE4)
|
|
{
|
|
packet->src_addr_hash = crc32_hash(&packet->in4_src, 4, 0);
|
|
packet->dst_addr_hash = crc32_hash(&packet->in4_dst, 4, 0);
|
|
packet->last_hash = packet->src_addr_hash ^ packet->dst_addr_hash;
|
|
|
|
packet->src_port_hash = crc32_hash(&packet->src_port, 2, packet->last_hash);
|
|
packet->dst_port_hash = crc32_hash(&packet->dst_port, 2, packet->last_hash);
|
|
packet->last_hash = packet->src_port_hash ^ packet->dst_port_hash;
|
|
|
|
packet->select_queue = packet->last_hash % bpf_config_get_queue_num(config);
|
|
}
|
|
else if (bpf_config_get_hash_mode(config) == BPF_HASH_MODE_TUPLE2)
|
|
{
|
|
packet->src_addr_hash = crc32_hash(&packet->in4_src, 4, 0);
|
|
packet->dst_addr_hash = crc32_hash(&packet->in4_dst, 4, 0);
|
|
packet->last_hash = packet->src_addr_hash ^ packet->dst_addr_hash;
|
|
|
|
packet->select_queue = packet->last_hash % bpf_config_get_queue_num(config);
|
|
}
|
|
}
|
|
|
|
if (packet->is_ipv6)
|
|
{
|
|
if (bpf_config_get_hash_mode(config) == BPF_HASH_MODE_TUPLE4)
|
|
{
|
|
packet->src_addr_hash = crc32_hash(&packet->in6_src, 16, 0);
|
|
packet->dst_addr_hash = crc32_hash(&packet->in6_dst, 16, 0);
|
|
packet->last_hash = packet->src_addr_hash ^ packet->dst_addr_hash;
|
|
|
|
packet->src_port_hash = crc32_hash(&packet->src_port, 2, packet->last_hash);
|
|
packet->dst_port_hash = crc32_hash(&packet->dst_port, 2, packet->last_hash);
|
|
packet->last_hash = packet->src_port_hash ^ packet->dst_port_hash;
|
|
|
|
packet->select_queue = packet->last_hash % bpf_config_get_queue_num(config);
|
|
}
|
|
else if (bpf_config_get_hash_mode(config) == BPF_HASH_MODE_TUPLE2)
|
|
{
|
|
packet->src_addr_hash = crc32_hash(&packet->in6_src, 16, 0);
|
|
packet->dst_addr_hash = crc32_hash(&packet->in6_dst, 16, 0);
|
|
packet->last_hash = packet->src_addr_hash ^ packet->dst_addr_hash;
|
|
|
|
packet->select_queue = packet->last_hash % bpf_config_get_queue_num(config);
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
 * Program entry point: choose the queue for `skb`.
 *
 * Returns the queue index computed by select_rss_queue(), or 0 when the
 * configuration is unusable (queue count <= 0, unknown hash mode) or the
 * packet cannot be parsed.
 */
SEC("tun_rss_steering")
int bpf_tun_rss_steering(struct __sk_buff *skb)
{
    struct bpf_config config = {0};
    /* Every member other than skb starts out zeroed. */
    struct packet packet = {.skb = skb};

    bpf_config_lookup_map(&config);
    dump_config_info(&config);

    /* The queue count is later used as a modulus, so it must be positive. */
    if (bpf_config_get_queue_num(&config) <= 0)
    {
        bpf_printk("tun_rss_steering: invalid queue num %d", bpf_config_get_queue_num(&config));
        return 0;
    }

    if (!(bpf_config_get_hash_mode(&config) == BPF_HASH_MODE_TUPLE2 || bpf_config_get_hash_mode(&config) == BPF_HASH_MODE_TUPLE4))
    {
        bpf_printk("tun_rss_steering: invalid hash mode %d", bpf_config_get_hash_mode(&config));
        return 0;
    }

    if (parse_packet(&packet, &config) == -1)
    {
        return 0;
    }

    select_rss_queue(&packet, &config);

    if (bpf_config_get_debug_log(&config))
    {
        dump_packet_info(&packet, &config);
    }

    return packet.select_queue;
}
|
|
|
|
/* License string placed in the "license" ELF section of the object file. */
char _license[] SEC("license") = "GPL";