#include #include #include #include #include #include #include #include "rte_hash_crc.h" #include "bpf_conf_kernel.h" #include "bpf_helpers.h" #include "libendian.h" #define IP6_EXTENSIONS_COUNT 11 typedef struct bpf_pkt_s { __u8 is_ipv4; __u8 is_ipv6; __u8 is_udp; __u8 is_tcp; __u8 is_fragmented; __u16 src_port; __u16 dst_port; __u32 in4_src; __u32 in4_dst; struct in6_addr in6_src; struct in6_addr in6_dst; __u32 src_addr_hash; __u32 dst_addr_hash; __u32 src_port_hash; __u32 dst_port_hash; __u32 last_hash; int select_queue; struct __sk_buff *skb; } bpf_pkt_t; /* * reutrn 0: 表示不需要处理扩展头 * return 1: 表示需要处理扩展头 */ static inline int ip6_next_header_is_need_proc(__u8 hdr_type) { /* * TODO * 因为 kni_ipv6_header_parse() 中只跳过了以下 4 种 IPv6 扩展头部: * IPPROTO_AH * IPPROTO_HOPOPTS * IPPROTO_ROUTING * IPPROTO_DSTOPTS * * 即 KNI 回流给 TFE 的 IPv6 流量中只支持以上 4 种 IPv6 扩展头部。 * 当 TFE 回注给 KNI 的 IPv6 流量中不会出现其他 IPv6 扩展头部,故此处 BPF 只处理这 4 种 IPv6 扩展头部。 * * 由于 BPF 要支持四元组分流,所以要判断 IPv6 是否分片,故此处要处理 IPPROTO_FRAGMENT IPv6 扩展头部。 */ switch (hdr_type) { case IPPROTO_AH: case IPPROTO_HOPOPTS: case IPPROTO_ROUTING: case IPPROTO_DSTOPTS: case IPPROTO_FRAGMENT: return 1; default: return 0; } } static inline int bpf_pkt_parser_ext6(bpf_pkt_t *pkt, __u8 *l4_protocol, int *l4_offset) { if (!ip6_next_header_is_need_proc(*l4_protocol)) { return 0; } struct ipv6_opt_hdr ext_hdr = {0}; for (unsigned int i = 0; i < IP6_EXTENSIONS_COUNT; ++i) { if (bpf_skb_load_bytes_relative(pkt->skb, *l4_offset, &ext_hdr, sizeof(ext_hdr), BPF_HDR_START_NET)) { bpf_printk("bpf_tun_rss_steering unable get ipv6 ext header"); return -1; } if (*l4_protocol == IPPROTO_FRAGMENT) { pkt->is_fragmented = 1; } *l4_protocol = ext_hdr.nexthdr; *l4_offset += (ext_hdr.hdrlen + 1) * 8; if (!ip6_next_header_is_need_proc(ext_hdr.nexthdr)) { return 0; } } return -1; } static inline void bpf_dump_ipv4_header(bpf_pkt_t *pkt, struct iphdr *ip4) { bpf_printk("bpf_tun_rss_steering ipv4 %p fragmented, src_addr ip[0-1]: %d.%d", pkt->skb, (pkt->in4_src) & 0xFF, (pkt->in4_src >> 8) & 0xFF); bpf_printk("bpf_tun_rss_steering ipv4 %p fragmented, src_addr ip[2-3]: %d.%d", pkt->skb, (pkt->in4_src >> 16) & 0xFF, (pkt->in4_src >> 24) & 0xFF); bpf_printk("bpf_tun_rss_steering ipv4 %p fragmented, dst_addr ip[0-1]: %d.%d", pkt->skb, (pkt->in4_dst) & 0xFF, (pkt->in4_dst >> 8) & 0xFF); bpf_printk("bpf_tun_rss_steering ipv4 %p fragmented, dst_addr ip[2-3]: %d.%d", pkt->skb, (pkt->in4_dst >> 16) & 0xFF, (pkt->in4_dst >> 24) & 0xFF); char *ptr = (char *)ip4; int len = sizeof(*ip4); for (int i = 0; i < len; i++) { bpf_printk("bpf_tun_rss_steering ipv4 %p fragmented, dump header hex[%d]: %0x", pkt->skb, i, ptr[i]); } } static inline void bpf_dump_ipv6_header(bpf_pkt_t *pkt, struct ipv6hdr *ip6) { bpf_printk("bpf_tun_rss_steering ipv6 %p fragmented, src_addr ip[0-1]: %x:%x", pkt->skb, bpf_ntohs(pkt->in6_src.s6_addr16[0]), bpf_ntohs(pkt->in6_src.s6_addr16[1])); // bpf_printk("bpf_tun_rss_steering ipv6 %p fragmented, src_addr ip[2-3]: %x:%x", pkt->skb, bpf_ntohs(pkt->in6_src.s6_addr16[2]), bpf_ntohs(pkt->in6_src.s6_addr16[3])); // bpf_printk("bpf_tun_rss_steering ipv6 %p fragmented, src_addr ip[4-5]: %x:%x", pkt->skb, bpf_ntohs(pkt->in6_src.s6_addr16[4]), bpf_ntohs(pkt->in6_src.s6_addr16[5])); bpf_printk("bpf_tun_rss_steering ipv6 %p fragmented, src_addr ip[6-7]: %x:%x", pkt->skb, bpf_ntohs(pkt->in6_src.s6_addr16[6]), bpf_ntohs(pkt->in6_src.s6_addr16[7])); bpf_printk("bpf_tun_rss_steering ipv6 %p fragmented, dst_addr ip[0-1]: %x:%x", pkt->skb, bpf_ntohs(pkt->in6_dst.s6_addr16[0]), bpf_ntohs(pkt->in6_dst.s6_addr16[1])); // bpf_printk("bpf_tun_rss_steering ipv6 %p fragmented, dst_addr ip[2-3]: %x:%x", pkt->skb, bpf_ntohs(pkt->in6_dst.s6_addr16[2]), bpf_ntohs(pkt->in6_dst.s6_addr16[3])); // bpf_printk("bpf_tun_rss_steering ipv6 %p fragmented, dst_addr ip[4-5]: %x:%x", pkt->skb, bpf_ntohs(pkt->in6_dst.s6_addr16[4]), bpf_ntohs(pkt->in6_dst.s6_addr16[5])); bpf_printk("bpf_tun_rss_steering ipv6 %p fragmented, dst_addr ip[6-7]: %x:%x", pkt->skb, bpf_ntohs(pkt->in6_dst.s6_addr16[6]), bpf_ntohs(pkt->in6_dst.s6_addr16[7])); char *ptr = (char *)ip6; int len = sizeof(*ip6); for (int i = 0; i < len; i++) { bpf_printk("bpf_tun_rss_steering ipv6 %p fragmented, dump header hex[%d]: %0x", pkt->skb, i, ptr[i]); } } static inline int ipv4_is_fragment(const struct iphdr *ip4) { /* The frag_off portion of the header consists of: * * +----+----+----+----------------------------------+ * | RS | DF | MF | ...13 bits of fragment offset... | * +----+----+----+----------------------------------+ * * If "More fragments" or the offset is nonzero, then this is an IP fragment (RFC791). */ return ip4->frag_off & bpf_htons(0x3FFF); } static inline int bpf_pkt_parser(bpf_pkt_t *pkt, bpf_conf_t *conf) { int l3_offset = 12; int l4_offset = 0; __u8 l4_protocol = 0; __u16 l3_protocol = 0; if (!pkt || !pkt->skb) { bpf_printk("bpf_tun_rss_steering skb is null"); return -1; } if (bpf_skb_load_bytes_relative(pkt->skb, l3_offset, &l3_protocol, sizeof(l3_protocol), BPF_HDR_START_MAC)) { bpf_printk("bpf_tun_rss_steering unable get l3 protocol"); return -1; } if (bpf_ntohs(l3_protocol) == ETH_P_IP) { pkt->is_ipv4 = 1; struct iphdr ip = {}; if (bpf_skb_load_bytes_relative(pkt->skb, 0, &ip, sizeof(ip), BPF_HDR_START_NET)) { bpf_printk("bpf_tun_rss_steering unable get ipv4 header"); return -1; } pkt->in4_src = ip.saddr; pkt->in4_dst = ip.daddr; pkt->is_fragmented = ipv4_is_fragment(&ip); l4_protocol = ip.protocol; l4_offset = ip.ihl * 4; if (pkt->is_fragmented) { bpf_printk("bpf_tun_rss_steering ipv4 is fragmented"); if (bpf_conf_get_debug_log(conf)) { bpf_dump_ipv4_header(pkt, &ip); } return -1; } } else if (bpf_ntohs(l3_protocol) == ETH_P_IPV6) { pkt->is_ipv6 = 1; struct ipv6hdr ip6 = {}; if (bpf_skb_load_bytes_relative(pkt->skb, 0, &ip6, sizeof(ip6), BPF_HDR_START_NET)) { bpf_printk("bpf_tun_rss_steering unable get ipv6 header"); return -1; } pkt->in6_src = ip6.saddr; pkt->in6_dst = ip6.daddr; l4_protocol = ip6.nexthdr; l4_offset = sizeof(ip6); if (bpf_pkt_parser_ext6(pkt, &l4_protocol, &l4_offset) == -1) { return -1; } if (pkt->is_fragmented) { bpf_printk("bpf_tun_rss_steering ipv6 is fragmented"); if (bpf_conf_get_debug_log(conf)) { bpf_dump_ipv6_header(pkt, &ip6); } return -1; } } else { bpf_printk("bpf_tun_rss_steering l3 protocol %d not support", bpf_ntohs(l3_protocol)); return -1; } if (l4_protocol == IPPROTO_TCP) { pkt->is_tcp = 1; struct tcphdr tcp = {}; if (bpf_skb_load_bytes_relative(pkt->skb, l4_offset, &tcp, sizeof(tcp), BPF_HDR_START_NET)) { bpf_printk("bpf_tun_rss_steering unable get tcp header"); return -1; } pkt->src_port = tcp.source; pkt->dst_port = tcp.dest; } else if (l4_protocol == IPPROTO_UDP) { pkt->is_udp = 1; struct udphdr udp = {}; if (bpf_skb_load_bytes_relative(pkt->skb, l4_offset, &udp, sizeof(udp), BPF_HDR_START_NET)) { bpf_printk("bpf_tun_rss_steering unable get udp header"); return -1; } pkt->src_port = udp.source; pkt->dst_port = udp.dest; } else { bpf_printk("bpf_tun_rss_steering l4 protocol %d not support", l4_protocol); return -1; } return 0; } static inline void bpf_pkt_debug_log(bpf_pkt_t *pkt, bpf_conf_t *conf) { if (pkt->is_ipv4) { bpf_printk("bpf_tun_rss_steering ipv4 %p src_addr ip[0-1]: %d.%d", pkt->skb, (pkt->in4_src) & 0xFF, (pkt->in4_src >> 8) & 0xFF); bpf_printk("bpf_tun_rss_steering ipv4 %p src_addr ip[2-3]: %d.%d", pkt->skb, (pkt->in4_src >> 16) & 0xFF, (pkt->in4_src >> 24) & 0xFF); bpf_printk("bpf_tun_rss_steering ipv4 %p dst_addr ip[0-1]: %d.%d", pkt->skb, (pkt->in4_dst) & 0xFF, (pkt->in4_dst >> 8) & 0xFF); bpf_printk("bpf_tun_rss_steering ipv4 %p dst_addr ip[2-3]: %d.%d", pkt->skb, (pkt->in4_dst >> 16) & 0xFF, (pkt->in4_dst >> 24) & 0xFF); bpf_printk("bpf_tun_rss_steering ipv4 %p src_port: %d dst_port: %d", pkt->skb, bpf_ntohs(pkt->src_port), bpf_ntohs(pkt->dst_port)); bpf_printk("bpf_tun_rss_steering ipv4 %p src_addr_hash: %d dst_addr_hash: %d", pkt->skb, pkt->src_addr_hash, pkt->dst_addr_hash); bpf_printk("bpf_tun_rss_steering ipv4 %p src_port_hash: %d dst_port_hash: %d", pkt->skb, pkt->src_port_hash, pkt->dst_port_hash); bpf_printk("bpf_tun_rss_steering ipv4 %p last_hash: %d select_queue: %d", pkt->skb, pkt->last_hash, pkt->select_queue); } if (pkt->is_ipv6) { bpf_printk("bpf_tun_rss_steering ipv6 %p src_addr ip[0-1]: %x:%x", pkt->skb, bpf_ntohs(pkt->in6_src.s6_addr16[0]), bpf_ntohs(pkt->in6_src.s6_addr16[1])); // bpf_printk("bpf_tun_rss_steering ipv6 %p src_addr ip[2-3]: %x:%x", pkt->skb, bpf_ntohs(pkt->in6_src.s6_addr16[2]), bpf_ntohs(pkt->in6_src.s6_addr16[3])); // bpf_printk("bpf_tun_rss_steering ipv6 %p src_addr ip[4-5]: %x:%x", pkt->skb, bpf_ntohs(pkt->in6_src.s6_addr16[4]), bpf_ntohs(pkt->in6_src.s6_addr16[5])); bpf_printk("bpf_tun_rss_steering ipv6 %p src_addr ip[6-7]: %x:%x", pkt->skb, bpf_ntohs(pkt->in6_src.s6_addr16[6]), bpf_ntohs(pkt->in6_src.s6_addr16[7])); bpf_printk("bpf_tun_rss_steering ipv6 %p dst_addr ip[0-1]: %x:%x", pkt->skb, bpf_ntohs(pkt->in6_dst.s6_addr16[0]), bpf_ntohs(pkt->in6_dst.s6_addr16[1])); // bpf_printk("bpf_tun_rss_steering ipv6 %p dst_addr ip[2-3]: %x:%x", pkt->skb, bpf_ntohs(pkt->in6_dst.s6_addr16[2]), bpf_ntohs(pkt->in6_dst.s6_addr16[3])); // bpf_printk("bpf_tun_rss_steering ipv6 %p dst_addr ip[4-5]: %x:%x", pkt->skb, bpf_ntohs(pkt->in6_dst.s6_addr16[4]), bpf_ntohs(pkt->in6_dst.s6_addr16[5])); bpf_printk("bpf_tun_rss_steering ipv6 %p dst_addr ip[6-7]: %x:%x", pkt->skb, bpf_ntohs(pkt->in6_dst.s6_addr16[6]), bpf_ntohs(pkt->in6_dst.s6_addr16[7])); bpf_printk("bpf_tun_rss_steering ipv6 %p src_port: %d dst_port: %d", pkt->skb, bpf_ntohs(pkt->src_port), bpf_ntohs(pkt->dst_port)); bpf_printk("bpf_tun_rss_steering ipv6 %p src_addr_hash: %d dst_addr_hash: %d", pkt->skb, pkt->src_addr_hash, pkt->dst_addr_hash); bpf_printk("bpf_tun_rss_steering ipv6 %p src_port_hash: %d dst_port_hash: %d", pkt->skb, pkt->src_port_hash, pkt->dst_port_hash); bpf_printk("bpf_tun_rss_steering ipv6 %p last_hash: %d select_queue: %d", pkt->skb, pkt->last_hash, pkt->select_queue); } } static inline void bpf_pkt_select_queue(bpf_pkt_t *pkt, bpf_conf_t *conf) { pkt->select_queue = -1; if (pkt->is_ipv4) { if (bpf_conf_get_hash_mode(conf) == BPF_HASH_MODE_TUPLE4) { pkt->src_addr_hash = rte_hash_crc(&pkt->in4_src, 4, 0); pkt->dst_addr_hash = rte_hash_crc(&pkt->in4_dst, 4, 0); pkt->last_hash = pkt->src_addr_hash ^ pkt->dst_addr_hash; pkt->src_port_hash = rte_hash_crc(&pkt->src_port, 2, pkt->last_hash); pkt->dst_port_hash = rte_hash_crc(&pkt->dst_port, 2, pkt->last_hash); pkt->last_hash = pkt->src_port_hash ^ pkt->dst_port_hash; pkt->select_queue = pkt->last_hash % bpf_conf_get_queue_num(conf); } else if (bpf_conf_get_hash_mode(conf) == BPF_HASH_MODE_TUPLE2) { pkt->src_addr_hash = rte_hash_crc(&pkt->in4_src, 4, 0); pkt->dst_addr_hash = rte_hash_crc(&pkt->in4_dst, 4, 0); pkt->last_hash = pkt->src_addr_hash ^ pkt->dst_addr_hash; pkt->select_queue = pkt->last_hash % bpf_conf_get_queue_num(conf); } } if (pkt->is_ipv6) { if (bpf_conf_get_hash_mode(conf) == BPF_HASH_MODE_TUPLE4) { pkt->src_addr_hash = rte_hash_crc(&pkt->in6_src, 16, 0); pkt->dst_addr_hash = rte_hash_crc(&pkt->in6_dst, 16, 0); pkt->last_hash = pkt->src_addr_hash ^ pkt->dst_addr_hash; pkt->src_port_hash = rte_hash_crc(&pkt->src_port, 2, pkt->last_hash); pkt->dst_port_hash = rte_hash_crc(&pkt->dst_port, 2, pkt->last_hash); pkt->last_hash = pkt->src_port_hash ^ pkt->dst_port_hash; pkt->select_queue = pkt->last_hash % bpf_conf_get_queue_num(conf); } else if (bpf_conf_get_hash_mode(conf) == BPF_HASH_MODE_TUPLE2) { pkt->src_addr_hash = rte_hash_crc(&pkt->in6_src, 16, 0); pkt->dst_addr_hash = rte_hash_crc(&pkt->in6_dst, 16, 0); pkt->last_hash = pkt->src_addr_hash ^ pkt->dst_addr_hash; pkt->select_queue = pkt->last_hash % bpf_conf_get_queue_num(conf); } } } static void bpf_conf_dump(bpf_conf_t *conf) { if (bpf_conf_get_debug_log(conf)) { bpf_printk("bpf_debug_log : %d", bpf_conf_get_debug_log(conf)); bpf_printk("bpf_queue_num : %d", bpf_conf_get_queue_num(conf)); bpf_printk("bpf_hash_mode : %d", bpf_conf_get_hash_mode(conf)); } } SEC("tun_rss_steering") int bpf_tun_rss_steering(struct __sk_buff *skb) { bpf_pkt_t pkt = {}; bpf_conf_t conf = {}; bpf_conf_lookup_map(&conf); bpf_conf_dump(&conf); if (bpf_conf_get_queue_num(&conf) <= 0) { bpf_printk("bpf_tun_rss_steering invalid queue num: %d", bpf_conf_get_queue_num(&conf)); return -1; } if (bpf_conf_get_hash_mode(&conf) != BPF_HASH_MODE_TUPLE2 && bpf_conf_get_hash_mode(&conf) != BPF_HASH_MODE_TUPLE4) { bpf_printk("bpf_tun_rss_steering invalid hash mode: %d", bpf_conf_get_hash_mode(&conf)); return -1; } pkt.skb = skb; if (bpf_pkt_parser(&pkt, &conf) == -1) { return -1; } bpf_pkt_select_queue(&pkt, &conf); if (bpf_conf_get_debug_log(&conf)) { bpf_pkt_debug_log(&pkt, &conf); } return pkt.select_queue; } char _license[] SEC("license") = "GPL";