#include #include #include #include #include #include #include "libendian.h" #include "crc32_hash.h" #include "bpf_config_kernel.h" #define IP6_EXTENSIONS_COUNT 11 struct packet { __u8 is_ipv4; __u8 is_ipv6; __u8 is_udp; __u8 is_tcp; __u8 is_fragmented; __u16 src_port; __u16 dst_port; __u32 in4_src; __u32 in4_dst; struct in6_addr in6_src; struct in6_addr in6_dst; __u32 src_addr_hash; __u32 dst_addr_hash; __u32 src_port_hash; __u32 dst_port_hash; __u32 last_hash; int select_queue; struct __sk_buff *skb; }; static inline void dump_ipv4_header(struct packet *packet, struct iphdr *ip4) { bpf_printk("tun_rss_steering: ipv4 %p src_addr ip[0-1]: %d.%d", packet->skb, (packet->in4_src) & 0xFF, (packet->in4_src >> 8) & 0xFF); bpf_printk("tun_rss_steering: ipv4 %p src_addr ip[2-3]: %d.%d", packet->skb, (packet->in4_src >> 16) & 0xFF, (packet->in4_src >> 24) & 0xFF); bpf_printk("tun_rss_steering: ipv4 %p dst_addr ip[0-1]: %d.%d", packet->skb, (packet->in4_dst) & 0xFF, (packet->in4_dst >> 8) & 0xFF); bpf_printk("tun_rss_steering: ipv4 %p dst_addr ip[2-3]: %d.%d", packet->skb, (packet->in4_dst >> 16) & 0xFF, (packet->in4_dst >> 24) & 0xFF); char *ptr = (char *)ip4; int len = sizeof(*ip4); for (int i = 0; i < len; i++) { bpf_printk("tun_rss_steering: ipv4 %p header hex[%d]: %0x", packet->skb, i, ptr[i]); } } static inline void dump_ipv6_packet(struct packet *packet, struct ipv6hdr *ip6) { bpf_printk("tun_rss_steering: ipv6 %p src_addr ip[0-1]: %x:%x", packet->skb, bpf_ntohs(packet->in6_src.s6_addr16[0]), bpf_ntohs(packet->in6_src.s6_addr16[1])); bpf_printk("tun_rss_steering: ipv6 %p src_addr ip[2-3]: %x:%x", packet->skb, bpf_ntohs(packet->in6_src.s6_addr16[2]), bpf_ntohs(packet->in6_src.s6_addr16[3])); bpf_printk("tun_rss_steering: ipv6 %p src_addr ip[4-5]: %x:%x", packet->skb, bpf_ntohs(packet->in6_src.s6_addr16[4]), bpf_ntohs(packet->in6_src.s6_addr16[5])); bpf_printk("tun_rss_steering: ipv6 %p src_addr ip[6-7]: %x:%x", packet->skb, bpf_ntohs(packet->in6_src.s6_addr16[6]), bpf_ntohs(packet->in6_src.s6_addr16[7])); bpf_printk("tun_rss_steering: ipv6 %p dst_addr ip[0-1]: %x:%x", packet->skb, bpf_ntohs(packet->in6_dst.s6_addr16[0]), bpf_ntohs(packet->in6_dst.s6_addr16[1])); bpf_printk("tun_rss_steering: ipv6 %p dst_addr ip[2-3]: %x:%x", packet->skb, bpf_ntohs(packet->in6_dst.s6_addr16[2]), bpf_ntohs(packet->in6_dst.s6_addr16[3])); bpf_printk("tun_rss_steering: ipv6 %p dst_addr ip[4-5]: %x:%x", packet->skb, bpf_ntohs(packet->in6_dst.s6_addr16[4]), bpf_ntohs(packet->in6_dst.s6_addr16[5])); bpf_printk("tun_rss_steering: ipv6 %p dst_addr ip[6-7]: %x:%x", packet->skb, bpf_ntohs(packet->in6_dst.s6_addr16[6]), bpf_ntohs(packet->in6_dst.s6_addr16[7])); char *ptr = (char *)ip6; int len = sizeof(*ip6); for (int i = 0; i < len; i++) { bpf_printk("tun_rss_steering: ipv6 %p header hex[%d]: %0x", packet->skb, i, ptr[i]); } } static inline void dump_packet_info(struct packet *packet, struct bpf_config *config) { if (packet->is_ipv4) { bpf_printk("tun_rss_steering ipv4 %p src_addr ip[0-1]: %d.%d", packet->skb, (packet->in4_src) & 0xFF, (packet->in4_src >> 8) & 0xFF); bpf_printk("tun_rss_steering ipv4 %p src_addr ip[2-3]: %d.%d", packet->skb, (packet->in4_src >> 16) & 0xFF, (packet->in4_src >> 24) & 0xFF); bpf_printk("tun_rss_steering ipv4 %p dst_addr ip[0-1]: %d.%d", packet->skb, (packet->in4_dst) & 0xFF, (packet->in4_dst >> 8) & 0xFF); bpf_printk("tun_rss_steering ipv4 %p dst_addr ip[2-3]: %d.%d", packet->skb, (packet->in4_dst >> 16) & 0xFF, (packet->in4_dst >> 24) & 0xFF); bpf_printk("tun_rss_steering ipv4 %p src_port: %d dst_port: %d", packet->skb, bpf_ntohs(packet->src_port), bpf_ntohs(packet->dst_port)); bpf_printk("tun_rss_steering ipv4 %p src_addr_hash: %d dst_addr_hash: %d", packet->skb, packet->src_addr_hash, packet->dst_addr_hash); bpf_printk("tun_rss_steering ipv4 %p src_port_hash: %d dst_port_hash: %d", packet->skb, packet->src_port_hash, packet->dst_port_hash); bpf_printk("tun_rss_steering ipv4 %p last_hash: %d select_queue: %d", packet->skb, packet->last_hash, packet->select_queue); } if (packet->is_ipv6) { bpf_printk("tun_rss_steering ipv6 %p src_addr ip[0-1]: %x:%x", packet->skb, bpf_ntohs(packet->in6_src.s6_addr16[0]), bpf_ntohs(packet->in6_src.s6_addr16[1])); bpf_printk("tun_rss_steering ipv6 %p src_addr ip[2-3]: %x:%x", packet->skb, bpf_ntohs(packet->in6_src.s6_addr16[2]), bpf_ntohs(packet->in6_src.s6_addr16[3])); bpf_printk("tun_rss_steering ipv6 %p src_addr ip[4-5]: %x:%x", packet->skb, bpf_ntohs(packet->in6_src.s6_addr16[4]), bpf_ntohs(packet->in6_src.s6_addr16[5])); bpf_printk("tun_rss_steering ipv6 %p src_addr ip[6-7]: %x:%x", packet->skb, bpf_ntohs(packet->in6_src.s6_addr16[6]), bpf_ntohs(packet->in6_src.s6_addr16[7])); bpf_printk("tun_rss_steering ipv6 %p dst_addr ip[0-1]: %x:%x", packet->skb, bpf_ntohs(packet->in6_dst.s6_addr16[0]), bpf_ntohs(packet->in6_dst.s6_addr16[1])); bpf_printk("tun_rss_steering ipv6 %p dst_addr ip[2-3]: %x:%x", packet->skb, bpf_ntohs(packet->in6_dst.s6_addr16[2]), bpf_ntohs(packet->in6_dst.s6_addr16[3])); bpf_printk("tun_rss_steering ipv6 %p dst_addr ip[4-5]: %x:%x", packet->skb, bpf_ntohs(packet->in6_dst.s6_addr16[4]), bpf_ntohs(packet->in6_dst.s6_addr16[5])); bpf_printk("tun_rss_steering ipv6 %p dst_addr ip[6-7]: %x:%x", packet->skb, bpf_ntohs(packet->in6_dst.s6_addr16[6]), bpf_ntohs(packet->in6_dst.s6_addr16[7])); bpf_printk("tun_rss_steering ipv6 %p src_port: %d dst_port: %d", packet->skb, bpf_ntohs(packet->src_port), bpf_ntohs(packet->dst_port)); bpf_printk("tun_rss_steering ipv6 %p src_addr_hash: %d dst_addr_hash: %d", packet->skb, packet->src_addr_hash, packet->dst_addr_hash); bpf_printk("tun_rss_steering ipv6 %p src_port_hash: %d dst_port_hash: %d", packet->skb, packet->src_port_hash, packet->dst_port_hash); bpf_printk("tun_rss_steering ipv6 %p last_hash: %d select_queue: %d", packet->skb, packet->last_hash, packet->select_queue); } } static inline void dump_config_info(struct bpf_config *config) { if (bpf_config_get_debug_log(config)) { bpf_printk("tun_rss_steering: config->bpf_debug_log %d", bpf_config_get_debug_log(config)); bpf_printk("tun_rss_steering: config->bpf_queue_num %d", bpf_config_get_queue_num(config)); bpf_printk("tun_rss_steering: config->bpf_hash_mode %d", bpf_config_get_hash_mode(config)); } } /* * reutrn 1: 表示不需要处理扩展头 * return 0: 表示需要处理扩展头 */ static inline int ipv6_extension_need_skip(__u8 hdr_type) { /* * TODO * 因为 kni_ipv6_header_parse() 中只处理了以下 4 种 IPv6 扩展头部: * IPPROTO_AH * IPPROTO_HOPOPTS * IPPROTO_ROUTING * IPPROTO_DSTOPTS * * 即 KNI 回流给 TFE 的 IPv6 流量中只支持以上 4 种 IPv6 扩展头部。 * 当 TFE 回注给 KNI 的 IPv6 流量中不会出现其他 IPv6 扩展头部,故此处 BPF 只处理这 4 种 IPv6 扩展头部。 * * 由于 BPF 要支持四元组分流,所以要判断 IPv6 是否分片,故此处要处理 IPPROTO_FRAGMENT IPv6 扩展头部。 */ switch (hdr_type) { case IPPROTO_AH: case IPPROTO_HOPOPTS: case IPPROTO_ROUTING: case IPPROTO_DSTOPTS: case IPPROTO_FRAGMENT: return 0; default: return 1; } } static inline int parse_ipv6_extension(struct packet *packet, __u8 *l4_protocol, int *l4_offset) { if (ipv6_extension_need_skip(*l4_protocol)) { return 0; } struct ipv6_opt_hdr ext_hdr = {0}; for (unsigned int i = 0; i < IP6_EXTENSIONS_COUNT; ++i) { if (bpf_skb_load_bytes_relative(packet->skb, *l4_offset, &ext_hdr, sizeof(ext_hdr), BPF_HDR_START_NET)) { bpf_printk("tun_rss_steering: unable get ipv6 ext header"); return -1; } if (*l4_protocol == IPPROTO_FRAGMENT) { packet->is_fragmented = 1; } *l4_protocol = ext_hdr.nexthdr; *l4_offset += (ext_hdr.hdrlen + 1) * 8; if (ipv6_extension_need_skip(ext_hdr.nexthdr)) { return 0; } } return -1; } static inline int parse_packet(struct packet *packet, struct bpf_config *config) { int l3_offset = 12; int l4_offset = 0; __u8 l4_protocol = 0; __u16 l3_protocol = 0; if (!packet || !packet->skb) { bpf_printk("tun_rss_steering: invalid __sk_buff pointer"); return -1; } if (bpf_skb_load_bytes_relative(packet->skb, l3_offset, &l3_protocol, sizeof(l3_protocol), BPF_HDR_START_MAC)) { bpf_printk("tun_rss_steering: unable get l3 protocol"); return -1; } if (bpf_ntohs(l3_protocol) == ETH_P_IP) { packet->is_ipv4 = 1; struct iphdr ip = {0}; if (bpf_skb_load_bytes_relative(packet->skb, 0, &ip, sizeof(ip), BPF_HDR_START_NET)) { bpf_printk("tun_rss_steering: unable get ipv4 header"); return -1; } packet->in4_src = ip.saddr; packet->in4_dst = ip.daddr; /* * The frag_off portion of the header consists of: * +----+----+----+----------------------------------+ * | RS | DF | MF | ...13 bits of fragment offset... | * +----+----+----+----------------------------------+ * If "More fragments" or the offset is nonzero, then this is an IP fragment (RFC791). */ packet->is_fragmented = !(bpf_ntohs(ip.frag_off) & 0x4000); l4_protocol = ip.protocol; l4_offset = ip.ihl * 4; if (packet->is_fragmented) { bpf_printk("tun_rss_steering: ipv4 %p is fragmented", packet->skb); if (bpf_config_get_debug_log(config)) { dump_ipv4_header(packet, &ip); } return -1; } } else if (bpf_ntohs(l3_protocol) == ETH_P_IPV6) { packet->is_ipv6 = 1; struct ipv6hdr ip6 = {0}; if (bpf_skb_load_bytes_relative(packet->skb, 0, &ip6, sizeof(ip6), BPF_HDR_START_NET)) { bpf_printk("tun_rss_steering: unable get ipv6 header"); return -1; } packet->in6_src = ip6.saddr; packet->in6_dst = ip6.daddr; l4_protocol = ip6.nexthdr; l4_offset = sizeof(ip6); if (parse_ipv6_extension(packet, &l4_protocol, &l4_offset) == -1) { return -1; } if (packet->is_fragmented) { bpf_printk("tun_rss_steering: ipv6 %p is fragmented", packet->skb); if (bpf_config_get_debug_log(config)) { dump_ipv6_packet(packet, &ip6); } return -1; } } else { bpf_printk("tun_rss_steering: invalid l3 protocol %d", bpf_ntohs(l3_protocol)); return -1; } if (l4_protocol == IPPROTO_TCP) { packet->is_tcp = 1; struct tcphdr tcp = {0}; if (bpf_skb_load_bytes_relative(packet->skb, l4_offset, &tcp, sizeof(tcp), BPF_HDR_START_NET)) { bpf_printk("tun_rss_steering: unable get tcp header"); return -1; } packet->src_port = tcp.source; packet->dst_port = tcp.dest; } else if (l4_protocol == IPPROTO_UDP) { packet->is_udp = 1; struct udphdr udp = {0}; if (bpf_skb_load_bytes_relative(packet->skb, l4_offset, &udp, sizeof(udp), BPF_HDR_START_NET)) { bpf_printk("tun_rss_steering: unable get udp header"); return -1; } packet->src_port = udp.source; packet->dst_port = udp.dest; } else { bpf_printk("tun_rss_steering: invalid l4 protocol %d", l4_protocol); return -1; } return 0; } static inline void select_rss_queue(struct packet *packet, struct bpf_config *config) { packet->select_queue = -1; if (packet->is_ipv4) { if (bpf_config_get_hash_mode(config) == BPF_HASH_MODE_TUPLE4) { packet->src_addr_hash = crc32_hash(&packet->in4_src, 4, 0); packet->dst_addr_hash = crc32_hash(&packet->in4_dst, 4, 0); packet->last_hash = packet->src_addr_hash ^ packet->dst_addr_hash; packet->src_port_hash = crc32_hash(&packet->src_port, 2, packet->last_hash); packet->dst_port_hash = crc32_hash(&packet->dst_port, 2, packet->last_hash); packet->last_hash = packet->src_port_hash ^ packet->dst_port_hash; packet->select_queue = packet->last_hash % bpf_config_get_queue_num(config); } else if (bpf_config_get_hash_mode(config) == BPF_HASH_MODE_TUPLE2) { packet->src_addr_hash = crc32_hash(&packet->in4_src, 4, 0); packet->dst_addr_hash = crc32_hash(&packet->in4_dst, 4, 0); packet->last_hash = packet->src_addr_hash ^ packet->dst_addr_hash; packet->select_queue = packet->last_hash % bpf_config_get_queue_num(config); } } if (packet->is_ipv6) { if (bpf_config_get_hash_mode(config) == BPF_HASH_MODE_TUPLE4) { packet->src_addr_hash = crc32_hash(&packet->in6_src, 16, 0); packet->dst_addr_hash = crc32_hash(&packet->in6_dst, 16, 0); packet->last_hash = packet->src_addr_hash ^ packet->dst_addr_hash; packet->src_port_hash = crc32_hash(&packet->src_port, 2, packet->last_hash); packet->dst_port_hash = crc32_hash(&packet->dst_port, 2, packet->last_hash); packet->last_hash = packet->src_port_hash ^ packet->dst_port_hash; packet->select_queue = packet->last_hash % bpf_config_get_queue_num(config); } else if (bpf_config_get_hash_mode(config) == BPF_HASH_MODE_TUPLE2) { packet->src_addr_hash = crc32_hash(&packet->in6_src, 16, 0); packet->dst_addr_hash = crc32_hash(&packet->in6_dst, 16, 0); packet->last_hash = packet->src_addr_hash ^ packet->dst_addr_hash; packet->select_queue = packet->last_hash % bpf_config_get_queue_num(config); } } } SEC("tun_rss_steering") int bpf_tun_rss_steering(struct __sk_buff *skb) { struct packet packet = {0}; struct bpf_config config = {0}; bpf_config_lookup_map(&config); dump_config_info(&config); if (bpf_config_get_queue_num(&config) <= 0) { bpf_printk("tun_rss_steering: invalid queue num %d", bpf_config_get_queue_num(&config)); return 0; } if (bpf_config_get_hash_mode(&config) != BPF_HASH_MODE_TUPLE2 && bpf_config_get_hash_mode(&config) != BPF_HASH_MODE_TUPLE4) { bpf_printk("tun_rss_steering: invalid hash mode %d", bpf_config_get_hash_mode(&config)); return 0; } packet.is_ipv4 = 0; packet.is_ipv6 = 0; packet.is_fragmented = 0; packet.skb = skb; if (parse_packet(&packet, &config) == -1) { return 0; } select_rss_queue(&packet, &config); if (bpf_config_get_debug_log(&config)) { dump_packet_info(&packet, &config); } return packet.select_queue; } char _license[] SEC("license") = "GPL";