#include #include #include #include #define __FAVOR_BSD 1 #include #include #include "mpack.h" #include #include "log.h" #include "sce.h" #include "utils.h" #include "vxlan.h" #include "packet_io.h" #include "sf_metrics.h" #include "control_packet.h" #include "global_metrics.h" #define RX_BURST_MAX 128 #define MR_MASK_DECRYPTED 0x01 /****************************************************************************** * struct ******************************************************************************/ struct config { int bypass_traffic; int rx_burst_max; int min_timeout_ms; char app_symbol[256]; // dev_nf char dev_nf_name[256]; // dev_endpoint_l3 char dev_endpoint_l3_name[256]; char dev_endpoint_l3_ip_str[16]; char dev_endpoint_l3_mac_str[32]; in_addr_t dev_endpoint_l3_ip; u_char dev_endpoint_l3_mac[ETH_ALEN]; // dev_endpoint_l2 char dev_endpoint_l2_name[256]; int vlan_encapsulate_replace_orig_vlan_header; }; struct device { struct mr_vdev *mr_dev; struct mr_sendpath *mr_path; }; struct packet_io { int thread_num; struct mr_instance *instance; struct device dev_nf; struct device dev_endpoint_l3; struct device dev_endpoint_l2; struct config config; }; /****************************************************************************** * metadata ******************************************************************************/ static inline void sids_copy(struct sids *dst, struct sids *src) { if (dst && src) { dst->num = src->num; memcpy(dst->elems, src->elems, sizeof(dst->elems[0]) * dst->num); } } static inline void route_ctx_copy(struct route_ctx *dst, struct route_ctx *src) { memcpy(dst->data, src->data, src->len); dst->len = src->len; } static inline void route_ctx_copy_once(struct route_ctx *dst, struct route_ctx *src) { if (dst->len == 0) { memcpy(dst->data, src->data, src->len); dst->len = src->len; } } void sce_packet_get_innermost_tuple(const struct packet *handler, struct four_tuple *tuple) { memset(tuple, 0, sizeof(struct four_tuple)); if (packet_get_innermost_four_tuple(handler, tuple) == -1) { packet_get_innermost_two_tuple(handler, &tuple->two_tuple); } } // return 0 : success // return -1 : error int mbuff_get_metadata(marsio_buff_t *rx_buff, struct metadata *meta) { if (marsio_buff_get_metadata(rx_buff, MR_BUFF_SESSION_ID, &(meta->session_id), sizeof(meta->session_id)) <= 0) { LOG_ERROR("%s: unable to get session_id from metadata", LOG_TAG_PKTIO); return -1; } if (marsio_buff_get_metadata(rx_buff, MR_BUFF_REHASH_INDEX, &(meta->rehash_index), sizeof(meta->rehash_index)) <= 0) { LOG_ERROR("%s: unable to get rehash_index from metadata", LOG_TAG_PKTIO); return -1; } // 1: E2I // 0: I2E if (marsio_buff_get_metadata(rx_buff, MR_BUFF_DIR, &(meta->direction), sizeof(meta->direction)) <= 0) { LOG_ERROR("%s: unable to get buff_dir from metadata", LOG_TAG_PKTIO); return -1; } if (meta->is_ctrl_pkt) { if (marsio_buff_get_metadata(rx_buff, MR_BUFF_PAYLOAD_OFFSET, &(meta->l7offset), sizeof(meta->l7offset)) <= 0) { LOG_ERROR("%s: unable to get l7offset from metadata", LOG_TAG_PKTIO); return -1; } } else { uint16_t user_data = 0; if (marsio_buff_get_metadata(rx_buff, MR_BUFF_USER_0, &user_data, sizeof(user_data)) <= 0) { LOG_ERROR("%s: unable to get is_decrypted from metadata", LOG_TAG_PKTIO); return -1; } if (user_data & MR_MASK_DECRYPTED) { meta->is_decrypted = 1; } else { meta->is_decrypted = 0; } } meta->sids.num = marsio_buff_get_sid_list(rx_buff, meta->sids.elems, sizeof(meta->sids.elems) / sizeof(meta->sids.elems[0])); if (meta->sids.num < 0) { LOG_ERROR("%s: unable to get sid_list from metadata", LOG_TAG_PKTIO); return -1; } meta->route_ctx.len = marsio_buff_get_metadata(rx_buff, MR_BUFF_ROUTE_CTX, meta->route_ctx.data, sizeof(meta->route_ctx.data)); if (meta->route_ctx.len <= 0) { LOG_ERROR("%s: unable to get route_ctx from metadata", LOG_TAG_PKTIO); return -1; } return 0; } // return 0 : success // return -1 : error int mbuff_set_metadata(marsio_buff_t *tx_buff, struct metadata *meta) { if (meta->session_id) { if (marsio_buff_set_metadata(tx_buff, MR_BUFF_SESSION_ID, &(meta->session_id), sizeof(meta->session_id)) != 0) { LOG_ERROR("%s: unable to set session_id for metadata", LOG_TAG_PKTIO); return -1; } } // need't set MR_BUFF_DIR, set MR_BUFF_ROUTE_CTX instead if (meta->is_ctrl_pkt) { marsio_buff_set_ctrlbuf(tx_buff); if (marsio_buff_set_metadata(tx_buff, MR_BUFF_PAYLOAD_OFFSET, &(meta->l7offset), sizeof(meta->l7offset)) != 0) { LOG_ERROR("%s: unable to set l7offset for metadata", LOG_TAG_PKTIO); return -1; } } else { uint16_t user_data = 0; if (meta->is_decrypted) { user_data = MR_MASK_DECRYPTED; } if (marsio_buff_set_metadata(tx_buff, MR_BUFF_USER_0, &user_data, sizeof(user_data)) != 0) { LOG_ERROR("%s: unable to set is_decrypted for metadata", LOG_TAG_PKTIO); return -1; } } if (meta->sids.num > 0) { if (marsio_buff_set_sid_list(tx_buff, meta->sids.elems, meta->sids.num) != 0) { LOG_ERROR("%s: unable to set sid_list for metadata", LOG_TAG_PKTIO); return -1; } } if (meta->route_ctx.len > 0) { if (marsio_buff_set_metadata(tx_buff, MR_BUFF_ROUTE_CTX, meta->route_ctx.data, meta->route_ctx.len) != 0) { LOG_ERROR("%s: unable to set route_ctx for metadata", LOG_TAG_PKTIO); return -1; } } return 0; } static void update_session_by_metadata(struct session_ctx *ctx, struct metadata *meta) { struct sids *e2i_sids = NULL; struct sids *i2e_sids = NULL; struct route_ctx *e2i_route_ctx = NULL; struct route_ctx *i2e_route_ctx = NULL; if (meta->is_decrypted) { e2i_sids = &ctx->decrypted_e2i_sids; i2e_sids = &ctx->decrypted_i2e_sids; e2i_route_ctx = &ctx->decrypted_e2i_route_ctx; i2e_route_ctx = &ctx->decrypted_i2e_route_ctx; } else { e2i_sids = &ctx->raw_e2i_sids; i2e_sids = &ctx->raw_i2e_sids; e2i_route_ctx = &ctx->raw_e2i_route_ctx; i2e_route_ctx = &ctx->raw_i2e_route_ctx; } // 1: E2I // 0: I2E if (meta->direction) { route_ctx_copy_once(e2i_route_ctx, &meta->route_ctx); sids_copy(e2i_sids, &meta->sids); } else { route_ctx_copy_once(i2e_route_ctx, &meta->route_ctx); sids_copy(i2e_sids, &meta->sids); } } static void update_metadata_by_session(struct session_ctx *ctx, struct metadata *meta) { struct sids *sids = NULL; struct route_ctx *route_ctx = NULL; meta->session_id = ctx->session_id; // 1: E2I // 0: I2E if (meta->direction) { if (meta->is_decrypted) { sids = &(ctx->decrypted_e2i_sids); route_ctx = &(ctx->decrypted_e2i_route_ctx); } else { sids = &(ctx->raw_e2i_sids); route_ctx = &(ctx->raw_e2i_route_ctx); } } else { if (meta->is_decrypted) { sids = &(ctx->decrypted_i2e_sids); route_ctx = &(ctx->decrypted_i2e_route_ctx); } else { sids = &(ctx->raw_i2e_sids); route_ctx = &(ctx->raw_i2e_route_ctx); } } sids_copy(&meta->sids, sids); route_ctx_copy(&meta->route_ctx, route_ctx); } /****************************************************************************** * keepalive ******************************************************************************/ // return 0 : not keepalive packet // return 1 : is keepalive packet static inline int is_downlink_keepalive_packet(marsio_buff_t *rx_buff, int raw_len) { char *raw_data = marsio_buff_mtod(rx_buff); if (raw_data == NULL || raw_len < (int)(sizeof(struct ethhdr))) { return 0; } struct ethhdr *eth_hdr = (struct ethhdr *)raw_data; if (eth_hdr->h_proto == 0xAAAA) { return 1; } else { return 0; } } // return 0 : not keepalive packet // return 1 : is keepalive packet static inline int is_uplink_keepalive_packet(marsio_buff_t *rx_buff, int raw_len) { char *raw_data = marsio_buff_mtod(rx_buff); if (raw_data == NULL || raw_len < (int)(sizeof(struct ethhdr) + sizeof(struct ip) + sizeof(struct udphdr))) { return 0; } struct ethhdr *eth_hdr = (struct ethhdr *)raw_data; if (eth_hdr->h_proto != htons(ETH_P_IP)) { return 0; } struct ip *ip_hdr = (struct ip *)((char *)eth_hdr + sizeof(struct ethhdr)); if (ip_hdr->ip_p != IPPROTO_UDP) { return 0; } struct udphdr *udp_hdr = (struct udphdr *)((char *)ip_hdr + sizeof(struct ip)); if (udp_hdr->uh_dport != htons(3784)) { return 0; } return 1; } /****************************************************************************** * search session ctx ******************************************************************************/ // return !NULL // return NULL static struct session_ctx *data_packet_search_session(struct session_table *table, const char *raw_data, int raw_len, uint64_t session_id) { struct four_tuple inner_addr; struct four_tuple reverse_addr; struct packet data_pkt; packet_parse(&data_pkt, raw_data, raw_len); sce_packet_get_innermost_tuple(&data_pkt, &inner_addr); four_tuple_reverse(&inner_addr, &reverse_addr); struct session_ctx *session_ctx = (struct session_ctx *)session_table_search_by_id(table, session_id); if (session_ctx == NULL) { return NULL; } if (memcmp(&session_ctx->inner_tuple4, &inner_addr, sizeof(struct four_tuple)) != 0 && memcmp(&session_ctx->inner_tuple4, &reverse_addr, sizeof(struct four_tuple)) != 0) { char *addr_str = four_tuple_tostring(&inner_addr); LOG_ERROR("%s: unexpected raw packet, session %lu expected address tuple4 is %s, but current packet's address tuple4 is %s, bypass !!!", LOG_TAG_PKTIO, session_ctx->session_id, session_ctx->session_addr, addr_str); free(addr_str); return NULL; } return session_ctx; } // return !NULL // return NULL static struct session_ctx *inject_packet_search_session(struct session_table *table, const char *raw_data, int raw_len) { struct four_tuple inner_addr; struct packet data_pkt; packet_parse(&data_pkt, raw_data, raw_len); sce_packet_get_innermost_tuple(&data_pkt, &inner_addr); struct session_ctx *session_ctx = (struct session_ctx *)session_table_search_by_addr(table, &inner_addr); if (session_ctx == NULL) { char *addr_str = four_tuple_tostring(&inner_addr); LOG_ERROR("%s: unexpected inject packet, unable to find session %s from session table, drop !!!", LOG_TAG_PKTIO, addr_str); free(addr_str); return NULL; } return session_ctx; } /****************************************************************************** * action bypass/block/forward ******************************************************************************/ struct vlan_hdr { uint16_t vlan_cfi; uint16_t protocol; } __attribute__((__packed__)); static void build_vlan_header(struct vlan_hdr *vlan_hdr, uint16_t vlan_id, uint16_t protocol) { vlan_hdr->vlan_cfi = 0; vlan_hdr->vlan_cfi = htons(vlan_id & 0xFFF); vlan_hdr->protocol = htons(protocol); } static void overwrite_vlan_id(struct vlan_hdr *vlan_hdr, uint16_t vlan_id) { vlan_hdr->vlan_cfi = 0; vlan_hdr->vlan_cfi = htons(vlan_id & 0xFFF); } void vlan_encapsulate(marsio_buff_t *mbuff, int vlan_id, int replace_orig_vlan_header) { struct ethhdr *old_eth_hdr = (struct ethhdr *)marsio_buff_mtod(mbuff); if (replace_orig_vlan_header == 0) { append: struct ethhdr *new_eth_hdr = (struct ethhdr *)marsio_buff_prepend(mbuff, sizeof(struct vlan_hdr)); char *dst = (char *)new_eth_hdr; char *src = (char *)old_eth_hdr; memcpy(dst, src, 4); memcpy(dst + 4, src + 4, 4); memcpy(dst + 8, src + 8, 4); new_eth_hdr->h_proto = htons(ETH_P_8021Q); struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)((char *)new_eth_hdr + sizeof(struct ethhdr)); build_vlan_header(vlan_hdr, vlan_id, ntohs(old_eth_hdr->h_proto)); return; } else { uint16_t next_proto = old_eth_hdr->h_proto; char *start_layer = (char *)old_eth_hdr + sizeof(struct ethhdr); char *next_layer = start_layer; while (next_proto == htons(ETH_P_8021Q) || next_proto == htons(ETH_P_8021AD)) { struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)next_layer; next_proto = vlan_hdr->protocol; next_layer += sizeof(struct vlan_hdr); } // No vlan header found uint64_t offset = next_layer - start_layer; if (offset == 0) { goto append; } // Find a layer of vlan header if (offset == sizeof(struct vlan_hdr)) { struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)start_layer; overwrite_vlan_id(vlan_hdr, vlan_id); return; } // Find the multi-layer vlan header if (offset > sizeof(struct vlan_hdr)) { struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(next_layer - sizeof(struct vlan_hdr)); struct ethhdr *new_eth_hdr = (struct ethhdr *)((char *)vlan_hdr - sizeof(struct ethhdr)); overwrite_vlan_id(vlan_hdr, vlan_id); memmove(new_eth_hdr, (char *)old_eth_hdr, sizeof(struct ethhdr)); new_eth_hdr->h_proto = htons(ETH_P_8021Q); marsio_buff_adj(mbuff, offset - sizeof(struct vlan_hdr)); return; } assert(0); } } static inline int send_packet_to_sf(struct session_ctx *session_ctx, marsio_buff_t *mbuff, struct metadata *meta, struct selected_sf *sf, struct thread_ctx *thread_ctx) { int nsend = 0; char *buffer = NULL; struct packet_io *packet_io = thread_ctx->ref_io; struct thread_metrics *thread_metrics = &thread_ctx->thread_metrics; uint32_t rehash_index = session_ctx->rehash_index; marsio_buff_ctrlzone_reset(mbuff); switch (sf->sf_connectivity.method) { case ENCAPSULATE_METHOD_VXLAN_G: thread_ctx->tx_packets_ipid++; buffer = marsio_buff_prepend(mbuff, VXLAN_FRAME_HDR_LEN); vxlan_frame_encode(buffer, packet_io->config.dev_endpoint_l3_mac, sf->sf_dst_mac, packet_io->config.dev_endpoint_l3_ip, sf->sf_dst_ip, thread_ctx->tx_packets_ipid % 65535, session_ctx->vxlan_src_port, meta->raw_len, meta->direction, meta->is_decrypted, sf->sf_index); nsend = marsio_buff_datalen(mbuff); marsio_buff_set_metadata(mbuff, MR_BUFF_REHASH_INDEX, &rehash_index, sizeof(rehash_index)); marsio_send_burst(packet_io->dev_endpoint_l3.mr_path, thread_ctx->thread_index, &mbuff, 1); THROUGHPUT_METRICS_INC(&(thread_metrics->endpoint_vxlan_tx), 1, nsend); break; case ENCAPSULATE_METHOD_LAYER2_SWITCH: vlan_encapsulate(mbuff, meta->direction ? sf->sf_connectivity.ext_vlan_tag : sf->sf_connectivity.int_vlan_tag, packet_io->config.vlan_encapsulate_replace_orig_vlan_header); nsend = marsio_buff_datalen(mbuff); marsio_buff_set_metadata(mbuff, MR_BUFF_REHASH_INDEX, &rehash_index, sizeof(rehash_index)); marsio_send_burst(packet_io->dev_endpoint_l2.mr_path, thread_ctx->thread_index, &mbuff, 1); THROUGHPUT_METRICS_INC(&(thread_metrics->endpoint_vlan_tx), 1, nsend); break; case ENCAPSULATE_METHOD_LAYER3_SWITCH: // TODO break; default: break; } return nsend; } static inline int action_nf_inject(marsio_buff_t *rx_buff, struct metadata *meta, struct selected_sf *sf, struct thread_ctx *thread_ctx); static inline void action_err_bypass(marsio_buff_t *rx_buff, struct metadata *meta, struct selected_sf *sf, struct thread_ctx *thread_ctx) { struct thread_metrics *thread_metrics = &thread_ctx->thread_metrics; int nsend = action_nf_inject(rx_buff, meta, sf, thread_ctx); if (nsend > 0) { THROUGHPUT_METRICS_INC(&(thread_metrics->error_bypass), 1, nsend); } } static inline void action_err_block(marsio_buff_t *rx_buff, struct metadata *meta, struct selected_sf *sf, struct thread_ctx *thread_ctx) { struct thread_metrics *thread_metrics = &thread_ctx->thread_metrics; struct packet_io *packet_io = thread_ctx->ref_io; int thread_index = thread_ctx->thread_index; THROUGHPUT_METRICS_INC(&(thread_metrics->error_block), 1, meta->raw_len); marsio_buff_free(packet_io->instance, &rx_buff, 1, 0, thread_index); } // return nsend static inline int action_nf_inject(marsio_buff_t *rx_buff, struct metadata *meta, struct selected_sf *sf, struct thread_ctx *thread_ctx) { struct thread_metrics *thread_metrics = &thread_ctx->thread_metrics; struct packet_io *packet_io = thread_ctx->ref_io; int thread_index = thread_ctx->thread_index; marsio_buff_ctrlzone_reset(rx_buff); if (mbuff_set_metadata(rx_buff, meta) != 0) { action_err_block(rx_buff, meta, sf, thread_ctx); return 0; } marsio_send_burst(packet_io->dev_nf.mr_path, thread_index, &rx_buff, 1); THROUGHPUT_METRICS_INC(&(thread_metrics->nf_tx), 1, meta->raw_len); return meta->raw_len; } static inline void action_mirr_bypass(marsio_buff_t *rx_buff, struct metadata *meta, struct selected_sf *sf, struct thread_ctx *thread_ctx) { struct thread_metrics *thread_metrics = &thread_ctx->thread_metrics; THROUGHPUT_METRICS_INC(&(thread_metrics->mirr_bypass), 1, meta->raw_len); } static inline void action_mirr_block(marsio_buff_t *rx_buff, struct metadata *meta, struct selected_sf *sf, struct thread_ctx *thread_ctx) { struct thread_metrics *thread_metrics = &thread_ctx->thread_metrics; THROUGHPUT_METRICS_INC(&(thread_metrics->mirr_block), 1, meta->raw_len); } static inline void action_mirr_forward(struct session_ctx *session_ctx, marsio_buff_t *rx_buff, struct metadata *meta, struct selected_sf *sf, struct thread_ctx *thread_ctx) { struct thread_metrics *thread_metrics = &thread_ctx->thread_metrics; struct packet_io *packet_io = thread_ctx->ref_io; int thread_index = thread_ctx->thread_index; char *raw_data = marsio_buff_mtod(rx_buff); marsio_buff_t *new_buff = NULL; if (marsio_buff_malloc_global(packet_io->instance, &new_buff, 1, MARSIO_SOCKET_ID_ANY, MARSIO_LCORE_ID_ANY) < 0) { LOG_ERROR("%s: unable to malloc buff on marsio instance, thread_index: %d", LOG_TAG_PKTIO, thread_index); return; } char *copy_ptr = marsio_buff_append(new_buff, meta->raw_len); memcpy(copy_ptr, raw_data, meta->raw_len); int nsend = send_packet_to_sf(session_ctx, new_buff, meta, sf, thread_ctx); THROUGHPUT_METRICS_INC(&(thread_metrics->mirr_tx), 1, meta->raw_len); THROUGHPUT_METRICS_INC(&sf->tx, 1, nsend); struct sf_metrics_key key = {0}; key.rule_id = sf->rule_id; key.sff_profile_id = sf->sff_profile_id; key.sf_profile_id = sf->sf_profile_id; key.vsys_id = sf->rule_vsys_id; sf_metrics_inc(thread_ctx->sf_metrics, &key, 0, 0, 1, nsend); } static inline void action_stee_bypass(marsio_buff_t *rx_buff, struct metadata *meta, struct selected_sf *sf, struct thread_ctx *thread_ctx) { struct thread_metrics *thread_metrics = &thread_ctx->thread_metrics; THROUGHPUT_METRICS_INC(&(thread_metrics->stee_bypass), 1, meta->raw_len); } static inline void action_stee_block(marsio_buff_t *rx_buff, struct metadata *meta, struct selected_sf *sf, struct thread_ctx *thread_ctx) { struct thread_metrics *thread_metrics = &thread_ctx->thread_metrics; struct packet_io *packet_io = thread_ctx->ref_io; int thread_index = thread_ctx->thread_index; THROUGHPUT_METRICS_INC(&(thread_metrics->stee_block), 1, meta->raw_len); marsio_buff_free(packet_io->instance, &rx_buff, 1, 0, thread_index); } static inline void action_stee_forward(struct session_ctx *session_ctx, marsio_buff_t *rx_buff, struct metadata *meta, struct selected_sf *sf, struct thread_ctx *thread_ctx) { struct thread_metrics *thread_metrics = &thread_ctx->thread_metrics; int nsend = send_packet_to_sf(session_ctx, rx_buff, meta, sf, thread_ctx); THROUGHPUT_METRICS_INC(&(thread_metrics->stee_tx), 1, meta->raw_len); THROUGHPUT_METRICS_INC(&sf->tx, 1, nsend); struct sf_metrics_key key = {0}; key.rule_id = sf->rule_id; key.sff_profile_id = sf->sff_profile_id; key.sf_profile_id = sf->sf_profile_id; key.vsys_id = sf->rule_vsys_id; sf_metrics_inc(thread_ctx->sf_metrics, &key, 0, 0, 1, nsend); } static void action_sf_chaining(struct thread_ctx *thread_ctx, struct session_ctx *session_ctx, struct selected_chaining *chaining, marsio_buff_t *rx_buff, struct metadata *meta, int next_sf_index) { int sf_index; for (sf_index = next_sf_index; sf_index < chaining->chaining_used; sf_index++) { struct selected_sf *sf = &(chaining->chaining[sf_index]); LOG_DEBUG("%s: session: %lu %s execute chaining [%d/%d]: policy %lu->%d->%d, action %s->%s->%s->%s", LOG_TAG_POLICY, session_ctx->session_id, session_ctx->session_addr, sf_index, chaining->chaining_used, sf->rule_id, sf->sff_profile_id, sf->sf_profile_id, (meta->is_decrypted ? "decrypted" : "raw"), (meta->direction ? "E2I" : "I2E"), forward_type_tostring(sf->sff_forward_type), action_desc_tostring(sf->sf_action_desc)); switch (sf->sf_action) { case SESSION_ACTION_BYPASS: if (sf->sff_forward_type == FORWARD_TYPE_STEERING) { action_stee_bypass(rx_buff, meta, sf, thread_ctx); continue; } else { action_mirr_bypass(rx_buff, meta, sf, thread_ctx); continue; } case SESSION_ACTION_BLOCK: if (sf->sff_forward_type == FORWARD_TYPE_STEERING) { action_stee_block(rx_buff, meta, sf, thread_ctx); return; } else { action_mirr_block(rx_buff, meta, sf, thread_ctx); action_nf_inject(rx_buff, meta, NULL, thread_ctx); return; } case SESSION_ACTION_FORWARD: if (sf->sf_connectivity.method != ENCAPSULATE_METHOD_VXLAN_G && sf->sf_connectivity.method != ENCAPSULATE_METHOD_LAYER2_SWITCH) { LOG_ERROR("%s: processing packets, session %lu %s requires encapsulation format not supported, bypass !!!", LOG_TAG_PKTIO, session_ctx->session_id, session_ctx->session_addr); action_err_bypass(rx_buff, meta, sf, thread_ctx); return; } if (sf->sff_forward_type == FORWARD_TYPE_STEERING) { action_stee_forward(session_ctx, rx_buff, meta, sf, thread_ctx); return; } else { action_mirr_forward(session_ctx, rx_buff, meta, sf, thread_ctx); continue; } } } if (sf_index == chaining->chaining_used) { action_nf_inject(rx_buff, meta, NULL, thread_ctx); } } /****************************************************************************** * handle session status ******************************************************************************/ static int send_ctrl_packet(struct session_ctx *session_ctx, struct thread_ctx *thread_ctx) { struct sce_ctx *sce_ctx = thread_ctx->ref_sce_ctx; struct packet_io *packet_io = thread_ctx->ref_io; struct mutable_array *rule_ids = &session_ctx->rule_ids; struct selected_chaining *chaining_raw = session_ctx->chaining_raw; struct selected_chaining *chaining_decrypted = session_ctx->chaining_decrypted; int thread_index = thread_ctx->thread_index; int sc_rsp_raw_exist = 0; int sc_rsp_decrypted_exist = 0; char *data; size_t size; mpack_writer_t writer; mpack_writer_init_growable(&writer, &data, &size); // write the example on the msgpack homepage mpack_build_map(&writer); // root begin // tsync mpack_write_cstr(&writer, "tsync"); mpack_write_cstr(&writer, "2.0"); // session_id mpack_write_cstr(&writer, "session_id"); mpack_write_u64(&writer, session_ctx->session_id); // state mpack_write_cstr(&writer, "state"); mpack_write_cstr(&writer, "active"); // method mpack_write_cstr(&writer, "method"); mpack_write_cstr(&writer, "log_update"); // params { mpack_write_cstr(&writer, "params"); mpack_build_map(&writer); // params value begin // sce { mpack_write_cstr(&writer, "sce"); mpack_build_map(&writer); // sce value begin { mpack_write_cstr(&writer, "sc_rule_list"); mpack_build_array(&writer); // sc_rule_list begin for (int i = 0; i < rule_ids->num; i++) { mpack_write_u64(&writer, mutable_array_index_elem(rule_ids, i)); } mpack_complete_array(&writer); // sc_rule_list end } { for (int i = 0; i < chaining_raw->chaining_used; i++) { struct selected_sf *sf = &(chaining_raw->chaining[i]); if (sf->sf_action == SESSION_ACTION_FORWARD) { if (sc_rsp_raw_exist == 0) { mpack_write_cstr(&writer, "sc_rsp_raw"); mpack_build_array(&writer); // sc_rsp_raw begin sc_rsp_raw_exist = 1; } mpack_write_u64(&writer, sf->sf_profile_id); } } if (sc_rsp_raw_exist == 1) { mpack_complete_array(&writer); // sc_rsp_raw end } } { for (int i = 0; i < chaining_decrypted->chaining_used; i++) { struct selected_sf *sf = &(chaining_decrypted->chaining[i]); if (sf->sf_action == SESSION_ACTION_FORWARD) { if (sc_rsp_decrypted_exist == 0) { mpack_write_cstr(&writer, "sc_rsp_decrypted"); mpack_build_array(&writer); // sc_rsp_decrypted begin sc_rsp_decrypted_exist = 1; } mpack_write_u64(&writer, sf->sf_profile_id); } } if (sc_rsp_decrypted_exist == 1) { mpack_complete_array(&writer); // sc_rsp_decrypted end } } mpack_complete_map(&writer); // sce value end } mpack_complete_map(&writer); // params value end } mpack_complete_map(&writer); // root end // finish writing if (mpack_writer_destroy(&writer) != mpack_ok) { assert(0); if (data) { free(data); data = NULL; } return 0; } LOG_INFO("%s: session %lu %s send event log %ld bytes", LOG_TAG_SFMETRICS, session_ctx->session_id, session_ctx->session_addr, size); marsio_buff_t *tx_buffs[1]; const char *packet_header_data = session_ctx->ctrl_pkt_hdr_ptr; int packet_header_len = session_ctx->ctrl_pkt_hdr_len; marsio_buff_malloc_global(packet_io->instance, tx_buffs, 1, 0, thread_index); char *dst = marsio_buff_append(tx_buffs[0], packet_header_len + size); memcpy(dst, packet_header_data, packet_header_len); memcpy(dst + packet_header_len, data, size); struct metadata meta = {0}; meta.session_id = session_ctx->session_id; meta.l7offset = packet_header_len; meta.is_ctrl_pkt = 1; meta.sids.num = 1; meta.sids.elems[0] = sce_ctx->firewall_sids; route_ctx_copy(&meta.route_ctx, &(session_ctx->ctrl_route_ctx)); mbuff_set_metadata(tx_buffs[0], &meta); int nsend = marsio_buff_datalen(tx_buffs[0]); marsio_send_burst(packet_io->dev_nf.mr_path, thread_index, tx_buffs, 1); free(data); return nsend; } static void send_event_log(struct session_ctx *session_ctx, struct thread_ctx *thread_ctx) { int nsend = 0; struct sce_ctx *sce_ctx = thread_ctx->ref_sce_ctx; struct thread_metrics *thread_metrics = &thread_ctx->thread_metrics; if (sce_ctx->enable_send_log) { nsend = send_ctrl_packet(session_ctx, thread_ctx); if (nsend > 0) { ATOMIC_INC(&(thread_metrics->session_log)); THROUGHPUT_METRICS_INC(&(thread_metrics->ctrl_tx), 1, nsend); THROUGHPUT_METRICS_INC(&(thread_metrics->nf_tx), 1, nsend); } } } static void dump_sf_metrics(struct session_ctx *session_ctx, struct selected_chaining *chaining) { if (chaining == NULL) { return; } for (int i = 0; i < chaining->chaining_used; i++) { struct selected_sf *sf = &(chaining->chaining[i]); LOG_INFO("%s: session %lu %s metrics: policy %lu->%d->%d action %s->%s->%s rx_pkts %lu rx_bytes %lu tx_pkts %lu tx_bytes %lu", LOG_TAG_SFMETRICS, session_ctx->session_id, session_ctx->session_addr, sf->rule_id, sf->sff_profile_id, sf->sf_profile_id, traffic_type_tostring(sf->traffic_type), forward_type_tostring(sf->sff_forward_type), action_desc_tostring(sf->sf_action_desc), sf->rx.n_pkts, sf->rx.n_bytes, sf->tx.n_pkts, sf->tx.n_bytes); } } static void session_value_free_cb(void *ctx) { struct session_ctx *s_ctx = (struct session_ctx *)ctx; session_ctx_free(s_ctx); } static void handle_policy_mutil_hits(struct session_ctx *session_ctx, struct control_packet *ctrl_pkt, packet *data_pkt, int direction, struct thread_ctx *thread_ctx) { struct policy_enforcer *enforcer = thread_ctx->ref_enforcer; struct sce_ctx *sce_ctx = thread_ctx->ref_sce_ctx; for (int i = 0; i < ctrl_pkt->rule_id_num; i++) { uint64_t rule_id = ctrl_pkt->rule_ids[i]; if (mutable_array_exist_elem(&session_ctx->rule_ids, rule_id)) { continue; } else { policy_enforce_select_chainings(enforcer, session_ctx, data_pkt, rule_id, direction); if (sce_ctx->enable_debug) { selected_chaining_bref(session_ctx->chaining_raw); selected_chaining_bref(session_ctx->chaining_decrypted); } mutable_array_add_elem(&session_ctx->rule_ids, rule_id); } } } static void handle_session_closing(struct metadata *meta, struct control_packet *ctrl_pkt, struct thread_ctx *thread_ctx) { struct thread_metrics *thread_metrics = &thread_ctx->thread_metrics; struct session_table *session_table = thread_ctx->session_table; struct session_ctx *s_ctx = (struct session_ctx *)session_table_search_by_id(session_table, meta->session_id); if (s_ctx) { LOG_INFO("%s: session %lu %s closing", LOG_TAG_PKTIO, s_ctx->session_id, s_ctx->session_addr); dump_sf_metrics(s_ctx, s_ctx->chaining_raw); dump_sf_metrics(s_ctx, s_ctx->chaining_decrypted); session_table_delete_by_id(session_table, meta->session_id); ATOMIC_DEC(&(thread_metrics->session_num)); } } static void handle_session_active(struct metadata *meta, struct control_packet *ctrl_pkt, struct thread_ctx *thread_ctx) { struct session_table *session_table = thread_ctx->session_table; struct thread_metrics *thread_metrics = &thread_ctx->thread_metrics; struct policy_enforcer *enforcer = thread_ctx->ref_enforcer; struct sce_ctx *sce_ctx = thread_ctx->ref_sce_ctx; struct session_ctx *session_ctx = (struct session_ctx *)session_table_search_by_id(session_table, meta->session_id); if (session_ctx) { struct packet data_pkt; const char *payload = packet_parse(&data_pkt, meta->raw_data, meta->raw_len); uint16_t real_offset = payload - meta->raw_data; if (real_offset != meta->l7offset) { LOG_ERROR("%s: incorrect dataoffset %d in the control zone of session %lu %s, the expect value is %d", LOG_TAG_PKTIO, meta->l7offset, meta->session_id, session_ctx->session_addr, real_offset); } LOG_INFO("%s: session %lu %s active again", LOG_TAG_PKTIO, session_ctx->session_id, session_ctx->session_addr); handle_policy_mutil_hits(session_ctx, ctrl_pkt, &data_pkt, meta->direction, thread_ctx); send_event_log(session_ctx, thread_ctx); } else { struct packet data_pkt; struct four_tuple inner_tuple4; const char *payload = packet_parse(&data_pkt, meta->raw_data, meta->raw_len); sce_packet_get_innermost_tuple(&data_pkt, &inner_tuple4); uint16_t real_offset = payload - meta->raw_data; if (real_offset != meta->l7offset) { char *addr_str = four_tuple_tostring(&inner_tuple4); LOG_ERROR("%s: incorrect dataoffset %d in the control zone of session %lu %s, the expect value is %d", LOG_TAG_PKTIO, meta->l7offset, meta->session_id, addr_str, real_offset); free(addr_str); } int chaining_size = policy_enforce_chaining_size(enforcer); struct session_ctx *session_ctx = session_ctx_new(); session_ctx->session_id = meta->session_id; session_ctx->session_addr = sce_ctx->enable_debug ? four_tuple_tostring(&inner_tuple4) : NULL; session_ctx->rehash_index = meta->rehash_index; session_ctx->vxlan_src_port = calculate_vxlan_source_port(&inner_tuple4); four_tuple_copy(&session_ctx->inner_tuple4, &inner_tuple4); route_ctx_copy(&session_ctx->ctrl_route_ctx, &meta->route_ctx); session_ctx->ctrl_pkt_hdr_ptr = memdup(meta->raw_data, meta->raw_len); session_ctx->ctrl_pkt_hdr_len = meta->raw_len; session_ctx->chaining_raw = selected_chaining_create(chaining_size, session_ctx->session_id, session_ctx->session_addr); session_ctx->chaining_decrypted = selected_chaining_create(chaining_size, session_ctx->session_id, session_ctx->session_addr); session_ctx->ref_thread_ctx = thread_ctx; LOG_INFO("%s: session %lu %s active first", LOG_TAG_PKTIO, session_ctx->session_id, session_ctx->session_addr); handle_policy_mutil_hits(session_ctx, ctrl_pkt, &data_pkt, meta->direction, thread_ctx); send_event_log(session_ctx, thread_ctx); session_table_insert(session_table, session_ctx->session_id, &session_ctx->inner_tuple4, session_ctx, session_value_free_cb); ATOMIC_INC(&(thread_metrics->session_num)); } } static void handle_session_resetall(struct metadata *meta, struct control_packet *ctrl_pkt, struct thread_ctx *thread_ctx) { struct global_metrics *global_metrics = thread_ctx->ref_global_metrics; struct sce_ctx *sce_ctx = thread_ctx->ref_sce_ctx; LOG_ERROR("%s: session %lu resetall: notification clears all session tables !!!", LOG_TAG_PKTIO, meta->session_id); ATOMIC_ZERO(&(global_metrics->sum.session_num)); for (int i = 0; i < sce_ctx->nr_worker_threads; i++) { struct thread_ctx *temp_ctx = &sce_ctx->work_threads[i]; ATOMIC_INC(&temp_ctx->session_table_need_reset); } } /****************************************************************************** * handle control/raw/inject packet ******************************************************************************/ static void handle_control_packet(marsio_buff_t *rx_buff, struct thread_ctx *thread_ctx, int raw_len) { struct thread_metrics *thread_metrics = &thread_ctx->thread_metrics; struct metadata meta; struct control_packet ctrl_pkt; memset(&meta, 0, sizeof(struct metadata)); meta.is_ctrl_pkt = 1; meta.raw_len = raw_len; meta.raw_data = marsio_buff_mtod(rx_buff); if (mbuff_get_metadata(rx_buff, &meta) == -1) { LOG_ERROR("%s: unexpected control packet, unable to get metadata", LOG_TAG_PKTIO); goto error_ctrl_pkt; } if (control_packet_parse(&ctrl_pkt, meta.raw_data + meta.l7offset, meta.raw_len - meta.l7offset) == -1) { LOG_ERROR("%s: unexpected control packet, unable to parse data", LOG_TAG_PKTIO); goto error_ctrl_pkt; } if (ctrl_pkt.session_id != meta.session_id) { LOG_ERROR("%s: unexpected control packet, metadata's session %lu != control packet's session %lu", LOG_TAG_PKTIO, meta.session_id, ctrl_pkt.session_id); goto error_ctrl_pkt; } switch (ctrl_pkt.state) { case SESSION_STATE_OPENING: ATOMIC_INC(&(thread_metrics->ctrl_opening)); // when session opening, firewall not send policy id // return handle_session_opening(&meta, &ctrl_pkt, ctx); break; case SESSION_STATE_CLOSING: ATOMIC_INC(&(thread_metrics->ctrl_closing)); handle_session_closing(&meta, &ctrl_pkt, thread_ctx); break; case SESSION_STATE_ACTIVE: ATOMIC_INC(&(thread_metrics->ctrl_active)); handle_session_active(&meta, &ctrl_pkt, thread_ctx); break; case SESSION_STATE_RESETALL: ATOMIC_INC(&(thread_metrics->ctrl_resetall)); handle_session_resetall(&meta, &ctrl_pkt, thread_ctx); break; default: goto error_ctrl_pkt; } return; error_ctrl_pkt: ATOMIC_INC(&(thread_metrics->ctrl_error)); return; } static void handle_data_packet(marsio_buff_t *rx_buff, struct thread_ctx *thread_ctx, int raw_len) { struct session_table *session_table = thread_ctx->session_table; struct thread_metrics *thread_metrics = &thread_ctx->thread_metrics; struct metadata meta; struct session_ctx *session_ctx = NULL; struct selected_chaining *chaining = NULL; memset(&meta, 0, sizeof(struct metadata)); meta.is_ctrl_pkt = 0; meta.raw_len = raw_len; meta.raw_data = marsio_buff_mtod(rx_buff); if (mbuff_get_metadata(rx_buff, &meta) == -1) { LOG_ERROR("%s: unexpected raw packet, unable to get metadata, bypass !!!", LOG_TAG_PKTIO); goto error_bypass; } // bypass_traffic:0 disable // bypass_traffic:1 bypass all traffic // bypass_traffic:2 bypass raw traffic // bypass_traffic:3 bypass decrypted traffic if (unlikely(thread_ctx->ref_io->config.bypass_traffic == 2 && meta.is_decrypted == 0)) { LOG_DEBUG("%s: session %lu bypass, enable raw traffic bypass !!!", LOG_TAG_PKTIO, meta.session_id); goto error_bypass; } if (unlikely(thread_ctx->ref_io->config.bypass_traffic == 3 && meta.is_decrypted == 1)) { LOG_DEBUG("%s: session %lu bypass, enable decrypted traffic bypass !!!", LOG_TAG_PKTIO, meta.session_id); goto error_bypass; } session_ctx = data_packet_search_session(session_table, meta.raw_data, meta.raw_len, meta.session_id); if (session_ctx == NULL) { THROUGHPUT_METRICS_INC(&(thread_metrics->miss_sess), 1, meta.raw_len); goto error_bypass; } update_session_by_metadata(session_ctx, &meta); if (meta.is_decrypted == 1) { chaining = session_ctx->chaining_decrypted; } else { chaining = session_ctx->chaining_raw; } if (chaining == NULL) { LOG_ERROR("%s: unexpected raw packet, session %lu %s misses policy, bypass !!!", LOG_TAG_PKTIO, session_ctx->session_id, session_ctx->session_addr); goto error_bypass; } action_sf_chaining(thread_ctx, session_ctx, chaining, rx_buff, &meta, 0); return; error_bypass: action_err_bypass(rx_buff, &meta, NULL, thread_ctx); } static void handle_inject_vxlan_packet(marsio_buff_t *rx_buff, struct thread_ctx *thread_ctx, int raw_len) { struct session_table *session_table = thread_ctx->session_table; struct thread_metrics *thread_metrics = &thread_ctx->thread_metrics; struct metadata meta; struct vxlan_hdr *vxlan_hdr = NULL; struct session_ctx *session_ctx = NULL; struct selected_chaining *chaining = NULL; memset(&meta, 0, sizeof(struct metadata)); int sf_index = 0; char *raw_data = marsio_buff_mtod(rx_buff); if (vxlan_frame_decode(&vxlan_hdr, raw_data, raw_len) == -1) { THROUGHPUT_METRICS_INC(&(thread_metrics->endpoint_vxlan_drop), 1, raw_len); meta.raw_len = raw_len; action_err_block(rx_buff, &meta, NULL, thread_ctx); return; } meta.raw_data = (char *)vxlan_hdr + sizeof(struct vxlan_hdr); meta.raw_len = raw_len - VXLAN_FRAME_HDR_LEN; meta.l7offset = 0; meta.is_ctrl_pkt = 0; sf_index = vxlan_get_opt(vxlan_hdr, VNI_OPT_SFINDEX); meta.direction = vxlan_get_opt(vxlan_hdr, VNI_OPT_DIR); meta.is_decrypted = vxlan_get_opt(vxlan_hdr, VNI_OPT_TRAFFIC); session_ctx = inject_packet_search_session(session_table, meta.raw_data, meta.raw_len); if (session_ctx == NULL) { goto error_block; } update_metadata_by_session(session_ctx, &meta); if (meta.is_decrypted == 1) { chaining = session_ctx->chaining_decrypted; } else { chaining = session_ctx->chaining_raw; } if (chaining == NULL || sf_index < 0 || sf_index >= chaining->chaining_used) { LOG_ERROR("%s: unexpected inject packet, session %lu %s misses chaining index, drop !!!", LOG_TAG_PKTIO, session_ctx->session_id, session_ctx->session_addr); goto error_block; } if (chaining->chaining[sf_index].sff_forward_type == FORWARD_TYPE_MIRRORING) { LOG_DEBUG("%s: unexpected inject packet, session %lu %s with sf_profile_id %d executes mirror and does not require reflow, drop !!!", LOG_TAG_PKTIO, session_ctx->session_id, session_ctx->session_addr, chaining->chaining[sf_index].sf_profile_id); THROUGHPUT_METRICS_INC(&(thread_metrics->mirr_rx_drop), 1, meta.raw_len); goto error_block; } else { struct selected_sf *sf = &(chaining->chaining[sf_index]); THROUGHPUT_METRICS_INC(&sf->rx, 1, raw_len); THROUGHPUT_METRICS_INC(&(thread_metrics->stee_rx), 1, meta.raw_len); struct sf_metrics_key key = {0}; key.rule_id = sf->rule_id; key.sff_profile_id = sf->sff_profile_id; key.sf_profile_id = sf->sf_profile_id; key.vsys_id = sf->rule_vsys_id; sf_metrics_inc(thread_ctx->sf_metrics, &key, 1, raw_len, 0, 0); } marsio_buff_adj(rx_buff, raw_len - meta.raw_len); action_sf_chaining(thread_ctx, session_ctx, chaining, rx_buff, &meta, sf_index + 1); return; error_block: THROUGHPUT_METRICS_INC(&(thread_metrics->endpoint_vxlan_drop), 1, raw_len); marsio_buff_adj(rx_buff, raw_len - meta.raw_len); action_err_block(rx_buff, &meta, NULL, thread_ctx); } /****************************************************************************** * packet io ******************************************************************************/ // return 0 : success // return -1 : error static int packet_io_config(const char *profile, struct config *config) { // bypass_traffic:0 disable // bypass_traffic:1 bypass all traffic // bypass_traffic:2 bypass raw traffic // bypass_traffic:3 bypass decrypted traffic MESA_load_profile_int_def(profile, "PACKET_IO", "bypass_traffic", (int *)&(config->bypass_traffic), 0); MESA_load_profile_int_def(profile, "PACKET_IO", "rx_burst_max", (int *)&(config->rx_burst_max), 1); MESA_load_profile_int_def(profile, "PACKET_IO", "min_timeout_ms", (int *)&(config->min_timeout_ms), 900); MESA_load_profile_string_nodef(profile, "PACKET_IO", "app_symbol", config->app_symbol, sizeof(config->app_symbol)); MESA_load_profile_string_nodef(profile, "PACKET_IO", "dev_nf_name", config->dev_nf_name, sizeof(config->dev_nf_name)); MESA_load_profile_string_nodef(profile, "PACKET_IO", "dev_endpoint_l3_name", config->dev_endpoint_l3_name, sizeof(config->dev_endpoint_l3_name)); MESA_load_profile_string_nodef(profile, "PACKET_IO", "dev_endpoint_l3_ip", config->dev_endpoint_l3_ip_str, sizeof(config->dev_endpoint_l3_ip_str)); MESA_load_profile_string_nodef(profile, "PACKET_IO", "dev_endpoint_l3_mac", config->dev_endpoint_l3_mac_str, sizeof(config->dev_endpoint_l3_mac_str)); MESA_load_profile_string_nodef(profile, "PACKET_IO", "dev_endpoint_l2_name", config->dev_endpoint_l2_name, sizeof(config->dev_endpoint_l2_name)); MESA_load_profile_int_def(profile, "PACKET_IO", "vlan_encapsulate_replace_orig_vlan_header", (int *)&(config->vlan_encapsulate_replace_orig_vlan_header), 0); if (config->rx_burst_max > RX_BURST_MAX) { LOG_ERROR("%s: invalid rx_burst_max, exceeds limit %d", LOG_TAG_PKTIO, RX_BURST_MAX); return -1; } if (strlen(config->app_symbol) == 0) { LOG_ERROR("%s: invalid app_symbol in %s", LOG_TAG_PKTIO, profile); return -1; } if (strlen(config->dev_nf_name) == 0) { LOG_ERROR("%s: invalid dev_nf_name in %s", LOG_TAG_PKTIO, profile); return -1; } if (strlen(config->dev_endpoint_l3_name) == 0) { LOG_ERROR("%s: invalid dev_endpoint_l3_name in %s", LOG_TAG_PKTIO, profile); return -1; } if (strlen(config->dev_endpoint_l2_name) == 0) { LOG_ERROR("%s: invalid dev_endpoint_l2_name in %s", LOG_TAG_PKTIO, profile); return -1; } LOG_DEBUG("%s: PACKET_IO->bypass_traffic : %d", LOG_TAG_PKTIO, config->bypass_traffic); LOG_DEBUG("%s: PACKET_IO->rx_burst_max : %d", LOG_TAG_PKTIO, config->rx_burst_max); LOG_DEBUG("%s: PACKET_IO->min_timeout_ms : %d", LOG_TAG_PKTIO, config->min_timeout_ms); LOG_DEBUG("%s: PACKET_IO->app_symbol : %s", LOG_TAG_PKTIO, config->app_symbol); LOG_DEBUG("%s: PACKET_IO->dev_nf_name : %s", LOG_TAG_PKTIO, config->dev_nf_name); LOG_DEBUG("%s: PACKET_IO->dev_endpoint_l3_name : %s", LOG_TAG_PKTIO, config->dev_endpoint_l3_name); LOG_DEBUG("%s: PACKET_IO->dev_endpoint_l3_ip : %s", LOG_TAG_PKTIO, config->dev_endpoint_l3_ip_str); LOG_DEBUG("%s: PACKET_IO->dev_endpoint_l2_name : %s", LOG_TAG_PKTIO, config->dev_endpoint_l2_name); LOG_DEBUG("%s: PACKET_IO->vlan_encapsulate_replace_orig_vlan_header : %d", LOG_TAG_PKTIO, config->vlan_encapsulate_replace_orig_vlan_header); return 0; } struct packet_io *packet_io_create(const char *profile, int thread_num, cpu_set_t *coremask) { int opt = 1; struct packet_io *handle = (struct packet_io *)calloc(1, sizeof(struct packet_io)); assert(handle != NULL); handle->thread_num = thread_num; if (packet_io_config(profile, &(handle->config)) != 0) { goto error_out; } handle->instance = marsio_create(); if (handle->instance == NULL) { LOG_ERROR("%s: unable to create marsio instance", LOG_TAG_PKTIO); goto error_out; } if (marsio_option_set(handle->instance, MARSIO_OPT_THREAD_MASK_IN_CPUSET, coremask, sizeof(cpu_set_t)) != 0) { LOG_ERROR("%s: unable to set MARSIO_OPT_EXIT_WHEN_ERR option for marsio instance", LOG_TAG_PKTIO); goto error_out; } if (marsio_option_set(handle->instance, MARSIO_OPT_EXIT_WHEN_ERR, &opt, sizeof(opt)) != 0) { LOG_ERROR("%s: unable to set MARSIO_OPT_EXIT_WHEN_ERR option for marsio instance", LOG_TAG_PKTIO); goto error_out; } if (marsio_init(handle->instance, handle->config.app_symbol) != 0) { LOG_ERROR("%s: unable to initialize marsio instance", LOG_TAG_PKTIO); goto error_out; } handle->dev_nf.mr_dev = marsio_open_device(handle->instance, handle->config.dev_nf_name, handle->thread_num, handle->thread_num); if (handle->dev_nf.mr_dev == NULL) { LOG_ERROR("%s: unable to open device %s", LOG_TAG_PKTIO, handle->config.dev_nf_name); goto error_out; } handle->dev_nf.mr_path = marsio_sendpath_create_by_vdev(handle->dev_nf.mr_dev); if (handle->dev_nf.mr_path == NULL) { LOG_ERROR("%s: unable to create sendpath for device %s", LOG_TAG_PKTIO, handle->config.dev_nf_name); goto error_out; } handle->dev_endpoint_l3.mr_dev = marsio_open_device(handle->instance, handle->config.dev_endpoint_l3_name, handle->thread_num, handle->thread_num); if (handle->dev_endpoint_l3.mr_dev == NULL) { LOG_ERROR("%s: unable to open device %s", LOG_TAG_PKTIO, handle->config.dev_endpoint_l3_name); goto error_out; } handle->dev_endpoint_l3.mr_path = marsio_sendpath_create_by_vdev(handle->dev_endpoint_l3.mr_dev); if (handle->dev_endpoint_l3.mr_path == NULL) { LOG_ERROR("%s: unable to create sendpath for device %s", LOG_TAG_PKTIO, handle->config.dev_endpoint_l3_name); goto error_out; } if (strlen(handle->config.dev_endpoint_l3_mac_str) == 0) { marsio_get_device_ether_addr(handle->dev_endpoint_l3.mr_dev, handle->config.dev_endpoint_l3_mac_str, sizeof(handle->config.dev_endpoint_l3_mac_str)); LOG_DEBUG("%s: PACKET_IO->dev_endpoint_l3_mac : %s (get from marsio api)", LOG_TAG_PKTIO, handle->config.dev_endpoint_l3_mac_str); } str_to_mac(handle->config.dev_endpoint_l3_mac_str, handle->config.dev_endpoint_l3_mac); handle->config.dev_endpoint_l3_ip = inet_addr(handle->config.dev_endpoint_l3_ip_str); handle->dev_endpoint_l2.mr_dev = marsio_open_device(handle->instance, handle->config.dev_endpoint_l2_name, handle->thread_num, handle->thread_num); if (handle->dev_endpoint_l2.mr_dev == NULL) { LOG_ERROR("%s: unable to open device %s", LOG_TAG_PKTIO, handle->config.dev_endpoint_l2_name); goto error_out; } handle->dev_endpoint_l2.mr_path = marsio_sendpath_create_by_vdev(handle->dev_endpoint_l2.mr_dev); if (handle->dev_endpoint_l2.mr_path == NULL) { LOG_ERROR("%s: unable to create sendpath for device %s", LOG_TAG_PKTIO, handle->config.dev_endpoint_l2_name); goto error_out; } return handle; error_out: packet_io_destory(handle); return NULL; } void packet_io_destory(struct packet_io *handle) { if (handle) { if (handle->dev_nf.mr_path) { marsio_sendpath_destory(handle->dev_nf.mr_path); handle->dev_nf.mr_path = NULL; } if (handle->dev_nf.mr_dev) { marsio_close_device(handle->dev_nf.mr_dev); handle->dev_nf.mr_dev = NULL; } if (handle->dev_endpoint_l3.mr_path) { marsio_sendpath_destory(handle->dev_endpoint_l3.mr_path); handle->dev_endpoint_l3.mr_path = NULL; } if (handle->dev_endpoint_l3.mr_dev) { marsio_close_device(handle->dev_endpoint_l3.mr_dev); handle->dev_endpoint_l3.mr_dev = NULL; } if (handle->dev_endpoint_l2.mr_path) { marsio_sendpath_destory(handle->dev_endpoint_l2.mr_path); handle->dev_endpoint_l2.mr_path = NULL; } if (handle->dev_endpoint_l2.mr_dev) { marsio_close_device(handle->dev_endpoint_l2.mr_dev); handle->dev_endpoint_l2.mr_dev = NULL; } if (handle->instance) { marsio_destory(handle->instance); handle->instance = NULL; } free(handle); handle = NULL; } } int packet_io_init(struct packet_io *handle, struct thread_ctx *thread_ctx) { if (marsio_thread_init(handle->instance) != 0) { LOG_ERROR("%s: unable to init marsio thread %d", LOG_TAG_PKTIO, thread_ctx->thread_index); return -1; } return 0; } void packet_io_wait(struct packet_io *handle, struct thread_ctx *thread_ctx, int timeout_ms) { struct mr_vdev *vdevs[3] = { handle->dev_nf.mr_dev, handle->dev_endpoint_l3.mr_dev, handle->dev_endpoint_l2.mr_dev, }; int min_timeout_ms = MIN(handle->config.min_timeout_ms, timeout_ms); if (min_timeout_ms > 0) { marsio_poll_wait(handle->instance, vdevs, 3, thread_ctx->thread_index, min_timeout_ms); } else { return; } } int packet_io_polling_nf(struct packet_io *handle, struct thread_ctx *thread_ctx) { struct thread_metrics *thread_metrics = &thread_ctx->thread_metrics; int thread_index = thread_ctx->thread_index; static __thread marsio_buff_t *rx_buffs[RX_BURST_MAX]; int nr_recv = marsio_recv_burst(handle->dev_nf.mr_dev, thread_index, rx_buffs, handle->config.rx_burst_max); if (nr_recv <= 0) { return 0; } if (handle->config.bypass_traffic == 1) { for (int j = 0; j < nr_recv; j++) { int raw_len = marsio_buff_datalen(rx_buffs[j]); THROUGHPUT_METRICS_INC(&(thread_metrics->nf_rx), 1, raw_len); THROUGHPUT_METRICS_INC(&(thread_metrics->nf_tx), 1, raw_len); } marsio_send_burst(handle->dev_nf.mr_path, thread_index, rx_buffs, nr_recv); return nr_recv; } for (int j = 0; j < nr_recv; j++) { marsio_buff_t *rx_buff = rx_buffs[j]; int raw_len = marsio_buff_datalen(rx_buff); if (is_downlink_keepalive_packet(rx_buff, raw_len)) { THROUGHPUT_METRICS_INC(&(thread_metrics->nf_rx), 1, raw_len); THROUGHPUT_METRICS_INC(&(thread_metrics->nf_tx), 1, raw_len); THROUGHPUT_METRICS_INC(&(thread_metrics->downlink_rx), 1, raw_len); THROUGHPUT_METRICS_INC(&(thread_metrics->downlink_tx), 1, raw_len); marsio_send_burst(handle->dev_nf.mr_path, thread_index, &rx_buff, 1); } else if (marsio_buff_is_ctrlbuf(rx_buff)) { THROUGHPUT_METRICS_INC(&(thread_metrics->nf_rx), 1, raw_len); THROUGHPUT_METRICS_INC(&(thread_metrics->nf_tx), 1, raw_len); THROUGHPUT_METRICS_INC(&(thread_metrics->ctrl_rx), 1, raw_len); THROUGHPUT_METRICS_INC(&(thread_metrics->ctrl_tx), 1, raw_len); handle_control_packet(rx_buff, thread_ctx, raw_len); marsio_send_burst(handle->dev_nf.mr_path, thread_index, &rx_buff, 1); } else { THROUGHPUT_METRICS_INC(&(thread_metrics->nf_rx), 1, raw_len); handle_data_packet(rx_buff, thread_ctx, raw_len); } } return nr_recv; } int packet_io_polling_endpoint_l3(struct packet_io *handle, struct thread_ctx *thread_ctx) { struct thread_metrics *thread_metrics = &thread_ctx->thread_metrics; int thread_index = thread_ctx->thread_index; static __thread marsio_buff_t *rx_buffs[RX_BURST_MAX]; int nr_recv = marsio_recv_burst(handle->dev_endpoint_l3.mr_dev, thread_index, rx_buffs, handle->config.rx_burst_max); if (nr_recv <= 0) { return 0; } for (int j = 0; j < nr_recv; j++) { marsio_buff_t *rx_buff = rx_buffs[j]; int raw_len = marsio_buff_datalen(rx_buff); if (is_uplink_keepalive_packet(rx_buff, raw_len)) { THROUGHPUT_METRICS_INC(&(thread_metrics->endpoint_vxlan_rx), 1, raw_len); THROUGHPUT_METRICS_INC(&(thread_metrics->uplink_rx), 1, raw_len); THROUGHPUT_METRICS_INC(&(thread_metrics->uplink_tx_drop), 1, raw_len); marsio_buff_free(handle->instance, &rx_buff, 1, 0, thread_index); } else { THROUGHPUT_METRICS_INC(&(thread_metrics->endpoint_vxlan_rx), 1, raw_len); handle_inject_vxlan_packet(rx_buff, thread_ctx, raw_len); } } return nr_recv; } int packet_io_polling_endpoint_l2(struct packet_io *handle, struct thread_ctx *thread_ctx) { struct thread_metrics *thread_metrics = &thread_ctx->thread_metrics; int thread_index = thread_ctx->thread_index; static __thread marsio_buff_t *rx_buffs[RX_BURST_MAX]; int nr_recv = marsio_recv_burst(handle->dev_endpoint_l2.mr_dev, thread_index, rx_buffs, handle->config.rx_burst_max); if (nr_recv <= 0) { return 0; } for (int j = 0; j < nr_recv; j++) { marsio_buff_t *rx_buff = rx_buffs[j]; int raw_len = marsio_buff_datalen(rx_buffs[j]); THROUGHPUT_METRICS_INC(&(thread_metrics->endpoint_vlan_rx), 1, raw_len); THROUGHPUT_METRICS_INC(&(thread_metrics->endpoint_vlan_drop), 1, raw_len); marsio_buff_free(handle->instance, &rx_buff, 1, 0, thread_index); } return nr_recv; } struct mr_instance *packet_io_get_mr_instance(struct packet_io *handle) { if (handle) { return handle->instance; } else { return NULL; } }