diff --git a/CMakeLists.txt b/CMakeLists.txt index b467b2d..458de43 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -9,6 +9,14 @@ add_definitions(-D_GNU_SOURCE) set(CMAKE_CXX_STANDARD 11) set(CMAKE_C_STANDARD 11) +find_package(LIBBPF) +if (SUPPORT_BPF) + add_definitions(-DSUPPORT_BPF=1) + message(STATUS "Support BPF") +else() + message(STATUS "BPF not supported") +endif() + if(NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE RelWithDebInfo) endif() diff --git a/cache/src/tango_cache_client.cpp b/cache/src/tango_cache_client.cpp index cdfb1bc..80a462d 100644 --- a/cache/src/tango_cache_client.cpp +++ b/cache/src/tango_cache_client.cpp @@ -47,7 +47,7 @@ void caculate_sha256(const char *data, unsigned long len, char *result, u_int32_ SHA256_Update(&c, data, len); SHA256_Final(sha256, &c); - length = (size > 64)?32:(size-1)/2; //Ԥһռ + length = (size > 64)?32:(size-1)/2; //reserve one byte of space for(u_int32_t i=0; ibuff[estr->len]='\0'; } -//callback: ǷûصҪΪֱӵAPIʱʧܣٵûصֵͨж +//callback: whether to invoke the user callback; mainly for the case where a direct API call fails, the callback is not invoked again and the caller judges the result by the return value void tango_cache_ctx_destroy(struct tango_cache_ctx *ctx, bool callback) { struct multipart_etag_list *etag; @@ -255,7 +255,7 @@ void tango_cache_ctx_destroy(struct tango_cache_ctx *ctx, bool callback) free(ctx); } -//жsessionǷ񳬹ƣȡʼжwhere_to_getǷȫMINIO +//Check whether the number of sessions exceeds the limit; before a fetch starts, check whether where_to_get is entirely MINIO bool sessions_exceeds_limit(struct tango_cache_instance *instance, enum OBJECT_LOCATION where_to_get) { if(where_to_get == OBJECT_IN_HOS) @@ -269,7 +269,7 @@ bool sessions_exceeds_limit(struct tango_cache_instance *instance, enum OBJECT_L } -//ϴAPIʹctxevbuffer޷ctxȡ +//Used by the upload API: data is buffered in the ctx evbuffer, so the object size cannot be obtained from the ctx enum OBJECT_LOCATION tango_cache_object_locate(struct tango_cache_instance *instance, size_t object_size) { if(instance->param->fsstatid_trig) @@ -340,7 +340,7 @@ int tango_cache_update_frag_data(struct tango_cache_ctx *ctx, const char *data, { if(ctx->fail_state) { - return 0; //TODO: ʱԷֵ!! + return 0; //TODO: temporary return value!! } if(evbuffer_add(ctx->put.evbuf, data, size)) { @@ -362,7 +362,7 @@ int tango_cache_update_frag_evbuf(struct tango_cache_ctx *ctx, enum EVBUFFER_COP if(ctx->fail_state) { - return 0;//TODO: ʱԷֵ!! + return 0;//TODO: temporary return value!!
} size = evbuffer_get_length(evbuf); @@ -424,7 +424,7 @@ struct tango_cache_ctx *tango_cache_update_prepare(struct tango_cache_instance * { snprintf(ctx->object_key, 256, "%s/%c%c_%c%c_%s", instance->param->bucketname, buffer[0], buffer[1], buffer[2], buffer[3], buffer+4); } - //ԭʼURL + //store the original URL snprintf(buffer, 2064, "x-amz-meta-url: %s", meta->url); ctx->headers = curl_slist_append(ctx->headers, buffer); } @@ -441,7 +441,7 @@ struct tango_cache_ctx *tango_cache_update_prepare(struct tango_cache_instance * return NULL; } - //ExpiresֶΣڻڲжǷʱ + //Expires field: used internally by the cache to decide whether the object has expired now = time(NULL); expires = (meta->put.timeout==0||meta->put.timeout>instance->param->relative_ttl)?instance->param->relative_ttl:meta->put.timeout; if(expires_timestamp2hdr_str(now + expires, buffer, 256)) @@ -449,7 +449,7 @@ struct tango_cache_ctx *tango_cache_update_prepare(struct tango_cache_instance * ctx->headers = curl_slist_append(ctx->headers, buffer); } ctx->put.object_ttl = expires; - //Last-ModifyֶΣGETʱжǷ + //Last-Modify field: used on GET to decide whether the object is up to date last_modify = (meta->put.date > meta->put.last_modified)?meta->put.date:meta->put.last_modified; if(last_modify == 0) { @@ -457,7 +457,7 @@ struct tango_cache_ctx *tango_cache_update_prepare(struct tango_cache_instance * } sprintf(buffer, "x-amz-meta-lm: %lu", last_modify); ctx->headers = curl_slist_append(ctx->headers, buffer); - //бֵ֧ı׼ͷ + //standard headers supported in the list for(int i=0; istd_hdr[i] != NULL) @@ -478,11 +478,11 @@ struct tango_cache_ctx *tango_cache_update_prepare(struct tango_cache_instance * easy_string_savedata(&hdr_estr, "Content-Type: application/octet-stream\r\n", strlen("Content-Type: application/octet-stream\r\n")); } } - ctx->headers = curl_slist_append(ctx->headers, "Expect:");//עPOSTExpectϵҪȷCURLOPT_POSTFIELDSIZE - //ͷGETʱԭ + ctx->headers = curl_slist_append(ctx->headers, "Expect:");//note the interaction between POST and Expect: CURLOPT_POSTFIELDSIZE must be set correctly + //user-defined header; restored as-is on GET if(meta->usertag_len>0 && meta->usertag_len<=USER_TAG_MAX_LEN) { - user_tag = (char *)malloc((meta->usertag_len/3 + 1)*4 + 18); //ռ䣻18=17+1: ͷ+ַ + user_tag = (char *)malloc((meta->usertag_len/3 + 1)*4 + 18); //allocate enough space for the Base64 encoding; 18 = 17+1: header prefix + string terminator memcpy(user_tag, "x-amz-meta-user: ", 17); user_tag_value = user_tag+17; Base64_EncodeBlock((const unsigned char*)meta->usertag, meta->usertag_len, (unsigned char*)user_tag_value); @@ -535,7 +535,7 @@ struct tango_cache_ctx *tango_cache_update_start(struct tango_cache_instance *in return ctx; } -//һϴʱֱӶλϴλ +//For a one-shot upload, locate the upload destination directly struct tango_cache_ctx *tango_cache_update_once_prepare(struct tango_cache_instance *instance, struct future* f, struct tango_cache_meta_put *meta, size_t object_size, char *path, size_t pathsize) { @@ -600,7 +600,7 @@ struct tango_cache_ctx *tango_cache_fetch_prepare(struct tango_cache_instance *i ctx = (struct tango_cache_ctx *)calloc(1, sizeof(struct tango_cache_ctx)); ctx->instance = instance; ctx->promise = future_to_promise(f); - promise_allow_many_successes(ctx->promise); //λصʱpromise_finish + promise_allow_many_successes(ctx->promise); //allow multiple callbacks; promise_finish is called on completion ctx->method = method; ctx->get.state = GET_STATE_START; ctx->get.max_age = meta->get.max_age; @@ -647,7 +647,7 @@ int tango_cache_head_object(struct tango_cache_instance *instance, struct future struct tango_cache_ctx *ctx; enum OBJECT_LOCATION location; - //RedisԪϢ洢Redis + //If Redis is configured, the metadata is stored in Redis location = (instance->param->object_store_way != CACHE_ALL_HOS)?OBJECT_IN_REDIS:OBJECT_IN_HOS; ctx = 
tango_cache_fetch_prepare(instance, CACHE_REQUEST_HEAD, f, meta, location); if(ctx == NULL) diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt index 4bb1b70..8c98f04 100644 --- a/common/CMakeLists.txt +++ b/common/CMakeLists.txt @@ -1,11 +1,15 @@ add_library( common src/tfe_utils.cpp src/tfe_types.cpp src/tfe_future.cpp src/tfe_http.cpp src/tfe_plugin.cpp src/tfe_rpc.cpp src/tfe_cmsg.cpp src/tfe_kafka_logger.cpp src/tfe_resource.cpp src/tfe_scan.cpp - src/tfe_pkt_util.cpp src/tfe_tcp_restore.cpp src/raw_socket.cpp src/packet_construct.cpp src/tfe_fieldstat.cpp - src/tap.cpp src/io_uring.cpp src/intercept_policy.cpp) + src/tfe_pkt_util.cpp src/tfe_tcp_restore.cpp src/raw_socket.cpp src/packet_construct.cpp + src/tap.cpp src/io_uring.cpp src/intercept_policy.cpp src/tfe_fieldstat.cpp + src/tfe_addr_tuple4.cpp src/tfe_packet_io.cpp src/tfe_session_table.cpp src/tfe_timestamp.cpp + src/tfe_acceptor_kni.cpp src/tfe_ctrl_packet.cpp src/tfe_raw_packet.cpp + src/tfe_mpack.cpp src/mpack.cpp src/tfe_tap_rss.cpp src/tfe_metrics.cpp) target_include_directories(common PUBLIC ${CMAKE_CURRENT_LIST_DIR}/include) +target_include_directories(common PRIVATE ${CMAKE_CURRENT_LIST_DIR}/../platform/include/internal) target_link_libraries(common PUBLIC libevent-static libevent-static-openssl libevent-static-pthreads rdkafka) -target_link_libraries(common PUBLIC MESA_handle_logger cjson) +target_link_libraries(common PUBLIC MESA_handle_logger cjson msgpack) if (SUPPORT_LIBURING) target_link_libraries(common PUBLIC uring) diff --git a/common/include/tfe_acceptor_kni.h b/common/include/tfe_acceptor_kni.h new file mode 100644 index 0000000..7bb3fb3 --- /dev/null +++ b/common/include/tfe_acceptor_kni.h @@ -0,0 +1,152 @@ +#ifndef _TFE_ACCEPTOR_KNI_H +#define _TFE_ACCEPTOR_KNI_H + +#ifdef __cplusplus +extern "C" +{ +#endif + +#include + +// #include "proxy.h" +#include "tfe_utils.h" +#include "tfe_timestamp.h" +#include "tfe_packet_io.h" +#include "tfe_session_table.h" + + /****************************************************************************** + * Struct For tap + ******************************************************************************/ + + struct tap_config + { + int enable_iouring; + int enable_debuglog; + + int ring_size; + int buff_size; + + int flags; + int sq_thread_idle; + + char src_mac[6]; + char tap_mac[6]; + char tap_c_mac[6]; + char tap_s_mac[6]; + char tap_device[16]; + char tap_c_device[16]; + char tap_s_device[16]; + int tap_rps_enable; + char tap_rps_mask[TFE_SYMBOL_MAX]; + + struct bpf_ctx *tap_bpf_ctx; + }; + + struct tap_ctx + { + int tap_s; + int tap_c; + int tap_fd; + + struct io_uring_instance *io_uring_fd; + struct io_uring_instance *io_uring_c; + struct io_uring_instance *io_uring_s; + + int buff_size; + char *buff; + }; + + /****************************************************************************** + * Struct For Thread + ******************************************************************************/ + struct acceptor_thread_ctx + { + pthread_t tid; + int thread_index; + + struct tap_ctx *tap_ctx; + struct session_table *session_table; + struct sf_metrics *sf_metrics; + + struct tap_config *ref_tap_config; + struct packet_io *ref_io; + struct global_metrics *ref_metrics; + struct policy_enforcer *ref_enforcer; + struct acceptor_ctx *ref_acceptor_ctx; + struct tfe_proxy *ref_proxy; + + int session_table_need_reset; + }; + + /****************************************************************************** + * Struct For Session + 
 ******************************************************************************/ + + struct packet_info + { + int dir_is_e2i; + struct addr_tuple4 tuple4; + char *addr_string; + + char *header_data; + int header_len; + + struct sids sids; + struct route_ctx route_ctx; + }; + + struct session_ctx + { + int policy_ids; + uint64_t session_id; + + uint16_t user_field; + + struct route_ctx raw_pkt_i2e_route_ctx; + struct route_ctx raw_pkt_e2i_route_ctx; + + struct sids raw_pkt_i2e_sids; + struct sids raw_pkt_e2i_sids; + + // based on the first control packet + struct packet_info first_ctrl_pkt; + + // requires locking + struct tfe_cmsg *cmsg; + struct acceptor_thread_ctx *ref_thread_ctx; + }; + + struct session_ctx *session_ctx_new(); + void session_ctx_free(struct session_ctx *ctx); + + /****************************************************************************** + * Struct For KNI + ******************************************************************************/ + + struct acceptor_ctx + { + int firewall_sids; + int sce_sids; + int nr_worker_threads; + + int cpu_affinity_mask[TFE_THREAD_MAX]; + + cpu_set_t coremask; + struct tap_config *config; + struct timestamp *ts; + struct packet_io *io; + struct global_metrics *metrics; + struct policy_enforcer *enforcer; + struct acceptor_thread_ctx work_threads[TFE_THREAD_MAX]; + + struct tfe_proxy *ref_proxy; + }; + + struct acceptor_ctx *acceptor_ctx_create(const char *profile); + void acceptor_ctx_destory(struct acceptor_ctx *ctx); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/common/include/tfe_addr_tuple4.h b/common/include/tfe_addr_tuple4.h new file mode 100644 index 0000000..81aab88 --- /dev/null +++ b/common/include/tfe_addr_tuple4.h @@ -0,0 +1,66 @@ +#ifndef _TFE_ADDR_TUPLE4_H +#define _TFE_ADDR_TUPLE4_H + +#ifdef __cplusplus +extern "C" +{ +#endif + +#include + + enum addr_tuple4_type + { + ADDR_TUPLE4_TYPE_V4, + ADDR_TUPLE4_TYPE_V6, + }; + + struct addr_v4 + { + struct in_addr src_addr; /* network order */ + struct in_addr dst_addr; /* network order */ + }; + + struct addr_v6 + { + struct in6_addr src_addr; /* network order */ + struct in6_addr dst_addr; /* network order */ + }; + + struct addr_tuple4 + { + enum addr_tuple4_type addr_type; + in_port_t src_port; /* network order */ + in_port_t dst_port; /* network order */ + union + { + struct addr_v4 addr_v4; + struct addr_v6 addr_v6; + }; + }; + +#define INIT_ADDR_V4(name, src_addr_str, src_port_num, dst_addr_str, dst_port_num) \ + struct addr_tuple4 name; \ + memset(&name, 0, sizeof(name)); \ + (name).addr_type = ADDR_TUPLE4_TYPE_V4; \ + (name).src_port = htons((src_port_num)); \ + (name).dst_port = htons((dst_port_num)); \ + inet_pton(AF_INET, (src_addr_str), &(name).addr_v4.src_addr); \ + inet_pton(AF_INET, (dst_addr_str), &(name).addr_v4.dst_addr); + +#define INIT_ADDR_V6(name, src_addr_str, src_port_num, dst_addr_str, dst_port_num) \ + struct addr_tuple4 name; \ + memset(&name, 0, sizeof(name)); \ + (name).addr_type = ADDR_TUPLE4_TYPE_V6; \ + (name).src_port = htons((src_port_num)); \ + (name).dst_port = htons((dst_port_num)); \ + inet_pton(AF_INET6, (src_addr_str), &(name).addr_v6.src_addr); \ + inet_pton(AF_INET6, (dst_addr_str), &(name).addr_v6.dst_addr); + + char *addr_tuple4_to_str(const struct addr_tuple4 *addr); + void addr_tuple4_reverse(const struct addr_tuple4 *orin, struct addr_tuple4 *out); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/common/include/tfe_ctrl_packet.h b/common/include/tfe_ctrl_packet.h new file mode 100644 index 0000000..2ae43db --- /dev/null +++ 
b/common/include/tfe_ctrl_packet.h @@ -0,0 +1,44 @@ +#ifndef _TFE_CTRL_PACKET_H +#define _TFE_CTRL_PACKET_H + +#ifdef __cplusplus +extern "C" +{ +#endif + +#include + +enum session_state +{ + SESSION_STATE_OPENING = 1, + SESSION_STATE_CLOSING = 2, + SESSION_STATE_ACTIVE = 3, + SESSION_STATE_RESETALL = 4, +}; + +struct ctrl_pkt_parser +{ + char tsync[4]; + uint64_t session_id; + enum session_state state; + char method[32]; + uint64_t tfe_policy_ids[32]; + int tfe_policy_id_num; + uint64_t sce_policy_ids[32]; + int sce_policy_id_num; + struct tfe_cmsg *cmsg; +}; + +const char *session_state_to_string(enum session_state state); +void ctrl_packet_parser_init(struct ctrl_pkt_parser *handler); + +// return 0 : success +// return -1 : error +int ctrl_packet_parser_parse(struct ctrl_pkt_parser *handler, const char *data, size_t length); +void ctrl_packet_parser_dump(struct ctrl_pkt_parser *handler); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/common/include/tfe_metrics.h b/common/include/tfe_metrics.h new file mode 100644 index 0000000..32c76e7 --- /dev/null +++ b/common/include/tfe_metrics.h @@ -0,0 +1,61 @@ +#ifndef _GLOBAL_METRICS_H +#define _GLOBAL_METRICS_H + +#ifdef __cplusplus +extern "C" +{ +#endif + +#include "tfe_utils.h" +#include + +struct global_metrics_config +{ + char output_file[256]; + char statsd_server[32]; + int statsd_port; + int statsd_format; + int statsd_cycle; + + int prometheus_listen_port; + char prometheus_listen_url[256]; +}; + +struct global_metrics +{ + struct throughput_metrics raw_pkt_rx; // cumulative value + struct throughput_metrics raw_pkt_tx; // cumulative value + + struct throughput_metrics hit_policy; // cumulative value + + struct throughput_metrics decrypt_tx; // cumulative value + struct throughput_metrics decrypt_rx; // cumulative value + + struct throughput_metrics ctrl_pkt_rx; // cumulative value + + uint64_t ctrl_pkt_opening_num; // cumulative value + uint64_t ctrl_pkt_active_num; // cumulative value + uint64_t ctrl_pkt_closing_num; // cumulative value + uint64_t ctrl_pkt_resetall_num; // cumulative value + uint64_t ctrl_pkt_error_num; // cumulative value + + uint64_t sf_active_times; // cumulative value + uint64_t sf_inactive_times; // cumulative value + + uint64_t session_nums; // instantaneous value + uint64_t send_log; // instantaneous value + + struct global_metrics_config config; + screen_stat_handle_t fs_handle; + int fs_id[128]; +}; + +struct global_metrics *global_metrics_create(); +void global_metrics_destory(struct global_metrics *metrics); +void global_metrics_dump(struct global_metrics *metrics); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/common/include/tfe_mpack.h b/common/include/tfe_mpack.h new file mode 100644 index 0000000..299cbbf --- /dev/null +++ b/common/include/tfe_mpack.h @@ -0,0 +1,17 @@ +#ifndef _TFE_MPACK_H +#define _TFE_MPACK_H + +#ifdef __cplusplus +extern "C" +{ +#endif + +#include "tfe_cmsg.h" + +int parse_messagepack(const char* data, size_t length, void *ctx); + +#ifdef __cplusplus +} +#endif + +#endif \ No newline at end of file diff --git a/common/include/tfe_packet_io.h b/common/include/tfe_packet_io.h new file mode 100644 index 0000000..5f1a6d8 --- /dev/null +++ b/common/include/tfe_packet_io.h @@ -0,0 +1,23 @@ +#ifndef _TFE_PACKET_IO_H +#define _TFE_PACKET_IO_H + +#ifdef __cplusplus +extern "C" +{ +#endif + +struct packet_io *packet_io_create(const char *profile, int thread_num, cpu_set_t *coremask); +void packet_io_destory(struct packet_io *handle); + +int packet_io_thread_init(struct packet_io *handle, struct acceptor_thread_ctx *thread_ctx); +void packet_io_thread_wait(struct packet_io *handle, struct acceptor_thread_ctx *thread_ctx, int timeout_ms); + +int 
packet_io_polling_nf_interface(struct packet_io *handle, int thread_seq, void *ctx); +void handle_raw_packet_from_tap(const char *data, int len, void *args); +void handle_decryption_packet_from_tap(const char *data, int len, void *args); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/common/include/tfe_raw_packet.h b/common/include/tfe_raw_packet.h new file mode 100644 index 0000000..a360b71 --- /dev/null +++ b/common/include/tfe_raw_packet.h @@ -0,0 +1,99 @@ +#ifndef _TFE_RAW_PACKET_H +#define _TFE_RAW_PACKET_H + +#ifdef __cplusplus +extern "C" +{ +#endif + +#include + +enum layer_type +{ + // data link layer + LAYER_TYPE_ETHER = 1 << 0, + LAYER_TYPE_PPP = 1 << 1, + LAYER_TYPE_HDLC = 1 << 2, + LAYER_TYPE_L2 = (LAYER_TYPE_ETHER | LAYER_TYPE_PPP | LAYER_TYPE_HDLC), + + // data link layer -- tunnels + LAYER_TYPE_VLAN = 1 << 3, + LAYER_TYPE_PPPOE = 1 << 4, + LAYER_TYPE_MPLS = 1 << 5, + LAYER_TYPE_L2_TUN = (LAYER_TYPE_VLAN | LAYER_TYPE_PPPOE | LAYER_TYPE_MPLS), + + // network layer + LAYER_TYPE_IPV4 = 1 << 6, + LAYER_TYPE_IPV6 = 1 << 7, + LAYER_TYPE_L3 = (LAYER_TYPE_IPV4 | LAYER_TYPE_IPV6), + + // network layer -- tunnels + + // transport layer + LAYER_TYPE_UDP = 1 << 8, + LAYER_TYPE_TCP = 1 << 9, + LAYER_TYPE_L4 = (LAYER_TYPE_UDP | LAYER_TYPE_TCP), + + // transport layer -- tunnels + LAYER_TYPE_G_VXLAN = 1 << 10, + LAYER_TYPE_GTPV1_U = 1 << 11, + + // ALL + LAYER_TYPE_ALL = (LAYER_TYPE_L2 | LAYER_TYPE_L2_TUN | LAYER_TYPE_L3 | LAYER_TYPE_L4 | LAYER_TYPE_G_VXLAN | LAYER_TYPE_GTPV1_U), + + // UNKNOWN + LAYER_TYPE_UNKNOWN, +}; + +enum ldbc_method +{ + LDBC_METHOD_HASH_INT_IP = 1, + LDBC_METHOD_HASH_EXT_IP = 2, + LDBC_METHOD_HASH_INT_IP_AND_EXT_IP = 3, + LDBC_METHOD_HASH_INNERMOST_INT_IP = 4, + LDBC_METHOD_HASH_INNERMOST_EXT_IP = 5, +}; + +struct layer_result +{ + uint16_t offset; + enum layer_type type; +}; + +struct layer_results +{ + struct layer_result layers[16]; + uint16_t layers_used; + uint16_t layers_size; +}; + +struct raw_pkt_parser +{ + enum layer_type expect_type; + struct layer_results results; + + const void *ptr_pkt_start; + uint64_t pkt_trace_id; +}; + +void raw_packet_parser_init(struct raw_pkt_parser *handler, uint64_t pkt_trace_id, enum layer_type expect_type, uint16_t expect_results_num); +// returns the innermost payload +const void *raw_packet_parser_parse(struct raw_pkt_parser *handler, const void *data, size_t length); + +// return 0 : success +// return -1 : error +int raw_packet_parser_get_most_inner_tuple4(struct raw_pkt_parser *handler, struct addr_tuple4 *addr); +int raw_packet_parser_get_most_outer_tuple4(struct raw_pkt_parser *handler, struct addr_tuple4 *addr); + +// return 0 : success +// return -1 : error +int raw_packet_parser_get_most_inner_address(struct raw_pkt_parser *handler, struct addr_tuple4 *addr); +int raw_packet_parser_get_most_outer_address(struct raw_pkt_parser *handler, struct addr_tuple4 *addr); + +uint64_t raw_packet_parser_get_hash_value(struct raw_pkt_parser *handler, enum ldbc_method method, int dir_is_internal); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/common/include/tfe_session_table.h b/common/include/tfe_session_table.h new file mode 100644 index 0000000..9478915 --- /dev/null +++ b/common/include/tfe_session_table.h @@ -0,0 +1,58 @@ +#ifndef _SESSION_TABLE_H +#define _SESSION_TABLE_H + +#ifdef __cplusplus +extern "C" +{ +#endif + +#include +#include + +#include "uthash.h" +#include "tfe_addr_tuple4.h" + +// Note: session_addr must be initialized by memset(0) before use !!! 
+ +typedef void fn_free_cb(void *args); + +struct session_node +{ + uint64_t session_id; /* first key */ + struct addr_tuple4 session_addr; /* second key */ + + void *val_data; + fn_free_cb *val_freecb; + + UT_hash_handle hh1; /* handle for first hash table */ + UT_hash_handle hh2; /* handle for second hash table */ +}; + +struct session_table; + +struct session_table *session_table_create(); +void session_table_destory(struct session_table *table); +void session_table_reset(struct session_table *table); +uint64_t session_table_count(struct session_table *table); + +// session_addr : deep copy +// val_data : shallow copy (malloc'd by the user, freed by val_freecb) +// return 0 : success +// return -1 : key already exists +int session_table_insert(struct session_table *table, uint64_t session_id, const struct addr_tuple4 *session_addr, void *val_data, const fn_free_cb *val_freecb); + +// return 0 : success +// return -1 : key does not exist +int session_table_delete_by_id(struct session_table *table, uint64_t session_id); +int session_table_delete_by_addr(struct session_table *table, const struct addr_tuple4 *session_addr); + +// return NULL : key does not exist +// return non-NULL : success +struct session_node *session_table_search_by_id(struct session_table *table, uint64_t session_id); +struct session_node *session_table_search_by_addr(struct session_table *table, const struct addr_tuple4 *session_addr); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/common/include/tfe_tap_rss.h b/common/include/tfe_tap_rss.h new file mode 100644 index 0000000..3ae0872 --- /dev/null +++ b/common/include/tfe_tap_rss.h @@ -0,0 +1,35 @@ +#ifndef _TFE_TAP_RSS_H_ +#define _TFE_TAP_RSS_H_ + +#ifdef __cplusplus +extern "C" +{ +#endif + +#define TAP_RSS_LOG_TAG "TAP_RSS: " + +struct bpf_ctx; + +int tfe_tap_get_bpf_prog_fd(struct bpf_ctx *ctx); + +struct bpf_ctx *tfe_tap_global_load_rss_bpf(const char *bpf_obj_file, uint32_t bpf_queue_num, uint32_t bpf_hash_mode, uint32_t bpf_debug_log, void *logger); +void tfe_tap_global_unload_rss_bpf(struct bpf_ctx *ctx); + +struct tap_ctx *tfe_tap_ctx_create(void *ctx); + +struct tap_config *tfe_tap_config_create(const char *profile, int thread_num); +void tfe_tap_destory(struct tap_config *tap); + +int tfe_tap_set_rps(void *local_logger, const char *tap_name, int thread_num, const char *rps_mask); + +int tfe_tap_open_per_thread(const char *tap_dev, int tap_flags, int bpf_prog_fd, void *logger); +void tfe_tap_close_per_thread(int tap_fd); + +int tfe_tap_read_per_thread(int tap_fd, char *buff, int buff_size, void *logger); +int tfe_tap_write_per_thread(int tap_fd, const char *data, int data_len, void *logger); + +#ifdef __cplusplus +} +#endif + +#endif \ No newline at end of file diff --git a/common/include/tfe_timestamp.h b/common/include/tfe_timestamp.h new file mode 100644 index 0000000..4266c87 --- /dev/null +++ b/common/include/tfe_timestamp.h @@ -0,0 +1,24 @@ +#ifndef _TFE_TIMESTAMP_H +#define _TFE_TIMESTAMP_H + +#ifdef __cplusplus +extern "C" +{ +#endif + +#include + +struct timestamp *timestamp_new(uint64_t update_interval_ms); +void timestamp_free(struct timestamp *ts); + +void timestamp_update(struct timestamp *ts); +uint64_t timestamp_update_interval_ms(struct timestamp *ts); + +uint64_t timestamp_get_sec(struct timestamp *ts); +uint64_t timestamp_get_msec(struct timestamp *ts); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/common/include/tfe_utils.h b/common/include/tfe_utils.h index 1b9af35..4081782 100644 --- +++ 
b/common/include/tfe_utils.h @@ -10,6 +10,18 @@ #include //scan_dir #include +#define LOG_TAG_POLICY "POLICY" +#define LOG_TAG_UTILS "UTILS" +#define LOG_TAG_RAWPKT "RAW_PACKET" +#define LOG_TAG_CTRLPKT "CTRL_PACKET" +#define LOG_TAG_STABLE "SESSION_TABLE" +#define LOG_TAG_PKTIO "PACKET_IO" +#define LOG_TAG_METRICS "G_METRICS" +#define LOG_TAG_SF_METRICS "SF_METRICS" +#define LOG_TAG_SF_STATUS "SF_STATUS" +#define LOG_TAG_SCE "SCE" +#define LOG_TAG_TIMESTAMP "TIMESTAMP" + #define TFE_STRING_MAX 2048 #define TFE_PATH_MAX 256 #define TFE_SYMBOL_MAX 64 @@ -170,4 +182,59 @@ int tfe_scandir(const char *dir, struct dirent ***namelist, char *tfe_read_file(const char *filename, size_t *filelen); const char * tfe_version(); -int tfe_decode_base64url(u_char *dst, u_char *src); \ No newline at end of file +int tfe_decode_base64url(u_char *dst, u_char *src); + +/****************************************************************************** + * sids + ******************************************************************************/ + +#include + +struct sids +{ + int num; + sid_t elems[MR_SID_LIST_MAXLEN]; +}; + +void sids_write_once(struct sids *dst, struct sids *src); +void sids_copy(struct sids *dst, struct sids *src); + +/****************************************************************************** + * route_ctx + ******************************************************************************/ + +struct route_ctx +{ + char data[64]; + int len; +}; + +int route_ctx_is_empty(struct route_ctx *ctx); +void route_ctx_copy(struct route_ctx *dst, struct route_ctx *src); + +/****************************************************************************** + * protocol + ******************************************************************************/ + +struct udp_hdr +{ + u_int16_t uh_sport; /* source port */ + u_int16_t uh_dport; /* destination port */ + u_int16_t uh_ulen; /* udp length */ + u_int16_t uh_sum; /* udp checksum */ +} __attribute__((__packed__)); + +void build_udp_header(const char *l3_hdr, int l3_hdr_len, struct udp_hdr *udp_hdr, u_int16_t udp_sport, u_int16_t udp_dport, int payload_len); +void build_ip_header(struct ip *ip_hdr, u_int8_t next_protocol, const char *src_addr, const char *dst_addr, uint16_t payload_len); +void build_ether_header(struct ethhdr *eth_hdr, uint16_t next_protocol, const char *src_mac, const char *dst_mac); + +int str_to_mac(const char *str, char *mac_buff); +int get_mac_by_device_name(const char *dev_name, char *mac_buff); + +struct throughput_metrics +{ + uint64_t n_pkts; + uint64_t n_bytes; +}; + +void throughput_metrics_inc(struct throughput_metrics *iterm, uint64_t n_pkts, uint64_t n_bytes); diff --git a/common/include/uthash.h b/common/include/uthash.h new file mode 100644 index 0000000..f6a2a4a --- /dev/null +++ b/common/include/uthash.h @@ -0,0 +1,1316 @@ +/* +Copyright (c) 2003-2021, Troy D. Hanson http://troydhanson.github.com/uthash/ +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER +OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef UTHASH_H +#define UTHASH_H + +#define UTHASH_VERSION 2.3.0 + +#include /* memcmp, memset, strlen */ +#include /* ptrdiff_t */ +#include /* exit */ + +#if defined(HASH_DEFINE_OWN_STDINT) && HASH_DEFINE_OWN_STDINT +/* This codepath is provided for backward compatibility, but I plan to remove it. */ +#warning "HASH_DEFINE_OWN_STDINT is deprecated; please use HASH_NO_STDINT instead" +typedef unsigned int uint32_t; +typedef unsigned char uint8_t; +#elif defined(HASH_NO_STDINT) && HASH_NO_STDINT +#else +#include /* uint8_t, uint32_t */ +#endif + +/* These macros use decltype or the earlier __typeof GNU extension. + As decltype is only available in newer compilers (VS2010 or gcc 4.3+ + when compiling c++ source) this code uses whatever method is needed + or, for VS2008 where neither is available, uses casting workarounds. */ +#if !defined(DECLTYPE) && !defined(NO_DECLTYPE) +#if defined(_MSC_VER) /* MS compiler */ +#if _MSC_VER >= 1600 && defined(__cplusplus) /* VS2010 or newer in C++ mode */ +#define DECLTYPE(x) (decltype(x)) +#else /* VS2008 or older (or VS2010 in C mode) */ +#define NO_DECLTYPE +#endif +#elif defined(__BORLANDC__) || defined(__ICCARM__) || defined(__LCC__) || defined(__WATCOMC__) +#define NO_DECLTYPE +#else /* GNU, Sun and other compilers */ +#define DECLTYPE(x) (__typeof(x)) +#endif +#endif + +#ifdef NO_DECLTYPE +#define DECLTYPE(x) +#define DECLTYPE_ASSIGN(dst, src) \ + do \ + { \ + char **_da_dst = (char **)(&(dst)); \ + *_da_dst = (char *)(src); \ + } while (0) +#else +#define DECLTYPE_ASSIGN(dst, src) \ + do \ + { \ + (dst) = DECLTYPE(dst)(src); \ + } while (0) +#endif + +#ifndef uthash_malloc +#define uthash_malloc(sz) malloc(sz) /* malloc fcn */ +#endif +#ifndef uthash_free +#define uthash_free(ptr, sz) free(ptr) /* free fcn */ +#endif +#ifndef uthash_bzero +#define uthash_bzero(a, n) memset(a, '\0', n) +#endif +#ifndef uthash_strlen +#define uthash_strlen(s) strlen(s) +#endif + +#ifndef HASH_FUNCTION +#define HASH_FUNCTION(keyptr, keylen, hashv) HASH_JEN(keyptr, keylen, hashv) +#endif + +#ifndef HASH_KEYCMP +#define HASH_KEYCMP(a, b, n) memcmp(a, b, n) +#endif + +#ifndef uthash_noexpand_fyi +#define uthash_noexpand_fyi(tbl) /* can be defined to log noexpand */ +#endif +#ifndef uthash_expand_fyi +#define uthash_expand_fyi(tbl) /* can be defined to log expands */ +#endif + +#ifndef HASH_NONFATAL_OOM +#define HASH_NONFATAL_OOM 0 +#endif + +#if HASH_NONFATAL_OOM +/* malloc failures can be recovered from */ + +#ifndef uthash_nonfatal_oom +#define uthash_nonfatal_oom(obj) \ + do \ + { \ + } while (0) /* non-fatal OOM error */ +#endif + +#define HASH_RECORD_OOM(oomed) \ + do \ + { \ + (oomed) = 1; \ + } while (0) +#define IF_HASH_NONFATAL_OOM(x) x + +#else +/* malloc failures result in lost memory, hash tables are unusable */ + +#ifndef uthash_fatal +#define uthash_fatal(msg) exit(-1) /* fatal OOM error */ +#endif + +#define HASH_RECORD_OOM(oomed) uthash_fatal("out of memory") +#define IF_HASH_NONFATAL_OOM(x) + +#endif + +/* initial number of buckets 
*/ +#define HASH_INITIAL_NUM_BUCKETS 32U /* initial number of buckets */ +#define HASH_INITIAL_NUM_BUCKETS_LOG2 5U /* lg2 of initial number of buckets */ +#define HASH_BKT_CAPACITY_THRESH 10U /* expand when bucket count reaches */ + +/* calculate the element whose hash handle address is hhp */ +#define ELMT_FROM_HH(tbl, hhp) ((void *)(((char *)(hhp)) - ((tbl)->hho))) +/* calculate the hash handle from element address elp */ +#define HH_FROM_ELMT(tbl, elp) ((UT_hash_handle *)(void *)(((char *)(elp)) + ((tbl)->hho))) + +#define HASH_ROLLBACK_BKT(hh, head, itemptrhh) \ + do \ + { \ + struct UT_hash_handle *_hd_hh_item = (itemptrhh); \ + unsigned _hd_bkt; \ + HASH_TO_BKT(_hd_hh_item->hashv, (head)->hh.tbl->num_buckets, _hd_bkt); \ + (head)->hh.tbl->buckets[_hd_bkt].count++; \ + _hd_hh_item->hh_next = NULL; \ + _hd_hh_item->hh_prev = NULL; \ + } while (0) + +#define HASH_VALUE(keyptr, keylen, hashv) \ + do \ + { \ + HASH_FUNCTION(keyptr, keylen, hashv); \ + } while (0) + +#define HASH_FIND_BYHASHVALUE(hh, head, keyptr, keylen, hashval, out) \ + do \ + { \ + (out) = NULL; \ + if (head) \ + { \ + unsigned _hf_bkt; \ + HASH_TO_BKT(hashval, (head)->hh.tbl->num_buckets, _hf_bkt); \ + if (HASH_BLOOM_TEST((head)->hh.tbl, hashval) != 0) \ + { \ + HASH_FIND_IN_BKT((head)->hh.tbl, hh, (head)->hh.tbl->buckets[_hf_bkt], keyptr, keylen, hashval, out); \ + } \ + } \ + } while (0) + +#define HASH_FIND(hh, head, keyptr, keylen, out) \ + do \ + { \ + (out) = NULL; \ + if (head) \ + { \ + unsigned _hf_hashv; \ + HASH_VALUE(keyptr, keylen, _hf_hashv); \ + HASH_FIND_BYHASHVALUE(hh, head, keyptr, keylen, _hf_hashv, out); \ + } \ + } while (0) + +#ifdef HASH_BLOOM +#define HASH_BLOOM_BITLEN (1UL << HASH_BLOOM) +#define HASH_BLOOM_BYTELEN (HASH_BLOOM_BITLEN / 8UL) + (((HASH_BLOOM_BITLEN % 8UL) != 0UL) ? 
1UL : 0UL) +#define HASH_BLOOM_MAKE(tbl, oomed) \ + do \ + { \ + (tbl)->bloom_nbits = HASH_BLOOM; \ + (tbl)->bloom_bv = (uint8_t *)uthash_malloc(HASH_BLOOM_BYTELEN); \ + if (!(tbl)->bloom_bv) \ + { \ + HASH_RECORD_OOM(oomed); \ + } \ + else \ + { \ + uthash_bzero((tbl)->bloom_bv, HASH_BLOOM_BYTELEN); \ + (tbl)->bloom_sig = HASH_BLOOM_SIGNATURE; \ + } \ + } while (0) + +#define HASH_BLOOM_FREE(tbl) \ + do \ + { \ + uthash_free((tbl)->bloom_bv, HASH_BLOOM_BYTELEN); \ + } while (0) + +#define HASH_BLOOM_BITSET(bv, idx) (bv[(idx) / 8U] |= (1U << ((idx) % 8U))) +#define HASH_BLOOM_BITTEST(bv, idx) (bv[(idx) / 8U] & (1U << ((idx) % 8U))) + +#define HASH_BLOOM_ADD(tbl, hashv) \ + HASH_BLOOM_BITSET((tbl)->bloom_bv, ((hashv) & (uint32_t)((1UL << (tbl)->bloom_nbits) - 1U))) + +#define HASH_BLOOM_TEST(tbl, hashv) \ + HASH_BLOOM_BITTEST((tbl)->bloom_bv, ((hashv) & (uint32_t)((1UL << (tbl)->bloom_nbits) - 1U))) + +#else +#define HASH_BLOOM_MAKE(tbl, oomed) +#define HASH_BLOOM_FREE(tbl) +#define HASH_BLOOM_ADD(tbl, hashv) +#define HASH_BLOOM_TEST(tbl, hashv) (1) +#define HASH_BLOOM_BYTELEN 0U +#endif + +#define HASH_MAKE_TABLE(hh, head, oomed) \ + do \ + { \ + (head)->hh.tbl = (UT_hash_table *)uthash_malloc(sizeof(UT_hash_table)); \ + if (!(head)->hh.tbl) \ + { \ + HASH_RECORD_OOM(oomed); \ + } \ + else \ + { \ + uthash_bzero((head)->hh.tbl, sizeof(UT_hash_table)); \ + (head)->hh.tbl->tail = &((head)->hh); \ + (head)->hh.tbl->num_buckets = HASH_INITIAL_NUM_BUCKETS; \ + (head)->hh.tbl->log2_num_buckets = HASH_INITIAL_NUM_BUCKETS_LOG2; \ + (head)->hh.tbl->hho = (char *)(&(head)->hh) - (char *)(head); \ + (head)->hh.tbl->buckets = (UT_hash_bucket *)uthash_malloc( \ + HASH_INITIAL_NUM_BUCKETS * sizeof(struct UT_hash_bucket)); \ + (head)->hh.tbl->signature = HASH_SIGNATURE; \ + if (!(head)->hh.tbl->buckets) \ + { \ + HASH_RECORD_OOM(oomed); \ + uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \ + } \ + else \ + { \ + uthash_bzero((head)->hh.tbl->buckets, \ + HASH_INITIAL_NUM_BUCKETS * sizeof(struct UT_hash_bucket)); \ + HASH_BLOOM_MAKE((head)->hh.tbl, oomed); \ + IF_HASH_NONFATAL_OOM( \ + if (oomed) { \ + uthash_free((head)->hh.tbl->buckets, \ + HASH_INITIAL_NUM_BUCKETS * sizeof(struct UT_hash_bucket)); \ + uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \ + }) \ + } \ + } \ + } while (0) + +#define HASH_REPLACE_BYHASHVALUE_INORDER(hh, head, fieldname, keylen_in, hashval, add, replaced, cmpfcn) \ + do \ + { \ + (replaced) = NULL; \ + HASH_FIND_BYHASHVALUE(hh, head, &((add)->fieldname), keylen_in, hashval, replaced); \ + if (replaced) \ + { \ + HASH_DELETE(hh, head, replaced); \ + } \ + HASH_ADD_KEYPTR_BYHASHVALUE_INORDER(hh, head, &((add)->fieldname), keylen_in, hashval, add, cmpfcn); \ + } while (0) + +#define HASH_REPLACE_BYHASHVALUE(hh, head, fieldname, keylen_in, hashval, add, replaced) \ + do \ + { \ + (replaced) = NULL; \ + HASH_FIND_BYHASHVALUE(hh, head, &((add)->fieldname), keylen_in, hashval, replaced); \ + if (replaced) \ + { \ + HASH_DELETE(hh, head, replaced); \ + } \ + HASH_ADD_KEYPTR_BYHASHVALUE(hh, head, &((add)->fieldname), keylen_in, hashval, add); \ + } while (0) + +#define HASH_REPLACE(hh, head, fieldname, keylen_in, add, replaced) \ + do \ + { \ + unsigned _hr_hashv; \ + HASH_VALUE(&((add)->fieldname), keylen_in, _hr_hashv); \ + HASH_REPLACE_BYHASHVALUE(hh, head, fieldname, keylen_in, _hr_hashv, add, replaced); \ + } while (0) + +#define HASH_REPLACE_INORDER(hh, head, fieldname, keylen_in, add, replaced, cmpfcn) \ + do \ + { \ + unsigned _hr_hashv; \ + 
HASH_VALUE(&((add)->fieldname), keylen_in, _hr_hashv); \ + HASH_REPLACE_BYHASHVALUE_INORDER(hh, head, fieldname, keylen_in, _hr_hashv, add, replaced, cmpfcn); \ + } while (0) + +#define HASH_APPEND_LIST(hh, head, add) \ + do \ + { \ + (add)->hh.next = NULL; \ + (add)->hh.prev = ELMT_FROM_HH((head)->hh.tbl, (head)->hh.tbl->tail); \ + (head)->hh.tbl->tail->next = (add); \ + (head)->hh.tbl->tail = &((add)->hh); \ + } while (0) + +#define HASH_AKBI_INNER_LOOP(hh, head, add, cmpfcn) \ + do \ + { \ + do \ + { \ + if (cmpfcn(DECLTYPE(head)(_hs_iter), add) > 0) \ + { \ + break; \ + } \ + } while ((_hs_iter = HH_FROM_ELMT((head)->hh.tbl, _hs_iter)->next)); \ + } while (0) + +#ifdef NO_DECLTYPE +#undef HASH_AKBI_INNER_LOOP +#define HASH_AKBI_INNER_LOOP(hh, head, add, cmpfcn) \ + do \ + { \ + char *_hs_saved_head = (char *)(head); \ + do \ + { \ + DECLTYPE_ASSIGN(head, _hs_iter); \ + if (cmpfcn(head, add) > 0) \ + { \ + DECLTYPE_ASSIGN(head, _hs_saved_head); \ + break; \ + } \ + DECLTYPE_ASSIGN(head, _hs_saved_head); \ + } while ((_hs_iter = HH_FROM_ELMT((head)->hh.tbl, _hs_iter)->next)); \ + } while (0) +#endif + +#if HASH_NONFATAL_OOM + +#define HASH_ADD_TO_TABLE(hh, head, keyptr, keylen_in, hashval, add, oomed) \ + do \ + { \ + if (!(oomed)) \ + { \ + unsigned _ha_bkt; \ + (head)->hh.tbl->num_items++; \ + HASH_TO_BKT(hashval, (head)->hh.tbl->num_buckets, _ha_bkt); \ + HASH_ADD_TO_BKT((head)->hh.tbl->buckets[_ha_bkt], hh, &(add)->hh, oomed); \ + if (oomed) \ + { \ + HASH_ROLLBACK_BKT(hh, head, &(add)->hh); \ + HASH_DELETE_HH(hh, head, &(add)->hh); \ + (add)->hh.tbl = NULL; \ + uthash_nonfatal_oom(add); \ + } \ + else \ + { \ + HASH_BLOOM_ADD((head)->hh.tbl, hashval); \ + HASH_EMIT_KEY(hh, head, keyptr, keylen_in); \ + } \ + } \ + else \ + { \ + (add)->hh.tbl = NULL; \ + uthash_nonfatal_oom(add); \ + } \ + } while (0) + +#else + +#define HASH_ADD_TO_TABLE(hh, head, keyptr, keylen_in, hashval, add, oomed) \ + do \ + { \ + unsigned _ha_bkt; \ + (head)->hh.tbl->num_items++; \ + HASH_TO_BKT(hashval, (head)->hh.tbl->num_buckets, _ha_bkt); \ + HASH_ADD_TO_BKT((head)->hh.tbl->buckets[_ha_bkt], hh, &(add)->hh, oomed); \ + HASH_BLOOM_ADD((head)->hh.tbl, hashval); \ + HASH_EMIT_KEY(hh, head, keyptr, keylen_in); \ + } while (0) + +#endif + +#define HASH_ADD_KEYPTR_BYHASHVALUE_INORDER(hh, head, keyptr, keylen_in, hashval, add, cmpfcn) \ + do \ + { \ + IF_HASH_NONFATAL_OOM(int _ha_oomed = 0;) \ + (add)->hh.hashv = (hashval); \ + (add)->hh.key = (char *)(keyptr); \ + (add)->hh.keylen = (unsigned)(keylen_in); \ + if (!(head)) \ + { \ + (add)->hh.next = NULL; \ + (add)->hh.prev = NULL; \ + HASH_MAKE_TABLE(hh, add, _ha_oomed); \ + IF_HASH_NONFATAL_OOM(if (!_ha_oomed) { ) \ + (head) = (add); \ + IF_HASH_NONFATAL_OOM( }) \ + } \ + else \ + { \ + void *_hs_iter = (head); \ + (add)->hh.tbl = (head)->hh.tbl; \ + HASH_AKBI_INNER_LOOP(hh, head, add, cmpfcn); \ + if (_hs_iter) \ + { \ + (add)->hh.next = _hs_iter; \ + if (((add)->hh.prev = HH_FROM_ELMT((head)->hh.tbl, _hs_iter)->prev)) \ + { \ + HH_FROM_ELMT((head)->hh.tbl, (add)->hh.prev)->next = (add); \ + } \ + else \ + { \ + (head) = (add); \ + } \ + HH_FROM_ELMT((head)->hh.tbl, _hs_iter)->prev = (add); \ + } \ + else \ + { \ + HASH_APPEND_LIST(hh, head, add); \ + } \ + } \ + HASH_ADD_TO_TABLE(hh, head, keyptr, keylen_in, hashval, add, _ha_oomed); \ + HASH_FSCK(hh, head, "HASH_ADD_KEYPTR_BYHASHVALUE_INORDER"); \ + } while (0) + +#define HASH_ADD_KEYPTR_INORDER(hh, head, keyptr, keylen_in, add, cmpfcn) \ + do \ + { \ + unsigned _hs_hashv; \ + HASH_VALUE(keyptr, keylen_in, 
_hs_hashv); \ + HASH_ADD_KEYPTR_BYHASHVALUE_INORDER(hh, head, keyptr, keylen_in, _hs_hashv, add, cmpfcn); \ + } while (0) + +#define HASH_ADD_BYHASHVALUE_INORDER(hh, head, fieldname, keylen_in, hashval, add, cmpfcn) \ + HASH_ADD_KEYPTR_BYHASHVALUE_INORDER(hh, head, &((add)->fieldname), keylen_in, hashval, add, cmpfcn) + +#define HASH_ADD_INORDER(hh, head, fieldname, keylen_in, add, cmpfcn) \ + HASH_ADD_KEYPTR_INORDER(hh, head, &((add)->fieldname), keylen_in, add, cmpfcn) + +#define HASH_ADD_KEYPTR_BYHASHVALUE(hh, head, keyptr, keylen_in, hashval, add) \ + do \ + { \ + IF_HASH_NONFATAL_OOM(int _ha_oomed = 0;) \ + (add)->hh.hashv = (hashval); \ + (add)->hh.key = (const void *)(keyptr); \ + (add)->hh.keylen = (unsigned)(keylen_in); \ + if (!(head)) \ + { \ + (add)->hh.next = NULL; \ + (add)->hh.prev = NULL; \ + HASH_MAKE_TABLE(hh, add, _ha_oomed); \ + IF_HASH_NONFATAL_OOM(if (!_ha_oomed) { ) \ + (head) = (add); \ + IF_HASH_NONFATAL_OOM( }) \ + } \ + else \ + { \ + (add)->hh.tbl = (head)->hh.tbl; \ + HASH_APPEND_LIST(hh, head, add); \ + } \ + HASH_ADD_TO_TABLE(hh, head, keyptr, keylen_in, hashval, add, _ha_oomed); \ + HASH_FSCK(hh, head, "HASH_ADD_KEYPTR_BYHASHVALUE"); \ + } while (0) + +#define HASH_ADD_KEYPTR(hh, head, keyptr, keylen_in, add) \ + do \ + { \ + unsigned _ha_hashv; \ + HASH_VALUE(keyptr, keylen_in, _ha_hashv); \ + HASH_ADD_KEYPTR_BYHASHVALUE(hh, head, keyptr, keylen_in, _ha_hashv, add); \ + } while (0) + +#define HASH_ADD_BYHASHVALUE(hh, head, fieldname, keylen_in, hashval, add) \ + HASH_ADD_KEYPTR_BYHASHVALUE(hh, head, &((add)->fieldname), keylen_in, hashval, add) + +#define HASH_ADD(hh, head, fieldname, keylen_in, add) \ + HASH_ADD_KEYPTR(hh, head, &((add)->fieldname), keylen_in, add) + +#define HASH_TO_BKT(hashv, num_bkts, bkt) \ + do \ + { \ + bkt = ((hashv) & ((num_bkts)-1U)); \ + } while (0) + +/* delete "delptr" from the hash table. + * "the usual" patch-up process for the app-order doubly-linked-list. + * The use of _hd_hh_del below deserves special explanation. + * These used to be expressed using (delptr) but that led to a bug + * if someone used the same symbol for the head and deletee, like + * HASH_DELETE(hh,users,users); + * We want that to work, but by changing the head (users) below + * we were forfeiting our ability to further refer to the deletee (users) + * in the patch-up process. Solution: use scratch space to + * copy the deletee pointer, then the latter references are via that + * scratch pointer rather than through the repointed (users) symbol. 
+ */ +#define HASH_DELETE(hh, head, delptr) \ + HASH_DELETE_HH(hh, head, &(delptr)->hh) + +#define HASH_DELETE_HH(hh, head, delptrhh) \ + do \ + { \ + struct UT_hash_handle *_hd_hh_del = (delptrhh); \ + if ((_hd_hh_del->prev == NULL) && (_hd_hh_del->next == NULL)) \ + { \ + HASH_BLOOM_FREE((head)->hh.tbl); \ + uthash_free((head)->hh.tbl->buckets, \ + (head)->hh.tbl->num_buckets * sizeof(struct UT_hash_bucket)); \ + uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \ + (head) = NULL; \ + } \ + else \ + { \ + unsigned _hd_bkt; \ + if (_hd_hh_del == (head)->hh.tbl->tail) \ + { \ + (head)->hh.tbl->tail = HH_FROM_ELMT((head)->hh.tbl, _hd_hh_del->prev); \ + } \ + if (_hd_hh_del->prev != NULL) \ + { \ + HH_FROM_ELMT((head)->hh.tbl, _hd_hh_del->prev)->next = _hd_hh_del->next; \ + } \ + else \ + { \ + DECLTYPE_ASSIGN(head, _hd_hh_del->next); \ + } \ + if (_hd_hh_del->next != NULL) \ + { \ + HH_FROM_ELMT((head)->hh.tbl, _hd_hh_del->next)->prev = _hd_hh_del->prev; \ + } \ + HASH_TO_BKT(_hd_hh_del->hashv, (head)->hh.tbl->num_buckets, _hd_bkt); \ + HASH_DEL_IN_BKT((head)->hh.tbl->buckets[_hd_bkt], _hd_hh_del); \ + (head)->hh.tbl->num_items--; \ + } \ + HASH_FSCK(hh, head, "HASH_DELETE_HH"); \ + } while (0) + +/* convenience forms of HASH_FIND/HASH_ADD/HASH_DEL */ +#define HASH_FIND_STR(head, findstr, out) \ + do \ + { \ + unsigned _uthash_hfstr_keylen = (unsigned)uthash_strlen(findstr); \ + HASH_FIND(hh, head, findstr, _uthash_hfstr_keylen, out); \ + } while (0) +#define HASH_ADD_STR(head, strfield, add) \ + do \ + { \ + unsigned _uthash_hastr_keylen = (unsigned)uthash_strlen((add)->strfield); \ + HASH_ADD(hh, head, strfield[0], _uthash_hastr_keylen, add); \ + } while (0) +#define HASH_REPLACE_STR(head, strfield, add, replaced) \ + do \ + { \ + unsigned _uthash_hrstr_keylen = (unsigned)uthash_strlen((add)->strfield); \ + HASH_REPLACE(hh, head, strfield[0], _uthash_hrstr_keylen, add, replaced); \ + } while (0) +#define HASH_FIND_INT(head, findint, out) \ + HASH_FIND(hh, head, findint, sizeof(int), out) +#define HASH_ADD_INT(head, intfield, add) \ + HASH_ADD(hh, head, intfield, sizeof(int), add) +#define HASH_REPLACE_INT(head, intfield, add, replaced) \ + HASH_REPLACE(hh, head, intfield, sizeof(int), add, replaced) +#define HASH_FIND_PTR(head, findptr, out) \ + HASH_FIND(hh, head, findptr, sizeof(void *), out) +#define HASH_ADD_PTR(head, ptrfield, add) \ + HASH_ADD(hh, head, ptrfield, sizeof(void *), add) +#define HASH_REPLACE_PTR(head, ptrfield, add, replaced) \ + HASH_REPLACE(hh, head, ptrfield, sizeof(void *), add, replaced) +#define HASH_DEL(head, delptr) \ + HASH_DELETE(hh, head, delptr) + +/* HASH_FSCK checks hash integrity on every add/delete when HASH_DEBUG is defined. + * This is for uthash developer only; it compiles away if HASH_DEBUG isn't defined. + */ +#ifdef HASH_DEBUG +#include /* fprintf, stderr */ +#define HASH_OOPS(...) 
\ + do \ + { \ + fprintf(stderr, __VA_ARGS__); \ + exit(-1); \ + } while (0) +#define HASH_FSCK(hh, head, where) \ + do \ + { \ + struct UT_hash_handle *_thh; \ + if (head) \ + { \ + unsigned _bkt_i; \ + unsigned _count = 0; \ + char *_prev; \ + for (_bkt_i = 0; _bkt_i < (head)->hh.tbl->num_buckets; ++_bkt_i) \ + { \ + unsigned _bkt_count = 0; \ + _thh = (head)->hh.tbl->buckets[_bkt_i].hh_head; \ + _prev = NULL; \ + while (_thh) \ + { \ + if (_prev != (char *)(_thh->hh_prev)) \ + { \ + HASH_OOPS("%s: invalid hh_prev %p, actual %p\n", \ + (where), (void *)_thh->hh_prev, (void *)_prev); \ + } \ + _bkt_count++; \ + _prev = (char *)(_thh); \ + _thh = _thh->hh_next; \ + } \ + _count += _bkt_count; \ + if ((head)->hh.tbl->buckets[_bkt_i].count != _bkt_count) \ + { \ + HASH_OOPS("%s: invalid bucket count %u, actual %u\n", \ + (where), (head)->hh.tbl->buckets[_bkt_i].count, _bkt_count); \ + } \ + } \ + if (_count != (head)->hh.tbl->num_items) \ + { \ + HASH_OOPS("%s: invalid hh item count %u, actual %u\n", \ + (where), (head)->hh.tbl->num_items, _count); \ + } \ + _count = 0; \ + _prev = NULL; \ + _thh = &(head)->hh; \ + while (_thh) \ + { \ + _count++; \ + if (_prev != (char *)_thh->prev) \ + { \ + HASH_OOPS("%s: invalid prev %p, actual %p\n", \ + (where), (void *)_thh->prev, (void *)_prev); \ + } \ + _prev = (char *)ELMT_FROM_HH((head)->hh.tbl, _thh); \ + _thh = (_thh->next ? HH_FROM_ELMT((head)->hh.tbl, _thh->next) : NULL); \ + } \ + if (_count != (head)->hh.tbl->num_items) \ + { \ + HASH_OOPS("%s: invalid app item count %u, actual %u\n", \ + (where), (head)->hh.tbl->num_items, _count); \ + } \ + } \ + } while (0) +#else +#define HASH_FSCK(hh, head, where) +#endif + +/* When compiled with -DHASH_EMIT_KEYS, length-prefixed keys are emitted to + * the descriptor to which this macro is defined for tuning the hash function. + * The app can #include to get the prototype for write(2). */ +#ifdef HASH_EMIT_KEYS +#define HASH_EMIT_KEY(hh, head, keyptr, fieldlen) \ + do \ + { \ + unsigned _klen = fieldlen; \ + write(HASH_EMIT_KEYS, &_klen, sizeof(_klen)); \ + write(HASH_EMIT_KEYS, keyptr, (unsigned long)fieldlen); \ + } while (0) +#else +#define HASH_EMIT_KEY(hh, head, keyptr, fieldlen) +#endif + +/* The Bernstein hash function, used in Perl prior to v5.6. Note (x<<5+x)=x*33. 
*/ +#define HASH_BER(key, keylen, hashv) \ + do \ + { \ + unsigned _hb_keylen = (unsigned)keylen; \ + const unsigned char *_hb_key = (const unsigned char *)(key); \ + (hashv) = 0; \ + while (_hb_keylen-- != 0U) \ + { \ + (hashv) = (((hashv) << 5) + (hashv)) + *_hb_key++; \ + } \ + } while (0) + +/* SAX/FNV/OAT/JEN hash functions are macro variants of those listed at + * http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx */ +#define HASH_SAX(key, keylen, hashv) \ + do \ + { \ + unsigned _sx_i; \ + const unsigned char *_hs_key = (const unsigned char *)(key); \ + hashv = 0; \ + for (_sx_i = 0; _sx_i < keylen; _sx_i++) \ + { \ + hashv ^= (hashv << 5) + (hashv >> 2) + _hs_key[_sx_i]; \ + } \ + } while (0) +/* FNV-1a variation */ +#define HASH_FNV(key, keylen, hashv) \ + do \ + { \ + unsigned _fn_i; \ + const unsigned char *_hf_key = (const unsigned char *)(key); \ + (hashv) = 2166136261U; \ + for (_fn_i = 0; _fn_i < keylen; _fn_i++) \ + { \ + hashv = hashv ^ _hf_key[_fn_i]; \ + hashv = hashv * 16777619U; \ + } \ + } while (0) + +#define HASH_OAT(key, keylen, hashv) \ + do \ + { \ + unsigned _ho_i; \ + const unsigned char *_ho_key = (const unsigned char *)(key); \ + hashv = 0; \ + for (_ho_i = 0; _ho_i < keylen; _ho_i++) \ + { \ + hashv += _ho_key[_ho_i]; \ + hashv += (hashv << 10); \ + hashv ^= (hashv >> 6); \ + } \ + hashv += (hashv << 3); \ + hashv ^= (hashv >> 11); \ + hashv += (hashv << 15); \ + } while (0) + +#define HASH_JEN_MIX(a, b, c) \ + do \ + { \ + a -= b; \ + a -= c; \ + a ^= (c >> 13); \ + b -= c; \ + b -= a; \ + b ^= (a << 8); \ + c -= a; \ + c -= b; \ + c ^= (b >> 13); \ + a -= b; \ + a -= c; \ + a ^= (c >> 12); \ + b -= c; \ + b -= a; \ + b ^= (a << 16); \ + c -= a; \ + c -= b; \ + c ^= (b >> 5); \ + a -= b; \ + a -= c; \ + a ^= (c >> 3); \ + b -= c; \ + b -= a; \ + b ^= (a << 10); \ + c -= a; \ + c -= b; \ + c ^= (b >> 15); \ + } while (0) + +#define HASH_JEN(key, keylen, hashv) \ + do \ + { \ + unsigned _hj_i, _hj_j, _hj_k; \ + unsigned const char *_hj_key = (unsigned const char *)(key); \ + hashv = 0xfeedbeefu; \ + _hj_i = _hj_j = 0x9e3779b9u; \ + _hj_k = (unsigned)(keylen); \ + while (_hj_k >= 12U) \ + { \ + _hj_i += (_hj_key[0] + ((unsigned)_hj_key[1] << 8) + ((unsigned)_hj_key[2] << 16) + ((unsigned)_hj_key[3] << 24)); \ + _hj_j += (_hj_key[4] + ((unsigned)_hj_key[5] << 8) + ((unsigned)_hj_key[6] << 16) + ((unsigned)_hj_key[7] << 24)); \ + hashv += (_hj_key[8] + ((unsigned)_hj_key[9] << 8) + ((unsigned)_hj_key[10] << 16) + ((unsigned)_hj_key[11] << 24)); \ + \ + HASH_JEN_MIX(_hj_i, _hj_j, hashv); \ + \ + _hj_key += 12; \ + _hj_k -= 12U; \ + } \ + hashv += (unsigned)(keylen); \ + switch (_hj_k) \ + { \ + case 11: \ + hashv += ((unsigned)_hj_key[10] << 24); /* FALLTHROUGH */ \ + case 10: \ + hashv += ((unsigned)_hj_key[9] << 16); /* FALLTHROUGH */ \ + case 9: \ + hashv += ((unsigned)_hj_key[8] << 8); /* FALLTHROUGH */ \ + case 8: \ + _hj_j += ((unsigned)_hj_key[7] << 24); /* FALLTHROUGH */ \ + case 7: \ + _hj_j += ((unsigned)_hj_key[6] << 16); /* FALLTHROUGH */ \ + case 6: \ + _hj_j += ((unsigned)_hj_key[5] << 8); /* FALLTHROUGH */ \ + case 5: \ + _hj_j += _hj_key[4]; /* FALLTHROUGH */ \ + case 4: \ + _hj_i += ((unsigned)_hj_key[3] << 24); /* FALLTHROUGH */ \ + case 3: \ + _hj_i += ((unsigned)_hj_key[2] << 16); /* FALLTHROUGH */ \ + case 2: \ + _hj_i += ((unsigned)_hj_key[1] << 8); /* FALLTHROUGH */ \ + case 1: \ + _hj_i += _hj_key[0]; /* FALLTHROUGH */ \ + default:; \ + } \ + HASH_JEN_MIX(_hj_i, _hj_j, hashv); \ + } while (0) + +/* The Paul Hsieh hash 
function */ +#undef get16bits +#if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) || defined(_MSC_VER) || defined(__BORLANDC__) || defined(__TURBOC__) +#define get16bits(d) (*((const uint16_t *)(d))) +#endif + +#if !defined(get16bits) +#define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8) + (uint32_t)(((const uint8_t *)(d))[0])) +#endif +#define HASH_SFH(key, keylen, hashv) \ + do \ + { \ + unsigned const char *_sfh_key = (unsigned const char *)(key); \ + uint32_t _sfh_tmp, _sfh_len = (uint32_t)keylen; \ + \ + unsigned _sfh_rem = _sfh_len & 3U; \ + _sfh_len >>= 2; \ + hashv = 0xcafebabeu; \ + \ + /* Main loop */ \ + for (; _sfh_len > 0U; _sfh_len--) \ + { \ + hashv += get16bits(_sfh_key); \ + _sfh_tmp = ((uint32_t)(get16bits(_sfh_key + 2)) << 11) ^ hashv; \ + hashv = (hashv << 16) ^ _sfh_tmp; \ + _sfh_key += 2U * sizeof(uint16_t); \ + hashv += hashv >> 11; \ + } \ + \ + /* Handle end cases */ \ + switch (_sfh_rem) \ + { \ + case 3: \ + hashv += get16bits(_sfh_key); \ + hashv ^= hashv << 16; \ + hashv ^= (uint32_t)(_sfh_key[sizeof(uint16_t)]) << 18; \ + hashv += hashv >> 11; \ + break; \ + case 2: \ + hashv += get16bits(_sfh_key); \ + hashv ^= hashv << 11; \ + hashv += hashv >> 17; \ + break; \ + case 1: \ + hashv += *_sfh_key; \ + hashv ^= hashv << 10; \ + hashv += hashv >> 1; \ + break; \ + default:; \ + } \ + \ + /* Force "avalanching" of final 127 bits */ \ + hashv ^= hashv << 3; \ + hashv += hashv >> 5; \ + hashv ^= hashv << 4; \ + hashv += hashv >> 17; \ + hashv ^= hashv << 25; \ + hashv += hashv >> 6; \ + } while (0) + +/* iterate over items in a known bucket to find desired item */ +#define HASH_FIND_IN_BKT(tbl, hh, head, keyptr, keylen_in, hashval, out) \ + do \ + { \ + if ((head).hh_head != NULL) \ + { \ + DECLTYPE_ASSIGN(out, ELMT_FROM_HH(tbl, (head).hh_head)); \ + } \ + else \ + { \ + (out) = NULL; \ + } \ + while ((out) != NULL) \ + { \ + if ((out)->hh.hashv == (hashval) && (out)->hh.keylen == (keylen_in)) \ + { \ + if (HASH_KEYCMP((out)->hh.key, keyptr, keylen_in) == 0) \ + { \ + break; \ + } \ + } \ + if ((out)->hh.hh_next != NULL) \ + { \ + DECLTYPE_ASSIGN(out, ELMT_FROM_HH(tbl, (out)->hh.hh_next)); \ + } \ + else \ + { \ + (out) = NULL; \ + } \ + } \ + } while (0) + +/* add an item to a bucket */ +#define HASH_ADD_TO_BKT(head, hh, addhh, oomed) \ + do \ + { \ + UT_hash_bucket *_ha_head = &(head); \ + _ha_head->count++; \ + (addhh)->hh_next = _ha_head->hh_head; \ + (addhh)->hh_prev = NULL; \ + if (_ha_head->hh_head != NULL) \ + { \ + _ha_head->hh_head->hh_prev = (addhh); \ + } \ + _ha_head->hh_head = (addhh); \ + if ((_ha_head->count >= ((_ha_head->expand_mult + 1U) * HASH_BKT_CAPACITY_THRESH)) && !(addhh)->tbl->noexpand) \ + { \ + HASH_EXPAND_BUCKETS(addhh, (addhh)->tbl, oomed); \ + IF_HASH_NONFATAL_OOM( \ + if (oomed) { \ + HASH_DEL_IN_BKT(head, addhh); \ + }) \ + } \ + } while (0) + +/* remove an item from a given bucket */ +#define HASH_DEL_IN_BKT(head, delhh) \ + do \ + { \ + UT_hash_bucket *_hd_head = &(head); \ + _hd_head->count--; \ + if (_hd_head->hh_head == (delhh)) \ + { \ + _hd_head->hh_head = (delhh)->hh_next; \ + } \ + if ((delhh)->hh_prev) \ + { \ + (delhh)->hh_prev->hh_next = (delhh)->hh_next; \ + } \ + if ((delhh)->hh_next) \ + { \ + (delhh)->hh_next->hh_prev = (delhh)->hh_prev; \ + } \ + } while (0) + +/* Bucket expansion has the effect of doubling the number of buckets + * and redistributing the items into the new buckets. 
Ideally the + * items will distribute more or less evenly into the new buckets + * (the extent to which this is true is a measure of the quality of + * the hash function as it applies to the key domain). + * + * With the items distributed into more buckets, the chain length + * (item count) in each bucket is reduced. Thus by expanding buckets + * the hash keeps a bound on the chain length. This bounded chain + * length is the essence of how a hash provides constant time lookup. + * + * The calculation of tbl->ideal_chain_maxlen below deserves some + * explanation. First, keep in mind that we're calculating the ideal + * maximum chain length based on the *new* (doubled) bucket count. + * In fractions this is just n/b (n=number of items,b=new num buckets). + * Since the ideal chain length is an integer, we want to calculate + * ceil(n/b). We don't depend on floating point arithmetic in this + * hash, so to calculate ceil(n/b) with integers we could write + * + * ceil(n/b) = (n/b) + ((n%b)?1:0) + * + * and in fact a previous version of this hash did just that. + * But now we have improved things a bit by recognizing that b is + * always a power of two. We keep its base 2 log handy (call it lb), + * so now we can write this with a bit shift and logical AND: + * + * ceil(n/b) = (n>>lb) + ( (n & (b-1)) ? 1:0) + * + */ +#define HASH_EXPAND_BUCKETS(hh, tbl, oomed) \ + do \ + { \ + unsigned _he_bkt; \ + unsigned _he_bkt_i; \ + struct UT_hash_handle *_he_thh, *_he_hh_nxt; \ + UT_hash_bucket *_he_new_buckets, *_he_newbkt; \ + _he_new_buckets = (UT_hash_bucket *)uthash_malloc( \ + sizeof(struct UT_hash_bucket) * (tbl)->num_buckets * 2U); \ + if (!_he_new_buckets) \ + { \ + HASH_RECORD_OOM(oomed); \ + } \ + else \ + { \ + uthash_bzero(_he_new_buckets, \ + sizeof(struct UT_hash_bucket) * (tbl)->num_buckets * 2U); \ + (tbl)->ideal_chain_maxlen = \ + ((tbl)->num_items >> ((tbl)->log2_num_buckets + 1U)) + \ + ((((tbl)->num_items & (((tbl)->num_buckets * 2U) - 1U)) != 0U) ? 1U : 0U); \ + (tbl)->nonideal_items = 0; \ + for (_he_bkt_i = 0; _he_bkt_i < (tbl)->num_buckets; _he_bkt_i++) \ + { \ + _he_thh = (tbl)->buckets[_he_bkt_i].hh_head; \ + while (_he_thh != NULL) \ + { \ + _he_hh_nxt = _he_thh->hh_next; \ + HASH_TO_BKT(_he_thh->hashv, (tbl)->num_buckets * 2U, _he_bkt); \ + _he_newbkt = &(_he_new_buckets[_he_bkt]); \ + if (++(_he_newbkt->count) > (tbl)->ideal_chain_maxlen) \ + { \ + (tbl)->nonideal_items++; \ + if (_he_newbkt->count > _he_newbkt->expand_mult * (tbl)->ideal_chain_maxlen) \ + { \ + _he_newbkt->expand_mult++; \ + } \ + } \ + _he_thh->hh_prev = NULL; \ + _he_thh->hh_next = _he_newbkt->hh_head; \ + if (_he_newbkt->hh_head != NULL) \ + { \ + _he_newbkt->hh_head->hh_prev = _he_thh; \ + } \ + _he_newbkt->hh_head = _he_thh; \ + _he_thh = _he_hh_nxt; \ + } \ + } \ + uthash_free((tbl)->buckets, (tbl)->num_buckets * sizeof(struct UT_hash_bucket)); \ + (tbl)->num_buckets *= 2U; \ + (tbl)->log2_num_buckets++; \ + (tbl)->buckets = _he_new_buckets; \ + (tbl)->ineff_expands = ((tbl)->nonideal_items > ((tbl)->num_items >> 1)) ? ((tbl)->ineff_expands + 1U) : 0U; \ + if ((tbl)->ineff_expands > 1U) \ + { \ + (tbl)->noexpand = 1; \ + uthash_noexpand_fyi(tbl); \ + } \ + uthash_expand_fyi(tbl); \ + } \ + } while (0) + +/* This is an adaptation of Simon Tatham's O(n log(n)) mergesort */ +/* Note that HASH_SORT assumes the hash handle name to be hh. + * HASH_SRT was added to allow the hash handle name to be passed in. 
*/ +#define HASH_SORT(head, cmpfcn) HASH_SRT(hh, head, cmpfcn) +#define HASH_SRT(hh, head, cmpfcn) \ + do \ + { \ + unsigned _hs_i; \ + unsigned _hs_looping, _hs_nmerges, _hs_insize, _hs_psize, _hs_qsize; \ + struct UT_hash_handle *_hs_p, *_hs_q, *_hs_e, *_hs_list, *_hs_tail; \ + if (head != NULL) \ + { \ + _hs_insize = 1; \ + _hs_looping = 1; \ + _hs_list = &((head)->hh); \ + while (_hs_looping != 0U) \ + { \ + _hs_p = _hs_list; \ + _hs_list = NULL; \ + _hs_tail = NULL; \ + _hs_nmerges = 0; \ + while (_hs_p != NULL) \ + { \ + _hs_nmerges++; \ + _hs_q = _hs_p; \ + _hs_psize = 0; \ + for (_hs_i = 0; _hs_i < _hs_insize; ++_hs_i) \ + { \ + _hs_psize++; \ + _hs_q = ((_hs_q->next != NULL) ? HH_FROM_ELMT((head)->hh.tbl, _hs_q->next) : NULL); \ + if (_hs_q == NULL) \ + { \ + break; \ + } \ + } \ + _hs_qsize = _hs_insize; \ + while ((_hs_psize != 0U) || ((_hs_qsize != 0U) && (_hs_q != NULL))) \ + { \ + if (_hs_psize == 0U) \ + { \ + _hs_e = _hs_q; \ + _hs_q = ((_hs_q->next != NULL) ? HH_FROM_ELMT((head)->hh.tbl, _hs_q->next) : NULL); \ + _hs_qsize--; \ + } \ + else if ((_hs_qsize == 0U) || (_hs_q == NULL)) \ + { \ + _hs_e = _hs_p; \ + if (_hs_p != NULL) \ + { \ + _hs_p = ((_hs_p->next != NULL) ? HH_FROM_ELMT((head)->hh.tbl, _hs_p->next) : NULL); \ + } \ + _hs_psize--; \ + } \ + else if ((cmpfcn( \ + DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl, _hs_p)), \ + DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl, _hs_q)))) <= 0) \ + { \ + _hs_e = _hs_p; \ + if (_hs_p != NULL) \ + { \ + _hs_p = ((_hs_p->next != NULL) ? HH_FROM_ELMT((head)->hh.tbl, _hs_p->next) : NULL); \ + } \ + _hs_psize--; \ + } \ + else \ + { \ + _hs_e = _hs_q; \ + _hs_q = ((_hs_q->next != NULL) ? HH_FROM_ELMT((head)->hh.tbl, _hs_q->next) : NULL); \ + _hs_qsize--; \ + } \ + if (_hs_tail != NULL) \ + { \ + _hs_tail->next = ((_hs_e != NULL) ? ELMT_FROM_HH((head)->hh.tbl, _hs_e) : NULL); \ + } \ + else \ + { \ + _hs_list = _hs_e; \ + } \ + if (_hs_e != NULL) \ + { \ + _hs_e->prev = ((_hs_tail != NULL) ? ELMT_FROM_HH((head)->hh.tbl, _hs_tail) : NULL); \ + } \ + _hs_tail = _hs_e; \ + } \ + _hs_p = _hs_q; \ + } \ + if (_hs_tail != NULL) \ + { \ + _hs_tail->next = NULL; \ + } \ + if (_hs_nmerges <= 1U) \ + { \ + _hs_looping = 0; \ + (head)->hh.tbl->tail = _hs_tail; \ + DECLTYPE_ASSIGN(head, ELMT_FROM_HH((head)->hh.tbl, _hs_list)); \ + } \ + _hs_insize *= 2U; \ + } \ + HASH_FSCK(hh, head, "HASH_SRT"); \ + } \ + } while (0) + +/* This function selects items from one hash into another hash. + * The end result is that the selected items have dual presence + * in both hashes. There is no copy of the items made; rather + * they are added into the new hash through a secondary hash + * hash handle that must be present in the structure. 
*/ +#define HASH_SELECT(hh_dst, dst, hh_src, src, cond) \ + do \ + { \ + unsigned _src_bkt, _dst_bkt; \ + void *_last_elt = NULL, *_elt; \ + UT_hash_handle *_src_hh, *_dst_hh, *_last_elt_hh = NULL; \ + ptrdiff_t _dst_hho = ((char *)(&(dst)->hh_dst) - (char *)(dst)); \ + if ((src) != NULL) \ + { \ + for (_src_bkt = 0; _src_bkt < (src)->hh_src.tbl->num_buckets; _src_bkt++) \ + { \ + for (_src_hh = (src)->hh_src.tbl->buckets[_src_bkt].hh_head; \ + _src_hh != NULL; \ + _src_hh = _src_hh->hh_next) \ + { \ + _elt = ELMT_FROM_HH((src)->hh_src.tbl, _src_hh); \ + if (cond(_elt)) \ + { \ + IF_HASH_NONFATAL_OOM(int _hs_oomed = 0;) \ + _dst_hh = (UT_hash_handle *)(void *)(((char *)_elt) + _dst_hho); \ + _dst_hh->key = _src_hh->key; \ + _dst_hh->keylen = _src_hh->keylen; \ + _dst_hh->hashv = _src_hh->hashv; \ + _dst_hh->prev = _last_elt; \ + _dst_hh->next = NULL; \ + if (_last_elt_hh != NULL) \ + { \ + _last_elt_hh->next = _elt; \ + } \ + if ((dst) == NULL) \ + { \ + DECLTYPE_ASSIGN(dst, _elt); \ + HASH_MAKE_TABLE(hh_dst, dst, _hs_oomed); \ + IF_HASH_NONFATAL_OOM( \ + if (_hs_oomed) { \ + uthash_nonfatal_oom(_elt); \ + (dst) = NULL; \ + continue; \ + }) \ + } \ + else \ + { \ + _dst_hh->tbl = (dst)->hh_dst.tbl; \ + } \ + HASH_TO_BKT(_dst_hh->hashv, _dst_hh->tbl->num_buckets, _dst_bkt); \ + HASH_ADD_TO_BKT(_dst_hh->tbl->buckets[_dst_bkt], hh_dst, _dst_hh, _hs_oomed); \ + (dst)->hh_dst.tbl->num_items++; \ + IF_HASH_NONFATAL_OOM( \ + if (_hs_oomed) { \ + HASH_ROLLBACK_BKT(hh_dst, dst, _dst_hh); \ + HASH_DELETE_HH(hh_dst, dst, _dst_hh); \ + _dst_hh->tbl = NULL; \ + uthash_nonfatal_oom(_elt); \ + continue; \ + }) \ + HASH_BLOOM_ADD(_dst_hh->tbl, _dst_hh->hashv); \ + _last_elt = _elt; \ + _last_elt_hh = _dst_hh; \ + } \ + } \ + } \ + } \ + HASH_FSCK(hh_dst, dst, "HASH_SELECT"); \ + } while (0) + +#define HASH_CLEAR(hh, head) \ + do \ + { \ + if ((head) != NULL) \ + { \ + HASH_BLOOM_FREE((head)->hh.tbl); \ + uthash_free((head)->hh.tbl->buckets, \ + (head)->hh.tbl->num_buckets * sizeof(struct UT_hash_bucket)); \ + uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \ + (head) = NULL; \ + } \ + } while (0) + +#define HASH_OVERHEAD(hh, head) \ + (((head) != NULL) ? ( \ + (size_t)(((head)->hh.tbl->num_items * sizeof(UT_hash_handle)) + \ + ((head)->hh.tbl->num_buckets * sizeof(UT_hash_bucket)) + \ + sizeof(UT_hash_table) + \ + (HASH_BLOOM_BYTELEN))) \ + : 0U) + +#ifdef NO_DECLTYPE +#define HASH_ITER(hh, head, el, tmp) \ + for (((el) = (head)), ((*(char **)(&(tmp))) = (char *)((head != NULL) ? (head)->hh.next : NULL)); \ + (el) != NULL; ((el) = (tmp)), ((*(char **)(&(tmp))) = (char *)((tmp != NULL) ? (tmp)->hh.next : NULL))) +#else +#define HASH_ITER(hh, head, el, tmp) \ + for (((el) = (head)), ((tmp) = DECLTYPE(el)((head != NULL) ? (head)->hh.next : NULL)); \ + (el) != NULL; ((el) = (tmp)), ((tmp) = DECLTYPE(el)((tmp != NULL) ? (tmp)->hh.next : NULL))) +#endif + +/* obtain a count of items in the hash */ +#define HASH_COUNT(head) HASH_CNT(hh, head) +#define HASH_CNT(hh, head) ((head != NULL) ? ((head)->hh.tbl->num_items) : 0U) + +typedef struct UT_hash_bucket +{ + struct UT_hash_handle *hh_head; + unsigned count; + + /* expand_mult is normally set to 0. In this situation, the max chain length + * threshold is enforced at its default value, HASH_BKT_CAPACITY_THRESH. (If + * the bucket's chain exceeds this length, bucket expansion is triggered). 
+ * However, setting expand_mult to a non-zero value delays bucket expansion + * (that would be triggered by additions to this particular bucket) + * until its chain length reaches a *multiple* of HASH_BKT_CAPACITY_THRESH. + * (The multiplier is simply expand_mult+1). The whole idea of this + * multiplier is to reduce bucket expansions, since they are expensive, in + * situations where we know that a particular bucket tends to be overused. + * It is better to let its chain length grow to a longer yet-still-bounded + * value, than to do an O(n) bucket expansion too often. + */ + unsigned expand_mult; + +} UT_hash_bucket; + +/* random signature used only to find hash tables in external analysis */ +#define HASH_SIGNATURE 0xa0111fe1u +#define HASH_BLOOM_SIGNATURE 0xb12220f2u + +typedef struct UT_hash_table +{ + UT_hash_bucket *buckets; + unsigned num_buckets, log2_num_buckets; + unsigned num_items; + struct UT_hash_handle *tail; /* tail hh in app order, for fast append */ + ptrdiff_t hho; /* hash handle offset (byte pos of hash handle in element */ + + /* in an ideal situation (all buckets used equally), no bucket would have + * more than ceil(#items/#buckets) items. that's the ideal chain length. */ + unsigned ideal_chain_maxlen; + + /* nonideal_items is the number of items in the hash whose chain position + * exceeds the ideal chain maxlen. these items pay the penalty for an uneven + * hash distribution; reaching them in a chain traversal takes >ideal steps */ + unsigned nonideal_items; + + /* ineffective expands occur when a bucket doubling was performed, but + * afterward, more than half the items in the hash had nonideal chain + * positions. If this happens on two consecutive expansions we inhibit any + * further expansion, as it's not helping; this happens when the hash + * function isn't a good fit for the key domain. When expansion is inhibited + * the hash will still work, albeit no longer in constant time. 
*/ + unsigned ineff_expands, noexpand; + + uint32_t signature; /* used only to find hash tables in external analysis */ +#ifdef HASH_BLOOM + uint32_t bloom_sig; /* used only to test bloom exists in external analysis */ + uint8_t *bloom_bv; + uint8_t bloom_nbits; +#endif + +} UT_hash_table; + +typedef struct UT_hash_handle +{ + struct UT_hash_table *tbl; + void *prev; /* prev element in app order */ + void *next; /* next element in app order */ + struct UT_hash_handle *hh_prev; /* previous hh in bucket order */ + struct UT_hash_handle *hh_next; /* next hh in bucket order */ + const void *key; /* ptr to enclosing struct's key */ + unsigned keylen; /* enclosing struct's key len */ + unsigned hashv; /* result of hash-fcn(key) */ +} UT_hash_handle; + +#endif /* UTHASH_H */ diff --git a/common/src/tfe_acceptor_kni.cpp b/common/src/tfe_acceptor_kni.cpp new file mode 100644 index 0000000..13e8846 --- /dev/null +++ b/common/src/tfe_acceptor_kni.cpp @@ -0,0 +1,115 @@ +#include +#include + +#include "tfe_cmsg.h" +#include "tfe_tap_rss.h" +#include "tfe_acceptor_kni.h" +#include "tfe_metrics.h" + +/****************************************************************************** + * session_ctx + ******************************************************************************/ + +struct session_ctx *session_ctx_new() +{ + struct session_ctx *ctx = (struct session_ctx *)calloc(1, sizeof(struct session_ctx)); + assert(ctx != NULL); + return ctx; +} + +void session_ctx_free(struct session_ctx *ctx) +{ + if (ctx) + { + if (ctx->first_ctrl_pkt.addr_string) + { + free(ctx->first_ctrl_pkt.addr_string); + ctx->first_ctrl_pkt.addr_string = NULL; + } + + if (ctx->first_ctrl_pkt.header_data) + { + free(ctx->first_ctrl_pkt.header_data); + ctx->first_ctrl_pkt.header_data = NULL; + } + + if (ctx->cmsg) + { + tfe_cmsg_destroy(ctx->cmsg); + } + + free(ctx); + ctx = 0; + } +} + + +/****************************************************************************** + * acceptor_ctx + ******************************************************************************/ +struct acceptor_ctx *acceptor_ctx_create(const char *profile) +{ + int ret = 0; + struct acceptor_ctx *ctx = ALLOC(struct acceptor_ctx, 1); + + MESA_load_profile_int_def(profile, "system", "firewall_sids", (int *)&(ctx->firewall_sids), 1001); + MESA_load_profile_int_def(profile, "system", "service_chaining_sids", (int *)&(ctx->sce_sids), 1002); + MESA_load_profile_int_def(profile, "system", "nr_worker_threads", (int *)&(ctx->nr_worker_threads), 8); + MESA_load_profile_uint_range(profile, "system", "cpu_affinity_mask", TFE_THREAD_MAX, (unsigned int *)ctx->cpu_affinity_mask); + ctx->nr_worker_threads = MIN(ctx->nr_worker_threads, TFE_THREAD_MAX); + + CPU_ZERO(&ctx->coremask); + for (int i = 0; i < ctx->nr_worker_threads; i++) + { + int cpu_id = ctx->cpu_affinity_mask[i]; + CPU_SET(cpu_id, &ctx->coremask); + } + + ctx->io = packet_io_create(profile, ctx->nr_worker_threads, &ctx->coremask); + if (ctx->io == NULL) + { + goto error_out; + } + + ctx->config = tfe_tap_config_create(profile, ctx->nr_worker_threads); + if (ctx->config == NULL) + { + goto error_out; + } + + ctx->metrics = global_metrics_create(); + if (ctx->metrics == NULL) + { + goto error_out; + } + + // ctx->enforcer = policy_enforcer_create("KNI", profile, ctx->nr_worker_threads, NULL); + // if (ctx->enforcer == NULL) + // { + // goto error_out; + // } + + // if (policy_enforcer_register(ctx->enforcer) == -1) + // { + // goto error_out; + // } + + return ctx; + +error_out: + acceptor_ctx_destory(ctx); + return 
NULL; +} + +void acceptor_ctx_destory(struct acceptor_ctx * ctx) +{ + if (ctx) + { + packet_io_destory(ctx->io); + tfe_tap_destory(ctx->config); + + free(ctx); + ctx = NULL; + } + return; +} diff --git a/common/src/tfe_addr_tuple4.cpp b/common/src/tfe_addr_tuple4.cpp new file mode 100644 index 0000000..24653a6 --- /dev/null +++ b/common/src/tfe_addr_tuple4.cpp @@ -0,0 +1,57 @@ +#include +#include +#include + +#include "tfe_addr_tuple4.h" + +char *addr_tuple4_to_str(const struct addr_tuple4 *addr) +{ + char *str_ret = NULL; + + if (addr->addr_type == ADDR_TUPLE4_TYPE_V4) + { + char src_addr[INET_ADDRSTRLEN] = {0}; + char dst_addr[INET_ADDRSTRLEN] = {0}; + uint16_t src_port = ntohs((uint16_t)addr->src_port); + uint16_t dst_port = ntohs((uint16_t)addr->dst_port); + inet_ntop(AF_INET, &addr->addr_v4.src_addr, src_addr, sizeof(src_addr)); + inet_ntop(AF_INET, &addr->addr_v4.dst_addr, dst_addr, sizeof(dst_addr)); + asprintf(&str_ret, "%s %u %s %u", src_addr, src_port, dst_addr, dst_port); + } + + if (addr->addr_type == ADDR_TUPLE4_TYPE_V6) + { + char src_addr[INET6_ADDRSTRLEN] = {0}; + char dst_addr[INET6_ADDRSTRLEN] = {0}; + uint16_t src_port = ntohs((uint16_t)addr->src_port); + uint16_t dst_port = ntohs((uint16_t)addr->dst_port); + inet_ntop(AF_INET6, &addr->addr_v6.src_addr, src_addr, sizeof(src_addr)); + inet_ntop(AF_INET6, &addr->addr_v6.dst_addr, dst_addr, sizeof(dst_addr)); + asprintf(&str_ret, "%s %u %s %u", src_addr, src_port, dst_addr, dst_port); + } + + return str_ret; +} + +void addr_tuple4_reverse(const struct addr_tuple4 *orin, struct addr_tuple4 *out) +{ + memset(out, 0, sizeof(struct addr_tuple4)); + + if (orin->addr_type == ADDR_TUPLE4_TYPE_V4) + { + out->addr_type = ADDR_TUPLE4_TYPE_V4; + out->addr_v4.src_addr = orin->addr_v4.dst_addr; + out->addr_v4.dst_addr = orin->addr_v4.src_addr; + out->src_port = orin->dst_port; + out->dst_port = orin->src_port; + } + + if (orin->addr_type == ADDR_TUPLE4_TYPE_V6) + { + out->addr_type = ADDR_TUPLE4_TYPE_V6; + out->addr_v6.src_addr = orin->addr_v6.dst_addr; + out->addr_v6.dst_addr = orin->addr_v6.src_addr; + out->src_port = orin->dst_port; + out->dst_port = orin->src_port; + } +} \ No newline at end of file diff --git a/common/src/tfe_ctrl_packet.cpp b/common/src/tfe_ctrl_packet.cpp new file mode 100644 index 0000000..0b6b3dc --- /dev/null +++ b/common/src/tfe_ctrl_packet.cpp @@ -0,0 +1,142 @@ +#include +#include +#include + +#include "tfe_mpack.h" +#include "tfe_cmsg.h" +#include "tfe_utils.h" +#include "tfe_ctrl_packet.h" + +const char *session_state_to_string(enum session_state state) +{ + switch (state) + { + case SESSION_STATE_OPENING: + return "opening"; + case SESSION_STATE_CLOSING: + return "closing"; + case SESSION_STATE_ACTIVE: + return "active"; + case SESSION_STATE_RESETALL: + return "resetall"; + default: + return "unknown"; + } +} + +void ctrl_packet_parser_init(struct ctrl_pkt_parser *handler) +{ + memset(handler, 0, sizeof(struct ctrl_pkt_parser)); + handler->cmsg = tfe_cmsg_init(); +} + +// return 0 : success +// return -1 : error +int ctrl_packet_parser_parse(struct ctrl_pkt_parser *handler, const char *data, size_t length) +{ + // TODO FREE + return parse_messagepack(data, length, handler); +} + +void ctrl_packet_parser_dump(struct ctrl_pkt_parser *handler) +{ + uint16_t cmsg_len; + if (handler) + { + TFE_LOG_INFO(g_default_logger, "%s: tsync : %s", LOG_TAG_POLICY, handler->tsync); + TFE_LOG_INFO(g_default_logger, "%s: session_id : %lu", LOG_TAG_POLICY, handler->session_id); + TFE_LOG_INFO(g_default_logger, "%s: state : 
%s", LOG_TAG_POLICY, session_state_to_string(handler->state)); + TFE_LOG_INFO(g_default_logger, "%s: method : %s", LOG_TAG_POLICY, handler->method); + TFE_LOG_INFO(g_default_logger, "%s: tfe policy_id_num : %d", LOG_TAG_POLICY, handler->tfe_policy_id_num); + + for (int i = 0; i < handler->tfe_policy_id_num; i++) + { + TFE_LOG_INFO(g_default_logger, "%s: %d tfe policy_ids[%03lu]", LOG_TAG_POLICY, i, handler->tfe_policy_ids[i]); + } + TFE_LOG_INFO(g_default_logger, "%s: sce policy_id_num : %d", LOG_TAG_POLICY, handler->sce_policy_id_num); + + for (int i = 0; i < handler->tfe_policy_id_num; i++) + { + TFE_LOG_INFO(g_default_logger, "%s: %d sce policy_ids[%03lu]", LOG_TAG_POLICY, i, handler->sce_policy_ids[i]); + } + + uint64_t policy_id = 0; + tfe_cmsg_get_value(handler->cmsg, TFE_CMSG_POLICY_ID, (unsigned char *)&policy_id, 64, &cmsg_len); + TFE_LOG_INFO(g_default_logger, "TFE_CMSG_POLICY_ID: %lu", policy_id); + uint16_t client_mss = 0; + tfe_cmsg_get_value(handler->cmsg, TFE_CMSG_TCP_RESTORE_MSS_CLIENT, (unsigned char *)&client_mss, 16, &cmsg_len); + TFE_LOG_INFO(g_default_logger, "TFE_CMSG_TCP_RESTORE_MSS_CLIENT: %u", client_mss); + + } +} + +const char * tfe_cmsg_tlv_type_to_string[TFE_CMSG_TLV_NR_MAX]; +void tfe_cmsg_enum_to_string() +{ + memset(tfe_cmsg_tlv_type_to_string, 0 ,sizeof(tfe_cmsg_tlv_type_to_string)); + tfe_cmsg_tlv_type_to_string[TFE_CMSG_TCP_RESTORE_SEQ] = "TFE_CMSG_TCP_RESTORE_SEQ"; + tfe_cmsg_tlv_type_to_string[TFE_CMSG_TCP_RESTORE_ACK] = "TFE_CMSG_TCP_RESTORE_ACK"; + tfe_cmsg_tlv_type_to_string[TFE_CMSG_TCP_RESTORE_MSS_CLIENT] = "TFE_CMSG_TCP_RESTORE_MSS_CLIENT"; + tfe_cmsg_tlv_type_to_string[TFE_CMSG_TCP_RESTORE_MSS_SERVER] = "TFE_CMSG_TCP_RESTORE_MSS_SERVER"; + tfe_cmsg_tlv_type_to_string[TFE_CMSG_TCP_RESTORE_WSACLE_CLIENT] = "TFE_CMSG_TCP_RESTORE_WSACLE_CLIENT"; + tfe_cmsg_tlv_type_to_string[TFE_CMSG_TCP_RESTORE_WSACLE_SERVER] = "TFE_CMSG_TCP_RESTORE_WSACLE_SERVER"; + tfe_cmsg_tlv_type_to_string[TFE_CMSG_TCP_RESTORE_SACK_CLIENT] = "TFE_CMSG_TCP_RESTORE_SACK_CLIENT"; + tfe_cmsg_tlv_type_to_string[TFE_CMSG_TCP_RESTORE_SACK_SERVER] = "TFE_CMSG_TCP_RESTORE_SACK_SERVER"; + tfe_cmsg_tlv_type_to_string[TFE_CMSG_TCP_RESTORE_TS_CLIENT] = "TFE_CMSG_TCP_RESTORE_TS_CLIENT"; + tfe_cmsg_tlv_type_to_string[TFE_CMSG_TCP_RESTORE_TS_SERVER] = "TFE_CMSG_TCP_RESTORE_TS_SERVER"; + tfe_cmsg_tlv_type_to_string[TFE_CMSG_TCP_RESTORE_PROTOCOL] = "TFE_CMSG_TCP_RESTORE_PROTOCOL"; + tfe_cmsg_tlv_type_to_string[TFE_CMSG_TCP_RESTORE_WINDOW_CLIENT] = "TFE_CMSG_TCP_RESTORE_WINDOW_CLIENT"; + tfe_cmsg_tlv_type_to_string[TFE_CMSG_TCP_RESTORE_WINDOW_SERVER] = "TFE_CMSG_TCP_RESTORE_WINDOW_SERVER"; + tfe_cmsg_tlv_type_to_string[TFE_CMSG_TCP_RESTORE_INFO_PACKET_CUR_DIR] = "TFE_CMSG_TCP_RESTORE_INFO_PACKET_CUR_DIR"; + tfe_cmsg_tlv_type_to_string[TFE_CMSG_TCP_RESTORE_TS_CLIENT_VAL] = "TFE_CMSG_TCP_RESTORE_TS_CLIENT_VAL"; + tfe_cmsg_tlv_type_to_string[TFE_CMSG_TCP_RESTORE_TS_SERVER_VAL] = "TFE_CMSG_TCP_RESTORE_TS_SERVER_VAL"; + tfe_cmsg_tlv_type_to_string[TFE_CMSG_POLICY_ID] = "TFE_CMSG_POLICY_ID"; + tfe_cmsg_tlv_type_to_string[TFE_CMSG_STREAM_TRACE_ID] = "TFE_CMSG_STREAM_TRACE_ID"; + tfe_cmsg_tlv_type_to_string[TFE_CMSG_SSL_INTERCEPT_STATE] = "TFE_CMSG_SSL_INTERCEPT_STATE"; + tfe_cmsg_tlv_type_to_string[TFE_CMSG_SSL_SERVER_SIDE_LATENCY] = "TFE_CMSG_SSL_SERVER_SIDE_LATENCY"; + tfe_cmsg_tlv_type_to_string[TFE_CMSG_SSL_CLIENT_SIDE_LATENCY] = "TFE_CMSG_SSL_CLIENT_SIDE_LATENCY"; + tfe_cmsg_tlv_type_to_string[TFE_CMSG_SSL_SERVER_SIDE_VERSION] = "TFE_CMSG_SSL_SERVER_SIDE_VERSION"; + 
tfe_cmsg_tlv_type_to_string[TFE_CMSG_SSL_CLIENT_SIDE_VERSION] = "TFE_CMSG_SSL_CLIENT_SIDE_VERSION"; + tfe_cmsg_tlv_type_to_string[TFE_CMSG_SSL_PINNING_STATE] = "TFE_CMSG_SSL_PINNING_STATE"; + tfe_cmsg_tlv_type_to_string[TFE_CMSG_SSL_CERT_VERIFY] = "TFE_CMSG_SSL_CERT_VERIFY"; + tfe_cmsg_tlv_type_to_string[TFE_CMSG_SSL_ERROR] = "TFE_CMSG_SSL_ERROR"; + tfe_cmsg_tlv_type_to_string[TFE_CMSG_SRC_MAC] = "TFE_CMSG_SRC_MAC"; + tfe_cmsg_tlv_type_to_string[TFE_CMSG_DST_MAC] = "TFE_CMSG_DST_MAC"; + tfe_cmsg_tlv_type_to_string[TFE_CMSG_DOWNSTREAM_TCP_NODELAY] = "TFE_CMSG_DOWNSTREAM_TCP_NODELAY"; + tfe_cmsg_tlv_type_to_string[TFE_CMSG_DOWNSTREAM_TCP_TTL] = "TFE_CMSG_DOWNSTREAM_TCP_TTL"; + tfe_cmsg_tlv_type_to_string[TFE_CMSG_DOWNSTREAM_TCP_KEEPALIVE] = "TFE_CMSG_DOWNSTREAM_TCP_KEEPALIVE"; + tfe_cmsg_tlv_type_to_string[TFE_CMSG_DOWNSTREAM_TCP_KEEPCNT] = "TFE_CMSG_DOWNSTREAM_TCP_KEEPCNT"; + tfe_cmsg_tlv_type_to_string[TFE_CMSG_DOWNSTREAM_TCP_KEEPIDLE] = "TFE_CMSG_DOWNSTREAM_TCP_KEEPIDLE"; + tfe_cmsg_tlv_type_to_string[TFE_CMSG_DOWNSTREAM_TCP_KEEPINTVL] = "TFE_CMSG_DOWNSTREAM_TCP_KEEPINTVL"; + tfe_cmsg_tlv_type_to_string[TFE_CMSG_DOWNSTREAM_TCP_USER_TIMEOUT] = "TFE_CMSG_DOWNSTREAM_TCP_USER_TIMEOUT"; + tfe_cmsg_tlv_type_to_string[TFE_CMSG_UPSTREAM_TCP_NODELAY] = "TFE_CMSG_UPSTREAM_TCP_NODELAY"; + tfe_cmsg_tlv_type_to_string[TFE_CMSG_UPSTREAM_TCP_TTL] = "TFE_CMSG_UPSTREAM_TCP_TTL"; + tfe_cmsg_tlv_type_to_string[TFE_CMSG_UPSTREAM_TCP_KEEPALIVE] = "TFE_CMSG_UPSTREAM_TCP_KEEPALIVE"; + tfe_cmsg_tlv_type_to_string[TFE_CMSG_UPSTREAM_TCP_KEEPCNT] = "TFE_CMSG_UPSTREAM_TCP_KEEPCNT"; + tfe_cmsg_tlv_type_to_string[TFE_CMSG_UPSTREAM_TCP_KEEPIDLE] = "TFE_CMSG_UPSTREAM_TCP_KEEPIDLE"; + tfe_cmsg_tlv_type_to_string[TFE_CMSG_UPSTREAM_TCP_KEEPINTVL] = "TFE_CMSG_UPSTREAM_TCP_KEEPINTVL"; + tfe_cmsg_tlv_type_to_string[TFE_CMSG_UPSTREAM_TCP_USER_TIMEOUT] = "TFE_CMSG_UPSTREAM_TCP_USER_TIMEOUT"; + tfe_cmsg_tlv_type_to_string[TFE_CMSG_TCP_PASSTHROUGH] = "TFE_CMSG_TCP_PASSTHROUGH"; + tfe_cmsg_tlv_type_to_string[TFE_CMSG_SRC_SUB_ID] = "TFE_CMSG_SRC_SUB_ID"; + tfe_cmsg_tlv_type_to_string[TFE_CMSG_DST_SUB_ID] = "TFE_CMSG_DST_SUB_ID"; + tfe_cmsg_tlv_type_to_string[TFE_CMSG_SRC_ASN] = "TFE_CMSG_SRC_ASN"; + tfe_cmsg_tlv_type_to_string[TFE_CMSG_DST_ASN] = "TFE_CMSG_DST_ASN"; + tfe_cmsg_tlv_type_to_string[TFE_CMSG_SRC_ORGANIZATION] = "TFE_CMSG_SRC_ORGANIZATION"; + tfe_cmsg_tlv_type_to_string[TFE_CMSG_DST_ORGANIZATION] = "TFE_CMSG_DST_ORGANIZATION"; + tfe_cmsg_tlv_type_to_string[TFE_CMSG_SRC_IP_LOCATION_COUNTRY] = "TFE_CMSG_SRC_IP_LOCATION_COUNTRY"; + tfe_cmsg_tlv_type_to_string[TFE_CMSG_DST_IP_LOCATION_COUNTRY] = "TFE_CMSG_DST_IP_LOCATION_COUNTRY"; + tfe_cmsg_tlv_type_to_string[TFE_CMSG_SRC_IP_LOCATION_PROVINE] = "TFE_CMSG_SRC_IP_LOCATION_PROVINE"; + tfe_cmsg_tlv_type_to_string[TFE_CMSG_DST_IP_LOCATION_PROVINE] = "TFE_CMSG_DST_IP_LOCATION_PROVINE"; + tfe_cmsg_tlv_type_to_string[TFE_CMSG_SRC_IP_LOCATION_CITY] = "TFE_CMSG_SRC_IP_LOCATION_CITY"; + tfe_cmsg_tlv_type_to_string[TFE_CMSG_DST_IP_LOCATION_CITY] = "TFE_CMSG_DST_IP_LOCATION_CITY"; + tfe_cmsg_tlv_type_to_string[TFE_CMSG_SRC_IP_LOCATION_SUBDIVISION] = "TFE_CMSG_SRC_IP_LOCATION_SUBDIVISION"; + tfe_cmsg_tlv_type_to_string[TFE_CMSG_DST_IP_LOCATION_SUBDIVISION] = "TFE_CMSG_DST_IP_LOCATION_SUBDIVISION"; + + tfe_cmsg_tlv_type_to_string[TFE_CMSG_SSL_CLIENT_JA3_FINGERPRINT] = "TFE_CMSG_SSL_CLIENT_JA3_FINGERPRINT"; + tfe_cmsg_tlv_type_to_string[TFE_CMSG_FQDN_CAT_ID_NUM] = "TFE_CMSG_FQDN_CAT_ID_NUM"; + tfe_cmsg_tlv_type_to_string[TFE_CMSG_FQDN_CAT_ID_VAL] = "TFE_CMSG_FQDN_CAT_ID_VAL"; + + 
tfe_cmsg_tlv_type_to_string[TFE_CMSG_COMMON_DIRECTION] = "TFE_CMSG_COMMON_DIRECTION"; + + tfe_cmsg_tlv_type_to_string[TFE_CMSG_SSL_PASSTHROUGH_REASON] = "TFE_CMSG_SSL_PASSTHROUGH_REASON"; +} diff --git a/common/src/tfe_metrics.cpp b/common/src/tfe_metrics.cpp new file mode 100644 index 0000000..735a0a8 --- /dev/null +++ b/common/src/tfe_metrics.cpp @@ -0,0 +1,133 @@ +#include +#include +#include +#include + +#include "tfe_proxy.h" +#include "tfe_metrics.h" + +enum SCE_STAT_FIELD +{ + // hit block policy + STAT_HIT_POLICY_PKT, + STAT_HIT_POLICY_B, + + // dev nf interface + STAT_RAW_PKT_RX_PKT, + STAT_RAW_PKT_RX_B, + + STAT_RAW_PKT_TX_PKT, + STAT_RAW_PKT_TX_B, + + // steering + STAT_DECRYPTED_TX_PKT, + STAT_DECRYPTED_TX_B, + STAT_DECRYPTED_RX_PKT, + STAT_DECRYPTED_RX_B, + + // control packet + STAT_CONTROL_RX_PKT, + STAT_CONTROL_RX_B, + + STAT_CTRL_PKT_OPENING, + STAT_CTRL_PKT_ACTIVE, + STAT_CTRL_PKT_CLOSING, + STAT_CTRL_PKT_RESETALL, + STAT_CTRL_PKT_ERROR, + + // send log + STAT_SEND_LOG, + + // max + STAT_MAX, +}; + +static const char *stat_map[] = +{ + // hit policy + [STAT_HIT_POLICY_PKT] = "hit_policy_pkt", + [STAT_HIT_POLICY_B] = "hit_policy_B", + + // dev nf interface + [STAT_RAW_PKT_RX_PKT] = "raw_rx_pkt", + [STAT_RAW_PKT_RX_B] = "raw_rx_B", + + [STAT_RAW_PKT_TX_PKT] = "raw_tx_pkt", + [STAT_RAW_PKT_TX_B] = "raw_tx_B", + + // decrypted + [STAT_DECRYPTED_TX_PKT] = "decrypt_tx_pkt", + [STAT_DECRYPTED_TX_B] = "decrypt_tx_B", + [STAT_DECRYPTED_RX_PKT] = "decrypt_rx_pkt", + [STAT_DECRYPTED_RX_B] = "decrypt_rx_B", + + // control packet + [STAT_CONTROL_RX_PKT] = "ctrl_rx_pkt", + [STAT_CONTROL_RX_B] = "ctrl_rx_B", + + [STAT_CTRL_PKT_OPENING] = "ctrl_pkt_open", + [STAT_CTRL_PKT_ACTIVE] = "ctrl_pkt_avtive", + [STAT_CTRL_PKT_CLOSING] = "ctrl_pkt_close", + [STAT_CTRL_PKT_RESETALL] = "ctrl_pkt_reset", + [STAT_CTRL_PKT_ERROR] = "ctrl_pkt_error", + + // send log + [STAT_SEND_LOG] = "send_log", + + [STAT_MAX] = NULL +}; + +struct global_metrics *global_metrics_create() +{ + struct global_metrics *metrics = (struct global_metrics *)calloc(1, sizeof(struct global_metrics)); + + metrics->fs_handle=tfe_proxy_get_fs_handle(); + for (int i = 0; i < STAT_MAX; i++) + { + metrics->fs_id[i] = FS_register(metrics->fs_handle, FS_STYLE_FIELD, FS_CALC_CURRENT, stat_map[i]); + } + + return metrics; +} + +void global_metrics_destory(struct global_metrics *metrics) +{ + if (metrics) + { + FS_library_destroy(); + free(metrics); + metrics = NULL; + } +} + +void global_metrics_dump(struct global_metrics *metrics) +{ + FS_operate(metrics->fs_handle, metrics->fs_id[STAT_HIT_POLICY_PKT], 0, FS_OP_SET, __atomic_fetch_add(&(metrics->hit_policy.n_pkts), 0, __ATOMIC_RELAXED)); + FS_operate(metrics->fs_handle, metrics->fs_id[STAT_HIT_POLICY_B], 0, FS_OP_SET, __atomic_fetch_add(&(metrics->hit_policy.n_bytes), 0, __ATOMIC_RELAXED)); + + FS_operate(metrics->fs_handle, metrics->fs_id[STAT_RAW_PKT_RX_PKT], 0, FS_OP_SET, __atomic_fetch_add(&(metrics->raw_pkt_rx.n_pkts), 0, __ATOMIC_RELAXED)); + FS_operate(metrics->fs_handle, metrics->fs_id[STAT_RAW_PKT_RX_B], 0, FS_OP_SET, __atomic_fetch_add(&(metrics->raw_pkt_rx.n_bytes), 0, __ATOMIC_RELAXED)); + + FS_operate(metrics->fs_handle, metrics->fs_id[STAT_RAW_PKT_TX_PKT], 0, FS_OP_SET, __atomic_fetch_add(&(metrics->raw_pkt_tx.n_pkts), 0, __ATOMIC_RELAXED)); + FS_operate(metrics->fs_handle, metrics->fs_id[STAT_RAW_PKT_TX_B], 0, FS_OP_SET, __atomic_fetch_add(&(metrics->raw_pkt_tx.n_bytes), 0, __ATOMIC_RELAXED)); + + + FS_operate(metrics->fs_handle, metrics->fs_id[STAT_DECRYPTED_TX_PKT], 0, 
FS_OP_SET, __atomic_fetch_add(&(metrics->decrypt_tx.n_pkts), 0, __ATOMIC_RELAXED)); + FS_operate(metrics->fs_handle, metrics->fs_id[STAT_DECRYPTED_TX_B], 0, FS_OP_SET, __atomic_fetch_add(&(metrics->decrypt_tx.n_bytes), 0, __ATOMIC_RELAXED)); + + FS_operate(metrics->fs_handle, metrics->fs_id[STAT_DECRYPTED_RX_PKT], 0, FS_OP_SET, __atomic_fetch_add(&(metrics->decrypt_rx.n_pkts), 0, __ATOMIC_RELAXED)); + FS_operate(metrics->fs_handle, metrics->fs_id[STAT_DECRYPTED_RX_B], 0, FS_OP_SET, __atomic_fetch_add(&(metrics->decrypt_rx.n_bytes), 0, __ATOMIC_RELAXED)); + + // control packet + FS_operate(metrics->fs_handle, metrics->fs_id[STAT_CONTROL_RX_PKT], 0, FS_OP_SET, __atomic_fetch_add(&(metrics->ctrl_pkt_rx.n_pkts), 0, __ATOMIC_RELAXED)); + FS_operate(metrics->fs_handle, metrics->fs_id[STAT_CONTROL_RX_B], 0, FS_OP_SET, __atomic_fetch_add(&(metrics->ctrl_pkt_rx.n_bytes), 0, __ATOMIC_RELAXED)); + + FS_operate(metrics->fs_handle, metrics->fs_id[STAT_CTRL_PKT_OPENING], 0, FS_OP_SET, __atomic_fetch_add(&(metrics->ctrl_pkt_opening_num), 0, __ATOMIC_RELAXED)); + FS_operate(metrics->fs_handle, metrics->fs_id[STAT_CTRL_PKT_ACTIVE], 0, FS_OP_SET, __atomic_fetch_add(&(metrics->ctrl_pkt_active_num), 0, __ATOMIC_RELAXED)); + FS_operate(metrics->fs_handle, metrics->fs_id[STAT_CTRL_PKT_CLOSING], 0, FS_OP_SET, __atomic_fetch_add(&(metrics->ctrl_pkt_closing_num), 0, __ATOMIC_RELAXED)); + FS_operate(metrics->fs_handle, metrics->fs_id[STAT_CTRL_PKT_RESETALL], 0, FS_OP_SET, __atomic_fetch_add(&(metrics->ctrl_pkt_resetall_num), 0, __ATOMIC_RELAXED)); + FS_operate(metrics->fs_handle, metrics->fs_id[STAT_CTRL_PKT_ERROR], 0, FS_OP_SET, __atomic_fetch_add(&(metrics->ctrl_pkt_error_num), 0, __ATOMIC_RELAXED)); + + // send log + FS_operate(metrics->fs_handle, metrics->fs_id[STAT_SEND_LOG], 0, FS_OP_SET, __atomic_fetch_add(&(metrics->send_log), 0, __ATOMIC_RELAXED)); +} diff --git a/common/src/tfe_mpack.cpp b/common/src/tfe_mpack.cpp new file mode 100644 index 0000000..73a7b89 --- /dev/null +++ b/common/src/tfe_mpack.cpp @@ -0,0 +1,216 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "tfe_cmsg.h" +#include "tfe_utils.h" +#include "tfe_ctrl_packet.h" + +enum ctr_pkt_index +{ + INDEX_TSYNC = 0, + INDEX_SESSION_ID, + INDEX_STATE, + INDEX_METHOD, + INDEX_SCE, + INDEX_SHAPER, + INDEX_PROXY, + INDEX_MAX +}; + +struct mpack_mmap_id2type +{ + int id; + enum tfe_cmsg_tlv_type type; + char *str_name; + int size; +}mpack_table[] = { + {.id = 0, .type = TFE_CMSG_POLICY_ID, .str_name = "TFE_CMSG_POLICY_ID", .size = 8}, + {.id = 1, .type = TFE_CMSG_TCP_RESTORE_SEQ, .str_name = "TFE_CMSG_TCP_RESTORE_SEQ", .size = 4}, + {.id = 2, .type = TFE_CMSG_TCP_RESTORE_ACK, .str_name = "TFE_CMSG_TCP_RESTORE_ACK", .size = 4}, + {.id = 3, .type = TFE_CMSG_TCP_RESTORE_MSS_CLIENT, .str_name = "TFE_CMSG_TCP_RESTORE_MSS_CLIENT", .size = 2}, + {.id = 4, .type = TFE_CMSG_TCP_RESTORE_MSS_SERVER, .str_name = "TFE_CMSG_TCP_RESTORE_MSS_SERVER", .size = 2}, + {.id = 5, .type = TFE_CMSG_TCP_RESTORE_WSACLE_CLIENT, .str_name = "TFE_CMSG_TCP_RESTORE_WSACLE_CLIENT", .size = 1}, + {.id = 6, .type = TFE_CMSG_TCP_RESTORE_WSACLE_SERVER, .str_name = "TFE_CMSG_TCP_RESTORE_WSACLE_SERVER", .size = 1}, + {.id = 7, .type = TFE_CMSG_TCP_RESTORE_SACK_CLIENT, .str_name = "TFE_CMSG_TCP_RESTORE_SACK_CLIENT", .size = 1}, + {.id = 8, .type = TFE_CMSG_TCP_RESTORE_SACK_SERVER, .str_name = "TFE_CMSG_TCP_RESTORE_SACK_SERVER", .size = 1}, + {.id = 9, .type = TFE_CMSG_TCP_RESTORE_TS_CLIENT, .str_name = "TFE_CMSG_TCP_RESTORE_TS_CLIENT", .size = 1}, + {.id = 
10, .type = TFE_CMSG_TCP_RESTORE_TS_SERVER, .str_name = "TFE_CMSG_TCP_RESTORE_TS_SERVER", .size = 1}, + {.id = 11, .type = TFE_CMSG_TCP_RESTORE_PROTOCOL, .str_name = "TFE_CMSG_TCP_RESTORE_PROTOCOL", .size = 1}, + {.id = 12, .type = TFE_CMSG_TCP_RESTORE_WINDOW_CLIENT, .str_name = "TFE_CMSG_TCP_RESTORE_WINDOW_CLIENT", .size = 2}, + {.id = 13, .type = TFE_CMSG_TCP_RESTORE_WINDOW_SERVER, .str_name = "TFE_CMSG_TCP_RESTORE_WINDOW_SERVER", .size = 2}, + {.id = 14, .type = TFE_CMSG_TCP_RESTORE_TS_CLIENT_VAL, .str_name = "TFE_CMSG_TCP_RESTORE_TS_CLIENT_VAL", .size = 4}, + {.id = 15, .type = TFE_CMSG_TCP_RESTORE_TS_SERVER_VAL, .str_name = "TFE_CMSG_TCP_RESTORE_TS_SERVER_VAL", .size = 4}, + {.id = 16, .type = TFE_CMSG_TCP_RESTORE_INFO_PACKET_CUR_DIR, .str_name = "TFE_CMSG_TCP_RESTORE_INFO_PACKET_CUR_DIR", .size = 1}, + {.id = 17, .type = TFE_CMSG_SRC_SUB_ID, .str_name = "TFE_CMSG_SRC_SUB_ID", .size = 256}, + {.id = 18, .type = TFE_CMSG_DST_SUB_ID, .str_name = "TFE_CMSG_DST_SUB_ID", .size = 256}, + {.id = 19, .type = TFE_CMSG_SRC_ASN, .str_name = "TFE_CMSG_SRC_ASN", .size = 64}, + {.id = 20, .type = TFE_CMSG_DST_ASN, .str_name = "TFE_CMSG_DST_ASN", .size = 64}, + {.id = 21, .type = TFE_CMSG_SRC_ORGANIZATION, .str_name = "TFE_CMSG_SRC_ORGANIZATION", .size = 256}, + {.id = 22, .type = TFE_CMSG_DST_ORGANIZATION, .str_name = "TFE_CMSG_DST_ORGANIZATION", .size = 256}, + {.id = 23, .type = TFE_CMSG_SRC_IP_LOCATION_COUNTRY, .str_name = "TFE_CMSG_SRC_IP_LOCATION_COUNTRY", .size = 256}, + {.id = 24, .type = TFE_CMSG_DST_IP_LOCATION_COUNTRY, .str_name = "TFE_CMSG_DST_IP_LOCATION_COUNTRY", .size = 256}, + {.id = 25, .type = TFE_CMSG_SRC_IP_LOCATION_PROVINE, .str_name = "TFE_CMSG_SRC_IP_LOCATION_PROVINE", .size = 256}, + {.id = 26, .type = TFE_CMSG_DST_IP_LOCATION_PROVINE, .str_name = "TFE_CMSG_DST_IP_LOCATION_PROVINE", .size = 256}, + {.id = 27, .type = TFE_CMSG_SRC_IP_LOCATION_CITY, .str_name = "TFE_CMSG_SRC_IP_LOCATION_CITY", .size = 256}, + {.id = 28, .type = TFE_CMSG_DST_IP_LOCATION_CITY, .str_name = "TFE_CMSG_DST_IP_LOCATION_CITY", .size = 256}, + {.id = 29, .type = TFE_CMSG_SRC_IP_LOCATION_SUBDIVISION, .str_name = "TFE_CMSG_SRC_IP_LOCATION_SUBDIVISION", .size = 256}, + {.id = 30, .type = TFE_CMSG_DST_IP_LOCATION_SUBDIVISION, .str_name = "TFE_CMSG_DST_IP_LOCATION_SUBDIVISION", .size = 256}, + {.id = 31, .type = TFE_CMSG_SSL_CLIENT_JA3_FINGERPRINT, .str_name = "TFE_CMSG_SSL_CLIENT_JA3_FINGERPRINT", .size = 32}, + {.id = 32, .type = TFE_CMSG_FQDN_CAT_ID_VAL, .str_name = "TFE_CMSG_FQDN_CAT_ID_VAL", .size = 4} +}; + +static int proxy_parse_messagepack(msgpack_object obj, void *ctx) +{ + struct ctrl_pkt_parser *handler = (struct ctrl_pkt_parser *)ctx; + uint32_t fqdn_val[8] = {0}; + + for (unsigned int i = 0; i < obj.via.array.size; i++) { + msgpack_object ptr = obj.via.array.ptr[i]; + + if (i == 0) { + if (ptr.type == MSGPACK_OBJECT_ARRAY) { + handler->tfe_policy_id_num = ptr.via.array.size; + for (uint32_t j = 0; j < ptr.via.array.size; j++) { + handler->tfe_policy_ids[j] = ptr.via.array.ptr[j].via.u64; + } + tfe_cmsg_set(handler->cmsg, mpack_table[i].type, (const unsigned char *)&handler->tfe_policy_ids[0], sizeof(uint64_t)); + TFE_LOG_DEBUG(g_default_logger, "%s: interger msgpack cmsg: [%s] num: [%d]", LOG_TAG_CTRLPKT, mpack_table[i].str_name, handler->tfe_policy_id_num); + for (int j = 0; j < handler->tfe_policy_id_num; j++) { + TFE_LOG_DEBUG(g_default_logger, "%s: policy id:%lu ", LOG_TAG_CTRLPKT, handler->tfe_policy_ids[j]); + } + } + continue; + } + + switch (ptr.type) { + case 
MSGPACK_OBJECT_POSITIVE_INTEGER: + tfe_cmsg_set(handler->cmsg, mpack_table[i].type, (const unsigned char *)&ptr.via.u64, mpack_table[i].size); + TFE_LOG_DEBUG(g_default_logger, "%s: interger msgpack cmsg: [%s] -> [%lu]", LOG_TAG_CTRLPKT, mpack_table[i].str_name, ptr.via.u64); + break; + case MSGPACK_OBJECT_STR: + tfe_cmsg_set(handler->cmsg, mpack_table[i].type, (const unsigned char *)ptr.via.str.ptr, ptr.via.str.size); + TFE_LOG_DEBUG(g_default_logger, "%s: string msgpack cmsg: [%s] -> [%s]", LOG_TAG_CTRLPKT, mpack_table[i].str_name, ptr.via.str.ptr); + break; + case MSGPACK_OBJECT_ARRAY: + if (i == 32) { + tfe_cmsg_set(handler->cmsg, TFE_CMSG_FQDN_CAT_ID_NUM, (const unsigned char *)&ptr.via.array.size, sizeof(uint32_t)); + for (uint32_t j = 0; j < ptr.via.array.size; j++) { + fqdn_val[j] = ptr.via.array.ptr[j].via.u64; + TFE_LOG_DEBUG(g_default_logger, "%s: array msgpack cmsg: [%s] -> [%lu]", LOG_TAG_CTRLPKT, mpack_table[i].str_name, ptr.via.array.ptr[j].via.u64); + } + tfe_cmsg_set(handler->cmsg ,TFE_CMSG_FQDN_CAT_ID_VAL, (const unsigned char*)fqdn_val, ptr.via.array.size * sizeof(uint32_t)); + } + break; + default: + break; + } + } + return 0; +} + +int parse_messagepack(const char* data, size_t length, void *ctx) +{ + struct ctrl_pkt_parser *handler = (struct ctrl_pkt_parser *)ctx; + size_t off = 0; + + msgpack_unpacked unpacked; + msgpack_unpacked_init(&unpacked); + + msgpack_unpack_return ret = msgpack_unpack_next(&unpacked, data, length, &off); + if (ret != MSGPACK_UNPACK_SUCCESS) { + TFE_LOG_DEBUG(g_default_logger, "%s: unexpected control packet: data[%s]", LOG_TAG_CTRLPKT, data); + return -1; + } + + msgpack_object obj = unpacked.data; + if (obj.type != MSGPACK_OBJECT_ARRAY || obj.via.array.size < INDEX_PROXY) { + // TFE_LOG_DEBUG(g_default_logger, "%s: unexpected control packet: msgpack type[%02x], array size:%d", LOG_TAG_CTRLPKT, obj.type, obj.via.array.size); + return -1; + } + + for (unsigned int i = 0; i < obj.via.array.size; i++) { + msgpack_object ptr = obj.via.array.ptr[i]; + switch (i) { + case INDEX_TSYNC: + if (ptr.type == MSGPACK_OBJECT_STR) { + memcpy(handler->tsync, ptr.via.str.ptr, ptr.via.str.size); + } + else { + TFE_LOG_DEBUG(g_default_logger, "%s: unexpected control packet: (invalid tsync type) %02x", LOG_TAG_CTRLPKT, ptr.type); + } + break; + case INDEX_SESSION_ID: + if (ptr.type == MSGPACK_OBJECT_STR) { + char session_id[64] = {0}; + memcpy(session_id, ptr.via.str.ptr, ptr.via.str.size); + handler->session_id = atoll(session_id); + } + else { + TFE_LOG_DEBUG(g_default_logger, "%s: unexpected control packet: (invalid session id type) %02x", LOG_TAG_CTRLPKT, ptr.type); + } + break; + case INDEX_STATE: + if (ptr.type == MSGPACK_OBJECT_STR) { + if (strncasecmp(ptr.via.str.ptr, "opening", ptr.via.str.size) == 0) + { + handler->state = SESSION_STATE_OPENING; + } + else if (strncasecmp(ptr.via.str.ptr, "active", ptr.via.str.size) == 0) + { + handler->state = SESSION_STATE_ACTIVE; + } + else if (strncasecmp(ptr.via.str.ptr, "closing", ptr.via.str.size) == 0) + { + handler->state = SESSION_STATE_CLOSING; + } + else if (strncasecmp(ptr.via.str.ptr, "resetall", ptr.via.str.size) == 0) + { + handler->state = SESSION_STATE_RESETALL; + } + else + { + TFE_LOG_DEBUG(g_default_logger, "%s: unexpected control packet: (invalid state value) %s", LOG_TAG_CTRLPKT, ptr.via.str.ptr); + } + } + else { + TFE_LOG_DEBUG(g_default_logger, "%s: unexpected control packet: (invalid state type) %02x", LOG_TAG_CTRLPKT, ptr.type); + } + break; + case INDEX_METHOD: + if (ptr.type == 
MSGPACK_OBJECT_STR) { + memcpy(handler->method, ptr.via.str.ptr, ptr.via.str.size); + } + else { + TFE_LOG_DEBUG(g_default_logger, "%s: unexpected control packet: (invalid method type) %02x", LOG_TAG_CTRLPKT, ptr.type); + } + break; + case INDEX_SCE: + if (ptr.type == MSGPACK_OBJECT_ARRAY) { + msgpack_object rule_id = ptr.via.array.ptr[0]; + handler->sce_policy_id_num = rule_id.via.array.size; + for (uint32_t j = 0; j < rule_id.via.array.size; j++) { + handler->sce_policy_ids[j] = rule_id.via.array.ptr[j].via.u64; + } + } + break; + case INDEX_SHAPER: + break; + case INDEX_PROXY: + if (ptr.type == MSGPACK_OBJECT_ARRAY) { + proxy_parse_messagepack(ptr, handler); + } + else { + TFE_LOG_DEBUG(g_default_logger, "%s: unexpected control packet: (invalid proxy type) %02x", LOG_TAG_CTRLPKT, ptr.type); + } + break; + default: + break; + } + } + return 0; +} diff --git a/common/src/tfe_packet_io.cpp b/common/src/tfe_packet_io.cpp new file mode 100644 index 0000000..e3fb77b --- /dev/null +++ b/common/src/tfe_packet_io.cpp @@ -0,0 +1,1553 @@ +#include +#include +#include +#include + +#include +#include +#include + +#include +// #include +#include +#include "tfe_acceptor_kni.h" +#include "tfe_ctrl_packet.h" +#include "tfe_raw_packet.h" +#include "io_uring.h" +#include "tfe_metrics.h" +#include "tfe_cmsg.h" +#include "tfe_tcp_restore.h" +#include "tfe_stream.h" +#include "raw_socket.h" +#include "packet_construct.h" +#include "tfe_tap_rss.h" +#include + +/* + * add: vxlan_hdr + * del: marsio_buff_ctrlzone_reset() + * +----+ NF2SF +----+ + * | |--------------------------->| | + * | | | | + * | |-------+ | |-------+ + * | NF | | NF2NF (undo) | SF | | SF2SF (del old vxlan_hdr; add new vxlan_hdr) + * | |<------+ | |<------+ + * | | | | + * | |<---------------------------| | + * | | SF2NF | | + * +---+ del: vxlan_hdr +----+ + * add: session_id + route_ctx + sid + */ + +/****************************************************************************** + * Struct + ******************************************************************************/ + +#define RX_BURST_MAX 128 + +#define TRAFFIC_IS_DECRYPTED (1 << 0) +#define SET_TRAFFIC_IS_DECRYPTED(field) (field || TRAFFIC_IS_DECRYPTED) +#define CLEAR_TRAFFIC_IS_DECRYPTED(field) (field && ~TRAFFIC_IS_DECRYPTED) + +struct config +{ + int bypass_all_traffic; + int rx_burst_max; + char app_symbol[256]; + char dev_nf_interface[256]; +}; + +struct device +{ + struct mr_vdev *mr_dev; + struct mr_sendpath *mr_path; +}; + +struct packet_io +{ + int thread_num; + struct mr_instance *instance; + struct device dev_nf_interface; + struct config config; +}; + +enum raw_pkt_action +{ + RAW_PKT_ERR_BYPASS, + RAW_PKT_HIT_BYPASS, + RAW_PKT_HIT_BLOCK, + RAW_PKT_HIT_STEERING, + RAW_PKT_HIT_MIRRORING, +}; + +enum inject_pkt_action +{ + INJT_PKT_ERR_DROP, + INJT_PKT_MIRR_RX_DROP, + INJT_PKT_HIT_BLOCK, + INJT_PKT_HIT_FWD2SF, // forward to service function + INJT_PKT_HIT_FWD2NF, // forward to network function +}; + +struct metadata +{ + uint64_t session_id; + + char *raw_data; + int raw_len; + + int dir_is_e2i; + int is_ctrl_pkt; + + uint16_t l7_offset; // only control packet set l7_offset + uint16_t user_field; // only raw packet set traffic_is_decrypted (1 << 0) + + struct sids sids; + struct route_ctx route_ctx; +}; + +/****************************************************************************** + * API Declaration + ******************************************************************************/ + +struct packet_io *packet_io_create(const char *profile, int thread_num, cpu_set_t 
*coremask); +void packet_io_destory(struct packet_io *handle); + +int packet_io_polling_nf_interface(struct packet_io *handle, int thread_seq, void *ctx); + +extern int tcp_policy_enforce(struct tcp_policy_enforcer *tcp_enforcer, struct tfe_cmsg *cmsg); +extern int tfe_proxy_fds_accept(struct tfe_proxy * ctx, int fd_downstream, int fd_upstream, int fd_fake_c, int fd_fake_s, struct tfe_cmsg * cmsg); + +extern void chaining_policy_enforce(struct chaining_policy_enforcer *enforcer, struct tfe_cmsg *cmsg, uint64_t rule_id); + +// return 0 : success +// return -1 : error +static int packet_io_config(const char *profile, struct config *config); + +// return 0 : success +// return -1 : error +static int packet_io_get_metadata(marsio_buff_t *tx_buff, struct metadata *meta); +// return 0 : success +// return -1 : error +static int packet_io_set_metadata(marsio_buff_t *tx_buff, struct metadata *meta); +static void packet_io_dump_metadata(marsio_buff_t *tx_buff, struct metadata *meta); + +// return 0 : success +// return -1 : error +static int handle_control_packet(struct packet_io *handle, marsio_buff_t *rx_buff, int thread_seq, void *ctx); + +static int handle_raw_packet_from_nf(struct packet_io *handle, marsio_buff_t *rx_buff, int thread_seq, void *ctx, int *action_bytes); + +static int add_ether_header(void *raw_data, char *src_mac, char *dst_mac){ + struct ethhdr *ether_hdr = (struct ethhdr*)raw_data; + memcpy(ether_hdr->h_dest, dst_mac, sizeof(ether_hdr->h_dest)); + memcpy(ether_hdr->h_source, src_mac, sizeof(ether_hdr->h_source)); + return 0; +} + +// return 0 : success +// return -1 : error +static int handle_session_opening(struct metadata *meta, struct ctrl_pkt_parser *parser, int thread_seq, void *ctx); +// return 0 : success +// return -1 : error +static int handle_session_closing(struct metadata *meta, struct ctrl_pkt_parser *parser, int thread_seq, void *ctx); +// return 0 : success +// return -1 : error +static int handle_session_active(struct metadata *meta, struct ctrl_pkt_parser *parser, int thread_seq, void *ctx); +// return 0 : success +// return -1 : error +static int handle_session_resetall(struct metadata *meta, struct ctrl_pkt_parser *parser, int thread_seq, void *ctx); + +static void session_value_free_cb(void *ctx); + +// return 0 : not keepalive packet +// return 1 : is keepalive packet +static int is_downstream_keepalive_packet(marsio_buff_t *rx_buff); + + +/****************************************************************************** + * API Definition + ******************************************************************************/ + +struct packet_io *packet_io_create(const char *profile, int thread_num, cpu_set_t *coremask) +{ + int opt = 1; + struct packet_io *handle = (struct packet_io *)calloc(1, sizeof(struct packet_io)); + assert(handle != NULL); + handle->thread_num = thread_num; + + if (packet_io_config(profile, &(handle->config)) != 0) + { + goto error_out; + } + + handle->instance = marsio_create(); + if (handle->instance == NULL) + { + TFE_LOG_ERROR(g_default_logger, "%s: unable to create marsio instance", LOG_TAG_PKTIO); + goto error_out; + } + + if (marsio_option_set(handle->instance, MARSIO_OPT_THREAD_MASK_IN_CPUSET, coremask, sizeof(cpu_set_t)) != 0) + { + TFE_LOG_ERROR(g_default_logger, "%s: unable to set MARSIO_OPT_EXIT_WHEN_ERR option for marsio instance", LOG_TAG_PKTIO); + goto error_out; + } + + if (marsio_option_set(handle->instance, MARSIO_OPT_EXIT_WHEN_ERR, &opt, sizeof(opt)) != 0) + { + TFE_LOG_ERROR(g_default_logger, "%s: unable to set 
MARSIO_OPT_EXIT_WHEN_ERR option for marsio instance", LOG_TAG_PKTIO); + goto error_out; + } + + if (marsio_init(handle->instance, handle->config.app_symbol) != 0) + { + TFE_LOG_ERROR(g_default_logger, "%s: unable to initialize marsio instance", LOG_TAG_PKTIO); + goto error_out; + } + + // Netwrok Function Interface + handle->dev_nf_interface.mr_dev = marsio_open_device(handle->instance, handle->config.dev_nf_interface, handle->thread_num, handle->thread_num); + if (handle->dev_nf_interface.mr_dev == NULL) + { + TFE_LOG_ERROR(g_default_logger, "%s: unable to open device %s", LOG_TAG_PKTIO, handle->config.dev_nf_interface); + goto error_out; + } + + handle->dev_nf_interface.mr_path = marsio_sendpath_create_by_vdev(handle->dev_nf_interface.mr_dev); + if (handle->dev_nf_interface.mr_path == NULL) + { + TFE_LOG_ERROR(g_default_logger, "%s: unable to create sendpath for device %s", LOG_TAG_PKTIO, handle->config.dev_nf_interface); + goto error_out; + } + + return handle; + +error_out: + packet_io_destory(handle); + return NULL; +} + +void packet_io_destory(struct packet_io *handle) +{ + if (handle) + { + if (handle->dev_nf_interface.mr_path) + { + marsio_sendpath_destory(handle->dev_nf_interface.mr_path); + handle->dev_nf_interface.mr_path = NULL; + } + + if (handle->dev_nf_interface.mr_dev) + { + marsio_close_device(handle->dev_nf_interface.mr_dev); + handle->dev_nf_interface.mr_dev = NULL; + } + + if (handle->instance) + { + marsio_destory(handle->instance); + handle->instance = NULL; + } + + free(handle); + handle = NULL; + } +} + +int packet_io_thread_init(struct packet_io *handle, struct acceptor_thread_ctx *thread_ctx) +{ + if (marsio_thread_init(handle->instance) != 0) + { + TFE_LOG_ERROR(g_default_logger, "%s: unable to init marsio thread %d", LOG_TAG_PKTIO, thread_ctx->thread_index); + return -1; + } + + return 0; +} + +void packet_io_thread_wait(struct packet_io *handle, struct acceptor_thread_ctx *thread_ctx, int timeout_ms) +{ + struct mr_vdev *vdevs[] = {handle->dev_nf_interface.mr_dev}; + + marsio_poll_wait(handle->instance, vdevs, 1, thread_ctx->thread_index, timeout_ms); +} + +// return n_packet_recv +int packet_io_polling_nf_interface(struct packet_io *handle, int thread_seq, void *ctx) +{ + struct acceptor_thread_ctx *thread = (struct acceptor_thread_ctx *)ctx; + struct global_metrics *g_metrics = thread->ref_metrics; + + marsio_buff_t *rx_buffs[RX_BURST_MAX]; + + // nr_recv <= rx_burst_max <= RX_BURST_MAX + int nr_recv = marsio_recv_burst(handle->dev_nf_interface.mr_dev, thread_seq, rx_buffs, handle->config.rx_burst_max); + if (nr_recv <= 0) + { + return 0; + } + + for (int j = 0; j < nr_recv; j++) + { + marsio_buff_t *rx_buff = rx_buffs[j]; + int raw_len = marsio_buff_datalen(rx_buff); + + if (is_downstream_keepalive_packet(rx_buff)) + { + marsio_send_burst(handle->dev_nf_interface.mr_path, thread_seq, &rx_buff, 1); + continue; + } + + if (marsio_buff_is_ctrlbuf(rx_buff)) + { + handle_control_packet(handle, rx_buff, thread_seq, ctx); + throughput_metrics_inc(&g_metrics->ctrl_pkt_rx, 1, raw_len); + // all control packet need bypass + marsio_send_burst(handle->dev_nf_interface.mr_path, thread_seq, &rx_buff, 1); + } + else + { + int action_bytes = 0; + handle_raw_packet_from_nf(handle, rx_buff, thread_seq, ctx, &action_bytes); + } + } + + return nr_recv; +} + +void handle_decryption_packet_from_tap(const char *data, int len, void *args) +{ + struct acceptor_thread_ctx *thread = (struct acceptor_thread_ctx *)args; + struct acceptor_ctx *acceptor_ctx = thread->ref_acceptor_ctx; + 
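+    /* Decrypted payload handed back from the TAP path: the frame below is
+     * matched to its originating session by the innermost 4-tuple, tagged as
+     * decrypted traffic in user_field, and re-injected toward the NF
+     * interface using the route context recorded for that direction. */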
struct packet_io *packet_io = thread->ref_io; + + struct addr_tuple4 inner_addr; + struct raw_pkt_parser raw_parser; + memset(&inner_addr, 0, sizeof(struct addr_tuple4)); + raw_packet_parser_init(&raw_parser, 0, LAYER_TYPE_ALL, 8); + raw_packet_parser_parse(&raw_parser, (const void *)data, len); + raw_packet_parser_get_most_inner_tuple4(&raw_parser, &inner_addr); + + struct session_node *node = session_table_search_by_addr(thread->session_table, &inner_addr); + if (node == NULL) + { + char *addr_string = addr_tuple4_to_str(&inner_addr); + TFE_LOG_ERROR(g_default_logger, "%s: unexpected inject packet, unable to find session %s from session table, drop !!!", LOG_TAG_PKTIO, addr_string); + free(addr_string); + return; + } + struct session_ctx *s_ctx = (struct session_ctx *)node->val_data; + + marsio_buff_t *tx_buffs[1]; + int alloc_ret = marsio_buff_malloc_device(packet_io->dev_nf_interface.mr_dev, tx_buffs, 1, 0, thread->thread_index); + if (alloc_ret < 0){ + TFE_LOG_ERROR(g_default_logger, "Failed at alloc marsio buffer, ret = %d, thread_seq = %d", + alloc_ret, thread->thread_index); + return; + } + + char *dst = marsio_buff_append(tx_buffs[0], len); + memcpy(dst, data, len); + + struct metadata meta = {0}; + meta.session_id = s_ctx->session_id; + meta.raw_data = dst; + meta.raw_len = len; + meta.user_field = SET_TRAFFIC_IS_DECRYPTED(0); + meta.is_ctrl_pkt = 0; + meta.l7_offset = 0; + meta.sids.num = 1; + meta.sids.elems[0] = acceptor_ctx->sce_sids; + + if (memcmp(&inner_addr, &s_ctx->first_ctrl_pkt.tuple4, sizeof(struct addr_tuple4)) == 0) + meta.dir_is_e2i = s_ctx->first_ctrl_pkt.dir_is_e2i; + else + meta.dir_is_e2i = !s_ctx->first_ctrl_pkt.dir_is_e2i; + + if (meta.dir_is_e2i) + { + route_ctx_copy(&meta.route_ctx, &s_ctx->raw_pkt_e2i_route_ctx); + } + else + { + route_ctx_copy(&meta.route_ctx, &s_ctx->raw_pkt_i2e_route_ctx); + } + packet_io_set_metadata(tx_buffs[0], &meta); + marsio_send_burst(packet_io->dev_nf_interface.mr_path, thread->thread_index, tx_buffs, 1); +} + +void handle_raw_packet_from_tap(const char *data, int len, void *args) +{ + struct acceptor_thread_ctx *thread = (struct acceptor_thread_ctx *)args; + struct acceptor_ctx *acceptor_ctx = thread->ref_acceptor_ctx; + struct packet_io *packet_io = thread->ref_io; + + struct addr_tuple4 inner_addr; + struct raw_pkt_parser raw_parser; + memset(&inner_addr, 0, sizeof(struct addr_tuple4)); + raw_packet_parser_init(&raw_parser, 0, LAYER_TYPE_ALL, 8); + raw_packet_parser_parse(&raw_parser, (const void *)data, len); + raw_packet_parser_get_most_inner_tuple4(&raw_parser, &inner_addr); + + struct session_node *node = session_table_search_by_addr(thread->session_table, &inner_addr); + if (node == NULL) + { + char *addr_string = addr_tuple4_to_str(&inner_addr); + TFE_LOG_ERROR(g_default_logger, "%s: unexpected inject packet, unable to find session %s from session table, drop !!!", LOG_TAG_PKTIO, addr_string); + free(addr_string); + return; + } + struct session_ctx *s_ctx = (struct session_ctx *)node->val_data; + + marsio_buff_t *tx_buffs[1]; + int alloc_ret = marsio_buff_malloc_device(packet_io->dev_nf_interface.mr_dev, tx_buffs, 1, 0, thread->thread_index); + if (alloc_ret < 0){ + TFE_LOG_ERROR(g_default_logger, "Failed at alloc marsio buffer, ret = %d, thread_seq = %d", + alloc_ret, thread->thread_index); + return; + } + + char *dst = marsio_buff_append(tx_buffs[0], len + s_ctx->first_ctrl_pkt.header_len); + memcpy(dst, s_ctx->first_ctrl_pkt.header_data, s_ctx->first_ctrl_pkt.header_len); + memcpy(dst + 
s_ctx->first_ctrl_pkt.header_len, data, len); + + struct metadata meta = {0}; + meta.session_id = s_ctx->session_id; + meta.raw_data = dst; + meta.raw_len = len; + meta.user_field = s_ctx->user_field; + meta.is_ctrl_pkt = 0; + meta.l7_offset = 0; + + char *src_mac = NULL; + char *dst_mac = NULL; + if (memcmp(&inner_addr, &s_ctx->first_ctrl_pkt.tuple4, sizeof(struct addr_tuple4)) == 0) { + meta.dir_is_e2i = s_ctx->first_ctrl_pkt.dir_is_e2i; + struct ethhdr *ether_hdr = (struct ethhdr *)(s_ctx->first_ctrl_pkt.header_data); + src_mac = (char *)ether_hdr->h_source; + dst_mac = (char *)ether_hdr->h_dest; + } + else { + meta.dir_is_e2i = !s_ctx->first_ctrl_pkt.dir_is_e2i; + struct ethhdr *ether_hdr = (struct ethhdr *)(s_ctx->first_ctrl_pkt.header_data); + dst_mac = (char *)ether_hdr->h_source; + src_mac = (char *)ether_hdr->h_dest; + } + + if (meta.dir_is_e2i) + { + sids_copy(&meta.sids, &s_ctx->raw_pkt_e2i_sids); + route_ctx_copy(&meta.route_ctx, &s_ctx->raw_pkt_e2i_route_ctx); + } + else + { + sids_copy(&meta.sids, &s_ctx->raw_pkt_i2e_sids); + route_ctx_copy(&meta.route_ctx, &s_ctx->raw_pkt_i2e_route_ctx); + } + packet_io_set_metadata(tx_buffs[0], &meta); + // add_ether_header(dst, src_mac, dst_mac); + marsio_send_burst(packet_io->dev_nf_interface.mr_path, thread->thread_index, tx_buffs, 1); +} + +// return 0 : success +// return -1 : error +static int packet_io_config(const char *profile, struct config *config) +{ + MESA_load_profile_int_def(profile, "PACKET_IO", "rx_burst_max", (int *)&(config->rx_burst_max), 1); + MESA_load_profile_string_nodef(profile, "PACKET_IO", "app_symbol", config->app_symbol, sizeof(config->app_symbol)); + MESA_load_profile_string_nodef(profile, "PACKET_IO", "dev_nf_interface", config->dev_nf_interface, sizeof(config->dev_nf_interface)); + + if (config->rx_burst_max > RX_BURST_MAX) + { + TFE_LOG_ERROR(g_default_logger, "%s: invalid rx_burst_max, exceeds limit %d", LOG_TAG_PKTIO, RX_BURST_MAX); + return -1; + } + + if (strlen(config->app_symbol) == 0) + { + TFE_LOG_ERROR(g_default_logger, "%s: invalid app_symbol in %s", LOG_TAG_PKTIO, profile); + return -1; + } + + if (strlen(config->dev_nf_interface) == 0) + { + TFE_LOG_ERROR(g_default_logger, "%s: invalid dev_nf_interface in %s", LOG_TAG_PKTIO, profile); + return -1; + } + + TFE_LOG_DEBUG(g_default_logger, "%s: PACKET_IO->bypass_all_traffic : %d", LOG_TAG_PKTIO, config->bypass_all_traffic); + TFE_LOG_DEBUG(g_default_logger, "%s: PACKET_IO->rx_burst_max : %d", LOG_TAG_PKTIO, config->rx_burst_max); + TFE_LOG_DEBUG(g_default_logger, "%s: PACKET_IO->app_symbol : %s", LOG_TAG_PKTIO, config->app_symbol); + TFE_LOG_DEBUG(g_default_logger, "%s: PACKET_IO->dev_nf_interface : %s", LOG_TAG_PKTIO, config->dev_nf_interface); + + return 0; +} + +// return 0 : success +// return -1 : error +static int packet_io_get_metadata(marsio_buff_t *rx_buff, struct metadata *meta) +{ + memset(meta, 0, sizeof(struct metadata)); + + if (marsio_buff_get_metadata(rx_buff, MR_BUFF_SESSION_ID, &(meta->session_id), sizeof(meta->session_id)) <= 0) + { + TFE_LOG_ERROR(g_default_logger, "%s: unable to get session_id from metadata", LOG_TAG_PKTIO); + return -1; + } + + meta->raw_len = marsio_buff_datalen(rx_buff); + meta->raw_data = marsio_buff_mtod(rx_buff); + if (meta->raw_data == NULL || meta->raw_len == 0) + { + TFE_LOG_ERROR(g_default_logger, "%s: unable to get raw_data from metadata", LOG_TAG_PKTIO); + return -1; + } + + // 1: E2I + // 0: I2E + if (marsio_buff_get_metadata(rx_buff, MR_BUFF_DIR, &(meta->dir_is_e2i), sizeof(meta->dir_is_e2i)) <= 0) 
+ { + TFE_LOG_ERROR(g_default_logger, "%s: unable to get buff_dir from metadata", LOG_TAG_PKTIO); + return -1; + } + + if (marsio_buff_is_ctrlbuf(rx_buff)) + { + meta->is_ctrl_pkt = 1; + // only control packet set MR_BUFF_PAYLOAD_OFFSET + if (marsio_buff_get_metadata(rx_buff, MR_BUFF_PAYLOAD_OFFSET, &(meta->l7_offset), sizeof(meta->l7_offset)) <= 0) + { + TFE_LOG_ERROR(g_default_logger, "%s: unable to get l7_offset from metadata", LOG_TAG_PKTIO); + return -1; + } + } + else + { + meta->is_ctrl_pkt = 0; + // if (marsio_buff_get_metadata(rx_buff, MR_BUFF_USER_0, &(meta->user_field), sizeof(meta->user_field)) <= 0) + // { + // TFE_LOG_ERROR(g_default_logger, "%s: unable to get user_field from metadata", LOG_TAG_PKTIO); + // return -1; + // } + } + + meta->route_ctx.len = marsio_buff_get_metadata(rx_buff, MR_BUFF_ROUTE_CTX, meta->route_ctx.data, sizeof(meta->route_ctx.data)); + if (meta->route_ctx.len <= 0) + { + TFE_LOG_ERROR(g_default_logger, "%s: unable to get route_ctx from metadata", LOG_TAG_PKTIO); + return -1; + } + + meta->sids.num = marsio_buff_get_sid_list(rx_buff, meta->sids.elems, sizeof(meta->sids.elems) / sizeof(meta->sids.elems[0])); + if (meta->sids.num < 0) + { + TFE_LOG_ERROR(g_default_logger, "%s: unable to get sid_list from metadata", LOG_TAG_PKTIO); + return -1; + } + + return 0; +} + +// return 0 : success +// return -1 : error +static int packet_io_set_metadata(marsio_buff_t *tx_buff, struct metadata *meta) +{ + if (meta->session_id) + { + if (marsio_buff_set_metadata(tx_buff, MR_BUFF_SESSION_ID, &(meta->session_id), sizeof(meta->session_id)) != 0) + { + TFE_LOG_ERROR(g_default_logger, "%s: unable to set session_id for metadata", LOG_TAG_PKTIO); + return -1; + } + } + + // 1: E2I + // 0: I2E +#if 0 + // use MR_BUFF_ROUTE_CTX instead + if (marsio_buff_set_metadata(tx_buff, MR_BUFF_DIR, &(meta->dir_is_e2i), sizeof(meta->dir_is_e2i)) != 0) + { + TFE_LOG_ERROR(g_default_logger, "%s: unable to set buff_dir for metadata", LOG_TAG_PKTIO); + return -1; + } +#endif + + if (meta->is_ctrl_pkt) + { + if (marsio_buff_set_metadata(tx_buff, MR_BUFF_PAYLOAD_OFFSET, &(meta->l7_offset), sizeof(meta->l7_offset)) != 0) + { + TFE_LOG_ERROR(g_default_logger, "%s: unable to set l7_offset for metadata", LOG_TAG_PKTIO); + return -1; + } + } + else + { + // if (marsio_buff_set_metadata(tx_buff, MR_BUFF_USER_0, &(meta->user_field), sizeof(meta->user_field)) != 0) + // { + // TFE_LOG_ERROR(g_default_logger, "%s: unable to set user_field for metadata", LOG_TAG_PKTIO); + // return -1; + // } + } + + if (meta->route_ctx.len > 0) + { + if (marsio_buff_set_metadata(tx_buff, MR_BUFF_ROUTE_CTX, meta->route_ctx.data, meta->route_ctx.len) != 0) + { + TFE_LOG_ERROR(g_default_logger, "%s: unable to set route_ctx for metadata", LOG_TAG_PKTIO); + return -1; + } + } + + if (meta->sids.num > 0) + { + if (marsio_buff_set_sid_list(tx_buff, meta->sids.elems, meta->sids.num) != 0) + { + TFE_LOG_ERROR(g_default_logger, "%s: unable to set sid_list for metadata", LOG_TAG_PKTIO); + return -1; + } + } + + return 0; +} + +static void packet_io_dump_metadata(marsio_buff_t *tx_buff, struct metadata *meta) +{ + TFE_LOG_DEBUG(g_default_logger, "%s: META={session_id: %lu, raw_len: %d, dir_is_e2i: %d, is_ctrl_pkt: %d, l7_offset: %d, user_field: %u, sids_num: %d}", LOG_TAG_PKTIO, meta->session_id, meta->raw_len, meta->dir_is_e2i, meta->is_ctrl_pkt, meta->l7_offset, meta->user_field, meta->sids.num); +} + +static int tcp_restore_set_from_cmsg(struct tfe_cmsg *cmsg, struct tcp_restore_info *restore_info) +{ + int ret = 0; + 
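+    /* Every TLV queried below is optional: tfe_cmsg_get_value() returning 0
+     * is treated as "field present in the control message", and only then is
+     * the corresponding tcp_restore_info member filled in. */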
uint16_t length = 0;
+
+    uint32_t seq;
+    ret = tfe_cmsg_get_value(cmsg, TFE_CMSG_TCP_RESTORE_SEQ, (unsigned char *)&seq, sizeof(uint32_t), &length);
+    if (ret == 0)
+    {
+        restore_info->client.seq = ntohl(seq);
+        restore_info->server.ack = ntohl(seq);
+    }
+
+    uint32_t ack;
+    ret = tfe_cmsg_get_value(cmsg, TFE_CMSG_TCP_RESTORE_ACK, (unsigned char *)&ack, sizeof(uint32_t), &length);
+    if (ret == 0)
+    {
+        restore_info->client.ack = ntohl(ack);
+        restore_info->server.seq = ntohl(ack);
+    }
+
+    uint8_t ts_client;
+    ret = tfe_cmsg_get_value(cmsg, TFE_CMSG_TCP_RESTORE_TS_CLIENT, (unsigned char *)&ts_client, sizeof(uint8_t), &length);
+    if (ret == 0)
+    {
+        restore_info->client.timestamp_perm = !!ts_client;
+    }
+
+    uint8_t ts_server;
+    ret = tfe_cmsg_get_value(cmsg, TFE_CMSG_TCP_RESTORE_TS_SERVER, (unsigned char *)&ts_server, sizeof(uint8_t), &length);
+    if (ret == 0)
+    {
+        restore_info->server.timestamp_perm = !!ts_server;
+    }
+
+    uint32_t ts_client_val;
+    ret = tfe_cmsg_get_value(cmsg, TFE_CMSG_TCP_RESTORE_TS_CLIENT_VAL, (unsigned char *)&ts_client_val, sizeof(uint32_t), &length);
+    if (ret == 0)
+    {
+        restore_info->client.ts_val = ntohl(ts_client_val);
+    }
+
+    uint32_t ts_server_val;
+    ret = tfe_cmsg_get_value(cmsg, TFE_CMSG_TCP_RESTORE_TS_SERVER_VAL, (unsigned char *)&ts_server_val, sizeof(uint32_t), &length);
+    if (ret == 0)
+    {
+        restore_info->server.ts_val = ntohl(ts_server_val);
+    }
+
+    uint8_t wsacle_client;
+    ret = tfe_cmsg_get_value(cmsg, TFE_CMSG_TCP_RESTORE_WSACLE_CLIENT, (unsigned char *)&wsacle_client, sizeof(uint8_t), &length);
+    if (ret == 0)
+    {
+        restore_info->client.wscale_perm = true;
+        restore_info->client.wscale = wsacle_client;
+    }
+
+    uint8_t wsacle_server;
+    ret = tfe_cmsg_get_value(cmsg, TFE_CMSG_TCP_RESTORE_WSACLE_SERVER, (unsigned char *)&wsacle_server, sizeof(uint8_t), &length);
+    if (ret == 0)
+    {
+        // apply the server-side window scale to the server endpoint (previously written to the client by mistake)
+        restore_info->server.wscale_perm = true;
+        restore_info->server.wscale = wsacle_server;
+    }
+
+    uint8_t sack_client;
+    ret = tfe_cmsg_get_value(cmsg, TFE_CMSG_TCP_RESTORE_SACK_CLIENT, (unsigned char *)&sack_client, sizeof(uint8_t), &length);
+    if (ret == 0)
+    {
+        restore_info->client.sack_perm = !!sack_client;
+    }
+
+    uint8_t sack_server;
+    ret = tfe_cmsg_get_value(cmsg, TFE_CMSG_TCP_RESTORE_SACK_SERVER, (unsigned char *)&sack_server, sizeof(uint8_t), &length);
+    if (ret == 0)
+    {
+        restore_info->server.sack_perm = !!sack_server;
+    }
+
+    uint16_t mss_client;
+    ret = tfe_cmsg_get_value(cmsg, TFE_CMSG_TCP_RESTORE_MSS_CLIENT, (unsigned char *)&mss_client, sizeof(uint16_t), &length);
+    if (ret == 0)
+    {
+        restore_info->client.mss = mss_client;
+    }
+
+    uint16_t mss_server;
+    ret = tfe_cmsg_get_value(cmsg, TFE_CMSG_TCP_RESTORE_MSS_SERVER, (unsigned char *)&mss_server, sizeof(uint16_t), &length);
+    if (ret == 0)
+    {
+        restore_info->server.mss = mss_server;
+    }
+
+    uint16_t window_client;
+    ret = tfe_cmsg_get_value(cmsg, TFE_CMSG_TCP_RESTORE_WINDOW_CLIENT, (unsigned char *)&window_client, sizeof(uint16_t), &length);
+    if (ret == 0)
+    {
+        restore_info->client.window = window_client;
+    }
+
+    uint16_t window_server;
+    ret = tfe_cmsg_get_value(cmsg, TFE_CMSG_TCP_RESTORE_WINDOW_SERVER, (unsigned char *)&window_server, sizeof(uint16_t), &length);
+    if (ret == 0)
+    {
+        restore_info->server.window = window_server;
+    }
+
+    uint8_t packet_cur_dir;
+    ret = tfe_cmsg_get_value(cmsg, TFE_CMSG_TCP_RESTORE_INFO_PACKET_CUR_DIR, (unsigned char *)&packet_cur_dir, sizeof(uint8_t), &length);
+    if (ret == 0)
+    {
+        restore_info->cur_dir = (enum tcp_restore_pkt_dir)packet_cur_dir;
+    }
+
+    return 0;
+}
+
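+// Fill in the client/server socket addresses of restore_info from the packet's innermost 4-tuple.
+// cur_dir (recovered from the cmsg above) records whether this packet was observed C2S or S2C,
+// so the tuple's source/destination may have to be swapped before assignment.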
+static int tcp_restore_set_from_pkg(struct addr_tuple4 *tuple4, struct tcp_restore_info *restore_info) +{ + if (tuple4->addr_type == ADDR_TUPLE4_TYPE_V4) + { + struct sockaddr_in *in_addr_client; + struct sockaddr_in *in_addr_server; + + if (restore_info->cur_dir == PKT_DIR_NOT_SET || restore_info->cur_dir == PKT_DIR_C2S) + { + in_addr_client = (struct sockaddr_in *)&restore_info->client.addr; + in_addr_server = (struct sockaddr_in *)&restore_info->server.addr; + } + else + { + in_addr_client = (struct sockaddr_in *)&restore_info->server.addr; + in_addr_server = (struct sockaddr_in *)&restore_info->client.addr; + } + + in_addr_client->sin_family = AF_INET; + in_addr_client->sin_addr = tuple4->addr_v4.src_addr; + in_addr_client->sin_port = tuple4->src_port; + + in_addr_server->sin_family = AF_INET; + in_addr_server->sin_addr = tuple4->addr_v4.dst_addr; + in_addr_server->sin_port = tuple4->dst_port; + } + + if (tuple4->addr_type == ADDR_TUPLE4_TYPE_V6) + { + struct sockaddr_in6 *in6_addr_client; + struct sockaddr_in6 *in6_addr_server; + + if (restore_info->cur_dir == PKT_DIR_NOT_SET || restore_info->cur_dir == PKT_DIR_C2S) + { + in6_addr_client = (struct sockaddr_in6 *)&restore_info->client.addr; + in6_addr_server = (struct sockaddr_in6 *)&restore_info->server.addr; + } + else + { + in6_addr_client = (struct sockaddr_in6 *)&restore_info->server.addr; + in6_addr_server = (struct sockaddr_in6 *)&restore_info->client.addr; + } + + in6_addr_client->sin6_family = AF_INET6; + in6_addr_client->sin6_addr = tuple4->addr_v6.src_addr; + in6_addr_client->sin6_port = tuple4->src_port; + + in6_addr_server->sin6_family = AF_INET6; + in6_addr_server->sin6_addr = tuple4->addr_v6.dst_addr; + in6_addr_server->sin6_port = tuple4->dst_port; + } + + return 0; +} + +// return 0 : success +// return -1 : error +static int handle_control_packet(struct packet_io *handle, marsio_buff_t *rx_buff, int thread_seq, void *ctx) +{ + struct acceptor_thread_ctx *thread = (struct acceptor_thread_ctx *)ctx; + struct acceptor_ctx *acceptor_ctx = thread->ref_acceptor_ctx; + struct global_metrics *g_metrics = thread->ref_metrics; + + struct metadata meta; + if (packet_io_get_metadata(rx_buff, &meta) == -1) + { + TFE_LOG_ERROR(g_default_logger, "%s: unexpected control packet, unable to get metadata", LOG_TAG_PKTIO); + packet_io_dump_metadata(rx_buff, &meta); + return -1; + } + packet_io_dump_metadata(rx_buff, &meta); + + struct ctrl_pkt_parser ctrl_parser; + ctrl_packet_parser_init(&ctrl_parser); + if (ctrl_packet_parser_parse(&ctrl_parser, meta.raw_data + meta.l7_offset, meta.raw_len - meta.l7_offset) == -1) + { + // TFE_LOG_ERROR(g_default_logger, "%s: unexpected control packet, unable to parse data", LOG_TAG_PKTIO); + return -1; + } + ctrl_packet_parser_dump(&ctrl_parser); + + if (ctrl_parser.session_id != meta.session_id) + { + TFE_LOG_ERROR(g_default_logger, "%s: unexpected control packet, metadata's session %lu != control packet's session %lu", LOG_TAG_PKTIO, meta.session_id, ctrl_parser.session_id); + return -1; + } + + switch (ctrl_parser.state) + { + case SESSION_STATE_OPENING: + __atomic_fetch_add(&g_metrics->ctrl_pkt_opening_num, 1, __ATOMIC_RELAXED); + // when session opening, firewall not send policy id + // return handle_session_opening(&meta, &ctrl_parser, thread_seq, ctx); + break; + case SESSION_STATE_CLOSING: + __atomic_fetch_add(&g_metrics->ctrl_pkt_closing_num, 1, __ATOMIC_RELAXED); + return handle_session_closing(&meta, &ctrl_parser, thread_seq, ctx); + case SESSION_STATE_ACTIVE: + 
__atomic_fetch_add(&g_metrics->ctrl_pkt_active_num, 1, __ATOMIC_RELAXED); + return handle_session_active(&meta, &ctrl_parser, thread_seq, ctx); + case SESSION_STATE_RESETALL: + __atomic_fetch_add(&g_metrics->ctrl_pkt_resetall_num, 1, __ATOMIC_RELAXED); + return handle_session_resetall(&meta, &ctrl_parser, thread_seq, ctx); + default: + __atomic_fetch_add(&g_metrics->ctrl_pkt_error_num, 1, __ATOMIC_RELAXED); + break; + } + + return 0; +} + +static int handle_raw_packet_from_nf(struct packet_io *handle, marsio_buff_t *rx_buff, int thread_seq, void *ctx, int *action_bytes) +{ + int nsend = 0; + struct acceptor_thread_ctx *thread = (struct acceptor_thread_ctx *)ctx; + struct acceptor_ctx *acceptor_ctx = thread->ref_acceptor_ctx; + struct addr_tuple4 inner_addr; + memset(&inner_addr, 0, sizeof(struct addr_tuple4)); + + int raw_len = marsio_buff_datalen(rx_buff); + char *raw_data = marsio_buff_mtod(rx_buff); + *action_bytes = 0; + + struct metadata meta; + if (packet_io_get_metadata(rx_buff, &meta) == -1) + { + TFE_LOG_ERROR(g_default_logger, "%s: unexpected raw packet, unable to get metadata, bypass !!!", LOG_TAG_PKTIO); + packet_io_dump_metadata(rx_buff, &meta); + marsio_send_burst(handle->dev_nf_interface.mr_path, thread_seq, &rx_buff, 1); + *action_bytes = raw_len; + return RAW_PKT_ERR_BYPASS; + } + + struct session_node *node = session_table_search_by_id(thread->session_table, meta.session_id); + if (node == NULL) + { + TFE_LOG_ERROR(g_default_logger, "%s: unexpected raw packet, unable to find session %lu from session table, bypass !!!", LOG_TAG_PKTIO, meta.session_id); + marsio_send_burst(handle->dev_nf_interface.mr_path, thread_seq, &rx_buff, 1); + *action_bytes = raw_len; + return RAW_PKT_ERR_BYPASS; + } + + struct session_ctx *s_ctx = (struct session_ctx *)node->val_data; + + // update sids + if (meta.dir_is_e2i) + { + sids_write_once(&(s_ctx->raw_pkt_e2i_sids), &(meta.sids)); + if (route_ctx_is_empty(&s_ctx->raw_pkt_e2i_route_ctx)) + { + route_ctx_copy(&s_ctx->raw_pkt_e2i_route_ctx, &meta.route_ctx); + } + } + else + { + sids_write_once(&(s_ctx->raw_pkt_i2e_sids), &(meta.sids)); + if (route_ctx_is_empty(&s_ctx->raw_pkt_i2e_route_ctx)) + { + route_ctx_copy(&s_ctx->raw_pkt_i2e_route_ctx, &meta.route_ctx); + } + } + + struct raw_pkt_parser raw_parser; + raw_packet_parser_init(&raw_parser, meta.session_id, LAYER_TYPE_ALL, 8); + const void *payload = raw_packet_parser_parse(&raw_parser, (const void *)meta.raw_data, meta.raw_len); + + if (meta.user_field && TRAFFIC_IS_DECRYPTED) + { + // c2s + if (memcmp(&inner_addr, &s_ctx->first_ctrl_pkt.tuple4, sizeof(struct addr_tuple4)) == 0) { + add_ether_header(raw_data, acceptor_ctx->config->src_mac, acceptor_ctx->config->tap_s_mac); + if (acceptor_ctx->config->enable_iouring) { + io_uring_submit_write_entry(thread->tap_ctx->io_uring_s, raw_data, raw_len); + } + else { + tfe_tap_write_per_thread(thread->tap_ctx->tap_s, raw_data, raw_len, g_default_logger); + } + } + // s2c + else { + add_ether_header(raw_data, acceptor_ctx->config->src_mac, acceptor_ctx->config->tap_c_mac); + if (acceptor_ctx->config->enable_iouring) { + io_uring_submit_write_entry(thread->tap_ctx->io_uring_c, raw_data, raw_len); + } + else { + tfe_tap_write_per_thread(thread->tap_ctx->tap_c, raw_data, raw_len, g_default_logger); + } + } + } + else + { + add_ether_header(raw_data, acceptor_ctx->config->src_mac, acceptor_ctx->config->tap_mac); + // tap0 + if (acceptor_ctx->config->enable_iouring) { + io_uring_submit_write_entry(thread->tap_ctx->io_uring_fd, raw_data, raw_len); + } + 
else { + tfe_tap_write_per_thread(thread->tap_ctx->tap_fd, raw_data, raw_len, g_default_logger); + } + } +} + +struct tcp_option_mss { + uint8_t kind; + uint8_t length; + uint16_t mss_value; +} __attribute__((__packed__)); + +struct tcp_option_window_scale { + uint8_t kind; + uint8_t length; + uint8_t shift_count; +} __attribute__((__packed__)); + +struct tcp_option_sack { + uint8_t kind; + uint8_t length; +} __attribute__((__packed__)); + +struct tcp_option_time_stamp { + uint8_t kind; + uint8_t length; + uint32_t tsval; + uint32_t tsecr; +} __attribute__((__packed__)); + +static int fake_tcp_handshake(struct tfe_proxy *proxy, struct tcp_restore_info *restore_info) +{ + char buffer[1500] = {0}; + int length = 0; + + char tcp_option_buffer_c[40] = {0}; + char tcp_option_buffer_s[40] = {0}; + char tcp_option_buffer_c2[40] = {0}; + int tcp_option_length_c = 0; + int tcp_option_length_s = 0; + int tcp_option_length_c2 = 0; + + const struct tcp_restore_endpoint *client = &restore_info->client; + const struct tcp_restore_endpoint *server = &restore_info->server; + struct raw_socket *raw_socket_c = raw_socket_create(proxy->traffic_steering_options.device_client, proxy->traffic_steering_options.so_mask_client); + struct raw_socket *raw_socket_s = raw_socket_create(proxy->traffic_steering_options.device_server, proxy->traffic_steering_options.so_mask_server); + if (raw_socket_c == NULL || raw_socket_s == NULL) + { + raw_socket_destory(raw_socket_c); + raw_socket_destory(raw_socket_s); + + return -1; + } + + uint32_t c_seq = client->seq - 1; + uint32_t s_seq = server->seq - 1; + + /* + * Maximum segment size: Kind: 2, Length: 4 + * +---------+---------+---------+ + * | Kind=2 |Length=4 |mss.value| + * +---------+---------+---------+ + * 1 1 2 + */ + if (client->mss && server->mss) + { + struct tcp_option_mss *option_c = (struct tcp_option_mss *)(tcp_option_buffer_c + tcp_option_length_c); + option_c->kind = 2; + option_c->length = 4; + option_c->mss_value = htons(client->mss); + tcp_option_length_c += sizeof(struct tcp_option_mss); + + struct tcp_option_mss *option_s = (struct tcp_option_mss *)(tcp_option_buffer_s + tcp_option_length_s); + option_s->kind = 2; + option_s->length = 4; + option_s->mss_value = htons(server->mss); + tcp_option_length_s += sizeof(struct tcp_option_mss); + } + + /* + * Window Scale option: Kind: 3, Length: 3 + * +---------+---------+---------+ + * | Kind=3 |Length=3 |shift.cnt| + * +---------+---------+---------+ + * 1 1 1 + */ + if (client->wscale_perm && server->wscale_perm) + { + // padding + memset(tcp_option_buffer_c + tcp_option_length_c, 1, 1); + tcp_option_length_c += 1; + memset(tcp_option_buffer_s + tcp_option_length_s, 1, 1); + tcp_option_length_s += 1; + + struct tcp_option_window_scale *option_c = (struct tcp_option_window_scale *)(tcp_option_buffer_c + tcp_option_length_c); + option_c->kind = 3; + option_c->length = 3; + option_c->shift_count = client->wscale; + tcp_option_length_c += sizeof(struct tcp_option_window_scale); + + struct tcp_option_window_scale *option_s = (struct tcp_option_window_scale *)(tcp_option_buffer_s + tcp_option_length_s); + option_s->kind = 3; + option_s->length = 3; + option_s->shift_count = server->wscale; + tcp_option_length_s += sizeof(struct tcp_option_window_scale); + } + + /* + * SACK option: Kind: 4, Length: 2 + * +---------+---------+ + * | Kind=4 |Length=2 | + * +---------+---------+ + * 1 1 + */ + if (client->sack_perm && server->sack_perm) + { + // padding + memset(tcp_option_buffer_c + tcp_option_length_c, 1, 2); + 
tcp_option_length_c += 2; + memset(tcp_option_buffer_s + tcp_option_length_s, 1, 2); + tcp_option_length_s += 2; + + struct tcp_option_sack *option_c = (struct tcp_option_sack *)(tcp_option_buffer_c + tcp_option_length_c); + option_c->kind = 4; + option_c->length = 2; + tcp_option_length_c += sizeof(struct tcp_option_sack); + + struct tcp_option_sack *option_s = (struct tcp_option_sack *)(tcp_option_buffer_s + tcp_option_length_s); + option_s->kind = 4; + option_s->length = 2; + tcp_option_length_s += sizeof(struct tcp_option_sack); + } + + /* + * Time Stamp option: Kind: 8, Length: 10 + * +---------+---------+-----+-----+ + * | Kind=8 |Length=10|tsval|tsecr| + * +---------+---------+-----+-----+ + * 1 1 4 4 + */ + if (client->timestamp_perm && server->timestamp_perm) + { + // padding + memset(tcp_option_buffer_c + tcp_option_length_c, 1, 2); + tcp_option_length_c += 2; + memset(tcp_option_buffer_s + tcp_option_length_s, 1, 2); + tcp_option_length_s += 2; + memset(tcp_option_buffer_c2 + tcp_option_length_c2, 1, 2); + tcp_option_length_c2 += 2; + + struct tcp_option_time_stamp *option_c = (struct tcp_option_time_stamp *)(tcp_option_buffer_c + tcp_option_length_c); + option_c->kind = 8; + option_c->length = 10; + option_c->tsval = htonl(client->ts_val); + option_c->tsecr = htonl(0); + tcp_option_length_c += sizeof(struct tcp_option_time_stamp); + + struct tcp_option_time_stamp *option_s = (struct tcp_option_time_stamp *)(tcp_option_buffer_s + tcp_option_length_s); + option_s->kind = 8; + option_s->length = 10; + option_s->tsval = htonl(server->ts_val); + option_s->tsecr = htonl(client->ts_val); + tcp_option_length_s += sizeof(struct tcp_option_time_stamp); + + struct tcp_option_time_stamp *option_c2 = (struct tcp_option_time_stamp *)(tcp_option_buffer_c2 + tcp_option_length_c2); + option_c2->kind = 8; + option_c2->length = 10; + option_c2->tsval = htonl(client->ts_val); + option_c2->tsecr = htonl(server->ts_val); + tcp_option_length_c2 += sizeof(struct tcp_option_time_stamp); + } + + if (client->addr.ss_family == AF_INET6) + { + struct sockaddr_in6 *sk_client = (struct sockaddr_in6 *)&client->addr; + struct sockaddr_in6 *sk_server = (struct sockaddr_in6 *)&server->addr; + uint16_t port_client = sk_client->sin6_port; + uint16_t port_server = sk_server->sin6_port; + + // C -> S + length = tcp_packet_v6_construct( + buffer, // buffer + &raw_socket_c->mac_addr, &raw_socket_s->mac_addr, 0, ETH_P_IPV6, // Ether + &sk_client->sin6_addr, &sk_server->sin6_addr, 55, // IPv6 + port_client, port_server, c_seq, 0, TCP_SYN_FLAG, client->window, // TCP Header + tcp_option_buffer_c, tcp_option_length_c, // TCP Options + NULL, 0); // Payload + raw_socket_send(raw_socket_c, buffer, length); + c_seq += 1; + + // S -> C + length = tcp_packet_v6_construct( + buffer, // buffer + &raw_socket_s->mac_addr, &raw_socket_c->mac_addr, 0, ETH_P_IPV6, // Ether + &sk_server->sin6_addr, &sk_client->sin6_addr, 65, // IPv6 + port_server, port_client, s_seq, c_seq, TCP_SYN_FLAG | TCP_ACK_FLAG, server->window, // TCP Header + tcp_option_buffer_s, tcp_option_length_s, // TCP Options + NULL, 0); // Payload + raw_socket_send(raw_socket_s, buffer, length); + s_seq += 1; + + // C -> S + length = tcp_packet_v6_construct( + buffer, // buffer + &raw_socket_c->mac_addr, &raw_socket_s->mac_addr, 0, ETH_P_IPV6, // Ether + &sk_client->sin6_addr, &sk_server->sin6_addr, 55, // IPv6 + port_client, port_server, c_seq, s_seq, TCP_ACK_FLAG, client->window, // TCP Header + tcp_option_buffer_c2, tcp_option_length_c2, // TCP Options + NULL, 0); // 
Payload + raw_socket_send(raw_socket_c, buffer, length); + } + else + { + struct sockaddr_in *sk_client = (struct sockaddr_in *)&client->addr; + struct sockaddr_in *sk_server = (struct sockaddr_in *)&server->addr; + uint16_t port_client = sk_client->sin_port; + uint16_t port_server = sk_server->sin_port; + + // C -> S + length = tcp_packet_v4_construct( + buffer, // buffer + &raw_socket_c->mac_addr, &raw_socket_s->mac_addr, 0, ETH_P_IP, // Ether + &sk_client->sin_addr, &sk_server->sin_addr, 0, 55, 0x11, // IPv4 + port_client, port_server, c_seq, 0, TCP_SYN_FLAG, client->window, // TCP Header + tcp_option_buffer_c, tcp_option_length_c, // TCP Options + NULL, 0); + raw_socket_send(raw_socket_c, buffer, length); + c_seq += 1; + + // S -> C + length = tcp_packet_v4_construct( + buffer, // buffer + &raw_socket_s->mac_addr, &raw_socket_c->mac_addr, 0, ETH_P_IP, // Ether + &sk_server->sin_addr,&sk_client->sin_addr, 0, 65, 0x12, // IPv4 + port_server, port_client, s_seq, c_seq, TCP_SYN_FLAG | TCP_ACK_FLAG, server->window, // TCP Header + tcp_option_buffer_s, tcp_option_length_s, // TCP Options + NULL, 0); + raw_socket_send(raw_socket_s, buffer, length); + s_seq += 1; + + // C -> S + length = tcp_packet_v4_construct( + buffer, // buffer + &raw_socket_c->mac_addr, &raw_socket_s->mac_addr, 0, ETH_P_IP, // Ether + &sk_client->sin_addr, &sk_server->sin_addr, 0, 55, 0x13, // IPv4 + port_client, port_server, c_seq, s_seq, TCP_ACK_FLAG, client->window, // TCP Header + tcp_option_buffer_c2, tcp_option_length_c2, // TCP Options + NULL, 0); + raw_socket_send(raw_socket_c, buffer, length); + } + + raw_socket_destory(raw_socket_c); + raw_socket_destory(raw_socket_s); + + return 0; +} + +static int overwrite_tcp_mss(struct tfe_cmsg *cmsg, struct tcp_restore_info *restore) +{ + int ret = 0; + uint16_t size = 0; + int server_side_mss_enable = 0; + int server_side_mss_value = 0; + int client_side_mss_enable = 0; + int client_side_mss_value = 0; + + ret = tfe_cmsg_get_value(cmsg, TFE_CMSG_DOWNSTREAM_TCP_MSS_ENABLE, (unsigned char *)&client_side_mss_enable, sizeof(client_side_mss_enable), &size); + if (ret < 0) + { + TFE_LOG_ERROR(g_default_logger, "failed at fetch client side tcp mss from cmsg: %s", strerror(-ret)); + return -1; + } + ret = tfe_cmsg_get_value(cmsg, TFE_CMSG_DOWNSTREAM_TCP_MSS_VALUE, (unsigned char *)&client_side_mss_value, sizeof(client_side_mss_value), &size); + if (ret < 0) + { + TFE_LOG_ERROR(g_default_logger, "failed at fetch client side tcp mss value from cmsg: %s", strerror(-ret)); + return -1; + } + ret = tfe_cmsg_get_value(cmsg, TFE_CMSG_UPSTREAM_TCP_MSS_ENABLE, (unsigned char *)&server_side_mss_enable, sizeof(server_side_mss_enable), &size); + if (ret < 0) + { + TFE_LOG_ERROR(g_default_logger, "failed at fetch server side tcp mss from cmsg: %s", strerror(-ret)); + return -1; + } + ret = tfe_cmsg_get_value(cmsg, TFE_CMSG_UPSTREAM_TCP_MSS_VALUE, (unsigned char *)&server_side_mss_value, sizeof(server_side_mss_value), &size); + if (ret < 0) + { + TFE_LOG_ERROR(g_default_logger, "failed at fetch server side tcp mss value from cmsg: %s", strerror(-ret)); + return -1; + } + if (client_side_mss_enable) + { + restore->client.mss = client_side_mss_value; + } + if (server_side_mss_enable) + { + restore->server.mss = server_side_mss_value; + } + + return 0; +} + +// return 0 : success +// return -1 : error +static int handle_session_opening(struct metadata *meta, struct ctrl_pkt_parser *parser, int thread_seq, void *ctx) +{ + uint8_t *iptmp = NULL; + int ret = 0; + int fd_downstream = 0; + int 
fd_upstream = 0; + int fd_fake_c = 0; + int fd_fake_s = 0; + uint64_t rule_id = 0; + uint16_t size = 0; + + uint8_t stream_protocol_in_char = 0; + uint8_t enalbe_decrypted_traffic_steering = 0; + struct session_ctx *s_ctx = NULL; + struct addr_tuple4 tuple4; + struct tcp_restore_info restore_info; + memset(&tuple4, 0, sizeof(tuple4)); + memset(&restore_info, 0, sizeof(restore_info)); + + struct sockaddr_in *in_addr_client = (struct sockaddr_in *)&restore_info.client.addr; + struct sockaddr_in *in_addr_server = (struct sockaddr_in *)&restore_info.server.addr; + + struct acceptor_thread_ctx *thread = (struct acceptor_thread_ctx *)ctx; + struct acceptor_ctx *acceptor_ctx = thread->ref_acceptor_ctx; + + struct raw_pkt_parser raw_parser; + raw_packet_parser_init(&raw_parser, meta->session_id, LAYER_TYPE_ALL, 8); + const void *payload = raw_packet_parser_parse(&raw_parser, (const void *)meta->raw_data, meta->raw_len); + if ((char *)payload - meta->raw_data != meta->l7_offset) + { + uint16_t offset = (char *)payload - meta->raw_data; + TFE_LOG_ERROR(g_default_logger, "%s: incorrect dataoffset in the control zone of session %lu, offset:%u, l7_offset:%u, payload:%p, raw_data:%p", LOG_TAG_PKTIO, meta->session_id, offset, meta->l7_offset, payload, meta->raw_data); + } + + raw_packet_parser_get_most_inner_tuple4(&raw_parser, &tuple4); + + ret = tfe_cmsg_get_value(parser->cmsg, TFE_CMSG_POLICY_ID, (unsigned char *)&rule_id, sizeof(rule_id), &size); + if (ret < 0) + { + TFE_LOG_ERROR(g_default_logger, "failed at fetch rule_id from cmsg: %s", strerror(-ret)); + goto end; + } + + intercept_policy_enforce(thread->ref_proxy->int_ply_enforcer, parser->cmsg); + tcp_policy_enforce(thread->ref_proxy->tcp_ply_enforcer, parser->cmsg); + for (int i = 0; i < parser->sce_policy_id_num; i++) { + chaining_policy_enforce(thread->ref_proxy->chain_ply_enforcer, parser->cmsg, parser->sce_policy_ids[i]); + } + + tcp_restore_set_from_cmsg(parser->cmsg, &restore_info); + tcp_restore_set_from_pkg(&tuple4, &restore_info); + + if (overwrite_tcp_mss(parser->cmsg, &restore_info)) + { + goto end; + } + + iptmp = (uint8_t *)&in_addr_client->sin_addr.s_addr; + // tcp repair C2S + TFE_LOG_DEBUG(g_default_logger, "restore_info: client"); + + TFE_LOG_DEBUG(g_default_logger, "\t addr:%d.%d.%d.%d", iptmp[0], iptmp[1], iptmp[2], iptmp[3]); + TFE_LOG_DEBUG(g_default_logger, "\t port:%u", in_addr_client->sin_port); + TFE_LOG_DEBUG(g_default_logger, "\t seq:%u", restore_info.client.seq); + TFE_LOG_DEBUG(g_default_logger, "\t ack:%u", restore_info.client.ack); + TFE_LOG_DEBUG(g_default_logger, "\t ts_val:%u", restore_info.client.ts_val); + TFE_LOG_DEBUG(g_default_logger, "\t mss:%u", restore_info.client.mss); + TFE_LOG_DEBUG(g_default_logger, "\t window:%u", restore_info.client.window); + TFE_LOG_DEBUG(g_default_logger, "\t wscale:%u", restore_info.client.wscale); + TFE_LOG_DEBUG(g_default_logger, "\t wscale_perm:%s", restore_info.client.wscale_perm > 0?"true":"false"); + TFE_LOG_DEBUG(g_default_logger, "\t timestamp_perm:%s", restore_info.client.timestamp_perm > 0?"true":"false"); + TFE_LOG_DEBUG(g_default_logger, "\t sack_perm:%s", restore_info.client.sack_perm > 0?"true":"false"); + + + iptmp = (uint8_t *)&in_addr_server->sin_addr.s_addr; + // tcp repair C2S + TFE_LOG_DEBUG(g_default_logger, "restore_info: server"); + + TFE_LOG_DEBUG(g_default_logger, "\t addr:%d.%d.%d.%d", iptmp[0], iptmp[1], iptmp[2], iptmp[3]); + TFE_LOG_DEBUG(g_default_logger, "\t port:%u", in_addr_server->sin_port); + TFE_LOG_DEBUG(g_default_logger, "\t seq:%u", 
restore_info.server.seq); + TFE_LOG_DEBUG(g_default_logger, "\t ack:%u", restore_info.server.ack); + TFE_LOG_DEBUG(g_default_logger, "\t ts_val:%u", restore_info.server.ts_val); + TFE_LOG_DEBUG(g_default_logger, "\t mss:%u", restore_info.server.mss); + TFE_LOG_DEBUG(g_default_logger, "\t window:%u", restore_info.server.window); + TFE_LOG_DEBUG(g_default_logger, "\t wscale:%u", restore_info.server.wscale); + TFE_LOG_DEBUG(g_default_logger, "\t wscale_perm:%s", restore_info.server.wscale_perm > 0?"true":"false"); + TFE_LOG_DEBUG(g_default_logger, "\t timestamp_perm:%s", restore_info.server.timestamp_perm > 0?"true":"false"); + TFE_LOG_DEBUG(g_default_logger, "\t sack_perm:%s", restore_info.server.sack_perm > 0?"true":"false"); + + + fd_upstream = tfe_tcp_restore_fd_create(&(restore_info.client), &(restore_info.server), thread->ref_tap_config->tap_device, 0x65); + if (fd_upstream < 0) + { + TFE_LOG_ERROR(g_default_logger, "Failed at tcp_restore_fd_create(UPSTREAM)"); + goto end; + } + + // tcp repair S2C + fd_downstream = tfe_tcp_restore_fd_create(&(restore_info.server), &(restore_info.client), thread->ref_tap_config->tap_device, 0x65); + if (fd_downstream < 0) + { + TFE_LOG_ERROR(g_default_logger, "Failed at tcp_restore_fd_create(DOWNSTREAM)"); + goto end; + } + + tfe_cmsg_get_value(parser->cmsg, TFE_CMSG_TCP_RESTORE_PROTOCOL, (unsigned char *)&stream_protocol_in_char, sizeof(stream_protocol_in_char), &size); + tfe_cmsg_get_value(parser->cmsg, TFE_CMSG_TCP_DECRYPTED_TRAFFIC_STEERING, (unsigned char *)&enalbe_decrypted_traffic_steering, sizeof(enalbe_decrypted_traffic_steering), &size); + if ((STREAM_PROTO_PLAIN == (enum tfe_stream_proto)stream_protocol_in_char && thread->ref_proxy->traffic_steering_options.enable_steering_http) || + (STREAM_PROTO_SSL == (enum tfe_stream_proto)stream_protocol_in_char && thread->ref_proxy->traffic_steering_options.enable_steering_ssl) || + enalbe_decrypted_traffic_steering == 1) + { + if (fake_tcp_handshake(thread->ref_proxy, &restore_info) == -1) + { + TFE_LOG_ERROR(g_default_logger, "Failed at fake_tcp_handshake()"); + goto end; + } + + fd_fake_c = tfe_tcp_restore_fd_create(&(restore_info.client), &(restore_info.server), thread->ref_proxy->traffic_steering_options.device_client, thread->ref_proxy->traffic_steering_options.so_mask_client); + if (fd_fake_c < 0) + { + TFE_LOG_ERROR(g_default_logger, "Failed at tcp_restore_fd_create(fd_fake_c)"); + goto end; + } + + fd_fake_s = tfe_tcp_restore_fd_create(&(restore_info.server), &(restore_info.client), thread->ref_proxy->traffic_steering_options.device_server, thread->ref_proxy->traffic_steering_options.so_mask_server); + if (fd_fake_s < 0) + { + TFE_LOG_ERROR(g_default_logger, "Failed at tcp_restore_fd_create(fd_fake_s)"); + goto end; + } + } + + if (tfe_proxy_fds_accept(thread->ref_proxy, fd_downstream, fd_upstream, fd_fake_c, fd_fake_s, parser->cmsg) < 0) + { + TFE_LOG_ERROR(g_default_logger, "Failed at tfe_proxy_fds_accept()"); + goto end; + } + + s_ctx = session_ctx_new(); + s_ctx->ref_thread_ctx = thread; + s_ctx->session_id = meta->session_id; + s_ctx->cmsg = parser->cmsg; + s_ctx->first_ctrl_pkt.dir_is_e2i = meta->dir_is_e2i; + raw_packet_parser_get_most_inner_tuple4(&raw_parser, &(s_ctx->first_ctrl_pkt.tuple4)); + s_ctx->first_ctrl_pkt.addr_string = addr_tuple4_to_str(&(s_ctx->first_ctrl_pkt.tuple4)); + s_ctx->first_ctrl_pkt.header_data = (char *)calloc(1, meta->l7_offset); + memcpy(s_ctx->first_ctrl_pkt.header_data, meta->raw_data, meta->l7_offset); + s_ctx->first_ctrl_pkt.header_len = meta->l7_offset; + 
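+    // Cache the sids and route context carried by the first control packet as well, so that later
+    // replies for this session (e.g. the closing event log) can be sent back over the same control path.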
sids_copy(&s_ctx->first_ctrl_pkt.sids, &meta->sids); + route_ctx_copy(&s_ctx->first_ctrl_pkt.route_ctx, &meta->route_ctx); + + TFE_LOG_INFO(g_default_logger, "%s: session %lu %s active first", LOG_TAG_PKTIO, s_ctx->session_id, s_ctx->first_ctrl_pkt.addr_string); + s_ctx->policy_ids = parser->tfe_policy_ids[0]; + + session_table_insert(thread->session_table, s_ctx->session_id, &(s_ctx->first_ctrl_pkt.tuple4), s_ctx, session_value_free_cb); + + return 0; +end: + return -1; +} + +/* +{ + "tsync": "1.0", + "session_id": "123456789", + "state": "active", + "method": "log_update", + "params": { + "sf_profile_ids": [ + 2, + 3, + 4, + 5, + 6, + 7 + ] + } +} +*/ +static void send_event_log(struct session_ctx *s_ctx, int thread_seq, void *ctx) +{ + struct acceptor_thread_ctx *thread = (struct acceptor_thread_ctx *)ctx; + struct acceptor_ctx *acceptor_ctx = thread->ref_acceptor_ctx; + struct packet_io *packet_io = thread->ref_io; + + char buffer[32] = {0}; + sprintf(buffer, "%lu", s_ctx->session_id); + + cJSON *root = cJSON_CreateObject(); + cJSON_AddStringToObject(root, "tsync", "1.0"); + cJSON_AddStringToObject(root, "session_id", buffer); + cJSON_AddStringToObject(root, "state", "closing"); + cJSON_AddStringToObject(root, "method", "log_update"); + cJSON *sf_profile_ids = cJSON_CreateArray(); + + cJSON *params = cJSON_CreateObject(); + cJSON_AddItemToObject(params, "sf_profile_ids", sf_profile_ids); + cJSON_AddItemToObject(root, "params", params); + char *json_str = cJSON_PrintUnformatted(root); + + TFE_LOG_INFO(g_default_logger, "%s: session %lu %s event log: %s", LOG_TAG_METRICS, s_ctx->session_id, s_ctx->first_ctrl_pkt.addr_string, json_str); + + marsio_buff_t *tx_buffs[1]; + marsio_buff_malloc_device(packet_io->dev_nf_interface.mr_dev, tx_buffs, 1, 0, thread_seq); + char *dst = marsio_buff_append(tx_buffs[0], s_ctx->first_ctrl_pkt.header_len + strlen(json_str)); + memcpy(dst, s_ctx->first_ctrl_pkt.header_data, s_ctx->first_ctrl_pkt.header_len); + memcpy(dst + s_ctx->first_ctrl_pkt.header_len, json_str, strlen(json_str)); + + struct metadata meta = {0}; + meta.session_id = s_ctx->session_id; + meta.is_ctrl_pkt = 1; + meta.l7_offset = s_ctx->first_ctrl_pkt.header_len; + meta.sids.num = 1; + meta.sids.elems[0] = acceptor_ctx->firewall_sids; + route_ctx_copy(&meta.route_ctx, &s_ctx->first_ctrl_pkt.route_ctx); + packet_io_set_metadata(tx_buffs[0], &meta); + marsio_send_burst(packet_io->dev_nf_interface.mr_path, thread_seq, tx_buffs, 1); + + free(json_str); + cJSON_Delete(root); +} + +// return 0 : success +// return -1 : error +static int handle_session_closing(struct metadata *meta, struct ctrl_pkt_parser *parser, int thread_seq, void *ctx) +{ + struct acceptor_thread_ctx *thread = (struct acceptor_thread_ctx *)ctx; + + struct session_node *node = session_table_search_by_id(thread->session_table, meta->session_id); + if (node) + { + struct session_ctx *s_ctx = (struct session_ctx *)node->val_data; + TFE_LOG_INFO(g_default_logger, "%s: session %lu %s closing", LOG_TAG_PKTIO, s_ctx->session_id, s_ctx->first_ctrl_pkt.addr_string); + + send_event_log(s_ctx, thread_seq, ctx); + + session_table_delete_by_id(thread->session_table, meta->session_id); + return 0; + } + + return -1; +} + +// return 0 : success +// return -1 : error +static int handle_session_active(struct metadata *meta, struct ctrl_pkt_parser *parser, int thread_seq, void *ctx) +{ + struct acceptor_thread_ctx *thread = (struct acceptor_thread_ctx *)ctx; + + struct session_node *node = session_table_search_by_id(thread->session_table, 
meta->session_id);
+    if (node)
+    {
+        struct raw_pkt_parser raw_parser;
+        raw_packet_parser_init(&raw_parser, meta->session_id, LAYER_TYPE_ALL, 8);
+        const void *payload = raw_packet_parser_parse(&raw_parser, (const void *)meta->raw_data, meta->raw_len);
+        // compare against the start of the raw buffer (meta->raw_data), not the address of the pointer field
+        if ((char *)payload - meta->raw_data != meta->l7_offset)
+        {
+            TFE_LOG_ERROR(g_default_logger, "%s: incorrect dataoffset in the control zone of session %lu", LOG_TAG_PKTIO, meta->session_id);
+        }
+
+        struct session_ctx *s_ctx = (struct session_ctx *)node->val_data;
+        TFE_LOG_INFO(g_default_logger, "%s: session %lu %s active again", LOG_TAG_PKTIO, s_ctx->session_id, s_ctx->first_ctrl_pkt.addr_string);
+        s_ctx->policy_ids = parser->tfe_policy_ids[0];
+    }
+    else
+    {
+        return handle_session_opening(meta, parser, thread_seq, ctx);
+    }
+
+    return 0;
+}
+
+// return 0 : success
+// return -1 : error
+static int handle_session_resetall(struct metadata *meta, struct ctrl_pkt_parser *parser, int thread_seq, void *ctx)
+{
+    struct acceptor_thread_ctx *thread = (struct acceptor_thread_ctx *)ctx;
+    struct acceptor_ctx *acceptor_ctx = thread->ref_acceptor_ctx;
+
+    TFE_LOG_ERROR(g_default_logger, "%s: session %lu resetall: notification clears all session tables !!!", LOG_TAG_PKTIO, meta->session_id);
+
+    for (int i = 0; i < acceptor_ctx->nr_worker_threads; i++)
+    {
+        struct acceptor_thread_ctx *thread_ctx = &acceptor_ctx->work_threads[i];
+        __atomic_fetch_add(&thread_ctx->session_table_need_reset, 1, __ATOMIC_RELAXED);
+    }
+
+    return 0;
+}
+
+static void session_value_free_cb(void *ctx)
+{
+    struct session_ctx *s_ctx = (struct session_ctx *)ctx;
+    session_ctx_free(s_ctx);
+}
+
+// return 0 : not keepalive packet
+// return 1 : is keepalive packet
+static int is_downstream_keepalive_packet(marsio_buff_t *rx_buff)
+{
+    int raw_len = marsio_buff_datalen(rx_buff);
+    char *raw_data = marsio_buff_mtod(rx_buff);
+    if (raw_data == NULL || raw_len < (int)(sizeof(struct ethhdr)))
+    {
+        return 0;
+    }
+
+    struct ethhdr *eth_hdr = (struct ethhdr *)raw_data;
+    if (eth_hdr->h_proto == 0xAAAA)
+    {
+        return 1;
+    }
+    else
+    {
+        return 0;
+    }
+}
diff --git a/common/src/tfe_raw_packet.cpp b/common/src/tfe_raw_packet.cpp
new file mode 100644
index 0000000..a407669
--- /dev/null
+++ b/common/src/tfe_raw_packet.cpp
@@ -0,0 +1,994 @@
+#include
+#include
+#include
+#include
+
+#include
+#include
+#define __FAVOR_BSD 1
+#include
+#include
+#include
+
+#include "tfe_utils.h"
+#include "uthash.h"
+#include "tfe_addr_tuple4.h"
+#include "tfe_raw_packet.h"
+
+/******************************************************************************
+ * Struct
+ ******************************************************************************/
+
+enum parse_status
+{
+    PARSE_STATUS_CONTINUE,
+    PARSE_STATUS_STOP
+};
+
+struct vlan_hdr
+{
+    uint16_t vlan_cfi;
+    uint16_t protocol;
+} __attribute__((__packed__));
+
+struct vxlan_hdr
+{
+    uint8_t flags[2];
+    uint16_t gdp; // group policy id
+    uint8_t vni[3];
+    uint8_t reserved;
+} __attribute__((__packed__));
+
+struct gtp_hdr
+{
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+    unsigned char flags;
+    unsigned char msg_type;
+    unsigned short len;
+    unsigned int teid;
+#elif __BYTE_ORDER == __BIG_ENDIAN
+    unsigned int teid;
+    unsigned short len;
+    unsigned char msg_type;
+    unsigned char flags;
+#else
+#error "Please check "
+#endif
+} __attribute__((__packed__));
+
+#define GTP_HDR_VER_MASK (0xE0)
+#define GTP_HDR_FLAG_N_PDU (0x01)
+#define GTP_HDR_FLAG_SEQ_NUM (0x02)
+#define GTP_HDR_FLAG_NEXT_EXT_HDR (0x04)
+
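+// GTP_HDR_VER_MASK selects the 3-bit version field of the flags byte; the three flag bits indicate
+// which optional fields (sequence number, N-PDU number, next extension header) follow the mandatory
+// 8-byte GTP-U header. parse_gtphdr_len() uses these to compute the full header length.
+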
+/****************************************************************************** + * Static API + ******************************************************************************/ + +static int raw_packet_parser_push(struct raw_pkt_parser *handler, enum layer_type type, uint16_t offset); +static enum parse_status raw_packet_parser_status(struct raw_pkt_parser *handler, const void *data, enum layer_type this_type); + +static const char *ldbc_method_to_string(enum ldbc_method ldbc_method); + +// parser utils +static void set_addr_tuple4(const void *data, enum layer_type layer_type, struct addr_tuple4 *addr); +static const char *layer_type2str(enum layer_type this_type); +static uint16_t parse_gtphdr_len(const struct gtp_hdr *gtph); + +// parser protocol +static const void *parse_ether(struct raw_pkt_parser *handler, const void *data, size_t length, enum layer_type this_type); +static const void *parse_ipv4(struct raw_pkt_parser *handler, const void *data, size_t length, enum layer_type this_type); +static const void *parse_ipv6(struct raw_pkt_parser *handler, const void *data, size_t length, enum layer_type this_type); +static const void *parse_tcp(struct raw_pkt_parser *handler, const void *data, size_t length, enum layer_type this_type); +static const void *parse_udp(struct raw_pkt_parser *handler, const void *data, size_t length, enum layer_type this_type); +static const void *parse_pppoe_ses(struct raw_pkt_parser *handler, const void *data, size_t length, enum layer_type this_type); +static const void *parse_vxlan(struct raw_pkt_parser *handler, const void *data, size_t length, enum layer_type this_type); +static const void *parse_vlan8021q(struct raw_pkt_parser *handler, const void *data, size_t length, enum layer_type this_type); +static const void *parse_gtpv1_u(struct raw_pkt_parser *handler, const void *data, size_t length, enum layer_type this_type); +static const void *parse_mpls(struct raw_pkt_parser *handler, const void *data, size_t length, enum layer_type this_type); + +/****************************************************************************** + * Public API + ******************************************************************************/ + +void raw_packet_parser_init(struct raw_pkt_parser *handler, uint64_t pkt_trace_id, enum layer_type expect_type, uint16_t expect_results_num) +{ + memset(handler, 0, sizeof(struct raw_pkt_parser)); + + handler->expect_type = expect_type; + handler->results.layers_used = 0; + handler->results.layers_size = MIN(expect_results_num, (sizeof(handler->results.layers) / sizeof(handler->results.layers[0]))); + handler->ptr_pkt_start = NULL; + handler->pkt_trace_id = pkt_trace_id; +} + +// return most inner payload +const void *raw_packet_parser_parse(struct raw_pkt_parser *handler, const void *data, size_t length) +{ + handler->ptr_pkt_start = data; + + // TESTED + return parse_ether(handler, data, length, LAYER_TYPE_ETHER); +} + +// return 0 : success +// return -1 : error +int raw_packet_parser_get_most_inner_tuple4(struct raw_pkt_parser *handler, struct addr_tuple4 *addr) +{ + const char *l3_layer_data = NULL; + const char *l4_layer_data = NULL; + const struct layer_result *l3_layer_result = NULL; + const struct layer_result *l4_layer_result = NULL; + struct layer_results *results = &handler->results; + + // search L4 layer and L3 layer in reverse order + for (int8_t i = results->layers_used - 1; i >= 0; i--) + { + const struct layer_result *layer = &results->layers[i]; + enum layer_type type = layer->type; + + TFE_LOG_DEBUG(g_default_logger, 
"%s: find most inner tuple4, pkt_trace_id: %lu, layer[%d/%d]: %s", LOG_TAG_RAWPKT, handler->pkt_trace_id, i, results->layers_size, layer_type2str(type)); + + // first get L4 layer + if (type & LAYER_TYPE_L4) + { + l4_layer_result = layer; + continue; + } + + // second get L3 layer + if (type & LAYER_TYPE_L3) + { + l3_layer_result = layer; + break; + } + } + + if (l3_layer_result) + { + l3_layer_data = (const char *)handler->ptr_pkt_start + l3_layer_result->offset; + set_addr_tuple4(l3_layer_data, l3_layer_result->type, addr); + } + + if (l4_layer_result) + { + l4_layer_data = (const char *)handler->ptr_pkt_start + l4_layer_result->offset; + set_addr_tuple4(l4_layer_data, l4_layer_result->type, addr); + } + + if (l3_layer_result && l4_layer_result) + { + return 0; + } + else + { + return -1; + } +} + +// return 0 : success +// return -1 : error +int raw_packet_parser_get_most_outer_tuple4(struct raw_pkt_parser *handler, struct addr_tuple4 *addr) +{ + const char *l3_layer_data = NULL; + const char *l4_layer_data = NULL; + const struct layer_result *l3_layer_result = NULL; + const struct layer_result *l4_layer_result = NULL; + struct layer_results *results = &handler->results; + + // search L3 layer and L4 layer in order + for (int8_t i = 0; i <= results->layers_used - 1; i++) + { + const struct layer_result *layer = &results->layers[i]; + enum layer_type type = layer->type; + + TFE_LOG_DEBUG(g_default_logger, "%s: find most outer tuple4, pkt_trace_id: %lu, layer[%d/%d]: %s", LOG_TAG_RAWPKT, handler->pkt_trace_id, i, results->layers_size, layer_type2str(type)); + + // first get L3 layer + if (type & LAYER_TYPE_L3) + { + l3_layer_result = layer; + continue; + } + + // second get L4 layer + if (type & LAYER_TYPE_L4) + { + l4_layer_result = layer; + break; + } + } + + if (l3_layer_result) + { + l3_layer_data = (const char *)handler->ptr_pkt_start + l3_layer_result->offset; + set_addr_tuple4(l3_layer_data, l3_layer_result->type, addr); + } + + if (l4_layer_result) + { + l4_layer_data = (const char *)handler->ptr_pkt_start + l4_layer_result->offset; + set_addr_tuple4(l4_layer_data, l4_layer_result->type, addr); + } + + if (l3_layer_result && l4_layer_result) + { + return 0; + } + else + { + return -1; + } +} + +// return 0 : success +// return -1 : error +int raw_packet_parser_get_most_inner_address(struct raw_pkt_parser *handler, struct addr_tuple4 *addr) +{ + const char *l3_layer_data = NULL; + struct layer_results *results = &handler->results; + + // search L3 layer in reverse order + for (int8_t i = results->layers_used - 1; i >= 0; i--) + { + const struct layer_result *layer = &results->layers[i]; + enum layer_type type = layer->type; + + TFE_LOG_DEBUG(g_default_logger, "%s: find most inner address, pkt_trace_id: %lu, layer[%d/%d]: %s", LOG_TAG_RAWPKT, handler->pkt_trace_id, i, results->layers_size, layer_type2str(type)); + if (type & LAYER_TYPE_L3) + { + l3_layer_data = (const char *)handler->ptr_pkt_start + layer->offset; + set_addr_tuple4(l3_layer_data, type, addr); + return 0; + } + } + + return -1; +} + +// return 0 : success +// return -1 : error +int raw_packet_parser_get_most_outer_address(struct raw_pkt_parser *handler, struct addr_tuple4 *addr) +{ + const char *l3_layer_data = NULL; + struct layer_results *results = &handler->results; + + // search L3 layer in order + for (int8_t i = 0; i <= results->layers_used - 1; i++) + { + const struct layer_result *layer = &results->layers[i]; + enum layer_type type = layer->type; + + TFE_LOG_DEBUG(g_default_logger, "%s: find most outer 
address, pkt_trace_id: %lu, layer[%d/%d]: %s", LOG_TAG_RAWPKT, handler->pkt_trace_id, i, results->layers_size, layer_type2str(type)); + if (type & LAYER_TYPE_L3) + { + l3_layer_data = (const char *)handler->ptr_pkt_start + layer->offset; + set_addr_tuple4(l3_layer_data, type, addr); + return 0; + } + } + + return -1; +} + +uint64_t raw_packet_parser_get_hash_value(struct raw_pkt_parser *handler, enum ldbc_method method, int dir_is_internal) +{ + uint64_t temp = 0; + uint64_t hash_value = 1; + + int inner_addr_len = 0; + int outer_addr_len = 0; + const char *inner_src_addr = NULL; + const char *inner_dst_addr = NULL; + const char *outer_src_addr = NULL; + const char *outer_dst_addr = NULL; + + struct addr_tuple4 inner_addr; + struct addr_tuple4 outer_addr; + memset(&inner_addr, 0, sizeof(inner_addr)); + memset(&outer_addr, 0, sizeof(outer_addr)); + + if (handler == NULL) + { + return hash_value; + } + + if (raw_packet_parser_get_most_inner_address(handler, &inner_addr) == -1) + { + return hash_value; + } + + if (raw_packet_parser_get_most_outer_address(handler, &outer_addr) == -1) + { + return hash_value; + } + + if (inner_addr.addr_type == ADDR_TUPLE4_TYPE_V4) + { + inner_src_addr = (const char *)&(inner_addr.addr_v4.src_addr); + inner_dst_addr = (const char *)&(inner_addr.addr_v4.dst_addr); + inner_addr_len = sizeof(inner_addr.addr_v4.dst_addr); + } + else + { + inner_src_addr = (const char *)&(inner_addr.addr_v6.src_addr); + inner_dst_addr = (const char *)&(inner_addr.addr_v6.dst_addr); + inner_addr_len = sizeof(inner_addr.addr_v6.dst_addr); + } + + if (outer_addr.addr_type == ADDR_TUPLE4_TYPE_V4) + { + outer_src_addr = (const char *)&(outer_addr.addr_v4.src_addr); + outer_dst_addr = (const char *)&(outer_addr.addr_v4.dst_addr); + outer_addr_len = sizeof(outer_addr.addr_v4.dst_addr); + } + else + { + outer_src_addr = (const char *)&(outer_addr.addr_v6.src_addr); + outer_dst_addr = (const char *)&(outer_addr.addr_v6.dst_addr); + outer_addr_len = sizeof(outer_addr.addr_v6.dst_addr); + } + + switch (method) + { + case LDBC_METHOD_HASH_INT_IP: + if (dir_is_internal) + { + // outer src ip + HASH_VALUE(outer_src_addr, outer_addr_len, hash_value); + } + else + { + // outer dst ip + HASH_VALUE(outer_dst_addr, outer_addr_len, hash_value); + } + break; + case LDBC_METHOD_HASH_EXT_IP: + if (dir_is_internal) + { + // outer dst ip + HASH_VALUE(outer_dst_addr, outer_addr_len, hash_value); + } + else + { + // outer src ip + HASH_VALUE(outer_src_addr, outer_addr_len, hash_value); + } + break; + case LDBC_METHOD_HASH_INT_IP_AND_EXT_IP: + // outer dst ip ^ outer src ip + HASH_VALUE(outer_src_addr, outer_addr_len, hash_value); + HASH_VALUE(outer_dst_addr, outer_addr_len, temp); + hash_value = hash_value ^ temp; + break; + case LDBC_METHOD_HASH_INNERMOST_INT_IP: + if (dir_is_internal) + { + // innner src ip + HASH_VALUE(inner_src_addr, inner_addr_len, hash_value); + } + else + { + // innner dst ip + HASH_VALUE(inner_dst_addr, inner_addr_len, hash_value); + } + break; + case LDBC_METHOD_HASH_INNERMOST_EXT_IP: + if (dir_is_internal) + { + // innner dst ip + HASH_VALUE(inner_dst_addr, inner_addr_len, hash_value); + } + else + { + // innner src ip + HASH_VALUE(inner_src_addr, inner_addr_len, hash_value); + } + break; + default: + return hash_value; + } + + char *inner_addr_str = addr_tuple4_to_str(&inner_addr); + char *outer_addr_str = addr_tuple4_to_str(&outer_addr); + TFE_LOG_DEBUG(g_default_logger, "%s: pkt_trace_id: %lu, outer_addr: %s, inner_addr: %s, is_internal: %d, hash_method: %s, hash_value: %lu", + 
LOG_TAG_RAWPKT, handler->pkt_trace_id, outer_addr_str, inner_addr_str, dir_is_internal, ldbc_method_to_string(method), hash_value); + free(inner_addr_str); + free(outer_addr_str); + + return hash_value; +} + +/****************************************************************************** + * Private API + ******************************************************************************/ + +// return 0 : success +// return -ENOMEM : error +static int raw_packet_parser_push(struct raw_pkt_parser *handler, enum layer_type type, uint16_t offset) +{ + struct layer_results *result = &handler->results; + + if (result->layers_used >= result->layers_size) + { + return -ENOMEM; + } + + result->layers[result->layers_used].offset = offset; + result->layers[result->layers_used].type = type; + result->layers_used++; + + return 0; +} + +// return PARSE_STATUS_CONTINUE +// return PARSE_STATUS_STOP +static enum parse_status raw_packet_parser_status(struct raw_pkt_parser *handler, const void *data, enum layer_type this_type) +{ + /* + * only when this_type & handler->expect_type is true, + * the information of the current layer will be recorded in results. + */ + if (!(this_type & handler->expect_type)) + { + return PARSE_STATUS_CONTINUE; + } + + uint16_t offset = (uintptr_t)data - (uintptr_t)(handler->ptr_pkt_start); + if (raw_packet_parser_push(handler, this_type, offset) < 0) + { + return PARSE_STATUS_STOP; + } + else + { + return PARSE_STATUS_CONTINUE; + } +} + +static const char *ldbc_method_to_string(enum ldbc_method ldbc_method) +{ + switch (ldbc_method) + { + case LDBC_METHOD_HASH_INT_IP: + return "outter_internal_ip"; + case LDBC_METHOD_HASH_EXT_IP: + return "outter_external_ip"; + case LDBC_METHOD_HASH_INT_IP_AND_EXT_IP: + return "outter_internal_ip_and_external_ip"; + case LDBC_METHOD_HASH_INNERMOST_INT_IP: + return "inner_internal_ip"; + case LDBC_METHOD_HASH_INNERMOST_EXT_IP: + return "inner_external_ip"; + default: + return "unknown"; + } +} + +static void set_addr_tuple4(const void *data, enum layer_type layer_type, struct addr_tuple4 *addr) +{ + const struct tcphdr *tcp_hdr = NULL; + const struct udp_hdr *udp_hdr = NULL; + const struct ip *ipv4_hdr = NULL; + const struct ip6_hdr *ipv6_hdr = NULL; + + switch (layer_type) + { + case LAYER_TYPE_TCP: + tcp_hdr = (const struct tcphdr *)data; + addr->src_port = tcp_hdr->th_sport; + addr->dst_port = tcp_hdr->th_dport; + break; + case LAYER_TYPE_UDP: + udp_hdr = (const struct udp_hdr *)data; + addr->src_port = udp_hdr->uh_sport; + addr->dst_port = udp_hdr->uh_dport; + break; + case LAYER_TYPE_IPV4: + ipv4_hdr = (const struct ip *)data; + addr->addr_type = ADDR_TUPLE4_TYPE_V4; + addr->addr_v4.src_addr = ipv4_hdr->ip_src; + addr->addr_v4.dst_addr = ipv4_hdr->ip_dst; + break; + case LAYER_TYPE_IPV6: + ipv6_hdr = (const struct ip6_hdr *)data; + addr->addr_type = ADDR_TUPLE4_TYPE_V6; + memcpy(&addr->addr_v6.src_addr, &ipv6_hdr->ip6_src, sizeof(addr->addr_v6.src_addr)); + memcpy(&addr->addr_v6.dst_addr, &ipv6_hdr->ip6_dst, sizeof(addr->addr_v6.dst_addr)); + break; + default: + break; + } +} + +static const char *layer_type2str(enum layer_type this_type) +{ + switch (this_type) + { + case LAYER_TYPE_ETHER: + return "ETHER"; + case LAYER_TYPE_PPP: + return "PPP"; + case LAYER_TYPE_HDLC: + return "HDLC"; + case LAYER_TYPE_VLAN: + return "VLAN"; + case LAYER_TYPE_PPPOE: + return "PPPOE"; + case LAYER_TYPE_MPLS: + return "MPLS"; + case LAYER_TYPE_IPV4: + return "IPV4"; + case LAYER_TYPE_IPV6: + return "IPV6"; + case LAYER_TYPE_UDP: + return "UDP"; + case 
LAYER_TYPE_TCP:
+        return "TCP";
+    case LAYER_TYPE_G_VXLAN:
+        return "G_VXLAN";
+    case LAYER_TYPE_GTPV1_U:
+        return "GTPV1_U";
+    default:
+        return "UNKNOWN";
+    }
+}
+
+// FROM SAPP
+// return the GTP-U header length in bytes, or 0 if the header cannot be parsed
+static uint16_t parse_gtphdr_len(const struct gtp_hdr *gtph)
+{
+    const unsigned char *p_ext_hdr = (unsigned char *)gtph + sizeof(struct gtp_hdr);
+    unsigned char next_hdr_type;
+    unsigned char this_ext_field_cont_len;
+
+    // GTPv0 is ancient and has been deprecated; only GTPv1 is supported here
+    if (((gtph->flags & GTP_HDR_VER_MASK) >> 5) != 1)
+    {
+        return 0;
+    }
+
+    if (gtph->flags & (GTP_HDR_FLAG_SEQ_NUM | GTP_HDR_FLAG_N_PDU | GTP_HDR_FLAG_NEXT_EXT_HDR))
+    {
+        // skip seq field (2 bytes)
+        p_ext_hdr += 2;
+
+        // skip N-PDU field (1 byte)
+        p_ext_hdr++;
+
+        // parse the GTP extension header fields, see the wireshark source packet-gtp.c->dissect_gtp_common()
+        next_hdr_type = *p_ext_hdr;
+        if (gtph->flags & GTP_HDR_FLAG_NEXT_EXT_HDR)
+        {
+            while (next_hdr_type != 0)
+            {
+                // move to the length field, counted in units of 4 bytes
+                p_ext_hdr++;
+                this_ext_field_cont_len = *p_ext_hdr * 4 - 2;
+
+                // move to the first byte of the content
+                p_ext_hdr++;
+                p_ext_hdr += this_ext_field_cont_len;
+
+                // move to the next extension header type field
+                next_hdr_type = *p_ext_hdr;
+                p_ext_hdr++;
+            }
+        }
+        else
+        {
+            p_ext_hdr++;
+        }
+    }
+
+    return (char *)p_ext_hdr - (char *)gtph;
+}
+
+static const void *parse_ether(struct raw_pkt_parser *handler, const void *data, size_t length, enum layer_type this_type)
+{
+    if (length < sizeof(struct ethhdr))
+    {
+        TFE_LOG_ERROR(g_default_logger, "%s: pkt_trace_id: %lu, this_layer: %s, err: data not enough", LOG_TAG_RAWPKT, handler->pkt_trace_id, layer_type2str(this_type));
+        return data;
+    }
+
+    if (raw_packet_parser_status(handler, data, this_type) == PARSE_STATUS_STOP)
+    {
+        return data;
+    }
+
+    struct ethhdr *hdr = (struct ethhdr *)data;
+    uint16_t next_proto = ntohs(hdr->h_proto);
+    uint16_t hdr_len = sizeof(struct ethhdr);
+    const void *data_next_layer = (const char *)data + hdr_len;
+    size_t data_next_length = length - hdr_len;
+
+    TFE_LOG_DEBUG(g_default_logger, "%s: pkt_trace_id: %lu, this_layer: %s, payload_len: [%lu/%lu]", LOG_TAG_RAWPKT, handler->pkt_trace_id, layer_type2str(this_type), data_next_length, length);
+    switch (next_proto)
+    {
+    case ETH_P_8021Q:
+        // TESTED
+        return parse_vlan8021q(handler, data_next_layer, data_next_length, LAYER_TYPE_VLAN);
+    case ETH_P_8021AD:
+        // TODO
+        return parse_ether(handler, data_next_layer, data_next_length, LAYER_TYPE_ETHER);
+    case ETH_P_IP:
+        // TESTED
+        return parse_ipv4(handler, data_next_layer, data_next_length, LAYER_TYPE_IPV4);
+    case ETH_P_IPV6:
+        // TESTED
+        return parse_ipv6(handler, data_next_layer, data_next_length, LAYER_TYPE_IPV6);
+    case ETH_P_PPP_SES:
+        // TODO
+        return parse_pppoe_ses(handler, data_next_layer, data_next_length, LAYER_TYPE_PPPOE);
+    case ETH_P_MPLS_UC:
+        // TESTED
+        return parse_mpls(handler, data_next_layer, data_next_length, LAYER_TYPE_MPLS);
+    default:
+        TFE_LOG_ERROR(g_default_logger, "%s: pkt_trace_id: %lu, this_layer: %s, stop parse next protocol %d", LOG_TAG_RAWPKT, handler->pkt_trace_id, layer_type2str(this_type), next_proto);
+        return data_next_layer;
+    }
+}
+
+static const void *parse_ipv4(struct raw_pkt_parser *handler, const void *data, size_t length, enum layer_type this_type)
+{
+    if (length < sizeof(struct ip))
+    {
+        TFE_LOG_ERROR(g_default_logger, "%s: pkt_trace_id: %lu, this_layer: %s, err: data not enough", LOG_TAG_RAWPKT, handler->pkt_trace_id, layer_type2str(this_type));
+        return data;
+    }
+
+    if (raw_packet_parser_status(handler, data, this_type) == PARSE_STATUS_STOP)
+    {
+        return data;
+    }
+
+    struct ip *hdr = (struct ip *)data;
+    uint16_t
next_proto = hdr->ip_p; + uint16_t hdr_len = (hdr->ip_hl & 0xf) * 4u; + const void *data_next_layer = (const char *)data + hdr_len; + size_t data_next_length = length - hdr_len; + + TFE_LOG_DEBUG(g_default_logger, "%s: pkt_trace_id: %lu, this_layer: %s, payload_len: [%lu/%lu]", LOG_TAG_RAWPKT, handler->pkt_trace_id, layer_type2str(this_type), data_next_length, length); + switch (next_proto) + { + case IPPROTO_TCP: + // TESTED + return parse_tcp(handler, data_next_layer, data_next_length, LAYER_TYPE_TCP); + case IPPROTO_UDP: + // TESTED + return parse_udp(handler, data_next_layer, data_next_length, LAYER_TYPE_UDP); + case IPPROTO_IPIP: + // TESTED + return parse_ipv4(handler, data_next_layer, data_next_length, LAYER_TYPE_IPV4); + case IPPROTO_IPV6: + // TESTED + return parse_ipv6(handler, data_next_layer, data_next_length, LAYER_TYPE_IPV6); + default: + TFE_LOG_ERROR(g_default_logger, "%s: pkt_trace_id: %lu, this_layer: %s, stop parse next protocol %d", LOG_TAG_RAWPKT, handler->pkt_trace_id, layer_type2str(this_type), next_proto); + return data_next_layer; + } +} + +static const void *parse_ipv6(struct raw_pkt_parser *handler, const void *data, size_t length, enum layer_type this_type) +{ + if (length < sizeof(struct ip6_hdr)) + { + TFE_LOG_ERROR(g_default_logger, "%s: pkt_trace_id: %lu, this_layer: %s, err: data not enough", LOG_TAG_RAWPKT, handler->pkt_trace_id, layer_type2str(this_type)); + return data; + } + + if (raw_packet_parser_status(handler, data, this_type) == PARSE_STATUS_STOP) + { + return data; + } + + struct ip6_hdr *hdr = (struct ip6_hdr *)data; + uint16_t next_proto = hdr->ip6_nxt; + uint16_t hdr_len = sizeof(struct ip6_hdr); + const void *data_next_layer = (const char *)data + hdr_len; + size_t data_next_length = length - hdr_len; + + TFE_LOG_DEBUG(g_default_logger, "%s: pkt_trace_id: %lu, this_layer: %s, payload_len: [%lu/%lu]", LOG_TAG_RAWPKT, handler->pkt_trace_id, layer_type2str(this_type), data_next_length, length); + switch (next_proto) + { + case IPPROTO_TCP: + // TESTED + return parse_tcp(handler, data_next_layer, data_next_length, LAYER_TYPE_TCP); + case IPPROTO_UDP: + // TESTED + return parse_udp(handler, data_next_layer, data_next_length, LAYER_TYPE_UDP); + case IPPROTO_IPIP: + // TESTED + return parse_ipv4(handler, data_next_layer, data_next_length, LAYER_TYPE_IPV4); + case IPPROTO_IPV6: + // TESTED + return parse_ipv6(handler, data_next_layer, data_next_length, LAYER_TYPE_IPV6); + default: + TFE_LOG_ERROR(g_default_logger, "%s: pkt_trace_id: %lu, this_layer: %s, stop parse next protocol %d", LOG_TAG_RAWPKT, handler->pkt_trace_id, layer_type2str(this_type), next_proto); + return data_next_layer; + } +} + +static const void *parse_tcp(struct raw_pkt_parser *handler, const void *data, size_t length, enum layer_type this_type) +{ + if (length < sizeof(struct tcphdr)) + { + TFE_LOG_ERROR(g_default_logger, "%s: pkt_trace_id: %lu, this_layer: %s, err: data not enough", LOG_TAG_RAWPKT, handler->pkt_trace_id, layer_type2str(this_type)); + return data; + } + + if (raw_packet_parser_status(handler, data, this_type) == PARSE_STATUS_STOP) + { + return data; + } + + struct tcphdr *hdr = (struct tcphdr *)data; + uint16_t hdr_len = hdr->th_off << 2; + const void *data_next_layer = (const char *)data + hdr_len; + size_t data_next_length = length - hdr_len; + + TFE_LOG_DEBUG(g_default_logger, "%s: pkt_trace_id: %lu, this_layer: %s, payload_len: [%lu/%lu]", LOG_TAG_RAWPKT, handler->pkt_trace_id, layer_type2str(this_type), data_next_length, length); + + return data_next_layer; +} 
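+
+// UDP may carry a further encapsulation layer: parse_udp() keys off the well-known destination
+// ports (4789 for VXLAN, 2152 for GTPv1-U) to decide whether to keep descending into the tunnel payload.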
+ +static const void *parse_udp(struct raw_pkt_parser *handler, const void *data, size_t length, enum layer_type this_type) +{ + if (length < sizeof(struct udp_hdr)) + { + TFE_LOG_ERROR(g_default_logger, "%s: pkt_trace_id: %lu, this_layer: %s, err: data not enough", LOG_TAG_RAWPKT, handler->pkt_trace_id, layer_type2str(this_type)); + return data; + } + + if (raw_packet_parser_status(handler, data, this_type) == PARSE_STATUS_STOP) + { + return data; + } + + struct udp_hdr *hdr = (struct udp_hdr *)data; + uint16_t hdr_len = sizeof(struct udp_hdr); + const void *data_next_layer = (const char *)data + hdr_len; + size_t data_next_length = length - hdr_len; + + TFE_LOG_DEBUG(g_default_logger, "%s: pkt_trace_id: %lu, this_layer: %s, payload_len: [%lu/%lu]", LOG_TAG_RAWPKT, handler->pkt_trace_id, layer_type2str(this_type), data_next_length, length); + switch (ntohs(hdr->uh_dport)) + { + // VXLAN_DPORT + case 4789: + // TESTED + return parse_vxlan(handler, data_next_layer, data_next_length, LAYER_TYPE_G_VXLAN); + // GTP1U_PORT + case 2152: + // TESTED + return parse_gtpv1_u(handler, data_next_layer, data_next_length, LAYER_TYPE_GTPV1_U); + default: + return data_next_layer; + } +} + +static const void *parse_pppoe_ses(struct raw_pkt_parser *handler, const void *data, size_t length, enum layer_type this_type) +{ + if (length < 8) + { + TFE_LOG_ERROR(g_default_logger, "%s: pkt_trace_id: %lu, this_layer: %s, err: data not enough", LOG_TAG_RAWPKT, handler->pkt_trace_id, layer_type2str(this_type)); + return data; + } + + if (raw_packet_parser_status(handler, data, this_type) == PARSE_STATUS_STOP) + { + return data; + } + + uint16_t next_proto = *((uint16_t *)data + 3); + uint16_t hdr_len = 8; + const void *data_next_layer = (const char *)data + hdr_len; + size_t data_next_length = length - hdr_len; + + TFE_LOG_DEBUG(g_default_logger, "%s: pkt_trace_id: %lu, this_layer: %s, payload_len: [%lu/%lu]", LOG_TAG_RAWPKT, handler->pkt_trace_id, layer_type2str(this_type), data_next_length, length); + switch (next_proto) + { + // PPPOE_TYPE_IPV4 + case 0x2100: + // TESTED + return parse_ipv4(handler, data_next_layer, data_next_length, LAYER_TYPE_IPV4); + // PPPOE_TYPE_IPV6 + case 0x5700: + // TODO + return parse_ipv6(handler, data_next_layer, data_next_length, LAYER_TYPE_IPV6); + default: + TFE_LOG_ERROR(g_default_logger, "%s: pkt_trace_id: %lu, this_layer: %s, stop parse next protocol %d", LOG_TAG_RAWPKT, handler->pkt_trace_id, layer_type2str(this_type), next_proto); + return data_next_layer; + } +} + +static const void *parse_vxlan(struct raw_pkt_parser *handler, const void *data, size_t length, enum layer_type this_type) +{ + if (length < sizeof(struct vxlan_hdr)) + { + TFE_LOG_ERROR(g_default_logger, "%s: pkt_trace_id: %lu, this_layer: %s, err: data not enough", LOG_TAG_RAWPKT, handler->pkt_trace_id, layer_type2str(this_type)); + return NULL; + } + + if (raw_packet_parser_status(handler, data, this_type) == PARSE_STATUS_STOP) + { + return data; + } + + struct vxlan_hdr *vxlan_hdr = (struct vxlan_hdr *)data; + uint16_t hdr_len = sizeof(struct vxlan_hdr); + const void *data_next_layer = (const char *)data + hdr_len; + size_t data_next_length = length - hdr_len; + + TFE_LOG_DEBUG(g_default_logger, "%s: pkt_trace_id: %lu, this_layer: %s, payload_len: [%lu/%lu]", LOG_TAG_RAWPKT, handler->pkt_trace_id, layer_type2str(this_type), data_next_length, length); + // TESTED + return parse_ether(handler, data_next_layer, data_next_length, LAYER_TYPE_ETHER); +} + +static const void *parse_vlan8021q(struct raw_pkt_parser 
*handler, const void *data, size_t length, enum layer_type this_type) +{ + if (length < sizeof(struct vlan_hdr)) + { + TFE_LOG_ERROR(g_default_logger, "%s: pkt_trace_id: %lu, this_layer: %s, err: data not enough", LOG_TAG_RAWPKT, handler->pkt_trace_id, layer_type2str(this_type)); + return NULL; + } + + if (raw_packet_parser_status(handler, data, this_type) == PARSE_STATUS_STOP) + { + return data; + } + + struct vlan_hdr *hdr = (struct vlan_hdr *)data; + uint16_t next_proto = ntohs(hdr->protocol); + uint16_t hdr_len = sizeof(struct vlan_hdr); + const void *data_next_layer = (const char *)data + hdr_len; + size_t data_next_length = length - hdr_len; + + TFE_LOG_DEBUG(g_default_logger, "%s: pkt_trace_id: %lu, this_layer: %s, payload_len: [%lu/%lu]", LOG_TAG_RAWPKT, handler->pkt_trace_id, layer_type2str(this_type), data_next_length, length); + switch (next_proto) + { + case ETH_P_8021Q: + // TESTED + return parse_vlan8021q(handler, data_next_layer, data_next_length, LAYER_TYPE_VLAN); + case ETH_P_IP: + // TESTED + return parse_ipv4(handler, data_next_layer, data_next_length, LAYER_TYPE_IPV4); + case ETH_P_IPV6: + // TODO + return parse_ipv6(handler, data_next_layer, data_next_length, LAYER_TYPE_IPV6); + case ETH_P_PPP_SES: + // TESTED + return parse_pppoe_ses(handler, data_next_layer, data_next_length, LAYER_TYPE_PPPOE); + case ETH_P_MPLS_UC: + // TODO + return parse_mpls(handler, data_next_layer, data_next_length, LAYER_TYPE_MPLS); + default: + TFE_LOG_ERROR(g_default_logger, "%s: pkt_trace_id: %lu, this_layer: %s, stop parse next protocol %d", LOG_TAG_RAWPKT, handler->pkt_trace_id, layer_type2str(this_type), next_proto); + return data_next_layer; + } +} + +static const void *parse_gtpv1_u(struct raw_pkt_parser *handler, const void *data, size_t length, enum layer_type this_type) +{ + if (length < sizeof(struct gtp_hdr)) + { + TFE_LOG_ERROR(g_default_logger, "%s: pkt_trace_id: %lu, this_layer: %s, err: data not enough", LOG_TAG_RAWPKT, handler->pkt_trace_id, layer_type2str(this_type)); + return NULL; + } + + uint16_t hdr_len = parse_gtphdr_len((const struct gtp_hdr *)data); + if (hdr_len < 0) + { + return data; + } + + if (raw_packet_parser_status(handler, data, this_type) == PARSE_STATUS_STOP) + { + return data; + } + + uint8_t next_proto = (((const uint8_t *)((const char *)data + hdr_len))[0]) >> 4; + const void *data_next_layer = (const char *)data + hdr_len; + size_t data_next_length = length - hdr_len; + + TFE_LOG_DEBUG(g_default_logger, "%s: pkt_trace_id: %lu, this_layer: %s, payload_len: [%lu/%lu]", LOG_TAG_RAWPKT, handler->pkt_trace_id, layer_type2str(this_type), data_next_length, length); + switch (next_proto) + { + case 4: + // TESTED + return parse_ipv4(handler, data_next_layer, data_next_length, LAYER_TYPE_IPV4); + case 6: + // TESTED + return parse_ipv6(handler, data_next_layer, data_next_length, LAYER_TYPE_IPV6); + default: + TFE_LOG_ERROR(g_default_logger, "%s: pkt_trace_id: %lu, this_layer: %s, stop parse next protocol %d", LOG_TAG_RAWPKT, handler->pkt_trace_id, layer_type2str(this_type), next_proto); + return data_next_layer; + } +} + +static const void *parse_mpls(struct raw_pkt_parser *handler, const void *data, size_t length, enum layer_type this_type) +{ + if (length < 4) + { + TFE_LOG_ERROR(g_default_logger, "%s: pkt_trace_id: %lu, this_layer: %s, err: data not enough", LOG_TAG_RAWPKT, handler->pkt_trace_id, layer_type2str(this_type)); + return data; + } + + if (raw_packet_parser_status(handler, data, this_type) == PARSE_STATUS_STOP) + { + return data; + } + +#define 
MPLS_LABEL_MASK (0xFFFFF000) +#define MPLS_EXP_MASK (0x00000E00) +#define MPLS_BLS_MASK (0x00000100) +#define MPLS_TTL_MASK (0x000000FF) + + /* + * MPLS Format + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Label | Exp |S| TTL | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * Label : Label Value 20 bits + * Exp : Experimental Use 3 bits + * S : Bottom of Stack 1 bit + * TTL : Time to Live 8 bits + */ + + uint32_t *hdr = (uint32_t *)data; + // unsigned int mpls_label = (ntohl(*hdr) & MPLS_LABEL_MASK) >> 12; + // unsigned int mpls_exp = (ntohl(*hdr) & MPLS_EXP_MASK) >> 9; + unsigned int mpls_bls = (ntohl(*hdr) & MPLS_BLS_MASK) >> 8; + // unsigned int mpls_ttl = (ntohl(*hdr) & MPLS_TTL_MASK); + + uint16_t hdr_len = 4; + const void *data_next_layer = (const char *)data + hdr_len; + size_t data_next_length = length - hdr_len; + + if (mpls_bls == 1) + { + uint8_t ip_version = (((uint8_t *)data_next_layer)[0]) >> 4; + if (ip_version == 0) + { + /* + * PW Ethernet Control Word + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |0 0 0 0| Reserved | Sequence Number | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * Reference: https://tools.ietf.org/html/rfc4448 + */ + data_next_layer = (const char *)data_next_layer + 4; + data_next_length = data_next_length - 4; + + TFE_LOG_DEBUG(g_default_logger, "%s: pkt_trace_id: %lu, this_layer: %s, payload_len: [%lu/%lu]", LOG_TAG_RAWPKT, handler->pkt_trace_id, layer_type2str(this_type), data_next_length, length); + // TESTED + return parse_ether(handler, data_next_layer, data_next_length, LAYER_TYPE_ETHER); + } + else if (ip_version == 4) + { + TFE_LOG_DEBUG(g_default_logger, "%s: pkt_trace_id: %lu, this_layer: %s, payload_len: [%lu/%lu]", LOG_TAG_RAWPKT, handler->pkt_trace_id, layer_type2str(this_type), data_next_length, length); + // TESTED + return parse_ipv4(handler, data_next_layer, data_next_length, LAYER_TYPE_IPV4); + } + else if (ip_version == 6) + { + TFE_LOG_DEBUG(g_default_logger, "%s: pkt_trace_id: %lu, this_layer: %s, payload_len: [%lu/%lu]", LOG_TAG_RAWPKT, handler->pkt_trace_id, layer_type2str(this_type), data_next_length, length); + // TODO + return parse_ipv6(handler, data_next_layer, data_next_length, LAYER_TYPE_IPV6); + } + else + { + TFE_LOG_DEBUG(g_default_logger, "%s: pkt_trace_id: %lu, this_layer: %s, payload_len: [%lu/%lu]", LOG_TAG_RAWPKT, handler->pkt_trace_id, layer_type2str(this_type), data_next_length, length); + // TODO + return parse_ether(handler, data_next_layer, data_next_length, LAYER_TYPE_ETHER); + } + } + else + { + TFE_LOG_DEBUG(g_default_logger, "%s: pkt_trace_id: %lu, this_layer: %s, payload_len: [%lu/%lu]", LOG_TAG_RAWPKT, handler->pkt_trace_id, layer_type2str(this_type), data_next_length, length); + // TESTED + return parse_mpls(handler, data_next_layer, data_next_length, LAYER_TYPE_MPLS); + } +} \ No newline at end of file diff --git a/common/src/tfe_session_table.cpp b/common/src/tfe_session_table.cpp new file mode 100644 index 0000000..985a149 --- /dev/null +++ b/common/src/tfe_session_table.cpp @@ -0,0 +1,219 @@ +#include + +#include "tfe_session_table.h" +#include + +struct session_table +{ + struct session_node *root_by_id; + struct session_node *root_by_addr; + uint64_t session_node_count; +}; + +// Note: session_addr must be initialized by memset(0) 
before use !!! + +struct session_table *session_table_create() +{ + struct session_table *table = (struct session_table *)calloc(1, sizeof(struct session_table)); + assert(table); + table->session_node_count = 0; + + return table; +} + +void session_table_destory(struct session_table *table) +{ + if (table) + { + struct session_node *temp = NULL; + struct session_node *node = NULL; + HASH_ITER(hh1, table->root_by_id, node, temp) + { + HASH_DELETE(hh1, table->root_by_id, node); + HASH_DELETE(hh2, table->root_by_addr, node); + + if (node->val_freecb && node->val_data) + { + node->val_freecb(node->val_data); + } + + free(node); + node = NULL; + } + + free(table); + table = NULL; + } +} + +void session_table_reset(struct session_table *table) +{ + if (table) + { + struct session_node *temp = NULL; + struct session_node *node = NULL; + HASH_ITER(hh1, table->root_by_id, node, temp) + { + HASH_DELETE(hh1, table->root_by_id, node); + HASH_DELETE(hh2, table->root_by_addr, node); + + if (node->val_freecb && node->val_data) + { + node->val_freecb(node->val_data); + } + + free(node); + node = NULL; + + table->session_node_count--; + } + } +} + +uint64_t session_table_count(struct session_table *table) +{ + if (table) + { + return table->session_node_count; + } + else + { + return 0; + } +} + +// session_addr : deep copy +// val_data : shallow copy (malloc by user, free by val_freecb) +int session_table_insert(struct session_table *table, uint64_t session_id, const struct addr_tuple4 *session_addr, void *val_data, const fn_free_cb *val_freecb) +{ + struct session_node *temp = NULL; + HASH_FIND(hh1, table->root_by_id, &session_id, sizeof(session_id), temp); + if (temp) + { + TFE_LOG_DEBUG(g_default_logger, "%s: insert: key %lu exists", LOG_TAG_STABLE, session_id); + return -1; + } + + temp = (struct session_node *)calloc(1, sizeof(struct session_node)); + assert(temp); + + temp->session_id = session_id; + memcpy(&temp->session_addr, session_addr, sizeof(struct addr_tuple4)); + temp->val_data = val_data; + temp->val_freecb = val_freecb; + + HASH_ADD(hh1, table->root_by_id, session_id, sizeof(temp->session_id), temp); + HASH_ADD(hh2, table->root_by_addr, session_addr, sizeof(temp->session_addr), temp); + + TFE_LOG_DEBUG(g_default_logger, "%s: insert: key %lu success", LOG_TAG_STABLE, session_id); + table->session_node_count++; + + return 0; +} + +int session_table_delete_by_id(struct session_table *table, uint64_t session_id) +{ + struct session_node *temp = NULL; + HASH_FIND(hh1, table->root_by_id, &session_id, sizeof(session_id), temp); + if (!temp) + { + TFE_LOG_DEBUG(g_default_logger, "%s: delete: key %lu not exists", LOG_TAG_STABLE, session_id); + return -1; + } + + HASH_DELETE(hh1, table->root_by_id, temp); + HASH_DELETE(hh2, table->root_by_addr, temp); + + if (temp->val_freecb && temp->val_data) + { + temp->val_freecb(temp->val_data); + temp->val_data = NULL; + } + + free(temp); + temp = NULL; + + TFE_LOG_DEBUG(g_default_logger, "%s: delete: key %lu success", LOG_TAG_STABLE, session_id); + table->session_node_count--; + + return 0; +} + +int session_table_delete_by_addr(struct session_table *table, const struct addr_tuple4 *session_addr) +{ + struct session_node *temp = NULL; + char *addr_str = addr_tuple4_to_str(session_addr); + HASH_FIND(hh2, table->root_by_addr, session_addr, sizeof(struct addr_tuple4), temp); + if (!temp) + { + struct addr_tuple4 reverse_addr; + addr_tuple4_reverse(session_addr, &reverse_addr); + HASH_FIND(hh2, table->root_by_addr, &reverse_addr, sizeof(struct addr_tuple4), 
temp); + if (!temp) + { + TFE_LOG_DEBUG(g_default_logger, "%s: delete: key %s not exists", LOG_TAG_STABLE, addr_str); + free(addr_str); + return -1; + } + } + + HASH_DELETE(hh1, table->root_by_id, temp); + HASH_DELETE(hh2, table->root_by_addr, temp); + + if (temp->val_freecb && temp->val_data) + { + temp->val_freecb(temp->val_data); + temp->val_data = NULL; + } + + free(temp); + temp = NULL; + + TFE_LOG_DEBUG(g_default_logger, "%s: delete: key %s success", LOG_TAG_STABLE, addr_str); + free(addr_str); + addr_str = NULL; + table->session_node_count--; + + return 0; +} + +struct session_node *session_table_search_by_id(struct session_table *table, uint64_t session_id) +{ + struct session_node *temp = NULL; + HASH_FIND(hh1, table->root_by_id, &session_id, sizeof(session_id), temp); + if (!temp) + { + TFE_LOG_DEBUG(g_default_logger, "%s: search: key %lu not exists", LOG_TAG_STABLE, session_id); + return NULL; + } + + TFE_LOG_DEBUG(g_default_logger, "%s: search: key %lu success", LOG_TAG_STABLE, session_id); + + return temp; +} + +struct session_node *session_table_search_by_addr(struct session_table *table, const struct addr_tuple4 *session_addr) +{ + struct session_node *temp = NULL; + char *addr_str = addr_tuple4_to_str(session_addr); + HASH_FIND(hh2, table->root_by_addr, session_addr, sizeof(struct addr_tuple4), temp); + if (!temp) + { + struct addr_tuple4 reverse_addr; + addr_tuple4_reverse(session_addr, &reverse_addr); + HASH_FIND(hh2, table->root_by_addr, &reverse_addr, sizeof(struct addr_tuple4), temp); + if (!temp) + { + TFE_LOG_DEBUG(g_default_logger, "%s: search: key %s not exists", LOG_TAG_STABLE, addr_str); + free(addr_str); + addr_str = NULL; + return NULL; + } + } + + TFE_LOG_DEBUG(g_default_logger, "%s: search: key %s success", LOG_TAG_STABLE, addr_str); + free(addr_str); + addr_str = NULL; + + return temp; +} diff --git a/common/src/tfe_tap_rss.cpp b/common/src/tfe_tap_rss.cpp new file mode 100644 index 0000000..61286e8 --- /dev/null +++ b/common/src/tfe_tap_rss.cpp @@ -0,0 +1,389 @@ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#if (SUPPORT_BPF) +#include "../../bpf/bpf_conf_user.h" +#include +#include +#endif + +#include "tfe_acceptor_kni.h" +#include "tfe_tap_rss.h" +#include "tfe_utils.h" + +#ifndef TUN_PATH +#define TUN_PATH "/dev/net/tun" +#endif + +struct bpf_ctx +{ + int bpf_prog_fd; + int bpf_map_fd; + + char bpf_file[1024]; +#if (SUPPORT_BPF) + struct bpf_object *bpf_obj; + bpf_conf_t bpf_conf; +#endif +}; + +int tfe_tap_get_bpf_prog_fd(struct bpf_ctx *ctx) +{ + if (ctx) + { + return ctx->bpf_prog_fd; + } + else + { + return -1; + } +} + +#if (SUPPORT_BPF) +void tfe_tap_global_unload_rss_bpf(struct bpf_ctx *ctx) +{ + if (ctx) + { + if (ctx->bpf_prog_fd > 0) + { + close(ctx->bpf_prog_fd); + } + + if (ctx->bpf_obj) + { + bpf_object__close(ctx->bpf_obj); + ctx->bpf_obj = NULL; + } + + free(ctx); + ctx = NULL; + } +} +#else +void tfe_tap_global_unload_rss_bpf(struct bpf_ctx *ctx) +{ +} +#endif + +/* + * bpf_queue_num : worker thread number + * bpf_default_queue : -1: for disable(only use for debug, rss to one queue) + * bpf_hash_mode : 2: hash with src/dst addr + * 4: hash with src/dst addr and src/dst port + * bpf_debug_log : 0 for disable(only use for debug, printf bpf debug log) + */ +#if (SUPPORT_BPF) +struct bpf_ctx *tfe_tap_global_load_rss_bpf(const char *bpf_obj_file, uint32_t bpf_queue_num, uint32_t bpf_hash_mode, uint32_t bpf_debug_log, void *logger) +{ + 
struct bpf_ctx *ctx = (struct bpf_ctx *)calloc(1, sizeof(struct bpf_ctx)); + strncpy(ctx->bpf_file, bpf_obj_file, strlen(bpf_obj_file)); + + bpf_conf_set_debug_log(&ctx->bpf_conf, bpf_debug_log); + bpf_conf_set_hash_mode(&ctx->bpf_conf, bpf_hash_mode); + bpf_conf_set_queue_num(&ctx->bpf_conf, bpf_queue_num); + + if (bpf_prog_load(ctx->bpf_file, BPF_PROG_TYPE_SOCKET_FILTER, &ctx->bpf_obj, &ctx->bpf_prog_fd) < 0) + { + TFE_LOG_ERROR(g_default_logger, TAP_RSS_LOG_TAG "unable to load bpf object %s, aborting: %s", ctx->bpf_file, strerror(errno)); + goto error; + } + + if (bpf_conf_update_map(&ctx->bpf_conf, ctx->bpf_obj) == -1) + { + goto error; + } + + return ctx; + +error: + tfe_tap_global_unload_rss_bpf(ctx); + + return NULL; +} +#else +struct bpf_ctx *tfe_tap_global_load_rss_bpf(const char *bpf_obj_file, uint32_t bpf_queue_num, uint32_t bpf_hash_mode, uint32_t bpf_debug_log, void *logger) +{ + TFE_LOG_ERROR(g_default_logger, TAP_RSS_LOG_TAG "not support bpf"); + return NULL; +} +#endif + +struct tap_ctx *tfe_tap_ctx_create(void *ctx) +{ + struct acceptor_thread_ctx *thread_ctx = (struct acceptor_thread_ctx *)ctx; + struct acceptor_ctx *acceptor_ctx = thread_ctx->ref_acceptor_ctx; + struct tap_ctx *tap_ctx = (struct tap_ctx *)calloc(1, sizeof(struct tap_ctx)); + assert(tap_ctx != NULL); + + tap_ctx->tap_fd = tfe_tap_open_per_thread(acceptor_ctx->config->tap_device, IFF_TAP | IFF_NO_PI | IFF_MULTI_QUEUE, tfe_tap_get_bpf_prog_fd(acceptor_ctx->config->tap_bpf_ctx), g_default_logger); + tap_ctx->tap_c = tfe_tap_open_per_thread(acceptor_ctx->config->tap_c_device, IFF_TAP | IFF_NO_PI | IFF_MULTI_QUEUE, tfe_tap_get_bpf_prog_fd(acceptor_ctx->config->tap_bpf_ctx), g_default_logger); + tap_ctx->tap_s = tfe_tap_open_per_thread(acceptor_ctx->config->tap_s_device, IFF_TAP | IFF_NO_PI | IFF_MULTI_QUEUE, tfe_tap_get_bpf_prog_fd(acceptor_ctx->config->tap_bpf_ctx), g_default_logger); + + return tap_ctx; +} + +struct tap_config *tfe_tap_config_create(const char *profile, int thread_num) +{ + int ret = 0; + int tap_allow_mutilthread = 0; + uint32_t bpf_debug_log = 0; + uint32_t bpf_hash_mode = 2; + uint32_t bpf_queue_num = thread_num; + char bpf_obj[1024] = {0}; + + struct tap_config *tap = (struct tap_config *)calloc(1, sizeof(struct tap_config)); + assert(tap != NULL); + + MESA_load_profile_int_nodef(profile, "tap", "tap_rps_enable", &tap->tap_rps_enable); + MESA_load_profile_string_def(profile, "tap", "tap_name", tap->tap_device, sizeof(tap->tap_device), "tap0"); + MESA_load_profile_string_def(profile, "traffic_steering", "device_client", tap->tap_c_device, sizeof(tap->tap_c_device), "tap_c"); + MESA_load_profile_string_def(profile, "traffic_steering", "device_server", tap->tap_s_device, sizeof(tap->tap_s_device), "tap_s"); + MESA_load_profile_int_nodef(profile, "tap", "bpf_debug_log", (int *)&bpf_debug_log); + MESA_load_profile_int_nodef(profile, "tap", "bpf_hash_mode", (int *)&bpf_hash_mode); + MESA_load_profile_string_nodef(profile, "tap", "bpf_obj", bpf_obj, sizeof(bpf_obj)); + MESA_load_profile_int_nodef(profile, "tap", "tap_allow_mutilthread", &tap_allow_mutilthread); + + MESA_load_profile_int_nodef(profile, "io_uring", "enable_iouring", &tap->enable_iouring); + MESA_load_profile_int_nodef(profile, "io_uring", "enable_debuglog", &tap->enable_debuglog); + MESA_load_profile_int_nodef(profile, "io_uring", "ring_size", &tap->ring_size); + MESA_load_profile_int_nodef(profile, "io_uring", "buff_size", &tap->buff_size); + MESA_load_profile_int_nodef(profile, "io_uring", "flags", &tap->flags); + 
MESA_load_profile_int_nodef(profile, "io_uring", "sq_thread_idle", &tap->sq_thread_idle); + + char src_mac_addr_str[TFE_SYMBOL_MAX]; + ret = MESA_load_profile_string_nodef(profile, "system", "src_mac_addr", src_mac_addr_str, sizeof(src_mac_addr_str)); + if(ret < 0){ + TFE_LOG_ERROR(g_default_logger, "MESA_prof_load: src_mac_addr not set, profile = %s, section = system", profile); + goto error_out; + } + str_to_mac(src_mac_addr_str, tap->src_mac); + get_mac_by_device_name(tap->tap_device, tap->tap_mac); + get_mac_by_device_name(tap->tap_c_device, tap->tap_c_mac); + get_mac_by_device_name(tap->tap_s_device, tap->tap_s_mac); + + if (tap->tap_rps_enable) + { + if (MESA_load_profile_string_nodef(profile, "tap", "tap_rps_mask", tap->tap_rps_mask, sizeof(tap->tap_rps_mask)) < 0) + { + TFE_LOG_ERROR(g_default_logger, TAP_RSS_LOG_TAG "under tap mode, when enable tap_rps_enable, tap_rps_mask is required."); + goto error_out; + } + } + + if (tap_allow_mutilthread) + { + tap->tap_bpf_ctx = tfe_tap_global_load_rss_bpf(bpf_obj, bpf_queue_num, bpf_hash_mode, bpf_debug_log, g_default_logger); + if (tap->tap_bpf_ctx == NULL) + { + goto error_out; + } + } + else if (thread_num > 1){ + TFE_LOG_ERROR(g_default_logger, TAP_RSS_LOG_TAG "under tap mode, when disable tap_allow_mutilthread, only support one work thread."); + goto error_out; + } + + return tap; + +error_out: + tfe_tap_destory(tap); + return NULL; +} + +void tfe_tap_destory(struct tap_config *tap) +{ + if (tap) + { + if (tap->tap_bpf_ctx) + { + tfe_tap_global_unload_rss_bpf(tap->tap_bpf_ctx); + tap->tap_bpf_ctx = NULL; + } + + free(tap); + tap = NULL; + } +} + +int tfe_tap_set_rps(void *local_logger, const char *tap_name, int thread_num, const char *rps_mask) +{ + char file[1024] = {0}; + + memset(file, 0, sizeof(file)); + snprintf(file, sizeof(file), "/sys/class/net/%s/queues/rx-%d/rps_cpus", tap_name, thread_num); + + FILE *fp = fopen(file, "w"); + if (fp == NULL) + { + TFE_LOG_ERROR(local_logger, "%s can't open %s, %s", TAP_RSS_LOG_TAG, file, strerror(errno)); + return -1; + } + + fwrite(rps_mask, strlen(rps_mask), 1, fp); + TFE_LOG_DEBUG(local_logger, TAP_RSS_LOG_TAG "set rps '%s' to %s", rps_mask, file); + fclose(fp); + return 0; +} + +int tfe_tap_open_per_thread(const char *tap_dev, int tap_flags, int bpf_prog_fd, void *logger) +{ + int fd = -1; + int tap_fd = -1; + int nonblock_flags = -1; + struct ifreq ifr; + + tap_fd = open(TUN_PATH, O_RDWR); + if (tap_fd == -1) + { + TFE_LOG_ERROR(g_default_logger, TAP_RSS_LOG_TAG "unable to open " TUN_PATH ", aborting: %s", strerror(errno)); + return -1; + } + + memset(&ifr, 0, sizeof(ifr)); + ifr.ifr_flags = tap_flags; + strcpy(ifr.ifr_name, tap_dev); + if (ioctl(tap_fd, TUNSETIFF, &ifr) == -1) + { + TFE_LOG_ERROR(g_default_logger, TAP_RSS_LOG_TAG "unable to attach %s, aborting: %s", tap_dev, strerror(errno)); + goto error; + } + + /* + * The TUNSETPERSIST ioctl can be used to make the TUN/TAP interface persistent. + * In this mode, the interface won't be destroyed when the last process closes the associated /dev/net/tun file descriptor. 
+ */ + /* + if (ioctl(tap_fd, TUNSETPERSIST, 1) == -1) + { + TFE_LOG_ERROR(g_default_logger, TAP_RSS_LOG_TAG "unable to set persist on %s, aborting: %s", tap_dev, strerror(errno)); + goto error; + } + */ +#if (SUPPORT_BPF) + if (bpf_prog_fd > 0) + { + // Set bpf + if (ioctl(tap_fd, TUNSETSTEERINGEBPF, (void *)&bpf_prog_fd) == -1) + { + TFE_LOG_ERROR(g_default_logger, TAP_RSS_LOG_TAG "unable to set bpf on %s, aborting: %s", tap_dev, strerror(errno)); + goto error; + } + } +#endif + + // Set nonblock + nonblock_flags = fcntl(tap_fd, F_GETFL); + if (nonblock_flags == -1) + { + TFE_LOG_ERROR(g_default_logger, TAP_RSS_LOG_TAG "unable to get nonblock flags on %s fd, aborting: %s", tap_dev, strerror(errno)); + goto error; + } + nonblock_flags |= O_NONBLOCK; + if (fcntl(tap_fd, F_SETFL, nonblock_flags) == -1) + { + TFE_LOG_ERROR(g_default_logger, TAP_RSS_LOG_TAG "unable to set nonblock flags on %s fd, aborting: %s", tap_dev, strerror(errno)); + goto error; + } + + // Get MTU + fd = socket(PF_INET, SOCK_DGRAM, 0); + if (fd == -1) + { + TFE_LOG_ERROR(g_default_logger, TAP_RSS_LOG_TAG "unable to create socket, aborting: %s", strerror(errno)); + goto error; + } + + memset(&ifr, 0, sizeof(ifr)); + strcpy(ifr.ifr_name, tap_dev); + if (ioctl(fd, SIOCGIFMTU, &ifr) < 0) + { + TFE_LOG_ERROR(g_default_logger, TAP_RSS_LOG_TAG "unable to get MTU on %s, aborting: %s", tap_dev, strerror(errno)); + goto error; + } + + // Set eth up + if (ioctl(fd, SIOCGIFFLAGS, &ifr) == -1) + { + TFE_LOG_ERROR(g_default_logger, TAP_RSS_LOG_TAG "unable to get link status on %s, aborting: %s", tap_dev, strerror(errno)); + goto error; + } + + if ((ifr.ifr_flags & IFF_UP) == 0) + { + ifr.ifr_flags |= IFF_UP; + if (ioctl(fd, SIOCSIFFLAGS, &ifr) < 0) + { + TFE_LOG_ERROR(g_default_logger, TAP_RSS_LOG_TAG "unable to set link status on %s, aborting: %s", tap_dev, strerror(errno)); + goto error; + } + } + + TFE_LOG_INFO(logger, TAP_RSS_LOG_TAG "using tap device %s with MTU %d", tap_dev, ifr.ifr_mtu); + close(fd); + + return tap_fd; + +error: + + if (fd > 0) + { + close(fd); + fd = -1; + } + + if (tap_fd > 0) + { + close(tap_fd); + tap_fd = -1; + } + + return -1; +} + +void tfe_tap_close_per_thread(int tap_fd) +{ + if (tap_fd > 0) + { + close(tap_fd); + } +} + +int tfe_tap_read_per_thread(int tap_fd, char *buff, int buff_size, void *logger) +{ + int ret = read(tap_fd, buff, buff_size); + if (ret < 0) + { + if (errno != EWOULDBLOCK && errno != EAGAIN) + { + TFE_LOG_ERROR(g_default_logger, TAP_RSS_LOG_TAG "unable to read data from tapfd %d, aborting: %s", tap_fd, strerror(errno)); + } + } + + return ret; +} + +int tfe_tap_write_per_thread(int tap_fd, const char *data, int data_len, void *logger) +{ + int ret = write(tap_fd, data, data_len); + if (ret != data_len) + { + TFE_LOG_ERROR(g_default_logger, TAP_RSS_LOG_TAG "need send %dB, only send %dB, aborting: %s", data_len, ret, strerror(errno)); + } + + return ret; +} \ No newline at end of file diff --git a/common/src/tfe_timestamp.cpp b/common/src/tfe_timestamp.cpp new file mode 100644 index 0000000..4e8949d --- /dev/null +++ b/common/src/tfe_timestamp.cpp @@ -0,0 +1,65 @@ +#include +#include + +#include +#include "tfe_timestamp.h" + +// 1 s = 1000 ms +// 1 ms = 1000 us +// 1 us = 1000 ns + +struct timestamp +{ + struct timespec timestamp; + uint64_t update_interval_ms; +}; + +struct timestamp *timestamp_new(uint64_t update_interval_ms) +{ + struct timestamp *ts = (struct timestamp *)calloc(1, sizeof(struct timestamp)); + ts->update_interval_ms = update_interval_ms; + + timestamp_update(ts); 
+ TFE_LOG_DEBUG(g_default_logger, "%s: TIMESTAMP->update_interval_ms : %lu", LOG_TAG_TIMESTAMP, timestamp_update_interval_ms(ts)); + TFE_LOG_DEBUG(g_default_logger, "%s: TIMESTAMP->current_sec : %lu", LOG_TAG_TIMESTAMP, timestamp_get_sec(ts)); + TFE_LOG_DEBUG(g_default_logger, "%s: TIMESTAMP->current_msec : %lu", LOG_TAG_TIMESTAMP, timestamp_get_msec(ts)); + + return ts; +} + +void timestamp_free(struct timestamp *ts) +{ + if (ts) + { + free(ts); + ts = NULL; + } +} + +void timestamp_update(struct timestamp *ts) +{ + struct timespec temp; + clock_gettime(CLOCK_MONOTONIC, &temp); + ATOMIC_SET(&(ts->timestamp.tv_sec), temp.tv_sec); + ATOMIC_SET(&(ts->timestamp.tv_nsec), temp.tv_nsec); +} + +uint64_t timestamp_update_interval_ms(struct timestamp *ts) +{ + return ts->update_interval_ms; +} + +uint64_t timestamp_get_sec(struct timestamp *ts) +{ + uint64_t sec = ATOMIC_READ(&(ts->timestamp.tv_sec)); + + return sec; +} + +uint64_t timestamp_get_msec(struct timestamp *ts) +{ + uint64_t sec = ATOMIC_READ(&(ts->timestamp.tv_sec)); + uint64_t nsec = ATOMIC_READ(&(ts->timestamp.tv_nsec)); + + return sec * 1000 + nsec / 1000000; +} diff --git a/common/src/tfe_utils.cpp b/common/src/tfe_utils.cpp index 5f66d30..4e8ef84 100644 --- a/common/src/tfe_utils.cpp +++ b/common/src/tfe_utils.cpp @@ -6,6 +6,12 @@ #include #include #include +#include +#include +#include +#include +#include +#include //functioned as strdup, for dictator compatible. char* tfe_strdup(const char* s) @@ -215,4 +221,173 @@ int tfe_decode_base64url(u_char *dst, u_char *src) }; return tfe_decode_base64_internal(dst, src, basis64); -} \ No newline at end of file +} + +/****************************************************************************** + * sids + ******************************************************************************/ + +void sids_write_once(struct sids *dst, struct sids *src) +{ + if (dst && src) + { + if (dst->num == 0 && src->num > 0) + { + sids_copy(dst, src); + } + } +} + +void sids_copy(struct sids *dst, struct sids *src) +{ + if (dst && src) + { + dst->num = src->num; + memcpy(dst->elems, src->elems, sizeof(dst->elems[0]) * dst->num); + } +} + +/****************************************************************************** + * route_ctx + ******************************************************************************/ + +int route_ctx_is_empty(struct route_ctx *ctx) +{ + if (ctx->len == 0) + { + return 1; + } + else + { + return 0; + } +} + +void route_ctx_copy(struct route_ctx *dst, struct route_ctx *src) +{ + memcpy(dst->data, src->data, src->len); + dst->len = src->len; +} + + +/****************************************************************************** + * protocol + ******************************************************************************/ + +#define CHECKSUM_CARRY(x) (x = (x >> 16) + (x & 0xffff), (~(x + (x >> 16)) & 0xffff)) + +static int checksum(u_int16_t *addr, int len) +{ + int sum = 0; + int nleft = len; + u_int16_t ans = 0; + u_int16_t *w = addr; + + while (nleft > 1) + { + sum += *w++; + nleft -= 2; + } + + if (nleft == 1) + { + *(char *)(&ans) = *(char *)w; + sum += ans; + } + + return sum; +} + +void build_udp_header(const char *l3_hdr, int l3_hdr_len, struct udp_hdr *udp_hdr, u_int16_t udp_sport, u_int16_t udp_dport, int payload_len) +{ + memset(udp_hdr, 0, sizeof(struct udp_hdr)); + + int udp_hlen = sizeof(struct udp_hdr) + payload_len; + + udp_hdr->uh_sport = htons(udp_sport); + udp_hdr->uh_dport = htons(udp_dport); + + udp_hdr->uh_ulen = htons(udp_hlen); + udp_hdr->uh_sum = 0; + + int sum 
= checksum((u_int16_t *)l3_hdr, l3_hdr_len); + sum += ntohs(IPPROTO_UDP + udp_hlen); + sum += checksum((u_int16_t *)udp_hdr, udp_hlen); + udp_hdr->uh_sum = CHECKSUM_CARRY(sum); +} + +void build_ip_header(struct ip *ip_hdr, u_int8_t next_protocol, const char *src_addr, const char *dst_addr, uint16_t payload_len) +{ + memset(ip_hdr, 0, sizeof(struct ip)); + + ip_hdr->ip_hl = 5; /* 20 byte header */ + ip_hdr->ip_v = 4; /* version 4 */ + ip_hdr->ip_tos = 0; /* IP tos */ + ip_hdr->ip_id = htons(random()); /* IP ID */ + ip_hdr->ip_ttl = 80; /* time to live */ + ip_hdr->ip_p = next_protocol; /* transport protocol */ + ip_hdr->ip_src.s_addr = inet_addr(src_addr); + ip_hdr->ip_dst.s_addr = inet_addr(dst_addr); + ip_hdr->ip_len = htons(sizeof(struct ip) + payload_len); /* total length */ + ip_hdr->ip_off = htons(0); /* fragmentation flags */ + ip_hdr->ip_sum = 0; /* do this later */ + + int sum = checksum((u_int16_t *)ip_hdr, 20); + ip_hdr->ip_sum = CHECKSUM_CARRY(sum); +} + +// l3_protocol: ETH_P_IPV6/ETH_P_IP +void build_ether_header(struct ethhdr *eth_hdr, uint16_t next_protocol, const char *src_mac, const char *dst_mac) +{ + memset(eth_hdr, 0, sizeof(struct ethhdr)); + + str_to_mac(src_mac, (char *)eth_hdr->h_source); + str_to_mac(dst_mac, (char *)eth_hdr->h_dest); + eth_hdr->h_proto = htons(next_protocol); +} + +int str_to_mac(const char *str, char *mac_buff) +{ + if (sscanf(str, "%02hhx:%02hhx:%02hhx:%02hhx:%02hhx:%02hhx", &(mac_buff[0]), &(mac_buff[1]), &(mac_buff[2]), &(mac_buff[3]), &(mac_buff[4]), &(mac_buff[5])) == 6) + { + return 0; + } + else + { + return -1; + } +} + +int get_mac_by_device_name(const char *dev_name, char *mac_buff) +{ + int fd = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP); + if (fd == -1) + { + return -1; + } + + struct ifreq ifr; + memset(&ifr, 0, sizeof(struct ifreq)); + strcpy(ifr.ifr_name, dev_name); + if (ioctl(fd, SIOCGIFHWADDR, &ifr) != 0) + { + close(fd); + return -1; + } + + unsigned char *mac = (unsigned char *)ifr.ifr_hwaddr.sa_data; + sprintf(mac_buff, "%02hhx:%02hhx:%02hhx:%02hhx:%02hhx:%02hhx", mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]); + close(fd); + + return 0; +} + +/****************************************************************************** + * throughput_metrics + ******************************************************************************/ + +void throughput_metrics_inc(struct throughput_metrics *iterm, uint64_t n_pkts, uint64_t n_bytes) +{ + __atomic_fetch_add(&iterm->n_bytes, n_bytes, __ATOMIC_RELAXED); + __atomic_fetch_add(&iterm->n_pkts, n_pkts, __ATOMIC_RELAXED); +} diff --git a/conf/tfe/tfe.conf b/conf/tfe/tfe.conf index 7dee4cf..1f6453f 100644 --- a/conf/tfe/tfe.conf +++ b/conf/tfe/tfe.conf @@ -2,7 +2,10 @@ nr_worker_threads=8 enable_kni_v1=0 enable_kni_v2=0 -enable_kni_v3=1 +enable_kni_v3=0 +enable_kni_v4=1 +firewall_sids=1001 +service_chaining_sids=1002 # Only when (disable_coredump == 1 || (enable_breakpad == 1 && enable_breakpad_upload == 1)) is satisfied, the core will not be generated locally disable_coredump=0 @@ -20,6 +23,8 @@ cpu_affinity_mask=1-9 # LEAST_CONN = 0; ROUND_ROBIN = 1 load_balance=1 +src_mac_addr = 00:0e:c6:d6:72:c1 + # for enable kni v3 [nfq] device=tap0 @@ -216,8 +221,52 @@ maat_redis_db_index=4 full_cfg_dir=pangu_policy/full/index/ inc_cfg_dir=pangu_policy/inc/index/ + [proxy_hits] cycle=1000 telegraf_port=8400 telegraf_ip=127.0.0.1 -app_name="proxy_rule_hits" \ No newline at end of file +app_name="proxy_rule_hits" + +# for enable kni v4 +[packet_io] +# bypass_all_traffic:1 NF2NF and SF2SF +bypass_all_traffic=0 
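A note on the checksum helpers added in common/src/tfe_utils.cpp above: checksum() accumulates the buffer as 16-bit words into a 32-bit sum, and CHECKSUM_CARRY() folds the carry bits back into the low 16 bits and takes the one's complement (RFC 1071 arithmetic). A small self-contained sketch of that fold, using an illustrative fold_checksum() name that is not part of this patch:

#include <stdint.h>
#include <stdio.h>

/* Illustrative fold, mirroring the CHECKSUM_CARRY() macro above. */
static uint16_t fold_checksum(uint32_t sum)
{
    sum = (sum >> 16) + (sum & 0xffff);  /* add carries back into the low word */
    sum += (sum >> 16);                  /* a second carry can appear */
    return (uint16_t)(~sum);             /* one's complement of the folded sum */
}

int main(void)
{
    uint32_t sum = 0xffffu + 0x0001u;        /* two words whose sum overflows 16 bits */
    printf("0x%04x\n", fold_checksum(sum));  /* prints 0xfffe */
    return 0;
}

In build_udp_header() the same accumulator collects the pseudo-header material and the UDP bytes before this fold produces uh_sum.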
+rx_burst_max=128 +app_symbol=sce +dev_nf_interface=eth_nf_interface + +[tap] +tap_name=tap0 + +# 1.tap_allow_mutilthread=1 load bpf rss obj +# 2.tap_allow_mutilthread=0 not load bpf rss obj +tap_allow_mutilthread=1 +bpf_obj=/opt/tsg/sapp/plug/business/kni/bpf_tun_rss_steering.o +bpf_default_queue=-1 +# tap_bpf_debug_log: cat /sys/kernel/debug/tracing/trace_pipe +bpf_debug_log=0 +# 2: BPF steers by 2-tuple (src/dst address) +# 4: BPF steers by 4-tuple (src/dst address and port) +bpf_hash_mode=2 + +# Configure RPS for the tap device +tap_rps_enable=1 +tap_rps_mask=0,1fffffff,c0000000,00000000 + +[io_uring] +enable_iouring=1 +enable_debuglog=0 +ring_size=1024 +buff_size=2048 +# io_uring_setup() flags +# IORING_SETUP_IOPOLL (1U << 0) /* io_context is polled */ +# IORING_SETUP_SQPOLL (1U << 1) /* SQ poll thread */ +# IORING_SETUP_SQ_AFF (1U << 2) /* sq_thread_cpu is valid */ +# IORING_SETUP_CQSIZE (1U << 3) /* app defines CQ size */ +# IORING_SETUP_CLAMP (1U << 4) /* clamp SQ/CQ ring sizes */ +# IORING_SETUP_ATTACH_WQ (1U << 5) /* attach to existing wq */ +# IORING_SETUP_R_DISABLED (1U << 6) /* start with ring disabled */ +# IORING_SETUP_SUBMIT_ALL (1U << 7) /* continue submit on error */ +flags=0 +sq_thread_idle=0 diff --git a/platform/CMakeLists.txt b/platform/CMakeLists.txt index 083def5..029bdc5 100644 --- a/platform/CMakeLists.txt +++ b/platform/CMakeLists.txt @@ -1,7 +1,7 @@ find_package(SYSTEMD REQUIRED) find_package(NFNETLINK REQUIRED) -add_executable(tfe src/acceptor_kni_v1.cpp src/acceptor_kni_v2.cpp src/acceptor_kni_v3.cpp src/ssl_stream.cpp src/key_keeper.cpp src/ssl_fetch_cert.cpp +add_executable(tfe src/acceptor_kni_v1.cpp src/acceptor_kni_v2.cpp src/acceptor_kni_v3.cpp src/acceptor_kni_v4.cpp src/ssl_stream.cpp src/key_keeper.cpp src/ssl_fetch_cert.cpp src/ssl_sess_cache.cpp src/ssl_sess_ticket.cpp src/ssl_service_cache.cpp src/ssl_trusted_cert_storage.cpp src/ev_root_ca_metadata.cpp src/ssl_utils.cpp src/tcp_stream.cpp src/main.cpp src/proxy.cpp src/sender_scm.cpp src/watchdog_kni.cpp src/watchdog_tfe.cpp src/ssl_ja3.cpp src/watchdog_3rd_device.cpp) @@ -27,6 +27,7 @@ target_link_libraries(tfe pthread dl nfnetlink MESA_field_stat fieldstat3 breakpad_mini + msgpack ${SYSTEMD_LIBRARIES}) if(ENABLE_PLUGIN_HTTP) diff --git a/platform/include/internal/acceptor_kni_v4.h b/platform/include/internal/acceptor_kni_v4.h new file mode 100644 index 0000000..8c0b5cb --- /dev/null +++ b/platform/include/internal/acceptor_kni_v4.h @@ -0,0 +1,13 @@ +#pragma once + +struct tfe_proxy; +struct acceptor_kni_v4 +{ + struct tfe_proxy *proxy; + const char *profile; + + struct acceptor_ctx *acceptor; +}; + +struct acceptor_kni_v4 *acceptor_kni_v4_create(struct tfe_proxy *proxy, const char *profile, void *logger); +void acceptor_kni_v4_destroy(); diff --git a/platform/include/internal/proxy.h b/platform/include/internal/proxy.h index d402385..d5cbab5 100644 --- a/platform/include/internal/proxy.h +++ b/platform/include/internal/proxy.h @@ -140,10 +140,12 @@ struct tfe_proxy unsigned int en_kni_v1_acceptor; unsigned int en_kni_v2_acceptor; unsigned int en_kni_v3_acceptor; + unsigned int en_kni_v4_acceptor; struct acceptor_kni_v1 * kni_v1_acceptor; struct acceptor_kni_v2 * kni_v2_acceptor; struct acceptor_kni_v3 * kni_v3_acceptor; + struct acceptor_kni_v4 * kni_v4_acceptor; struct sender_scm * scm_sender; struct watchdog_kni * watchdog_kni; struct watchdog_tfe * watchdog_tfe; diff --git a/platform/src/acceptor_kni_v4.cpp b/platform/src/acceptor_kni_v4.cpp new file mode 100644 index 0000000..329d8bc --- /dev/null +++ b/platform/src/acceptor_kni_v4.cpp @@ -0,0 +1,152 @@ +#include +#include
+#include +#include +#include +#include // for NF_ACCEPT +#include +#include + +#include +#include +#include +#include +#include "io_uring.h" +#include "tfe_tap_rss.h" +#include "tfe_metrics.h" +#include "tfe_tcp_restore.h" +#include "acceptor_kni_v4.h" + +static void *worker_thread_cycle(void *arg) +{ + struct acceptor_thread_ctx *thread_ctx = (struct acceptor_thread_ctx *)arg; + struct packet_io *handle = thread_ctx->ref_io; + struct acceptor_ctx *acceptor_ctx = thread_ctx->ref_acceptor_ctx; + + int pkg_len = 0; + char thread_name[16]; + int n_pkt_recv_from_nf = 0; + int n_pkt_recv_from_tap = 0; + int n_pkt_recv_from_tap_c = 0; + int n_pkt_recv_from_tap_s = 0; + + snprintf(thread_name, sizeof(thread_name), "kni:worker-%d", thread_ctx->thread_index); + prctl(PR_SET_NAME, (unsigned long long)thread_name, NULL, NULL, NULL); + + if (packet_io_thread_init(handle, thread_ctx) != 0) + { + goto error_out; + } + + if (acceptor_ctx->config->enable_iouring) { + io_uring_register_read_callback(thread_ctx->tap_ctx->io_uring_fd, handle_raw_packet_from_tap, thread_ctx); + io_uring_register_read_callback(thread_ctx->tap_ctx->io_uring_c, handle_decryption_packet_from_tap, thread_ctx); + io_uring_register_read_callback(thread_ctx->tap_ctx->io_uring_s, handle_decryption_packet_from_tap, thread_ctx); + } + else { + thread_ctx->tap_ctx->buff_size = 3000; + thread_ctx->tap_ctx->buff = ALLOC(char, thread_ctx->tap_ctx->buff_size); + } + + TFE_LOG_INFO(g_default_logger, "%s: worker thread %d is running", LOG_TAG_SCE, thread_ctx->thread_index); + + while(1) { + n_pkt_recv_from_nf = packet_io_polling_nf_interface(handle, thread_ctx->thread_index, thread_ctx); + if (acceptor_ctx->config->enable_iouring) { + n_pkt_recv_from_tap = io_uring_peek_ready_entrys(thread_ctx->tap_ctx->io_uring_fd); + n_pkt_recv_from_tap_c = io_uring_peek_ready_entrys(thread_ctx->tap_ctx->io_uring_c); + n_pkt_recv_from_tap_s = io_uring_peek_ready_entrys(thread_ctx->tap_ctx->io_uring_s); + } + else { + if ((pkg_len = tfe_tap_read_per_thread(thread_ctx->tap_ctx->tap_fd, thread_ctx->tap_ctx->buff, thread_ctx->tap_ctx->buff_size, g_default_logger)) > 0) + { + handle_raw_packet_from_tap(thread_ctx->tap_ctx->buff, pkg_len, thread_ctx); + } + + if ((pkg_len = tfe_tap_read_per_thread(thread_ctx->tap_ctx->tap_c, thread_ctx->tap_ctx->buff, thread_ctx->tap_ctx->buff_size, g_default_logger)) > 0) + { + handle_decryption_packet_from_tap(thread_ctx->tap_ctx->buff, pkg_len, thread_ctx); + } + + if ((pkg_len = tfe_tap_read_per_thread(thread_ctx->tap_ctx->tap_s, thread_ctx->tap_ctx->buff, thread_ctx->tap_ctx->buff_size, g_default_logger)) > 0) + { + handle_decryption_packet_from_tap(thread_ctx->tap_ctx->buff, pkg_len, thread_ctx); + } + } + global_metrics_dump(acceptor_ctx->metrics); + + if (n_pkt_recv_from_nf == 0) + { + packet_io_thread_wait(handle, thread_ctx, 0); + } + + if (__atomic_fetch_add(&thread_ctx->session_table_need_reset, 0, __ATOMIC_RELAXED) > 0) + { + session_table_reset(thread_ctx->session_table); + __atomic_fetch_and(&thread_ctx->session_table_need_reset, 0, __ATOMIC_RELAXED); + } + } + +error_out: + TFE_LOG_ERROR(g_default_logger, "%s: worker thread %d exiting", LOG_TAG_SCE, thread_ctx->thread_index); + return (void *)NULL; +} + +void acceptor_kni_v4_destroy() +{ + return; +} + +struct acceptor_kni_v4 *acceptor_kni_v4_create(struct tfe_proxy *proxy, const char *profile, void *logger) +{ + int ret = 0; + struct acceptor_kni_v4 *__ctx = (struct acceptor_kni_v4 *)calloc(1, sizeof(struct acceptor_kni_v4)); + + struct acceptor_ctx
*acceptor_ctx = acceptor_ctx_create(profile); + if (acceptor_ctx == NULL) + goto error_out; + + acceptor_ctx->ref_proxy = proxy; + for (int i = 0; i < acceptor_ctx->nr_worker_threads; i++) { + acceptor_ctx->work_threads[i].tid = 0; + acceptor_ctx->work_threads[i].thread_index = i; + acceptor_ctx->work_threads[i].ref_acceptor_ctx = acceptor_ctx; + + acceptor_ctx->work_threads[i].tap_ctx = tfe_tap_ctx_create(&acceptor_ctx->work_threads[i]); + if (acceptor_ctx->config->enable_iouring) { + int eventfd = 0; + struct tap_ctx *tap_ctx = acceptor_ctx->work_threads[i].tap_ctx; + tap_ctx->io_uring_fd = io_uring_instance_create(tap_ctx->tap_fd, eventfd, acceptor_ctx->config->ring_size, acceptor_ctx->config->buff_size, acceptor_ctx->config->flags, acceptor_ctx->config->sq_thread_idle, acceptor_ctx->config->enable_debuglog); + tap_ctx->io_uring_c = io_uring_instance_create(tap_ctx->tap_c, eventfd, acceptor_ctx->config->ring_size, acceptor_ctx->config->buff_size, acceptor_ctx->config->flags, acceptor_ctx->config->sq_thread_idle, acceptor_ctx->config->enable_debuglog); + tap_ctx->io_uring_s = io_uring_instance_create(tap_ctx->tap_s, eventfd, acceptor_ctx->config->ring_size, acceptor_ctx->config->buff_size, acceptor_ctx->config->flags, acceptor_ctx->config->sq_thread_idle, acceptor_ctx->config->enable_debuglog); + } + + acceptor_ctx->work_threads[i].session_table = session_table_create(); + acceptor_ctx->work_threads[i].ref_io = acceptor_ctx->io; + acceptor_ctx->work_threads[i].ref_proxy = proxy; + acceptor_ctx->work_threads[i].ref_tap_config = acceptor_ctx->config; + acceptor_ctx->work_threads[i].ref_metrics = acceptor_ctx->metrics; + acceptor_ctx->work_threads[i].session_table_need_reset = 0; + + if (acceptor_ctx->config->tap_rps_enable) + { + ret = tfe_tap_set_rps(g_default_logger, acceptor_ctx->config->tap_device, i, acceptor_ctx->config->tap_rps_mask); + if (ret != 0) + goto error_out; + } + } + + for (int i = 0; i < acceptor_ctx->nr_worker_threads; i++) { + struct acceptor_thread_ctx *thread_ctx = &acceptor_ctx->work_threads[i]; + if (pthread_create(&thread_ctx->tid, NULL, worker_thread_cycle, (void *)thread_ctx) < 0) + { + goto error_out; + } + } + + return __ctx; + +error_out: + acceptor_kni_v4_destroy(); + return NULL; +} \ No newline at end of file diff --git a/platform/src/proxy.cpp b/platform/src/proxy.cpp index 8ea88d7..df4285a 100644 --- a/platform/src/proxy.cpp +++ b/platform/src/proxy.cpp @@ -47,11 +47,14 @@ #include #include #include +#include #include #include #include #include +#include "tfe_metrics.h" + /* Breakpad */ #include @@ -532,9 +535,10 @@ void tfe_proxy_acceptor_init(struct tfe_proxy * proxy, const char * profile) { MESA_load_profile_uint_def(profile, "system", "enable_kni_v1", &proxy->en_kni_v1_acceptor, 0); MESA_load_profile_uint_def(profile, "system", "enable_kni_v2", &proxy->en_kni_v2_acceptor, 0); - MESA_load_profile_uint_def(profile, "system", "enable_kni_v3", &proxy->en_kni_v3_acceptor, 1); + MESA_load_profile_uint_def(profile, "system", "enable_kni_v3", &proxy->en_kni_v3_acceptor, 0); + MESA_load_profile_uint_def(profile, "system", "enable_kni_v4", &proxy->en_kni_v4_acceptor, 1); - int ret = proxy->en_kni_v1_acceptor + proxy->en_kni_v2_acceptor + proxy->en_kni_v3_acceptor; + int ret = proxy->en_kni_v1_acceptor + proxy->en_kni_v2_acceptor + proxy->en_kni_v3_acceptor + proxy->en_kni_v4_acceptor; CHECK_OR_EXIT((ret == 1), "Invalid KNI acceptor. 
Exit."); if (proxy->en_kni_v1_acceptor) @@ -555,6 +559,12 @@ void tfe_proxy_acceptor_init(struct tfe_proxy * proxy, const char * profile) CHECK_OR_EXIT(g_default_proxy->kni_v3_acceptor, "Failed at init KNIv3 acceptor. Exit. "); } + if (proxy->en_kni_v4_acceptor) + { + g_default_proxy->kni_v4_acceptor = acceptor_kni_v4_create(g_default_proxy, profile, g_default_logger); + CHECK_OR_EXIT(g_default_proxy->kni_v4_acceptor, "Failed at init KNIv4 acceptor. Exit. "); + } + return; } diff --git a/vendor/CMakeLists.txt b/vendor/CMakeLists.txt index 968182e..c750568 100644 --- a/vendor/CMakeLists.txt +++ b/vendor/CMakeLists.txt @@ -375,3 +375,19 @@ set_property(TARGET libnetfilter_queue-static PROPERTY INTERFACE_INCLUDE_DIRECTO #add_dependencies(gperftools-static gperftools) #set_property(TARGET gperftools-static PROPERTY IMPORTED_LOCATION ${INSTALL_DIR}/lib/libtcmalloc.a) #set_property(TARGET gperftools-static PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${INSTALL_DIR}/include) + +### msgpack-c 6.0.0 +ExternalProject_Add(msgpack-c PREFIX msgpack-c + URL ${CMAKE_CURRENT_SOURCE_DIR}/msgpack-c-6.0.0.tar.gz + URL_MD5 f930a80b118a20de2be3211b0706f562 + CMAKE_ARGS -DCMAKE_INSTALL_PREFIX= + -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} + -DMSGPACK_BUILD_TESTS=OFF) + +ExternalProject_Get_Property(msgpack-c INSTALL_DIR) +file(MAKE_DIRECTORY ${INSTALL_DIR}/include) + +add_library(msgpack STATIC IMPORTED GLOBAL) +add_dependencies(msgpack msgpack-c) +set_property(TARGET msgpack PROPERTY IMPORTED_LOCATION ${INSTALL_DIR}/lib/libmsgpack-c.a) +set_property(TARGET msgpack PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${INSTALL_DIR}/include) \ No newline at end of file diff --git a/vendor/msgpack-c-6.0.0.tar.gz b/vendor/msgpack-c-6.0.0.tar.gz new file mode 100644 index 0000000..4a4a47a Binary files /dev/null and b/vendor/msgpack-c-6.0.0.tar.gz differ