Refactor TCP reassembly, the session knows where the TCP segment comes from: raw packet or tcp segment queue

This commit is contained in:
luwenpeng
2024-04-02 16:21:39 +08:00
parent a509f0ce3b
commit e8e60cee6d
25 changed files with 678 additions and 1419 deletions

View File

@@ -31,8 +31,9 @@ struct session_manager
uint64_t tcp_unverified_rst_timeout; // range: [1, 600000]
// UDP timeout
uint64_t udp_data_timeout; // range: [1, 15999999000]
struct tcp_reassembly_options tcp_reassembly_opts;
// TCP reassembly
uint32_t tcp_reassembly_max_timeout; // range: [1, 60000] (ms)
uint32_t tcp_reassembly_max_segments; // range: [2, 32]
struct session_pool *sess_pool;
struct session_table *tcp_sess_table;
@@ -101,6 +102,135 @@ int check_options(const struct session_manager_options *opts)
return 0;
}
/*
* The next routines deal with comparing 32 bit unsigned ints
* and worry about wraparound (automatic with unsigned arithmetic).
*/
static inline bool before(uint32_t seq1, uint32_t seq2)
{
return (int32_t)(seq1 - seq2) < 0;
}
static void tcp_pcb_clean(struct tcp_pcb *pcb)
{
if (pcb)
{
tcp_reassembly_free(pcb->c2s_assembler);
tcp_reassembly_free(pcb->s2c_assembler);
}
}
static int tcp_pcb_init(struct tcp_pcb *pcb, uint64_t max_timeout, uint64_t max_seg_num)
{
pcb->c2s_assembler = tcp_reassembly_new(max_timeout, max_seg_num);
pcb->s2c_assembler = tcp_reassembly_new(max_timeout, max_seg_num);
if (pcb->c2s_assembler == NULL || pcb->s2c_assembler == NULL)
{
tcp_pcb_clean(pcb);
return -1;
}
return 0;
}
static void tcp_pcb_update(struct tcp_pcb *pcb, enum session_dir dir, const struct pkt_layer *tcp_layer, uint64_t now)
{
struct tcp_segment *seg;
struct tcp_reassembly *assembler;
struct tcphdr *hdr = (struct tcphdr *)tcp_layer->hdr_ptr;
uint32_t seq = tcp_hdr_get_seq(hdr);
uint32_t ack = tcp_hdr_get_ack(hdr);
uint8_t flags = tcp_hdr_get_flags(hdr);
uint32_t rcv_nxt;
/*
* https://www.rfc-editor.org/rfc/rfc5961#section-3.2
*
* If the RST bit is set and the sequence number exactly matches the
* next expected sequence number (RCV.NXT), then TCP MUST reset the
* connection.
*
* if fin is received, the expected sequence number should be increased by 1
*/
uint16_t expect = 0;
if (dir == SESSION_DIR_C2S)
{
pcb->c2s_seq = seq;
pcb->c2s_ack = ack;
assembler = pcb->c2s_assembler;
expect = pcb->s2c_ack;
expect += pcb->sub_state & TCP_S2C_FIN_RCVD ? 1 : 0;
pcb->sub_state |= (flags & TH_SYN) ? TCP_SYN_RCVD : 0;
pcb->sub_state |= (flags & TH_FIN) ? TCP_C2S_FIN_RCVD : 0;
pcb->sub_state |= ((flags & TH_RST) && (seq == expect)) ? TCP_C2S_RST_RCVD : 0;
pcb->sub_state |= ((flags & TH_RST) && (seq != expect)) ? TCP_C2S_UNVERIFIED_RST_RCVD : 0;
}
else
{
pcb->s2c_seq = seq;
pcb->s2c_ack = ack;
assembler = pcb->s2c_assembler;
expect = pcb->c2s_ack;
expect += pcb->sub_state & TCP_C2S_FIN_RCVD ? 1 : 0;
pcb->sub_state |= (flags & TH_SYN) ? TCP_SYN_ACK_RCVD : 0;
pcb->sub_state |= (flags & TH_FIN) ? TCP_S2C_FIN_RCVD : 0;
pcb->sub_state |= ((flags & TH_RST) && (seq == expect)) ? TCP_S2C_RST_RCVD : 0;
pcb->sub_state |= ((flags & TH_RST) && (seq != expect)) ? TCP_S2C_UNVERIFIED_RST_RCVD : 0;
}
if (flags & TH_SYN)
{
tcp_reassembly_set_recv_next(assembler, seq + 1);
}
seg = tcp_reassembly_expire(assembler, now);
if (seg)
{
// TODO add metric (expire)
tcp_segment_free(seg);
}
if (tcp_layer->pld_len)
{
rcv_nxt = tcp_reassembly_get_recv_next(assembler);
if (seq == rcv_nxt)
{
pcb->order_seg.data = tcp_layer->pld_ptr;
pcb->order_seg.len = tcp_layer->pld_len;
tcp_reassembly_inc_recv_next(assembler, tcp_layer->pld_len);
}
else if (before(seq, rcv_nxt))
{
// TODO add metric (overlap)
}
else if ((seg = tcp_segment_new(seq, tcp_layer->pld_ptr, tcp_layer->pld_len)))
{
switch (tcp_reassembly_push(assembler, seg, now))
{
case -1:
// TODO add metric (assembler full)
tcp_segment_free(seg);
break;
case 0:
// TODO add metric (assembler push success)
break;
case 1:
// TODO add metric (assembler push success, overlap)
break;
default:
assert(0);
break;
}
}
}
}
/******************************************************************************
* Stat
******************************************************************************/
@@ -435,6 +565,7 @@ static struct session *session_manager_new_tcp_session(struct session_manager *m
session_manager_evicte_session(mgr, evic_sess, now);
}
enum session_dir dir = (flags & TH_ACK) ? SESSION_DIR_S2C : SESSION_DIR_C2S;
struct session *sess = session_pool_pop(mgr->sess_pool);
if (sess == NULL)
{
@@ -443,27 +574,26 @@ static struct session *session_manager_new_tcp_session(struct session_manager *m
}
session_init(sess);
session_set_id(sess, id_generator_alloc());
if (tcp_sess_init(sess, &mgr->tcp_reassembly_opts) == -1)
if (tcp_pcb_init(&sess->tcp_pcb, mgr->tcp_reassembly_max_timeout, mgr->tcp_reassembly_max_segments) == -1)
{
assert(0);
session_pool_push(mgr->sess_pool, sess);
return NULL;
}
mgr->stat.tcp_sess.nr_sess_used++;
tcp_pcb_update(&sess->tcp_pcb, dir, tcp_layer, now);
enum session_dir dir = tcp_hdr_get_ack_flag(hdr) ? SESSION_DIR_S2C : SESSION_DIR_C2S;
enum session_state next_state = session_transition_run(SESSION_STATE_INIT, TCP_SYN);
session_update(sess, next_state, pkt, key, dir, now);
session_transition_log(sess, SESSION_STATE_INIT, next_state, TCP_SYN);
session_stat_inc(&mgr->stat.tcp_sess, next_state);
tcp_data_enqueue(sess, tcp_layer, now);
uint64_t timeout = (flags & TH_ACK) ? mgr->tcp_handshake_timeout : mgr->tcp_init_timeout;
session_timer_update(mgr->sess_timer, sess, now + timeout);
session_table_add(mgr->tcp_sess_table, key, sess);
duplicated_packet_filter_add(mgr->dup_pkt_filter, pkt, now);
mgr->stat.tcp_sess.nr_sess_used++;
return sess;
}
@@ -515,8 +645,7 @@ static int session_manager_update_tcp_session(struct session_manager *mgr, struc
session_transition_log(sess, curr_state, next_state, inputs);
session_stat_update(mgr, sess, curr_state, next_state);
tcp_data_expire(sess, now);
tcp_data_enqueue(sess, tcp_layer, now);
tcp_pcb_update(&sess->tcp_pcb, dir, tcp_layer, now);
// set closing reason
if (next_state == SESSION_STATE_CLOSING && !session_get_closing_reason(sess))
@@ -531,7 +660,7 @@ static int session_manager_update_tcp_session(struct session_manager *mgr, struc
}
}
uint16_t sub_state = sess->data.tcp.sub_state;
uint16_t sub_state = sess->tcp_pcb.sub_state;
uint64_t timeout = 0;
switch (next_state)
@@ -620,6 +749,10 @@ struct session_manager *session_manager_new(struct session_manager_options *opts
mgr->tcp_discard_timeout = opts->tcp_discard_timeout;
mgr->tcp_unverified_rst_timeout = opts->tcp_unverified_rst_timeout;
mgr->udp_data_timeout = opts->udp_data_timeout;
// tcp reassembly
mgr->tcp_reassembly_max_timeout = opts->tcp_reassembly_max_timeout;
mgr->tcp_reassembly_max_segments = opts->tcp_reassembly_max_segments;
// duplicated packet filter
struct duplicated_packet_filter_options duplicated_packet_filter_opts = {
.enable = opts->duplicated_packet_filter_enable,
@@ -634,13 +767,6 @@ struct session_manager *session_manager_new(struct session_manager_options *opts
.timeout = opts->evicted_session_filter_timeout,
.error_rate = opts->evicted_session_filter_error_rate,
};
// tcp reassembly
mgr->tcp_reassembly_opts = {
.enable = opts->tcp_reassembly_enable,
.max_timeout = opts->tcp_reassembly_max_timeout,
.max_segments = opts->tcp_reassembly_max_segments,
.max_bytes = opts->tcp_reassembly_max_bytes,
};
mgr->sess_pool = session_pool_new(mgr->max_tcp_session_num + mgr->max_udp_session_num);
mgr->tcp_sess_table = session_table_new();
@@ -729,7 +855,7 @@ void session_manager_free_session(struct session_manager *mgr, struct session *s
switch (session_get_type(sess))
{
case SESSION_TYPE_TCP:
tcp_sess_clean(sess);
tcp_pcb_clean(&sess->tcp_pcb);
session_table_del(mgr->tcp_sess_table, session_get_tuple(sess));
session_stat_dec(&mgr->stat.tcp_sess, session_get_state(sess));
mgr->stat.tcp_sess.nr_sess_used--;
@@ -839,14 +965,13 @@ struct session *session_manager_get_expired_session(struct session_manager *mgr,
struct session *session_manager_get_evicted_session(struct session_manager *mgr)
{
struct session *sess = NULL;
if (list_empty(&mgr->evicte_queue))
{
return sess;
return NULL;
}
else
{
sess = list_first_entry(&mgr->evicte_queue, struct session, evicte);
struct session *sess = list_first_entry(&mgr->evicte_queue, struct session, evicte);
list_del(&sess->evicte);
return sess;
}