#include #include #include #include #include "tcp_reassembly.h" #include "itree.h" struct segment { struct tcp_reassembly *assy; struct segment *next; struct segment *prev; uint64_t id; uint64_t time; uint32_t offset; uint32_t len; char *payload; // Flexible array member }; struct segment_list { struct segment *head; // del segment from head struct segment *tail; // add segment to tail }; struct tcp_reassembly { struct tcp_reassembly_options opts; struct tcp_reassembly_stat stat; struct segment_list list; struct itree *itree; uint64_t exp_seq; }; /* * The next routines deal with comparing 32 bit unsigned ints * and worry about wraparound (automatic with unsigned arithmetic). */ static inline bool before(uint32_t seq1, uint32_t seq2) { return (int32_t)(seq1 - seq2) < 0; } static inline void segment_list_add(struct segment_list *list, struct segment *seg) { if (list->head == NULL) { list->head = seg; } else { list->tail->next = seg; seg->prev = list->tail; } list->tail = seg; } static inline void segment_list_del(struct segment_list *list, struct segment *seg) { if (list->head == seg) { list->head = seg->next; } if (list->tail == seg) { list->tail = seg->prev; } if (seg->prev) { seg->prev->next = seg->next; } if (seg->next) { seg->next->prev = seg->prev; } } static inline struct segment *segment_list_get_oldest(struct segment_list *list) { return list->head; } static inline struct segment *segment_new(uint32_t len) { return (struct segment *)calloc(1, sizeof(struct segment) + len); } static inline void *segment_dup(void *p) { return p; } static inline void segment_free(void *p) { struct segment *seg = NULL; struct tcp_reassembly *assy = NULL; if (p) { seg = (struct segment *)p; assy = seg->assy; segment_list_del(&assy->list, seg); assy->stat.curr_bytes -= seg->len; assy->stat.curr_segments--; free(seg); } } struct tcp_reassembly *tcp_reassembly_new(struct tcp_reassembly_options *opts) { struct tcp_reassembly *assy = NULL; assy = (struct tcp_reassembly *)calloc(1, sizeof(struct tcp_reassembly)); if (assy == NULL) { return NULL; } memcpy(&assy->opts, opts, sizeof(struct tcp_reassembly_options)); if (!assy->opts.enable) { return assy; } assy->itree = itree_new(segment_dup, segment_free); if (assy->itree == NULL) { goto error_out; } return assy; error_out: tcp_reassembly_free(assy); return NULL; } void tcp_reassembly_free(struct tcp_reassembly *assy) { if (assy) { if (assy->itree) { itree_delete(assy->itree); } free(assy); } } void tcp_reassembly_init(struct tcp_reassembly *assy, uint32_t syn_seq) { if (!assy->opts.enable) { return; } assy->exp_seq = syn_seq + 1; } void tcp_reassembly_expire(struct tcp_reassembly *assy, uint64_t now) { if (!assy->opts.enable) { return; } uint64_t high; interval_t expire; struct segment *seg = NULL; while ((seg = segment_list_get_oldest(&assy->list)) != NULL) { if (seg->time + assy->opts.max_timeout > now) { break; } high = (uint64_t)seg->offset + (uint64_t)seg->len - 1; expire = { .low = seg->offset, .high = high, .data = seg, }; assy->stat.timeout_discard_segments++; assy->stat.timeout_discard_bytes += seg->len; TCP_REASSEMBLE_DEBUG("expire %p [%lu, %lu] (time: %lu, now: %lu)", seg, seg->offset, high, seg->time, now); itree_remove(assy->itree, &expire); } } void tcp_reassembly_insert(struct tcp_reassembly *assy, uint32_t offset, const char *payload, uint32_t len, uint64_t now) { if (!assy->opts.enable) { return; } uint64_t low = (uint64_t)offset; uint64_t high = (uint64_t)offset + (uint64_t)len - 1; // from uint32_t to uint64_t, so no overflow struct segment *seg = NULL; interval_t insert; assy->stat.insert_segments++; assy->stat.insert_bytes += len; if (assy->opts.max_segments > 0 && assy->stat.curr_segments >= assy->opts.max_segments) { assy->stat.overload_bypass_segments++; assy->stat.overload_bypass_bytes += len; TCP_REASSEMBLE_DEBUG("insert [%lu, %lu] failed, reach max packets %u", low, high, assy->opts.max_segments); return; } if (assy->opts.max_bytes > 0 && assy->stat.curr_bytes >= assy->opts.max_bytes) { assy->stat.overload_bypass_segments++; assy->stat.overload_bypass_bytes += len; TCP_REASSEMBLE_DEBUG("insert [%lu, %lu] failed, reach max bytes %u", low, high, assy->opts.max_bytes); return; } if (len == 0 || before(offset + len, assy->exp_seq)) { assy->stat.retrans_bypass_segments++; assy->stat.retrans_bypass_bytes += len; TCP_REASSEMBLE_DEBUG("insert [%lu, %lu] failed, less the expect seq %lu", low, high, assy->exp_seq); return; } seg = segment_new(len); if (seg == NULL) { return; } seg->assy = assy; seg->id = assy->stat.insert_segments; seg->time = now; seg->offset = offset; seg->len = len; seg->payload = (char *)seg + sizeof(struct segment); memcpy(seg->payload, payload, len); insert = { .low = low, .high = high, .data = seg, }; if (itree_insert(assy->itree, &insert) == 0) { free(seg); return; } TCP_REASSEMBLE_DEBUG("insert %p [%lu, %lu]", seg, insert.low, insert.high); segment_list_add(&assy->list, seg); assy->stat.curr_segments++; assy->stat.curr_bytes += seg->len; } const char *tcp_reassembly_peek(struct tcp_reassembly *assy, uint32_t *len) { *len = 0; if (!assy->opts.enable) { return NULL; } int count = 0; interval_t peek; uint64_t overlap = 0; uint64_t min_id = UINT64_MAX; struct segment *seg = NULL; ilist_t *list = NULL; ilisttrav_t *trav = NULL; interval_t *query = NULL; interval_t *oldest = NULL; peek = { .low = assy->exp_seq, .high = assy->exp_seq, }; list = itree_findall(assy->itree, &peek); if (list == NULL) { return NULL; } count = ilist_size(list); trav = ilisttrav_new(list); for (int i = 0; i < count; i++) { if (i == 0) { query = (interval_t *)ilisttrav_first(trav); } else { query = (interval_t *)ilisttrav_next(trav); } seg = (struct segment *)query->data; if (seg->id < min_id) { min_id = seg->id; oldest = query; } } ilisttrav_delete(trav); ilist_delete(list); if (oldest == NULL) { return NULL; } seg = (struct segment *)oldest->data; if (seg->offset < assy->exp_seq) { overlap = assy->exp_seq - seg->offset; *len = seg->len - overlap; TCP_REASSEMBLE_DEBUG("peek [%lu, +∞], found %p [%lu, %lu] (left overlap: %lu)", assy->exp_seq, seg, oldest->low, oldest->high, overlap); return seg->payload + overlap; } TCP_REASSEMBLE_DEBUG("peek [%lu, +∞], found %p [%lu, %lu]", assy->exp_seq, seg, oldest->low, oldest->high); *len = seg->len; return seg->payload; } void tcp_reassembly_consume(struct tcp_reassembly *assy, uint32_t len) { if (!assy->opts.enable) { return; } if (len == 0) { return; } int count; uint64_t old_exp_seq; uint64_t new_exp_seq; interval_t consume; ilist_t *list = NULL; interval_t *del = NULL; ilisttrav_t *trav = NULL; struct segment *seg = NULL; old_exp_seq = assy->exp_seq; assy->exp_seq += len; if (assy->exp_seq > UINT32_MAX) { assy->exp_seq = assy->exp_seq % UINT32_MAX; } new_exp_seq = assy->exp_seq; TCP_REASSEMBLE_DEBUG("consume [%lu, %lu], update expect seq %lu -> %lu", old_exp_seq, old_exp_seq + len - 1, old_exp_seq, new_exp_seq); consume = { .low = old_exp_seq, .high = old_exp_seq + len - 1, }; list = itree_findall(assy->itree, &consume); if (list == NULL) { return; } assy->stat.consume_segments++; assy->stat.consume_bytes += len; count = ilist_size(list); trav = ilisttrav_new(list); for (int i = 0; i < count; i++) { if (i == 0) { del = (interval_t *)ilisttrav_first(trav); } else { del = (interval_t *)ilisttrav_next(trav); } if (del && before(del->high, new_exp_seq)) { seg = (struct segment *)del->data; assy->stat.remove_segments++; assy->stat.remove_bytes += seg->len; TCP_REASSEMBLE_DEBUG("consume [%lu, %lu], delete %p [%lu, %lu]", old_exp_seq, old_exp_seq + len - 1, seg, del->low, del->high); itree_remove(assy->itree, del); } } ilisttrav_delete(trav); ilist_delete(list); } struct tcp_reassembly_stat *tcp_reassembly_get_stat(struct tcp_reassembly *assy) { if (!assy->opts.enable) { return NULL; } return &assy->stat; } void tcp_reassembly_print_stat(struct tcp_reassembly *assy) { if (!assy->opts.enable) { return; } TCP_REASSEMBLE_DEBUG("current : segments %lu, bytes %lu", assy->stat.curr_segments, assy->stat.curr_bytes); TCP_REASSEMBLE_DEBUG("insert : segments %lu, bytes %lu", assy->stat.insert_segments, assy->stat.insert_bytes); TCP_REASSEMBLE_DEBUG("remove : segments %lu, bytes %lu", assy->stat.remove_segments, assy->stat.remove_bytes); TCP_REASSEMBLE_DEBUG("consume : segments %lu, bytes %lu", assy->stat.consume_segments, assy->stat.consume_bytes); TCP_REASSEMBLE_DEBUG("retrans bypass : segments %lu, bytes %lu", assy->stat.retrans_bypass_segments, assy->stat.retrans_bypass_bytes); TCP_REASSEMBLE_DEBUG("overload bypass : segments %lu, bytes %lu", assy->stat.overload_bypass_segments, assy->stat.overload_bypass_bytes); TCP_REASSEMBLE_DEBUG("timeout discard : segments %lu, bytes %lu", assy->stat.timeout_discard_segments, assy->stat.timeout_discard_bytes); }