Add Linux kernel interval tree

This commit is contained in:
luwenpeng
2024-03-27 17:11:38 +08:00
parent eb281ab789
commit 814a0d739f
25 changed files with 2200 additions and 1665 deletions

View File

@@ -2,36 +2,27 @@
#include <string.h>
#include <stdio.h>
#include <assert.h>
#include "list.h"
#include "tcp_reassembly.h"
#include "itree.h"
#include "interval_tree.h"
/*
 * One buffered TCP payload chunk held by a reassembler.
 * The payload bytes are stored in the same allocation, directly after this header.
 */
struct segment
{
struct tcp_reassembly *assy; // owning reassembler; segment_free() reaches the segment list and stats through it
struct segment *next; // forward link maintained by segment_list_add()/segment_list_del()
struct segment *prev; // backward link maintained by segment_list_add()/segment_list_del()
// NOTE(review): `id` is declared twice in this struct as shown (here and further down) - confirm only one declaration is meant to remain
uint64_t id;
struct interval_tree_node tree_node; // tcp_reassembly_insert() stores the covered range in start/last
struct list_head list_node; // linked onto tcp_reassembly.list_root by tcp_reassembly_insert()
uint64_t time; // set from `now` in tcp_reassembly_insert(); tcp_reassembly_expire() checks it against opts.max_timeout
uint32_t offset; // offset argument given to tcp_reassembly_insert()
uint32_t len; // payload length in bytes
uint64_t id; // value of assy->stat.insert_segments at insertion; tcp_reassembly_peek() picks the smallest id
char *payload; // pointer to the bytes placed right after this struct in the same allocation (not a flexible array member)
};
/* Doubly linked list of segments; segment_list_add() appends at the tail, so head is the earliest added. */
struct segment_list
{
struct segment *head; // earliest added segment, returned by segment_list_get_oldest(); NULL when the list is empty
struct segment *tail; // most recently added segment; segment_list_add() links new segments after it
};
/* State of one reassembler instance. */
struct tcp_reassembly
{
struct tcp_reassembly_options opts; // copied from the caller's options in tcp_reassembly_new()
struct tcp_reassembly_stat stat; // running counters updated on insert, consume, expire and free of segments
struct segment_list list; // head/tail segment list used by segment_list_*() and segment_free()
struct itree *itree; // handle returned by itree_new()
struct rb_root_cached tree_root; // root of the interval tree of buffered segments; initialised to RB_ROOT_CACHED
struct list_head list_root; // segments appended at the tail on insert; tcp_reassembly_expire() takes them from the front
uint64_t exp_seq; // next expected sequence number; tcp_reassembly_consume() advances it and wraps it modulo 2^32
};
@@ -45,73 +36,6 @@ static inline bool before(uint32_t seq1, uint32_t seq2)
return (int32_t)(seq1 - seq2) < 0;
}
/*
 * Append seg to the end of list.
 * On an empty list (head == NULL) seg becomes both head and tail;
 * otherwise it is linked after the current tail.
 */
static inline void segment_list_add(struct segment_list *list, struct segment *seg)
{
    if (list->head != NULL)
    {
        struct segment *last = list->tail;

        last->next = seg;
        seg->prev = last;
    }
    else
    {
        list->head = seg;
    }

    list->tail = seg;
}
/*
 * Unlink seg from list.
 * The list's head/tail are moved off seg when needed and seg's neighbours are
 * joined to each other; seg's own next/prev fields are left untouched.
 */
static inline void segment_list_del(struct segment_list *list, struct segment *seg)
{
    struct segment *before_seg = seg->prev;
    struct segment *after_seg = seg->next;

    if (seg == list->head)
    {
        list->head = after_seg;
    }
    if (seg == list->tail)
    {
        list->tail = before_seg;
    }

    if (before_seg != NULL)
    {
        before_seg->next = after_seg;
    }
    if (after_seg != NULL)
    {
        after_seg->prev = before_seg;
    }
}
/* Return the segment at the head of list (the earliest one added), or NULL when the list is empty. */
static inline struct segment *segment_list_get_oldest(struct segment_list *list)
{
    struct segment *oldest = list->head;

    return oldest;
}
/*
 * Allocate one zero-filled block holding a segment header plus len extra bytes.
 * Returns NULL when calloc() fails; the caller owns the returned block.
 */
static inline struct segment *segment_new(uint32_t len)
{
    struct segment *seg = calloc(1, sizeof(struct segment) + len);

    return seg;
}
/* Identity "duplicate": hands back p itself, no copy is made. */
static inline void *segment_dup(void *p)
{
    void *same = p;

    return same;
}
/*
 * Destroy a segment given as an untyped pointer.
 * The segment is unlinked from its owner's segment list, the owner's
 * curr_segments/curr_bytes counters are reduced, and the memory is released.
 * A NULL pointer is ignored.
 */
static inline void segment_free(void *p)
{
    if (p == NULL)
    {
        return;
    }

    struct segment *seg = (struct segment *)p;
    struct tcp_reassembly *owner = seg->assy;

    segment_list_del(&owner->list, seg);
    owner->stat.curr_segments--;
    owner->stat.curr_bytes -= seg->len;
    free(seg);
}
struct tcp_reassembly *tcp_reassembly_new(struct tcp_reassembly_options *opts)
{
struct tcp_reassembly *assy = NULL;
@@ -121,35 +45,30 @@ struct tcp_reassembly *tcp_reassembly_new(struct tcp_reassembly_options *opts)
{
return NULL;
}
memcpy(&assy->opts, opts, sizeof(struct tcp_reassembly_options));
if (!assy->opts.enable)
{
return assy;
}
assy->itree = itree_new(segment_dup, segment_free);
if (assy->itree == NULL)
{
goto error_out;
}
assy->tree_root = RB_ROOT_CACHED;
INIT_LIST_HEAD(&assy->list_root);
return assy;
error_out:
tcp_reassembly_free(assy);
return NULL;
}
/*
 * Release a reassembler together with the segments still stored in its interval tree.
 * Passing NULL is a no-op.
 */
void tcp_reassembly_free(struct tcp_reassembly *assy)
{
struct segment *seg = NULL;
struct interval_tree_node *tree_node = NULL;
if (assy)
{
// NOTE(review): as written, this `if` guards the whole drain loop below - confirm the itree check is still wanted next to the interval_tree path
if (assy->itree)
// The query range [0, UINT64_MAX] is the widest possible, so the loop keeps fetching a node until the lookup returns NULL
while ((tree_node = interval_tree_iter_first(&assy->tree_root, 0, UINT64_MAX)))
{
// NOTE(review): itree_delete() sits inside the loop, so it runs once per remaining segment - confirm this is intended
itree_delete(assy->itree);
// Recover the segment that embeds this tree node, then detach it from both the tree and the list before freeing it
seg = container_of(tree_node, struct segment, tree_node);
interval_tree_remove(&seg->tree_node, &assy->tree_root);
list_del(&seg->list_node);
free(seg);
seg = NULL;
}
free(assy);
// Only clears this function's local copy of the pointer; the caller's pointer is not changed
assy = NULL;
}
}
@@ -171,28 +90,30 @@ void tcp_reassembly_expire(struct tcp_reassembly *assy, uint64_t now)
return;
}
uint64_t high;
interval_t expire;
uint64_t len;
struct segment *seg = NULL;
while ((seg = segment_list_get_oldest(&assy->list)) != NULL)
while (!list_empty(&assy->list_root))
{
seg = list_first_entry(&assy->list_root, struct segment, list_node);
if (seg->time + assy->opts.max_timeout > now)
{
break;
}
high = (uint64_t)seg->offset + (uint64_t)seg->len - 1;
expire = {
.low = seg->offset,
.high = high,
.data = seg,
};
assy->stat.timeout_discard_segments++;
assy->stat.timeout_discard_bytes += seg->len;
TCP_REASSEMBLE_DEBUG("reassembler %p expire segment %p [%lu, %lu] (time: %lu, now: %lu)", assy, seg, seg->offset, high, seg->time, now);
len = seg->tree_node.last - seg->tree_node.start + 1;
itree_remove(assy->itree, &expire);
assy->stat.timeout_discard_segments++;
assy->stat.timeout_discard_bytes += len;
assy->stat.curr_segments--;
assy->stat.curr_bytes -= len;
TCP_REASSEMBLE_DEBUG("reassembler %p expire segment %p [%lu, %lu] (time: %lu, now: %lu)", assy, seg, seg->tree_node.start, seg->tree_node.last, seg->time, now);
interval_tree_remove(&seg->tree_node, &assy->tree_root);
list_del(&seg->list_node);
free(seg);
seg = NULL;
}
}
@@ -205,8 +126,6 @@ void tcp_reassembly_insert(struct tcp_reassembly *assy, uint32_t offset, const c
uint64_t low = (uint64_t)offset;
uint64_t high = (uint64_t)offset + (uint64_t)len - 1; // from uint32_t to uint64_t, so no overflow
struct segment *seg = NULL;
interval_t insert;
assy->stat.insert_segments++;
assy->stat.insert_bytes += len;
@@ -235,35 +154,29 @@ void tcp_reassembly_insert(struct tcp_reassembly *assy, uint32_t offset, const c
return;
}
seg = segment_new(len);
struct segment *seg = (struct segment *)calloc(1, sizeof(struct segment) + len);
if (seg == NULL)
{
assy->stat.overload_bypass_segments++;
assy->stat.overload_bypass_bytes += len;
TCP_REASSEMBLE_DEBUG("reassembler %p insert [%lu, %lu] failed, calloc segment failed", assy, low, high);
return;
}
seg->assy = assy;
seg->id = assy->stat.insert_segments;
seg->tree_node.start = low;
seg->tree_node.last = high;
seg->time = now;
seg->offset = offset;
seg->len = len;
seg->id = assy->stat.insert_segments;
seg->payload = (char *)seg + sizeof(struct segment);
memcpy(seg->payload, payload, len);
insert = {
.low = low,
.high = high,
.data = seg,
};
if (itree_insert(assy->itree, &insert) == 0)
{
free(seg);
return;
}
TCP_REASSEMBLE_DEBUG("reassembler %p insert segment %p [%lu, %lu]", assy, seg, insert.low, insert.high);
segment_list_add(&assy->list, seg);
list_add_tail(&seg->list_node, &assy->list_root);
interval_tree_insert(&seg->tree_node, &assy->tree_root);
TCP_REASSEMBLE_DEBUG("reassembler %p insert segment %p [%lu, %lu]", assy, seg, low, high);
assy->stat.curr_segments++;
assy->stat.curr_bytes += seg->len;
assy->stat.curr_bytes += len;
}
const char *tcp_reassembly_peek(struct tcp_reassembly *assy, uint32_t *len)
@@ -275,89 +188,49 @@ const char *tcp_reassembly_peek(struct tcp_reassembly *assy, uint32_t *len)
return NULL;
}
int count = 0;
interval_t peek;
uint64_t overlap = 0;
uint64_t min_id = UINT64_MAX;
uint64_t id = UINT64_MAX;
struct segment *seg = NULL;
ilist_t *list = NULL;
ilisttrav_t *trav = NULL;
interval_t *query = NULL;
interval_t *oldest = NULL;
struct interval_tree_node *tree_node = NULL;
struct interval_tree_node *oldest_node = NULL;
tree_node = interval_tree_iter_first(&assy->tree_root, assy->exp_seq, assy->exp_seq);
while (tree_node)
{
seg = container_of(tree_node, struct segment, tree_node);
if (seg->id < id)
{
id = seg->id;
oldest_node = tree_node;
}
tree_node = interval_tree_iter_next(tree_node, assy->exp_seq, assy->exp_seq);
}
peek = {
.low = assy->exp_seq,
.high = assy->exp_seq,
};
list = itree_findall(assy->itree, &peek);
if (list == NULL)
if (oldest_node == NULL)
{
return NULL;
}
count = ilist_size(list);
trav = ilisttrav_new(list);
for (int i = 0; i < count; i++)
uint64_t payload_len = oldest_node->last - oldest_node->start + 1;
seg = container_of(oldest_node, struct segment, tree_node);
if (oldest_node->start < assy->exp_seq)
{
if (i == 0)
{
query = (interval_t *)ilisttrav_first(trav);
}
else
{
query = (interval_t *)ilisttrav_next(trav);
}
seg = (struct segment *)query->data;
if (seg->id < min_id)
{
min_id = seg->id;
oldest = query;
}
}
ilisttrav_delete(trav);
ilist_delete(list);
if (oldest == NULL)
{
return NULL;
}
seg = (struct segment *)oldest->data;
if (seg->offset < assy->exp_seq)
{
overlap = assy->exp_seq - seg->offset;
*len = seg->len - overlap;
TCP_REASSEMBLE_DEBUG("reassembler %p peek [%lu, +∞], found segment %p [%lu, %lu] (left overlap: %lu)", assy, assy->exp_seq, seg, oldest->low, oldest->high, overlap);
uint64_t overlap = assy->exp_seq - oldest_node->start;
*len = (uint16_t)(payload_len - overlap);
TCP_REASSEMBLE_DEBUG("reassembler %p peek [%lu, +∞], found segment %p [%lu, %lu] (left overlap: %lu)", assy, assy->exp_seq, seg, oldest_node->start, oldest_node->last, overlap);
return seg->payload + overlap;
}
TCP_REASSEMBLE_DEBUG("reassembler %p peek [%lu, +∞], found segment %p [%lu, %lu]", assy, assy->exp_seq, seg, oldest->low, oldest->high);
TCP_REASSEMBLE_DEBUG("reassembler %p peek [%lu, +∞], found segment %p [%lu, %lu]", assy, assy->exp_seq, seg, oldest_node->start, oldest_node->last);
*len = seg->len;
*len = (uint16_t)payload_len;
return seg->payload;
}
void tcp_reassembly_consume(struct tcp_reassembly *assy, uint32_t len)
{
if (!assy->opts.enable)
if (!assy->opts.enable || len == 0)
{
return;
}
if (len == 0)
{
return;
}
int count;
uint64_t old_exp_seq;
uint64_t new_exp_seq;
interval_t consume;
ilist_t *list = NULL;
interval_t *del = NULL;
ilisttrav_t *trav = NULL;
struct segment *seg = NULL;
/*
* https://www.ietf.org/rfc/rfc0793.txt
*
@@ -376,52 +249,46 @@ void tcp_reassembly_consume(struct tcp_reassembly *assy, uint32_t len)
* seq range: [0, 4294967295]
* seq range: [0, UINT32_MAX]
*/
old_exp_seq = assy->exp_seq;
uint64_t old_exp_seq = assy->exp_seq;
assy->exp_seq += len;
if (assy->exp_seq > UINT32_MAX)
{
assy->exp_seq = assy->exp_seq % 4294967296;
}
new_exp_seq = assy->exp_seq;
uint64_t new_exp_seq = assy->exp_seq;
TCP_REASSEMBLE_DEBUG("reassembler %p consume [%lu, %lu], update expect seq %lu -> %lu", assy, old_exp_seq, old_exp_seq + len - 1, old_exp_seq, new_exp_seq);
consume =
{
.low = old_exp_seq,
.high = old_exp_seq + len - 1,
};
list = itree_findall(assy->itree, &consume);
if (list == NULL)
{
return;
}
assy->stat.consume_segments++;
assy->stat.consume_bytes += len;
count = ilist_size(list);
trav = ilisttrav_new(list);
for (int i = 0; i < count; i++)
struct interval_tree_node *node = interval_tree_iter_first(&assy->tree_root, old_exp_seq, old_exp_seq + len - 1);
while (node)
{
if (i == 0)
if (before(node->last, new_exp_seq))
{
del = (interval_t *)ilisttrav_first(trav);
struct segment *seg = container_of(node, struct segment, tree_node);
uint32_t len = node->last - node->start + 1;
assy->stat.remove_segments++;
assy->stat.remove_bytes += len;
assy->stat.curr_segments--;
assy->stat.curr_bytes -= len;
TCP_REASSEMBLE_DEBUG("reassembler %p consume [%lu, %lu], delete segment %p [%lu, %lu]", assy, old_exp_seq, old_exp_seq + len - 1, node, node->start, node->last);
interval_tree_remove(node, &assy->tree_root);
list_del(&seg->list_node);
free(seg);
node = interval_tree_iter_first(&assy->tree_root, old_exp_seq, old_exp_seq + len - 1);
}
else
{
del = (interval_t *)ilisttrav_next(trav);
}
if (del && before(del->high, new_exp_seq))
{
seg = (struct segment *)del->data;
assy->stat.remove_segments++;
assy->stat.remove_bytes += seg->len;
TCP_REASSEMBLE_DEBUG("reassembler %p consume [%lu, %lu], delete segment %p [%lu, %lu]", assy, old_exp_seq, old_exp_seq + len - 1, seg, del->low, del->high);
itree_remove(assy->itree, del);
node = interval_tree_iter_next(node, old_exp_seq, old_exp_seq + len - 1);
}
}
ilisttrav_delete(trav);
ilist_delete(list);
}
struct tcp_reassembly_stat *tcp_reassembly_get_stat(struct tcp_reassembly *assy)