This repository has been archived on 2025-09-14. You can view files and clone it, but cannot push or open issues or pull requests.
Files
stellar-stellar/infra/ip_reassembly/ip_reassembly.c
2024-08-28 17:58:16 +08:00

1042 lines
31 KiB
C

#include <stdlib.h>
#include <string.h>
#include <sys/queue.h>
#include <assert.h>
#include "checksum.h"
#include "crc32_hash.h"
#include "log_private.h"
#include "ip_reassembly.h"
#include "packet_parser.h"
#include "packet_helper.h"
#include "packet_private.h"
// Logging helpers: forward to STELLAR_LOG_DEBUG / STELLAR_LOG_ERROR using
// __thread_local_logger and the fixed module string "ip_reassembly".
#define IP_REASSEMBLE_DEBUG(format, ...) STELLAR_LOG_DEBUG(__thread_local_logger, "ip_reassembly", format, ##__VA_ARGS__)
#define IP_REASSEMBLE_ERROR(format, ...) STELLAR_LOG_ERROR(__thread_local_logger, "ip_reassembly", format, ##__VA_ARGS__)
// Number of 64-bit words of ip_frag_key.src_dst_addr that carry address data
// (stored in ip_frag_key.src_dst_len and used as the compare length).
#define IPV4_KEYLEN 1
#define IPV6_KEYLEN 4
// Initial value passed to crc32_hash_4byte() when hashing a fragment key.
#define PRIME_VALUE 0xeaad8405
// Multiplier applied to the table size in ip_reassembly_new(); each lookup
// probes two buckets, one per hash signature.
#define IP_FRAG_HASH_FNUM 2
// Pointer to the first table entry of the bucket selected by signature `sig`.
#define IP_FRAG_TBL_POS(assy, sig) ((assy)->table + ((sig) & (assy)->entry_mask))
/*
 * Render the source and destination addresses of a fragment key as text.
 *
 * key     : pointer to a key with src_dst_addr[] and src_dst_len members
 * src_str : output buffer of at least INET6_ADDRSTRLEN bytes (source address)
 * dst_str : output buffer of at least INET6_ADDRSTRLEN bytes (destination address)
 *
 * For IPv4 keys the source address is the upper 32 bits of src_dst_addr[0]
 * and the destination address the lower 32 bits. For all other keys the
 * source is read from src_dst_addr[0..1] and the destination from
 * src_dst_addr[2..3].
 *
 * The first buffer parameter used to be spelled `str_str`, so the macro only
 * compiled when the caller's variable happened to be named `src_str`.
 */
#define KEY_TO_STR(key, src_str, dst_str)                                              \
    do                                                                                 \
    {                                                                                  \
        if ((key)->src_dst_len == IPV4_KEYLEN)                                         \
        {                                                                              \
            uint32_t src_addr = (key)->src_dst_addr[0] >> 32;                          \
            uint32_t dst_addr = (key)->src_dst_addr[0] & 0xffffffff;                   \
            inet_ntop(AF_INET, &src_addr, (src_str), INET6_ADDRSTRLEN);                \
            inet_ntop(AF_INET, &dst_addr, (dst_str), INET6_ADDRSTRLEN);                \
        }                                                                              \
        else                                                                           \
        {                                                                              \
            inet_ntop(AF_INET6, &(key)->src_dst_addr[0], (src_str), INET6_ADDRSTRLEN); \
            inet_ntop(AF_INET6, &(key)->src_dst_addr[2], (dst_str), INET6_ADDRSTRLEN); \
        }                                                                              \
    } while (0)
/*
 * Debug-log `desc` followed by the key's "src->dst" addresses and its ip_id in hex.
 * The address text is produced by KEY_TO_STR into two local buffers.
 * Extra variadic arguments are accepted but not used.
 */
#define IP_REASSEMBLE_DEBUG1(desc, key, ...) \
do \
{ \
char src_str[INET6_ADDRSTRLEN] = {0}; \
char dst_str[INET6_ADDRSTRLEN] = {0}; \
KEY_TO_STR(key, src_str, dst_str); \
IP_REASSEMBLE_DEBUG("%s (%s->%s 0x%0x)", desc, src_str, dst_str, (key)->ip_id); \
} while (0)
/*
 * Error-log `desc` followed by the key's "src->dst" addresses and its ip_id in hex.
 * The address text is produced by KEY_TO_STR into two local buffers.
 * Extra variadic arguments are accepted but not used.
 */
#define IP_REASSEMBLE_ERROR1(desc, key, ...) \
do \
{ \
char src_str[INET6_ADDRSTRLEN] = {0}; \
char dst_str[INET6_ADDRSTRLEN] = {0}; \
KEY_TO_STR(key, src_str, dst_str); \
IP_REASSEMBLE_ERROR("%s (%s->%s 0x%0x)", desc, src_str, dst_str, (key)->ip_id); \
} while (0)
/******************************************************************************
* Structs
******************************************************************************/
// Slot layout of ip_frag_pkt.frags[] and the per-datagram fragment limit.
enum
{
IP_LAST_FRAG_IDX, // slot 0: reserved for the last fragment
IP_FIRST_FRAG_IDX, // slot 1: reserved for the first fragment
IP_MIN_FRAG_NUM, // slot 2: first slot handed out to middle fragments
IP_MAX_FRAG_NUM = 64, // number of slots in frags[]
};
// Header bytes captured from the first fragment (see ip_frag_hdr_init) and
// copied in front of the reassembled payload.
struct ip_frag_hdr
{
void *hdr_data; // malloc'ed copy of the first hdr_len bytes of the packet; released by ip_frag_hdr_free()
uint16_t hdr_len; // l3_offset + l3_len
uint16_t l3_offset; // hdr_offset of the fragment layer
uint16_t l3_len; // hdr_len of the fragment layer
uint8_t next_proto; // IPv4 protocol field, or the next-header value of the IPv6 fragment extension
};
// One buffered fragment payload.
struct ip_frag
{
void *data; // malloc'ed copy of the fragment payload; released by ip_frag_free()
uint16_t len; // payload length in bytes
uint16_t offset; // fragment offset as supplied by the caller of ip_frag_init()
};
// Identity of a fragmented datagram. An entry whose src_dst_len is 0 is
// treated as empty (see ip_frag_key_is_empty).
struct ip_frag_key
{
uint64_t src_dst_addr[4]; // src and dst address (only first 8 bytes used for IPv4)
uint32_t src_dst_len; // IPV4_KEYLEN or IPV6_KEYLEN: number of src_dst_addr words that are compared
uint32_t ip_id; // ipv4: identification is uint16_t; ipv6: identification is uint32_t
uint8_t proto; // stored with the key but not consulted by ip_frag_key_cmp() or the hash functions
};
// One hash-table entry: the reassembly state of a single fragmented datagram.
struct ip_frag_pkt
{
// Queue linkage used with the TAILQ_* macros on ip_reassembly.lru.
struct
{
struct ip_frag_pkt *tqe_next;
struct ip_frag_pkt **tqe_prev;
} lru;
struct ip_frag_key key; // identity of the datagram; zeroed when the entry is released
struct ip_frag_hdr hdr; // header bytes taken from the first fragment
uint64_t create_time; // `now` value supplied when the entry was initialised; compared against the timeout
uint32_t expected_total_size; // offset + length of the last fragment; UINT32_MAX until it arrives
uint32_t received_frag_size; // sum of the lengths of all buffered fragments
uint32_t next_fill_idx; // next frags[] slot to hand out to a middle fragment
struct ip_frag frags[IP_MAX_FRAG_NUM]; // first two entries in the frags[] array are for the last and first fragments.
};
// Reassembly context: options, counters and the fragment hash table.
struct ip_reassembly
{
// options
bool enable; // when false, ip_reassembly_packet() returns NULL immediately
uint32_t timeout; // entry lifetime, in the same unit as the `now` arguments
uint32_t bucket_entries; // number of consecutive table entries scanned per bucket
// runtime
uint32_t entry_used; // entries currently linked into the lru queue
uint32_t entry_total; // number of elements in table[]
uint32_t entry_mask; // applied to a hash signature to obtain a bucket start index
// stats
struct ip_reassembly_stat stat;
// hash table
// Queue head used with the TAILQ_* macros; entries are appended when (re)initialised.
struct
{
struct ip_frag_pkt *tqh_first;
struct ip_frag_pkt **tqh_last;
} lru;
struct ip_frag_pkt *last; // entry most recently returned by ip_reassembly_update_frag_pkt(); checked first on lookup
struct ip_frag_pkt *table; // array of ip_frag_pkt
};
/******************************************************************************
* utils
******************************************************************************/
/*
 * Increment the per-family counter `ip4_<field>` or `ip6_<field>` of `stat`,
 * selected by whether the key is an IPv4 key (src_dst_len == IPV4_KEYLEN).
 *
 * Wrapped in do { } while (0) so that the macro behaves as a single statement
 * and can be used safely in an unbraced if/else followed by a semicolon.
 */
#define IP_REASSEMBLY_STAT_INC(stat, field, key) \
    do                                           \
    {                                            \
        if ((key)->src_dst_len == IPV4_KEYLEN)   \
        {                                        \
            (stat)->ip4_##field++;               \
        }                                        \
        else                                     \
        {                                        \
            (stat)->ip6_##field++;               \
        }                                        \
    } while (0)
/*
 * Return a freshly malloc'ed copy of the first `len` bytes at `src`.
 * Returns NULL when `src` is NULL, `len` is 0, or the allocation fails.
 * The caller owns the returned buffer and must free() it.
 */
static inline void *memdup(const void *src, size_t len)
{
    void *copy = NULL;

    if (src != NULL && len != 0)
    {
        copy = malloc(len);
        if (copy != NULL)
        {
            memcpy(copy, src, len);
        }
    }
    return copy;
}
/*
 * Propagate the most significant set bit of `x` into every lower bit
 * position, e.g. 0x10 -> 0x1f. Returns 0 for 0.
 */
static inline uint32_t combine32ms1b(uint32_t x)
{
    for (unsigned int shift = 1; shift <= 16; shift <<= 1)
    {
        x |= x >> shift;
    }
    return x;
}
/*
 * Round `x` up to the next power of two (values that already are a power of
 * two are returned unchanged). The arithmetic is modulo 2^32, so both 0 and
 * any value above 2^31 yield 0.
 */
static inline uint32_t align32pow2(uint32_t x)
{
    const uint32_t below = x - 1;

    return combine32ms1b(below) + 1;
}
/*
 * Return 1 when `n` has exactly one bit set, 0 otherwise (including n == 0).
 */
static inline int is_power_of_2(uint32_t n)
{
    if (n == 0)
    {
        return 0;
    }
    return (n & (n - 1)) == 0;
}
/*
 * Validate the user supplied options.
 *
 * Returns 0 when the options are usable and -1 otherwise. When reassembly is
 * disabled only the pointer itself is checked; timeout, bucket_entries and
 * bucket_num are validated only if opts->enable is set.
 */
static int check_options(const struct ip_reassembly_options *opts)
{
    if (opts == NULL)
    {
        IP_REASSEMBLE_DEBUG("invalid options");
        return -1;
    }

    // nothing else to validate when the feature is switched off
    if (!opts->enable)
    {
        return 0;
    }

    if (opts->timeout < 1 || opts->timeout > 60000)
    {
        IP_REASSEMBLE_DEBUG("invalid timeout: %u, supported range: [1, 60000]", opts->timeout);
        return -1;
    }

    if (opts->bucket_entries < 1 || !is_power_of_2(opts->bucket_entries))
    {
        IP_REASSEMBLE_DEBUG("invalid bucket_entries: %u, must be power of 2", opts->bucket_entries);
        return -1;
    }

    if (opts->bucket_num == 0)
    {
        IP_REASSEMBLE_DEBUG("invalid bucket_num: %u, supported range: [1, 4294967295]", opts->bucket_num);
        return -1;
    }

    return 0;
}
/******************************************************************************
* ip frag key
******************************************************************************/
/*
 * Compute the two bucket signatures of an IPv4 fragment key.
 *
 * The hash chains crc32_hash_4byte() over the two 32-bit halves of
 * src_dst_addr[0] and the ip_id, starting from PRIME_VALUE.
 * *value1 receives the hash, *value2 a second signature derived from it.
 *
 * The 32-bit words are extracted with memcpy() instead of reading the
 * uint64_t array through a uint32_t pointer, which violated the strict
 * aliasing rule; the words read are the same as before.
 */
static inline void ipv4_frag_key_hash(const struct ip_frag_key *key, uint32_t *value1, uint32_t *value2)
{
    uint32_t words[2];
    memcpy(words, key->src_dst_addr, sizeof(words));

    uint32_t v = crc32_hash_4byte(words[0], PRIME_VALUE);
    v = crc32_hash_4byte(words[1], v);
    v = crc32_hash_4byte(key->ip_id, v);

    *value1 = v;
    *value2 = (v << 7) + (v >> 14);
}
/*
 * Compute the two bucket signatures of an IPv6 fragment key.
 *
 * The hash chains crc32_hash_4byte() over the eight 32-bit words of
 * src_dst_addr[] (in memory order) and the ip_id, starting from PRIME_VALUE.
 * *value1 receives the hash, *value2 a second signature derived from it.
 *
 * The 32-bit words are extracted with memcpy() instead of reading the
 * uint64_t array through a uint32_t pointer, which violated the strict
 * aliasing rule; the words read are the same as before.
 */
static inline void ipv6_frag_key_hash(const struct ip_frag_key *key, uint32_t *value1, uint32_t *value2)
{
    uint32_t words[8];
    memcpy(words, key->src_dst_addr, sizeof(words));

    uint32_t v = PRIME_VALUE;
    for (size_t i = 0; i < sizeof(words) / sizeof(words[0]); i++)
    {
        v = crc32_hash_4byte(words[i], v);
    }
    v = crc32_hash_4byte(key->ip_id, v);

    *value1 = v;
    *value2 = (v << 7) + (v >> 14);
}
/*
 * Compare two fragment keys.
 *
 * Returns 0 when ip_id, src_dst_len and the first src_dst_len address words
 * are all equal, 1 otherwise. The proto member is not compared.
 */
static inline uint64_t ip_frag_key_cmp(const struct ip_frag_key *key1, const struct ip_frag_key *key2)
{
    if (key1->ip_id != key2->ip_id || key1->src_dst_len != key2->src_dst_len)
    {
        return 1;
    }

    const uint32_t words = key1->src_dst_len;
    uint32_t idx = 0;
    while (idx < words && key1->src_dst_addr[idx] == key2->src_dst_addr[idx])
    {
        idx++;
    }
    return (idx == words) ? 0 : 1;
}
/*
 * Return 1 when the key marks an unused table entry (src_dst_len == 0),
 * 0 otherwise.
 */
static inline int ip_frag_key_is_empty(const struct ip_frag_key *key)
{
    return (key->src_dst_len != 0) ? 0 : 1;
}
/*
 * Reset every member of the key to 0, which also marks the owning table
 * entry as empty.
 */
static inline void ip_frag_key_zero(struct ip_frag_key *key)
{
    const size_t words = sizeof(key->src_dst_addr) / sizeof(key->src_dst_addr[0]);

    for (size_t i = 0; i < words; i++)
    {
        key->src_dst_addr[i] = 0;
    }
    key->proto = 0;
    key->ip_id = 0;
    key->src_dst_len = 0;
}
/******************************************************************************
* ip frag hdr
******************************************************************************/
static inline void ip_frag_hdr_init(struct ip_frag_hdr *hdr, const struct packet *pkt)
{
struct layer_private *layer = pkt->frag_layer;
if (layer->proto == LAYER_PROTO_IPV6)
{
struct ip6_frag *frag_ext = ip6_hdr_get_frag_ext((const struct ip6_hdr *)layer->hdr_ptr);
hdr->next_proto = frag_ext->ip6f_nxt;
}
else
{
hdr->next_proto = ip4_hdr_get_proto((const struct ip *)layer->hdr_ptr);
}
hdr->l3_offset = layer->hdr_offset;
hdr->l3_len = layer->hdr_len;
hdr->hdr_len = layer->hdr_offset + layer->hdr_len;
hdr->hdr_data = memdup(pkt->data_ptr, hdr->hdr_len);
}
/*
 * Release the duplicated header bytes and reset every member of `hdr` to 0/NULL.
 */
static inline void ip_frag_hdr_free(struct ip_frag_hdr *hdr)
{
    // free(NULL) is a no-op, so no guard is needed
    free(hdr->hdr_data);
    hdr->hdr_data = NULL;

    hdr->hdr_len = 0;
    hdr->l3_len = 0;
    hdr->l3_offset = 0;
    hdr->next_proto = 0;
}
/******************************************************************************
* ip frag
******************************************************************************/
/*
 * Fill a fragment slot: store `len` and `offset` and keep a private copy of
 * the `len` payload bytes at `data`. frag->data is NULL when memdup() returns
 * NULL (NULL input, zero length or allocation failure).
 */
static inline void ip_frag_init(struct ip_frag *frag, void *data, uint16_t len, uint16_t offset)
{
    frag->offset = offset;
    frag->len = len;
    frag->data = memdup(data, len);
}
/*
 * Release a fragment slot.
 *
 * Returns 0 when a payload buffer was actually freed, -1 when `frag` is NULL
 * or held no data. len and offset are reset whenever `frag` is non-NULL.
 */
static inline int ip_frag_free(struct ip_frag *frag)
{
    if (frag == NULL)
    {
        return -1;
    }

    int released = -1;
    if (frag->data != NULL)
    {
        free(frag->data);
        frag->data = NULL;
        released = 0;
    }
    frag->offset = 0;
    frag->len = 0;
    return released;
}
/******************************************************************************
* ip frag pkt
******************************************************************************/
static inline void ip_frag_pkt_init(struct ip_frag_pkt *frag_pkt, const struct ip_frag_key *key, uint64_t now)
{
static const struct ip_frag zero_frag = {
.data = NULL,
.len = 0,
.offset = 0,
};
frag_pkt->lru.tqe_next = NULL;
frag_pkt->lru.tqe_prev = NULL;
frag_pkt->key = *key;
frag_pkt->create_time = now;
frag_pkt->expected_total_size = UINT32_MAX;
frag_pkt->received_frag_size = 0;
frag_pkt->next_fill_idx = IP_MIN_FRAG_NUM;
frag_pkt->frags[IP_LAST_FRAG_IDX] = zero_frag;
frag_pkt->frags[IP_FIRST_FRAG_IDX] = zero_frag;
}
static inline void ip_frag_pkt_clean(struct ip_reassembly_stat *stat, struct ip_frag_pkt *frag_pkt)
{
for (uint32_t i = 0; i < IP_MAX_FRAG_NUM; i++)
{
struct ip_frag *frag = &frag_pkt->frags[i];
if (ip_frag_free(frag) == 0)
{
IP_REASSEMBLY_STAT_INC(stat, frags_freed, &frag_pkt->key);
}
}
ip_frag_key_zero(&frag_pkt->key);
ip_frag_hdr_free(&frag_pkt->hdr);
}
/*
 * Return non-zero when the entry can be reassembled: the first fragment is
 * buffered and the number of received bytes equals the expected total size.
 */
static inline int ip_frag_pkt_is_ready(struct ip_frag_pkt *frag_pkt)
{
    if (frag_pkt->received_frag_size != frag_pkt->expected_total_size)
    {
        return 0;
    }
    return frag_pkt->frags[IP_FIRST_FRAG_IDX].data != NULL;
}
/*
 * Start tracking a new datagram in the table entry `frag_pkt`: initialise the
 * entry for `key`, append it to the tail of the lru queue, count it as used
 * and bump the defrags_expected counter.
 */
static inline void ip_reassembly_add_frag_pkt(struct ip_reassembly *assy, struct ip_frag_pkt *frag_pkt, const struct ip_frag_key *key, uint64_t now)
{
    ip_frag_pkt_init(frag_pkt, key, now);
    assy->entry_used++;
    TAILQ_INSERT_TAIL(&assy->lru, frag_pkt, lru);
    IP_REASSEMBLY_STAT_INC(&assy->stat, defrags_expected, &frag_pkt->key);
}
/*
 * Stop tracking a datagram: unlink the entry from the lru queue, decrement
 * the used-entry count and release all buffers held by the entry.
 */
static inline void ip_reassembly_del_frag_pkt(struct ip_reassembly *assy, struct ip_frag_pkt *frag_pkt)
{
    assy->entry_used--;
    TAILQ_REMOVE(&assy->lru, frag_pkt, lru);
    ip_frag_pkt_clean(&assy->stat, frag_pkt);
}
/*
 * Buffer one fragment in the reassembly entry `frag_pkt`.
 *
 * assy        : reassembly context (statistics, entry bookkeeping)
 * frag_pkt    : entry the fragment belongs to
 * pkt         : packet carrying the fragment (its header is captured for the first fragment)
 * frag_data   : start of the fragment payload
 * frag_len    : payload length in bytes
 * frag_offset : offset of the payload inside the original datagram
 * more_frags  : false for the last fragment
 *
 * return 0  : success (the fragment was buffered, or was a duplicate first/last
 *             fragment and has been ignored)
 * return -1 : failed; the entry has been deleted and must not be used any more.
 *             This happens when the number of middle fragments exceeds the
 *             capacity of frags[], or when the header / payload copy could
 *             not be allocated.
 *
 * Previously a failed copy went unnoticed: the fragment length was still
 * accounted for and reassembly would later memcpy() from a NULL pointer.
 */
static inline int ip_frag_pkt_update(struct ip_reassembly *assy,
                                     struct ip_frag_pkt *frag_pkt, const struct packet *pkt,
                                     char *frag_data, uint16_t frag_len, uint16_t frag_offset, bool more_frags)
{
    uint32_t idx;
    /*
     * Internet Protocol, Version 6 (IPv6) Specification
     *
     * https://datatracker.ietf.org/doc/html/rfc8200#section-4.5
     *
     * It should be noted that fragments may be duplicated in the
     * network. Instead of treating these exact duplicate fragments
     * as overlapping fragments, an implementation may choose to
     * detect this case and drop exact duplicate fragments while
     * keeping the other fragments belonging to the same packet.
     */
    if (frag_offset == 0)
    {
        if (frag_pkt->frags[IP_FIRST_FRAG_IDX].data != NULL)
        {
            IP_REASSEMBLE_DEBUG1("duplicate first fragment bypass", &frag_pkt->key);
            IP_REASSEMBLY_STAT_INC(&assy->stat, frags_bypass_dup_fist_frag, &frag_pkt->key);
            return 0;
        }
        idx = IP_FIRST_FRAG_IDX;
        ip_frag_hdr_init(&frag_pkt->hdr, pkt);
        if (frag_pkt->hdr.hdr_data == NULL && frag_pkt->hdr.hdr_len > 0)
        {
            // without the header copy the datagram can never be rebuilt
            IP_REASSEMBLE_ERROR1("unable to copy fragment header", &frag_pkt->key);
            ip_reassembly_del_frag_pkt(assy, frag_pkt);
            return -1;
        }
    }
    else if (more_frags == 0)
    {
        if (frag_pkt->frags[IP_LAST_FRAG_IDX].data != NULL)
        {
            IP_REASSEMBLE_DEBUG1("duplicate last fragment bypass", &frag_pkt->key);
            IP_REASSEMBLY_STAT_INC(&assy->stat, frags_bypass_dup_last_frag, &frag_pkt->key);
            return 0;
        }
        idx = IP_LAST_FRAG_IDX;
        // the last fragment fixes the total payload size of the datagram
        frag_pkt->expected_total_size = frag_offset + frag_len;
    }
    else
    {
        if (frag_pkt->next_fill_idx >= IP_MAX_FRAG_NUM)
        {
            IP_REASSEMBLE_ERROR1("max number of fragment exceeded", &frag_pkt->key);
            IP_REASSEMBLY_STAT_INC(&assy->stat, defrags_failed_too_many_frag, &frag_pkt->key);
            ip_reassembly_del_frag_pkt(assy, frag_pkt);
            return -1;
        }
        idx = frag_pkt->next_fill_idx;
        frag_pkt->next_fill_idx++;
    }

    struct ip_frag *frag = &frag_pkt->frags[idx];
    ip_frag_init(frag, frag_data, frag_len, frag_offset);
    if (frag->data == NULL && frag_len > 0)
    {
        // payload copy failed: drop the whole entry instead of counting bytes we do not hold
        IP_REASSEMBLE_ERROR1("unable to copy fragment data", &frag_pkt->key);
        ip_reassembly_del_frag_pkt(assy, frag_pkt);
        return -1;
    }

    frag_pkt->received_frag_size += frag_len;
    IP_REASSEMBLY_STAT_INC(&assy->stat, frags_buffered, &frag_pkt->key);
    return 0;
}
/*
 * Look up the table entry that tracks `key`.
 *
 * The most recently used entry (assy->last) is checked first; on a hit it is
 * returned directly and *free / *expired are left untouched. Otherwise the
 * two buckets selected by the key's hash signatures are scanned slot by
 * slot, alternating between the first and the second bucket.
 *
 * if return NULL, then *free and *expired are valid
 * free   : the first empty entry in the bucket (NULL if none)
 * expired: the first timed-out entry in the bucket (NULL if none)
 *
 * On a hit found by the bucket scan both outputs are set to NULL.
 */
static struct ip_frag_pkt *ip_reassembly_find_frag_pkt(struct ip_reassembly *assy, const struct ip_frag_key *key, struct ip_frag_pkt **free, struct ip_frag_pkt **expired, uint64_t now)
{
    struct ip_frag_pkt *cached = assy->last;
    if (cached != NULL && ip_frag_key_cmp(key, &cached->key) == 0)
    {
        return cached;
    }

    uint32_t sig_a = 0;
    uint32_t sig_b = 0;
    if (key->src_dst_len == IPV4_KEYLEN)
    {
        ipv4_frag_key_hash(key, &sig_a, &sig_b);
    }
    else
    {
        ipv6_frag_key_hash(key, &sig_a, &sig_b);
    }

    // the two candidate buckets, in the order they are probed for each slot
    struct ip_frag_pkt *const buckets[] = {
        IP_FRAG_TBL_POS(assy, sig_a),
        IP_FRAG_TBL_POS(assy, sig_b),
    };
    const size_t bucket_cnt = sizeof(buckets) / sizeof(buckets[0]);

    struct ip_frag_pkt *first_expired = NULL;
    struct ip_frag_pkt *first_empty = NULL;
    const uint64_t timeout = assy->timeout;
    const uint32_t ways = assy->bucket_entries;

    for (uint32_t slot = 0; slot != ways; slot++)
    {
        for (size_t b = 0; b < bucket_cnt; b++)
        {
            struct ip_frag_pkt *entry = buckets[b] + slot;

            if (ip_frag_key_cmp(key, &entry->key) == 0)
            {
                *free = NULL;
                *expired = NULL;
                return entry;
            }

            if (ip_frag_key_is_empty(&entry->key))
            {
                if (first_empty == NULL)
                {
                    first_empty = entry;
                }
            }
            else if (timeout + entry->create_time <= now)
            {
                if (first_expired == NULL)
                {
                    first_expired = entry;
                }
            }
        }
    }

    *free = first_empty;
    *expired = first_expired;
    return NULL;
}
/*
 * Return the table entry that should receive the next fragment of `key`,
 * creating it if necessary. The returned entry is also remembered in
 * assy->last.
 *
 *  - existing, not expired entry : returned as is
 *  - existing but expired entry  : counted as a timeout, reset and reused
 *  - no entry, expired neighbour : the neighbour is counted as a timeout,
 *                                  released and reused for `key`
 *  - no entry, empty slot        : the empty slot is used
 *  - otherwise                   : NULL (bucket full); frags_bypass_no_buffer
 *                                  is incremented
 *
 * When an expired entry of a different datagram is evicted, the timeout is
 * attributed to the address family of the evicted entry rather than to the
 * family of the new key (the two can differ).
 */
static struct ip_frag_pkt *ip_reassembly_update_frag_pkt(struct ip_reassembly *assy, const struct ip_frag_key *key, uint64_t now)
{
    struct ip_frag_pkt *free = NULL;
    struct ip_frag_pkt *expired = NULL;
    struct ip_frag_pkt *frag_pkt = ip_reassembly_find_frag_pkt(assy, key, &free, &expired, now);

    if (frag_pkt != NULL)
    {
        if (assy->timeout + frag_pkt->create_time <= now)
        {
            // same datagram key, but the previous attempt timed out: start over
            IP_REASSEMBLE_DEBUG1("add ip frag pkt success: reuse expired entry", key);
            IP_REASSEMBLY_STAT_INC(&assy->stat, defrags_failed_timeout, key);
            ip_reassembly_del_frag_pkt(assy, frag_pkt);
            ip_reassembly_add_frag_pkt(assy, frag_pkt, key, now);
        }
        else
        {
            IP_REASSEMBLE_DEBUG1("find ip frag pkt success: not expire", key);
        }
        assy->last = frag_pkt;
        return frag_pkt;
    }

    if (expired != NULL)
    {
        IP_REASSEMBLE_DEBUG1("add ip frag pkt success: reuse expired entry", key);
        // must run before the delete: the delete zeroes the evicted entry's key
        IP_REASSEMBLY_STAT_INC(&assy->stat, defrags_failed_timeout, &expired->key);
        ip_reassembly_del_frag_pkt(assy, expired);
        ip_reassembly_add_frag_pkt(assy, expired, key, now);
        assy->last = expired;
        return expired;
    }

    if (free != NULL)
    {
        IP_REASSEMBLE_DEBUG1("add ip frag pkt success: use free entry", key);
        ip_reassembly_add_frag_pkt(assy, free, key, now);
        assy->last = free;
        return free;
    }

    // no space
    IP_REASSEMBLE_ERROR1("add ip frag pkt failed: bucket full", key);
    IP_REASSEMBLY_STAT_INC(&assy->stat, frags_bypass_no_buffer, key);
    return NULL;
}
/******************************************************************************
* frag reassemble
******************************************************************************/
static struct packet *ip_frag_reassemble(struct ip_reassembly *assy, struct ip_frag_pkt *frag_pkt)
{
struct ip_frag *first = &frag_pkt->frags[IP_FIRST_FRAG_IDX];
struct ip_frag *last = &frag_pkt->frags[IP_LAST_FRAG_IDX];
struct ip_frag *temp = NULL;
uint32_t loop = 0;
uint16_t last_offset = last->offset;
struct ip *ip4_hdr = NULL;
struct ip6_hdr *ip6_hdr = NULL;
// calculate the length of the reassembled packet
uint32_t packet_len = frag_pkt->expected_total_size + frag_pkt->hdr.hdr_len;
struct packet *pkt = packet_new(packet_len);
if (pkt == NULL)
{
IP_REASSEMBLE_ERROR("unable to allocate memory");
return NULL;
}
char *ptr = (char *)packet_get_raw_data(pkt);
char *end = ptr + packet_get_raw_len(pkt);
// copy last frag
if (last->len > end - ptr)
{
IP_REASSEMBLE_ERROR1("last frag length not match expected reassembled length", &frag_pkt->key);
goto error_out_invalid_length;
}
end -= last->len;
memcpy(end, last->data, last->len);
while (first->len != last_offset)
{
/*
* https://datatracker.ietf.org/doc/html/rfc791
*
* In the case that two or more fragments contain the same data
* either identically or through a partial overlap, this procedure
* will use the more recently arrived copy in the data buffer and
* datagram delivered.
*/
for (uint32_t i = frag_pkt->next_fill_idx - 1; i >= IP_MIN_FRAG_NUM; i--)
{
temp = &frag_pkt->frags[i];
if (temp->offset + temp->len == last_offset)
{
if (temp->len > end - ptr)
{
IP_REASSEMBLE_ERROR1("middle frag length not match expected reassembled length", &frag_pkt->key);
goto error_out_invalid_length;
}
end -= temp->len;
memcpy(end, temp->data, temp->len);
last_offset = temp->offset;
break;
}
}
if (loop > frag_pkt->next_fill_idx - IP_MIN_FRAG_NUM)
{
IP_REASSEMBLE_ERROR1("overlap appear during frag reassemble", &frag_pkt->key);
goto error_out_overlap;
}
loop++;
}
// copy fist fragment data
if (first->len > end - ptr)
{
IP_REASSEMBLE_ERROR1("first frag length not match expected reassembled length", &frag_pkt->key);
goto error_out_invalid_length;
}
end -= first->len;
memcpy(end, first->data, first->len);
// copy frag hdr
if (frag_pkt->hdr.hdr_len > end - ptr)
{
IP_REASSEMBLE_ERROR1("packet header length not match expected reassembled length", &frag_pkt->key);
goto error_out_invalid_length;
}
end -= frag_pkt->hdr.hdr_len;
memcpy(end, frag_pkt->hdr.hdr_data, frag_pkt->hdr.hdr_len);
// assert
assert(ptr == end);
if (frag_pkt->key.src_dst_len == IPV4_KEYLEN)
{
// update ip total length & ip checksum
ip4_hdr = (struct ip *)(ptr + frag_pkt->hdr.l3_offset);
ip4_hdr_set_total_len(ip4_hdr, packet_len - frag_pkt->hdr.l3_offset); // update total length
ip4_hdr_set_mf_flag(ip4_hdr, false); // update more fragment flag
ip4_hdr_set_frag_offset(ip4_hdr, 0); // update fragment offset
ip4_hdr->ip_sum = 0; // update checksum
ip4_hdr->ip_sum = checksum((const void *)ip4_hdr, frag_pkt->hdr.l3_len);
}
else
{
// update ipv6 payload length & next header
ip6_hdr = (struct ip6_hdr *)(ptr + frag_pkt->hdr.l3_offset);
ip6_hdr_set_payload_len(ip6_hdr, frag_pkt->expected_total_size); // update payload length
ip6_hdr_set_next_header(ip6_hdr, frag_pkt->hdr.next_proto); // update next header
}
// create a new packet
packet_parse(pkt, ptr, packet_len);
IP_REASSEMBLY_STAT_INC(&assy->stat, defrags_succeed, &frag_pkt->key);
ip_reassembly_del_frag_pkt(assy, frag_pkt);
return pkt;
error_out_invalid_length:
packet_free(pkt);
IP_REASSEMBLY_STAT_INC(&assy->stat, defrags_failed_invalid_length, &frag_pkt->key);
ip_reassembly_del_frag_pkt(assy, frag_pkt);
return NULL;
error_out_overlap:
packet_free(pkt);
IP_REASSEMBLY_STAT_INC(&assy->stat, defrags_failed_overlap, &frag_pkt->key);
ip_reassembly_del_frag_pkt(assy, frag_pkt);
return NULL;
}
/******************************************************************************
* Public API
******************************************************************************/
/*
 * Create a reassembly context from `opts`.
 *
 * Returns NULL when the options are invalid, the requested table is too
 * large, or memory cannot be allocated. When opts->enable is false the
 * context is returned without a hash table. The result must be released
 * with ip_reassembly_free().
 *
 * The table holds align32pow2(bucket_num) * bucket_entries * IP_FRAG_HASH_FNUM
 * entries. The product is computed in 64 bits; it used to be evaluated in
 * 32-bit arithmetic, so it wrapped around before the "too large" check could
 * ever fire. A bucket_num above 2^31, which align32pow2() wraps to 0, is
 * rejected for the same reason.
 */
struct ip_reassembly *ip_reassembly_new(const struct ip_reassembly_options *opts)
{
    if (check_options(opts) == -1)
    {
        return NULL;
    }

    struct ip_reassembly *assy = (struct ip_reassembly *)calloc(1, sizeof(struct ip_reassembly));
    if (assy == NULL)
    {
        IP_REASSEMBLE_ERROR("unable to allocate memory");
        return NULL;
    }

    assy->enable = opts->enable;
    assy->timeout = opts->timeout;
    assy->bucket_entries = opts->bucket_entries;
    if (!assy->enable)
    {
        return assy;
    }

    const uint32_t bucket_num = align32pow2(opts->bucket_num);
    // widen before multiplying so the range check sees the real product
    const uint64_t entry_total = (uint64_t)bucket_num * assy->bucket_entries * IP_FRAG_HASH_FNUM;
    if (bucket_num == 0 || entry_total > UINT32_MAX)
    {
        IP_REASSEMBLE_ERROR("bucket_num * bucket_entries is too large");
        free(assy);
        return NULL;
    }

    assy->entry_total = (uint32_t)entry_total;
    // keep only the bits that select the first entry of a bucket
    assy->entry_mask = (assy->entry_total - 1) & ~(assy->bucket_entries - 1);
    assy->table = (struct ip_frag_pkt *)calloc(assy->entry_total, sizeof(struct ip_frag_pkt));
    if (assy->table == NULL)
    {
        IP_REASSEMBLE_ERROR("unable to allocate memory");
        free(assy);
        return NULL;
    }

    TAILQ_INIT(&(assy->lru));
    return assy;
}
void ip_reassembly_free(struct ip_reassembly *assy)
{
if (assy)
{
if (assy->table)
{
for (uint32_t i = 0; i < assy->entry_total; i++)
{
ip_frag_pkt_clean(&assy->stat, assy->table + i);
}
free(assy->table);
assy->table = NULL;
}
free(assy);
assy = NULL;
}
}
/*
 * Discard timed-out reassembly entries.
 *
 * Entries are taken from the head of the lru queue for as long as the head
 * has expired (timeout + create_time <= now). The walk stops at the first
 * entry that has not expired, or once `max_free` entries were released
 * (at least one expired entry is released even when max_free is 0).
 * Every released entry is counted in defrags_failed_timeout.
 *
 * The queue head is re-read on every iteration. The former TAILQ_FOREACH
 * loop removed the current element and then advanced through that removed
 * element's link, which is not a supported way to traverse a tail queue.
 */
void ip_reassembly_expire(struct ip_reassembly *assy, uint64_t max_free, uint64_t now)
{
    uint64_t count = 0;
    struct ip_frag_pkt *frag_pkt = NULL;
    uint64_t timeout = assy->timeout;

    while ((frag_pkt = TAILQ_FIRST(&assy->lru)) != NULL)
    {
        if (timeout + frag_pkt->create_time > now)
        {
            // the head is still alive, so nothing behind it is examined
            break;
        }

        IP_REASSEMBLE_DEBUG1("expire ip frag pkt: discarding old fragmented packets", &frag_pkt->key);
        IP_REASSEMBLY_STAT_INC(&assy->stat, defrags_failed_timeout, &frag_pkt->key);
        ip_reassembly_del_frag_pkt(assy, frag_pkt);

        count++;
        if (count >= max_free)
        {
            break;
        }
    }
}
/*
 * Return a pointer to the statistics embedded in `assy`, or NULL when
 * `assy` is NULL. The pointer stays owned by the context.
 */
struct ip_reassembly_stat *ip_reassembly_stat(struct ip_reassembly *assy)
{
    return (assy != NULL) ? &assy->stat : NULL;
}
/*
 * Feed one packet into the reassembler.
 *
 * Returns the reassembled packet, or NULL if the packet is not reassembled
 * (reassembly disabled, packet has no fragment layer, layer is neither IPv4
 * nor IPv6, or the datagram is not complete yet).
 * The returned packet should be freed by calling the packet_free() function.
 *
 * If the reassembled packet itself still has a fragment layer, it is fed
 * back into this function, the intermediate packet is freed and the result
 * of that nested call is returned.
 */
struct packet *ip_reassembly_packet(struct ip_reassembly *assy, const struct packet *pkt, uint64_t now)
{
    if (!assy->enable)
    {
        return NULL;
    }

    const struct layer_private *layer = pkt->frag_layer;
    if (layer == NULL)
    {
        return NULL;
    }

    struct packet *reassembled = NULL;
    if (layer->proto == LAYER_PROTO_IPV4)
    {
        reassembled = ipv4_reassembly_packet(assy, pkt, now);
    }
    else if (layer->proto == LAYER_PROTO_IPV6)
    {
        reassembled = ipv6_reassembly_packet(assy, pkt, now);
    }
    else
    {
        return NULL;
    }

    if (reassembled == NULL || reassembled->frag_layer == NULL)
    {
        return reassembled;
    }

    // the result is itself a fragment: run it through reassembly again
    struct packet *inner = ip_reassembly_packet(assy, reassembled, now);
    packet_free(reassembled);
    return inner;
}
/*
 * Feed one IPv4 fragment (pkt->frag_layer must be an IPv4 layer) into the
 * reassembler.
 *
 * Returns the reassembled packet once the datagram is complete, otherwise
 * NULL (fragment buffered, fragment rejected, or no table space).
 * The returned packet must be released with packet_free().
 *
 * The header length check now rejects a total length smaller than the IP
 * header length explicitly instead of relying on 16-bit wrap-around, and the
 * error message prints the IP id with a format specifier that matches the
 * argument type (it used "%lu").
 */
struct packet *ipv4_reassembly_packet(struct ip_reassembly *assy, const struct packet *pkt, uint64_t now)
{
    const struct layer_private *layer = pkt->frag_layer;
    const struct ip *hdr = (const struct ip *)layer->hdr_ptr;

    const uint32_t total_len = ip4_hdr_get_total_len(hdr);
    const uint32_t ip_hdr_len = ip4_hdr_get_hdr_len(hdr);
    if (total_len < ip_hdr_len || total_len - ip_hdr_len > layer->pld_len)
    {
        IP_REASSEMBLE_ERROR("unexpected header length, ip id: %u", (unsigned int)ip4_hdr_get_ipid(hdr));
        return NULL;
    }
    const uint16_t frag_len = (uint16_t)(total_len - ip_hdr_len);

    // IPv4 key: source address in the upper, destination in the lower 32 bits of word 0
    struct ip_frag_key key = {};
    uint64_t src_addr = hdr->ip_src.s_addr;
    uint64_t dst_addr = hdr->ip_dst.s_addr;
    key.src_dst_addr[0] = src_addr << 32 | dst_addr;
    key.src_dst_len = IPV4_KEYLEN;
    key.ip_id = ip4_hdr_get_ipid(hdr);
    key.proto = ip4_hdr_get_proto(hdr);
    IP_REASSEMBLY_STAT_INC(&assy->stat, frags, &key);

    struct ip_frag_pkt *frag_pkt = ip_reassembly_update_frag_pkt(assy, &key, now);
    if (frag_pkt == NULL)
    {
        return NULL;
    }

    char *frag_data = (char *)layer->pld_ptr;
    bool more_frags = ip4_hdr_get_mf_flag(hdr);
    uint16_t frag_offset = ip4_hdr_get_frag_offset(hdr);
    if (ip_frag_pkt_update(assy, frag_pkt, pkt, frag_data, frag_len, frag_offset, more_frags) != 0)
    {
        return NULL;
    }

    if (!ip_frag_pkt_is_ready(frag_pkt))
    {
        return NULL;
    }
    return ip_frag_reassemble(assy, frag_pkt);
}
/*
* https://datatracker.ietf.org/doc/html/rfc8200#section-4.5
*
* Note: unlike IPv4, fragmentation in IPv6 is performed only by source nodes,
* not by routers along a packet's delivery path
*/
/*
* original packet:
* +-----------------+-----------------+--------+--------+-//-+--------+
* | Per-Fragment |Ext & Upper-Layer| first | second | | last |
* | Headers | Headers |fragment|fragment|....|fragment|
* +-----------------+-----------------+--------+--------+-//-+--------+
*
* fragment packets:
* +-----------------+--------+-------------------+----------+
* | Per-Fragment |Fragment| Ext & Upper-Layer | first |
* | Headers | Header | Headers | fragment |
* +-----------------+--------+-------------------+----------+
*
* +-----------------+--------+----------+
* | Per-Fragment |Fragment| second |
* | Headers | Header | fragment |
* +-----------------+--------+----------+
* o
* o
* o
* +-----------------+--------+----------+
* | Per-Fragment |Fragment| last |
* | Headers | Header | fragment |
* +-----------------+--------+----------+
*
* reassembled packet:
* +-----------------+-----------------+--------+--------+-//-+--------+
* | Per-Fragment |Ext & Upper-Layer| first | second | | last |
* | Headers | Headers |fragment|fragment|....|fragment|
* +-----------------+-----------------+--------+--------+-//-+--------+
*/
/*
 * Feed one IPv6 fragment (pkt->frag_layer must be an IPv6 layer) into the
 * reassembler.
 *
 * Returns the reassembled packet once the datagram is complete, otherwise
 * NULL (no fragment extension header, malformed lengths, fragment buffered
 * or rejected, or no table space). The returned packet must be released
 * with packet_free().
 *
 * The fragment payload is taken to start right after the fragment extension
 * header and to be (payload length - sizeof(struct ip6_frag)) bytes long.
 * A payload length smaller than the fragment header is now rejected
 * explicitly instead of wrapping around, and the error message prints the
 * fragment id with a format specifier that matches the argument type
 * (it used "%lu").
 */
struct packet *ipv6_reassembly_packet(struct ip_reassembly *assy, const struct packet *pkt, uint64_t now)
{
    const struct layer_private *layer = pkt->frag_layer;
    const struct ip6_hdr *hdr = (const struct ip6_hdr *)layer->hdr_ptr;
    const struct ip6_frag *frag_hdr = ip6_hdr_get_frag_ext(hdr);
    if (frag_hdr == NULL)
    {
        return NULL;
    }

    const uint16_t payload_len = ip6_hdr_get_payload_len(hdr);
    if (payload_len < sizeof(struct ip6_frag))
    {
        IP_REASSEMBLE_ERROR("unexpected header length, frag id: %u", (unsigned int)ipv6_frag_get_ident(frag_hdr));
        return NULL;
    }

    char *frag_data = (char *)frag_hdr + sizeof(struct ip6_frag);
    uint16_t frag_len = payload_len - sizeof(struct ip6_frag);
    // the claimed payload must lie inside the captured packet data
    if (frag_data + frag_len > pkt->data_ptr + pkt->data_len)
    {
        IP_REASSEMBLE_ERROR("unexpected header length, frag id: %u", (unsigned int)ipv6_frag_get_ident(frag_hdr));
        return NULL;
    }

    // IPv6 key: source address in words 0..1, destination address in words 2..3
    struct ip_frag_key key = {};
    memcpy(&key.src_dst_addr[0], hdr->ip6_src.s6_addr, 16);
    memcpy(&key.src_dst_addr[2], hdr->ip6_dst.s6_addr, 16);
    key.src_dst_len = IPV6_KEYLEN;
    key.ip_id = ipv6_frag_get_ident(frag_hdr);
    key.proto = 0; // only first fragment has the upper layer protocol
    IP_REASSEMBLY_STAT_INC(&assy->stat, frags, &key);

    struct ip_frag_pkt *frag_pkt = ip_reassembly_update_frag_pkt(assy, &key, now);
    if (frag_pkt == NULL)
    {
        return NULL;
    }

    bool more_frags = ipv6_frag_get_more(frag_hdr);
    uint16_t frag_offset = ipv6_frag_get_offset(frag_hdr);
    if (ip_frag_pkt_update(assy, frag_pkt, pkt, frag_data, frag_len, frag_offset, more_frags) != 0)
    {
        return NULL;
    }

    if (!ip_frag_pkt_is_ready(frag_pkt))
    {
        return NULL;
    }
    return ip_frag_reassemble(assy, frag_pkt);
}