Added a lock-free queue to the lwIP driver for RX packet processing. Added checks to enforce strict ordering of callback events

Joseph Henry
2019-02-07 14:11:17 -08:00
parent 52a7e9229e
commit 1f8d3030c8
4 changed files with 89 additions and 70 deletions
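
The change replaces the mutex-guarded std::queue in the lwIP driver with a lock-free moodycamel::ConcurrentQueue. Below is a minimal sketch of the producer/consumer pattern this adopts, assuming the vendored concurrentqueue.h header is on the include path; RxPacket, feed_frame, and drain_frames are illustrative names, not the repo's API — only enqueue, try_dequeue, and size_approx come from the library.

// Sketch only: the tap's RX path enqueues, the stack's own thread drains.
#include "concurrentqueue.h"   // vendored moodycamel::ConcurrentQueue header
#include <cstddef>
#include <cstdint>
#include <vector>

struct RxPacket { std::vector<uint8_t> frame; };              // illustrative stand-in
static moodycamel::ConcurrentQueue<RxPacket*> rx_queue;
static const size_t kMaxRxQueueLen = 1024;                    // mirrors ZTS_LWIP_MAX_RX_QUEUE_LEN

// Producer side (e.g. the virtual tap's receive path)
void feed_frame(RxPacket *pkt)
{
	if (rx_queue.size_approx() >= kMaxRxQueueLen) {            // bounded: drop when full
		delete pkt;
		return;
	}
	rx_queue.enqueue(pkt);                                     // lock-free enqueue
}

// Consumer side (e.g. a callback run on the stack's own thread)
void drain_frames(int budget)
{
	RxPacket *pkt;
	while (budget-- > 0 && rx_queue.try_dequeue(pkt)) {        // lock-free dequeue
		// hand the frame to the stack here
		delete pkt;
	}
}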

View File

@@ -42,6 +42,7 @@
 #define ZTS_SERVICE_THREAD_NAME "ZeroTierServiceThread"
 #define ZTS_EVENT_CALLBACK_THREAD_NAME "ZeroTierEventCallbackThread"
+#define ZTS_LWIP_DRIVER_THREAD_NAME "lwipDriver"
 //////////////////////////////////////////////////////////////////////////////
 // lwIP behaviour (tcpip driver)                                            //
@@ -58,9 +59,9 @@
 #define LWIP_GUARDED_BUF_CHECK_INTERVAL 5
 /**
- * Number of frame pointers that can be cached waiting for receipt into core
+ * Number of packets that can be queued for ingress into the lwIP core
 */
-#define LWIP_MAX_GUARDED_RX_BUF_SZ 1024
+#define ZTS_LWIP_MAX_RX_QUEUE_LEN 1024
 //////////////////////////////////////////////////////////////////////////////
 // Service behaviour                                                        //

View File

@@ -827,6 +827,9 @@ public:
 inline void generateEventMsgs()
 {
+	if (!lwip_is_up()) {
+		return; // Don't process peer status events unless the stack is up.
+	}
 	// Generate messages to be dequeued by the callback message thread
 #if ZTS_NETWORK_CALLBACKS
 	Mutex::Lock _l(_nets_m);
@@ -870,7 +873,7 @@ public:
 	ZT_PeerList *pl = _node->peers();
 	if (pl) {
 		for(unsigned long i=0;i<pl->peerCount;++i) {
-			if (!peerCache.count(pl->peers[i].address)) { // Add first entry
+			if (!peerCache.count(pl->peers[i].address)) {
 				if (pl->peers[i].pathCount > 0) {
 					_callbackMsgQueue.enqueue(new std::pair<uint64_t,int>(pl->peers[i].address, ZTS_EVENT_PEER_P2P));
 				}
@@ -885,6 +888,7 @@ public:
 					_callbackMsgQueue.enqueue(new std::pair<uint64_t,int>(pl->peers[i].address, ZTS_EVENT_PEER_RELAY));
 				}
 			}
+			// Update our cache with most recently observed path count
 			peerCache[pl->peers[i].address] = pl->peers[i].pathCount;
 		}
 	}
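
With the new lwip_is_up() guard, generateEventMsgs() returns early until the stack is running, so peer status events are not generated before ZTS_EVENT_NETWORK_STACK_UP has been posted. A hedged illustration of that ordering from a consumer's point of view; the handler signature mirrors postEvent() in the driver and is hypothetical, and the enum values are placeholders for the library's real constants:

#include <cassert>
#include <cstdint>

// Placeholder values for illustration only; the real constants come from the library's headers.
enum { ZTS_EVENT_NETWORK_STACK_UP = 1, ZTS_EVENT_PEER_P2P = 2, ZTS_EVENT_PEER_RELAY = 3 };

static bool stack_up_seen = false;

void on_zts_event(uint64_t id, int eventCode)                 // hypothetical handler
{
	(void)id;
	if (eventCode == ZTS_EVENT_NETWORK_STACK_UP) {
		stack_up_seen = true;
		return;
	}
	if (eventCode == ZTS_EVENT_PEER_P2P || eventCode == ZTS_EVENT_PEER_RELAY) {
		// With the guard in generateEventMsgs(), peer events should
		// never be generated before the stack-up event.
		assert(stack_up_seen);
	}
}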

View File

@@ -56,6 +56,9 @@
#include "Controls.hpp" #include "Controls.hpp"
extern void postEvent(uint64_t id, int eventCode); extern void postEvent(uint64_t id, int eventCode);
#include "concurrentqueue.h"
moodycamel::ConcurrentQueue<struct ZeroTier::zts_sorted_packet*> rx_queue;
#if defined(_WIN32) #if defined(_WIN32)
#include <time.h> #include <time.h>
#endif #endif
@@ -67,17 +70,19 @@ extern void postEvent(uint64_t id, int eventCode);
namespace ZeroTier { namespace ZeroTier {
bool main_loop_exited = false; bool _has_exited = false;
bool lwip_driver_initialized = false;
bool has_already_been_initialized = false;
int hibernationDelayMultiplier = 1; int hibernationDelayMultiplier = 1;
extern bool _run_lwip_tcpip; extern bool _run_lwip_tcpip;
Mutex lwip_driver_m;
Mutex driver_m; void lwip_sleep(long ms)
{
std::queue<struct zts_sorted_packet*> rx_queue; #if defined(_WIN32)
Mutex _rx_input_lock_m; Sleep(ms*hibernationDelayMultiplier);
#else
usleep(ms*1000*hibernationDelayMultiplier);
#endif
}
void lwip_hibernate_driver() void lwip_hibernate_driver()
{ {
@@ -94,31 +99,24 @@ static void tcpip_init_done(void *arg)
{ {
sys_sem_t *sem; sys_sem_t *sem;
sem = (sys_sem_t *)arg; sem = (sys_sem_t *)arg;
lwip_driver_initialized = true;
_run_lwip_tcpip = true; _run_lwip_tcpip = true;
postEvent((uint64_t)0, ZTS_EVENT_NETWORK_STACK_UP); postEvent((uint64_t)0, ZTS_EVENT_NETWORK_STACK_UP);
driver_m.unlock();
sys_sem_signal(sem); sys_sem_signal(sem);
} }
void my_tcpip_callback(void *arg) void my_tcpip_callback(void *arg)
{ {
if (main_loop_exited) { if (!_run_lwip_tcpip) {
return; return;
} }
err_t err = ERR_OK; err_t err = ERR_OK;
int loop_score = LWIP_FRAMES_HANDLED_PER_CORE_CALL; // max num of packets to read per polling call int loop_score = LWIP_FRAMES_HANDLED_PER_CORE_CALL; // max num of packets to read per polling call
while (loop_score > 0) { struct zts_sorted_packet *sp;
while (loop_score > 0 && rx_queue.size_approx() > 0) {
// TODO: Swap this block out for a thread-safe container // TODO: Swap this block out for a thread-safe container
_rx_input_lock_m.lock(); struct pbuf *p;
if (rx_queue.size() == 0) { if (rx_queue.try_dequeue(sp)) {
_rx_input_lock_m.unlock(); p = sp->p;
return;
}
struct zts_sorted_packet *sp = rx_queue.front();
struct pbuf *p = sp->p;
rx_queue.pop();
_rx_input_lock_m.unlock();
// Feed packet into appropriate lwIP netif // Feed packet into appropriate lwIP netif
if (sp->p && sp->n) { if (sp->p && sp->n) {
if ((err = sp->n->input(sp->p, sp->n)) != ERR_OK) { if ((err = sp->n->input(sp->p, sp->n)) != ERR_OK) {
@@ -129,18 +127,18 @@ void my_tcpip_callback(void *arg)
} }
delete sp; delete sp;
sp = NULL; sp = NULL;
}
loop_score--; loop_score--;
} }
} }
// Main thread which starts the initialization process
static void main_lwip_driver_loop(void *arg) static void main_lwip_driver_loop(void *arg)
{ {
#if defined(__linux__) #if defined(__linux__)
pthread_setname_np(pthread_self(), "lwipDriver"); pthread_setname_np(pthread_self(), ZTS_LWIP_DRIVER_THREAD_NAME);
#endif #endif
#if defined(__APPLE__) #if defined(__APPLE__)
pthread_setname_np("lwipDriver"); pthread_setname_np(ZTS_LWIP_DRIVER_THREAD_NAME);
#endif #endif
sys_sem_t sem; sys_sem_t sem;
LWIP_UNUSED_ARG(arg); LWIP_UNUSED_ARG(arg);
@@ -148,53 +146,63 @@ static void main_lwip_driver_loop(void *arg)
DEBUG_ERROR("failed to create semaphore"); DEBUG_ERROR("failed to create semaphore");
} }
tcpip_init(tcpip_init_done, &sem); tcpip_init(tcpip_init_done, &sem);
has_already_been_initialized = true;
sys_sem_wait(&sem); sys_sem_wait(&sem);
while(lwip_driver_initialized) { // Main loop
#if defined(_WIN32) while(_run_lwip_tcpip) {
Sleep(LWIP_GUARDED_BUF_CHECK_INTERVAL*hibernationDelayMultiplier); lwip_sleep(LWIP_GUARDED_BUF_CHECK_INTERVAL);
#else
usleep(LWIP_GUARDED_BUF_CHECK_INTERVAL*1000*hibernationDelayMultiplier);
#endif
// Handle incoming packets from the core's thread context. // Handle incoming packets from the core's thread context.
// If you feed frames into the core directly you will violate the core's thread model // If you feed frames into the core directly you will violate the core's thread model
tcpip_callback_with_block(my_tcpip_callback, NULL, 1); tcpip_callback_with_block(my_tcpip_callback, NULL, 1);
} }
main_loop_exited = true; _has_exited = true;
_run_lwip_tcpip = false; }
bool lwip_is_up()
{
Mutex::Lock _l(lwip_driver_m);
return _run_lwip_tcpip;
}
bool lwip_has_previously_shutdown()
{
Mutex::Lock _l(lwip_driver_m);
return _has_exited;
} }
// Initialize the lwIP stack
void lwip_driver_init() void lwip_driver_init()
{ {
driver_m.lock(); // Unlocked from callback indicating completion of driver init if (lwip_is_up()) {
if (has_already_been_initialized || lwip_driver_initialized) {
// Already initialized, skip
driver_m.unlock();
return;
} if (main_loop_exited) {
DEBUG_ERROR("stack has previously been shutdown an cannot be restarted.");
driver_m.unlock();
return; return;
} }
if (lwip_has_previously_shutdown()) {
return;
}
Mutex::Lock _l(lwip_driver_m);
#if defined(_WIN32) #if defined(_WIN32)
sys_init(); // Required for win32 init of critical sections sys_init(); // Required for win32 init of critical sections
#endif #endif
void *st = sys_thread_new("main_thread", main_lwip_driver_loop, void *st = sys_thread_new(ZTS_LWIP_DRIVER_THREAD_NAME, main_lwip_driver_loop,
NULL, DEFAULT_THREAD_STACKSIZE, DEFAULT_THREAD_PRIO); NULL, DEFAULT_THREAD_STACKSIZE, DEFAULT_THREAD_PRIO);
} }
void lwip_driver_shutdown() void lwip_driver_shutdown()
{ {
if (main_loop_exited) { if (lwip_has_previously_shutdown()) {
return; return;
} }
lwip_driver_initialized = false; Mutex::Lock _l(lwip_driver_m);
// Give the stack time to call the frame feed callback one last time before shutting everything down // Set flag to stop sending frames into the core
int callbackInterval = LWIP_GUARDED_BUF_CHECK_INTERVAL*hibernationDelayMultiplier*1000; _run_lwip_tcpip = false;
usleep(callbackInterval*3); // Wait until the main lwIP thread has exited
while(!main_loop_exited) { while (!_has_exited) { lwip_sleep(LWIP_GUARDED_BUF_CHECK_INTERVAL); }
usleep(LWIP_GUARDED_BUF_CHECK_INTERVAL*1000); // After we're certain the stack isn't processing anymore traffic,
// start dequeing from the RX queue. This queue should be rejecting
// new frames at this point.
struct zts_sorted_packet *sp;
for (int i = 0; i < ZTS_LWIP_MAX_RX_QUEUE_LEN; i++) {
if (rx_queue.try_dequeue(sp)) {
delete sp;
}
} }
/* /*
if (tcpip_shutdown() == ERR_OK) { if (tcpip_shutdown() == ERR_OK) {
@@ -248,7 +256,7 @@ err_t lwip_eth_tx(struct netif *netif, struct pbuf *p)
int len = totalLength - sizeof(struct eth_hdr); int len = totalLength - sizeof(struct eth_hdr);
int proto = Utils::ntoh((uint16_t)ethhdr->type); int proto = Utils::ntoh((uint16_t)ethhdr->type);
tap->_handler(tap->_arg, NULL, tap->_nwid, src_mac, dest_mac, proto, 0, data, len); tap->_handler(tap->_arg, NULL, tap->_nwid, src_mac, dest_mac, proto, 0, data, len);
/*
if (ZT_MSG_TRANSFER == true) { if (ZT_MSG_TRANSFER == true) {
char flagbuf[32]; char flagbuf[32];
memset(&flagbuf, 0, 32); memset(&flagbuf, 0, 32);
@@ -262,19 +270,22 @@ err_t lwip_eth_tx(struct netif *netif, struct pbuf *p)
DEBUG_TRANS("len=%5d dst=%s [%s TX <-- %s] proto=0x%04x %s", totalLength, macBuf, nodeBuf, tap->nodeId().c_str(), DEBUG_TRANS("len=%5d dst=%s [%s TX <-- %s] proto=0x%04x %s", totalLength, macBuf, nodeBuf, tap->nodeId().c_str(),
Utils::ntoh(ethhdr->type), flagbuf); Utils::ntoh(ethhdr->type), flagbuf);
} }
*/
return ERR_OK; return ERR_OK;
} }
void lwip_eth_rx(VirtualTap *tap, const MAC &from, const MAC &to, unsigned int etherType, void lwip_eth_rx(VirtualTap *tap, const MAC &from, const MAC &to, unsigned int etherType,
const void *data, unsigned int len) const void *data, unsigned int len)
{ {
if (!_run_lwip_tcpip) {
return;
}
struct pbuf *p,*q; struct pbuf *p,*q;
struct eth_hdr ethhdr; struct eth_hdr ethhdr;
from.copyTo(ethhdr.src.addr, 6); from.copyTo(ethhdr.src.addr, 6);
to.copyTo(ethhdr.dest.addr, 6); to.copyTo(ethhdr.dest.addr, 6);
ethhdr.type = Utils::hton((uint16_t)etherType); ethhdr.type = Utils::hton((uint16_t)etherType);
/*
if (ZT_MSG_TRANSFER == true) { if (ZT_MSG_TRANSFER == true) {
char flagbuf[32]; char flagbuf[32];
memset(&flagbuf, 0, 32); memset(&flagbuf, 0, 32);
@@ -288,7 +299,7 @@ void lwip_eth_rx(VirtualTap *tap, const MAC &from, const MAC &to, unsigned int e
DEBUG_TRANS("len=%5d dst=%s [%s RX --> %s] proto=0x%04x %s", len, macBuf, nodeBuf, tap->nodeId().c_str(), DEBUG_TRANS("len=%5d dst=%s [%s RX --> %s] proto=0x%04x %s", len, macBuf, nodeBuf, tap->nodeId().c_str(),
Utils::ntoh(ethhdr.type), flagbuf); Utils::ntoh(ethhdr.type), flagbuf);
} }
*/
if (etherType == 0x0800 || etherType == 0x0806) { // ip4 or ARP if (etherType == 0x0800 || etherType == 0x0806) { // ip4 or ARP
if (!tap->netif4) { if (!tap->netif4) {
DEBUG_ERROR("dropped packet: no netif to accept this packet (etherType=%x) on this vtap (%p)", etherType, tap); DEBUG_ERROR("dropped packet: no netif to accept this packet (etherType=%x) on this vtap (%p)", etherType, tap);
@@ -326,9 +337,8 @@ void lwip_eth_rx(VirtualTap *tap, const MAC &from, const MAC &to, unsigned int e
dataptr += q->len; dataptr += q->len;
} }
_rx_input_lock_m.lock(); if (rx_queue.size_approx() >= ZTS_LWIP_MAX_RX_QUEUE_LEN) {
if (rx_queue.size() >= LWIP_MAX_GUARDED_RX_BUF_SZ) { DEBUG_INFO("dropped packet: rx_queue is full (>= %d)", ZTS_LWIP_MAX_RX_QUEUE_LEN);
DEBUG_INFO("dropped packet: rx_queue is full (>= %d)", LWIP_MAX_GUARDED_RX_BUF_SZ);
// TODO: Test performance scenarios: dropping this packet, dropping oldest front packet // TODO: Test performance scenarios: dropping this packet, dropping oldest front packet
pbuf_free(p); pbuf_free(p);
p = NULL; p = NULL;
@@ -353,8 +363,7 @@ void lwip_eth_rx(VirtualTap *tap, const MAC &from, const MAC &to, unsigned int e
DEBUG_ERROR("dropped packet: unhandled (etherType=%x)", etherType); DEBUG_ERROR("dropped packet: unhandled (etherType=%x)", etherType);
break; break;
} }
rx_queue.push(sp); rx_queue.enqueue(sp);
_rx_input_lock_m.unlock();
} }
static void print_netif_info(struct netif *netif) { static void print_netif_info(struct netif *netif) {
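
The reworked shutdown path depends on a specific teardown order: clear _run_lwip_tcpip so lwip_eth_rx() rejects new ingress, wait for the driver loop to set _has_exited, then drain whatever is still in rx_queue. A condensed, self-contained sketch of that ordering; the atomics and names below are illustrative stand-ins for the flags in the diff, not repo code:

#include <atomic>
#include <chrono>
#include <thread>
#include "concurrentqueue.h"                                  // vendored header, as in the diff

struct Pkt { /* pbuf + netif pointers in the real code */ };  // stand-in for zts_sorted_packet
static std::atomic<bool> run_stack{true};                     // stand-in for _run_lwip_tcpip
static std::atomic<bool> loop_exited{false};                  // stand-in for _has_exited
static moodycamel::ConcurrentQueue<Pkt*> pending;             // stand-in for rx_queue

void shutdown_sketch()
{
	run_stack = false;                                         // 1. ingress path starts rejecting frames
	while (!loop_exited) {                                     // 2. wait for the driver loop to exit
		std::this_thread::sleep_for(std::chrono::milliseconds(5));
	}
	Pkt *p;                                                    // 3. free anything still queued
	while (pending.try_dequeue(p)) {
		delete p;
	}
}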

View File

@@ -77,6 +77,11 @@ void lwip_hibernate_driver();
 */
 void lwip_wake_driver();
 
+/**
+ * Returns whether the lwIP network stack is up and ready to process traffic
+ */
+bool lwip_is_up();
+
 /**
  * @brief Initialize network stack semaphores, threads, and timers.
  *