TSG-5978 TFE 与 KNI 保活的 watchdog 线程增加对 tfe worker 线程健康状态检测的功能
This commit is contained in:
@@ -7,6 +7,8 @@
|
||||
#include <unistd.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include <proxy.h>
|
||||
#include <platform.h>
|
||||
#include <tfe_utils.h>
|
||||
#include <watchdog_kni.h>
|
||||
#include <MESA/MESA_prof_load.h>
|
||||
@@ -205,10 +207,32 @@ void * watchdog_kni_thread(void * arg)
|
||||
DIE("watchdog thread is terminated.");
|
||||
}
|
||||
|
||||
/**
 * Timer callback: verify that every worker thread has refreshed its
 * `lastime` stamp recently, and terminate the process if one has not.
 *
 * watchdog_kni_create() registers this function as a persistent libevent
 * timer (fd = -1, EV_PERSIST) with a 2-second period, passing the proxy as
 * the callback argument.
 *
 * @param fd    Unused; the event is a pure timer with no descriptor.
 * @param what  Unused; libevent event flags that triggered the callback.
 * @param arg   The `struct tfe_proxy *` whose work_threads[] are inspected.
 *
 * Side effects: logs one DEBUG line per healthy worker; on the first stalled
 * worker it logs an ERROR line and calls exit(-1), ending the whole process.
 *
 * NOTE(review): the comparison assumes the workers stamp `lastime` with
 * CLOCK_MONOTONIC seconds as well -- confirm against the worker code.
 */
static void health_check_for_thread_worker(evutil_socket_t fd, short what, void * arg)
{
    /* Seconds a worker may stay silent before it is considered dead.
     * Kept as the original 2 + 2 + 1 sum; presumably two timer periods plus
     * one second of slack -- TODO confirm the intended breakdown. */
    enum { WORKER_STALL_LIMIT_SEC = 2 + 2 + 1 };

    struct tfe_proxy * proxy = (struct tfe_proxy *) arg;
    struct timespec now;

    (void) fd;
    (void) what;

    /* Without a valid "now" the comparison below is meaningless (and reading
     * an unset timespec is undefined), so skip this round instead of risking
     * a bogus exit. */
    if (clock_gettime(CLOCK_MONOTONIC, &now) != 0)
    {
        TFE_LOG_ERROR(g_default_logger, "Watchdog thread failed at clock_gettime(CLOCK_MONOTONIC): %s", strerror(errno));
        return;
    }

    for (unsigned int i = 0; i < proxy->nr_work_threads; i++)
    {
        time_t lastime = ATOMIC_READ(&(proxy->work_threads[i]->lastime));

        if (lastime + WORKER_STALL_LIMIT_SEC < now.tv_sec)
        {
            /* time_t is not guaranteed to be long; cast so that %ld is always correct.
             * NOTE(review): thread_id is printed with %d -- verify its declared type. */
            TFE_LOG_ERROR(g_default_logger, "Watchdog thread nowtime %ld, Worker thread %d lastime %ld, Worker thread no reply, Exit ! ! ! ", (long) now.tv_sec, proxy->work_threads[i]->thread_id, (long) lastime);
            exit(-1);
        }

        TFE_LOG_DEBUG(g_default_logger, "Watchdog thread nowtime %ld, Worker thread %d lastime %lds ", (long) now.tv_sec, proxy->work_threads[i]->thread_id, (long) lastime);
    }
}
|
||||
|
||||
struct watchdog_kni * watchdog_kni_create(struct tfe_proxy * proxy, const char * profile, void * logger)
|
||||
{
|
||||
struct watchdog_kni * __ctx = ALLOC(struct watchdog_kni, 1);
|
||||
int ret = 0;
|
||||
struct event *ev = NULL;
|
||||
struct timeval timer_delay = {2, 0};
|
||||
|
||||
__ctx->proxy = proxy;
|
||||
__ctx->profile = profile;
|
||||
@@ -232,7 +256,7 @@ struct watchdog_kni * watchdog_kni_create(struct tfe_proxy * proxy, const char *
|
||||
|
||||
if (ret < 0)
|
||||
{
|
||||
TFE_LOG_ERROR(logger, "failed at parsing kni's address, in file %s, section %s, entry %s: %s",
|
||||
TFE_LOG_ERROR(__ctx->logger, "failed at parsing kni's address, in file %s, section %s, entry %s: %s",
|
||||
profile, "kni", "ip", str_kni_ip);
|
||||
goto __errout;
|
||||
}
|
||||
@@ -245,12 +269,22 @@ struct watchdog_kni * watchdog_kni_create(struct tfe_proxy * proxy, const char *
|
||||
__ctx->ev_base = event_base_new();
|
||||
if (!__ctx->ev_base)
|
||||
{
|
||||
TFE_LOG_ERROR(logger, "failed at watchdog event_base_new(): %s", strerror(errno));
|
||||
TFE_LOG_ERROR(__ctx->logger, "failed at watchdog event_base_new(): %s", strerror(errno));
|
||||
/* after log, reset errno */
|
||||
errno = 0;
|
||||
goto __errout;
|
||||
}
|
||||
|
||||
ev = event_new(__ctx->ev_base, -1, EV_PERSIST, health_check_for_thread_worker, proxy);
|
||||
if (unlikely(ev == NULL))
|
||||
{
|
||||
TFE_LOG_ERROR(__ctx->logger, "Failed at creating health check event for worker thread");
|
||||
/* after log, reset errno */
|
||||
errno = 0;
|
||||
goto __errout;
|
||||
}
|
||||
evtimer_add(ev, &timer_delay);
|
||||
|
||||
watchdog_kni_reset(__ctx);
|
||||
watchdog_kni_try_connect(__ctx);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user