From 3b9f7300b012805beaa6e0f01fcfde2ddcfe6b01 Mon Sep 17 00:00:00 2001 From: dormando Date: Fri, 20 Jan 2023 16:02:53 -0800 Subject: proxy: fix stats deadlock caused by await code - specifically the WSTAT_DECR in proxy_await.c's return code could potentially use the wrong thread's lock This is why I've been swapping c with thread as lock/function arguments all over the code lately; it's very accident prone. Am reasonably sure this causes the deadlock but need to attempt to verify more. --- proto_proxy.c | 14 +++++++------- proxy.h | 16 ++++++++-------- proxy_await.c | 8 ++++---- 3 files changed, 19 insertions(+), 19 deletions(-) diff --git a/proto_proxy.c b/proto_proxy.c index 7cea462..b09e113 100644 --- a/proto_proxy.c +++ b/proto_proxy.c @@ -402,7 +402,7 @@ void proxy_cleanup_conn(conn *c) { lua_State *L = thr->L; luaL_unref(L, LUA_REGISTRYINDEX, c->proxy_coro_ref); c->proxy_coro_ref = 0; - WSTAT_DECR(c, proxy_req_active, 1); + WSTAT_DECR(thr, proxy_req_active, 1); } // we buffered a SET of some kind. @@ -545,7 +545,7 @@ int proxy_run_coroutine(lua_State *Lc, mc_resp *resp, io_pending_proxy_t *p, con size_t rlen = 0; if (cores == LUA_OK) { - WSTAT_DECR(c, proxy_req_active, 1); + WSTAT_DECR(c->thread, proxy_req_active, 1); int type = lua_type(Lc, 1); if (type == LUA_TUSERDATA) { mcp_resp_t *r = luaL_checkudata(Lc, 1, "mcp.response"); @@ -619,7 +619,7 @@ int proxy_run_coroutine(lua_State *Lc, mc_resp *resp, io_pending_proxy_t *p, con } } else { - WSTAT_DECR(c, proxy_req_active, 1); + WSTAT_DECR(c->thread, proxy_req_active, 1); P_DEBUG("%s: Failed to run coroutine: %s\n", __func__, lua_tostring(Lc, -1)); LOGGER_LOG(NULL, LOG_PROXYEVENTS, LOGGER_PROXY_ERROR, NULL, lua_tostring(Lc, -1)); proxy_out_errstring(resp, "lua failure"); @@ -642,7 +642,7 @@ static void proxy_process_command(conn *c, char *command, size_t cmdlen, bool mu // permanent solution. int ret = process_request(&pr, command, cmdlen); if (ret != 0) { - WSTAT_INCR(c, proxy_conn_errors, 1); + WSTAT_INCR(c->thread, proxy_conn_errors, 1); if (!resp_start(c)) { conn_set_state(c, conn_closing); return; @@ -749,7 +749,7 @@ static void proxy_process_command(conn *c, char *command, size_t cmdlen, bool mu // We test the command length all the way down here because multigets can // be very long, and they're chopped up by now. if (cmdlen >= MCP_REQUEST_MAXLEN) { - WSTAT_INCR(c, proxy_conn_errors, 1); + WSTAT_INCR(c->thread, proxy_conn_errors, 1); if (!resp_start(c)) { conn_set_state(c, conn_closing); return; @@ -797,7 +797,7 @@ static void proxy_process_command(conn *c, char *command, size_t cmdlen, bool mu if (c->item == NULL) { lua_settop(L, 0); proxy_out_errstring(c->resp, "out of memory"); - WSTAT_DECR(c, proxy_req_active, 1); + WSTAT_DECR(c->thread, proxy_req_active, 1); return; } c->item_malloced = true; @@ -864,7 +864,7 @@ static void mcp_queue_io(conn *c, mc_resp *resp, int coro_ref, lua_State *Lc) { io_pending_proxy_t *p = do_cache_alloc(c->thread->io_cache); if (p == NULL) { - WSTAT_INCR(c, proxy_conn_oom, 1); + WSTAT_INCR(c->thread, proxy_conn_oom, 1); proxy_lua_error(Lc, "out of memory allocating from IO cache"); return; } diff --git a/proxy.h b/proxy.h index 29eed86..7c94e97 100644 --- a/proxy.h +++ b/proxy.h @@ -42,15 +42,15 @@ #define WSTAT_L(t) pthread_mutex_lock(&t->stats.mutex); #define WSTAT_UL(t) pthread_mutex_unlock(&t->stats.mutex); -#define WSTAT_INCR(c, stat, amount) { \ - pthread_mutex_lock(&c->thread->stats.mutex); \ - c->thread->stats.stat += amount; \ - pthread_mutex_unlock(&c->thread->stats.mutex); \ +#define WSTAT_INCR(t, stat, amount) { \ + pthread_mutex_lock(&t->stats.mutex); \ + t->stats.stat += amount; \ + pthread_mutex_unlock(&t->stats.mutex); \ } -#define WSTAT_DECR(c, stat, amount) { \ - pthread_mutex_lock(&c->thread->stats.mutex); \ - c->thread->stats.stat -= amount; \ - pthread_mutex_unlock(&c->thread->stats.mutex); \ +#define WSTAT_DECR(t, stat, amount) { \ + pthread_mutex_lock(&t->stats.mutex); \ + t->stats.stat -= amount; \ + pthread_mutex_unlock(&t->stats.mutex); \ } #define STAT_L(ctx) pthread_mutex_lock(&ctx->stats_lock); #define STAT_UL(ctx) pthread_mutex_unlock(&ctx->stats_lock); diff --git a/proxy_await.c b/proxy_await.c index 3dfd16b..4192900 100644 --- a/proxy_await.c +++ b/proxy_await.c @@ -132,7 +132,7 @@ static void mcp_queue_await_io(conn *c, lua_State *Lc, mcp_request_t *rq, int aw io_pending_proxy_t *p = do_cache_alloc(c->thread->io_cache); if (p == NULL) { - WSTAT_INCR(c, proxy_conn_oom, 1); + WSTAT_INCR(c->thread, proxy_conn_oom, 1); proxy_lua_error(Lc, "out of memory allocating from IO cache"); return; } @@ -186,7 +186,7 @@ static void mcp_queue_await_dummy_io(conn *c, lua_State *Lc, int await_ref) { io_pending_proxy_t *p = do_cache_alloc(c->thread->io_cache); if (p == NULL) { - WSTAT_INCR(c, proxy_conn_oom, 1); + WSTAT_INCR(c->thread, proxy_conn_oom, 1); proxy_lua_error(Lc, "out of memory allocating from IO cache"); return; } @@ -223,7 +223,7 @@ static void mcp_queue_await_dummy_io(conn *c, lua_State *Lc, int await_ref) { // places. Else these errors currently crash the daemon. int mcplib_await_run(conn *c, mc_resp *resp, lua_State *L, int coro_ref) { P_DEBUG("%s: start\n", __func__); - WSTAT_INCR(c, proxy_await_active, 1); + WSTAT_INCR(c->thread, proxy_await_active, 1); mcp_await_t *aw = lua_touserdata(L, -1); int await_ref = luaL_ref(L, LUA_REGISTRYINDEX); // await is popped. assert(aw != NULL); @@ -430,7 +430,7 @@ int mcplib_await_return(io_pending_proxy_t *p) { luaL_unref(L, LUA_REGISTRYINDEX, aw->argtable_ref); luaL_unref(L, LUA_REGISTRYINDEX, aw->req_ref); luaL_unref(L, LUA_REGISTRYINDEX, p->await_ref); - WSTAT_DECR(p->c, proxy_await_active, 1); + WSTAT_DECR(p->thread, proxy_await_active, 1); } // Just remove anything we could have left on the primary VM stack -- cgit v1.2.1