diff options
author | antirez <antirez@gmail.com> | 2020-05-16 17:15:35 +0200 |
---|---|---|
committer | antirez <antirez@gmail.com> | 2020-05-16 17:15:35 +0200 |
commit | 624742d9b40d20afe8b694c2dc0b0349f8b7aaad (patch) | |
tree | 0df72b6c9fd7c307c2fde52393e784707afc1203 | |
parent | 3cd92e87d19b7731d9d6f893bbcc861de60d4933 (diff) | |
download | redis-624742d9b40d20afe8b694c2dc0b0349f8b7aaad.tar.gz |
Remove the client from CLOSE_ASAP list before caching the master.
This was broken in 1a7cd2c: we identified a crash in the CI, what
was happening before the fix should be like that:
1. The client gets in the async free list.
2. However freeClient() gets called again against the same client
which is a master.
3. The client arrived in freeClient() with the CLOSE_ASAP flag set.
4. The master gets cached, but NOT removed from the CLOSE_ASAP linked
list.
5. The master client that was cached was immediately removed since it
was still in the list.
6. Redis accessed a freed cached master.
This is how the crash looked like:
=== REDIS BUG REPORT START: Cut & paste starting from here ===
1092:S 16 May 2020 11:44:09.731 # Redis 999.999.999 crashed by signal: 11
1092:S 16 May 2020 11:44:09.731 # Crashed running the instruction at: 0x447e18
1092:S 16 May 2020 11:44:09.731 # Accessing address: 0xffffffffffffffff
1092:S 16 May 2020 11:44:09.731 # Failed assertion: (:0)
------ STACK TRACE ------
EIP:
src/redis-server 127.0.0.1:21300(readQueryFromClient+0x48)[0x447e18]
And the 0xffff address access likely comes from accessing an SDS that is
set to NULL (we go -1 offset to read the header).
-rw-r--r-- | src/networking.c | 21 | ||||
-rw-r--r-- | src/replication.c | 3 |
2 files changed, 12 insertions, 12 deletions
diff --git a/src/networking.c b/src/networking.c index 6de00be09..0a04c0489 100644 --- a/src/networking.c +++ b/src/networking.c @@ -1134,6 +1134,16 @@ void freeClient(client *c) { /* Notify module system that this client auth status changed. */ moduleNotifyUserChanged(c); + /* If this client was scheduled for async freeing we need to remove it + * from the queue. Note that we need to do this here, because later + * we may call replicationCacheMaster() and the client should already + * be removed from the list of clients to free. */ + if (c->flags & CLIENT_CLOSE_ASAP) { + ln = listSearchKey(server.clients_to_close,c); + serverAssert(ln != NULL); + listDelNode(server.clients_to_close,ln); + } + /* If it is our master that's beging disconnected we should make sure * to cache the state to try a partial resynchronization later. * @@ -1142,6 +1152,7 @@ void freeClient(client *c) { if (server.master && c->flags & CLIENT_MASTER) { serverLog(LL_WARNING,"Connection with master lost."); if (!(c->flags & (CLIENT_PROTOCOL_ERROR|CLIENT_BLOCKED))) { + c->flags &= ~(CLIENT_CLOSE_ASAP|CLIENT_CLOSE_AFTER_REPLY); replicationCacheMaster(c); return; } @@ -1209,15 +1220,7 @@ void freeClient(client *c) { * we lost the connection with the master. */ if (c->flags & CLIENT_MASTER) replicationHandleMasterDisconnection(); - /* If this client was scheduled for async freeing we need to remove it - * from the queue. */ - if (c->flags & CLIENT_CLOSE_ASAP) { - ln = listSearchKey(server.clients_to_close,c); - serverAssert(ln != NULL); - listDelNode(server.clients_to_close,ln); - } - - /* Remove the contribution that this client gave to our + /* Remove the contribution that this client gave to our * incrementally computed memory usage. */ server.stat_clients_type_memory[c->client_cron_last_memory_type] -= c->client_cron_last_memory_usage; diff --git a/src/replication.c b/src/replication.c index f433d6413..b10635f25 100644 --- a/src/replication.c +++ b/src/replication.c @@ -2688,9 +2688,6 @@ void replicationCacheMaster(client *c) { /* Unlink the client from the server structures. */ unlinkClient(c); - /* Clear flags that can create issues once we reconnect the client. */ - c->flags &= ~(CLIENT_CLOSE_ASAP|CLIENT_CLOSE_AFTER_REPLY); - /* Reset the master client so that's ready to accept new commands: * we want to discard te non processed query buffers and non processed * offsets, including pending transactions, already populated arguments, |