From 4ebac0fc8664708639babfc0e352f736fbf09451 Mon Sep 17 00:00:00 2001 From: Martin Beck Date: Sat, 20 Nov 2021 14:22:25 +1100 Subject: MDEV-27088: Server crash on ARM (WMM architecture) due to missing barriers in lf-hash (10.5) MariaDB server crashes on ARM (weak memory model architecture) while concurrently executing l_find to load node->key and add_to_purgatory to store node->key = NULL. l_find then uses key (which is NULL), to pass it to a comparison function. The specific problem is the out-of-order execution that happens on a weak memory model architecture. Two essential reorderings are possible, which need to be prevented. a) As l_find has no barriers in place between the optimistic read of the key field lf_hash.cc#L117 and the verification of link lf_hash.cc#L124, the processor can reorder the load to happen after the while-loop. In that case, a concurrent thread executing add_to_purgatory on the same node can be scheduled to store NULL at the key field lf_alloc-pin.c#L253 before key is loaded in l_find. b) A node is marked as deleted by a CAS in l_delete lf_hash.cc#L247 and taken off the list with an upfollowing CAS lf_hash.cc#L252. Only if both CAS succeed, the key field is written to by add_to_purgatory. However, due to a missing barrier, the relaxed store of key lf_alloc-pin.c#L253 can be moved ahead of the two CAS operations, which makes the value of the local purgatory list stored by add_to_purgatory visible to all threads operating on the list. As the node is not marked as deleted yet, the same error occurs in l_find. This change three accesses to be atomic. * optimistic read of key in l_find lf_hash.cc#L117 * read of link for verification lf_hash.cc#L124 * write of key in add_to_purgatory lf_alloc-pin.c#L253 Reviewers: Sergei Vojtovich, Sergei Golubchik Fixes: MDEV-23510 / d30c1331a18d875e553f3fcf544997e4f33fb943 --- mysys/lf_alloc-pin.c | 5 +++-- mysys/lf_hash.cc | 14 +++++++++----- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/mysys/lf_alloc-pin.c b/mysys/lf_alloc-pin.c index 2162e1771ba..33480289d94 100644 --- a/mysys/lf_alloc-pin.c +++ b/mysys/lf_alloc-pin.c @@ -250,8 +250,9 @@ static int ptr_cmp(void **a, void **b) #define add_to_purgatory(PINS, ADDR) \ do \ { \ - *(void **)((char *)(ADDR)+(PINS)->pinbox->free_ptr_offset)= \ - (PINS)->purgatory; \ + my_atomic_storeptr_explicit( \ + (void **)((char *)(ADDR)+(PINS)->pinbox->free_ptr_offset), \ + (PINS)->purgatory, MY_MEMORY_ORDER_RELEASE); \ (PINS)->purgatory= (ADDR); \ (PINS)->purgatory_count++; \ } while (0) diff --git a/mysys/lf_hash.cc b/mysys/lf_hash.cc index bc3504bc0dd..c8f2e3f456f 100644 --- a/mysys/lf_hash.cc +++ b/mysys/lf_hash.cc @@ -103,8 +103,9 @@ retry: do { /* PTR() isn't necessary below, head is a dummy node */ cursor->curr= my_assume_aligned((LF_SLIST *)(*cursor->prev)); lf_pin(pins, 1, cursor->curr); - } while (my_atomic_loadptr((void **)my_assume_aligned(cursor->prev)) != cursor->curr && - LF_BACKOFF()); + } while (my_atomic_loadptr( + (void **)my_assume_aligned(cursor->prev)) + != cursor->curr && LF_BACKOFF()); for (;;) { if (unlikely(!cursor->curr)) @@ -114,14 +115,17 @@ retry: cur_keylen= cursor->curr->keylen; /* The key element needs to be aligned, not necessary what it points to */ my_assume_aligned(&cursor->curr->key); - cur_key= cursor->curr->key; + cur_key= (const uchar *) my_atomic_loadptr_explicit((void **) &cursor->curr->key, + MY_MEMORY_ORDER_ACQUIRE); do { /* attempting to my_assume_aligned onlink below broke the implementation */ - link= cursor->curr->link; + link= (intptr) my_atomic_loadptr_explicit((void **) &cursor->curr->link, + MY_MEMORY_ORDER_RELAXED); cursor->next= my_assume_aligned(PTR(link)); lf_pin(pins, 0, cursor->next); - } while (link != cursor->curr->link && LF_BACKOFF()); + } while (link != (intptr) my_atomic_loadptr((void *volatile *) &cursor->curr->link) + && LF_BACKOFF()); if (!DELETED(link)) { -- cgit v1.2.1