summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Luke Chen <luke.chen@mongodb.com> 2023-02-16 16:28:49 +1100
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2023-02-16 06:32:56 +0000
commit: 935639beed3d0c19c2551c93854b831107c0b118 (patch)
tree: 55b6270e1795e637d02e7fb4440c2e28ed9e7372
parent: c3ed537547bd525563f0f0a2dbef5209873afd9d (diff)
download: mongo-r5.0.15-rc2.tar.gz
Import wiredtiger: 8a10a0e704b4fe5ec94612c4386028bcdb452d39 from branch mongodb-5.0 (tags: r5.0.15-rc2, r5.0.15)
ref: 8685f6cf81..8a10a0e704 for: 5.0.15 WT-10584 Add missing read barriers in __cursor_skip_prev
-rw-r--r-- src/third_party/wiredtiger/import.data 2
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_curprev.c 31
2 files changed, 27 insertions, 6 deletions
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data
index 95ae575868c..54083b1e264 100644
--- a/src/third_party/wiredtiger/import.data
+++ b/src/third_party/wiredtiger/import.data
@@ -2,5 +2,5 @@
"vendor": "wiredtiger",
"github": "wiredtiger/wiredtiger.git",
"branch": "mongodb-5.0",
- "commit": "8685f6cf81eca8efc402e1269629370ab5b968f8"
+ "commit": "8a10a0e704b4fe5ec94612c4386028bcdb452d39"
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_curprev.c b/src/third_party/wiredtiger/src/btree/bt_curprev.c
index 194eb7dc2e5..3e85dd98176 100644
--- a/src/third_party/wiredtiger/src/btree/bt_curprev.c
+++ b/src/third_party/wiredtiger/src/btree/bt_curprev.c
@@ -34,7 +34,7 @@
static inline int
__cursor_skip_prev(WT_CURSOR_BTREE *cbt)
{
- WT_INSERT *current, *ins;
+ WT_INSERT *current, *ins, *next_ins;
WT_ITEM key;
WT_SESSION_IMPL *session;
uint64_t recno;
@@ -82,7 +82,13 @@ restart:
for (; i >= 0; i--) {
cbt->ins_stack[i] = NULL;
cbt->next_stack[i] = NULL;
- ins = cbt->ins_head->head[i];
+ /*
+ * The compiler may replace uses of this variable with another read of the shared location
+ * in the following code.
+ *
+ * Place a read barrier to avoid this issue.
+ */
+ WT_ORDERED_READ(ins, cbt->ins_head->head[i]);
if (ins != NULL && ins != current)
break;
}
@@ -98,11 +104,26 @@ restart:
cbt->next_stack[0] = NULL;
goto restart;
}
- if (ins->next[i] != current) /* Stay at this level */
- ins = ins->next[i];
+ /*
+ * The CPU may reorder the reads and return a stale value. This can lead us to wrongly skip a
+ * value in the lower levels of the skip list.
+ *
+ * For example, suppose we initially have A -> C on both level 0 and level 1, and B is
+ * concurrently inserted into both levels. If B is visible on level 1 to this thread, it
+ * must also be visible on level 0; otherwise, we would record an inconsistent stack.
+ *
+ * Place a read barrier to avoid this issue.
+ */
+ WT_ORDERED_READ(next_ins, ins->next[i]);
+ if (next_ins != current) /* Stay at this level */
+ ins = next_ins;
else { /* Drop down a level */
+ /*
+ * It is possible that we read an old value that is inconsistent with the higher levels of
+ * the skip list due to CPU read reordering. Add a read barrier to avoid this issue.
+ */
+ WT_ORDERED_READ(cbt->next_stack[i], ins->next[i]);
cbt->ins_stack[i] = &ins->next[i];
- cbt->next_stack[i] = ins->next[i];
--i;
}
}