summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLuke Chen <luke.chen@mongodb.com>2018-01-12 13:36:30 +1100
committerLuke Chen <luke.chen@mongodb.com>2018-01-12 13:51:17 +1100
commit0f8edc6e87fc4eb2242207932ff22961d31cf9b9 (patch)
treee797e27d92c6b8359c8310fe36311ce901ac31ea
parentd131ca7d6a2b0442620346561d27c6b00f832bf0 (diff)
downloadmongo-0f8edc6e87fc4eb2242207932ff22961d31cf9b9.tar.gz
Import wiredtiger: 46c2f8fd9b678f4e876bd4fbc888d0040b628045 from branch mongodb-3.2
ref: 827b48a342..46c2f8fd9b for: 3.2.19 WT-3284 Fix tree-walk restart bug WT-3533 Eviction handle walk can race with handle re-open WT-3590 Keep data consistent if writes fail during a clean shutdown WT-3618 WT remove solaris from evergreen builds
-rw-r--r--src/third_party/wiredtiger/dist/flags.py1
-rw-r--r--src/third_party/wiredtiger/import.data2
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_random.c3
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_walk.c99
-rw-r--r--src/third_party/wiredtiger/src/include/extern.h2
-rw-r--r--src/third_party/wiredtiger/src/include/flags.h5
-rw-r--r--src/third_party/wiredtiger/test/suite/test_bug018.py0
7 files changed, 53 insertions, 59 deletions
diff --git a/src/third_party/wiredtiger/dist/flags.py b/src/third_party/wiredtiger/dist/flags.py
index 64b5d789e72..d80c80a37ce 100644
--- a/src/third_party/wiredtiger/dist/flags.py
+++ b/src/third_party/wiredtiger/dist/flags.py
@@ -32,7 +32,6 @@ flags = {
'READ_PREV',
'READ_RESTART_OK',
'READ_SKIP_INTL',
- 'READ_SKIP_LEAF',
'READ_TRUNCATE',
'READ_WONT_NEED',
],
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data
index 8bd00db3aa2..8255b539a98 100644
--- a/src/third_party/wiredtiger/import.data
+++ b/src/third_party/wiredtiger/import.data
@@ -1,5 +1,5 @@
{
- "commit": "827b48a34227243c809d41fac3dc909ed46b0c5e",
+ "commit": "46c2f8fd9b678f4e876bd4fbc888d0040b628045",
"github": "wiredtiger/wiredtiger.git",
"vendor": "wiredtiger",
"branch": "mongodb-3.2"
diff --git a/src/third_party/wiredtiger/src/btree/bt_random.c b/src/third_party/wiredtiger/src/btree/bt_random.c
index c5948ec4ab5..b4f05c440ba 100644
--- a/src/third_party/wiredtiger/src/btree/bt_random.c
+++ b/src/third_party/wiredtiger/src/btree/bt_random.c
@@ -395,8 +395,7 @@ __wt_btcur_next_random(WT_CURSOR_BTREE *cbt)
*/
for (skip = cbt->next_random_leaf_skip; cbt->ref == NULL || skip > 0;) {
n = skip;
- WT_ERR(__wt_tree_walk_skip(session, &cbt->ref, &skip,
- WT_READ_NO_GEN | WT_READ_SKIP_INTL | WT_READ_WONT_NEED));
+ WT_ERR(__wt_tree_walk_skip(session, &cbt->ref, &skip));
if (n == skip) {
if (skip == 0)
break;
diff --git a/src/third_party/wiredtiger/src/btree/bt_walk.c b/src/third_party/wiredtiger/src/btree/bt_walk.c
index 86484feb7c9..8aeded7494a 100644
--- a/src/third_party/wiredtiger/src/btree/bt_walk.c
+++ b/src/third_party/wiredtiger/src/btree/bt_walk.c
@@ -497,29 +497,21 @@ restart: /*
}
/*
- * Optionally skip leaf pages: skip all leaf pages if
- * WT_READ_SKIP_LEAF is set, when the skip-leaf-count
- * variable is non-zero, skip some count of leaf pages.
- * If this page is disk-based, crack the cell to figure
- * out it's a leaf page without reading it.
+ * Optionally skip leaf pages: when the skip-leaf-count
+ * variable is non-zero, skip some count of leaf pages,
+ * then take the next leaf page we can.
*
- * If skipping some number of leaf pages, decrement the
- * count of pages to zero, and then take the next leaf
- * page we can. Be cautious around the page decrement,
- * if for some reason don't take this particular page,
- * we can take the next one, and, there are additional
- * tests/decrements when we're about to return a leaf
- * page.
+ * The reason to do some of this work here (rather than
+ * in our caller), is because we can look at the cell
+ * and know it's a leaf page without reading it into
+ * memory. If this page is disk-based, crack the cell
+ * to figure out it's a leaf page without reading it.
*/
- if (skipleafcntp != NULL || LF_ISSET(WT_READ_SKIP_LEAF))
- if (__ref_is_leaf(ref)) {
- if (LF_ISSET(WT_READ_SKIP_LEAF))
- break;
- if (*skipleafcntp > 0) {
- --*skipleafcntp;
- break;
- }
- }
+ if (skipleafcntp != NULL &&
+ *skipleafcntp > 0 && __ref_is_leaf(ref)) {
+ --*skipleafcntp;
+ break;
+ }
ret = __wt_page_swap(session, couple, ref,
WT_READ_NOTFOUND_OK | WT_READ_RESTART_OK | flags);
@@ -626,34 +618,18 @@ descend: empty_internal = true;
session, ref, &pindex);
slot = pindex->entries - 1;
}
- } else {
- /*
- * At the lowest tree level (considering a leaf
- * page), turn off the initial-descent state.
- * Descent race tests are different when moving
- * through the tree vs. the initial descent.
- */
- initial_descent = false;
-
- /*
- * Optionally skip leaf pages, the second half.
- * We didn't have an on-page cell to figure out
- * if it was a leaf page, we had to acquire the
- * hazard pointer and look at the page.
- */
- if (skipleafcntp != NULL ||
- LF_ISSET(WT_READ_SKIP_LEAF)) {
- if (LF_ISSET(WT_READ_SKIP_LEAF))
- break;
- if (*skipleafcntp > 0) {
- --*skipleafcntp;
- break;
- }
- }
-
- *refp = ref;
- goto done;
+ continue;
}
+
+ /*
+ * The tree-walk restart code knows we return any leaf
+ * page we acquire (never hazard-pointer coupling on
+ * after acquiring a leaf page), and asserts no restart
+ * happens while holding a leaf page. This page must be
+ * returned to our caller.
+ */
+ *refp = ref;
+ goto done;
}
}
@@ -690,8 +666,29 @@ __wt_tree_walk_count(WT_SESSION_IMPL *session,
* of leaf pages before returning.
*/
int
-__wt_tree_walk_skip(WT_SESSION_IMPL *session,
- WT_REF **refp, uint64_t *skipleafcntp, uint32_t flags)
+__wt_tree_walk_skip(
+ WT_SESSION_IMPL *session, WT_REF **refp, uint64_t *skipleafcntp)
{
- return (__tree_walk_internal(session, refp, NULL, skipleafcntp, flags));
+ /*
+ * Optionally skip leaf pages, the second half. The tree-walk function
+ * didn't have an on-page cell it could use to figure out if the page
+ * was a leaf page or not, it had to acquire the hazard pointer and look
+ * at the page. The tree-walk code never acquires a hazard pointer on a
+ * leaf page without returning it, and it's not trivial to change that.
+ * So, the tree-walk code returns all leaf pages here and we deal with
+ * decrementing the count.
+ */
+ do {
+ WT_RET(__tree_walk_internal(session, refp, NULL, skipleafcntp,
+ WT_READ_NO_GEN | WT_READ_SKIP_INTL | WT_READ_WONT_NEED));
+
+ /*
+ * The walk skipped internal pages, any page returned must be a
+ * leaf page.
+ */
+ if (*skipleafcntp > 0)
+ --*skipleafcntp;
+ } while (*skipleafcntp > 0);
+
+ return (0);
}
diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h
index dfd2d03707f..b86374ef041 100644
--- a/src/third_party/wiredtiger/src/include/extern.h
+++ b/src/third_party/wiredtiger/src/include/extern.h
@@ -181,7 +181,7 @@ extern int __wt_verify_dsk_image(WT_SESSION_IMPL *session, const char *tag, cons
extern int __wt_verify_dsk(WT_SESSION_IMPL *session, const char *tag, WT_ITEM *buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_tree_walk(WT_SESSION_IMPL *session, WT_REF **refp, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_tree_walk_count(WT_SESSION_IMPL *session, WT_REF **refp, uint64_t *walkcntp, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_tree_walk_skip(WT_SESSION_IMPL *session, WT_REF **refp, uint64_t *skipleafcntp, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_tree_walk_skip( WT_SESSION_IMPL *session, WT_REF **refp, uint64_t *skipleafcntp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, uint64_t recno, WT_ITEM *value, WT_UPDATE *upd_arg, bool is_remove, bool exclusive) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_col_search(WT_SESSION_IMPL *session, uint64_t search_recno, WT_REF *leaf, WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_row_leaf_keys(WT_SESSION_IMPL *session, WT_PAGE *page) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
diff --git a/src/third_party/wiredtiger/src/include/flags.h b/src/third_party/wiredtiger/src/include/flags.h
index f26a45c68f5..d7c0e0f9472 100644
--- a/src/third_party/wiredtiger/src/include/flags.h
+++ b/src/third_party/wiredtiger/src/include/flags.h
@@ -47,9 +47,8 @@
#define WT_READ_PREV 0x00000080
#define WT_READ_RESTART_OK 0x00000100
#define WT_READ_SKIP_INTL 0x00000200
-#define WT_READ_SKIP_LEAF 0x00000400
-#define WT_READ_TRUNCATE 0x00000800
-#define WT_READ_WONT_NEED 0x00001000
+#define WT_READ_TRUNCATE 0x00000400
+#define WT_READ_WONT_NEED 0x00000800
#define WT_SESSION_CAN_WAIT 0x00000001
#define WT_SESSION_INTERNAL 0x00000002
#define WT_SESSION_LOCKED_CHECKPOINT 0x00000004
diff --git a/src/third_party/wiredtiger/test/suite/test_bug018.py b/src/third_party/wiredtiger/test/suite/test_bug018.py
new file mode 100644
index 00000000000..e69de29bb2d
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/test_bug018.py