diff options
author | Luke Chen <luke.chen@mongodb.com> | 2018-01-12 13:36:30 +1100 |
---|---|---|
committer | Luke Chen <luke.chen@mongodb.com> | 2018-01-12 13:51:17 +1100 |
commit | 0f8edc6e87fc4eb2242207932ff22961d31cf9b9 (patch) | |
tree | e797e27d92c6b8359c8310fe36311ce901ac31ea | |
parent | d131ca7d6a2b0442620346561d27c6b00f832bf0 (diff) | |
download | mongo-0f8edc6e87fc4eb2242207932ff22961d31cf9b9.tar.gz |
Import wiredtiger: 46c2f8fd9b678f4e876bd4fbc888d0040b628045 from branch mongodb-3.2
ref: 827b48a342..46c2f8fd9b
for: 3.2.19
WT-3284 Fix tree-walk restart bug
WT-3533 Eviction handle walk can race with handle re-open
WT-3590 Keep data consistent if writes fail during a clean shutdown
WT-3618 WT remove solaris from evergreen builds
-rw-r--r-- | src/third_party/wiredtiger/dist/flags.py | 1 |
-rw-r--r-- | src/third_party/wiredtiger/import.data | 2 |
-rw-r--r-- | src/third_party/wiredtiger/src/btree/bt_random.c | 3 |
-rw-r--r-- | src/third_party/wiredtiger/src/btree/bt_walk.c | 99 |
-rw-r--r-- | src/third_party/wiredtiger/src/include/extern.h | 2 |
-rw-r--r-- | src/third_party/wiredtiger/src/include/flags.h | 5 |
-rw-r--r-- | src/third_party/wiredtiger/test/suite/test_bug018.py | 0 |
7 files changed, 53 insertions, 59 deletions
diff --git a/src/third_party/wiredtiger/dist/flags.py b/src/third_party/wiredtiger/dist/flags.py index 64b5d789e72..d80c80a37ce 100644 --- a/src/third_party/wiredtiger/dist/flags.py +++ b/src/third_party/wiredtiger/dist/flags.py @@ -32,7 +32,6 @@ flags = { 'READ_PREV', 'READ_RESTART_OK', 'READ_SKIP_INTL', - 'READ_SKIP_LEAF', 'READ_TRUNCATE', 'READ_WONT_NEED', ], diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data index 8bd00db3aa2..8255b539a98 100644 --- a/src/third_party/wiredtiger/import.data +++ b/src/third_party/wiredtiger/import.data @@ -1,5 +1,5 @@ { - "commit": "827b48a34227243c809d41fac3dc909ed46b0c5e", + "commit": "46c2f8fd9b678f4e876bd4fbc888d0040b628045", "github": "wiredtiger/wiredtiger.git", "vendor": "wiredtiger", "branch": "mongodb-3.2" diff --git a/src/third_party/wiredtiger/src/btree/bt_random.c b/src/third_party/wiredtiger/src/btree/bt_random.c index c5948ec4ab5..b4f05c440ba 100644 --- a/src/third_party/wiredtiger/src/btree/bt_random.c +++ b/src/third_party/wiredtiger/src/btree/bt_random.c @@ -395,8 +395,7 @@ __wt_btcur_next_random(WT_CURSOR_BTREE *cbt) */ for (skip = cbt->next_random_leaf_skip; cbt->ref == NULL || skip > 0;) { n = skip; - WT_ERR(__wt_tree_walk_skip(session, &cbt->ref, &skip, - WT_READ_NO_GEN | WT_READ_SKIP_INTL | WT_READ_WONT_NEED)); + WT_ERR(__wt_tree_walk_skip(session, &cbt->ref, &skip)); if (n == skip) { if (skip == 0) break; diff --git a/src/third_party/wiredtiger/src/btree/bt_walk.c b/src/third_party/wiredtiger/src/btree/bt_walk.c index 86484feb7c9..8aeded7494a 100644 --- a/src/third_party/wiredtiger/src/btree/bt_walk.c +++ b/src/third_party/wiredtiger/src/btree/bt_walk.c @@ -497,29 +497,21 @@ restart: /* } /* - * Optionally skip leaf pages: skip all leaf pages if - * WT_READ_SKIP_LEAF is set, when the skip-leaf-count - * variable is non-zero, skip some count of leaf pages. - * If this page is disk-based, crack the cell to figure - * out it's a leaf page without reading it. 
+ * Optionally skip leaf pages: when the skip-leaf-count + * variable is non-zero, skip some count of leaf pages, + * then take the next leaf page we can. * - * If skipping some number of leaf pages, decrement the - * count of pages to zero, and then take the next leaf - * page we can. Be cautious around the page decrement, - * if for some reason don't take this particular page, - * we can take the next one, and, there are additional - * tests/decrements when we're about to return a leaf - * page. + * The reason to do some of this work here (rather than + * in our caller), is because we can look at the cell + * and know it's a leaf page without reading it into + * memory. If this page is disk-based, crack the cell + * to figure out it's a leaf page without reading it. */ - if (skipleafcntp != NULL || LF_ISSET(WT_READ_SKIP_LEAF)) - if (__ref_is_leaf(ref)) { - if (LF_ISSET(WT_READ_SKIP_LEAF)) - break; - if (*skipleafcntp > 0) { - --*skipleafcntp; - break; - } - } + if (skipleafcntp != NULL && + *skipleafcntp > 0 && __ref_is_leaf(ref)) { + --*skipleafcntp; + break; + } ret = __wt_page_swap(session, couple, ref, WT_READ_NOTFOUND_OK | WT_READ_RESTART_OK | flags); @@ -626,34 +618,18 @@ descend: empty_internal = true; session, ref, &pindex); slot = pindex->entries - 1; } - } else { - /* - * At the lowest tree level (considering a leaf - * page), turn off the initial-descent state. - * Descent race tests are different when moving - * through the tree vs. the initial descent. - */ - initial_descent = false; - - /* - * Optionally skip leaf pages, the second half. - * We didn't have an on-page cell to figure out - * if it was a leaf page, we had to acquire the - * hazard pointer and look at the page. 
- */ - if (skipleafcntp != NULL || - LF_ISSET(WT_READ_SKIP_LEAF)) { - if (LF_ISSET(WT_READ_SKIP_LEAF)) - break; - if (*skipleafcntp > 0) { - --*skipleafcntp; - break; - } - } - - *refp = ref; - goto done; + continue; } + + /* + * The tree-walk restart code knows we return any leaf + * page we acquire (never hazard-pointer coupling on + * after acquiring a leaf page), and asserts no restart + * happens while holding a leaf page. This page must be + * returned to our caller. + */ + *refp = ref; + goto done; } } @@ -690,8 +666,29 @@ __wt_tree_walk_count(WT_SESSION_IMPL *session, * of leaf pages before returning. */ int -__wt_tree_walk_skip(WT_SESSION_IMPL *session, - WT_REF **refp, uint64_t *skipleafcntp, uint32_t flags) +__wt_tree_walk_skip( + WT_SESSION_IMPL *session, WT_REF **refp, uint64_t *skipleafcntp) { - return (__tree_walk_internal(session, refp, NULL, skipleafcntp, flags)); + /* + * Optionally skip leaf pages, the second half. The tree-walk function + * didn't have an on-page cell it could use to figure out if the page + * was a leaf page or not, it had to acquire the hazard pointer and look + * at the page. The tree-walk code never acquires a hazard pointer on a + * leaf page without returning it, and it's not trivial to change that. + * So, the tree-walk code returns all leaf pages here and we deal with + * decrementing the count. + */ + do { + WT_RET(__tree_walk_internal(session, refp, NULL, skipleafcntp, + WT_READ_NO_GEN | WT_READ_SKIP_INTL | WT_READ_WONT_NEED)); + + /* + * The walk skipped internal pages, any page returned must be a + * leaf page. 
+ */ + if (*skipleafcntp > 0) + --*skipleafcntp; + } while (*skipleafcntp > 0); + + return (0); } diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h index dfd2d03707f..b86374ef041 100644 --- a/src/third_party/wiredtiger/src/include/extern.h +++ b/src/third_party/wiredtiger/src/include/extern.h @@ -181,7 +181,7 @@ extern int __wt_verify_dsk_image(WT_SESSION_IMPL *session, const char *tag, cons extern int __wt_verify_dsk(WT_SESSION_IMPL *session, const char *tag, WT_ITEM *buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_tree_walk(WT_SESSION_IMPL *session, WT_REF **refp, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_tree_walk_count(WT_SESSION_IMPL *session, WT_REF **refp, uint64_t *walkcntp, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_tree_walk_skip(WT_SESSION_IMPL *session, WT_REF **refp, uint64_t *skipleafcntp, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_tree_walk_skip( WT_SESSION_IMPL *session, WT_REF **refp, uint64_t *skipleafcntp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, uint64_t recno, WT_ITEM *value, WT_UPDATE *upd_arg, bool is_remove, bool exclusive) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_col_search(WT_SESSION_IMPL *session, uint64_t search_recno, WT_REF *leaf, WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_row_leaf_keys(WT_SESSION_IMPL *session, WT_PAGE *page) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); diff --git a/src/third_party/wiredtiger/src/include/flags.h b/src/third_party/wiredtiger/src/include/flags.h index f26a45c68f5..d7c0e0f9472 100644 --- a/src/third_party/wiredtiger/src/include/flags.h +++ b/src/third_party/wiredtiger/src/include/flags.h @@ -47,9 +47,8 @@ #define WT_READ_PREV 0x00000080 #define 
WT_READ_RESTART_OK 0x00000100 #define WT_READ_SKIP_INTL 0x00000200 -#define WT_READ_SKIP_LEAF 0x00000400 -#define WT_READ_TRUNCATE 0x00000800 -#define WT_READ_WONT_NEED 0x00001000 +#define WT_READ_TRUNCATE 0x00000400 +#define WT_READ_WONT_NEED 0x00000800 #define WT_SESSION_CAN_WAIT 0x00000001 #define WT_SESSION_INTERNAL 0x00000002 #define WT_SESSION_LOCKED_CHECKPOINT 0x00000004 diff --git a/src/third_party/wiredtiger/test/suite/test_bug018.py b/src/third_party/wiredtiger/test/suite/test_bug018.py new file mode 100644 index 00000000000..e69de29bb2d --- /dev/null +++ b/src/third_party/wiredtiger/test/suite/test_bug018.py |