diff options
author | Keith Bostic <keith@wiredtiger.com> | 2016-02-04 09:42:32 -0500 |
---|---|---|
committer | Keith Bostic <keith@wiredtiger.com> | 2016-02-04 09:42:32 -0500 |
commit | 5c2297305ddf5aab3ded6def9c2cc9087f876f06 (patch) | |
tree | 7798754c20f58285e53fcfe0820710e015c57db9 /src/btree | |
parent | 152839497426d1e894cc69e7cae2123a711364e8 (diff) | |
download | mongo-5c2297305ddf5aab3ded6def9c2cc9087f876f06.tar.gz |
WT-2361: column-store starting record number error
Don't continue searches from the "current" page if a child page we're
waiting on splits.
The problem is a thread holding a hazard pointer on a parent page X, and
waiting on a child page A. Then, A is insert-split, moving a chunk of
A's name-space to a new page B, that follows A in the parent page's
page-index. If the parent page X then does a split into its parent, B
could move to the parent of X, and the thread has to search from the
root of the tree, not just from page X, to find the name-space now in
page B.
I've never seen this bug fire (as far as I know); it's improbable, but
I think it's possible.
Diffstat (limited to 'src/btree')
-rw-r--r-- | src/btree/col_srch.c | 18 | ||||
-rw-r--r-- | src/btree/row_srch.c | 18 |
2 files changed, 24 insertions, 12 deletions
diff --git a/src/btree/col_srch.c b/src/btree/col_srch.c index cb5a227495f..84b1e24aa77 100644 --- a/src/btree/col_srch.c +++ b/src/btree/col_srch.c @@ -116,12 +116,12 @@ __wt_col_search(WT_SESSION_IMPL *session, goto leaf_only; } -restart_root: +restart: /* Search the internal pages of the tree. */ current = &btree->root; for (depth = 2, pindex = NULL;; ++depth) { parent_pindex = pindex; -restart_page: page = current->page; + page = current->page; if (page->type != WT_PAGE_COL_INT) break; @@ -141,7 +141,7 @@ restart_page: page = current->page; __wt_split_intl_race( session, current->home, parent_pindex)) { WT_RET(__wt_page_release(session, current, 0)); - goto restart_root; + goto restart; } goto descend; } @@ -178,8 +178,14 @@ descend: /* /* * Swap the current page for the child page. If the page splits - * while we're retrieving it, restart the search in the current - * page; otherwise return on error, the swap call ensures we're + * while we're retrieving it, restart the search at the root. + * We cannot restart in the "current" page; for example, if a + * thread is appending to the tree, the page it's waiting for + * did an insert-split into the parent, then the parent split + * into its parent, the name space we are searching for may have + * moved above the current page in the tree. + * + * On other error, simply return, the swap call ensures we're * holding nothing on failure. */ if ((ret = __wt_page_swap( @@ -188,7 +194,7 @@ descend: /* continue; } if (ret == WT_RESTART) - goto restart_page; + goto restart; return (ret); } diff --git a/src/btree/row_srch.c b/src/btree/row_srch.c index 71564a7b3c5..0e7846bebe5 100644 --- a/src/btree/row_srch.c +++ b/src/btree/row_srch.c @@ -276,12 +276,12 @@ __wt_row_search(WT_SESSION_IMPL *session, goto leaf_only; } +restart: /* Search the internal pages of the tree. 
*/ -restart_root: current = &btree->root; for (depth = 2, pindex = NULL;; ++depth) { parent_pindex = pindex; -restart_page: page = current->page; + page = current->page; if (page->type != WT_PAGE_ROW_INT) break; @@ -426,14 +426,20 @@ append: if (parent_pindex != NULL && return (ret); skiplow = skiphigh = 0; - goto restart_root; + goto restart; } } descend: /* * Swap the current page for the child page. If the page splits - * while we're retrieving it, restart the search in the current - * page; otherwise return on error, the swap call ensures we're + * while we're retrieving it, restart the search at the root. + * We cannot restart in the "current" page; for example, if a + * thread is appending to the tree, the page it's waiting for + * did an insert-split into the parent, then the parent split + * into its parent, the name space we are searching for may have + * moved above the current page in the tree. + * + * On other error, simply return, the swap call ensures we're * holding nothing on failure. */ if ((ret = __wt_page_swap( @@ -443,7 +449,7 @@ descend: /* } if (ret == WT_RESTART) { skiphigh = skiplow = 0; - goto restart_page; + goto restart; } return (ret); } |