summary | refs | log | tree | commit | diff
path: root/src/btree
diff options
context:
space:
mode:
author: Keith Bostic <keith@wiredtiger.com> 2016-02-04 09:42:32 -0500
committer: Keith Bostic <keith@wiredtiger.com> 2016-02-04 09:42:32 -0500
commit: 5c2297305ddf5aab3ded6def9c2cc9087f876f06 (patch)
tree: 7798754c20f58285e53fcfe0820710e015c57db9 /src/btree
parent: 152839497426d1e894cc69e7cae2123a711364e8 (diff)
download: mongo-5c2297305ddf5aab3ded6def9c2cc9087f876f06.tar.gz
WT-2361: column-store starting record number error
Don't continue searches from the "current" page if a child page we're waiting on splits. The problem is a thread holding a hazard pointer on a parent page X while waiting on a child page A. If A is then insert-split, a chunk of A's name-space moves to a new page B, which follows A in the parent page's page-index. If the parent page X then splits into its own parent, B could move to the parent of X, and the thread has to search from the root of the tree, not just from page X, to find the name-space now in page B. I've never seen this bug fire (as far as I know); it's improbable, but I think it's possible.
Diffstat (limited to 'src/btree')
-rw-r--r-- src/btree/col_srch.c 18
-rw-r--r-- src/btree/row_srch.c 18
2 files changed, 24 insertions, 12 deletions
diff --git a/src/btree/col_srch.c b/src/btree/col_srch.c
index cb5a227495f..84b1e24aa77 100644
--- a/src/btree/col_srch.c
+++ b/src/btree/col_srch.c
@@ -116,12 +116,12 @@ __wt_col_search(WT_SESSION_IMPL *session,
goto leaf_only;
}
-restart_root:
+restart:
/* Search the internal pages of the tree. */
current = &btree->root;
for (depth = 2, pindex = NULL;; ++depth) {
parent_pindex = pindex;
-restart_page: page = current->page;
+ page = current->page;
if (page->type != WT_PAGE_COL_INT)
break;
@@ -141,7 +141,7 @@ restart_page: page = current->page;
__wt_split_intl_race(
session, current->home, parent_pindex)) {
WT_RET(__wt_page_release(session, current, 0));
- goto restart_root;
+ goto restart;
}
goto descend;
}
@@ -178,8 +178,14 @@ descend: /*
/*
* Swap the current page for the child page. If the page splits
- * while we're retrieving it, restart the search in the current
- * page; otherwise return on error, the swap call ensures we're
+ * while we're retrieving it, restart the search at the root.
+ * We cannot restart in the "current" page; for example, if a
+ * thread is appending to the tree, the page it's waiting for
+ * did an insert-split into the parent, then the parent split
+ * into its parent, the name space we are searching for may have
+ * moved above the current page in the tree.
+ *
+ * On other error, simply return, the swap call ensures we're
* holding nothing on failure.
*/
if ((ret = __wt_page_swap(
@@ -188,7 +194,7 @@ descend: /*
continue;
}
if (ret == WT_RESTART)
- goto restart_page;
+ goto restart;
return (ret);
}
diff --git a/src/btree/row_srch.c b/src/btree/row_srch.c
index 71564a7b3c5..0e7846bebe5 100644
--- a/src/btree/row_srch.c
+++ b/src/btree/row_srch.c
@@ -276,12 +276,12 @@ __wt_row_search(WT_SESSION_IMPL *session,
goto leaf_only;
}
+restart:
/* Search the internal pages of the tree. */
-restart_root:
current = &btree->root;
for (depth = 2, pindex = NULL;; ++depth) {
parent_pindex = pindex;
-restart_page: page = current->page;
+ page = current->page;
if (page->type != WT_PAGE_ROW_INT)
break;
@@ -426,14 +426,20 @@ append: if (parent_pindex != NULL &&
return (ret);
skiplow = skiphigh = 0;
- goto restart_root;
+ goto restart;
}
}
descend: /*
* Swap the current page for the child page. If the page splits
- * while we're retrieving it, restart the search in the current
- * page; otherwise return on error, the swap call ensures we're
+ * while we're retrieving it, restart the search at the root.
+ * We cannot restart in the "current" page; for example, if a
+ * thread is appending to the tree, the page it's waiting for
+ * did an insert-split into the parent, then the parent split
+ * into its parent, the name space we are searching for may have
+ * moved above the current page in the tree.
+ *
+ * On other error, simply return, the swap call ensures we're
* holding nothing on failure.
*/
if ((ret = __wt_page_swap(
@@ -443,7 +449,7 @@ descend: /*
}
if (ret == WT_RESTART) {
skiphigh = skiplow = 0;
- goto restart_page;
+ goto restart;
}
return (ret);
}