summaryrefslogtreecommitdiff
path: root/src/btree
diff options
context:
space:
mode:
author: Keith Bostic <keith@wiredtiger.com> 2016-02-16 00:37:22 +0000
committer: Keith Bostic <keith@wiredtiger.com> 2016-02-16 00:37:22 +0000
commit: d3893dbcbff0b0b03f6d0b83eb6651f1810cc2c6 (patch)
tree: cd1dbb33e40398d0d4a64bbb9f6efc47826a209f /src/btree
parent: 642b4cdeebdac14c15a27f08084b9e43c3ed18cb (diff)
download: mongo-d3893dbcbff0b0b03f6d0b83eb6651f1810cc2c6.tar.gz
WT-2397: Cursor traversal from end of the tree skips records.
If we're half-way down the tree and get a RESTART while trying to swap to a new page, and the reason for the RESTART is that our page split into our parent, which in turn caused our parent to split into its parent, we can't use our parent's page index because it has been truncated; do a full restart instead. Also simplify the __page_descend_prev() and __page_initial_descent_prev() functions: they don't need to set the slot (the caller can do that), and an unnecessary local variable is removed.
Diffstat (limited to 'src/btree')
-rw-r--r-- src/btree/bt_walk.c 38
1 files changed, 25 insertions, 13 deletions
diff --git a/src/btree/bt_walk.c b/src/btree/bt_walk.c
index d5a57406a37..4956bcbb20e 100644
--- a/src/btree/bt_walk.c
+++ b/src/btree/bt_walk.c
@@ -167,8 +167,8 @@ __page_ascend(WT_SESSION_IMPL *session,
* Descend the tree one level, during a previous-cursor walk.
*/
static void
-__page_descend_prev(WT_SESSION_IMPL *session,
- WT_REF *ref, WT_PAGE_INDEX **pindexp, uint32_t *slotp)
+__page_descend_prev(
+ WT_SESSION_IMPL *session, WT_REF *ref, WT_PAGE_INDEX **pindexp)
{
WT_PAGE_INDEX *pindex;
@@ -232,8 +232,7 @@ __page_descend_prev(WT_SESSION_IMPL *session,
* wait until the split page's page index is updated.
*/
WT_INTL_INDEX_GET(session, ref->page, pindex);
- *slotp = pindex->entries - 1;
- if (pindex->index[*slotp]->home == ref->page)
+ if (pindex->index[pindex->entries - 1]->home == ref->page)
break;
}
*pindexp = pindex;
@@ -245,19 +244,20 @@ __page_descend_prev(WT_SESSION_IMPL *session,
* for a previous-cursor walk.
*/
static bool
-__page_initial_descent_prev(WT_SESSION_IMPL *session,
- WT_REF *ref, WT_PAGE_INDEX **pindexp, uint32_t *slotp)
+__page_initial_descent_prev(
+ WT_SESSION_IMPL *session, WT_REF *ref, WT_PAGE_INDEX **pindexp)
{
- WT_PAGE_INDEX *parent_pindex, *pindex;
+ WT_PAGE_INDEX *pindex;
/*
* We're passed a child page into which we're descending, and on which
* we have a hazard pointer.
+ *
+ * Acquire a page index for the child page and then confirm we haven't
+ * raced with a parent split.
*/
- parent_pindex = *pindexp;
WT_INTL_INDEX_GET(session, ref->page, pindex);
- *slotp = pindex->entries - 1;
- if (__wt_split_descent_race(session, ref, parent_pindex))
+ if (__wt_split_descent_race(session, ref, *pindexp))
return (false);
*pindexp = pindex;
@@ -547,6 +547,15 @@ restart: /*
ret = 0;
/*
+ * If a cursor is setting up at the end of the
+ * tree, we can't use our parent page's index,
+ * because it may have already split; restart
+ * the walk.
+ */
+ if (prev && initial_descent)
+ goto restart;
+
+ /*
* If a new walk that never coupled from the
* root to a new saved position in the tree,
* restart the walk.
@@ -615,11 +624,14 @@ descend: couple = ref;
slot = 0;
} else if (initial_descent) {
if (!__page_initial_descent_prev(
- session, ref, &pindex, &slot))
+ session, ref, &pindex))
goto restart;
- } else
+ slot = pindex->entries - 1;
+ } else {
__page_descend_prev(
- session, ref, &pindex, &slot);
+ session, ref, &pindex);
+ slot = pindex->entries - 1;
+ }
} else {
/*
* At the lowest tree level (considering a leaf