summary refs log tree commit diff
path: root/src/third_party/wiredtiger/src/btree/bt_cursor.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/third_party/wiredtiger/src/btree/bt_cursor.c')
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_cursor.c 224
1 files changed, 131 insertions, 93 deletions
diff --git a/src/third_party/wiredtiger/src/btree/bt_cursor.c b/src/third_party/wiredtiger/src/btree/bt_cursor.c
index 8f4f7982e3d..9d62ef50e92 100644
--- a/src/third_party/wiredtiger/src/btree/bt_cursor.c
+++ b/src/third_party/wiredtiger/src/btree/bt_cursor.c
@@ -362,13 +362,15 @@ __wt_cursor_valid(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp, bool *valid)
* Column-store search from a cursor.
*/
static inline int
-__cursor_col_search(
- WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_REF *leaf)
+__cursor_col_search(WT_CURSOR_BTREE *cbt, WT_REF *leaf, bool *leaf_foundp)
{
WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- WT_WITH_PAGE_INDEX(session,
- ret = __wt_col_search(session, cbt->iface.recno, leaf, cbt, false));
+ session = (WT_SESSION_IMPL *)cbt->iface.session;
+ WT_WITH_PAGE_INDEX(
+ session, ret = __wt_col_search(
+ cbt, cbt->iface.recno, leaf, false, leaf_foundp));
return (ret);
}
@@ -378,12 +380,15 @@ __cursor_col_search(
*/
static inline int
__cursor_row_search(
- WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_REF *leaf, bool insert)
+ WT_CURSOR_BTREE *cbt, bool insert, WT_REF *leaf, bool *leaf_foundp)
{
WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- WT_WITH_PAGE_INDEX(session, ret = __wt_row_search(
- session, &cbt->iface.key, leaf, cbt, insert, false));
+ session = (WT_SESSION_IMPL *)cbt->iface.session;
+ WT_WITH_PAGE_INDEX(
+ session, ret = __wt_row_search(
+ cbt, &cbt->iface.key, insert, leaf, false, leaf_foundp));
return (ret);
}
@@ -392,11 +397,10 @@ __cursor_row_search(
* Column-store modify from a cursor, with a separate value.
*/
static inline int
-__cursor_col_modify_v(WT_SESSION_IMPL *session,
- WT_CURSOR_BTREE *cbt, WT_ITEM *value, u_int modify_type)
+__cursor_col_modify_v(WT_CURSOR_BTREE *cbt, WT_ITEM *value, u_int modify_type)
{
- return (__wt_col_modify(session, cbt,
- cbt->iface.recno, value, NULL, modify_type, false));
+ return (__wt_col_modify(
+ cbt, cbt->iface.recno, value, NULL, modify_type, false));
}
/*
@@ -404,11 +408,10 @@ __cursor_col_modify_v(WT_SESSION_IMPL *session,
* Row-store modify from a cursor, with a separate value.
*/
static inline int
-__cursor_row_modify_v(WT_SESSION_IMPL *session,
- WT_CURSOR_BTREE *cbt, WT_ITEM *value, u_int modify_type)
+__cursor_row_modify_v(WT_CURSOR_BTREE *cbt, WT_ITEM *value, u_int modify_type)
{
- return (__wt_row_modify(session, cbt,
- &cbt->iface.key, value, NULL, modify_type, false));
+ return (__wt_row_modify(
+ cbt, &cbt->iface.key, value, NULL, modify_type, false));
}
/*
@@ -416,11 +419,11 @@ __cursor_row_modify_v(WT_SESSION_IMPL *session,
* Column-store modify from a cursor.
*/
static inline int
-__cursor_col_modify(
- WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, u_int modify_type)
+__cursor_col_modify(WT_CURSOR_BTREE *cbt, u_int modify_type)
{
- return (__wt_col_modify(session, cbt,
- cbt->iface.recno, &cbt->iface.value, NULL, modify_type, false));
+ return (__wt_col_modify(
+ cbt, cbt->iface.recno, &cbt->iface.value,
+ NULL, modify_type, false));
}
/*
@@ -428,11 +431,11 @@ __cursor_col_modify(
* Row-store modify from a cursor.
*/
static inline int
-__cursor_row_modify(
- WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, u_int modify_type)
+__cursor_row_modify(WT_CURSOR_BTREE *cbt, u_int modify_type)
{
- return (__wt_row_modify(session, cbt,
- &cbt->iface.key, &cbt->iface.value, NULL, modify_type, false));
+ return (__wt_row_modify(
+ cbt, &cbt->iface.key, &cbt->iface.value,
+ NULL, modify_type, false));
}
/*
@@ -483,7 +486,7 @@ __wt_btcur_search(WT_CURSOR_BTREE *cbt)
WT_DECL_RET;
WT_SESSION_IMPL *session;
WT_UPDATE *upd;
- bool valid;
+ bool leaf_found, valid;
btree = cbt->btree;
cursor = &cbt->iface;
@@ -517,19 +520,19 @@ __wt_btcur_search(WT_CURSOR_BTREE *cbt)
__wt_txn_cursor_op(session);
WT_ERR(btree->type == BTREE_ROW ?
- __cursor_row_search(session, cbt, cbt->ref, false) :
- __cursor_col_search(session, cbt, cbt->ref));
+ __cursor_row_search(cbt, false, cbt->ref, &leaf_found) :
+ __cursor_col_search(cbt, cbt->ref, &leaf_found));
/* Return, if prepare conflict encountered. */
- if (cbt->compare == 0)
+ if (leaf_found && cbt->compare == 0)
WT_ERR(__wt_cursor_valid(cbt, &upd, &valid));
}
if (!valid) {
WT_ERR(__cursor_func_init(cbt, true));
WT_ERR(btree->type == BTREE_ROW ?
- __cursor_row_search(session, cbt, NULL, false) :
- __cursor_col_search(session, cbt, NULL));
+ __cursor_row_search(cbt, false, NULL, NULL) :
+ __cursor_col_search(cbt, NULL, NULL));
/* Return, if prepare conflict encountered. */
if (cbt->compare == 0)
@@ -537,7 +540,7 @@ __wt_btcur_search(WT_CURSOR_BTREE *cbt)
}
if (valid)
- ret = __cursor_kv_return(session, cbt, upd);
+ ret = __cursor_kv_return(cbt, upd);
else if (__cursor_fix_implicit(btree, cbt)) {
/*
* Creating a record past the end of the tree in a fixed-length
@@ -554,7 +557,7 @@ __wt_btcur_search(WT_CURSOR_BTREE *cbt)
#ifdef HAVE_DIAGNOSTIC
if (ret == 0)
- WT_ERR(__wt_cursor_key_order_init(session, cbt));
+ WT_ERR(__wt_cursor_key_order_init(cbt));
#endif
err: if (ret != 0) {
@@ -578,7 +581,7 @@ __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp)
WT_SESSION_IMPL *session;
WT_UPDATE *upd;
int exact;
- bool valid;
+ bool leaf_found, valid;
btree = cbt->btree;
cursor = &cbt->iface;
@@ -610,34 +613,43 @@ __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp)
* pages in the case of column-store, search-near isn't an interesting
* enough case for column-store to add the complexity needed to avoid
* the tree search.
- *
- * Set the "insert" flag for the btree row-store search; we may intend
- * to position the cursor at the end of the tree, rather than match an
- * existing record.
*/
valid = false;
if (btree->type == BTREE_ROW && __cursor_page_pinned(cbt)) {
__wt_txn_cursor_op(session);
-
- WT_ERR(__cursor_row_search(session, cbt, cbt->ref, true));
+ /*
+ * Set the "insert" flag for the btree row-store search; we may
+ * intend to position the cursor at the end of the tree, rather
+ * than match an existing record.
+ */
+ WT_ERR(__cursor_row_search(cbt, true, cbt->ref, &leaf_found));
/*
- * Search-near is trickier than search when searching an already
- * pinned page. If search returns the first or last page slots,
- * discard the results and search the full tree as the neighbor
- * pages might offer better matches. This test is simplistic as
- * we're ignoring append lists (there may be no page slots or we
- * might be legitimately positioned after the last page slot).
- * Ignore those cases, it makes things too complicated.
+ * Only use the pinned page search results if search returns an
+ * exact match or a slot other than the page's boundary slots,
+ * if that's not the case, a neighbor page might offer a better
+ * match. This test is simplistic as we're ignoring append
+ * lists (there may be no page slots or we might be
+ * legitimately positioned after the last page slot). Ignore
+ * those cases, it makes things too complicated.
*/
- if (cbt->slot != 0 && cbt->slot != cbt->ref->page->entries - 1)
+ if (leaf_found &&
+ (cbt->compare == 0 ||
+ (cbt->slot != 0 &&
+ cbt->slot != cbt->ref->page->entries - 1)))
WT_ERR(__wt_cursor_valid(cbt, &upd, &valid));
}
if (!valid) {
WT_ERR(__cursor_func_init(cbt, true));
+
+ /*
+ * Set the "insert" flag for the btree row-store search; we may
+ * intend to position the cursor at the end of the tree, rather
+ * than match an existing record.
+ */
WT_ERR(btree->type == BTREE_ROW ?
- __cursor_row_search(session, cbt, NULL, true) :
- __cursor_col_search(session, cbt, NULL));
+ __cursor_row_search(cbt, true, NULL, NULL) :
+ __cursor_col_search(cbt, NULL, NULL));
WT_ERR(__wt_cursor_valid(cbt, &upd, &valid));
}
@@ -660,7 +672,7 @@ __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp)
*/
if (valid) {
exact = cbt->compare;
- ret = __cursor_kv_return(session, cbt, upd);
+ ret = __cursor_kv_return(cbt, upd);
} else if (__cursor_fix_implicit(btree, cbt)) {
cbt->recno = cursor->recno;
cbt->v = 0;
@@ -711,7 +723,7 @@ err: if (ret == 0 && exactp != NULL)
#ifdef HAVE_DIAGNOSTIC
if (ret == 0)
- WT_TRET(__wt_cursor_key_order_init(session, cbt));
+ WT_TRET(__wt_cursor_key_order_init(cbt));
#endif
if (ret != 0) {
@@ -785,8 +797,8 @@ __wt_btcur_insert(WT_CURSOR_BTREE *cbt)
*/
cbt->compare = 0;
ret = btree->type == BTREE_ROW ?
- __cursor_row_modify(session, cbt, WT_UPDATE_STANDARD) :
- __cursor_col_modify(session, cbt, WT_UPDATE_STANDARD);
+ __cursor_row_modify(cbt, WT_UPDATE_STANDARD) :
+ __cursor_col_modify(cbt, WT_UPDATE_STANDARD);
if (ret == 0)
goto done;
@@ -814,7 +826,7 @@ __wt_btcur_insert(WT_CURSOR_BTREE *cbt)
retry: WT_ERR(__cursor_func_init(cbt, true));
if (btree->type == BTREE_ROW) {
- WT_ERR(__cursor_row_search(session, cbt, NULL, true));
+ WT_ERR(__cursor_row_search(cbt, true, NULL, NULL));
/*
* If not overwriting, fail if the key exists, else insert the
* key/value pair.
@@ -826,7 +838,7 @@ retry: WT_ERR(__cursor_func_init(cbt, true));
WT_ERR(WT_DUPLICATE_KEY);
}
- ret = __cursor_row_modify(session, cbt, WT_UPDATE_STANDARD);
+ ret = __cursor_row_modify(cbt, WT_UPDATE_STANDARD);
} else if (append_key) {
/*
* Optionally insert a new record (ignoring the application's
@@ -835,11 +847,11 @@ retry: WT_ERR(__cursor_func_init(cbt, true));
*/
cbt->iface.recno = WT_RECNO_OOB;
cbt->compare = 1;
- WT_ERR(__cursor_col_search(session, cbt, NULL));
- WT_ERR(__cursor_col_modify(session, cbt, WT_UPDATE_STANDARD));
+ WT_ERR(__cursor_col_search(cbt, NULL, NULL));
+ WT_ERR(__cursor_col_modify(cbt, WT_UPDATE_STANDARD));
cursor->recno = cbt->recno;
} else {
- WT_ERR(__cursor_col_search(session, cbt, NULL));
+ WT_ERR(__cursor_col_search(cbt, NULL, NULL));
/*
* If not overwriting, fail if the key exists. Creating a
@@ -856,7 +868,7 @@ retry: WT_ERR(__cursor_func_init(cbt, true));
WT_ERR(WT_DUPLICATE_KEY);
}
- WT_ERR(__cursor_col_modify(session, cbt, WT_UPDATE_STANDARD));
+ WT_ERR(__cursor_col_modify(cbt, WT_UPDATE_STANDARD));
}
err: if (ret == WT_RESTART) {
@@ -940,7 +952,7 @@ __wt_btcur_insert_check(WT_CURSOR_BTREE *cbt)
__cursor_novalue(cursor);
retry: WT_ERR(__cursor_func_init(cbt, true));
- WT_ERR(__cursor_row_search(session, cbt, NULL, true));
+ WT_ERR(__cursor_row_search(cbt, true, NULL, NULL));
/* Just check for conflicts. */
ret = __curfile_update_check(cbt);
@@ -1045,8 +1057,8 @@ __wt_btcur_remove(WT_CURSOR_BTREE *cbt)
*/
cbt->compare = 0;
ret = btree->type == BTREE_ROW ?
- __cursor_row_modify(session, cbt, WT_UPDATE_TOMBSTONE) :
- __cursor_col_modify(session, cbt, WT_UPDATE_TOMBSTONE);
+ __cursor_row_modify(cbt, WT_UPDATE_TOMBSTONE) :
+ __cursor_col_modify(cbt, WT_UPDATE_TOMBSTONE);
if (ret == 0)
goto done;
goto err;
@@ -1068,7 +1080,7 @@ retry: if (positioned == POSITIONED)
WT_ERR(__cursor_func_init(cbt, true));
if (btree->type == BTREE_ROW) {
- WT_ERR(__cursor_row_search(session, cbt, NULL, false));
+ WT_ERR(__cursor_row_search(cbt, false, NULL, NULL));
/* Check whether an update would conflict. */
WT_ERR(__curfile_update_check(cbt));
@@ -1079,9 +1091,9 @@ retry: if (positioned == POSITIONED)
if (!valid)
WT_ERR(WT_NOTFOUND);
- ret = __cursor_row_modify(session, cbt, WT_UPDATE_TOMBSTONE);
+ ret = __cursor_row_modify(cbt, WT_UPDATE_TOMBSTONE);
} else {
- WT_ERR(__cursor_col_search(session, cbt, NULL));
+ WT_ERR(__cursor_col_search(cbt, NULL, NULL));
/*
* If we find a matching record, check whether an update would
@@ -1109,8 +1121,7 @@ retry: if (positioned == POSITIONED)
*/
cbt->recno = cursor->recno;
} else
- ret = __cursor_col_modify(
- session, cbt, WT_UPDATE_TOMBSTONE);
+ ret = __cursor_col_modify(cbt, WT_UPDATE_TOMBSTONE);
}
err: if (ret == WT_RESTART) {
@@ -1139,7 +1150,7 @@ done: switch (positioned) {
* Positioned and we did a search anyway, get a key to
* return.
*/
- WT_TRET(__wt_key_return(session, cbt));
+ WT_TRET(__wt_key_return(cbt));
break;
}
}
@@ -1194,7 +1205,7 @@ __btcur_update(WT_CURSOR_BTREE *cbt, WT_ITEM *value, u_int modify_type)
WT_DECL_RET;
WT_SESSION_IMPL *session;
uint64_t yield_count, sleep_usecs;
- bool valid;
+ bool leaf_found, valid;
btree = cbt->btree;
cursor = &cbt->iface;
@@ -1227,8 +1238,8 @@ __btcur_update(WT_CURSOR_BTREE *cbt, WT_ITEM *value, u_int modify_type)
*/
cbt->compare = 0;
ret = btree->type == BTREE_ROW ?
- __cursor_row_modify_v(session, cbt, value, modify_type) :
- __cursor_col_modify_v(session, cbt, value, modify_type);
+ __cursor_row_modify_v(cbt, value, modify_type) :
+ __cursor_col_modify_v(cbt, value, modify_type);
if (ret == 0)
goto done;
@@ -1253,11 +1264,38 @@ __btcur_update(WT_CURSOR_BTREE *cbt, WT_ITEM *value, u_int modify_type)
WT_ERR(__cursor_localvalue(cursor));
__cursor_state_save(cursor, &state);
-retry: WT_ERR(__cursor_func_init(cbt, true));
+ /*
+ * If our caller configures for a local search and we have a page
+ * pinned, do that search.
+ */
+ if (F_ISSET(cursor, WT_CURSTD_UPDATE_LOCAL)
+ && __cursor_page_pinned(cbt)) {
+ __wt_txn_cursor_op(session);
+ WT_ERR(__wt_txn_autocommit_check(session));
+ WT_ERR(btree->type == BTREE_ROW ?
+ __cursor_row_search(cbt, true, cbt->ref, &leaf_found) :
+ __cursor_col_search(cbt, cbt->ref, &leaf_found));
+ /*
+ * Only use the pinned page search results if search returns an
+ * exact match or a slot other than the page's boundary slots,
+ * if that's not the case, a neighbor page might offer a better
+ * match. This test is simplistic as we're ignoring append
+ * lists (there may be no page slots or we might be
+ * legitimately positioned after the last page slot). Ignore
+ * those cases, it makes things too complicated.
+ */
+ if (leaf_found && (cbt->compare == 0
+ || (cbt->slot != 0 && cbt->slot != cbt->ref->page->entries - 1)))
+ goto update_local;
+ }
+retry:
+ WT_ERR(__cursor_func_init(cbt, true));
+ WT_ERR(btree->type == BTREE_ROW ?
+ __cursor_row_search(cbt, true, NULL, NULL) :
+ __cursor_col_search(cbt, NULL, NULL));
+update_local:
if (btree->type == BTREE_ROW) {
- WT_ERR(__cursor_row_search(session, cbt, NULL, true));
-
/*
* If not overwriting, check for conflicts and fail if the key
* does not exist.
@@ -1270,10 +1308,8 @@ retry: WT_ERR(__cursor_func_init(cbt, true));
if (!valid)
WT_ERR(WT_NOTFOUND);
}
- ret = __cursor_row_modify_v(session, cbt, value, modify_type);
+ ret = __cursor_row_modify_v(cbt, value, modify_type);
} else {
- WT_ERR(__cursor_col_search(session, cbt, NULL));
-
/*
* If not overwriting, fail if the key doesn't exist. If we
* find an update for the key, check for conflicts. Update the
@@ -1291,7 +1327,7 @@ retry: WT_ERR(__cursor_func_init(cbt, true));
!__cursor_fix_implicit(btree, cbt))
WT_ERR(WT_NOTFOUND);
}
- ret = __cursor_col_modify_v(session, cbt, value, modify_type);
+ ret = __cursor_col_modify_v(cbt, value, modify_type);
}
err: if (ret == WT_RESTART) {
@@ -1313,8 +1349,7 @@ done: switch (modify_type) {
/*
* WT_CURSOR.update returns a key and a value.
*/
- ret = __cursor_kv_return(
- session, cbt, cbt->modify_update);
+ ret = __cursor_kv_return(cbt, cbt->modify_update);
break;
case WT_UPDATE_RESERVE:
/*
@@ -1327,7 +1362,7 @@ done: switch (modify_type) {
* WT_CURSOR.modify has already created the return value
* and our job is to leave it untouched.
*/
- ret = __wt_key_return(session, cbt);
+ ret = __wt_key_return(cbt);
break;
case WT_UPDATE_BIRTHMARK:
case WT_UPDATE_TOMBSTONE:
@@ -1670,13 +1705,15 @@ __wt_btcur_equals(WT_CURSOR_BTREE *a_arg, WT_CURSOR_BTREE *b_arg, int *equalp)
* tree.
*/
static int
-__cursor_truncate(WT_SESSION_IMPL *session,
+__cursor_truncate(
WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop,
- int (*rmfunc)(WT_SESSION_IMPL *, WT_CURSOR_BTREE *, u_int))
+ int (*rmfunc)(WT_CURSOR_BTREE *, u_int))
{
WT_DECL_RET;
+ WT_SESSION_IMPL *session;
uint64_t yield_count, sleep_usecs;
+ session = (WT_SESSION_IMPL *)start->iface.session;
yield_count = sleep_usecs = 0;
/*
@@ -1698,12 +1735,13 @@ __cursor_truncate(WT_SESSION_IMPL *session,
* instantiated the end cursor, so we know that page is pinned in memory
* and we can proceed without concern.
*/
-retry: WT_ERR(__wt_btcur_search(start));
+retry:
+ WT_ERR(__wt_btcur_search(start));
WT_ASSERT(session,
F_MASK((WT_CURSOR *)start, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_INT);
for (;;) {
- WT_ERR(rmfunc(session, start, WT_UPDATE_TOMBSTONE));
+ WT_ERR(rmfunc(start, WT_UPDATE_TOMBSTONE));
if (stop != NULL && __cursor_equals(start, stop))
return (0);
@@ -1727,14 +1765,16 @@ err: if (ret == WT_RESTART) {
* Discard a cursor range from fixed-width column-store tree.
*/
static int
-__cursor_truncate_fix(WT_SESSION_IMPL *session,
+__cursor_truncate_fix(
WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop,
- int (*rmfunc)(WT_SESSION_IMPL *, WT_CURSOR_BTREE *, u_int))
+ int (*rmfunc)(WT_CURSOR_BTREE *, u_int))
{
WT_DECL_RET;
+ WT_SESSION_IMPL *session;
uint64_t yield_count, sleep_usecs;
const uint8_t *value;
+ session = (WT_SESSION_IMPL *)start->iface.session;
yield_count = sleep_usecs = 0;
/*
@@ -1763,7 +1803,7 @@ retry: WT_ERR(__wt_btcur_search(start));
for (;;) {
value = (const uint8_t *)start->iface.value.data;
if (*value != 0)
- WT_ERR(rmfunc(session, start, WT_UPDATE_TOMBSTONE));
+ WT_ERR(rmfunc(start, WT_UPDATE_TOMBSTONE));
if (stop != NULL && __cursor_equals(start, stop))
return (0);
@@ -1797,6 +1837,7 @@ __wt_btcur_range_truncate(WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop)
btree = start->btree;
WT_STAT_DATA_INCR(session, cursor_truncate);
+ WT_RET(__wt_txn_autocommit_check(session));
/*
* For recovery, log the start and stop keys for a truncate operation,
* not the individual records removed. On the other hand, for rollback
@@ -1811,12 +1852,10 @@ __wt_btcur_range_truncate(WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop)
switch (btree->type) {
case BTREE_COL_FIX:
- WT_ERR(__cursor_truncate_fix(
- session, start, stop, __cursor_col_modify));
+ WT_ERR(__cursor_truncate_fix(start, stop, __cursor_col_modify));
break;
case BTREE_COL_VAR:
- WT_ERR(__cursor_truncate(
- session, start, stop, __cursor_col_modify));
+ WT_ERR(__cursor_truncate(start, stop, __cursor_col_modify));
break;
case BTREE_ROW:
/*
@@ -1831,8 +1870,7 @@ __wt_btcur_range_truncate(WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop)
* that ever changes, we'd need to do something here to ensure a
* fully instantiated cursor.
*/
- WT_ERR(__cursor_truncate(
- session, start, stop, __cursor_row_modify));
+ WT_ERR(__cursor_truncate(start, stop, __cursor_row_modify));
break;
}