summaryrefslogtreecommitdiff
path: root/src/third_party/wiredtiger/src/btree/bt_cursor.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/third_party/wiredtiger/src/btree/bt_cursor.c')
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_cursor.c556
1 files changed, 424 insertions, 132 deletions
diff --git a/src/third_party/wiredtiger/src/btree/bt_cursor.c b/src/third_party/wiredtiger/src/btree/bt_cursor.c
index 5fde2237538..944e276fc01 100644
--- a/src/third_party/wiredtiger/src/btree/bt_cursor.c
+++ b/src/third_party/wiredtiger/src/btree/bt_cursor.c
@@ -9,6 +9,84 @@
#include "wt_internal.h"
/*
+ * When returning an error, we need to restore the cursor to a valid state, the
+ * upper-level cursor code is likely to retry. This structure and the associated
+ * functions are used save and restore the cursor state.
+ */
+typedef struct {
+ WT_ITEM key;
+ WT_ITEM value;
+ uint64_t recno;
+ uint32_t flags;
+} WT_CURFILE_STATE;
+
+/*
+ * __cursor_state_save --
+ * Save the cursor's external state.
+ */
+static inline void
+__cursor_state_save(WT_CURSOR *cursor, WT_CURFILE_STATE *state)
+{
+ WT_ITEM_SET(state->key, cursor->key);
+ WT_ITEM_SET(state->value, cursor->value);
+ state->recno = cursor->recno;
+ state->flags = cursor->flags;
+}
+
+/*
+ * __cursor_state_restore --
+ * Restore the cursor's external state.
+ */
+static inline void
+__cursor_state_restore(WT_CURSOR *cursor, WT_CURFILE_STATE *state)
+{
+ if (F_ISSET(state, WT_CURSTD_KEY_EXT))
+ WT_ITEM_SET(cursor->key, state->key);
+ if (F_ISSET(state, WT_CURSTD_VALUE_EXT))
+ WT_ITEM_SET(cursor->value, state->value);
+ cursor->recno = state->recno;
+ F_CLR(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT);
+ F_SET(cursor, F_MASK(state, WT_CURSTD_KEY_EXT | WT_CURSTD_VALUE_EXT));
+
+}
+
+/*
+ * __cursor_page_pinned --
+ * Return if we have a page pinned and it's not been flagged for forced
+ * eviction (the forced eviction test is so we periodically release pages
+ * grown too large).
+ */
+static inline bool
+__cursor_page_pinned(WT_CURSOR_BTREE *cbt)
+{
+ return (F_ISSET(cbt, WT_CBT_ACTIVE) &&
+ cbt->ref->page->read_gen != WT_READGEN_OLDEST);
+}
+
+/*
+ * __cursor_copy_int_key --
+ * If we're pointing into the tree, save the key into local memory.
+ */
+static inline int
+__cursor_copy_int_key(WT_CURSOR *cursor)
+{
+ /*
+ * We're about to discard the cursor's position and the cursor layer
+ * might retry the operation. We discard pinned pages on error, which
+ * will invalidate pinned keys. Clear WT_CURSTD_KEY_INT in all cases,
+ * the underlying page is gone whether we can allocate memory or not.
+ */
+ if (F_ISSET(cursor, WT_CURSTD_KEY_INT)) {
+ F_CLR(cursor, WT_CURSTD_KEY_INT);
+ if (!WT_DATA_IN_ITEM(&cursor->key))
+ WT_RET(__wt_buf_set((WT_SESSION_IMPL *)cursor->session,
+ &cursor->key, cursor->key.data, cursor->key.size));
+ F_SET(cursor, WT_CURSTD_KEY_EXT);
+ }
+ return (0);
+}
+
+/*
* __cursor_size_chk --
* Return if an inserted item is too large.
*/
@@ -55,6 +133,34 @@ __cursor_size_chk(WT_SESSION_IMPL *session, WT_ITEM *kv)
}
/*
+ * __cursor_disable_bulk --
+ * Disable bulk loads into a tree.
+ */
+static inline void
+__cursor_disable_bulk(WT_SESSION_IMPL *session, WT_BTREE *btree)
+{
+ /*
+ * Once a tree (other than the LSM primary) is no longer empty, eviction
+ * should pay attention to it, and it's no longer possible to bulk-load
+ * into it.
+ */
+ if (!btree->original)
+ return;
+ if (btree->lsm_primary) {
+ btree->original = 0; /* Make the next test faster. */
+ return;
+ }
+
+ /*
+ * We use a compare-and-swap here to avoid races among the first inserts
+ * into a tree. Eviction is disabled when an empty tree is opened, and
+ * it must only be enabled once.
+ */
+ if (__wt_atomic_cas8(&btree->original, 1, 0))
+ __wt_evict_file_exclusive_off(session);
+}
+
+/*
* __cursor_fix_implicit --
* Return if search went past the end of the tree.
*/
@@ -285,13 +391,17 @@ __cursor_row_modify(
int
__wt_btcur_reset(WT_CURSOR_BTREE *cbt)
{
+ WT_CURSOR *cursor;
WT_SESSION_IMPL *session;
+ cursor = &cbt->iface;
session = (WT_SESSION_IMPL *)cbt->iface.session;
WT_STAT_CONN_INCR(session, cursor_reset);
WT_STAT_DATA_INCR(session, cursor_reset);
+ F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
+
return (__cursor_reset(cbt));
}
@@ -303,6 +413,7 @@ int
__wt_btcur_search(WT_CURSOR_BTREE *cbt)
{
WT_BTREE *btree;
+ WT_CURFILE_STATE state;
WT_CURSOR *cursor;
WT_DECL_RET;
WT_SESSION_IMPL *session;
@@ -317,14 +428,22 @@ __wt_btcur_search(WT_CURSOR_BTREE *cbt)
WT_STAT_CONN_INCR(session, cursor_search);
WT_STAT_DATA_INCR(session, cursor_search);
+ __cursor_state_save(cursor, &state);
+
+ /*
+ * The pinned page goes away if we do a search, make sure there's a
+ * local copy of any key, then re-save the cursor state.
+ */
+ WT_ERR(__cursor_copy_int_key(cursor));
+ __cursor_state_save(cursor, &state);
+
/*
* If we have a page pinned, search it; if we don't have a page pinned,
* or the search of the pinned page doesn't find an exact match, search
* from the root.
*/
valid = false;
- if (F_ISSET(cbt, WT_CBT_ACTIVE) &&
- cbt->ref->page->read_gen != WT_READGEN_OLDEST) {
+ if (__cursor_page_pinned(cbt)) {
__wt_txn_cursor_op(session);
WT_ERR(btree->type == BTREE_ROW ?
@@ -352,6 +471,8 @@ __wt_btcur_search(WT_CURSOR_BTREE *cbt)
cbt->v = 0;
cursor->value.data = &cbt->v;
cursor->value.size = 1;
+ F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
+ F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT);
} else
ret = WT_NOTFOUND;
@@ -360,8 +481,10 @@ __wt_btcur_search(WT_CURSOR_BTREE *cbt)
WT_ERR(__wt_cursor_key_order_init(session, cbt));
#endif
-err: if (ret != 0)
+err: if (ret != 0) {
WT_TRET(__cursor_reset(cbt));
+ __cursor_state_restore(cursor, &state);
+ }
return (ret);
}
@@ -373,6 +496,7 @@ int
__wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp)
{
WT_BTREE *btree;
+ WT_CURFILE_STATE state;
WT_CURSOR *cursor;
WT_DECL_RET;
WT_SESSION_IMPL *session;
@@ -389,6 +513,15 @@ __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp)
WT_STAT_CONN_INCR(session, cursor_search_near);
WT_STAT_DATA_INCR(session, cursor_search_near);
+ __cursor_state_save(cursor, &state);
+
+ /*
+ * The pinned page goes away if we do a search, make sure there's a
+ * local copy of any key, then re-save the cursor state.
+ */
+ WT_ERR(__cursor_copy_int_key(cursor));
+ __cursor_state_save(cursor, &state);
+
/*
* If we have a row-store page pinned, search it; if we don't have a
* page pinned, or the search of the pinned page doesn't find an exact
@@ -402,9 +535,7 @@ __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp)
* existing record.
*/
valid = false;
- if (btree->type == BTREE_ROW &&
- F_ISSET(cbt, WT_CBT_ACTIVE) &&
- cbt->ref->page->read_gen != WT_READGEN_OLDEST) {
+ if (btree->type == BTREE_ROW && __cursor_page_pinned(cbt)) {
__wt_txn_cursor_op(session);
WT_ERR(__cursor_row_search(session, cbt, cbt->ref, true));
@@ -455,6 +586,8 @@ __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp)
cursor->value.data = &cbt->v;
cursor->value.size = 1;
exact = 0;
+ F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
+ F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT);
} else if ((ret = __wt_btcur_next(cbt, false)) != WT_NOTFOUND)
exact = 1;
else {
@@ -469,15 +602,18 @@ __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp)
exact = -1;
}
+err: if (ret == 0 && exactp != NULL)
+ *exactp = exact;
+
#ifdef HAVE_DIAGNOSTIC
if (ret == 0)
- WT_ERR(__wt_cursor_key_order_init(session, cbt));
+ WT_TRET(__wt_cursor_key_order_init(session, cbt));
#endif
-err: if (ret != 0)
+ if (ret != 0) {
WT_TRET(__cursor_reset(cbt));
- if (exactp != NULL && (ret == 0 || ret == WT_NOTFOUND))
- *exactp = exact;
+ __cursor_state_restore(cursor, &state);
+ }
return (ret);
}
@@ -489,9 +625,11 @@ int
__wt_btcur_insert(WT_CURSOR_BTREE *cbt)
{
WT_BTREE *btree;
+ WT_CURFILE_STATE state;
WT_CURSOR *cursor;
WT_DECL_RET;
WT_SESSION_IMPL *session;
+ bool append_key;
btree = cbt->btree;
cursor = &cbt->iface;
@@ -502,30 +640,86 @@ __wt_btcur_insert(WT_CURSOR_BTREE *cbt)
WT_STAT_DATA_INCRV(session,
cursor_insert_bytes, cursor->key.size + cursor->value.size);
+ __cursor_state_save(cursor, &state);
+
if (btree->type == BTREE_ROW)
WT_RET(__cursor_size_chk(session, &cursor->key));
WT_RET(__cursor_size_chk(session, &cursor->value));
+ /* It's no longer possible to bulk-load into the tree. */
+ __cursor_disable_bulk(session, btree);
+
+ /*
+ * Insert a new record if WT_CURSTD_APPEND configured, (ignoring any
+ * application set record number). Although append can't be configured
+ * for a row-store, this code would break if it were, and that's owned
+ * by the upper cursor layer, be cautious.
+ */
+ append_key =
+ F_ISSET(cursor, WT_CURSTD_APPEND) && btree->type != BTREE_ROW;
+
/*
- * The tree is no longer empty: eviction should pay attention to it,
- * and it's no longer possible to bulk-load into it.
+ * If inserting with overwrite configured, and positioned to an on-page
+ * key, the update doesn't require another search. The cursor won't be
+ * positioned on a page with an external key set, but be sure. Cursors
+ * configured for append aren't included, regardless of whether or not
+ * they meet all other criteria.
*/
- if (btree->bulk_load_ok) {
- btree->bulk_load_ok = false;
- __wt_btree_evictable(session, true);
+ if (__cursor_page_pinned(cbt) &&
+ F_ISSET_ALL(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_OVERWRITE) &&
+ !append_key) {
+ WT_ERR(__wt_txn_autocommit_check(session));
+ /*
+ * The cursor position may not be exact (the cursor's comparison
+ * value not equal to zero). Correct to an exact match so we can
+ * update whatever we're pointing at.
+ */
+ cbt->compare = 0;
+ ret = btree->type == BTREE_ROW ?
+ __cursor_row_modify(session, cbt, false) :
+ __cursor_col_modify(session, cbt, false);
+ if (ret == 0)
+ goto done;
+
+ /*
+ * The pinned page goes away if we fail for any reason, make
+ * sure there's a local copy of any key. (Restart could still
+ * use the pinned page, but that's an unlikely path.) Re-save
+ * the cursor state: we may retry but eventually fail.
+ */
+ WT_TRET(__cursor_copy_int_key(cursor));
+ __cursor_state_save(cursor, &state);
+ goto err;
}
-retry: WT_RET(__cursor_func_init(cbt, true));
+ /*
+ * The pinned page goes away if we do a search, make sure there's a
+ * local copy of any key. Re-save the cursor state: we may retry but
+ * eventually fail.
+ */
+ WT_ERR(__cursor_copy_int_key(cursor));
+ __cursor_state_save(cursor, &state);
- switch (btree->type) {
- case BTREE_COL_FIX:
- case BTREE_COL_VAR:
+retry: WT_ERR(__cursor_func_init(cbt, true));
+
+ if (btree->type == BTREE_ROW) {
+ WT_ERR(__cursor_row_search(session, cbt, NULL, true));
/*
- * If WT_CURSTD_APPEND is set, insert a new record (ignoring
- * the application's record number). The real record number
- * is assigned by the serialized append operation.
+ * If not overwriting, fail if the key exists, else insert the
+ * key/value pair.
*/
- if (F_ISSET(cursor, WT_CURSTD_APPEND))
+ if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE) &&
+ cbt->compare == 0 && __wt_cursor_valid(cbt, NULL))
+ WT_ERR(WT_DUPLICATE_KEY);
+
+ ret = __cursor_row_modify(session, cbt, false);
+ } else {
+ /*
+ * Optionally insert a new record (ignoring the application's
+ * record number). The real record number is allocated by the
+ * serialized append operation.
+ */
+ if (append_key)
cbt->iface.recno = WT_RECNO_OOB;
WT_ERR(__cursor_col_search(session, cbt, NULL));
@@ -542,21 +736,9 @@ retry: WT_RET(__cursor_func_init(cbt, true));
WT_ERR(WT_DUPLICATE_KEY);
WT_ERR(__cursor_col_modify(session, cbt, false));
- if (F_ISSET(cursor, WT_CURSTD_APPEND))
- cbt->iface.recno = cbt->recno;
- break;
- case BTREE_ROW:
- WT_ERR(__cursor_row_search(session, cbt, NULL, true));
- /*
- * If not overwriting, fail if the key exists, else insert the
- * key/value pair.
- */
- if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE) &&
- cbt->compare == 0 && __wt_cursor_valid(cbt, NULL))
- WT_ERR(WT_DUPLICATE_KEY);
- ret = __cursor_row_modify(session, cbt, false);
- break;
+ if (append_key)
+ cbt->iface.recno = cbt->recno;
}
err: if (ret == WT_RESTART) {
@@ -564,11 +746,17 @@ err: if (ret == WT_RESTART) {
WT_STAT_DATA_INCR(session, cursor_restart);
goto retry;
}
- /* Insert doesn't maintain a position across calls, clear resources. */
- if (ret == 0)
- WT_TRET(__curfile_leave(cbt));
+
+done: /* Insert doesn't maintain a position across calls, clear resources. */
+ if (ret == 0) {
+ F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
+ if (append_key)
+ F_SET(cursor, WT_CURSTD_KEY_INT);
+ }
+ WT_TRET(__cursor_reset(cbt));
if (ret != 0)
- WT_TRET(__cursor_reset(cbt));
+ __cursor_state_restore(cursor, &state);
+
return (ret);
}
@@ -604,16 +792,15 @@ __curfile_update_check(WT_CURSOR_BTREE *cbt)
}
/*
- * __wt_btcur_update_check --
+ * __wt_btcur_insert_check --
* Check whether an update would conflict.
*
- * This can be used to replace WT_CURSOR::insert or WT_CURSOR::update, so
- * they only check for conflicts without updating the tree. It is used to
- * maintain snapshot isolation for transactions that span multiple chunks
- * in an LSM tree.
+ * This can replace WT_CURSOR::insert, so it only checks for conflicts without
+ * updating the tree. It is used to maintain snapshot isolation for transactions
+ * that span multiple chunks in an LSM tree.
*/
int
-__wt_btcur_update_check(WT_CURSOR_BTREE *cbt)
+__wt_btcur_insert_check(WT_CURSOR_BTREE *cbt)
{
WT_BTREE *btree;
WT_CURSOR *cursor;
@@ -624,31 +811,35 @@ __wt_btcur_update_check(WT_CURSOR_BTREE *cbt)
btree = cbt->btree;
session = (WT_SESSION_IMPL *)cursor->session;
-retry: WT_RET(__cursor_func_init(cbt, true));
+ /*
+ * The pinned page goes away if we do a search, make sure there's a
+ * local copy of any key. Unlike most of the btree cursor routines,
+ * we don't have to save/restore the cursor key state, none of the
+ * work done here changes the key state.
+ */
+ WT_ERR(__cursor_copy_int_key(cursor));
- switch (btree->type) {
- case BTREE_ROW:
+retry: WT_ERR(__cursor_func_init(cbt, true));
+
+ if (btree->type == BTREE_ROW) {
WT_ERR(__cursor_row_search(session, cbt, NULL, true));
- /*
- * Just check for conflicts.
- */
+ /* Just check for conflicts. */
ret = __curfile_update_check(cbt);
- break;
- case BTREE_COL_FIX:
- case BTREE_COL_VAR:
+ } else
WT_ERR(__wt_illegal_value(session, NULL));
- break;
- }
err: if (ret == WT_RESTART) {
WT_STAT_CONN_INCR(session, cursor_restart);
WT_STAT_DATA_INCR(session, cursor_restart);
goto retry;
}
- WT_TRET(__curfile_leave(cbt));
- if (ret != 0)
- WT_TRET(__cursor_reset(cbt));
+
+ /* Insert doesn't maintain a position across calls, clear resources. */
+ if (ret == 0)
+ F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
+ WT_TRET(__cursor_reset(cbt));
+
return (ret);
}
@@ -660,9 +851,11 @@ int
__wt_btcur_remove(WT_CURSOR_BTREE *cbt)
{
WT_BTREE *btree;
+ WT_CURFILE_STATE state;
WT_CURSOR *cursor;
WT_DECL_RET;
WT_SESSION_IMPL *session;
+ bool positioned;
btree = cbt->btree;
cursor = &cbt->iface;
@@ -672,11 +865,69 @@ __wt_btcur_remove(WT_CURSOR_BTREE *cbt)
WT_STAT_DATA_INCR(session, cursor_remove);
WT_STAT_DATA_INCRV(session, cursor_remove_bytes, cursor->key.size);
-retry: WT_RET(__cursor_func_init(cbt, true));
+ __cursor_state_save(cursor, &state);
- switch (btree->type) {
- case BTREE_COL_FIX:
- case BTREE_COL_VAR:
+ /*
+ * WT_CURSOR.remove has a unique semantic, the cursor stays positioned
+ * if it starts positioned, otherwise clear the cursor on completion.
+ */
+ positioned = F_ISSET(cursor, WT_CURSTD_KEY_INT);
+
+ /*
+ * If remove positioned to an on-page key, the remove doesn't require
+ * another search. We don't care about the "overwrite" configuration
+ * because regardless of the overwrite setting, any existing record is
+ * removed, and the record must exist with a positioned cursor. The
+ * cursor won't be positioned on a page with an external key set, but
+ * be sure.
+ */
+ if (__cursor_page_pinned(cbt) && F_ISSET(cursor, WT_CURSTD_KEY_INT)) {
+ WT_ERR(__wt_txn_autocommit_check(session));
+
+ /*
+ * The cursor position may not be exact (the cursor's comparison
+ * value not equal to zero). Correct to an exact match so we can
+ * remove whatever we're pointing at.
+ */
+ cbt->compare = 0;
+ ret = btree->type == BTREE_ROW ?
+ __cursor_row_modify(session, cbt, true) :
+ __cursor_col_modify(session, cbt, true);
+ if (ret == 0)
+ goto done;
+
+ /*
+ * The pinned page goes away if we fail for any reason, make
+ * sure there's a local copy of any key. (Restart could still
+ * use the pinned page, but that's an unlikely path.) Re-save
+ * the cursor state: we may retry but eventually fail.
+ */
+ WT_TRET(__cursor_copy_int_key(cursor));
+ __cursor_state_save(cursor, &state);
+ goto err;
+ }
+
+ /*
+ * The pinned page goes away if we do a search, make sure there's a
+ * local copy of any key. Re-save the cursor state: we may retry but
+ * eventually fail.
+ */
+ WT_ERR(__cursor_copy_int_key(cursor));
+ __cursor_state_save(cursor, &state);
+
+retry: WT_ERR(__cursor_func_init(cbt, true));
+
+ if (btree->type == BTREE_ROW) {
+ WT_ERR(__cursor_row_search(session, cbt, NULL, false));
+
+ /* Check whether an update would conflict. */
+ WT_ERR(__curfile_update_check(cbt));
+
+ if (cbt->compare != 0 || !__wt_cursor_valid(cbt, NULL))
+ WT_ERR(WT_NOTFOUND);
+
+ ret = __cursor_row_modify(session, cbt, true);
+ } else {
WT_ERR(__cursor_col_search(session, cbt, NULL));
/*
@@ -703,19 +954,6 @@ retry: WT_RET(__cursor_func_init(cbt, true));
cbt->recno = cursor->recno;
} else
ret = __cursor_col_modify(session, cbt, true);
- break;
- case BTREE_ROW:
- /* Remove the record if it exists. */
- WT_ERR(__cursor_row_search(session, cbt, NULL, false));
-
- /* Check whether an update would conflict. */
- WT_ERR(__curfile_update_check(cbt));
-
- if (cbt->compare != 0 || !__wt_cursor_valid(cbt, NULL))
- WT_ERR(WT_NOTFOUND);
-
- ret = __cursor_row_modify(session, cbt, true);
- break;
}
err: if (ret == WT_RESTART) {
@@ -723,15 +961,27 @@ err: if (ret == WT_RESTART) {
WT_STAT_DATA_INCR(session, cursor_restart);
goto retry;
}
+
/*
- * If the cursor is configured to overwrite and the record is not
- * found, that is exactly what we want.
+ * If the cursor is configured to overwrite and the record is not found,
+ * that is exactly what we want, return success.
*/
if (F_ISSET(cursor, WT_CURSTD_OVERWRITE) && ret == WT_NOTFOUND)
ret = 0;
- if (ret != 0)
+done: /*
+ * If the cursor was positioned, it stays positioned, point the cursor
+ * at an internal copy of the key. Otherwise, there's no position or
+ * key/value.
+ */
+ if (ret == 0)
+ F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
+ if (ret == 0 && positioned)
+ WT_TRET(__wt_key_return(session, cbt));
+ else
WT_TRET(__cursor_reset(cbt));
+ if (ret != 0)
+ __cursor_state_restore(cursor, &state);
return (ret);
}
@@ -744,6 +994,7 @@ int
__wt_btcur_update(WT_CURSOR_BTREE *cbt)
{
WT_BTREE *btree;
+ WT_CURFILE_STATE state;
WT_CURSOR *cursor;
WT_DECL_RET;
WT_SESSION_IMPL *session;
@@ -756,24 +1007,71 @@ __wt_btcur_update(WT_CURSOR_BTREE *cbt)
WT_STAT_DATA_INCR(session, cursor_update);
WT_STAT_DATA_INCRV(session, cursor_update_bytes, cursor->value.size);
+ __cursor_state_save(cursor, &state);
+
if (btree->type == BTREE_ROW)
WT_RET(__cursor_size_chk(session, &cursor->key));
WT_RET(__cursor_size_chk(session, &cursor->value));
+ /* It's no longer possible to bulk-load into the tree. */
+ __cursor_disable_bulk(session, btree);
+
/*
- * The tree is no longer empty: eviction should pay attention to it,
- * and it's no longer possible to bulk-load into it.
+ * If update positioned to an on-page key, the update doesn't require
+ * another search. We don't care about the "overwrite" configuration
+ * because regardless of the overwrite setting, any existing record is
+ * updated, and the record must exist with a positioned cursor. The
+ * cursor won't be positioned on a page with an external key set, but
+ * be sure.
*/
- if (btree->bulk_load_ok) {
- btree->bulk_load_ok = false;
- __wt_btree_evictable(session, true);
+ if (__cursor_page_pinned(cbt) && F_ISSET(cursor, WT_CURSTD_KEY_INT)) {
+ WT_ERR(__wt_txn_autocommit_check(session));
+ /*
+ * The cursor position may not be exact (the cursor's comparison
+ * value not equal to zero). Correct to an exact match so we can
+ * update whatever we're pointing at.
+ */
+ cbt->compare = 0;
+ ret = btree->type == BTREE_ROW ?
+ __cursor_row_modify(session, cbt, false) :
+ __cursor_col_modify(session, cbt, false);
+ if (ret == 0)
+ goto done;
+
+ /*
+ * The pinned page goes away if we fail for any reason, make
+ * sure there's a local copy of any key. (Restart could still
+ * use the pinned page, but that's an unlikely path.) Re-save
+ * the cursor state: we may retry but eventually fail.
+ */
+ WT_TRET(__cursor_copy_int_key(cursor));
+ __cursor_state_save(cursor, &state);
+ goto err;
}
-retry: WT_RET(__cursor_func_init(cbt, true));
+ /*
+ * The pinned page goes away if we do a search, make sure there's a
+ * local copy of any key. Re-save the cursor state: we may retry but
+ * eventually fail.
+ */
+ WT_ERR(__cursor_copy_int_key(cursor));
+ __cursor_state_save(cursor, &state);
- switch (btree->type) {
- case BTREE_COL_FIX:
- case BTREE_COL_VAR:
+retry: WT_ERR(__cursor_func_init(cbt, true));
+
+ if (btree->type == BTREE_ROW) {
+ WT_ERR(__cursor_row_search(session, cbt, NULL, true));
+ /*
+ * If not overwriting, check for conflicts and fail if the key
+ * does not exist.
+ */
+ if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE)) {
+ WT_ERR(__curfile_update_check(cbt));
+ if (cbt->compare != 0 || !__wt_cursor_valid(cbt, NULL))
+ WT_ERR(WT_NOTFOUND);
+ }
+ ret = __cursor_row_modify(session, cbt, false);
+ } else {
WT_ERR(__cursor_col_search(session, cbt, NULL));
/*
@@ -792,20 +1090,6 @@ retry: WT_RET(__cursor_func_init(cbt, true));
WT_ERR(WT_NOTFOUND);
}
ret = __cursor_col_modify(session, cbt, false);
- break;
- case BTREE_ROW:
- WT_ERR(__cursor_row_search(session, cbt, NULL, true));
- /*
- * If not overwriting, check for conflicts and fail if the key
- * does not exist.
- */
- if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE)) {
- WT_ERR(__curfile_update_check(cbt));
- if (cbt->compare != 0 || !__wt_cursor_valid(cbt, NULL))
- WT_ERR(WT_NOTFOUND);
- }
- ret = __cursor_row_modify(session, cbt, false);
- break;
}
err: if (ret == WT_RESTART) {
@@ -822,11 +1106,14 @@ err: if (ret == WT_RESTART) {
* To make this work, we add a field to the btree cursor to pass back a
* pointer to the modify function's allocated update structure.
*/
- if (ret == 0)
+done: if (ret == 0)
WT_TRET(__wt_kv_return(session, cbt, cbt->modify_update));
- if (ret != 0)
+ if (ret != 0) {
WT_TRET(__cursor_reset(cbt));
+ __cursor_state_restore(cursor, &state);
+ }
+
return (ret);
}
@@ -955,9 +1242,12 @@ __cursor_truncate(WT_SESSION_IMPL *session,
WT_DECL_RET;
/*
- * First, call the standard cursor remove method to do a full search and
- * re-position the cursor because we don't have a saved copy of the
- * page's write generation information, which we need to remove records.
+ * First, call the cursor search method to re-position the cursor: we
+ * may not have a cursor position (if the higher-level truncate code
+ * switched the cursors to have an "external" cursor key, and because
+ * we don't save a copy of the page's write generation information,
+ * which we need to remove records.
+ *
* Once that's done, we can delete records without a full search, unless
* we encounter a restart error because the page was modified by some
* other thread of control; in that case, repeat the full search to
@@ -970,20 +1260,19 @@ __cursor_truncate(WT_SESSION_IMPL *session,
* instantiated the end cursor, so we know that page is pinned in memory
* and we can proceed without concern.
*/
-retry: WT_RET(__wt_btcur_remove(start));
+retry: WT_RET(__wt_btcur_search(start));
+ WT_ASSERT(session,
+ F_MASK((WT_CURSOR *)start, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_INT);
+
+ for (;;) {
+ if ((ret = rmfunc(session, start, 1)) != 0)
+ break;
- /*
- * Reset ret each time through so that we don't loop forever in
- * the cursor equals case.
- */
- for (ret = 0;;) {
if (stop != NULL && __cursor_equals(start, stop))
break;
if ((ret = __wt_btcur_next(start, true)) != 0)
break;
- start->compare = 0; /* Exact match */
- if ((ret = rmfunc(session, start, 1)) != 0)
- break;
+ start->compare = 0; /* Exact match */
}
if (ret == WT_RESTART) {
@@ -1016,29 +1305,32 @@ __cursor_truncate_fix(WT_SESSION_IMPL *session,
* record 37, records 1-36 magically appear. Those records can't be
* deleted, which means we have to ignore already "deleted" records.
*
- * First, call the standard cursor remove method to do a full search and
- * re-position the cursor because we don't have a saved copy of the
- * page's write generation information, which we need to remove records.
+ * First, call the cursor search method to re-position the cursor: we
+ * may not have a cursor position (if the higher-level truncate code
+ * switched the cursors to have an "external" cursor key, and because
+ * we don't save a copy of the page's write generation information,
+ * which we need to remove records.
+ *
* Once that's done, we can delete records without a full search, unless
* we encounter a restart error because the page was modified by some
* other thread of control; in that case, repeat the full search to
* refresh the page's modification information.
*/
-retry: WT_RET(__wt_btcur_remove(start));
- /*
- * Reset ret each time through so that we don't loop forever in
- * the cursor equals case.
- */
- for (ret = 0;;) {
+retry: WT_RET(__wt_btcur_search(start));
+ WT_ASSERT(session,
+ F_MASK((WT_CURSOR *)start, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_INT);
+
+ for (;;) {
+ value = (const uint8_t *)start->iface.value.data;
+ if (*value != 0 &&
+ (ret = rmfunc(session, start, 1)) != 0)
+ break;
+
if (stop != NULL && __cursor_equals(start, stop))
break;
if ((ret = __wt_btcur_next(start, true)) != 0)
break;
start->compare = 0; /* Exact match */
- value = (const uint8_t *)start->iface.value.data;
- if (*value != 0 &&
- (ret = rmfunc(session, start, 1)) != 0)
- break;
}
if (ret == WT_RESTART) {
@@ -1158,7 +1450,7 @@ __wt_btcur_close(WT_CURSOR_BTREE *cbt, bool lowlevel)
* Skip the usual cursor tear-down in that case.
*/
if (!lowlevel)
- ret = __curfile_leave(cbt);
+ ret = __cursor_reset(cbt);
__wt_buf_free(session, &cbt->_row_key);
__wt_buf_free(session, &cbt->_tmp);