diff options
Diffstat (limited to 'src/third_party/wiredtiger/src/btree/bt_cursor.c')
-rw-r--r-- | src/third_party/wiredtiger/src/btree/bt_cursor.c | 556 |
1 files changed, 424 insertions, 132 deletions
diff --git a/src/third_party/wiredtiger/src/btree/bt_cursor.c b/src/third_party/wiredtiger/src/btree/bt_cursor.c index 5fde2237538..944e276fc01 100644 --- a/src/third_party/wiredtiger/src/btree/bt_cursor.c +++ b/src/third_party/wiredtiger/src/btree/bt_cursor.c @@ -9,6 +9,84 @@ #include "wt_internal.h" /* + * When returning an error, we need to restore the cursor to a valid state, the + * upper-level cursor code is likely to retry. This structure and the associated + * functions are used save and restore the cursor state. + */ +typedef struct { + WT_ITEM key; + WT_ITEM value; + uint64_t recno; + uint32_t flags; +} WT_CURFILE_STATE; + +/* + * __cursor_state_save -- + * Save the cursor's external state. + */ +static inline void +__cursor_state_save(WT_CURSOR *cursor, WT_CURFILE_STATE *state) +{ + WT_ITEM_SET(state->key, cursor->key); + WT_ITEM_SET(state->value, cursor->value); + state->recno = cursor->recno; + state->flags = cursor->flags; +} + +/* + * __cursor_state_restore -- + * Restore the cursor's external state. + */ +static inline void +__cursor_state_restore(WT_CURSOR *cursor, WT_CURFILE_STATE *state) +{ + if (F_ISSET(state, WT_CURSTD_KEY_EXT)) + WT_ITEM_SET(cursor->key, state->key); + if (F_ISSET(state, WT_CURSTD_VALUE_EXT)) + WT_ITEM_SET(cursor->value, state->value); + cursor->recno = state->recno; + F_CLR(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT); + F_SET(cursor, F_MASK(state, WT_CURSTD_KEY_EXT | WT_CURSTD_VALUE_EXT)); + +} + +/* + * __cursor_page_pinned -- + * Return if we have a page pinned and it's not been flagged for forced + * eviction (the forced eviction test is so we periodically release pages + * grown too large). + */ +static inline bool +__cursor_page_pinned(WT_CURSOR_BTREE *cbt) +{ + return (F_ISSET(cbt, WT_CBT_ACTIVE) && + cbt->ref->page->read_gen != WT_READGEN_OLDEST); +} + +/* + * __cursor_copy_int_key -- + * If we're pointing into the tree, save the key into local memory. + */ +static inline int +__cursor_copy_int_key(WT_CURSOR *cursor) +{ + /* + * We're about to discard the cursor's position and the cursor layer + * might retry the operation. We discard pinned pages on error, which + * will invalidate pinned keys. Clear WT_CURSTD_KEY_INT in all cases, + * the underlying page is gone whether we can allocate memory or not. + */ + if (F_ISSET(cursor, WT_CURSTD_KEY_INT)) { + F_CLR(cursor, WT_CURSTD_KEY_INT); + if (!WT_DATA_IN_ITEM(&cursor->key)) + WT_RET(__wt_buf_set((WT_SESSION_IMPL *)cursor->session, + &cursor->key, cursor->key.data, cursor->key.size)); + F_SET(cursor, WT_CURSTD_KEY_EXT); + } + return (0); +} + +/* * __cursor_size_chk -- * Return if an inserted item is too large. */ @@ -55,6 +133,34 @@ __cursor_size_chk(WT_SESSION_IMPL *session, WT_ITEM *kv) } /* + * __cursor_disable_bulk -- + * Disable bulk loads into a tree. + */ +static inline void +__cursor_disable_bulk(WT_SESSION_IMPL *session, WT_BTREE *btree) +{ + /* + * Once a tree (other than the LSM primary) is no longer empty, eviction + * should pay attention to it, and it's no longer possible to bulk-load + * into it. + */ + if (!btree->original) + return; + if (btree->lsm_primary) { + btree->original = 0; /* Make the next test faster. */ + return; + } + + /* + * We use a compare-and-swap here to avoid races among the first inserts + * into a tree. Eviction is disabled when an empty tree is opened, and + * it must only be enabled once. + */ + if (__wt_atomic_cas8(&btree->original, 1, 0)) + __wt_evict_file_exclusive_off(session); +} + +/* * __cursor_fix_implicit -- * Return if search went past the end of the tree. */ @@ -285,13 +391,17 @@ __cursor_row_modify( int __wt_btcur_reset(WT_CURSOR_BTREE *cbt) { + WT_CURSOR *cursor; WT_SESSION_IMPL *session; + cursor = &cbt->iface; session = (WT_SESSION_IMPL *)cbt->iface.session; WT_STAT_CONN_INCR(session, cursor_reset); WT_STAT_DATA_INCR(session, cursor_reset); + F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); + return (__cursor_reset(cbt)); } @@ -303,6 +413,7 @@ int __wt_btcur_search(WT_CURSOR_BTREE *cbt) { WT_BTREE *btree; + WT_CURFILE_STATE state; WT_CURSOR *cursor; WT_DECL_RET; WT_SESSION_IMPL *session; @@ -317,14 +428,22 @@ __wt_btcur_search(WT_CURSOR_BTREE *cbt) WT_STAT_CONN_INCR(session, cursor_search); WT_STAT_DATA_INCR(session, cursor_search); + __cursor_state_save(cursor, &state); + + /* + * The pinned page goes away if we do a search, make sure there's a + * local copy of any key, then re-save the cursor state. + */ + WT_ERR(__cursor_copy_int_key(cursor)); + __cursor_state_save(cursor, &state); + /* * If we have a page pinned, search it; if we don't have a page pinned, * or the search of the pinned page doesn't find an exact match, search * from the root. */ valid = false; - if (F_ISSET(cbt, WT_CBT_ACTIVE) && - cbt->ref->page->read_gen != WT_READGEN_OLDEST) { + if (__cursor_page_pinned(cbt)) { __wt_txn_cursor_op(session); WT_ERR(btree->type == BTREE_ROW ? @@ -352,6 +471,8 @@ __wt_btcur_search(WT_CURSOR_BTREE *cbt) cbt->v = 0; cursor->value.data = &cbt->v; cursor->value.size = 1; + F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); + F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT); } else ret = WT_NOTFOUND; @@ -360,8 +481,10 @@ __wt_btcur_search(WT_CURSOR_BTREE *cbt) WT_ERR(__wt_cursor_key_order_init(session, cbt)); #endif -err: if (ret != 0) +err: if (ret != 0) { WT_TRET(__cursor_reset(cbt)); + __cursor_state_restore(cursor, &state); + } return (ret); } @@ -373,6 +496,7 @@ int __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp) { WT_BTREE *btree; + WT_CURFILE_STATE state; WT_CURSOR *cursor; WT_DECL_RET; WT_SESSION_IMPL *session; @@ -389,6 +513,15 @@ __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp) WT_STAT_CONN_INCR(session, cursor_search_near); WT_STAT_DATA_INCR(session, cursor_search_near); + __cursor_state_save(cursor, &state); + + /* + * The pinned page goes away if we do a search, make sure there's a + * local copy of any key, then re-save the cursor state. + */ + WT_ERR(__cursor_copy_int_key(cursor)); + __cursor_state_save(cursor, &state); + /* * If we have a row-store page pinned, search it; if we don't have a * page pinned, or the search of the pinned page doesn't find an exact @@ -402,9 +535,7 @@ __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp) * existing record. */ valid = false; - if (btree->type == BTREE_ROW && - F_ISSET(cbt, WT_CBT_ACTIVE) && - cbt->ref->page->read_gen != WT_READGEN_OLDEST) { + if (btree->type == BTREE_ROW && __cursor_page_pinned(cbt)) { __wt_txn_cursor_op(session); WT_ERR(__cursor_row_search(session, cbt, cbt->ref, true)); @@ -455,6 +586,8 @@ __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp) cursor->value.data = &cbt->v; cursor->value.size = 1; exact = 0; + F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); + F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT); } else if ((ret = __wt_btcur_next(cbt, false)) != WT_NOTFOUND) exact = 1; else { @@ -469,15 +602,18 @@ __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp) exact = -1; } +err: if (ret == 0 && exactp != NULL) + *exactp = exact; + #ifdef HAVE_DIAGNOSTIC if (ret == 0) - WT_ERR(__wt_cursor_key_order_init(session, cbt)); + WT_TRET(__wt_cursor_key_order_init(session, cbt)); #endif -err: if (ret != 0) + if (ret != 0) { WT_TRET(__cursor_reset(cbt)); - if (exactp != NULL && (ret == 0 || ret == WT_NOTFOUND)) - *exactp = exact; + __cursor_state_restore(cursor, &state); + } return (ret); } @@ -489,9 +625,11 @@ int __wt_btcur_insert(WT_CURSOR_BTREE *cbt) { WT_BTREE *btree; + WT_CURFILE_STATE state; WT_CURSOR *cursor; WT_DECL_RET; WT_SESSION_IMPL *session; + bool append_key; btree = cbt->btree; cursor = &cbt->iface; @@ -502,30 +640,86 @@ __wt_btcur_insert(WT_CURSOR_BTREE *cbt) WT_STAT_DATA_INCRV(session, cursor_insert_bytes, cursor->key.size + cursor->value.size); + __cursor_state_save(cursor, &state); + if (btree->type == BTREE_ROW) WT_RET(__cursor_size_chk(session, &cursor->key)); WT_RET(__cursor_size_chk(session, &cursor->value)); + /* It's no longer possible to bulk-load into the tree. */ + __cursor_disable_bulk(session, btree); + + /* + * Insert a new record if WT_CURSTD_APPEND configured, (ignoring any + * application set record number). Although append can't be configured + * for a row-store, this code would break if it were, and that's owned + * by the upper cursor layer, be cautious. + */ + append_key = + F_ISSET(cursor, WT_CURSTD_APPEND) && btree->type != BTREE_ROW; + /* - * The tree is no longer empty: eviction should pay attention to it, - * and it's no longer possible to bulk-load into it. + * If inserting with overwrite configured, and positioned to an on-page + * key, the update doesn't require another search. The cursor won't be + * positioned on a page with an external key set, but be sure. Cursors + * configured for append aren't included, regardless of whether or not + * they meet all other criteria. */ - if (btree->bulk_load_ok) { - btree->bulk_load_ok = false; - __wt_btree_evictable(session, true); + if (__cursor_page_pinned(cbt) && + F_ISSET_ALL(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_OVERWRITE) && + !append_key) { + WT_ERR(__wt_txn_autocommit_check(session)); + /* + * The cursor position may not be exact (the cursor's comparison + * value not equal to zero). Correct to an exact match so we can + * update whatever we're pointing at. + */ + cbt->compare = 0; + ret = btree->type == BTREE_ROW ? + __cursor_row_modify(session, cbt, false) : + __cursor_col_modify(session, cbt, false); + if (ret == 0) + goto done; + + /* + * The pinned page goes away if we fail for any reason, make + * sure there's a local copy of any key. (Restart could still + * use the pinned page, but that's an unlikely path.) Re-save + * the cursor state: we may retry but eventually fail. + */ + WT_TRET(__cursor_copy_int_key(cursor)); + __cursor_state_save(cursor, &state); + goto err; } -retry: WT_RET(__cursor_func_init(cbt, true)); + /* + * The pinned page goes away if we do a search, make sure there's a + * local copy of any key. Re-save the cursor state: we may retry but + * eventually fail. + */ + WT_ERR(__cursor_copy_int_key(cursor)); + __cursor_state_save(cursor, &state); - switch (btree->type) { - case BTREE_COL_FIX: - case BTREE_COL_VAR: +retry: WT_ERR(__cursor_func_init(cbt, true)); + + if (btree->type == BTREE_ROW) { + WT_ERR(__cursor_row_search(session, cbt, NULL, true)); /* - * If WT_CURSTD_APPEND is set, insert a new record (ignoring - * the application's record number). The real record number - * is assigned by the serialized append operation. + * If not overwriting, fail if the key exists, else insert the + * key/value pair. */ - if (F_ISSET(cursor, WT_CURSTD_APPEND)) + if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE) && + cbt->compare == 0 && __wt_cursor_valid(cbt, NULL)) + WT_ERR(WT_DUPLICATE_KEY); + + ret = __cursor_row_modify(session, cbt, false); + } else { + /* + * Optionally insert a new record (ignoring the application's + * record number). The real record number is allocated by the + * serialized append operation. + */ + if (append_key) cbt->iface.recno = WT_RECNO_OOB; WT_ERR(__cursor_col_search(session, cbt, NULL)); @@ -542,21 +736,9 @@ retry: WT_RET(__cursor_func_init(cbt, true)); WT_ERR(WT_DUPLICATE_KEY); WT_ERR(__cursor_col_modify(session, cbt, false)); - if (F_ISSET(cursor, WT_CURSTD_APPEND)) - cbt->iface.recno = cbt->recno; - break; - case BTREE_ROW: - WT_ERR(__cursor_row_search(session, cbt, NULL, true)); - /* - * If not overwriting, fail if the key exists, else insert the - * key/value pair. - */ - if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE) && - cbt->compare == 0 && __wt_cursor_valid(cbt, NULL)) - WT_ERR(WT_DUPLICATE_KEY); - ret = __cursor_row_modify(session, cbt, false); - break; + if (append_key) + cbt->iface.recno = cbt->recno; } err: if (ret == WT_RESTART) { @@ -564,11 +746,17 @@ err: if (ret == WT_RESTART) { WT_STAT_DATA_INCR(session, cursor_restart); goto retry; } - /* Insert doesn't maintain a position across calls, clear resources. */ - if (ret == 0) - WT_TRET(__curfile_leave(cbt)); + +done: /* Insert doesn't maintain a position across calls, clear resources. */ + if (ret == 0) { + F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); + if (append_key) + F_SET(cursor, WT_CURSTD_KEY_INT); + } + WT_TRET(__cursor_reset(cbt)); if (ret != 0) - WT_TRET(__cursor_reset(cbt)); + __cursor_state_restore(cursor, &state); + return (ret); } @@ -604,16 +792,15 @@ __curfile_update_check(WT_CURSOR_BTREE *cbt) } /* - * __wt_btcur_update_check -- + * __wt_btcur_insert_check -- * Check whether an update would conflict. * - * This can be used to replace WT_CURSOR::insert or WT_CURSOR::update, so - * they only check for conflicts without updating the tree. It is used to - * maintain snapshot isolation for transactions that span multiple chunks - * in an LSM tree. + * This can replace WT_CURSOR::insert, so it only checks for conflicts without + * updating the tree. It is used to maintain snapshot isolation for transactions + * that span multiple chunks in an LSM tree. */ int -__wt_btcur_update_check(WT_CURSOR_BTREE *cbt) +__wt_btcur_insert_check(WT_CURSOR_BTREE *cbt) { WT_BTREE *btree; WT_CURSOR *cursor; @@ -624,31 +811,35 @@ __wt_btcur_update_check(WT_CURSOR_BTREE *cbt) btree = cbt->btree; session = (WT_SESSION_IMPL *)cursor->session; -retry: WT_RET(__cursor_func_init(cbt, true)); + /* + * The pinned page goes away if we do a search, make sure there's a + * local copy of any key. Unlike most of the btree cursor routines, + * we don't have to save/restore the cursor key state, none of the + * work done here changes the key state. + */ + WT_ERR(__cursor_copy_int_key(cursor)); - switch (btree->type) { - case BTREE_ROW: +retry: WT_ERR(__cursor_func_init(cbt, true)); + + if (btree->type == BTREE_ROW) { WT_ERR(__cursor_row_search(session, cbt, NULL, true)); - /* - * Just check for conflicts. - */ + /* Just check for conflicts. */ ret = __curfile_update_check(cbt); - break; - case BTREE_COL_FIX: - case BTREE_COL_VAR: + } else WT_ERR(__wt_illegal_value(session, NULL)); - break; - } err: if (ret == WT_RESTART) { WT_STAT_CONN_INCR(session, cursor_restart); WT_STAT_DATA_INCR(session, cursor_restart); goto retry; } - WT_TRET(__curfile_leave(cbt)); - if (ret != 0) - WT_TRET(__cursor_reset(cbt)); + + /* Insert doesn't maintain a position across calls, clear resources. */ + if (ret == 0) + F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); + WT_TRET(__cursor_reset(cbt)); + return (ret); } @@ -660,9 +851,11 @@ int __wt_btcur_remove(WT_CURSOR_BTREE *cbt) { WT_BTREE *btree; + WT_CURFILE_STATE state; WT_CURSOR *cursor; WT_DECL_RET; WT_SESSION_IMPL *session; + bool positioned; btree = cbt->btree; cursor = &cbt->iface; @@ -672,11 +865,69 @@ __wt_btcur_remove(WT_CURSOR_BTREE *cbt) WT_STAT_DATA_INCR(session, cursor_remove); WT_STAT_DATA_INCRV(session, cursor_remove_bytes, cursor->key.size); -retry: WT_RET(__cursor_func_init(cbt, true)); + __cursor_state_save(cursor, &state); - switch (btree->type) { - case BTREE_COL_FIX: - case BTREE_COL_VAR: + /* + * WT_CURSOR.remove has a unique semantic, the cursor stays positioned + * if it starts positioned, otherwise clear the cursor on completion. + */ + positioned = F_ISSET(cursor, WT_CURSTD_KEY_INT); + + /* + * If remove positioned to an on-page key, the remove doesn't require + * another search. We don't care about the "overwrite" configuration + * because regardless of the overwrite setting, any existing record is + * removed, and the record must exist with a positioned cursor. The + * cursor won't be positioned on a page with an external key set, but + * be sure. + */ + if (__cursor_page_pinned(cbt) && F_ISSET(cursor, WT_CURSTD_KEY_INT)) { + WT_ERR(__wt_txn_autocommit_check(session)); + + /* + * The cursor position may not be exact (the cursor's comparison + * value not equal to zero). Correct to an exact match so we can + * remove whatever we're pointing at. + */ + cbt->compare = 0; + ret = btree->type == BTREE_ROW ? + __cursor_row_modify(session, cbt, true) : + __cursor_col_modify(session, cbt, true); + if (ret == 0) + goto done; + + /* + * The pinned page goes away if we fail for any reason, make + * sure there's a local copy of any key. (Restart could still + * use the pinned page, but that's an unlikely path.) Re-save + * the cursor state: we may retry but eventually fail. + */ + WT_TRET(__cursor_copy_int_key(cursor)); + __cursor_state_save(cursor, &state); + goto err; + } + + /* + * The pinned page goes away if we do a search, make sure there's a + * local copy of any key. Re-save the cursor state: we may retry but + * eventually fail. + */ + WT_ERR(__cursor_copy_int_key(cursor)); + __cursor_state_save(cursor, &state); + +retry: WT_ERR(__cursor_func_init(cbt, true)); + + if (btree->type == BTREE_ROW) { + WT_ERR(__cursor_row_search(session, cbt, NULL, false)); + + /* Check whether an update would conflict. */ + WT_ERR(__curfile_update_check(cbt)); + + if (cbt->compare != 0 || !__wt_cursor_valid(cbt, NULL)) + WT_ERR(WT_NOTFOUND); + + ret = __cursor_row_modify(session, cbt, true); + } else { WT_ERR(__cursor_col_search(session, cbt, NULL)); /* @@ -703,19 +954,6 @@ retry: WT_RET(__cursor_func_init(cbt, true)); cbt->recno = cursor->recno; } else ret = __cursor_col_modify(session, cbt, true); - break; - case BTREE_ROW: - /* Remove the record if it exists. */ - WT_ERR(__cursor_row_search(session, cbt, NULL, false)); - - /* Check whether an update would conflict. */ - WT_ERR(__curfile_update_check(cbt)); - - if (cbt->compare != 0 || !__wt_cursor_valid(cbt, NULL)) - WT_ERR(WT_NOTFOUND); - - ret = __cursor_row_modify(session, cbt, true); - break; } err: if (ret == WT_RESTART) { @@ -723,15 +961,27 @@ err: if (ret == WT_RESTART) { WT_STAT_DATA_INCR(session, cursor_restart); goto retry; } + /* - * If the cursor is configured to overwrite and the record is not - * found, that is exactly what we want. + * If the cursor is configured to overwrite and the record is not found, + * that is exactly what we want, return success. */ if (F_ISSET(cursor, WT_CURSTD_OVERWRITE) && ret == WT_NOTFOUND) ret = 0; - if (ret != 0) +done: /* + * If the cursor was positioned, it stays positioned, point the cursor + * at an internal copy of the key. Otherwise, there's no position or + * key/value. + */ + if (ret == 0) + F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); + if (ret == 0 && positioned) + WT_TRET(__wt_key_return(session, cbt)); + else WT_TRET(__cursor_reset(cbt)); + if (ret != 0) + __cursor_state_restore(cursor, &state); return (ret); } @@ -744,6 +994,7 @@ int __wt_btcur_update(WT_CURSOR_BTREE *cbt) { WT_BTREE *btree; + WT_CURFILE_STATE state; WT_CURSOR *cursor; WT_DECL_RET; WT_SESSION_IMPL *session; @@ -756,24 +1007,71 @@ __wt_btcur_update(WT_CURSOR_BTREE *cbt) WT_STAT_DATA_INCR(session, cursor_update); WT_STAT_DATA_INCRV(session, cursor_update_bytes, cursor->value.size); + __cursor_state_save(cursor, &state); + if (btree->type == BTREE_ROW) WT_RET(__cursor_size_chk(session, &cursor->key)); WT_RET(__cursor_size_chk(session, &cursor->value)); + /* It's no longer possible to bulk-load into the tree. */ + __cursor_disable_bulk(session, btree); + /* - * The tree is no longer empty: eviction should pay attention to it, - * and it's no longer possible to bulk-load into it. + * If update positioned to an on-page key, the update doesn't require + * another search. We don't care about the "overwrite" configuration + * because regardless of the overwrite setting, any existing record is + * updated, and the record must exist with a positioned cursor. The + * cursor won't be positioned on a page with an external key set, but + * be sure. */ - if (btree->bulk_load_ok) { - btree->bulk_load_ok = false; - __wt_btree_evictable(session, true); + if (__cursor_page_pinned(cbt) && F_ISSET(cursor, WT_CURSTD_KEY_INT)) { + WT_ERR(__wt_txn_autocommit_check(session)); + /* + * The cursor position may not be exact (the cursor's comparison + * value not equal to zero). Correct to an exact match so we can + * update whatever we're pointing at. + */ + cbt->compare = 0; + ret = btree->type == BTREE_ROW ? + __cursor_row_modify(session, cbt, false) : + __cursor_col_modify(session, cbt, false); + if (ret == 0) + goto done; + + /* + * The pinned page goes away if we fail for any reason, make + * sure there's a local copy of any key. (Restart could still + * use the pinned page, but that's an unlikely path.) Re-save + * the cursor state: we may retry but eventually fail. + */ + WT_TRET(__cursor_copy_int_key(cursor)); + __cursor_state_save(cursor, &state); + goto err; } -retry: WT_RET(__cursor_func_init(cbt, true)); + /* + * The pinned page goes away if we do a search, make sure there's a + * local copy of any key. Re-save the cursor state: we may retry but + * eventually fail. + */ + WT_ERR(__cursor_copy_int_key(cursor)); + __cursor_state_save(cursor, &state); - switch (btree->type) { - case BTREE_COL_FIX: - case BTREE_COL_VAR: +retry: WT_ERR(__cursor_func_init(cbt, true)); + + if (btree->type == BTREE_ROW) { + WT_ERR(__cursor_row_search(session, cbt, NULL, true)); + /* + * If not overwriting, check for conflicts and fail if the key + * does not exist. + */ + if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE)) { + WT_ERR(__curfile_update_check(cbt)); + if (cbt->compare != 0 || !__wt_cursor_valid(cbt, NULL)) + WT_ERR(WT_NOTFOUND); + } + ret = __cursor_row_modify(session, cbt, false); + } else { WT_ERR(__cursor_col_search(session, cbt, NULL)); /* @@ -792,20 +1090,6 @@ retry: WT_RET(__cursor_func_init(cbt, true)); WT_ERR(WT_NOTFOUND); } ret = __cursor_col_modify(session, cbt, false); - break; - case BTREE_ROW: - WT_ERR(__cursor_row_search(session, cbt, NULL, true)); - /* - * If not overwriting, check for conflicts and fail if the key - * does not exist. - */ - if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE)) { - WT_ERR(__curfile_update_check(cbt)); - if (cbt->compare != 0 || !__wt_cursor_valid(cbt, NULL)) - WT_ERR(WT_NOTFOUND); - } - ret = __cursor_row_modify(session, cbt, false); - break; } err: if (ret == WT_RESTART) { @@ -822,11 +1106,14 @@ err: if (ret == WT_RESTART) { * To make this work, we add a field to the btree cursor to pass back a * pointer to the modify function's allocated update structure. */ - if (ret == 0) +done: if (ret == 0) WT_TRET(__wt_kv_return(session, cbt, cbt->modify_update)); - if (ret != 0) + if (ret != 0) { WT_TRET(__cursor_reset(cbt)); + __cursor_state_restore(cursor, &state); + } + return (ret); } @@ -955,9 +1242,12 @@ __cursor_truncate(WT_SESSION_IMPL *session, WT_DECL_RET; /* - * First, call the standard cursor remove method to do a full search and - * re-position the cursor because we don't have a saved copy of the - * page's write generation information, which we need to remove records. + * First, call the cursor search method to re-position the cursor: we + * may not have a cursor position (if the higher-level truncate code + * switched the cursors to have an "external" cursor key, and because + * we don't save a copy of the page's write generation information, + * which we need to remove records. + * * Once that's done, we can delete records without a full search, unless * we encounter a restart error because the page was modified by some * other thread of control; in that case, repeat the full search to @@ -970,20 +1260,19 @@ __cursor_truncate(WT_SESSION_IMPL *session, * instantiated the end cursor, so we know that page is pinned in memory * and we can proceed without concern. */ -retry: WT_RET(__wt_btcur_remove(start)); +retry: WT_RET(__wt_btcur_search(start)); + WT_ASSERT(session, + F_MASK((WT_CURSOR *)start, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_INT); + + for (;;) { + if ((ret = rmfunc(session, start, 1)) != 0) + break; - /* - * Reset ret each time through so that we don't loop forever in - * the cursor equals case. - */ - for (ret = 0;;) { if (stop != NULL && __cursor_equals(start, stop)) break; if ((ret = __wt_btcur_next(start, true)) != 0) break; - start->compare = 0; /* Exact match */ - if ((ret = rmfunc(session, start, 1)) != 0) - break; + start->compare = 0; /* Exact match */ } if (ret == WT_RESTART) { @@ -1016,29 +1305,32 @@ __cursor_truncate_fix(WT_SESSION_IMPL *session, * record 37, records 1-36 magically appear. Those records can't be * deleted, which means we have to ignore already "deleted" records. * - * First, call the standard cursor remove method to do a full search and - * re-position the cursor because we don't have a saved copy of the - * page's write generation information, which we need to remove records. + * First, call the cursor search method to re-position the cursor: we + * may not have a cursor position (if the higher-level truncate code + * switched the cursors to have an "external" cursor key, and because + * we don't save a copy of the page's write generation information, + * which we need to remove records. + * * Once that's done, we can delete records without a full search, unless * we encounter a restart error because the page was modified by some * other thread of control; in that case, repeat the full search to * refresh the page's modification information. */ -retry: WT_RET(__wt_btcur_remove(start)); - /* - * Reset ret each time through so that we don't loop forever in - * the cursor equals case. - */ - for (ret = 0;;) { +retry: WT_RET(__wt_btcur_search(start)); + WT_ASSERT(session, + F_MASK((WT_CURSOR *)start, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_INT); + + for (;;) { + value = (const uint8_t *)start->iface.value.data; + if (*value != 0 && + (ret = rmfunc(session, start, 1)) != 0) + break; + if (stop != NULL && __cursor_equals(start, stop)) break; if ((ret = __wt_btcur_next(start, true)) != 0) break; start->compare = 0; /* Exact match */ - value = (const uint8_t *)start->iface.value.data; - if (*value != 0 && - (ret = rmfunc(session, start, 1)) != 0) - break; } if (ret == WT_RESTART) { @@ -1158,7 +1450,7 @@ __wt_btcur_close(WT_CURSOR_BTREE *cbt, bool lowlevel) * Skip the usual cursor tear-down in that case. */ if (!lowlevel) - ret = __curfile_leave(cbt); + ret = __cursor_reset(cbt); __wt_buf_free(session, &cbt->_row_key); __wt_buf_free(session, &cbt->_tmp); |