diff options
Diffstat (limited to 'src/third_party/wiredtiger/src/btree/bt_cursor.c')
-rw-r--r-- | src/third_party/wiredtiger/src/btree/bt_cursor.c | 224 |
1 files changed, 131 insertions, 93 deletions
diff --git a/src/third_party/wiredtiger/src/btree/bt_cursor.c b/src/third_party/wiredtiger/src/btree/bt_cursor.c index 8f4f7982e3d..9d62ef50e92 100644 --- a/src/third_party/wiredtiger/src/btree/bt_cursor.c +++ b/src/third_party/wiredtiger/src/btree/bt_cursor.c @@ -362,13 +362,15 @@ __wt_cursor_valid(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp, bool *valid) * Column-store search from a cursor. */ static inline int -__cursor_col_search( - WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_REF *leaf) +__cursor_col_search(WT_CURSOR_BTREE *cbt, WT_REF *leaf, bool *leaf_foundp) { WT_DECL_RET; + WT_SESSION_IMPL *session; - WT_WITH_PAGE_INDEX(session, - ret = __wt_col_search(session, cbt->iface.recno, leaf, cbt, false)); + session = (WT_SESSION_IMPL *)cbt->iface.session; + WT_WITH_PAGE_INDEX( + session, ret = __wt_col_search( + cbt, cbt->iface.recno, leaf, false, leaf_foundp)); return (ret); } @@ -378,12 +380,15 @@ __cursor_col_search( */ static inline int __cursor_row_search( - WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_REF *leaf, bool insert) + WT_CURSOR_BTREE *cbt, bool insert, WT_REF *leaf, bool *leaf_foundp) { WT_DECL_RET; + WT_SESSION_IMPL *session; - WT_WITH_PAGE_INDEX(session, ret = __wt_row_search( - session, &cbt->iface.key, leaf, cbt, insert, false)); + session = (WT_SESSION_IMPL *)cbt->iface.session; + WT_WITH_PAGE_INDEX( + session, ret = __wt_row_search( + cbt, &cbt->iface.key, insert, leaf, false, leaf_foundp)); return (ret); } @@ -392,11 +397,10 @@ __cursor_row_search( * Column-store modify from a cursor, with a separate value. */ static inline int -__cursor_col_modify_v(WT_SESSION_IMPL *session, - WT_CURSOR_BTREE *cbt, WT_ITEM *value, u_int modify_type) +__cursor_col_modify_v(WT_CURSOR_BTREE *cbt, WT_ITEM *value, u_int modify_type) { - return (__wt_col_modify(session, cbt, - cbt->iface.recno, value, NULL, modify_type, false)); + return (__wt_col_modify( + cbt, cbt->iface.recno, value, NULL, modify_type, false)); } /* @@ -404,11 +408,10 @@ __cursor_col_modify_v(WT_SESSION_IMPL *session, * Row-store modify from a cursor, with a separate value. */ static inline int -__cursor_row_modify_v(WT_SESSION_IMPL *session, - WT_CURSOR_BTREE *cbt, WT_ITEM *value, u_int modify_type) +__cursor_row_modify_v(WT_CURSOR_BTREE *cbt, WT_ITEM *value, u_int modify_type) { - return (__wt_row_modify(session, cbt, - &cbt->iface.key, value, NULL, modify_type, false)); + return (__wt_row_modify( + cbt, &cbt->iface.key, value, NULL, modify_type, false)); } /* @@ -416,11 +419,11 @@ __cursor_row_modify_v(WT_SESSION_IMPL *session, * Column-store modify from a cursor. */ static inline int -__cursor_col_modify( - WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, u_int modify_type) +__cursor_col_modify(WT_CURSOR_BTREE *cbt, u_int modify_type) { - return (__wt_col_modify(session, cbt, - cbt->iface.recno, &cbt->iface.value, NULL, modify_type, false)); + return (__wt_col_modify( + cbt, cbt->iface.recno, &cbt->iface.value, + NULL, modify_type, false)); } /* @@ -428,11 +431,11 @@ __cursor_col_modify( * Row-store modify from a cursor. */ static inline int -__cursor_row_modify( - WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, u_int modify_type) +__cursor_row_modify(WT_CURSOR_BTREE *cbt, u_int modify_type) { - return (__wt_row_modify(session, cbt, - &cbt->iface.key, &cbt->iface.value, NULL, modify_type, false)); + return (__wt_row_modify( + cbt, &cbt->iface.key, &cbt->iface.value, + NULL, modify_type, false)); } /* @@ -483,7 +486,7 @@ __wt_btcur_search(WT_CURSOR_BTREE *cbt) WT_DECL_RET; WT_SESSION_IMPL *session; WT_UPDATE *upd; - bool valid; + bool leaf_found, valid; btree = cbt->btree; cursor = &cbt->iface; @@ -517,19 +520,19 @@ __wt_btcur_search(WT_CURSOR_BTREE *cbt) __wt_txn_cursor_op(session); WT_ERR(btree->type == BTREE_ROW ? - __cursor_row_search(session, cbt, cbt->ref, false) : - __cursor_col_search(session, cbt, cbt->ref)); + __cursor_row_search(cbt, false, cbt->ref, &leaf_found) : + __cursor_col_search(cbt, cbt->ref, &leaf_found)); /* Return, if prepare conflict encountered. */ - if (cbt->compare == 0) + if (leaf_found && cbt->compare == 0) WT_ERR(__wt_cursor_valid(cbt, &upd, &valid)); } if (!valid) { WT_ERR(__cursor_func_init(cbt, true)); WT_ERR(btree->type == BTREE_ROW ? - __cursor_row_search(session, cbt, NULL, false) : - __cursor_col_search(session, cbt, NULL)); + __cursor_row_search(cbt, false, NULL, NULL) : + __cursor_col_search(cbt, NULL, NULL)); /* Return, if prepare conflict encountered. */ if (cbt->compare == 0) @@ -537,7 +540,7 @@ __wt_btcur_search(WT_CURSOR_BTREE *cbt) } if (valid) - ret = __cursor_kv_return(session, cbt, upd); + ret = __cursor_kv_return(cbt, upd); else if (__cursor_fix_implicit(btree, cbt)) { /* * Creating a record past the end of the tree in a fixed-length @@ -554,7 +557,7 @@ __wt_btcur_search(WT_CURSOR_BTREE *cbt) #ifdef HAVE_DIAGNOSTIC if (ret == 0) - WT_ERR(__wt_cursor_key_order_init(session, cbt)); + WT_ERR(__wt_cursor_key_order_init(cbt)); #endif err: if (ret != 0) { @@ -578,7 +581,7 @@ __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp) WT_SESSION_IMPL *session; WT_UPDATE *upd; int exact; - bool valid; + bool leaf_found, valid; btree = cbt->btree; cursor = &cbt->iface; @@ -610,34 +613,43 @@ __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp) * pages in the case of column-store, search-near isn't an interesting * enough case for column-store to add the complexity needed to avoid * the tree search. - * - * Set the "insert" flag for the btree row-store search; we may intend - * to position the cursor at the end of the tree, rather than match an - * existing record. */ valid = false; if (btree->type == BTREE_ROW && __cursor_page_pinned(cbt)) { __wt_txn_cursor_op(session); - - WT_ERR(__cursor_row_search(session, cbt, cbt->ref, true)); + /* + * Set the "insert" flag for the btree row-store search; we may + * intend to position the cursor at the end of the tree, rather + * than match an existing record. + */ + WT_ERR(__cursor_row_search(cbt, true, cbt->ref, &leaf_found)); /* - * Search-near is trickier than search when searching an already - * pinned page. If search returns the first or last page slots, - * discard the results and search the full tree as the neighbor - * pages might offer better matches. This test is simplistic as - * we're ignoring append lists (there may be no page slots or we - * might be legitimately positioned after the last page slot). - * Ignore those cases, it makes things too complicated. + * Only use the pinned page search results if search returns an + * exact match or a slot other than the page's boundary slots, + * if that's not the case, a neighbor page might offer a better + * match. This test is simplistic as we're ignoring append + * lists (there may be no page slots or we might be + * legitimately positioned after the last page slot). Ignore + * those cases, it makes things too complicated. */ - if (cbt->slot != 0 && cbt->slot != cbt->ref->page->entries - 1) + if (leaf_found && + (cbt->compare == 0 || + (cbt->slot != 0 && + cbt->slot != cbt->ref->page->entries - 1))) WT_ERR(__wt_cursor_valid(cbt, &upd, &valid)); } if (!valid) { WT_ERR(__cursor_func_init(cbt, true)); + + /* + * Set the "insert" flag for the btree row-store search; we may + * intend to position the cursor at the end of the tree, rather + * than match an existing record. + */ WT_ERR(btree->type == BTREE_ROW ? - __cursor_row_search(session, cbt, NULL, true) : - __cursor_col_search(session, cbt, NULL)); + __cursor_row_search(cbt, true, NULL, NULL) : + __cursor_col_search(cbt, NULL, NULL)); WT_ERR(__wt_cursor_valid(cbt, &upd, &valid)); } @@ -660,7 +672,7 @@ __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp) */ if (valid) { exact = cbt->compare; - ret = __cursor_kv_return(session, cbt, upd); + ret = __cursor_kv_return(cbt, upd); } else if (__cursor_fix_implicit(btree, cbt)) { cbt->recno = cursor->recno; cbt->v = 0; @@ -711,7 +723,7 @@ err: if (ret == 0 && exactp != NULL) #ifdef HAVE_DIAGNOSTIC if (ret == 0) - WT_TRET(__wt_cursor_key_order_init(session, cbt)); + WT_TRET(__wt_cursor_key_order_init(cbt)); #endif if (ret != 0) { @@ -785,8 +797,8 @@ __wt_btcur_insert(WT_CURSOR_BTREE *cbt) */ cbt->compare = 0; ret = btree->type == BTREE_ROW ? - __cursor_row_modify(session, cbt, WT_UPDATE_STANDARD) : - __cursor_col_modify(session, cbt, WT_UPDATE_STANDARD); + __cursor_row_modify(cbt, WT_UPDATE_STANDARD) : + __cursor_col_modify(cbt, WT_UPDATE_STANDARD); if (ret == 0) goto done; @@ -814,7 +826,7 @@ __wt_btcur_insert(WT_CURSOR_BTREE *cbt) retry: WT_ERR(__cursor_func_init(cbt, true)); if (btree->type == BTREE_ROW) { - WT_ERR(__cursor_row_search(session, cbt, NULL, true)); + WT_ERR(__cursor_row_search(cbt, true, NULL, NULL)); /* * If not overwriting, fail if the key exists, else insert the * key/value pair. @@ -826,7 +838,7 @@ retry: WT_ERR(__cursor_func_init(cbt, true)); WT_ERR(WT_DUPLICATE_KEY); } - ret = __cursor_row_modify(session, cbt, WT_UPDATE_STANDARD); + ret = __cursor_row_modify(cbt, WT_UPDATE_STANDARD); } else if (append_key) { /* * Optionally insert a new record (ignoring the application's @@ -835,11 +847,11 @@ retry: WT_ERR(__cursor_func_init(cbt, true)); */ cbt->iface.recno = WT_RECNO_OOB; cbt->compare = 1; - WT_ERR(__cursor_col_search(session, cbt, NULL)); - WT_ERR(__cursor_col_modify(session, cbt, WT_UPDATE_STANDARD)); + WT_ERR(__cursor_col_search(cbt, NULL, NULL)); + WT_ERR(__cursor_col_modify(cbt, WT_UPDATE_STANDARD)); cursor->recno = cbt->recno; } else { - WT_ERR(__cursor_col_search(session, cbt, NULL)); + WT_ERR(__cursor_col_search(cbt, NULL, NULL)); /* * If not overwriting, fail if the key exists. Creating a @@ -856,7 +868,7 @@ retry: WT_ERR(__cursor_func_init(cbt, true)); WT_ERR(WT_DUPLICATE_KEY); } - WT_ERR(__cursor_col_modify(session, cbt, WT_UPDATE_STANDARD)); + WT_ERR(__cursor_col_modify(cbt, WT_UPDATE_STANDARD)); } err: if (ret == WT_RESTART) { @@ -940,7 +952,7 @@ __wt_btcur_insert_check(WT_CURSOR_BTREE *cbt) __cursor_novalue(cursor); retry: WT_ERR(__cursor_func_init(cbt, true)); - WT_ERR(__cursor_row_search(session, cbt, NULL, true)); + WT_ERR(__cursor_row_search(cbt, true, NULL, NULL)); /* Just check for conflicts. */ ret = __curfile_update_check(cbt); @@ -1045,8 +1057,8 @@ __wt_btcur_remove(WT_CURSOR_BTREE *cbt) */ cbt->compare = 0; ret = btree->type == BTREE_ROW ? - __cursor_row_modify(session, cbt, WT_UPDATE_TOMBSTONE) : - __cursor_col_modify(session, cbt, WT_UPDATE_TOMBSTONE); + __cursor_row_modify(cbt, WT_UPDATE_TOMBSTONE) : + __cursor_col_modify(cbt, WT_UPDATE_TOMBSTONE); if (ret == 0) goto done; goto err; @@ -1068,7 +1080,7 @@ retry: if (positioned == POSITIONED) WT_ERR(__cursor_func_init(cbt, true)); if (btree->type == BTREE_ROW) { - WT_ERR(__cursor_row_search(session, cbt, NULL, false)); + WT_ERR(__cursor_row_search(cbt, false, NULL, NULL)); /* Check whether an update would conflict. */ WT_ERR(__curfile_update_check(cbt)); @@ -1079,9 +1091,9 @@ retry: if (positioned == POSITIONED) if (!valid) WT_ERR(WT_NOTFOUND); - ret = __cursor_row_modify(session, cbt, WT_UPDATE_TOMBSTONE); + ret = __cursor_row_modify(cbt, WT_UPDATE_TOMBSTONE); } else { - WT_ERR(__cursor_col_search(session, cbt, NULL)); + WT_ERR(__cursor_col_search(cbt, NULL, NULL)); /* * If we find a matching record, check whether an update would @@ -1109,8 +1121,7 @@ retry: if (positioned == POSITIONED) */ cbt->recno = cursor->recno; } else - ret = __cursor_col_modify( - session, cbt, WT_UPDATE_TOMBSTONE); + ret = __cursor_col_modify(cbt, WT_UPDATE_TOMBSTONE); } err: if (ret == WT_RESTART) { @@ -1139,7 +1150,7 @@ done: switch (positioned) { * Positioned and we did a search anyway, get a key to * return. */ - WT_TRET(__wt_key_return(session, cbt)); + WT_TRET(__wt_key_return(cbt)); break; } } @@ -1194,7 +1205,7 @@ __btcur_update(WT_CURSOR_BTREE *cbt, WT_ITEM *value, u_int modify_type) WT_DECL_RET; WT_SESSION_IMPL *session; uint64_t yield_count, sleep_usecs; - bool valid; + bool leaf_found, valid; btree = cbt->btree; cursor = &cbt->iface; @@ -1227,8 +1238,8 @@ __btcur_update(WT_CURSOR_BTREE *cbt, WT_ITEM *value, u_int modify_type) */ cbt->compare = 0; ret = btree->type == BTREE_ROW ? - __cursor_row_modify_v(session, cbt, value, modify_type) : - __cursor_col_modify_v(session, cbt, value, modify_type); + __cursor_row_modify_v(cbt, value, modify_type) : + __cursor_col_modify_v(cbt, value, modify_type); if (ret == 0) goto done; @@ -1253,11 +1264,38 @@ __btcur_update(WT_CURSOR_BTREE *cbt, WT_ITEM *value, u_int modify_type) WT_ERR(__cursor_localvalue(cursor)); __cursor_state_save(cursor, &state); -retry: WT_ERR(__cursor_func_init(cbt, true)); + /* + * If our caller configures for a local search and we have a page + * pinned, do that search. + */ + if (F_ISSET(cursor, WT_CURSTD_UPDATE_LOCAL) + && __cursor_page_pinned(cbt)) { + __wt_txn_cursor_op(session); + WT_ERR(__wt_txn_autocommit_check(session)); + WT_ERR(btree->type == BTREE_ROW ? + __cursor_row_search(cbt, true, cbt->ref, &leaf_found) : + __cursor_col_search(cbt, cbt->ref, &leaf_found)); + /* + * Only use the pinned page search results if search returns an + * exact match or a slot other than the page's boundary slots, + * if that's not the case, a neighbor page might offer a better + * match. This test is simplistic as we're ignoring append + * lists (there may be no page slots or we might be + * legitimately positioned after the last page slot). Ignore + * those cases, it makes things too complicated. + */ + if (leaf_found && (cbt->compare == 0 + || (cbt->slot != 0 && cbt->slot != cbt->ref->page->entries - 1))) + goto update_local; + } +retry: + WT_ERR(__cursor_func_init(cbt, true)); + WT_ERR(btree->type == BTREE_ROW ? + __cursor_row_search(cbt, true, NULL, NULL) : + __cursor_col_search(cbt, NULL, NULL)); +update_local: if (btree->type == BTREE_ROW) { - WT_ERR(__cursor_row_search(session, cbt, NULL, true)); - /* * If not overwriting, check for conflicts and fail if the key * does not exist. @@ -1270,10 +1308,8 @@ retry: WT_ERR(__cursor_func_init(cbt, true)); if (!valid) WT_ERR(WT_NOTFOUND); } - ret = __cursor_row_modify_v(session, cbt, value, modify_type); + ret = __cursor_row_modify_v(cbt, value, modify_type); } else { - WT_ERR(__cursor_col_search(session, cbt, NULL)); - /* * If not overwriting, fail if the key doesn't exist. If we * find an update for the key, check for conflicts. Update the @@ -1291,7 +1327,7 @@ retry: WT_ERR(__cursor_func_init(cbt, true)); !__cursor_fix_implicit(btree, cbt)) WT_ERR(WT_NOTFOUND); } - ret = __cursor_col_modify_v(session, cbt, value, modify_type); + ret = __cursor_col_modify_v(cbt, value, modify_type); } err: if (ret == WT_RESTART) { @@ -1313,8 +1349,7 @@ done: switch (modify_type) { /* * WT_CURSOR.update returns a key and a value. */ - ret = __cursor_kv_return( - session, cbt, cbt->modify_update); + ret = __cursor_kv_return(cbt, cbt->modify_update); break; case WT_UPDATE_RESERVE: /* @@ -1327,7 +1362,7 @@ done: switch (modify_type) { * WT_CURSOR.modify has already created the return value * and our job is to leave it untouched. */ - ret = __wt_key_return(session, cbt); + ret = __wt_key_return(cbt); break; case WT_UPDATE_BIRTHMARK: case WT_UPDATE_TOMBSTONE: @@ -1670,13 +1705,15 @@ __wt_btcur_equals(WT_CURSOR_BTREE *a_arg, WT_CURSOR_BTREE *b_arg, int *equalp) * tree. */ static int -__cursor_truncate(WT_SESSION_IMPL *session, +__cursor_truncate( WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop, - int (*rmfunc)(WT_SESSION_IMPL *, WT_CURSOR_BTREE *, u_int)) + int (*rmfunc)(WT_CURSOR_BTREE *, u_int)) { WT_DECL_RET; + WT_SESSION_IMPL *session; uint64_t yield_count, sleep_usecs; + session = (WT_SESSION_IMPL *)start->iface.session; yield_count = sleep_usecs = 0; /* @@ -1698,12 +1735,13 @@ __cursor_truncate(WT_SESSION_IMPL *session, * instantiated the end cursor, so we know that page is pinned in memory * and we can proceed without concern. */ -retry: WT_ERR(__wt_btcur_search(start)); +retry: + WT_ERR(__wt_btcur_search(start)); WT_ASSERT(session, F_MASK((WT_CURSOR *)start, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_INT); for (;;) { - WT_ERR(rmfunc(session, start, WT_UPDATE_TOMBSTONE)); + WT_ERR(rmfunc(start, WT_UPDATE_TOMBSTONE)); if (stop != NULL && __cursor_equals(start, stop)) return (0); @@ -1727,14 +1765,16 @@ err: if (ret == WT_RESTART) { * Discard a cursor range from fixed-width column-store tree. */ static int -__cursor_truncate_fix(WT_SESSION_IMPL *session, +__cursor_truncate_fix( WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop, - int (*rmfunc)(WT_SESSION_IMPL *, WT_CURSOR_BTREE *, u_int)) + int (*rmfunc)(WT_CURSOR_BTREE *, u_int)) { WT_DECL_RET; + WT_SESSION_IMPL *session; uint64_t yield_count, sleep_usecs; const uint8_t *value; + session = (WT_SESSION_IMPL *)start->iface.session; yield_count = sleep_usecs = 0; /* @@ -1763,7 +1803,7 @@ retry: WT_ERR(__wt_btcur_search(start)); for (;;) { value = (const uint8_t *)start->iface.value.data; if (*value != 0) - WT_ERR(rmfunc(session, start, WT_UPDATE_TOMBSTONE)); + WT_ERR(rmfunc(start, WT_UPDATE_TOMBSTONE)); if (stop != NULL && __cursor_equals(start, stop)) return (0); @@ -1797,6 +1837,7 @@ __wt_btcur_range_truncate(WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop) btree = start->btree; WT_STAT_DATA_INCR(session, cursor_truncate); + WT_RET(__wt_txn_autocommit_check(session)); /* * For recovery, log the start and stop keys for a truncate operation, * not the individual records removed. On the other hand, for rollback @@ -1811,12 +1852,10 @@ __wt_btcur_range_truncate(WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop) switch (btree->type) { case BTREE_COL_FIX: - WT_ERR(__cursor_truncate_fix( - session, start, stop, __cursor_col_modify)); + WT_ERR(__cursor_truncate_fix(start, stop, __cursor_col_modify)); break; case BTREE_COL_VAR: - WT_ERR(__cursor_truncate( - session, start, stop, __cursor_col_modify)); + WT_ERR(__cursor_truncate(start, stop, __cursor_col_modify)); break; case BTREE_ROW: /* @@ -1831,8 +1870,7 @@ __wt_btcur_range_truncate(WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop) * that ever changes, we'd need to do something here to ensure a * fully instantiated cursor. */ - WT_ERR(__cursor_truncate( - session, start, stop, __cursor_row_modify)); + WT_ERR(__cursor_truncate(start, stop, __cursor_row_modify)); break; } |