summary | refs | log | tree | commit | diff
path: root/src/third_party/wiredtiger/src/btree/bt_curnext.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/third_party/wiredtiger/src/btree/bt_curnext.c')
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_curnext.c | 52
1 files changed, 29 insertions, 23 deletions
diff --git a/src/third_party/wiredtiger/src/btree/bt_curnext.c b/src/third_party/wiredtiger/src/btree/bt_curnext.c
index 8506083e006..6953eb3dc02 100644
--- a/src/third_party/wiredtiger/src/btree/bt_curnext.c
+++ b/src/third_party/wiredtiger/src/btree/bt_curnext.c
@@ -343,10 +343,12 @@ __cursor_row_next(
WT_PAGE *page;
WT_ROW *rip;
WT_SESSION_IMPL *session;
+ bool prefix_search;
- session = CUR2S(cbt);
- page = cbt->ref->page;
key = &cbt->iface.key;
+ page = cbt->ref->page;
+ session = CUR2S(cbt);
+ prefix_search = prefix != NULL && F_ISSET(&cbt->iface, WT_CURSTD_PREFIX_SEARCH);
*skippedp = 0;
/* If restarting after a prepare conflict, jump to the right spot. */
@@ -394,6 +396,14 @@ restart_read_insert:
if ((ins = cbt->ins) != NULL) {
key->data = WT_INSERT_KEY(ins);
key->size = WT_INSERT_KEY_SIZE(ins);
+ /*
+ * If the cursor has prefix search configured we can early exit here if the key that we
+ * are visiting is after our prefix.
+ */
+ if (prefix_search && __wt_prefix_match(prefix, key) < 0) {
+ WT_STAT_CONN_DATA_INCR(session, cursor_search_near_prefix_fast_paths);
+ return (WT_NOTFOUND);
+ }
WT_RET(__wt_txn_read_upd_list(session, cbt, ins->upd));
if (cbt->upd_value->type == WT_UPDATE_INVALID) {
++*skippedp;
@@ -435,10 +445,7 @@ restart_read_page:
* If the cursor has prefix search configured we can early exit here if the key that we are
* visiting is after our prefix.
*/
- if (F_ISSET(&cbt->iface, WT_CURSTD_PREFIX_SEARCH) && prefix != NULL &&
- __wt_prefix_match(prefix, &cbt->iface.key) < 0) {
- /* It is not okay for the user to have a custom collator. */
- WT_ASSERT(session, CUR2BT(cbt)->collator == NULL);
+ if (prefix_search && __wt_prefix_match(prefix, &cbt->iface.key) < 0) {
WT_STAT_CONN_DATA_INCR(session, cursor_search_near_prefix_fast_paths);
return (WT_NOTFOUND);
}
@@ -681,13 +688,12 @@ __wt_btcur_next_prefix(WT_CURSOR_BTREE *cbt, WT_ITEM *prefix, bool truncating)
WT_DECL_RET;
WT_PAGE *page;
WT_SESSION_IMPL *session;
- size_t pages_skipped_count, total_skipped, skipped;
+ size_t total_skipped, skipped;
uint32_t flags;
bool newpage, restart;
cursor = &cbt->iface;
session = CUR2S(cbt);
- pages_skipped_count = 0;
total_skipped = 0;
WT_STAT_CONN_DATA_INCR(session, cursor_next);
@@ -707,7 +713,7 @@ __wt_btcur_next_prefix(WT_CURSOR_BTREE *cbt, WT_ITEM *prefix, bool truncating)
__wt_btcur_iterate_setup(cbt);
/*
- * Walk any page we're holding until the underlying call returns not- found. Then, move to the
+ * Walk any page we're holding until the underlying call returns not-found. Then, move to the
* next page, until we reach the end of the file.
*/
restart = F_ISSET(cbt, WT_CBT_ITERATE_RETRY_NEXT);
@@ -715,16 +721,6 @@ __wt_btcur_next_prefix(WT_CURSOR_BTREE *cbt, WT_ITEM *prefix, bool truncating)
for (newpage = false;; newpage = true, restart = false) {
page = cbt->ref == NULL ? NULL : cbt->ref->page;
- /*
- * Determine if all records on the page have been deleted and all the tombstones are visible
- * to our transaction. If so, we can avoid reading the records on the page and move to the
- * next page.
- */
- if (__wt_btcur_skip_page(cbt)) {
- pages_skipped_count++;
- goto skip_page;
- }
-
if (F_ISSET(cbt, WT_CBT_ITERATE_APPEND)) {
/* The page cannot be NULL if the above flag is set. */
WT_ASSERT(session, page != NULL);
@@ -796,16 +792,26 @@ __wt_btcur_next_prefix(WT_CURSOR_BTREE *cbt, WT_ITEM *prefix, bool truncating)
WT_STAT_CONN_INCR(session, cache_eviction_force_delete);
}
cbt->page_deleted_count = 0;
-skip_page:
+
if (F_ISSET(cbt, WT_CBT_READ_ONCE))
LF_SET(WT_READ_WONT_NEED);
- WT_ERR(__wt_tree_walk(session, &cbt->ref, flags));
+
+ /*
+ * If we are running with snapshot isolation, and not interested in returning tombstones, we
+ * could potentially skip pages. The skip function looks at the aggregated timestamp
+ * information to determine if something is visible on the page. If nothing is, the page is
+ * skipped.
+ */
+ if (session->txn->isolation == WT_ISO_SNAPSHOT &&
+ !F_ISSET(&cbt->iface, WT_CURSTD_IGNORE_TOMBSTONE))
+ WT_ERR(
+ __wt_tree_walk_custom_skip(session, &cbt->ref, __wt_btcur_skip_page, NULL, flags));
+ else
+ WT_ERR(__wt_tree_walk(session, &cbt->ref, flags));
WT_ERR_TEST(cbt->ref == NULL, WT_NOTFOUND, false);
}
err:
- WT_STAT_CONN_DATA_INCRV(session, cursor_next_skip_page_count, pages_skipped_count);
-
if (total_skipped < 100)
WT_STAT_CONN_DATA_INCR(session, cursor_next_skip_lt_100);
else