summaryrefslogtreecommitdiff
path: root/src/third_party/wiredtiger/src/btree/bt_curnext.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/third_party/wiredtiger/src/btree/bt_curnext.c')
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_curnext.c48
1 files changed, 43 insertions, 5 deletions
diff --git a/src/third_party/wiredtiger/src/btree/bt_curnext.c b/src/third_party/wiredtiger/src/btree/bt_curnext.c
index d80a5f4740d..c4cbde25d12 100644
--- a/src/third_party/wiredtiger/src/btree/bt_curnext.c
+++ b/src/third_party/wiredtiger/src/btree/bt_curnext.c
@@ -165,14 +165,18 @@ __cursor_var_next(WT_CURSOR_BTREE *cbt, int newpage)
WT_CELL_UNPACK unpack;
WT_COL *cip;
WT_ITEM *val;
+ WT_INSERT *ins;
WT_PAGE *page;
WT_SESSION_IMPL *session;
WT_UPDATE *upd;
+ uint64_t rle, rle_start;
session = (WT_SESSION_IMPL *)cbt->iface.session;
page = cbt->ref->page;
val = &cbt->iface.value;
+ rle_start = 0; /* -Werror=maybe-uninitialized */
+
/* Initialize for each new page. */
if (newpage) {
cbt->last_standard_recno = __col_var_last_recno(page);
@@ -189,7 +193,8 @@ __cursor_var_next(WT_CURSOR_BTREE *cbt, int newpage)
__cursor_set_recno(cbt, cbt->recno + 1);
new_page: /* Find the matching WT_COL slot. */
- if ((cip = __col_var_search(page, cbt->recno)) == NULL)
+ if ((cip =
+ __col_var_search(page, cbt->recno, &rle_start)) == NULL)
return (WT_NOTFOUND);
cbt->slot = WT_COL_SLOT(page, cip);
@@ -220,15 +225,48 @@ new_page: /* Find the matching WT_COL slot. */
if ((cell = WT_COL_PTR(page, cip)) == NULL)
continue;
__wt_cell_unpack(cell, &unpack);
- if (unpack.type == WT_CELL_DEL)
+ if (unpack.type == WT_CELL_DEL) {
+ if ((rle = __wt_cell_rle(&unpack)) == 1)
+ continue;
+
+ /*
+ * There can be huge gaps in the variable-length
+ * column-store name space appearing as deleted
+ * records. If more than one deleted record, do
+ * the work of finding the next record to return
+ * instead of looping through the records.
+ *
+ * First, find the smallest record in the update
+ * list that's larger than the current record.
+ */
+ ins = __col_insert_search_gt(
+ cbt->ins_head, cbt->recno);
+
+ /*
+ * Second, for records with RLEs greater than 1,
+ * the above call to __col_var_search located
+ * this record in the page's list of repeating
+ * records, and returned the starting record.
+ * The starting record plus the RLE is the
+ * record to which we could skip, if there was
+ * no smaller record in the update list.
+ */
+ cbt->recno = rle_start + rle;
+ if (ins != NULL &&
+ WT_INSERT_RECNO(ins) < cbt->recno)
+ cbt->recno = WT_INSERT_RECNO(ins);
+
+ /* Adjust for the outer loop increment. */
+ --cbt->recno;
continue;
+ }
WT_RET(__wt_page_cell_data_ref(
- session, page, &unpack, &cbt->tmp));
+ session, page, &unpack, cbt->tmp));
cbt->cip_saved = cip;
}
- val->data = cbt->tmp.data;
- val->size = cbt->tmp.size;
+ val->data = cbt->tmp->data;
+ val->size = cbt->tmp->size;
return (0);
}
/* NOTREACHED */