summaryrefslogtreecommitdiff
path: root/src/third_party/wiredtiger/src/btree
diff options
context:
space:
mode:
author Luke Chen <luke.chen@mongodb.com> 2020-01-13 14:25:10 +1100
committer Luke Chen <luke.chen@mongodb.com> 2020-01-13 14:27:49 +1100
commit cf013cac38ee4b655c990fd0db341298a8d6d563 (patch)
tree 064fef03b32ff4341d6cd3241fdf656024e07ef3 /src/third_party/wiredtiger/src/btree
parent 462cc0e91455e0c18f40722ab1dab3ea2bdca581 (diff)
download mongo-cf013cac38ee4b655c990fd0db341298a8d6d563.tar.gz
Import wiredtiger: 54a846c423023183195dccc634aff4770f11ba54 from branch mongodb-4.0
ref: e651c9e274..54a846c423 for: 4.0.15 WT-4636 Fix strace in syscall test WT-5042 Reduce configuration parsing overhead from checkpoints WT-5106 Remove temporary files in clang-format script WT-5112 Handling goto labels with multiple words in s_goto.py WT-5120 Checkpoint hangs when reconciliation doesn't release the eviction generation WT-5125 Adding new stats for eviction target strategy WT-5135 Change lookaside file inserts to use cursor.insert WT-5136 Fix reading freed memory due to birthmark after uncommitted updates freed WT-5169 WT_REF_LIMBO pages cannot support fast (leaf-page only) searches WT-5196 Data mismatch failures with test/checkpoint after enabling LAS sweep WT-5218 Improve eviction to differentiate between clean and dirty pages with WT_CACHE_EVICT_NOKEEP readgen WT-5239 Fix syscall failure about metadata file open WT-5247 Ensure that only idempotent modify operations are logged WT-5277 Cursor key out-of-order detected in the lookaside file
Diffstat (limited to 'src/third_party/wiredtiger/src/btree')
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_curnext.c 30
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_curprev.c 25
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_cursor.c 220
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_debug.c 22
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_random.c 2
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_read.c 10
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_ret.c 25
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_slvg.c 4
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_split.c 12
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_vrfy.c 4
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_vrfy_dsk.c 4
-rw-r--r-- src/third_party/wiredtiger/src/btree/col_modify.c 6
-rw-r--r-- src/third_party/wiredtiger/src/btree/col_srch.c 21
-rw-r--r-- src/third_party/wiredtiger/src/btree/row_key.c 2
-rw-r--r-- src/third_party/wiredtiger/src/btree/row_modify.c 44
-rw-r--r-- src/third_party/wiredtiger/src/btree/row_srch.c 25
16 files changed, 265 insertions, 191 deletions
diff --git a/src/third_party/wiredtiger/src/btree/bt_curnext.c b/src/third_party/wiredtiger/src/btree/bt_curnext.c
index 1085d26315a..d5857f75bd3 100644
--- a/src/third_party/wiredtiger/src/btree/bt_curnext.c
+++ b/src/third_party/wiredtiger/src/btree/bt_curnext.c
@@ -60,7 +60,7 @@ __cursor_fix_append_next(WT_CURSOR_BTREE *cbt, bool newpage, bool restart)
cbt->v = 0;
cbt->iface.value.data = &cbt->v;
} else {
- restart_read:
+restart_read:
WT_RET(__wt_txn_read(session, cbt->ins->upd, &upd));
if (upd == NULL) {
cbt->v = 0;
@@ -114,7 +114,7 @@ new_page:
if (cbt->ins != NULL && cbt->recno != WT_INSERT_RECNO(cbt->ins))
cbt->ins = NULL;
if (cbt->ins != NULL) {
- restart_read:
+restart_read:
WT_RET(__wt_txn_read(session, cbt->ins->upd, &upd));
}
if (upd == NULL) {
@@ -149,12 +149,12 @@ __cursor_var_append_next(WT_CURSOR_BTREE *cbt, bool newpage, bool restart)
for (;;) {
cbt->ins = WT_SKIP_NEXT(cbt->ins);
- new_page:
+new_page:
if (cbt->ins == NULL)
return (WT_NOTFOUND);
__cursor_set_recno(cbt, WT_INSERT_RECNO(cbt->ins));
- restart_read:
+restart_read:
WT_RET(__wt_txn_read(session, cbt->ins->upd, &upd));
if (upd == NULL)
continue;
@@ -163,7 +163,7 @@ __cursor_var_append_next(WT_CURSOR_BTREE *cbt, bool newpage, bool restart)
++cbt->page_deleted_count;
continue;
}
- return (__wt_value_return(session, cbt, upd));
+ return (__wt_value_return(cbt, upd));
}
/* NOTREACHED */
}
@@ -213,8 +213,8 @@ __cursor_var_next(WT_CURSOR_BTREE *cbt, bool newpage, bool restart)
return (WT_NOTFOUND);
__cursor_set_recno(cbt, cbt->recno + 1);
- new_page:
- restart_read:
+new_page:
+restart_read:
/* Find the matching WT_COL slot. */
if ((cip = __col_var_search(cbt->ref, cbt->recno, &rle_start)) == NULL)
return (WT_NOTFOUND);
@@ -232,7 +232,7 @@ __cursor_var_next(WT_CURSOR_BTREE *cbt, bool newpage, bool restart)
++cbt->page_deleted_count;
continue;
}
- return (__wt_value_return(session, cbt, upd));
+ return (__wt_value_return(cbt, upd));
}
/*
@@ -346,9 +346,9 @@ __cursor_row_next(WT_CURSOR_BTREE *cbt, bool newpage, bool restart)
if (cbt->ins != NULL)
cbt->ins = WT_SKIP_NEXT(cbt->ins);
- new_insert:
+new_insert:
cbt->iter_retry = WT_CBT_RETRY_INSERT;
- restart_read_insert:
+restart_read_insert:
if ((ins = cbt->ins) != NULL) {
WT_RET(__wt_txn_read(session, ins->upd, &upd));
if (upd == NULL)
@@ -360,7 +360,7 @@ __cursor_row_next(WT_CURSOR_BTREE *cbt, bool newpage, bool restart)
}
key->data = WT_INSERT_KEY(ins);
key->size = WT_INSERT_KEY_SIZE(ins);
- return (__wt_value_return(session, cbt, upd));
+ return (__wt_value_return(cbt, upd));
}
/* Check for the end of the page. */
@@ -381,7 +381,7 @@ __cursor_row_next(WT_CURSOR_BTREE *cbt, bool newpage, bool restart)
cbt->ins = NULL;
cbt->iter_retry = WT_CBT_RETRY_PAGE;
cbt->slot = cbt->row_iteration_slot / 2 - 1;
- restart_read_page:
+restart_read_page:
rip = &page->pg_row[cbt->slot];
WT_RET(__wt_txn_read(session, WT_ROW_UPDATE(page, rip), &upd));
if (upd != NULL && upd->type == WT_UPDATE_TOMBSTONE) {
@@ -489,8 +489,12 @@ __wt_cursor_key_order_check(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, bool
* Initialize key ordering checks for cursor movements after a successful search.
*/
int
-__wt_cursor_key_order_init(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
+__wt_cursor_key_order_init(WT_CURSOR_BTREE *cbt)
{
+ WT_SESSION_IMPL *session;
+
+ session = (WT_SESSION_IMPL *)cbt->iface.session;
+
/*
* Cursor searches set the position for cursor movements, set the last-key value for diagnostic
* checking.
diff --git a/src/third_party/wiredtiger/src/btree/bt_curprev.c b/src/third_party/wiredtiger/src/btree/bt_curprev.c
index 9aa8d08345d..8abdc5b54e6 100644
--- a/src/third_party/wiredtiger/src/btree/bt_curprev.c
+++ b/src/third_party/wiredtiger/src/btree/bt_curprev.c
@@ -200,7 +200,7 @@ __cursor_fix_append_prev(WT_CURSOR_BTREE *cbt, bool newpage, bool restart)
cbt->iface.value.data = &cbt->v;
} else {
upd = NULL;
- restart_read:
+restart_read:
WT_RET(__wt_txn_read(session, cbt->ins->upd, &upd));
if (upd == NULL) {
cbt->v = 0;
@@ -254,7 +254,7 @@ new_page:
cbt->ins = NULL;
upd = NULL;
if (cbt->ins != NULL) {
- restart_read:
+restart_read:
WT_RET(__wt_txn_read(session, cbt->ins->upd, &upd));
}
if (upd == NULL) {
@@ -289,12 +289,12 @@ __cursor_var_append_prev(WT_CURSOR_BTREE *cbt, bool newpage, bool restart)
for (;;) {
WT_RET(__cursor_skip_prev(cbt));
- new_page:
+new_page:
if (cbt->ins == NULL)
return (WT_NOTFOUND);
__cursor_set_recno(cbt, WT_INSERT_RECNO(cbt->ins));
- restart_read:
+restart_read:
WT_RET(__wt_txn_read(session, cbt->ins->upd, &upd));
if (upd == NULL)
continue;
@@ -303,7 +303,7 @@ __cursor_var_append_prev(WT_CURSOR_BTREE *cbt, bool newpage, bool restart)
++cbt->page_deleted_count;
continue;
}
- return (__wt_value_return(session, cbt, upd));
+ return (__wt_value_return(cbt, upd));
}
/* NOTREACHED */
}
@@ -351,11 +351,12 @@ __cursor_var_prev(WT_CURSOR_BTREE *cbt, bool newpage, bool restart)
for (;;) {
__cursor_set_recno(cbt, cbt->recno - 1);
- new_page:
+new_page:
if (cbt->recno < cbt->ref->ref_recno)
return (WT_NOTFOUND);
- restart_read: /* Find the matching WT_COL slot. */
+restart_read:
+ /* Find the matching WT_COL slot. */
if ((cip = __col_var_search(cbt->ref, cbt->recno, &rle_start)) == NULL)
return (WT_NOTFOUND);
cbt->slot = WT_COL_SLOT(page, cip);
@@ -372,7 +373,7 @@ __cursor_var_prev(WT_CURSOR_BTREE *cbt, bool newpage, bool restart)
++cbt->page_deleted_count;
continue;
}
- return (__wt_value_return(session, cbt, upd));
+ return (__wt_value_return(cbt, upd));
}
/*
@@ -495,9 +496,9 @@ __cursor_row_prev(WT_CURSOR_BTREE *cbt, bool newpage, bool restart)
if (cbt->ins != NULL)
WT_RET(__cursor_skip_prev(cbt));
- new_insert:
+new_insert:
cbt->iter_retry = WT_CBT_RETRY_INSERT;
- restart_read_insert:
+restart_read_insert:
if ((ins = cbt->ins) != NULL) {
WT_RET(__wt_txn_read(session, ins->upd, &upd));
if (upd == NULL)
@@ -509,7 +510,7 @@ __cursor_row_prev(WT_CURSOR_BTREE *cbt, bool newpage, bool restart)
}
key->data = WT_INSERT_KEY(ins);
key->size = WT_INSERT_KEY_SIZE(ins);
- return (__wt_value_return(session, cbt, upd));
+ return (__wt_value_return(cbt, upd));
}
/* Check for the beginning of the page. */
@@ -533,7 +534,7 @@ __cursor_row_prev(WT_CURSOR_BTREE *cbt, bool newpage, bool restart)
cbt->iter_retry = WT_CBT_RETRY_PAGE;
cbt->slot = cbt->row_iteration_slot / 2 - 1;
- restart_read_page:
+restart_read_page:
rip = &page->pg_row[cbt->slot];
WT_RET(__wt_txn_read(session, WT_ROW_UPDATE(page, rip), &upd));
if (upd != NULL && upd->type == WT_UPDATE_TOMBSTONE) {
diff --git a/src/third_party/wiredtiger/src/btree/bt_cursor.c b/src/third_party/wiredtiger/src/btree/bt_cursor.c
index dc4f30c073b..74dccc4098e 100644
--- a/src/third_party/wiredtiger/src/btree/bt_cursor.c
+++ b/src/third_party/wiredtiger/src/btree/bt_cursor.c
@@ -87,17 +87,16 @@ __cursor_page_pinned(WT_CURSOR_BTREE *cbt)
return (false);
/*
- * If we are doing an update, we need a page with history, release the page so we get it again
- * with history if required. Eviction may be locking the page, wait until we see a "normal"
- * state and then test against that state (eviction may have already locked the page again).
+ * We need a page with history: updates need complete update lists and a read might be based on
+ * a different timestamp than the one that brought the page into memory. Release the page and
+ * read it again with history if required. Eviction may be locking the page, wait until we see a
+ * "normal" state and then test against that state (eviction may have already locked the page
+ * again).
*/
- if (F_ISSET(&session->txn, WT_TXN_UPDATE)) {
- while ((current_state = cbt->ref->state) == WT_REF_LOCKED)
- __wt_yield();
- return (current_state == WT_REF_MEM);
- }
-
- return (true);
+ while ((current_state = cbt->ref->state) == WT_REF_LOCKED)
+ __wt_yield();
+ WT_ASSERT(session, current_state == WT_REF_LIMBO || current_state == WT_REF_MEM);
+ return (current_state == WT_REF_MEM);
}
/*
@@ -356,11 +355,14 @@ __wt_cursor_valid(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp, bool *valid)
* Column-store search from a cursor.
*/
static inline int
-__cursor_col_search(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_REF *leaf)
+__cursor_col_search(WT_CURSOR_BTREE *cbt, WT_REF *leaf, bool *leaf_foundp)
{
WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- WT_WITH_PAGE_INDEX(session, ret = __wt_col_search(session, cbt->iface.recno, leaf, cbt, false));
+ session = (WT_SESSION_IMPL *)cbt->iface.session;
+ WT_WITH_PAGE_INDEX(
+ session, ret = __wt_col_search(cbt, cbt->iface.recno, leaf, false, leaf_foundp));
return (ret);
}
@@ -369,12 +371,14 @@ __cursor_col_search(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_REF *leaf
* Row-store search from a cursor.
*/
static inline int
-__cursor_row_search(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_REF *leaf, bool insert)
+__cursor_row_search(WT_CURSOR_BTREE *cbt, bool insert, WT_REF *leaf, bool *leaf_foundp)
{
WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ session = (WT_SESSION_IMPL *)cbt->iface.session;
WT_WITH_PAGE_INDEX(
- session, ret = __wt_row_search(session, &cbt->iface.key, leaf, cbt, insert, false));
+ session, ret = __wt_row_search(cbt, &cbt->iface.key, insert, leaf, false, leaf_foundp));
return (ret);
}
@@ -383,10 +387,9 @@ __cursor_row_search(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_REF *leaf
* Column-store modify from a cursor, with a separate value.
*/
static inline int
-__cursor_col_modify_v(
- WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_ITEM *value, u_int modify_type)
+__cursor_col_modify_v(WT_CURSOR_BTREE *cbt, WT_ITEM *value, u_int modify_type)
{
- return (__wt_col_modify(session, cbt, cbt->iface.recno, value, NULL, modify_type, false));
+ return (__wt_col_modify(cbt, cbt->iface.recno, value, NULL, modify_type, false));
}
/*
@@ -394,10 +397,9 @@ __cursor_col_modify_v(
* Row-store modify from a cursor, with a separate value.
*/
static inline int
-__cursor_row_modify_v(
- WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_ITEM *value, u_int modify_type)
+__cursor_row_modify_v(WT_CURSOR_BTREE *cbt, WT_ITEM *value, u_int modify_type)
{
- return (__wt_row_modify(session, cbt, &cbt->iface.key, value, NULL, modify_type, false));
+ return (__wt_row_modify(cbt, &cbt->iface.key, value, NULL, modify_type, false));
}
/*
@@ -405,10 +407,9 @@ __cursor_row_modify_v(
* Column-store modify from a cursor.
*/
static inline int
-__cursor_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, u_int modify_type)
+__cursor_col_modify(WT_CURSOR_BTREE *cbt, u_int modify_type)
{
- return (
- __wt_col_modify(session, cbt, cbt->iface.recno, &cbt->iface.value, NULL, modify_type, false));
+ return (__wt_col_modify(cbt, cbt->iface.recno, &cbt->iface.value, NULL, modify_type, false));
}
/*
@@ -416,10 +417,9 @@ __cursor_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, u_int modify
* Row-store modify from a cursor.
*/
static inline int
-__cursor_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, u_int modify_type)
+__cursor_row_modify(WT_CURSOR_BTREE *cbt, u_int modify_type)
{
- return (
- __wt_row_modify(session, cbt, &cbt->iface.key, &cbt->iface.value, NULL, modify_type, false));
+ return (__wt_row_modify(cbt, &cbt->iface.key, &cbt->iface.value, NULL, modify_type, false));
}
/*
@@ -473,8 +473,8 @@ __wt_btcur_search_uncommitted(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp)
session = (WT_SESSION_IMPL *)cursor->session;
*updp = upd = NULL; /* -Wuninitialized */
- WT_RET(btree->type == BTREE_ROW ? __cursor_row_search(session, cbt, NULL, false) :
- __cursor_col_search(session, cbt, NULL));
+ WT_RET(btree->type == BTREE_ROW ? __cursor_row_search(cbt, false, NULL, NULL) :
+ __cursor_col_search(cbt, NULL, NULL));
/*
* Ideally exact match should be found, as this transaction has searched for updates done by
@@ -513,7 +513,7 @@ __wt_btcur_search(WT_CURSOR_BTREE *cbt)
WT_DECL_RET;
WT_SESSION_IMPL *session;
WT_UPDATE *upd;
- bool valid;
+ bool leaf_found, valid;
btree = cbt->btree;
cursor = &cbt->iface;
@@ -544,18 +544,18 @@ __wt_btcur_search(WT_CURSOR_BTREE *cbt)
if (__cursor_page_pinned(cbt)) {
__wt_txn_cursor_op(session);
- WT_ERR(btree->type == BTREE_ROW ? __cursor_row_search(session, cbt, cbt->ref, false) :
- __cursor_col_search(session, cbt, cbt->ref));
+ WT_ERR(btree->type == BTREE_ROW ? __cursor_row_search(cbt, false, cbt->ref, &leaf_found) :
+ __cursor_col_search(cbt, cbt->ref, &leaf_found));
/* Return, if prepare conflict encountered. */
- if (cbt->compare == 0)
+ if (leaf_found && cbt->compare == 0)
WT_ERR(__wt_cursor_valid(cbt, &upd, &valid));
}
if (!valid) {
WT_ERR(__cursor_func_init(cbt, true));
- WT_ERR(btree->type == BTREE_ROW ? __cursor_row_search(session, cbt, NULL, false) :
- __cursor_col_search(session, cbt, NULL));
+ WT_ERR(btree->type == BTREE_ROW ? __cursor_row_search(cbt, false, NULL, NULL) :
+ __cursor_col_search(cbt, NULL, NULL));
/* Return, if prepare conflict encountered. */
if (cbt->compare == 0)
@@ -563,7 +563,7 @@ __wt_btcur_search(WT_CURSOR_BTREE *cbt)
}
if (valid)
- ret = __cursor_kv_return(session, cbt, upd);
+ ret = __cursor_kv_return(cbt, upd);
else if (__cursor_fix_implicit(btree, cbt)) {
/*
* Creating a record past the end of the tree in a fixed-length column-store implicitly
@@ -580,7 +580,7 @@ __wt_btcur_search(WT_CURSOR_BTREE *cbt)
#ifdef HAVE_DIAGNOSTIC
if (ret == 0)
- WT_ERR(__wt_cursor_key_order_init(session, cbt));
+ WT_ERR(__wt_cursor_key_order_init(cbt));
#endif
err:
@@ -605,7 +605,7 @@ __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp)
WT_SESSION_IMPL *session;
WT_UPDATE *upd;
int exact;
- bool valid;
+ bool leaf_found, valid;
btree = cbt->btree;
cursor = &cbt->iface;
@@ -630,37 +630,42 @@ __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp)
__cursor_state_save(cursor, &state);
/*
- * If we have a row-store page pinned, search it; if we don't have a
- * page pinned, or the search of the pinned page doesn't find an exact
- * match, search from the root. Unlike WT_CURSOR.search, ignore pinned
- * pages in the case of column-store, search-near isn't an interesting
- * enough case for column-store to add the complexity needed to avoid
- * the tree search.
- *
- * Set the "insert" flag for the btree row-store search; we may intend
- * to position the cursor at the end of the tree, rather than match an
- * existing record.
+ * If we have a row-store page pinned, search it; if we don't have a page pinned, or the search
+ * of the pinned page doesn't find an exact match, search from the root. Unlike
+ * WT_CURSOR.search, ignore pinned pages in the case of column-store, search-near isn't an
+ * interesting enough case for column-store to add the complexity needed to avoid the tree
+ * search.
*/
valid = false;
if (btree->type == BTREE_ROW && __cursor_page_pinned(cbt)) {
__wt_txn_cursor_op(session);
- WT_ERR(__cursor_row_search(session, cbt, cbt->ref, true));
+ /*
+ * Set the "insert" flag for row-store search; we may intend to position the cursor at the
+ * end of the tree, rather than match an existing record. (LSM requires this semantic.)
+ */
+ WT_ERR(__cursor_row_search(cbt, true, cbt->ref, &leaf_found));
/*
- * Search-near is trickier than search when searching an already pinned page. If search
- * returns the first or last page slots, discard the results and search the full tree as the
- * neighbor pages might offer better matches. This test is simplistic as we're ignoring
- * append lists (there may be no page slots or we might be legitimately positioned after the
- * last page slot). Ignore those cases, it makes things too complicated.
+ * Only use the pinned page search results if search returns an exact match or a slot other
+ * than the page's boundary slots; if that's not the case, a neighbor page might offer a
+ * better match. This test is simplistic as we're ignoring append lists (there may be no
+ * page slots or we might be legitimately positioned after the last page slot). Ignore those
+ * cases, it makes things too complicated.
*/
- if (cbt->slot != 0 && cbt->slot != cbt->ref->page->entries - 1)
+ if (leaf_found &&
+ (cbt->compare == 0 || (cbt->slot != 0 && cbt->slot != cbt->ref->page->entries - 1)))
WT_ERR(__wt_cursor_valid(cbt, &upd, &valid));
}
if (!valid) {
WT_ERR(__cursor_func_init(cbt, true));
- WT_ERR(btree->type == BTREE_ROW ? __cursor_row_search(session, cbt, NULL, true) :
- __cursor_col_search(session, cbt, NULL));
+
+ /*
+ * Set the "insert" flag for row-store search; we may intend to position the cursor at the
+ * end of the tree, rather than match an existing record. (LSM requires this semantic.)
+ */
+ WT_ERR(btree->type == BTREE_ROW ? __cursor_row_search(cbt, true, NULL, NULL) :
+ __cursor_col_search(cbt, NULL, NULL));
WT_ERR(__wt_cursor_valid(cbt, &upd, &valid));
}
@@ -683,7 +688,7 @@ __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp)
*/
if (valid) {
exact = cbt->compare;
- ret = __cursor_kv_return(session, cbt, upd);
+ ret = __cursor_kv_return(cbt, upd);
} else if (__cursor_fix_implicit(btree, cbt)) {
cbt->recno = cursor->recno;
cbt->v = 0;
@@ -729,7 +734,7 @@ err:
#ifdef HAVE_DIAGNOSTIC
if (ret == 0)
- WT_TRET(__wt_cursor_key_order_init(session, cbt));
+ WT_TRET(__wt_cursor_key_order_init(cbt));
#endif
if (ret != 0) {
@@ -798,8 +803,8 @@ __wt_btcur_insert(WT_CURSOR_BTREE *cbt)
* Correct to an exact match so we can update whatever we're pointing at.
*/
cbt->compare = 0;
- ret = btree->type == BTREE_ROW ? __cursor_row_modify(session, cbt, WT_UPDATE_STANDARD) :
- __cursor_col_modify(session, cbt, WT_UPDATE_STANDARD);
+ ret = btree->type == BTREE_ROW ? __cursor_row_modify(cbt, WT_UPDATE_STANDARD) :
+ __cursor_col_modify(cbt, WT_UPDATE_STANDARD);
if (ret == 0)
goto done;
@@ -826,7 +831,7 @@ retry:
WT_ERR(__cursor_func_init(cbt, true));
if (btree->type == BTREE_ROW) {
- WT_ERR(__cursor_row_search(session, cbt, NULL, true));
+ WT_ERR(__cursor_row_search(cbt, true, NULL, NULL));
/*
* If not overwriting, fail if the key exists, else insert the key/value pair.
*/
@@ -836,7 +841,7 @@ retry:
WT_ERR(WT_DUPLICATE_KEY);
}
- ret = __cursor_row_modify(session, cbt, WT_UPDATE_STANDARD);
+ ret = __cursor_row_modify(cbt, WT_UPDATE_STANDARD);
} else if (append_key) {
/*
* Optionally insert a new record (ignoring the application's record number). The real
@@ -844,11 +849,11 @@ retry:
*/
cbt->iface.recno = WT_RECNO_OOB;
cbt->compare = 1;
- WT_ERR(__cursor_col_search(session, cbt, NULL));
- WT_ERR(__cursor_col_modify(session, cbt, WT_UPDATE_STANDARD));
+ WT_ERR(__cursor_col_search(cbt, NULL, NULL));
+ WT_ERR(__cursor_col_modify(cbt, WT_UPDATE_STANDARD));
cursor->recno = cbt->recno;
} else {
- WT_ERR(__cursor_col_search(session, cbt, NULL));
+ WT_ERR(__cursor_col_search(cbt, NULL, NULL));
/*
* If not overwriting, fail if the key exists. Creating a record past the end of the tree in
@@ -864,7 +869,7 @@ retry:
WT_ERR(WT_DUPLICATE_KEY);
}
- WT_ERR(__cursor_col_modify(session, cbt, WT_UPDATE_STANDARD));
+ WT_ERR(__cursor_col_modify(cbt, WT_UPDATE_STANDARD));
}
err:
@@ -944,7 +949,7 @@ __wt_btcur_insert_check(WT_CURSOR_BTREE *cbt)
retry:
WT_ERR(__cursor_func_init(cbt, true));
- WT_ERR(__cursor_row_search(session, cbt, NULL, true));
+ WT_ERR(__cursor_row_search(cbt, true, NULL, NULL));
/* Just check for conflicts. */
ret = __curfile_update_check(cbt);
@@ -1023,8 +1028,8 @@ __wt_btcur_remove(WT_CURSOR_BTREE *cbt, bool positioned)
* Correct to an exact match so we can remove whatever we're pointing at.
*/
cbt->compare = 0;
- ret = btree->type == BTREE_ROW ? __cursor_row_modify(session, cbt, WT_UPDATE_TOMBSTONE) :
- __cursor_col_modify(session, cbt, WT_UPDATE_TOMBSTONE);
+ ret = btree->type == BTREE_ROW ? __cursor_row_modify(cbt, WT_UPDATE_TOMBSTONE) :
+ __cursor_col_modify(cbt, WT_UPDATE_TOMBSTONE);
if (ret == 0)
goto done;
goto err;
@@ -1046,7 +1051,7 @@ retry:
WT_ERR(__cursor_func_init(cbt, true));
if (btree->type == BTREE_ROW) {
- ret = __cursor_row_search(session, cbt, NULL, false);
+ ret = __cursor_row_search(cbt, false, NULL, NULL);
if (ret == WT_NOTFOUND)
goto search_notfound;
WT_ERR(ret);
@@ -1060,9 +1065,9 @@ retry:
if (!valid)
goto search_notfound;
- ret = __cursor_row_modify(session, cbt, WT_UPDATE_TOMBSTONE);
+ ret = __cursor_row_modify(cbt, WT_UPDATE_TOMBSTONE);
} else {
- ret = __cursor_col_search(session, cbt, NULL);
+ ret = __cursor_col_search(cbt, NULL, NULL);
if (ret == WT_NOTFOUND)
goto search_notfound;
WT_ERR(ret);
@@ -1092,7 +1097,7 @@ retry:
*/
cbt->recno = cursor->recno;
} else
- ret = __cursor_col_modify(session, cbt, WT_UPDATE_TOMBSTONE);
+ ret = __cursor_col_modify(cbt, WT_UPDATE_TOMBSTONE);
}
err:
@@ -1111,7 +1116,7 @@ err:
*/
if (positioned) {
if (searched)
- WT_TRET(__wt_key_return(session, cbt));
+ WT_TRET(__wt_key_return(cbt));
} else {
F_CLR(cursor, WT_CURSTD_KEY_SET);
WT_TRET(__cursor_reset(cbt));
@@ -1134,7 +1139,7 @@ err:
* subsequent iteration can succeed, we cannot return success.)
*/
if (0) {
- search_notfound:
+search_notfound:
ret = WT_NOTFOUND;
if (!iterating && !positioned && F_ISSET(cursor, WT_CURSTD_OVERWRITE))
ret = 0;
@@ -1174,7 +1179,7 @@ __btcur_update(WT_CURSOR_BTREE *cbt, WT_ITEM *value, u_int modify_type)
WT_DECL_RET;
WT_SESSION_IMPL *session;
uint64_t yield_count, sleep_usecs;
- bool valid;
+ bool leaf_found, valid;
btree = cbt->btree;
cursor = &cbt->iface;
@@ -1205,8 +1210,8 @@ __btcur_update(WT_CURSOR_BTREE *cbt, WT_ITEM *value, u_int modify_type)
* Correct to an exact match so we can update whatever we're pointing at.
*/
cbt->compare = 0;
- ret = btree->type == BTREE_ROW ? __cursor_row_modify_v(session, cbt, value, modify_type) :
- __cursor_col_modify_v(session, cbt, value, modify_type);
+ ret = btree->type == BTREE_ROW ? __cursor_row_modify_v(cbt, value, modify_type) :
+ __cursor_col_modify_v(cbt, value, modify_type);
if (ret == 0)
goto done;
@@ -1229,12 +1234,31 @@ __btcur_update(WT_CURSOR_BTREE *cbt, WT_ITEM *value, u_int modify_type)
WT_ERR(__cursor_localvalue(cursor));
__cursor_state_save(cursor, &state);
+ /* If our caller configures for a local search and we have a page pinned, do that search. */
+ if (F_ISSET(cursor, WT_CURSTD_UPDATE_LOCAL) && __cursor_page_pinned(cbt)) {
+ __wt_txn_cursor_op(session);
+ WT_ERR(__wt_txn_autocommit_check(session));
+
+ WT_ERR(btree->type == BTREE_ROW ? __cursor_row_search(cbt, true, cbt->ref, &leaf_found) :
+ __cursor_col_search(cbt, cbt->ref, &leaf_found));
+ /*
+ * Only use the pinned page search results if search returns an exact match or a slot other
+ * than the page's boundary slots; if that's not the case, the record might belong on an
+ * entirely different page. This test is simplistic as we're ignoring append lists (there
+ * may be no page slots or we might be legitimately positioned after the last page slot).
+ * Ignore those cases, it makes things too complicated.
+ */
+ if (leaf_found &&
+ (cbt->compare == 0 || (cbt->slot != 0 && cbt->slot != cbt->ref->page->entries - 1)))
+ goto update_local;
+ }
+
retry:
WT_ERR(__cursor_func_init(cbt, true));
-
+ WT_ERR(btree->type == BTREE_ROW ? __cursor_row_search(cbt, true, NULL, NULL) :
+ __cursor_col_search(cbt, NULL, NULL));
+update_local:
if (btree->type == BTREE_ROW) {
- WT_ERR(__cursor_row_search(session, cbt, NULL, true));
-
/*
* If not overwriting, check for conflicts and fail if the key does not exist.
*/
@@ -1246,10 +1270,8 @@ retry:
if (!valid)
WT_ERR(WT_NOTFOUND);
}
- ret = __cursor_row_modify_v(session, cbt, value, modify_type);
+ ret = __cursor_row_modify_v(cbt, value, modify_type);
} else {
- WT_ERR(__cursor_col_search(session, cbt, NULL));
-
/*
* If not overwriting, fail if the key doesn't exist. If we find an update for the key,
* check for conflicts. Update the record if it exists. Creating a record past the end of
@@ -1264,7 +1286,7 @@ retry:
if ((cbt->compare != 0 || !valid) && !__cursor_fix_implicit(btree, cbt))
WT_ERR(WT_NOTFOUND);
}
- ret = __cursor_col_modify_v(session, cbt, value, modify_type);
+ ret = __cursor_col_modify_v(cbt, value, modify_type);
}
err:
@@ -1287,7 +1309,7 @@ done:
/*
* WT_CURSOR.update returns a key and a value.
*/
- ret = __cursor_kv_return(session, cbt, cbt->modify_update);
+ ret = __cursor_kv_return(cbt, cbt->modify_update);
break;
case WT_UPDATE_RESERVE:
/*
@@ -1300,7 +1322,7 @@ done:
* WT_CURSOR.modify has already created the return value and our job is to leave it
* untouched.
*/
- ret = __wt_key_return(session, cbt);
+ ret = __wt_key_return(cbt);
break;
case WT_UPDATE_BIRTHMARK:
case WT_UPDATE_TOMBSTONE:
@@ -1632,12 +1654,14 @@ __wt_btcur_equals(WT_CURSOR_BTREE *a_arg, WT_CURSOR_BTREE *b_arg, int *equalp)
* Discard a cursor range from row-store or variable-width column-store tree.
*/
static int
-__cursor_truncate(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop,
- int (*rmfunc)(WT_SESSION_IMPL *, WT_CURSOR_BTREE *, u_int))
+__cursor_truncate(
+ WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop, int (*rmfunc)(WT_CURSOR_BTREE *, u_int))
{
WT_DECL_RET;
+ WT_SESSION_IMPL *session;
uint64_t yield_count, sleep_usecs;
+ session = (WT_SESSION_IMPL *)start->iface.session;
yield_count = sleep_usecs = 0;
/*
@@ -1664,7 +1688,7 @@ retry:
WT_ASSERT(session, F_MASK((WT_CURSOR *)start, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_INT);
for (;;) {
- WT_ERR(rmfunc(session, start, WT_UPDATE_TOMBSTONE));
+ WT_ERR(rmfunc(start, WT_UPDATE_TOMBSTONE));
if (stop != NULL && __cursor_equals(start, stop))
return (0);
@@ -1689,13 +1713,15 @@ err:
* Discard a cursor range from fixed-width column-store tree.
*/
static int
-__cursor_truncate_fix(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop,
- int (*rmfunc)(WT_SESSION_IMPL *, WT_CURSOR_BTREE *, u_int))
+__cursor_truncate_fix(
+ WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop, int (*rmfunc)(WT_CURSOR_BTREE *, u_int))
{
WT_DECL_RET;
+ WT_SESSION_IMPL *session;
uint64_t yield_count, sleep_usecs;
const uint8_t *value;
+ session = (WT_SESSION_IMPL *)start->iface.session;
yield_count = sleep_usecs = 0;
/*
@@ -1724,7 +1750,7 @@ retry:
for (;;) {
value = (const uint8_t *)start->iface.value.data;
if (*value != 0)
- WT_ERR(rmfunc(session, start, WT_UPDATE_TOMBSTONE));
+ WT_ERR(rmfunc(start, WT_UPDATE_TOMBSTONE));
if (stop != NULL && __cursor_equals(start, stop))
return (0);
@@ -1759,6 +1785,8 @@ __wt_btcur_range_truncate(WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop)
btree = start->btree;
WT_STAT_DATA_INCR(session, cursor_truncate);
+ WT_RET(__wt_txn_autocommit_check(session));
+
/*
* For recovery, log the start and stop keys for a truncate operation,
* not the individual records removed. On the other hand, for rollback
@@ -1773,10 +1801,10 @@ __wt_btcur_range_truncate(WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop)
switch (btree->type) {
case BTREE_COL_FIX:
- WT_ERR(__cursor_truncate_fix(session, start, stop, __cursor_col_modify));
+ WT_ERR(__cursor_truncate_fix(start, stop, __cursor_col_modify));
break;
case BTREE_COL_VAR:
- WT_ERR(__cursor_truncate(session, start, stop, __cursor_col_modify));
+ WT_ERR(__cursor_truncate(start, stop, __cursor_col_modify));
break;
case BTREE_ROW:
/*
@@ -1788,7 +1816,7 @@ __wt_btcur_range_truncate(WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop)
* setting up the truncate so we're good to go: if that ever changes, we'd need to do
* something here to ensure a fully instantiated cursor.
*/
- WT_ERR(__cursor_truncate(session, start, stop, __cursor_row_modify));
+ WT_ERR(__cursor_truncate(start, stop, __cursor_row_modify));
break;
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_debug.c b/src/third_party/wiredtiger/src/btree/bt_debug.c
index 3c9e4c260d0..e2d50a8745b 100644
--- a/src/third_party/wiredtiger/src/btree/bt_debug.c
+++ b/src/third_party/wiredtiger/src/btree/bt_debug.c
@@ -700,6 +700,28 @@ __wt_debug_cursor_page(void *cursor_arg, const char *ofile)
}
/*
+ * __wt_debug_cursor_las --
+ * Dump the LAS tree given a user cursor.
+ */
+int
+__wt_debug_cursor_las(void *cursor_arg, const char *ofile)
+ WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
+{
+ WT_CONNECTION_IMPL *conn;
+ WT_CURSOR *cursor;
+ WT_CURSOR_BTREE *cbt;
+ WT_SESSION_IMPL *las_session;
+
+ cursor = cursor_arg;
+ conn = S2C((WT_SESSION_IMPL *)cursor->session);
+ las_session = conn->cache->las_session[0];
+ if (las_session == NULL)
+ return (0);
+ cbt = (WT_CURSOR_BTREE *)las_session->las_cursor;
+ return (__wt_debug_tree_all(las_session, cbt->btree, NULL, ofile));
+}
+
+/*
* __debug_tree --
* Dump the in-memory information for a tree.
*/
diff --git a/src/third_party/wiredtiger/src/btree/bt_random.c b/src/third_party/wiredtiger/src/btree/bt_random.c
index 525728b73dc..c0d4f342bb1 100644
--- a/src/third_party/wiredtiger/src/btree/bt_random.c
+++ b/src/third_party/wiredtiger/src/btree/bt_random.c
@@ -402,7 +402,7 @@ random_page_entry:
WT_ERR(__wt_row_random_leaf(session, cbt));
WT_ERR(__wt_cursor_valid(cbt, &upd, &valid));
if (valid)
- WT_ERR(__cursor_kv_return(session, cbt, upd));
+ WT_ERR(__cursor_kv_return(cbt, upd));
else {
if ((ret = __wt_btcur_next(cbt, false)) == WT_NOTFOUND)
ret = __wt_btcur_prev(cbt, false);
diff --git a/src/third_party/wiredtiger/src/btree/bt_read.c b/src/third_party/wiredtiger/src/btree/bt_read.c
index fc7a05f0083..cda1eee1eeb 100644
--- a/src/third_party/wiredtiger/src/btree/bt_read.c
+++ b/src/third_party/wiredtiger/src/btree/bt_read.c
@@ -31,8 +31,8 @@ __col_instantiate(
__wt_free_update_list(session, upd);
/* Search the page and add updates. */
- WT_RET(__wt_col_search(session, recno, ref, cbt, true));
- WT_RET(__wt_col_modify(session, cbt, recno, NULL, updlist, WT_UPDATE_INVALID, false));
+ WT_RET(__wt_col_search(cbt, recno, ref, true, NULL));
+ WT_RET(__wt_col_modify(cbt, recno, NULL, updlist, WT_UPDATE_INVALID, false));
return (0);
}
@@ -59,8 +59,8 @@ __row_instantiate(
__wt_free_update_list(session, upd);
/* Search the page and add updates. */
- WT_RET(__wt_row_search(session, key, ref, cbt, true, true));
- WT_RET(__wt_row_modify(session, cbt, key, NULL, updlist, WT_UPDATE_INVALID, false));
+ WT_RET(__wt_row_search(cbt, key, true, ref, true, NULL));
+ WT_RET(__wt_row_modify(cbt, key, NULL, updlist, WT_UPDATE_INVALID, false));
return (0);
}
@@ -752,7 +752,7 @@ read:
continue;
}
- skip_evict:
+skip_evict:
/*
* If we read the page and are configured to not trash
* the cache, and no other thread has already used the
diff --git a/src/third_party/wiredtiger/src/btree/bt_ret.c b/src/third_party/wiredtiger/src/btree/bt_ret.c
index 9c9dbe5c30a..bec7a1f7e5e 100644
--- a/src/third_party/wiredtiger/src/btree/bt_ret.c
+++ b/src/third_party/wiredtiger/src/btree/bt_ret.c
@@ -13,15 +13,17 @@
* Change the cursor to reference an internal return key.
*/
static inline int
-__key_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
+__key_return(WT_CURSOR_BTREE *cbt)
{
WT_CURSOR *cursor;
WT_ITEM *tmp;
WT_PAGE *page;
WT_ROW *rip;
+ WT_SESSION_IMPL *session;
page = cbt->ref->page;
cursor = &cbt->iface;
+ session = (WT_SESSION_IMPL *)cbt->iface.session;
if (page->type == WT_PAGE_ROW_LEAF) {
rip = &page->pg_row[cbt->slot];
@@ -72,7 +74,7 @@ __key_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
* Change the cursor to reference an internal original-page return value.
*/
static inline int
-__value_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
+__value_return(WT_CURSOR_BTREE *cbt)
{
WT_BTREE *btree;
WT_CELL *cell;
@@ -80,8 +82,10 @@ __value_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
WT_CURSOR *cursor;
WT_PAGE *page;
WT_ROW *rip;
+ WT_SESSION_IMPL *session;
uint8_t v;
+ session = (WT_SESSION_IMPL *)cbt->iface.session;
btree = S2BT(session);
page = cbt->ref->page;
@@ -123,17 +127,18 @@ __value_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
* Change the cursor to reference an internal update structure return value.
*/
int
-__wt_value_return_upd(
- WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd, bool ignore_visibility)
+__wt_value_return_upd(WT_CURSOR_BTREE *cbt, WT_UPDATE *upd, bool ignore_visibility)
{
WT_CURSOR *cursor;
WT_DECL_RET;
+ WT_SESSION_IMPL *session;
WT_UPDATE **listp, *list[WT_MODIFY_ARRAY_SIZE];
size_t allocated_bytes;
u_int i;
bool skipped_birthmark;
cursor = &cbt->iface;
+ session = (WT_SESSION_IMPL *)cbt->iface.session;
allocated_bytes = 0;
/*
@@ -213,7 +218,7 @@ __wt_value_return_upd(
*/
WT_ASSERT(session, cbt->slot != UINT32_MAX);
- WT_ERR(__value_return(session, cbt));
+ WT_ERR(__value_return(cbt));
}
} else if (upd->type == WT_UPDATE_TOMBSTONE)
WT_ERR(__wt_buf_set(session, &cursor->value, "", 0));
@@ -237,7 +242,7 @@ err:
* Change the cursor to reference an internal return key.
*/
int
-__wt_key_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
+__wt_key_return(WT_CURSOR_BTREE *cbt)
{
WT_CURSOR *cursor;
@@ -253,7 +258,7 @@ __wt_key_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
*/
F_CLR(cursor, WT_CURSTD_KEY_EXT);
if (!F_ISSET(cursor, WT_CURSTD_KEY_INT)) {
- WT_RET(__key_return(session, cbt));
+ WT_RET(__key_return(cbt));
F_SET(cursor, WT_CURSTD_KEY_INT);
}
return (0);
@@ -264,7 +269,7 @@ __wt_key_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
* Change the cursor to reference an internal return value.
*/
int
-__wt_value_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd)
+__wt_value_return(WT_CURSOR_BTREE *cbt, WT_UPDATE *upd)
{
WT_CURSOR *cursor;
@@ -272,9 +277,9 @@ __wt_value_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd
F_CLR(cursor, WT_CURSTD_VALUE_EXT);
if (upd == NULL)
- WT_RET(__value_return(session, cbt));
+ WT_RET(__value_return(cbt));
else
- WT_RET(__wt_value_return_upd(session, cbt, upd, false));
+ WT_RET(__wt_value_return_upd(cbt, upd, false));
F_SET(cursor, WT_CURSTD_VALUE_INT);
return (0);
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_slvg.c b/src/third_party/wiredtiger/src/btree/bt_slvg.c
index fea2da9ae03..04ec016a3be 100644
--- a/src/third_party/wiredtiger/src/btree/bt_slvg.c
+++ b/src/third_party/wiredtiger/src/btree/bt_slvg.c
@@ -869,7 +869,7 @@ __slvg_col_range_overlap(WT_SESSION_IMPL *session, uint32_t a_slot, uint32_t b_s
* Case #5: a_trk is a superset of b_trk and a_trk is more desirable -- discard b_trk.
*/
if (a_trk->trk_gen > b_trk->trk_gen) {
- delete_b:
+delete_b:
/*
* After page and overflow reconciliation, one (and only one)
* page can reference an overflow record. But, if we split a
@@ -1460,7 +1460,7 @@ __slvg_row_range_overlap(WT_SESSION_IMPL *session, uint32_t a_slot, uint32_t b_s
* Case #5: a_trk is a superset of b_trk and a_trk is more desirable -- discard b_trk.
*/
if (a_trk->trk_gen > b_trk->trk_gen) {
- delete_b:
+delete_b:
/*
* After page and overflow reconciliation, one (and only one)
* page can reference an overflow record. But, if we split a
diff --git a/src/third_party/wiredtiger/src/btree/bt_split.c b/src/third_party/wiredtiger/src/btree/bt_split.c
index 5a7f9279bd4..80d523ae5ee 100644
--- a/src/third_party/wiredtiger/src/btree/bt_split.c
+++ b/src/third_party/wiredtiger/src/btree/bt_split.c
@@ -1425,10 +1425,10 @@ __split_multi_inmem(WT_SESSION_IMPL *session, WT_PAGE *orig, WT_MULTI *multi, WT
recno = WT_INSERT_RECNO(supd->ins);
/* Search the page. */
- WT_ERR(__wt_col_search(session, recno, ref, &cbt, true));
+ WT_ERR(__wt_col_search(&cbt, recno, ref, true, NULL));
/* Apply the modification. */
- WT_ERR(__wt_col_modify(session, &cbt, recno, NULL, upd, WT_UPDATE_INVALID, true));
+ WT_ERR(__wt_col_modify(&cbt, recno, NULL, upd, WT_UPDATE_INVALID, true));
break;
case WT_PAGE_ROW_LEAF:
/* Build a key. */
@@ -1447,15 +1447,13 @@ __split_multi_inmem(WT_SESSION_IMPL *session, WT_PAGE *orig, WT_MULTI *multi, WT
WT_ASSERT(session, __wt_count_birthmarks(upd) <= 1);
/* Search the page. */
- WT_ERR(__wt_row_search(session, key, ref, &cbt, true, true));
+ WT_ERR(__wt_row_search(&cbt, key, true, ref, true, NULL));
- /*
- * Birthmarks should only be applied to on-page values.
- */
+ /* Birthmarks should only be applied to on-page values. */
WT_ASSERT(session, cbt.compare == 0 || upd->type != WT_UPDATE_BIRTHMARK);
/* Apply the modification. */
- WT_ERR(__wt_row_modify(session, &cbt, key, NULL, upd, WT_UPDATE_INVALID, true));
+ WT_ERR(__wt_row_modify(&cbt, key, NULL, upd, WT_UPDATE_INVALID, true));
break;
default:
WT_ERR(__wt_illegal_value(session, orig->type));
diff --git a/src/third_party/wiredtiger/src/btree/bt_vrfy.c b/src/third_party/wiredtiger/src/btree/bt_vrfy.c
index d796436752d..6fa2dbda197 100644
--- a/src/third_party/wiredtiger/src/btree/bt_vrfy.c
+++ b/src/third_party/wiredtiger/src/btree/bt_vrfy.c
@@ -366,7 +366,7 @@ __verify_tree(WT_SESSION_IMPL *session, WT_REF *ref, WT_VSTUFF *vs)
goto recno_chk;
case WT_PAGE_COL_VAR:
recno = ref->ref_recno;
- recno_chk:
+recno_chk:
if (recno != vs->record_total + 1)
WT_RET_MSG(session, WT_ERROR, "page at %s has a starting record of %" PRIu64
" when the expected starting record is %" PRIu64,
@@ -422,7 +422,7 @@ __verify_tree(WT_SESSION_IMPL *session, WT_REF *ref, WT_VSTUFF *vs)
case WT_PAGE_COL_INT:
case WT_PAGE_ROW_INT:
if (unpack->raw != WT_CELL_ADDR_INT)
- celltype_err:
+celltype_err:
WT_RET_MSG(session, WT_ERROR,
"page at %s, of type %s, is referenced in "
"its parent by a cell of type %s",
diff --git a/src/third_party/wiredtiger/src/btree/bt_vrfy_dsk.c b/src/third_party/wiredtiger/src/btree/bt_vrfy_dsk.c
index 603b379f928..831627d53e4 100644
--- a/src/third_party/wiredtiger/src/btree/bt_vrfy_dsk.c
+++ b/src/third_party/wiredtiger/src/btree/bt_vrfy_dsk.c
@@ -357,7 +357,7 @@ __verify_dsk_row(WT_SESSION_IMPL *session, const char *tag, const WT_PAGE_HEADER
current->size = prefix + unpack->size;
}
- key_compare:
+key_compare:
/*
* Compare the current key against the last key.
*
@@ -550,7 +550,7 @@ __verify_dsk_col_var(WT_SESSION_IMPL *session, const char *tag, const WT_PAGE_HE
goto match_err;
} else if (cell_type == WT_CELL_VALUE && last_data != NULL && last_size == unpack->size &&
memcmp(last_data, unpack->data, last_size) == 0)
- match_err:
+match_err:
WT_RET_VRFY(session, "data entries %" PRIu32 " and %" PRIu32
" on page at %s are identical and should "
"have been run-length encoded",
diff --git a/src/third_party/wiredtiger/src/btree/col_modify.c b/src/third_party/wiredtiger/src/btree/col_modify.c
index 273797b19fc..3610ca6bbdc 100644
--- a/src/third_party/wiredtiger/src/btree/col_modify.c
+++ b/src/third_party/wiredtiger/src/btree/col_modify.c
@@ -15,8 +15,8 @@ static int __col_insert_alloc(WT_SESSION_IMPL *, uint64_t, u_int, WT_INSERT **,
* Column-store delete, insert, and update.
*/
int
-__wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, uint64_t recno,
- const WT_ITEM *value, WT_UPDATE *upd_arg, u_int modify_type, bool exclusive)
+__wt_col_modify(WT_CURSOR_BTREE *cbt, uint64_t recno, const WT_ITEM *value, WT_UPDATE *upd_arg,
+ u_int modify_type, bool exclusive)
{
static const WT_ITEM col_fix_remove = {"", 1, NULL, 0, 0};
WT_BTREE *btree;
@@ -25,6 +25,7 @@ __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, uint64_t recno,
WT_INSERT_HEAD *ins_head, **ins_headp;
WT_PAGE *page;
WT_PAGE_MODIFY *mod;
+ WT_SESSION_IMPL *session;
WT_UPDATE *old_upd, *upd;
size_t ins_size, upd_size;
u_int i, skipdepth;
@@ -33,6 +34,7 @@ __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, uint64_t recno,
btree = cbt->btree;
ins = NULL;
page = cbt->ref->page;
+ session = (WT_SESSION_IMPL *)cbt->iface.session;
upd = upd_arg;
append = logged = false;
diff --git a/src/third_party/wiredtiger/src/btree/col_srch.c b/src/third_party/wiredtiger/src/btree/col_srch.c
index f202dbd7f7b..2a74d37cb39 100644
--- a/src/third_party/wiredtiger/src/btree/col_srch.c
+++ b/src/third_party/wiredtiger/src/btree/col_srch.c
@@ -59,7 +59,7 @@ __check_leaf_key_range(WT_SESSION_IMPL *session, uint64_t recno, WT_REF *leaf, W
*/
int
__wt_col_search(
- WT_SESSION_IMPL *session, uint64_t search_recno, WT_REF *leaf, WT_CURSOR_BTREE *cbt, bool restore)
+ WT_CURSOR_BTREE *cbt, uint64_t search_recno, WT_REF *leaf, bool leaf_safe, bool *leaf_foundp)
{
WT_BTREE *btree;
WT_COL *cip;
@@ -69,10 +69,12 @@ __wt_col_search(
WT_PAGE *page;
WT_PAGE_INDEX *pindex, *parent_pindex;
WT_REF *current, *descent;
+ WT_SESSION_IMPL *session;
uint64_t recno;
uint32_t base, indx, limit, read_flags;
int depth;
+ session = (WT_SESSION_IMPL *)cbt->iface.session;
btree = S2BT(session);
current = NULL;
@@ -88,23 +90,18 @@ __wt_col_search(
/*
* We may be searching only a single leaf page, not the full tree. In the normal case where we
* are searching a tree, check the page's parent keys before doing the full search, it's faster
- * when the cursor is being re-positioned. Skip this if the page is being re-instantiated in
- * memory.
+ * when the cursor is being re-positioned. Skip that check if we know the page is the right one
+ * (for example, when re-instantiating a page in memory, in that case we know the target must be
+ * on the current page).
*/
if (leaf != NULL) {
WT_ASSERT(session, search_recno != WT_RECNO_OOB);
- if (!restore) {
+ if (!leaf_safe) {
WT_RET(__check_leaf_key_range(session, recno, leaf, cbt));
- if (cbt->compare != 0) {
- /*
- * !!!
- * WT_CURSOR.search_near uses the slot value to
- * decide if there was an on-page match.
- */
- cbt->slot = 0;
+ *leaf_foundp = cbt->compare == 0;
+ if (!*leaf_foundp)
return (0);
- }
}
current = leaf;
diff --git a/src/third_party/wiredtiger/src/btree/row_key.c b/src/third_party/wiredtiger/src/btree/row_key.c
index 35e8373ef6f..3c609e9344f 100644
--- a/src/third_party/wiredtiger/src/btree/row_key.c
+++ b/src/third_party/wiredtiger/src/btree/row_key.c
@@ -162,7 +162,7 @@ __wt_row_leaf_key_work(
direction = BACKWARD;
for (slot_offset = 0;;) {
if (0) {
- switch_and_jump:
+switch_and_jump:
/* Switching to a forward roll. */
WT_ASSERT(session, direction == BACKWARD);
direction = FORWARD;
diff --git a/src/third_party/wiredtiger/src/btree/row_modify.c b/src/third_party/wiredtiger/src/btree/row_modify.c
index c6c35de6e6f..7d9425b4ac0 100644
--- a/src/third_party/wiredtiger/src/btree/row_modify.c
+++ b/src/third_party/wiredtiger/src/btree/row_modify.c
@@ -41,14 +41,15 @@ err:
* Row-store insert, update and delete.
*/
int
-__wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, const WT_ITEM *key,
- const WT_ITEM *value, WT_UPDATE *upd_arg, u_int modify_type, bool exclusive)
+__wt_row_modify(WT_CURSOR_BTREE *cbt, const WT_ITEM *key, const WT_ITEM *value, WT_UPDATE *upd_arg,
+ u_int modify_type, bool exclusive)
{
WT_DECL_RET;
WT_INSERT *ins;
WT_INSERT_HEAD *ins_head, **ins_headp;
WT_PAGE *page;
WT_PAGE_MODIFY *mod;
+ WT_SESSION_IMPL *session;
WT_UPDATE *old_upd, *upd, **upd_entry;
size_t ins_size, upd_size;
uint32_t ins_slot;
@@ -57,6 +58,7 @@ __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, const WT_ITEM *k
ins = NULL;
page = cbt->ref->page;
+ session = (WT_SESSION_IMPL *)cbt->iface.session;
upd = upd_arg;
logged = false;
@@ -295,6 +297,7 @@ __wt_update_obsolete_check(
WT_UPDATE *first, *next, *prev;
size_t size;
u_int count;
+ bool upd_visible_all_seen;
txn_global = &S2C(session)->txn_global;
@@ -309,21 +312,38 @@ __wt_update_obsolete_check(
* Only updates with globally visible, self-contained data can terminate
* update chains.
*
- * Birthmarks are a special case: once a birthmark becomes obsolete, it
- * can be discarded and subsequent reads will see the on-page value (as
- * expected). Inserting updates into the lookaside table relies on
- * this behavior to avoid creating update chains with multiple
- * birthmarks.
+ * Birthmarks are a special case: once a birthmark becomes obsolete, it can be discarded if
+ * there is a globally visible update before it and subsequent reads will see the on-page value
+ * (as expected). Inserting updates into the lookaside table relies on this behavior to avoid
+ * creating update chains with multiple birthmarks. We cannot discard the birthmark if it's the
+ * first globally visible update, as the previous updates can be aborted and freed, causing the
+ * entire update chain to be removed.
*/
- for (first = prev = NULL, count = 0; upd != NULL; prev = upd, upd = upd->next, count++) {
+ for (first = prev = NULL, upd_visible_all_seen = false, count = 0; upd != NULL;
+ prev = upd, upd = upd->next, count++) {
if (upd->txnid == WT_TXN_ABORTED)
continue;
+
if (!__wt_txn_upd_visible_all(session, upd))
first = NULL;
- else if (first == NULL && upd->type == WT_UPDATE_BIRTHMARK)
- first = prev;
- else if (first == NULL && WT_UPDATE_DATA_VALUE(upd))
- first = upd;
+ else {
+ if (first == NULL) {
+ /*
+ * If we have seen a globally visible update before the birthmark, the birthmark can
+ * be discarded.
+ */
+ if (upd_visible_all_seen && upd->type == WT_UPDATE_BIRTHMARK)
+ first = prev;
+ /*
+ * We cannot discard the birthmark if it is the first globally visible update as the
+ * previous updates can be aborted, causing the entire update chain to be removed.
+ */
+ else if (upd->type == WT_UPDATE_BIRTHMARK || WT_UPDATE_DATA_VALUE(upd))
+ first = upd;
+ }
+
+ upd_visible_all_seen = true;
+ }
}
/*
diff --git a/src/third_party/wiredtiger/src/btree/row_srch.c b/src/third_party/wiredtiger/src/btree/row_srch.c
index a7a9c282564..87d11f84b83 100644
--- a/src/third_party/wiredtiger/src/btree/row_srch.c
+++ b/src/third_party/wiredtiger/src/btree/row_srch.c
@@ -199,8 +199,8 @@ __check_leaf_key_range(
* Search a row-store tree for a specific key.
*/
int
-__wt_row_search(WT_SESSION_IMPL *session, WT_ITEM *srch_key, WT_REF *leaf, WT_CURSOR_BTREE *cbt,
- bool insert, bool restore)
+__wt_row_search(WT_CURSOR_BTREE *cbt, WT_ITEM *srch_key, bool insert, WT_REF *leaf, bool leaf_safe,
+ bool *leaf_foundp)
{
WT_BTREE *btree;
WT_COLLATOR *collator;
@@ -211,11 +211,13 @@ __wt_row_search(WT_SESSION_IMPL *session, WT_ITEM *srch_key, WT_REF *leaf, WT_CU
WT_PAGE_INDEX *pindex, *parent_pindex;
WT_REF *current, *descent;
WT_ROW *rip;
+ WT_SESSION_IMPL *session;
size_t match, skiphigh, skiplow;
uint32_t base, indx, limit, read_flags;
int cmp, depth;
bool append_check, descend_right, done;
+ session = (WT_SESSION_IMPL *)cbt->iface.session;
btree = S2BT(session);
collator = btree->collator;
item = cbt->tmp;
@@ -245,21 +247,16 @@ __wt_row_search(WT_SESSION_IMPL *session, WT_ITEM *srch_key, WT_REF *leaf, WT_CU
/*
* We may be searching only a single leaf page, not the full tree. In the normal case where we
* are searching a tree, check the page's parent keys before doing the full search, it's faster
- * when the cursor is being re-positioned. Skip this if the page is being re-instantiated in
- * memory.
+ * when the cursor is being re-positioned. Skip that check if we know the page is the right one
+ * (for example, when re-instantiating a page in memory, in that case we know the target must be
+ * on the current page).
*/
if (leaf != NULL) {
- if (!restore) {
+ if (!leaf_safe) {
WT_RET(__check_leaf_key_range(session, srch_key, leaf, cbt));
- if (cbt->compare != 0) {
- /*
- * !!!
- * WT_CURSOR.search_near uses the slot value to
- * decide if there was an on-page match.
- */
- cbt->slot = 0;
+ *leaf_foundp = cbt->compare == 0;
+ if (!*leaf_foundp)
return (0);
- }
}
current = leaf;
@@ -540,7 +537,7 @@ leaf_only:
* read-mostly workload. Check that case and get out fast.
*/
if (0) {
- leaf_match:
+leaf_match:
cbt->compare = 0;
cbt->slot = WT_ROW_SLOT(page, rip);
return (0);