diff options
author | Luke Chen <luke.chen@mongodb.com> | 2020-01-13 14:25:10 +1100 |
---|---|---|
committer | Luke Chen <luke.chen@mongodb.com> | 2020-01-13 14:27:49 +1100 |
commit | cf013cac38ee4b655c990fd0db341298a8d6d563 (patch) | |
tree | 064fef03b32ff4341d6cd3241fdf656024e07ef3 /src/third_party/wiredtiger/src/btree | |
parent | 462cc0e91455e0c18f40722ab1dab3ea2bdca581 (diff) | |
download | mongo-cf013cac38ee4b655c990fd0db341298a8d6d563.tar.gz |
Import wiredtiger: 54a846c423023183195dccc634aff4770f11ba54 from branch mongodb-4.0
ref: e651c9e274..54a846c423
for: 4.0.15
WT-4636 Fix strace in syscall test
WT-5042 Reduce configuration parsing overhead from checkpoints
WT-5106 Remove temporary files in clang-format script
WT-5112 Handling goto labels with multiple words in s_goto.py
WT-5120 Checkpoint hangs when reconciliation doesn't release the eviction generation
WT-5125 Adding new stats for eviction target strategy
WT-5135 Change lookaside file inserts to use cursor.insert
WT-5136 Fix reading freed memory due to birthmark after uncommitted updates freed
WT-5169 WT_REF_LIMBO pages cannot support fast (leaf-page only) searches
WT-5196 Data mismatch failures with test/checkpoint after enabling LAS sweep
WT-5218 Improve eviction to differentiate between clean and dirty pages with WT_CACHE_EVICT_NOKEEP readgen
WT-5239 Fix syscall failure about metadata file open
WT-5247 Ensure that only idempotent modify operations are logged
WT-5277 Cursor key out-of-order detected in the lookaside file
Diffstat (limited to 'src/third_party/wiredtiger/src/btree')
-rw-r--r-- | src/third_party/wiredtiger/src/btree/bt_curnext.c | 30 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/btree/bt_curprev.c | 25 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/btree/bt_cursor.c | 220 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/btree/bt_debug.c | 22 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/btree/bt_random.c | 2 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/btree/bt_read.c | 10 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/btree/bt_ret.c | 25 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/btree/bt_slvg.c | 4 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/btree/bt_split.c | 12 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/btree/bt_vrfy.c | 4 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/btree/bt_vrfy_dsk.c | 4 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/btree/col_modify.c | 6 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/btree/col_srch.c | 21 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/btree/row_key.c | 2 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/btree/row_modify.c | 44 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/btree/row_srch.c | 25 |
16 files changed, 265 insertions, 191 deletions
diff --git a/src/third_party/wiredtiger/src/btree/bt_curnext.c b/src/third_party/wiredtiger/src/btree/bt_curnext.c index 1085d26315a..d5857f75bd3 100644 --- a/src/third_party/wiredtiger/src/btree/bt_curnext.c +++ b/src/third_party/wiredtiger/src/btree/bt_curnext.c @@ -60,7 +60,7 @@ __cursor_fix_append_next(WT_CURSOR_BTREE *cbt, bool newpage, bool restart) cbt->v = 0; cbt->iface.value.data = &cbt->v; } else { - restart_read: +restart_read: WT_RET(__wt_txn_read(session, cbt->ins->upd, &upd)); if (upd == NULL) { cbt->v = 0; @@ -114,7 +114,7 @@ new_page: if (cbt->ins != NULL && cbt->recno != WT_INSERT_RECNO(cbt->ins)) cbt->ins = NULL; if (cbt->ins != NULL) { - restart_read: +restart_read: WT_RET(__wt_txn_read(session, cbt->ins->upd, &upd)); } if (upd == NULL) { @@ -149,12 +149,12 @@ __cursor_var_append_next(WT_CURSOR_BTREE *cbt, bool newpage, bool restart) for (;;) { cbt->ins = WT_SKIP_NEXT(cbt->ins); - new_page: +new_page: if (cbt->ins == NULL) return (WT_NOTFOUND); __cursor_set_recno(cbt, WT_INSERT_RECNO(cbt->ins)); - restart_read: +restart_read: WT_RET(__wt_txn_read(session, cbt->ins->upd, &upd)); if (upd == NULL) continue; @@ -163,7 +163,7 @@ __cursor_var_append_next(WT_CURSOR_BTREE *cbt, bool newpage, bool restart) ++cbt->page_deleted_count; continue; } - return (__wt_value_return(session, cbt, upd)); + return (__wt_value_return(cbt, upd)); } /* NOTREACHED */ } @@ -213,8 +213,8 @@ __cursor_var_next(WT_CURSOR_BTREE *cbt, bool newpage, bool restart) return (WT_NOTFOUND); __cursor_set_recno(cbt, cbt->recno + 1); - new_page: - restart_read: +new_page: +restart_read: /* Find the matching WT_COL slot. 
*/ if ((cip = __col_var_search(cbt->ref, cbt->recno, &rle_start)) == NULL) return (WT_NOTFOUND); @@ -232,7 +232,7 @@ __cursor_var_next(WT_CURSOR_BTREE *cbt, bool newpage, bool restart) ++cbt->page_deleted_count; continue; } - return (__wt_value_return(session, cbt, upd)); + return (__wt_value_return(cbt, upd)); } /* @@ -346,9 +346,9 @@ __cursor_row_next(WT_CURSOR_BTREE *cbt, bool newpage, bool restart) if (cbt->ins != NULL) cbt->ins = WT_SKIP_NEXT(cbt->ins); - new_insert: +new_insert: cbt->iter_retry = WT_CBT_RETRY_INSERT; - restart_read_insert: +restart_read_insert: if ((ins = cbt->ins) != NULL) { WT_RET(__wt_txn_read(session, ins->upd, &upd)); if (upd == NULL) @@ -360,7 +360,7 @@ __cursor_row_next(WT_CURSOR_BTREE *cbt, bool newpage, bool restart) } key->data = WT_INSERT_KEY(ins); key->size = WT_INSERT_KEY_SIZE(ins); - return (__wt_value_return(session, cbt, upd)); + return (__wt_value_return(cbt, upd)); } /* Check for the end of the page. */ @@ -381,7 +381,7 @@ __cursor_row_next(WT_CURSOR_BTREE *cbt, bool newpage, bool restart) cbt->ins = NULL; cbt->iter_retry = WT_CBT_RETRY_PAGE; cbt->slot = cbt->row_iteration_slot / 2 - 1; - restart_read_page: +restart_read_page: rip = &page->pg_row[cbt->slot]; WT_RET(__wt_txn_read(session, WT_ROW_UPDATE(page, rip), &upd)); if (upd != NULL && upd->type == WT_UPDATE_TOMBSTONE) { @@ -489,8 +489,12 @@ __wt_cursor_key_order_check(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, bool * Initialize key ordering checks for cursor movements after a successful search. */ int -__wt_cursor_key_order_init(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) +__wt_cursor_key_order_init(WT_CURSOR_BTREE *cbt) { + WT_SESSION_IMPL *session; + + session = (WT_SESSION_IMPL *)cbt->iface.session; + /* * Cursor searches set the position for cursor movements, set the last-key value for diagnostic * checking. 
diff --git a/src/third_party/wiredtiger/src/btree/bt_curprev.c b/src/third_party/wiredtiger/src/btree/bt_curprev.c index 9aa8d08345d..8abdc5b54e6 100644 --- a/src/third_party/wiredtiger/src/btree/bt_curprev.c +++ b/src/third_party/wiredtiger/src/btree/bt_curprev.c @@ -200,7 +200,7 @@ __cursor_fix_append_prev(WT_CURSOR_BTREE *cbt, bool newpage, bool restart) cbt->iface.value.data = &cbt->v; } else { upd = NULL; - restart_read: +restart_read: WT_RET(__wt_txn_read(session, cbt->ins->upd, &upd)); if (upd == NULL) { cbt->v = 0; @@ -254,7 +254,7 @@ new_page: cbt->ins = NULL; upd = NULL; if (cbt->ins != NULL) { - restart_read: +restart_read: WT_RET(__wt_txn_read(session, cbt->ins->upd, &upd)); } if (upd == NULL) { @@ -289,12 +289,12 @@ __cursor_var_append_prev(WT_CURSOR_BTREE *cbt, bool newpage, bool restart) for (;;) { WT_RET(__cursor_skip_prev(cbt)); - new_page: +new_page: if (cbt->ins == NULL) return (WT_NOTFOUND); __cursor_set_recno(cbt, WT_INSERT_RECNO(cbt->ins)); - restart_read: +restart_read: WT_RET(__wt_txn_read(session, cbt->ins->upd, &upd)); if (upd == NULL) continue; @@ -303,7 +303,7 @@ __cursor_var_append_prev(WT_CURSOR_BTREE *cbt, bool newpage, bool restart) ++cbt->page_deleted_count; continue; } - return (__wt_value_return(session, cbt, upd)); + return (__wt_value_return(cbt, upd)); } /* NOTREACHED */ } @@ -351,11 +351,12 @@ __cursor_var_prev(WT_CURSOR_BTREE *cbt, bool newpage, bool restart) for (;;) { __cursor_set_recno(cbt, cbt->recno - 1); - new_page: +new_page: if (cbt->recno < cbt->ref->ref_recno) return (WT_NOTFOUND); - restart_read: /* Find the matching WT_COL slot. */ +restart_read: + /* Find the matching WT_COL slot. 
*/ if ((cip = __col_var_search(cbt->ref, cbt->recno, &rle_start)) == NULL) return (WT_NOTFOUND); cbt->slot = WT_COL_SLOT(page, cip); @@ -372,7 +373,7 @@ __cursor_var_prev(WT_CURSOR_BTREE *cbt, bool newpage, bool restart) ++cbt->page_deleted_count; continue; } - return (__wt_value_return(session, cbt, upd)); + return (__wt_value_return(cbt, upd)); } /* @@ -495,9 +496,9 @@ __cursor_row_prev(WT_CURSOR_BTREE *cbt, bool newpage, bool restart) if (cbt->ins != NULL) WT_RET(__cursor_skip_prev(cbt)); - new_insert: +new_insert: cbt->iter_retry = WT_CBT_RETRY_INSERT; - restart_read_insert: +restart_read_insert: if ((ins = cbt->ins) != NULL) { WT_RET(__wt_txn_read(session, ins->upd, &upd)); if (upd == NULL) @@ -509,7 +510,7 @@ __cursor_row_prev(WT_CURSOR_BTREE *cbt, bool newpage, bool restart) } key->data = WT_INSERT_KEY(ins); key->size = WT_INSERT_KEY_SIZE(ins); - return (__wt_value_return(session, cbt, upd)); + return (__wt_value_return(cbt, upd)); } /* Check for the beginning of the page. */ @@ -533,7 +534,7 @@ __cursor_row_prev(WT_CURSOR_BTREE *cbt, bool newpage, bool restart) cbt->iter_retry = WT_CBT_RETRY_PAGE; cbt->slot = cbt->row_iteration_slot / 2 - 1; - restart_read_page: +restart_read_page: rip = &page->pg_row[cbt->slot]; WT_RET(__wt_txn_read(session, WT_ROW_UPDATE(page, rip), &upd)); if (upd != NULL && upd->type == WT_UPDATE_TOMBSTONE) { diff --git a/src/third_party/wiredtiger/src/btree/bt_cursor.c b/src/third_party/wiredtiger/src/btree/bt_cursor.c index dc4f30c073b..74dccc4098e 100644 --- a/src/third_party/wiredtiger/src/btree/bt_cursor.c +++ b/src/third_party/wiredtiger/src/btree/bt_cursor.c @@ -87,17 +87,16 @@ __cursor_page_pinned(WT_CURSOR_BTREE *cbt) return (false); /* - * If we are doing an update, we need a page with history, release the page so we get it again - * with history if required. Eviction may be locking the page, wait until we see a "normal" - * state and then test against that state (eviction may have already locked the page again). 
+ * We need a page with history: updates need complete update lists and a read might be based on + * a different timestamp than the one that brought the page into memory. Release the page and + * read it again with history if required. Eviction may be locking the page, wait until we see a + * "normal" state and then test against that state (eviction may have already locked the page + * again). */ - if (F_ISSET(&session->txn, WT_TXN_UPDATE)) { - while ((current_state = cbt->ref->state) == WT_REF_LOCKED) - __wt_yield(); - return (current_state == WT_REF_MEM); - } - - return (true); + while ((current_state = cbt->ref->state) == WT_REF_LOCKED) + __wt_yield(); + WT_ASSERT(session, current_state == WT_REF_LIMBO || current_state == WT_REF_MEM); + return (current_state == WT_REF_MEM); } /* @@ -356,11 +355,14 @@ __wt_cursor_valid(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp, bool *valid) * Column-store search from a cursor. */ static inline int -__cursor_col_search(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_REF *leaf) +__cursor_col_search(WT_CURSOR_BTREE *cbt, WT_REF *leaf, bool *leaf_foundp) { WT_DECL_RET; + WT_SESSION_IMPL *session; - WT_WITH_PAGE_INDEX(session, ret = __wt_col_search(session, cbt->iface.recno, leaf, cbt, false)); + session = (WT_SESSION_IMPL *)cbt->iface.session; + WT_WITH_PAGE_INDEX( + session, ret = __wt_col_search(cbt, cbt->iface.recno, leaf, false, leaf_foundp)); return (ret); } @@ -369,12 +371,14 @@ __cursor_col_search(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_REF *leaf * Row-store search from a cursor. 
*/ static inline int -__cursor_row_search(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_REF *leaf, bool insert) +__cursor_row_search(WT_CURSOR_BTREE *cbt, bool insert, WT_REF *leaf, bool *leaf_foundp) { WT_DECL_RET; + WT_SESSION_IMPL *session; + session = (WT_SESSION_IMPL *)cbt->iface.session; WT_WITH_PAGE_INDEX( - session, ret = __wt_row_search(session, &cbt->iface.key, leaf, cbt, insert, false)); + session, ret = __wt_row_search(cbt, &cbt->iface.key, insert, leaf, false, leaf_foundp)); return (ret); } @@ -383,10 +387,9 @@ __cursor_row_search(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_REF *leaf * Column-store modify from a cursor, with a separate value. */ static inline int -__cursor_col_modify_v( - WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_ITEM *value, u_int modify_type) +__cursor_col_modify_v(WT_CURSOR_BTREE *cbt, WT_ITEM *value, u_int modify_type) { - return (__wt_col_modify(session, cbt, cbt->iface.recno, value, NULL, modify_type, false)); + return (__wt_col_modify(cbt, cbt->iface.recno, value, NULL, modify_type, false)); } /* @@ -394,10 +397,9 @@ __cursor_col_modify_v( * Row-store modify from a cursor, with a separate value. */ static inline int -__cursor_row_modify_v( - WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_ITEM *value, u_int modify_type) +__cursor_row_modify_v(WT_CURSOR_BTREE *cbt, WT_ITEM *value, u_int modify_type) { - return (__wt_row_modify(session, cbt, &cbt->iface.key, value, NULL, modify_type, false)); + return (__wt_row_modify(cbt, &cbt->iface.key, value, NULL, modify_type, false)); } /* @@ -405,10 +407,9 @@ __cursor_row_modify_v( * Column-store modify from a cursor. 
*/ static inline int -__cursor_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, u_int modify_type) +__cursor_col_modify(WT_CURSOR_BTREE *cbt, u_int modify_type) { - return ( - __wt_col_modify(session, cbt, cbt->iface.recno, &cbt->iface.value, NULL, modify_type, false)); + return (__wt_col_modify(cbt, cbt->iface.recno, &cbt->iface.value, NULL, modify_type, false)); } /* @@ -416,10 +417,9 @@ __cursor_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, u_int modify * Row-store modify from a cursor. */ static inline int -__cursor_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, u_int modify_type) +__cursor_row_modify(WT_CURSOR_BTREE *cbt, u_int modify_type) { - return ( - __wt_row_modify(session, cbt, &cbt->iface.key, &cbt->iface.value, NULL, modify_type, false)); + return (__wt_row_modify(cbt, &cbt->iface.key, &cbt->iface.value, NULL, modify_type, false)); } /* @@ -473,8 +473,8 @@ __wt_btcur_search_uncommitted(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp) session = (WT_SESSION_IMPL *)cursor->session; *updp = upd = NULL; /* -Wuninitialized */ - WT_RET(btree->type == BTREE_ROW ? __cursor_row_search(session, cbt, NULL, false) : - __cursor_col_search(session, cbt, NULL)); + WT_RET(btree->type == BTREE_ROW ? __cursor_row_search(cbt, false, NULL, NULL) : + __cursor_col_search(cbt, NULL, NULL)); /* * Ideally exact match should be found, as this transaction has searched for updates done by @@ -513,7 +513,7 @@ __wt_btcur_search(WT_CURSOR_BTREE *cbt) WT_DECL_RET; WT_SESSION_IMPL *session; WT_UPDATE *upd; - bool valid; + bool leaf_found, valid; btree = cbt->btree; cursor = &cbt->iface; @@ -544,18 +544,18 @@ __wt_btcur_search(WT_CURSOR_BTREE *cbt) if (__cursor_page_pinned(cbt)) { __wt_txn_cursor_op(session); - WT_ERR(btree->type == BTREE_ROW ? __cursor_row_search(session, cbt, cbt->ref, false) : - __cursor_col_search(session, cbt, cbt->ref)); + WT_ERR(btree->type == BTREE_ROW ? 
__cursor_row_search(cbt, false, cbt->ref, &leaf_found) : + __cursor_col_search(cbt, cbt->ref, &leaf_found)); /* Return, if prepare conflict encountered. */ - if (cbt->compare == 0) + if (leaf_found && cbt->compare == 0) WT_ERR(__wt_cursor_valid(cbt, &upd, &valid)); } if (!valid) { WT_ERR(__cursor_func_init(cbt, true)); - WT_ERR(btree->type == BTREE_ROW ? __cursor_row_search(session, cbt, NULL, false) : - __cursor_col_search(session, cbt, NULL)); + WT_ERR(btree->type == BTREE_ROW ? __cursor_row_search(cbt, false, NULL, NULL) : + __cursor_col_search(cbt, NULL, NULL)); /* Return, if prepare conflict encountered. */ if (cbt->compare == 0) @@ -563,7 +563,7 @@ __wt_btcur_search(WT_CURSOR_BTREE *cbt) } if (valid) - ret = __cursor_kv_return(session, cbt, upd); + ret = __cursor_kv_return(cbt, upd); else if (__cursor_fix_implicit(btree, cbt)) { /* * Creating a record past the end of the tree in a fixed-length column-store implicitly @@ -580,7 +580,7 @@ __wt_btcur_search(WT_CURSOR_BTREE *cbt) #ifdef HAVE_DIAGNOSTIC if (ret == 0) - WT_ERR(__wt_cursor_key_order_init(session, cbt)); + WT_ERR(__wt_cursor_key_order_init(cbt)); #endif err: @@ -605,7 +605,7 @@ __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp) WT_SESSION_IMPL *session; WT_UPDATE *upd; int exact; - bool valid; + bool leaf_found, valid; btree = cbt->btree; cursor = &cbt->iface; @@ -630,37 +630,42 @@ __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp) __cursor_state_save(cursor, &state); /* - * If we have a row-store page pinned, search it; if we don't have a - * page pinned, or the search of the pinned page doesn't find an exact - * match, search from the root. Unlike WT_CURSOR.search, ignore pinned - * pages in the case of column-store, search-near isn't an interesting - * enough case for column-store to add the complexity needed to avoid - * the tree search. 
- * - * Set the "insert" flag for the btree row-store search; we may intend - * to position the cursor at the end of the tree, rather than match an - * existing record. + * If we have a row-store page pinned, search it; if we don't have a page pinned, or the search + * of the pinned page doesn't find an exact match, search from the root. Unlike + * WT_CURSOR.search, ignore pinned pages in the case of column-store, search-near isn't an + * interesting enough case for column-store to add the complexity needed to avoid the tree + * search. */ valid = false; if (btree->type == BTREE_ROW && __cursor_page_pinned(cbt)) { __wt_txn_cursor_op(session); - WT_ERR(__cursor_row_search(session, cbt, cbt->ref, true)); + /* + * Set the "insert" flag for row-store search; we may intend to position the cursor at the + * end of the tree, rather than match an existing record. (LSM requires this semantic.) + */ + WT_ERR(__cursor_row_search(cbt, true, cbt->ref, &leaf_found)); /* - * Search-near is trickier than search when searching an already pinned page. If search - * returns the first or last page slots, discard the results and search the full tree as the - * neighbor pages might offer better matches. This test is simplistic as we're ignoring - * append lists (there may be no page slots or we might be legitimately positioned after the - * last page slot). Ignore those cases, it makes things too complicated. + * Only use the pinned page search results if search returns an exact match or a slot other + * than the page's boundary slots, if that's not the case, a neighbor page might offer a + * better match. This test is simplistic as we're ignoring append lists (there may be no + * page slots or we might be legitimately positioned after the last page slot). Ignore those + * cases, it makes things too complicated. 
*/ - if (cbt->slot != 0 && cbt->slot != cbt->ref->page->entries - 1) + if (leaf_found && + (cbt->compare == 0 || (cbt->slot != 0 && cbt->slot != cbt->ref->page->entries - 1))) WT_ERR(__wt_cursor_valid(cbt, &upd, &valid)); } if (!valid) { WT_ERR(__cursor_func_init(cbt, true)); - WT_ERR(btree->type == BTREE_ROW ? __cursor_row_search(session, cbt, NULL, true) : - __cursor_col_search(session, cbt, NULL)); + + /* + * Set the "insert" flag for row-store search; we may intend to position the cursor at the + * end of the tree, rather than match an existing record. (LSM requires this semantic.) + */ + WT_ERR(btree->type == BTREE_ROW ? __cursor_row_search(cbt, true, NULL, NULL) : + __cursor_col_search(cbt, NULL, NULL)); WT_ERR(__wt_cursor_valid(cbt, &upd, &valid)); } @@ -683,7 +688,7 @@ __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp) */ if (valid) { exact = cbt->compare; - ret = __cursor_kv_return(session, cbt, upd); + ret = __cursor_kv_return(cbt, upd); } else if (__cursor_fix_implicit(btree, cbt)) { cbt->recno = cursor->recno; cbt->v = 0; @@ -729,7 +734,7 @@ err: #ifdef HAVE_DIAGNOSTIC if (ret == 0) - WT_TRET(__wt_cursor_key_order_init(session, cbt)); + WT_TRET(__wt_cursor_key_order_init(cbt)); #endif if (ret != 0) { @@ -798,8 +803,8 @@ __wt_btcur_insert(WT_CURSOR_BTREE *cbt) * Correct to an exact match so we can update whatever we're pointing at. */ cbt->compare = 0; - ret = btree->type == BTREE_ROW ? __cursor_row_modify(session, cbt, WT_UPDATE_STANDARD) : - __cursor_col_modify(session, cbt, WT_UPDATE_STANDARD); + ret = btree->type == BTREE_ROW ? __cursor_row_modify(cbt, WT_UPDATE_STANDARD) : + __cursor_col_modify(cbt, WT_UPDATE_STANDARD); if (ret == 0) goto done; @@ -826,7 +831,7 @@ retry: WT_ERR(__cursor_func_init(cbt, true)); if (btree->type == BTREE_ROW) { - WT_ERR(__cursor_row_search(session, cbt, NULL, true)); + WT_ERR(__cursor_row_search(cbt, true, NULL, NULL)); /* * If not overwriting, fail if the key exists, else insert the key/value pair. 
*/ @@ -836,7 +841,7 @@ retry: WT_ERR(WT_DUPLICATE_KEY); } - ret = __cursor_row_modify(session, cbt, WT_UPDATE_STANDARD); + ret = __cursor_row_modify(cbt, WT_UPDATE_STANDARD); } else if (append_key) { /* * Optionally insert a new record (ignoring the application's record number). The real @@ -844,11 +849,11 @@ retry: */ cbt->iface.recno = WT_RECNO_OOB; cbt->compare = 1; - WT_ERR(__cursor_col_search(session, cbt, NULL)); - WT_ERR(__cursor_col_modify(session, cbt, WT_UPDATE_STANDARD)); + WT_ERR(__cursor_col_search(cbt, NULL, NULL)); + WT_ERR(__cursor_col_modify(cbt, WT_UPDATE_STANDARD)); cursor->recno = cbt->recno; } else { - WT_ERR(__cursor_col_search(session, cbt, NULL)); + WT_ERR(__cursor_col_search(cbt, NULL, NULL)); /* * If not overwriting, fail if the key exists. Creating a record past the end of the tree in @@ -864,7 +869,7 @@ retry: WT_ERR(WT_DUPLICATE_KEY); } - WT_ERR(__cursor_col_modify(session, cbt, WT_UPDATE_STANDARD)); + WT_ERR(__cursor_col_modify(cbt, WT_UPDATE_STANDARD)); } err: @@ -944,7 +949,7 @@ __wt_btcur_insert_check(WT_CURSOR_BTREE *cbt) retry: WT_ERR(__cursor_func_init(cbt, true)); - WT_ERR(__cursor_row_search(session, cbt, NULL, true)); + WT_ERR(__cursor_row_search(cbt, true, NULL, NULL)); /* Just check for conflicts. */ ret = __curfile_update_check(cbt); @@ -1023,8 +1028,8 @@ __wt_btcur_remove(WT_CURSOR_BTREE *cbt, bool positioned) * Correct to an exact match so we can remove whatever we're pointing at. */ cbt->compare = 0; - ret = btree->type == BTREE_ROW ? __cursor_row_modify(session, cbt, WT_UPDATE_TOMBSTONE) : - __cursor_col_modify(session, cbt, WT_UPDATE_TOMBSTONE); + ret = btree->type == BTREE_ROW ? 
__cursor_row_modify(cbt, WT_UPDATE_TOMBSTONE) : + __cursor_col_modify(cbt, WT_UPDATE_TOMBSTONE); if (ret == 0) goto done; goto err; @@ -1046,7 +1051,7 @@ retry: WT_ERR(__cursor_func_init(cbt, true)); if (btree->type == BTREE_ROW) { - ret = __cursor_row_search(session, cbt, NULL, false); + ret = __cursor_row_search(cbt, false, NULL, NULL); if (ret == WT_NOTFOUND) goto search_notfound; WT_ERR(ret); @@ -1060,9 +1065,9 @@ retry: if (!valid) goto search_notfound; - ret = __cursor_row_modify(session, cbt, WT_UPDATE_TOMBSTONE); + ret = __cursor_row_modify(cbt, WT_UPDATE_TOMBSTONE); } else { - ret = __cursor_col_search(session, cbt, NULL); + ret = __cursor_col_search(cbt, NULL, NULL); if (ret == WT_NOTFOUND) goto search_notfound; WT_ERR(ret); @@ -1092,7 +1097,7 @@ retry: */ cbt->recno = cursor->recno; } else - ret = __cursor_col_modify(session, cbt, WT_UPDATE_TOMBSTONE); + ret = __cursor_col_modify(cbt, WT_UPDATE_TOMBSTONE); } err: @@ -1111,7 +1116,7 @@ err: */ if (positioned) { if (searched) - WT_TRET(__wt_key_return(session, cbt)); + WT_TRET(__wt_key_return(cbt)); } else { F_CLR(cursor, WT_CURSTD_KEY_SET); WT_TRET(__cursor_reset(cbt)); @@ -1134,7 +1139,7 @@ err: * subsequent iteration can succeed, we cannot return success.) */ if (0) { - search_notfound: +search_notfound: ret = WT_NOTFOUND; if (!iterating && !positioned && F_ISSET(cursor, WT_CURSTD_OVERWRITE)) ret = 0; @@ -1174,7 +1179,7 @@ __btcur_update(WT_CURSOR_BTREE *cbt, WT_ITEM *value, u_int modify_type) WT_DECL_RET; WT_SESSION_IMPL *session; uint64_t yield_count, sleep_usecs; - bool valid; + bool leaf_found, valid; btree = cbt->btree; cursor = &cbt->iface; @@ -1205,8 +1210,8 @@ __btcur_update(WT_CURSOR_BTREE *cbt, WT_ITEM *value, u_int modify_type) * Correct to an exact match so we can update whatever we're pointing at. */ cbt->compare = 0; - ret = btree->type == BTREE_ROW ? 
__cursor_row_modify_v(session, cbt, value, modify_type) : - __cursor_col_modify_v(session, cbt, value, modify_type); + ret = btree->type == BTREE_ROW ? __cursor_row_modify_v(cbt, value, modify_type) : + __cursor_col_modify_v(cbt, value, modify_type); if (ret == 0) goto done; @@ -1229,12 +1234,31 @@ __btcur_update(WT_CURSOR_BTREE *cbt, WT_ITEM *value, u_int modify_type) WT_ERR(__cursor_localvalue(cursor)); __cursor_state_save(cursor, &state); + /* If our caller configures for a local search and we have a page pinned, do that search. */ + if (F_ISSET(cursor, WT_CURSTD_UPDATE_LOCAL) && __cursor_page_pinned(cbt)) { + __wt_txn_cursor_op(session); + WT_ERR(__wt_txn_autocommit_check(session)); + + WT_ERR(btree->type == BTREE_ROW ? __cursor_row_search(cbt, true, cbt->ref, &leaf_found) : + __cursor_col_search(cbt, cbt->ref, &leaf_found)); + /* + * Only use the pinned page search results if search returns an exact match or a slot other + * than the page's boundary slots, if that's not the case, the record might belong on an + * entirely different page. This test is simplistic as we're ignoring append lists (there + * may be no page slots or we might be legitimately positioned after the last page slot). + * Ignore those cases, it makes things too complicated. + */ + if (leaf_found && + (cbt->compare == 0 || (cbt->slot != 0 && cbt->slot != cbt->ref->page->entries - 1))) + goto update_local; + } + retry: WT_ERR(__cursor_func_init(cbt, true)); - + WT_ERR(btree->type == BTREE_ROW ? __cursor_row_search(cbt, true, NULL, NULL) : + __cursor_col_search(cbt, NULL, NULL)); +update_local: if (btree->type == BTREE_ROW) { - WT_ERR(__cursor_row_search(session, cbt, NULL, true)); - /* * If not overwriting, check for conflicts and fail if the key does not exist. 
*/ @@ -1246,10 +1270,8 @@ retry: if (!valid) WT_ERR(WT_NOTFOUND); } - ret = __cursor_row_modify_v(session, cbt, value, modify_type); + ret = __cursor_row_modify_v(cbt, value, modify_type); } else { - WT_ERR(__cursor_col_search(session, cbt, NULL)); - /* * If not overwriting, fail if the key doesn't exist. If we find an update for the key, * check for conflicts. Update the record if it exists. Creating a record past the end of @@ -1264,7 +1286,7 @@ retry: if ((cbt->compare != 0 || !valid) && !__cursor_fix_implicit(btree, cbt)) WT_ERR(WT_NOTFOUND); } - ret = __cursor_col_modify_v(session, cbt, value, modify_type); + ret = __cursor_col_modify_v(cbt, value, modify_type); } err: @@ -1287,7 +1309,7 @@ done: /* * WT_CURSOR.update returns a key and a value. */ - ret = __cursor_kv_return(session, cbt, cbt->modify_update); + ret = __cursor_kv_return(cbt, cbt->modify_update); break; case WT_UPDATE_RESERVE: /* @@ -1300,7 +1322,7 @@ done: * WT_CURSOR.modify has already created the return value and our job is to leave it * untouched. */ - ret = __wt_key_return(session, cbt); + ret = __wt_key_return(cbt); break; case WT_UPDATE_BIRTHMARK: case WT_UPDATE_TOMBSTONE: @@ -1632,12 +1654,14 @@ __wt_btcur_equals(WT_CURSOR_BTREE *a_arg, WT_CURSOR_BTREE *b_arg, int *equalp) * Discard a cursor range from row-store or variable-width column-store tree. 
*/ static int -__cursor_truncate(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop, - int (*rmfunc)(WT_SESSION_IMPL *, WT_CURSOR_BTREE *, u_int)) +__cursor_truncate( + WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop, int (*rmfunc)(WT_CURSOR_BTREE *, u_int)) { WT_DECL_RET; + WT_SESSION_IMPL *session; uint64_t yield_count, sleep_usecs; + session = (WT_SESSION_IMPL *)start->iface.session; yield_count = sleep_usecs = 0; /* @@ -1664,7 +1688,7 @@ retry: WT_ASSERT(session, F_MASK((WT_CURSOR *)start, WT_CURSTD_KEY_SET) == WT_CURSTD_KEY_INT); for (;;) { - WT_ERR(rmfunc(session, start, WT_UPDATE_TOMBSTONE)); + WT_ERR(rmfunc(start, WT_UPDATE_TOMBSTONE)); if (stop != NULL && __cursor_equals(start, stop)) return (0); @@ -1689,13 +1713,15 @@ err: * Discard a cursor range from fixed-width column-store tree. */ static int -__cursor_truncate_fix(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop, - int (*rmfunc)(WT_SESSION_IMPL *, WT_CURSOR_BTREE *, u_int)) +__cursor_truncate_fix( + WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop, int (*rmfunc)(WT_CURSOR_BTREE *, u_int)) { WT_DECL_RET; + WT_SESSION_IMPL *session; uint64_t yield_count, sleep_usecs; const uint8_t *value; + session = (WT_SESSION_IMPL *)start->iface.session; yield_count = sleep_usecs = 0; /* @@ -1724,7 +1750,7 @@ retry: for (;;) { value = (const uint8_t *)start->iface.value.data; if (*value != 0) - WT_ERR(rmfunc(session, start, WT_UPDATE_TOMBSTONE)); + WT_ERR(rmfunc(start, WT_UPDATE_TOMBSTONE)); if (stop != NULL && __cursor_equals(start, stop)) return (0); @@ -1759,6 +1785,8 @@ __wt_btcur_range_truncate(WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop) btree = start->btree; WT_STAT_DATA_INCR(session, cursor_truncate); + WT_RET(__wt_txn_autocommit_check(session)); + /* * For recovery, log the start and stop keys for a truncate operation, * not the individual records removed. 
On the other hand, for rollback @@ -1773,10 +1801,10 @@ __wt_btcur_range_truncate(WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop) switch (btree->type) { case BTREE_COL_FIX: - WT_ERR(__cursor_truncate_fix(session, start, stop, __cursor_col_modify)); + WT_ERR(__cursor_truncate_fix(start, stop, __cursor_col_modify)); break; case BTREE_COL_VAR: - WT_ERR(__cursor_truncate(session, start, stop, __cursor_col_modify)); + WT_ERR(__cursor_truncate(start, stop, __cursor_col_modify)); break; case BTREE_ROW: /* @@ -1788,7 +1816,7 @@ __wt_btcur_range_truncate(WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop) * setting up the truncate so we're good to go: if that ever changes, we'd need to do * something here to ensure a fully instantiated cursor. */ - WT_ERR(__cursor_truncate(session, start, stop, __cursor_row_modify)); + WT_ERR(__cursor_truncate(start, stop, __cursor_row_modify)); break; } diff --git a/src/third_party/wiredtiger/src/btree/bt_debug.c b/src/third_party/wiredtiger/src/btree/bt_debug.c index 3c9e4c260d0..e2d50a8745b 100644 --- a/src/third_party/wiredtiger/src/btree/bt_debug.c +++ b/src/third_party/wiredtiger/src/btree/bt_debug.c @@ -700,6 +700,28 @@ __wt_debug_cursor_page(void *cursor_arg, const char *ofile) } /* + * __wt_debug_cursor_las -- + * Dump the LAS tree given a user cursor. + */ +int +__wt_debug_cursor_las(void *cursor_arg, const char *ofile) + WT_GCC_FUNC_ATTRIBUTE((visibility("default"))) +{ + WT_CONNECTION_IMPL *conn; + WT_CURSOR *cursor; + WT_CURSOR_BTREE *cbt; + WT_SESSION_IMPL *las_session; + + cursor = cursor_arg; + conn = S2C((WT_SESSION_IMPL *)cursor->session); + las_session = conn->cache->las_session[0]; + if (las_session == NULL) + return (0); + cbt = (WT_CURSOR_BTREE *)las_session->las_cursor; + return (__wt_debug_tree_all(las_session, cbt->btree, NULL, ofile)); +} + +/* * __debug_tree -- * Dump the in-memory information for a tree. 
*/ diff --git a/src/third_party/wiredtiger/src/btree/bt_random.c b/src/third_party/wiredtiger/src/btree/bt_random.c index 525728b73dc..c0d4f342bb1 100644 --- a/src/third_party/wiredtiger/src/btree/bt_random.c +++ b/src/third_party/wiredtiger/src/btree/bt_random.c @@ -402,7 +402,7 @@ random_page_entry: WT_ERR(__wt_row_random_leaf(session, cbt)); WT_ERR(__wt_cursor_valid(cbt, &upd, &valid)); if (valid) - WT_ERR(__cursor_kv_return(session, cbt, upd)); + WT_ERR(__cursor_kv_return(cbt, upd)); else { if ((ret = __wt_btcur_next(cbt, false)) == WT_NOTFOUND) ret = __wt_btcur_prev(cbt, false); diff --git a/src/third_party/wiredtiger/src/btree/bt_read.c b/src/third_party/wiredtiger/src/btree/bt_read.c index fc7a05f0083..cda1eee1eeb 100644 --- a/src/third_party/wiredtiger/src/btree/bt_read.c +++ b/src/third_party/wiredtiger/src/btree/bt_read.c @@ -31,8 +31,8 @@ __col_instantiate( __wt_free_update_list(session, upd); /* Search the page and add updates. */ - WT_RET(__wt_col_search(session, recno, ref, cbt, true)); - WT_RET(__wt_col_modify(session, cbt, recno, NULL, updlist, WT_UPDATE_INVALID, false)); + WT_RET(__wt_col_search(cbt, recno, ref, true, NULL)); + WT_RET(__wt_col_modify(cbt, recno, NULL, updlist, WT_UPDATE_INVALID, false)); return (0); } @@ -59,8 +59,8 @@ __row_instantiate( __wt_free_update_list(session, upd); /* Search the page and add updates. 
*/ - WT_RET(__wt_row_search(session, key, ref, cbt, true, true)); - WT_RET(__wt_row_modify(session, cbt, key, NULL, updlist, WT_UPDATE_INVALID, false)); + WT_RET(__wt_row_search(cbt, key, true, ref, true, NULL)); + WT_RET(__wt_row_modify(cbt, key, NULL, updlist, WT_UPDATE_INVALID, false)); return (0); } @@ -752,7 +752,7 @@ read: continue; } - skip_evict: +skip_evict: /* * If we read the page and are configured to not trash * the cache, and no other thread has already used the diff --git a/src/third_party/wiredtiger/src/btree/bt_ret.c b/src/third_party/wiredtiger/src/btree/bt_ret.c index 9c9dbe5c30a..bec7a1f7e5e 100644 --- a/src/third_party/wiredtiger/src/btree/bt_ret.c +++ b/src/third_party/wiredtiger/src/btree/bt_ret.c @@ -13,15 +13,17 @@ * Change the cursor to reference an internal return key. */ static inline int -__key_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) +__key_return(WT_CURSOR_BTREE *cbt) { WT_CURSOR *cursor; WT_ITEM *tmp; WT_PAGE *page; WT_ROW *rip; + WT_SESSION_IMPL *session; page = cbt->ref->page; cursor = &cbt->iface; + session = (WT_SESSION_IMPL *)cbt->iface.session; if (page->type == WT_PAGE_ROW_LEAF) { rip = &page->pg_row[cbt->slot]; @@ -72,7 +74,7 @@ __key_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) * Change the cursor to reference an internal original-page return value. */ static inline int -__value_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) +__value_return(WT_CURSOR_BTREE *cbt) { WT_BTREE *btree; WT_CELL *cell; @@ -80,8 +82,10 @@ __value_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) WT_CURSOR *cursor; WT_PAGE *page; WT_ROW *rip; + WT_SESSION_IMPL *session; uint8_t v; + session = (WT_SESSION_IMPL *)cbt->iface.session; btree = S2BT(session); page = cbt->ref->page; @@ -123,17 +127,18 @@ __value_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) * Change the cursor to reference an internal update structure return value. 
*/ int -__wt_value_return_upd( - WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd, bool ignore_visibility) +__wt_value_return_upd(WT_CURSOR_BTREE *cbt, WT_UPDATE *upd, bool ignore_visibility) { WT_CURSOR *cursor; WT_DECL_RET; + WT_SESSION_IMPL *session; WT_UPDATE **listp, *list[WT_MODIFY_ARRAY_SIZE]; size_t allocated_bytes; u_int i; bool skipped_birthmark; cursor = &cbt->iface; + session = (WT_SESSION_IMPL *)cbt->iface.session; allocated_bytes = 0; /* @@ -213,7 +218,7 @@ __wt_value_return_upd( */ WT_ASSERT(session, cbt->slot != UINT32_MAX); - WT_ERR(__value_return(session, cbt)); + WT_ERR(__value_return(cbt)); } } else if (upd->type == WT_UPDATE_TOMBSTONE) WT_ERR(__wt_buf_set(session, &cursor->value, "", 0)); @@ -237,7 +242,7 @@ err: * Change the cursor to reference an internal return key. */ int -__wt_key_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) +__wt_key_return(WT_CURSOR_BTREE *cbt) { WT_CURSOR *cursor; @@ -253,7 +258,7 @@ __wt_key_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) */ F_CLR(cursor, WT_CURSTD_KEY_EXT); if (!F_ISSET(cursor, WT_CURSTD_KEY_INT)) { - WT_RET(__key_return(session, cbt)); + WT_RET(__key_return(cbt)); F_SET(cursor, WT_CURSTD_KEY_INT); } return (0); @@ -264,7 +269,7 @@ __wt_key_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) * Change the cursor to reference an internal return value. 
*/ int -__wt_value_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd) +__wt_value_return(WT_CURSOR_BTREE *cbt, WT_UPDATE *upd) { WT_CURSOR *cursor; @@ -272,9 +277,9 @@ __wt_value_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd F_CLR(cursor, WT_CURSTD_VALUE_EXT); if (upd == NULL) - WT_RET(__value_return(session, cbt)); + WT_RET(__value_return(cbt)); else - WT_RET(__wt_value_return_upd(session, cbt, upd, false)); + WT_RET(__wt_value_return_upd(cbt, upd, false)); F_SET(cursor, WT_CURSTD_VALUE_INT); return (0); } diff --git a/src/third_party/wiredtiger/src/btree/bt_slvg.c b/src/third_party/wiredtiger/src/btree/bt_slvg.c index fea2da9ae03..04ec016a3be 100644 --- a/src/third_party/wiredtiger/src/btree/bt_slvg.c +++ b/src/third_party/wiredtiger/src/btree/bt_slvg.c @@ -869,7 +869,7 @@ __slvg_col_range_overlap(WT_SESSION_IMPL *session, uint32_t a_slot, uint32_t b_s * Case #5: a_trk is a superset of b_trk and a_trk is more desirable -- discard b_trk. */ if (a_trk->trk_gen > b_trk->trk_gen) { - delete_b: +delete_b: /* * After page and overflow reconciliation, one (and only one) * page can reference an overflow record. But, if we split a @@ -1460,7 +1460,7 @@ __slvg_row_range_overlap(WT_SESSION_IMPL *session, uint32_t a_slot, uint32_t b_s * Case #5: a_trk is a superset of b_trk and a_trk is more desirable -- discard b_trk. */ if (a_trk->trk_gen > b_trk->trk_gen) { - delete_b: +delete_b: /* * After page and overflow reconciliation, one (and only one) * page can reference an overflow record. But, if we split a diff --git a/src/third_party/wiredtiger/src/btree/bt_split.c b/src/third_party/wiredtiger/src/btree/bt_split.c index 5a7f9279bd4..80d523ae5ee 100644 --- a/src/third_party/wiredtiger/src/btree/bt_split.c +++ b/src/third_party/wiredtiger/src/btree/bt_split.c @@ -1425,10 +1425,10 @@ __split_multi_inmem(WT_SESSION_IMPL *session, WT_PAGE *orig, WT_MULTI *multi, WT recno = WT_INSERT_RECNO(supd->ins); /* Search the page. 
*/ - WT_ERR(__wt_col_search(session, recno, ref, &cbt, true)); + WT_ERR(__wt_col_search(&cbt, recno, ref, true, NULL)); /* Apply the modification. */ - WT_ERR(__wt_col_modify(session, &cbt, recno, NULL, upd, WT_UPDATE_INVALID, true)); + WT_ERR(__wt_col_modify(&cbt, recno, NULL, upd, WT_UPDATE_INVALID, true)); break; case WT_PAGE_ROW_LEAF: /* Build a key. */ @@ -1447,15 +1447,13 @@ __split_multi_inmem(WT_SESSION_IMPL *session, WT_PAGE *orig, WT_MULTI *multi, WT WT_ASSERT(session, __wt_count_birthmarks(upd) <= 1); /* Search the page. */ - WT_ERR(__wt_row_search(session, key, ref, &cbt, true, true)); + WT_ERR(__wt_row_search(&cbt, key, true, ref, true, NULL)); - /* - * Birthmarks should only be applied to on-page values. - */ + /* Birthmarks should only be applied to on-page values. */ WT_ASSERT(session, cbt.compare == 0 || upd->type != WT_UPDATE_BIRTHMARK); /* Apply the modification. */ - WT_ERR(__wt_row_modify(session, &cbt, key, NULL, upd, WT_UPDATE_INVALID, true)); + WT_ERR(__wt_row_modify(&cbt, key, NULL, upd, WT_UPDATE_INVALID, true)); break; default: WT_ERR(__wt_illegal_value(session, orig->type)); diff --git a/src/third_party/wiredtiger/src/btree/bt_vrfy.c b/src/third_party/wiredtiger/src/btree/bt_vrfy.c index d796436752d..6fa2dbda197 100644 --- a/src/third_party/wiredtiger/src/btree/bt_vrfy.c +++ b/src/third_party/wiredtiger/src/btree/bt_vrfy.c @@ -366,7 +366,7 @@ __verify_tree(WT_SESSION_IMPL *session, WT_REF *ref, WT_VSTUFF *vs) goto recno_chk; case WT_PAGE_COL_VAR: recno = ref->ref_recno; - recno_chk: +recno_chk: if (recno != vs->record_total + 1) WT_RET_MSG(session, WT_ERROR, "page at %s has a starting record of %" PRIu64 " when the expected starting record is %" PRIu64, @@ -422,7 +422,7 @@ __verify_tree(WT_SESSION_IMPL *session, WT_REF *ref, WT_VSTUFF *vs) case WT_PAGE_COL_INT: case WT_PAGE_ROW_INT: if (unpack->raw != WT_CELL_ADDR_INT) - celltype_err: +celltype_err: WT_RET_MSG(session, WT_ERROR, "page at %s, of type %s, is referenced in " "its parent by 
a cell of type %s", diff --git a/src/third_party/wiredtiger/src/btree/bt_vrfy_dsk.c b/src/third_party/wiredtiger/src/btree/bt_vrfy_dsk.c index 603b379f928..831627d53e4 100644 --- a/src/third_party/wiredtiger/src/btree/bt_vrfy_dsk.c +++ b/src/third_party/wiredtiger/src/btree/bt_vrfy_dsk.c @@ -357,7 +357,7 @@ __verify_dsk_row(WT_SESSION_IMPL *session, const char *tag, const WT_PAGE_HEADER current->size = prefix + unpack->size; } - key_compare: +key_compare: /* * Compare the current key against the last key. * @@ -550,7 +550,7 @@ __verify_dsk_col_var(WT_SESSION_IMPL *session, const char *tag, const WT_PAGE_HE goto match_err; } else if (cell_type == WT_CELL_VALUE && last_data != NULL && last_size == unpack->size && memcmp(last_data, unpack->data, last_size) == 0) - match_err: +match_err: WT_RET_VRFY(session, "data entries %" PRIu32 " and %" PRIu32 " on page at %s are identical and should " "have been run-length encoded", diff --git a/src/third_party/wiredtiger/src/btree/col_modify.c b/src/third_party/wiredtiger/src/btree/col_modify.c index 273797b19fc..3610ca6bbdc 100644 --- a/src/third_party/wiredtiger/src/btree/col_modify.c +++ b/src/third_party/wiredtiger/src/btree/col_modify.c @@ -15,8 +15,8 @@ static int __col_insert_alloc(WT_SESSION_IMPL *, uint64_t, u_int, WT_INSERT **, * Column-store delete, insert, and update. 
*/ int -__wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, uint64_t recno, - const WT_ITEM *value, WT_UPDATE *upd_arg, u_int modify_type, bool exclusive) +__wt_col_modify(WT_CURSOR_BTREE *cbt, uint64_t recno, const WT_ITEM *value, WT_UPDATE *upd_arg, + u_int modify_type, bool exclusive) { static const WT_ITEM col_fix_remove = {"", 1, NULL, 0, 0}; WT_BTREE *btree; @@ -25,6 +25,7 @@ __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, uint64_t recno, WT_INSERT_HEAD *ins_head, **ins_headp; WT_PAGE *page; WT_PAGE_MODIFY *mod; + WT_SESSION_IMPL *session; WT_UPDATE *old_upd, *upd; size_t ins_size, upd_size; u_int i, skipdepth; @@ -33,6 +34,7 @@ __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, uint64_t recno, btree = cbt->btree; ins = NULL; page = cbt->ref->page; + session = (WT_SESSION_IMPL *)cbt->iface.session; upd = upd_arg; append = logged = false; diff --git a/src/third_party/wiredtiger/src/btree/col_srch.c b/src/third_party/wiredtiger/src/btree/col_srch.c index f202dbd7f7b..2a74d37cb39 100644 --- a/src/third_party/wiredtiger/src/btree/col_srch.c +++ b/src/third_party/wiredtiger/src/btree/col_srch.c @@ -59,7 +59,7 @@ __check_leaf_key_range(WT_SESSION_IMPL *session, uint64_t recno, WT_REF *leaf, W */ int __wt_col_search( - WT_SESSION_IMPL *session, uint64_t search_recno, WT_REF *leaf, WT_CURSOR_BTREE *cbt, bool restore) + WT_CURSOR_BTREE *cbt, uint64_t search_recno, WT_REF *leaf, bool leaf_safe, bool *leaf_foundp) { WT_BTREE *btree; WT_COL *cip; @@ -69,10 +69,12 @@ __wt_col_search( WT_PAGE *page; WT_PAGE_INDEX *pindex, *parent_pindex; WT_REF *current, *descent; + WT_SESSION_IMPL *session; uint64_t recno; uint32_t base, indx, limit, read_flags; int depth; + session = (WT_SESSION_IMPL *)cbt->iface.session; btree = S2BT(session); current = NULL; @@ -88,23 +90,18 @@ __wt_col_search( /* * We may be searching only a single leaf page, not the full tree. 
In the normal case where we * are searching a tree, check the page's parent keys before doing the full search, it's faster - * when the cursor is being re-positioned. Skip this if the page is being re-instantiated in - * memory. + * when the cursor is being re-positioned. Skip that check if we know the page is the right one + * (for example, when re-instantiating a page in memory, in that case we know the target must be + * on the current page). */ if (leaf != NULL) { WT_ASSERT(session, search_recno != WT_RECNO_OOB); - if (!restore) { + if (!leaf_safe) { WT_RET(__check_leaf_key_range(session, recno, leaf, cbt)); - if (cbt->compare != 0) { - /* - * !!! - * WT_CURSOR.search_near uses the slot value to - * decide if there was an on-page match. - */ - cbt->slot = 0; + *leaf_foundp = cbt->compare == 0; + if (!*leaf_foundp) return (0); - } } current = leaf; diff --git a/src/third_party/wiredtiger/src/btree/row_key.c b/src/third_party/wiredtiger/src/btree/row_key.c index 35e8373ef6f..3c609e9344f 100644 --- a/src/third_party/wiredtiger/src/btree/row_key.c +++ b/src/third_party/wiredtiger/src/btree/row_key.c @@ -162,7 +162,7 @@ __wt_row_leaf_key_work( direction = BACKWARD; for (slot_offset = 0;;) { if (0) { - switch_and_jump: +switch_and_jump: /* Switching to a forward roll. */ WT_ASSERT(session, direction == BACKWARD); direction = FORWARD; diff --git a/src/third_party/wiredtiger/src/btree/row_modify.c b/src/third_party/wiredtiger/src/btree/row_modify.c index c6c35de6e6f..7d9425b4ac0 100644 --- a/src/third_party/wiredtiger/src/btree/row_modify.c +++ b/src/third_party/wiredtiger/src/btree/row_modify.c @@ -41,14 +41,15 @@ err: * Row-store insert, update and delete. 
*/ int -__wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, const WT_ITEM *key, - const WT_ITEM *value, WT_UPDATE *upd_arg, u_int modify_type, bool exclusive) +__wt_row_modify(WT_CURSOR_BTREE *cbt, const WT_ITEM *key, const WT_ITEM *value, WT_UPDATE *upd_arg, + u_int modify_type, bool exclusive) { WT_DECL_RET; WT_INSERT *ins; WT_INSERT_HEAD *ins_head, **ins_headp; WT_PAGE *page; WT_PAGE_MODIFY *mod; + WT_SESSION_IMPL *session; WT_UPDATE *old_upd, *upd, **upd_entry; size_t ins_size, upd_size; uint32_t ins_slot; @@ -57,6 +58,7 @@ __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, const WT_ITEM *k ins = NULL; page = cbt->ref->page; + session = (WT_SESSION_IMPL *)cbt->iface.session; upd = upd_arg; logged = false; @@ -295,6 +297,7 @@ __wt_update_obsolete_check( WT_UPDATE *first, *next, *prev; size_t size; u_int count; + bool upd_visible_all_seen; txn_global = &S2C(session)->txn_global; @@ -309,21 +312,38 @@ __wt_update_obsolete_check( * Only updates with globally visible, self-contained data can terminate * update chains. * - * Birthmarks are a special case: once a birthmark becomes obsolete, it - * can be discarded and subsequent reads will see the on-page value (as - * expected). Inserting updates into the lookaside table relies on - * this behavior to avoid creating update chains with multiple - * birthmarks. + * Birthmarks are a special case: once a birthmark becomes obsolete, it can be discarded if + * there is a globally visible update before it and subsequent reads will see the on-page value + * (as expected). Inserting updates into the lookaside table relies on this behavior to avoid + * creating update chains with multiple birthmarks. We cannot discard the birthmark if it's the + * first globally visible update as the previous updates can be aborted and be freed causing the + * entire update chain being removed. 
*/ - for (first = prev = NULL, count = 0; upd != NULL; prev = upd, upd = upd->next, count++) { + for (first = prev = NULL, upd_visible_all_seen = false, count = 0; upd != NULL; + prev = upd, upd = upd->next, count++) { if (upd->txnid == WT_TXN_ABORTED) continue; + if (!__wt_txn_upd_visible_all(session, upd)) first = NULL; - else if (first == NULL && upd->type == WT_UPDATE_BIRTHMARK) - first = prev; - else if (first == NULL && WT_UPDATE_DATA_VALUE(upd)) - first = upd; + else { + if (first == NULL) { + /* + * If we have seen a globally visible update before the birthmark, the birthmark can + * be discarded. + */ + if (upd_visible_all_seen && upd->type == WT_UPDATE_BIRTHMARK) + first = prev; + /* + * We cannot discard the birthmark if it is the first globally visible update as the + * previous updates can be aborted resulting the entire update chain being removed. + */ + else if (upd->type == WT_UPDATE_BIRTHMARK || WT_UPDATE_DATA_VALUE(upd)) + first = upd; + } + + upd_visible_all_seen = true; + } } /* diff --git a/src/third_party/wiredtiger/src/btree/row_srch.c b/src/third_party/wiredtiger/src/btree/row_srch.c index a7a9c282564..87d11f84b83 100644 --- a/src/third_party/wiredtiger/src/btree/row_srch.c +++ b/src/third_party/wiredtiger/src/btree/row_srch.c @@ -199,8 +199,8 @@ __check_leaf_key_range( * Search a row-store tree for a specific key. 
*/ int -__wt_row_search(WT_SESSION_IMPL *session, WT_ITEM *srch_key, WT_REF *leaf, WT_CURSOR_BTREE *cbt, - bool insert, bool restore) +__wt_row_search(WT_CURSOR_BTREE *cbt, WT_ITEM *srch_key, bool insert, WT_REF *leaf, bool leaf_safe, + bool *leaf_foundp) { WT_BTREE *btree; WT_COLLATOR *collator; @@ -211,11 +211,13 @@ __wt_row_search(WT_SESSION_IMPL *session, WT_ITEM *srch_key, WT_REF *leaf, WT_CU WT_PAGE_INDEX *pindex, *parent_pindex; WT_REF *current, *descent; WT_ROW *rip; + WT_SESSION_IMPL *session; size_t match, skiphigh, skiplow; uint32_t base, indx, limit, read_flags; int cmp, depth; bool append_check, descend_right, done; + session = (WT_SESSION_IMPL *)cbt->iface.session; btree = S2BT(session); collator = btree->collator; item = cbt->tmp; @@ -245,21 +247,16 @@ __wt_row_search(WT_SESSION_IMPL *session, WT_ITEM *srch_key, WT_REF *leaf, WT_CU /* * We may be searching only a single leaf page, not the full tree. In the normal case where we * are searching a tree, check the page's parent keys before doing the full search, it's faster - * when the cursor is being re-positioned. Skip this if the page is being re-instantiated in - * memory. + * when the cursor is being re-positioned. Skip that check if we know the page is the right one + * (for example, when re-instantiating a page in memory, in that case we know the target must be + * on the current page). */ if (leaf != NULL) { - if (!restore) { + if (!leaf_safe) { WT_RET(__check_leaf_key_range(session, srch_key, leaf, cbt)); - if (cbt->compare != 0) { - /* - * !!! - * WT_CURSOR.search_near uses the slot value to - * decide if there was an on-page match. - */ - cbt->slot = 0; + *leaf_foundp = cbt->compare == 0; + if (!*leaf_foundp) return (0); - } } current = leaf; @@ -540,7 +537,7 @@ leaf_only: * read-mostly workload. Check that case and get out fast. */ if (0) { - leaf_match: +leaf_match: cbt->compare = 0; cbt->slot = WT_ROW_SLOT(page, rip); return (0); |