summaryrefslogtreecommitdiff
path: root/src/third_party/wiredtiger/src/btree/bt_ret.c
diff options
context:
space:
mode:
authorLuke Chen <luke.chen@mongodb.com>2020-05-06 15:45:01 +1000
committerEvergreen Agent <no-reply@evergreen.mongodb.com>2020-05-06 06:00:50 +0000
commitd2274bb6e1f8b21d73121a2fcb20b6628f652bbe (patch)
tree72c771934dab7adff1bbffdcb1af1ac6e1c36a0d /src/third_party/wiredtiger/src/btree/bt_ret.c
parente500238a9ea3d5498ebffeb74a1aceac42eb2c1f (diff)
downloadmongo-d2274bb6e1f8b21d73121a2fcb20b6628f652bbe.tar.gz
Import wiredtiger: 18dfb9e58e39927696affcd8e362364e23e1aa59 from branch mongodb-4.4r4.4.0-rc4
ref: a707df12a2..18dfb9e58e for: 4.4.0-rc4 WT-5242 Minimize checkpoints pinned during backup WT-5470 Reduce copies and allocations in read path WT-5673 Prepare support with durable history: modify verify and salvage as needed WT-5677 Prepare support with durable history: add test/format stress tests WT-5710 Review WT_PANIC usage WT-5716 Create the history store file at the same time as creating the metadata file in wiredtiger open WT-5839 Ignore non-globally visible tombstones for both data store and hs store in hs verification WT-5841 Return WT_TRY_SALVAGE when the history file is removed or truncated WT-5928 Cleanup stale FIXMEs from durable history WT-5977 WT_SESSION_NO_RECONCILE flag set by history cursor prevents eviction WT-5984 Allow prepared updates to be evicted in durable history WT-6009 Prepare support with durable history: add statistic for prepared updates evicted WT-6032 Turn on mongodb-4.4 branch upgrade/downgrade testing WT-6051 Fix reconstructing full value from modifies for string format WT-6068 Re-enable tests temporarily disabled during durable history development WT-6069 Remove WT_UPDATE_RESTORED_FROM_DISK flag WT-6070 Coverity : Copy paste error WT-6071 Coverity : Change format specifier WT-6086 Move time windows and aggregated time windows into structures WT-6087 Add a C2S(cursor) macro to simplify translation from a cursor to a session WT-6095 Verify on-disk page only for row store as part of rollback to stable WT-6109 Cleanup usage of cursor->session WT-6110 Cleanup cast from cbt to cursor WT-6120 Remove use-after-free in __verify_history_store_id WT-6130 Disable test_random_abort
Diffstat (limited to 'src/third_party/wiredtiger/src/btree/bt_ret.c')
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_ret.c241
1 files changed, 68 insertions, 173 deletions
diff --git a/src/third_party/wiredtiger/src/btree/bt_ret.c b/src/third_party/wiredtiger/src/btree/bt_ret.c
index 2061d561a7a..1a2360f6d09 100644
--- a/src/third_party/wiredtiger/src/btree/bt_ret.c
+++ b/src/third_party/wiredtiger/src/btree/bt_ret.c
@@ -23,7 +23,7 @@ __key_return(WT_CURSOR_BTREE *cbt)
page = cbt->ref->page;
cursor = &cbt->iface;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
+ session = CUR2S(cbt);
if (page->type == WT_PAGE_ROW_LEAF) {
rip = &page->pg_row[cbt->slot];
@@ -70,102 +70,71 @@ __key_return(WT_CURSOR_BTREE *cbt)
}
/*
- * __time_pairs_init --
- * Initialize the time pairs to globally visible.
+ * __read_col_time_window --
+ * Retrieve the time window from a column store cell.
*/
-static inline void
-__time_pairs_init(WT_TIME_PAIR *start, WT_TIME_PAIR *stop)
+static void
+__read_col_time_window(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL *cell, WT_TIME_WINDOW *tw)
{
- start->txnid = WT_TXN_NONE;
- start->timestamp = WT_TS_NONE;
- stop->txnid = WT_TXN_MAX;
- stop->timestamp = WT_TS_MAX;
+ WT_CELL_UNPACK unpack;
+
+ __wt_cell_unpack(session, page, cell, &unpack);
+ __wt_time_window_copy(tw, &unpack.tw);
}
/*
- * __time_pairs_set --
- * Set the time pairs.
+ * __wt_read_row_time_window --
+ * Retrieve the time window from a row.
*/
-static inline void
-__time_pairs_set(WT_TIME_PAIR *start, WT_TIME_PAIR *stop, WT_CELL_UNPACK *unpack)
+void
+__wt_read_row_time_window(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW *rip, WT_TIME_WINDOW *tw)
{
- start->timestamp = unpack->start_ts;
- start->txnid = unpack->start_txn;
- stop->timestamp = unpack->stop_ts;
- stop->txnid = unpack->stop_txn;
+ WT_CELL_UNPACK unpack;
+
+ __wt_time_window_init(tw);
+ /*
+ * If a value is simple and is globally visible at the time of reading a page into cache, we set
+ * the time pairs as globally visible.
+ */
+ if (__wt_row_leaf_value_exists(rip))
+ return;
+
+ __wt_row_leaf_value_cell(session, page, rip, NULL, &unpack);
+ __wt_time_window_copy(tw, &unpack.tw);
}
/*
- * __wt_read_cell_time_pairs --
+ * __wt_read_cell_time_window --
* Read the time pairs from the cell.
*/
void
-__wt_read_cell_time_pairs(
- WT_CURSOR_BTREE *cbt, WT_REF *ref, WT_TIME_PAIR *start, WT_TIME_PAIR *stop)
+__wt_read_cell_time_window(WT_CURSOR_BTREE *cbt, WT_REF *ref, WT_TIME_WINDOW *tw)
{
WT_PAGE *page;
WT_SESSION_IMPL *session;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
+ session = CUR2S(cbt);
page = ref->page;
- WT_ASSERT(session, start != NULL && stop != NULL);
+ WT_ASSERT(session, tw != NULL);
/* Take the value from the original page cell. */
if (page->type == WT_PAGE_ROW_LEAF) {
- __wt_read_row_time_pairs(session, page, &page->pg_row[cbt->slot], start, stop);
+ __wt_read_row_time_window(session, page, &page->pg_row[cbt->slot], tw);
} else if (page->type == WT_PAGE_COL_VAR) {
- __wt_read_col_time_pairs(
- session, page, WT_COL_PTR(page, &page->pg_var[cbt->slot]), start, stop);
+ __read_col_time_window(session, page, WT_COL_PTR(page, &page->pg_var[cbt->slot]), tw);
} else {
/* WT_PAGE_COL_FIX: return the default time pairs. */
- __time_pairs_init(start, stop);
+ __wt_time_window_init(tw);
}
}
/*
- * __wt_read_col_time_pairs --
- * Retrieve the time pairs from a column store cell.
- */
-void
-__wt_read_col_time_pairs(
- WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL *cell, WT_TIME_PAIR *start, WT_TIME_PAIR *stop)
-{
- WT_CELL_UNPACK unpack;
-
- __wt_cell_unpack(session, page, cell, &unpack);
- __time_pairs_set(start, stop, &unpack);
-}
-
-/*
- * __wt_read_row_time_pairs --
- * Retrieve the time pairs from a row.
- */
-void
-__wt_read_row_time_pairs(
- WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW *rip, WT_TIME_PAIR *start, WT_TIME_PAIR *stop)
-{
- WT_CELL_UNPACK unpack;
-
- __time_pairs_init(start, stop);
- /*
- * If a value is simple and is globally visible at the time of reading a page into cache, we set
- * the time pairs as globally visible.
- */
- if (__wt_row_leaf_value_exists(rip))
- return;
-
- __wt_row_leaf_value_cell(session, page, rip, NULL, &unpack);
- __time_pairs_set(start, stop, &unpack);
-}
-
-/*
* __wt_value_return_buf --
* Change a buffer to reference an internal original-page return value.
*/
int
-__wt_value_return_buf(
- WT_CURSOR_BTREE *cbt, WT_REF *ref, WT_ITEM *buf, WT_TIME_PAIR *start, WT_TIME_PAIR *stop)
+__wt_value_return_buf(WT_CURSOR_BTREE *cbt, WT_REF *ref, WT_ITEM *buf, WT_TIME_WINDOW *tw)
{
WT_BTREE *btree;
WT_CELL *cell;
@@ -176,18 +145,12 @@ __wt_value_return_buf(
WT_SESSION_IMPL *session;
uint8_t v;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
+ session = CUR2S(cbt);
btree = S2BT(session);
page = ref->page;
cursor = &cbt->iface;
- if (start != NULL && stop != NULL)
- __time_pairs_init(start, stop);
-
- /* Must provide either both start and stop as output parameters or neither. */
- WT_ASSERT(session, (start != NULL && stop != NULL) || (start == NULL && stop == NULL));
-
if (page->type == WT_PAGE_ROW_LEAF) {
rip = &page->pg_row[cbt->slot];
@@ -195,14 +158,16 @@ __wt_value_return_buf(
* If a value is simple and is globally visible at the time of reading a page into cache, we
* encode its location into the WT_ROW.
*/
- if (__wt_row_leaf_value(page, rip, buf))
+ if (__wt_row_leaf_value(page, rip, buf)) {
+ if (tw != NULL)
+ __wt_time_window_init(tw);
return (0);
+ }
/* Take the value from the original page cell. */
__wt_row_leaf_value_cell(session, page, rip, NULL, &unpack);
- if (start != NULL && stop != NULL)
- __time_pairs_set(start, stop, &unpack);
-
+ if (tw != NULL)
+ __wt_time_window_copy(tw, &unpack.tw);
return (__wt_page_cell_data_ref(session, page, &unpack, buf));
}
@@ -210,17 +175,18 @@ __wt_value_return_buf(
/* Take the value from the original page cell. */
cell = WT_COL_PTR(page, &page->pg_var[cbt->slot]);
__wt_cell_unpack(session, page, cell, &unpack);
- if (start != NULL && stop != NULL)
- __time_pairs_set(start, stop, &unpack);
-
+ if (tw != NULL)
+ __wt_time_window_copy(tw, &unpack.tw);
return (__wt_page_cell_data_ref(session, page, &unpack, buf));
}
/*
* WT_PAGE_COL_FIX: Take the value from the original page.
*
- * FIXME-PM-1523: Should also check visibility here
+ * FIXME-WT-6126: Should also check visibility here
*/
+ if (tw != NULL)
+ __wt_time_window_init(tw);
v = __bit_getv_recno(ref, cursor->recno, btree->bitcnt);
return (__wt_buf_set(session, buf, &v, 1));
}
@@ -232,95 +198,7 @@ __wt_value_return_buf(
static inline int
__value_return(WT_CURSOR_BTREE *cbt)
{
- return (__wt_value_return_buf(cbt, cbt->ref, &cbt->iface.value, NULL, NULL));
-}
-
-/*
- * __wt_value_return_upd --
- * Change the cursor to reference an internal update structure return value.
- */
-int
-__wt_value_return_upd(WT_CURSOR_BTREE *cbt, WT_UPDATE *upd)
-{
- WT_CURSOR *cursor;
- WT_DECL_RET;
- WT_MODIFY_VECTOR modifies;
- WT_SESSION_IMPL *session;
- WT_TIME_PAIR start, stop;
-
- cursor = &cbt->iface;
- session = (WT_SESSION_IMPL *)cbt->iface.session;
- __wt_modify_vector_init(session, &modifies);
-
- /*
- * We're passed a "standard" or "modified" update that's visible to us. Our caller should have
- * already checked for deleted items (we're too far down the call stack to return not-found).
- *
- * Fast path if it's a standard item, assert our caller's behavior.
- */
- if (upd->type == WT_UPDATE_STANDARD) {
- if (F_ISSET(upd, WT_UPDATE_RESTORED_FROM_DISK)) {
- /* Copy an external update, and delete after using it */
- WT_RET(__wt_buf_set(session, &cursor->value, upd->data, upd->size));
- __wt_free_update_list(session, &upd);
- } else {
- cursor->value.data = upd->data;
- cursor->value.size = upd->size;
- }
- return (0);
- }
- WT_ASSERT(session, upd->type == WT_UPDATE_MODIFY);
-
- /*
- * Find a complete update.
- */
- for (; upd != NULL; upd = upd->next) {
- if (upd->txnid == WT_TXN_ABORTED)
- continue;
-
- if (WT_UPDATE_DATA_VALUE(upd))
- break;
-
- if (upd->type == WT_UPDATE_MODIFY)
- WT_ERR(__wt_modify_vector_push(&modifies, upd));
- }
-
- /*
- * If there's no full update, the base item is the on-page item. If the update is a tombstone,
- * the base item is an empty item.
- */
- if (upd == NULL) {
- /*
- * Callers of this function set the cursor slot to an impossible value to check we don't try
- * and return on-page values when the update list should have been sufficient (which
- * happens, for example, if an update list was truncated, deleting some standard update
- * required by a previous modify update). Assert the case.
- */
- WT_ASSERT(session, cbt->slot != UINT32_MAX);
-
- WT_ERR(__wt_value_return_buf(cbt, cbt->ref, &cbt->iface.value, &start, &stop));
- /*
- * Applying modifies on top of a tombstone is invalid. So if we're using the onpage value,
- * the stop time pair should be unset.
- */
- WT_ASSERT(session, stop.txnid == WT_TXN_MAX && stop.timestamp == WT_TS_MAX);
- } else {
- /* The base update must not be a tombstone. */
- WT_ASSERT(session, upd->type == WT_UPDATE_STANDARD);
- WT_ERR(__wt_buf_set(session, &cursor->value, upd->data, upd->size));
- }
-
- /*
- * Once we have a base item, roll forward through any visible modify updates.
- */
- while (modifies.size > 0) {
- __wt_modify_vector_pop(&modifies, &upd);
- WT_ERR(__wt_modify_apply(cursor, upd->data));
- }
-
-err:
- __wt_modify_vector_free(&modifies);
- return (ret);
+ return (__wt_value_return_buf(cbt, cbt->ref, &cbt->iface.value, NULL));
}
/*
@@ -352,20 +230,37 @@ __wt_key_return(WT_CURSOR_BTREE *cbt)
/*
* __wt_value_return --
- * Change the cursor to reference an internal return value.
+ * Change the cursor to reference an update return value.
*/
int
-__wt_value_return(WT_CURSOR_BTREE *cbt, WT_UPDATE *upd)
+__wt_value_return(WT_CURSOR_BTREE *cbt, WT_UPDATE_VALUE *upd_value)
{
WT_CURSOR *cursor;
+ WT_SESSION_IMPL *session;
cursor = &cbt->iface;
+ session = CUR2S(cbt);
F_CLR(cursor, WT_CURSTD_VALUE_EXT);
- if (upd == NULL)
+ if (upd_value->type == WT_UPDATE_INVALID) {
+ /*
+ * FIXME-WT-6127: This is a holdover from the pre-durable history read logic where we used
+ * to fallback to the on-page value if we didn't find a visible update elsewhere. This is
+ * still required for fixed length column store as we have issues with this table type in
+ * durable history which we're planning to address in PM-1814.
+ */
+ WT_ASSERT(session, cbt->btree->type == BTREE_COL_FIX);
WT_RET(__value_return(cbt));
- else
- WT_RET(__wt_value_return_upd(cbt, upd));
+ } else {
+ /*
+ * We're passed a "standard" update that's visible to us. Our caller should have already
+ * checked for deleted items (we're too far down the call stack to return not-found) and any
+ * modify updates should be have been reconstructed into a full standard update.
+ */
+ WT_ASSERT(session, upd_value->type == WT_UPDATE_STANDARD);
+ cursor->value.data = upd_value->buf.data;
+ cursor->value.size = upd_value->buf.size;
+ }
F_SET(cursor, WT_CURSTD_VALUE_INT);
return (0);
}