summary | refs | log | tree | commit | diff
path: root/src/third_party/wiredtiger/src/include/cell.i
diff options
context:
space:
mode:
Diffstat (limited to 'src/third_party/wiredtiger/src/include/cell.i')
-rw-r--r-- src/third_party/wiredtiger/src/include/cell.i 456
1 files changed, 198 insertions, 258 deletions
diff --git a/src/third_party/wiredtiger/src/include/cell.i b/src/third_party/wiredtiger/src/include/cell.i
index bcd23946883..f14eb7f8d15 100644
--- a/src/third_party/wiredtiger/src/include/cell.i
+++ b/src/third_party/wiredtiger/src/include/cell.i
@@ -11,47 +11,47 @@
* Check the value's validity window for sanity.
*/
static inline void
-__cell_check_value_validity(WT_SESSION_IMPL *session, wt_timestamp_t durable_start_ts,
- wt_timestamp_t start_ts, uint64_t start_txn, wt_timestamp_t durable_stop_ts,
- wt_timestamp_t stop_ts, uint64_t stop_txn)
+__cell_check_value_validity(WT_SESSION_IMPL *session, WT_TIME_WINDOW *tw)
{
#ifdef HAVE_DIAGNOSTIC
+ /*
+ * We're using WT_ERR_ASSERT rather than WT_ASSERT because we want to push out a message string.
+ * This usage of WT_ERR_ASSERT isn't "correct", because it jumps to a non-existent error label
+ * in non-diagnostic builds and returns WT_PANIC without calling the underlying panic routine.
+ * That's OK, we have to be in a diagnostic build to get here, and fixing it would require new
+ * macros that aren't needed anywhere else, so we're leaving it alone.
+ */
char ts_string[2][WT_TS_INT_STRING_SIZE];
- if (start_ts > durable_start_ts)
- WT_ERR_ASSERT(session, start_ts <= durable_start_ts, WT_PANIC,
+ if (tw->start_ts > tw->durable_start_ts)
+ WT_ERR_ASSERT(session, tw->start_ts <= tw->durable_start_ts, WT_PANIC,
"a start timestamp %s newer than its durable start timestamp %s",
- __wt_timestamp_to_string(start_ts, ts_string[0]),
- __wt_timestamp_to_string(durable_start_ts, ts_string[1]));
+ __wt_timestamp_to_string(tw->start_ts, ts_string[0]),
+ __wt_timestamp_to_string(tw->durable_start_ts, ts_string[1]));
- if (start_ts != WT_TS_NONE && stop_ts == WT_TS_NONE)
- WT_ERR_ASSERT(session, stop_ts != WT_TS_NONE, WT_PANIC, "stop timestamp of 0");
+ if (tw->start_ts != WT_TS_NONE && tw->stop_ts == WT_TS_NONE)
+ WT_ERR_ASSERT(session, tw->stop_ts != WT_TS_NONE, WT_PANIC, "stop timestamp of 0");
- if (start_ts > stop_ts)
- WT_ERR_ASSERT(session, start_ts <= stop_ts, WT_PANIC,
+ if (tw->start_ts > tw->stop_ts)
+ WT_ERR_ASSERT(session, tw->start_ts <= tw->stop_ts, WT_PANIC,
"a start timestamp %s newer than its stop timestamp %s",
- __wt_timestamp_to_string(start_ts, ts_string[0]),
- __wt_timestamp_to_string(stop_ts, ts_string[1]));
+ __wt_timestamp_to_string(tw->start_ts, ts_string[0]),
+ __wt_timestamp_to_string(tw->stop_ts, ts_string[1]));
- if (start_txn > stop_txn)
- WT_ERR_ASSERT(session, start_txn <= stop_txn, WT_PANIC,
+ if (tw->start_txn > tw->stop_txn)
+ WT_ERR_ASSERT(session, tw->start_txn <= tw->stop_txn, WT_PANIC,
"a start transaction ID %" PRIu64 " newer than its stop transaction ID %" PRIu64,
- start_txn, stop_txn);
+ tw->start_txn, tw->stop_txn);
- if (stop_ts != WT_TS_MAX && stop_ts > durable_stop_ts)
- WT_ERR_ASSERT(session, stop_ts <= durable_stop_ts, WT_PANIC,
+ if (tw->stop_ts != WT_TS_MAX && tw->stop_ts > tw->durable_stop_ts)
+ WT_ERR_ASSERT(session, tw->stop_ts <= tw->durable_stop_ts, WT_PANIC,
"a stop timestamp %s newer than its durable stop timestamp %s",
- __wt_timestamp_to_string(stop_ts, ts_string[0]),
- __wt_timestamp_to_string(durable_stop_ts, ts_string[1]));
+ __wt_timestamp_to_string(tw->stop_ts, ts_string[0]),
+ __wt_timestamp_to_string(tw->durable_stop_ts, ts_string[1]));
#else
WT_UNUSED(session);
- WT_UNUSED(durable_start_ts);
- WT_UNUSED(durable_stop_ts);
- WT_UNUSED(start_ts);
- WT_UNUSED(start_txn);
- WT_UNUSED(stop_ts);
- WT_UNUSED(stop_txn);
+ WT_UNUSED(tw);
#endif
}
@@ -60,21 +60,17 @@ __cell_check_value_validity(WT_SESSION_IMPL *session, wt_timestamp_t durable_sta
* Pack the validity window for a value.
*/
static inline void
-__cell_pack_value_validity(WT_SESSION_IMPL *session, uint8_t **pp, wt_timestamp_t durable_start_ts,
- wt_timestamp_t start_ts, uint64_t start_txn, wt_timestamp_t durable_stop_ts,
- wt_timestamp_t stop_ts, uint64_t stop_txn, bool prepare)
+__cell_pack_value_validity(WT_SESSION_IMPL *session, uint8_t **pp, WT_TIME_WINDOW *tw)
{
uint8_t flags, *flagsp;
/* Globally visible values have no associated validity window. */
- if (durable_start_ts == WT_TS_NONE && start_ts == WT_TS_NONE && start_txn == WT_TXN_NONE &&
- durable_stop_ts == WT_TS_NONE && stop_ts == WT_TS_MAX && stop_txn == WT_TXN_MAX) {
+ if (__wt_time_window_is_empty(tw)) {
++*pp;
return;
}
- __cell_check_value_validity(
- session, durable_start_ts, start_ts, start_txn, durable_stop_ts, stop_ts, stop_txn);
+ __cell_check_value_validity(session, tw);
**pp |= WT_CELL_SECOND_DESC;
++*pp;
@@ -82,46 +78,41 @@ __cell_pack_value_validity(WT_SESSION_IMPL *session, uint8_t **pp, wt_timestamp_
++*pp;
flags = 0;
- if (start_ts != WT_TS_NONE) {
- WT_IGNORE_RET(__wt_vpack_uint(pp, 0, start_ts));
+ if (tw->start_ts != WT_TS_NONE) {
+ WT_IGNORE_RET(__wt_vpack_uint(pp, 0, tw->start_ts));
LF_SET(WT_CELL_TS_START);
}
- if (start_txn != WT_TXN_NONE) {
- WT_IGNORE_RET(__wt_vpack_uint(pp, 0, start_txn));
+ if (tw->start_txn != WT_TXN_NONE) {
+ WT_IGNORE_RET(__wt_vpack_uint(pp, 0, tw->start_txn));
LF_SET(WT_CELL_TXN_START);
}
- if (durable_start_ts != WT_TS_NONE) {
- WT_ASSERT(session, start_ts != WT_TS_NONE && start_ts <= durable_start_ts);
+ if (tw->durable_start_ts != WT_TS_NONE) {
+ WT_ASSERT(session, tw->start_ts <= tw->durable_start_ts);
/* Store differences if any, not absolutes. */
- if (durable_start_ts - start_ts > 0) {
- WT_IGNORE_RET(__wt_vpack_uint(pp, 0, durable_start_ts - start_ts));
+ if (tw->durable_start_ts - tw->start_ts > 0) {
+ WT_IGNORE_RET(__wt_vpack_uint(pp, 0, tw->durable_start_ts - tw->start_ts));
LF_SET(WT_CELL_TS_DURABLE_START);
}
}
- if (stop_ts != WT_TS_MAX) {
+ if (tw->stop_ts != WT_TS_MAX) {
/* Store differences, not absolutes. */
- WT_IGNORE_RET(__wt_vpack_uint(pp, 0, stop_ts - start_ts));
+ WT_IGNORE_RET(__wt_vpack_uint(pp, 0, tw->stop_ts - tw->start_ts));
LF_SET(WT_CELL_TS_STOP);
}
- if (stop_txn != WT_TXN_MAX) {
+ if (tw->stop_txn != WT_TXN_MAX) {
/* Store differences, not absolutes. */
- WT_IGNORE_RET(__wt_vpack_uint(pp, 0, stop_txn - start_txn));
+ WT_IGNORE_RET(__wt_vpack_uint(pp, 0, tw->stop_txn - tw->start_txn));
LF_SET(WT_CELL_TXN_STOP);
}
- if (durable_stop_ts != WT_TS_NONE) {
- WT_ASSERT(session, stop_ts != WT_TS_MAX && stop_ts <= durable_stop_ts);
+ if (tw->durable_stop_ts != WT_TS_NONE) {
+ WT_ASSERT(session, tw->stop_ts <= tw->durable_stop_ts);
/* Store differences if any, not absolutes. */
- if (durable_stop_ts - stop_ts > 0) {
- WT_IGNORE_RET(__wt_vpack_uint(pp, 0, durable_stop_ts - stop_ts));
+ if (tw->durable_stop_ts - tw->stop_ts > 0) {
+ WT_IGNORE_RET(__wt_vpack_uint(pp, 0, tw->durable_stop_ts - tw->stop_ts));
LF_SET(WT_CELL_TS_DURABLE_STOP);
}
}
- /*
- * Currently, no uncommitted prepared updates are written to the data store, so this flag must
- * be false until we allow writing them in WT-5984. In that ticket this assert must be removed.
- */
- WT_ASSERT(session, prepare == false);
- if (prepare)
+ if (tw->prepare)
LF_SET(WT_CELL_PREPARE);
*flagsp = flags;
}
@@ -131,47 +122,47 @@ __cell_pack_value_validity(WT_SESSION_IMPL *session, uint8_t **pp, wt_timestamp_
* Check the address' validity window for sanity.
*/
static inline void
-__wt_check_addr_validity(WT_SESSION_IMPL *session, wt_timestamp_t start_durable_ts,
- wt_timestamp_t oldest_start_ts, uint64_t oldest_start_txn, wt_timestamp_t stop_durable_ts,
- wt_timestamp_t newest_stop_ts, uint64_t newest_stop_txn)
+__wt_check_addr_validity(WT_SESSION_IMPL *session, WT_TIME_AGGREGATE *ta)
{
#ifdef HAVE_DIAGNOSTIC
+ /*
+ * We're using WT_ERR_ASSERT rather than WT_ASSERT because we want to push out a message string.
+ * This usage of WT_ERR_ASSERT isn't "correct", because it jumps to a non-existent error label
+ * in non-diagnostic builds and returns WT_PANIC without calling the underlying panic routine.
+ * That's OK, we have to be in a diagnostic build to get here, and fixing it would require new
+ * macros that aren't needed anywhere else, so we're leaving it alone.
+ */
char ts_string[2][WT_TS_INT_STRING_SIZE];
- if (oldest_start_ts != WT_TS_NONE && newest_stop_ts == WT_TS_NONE)
+ if (ta->oldest_start_ts != WT_TS_NONE && ta->newest_stop_ts == WT_TS_NONE)
WT_ERR_ASSERT(
- session, newest_stop_ts != WT_TS_NONE, WT_PANIC, "newest stop timestamp of 0");
+ session, ta->newest_stop_ts != WT_TS_NONE, WT_PANIC, "newest stop timestamp of 0");
- if (oldest_start_ts > newest_stop_ts)
- WT_ERR_ASSERT(session, oldest_start_ts <= newest_stop_ts, WT_PANIC,
+ if (ta->oldest_start_ts > ta->newest_stop_ts)
+ WT_ERR_ASSERT(session, ta->oldest_start_ts <= ta->newest_stop_ts, WT_PANIC,
"an oldest start timestamp %s newer than its newest stop timestamp %s",
- __wt_timestamp_to_string(oldest_start_ts, ts_string[0]),
- __wt_timestamp_to_string(newest_stop_ts, ts_string[1]));
+ __wt_timestamp_to_string(ta->oldest_start_ts, ts_string[0]),
+ __wt_timestamp_to_string(ta->newest_stop_ts, ts_string[1]));
- if (oldest_start_txn > newest_stop_txn)
- WT_ERR_ASSERT(session, oldest_start_txn <= newest_stop_txn, WT_PANIC,
+ if (ta->oldest_start_txn > ta->newest_stop_txn)
+ WT_ERR_ASSERT(session, ta->oldest_start_txn <= ta->newest_stop_txn, WT_PANIC,
"an oldest start transaction %" PRIu64 " newer than its newest stop transaction %" PRIu64,
- oldest_start_txn, newest_stop_txn);
+ ta->oldest_start_txn, ta->newest_stop_txn);
- if (oldest_start_ts > start_durable_ts)
- WT_ERR_ASSERT(session, oldest_start_ts <= start_durable_ts, WT_PANIC,
+ if (ta->oldest_start_ts > ta->newest_start_durable_ts)
+ WT_ERR_ASSERT(session, ta->oldest_start_ts <= ta->newest_start_durable_ts, WT_PANIC,
"an oldest start timestamp %s newer than its durable start timestamp %s",
- __wt_timestamp_to_string(oldest_start_ts, ts_string[0]),
- __wt_timestamp_to_string(start_durable_ts, ts_string[1]));
+ __wt_timestamp_to_string(ta->oldest_start_ts, ts_string[0]),
+ __wt_timestamp_to_string(ta->newest_start_durable_ts, ts_string[1]));
- if (newest_stop_ts != WT_TS_MAX && newest_stop_ts > stop_durable_ts)
- WT_ERR_ASSERT(session, newest_stop_ts <= stop_durable_ts, WT_PANIC,
+ if (ta->newest_stop_ts != WT_TS_MAX && ta->newest_stop_ts > ta->newest_stop_durable_ts)
+ WT_ERR_ASSERT(session, ta->newest_stop_ts <= ta->newest_stop_durable_ts, WT_PANIC,
"a newest stop timestamp %s newer than its durable stop timestamp %s",
- __wt_timestamp_to_string(newest_stop_ts, ts_string[0]),
- __wt_timestamp_to_string(stop_durable_ts, ts_string[1]));
+ __wt_timestamp_to_string(ta->newest_stop_ts, ts_string[0]),
+ __wt_timestamp_to_string(ta->newest_stop_durable_ts, ts_string[1]));
#else
WT_UNUSED(session);
- WT_UNUSED(start_durable_ts);
- WT_UNUSED(oldest_start_ts);
- WT_UNUSED(oldest_start_txn);
- WT_UNUSED(stop_durable_ts);
- WT_UNUSED(newest_stop_ts);
- WT_UNUSED(newest_stop_txn);
+ WT_UNUSED(ta);
#endif
}
@@ -180,22 +171,17 @@ __wt_check_addr_validity(WT_SESSION_IMPL *session, wt_timestamp_t start_durable_
* Pack the validity window for an address.
*/
static inline void
-__cell_pack_addr_validity(WT_SESSION_IMPL *session, uint8_t **pp, wt_timestamp_t start_durable_ts,
- wt_timestamp_t oldest_start_ts, uint64_t oldest_start_txn, wt_timestamp_t stop_durable_ts,
- wt_timestamp_t newest_stop_ts, uint64_t newest_stop_txn, bool prepare)
+__cell_pack_addr_validity(WT_SESSION_IMPL *session, uint8_t **pp, WT_TIME_AGGREGATE *ta)
{
uint8_t flags, *flagsp;
/* Globally visible values have no associated validity window. */
- if (start_durable_ts == WT_TS_NONE && stop_durable_ts == WT_TS_NONE &&
- oldest_start_ts == WT_TS_NONE && oldest_start_txn == WT_TXN_NONE &&
- newest_stop_ts == WT_TS_MAX && newest_stop_txn == WT_TXN_MAX) {
+ if (__wt_time_aggregate_is_empty(ta)) {
++*pp;
return;
}
- __wt_check_addr_validity(session, start_durable_ts, oldest_start_ts, oldest_start_txn,
- stop_durable_ts, newest_stop_ts, newest_stop_txn);
+ __wt_check_addr_validity(session, ta);
**pp |= WT_CELL_SECOND_DESC;
++*pp;
@@ -203,21 +189,18 @@ __cell_pack_addr_validity(WT_SESSION_IMPL *session, uint8_t **pp, wt_timestamp_t
++*pp;
flags = 0;
- if (oldest_start_ts != WT_TS_NONE) {
- WT_IGNORE_RET(__wt_vpack_uint(pp, 0, oldest_start_ts));
+ if (ta->oldest_start_ts != WT_TS_NONE) {
+ WT_IGNORE_RET(__wt_vpack_uint(pp, 0, ta->oldest_start_ts));
LF_SET(WT_CELL_TS_START);
}
- if (oldest_start_txn != WT_TXN_NONE) {
- WT_IGNORE_RET(__wt_vpack_uint(pp, 0, oldest_start_txn));
+ if (ta->oldest_start_txn != WT_TXN_NONE) {
+ WT_IGNORE_RET(__wt_vpack_uint(pp, 0, ta->oldest_start_txn));
LF_SET(WT_CELL_TXN_START);
}
- if (start_durable_ts != WT_TS_NONE) {
+ if (ta->newest_start_durable_ts != WT_TS_NONE) {
/* Store differences, not absolutes. */
- /*
- * FIXME-prepare-support:
- * WT_ASSERT(
- * session, oldest_start_ts != WT_TS_NONE && oldest_start_ts <= start_durable_ts);
- */
+ WT_ASSERT(session, ta->oldest_start_ts <= ta->newest_start_durable_ts);
+
/*
* Unlike value cell, we store the durable start timestamp even the difference is zero
* compared to oldest commit timestamp. The difference can only be zero when the page
@@ -225,43 +208,38 @@ __cell_pack_addr_validity(WT_SESSION_IMPL *session, uint8_t **pp, wt_timestamp_t
* having that check to find out whether it is zero or not will unnecessarily add overhead
* than benefit.
*/
- WT_IGNORE_RET(__wt_vpack_uint(pp, 0, start_durable_ts - oldest_start_ts));
+ WT_IGNORE_RET(__wt_vpack_uint(pp, 0, ta->newest_start_durable_ts - ta->oldest_start_ts));
LF_SET(WT_CELL_TS_DURABLE_START);
}
- if (newest_stop_ts != WT_TS_MAX) {
+ if (ta->newest_stop_ts != WT_TS_MAX) {
/* Store differences, not absolutes. */
- WT_IGNORE_RET(__wt_vpack_uint(pp, 0, newest_stop_ts - oldest_start_ts));
+ WT_IGNORE_RET(__wt_vpack_uint(pp, 0, ta->newest_stop_ts - ta->oldest_start_ts));
LF_SET(WT_CELL_TS_STOP);
}
- if (newest_stop_txn != WT_TXN_MAX) {
+ if (ta->newest_stop_txn != WT_TXN_MAX) {
/* Store differences, not absolutes. */
- WT_IGNORE_RET(__wt_vpack_uint(pp, 0, newest_stop_txn - oldest_start_txn));
+ WT_IGNORE_RET(__wt_vpack_uint(pp, 0, ta->newest_stop_txn - ta->oldest_start_txn));
LF_SET(WT_CELL_TXN_STOP);
}
- if (stop_durable_ts != WT_TS_NONE) {
- /* Store differences, not absolutes. */
- /*
- * FIXME-prepare-support:
- * WT_ASSERT(session,
- * newest_stop_ts != WT_TS_MAX && newest_stop_ts <= stop_durable__ts);
- */
+ if (ta->newest_stop_durable_ts != WT_TS_NONE) {
+ WT_ASSERT(session,
+ ta->newest_stop_ts == WT_TS_MAX || ta->newest_stop_ts <= ta->newest_stop_durable_ts);
+
/*
+ * Store differences, not absolutes.
+ *
* Unlike value cell, we store the durable stop timestamp even the difference is zero
* compared to newest commit timestamp. The difference can only be zero when the page
* contains all the key/value pairs with the same timestamp. But this scenario is rare and
* having that check to find out whether it is zero or not will unnecessarily add overhead
* than benefit.
*/
- WT_IGNORE_RET(__wt_vpack_uint(pp, 0, stop_durable_ts - newest_stop_ts));
+ WT_IGNORE_RET(__wt_vpack_uint(pp, 0, ta->newest_stop_durable_ts - ta->newest_stop_ts));
LF_SET(WT_CELL_TS_DURABLE_STOP);
}
- /*
- * Currently, no uncommitted prepared updates are written to the data store, so this flag must
- * be false until we allow writing them in WT-5984. In that ticket this assert must be removed.
- */
- WT_ASSERT(session, prepare == false);
- if (prepare)
+ if (ta->prepare)
LF_SET(WT_CELL_PREPARE);
+
*flagsp = flags;
}
@@ -271,9 +249,7 @@ __cell_pack_addr_validity(WT_SESSION_IMPL *session, uint8_t **pp, wt_timestamp_t
*/
static inline size_t
__wt_cell_pack_addr(WT_SESSION_IMPL *session, WT_CELL *cell, u_int cell_type, uint64_t recno,
- wt_timestamp_t start_durable_ts, wt_timestamp_t oldest_start_ts, uint64_t oldest_start_txn,
- wt_timestamp_t stop_durable_ts, wt_timestamp_t newest_stop_ts, uint64_t newest_stop_txn,
- bool prepare, size_t size)
+ WT_TIME_AGGREGATE *ta, size_t size)
{
uint8_t *p;
@@ -281,8 +257,7 @@ __wt_cell_pack_addr(WT_SESSION_IMPL *session, WT_CELL *cell, u_int cell_type, ui
p = cell->__chunk;
*p = '\0';
- __cell_pack_addr_validity(session, &p, start_durable_ts, oldest_start_ts, oldest_start_txn,
- stop_durable_ts, newest_stop_ts, newest_stop_txn, prepare);
+ __cell_pack_addr_validity(session, &p, ta);
if (recno == WT_RECNO_OOB)
cell->__chunk[0] |= (uint8_t)cell_type; /* Type */
@@ -301,9 +276,8 @@ __wt_cell_pack_addr(WT_SESSION_IMPL *session, WT_CELL *cell, u_int cell_type, ui
* Set a value item's WT_CELL contents.
*/
static inline size_t
-__wt_cell_pack_value(WT_SESSION_IMPL *session, WT_CELL *cell, wt_timestamp_t durable_start_ts,
- wt_timestamp_t start_ts, uint64_t start_txn, wt_timestamp_t durable_stop_ts,
- wt_timestamp_t stop_ts, uint64_t stop_txn, bool prepare, uint64_t rle, size_t size)
+__wt_cell_pack_value(
+ WT_SESSION_IMPL *session, WT_CELL *cell, WT_TIME_WINDOW *tw, uint64_t rle, size_t size)
{
uint8_t byte, *p;
bool validity;
@@ -312,8 +286,7 @@ __wt_cell_pack_value(WT_SESSION_IMPL *session, WT_CELL *cell, wt_timestamp_t dur
p = cell->__chunk;
*p = '\0';
- __cell_pack_value_validity(session, &p, durable_start_ts, start_ts, start_txn, durable_stop_ts,
- stop_ts, stop_txn, prepare);
+ __cell_pack_value_validity(session, &p, tw);
/*
* Short data cells without a validity window or run-length encoding have 6 bits of data length
@@ -435,9 +408,8 @@ __wt_cell_pack_value_match(
* Write a copy value cell.
*/
static inline size_t
-__wt_cell_pack_copy(WT_SESSION_IMPL *session, WT_CELL *cell, wt_timestamp_t start_durable_ts,
- wt_timestamp_t start_ts, uint64_t start_txn, wt_timestamp_t stop_durable_ts,
- wt_timestamp_t stop_ts, uint64_t stop_txn, bool prepare, uint64_t rle, uint64_t v)
+__wt_cell_pack_copy(
+ WT_SESSION_IMPL *session, WT_CELL *cell, WT_TIME_WINDOW *tw, uint64_t rle, uint64_t v)
{
uint8_t *p;
@@ -445,8 +417,7 @@ __wt_cell_pack_copy(WT_SESSION_IMPL *session, WT_CELL *cell, wt_timestamp_t star
p = cell->__chunk;
*p = '\0';
- __cell_pack_value_validity(session, &p, start_durable_ts, start_ts, start_txn, stop_durable_ts,
- stop_ts, stop_txn, prepare);
+ __cell_pack_value_validity(session, &p, tw);
if (rle < 2)
cell->__chunk[0] |= WT_CELL_VALUE_COPY; /* Type */
@@ -466,9 +437,7 @@ __wt_cell_pack_copy(WT_SESSION_IMPL *session, WT_CELL *cell, wt_timestamp_t star
* Write a deleted value cell.
*/
static inline size_t
-__wt_cell_pack_del(WT_SESSION_IMPL *session, WT_CELL *cell, wt_timestamp_t start_durable_ts,
- wt_timestamp_t start_ts, uint64_t start_txn, wt_timestamp_t stop_durable_ts,
- wt_timestamp_t stop_ts, uint64_t stop_txn, uint64_t rle)
+__wt_cell_pack_del(WT_SESSION_IMPL *session, WT_CELL *cell, WT_TIME_WINDOW *tw, uint64_t rle)
{
uint8_t *p;
@@ -476,9 +445,8 @@ __wt_cell_pack_del(WT_SESSION_IMPL *session, WT_CELL *cell, wt_timestamp_t start
p = cell->__chunk;
*p = '\0';
- /* FIXME-prepare-support: we should pass prepare value. */
- __cell_pack_value_validity(session, &p, start_durable_ts, start_ts, start_txn, stop_durable_ts,
- stop_ts, stop_txn, false);
+ /* FIXME-WT-6124: we should set the time window prepare value. */
+ __cell_pack_value_validity(session, &p, tw);
if (rle < 2)
cell->__chunk[0] |= WT_CELL_DEL; /* Type */
@@ -564,9 +532,7 @@ __wt_cell_pack_leaf_key(WT_CELL *cell, uint8_t prefix, size_t size)
* Pack an overflow cell.
*/
static inline size_t
-__wt_cell_pack_ovfl(WT_SESSION_IMPL *session, WT_CELL *cell, uint8_t type,
- wt_timestamp_t durable_start_ts, wt_timestamp_t start_ts, uint64_t start_txn,
- wt_timestamp_t durable_stop_ts, wt_timestamp_t stop_ts, uint64_t stop_txn, bool prepare,
+__wt_cell_pack_ovfl(WT_SESSION_IMPL *session, WT_CELL *cell, uint8_t type, WT_TIME_WINDOW *tw,
uint64_t rle, size_t size)
{
uint8_t *p;
@@ -578,12 +544,12 @@ __wt_cell_pack_ovfl(WT_SESSION_IMPL *session, WT_CELL *cell, uint8_t type,
switch (type) {
case WT_CELL_KEY_OVFL:
case WT_CELL_KEY_OVFL_RM:
+ WT_ASSERT(session, tw == NULL);
++p;
break;
case WT_CELL_VALUE_OVFL:
case WT_CELL_VALUE_OVFL_RM:
- __cell_pack_value_validity(session, &p, durable_start_ts, start_ts, start_txn,
- durable_stop_ts, stop_ts, stop_txn, prepare);
+ __cell_pack_value_validity(session, &p, tw);
break;
}
@@ -739,26 +705,22 @@ __wt_cell_unpack_safe(WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, WT_CE
{
struct {
uint64_t v;
- wt_timestamp_t start_ts;
- wt_timestamp_t durable_start_ts;
- uint64_t start_txn;
- wt_timestamp_t stop_ts;
- wt_timestamp_t durable_stop_ts;
- uint64_t stop_txn;
+ WT_TIME_WINDOW tw;
uint32_t len;
} copy;
+ WT_TIME_AGGREGATE *ta;
+ WT_TIME_WINDOW *tw;
uint64_t v;
const uint8_t *p;
uint8_t flags;
+ bool copy_cell;
+
+ copy_cell = false;
+ copy.len = 0; /* [-Wconditional-uninitialized] */
+ copy.v = 0; /* [-Wconditional-uninitialized] */
- copy.v = 0; /* -Werror=maybe-uninitialized */
- copy.start_ts = WT_TS_NONE;
- copy.durable_start_ts = WT_TS_NONE;
- copy.start_txn = WT_TXN_NONE;
- copy.stop_ts = WT_TS_MAX;
- copy.durable_stop_ts = WT_TS_NONE;
- copy.stop_txn = WT_TXN_MAX;
- copy.len = 0;
+ tw = &unpack->tw;
+ ta = &unpack->ta;
/*
* The verification code specifies an end argument, a pointer to 1B past the end-of-page. In which
@@ -789,18 +751,8 @@ restart:
* following switch. All validity windows default to durability.
*/
unpack->v = 0;
- unpack->durable_start_ts = WT_TS_NONE;
- unpack->durable_stop_ts = WT_TS_NONE;
- unpack->start_ts = WT_TS_NONE;
- unpack->start_txn = WT_TXN_NONE;
- unpack->stop_ts = WT_TS_MAX;
- unpack->stop_txn = WT_TXN_MAX;
- unpack->newest_start_durable_ts = WT_TS_NONE;
- unpack->newest_stop_durable_ts = WT_TS_NONE;
- unpack->oldest_start_ts = WT_TS_NONE;
- unpack->oldest_start_txn = WT_TXN_NONE;
- unpack->newest_stop_ts = WT_TS_MAX;
- unpack->newest_stop_txn = WT_TXN_MAX;
+ __wt_time_window_init(&unpack->tw);
+ __wt_time_aggregate_init(&unpack->ta);
unpack->raw = (uint8_t)__wt_cell_type_raw(cell);
unpack->type = (uint8_t)__wt_cell_type(cell);
unpack->flags = 0;
@@ -852,39 +804,38 @@ restart:
break;
flags = *p++; /* skip second descriptor byte */
- if (LF_ISSET(WT_CELL_PREPARE))
+ if (LF_ISSET(WT_CELL_PREPARE)) {
F_SET(unpack, WT_CELL_UNPACK_PREPARE);
+ ta->prepare = 1;
+ }
if (LF_ISSET(WT_CELL_TS_START))
- WT_RET(__wt_vunpack_uint(
- &p, end == NULL ? 0 : WT_PTRDIFF(end, p), &unpack->oldest_start_ts));
+ WT_RET(
+ __wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &ta->oldest_start_ts));
if (LF_ISSET(WT_CELL_TXN_START))
- WT_RET(__wt_vunpack_uint(
- &p, end == NULL ? 0 : WT_PTRDIFF(end, p), &unpack->oldest_start_txn));
+ WT_RET(
+ __wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &ta->oldest_start_txn));
if (LF_ISSET(WT_CELL_TS_DURABLE_START)) {
WT_RET(__wt_vunpack_uint(
- &p, end == NULL ? 0 : WT_PTRDIFF(end, p), &unpack->newest_start_durable_ts));
- unpack->newest_start_durable_ts += unpack->oldest_start_ts;
+ &p, end == NULL ? 0 : WT_PTRDIFF(end, p), &ta->newest_start_durable_ts));
+ ta->newest_start_durable_ts += ta->oldest_start_ts;
}
if (LF_ISSET(WT_CELL_TS_STOP)) {
WT_RET(
- __wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &unpack->newest_stop_ts));
- unpack->newest_stop_ts += unpack->oldest_start_ts;
+ __wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &ta->newest_stop_ts));
+ ta->newest_stop_ts += ta->oldest_start_ts;
}
if (LF_ISSET(WT_CELL_TXN_STOP)) {
- WT_RET(__wt_vunpack_uint(
- &p, end == NULL ? 0 : WT_PTRDIFF(end, p), &unpack->newest_stop_txn));
- unpack->newest_stop_txn += unpack->oldest_start_txn;
+ WT_RET(
+ __wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &ta->newest_stop_txn));
+ ta->newest_stop_txn += ta->oldest_start_txn;
}
if (LF_ISSET(WT_CELL_TS_DURABLE_STOP)) {
WT_RET(__wt_vunpack_uint(
- &p, end == NULL ? 0 : WT_PTRDIFF(end, p), &unpack->newest_stop_durable_ts));
- unpack->newest_stop_durable_ts += unpack->newest_stop_ts;
+ &p, end == NULL ? 0 : WT_PTRDIFF(end, p), &ta->newest_stop_durable_ts));
+ ta->newest_stop_durable_ts += ta->newest_stop_ts;
}
-
- __wt_check_addr_validity(session, unpack->newest_start_durable_ts, unpack->oldest_start_ts,
- unpack->oldest_start_txn, unpack->newest_stop_durable_ts, unpack->newest_stop_ts,
- unpack->newest_stop_txn);
+ __wt_check_addr_validity(session, ta);
break;
case WT_CELL_DEL:
case WT_CELL_VALUE:
@@ -895,38 +846,39 @@ restart:
break;
flags = *p++; /* skip second descriptor byte */
- if (LF_ISSET(WT_CELL_PREPARE))
+ if (LF_ISSET(WT_CELL_PREPARE)) {
F_SET(unpack, WT_CELL_UNPACK_PREPARE);
+ tw->prepare = 1;
+ }
if (LF_ISSET(WT_CELL_TS_START))
- WT_RET(__wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &unpack->start_ts));
+ WT_RET(__wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &tw->start_ts));
if (LF_ISSET(WT_CELL_TXN_START))
- WT_RET(__wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &unpack->start_txn));
+ WT_RET(__wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &tw->start_txn));
if (LF_ISSET(WT_CELL_TS_DURABLE_START)) {
- WT_RET(__wt_vunpack_uint(
- &p, end == NULL ? 0 : WT_PTRDIFF(end, p), &unpack->durable_start_ts));
- unpack->durable_start_ts += unpack->start_ts;
+ WT_RET(
+ __wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &tw->durable_start_ts));
+ tw->durable_start_ts += tw->start_ts;
} else
- unpack->durable_start_ts = unpack->start_ts;
+ tw->durable_start_ts = tw->start_ts;
if (LF_ISSET(WT_CELL_TS_STOP)) {
- WT_RET(__wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &unpack->stop_ts));
- unpack->stop_ts += unpack->start_ts;
+ WT_RET(__wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &tw->stop_ts));
+ tw->stop_ts += tw->start_ts;
}
if (LF_ISSET(WT_CELL_TXN_STOP)) {
- WT_RET(__wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &unpack->stop_txn));
- unpack->stop_txn += unpack->start_txn;
+ WT_RET(__wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &tw->stop_txn));
+ tw->stop_txn += tw->start_txn;
}
if (LF_ISSET(WT_CELL_TS_DURABLE_STOP)) {
- WT_RET(__wt_vunpack_uint(
- &p, end == NULL ? 0 : WT_PTRDIFF(end, p), &unpack->durable_stop_ts));
- unpack->durable_stop_ts += unpack->stop_ts;
- } else if (unpack->stop_ts != WT_TS_MAX)
- unpack->durable_stop_ts = unpack->stop_ts;
+ WT_RET(
+ __wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &tw->durable_stop_ts));
+ tw->durable_stop_ts += tw->stop_ts;
+ } else if (tw->stop_ts != WT_TS_MAX)
+ tw->durable_stop_ts = tw->stop_ts;
else
- unpack->durable_stop_ts = WT_TS_NONE;
+ tw->durable_stop_ts = WT_TS_NONE;
- __cell_check_value_validity(session, unpack->durable_start_ts, unpack->start_ts,
- unpack->start_txn, unpack->durable_stop_ts, unpack->stop_ts, unpack->stop_txn);
+ __cell_check_value_validity(session, tw);
break;
}
@@ -943,19 +895,16 @@ restart:
*/
switch (unpack->raw) {
case WT_CELL_VALUE_COPY:
+ copy_cell = true;
+
/*
* The cell is followed by an offset to a cell written earlier in the page. Save/restore the
- * length and RLE of this cell, we need the length to step through the set of cells on the
- * page and this RLE is probably different from the RLE of the earlier cell.
+ * visibility window, length and RLE of this cell, we need the length to step through the
+ * set of cells on the page and the RLE and timestamp information are specific to this cell.
*/
+ __wt_time_window_copy(&copy.tw, tw);
WT_RET(__wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &v));
copy.v = unpack->v;
- copy.start_ts = unpack->start_ts;
- copy.durable_start_ts = unpack->durable_start_ts;
- copy.start_txn = unpack->start_txn;
- copy.stop_ts = unpack->stop_ts;
- copy.durable_stop_ts = unpack->durable_stop_ts;
- copy.stop_txn = unpack->stop_txn;
copy.len = WT_PTRDIFF32(p, cell);
cell = (WT_CELL *)((uint8_t *)cell - v);
goto restart;
@@ -1003,22 +952,17 @@ restart:
return (WT_ERROR); /* Unknown cell type. */
}
-/*
- * Check the original cell against the full cell length (this is a diagnostic as well, we may be
- * copying the cell from the page and we need the right length).
- */
done:
+ /*
+ * Check the original cell against the full cell length (this is a diagnostic as well, we may be
+ * copying the cell from the page and we need the right length).
+ */
WT_CELL_LEN_CHK(cell, unpack->__len);
- if (copy.len != 0) {
- unpack->raw = WT_CELL_VALUE_COPY;
+ if (copy_cell) {
+ __wt_time_window_copy(tw, &copy.tw);
unpack->v = copy.v;
- unpack->start_ts = copy.start_ts;
- unpack->durable_start_ts = copy.durable_start_ts;
- unpack->start_txn = copy.start_txn;
- unpack->stop_ts = copy.stop_ts;
- unpack->durable_stop_ts = copy.durable_stop_ts;
- unpack->stop_txn = copy.stop_txn;
unpack->__len = copy.len;
+ unpack->raw = WT_CELL_VALUE_COPY;
}
return (0);
@@ -1032,6 +976,12 @@ static inline void
__wt_cell_unpack_dsk(
WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, WT_CELL *cell, WT_CELL_UNPACK *unpack)
{
+ WT_TIME_AGGREGATE *ta;
+ WT_TIME_WINDOW *tw;
+
+ ta = &unpack->ta;
+ tw = &unpack->tw;
+
/*
* Row-store doesn't store zero-length values on pages, but this allows us to pretend.
*/
@@ -1042,18 +992,8 @@ __wt_cell_unpack_dsk(
* If there isn't any value validity window (which is what it will take to get to a
* zero-length item), the value must be stable.
*/
- unpack->durable_start_ts = WT_TS_NONE;
- unpack->durable_stop_ts = WT_TS_NONE;
- unpack->start_ts = WT_TS_NONE;
- unpack->start_txn = WT_TXN_NONE;
- unpack->stop_ts = WT_TS_MAX;
- unpack->stop_txn = WT_TXN_MAX;
- unpack->newest_start_durable_ts = WT_TS_NONE;
- unpack->newest_stop_durable_ts = WT_TS_NONE;
- unpack->oldest_start_ts = WT_TS_NONE;
- unpack->oldest_start_txn = WT_TXN_NONE;
- unpack->newest_stop_ts = WT_TS_MAX;
- unpack->newest_stop_txn = WT_TXN_MAX;
+ __wt_time_window_init(tw);
+ __wt_time_aggregate_init(ta);
unpack->data = "";
unpack->size = 0;
unpack->__len = 0;
@@ -1081,30 +1021,30 @@ __wt_cell_unpack_dsk(
* Previous startup txnid=0, ts=y txnid=0, ts=WT_TS_NONE txnid=MAX, ts=MAX
*/
if (dsk->write_gen > 0 && dsk->write_gen <= S2C(session)->base_write_gen) {
- /* FIXME-prepare-support: deal with durable timestamps. */
+ /* FIXME-WT-6124: deal with durable timestamps. */
/* Tell reconciliation we cleared the transaction ids and the cell needs to be rebuilt. */
- if (unpack->start_txn != WT_TXN_NONE) {
- unpack->start_txn = WT_TXN_NONE;
- F_SET(unpack, WT_CELL_UNPACK_TIME_PAIRS_CLEARED);
+ if (tw->start_txn != WT_TXN_NONE) {
+ tw->start_txn = WT_TXN_NONE;
+ F_SET(unpack, WT_CELL_UNPACK_TIME_WINDOW_CLEARED);
}
- if (unpack->stop_txn != WT_TXN_MAX) {
- unpack->stop_txn = WT_TXN_NONE;
- F_SET(unpack, WT_CELL_UNPACK_TIME_PAIRS_CLEARED);
- if (unpack->stop_ts == WT_TS_MAX)
- unpack->stop_ts = WT_TS_NONE;
+ if (tw->stop_txn != WT_TXN_MAX) {
+ tw->stop_txn = WT_TXN_NONE;
+ F_SET(unpack, WT_CELL_UNPACK_TIME_WINDOW_CLEARED);
+ if (tw->stop_ts == WT_TS_MAX)
+ tw->stop_ts = WT_TS_NONE;
} else
- WT_ASSERT(session, unpack->stop_ts == WT_TS_MAX);
- if (unpack->oldest_start_txn != WT_TXN_NONE) {
- unpack->oldest_start_txn = WT_TXN_NONE;
- F_SET(unpack, WT_CELL_UNPACK_TIME_PAIRS_CLEARED);
+ WT_ASSERT(session, tw->stop_ts == WT_TS_MAX);
+ if (ta->oldest_start_txn != WT_TXN_NONE) {
+ ta->oldest_start_txn = WT_TXN_NONE;
+ F_SET(unpack, WT_CELL_UNPACK_TIME_WINDOW_CLEARED);
}
- if (unpack->newest_stop_txn != WT_TXN_MAX) {
- unpack->newest_stop_txn = WT_TXN_NONE;
- F_SET(unpack, WT_CELL_UNPACK_TIME_PAIRS_CLEARED);
- if (unpack->newest_stop_ts == WT_TS_MAX)
- unpack->newest_stop_ts = WT_TS_NONE;
+ if (ta->newest_stop_txn != WT_TXN_MAX) {
+ ta->newest_stop_txn = WT_TXN_NONE;
+ F_SET(unpack, WT_CELL_UNPACK_TIME_WINDOW_CLEARED);
+ if (ta->newest_stop_ts == WT_TS_MAX)
+ ta->newest_stop_ts = WT_TS_NONE;
} else
- WT_ASSERT(session, unpack->newest_stop_ts == WT_TS_MAX);
+ WT_ASSERT(session, ta->newest_stop_ts == WT_TS_MAX);
}
}