diff options
Diffstat (limited to 'src/third_party/wiredtiger/src/include/cell.i')
-rw-r--r-- | src/third_party/wiredtiger/src/include/cell.i | 456 |
1 files changed, 198 insertions, 258 deletions
diff --git a/src/third_party/wiredtiger/src/include/cell.i b/src/third_party/wiredtiger/src/include/cell.i index bcd23946883..f14eb7f8d15 100644 --- a/src/third_party/wiredtiger/src/include/cell.i +++ b/src/third_party/wiredtiger/src/include/cell.i @@ -11,47 +11,47 @@ * Check the value's validity window for sanity. */ static inline void -__cell_check_value_validity(WT_SESSION_IMPL *session, wt_timestamp_t durable_start_ts, - wt_timestamp_t start_ts, uint64_t start_txn, wt_timestamp_t durable_stop_ts, - wt_timestamp_t stop_ts, uint64_t stop_txn) +__cell_check_value_validity(WT_SESSION_IMPL *session, WT_TIME_WINDOW *tw) { #ifdef HAVE_DIAGNOSTIC + /* + * We're using WT_ERR_ASSERT rather than WT_ASSERT because we want to push out a message string. + * This usage of WT_ERR_ASSERT isn't "correct", because it jumps to a non-existent error label + * in non-diagnostic builds and returns WT_PANIC without calling the underlying panic routine. + * That's OK, we have to be in a diagnostic build to get here, and fixing it would require new + * macros that aren't needed anywhere else, so we're leaving it alone. 
+ */ char ts_string[2][WT_TS_INT_STRING_SIZE]; - if (start_ts > durable_start_ts) - WT_ERR_ASSERT(session, start_ts <= durable_start_ts, WT_PANIC, + if (tw->start_ts > tw->durable_start_ts) + WT_ERR_ASSERT(session, tw->start_ts <= tw->durable_start_ts, WT_PANIC, "a start timestamp %s newer than its durable start timestamp %s", - __wt_timestamp_to_string(start_ts, ts_string[0]), - __wt_timestamp_to_string(durable_start_ts, ts_string[1])); + __wt_timestamp_to_string(tw->start_ts, ts_string[0]), + __wt_timestamp_to_string(tw->durable_start_ts, ts_string[1])); - if (start_ts != WT_TS_NONE && stop_ts == WT_TS_NONE) - WT_ERR_ASSERT(session, stop_ts != WT_TS_NONE, WT_PANIC, "stop timestamp of 0"); + if (tw->start_ts != WT_TS_NONE && tw->stop_ts == WT_TS_NONE) + WT_ERR_ASSERT(session, tw->stop_ts != WT_TS_NONE, WT_PANIC, "stop timestamp of 0"); - if (start_ts > stop_ts) - WT_ERR_ASSERT(session, start_ts <= stop_ts, WT_PANIC, + if (tw->start_ts > tw->stop_ts) + WT_ERR_ASSERT(session, tw->start_ts <= tw->stop_ts, WT_PANIC, "a start timestamp %s newer than its stop timestamp %s", - __wt_timestamp_to_string(start_ts, ts_string[0]), - __wt_timestamp_to_string(stop_ts, ts_string[1])); + __wt_timestamp_to_string(tw->start_ts, ts_string[0]), + __wt_timestamp_to_string(tw->stop_ts, ts_string[1])); - if (start_txn > stop_txn) - WT_ERR_ASSERT(session, start_txn <= stop_txn, WT_PANIC, + if (tw->start_txn > tw->stop_txn) + WT_ERR_ASSERT(session, tw->start_txn <= tw->stop_txn, WT_PANIC, "a start transaction ID %" PRIu64 " newer than its stop transaction ID %" PRIu64, - start_txn, stop_txn); + tw->start_txn, tw->stop_txn); - if (stop_ts != WT_TS_MAX && stop_ts > durable_stop_ts) - WT_ERR_ASSERT(session, stop_ts <= durable_stop_ts, WT_PANIC, + if (tw->stop_ts != WT_TS_MAX && tw->stop_ts > tw->durable_stop_ts) + WT_ERR_ASSERT(session, tw->stop_ts <= tw->durable_stop_ts, WT_PANIC, "a stop timestamp %s newer than its durable stop timestamp %s", - __wt_timestamp_to_string(stop_ts, 
ts_string[0]), - __wt_timestamp_to_string(durable_stop_ts, ts_string[1])); + __wt_timestamp_to_string(tw->stop_ts, ts_string[0]), + __wt_timestamp_to_string(tw->durable_stop_ts, ts_string[1])); #else WT_UNUSED(session); - WT_UNUSED(durable_start_ts); - WT_UNUSED(durable_stop_ts); - WT_UNUSED(start_ts); - WT_UNUSED(start_txn); - WT_UNUSED(stop_ts); - WT_UNUSED(stop_txn); + WT_UNUSED(tw); #endif } @@ -60,21 +60,17 @@ __cell_check_value_validity(WT_SESSION_IMPL *session, wt_timestamp_t durable_sta * Pack the validity window for a value. */ static inline void -__cell_pack_value_validity(WT_SESSION_IMPL *session, uint8_t **pp, wt_timestamp_t durable_start_ts, - wt_timestamp_t start_ts, uint64_t start_txn, wt_timestamp_t durable_stop_ts, - wt_timestamp_t stop_ts, uint64_t stop_txn, bool prepare) +__cell_pack_value_validity(WT_SESSION_IMPL *session, uint8_t **pp, WT_TIME_WINDOW *tw) { uint8_t flags, *flagsp; /* Globally visible values have no associated validity window. */ - if (durable_start_ts == WT_TS_NONE && start_ts == WT_TS_NONE && start_txn == WT_TXN_NONE && - durable_stop_ts == WT_TS_NONE && stop_ts == WT_TS_MAX && stop_txn == WT_TXN_MAX) { + if (__wt_time_window_is_empty(tw)) { ++*pp; return; } - __cell_check_value_validity( - session, durable_start_ts, start_ts, start_txn, durable_stop_ts, stop_ts, stop_txn); + __cell_check_value_validity(session, tw); **pp |= WT_CELL_SECOND_DESC; ++*pp; @@ -82,46 +78,41 @@ __cell_pack_value_validity(WT_SESSION_IMPL *session, uint8_t **pp, wt_timestamp_ ++*pp; flags = 0; - if (start_ts != WT_TS_NONE) { - WT_IGNORE_RET(__wt_vpack_uint(pp, 0, start_ts)); + if (tw->start_ts != WT_TS_NONE) { + WT_IGNORE_RET(__wt_vpack_uint(pp, 0, tw->start_ts)); LF_SET(WT_CELL_TS_START); } - if (start_txn != WT_TXN_NONE) { - WT_IGNORE_RET(__wt_vpack_uint(pp, 0, start_txn)); + if (tw->start_txn != WT_TXN_NONE) { + WT_IGNORE_RET(__wt_vpack_uint(pp, 0, tw->start_txn)); LF_SET(WT_CELL_TXN_START); } - if (durable_start_ts != WT_TS_NONE) { - 
WT_ASSERT(session, start_ts != WT_TS_NONE && start_ts <= durable_start_ts); + if (tw->durable_start_ts != WT_TS_NONE) { + WT_ASSERT(session, tw->start_ts <= tw->durable_start_ts); /* Store differences if any, not absolutes. */ - if (durable_start_ts - start_ts > 0) { - WT_IGNORE_RET(__wt_vpack_uint(pp, 0, durable_start_ts - start_ts)); + if (tw->durable_start_ts - tw->start_ts > 0) { + WT_IGNORE_RET(__wt_vpack_uint(pp, 0, tw->durable_start_ts - tw->start_ts)); LF_SET(WT_CELL_TS_DURABLE_START); } } - if (stop_ts != WT_TS_MAX) { + if (tw->stop_ts != WT_TS_MAX) { /* Store differences, not absolutes. */ - WT_IGNORE_RET(__wt_vpack_uint(pp, 0, stop_ts - start_ts)); + WT_IGNORE_RET(__wt_vpack_uint(pp, 0, tw->stop_ts - tw->start_ts)); LF_SET(WT_CELL_TS_STOP); } - if (stop_txn != WT_TXN_MAX) { + if (tw->stop_txn != WT_TXN_MAX) { /* Store differences, not absolutes. */ - WT_IGNORE_RET(__wt_vpack_uint(pp, 0, stop_txn - start_txn)); + WT_IGNORE_RET(__wt_vpack_uint(pp, 0, tw->stop_txn - tw->start_txn)); LF_SET(WT_CELL_TXN_STOP); } - if (durable_stop_ts != WT_TS_NONE) { - WT_ASSERT(session, stop_ts != WT_TS_MAX && stop_ts <= durable_stop_ts); + if (tw->durable_stop_ts != WT_TS_NONE) { + WT_ASSERT(session, tw->stop_ts <= tw->durable_stop_ts); /* Store differences if any, not absolutes. */ - if (durable_stop_ts - stop_ts > 0) { - WT_IGNORE_RET(__wt_vpack_uint(pp, 0, durable_stop_ts - stop_ts)); + if (tw->durable_stop_ts - tw->stop_ts > 0) { + WT_IGNORE_RET(__wt_vpack_uint(pp, 0, tw->durable_stop_ts - tw->stop_ts)); LF_SET(WT_CELL_TS_DURABLE_STOP); } } - /* - * Currently, no uncommitted prepared updates are written to the data store, so this flag must - * be false until we allow writing them in WT-5984. In that ticket this assert must be removed. 
- */ - WT_ASSERT(session, prepare == false); - if (prepare) + if (tw->prepare) LF_SET(WT_CELL_PREPARE); *flagsp = flags; } @@ -131,47 +122,47 @@ __cell_pack_value_validity(WT_SESSION_IMPL *session, uint8_t **pp, wt_timestamp_ * Check the address' validity window for sanity. */ static inline void -__wt_check_addr_validity(WT_SESSION_IMPL *session, wt_timestamp_t start_durable_ts, - wt_timestamp_t oldest_start_ts, uint64_t oldest_start_txn, wt_timestamp_t stop_durable_ts, - wt_timestamp_t newest_stop_ts, uint64_t newest_stop_txn) +__wt_check_addr_validity(WT_SESSION_IMPL *session, WT_TIME_AGGREGATE *ta) { #ifdef HAVE_DIAGNOSTIC + /* + * We're using WT_ERR_ASSERT rather than WT_ASSERT because we want to push out a message string. + * This usage of WT_ERR_ASSERT isn't "correct", because it jumps to a non-existent error label + * in non-diagnostic builds and returns WT_PANIC without calling the underlying panic routine. + * That's OK, we have to be in a diagnostic build to get here, and fixing it would require new + * macros that aren't needed anywhere else, so we're leaving it alone. 
+ */ char ts_string[2][WT_TS_INT_STRING_SIZE]; - if (oldest_start_ts != WT_TS_NONE && newest_stop_ts == WT_TS_NONE) + if (ta->oldest_start_ts != WT_TS_NONE && ta->newest_stop_ts == WT_TS_NONE) WT_ERR_ASSERT( - session, newest_stop_ts != WT_TS_NONE, WT_PANIC, "newest stop timestamp of 0"); + session, ta->newest_stop_ts != WT_TS_NONE, WT_PANIC, "newest stop timestamp of 0"); - if (oldest_start_ts > newest_stop_ts) - WT_ERR_ASSERT(session, oldest_start_ts <= newest_stop_ts, WT_PANIC, + if (ta->oldest_start_ts > ta->newest_stop_ts) + WT_ERR_ASSERT(session, ta->oldest_start_ts <= ta->newest_stop_ts, WT_PANIC, "an oldest start timestamp %s newer than its newest stop timestamp %s", - __wt_timestamp_to_string(oldest_start_ts, ts_string[0]), - __wt_timestamp_to_string(newest_stop_ts, ts_string[1])); + __wt_timestamp_to_string(ta->oldest_start_ts, ts_string[0]), + __wt_timestamp_to_string(ta->newest_stop_ts, ts_string[1])); - if (oldest_start_txn > newest_stop_txn) - WT_ERR_ASSERT(session, oldest_start_txn <= newest_stop_txn, WT_PANIC, + if (ta->oldest_start_txn > ta->newest_stop_txn) + WT_ERR_ASSERT(session, ta->oldest_start_txn <= ta->newest_stop_txn, WT_PANIC, "an oldest start transaction %" PRIu64 " newer than its newest stop transaction %" PRIu64, - oldest_start_txn, newest_stop_txn); + ta->oldest_start_txn, ta->newest_stop_txn); - if (oldest_start_ts > start_durable_ts) - WT_ERR_ASSERT(session, oldest_start_ts <= start_durable_ts, WT_PANIC, + if (ta->oldest_start_ts > ta->newest_start_durable_ts) + WT_ERR_ASSERT(session, ta->oldest_start_ts <= ta->newest_start_durable_ts, WT_PANIC, "an oldest start timestamp %s newer than its durable start timestamp %s", - __wt_timestamp_to_string(oldest_start_ts, ts_string[0]), - __wt_timestamp_to_string(start_durable_ts, ts_string[1])); + __wt_timestamp_to_string(ta->oldest_start_ts, ts_string[0]), + __wt_timestamp_to_string(ta->newest_start_durable_ts, ts_string[1])); - if (newest_stop_ts != WT_TS_MAX && newest_stop_ts > 
stop_durable_ts) - WT_ERR_ASSERT(session, newest_stop_ts <= stop_durable_ts, WT_PANIC, + if (ta->newest_stop_ts != WT_TS_MAX && ta->newest_stop_ts > ta->newest_stop_durable_ts) + WT_ERR_ASSERT(session, ta->newest_stop_ts <= ta->newest_stop_durable_ts, WT_PANIC, "a newest stop timestamp %s newer than its durable stop timestamp %s", - __wt_timestamp_to_string(newest_stop_ts, ts_string[0]), - __wt_timestamp_to_string(stop_durable_ts, ts_string[1])); + __wt_timestamp_to_string(ta->newest_stop_ts, ts_string[0]), + __wt_timestamp_to_string(ta->newest_stop_durable_ts, ts_string[1])); #else WT_UNUSED(session); - WT_UNUSED(start_durable_ts); - WT_UNUSED(oldest_start_ts); - WT_UNUSED(oldest_start_txn); - WT_UNUSED(stop_durable_ts); - WT_UNUSED(newest_stop_ts); - WT_UNUSED(newest_stop_txn); + WT_UNUSED(ta); #endif } @@ -180,22 +171,17 @@ __wt_check_addr_validity(WT_SESSION_IMPL *session, wt_timestamp_t start_durable_ * Pack the validity window for an address. */ static inline void -__cell_pack_addr_validity(WT_SESSION_IMPL *session, uint8_t **pp, wt_timestamp_t start_durable_ts, - wt_timestamp_t oldest_start_ts, uint64_t oldest_start_txn, wt_timestamp_t stop_durable_ts, - wt_timestamp_t newest_stop_ts, uint64_t newest_stop_txn, bool prepare) +__cell_pack_addr_validity(WT_SESSION_IMPL *session, uint8_t **pp, WT_TIME_AGGREGATE *ta) { uint8_t flags, *flagsp; /* Globally visible values have no associated validity window. 
*/ - if (start_durable_ts == WT_TS_NONE && stop_durable_ts == WT_TS_NONE && - oldest_start_ts == WT_TS_NONE && oldest_start_txn == WT_TXN_NONE && - newest_stop_ts == WT_TS_MAX && newest_stop_txn == WT_TXN_MAX) { + if (__wt_time_aggregate_is_empty(ta)) { ++*pp; return; } - __wt_check_addr_validity(session, start_durable_ts, oldest_start_ts, oldest_start_txn, - stop_durable_ts, newest_stop_ts, newest_stop_txn); + __wt_check_addr_validity(session, ta); **pp |= WT_CELL_SECOND_DESC; ++*pp; @@ -203,21 +189,18 @@ __cell_pack_addr_validity(WT_SESSION_IMPL *session, uint8_t **pp, wt_timestamp_t ++*pp; flags = 0; - if (oldest_start_ts != WT_TS_NONE) { - WT_IGNORE_RET(__wt_vpack_uint(pp, 0, oldest_start_ts)); + if (ta->oldest_start_ts != WT_TS_NONE) { + WT_IGNORE_RET(__wt_vpack_uint(pp, 0, ta->oldest_start_ts)); LF_SET(WT_CELL_TS_START); } - if (oldest_start_txn != WT_TXN_NONE) { - WT_IGNORE_RET(__wt_vpack_uint(pp, 0, oldest_start_txn)); + if (ta->oldest_start_txn != WT_TXN_NONE) { + WT_IGNORE_RET(__wt_vpack_uint(pp, 0, ta->oldest_start_txn)); LF_SET(WT_CELL_TXN_START); } - if (start_durable_ts != WT_TS_NONE) { + if (ta->newest_start_durable_ts != WT_TS_NONE) { /* Store differences, not absolutes. */ - /* - * FIXME-prepare-support: - * WT_ASSERT( - * session, oldest_start_ts != WT_TS_NONE && oldest_start_ts <= start_durable_ts); - */ + WT_ASSERT(session, ta->oldest_start_ts <= ta->newest_start_durable_ts); + /* * Unlike value cell, we store the durable start timestamp even the difference is zero * compared to oldest commit timestamp. The difference can only be zero when the page @@ -225,43 +208,38 @@ __cell_pack_addr_validity(WT_SESSION_IMPL *session, uint8_t **pp, wt_timestamp_t * having that check to find out whether it is zero or not will unnecessarily add overhead * than benefit. 
*/ - WT_IGNORE_RET(__wt_vpack_uint(pp, 0, start_durable_ts - oldest_start_ts)); + WT_IGNORE_RET(__wt_vpack_uint(pp, 0, ta->newest_start_durable_ts - ta->oldest_start_ts)); LF_SET(WT_CELL_TS_DURABLE_START); } - if (newest_stop_ts != WT_TS_MAX) { + if (ta->newest_stop_ts != WT_TS_MAX) { /* Store differences, not absolutes. */ - WT_IGNORE_RET(__wt_vpack_uint(pp, 0, newest_stop_ts - oldest_start_ts)); + WT_IGNORE_RET(__wt_vpack_uint(pp, 0, ta->newest_stop_ts - ta->oldest_start_ts)); LF_SET(WT_CELL_TS_STOP); } - if (newest_stop_txn != WT_TXN_MAX) { + if (ta->newest_stop_txn != WT_TXN_MAX) { /* Store differences, not absolutes. */ - WT_IGNORE_RET(__wt_vpack_uint(pp, 0, newest_stop_txn - oldest_start_txn)); + WT_IGNORE_RET(__wt_vpack_uint(pp, 0, ta->newest_stop_txn - ta->oldest_start_txn)); LF_SET(WT_CELL_TXN_STOP); } - if (stop_durable_ts != WT_TS_NONE) { - /* Store differences, not absolutes. */ - /* - * FIXME-prepare-support: - * WT_ASSERT(session, - * newest_stop_ts != WT_TS_MAX && newest_stop_ts <= stop_durable__ts); - */ + if (ta->newest_stop_durable_ts != WT_TS_NONE) { + WT_ASSERT(session, + ta->newest_stop_ts == WT_TS_MAX || ta->newest_stop_ts <= ta->newest_stop_durable_ts); + /* + * Store differences, not absolutes. + * * Unlike value cell, we store the durable stop timestamp even the difference is zero * compared to newest commit timestamp. The difference can only be zero when the page * contains all the key/value pairs with the same timestamp. But this scenario is rare and * having that check to find out whether it is zero or not will unnecessarily add overhead * than benefit. */ - WT_IGNORE_RET(__wt_vpack_uint(pp, 0, stop_durable_ts - newest_stop_ts)); + WT_IGNORE_RET(__wt_vpack_uint(pp, 0, ta->newest_stop_durable_ts - ta->newest_stop_ts)); LF_SET(WT_CELL_TS_DURABLE_STOP); } - /* - * Currently, no uncommitted prepared updates are written to the data store, so this flag must - * be false until we allow writing them in WT-5984. 
In that ticket this assert must be removed. - */ - WT_ASSERT(session, prepare == false); - if (prepare) + if (ta->prepare) LF_SET(WT_CELL_PREPARE); + *flagsp = flags; } @@ -271,9 +249,7 @@ __cell_pack_addr_validity(WT_SESSION_IMPL *session, uint8_t **pp, wt_timestamp_t */ static inline size_t __wt_cell_pack_addr(WT_SESSION_IMPL *session, WT_CELL *cell, u_int cell_type, uint64_t recno, - wt_timestamp_t start_durable_ts, wt_timestamp_t oldest_start_ts, uint64_t oldest_start_txn, - wt_timestamp_t stop_durable_ts, wt_timestamp_t newest_stop_ts, uint64_t newest_stop_txn, - bool prepare, size_t size) + WT_TIME_AGGREGATE *ta, size_t size) { uint8_t *p; @@ -281,8 +257,7 @@ __wt_cell_pack_addr(WT_SESSION_IMPL *session, WT_CELL *cell, u_int cell_type, ui p = cell->__chunk; *p = '\0'; - __cell_pack_addr_validity(session, &p, start_durable_ts, oldest_start_ts, oldest_start_txn, - stop_durable_ts, newest_stop_ts, newest_stop_txn, prepare); + __cell_pack_addr_validity(session, &p, ta); if (recno == WT_RECNO_OOB) cell->__chunk[0] |= (uint8_t)cell_type; /* Type */ @@ -301,9 +276,8 @@ __wt_cell_pack_addr(WT_SESSION_IMPL *session, WT_CELL *cell, u_int cell_type, ui * Set a value item's WT_CELL contents. 
*/ static inline size_t -__wt_cell_pack_value(WT_SESSION_IMPL *session, WT_CELL *cell, wt_timestamp_t durable_start_ts, - wt_timestamp_t start_ts, uint64_t start_txn, wt_timestamp_t durable_stop_ts, - wt_timestamp_t stop_ts, uint64_t stop_txn, bool prepare, uint64_t rle, size_t size) +__wt_cell_pack_value( + WT_SESSION_IMPL *session, WT_CELL *cell, WT_TIME_WINDOW *tw, uint64_t rle, size_t size) { uint8_t byte, *p; bool validity; @@ -312,8 +286,7 @@ __wt_cell_pack_value(WT_SESSION_IMPL *session, WT_CELL *cell, wt_timestamp_t dur p = cell->__chunk; *p = '\0'; - __cell_pack_value_validity(session, &p, durable_start_ts, start_ts, start_txn, durable_stop_ts, - stop_ts, stop_txn, prepare); + __cell_pack_value_validity(session, &p, tw); /* * Short data cells without a validity window or run-length encoding have 6 bits of data length @@ -435,9 +408,8 @@ __wt_cell_pack_value_match( * Write a copy value cell. */ static inline size_t -__wt_cell_pack_copy(WT_SESSION_IMPL *session, WT_CELL *cell, wt_timestamp_t start_durable_ts, - wt_timestamp_t start_ts, uint64_t start_txn, wt_timestamp_t stop_durable_ts, - wt_timestamp_t stop_ts, uint64_t stop_txn, bool prepare, uint64_t rle, uint64_t v) +__wt_cell_pack_copy( + WT_SESSION_IMPL *session, WT_CELL *cell, WT_TIME_WINDOW *tw, uint64_t rle, uint64_t v) { uint8_t *p; @@ -445,8 +417,7 @@ __wt_cell_pack_copy(WT_SESSION_IMPL *session, WT_CELL *cell, wt_timestamp_t star p = cell->__chunk; *p = '\0'; - __cell_pack_value_validity(session, &p, start_durable_ts, start_ts, start_txn, stop_durable_ts, - stop_ts, stop_txn, prepare); + __cell_pack_value_validity(session, &p, tw); if (rle < 2) cell->__chunk[0] |= WT_CELL_VALUE_COPY; /* Type */ @@ -466,9 +437,7 @@ __wt_cell_pack_copy(WT_SESSION_IMPL *session, WT_CELL *cell, wt_timestamp_t star * Write a deleted value cell. 
*/ static inline size_t -__wt_cell_pack_del(WT_SESSION_IMPL *session, WT_CELL *cell, wt_timestamp_t start_durable_ts, - wt_timestamp_t start_ts, uint64_t start_txn, wt_timestamp_t stop_durable_ts, - wt_timestamp_t stop_ts, uint64_t stop_txn, uint64_t rle) +__wt_cell_pack_del(WT_SESSION_IMPL *session, WT_CELL *cell, WT_TIME_WINDOW *tw, uint64_t rle) { uint8_t *p; @@ -476,9 +445,8 @@ __wt_cell_pack_del(WT_SESSION_IMPL *session, WT_CELL *cell, wt_timestamp_t start p = cell->__chunk; *p = '\0'; - /* FIXME-prepare-support: we should pass prepare value. */ - __cell_pack_value_validity(session, &p, start_durable_ts, start_ts, start_txn, stop_durable_ts, - stop_ts, stop_txn, false); + /* FIXME-WT-6124: we should set the time window prepare value. */ + __cell_pack_value_validity(session, &p, tw); if (rle < 2) cell->__chunk[0] |= WT_CELL_DEL; /* Type */ @@ -564,9 +532,7 @@ __wt_cell_pack_leaf_key(WT_CELL *cell, uint8_t prefix, size_t size) * Pack an overflow cell. */ static inline size_t -__wt_cell_pack_ovfl(WT_SESSION_IMPL *session, WT_CELL *cell, uint8_t type, - wt_timestamp_t durable_start_ts, wt_timestamp_t start_ts, uint64_t start_txn, - wt_timestamp_t durable_stop_ts, wt_timestamp_t stop_ts, uint64_t stop_txn, bool prepare, +__wt_cell_pack_ovfl(WT_SESSION_IMPL *session, WT_CELL *cell, uint8_t type, WT_TIME_WINDOW *tw, uint64_t rle, size_t size) { uint8_t *p; @@ -578,12 +544,12 @@ __wt_cell_pack_ovfl(WT_SESSION_IMPL *session, WT_CELL *cell, uint8_t type, switch (type) { case WT_CELL_KEY_OVFL: case WT_CELL_KEY_OVFL_RM: + WT_ASSERT(session, tw == NULL); ++p; break; case WT_CELL_VALUE_OVFL: case WT_CELL_VALUE_OVFL_RM: - __cell_pack_value_validity(session, &p, durable_start_ts, start_ts, start_txn, - durable_stop_ts, stop_ts, stop_txn, prepare); + __cell_pack_value_validity(session, &p, tw); break; } @@ -739,26 +705,22 @@ __wt_cell_unpack_safe(WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, WT_CE { struct { uint64_t v; - wt_timestamp_t start_ts; - wt_timestamp_t 
durable_start_ts; - uint64_t start_txn; - wt_timestamp_t stop_ts; - wt_timestamp_t durable_stop_ts; - uint64_t stop_txn; + WT_TIME_WINDOW tw; uint32_t len; } copy; + WT_TIME_AGGREGATE *ta; + WT_TIME_WINDOW *tw; uint64_t v; const uint8_t *p; uint8_t flags; + bool copy_cell; + + copy_cell = false; + copy.len = 0; /* [-Wconditional-uninitialized] */ + copy.v = 0; /* [-Wconditional-uninitialized] */ - copy.v = 0; /* -Werror=maybe-uninitialized */ - copy.start_ts = WT_TS_NONE; - copy.durable_start_ts = WT_TS_NONE; - copy.start_txn = WT_TXN_NONE; - copy.stop_ts = WT_TS_MAX; - copy.durable_stop_ts = WT_TS_NONE; - copy.stop_txn = WT_TXN_MAX; - copy.len = 0; + tw = &unpack->tw; + ta = &unpack->ta; /* * The verification code specifies an end argument, a pointer to 1B past the end-of-page. In which @@ -789,18 +751,8 @@ restart: * following switch. All validity windows default to durability. */ unpack->v = 0; - unpack->durable_start_ts = WT_TS_NONE; - unpack->durable_stop_ts = WT_TS_NONE; - unpack->start_ts = WT_TS_NONE; - unpack->start_txn = WT_TXN_NONE; - unpack->stop_ts = WT_TS_MAX; - unpack->stop_txn = WT_TXN_MAX; - unpack->newest_start_durable_ts = WT_TS_NONE; - unpack->newest_stop_durable_ts = WT_TS_NONE; - unpack->oldest_start_ts = WT_TS_NONE; - unpack->oldest_start_txn = WT_TXN_NONE; - unpack->newest_stop_ts = WT_TS_MAX; - unpack->newest_stop_txn = WT_TXN_MAX; + __wt_time_window_init(&unpack->tw); + __wt_time_aggregate_init(&unpack->ta); unpack->raw = (uint8_t)__wt_cell_type_raw(cell); unpack->type = (uint8_t)__wt_cell_type(cell); unpack->flags = 0; @@ -852,39 +804,38 @@ restart: break; flags = *p++; /* skip second descriptor byte */ - if (LF_ISSET(WT_CELL_PREPARE)) + if (LF_ISSET(WT_CELL_PREPARE)) { F_SET(unpack, WT_CELL_UNPACK_PREPARE); + ta->prepare = 1; + } if (LF_ISSET(WT_CELL_TS_START)) - WT_RET(__wt_vunpack_uint( - &p, end == NULL ? 0 : WT_PTRDIFF(end, p), &unpack->oldest_start_ts)); + WT_RET( + __wt_vunpack_uint(&p, end == NULL ? 
0 : WT_PTRDIFF(end, p), &ta->oldest_start_ts)); if (LF_ISSET(WT_CELL_TXN_START)) - WT_RET(__wt_vunpack_uint( - &p, end == NULL ? 0 : WT_PTRDIFF(end, p), &unpack->oldest_start_txn)); + WT_RET( + __wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &ta->oldest_start_txn)); if (LF_ISSET(WT_CELL_TS_DURABLE_START)) { WT_RET(__wt_vunpack_uint( - &p, end == NULL ? 0 : WT_PTRDIFF(end, p), &unpack->newest_start_durable_ts)); - unpack->newest_start_durable_ts += unpack->oldest_start_ts; + &p, end == NULL ? 0 : WT_PTRDIFF(end, p), &ta->newest_start_durable_ts)); + ta->newest_start_durable_ts += ta->oldest_start_ts; } if (LF_ISSET(WT_CELL_TS_STOP)) { WT_RET( - __wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &unpack->newest_stop_ts)); - unpack->newest_stop_ts += unpack->oldest_start_ts; + __wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &ta->newest_stop_ts)); + ta->newest_stop_ts += ta->oldest_start_ts; } if (LF_ISSET(WT_CELL_TXN_STOP)) { - WT_RET(__wt_vunpack_uint( - &p, end == NULL ? 0 : WT_PTRDIFF(end, p), &unpack->newest_stop_txn)); - unpack->newest_stop_txn += unpack->oldest_start_txn; + WT_RET( + __wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &ta->newest_stop_txn)); + ta->newest_stop_txn += ta->oldest_start_txn; } if (LF_ISSET(WT_CELL_TS_DURABLE_STOP)) { WT_RET(__wt_vunpack_uint( - &p, end == NULL ? 0 : WT_PTRDIFF(end, p), &unpack->newest_stop_durable_ts)); - unpack->newest_stop_durable_ts += unpack->newest_stop_ts; + &p, end == NULL ? 
0 : WT_PTRDIFF(end, p), &ta->newest_stop_durable_ts)); + ta->newest_stop_durable_ts += ta->newest_stop_ts; } - - __wt_check_addr_validity(session, unpack->newest_start_durable_ts, unpack->oldest_start_ts, - unpack->oldest_start_txn, unpack->newest_stop_durable_ts, unpack->newest_stop_ts, - unpack->newest_stop_txn); + __wt_check_addr_validity(session, ta); break; case WT_CELL_DEL: case WT_CELL_VALUE: @@ -895,38 +846,39 @@ restart: break; flags = *p++; /* skip second descriptor byte */ - if (LF_ISSET(WT_CELL_PREPARE)) + if (LF_ISSET(WT_CELL_PREPARE)) { F_SET(unpack, WT_CELL_UNPACK_PREPARE); + tw->prepare = 1; + } if (LF_ISSET(WT_CELL_TS_START)) - WT_RET(__wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &unpack->start_ts)); + WT_RET(__wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &tw->start_ts)); if (LF_ISSET(WT_CELL_TXN_START)) - WT_RET(__wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &unpack->start_txn)); + WT_RET(__wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &tw->start_txn)); if (LF_ISSET(WT_CELL_TS_DURABLE_START)) { - WT_RET(__wt_vunpack_uint( - &p, end == NULL ? 0 : WT_PTRDIFF(end, p), &unpack->durable_start_ts)); - unpack->durable_start_ts += unpack->start_ts; + WT_RET( + __wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &tw->durable_start_ts)); + tw->durable_start_ts += tw->start_ts; } else - unpack->durable_start_ts = unpack->start_ts; + tw->durable_start_ts = tw->start_ts; if (LF_ISSET(WT_CELL_TS_STOP)) { - WT_RET(__wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &unpack->stop_ts)); - unpack->stop_ts += unpack->start_ts; + WT_RET(__wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &tw->stop_ts)); + tw->stop_ts += tw->start_ts; } if (LF_ISSET(WT_CELL_TXN_STOP)) { - WT_RET(__wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &unpack->stop_txn)); - unpack->stop_txn += unpack->start_txn; + WT_RET(__wt_vunpack_uint(&p, end == NULL ? 
0 : WT_PTRDIFF(end, p), &tw->stop_txn)); + tw->stop_txn += tw->start_txn; } if (LF_ISSET(WT_CELL_TS_DURABLE_STOP)) { - WT_RET(__wt_vunpack_uint( - &p, end == NULL ? 0 : WT_PTRDIFF(end, p), &unpack->durable_stop_ts)); - unpack->durable_stop_ts += unpack->stop_ts; - } else if (unpack->stop_ts != WT_TS_MAX) - unpack->durable_stop_ts = unpack->stop_ts; + WT_RET( + __wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &tw->durable_stop_ts)); + tw->durable_stop_ts += tw->stop_ts; + } else if (tw->stop_ts != WT_TS_MAX) + tw->durable_stop_ts = tw->stop_ts; else - unpack->durable_stop_ts = WT_TS_NONE; + tw->durable_stop_ts = WT_TS_NONE; - __cell_check_value_validity(session, unpack->durable_start_ts, unpack->start_ts, - unpack->start_txn, unpack->durable_stop_ts, unpack->stop_ts, unpack->stop_txn); + __cell_check_value_validity(session, tw); break; } @@ -943,19 +895,16 @@ restart: */ switch (unpack->raw) { case WT_CELL_VALUE_COPY: + copy_cell = true; + /* * The cell is followed by an offset to a cell written earlier in the page. Save/restore the - * length and RLE of this cell, we need the length to step through the set of cells on the - * page and this RLE is probably different from the RLE of the earlier cell. + * visibility window, length and RLE of this cell, we need the length to step through the + * set of cells on the page and the RLE and timestamp information are specific to this cell. */ + __wt_time_window_copy(&copy.tw, tw); WT_RET(__wt_vunpack_uint(&p, end == NULL ? 0 : WT_PTRDIFF(end, p), &v)); copy.v = unpack->v; - copy.start_ts = unpack->start_ts; - copy.durable_start_ts = unpack->durable_start_ts; - copy.start_txn = unpack->start_txn; - copy.stop_ts = unpack->stop_ts; - copy.durable_stop_ts = unpack->durable_stop_ts; - copy.stop_txn = unpack->stop_txn; copy.len = WT_PTRDIFF32(p, cell); cell = (WT_CELL *)((uint8_t *)cell - v); goto restart; @@ -1003,22 +952,17 @@ restart: return (WT_ERROR); /* Unknown cell type.
*/ } -/* - * Check the original cell against the full cell length (this is a diagnostic as well, we may be - * copying the cell from the page and we need the right length). - */ done: + /* + * Check the original cell against the full cell length (this is a diagnostic as well, we may be + * copying the cell from the page and we need the right length). + */ WT_CELL_LEN_CHK(cell, unpack->__len); - if (copy.len != 0) { - unpack->raw = WT_CELL_VALUE_COPY; + if (copy_cell) { + __wt_time_window_copy(tw, &copy.tw); unpack->v = copy.v; - unpack->start_ts = copy.start_ts; - unpack->durable_start_ts = copy.durable_start_ts; - unpack->start_txn = copy.start_txn; - unpack->stop_ts = copy.stop_ts; - unpack->durable_stop_ts = copy.durable_stop_ts; - unpack->stop_txn = copy.stop_txn; unpack->__len = copy.len; + unpack->raw = WT_CELL_VALUE_COPY; } return (0); @@ -1032,6 +976,12 @@ static inline void __wt_cell_unpack_dsk( WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, WT_CELL *cell, WT_CELL_UNPACK *unpack) { + WT_TIME_AGGREGATE *ta; + WT_TIME_WINDOW *tw; + + ta = &unpack->ta; + tw = &unpack->tw; + /* * Row-store doesn't store zero-length values on pages, but this allows us to pretend. */ @@ -1042,18 +992,8 @@ __wt_cell_unpack_dsk( * If there isn't any value validity window (which is what it will take to get to a * zero-length item), the value must be stable.
*/ - unpack->durable_start_ts = WT_TS_NONE; - unpack->durable_stop_ts = WT_TS_NONE; - unpack->start_ts = WT_TS_NONE; - unpack->start_txn = WT_TXN_NONE; - unpack->stop_ts = WT_TS_MAX; - unpack->stop_txn = WT_TXN_MAX; - unpack->newest_start_durable_ts = WT_TS_NONE; - unpack->newest_stop_durable_ts = WT_TS_NONE; - unpack->oldest_start_ts = WT_TS_NONE; - unpack->oldest_start_txn = WT_TXN_NONE; - unpack->newest_stop_ts = WT_TS_MAX; - unpack->newest_stop_txn = WT_TXN_MAX; + __wt_time_window_init(tw); + __wt_time_aggregate_init(ta); unpack->data = ""; unpack->size = 0; unpack->__len = 0; @@ -1081,30 +1021,30 @@ __wt_cell_unpack_dsk( * Previous startup txnid=0, ts=y txnid=0, ts=WT_TS_NONE txnid=MAX, ts=MAX */ if (dsk->write_gen > 0 && dsk->write_gen <= S2C(session)->base_write_gen) { - /* FIXME-prepare-support: deal with durable timestamps. */ + /* FIXME-WT-6124: deal with durable timestamps. */ /* Tell reconciliation we cleared the transaction ids and the cell needs to be rebuilt. */ - if (unpack->start_txn != WT_TXN_NONE) { - unpack->start_txn = WT_TXN_NONE; - F_SET(unpack, WT_CELL_UNPACK_TIME_PAIRS_CLEARED); + if (tw->start_txn != WT_TXN_NONE) { + tw->start_txn = WT_TXN_NONE; + F_SET(unpack, WT_CELL_UNPACK_TIME_WINDOW_CLEARED); } - if (unpack->stop_txn != WT_TXN_MAX) { - unpack->stop_txn = WT_TXN_NONE; - F_SET(unpack, WT_CELL_UNPACK_TIME_PAIRS_CLEARED); - if (unpack->stop_ts == WT_TS_MAX) - unpack->stop_ts = WT_TS_NONE; + if (tw->stop_txn != WT_TXN_MAX) { + tw->stop_txn = WT_TXN_NONE; + F_SET(unpack, WT_CELL_UNPACK_TIME_WINDOW_CLEARED); + if (tw->stop_ts == WT_TS_MAX) + tw->stop_ts = WT_TS_NONE; } else - WT_ASSERT(session, unpack->stop_ts == WT_TS_MAX); - if (unpack->oldest_start_txn != WT_TXN_NONE) { - unpack->oldest_start_txn = WT_TXN_NONE; - F_SET(unpack, WT_CELL_UNPACK_TIME_PAIRS_CLEARED); + WT_ASSERT(session, tw->stop_ts == WT_TS_MAX); + if (ta->oldest_start_txn != WT_TXN_NONE) { + ta->oldest_start_txn = WT_TXN_NONE; + F_SET(unpack, 
WT_CELL_UNPACK_TIME_WINDOW_CLEARED); } - if (unpack->newest_stop_txn != WT_TXN_MAX) { - unpack->newest_stop_txn = WT_TXN_NONE; - F_SET(unpack, WT_CELL_UNPACK_TIME_PAIRS_CLEARED); - if (unpack->newest_stop_ts == WT_TS_MAX) - unpack->newest_stop_ts = WT_TS_NONE; + if (ta->newest_stop_txn != WT_TXN_MAX) { + ta->newest_stop_txn = WT_TXN_NONE; + F_SET(unpack, WT_CELL_UNPACK_TIME_WINDOW_CLEARED); + if (ta->newest_stop_ts == WT_TS_MAX) + ta->newest_stop_ts = WT_TS_NONE; } else - WT_ASSERT(session, unpack->newest_stop_ts == WT_TS_MAX); + WT_ASSERT(session, ta->newest_stop_ts == WT_TS_MAX); } } |