summaryrefslogtreecommitdiff
path: root/src/third_party/wiredtiger/src/reconcile/rec_write.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/third_party/wiredtiger/src/reconcile/rec_write.c')
-rw-r--r--src/third_party/wiredtiger/src/reconcile/rec_write.c124
1 files changed, 37 insertions, 87 deletions
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_write.c b/src/third_party/wiredtiger/src/reconcile/rec_write.c
index e7cdc847da8..ff4fe361abe 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_write.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_write.c
@@ -42,12 +42,12 @@ __wt_reconcile(WT_SESSION_IMPL *session, WT_REF *ref, WT_SALVAGE_COOKIE *salvage
/*
* Sanity check flags.
*
- * If we try to do eviction using transaction visibility, we had better
- * have a snapshot. This doesn't apply to checkpoints: there are
- * (rare) cases where we write data at read-uncommitted isolation.
+ * If we try to do eviction using transaction visibility, we had better have a snapshot. This
+ * doesn't apply to checkpoints: there are (rare) cases where we write data at read-uncommitted
+ * isolation.
*/
WT_ASSERT(session, !LF_ISSET(WT_REC_EVICT) || LF_ISSET(WT_REC_VISIBLE_ALL) ||
- F_ISSET(&session->txn, WT_TXN_HAS_SNAPSHOT));
+ F_ISSET(session->txn, WT_TXN_HAS_SNAPSHOT));
/* It's an error to be called with a clean page. */
WT_ASSERT(session, __wt_page_is_modified(page));
@@ -225,11 +225,10 @@ __reconcile(WT_SESSION_IMPL *session, WT_REF *ref, WT_SALVAGE_COOKIE *salvage, u
__rec_cleanup(session, r);
/*
- * When threads perform eviction, don't cache block manager structures
- * (even across calls), we can have a significant number of threads
- * doing eviction at the same time with large items. Ignore checkpoints,
- * once the checkpoint completes, all unnecessary session resources will
- * be discarded.
+ * When threads perform eviction, don't cache block manager structures (even across calls), we
+ * can have a significant number of threads doing eviction at the same time with large items.
+ * Ignore checkpoints, once the checkpoint completes, all unnecessary session resources will be
+ * discarded.
*/
if (!WT_SESSION_IS_CHECKPOINT(session)) {
/*
@@ -242,14 +241,6 @@ __reconcile(WT_SESSION_IMPL *session, WT_REF *ref, WT_SALVAGE_COOKIE *salvage, u
WT_TRET(__rec_destroy_session(session));
}
-
- /*
- * We track removed overflow objects in case there's a reader in transit when they're removed.
- * Any form of eviction locks out readers, we can discard them all.
- */
- if (LF_ISSET(WT_REC_EVICT))
- __wt_ovfl_discard_remove(session, page);
-
WT_RET(ret);
/*
@@ -308,15 +299,6 @@ __rec_write_page_status(WT_SESSION_IMPL *session, WT_RECONCILE *r)
*/
WT_ASSERT(session, !F_ISSET(r, WT_REC_EVICT) ||
(F_ISSET(r, WT_REC_HS | WT_REC_IN_MEMORY) || page->type == WT_PAGE_COL_FIX));
-
- /*
- * We have written the page, but something prevents it from being evicted. If we wrote the
- * newest versions of updates, the on-disk page may contain records that are newer than what
- * checkpoint would write. Make sure that checkpoint visits the page and (if necessary)
- * fixes things up.
- */
- if (r->hs_skew_newest)
- mod->first_dirty_txn = WT_TXN_FIRST;
} else {
/*
* Track the page's maximum transaction ID (used to decide if we can evict a clean page and
@@ -518,52 +500,25 @@ __rec_init(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags, WT_SALVAGE_COO
* checkpoints into account.
*/
if (WT_IS_METADATA(session->dhandle)) {
- WT_ORDERED_READ(ckpt_txn, txn_global->checkpoint_state.id);
+ WT_ORDERED_READ(ckpt_txn, txn_global->checkpoint_txn_shared.id);
if (ckpt_txn != WT_TXN_NONE && WT_TXNID_LT(ckpt_txn, r->last_running))
r->last_running = ckpt_txn;
}
- /*
- * Decide whether to skew on-page values towards newer or older versions. This is a heuristic
- * attempting to minimize the number of pages that need to be rewritten by future checkpoints.
- *
- * We usually prefer to skew to newer versions, the logic being that by the time the next
- * checkpoint runs, it is likely that all the updates we choose will be stable. However, if
- * checkpointing with a timestamp (indicated by a stable_timestamp being set), and there is a
- * checkpoint already running, or this page was read with history store history, or the stable
- * timestamp hasn't changed since last time this page was successfully, skew oldest instead.
- */
- if (F_ISSET(S2C(session)->cache, WT_CACHE_EVICT_DEBUG_MODE) &&
- __wt_random(&session->rnd) % 3 == 0)
- r->hs_skew_newest = false;
- else
- r->hs_skew_newest = LF_ISSET(WT_REC_HS) && LF_ISSET(WT_REC_VISIBLE_ALL);
-
- if (r->hs_skew_newest && !__wt_btree_immediately_durable(session) &&
- txn_global->has_stable_timestamp &&
- ((btree->checkpoint_gen != __wt_gen(session, WT_GEN_CHECKPOINT) &&
- txn_global->stable_is_pinned) ||
- FLD_ISSET(page->modify->restore_state, WT_PAGE_RS_HS) ||
- page->modify->last_stable_timestamp == txn_global->stable_timestamp))
- r->hs_skew_newest = false;
-
/* When operating on the history store table, we should never try history store eviction. */
WT_ASSERT(session, !F_ISSET(btree, WT_BTREE_HS) || !LF_ISSET(WT_REC_HS));
/*
- * History store table eviction is configured when eviction gets aggressive,
- * adjust the flags for cases we don't support.
+ * History store table eviction is configured when eviction gets aggressive, adjust the flags
+ * for cases we don't support.
*
- * We don't yet support fixed-length column-store combined with the
- * history store table. It's not hard to do, but the underlying function
- * that reviews which updates can be written to the evicted page and
- * which updates need to be written to the history store table needs access
- * to the original value from the page being evicted, and there's no
- * code path for that in the case of fixed-length column-store objects.
- * (Row-store and variable-width column-store objects provide a
- * reference to the unpacked on-page cell for this purpose, but there
- * isn't an on-page cell for fixed-length column-store objects.) For
- * now, turn it off.
+ * We don't yet support fixed-length column-store combined with the history store table. It's
+ * not hard to do, but the underlying function that reviews which updates can be written to the
+ * evicted page and which updates need to be written to the history store table needs access to
+ * the original value from the page being evicted, and there's no code path for that in the case
+ * of fixed-length column-store objects. (Row-store and variable-width column-store objects
+ * provide a reference to the unpacked on-page cell for this purpose, but there isn't an on-page
+ * cell for fixed-length column-store objects.) For now, turn it off.
*/
if (page->type == WT_PAGE_COL_FIX)
LF_CLR(WT_REC_HS);
@@ -755,23 +710,20 @@ __rec_leaf_page_max(WT_SESSION_IMPL *session, WT_RECONCILE *r)
switch (page->type) {
case WT_PAGE_COL_FIX:
/*
- * Column-store pages can grow if there are missing records
- * (that is, we lost a chunk of the range, and have to write
- * deleted records). Fixed-length objects are a problem, if
- * there's a big missing range, we could theoretically have to
- * write large numbers of missing objects.
+ * Column-store pages can grow if there are missing records (that is, we lost a chunk of the
+ * range, and have to write deleted records). Fixed-length objects are a problem, if there's
+ * a big missing range, we could theoretically have to write large numbers of missing
+ * objects.
*/
page_size = (uint32_t)WT_ALIGN(
WT_FIX_ENTRIES_TO_BYTES(btree, r->salvage->take + r->salvage->missing), btree->allocsize);
break;
case WT_PAGE_COL_VAR:
/*
- * Column-store pages can grow if there are missing records
- * (that is, we lost a chunk of the range, and have to write
- * deleted records). Variable-length objects aren't usually a
- * problem because we can write any number of deleted records
- * in a single page entry because of the RLE, we just need to
- * ensure that additional entry fits.
+ * Column-store pages can grow if there are missing records (that is, we lost a chunk of the
+ * range, and have to write deleted records). Variable-length objects aren't usually a
+ * problem because we can write any number of deleted records in a single page entry because
+ * of the RLE, we just need to ensure that additional entry fits.
*/
break;
case WT_PAGE_ROW_LEAF:
@@ -946,15 +898,14 @@ __wt_rec_split_init(
}
/*
- * Ensure the disk image buffer is large enough for the max object, as
- * corrected by the underlying block manager.
+ * Ensure the disk image buffer is large enough for the max object, as corrected by the
+ * underlying block manager.
*
- * Since we want to support split_size values larger than the page size
- * (to allow for adjustments based on the compression), this buffer
- * should be the greater of split_size and page_size, then aligned to
- * the next allocation size boundary. The latter shouldn't be an issue,
- * but it's a possible scenario if, for example, the compression engine
- * is expected to give us 5x compression and gives us nothing at all.
+ * Since we want to support split_size values larger than the page size (to allow for
+ * adjustments based on the compression), this buffer should be the greater of split_size and
+ * page_size, then aligned to the next allocation size boundary. The latter shouldn't be an
+ * issue, but it's a possible scenario if, for example, the compression engine is expected to
+ * give us 5x compression and gives us nothing at all.
*/
corrected_page_size = r->page_size;
WT_RET(bm->write_size(bm, session, &corrected_page_size));
@@ -1626,12 +1577,11 @@ __rec_split_write_reuse(
return (false);
/*
- * Quit if evicting with no previously written block to compare against.
- * (In other words, if there's eviction pressure and the page was never
- * written by a checkpoint, calculating a checksum is worthless.)
+ * Quit if evicting with no previously written block to compare against. (In other words, if
+ * there's eviction pressure and the page was never written by a checkpoint, calculating a
+ * checksum is worthless.)
*
- * Quit if evicting and a previous check failed, once there's a miss no
- * future block will match.
+ * Quit if evicting and a previous check failed, once there's a miss no future block will match.
*/
if (F_ISSET(r, WT_REC_EVICT)) {
if (mod->rec_result != WT_PM_REC_MULTIBLOCK || mod->mod_multi_entries < r->multi_next)