summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Michael Cahill <michael.cahill@mongodb.com> 2016-07-23 00:55:39 +1000
committer: Keith Bostic <keith.bostic@mongodb.com> 2016-07-22 10:55:39 -0400
commit: 2c3bcfabf30a05203d23f57e9e00410f8d020e8f (patch)
tree: cb74ef6bfc78d82bcfa4855e61266533ce7cb99f
parent: a5f696c394815e3d630b77187899670b8087da37 (diff)
download: mongo-2c3bcfabf30a05203d23f57e9e00410f8d020e8f.tar.gz
WT-2737 Page scrubbing: more fixes. (#2899)
* WT-2737 Page scrubbing: more fixes.
* Swap endianness of the page header back after writing (for zSeries).
* Deal with restoring updates when the WT_EVICT_SCRUB flag is not set: we want to save disk images for only those pages that have updates to restore. Unfortunately, that isn't easy to figure out down in the guts of raw compression.
* typo
* Move __wt_page_header_byteswap() calls to immediately before/after the write call, so error paths don't leave the caller's content in an unexpected format.
* For now, WT_EVICT_UPDATE_RESTORE implies WT_EVICT_SCRUB; we may be able to relax this in the future.
* whitespace
* If raw compression gives up, the split boundary's already-compressed flag won't be true, but we will have a saved disk image. We can change raw compression to only create disk images if the already-compressed flag will be true, or always create a disk image in raw-compression, regardless of whether or not we actually compressed data. I'm going with the latter; it's simpler.
* Fix a comment.
-rw-r--r-- src/block/block_write.c 19
-rw-r--r-- src/evict/evict_page.c 3
-rw-r--r-- src/reconcile/rec_write.c 26
3 files changed, 26 insertions, 22 deletions
diff --git a/src/block/block_write.c b/src/block/block_write.c
index e6e992436a3..8a77641e1f3 100644
--- a/src/block/block_write.c
+++ b/src/block/block_write.c
@@ -254,12 +254,6 @@ __wt_block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block,
blk = WT_BLOCK_HEADER_REF(buf->mem);
memset(blk, 0, sizeof(*blk));
- /*
- * Swap the page-header as needed; this doesn't belong here, but it's
- * the best place to catch all callers.
- */
- __wt_page_header_byteswap(buf->mem);
-
/* Buffers should be aligned for writing. */
if (!F_ISSET(buf, WT_ITEM_ALIGNED)) {
WT_ASSERT(session, F_ISSET(buf, WT_ITEM_ALIGNED));
@@ -345,9 +339,16 @@ __wt_block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block,
__wt_spin_unlock(session, &block->live_lock);
WT_RET(ret);
- /* Write the block. */
- if ((ret =
- __wt_write(session, fh, offset, align_size, buf->mem)) != 0) {
+ /*
+ * Ensure the page header is in little endian order; this doesn't belong
+ * here, but it's the best place to catch all callers. After the write,
+ * swap values back to native order so callers never see anything other
+ * than their original content.
+ */
+ __wt_page_header_byteswap(buf->mem);
+ ret = __wt_write(session, fh, offset, align_size, buf->mem);
+ __wt_page_header_byteswap(buf->mem);
+ if (ret != 0) {
if (!caller_locked)
__wt_spin_lock(session, &block->live_lock);
WT_TRET(__wt_block_off_free(
diff --git a/src/evict/evict_page.c b/src/evict/evict_page.c
index 5ef6a6370b0..6f5d04cc678 100644
--- a/src/evict/evict_page.c
+++ b/src/evict/evict_page.c
@@ -504,8 +504,7 @@ __evict_review(
LF_SET(WT_VISIBILITY_ERR);
else if (!WT_PAGE_IS_INTERNAL(page)) {
if (F_ISSET(S2C(session), WT_CONN_IN_MEMORY))
- LF_SET(WT_EVICT_IN_MEMORY |
- WT_EVICT_UPDATE_RESTORE | WT_EVICT_SCRUB);
+ LF_SET(WT_EVICT_IN_MEMORY);
else if (page->read_gen == WT_READGEN_OLDEST ||
page->memory_footprint > S2BT(session)->splitmempage)
LF_SET(WT_EVICT_UPDATE_RESTORE);
diff --git a/src/reconcile/rec_write.c b/src/reconcile/rec_write.c
index f08c6c5a825..a6a7c07ea59 100644
--- a/src/reconcile/rec_write.c
+++ b/src/reconcile/rec_write.c
@@ -382,6 +382,15 @@ __wt_reconcile(WT_SESSION_IMPL *session,
WT_ASSERT(session, WT_TXNID_LE(mod->last_oldest_id, oldest_id));
mod->last_oldest_id = oldest_id;
+ /*
+ * Evicting in-memory uses the update/restore mechanisms.
+ * The update/restore mechanisms use disk images.
+ */
+ if (LF_ISSET(WT_EVICT_IN_MEMORY))
+ LF_SET(WT_EVICT_UPDATE_RESTORE);
+ if (LF_ISSET(WT_EVICT_UPDATE_RESTORE))
+ LF_SET(WT_EVICT_SCRUB);
+
/* Initialize the reconciliation structure for each new run. */
if ((ret = __rec_write_init(
session, ref, flags, salvage, &session->reconcile)) != 0) {
@@ -2783,7 +2792,7 @@ no_slots:
/*
* Optionally keep the disk image in cache. Update the initial
- * fields to reflect the actual disk image that was compressed.
+ * page-header fields to reflect the actual data being written.
*/
if (F_ISSET(r, WT_EVICT_SCRUB)) {
WT_RET(__wt_strndup(session, dsk,
@@ -3327,17 +3336,12 @@ supd_check_complete:
copy_image:
/*
- * Optionally keep the disk image in cache (raw compression has already
- * made a copy).
+ * Optionally keep the disk image in cache (raw compression may have
+ * already made a copy).
*/
- if (F_ISSET(r, WT_EVICT_SCRUB)) {
- WT_ASSERT(session,
- (bnd->already_compressed && bnd->disk_image != NULL) ||
- (!bnd->already_compressed && bnd->disk_image == NULL));
- if (bnd->disk_image == NULL)
- WT_ERR(__wt_strndup(
- session, buf->data, buf->size, &bnd->disk_image));
- }
+ if (F_ISSET(r, WT_EVICT_SCRUB) && bnd->disk_image == NULL)
+ WT_ERR(__wt_strndup(
+ session, buf->data, buf->size, &bnd->disk_image));
err: __wt_scr_free(session, &key);
return (ret);