diff options
author | Michael Cahill <michael.cahill@mongodb.com> | 2016-07-23 00:55:39 +1000 |
---|---|---|
committer | Keith Bostic <keith.bostic@mongodb.com> | 2016-07-22 10:55:39 -0400 |
commit | 2c3bcfabf30a05203d23f57e9e00410f8d020e8f (patch) | |
tree | cb74ef6bfc78d82bcfa4855e61266533ce7cb99f | |
parent | a5f696c394815e3d630b77187899670b8087da37 (diff) | |
download | mongo-2c3bcfabf30a05203d23f57e9e00410f8d020e8f.tar.gz |
WT-2737 Page scrubbing: more fixes. (#2899)
* WT-2737 Page scrubbing: more fixes.
* Swap endianness of the page header back after writing (for zSeries).
* Deal with restoring updates when the WT_EVICT_SCRUB flag is not set:
we want to save disk images only for those pages that have updates to
restore. Unfortunately, that isn't easy to determine deep inside the
raw compression code.
* typo
* Move __wt_page_header_byteswap() calls to immediately before/after the write
call, so error paths don't leave the caller's content in an unexpected format.
* For now, WT_EVICT_UPDATE_RESTORE implies WT_EVICT_SCRUB; we may be able to
relax this in the future.
* whitespace
* If raw compression gives up, the split boundary's already-compressed
flag won't be true, but we will have a saved disk image. We can either
change raw compression to only create disk images if the
already-compressed flag will be true, or always create a disk image in
raw compression, regardless of whether or not we actually compressed
data. I'm going with the latter; it's simpler.
* Fix a comment.
-rw-r--r-- | src/block/block_write.c | 19 | ||||
-rw-r--r-- | src/evict/evict_page.c | 3 | ||||
-rw-r--r-- | src/reconcile/rec_write.c | 26 |
3 files changed, 26 insertions, 22 deletions
diff --git a/src/block/block_write.c b/src/block/block_write.c index e6e992436a3..8a77641e1f3 100644 --- a/src/block/block_write.c +++ b/src/block/block_write.c @@ -254,12 +254,6 @@ __wt_block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block, blk = WT_BLOCK_HEADER_REF(buf->mem); memset(blk, 0, sizeof(*blk)); - /* - * Swap the page-header as needed; this doesn't belong here, but it's - * the best place to catch all callers. - */ - __wt_page_header_byteswap(buf->mem); - /* Buffers should be aligned for writing. */ if (!F_ISSET(buf, WT_ITEM_ALIGNED)) { WT_ASSERT(session, F_ISSET(buf, WT_ITEM_ALIGNED)); @@ -345,9 +339,16 @@ __wt_block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block, __wt_spin_unlock(session, &block->live_lock); WT_RET(ret); - /* Write the block. */ - if ((ret = - __wt_write(session, fh, offset, align_size, buf->mem)) != 0) { + /* + * Ensure the page header is in little endian order; this doesn't belong + * here, but it's the best place to catch all callers. After the write, + * swap values back to native order so callers never see anything other + * than their original content. 
+ */ + __wt_page_header_byteswap(buf->mem); + ret = __wt_write(session, fh, offset, align_size, buf->mem); + __wt_page_header_byteswap(buf->mem); + if (ret != 0) { if (!caller_locked) __wt_spin_lock(session, &block->live_lock); WT_TRET(__wt_block_off_free( diff --git a/src/evict/evict_page.c b/src/evict/evict_page.c index 5ef6a6370b0..6f5d04cc678 100644 --- a/src/evict/evict_page.c +++ b/src/evict/evict_page.c @@ -504,8 +504,7 @@ __evict_review( LF_SET(WT_VISIBILITY_ERR); else if (!WT_PAGE_IS_INTERNAL(page)) { if (F_ISSET(S2C(session), WT_CONN_IN_MEMORY)) - LF_SET(WT_EVICT_IN_MEMORY | - WT_EVICT_UPDATE_RESTORE | WT_EVICT_SCRUB); + LF_SET(WT_EVICT_IN_MEMORY); else if (page->read_gen == WT_READGEN_OLDEST || page->memory_footprint > S2BT(session)->splitmempage) LF_SET(WT_EVICT_UPDATE_RESTORE); diff --git a/src/reconcile/rec_write.c b/src/reconcile/rec_write.c index f08c6c5a825..a6a7c07ea59 100644 --- a/src/reconcile/rec_write.c +++ b/src/reconcile/rec_write.c @@ -382,6 +382,15 @@ __wt_reconcile(WT_SESSION_IMPL *session, WT_ASSERT(session, WT_TXNID_LE(mod->last_oldest_id, oldest_id)); mod->last_oldest_id = oldest_id; + /* + * Evicting in-memory uses the update/restore mechanisms. + * The update/restore mechanisms use disk images. + */ + if (LF_ISSET(WT_EVICT_IN_MEMORY)) + LF_SET(WT_EVICT_UPDATE_RESTORE); + if (LF_ISSET(WT_EVICT_UPDATE_RESTORE)) + LF_SET(WT_EVICT_SCRUB); + /* Initialize the reconciliation structure for each new run. */ if ((ret = __rec_write_init( session, ref, flags, salvage, &session->reconcile)) != 0) { @@ -2783,7 +2792,7 @@ no_slots: /* * Optionally keep the disk image in cache. Update the initial - * fields to reflect the actual disk image that was compressed. + * page-header fields to reflect the actual data being written. */ if (F_ISSET(r, WT_EVICT_SCRUB)) { WT_RET(__wt_strndup(session, dsk, @@ -3327,17 +3336,12 @@ supd_check_complete: copy_image: /* - * Optionally keep the disk image in cache (raw compression has already - * made a copy). 
+ * Optionally keep the disk image in cache (raw compression may have + * already made a copy). */ - if (F_ISSET(r, WT_EVICT_SCRUB)) { - WT_ASSERT(session, - (bnd->already_compressed && bnd->disk_image != NULL) || - (!bnd->already_compressed && bnd->disk_image == NULL)); - if (bnd->disk_image == NULL) - WT_ERR(__wt_strndup( - session, buf->data, buf->size, &bnd->disk_image)); - } + if (F_ISSET(r, WT_EVICT_SCRUB) && bnd->disk_image == NULL) + WT_ERR(__wt_strndup( + session, buf->data, buf->size, &bnd->disk_image)); err: __wt_scr_free(session, &key); return (ret); |