summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Keith Bostic <keith.bostic@mongodb.com> 2016-07-23 13:23:12 -0400
committer GitHub <noreply@github.com> 2016-07-23 13:23:12 -0400
commit 6eee786a839b7ed6f411b87a4591ba7afcd04276 (patch)
tree 34da53c6cb749f2754e5dc5cbfe80cfca4b1a0dd
parent 898a4e682e3ccef8feaae10d0cb35eed3a465b56 (diff)
download mongo-6eee786a839b7ed6f411b87a4591ba7afcd04276.tar.gz
WT-2737 Page scrubbing: more fixes. (#2903)
Revert parts of dc0ae01 and restore parts of 0845a39: move all flag setting outside of reconciliation, and don't save a disk image in the update/restore path unless there are updates to be restored to that segment of the name space.
-rw-r--r-- src/evict/evict_page.c | 16
-rw-r--r-- src/reconcile/rec_write.c | 37
2 files changed, 32 insertions, 21 deletions
diff --git a/src/evict/evict_page.c b/src/evict/evict_page.c
index 019f1beadf9..57010ddd571 100644
--- a/src/evict/evict_page.c
+++ b/src/evict/evict_page.c
@@ -504,10 +504,11 @@ __evict_review(
LF_SET(WT_VISIBILITY_ERR);
else if (!WT_PAGE_IS_INTERNAL(page)) {
if (F_ISSET(S2C(session), WT_CONN_IN_MEMORY))
- LF_SET(WT_EVICT_IN_MEMORY);
+ LF_SET(WT_EVICT_IN_MEMORY |
+ WT_EVICT_SCRUB | WT_EVICT_UPDATE_RESTORE);
else if (page->read_gen == WT_READGEN_OLDEST ||
page->memory_footprint > S2BT(session)->splitmempage)
- LF_SET(WT_EVICT_UPDATE_RESTORE);
+ LF_SET(WT_EVICT_UPDATE_RESTORE | WT_EVICT_SCRUB);
else if (F_ISSET(cache, WT_CACHE_STUCK))
LF_SET(WT_EVICT_LOOKASIDE);
/*
@@ -523,14 +524,13 @@ __evict_review(
/*
* Success: assert the page is clean or reconciliation was configured
- * for in-memory or update/restore. If the page is clean, assert that
- * reconciliation was configured for a lookaside table, or it's not a
- * durable object (currently the lookaside table), or all page updates
- * were globally visible.
+ * for update/restore. If the page is clean, assert that reconciliation
+ * was configured for a lookaside table, or it's not a durable object
+ * (currently the lookaside table), or all page updates were globally
+ * visible.
*/
WT_ASSERT(session,
- !__wt_page_is_modified(page) ||
- LF_ISSET(WT_EVICT_IN_MEMORY | WT_EVICT_UPDATE_RESTORE));
+ !__wt_page_is_modified(page) || LF_ISSET(WT_EVICT_UPDATE_RESTORE));
WT_ASSERT(session,
__wt_page_is_modified(page) ||
LF_ISSET(WT_EVICT_LOOKASIDE) ||
diff --git a/src/reconcile/rec_write.c b/src/reconcile/rec_write.c
index a6a7c07ea59..6132d20c126 100644
--- a/src/reconcile/rec_write.c
+++ b/src/reconcile/rec_write.c
@@ -382,15 +382,6 @@ __wt_reconcile(WT_SESSION_IMPL *session,
WT_ASSERT(session, WT_TXNID_LE(mod->last_oldest_id, oldest_id));
mod->last_oldest_id = oldest_id;
- /*
- * Evicting in-memory uses the update/restore mechanisms.
- * The update/restore mechanisms use disk images.
- */
- if (LF_ISSET(WT_EVICT_IN_MEMORY))
- LF_SET(WT_EVICT_UPDATE_RESTORE);
- if (LF_ISSET(WT_EVICT_UPDATE_RESTORE))
- LF_SET(WT_EVICT_SCRUB);
-
/* Initialize the reconciliation structure for each new run. */
if ((ret = __rec_write_init(
session, ref, flags, salvage, &session->reconcile)) != 0) {
@@ -2793,8 +2784,15 @@ no_slots:
/*
* Optionally keep the disk image in cache. Update the initial
* page-header fields to reflect the actual data being written.
+ *
+ * If updates are saved and need to be restored, we have to keep
+ * a copy of the disk image. Unfortunately, we don't yet know if
+ * there are updates to restore for the key range covered by the
+ * disk image just created. If there are any saved updates, take
+ * a copy of the disk image, it's freed later if not needed.
*/
- if (F_ISSET(r, WT_EVICT_SCRUB)) {
+ if (F_ISSET(r, WT_EVICT_SCRUB) ||
+ (F_ISSET(r, WT_EVICT_UPDATE_RESTORE) && r->supd_next > 0)) {
WT_RET(__wt_strndup(session, dsk,
dsk_dst->mem_size, &last->disk_image));
disk_image = last->disk_image;
@@ -3146,6 +3144,7 @@ __rec_split_write(WT_SESSION_IMPL *session,
uint32_t bnd_slot, i, j;
int cmp;
uint8_t addr[WT_BTREE_MAX_ADDR_COOKIE];
+ bool need_image;
btree = S2BT(session);
dsk = buf->mem;
@@ -3336,12 +3335,24 @@ supd_check_complete:
copy_image:
/*
- * Optionally keep the disk image in cache (raw compression may have
- * already made a copy).
+ * If re-instantiating this page in memory (either because eviction
+ * wants to, or because we skipped updates to build the disk image),
+ * save a copy of the disk image.
+ *
+ * Raw compression might have already saved a copy of the disk image
+ * before we could know if we skipped updates to create it, and now
+ * we know if we're going to need it.
+ *
+ * Copy the disk image if we need a copy and don't already have one,
+ * discard any already saved copy we don't need.
*/
- if (F_ISSET(r, WT_EVICT_SCRUB) && bnd->disk_image == NULL)
+ need_image = F_ISSET(r, WT_EVICT_SCRUB) ||
+ (F_ISSET(r, WT_EVICT_UPDATE_RESTORE) && bnd->supd != NULL);
+ if (need_image && bnd->disk_image == NULL)
WT_ERR(__wt_strndup(
session, buf->data, buf->size, &bnd->disk_image));
+ if (!need_image)
+ __wt_free(session, bnd->disk_image);
err: __wt_scr_free(session, &key);
return (ret);