diff options
author | Keith Bostic <keith.bostic@mongodb.com> | 2016-07-23 13:23:12 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2016-07-23 13:23:12 -0400 |
commit | 6eee786a839b7ed6f411b87a4591ba7afcd04276 (patch) | |
tree | 34da53c6cb749f2754e5dc5cbfe80cfca4b1a0dd | |
parent | 898a4e682e3ccef8feaae10d0cb35eed3a465b56 (diff) | |
download | mongo-6eee786a839b7ed6f411b87a4591ba7afcd04276.tar.gz |
WT-2737 Page scrubbing: more fixes. (#2903)
Revert parts of dc0ae01 and restore parts of 0845a39: move all flag setting
outside of reconciliation, and don't save a disk image in the update/restore
path unless there are updates to be restored to that segment of the
namespace.
-rw-r--r-- | src/evict/evict_page.c | 16 |
-rw-r--r-- | src/reconcile/rec_write.c | 37 |
2 files changed, 32 insertions, 21 deletions
diff --git a/src/evict/evict_page.c b/src/evict/evict_page.c index 019f1beadf9..57010ddd571 100644 --- a/src/evict/evict_page.c +++ b/src/evict/evict_page.c @@ -504,10 +504,11 @@ __evict_review( LF_SET(WT_VISIBILITY_ERR); else if (!WT_PAGE_IS_INTERNAL(page)) { if (F_ISSET(S2C(session), WT_CONN_IN_MEMORY)) - LF_SET(WT_EVICT_IN_MEMORY); + LF_SET(WT_EVICT_IN_MEMORY | + WT_EVICT_SCRUB | WT_EVICT_UPDATE_RESTORE); else if (page->read_gen == WT_READGEN_OLDEST || page->memory_footprint > S2BT(session)->splitmempage) - LF_SET(WT_EVICT_UPDATE_RESTORE); + LF_SET(WT_EVICT_UPDATE_RESTORE | WT_EVICT_SCRUB); else if (F_ISSET(cache, WT_CACHE_STUCK)) LF_SET(WT_EVICT_LOOKASIDE); /* @@ -523,14 +524,13 @@ __evict_review( /* * Success: assert the page is clean or reconciliation was configured - * for in-memory or update/restore. If the page is clean, assert that - * reconciliation was configured for a lookaside table, or it's not a - * durable object (currently the lookaside table), or all page updates - * were globally visible. + * for update/restore. If the page is clean, assert that reconciliation + * was configured for a lookaside table, or it's not a durable object + * (currently the lookaside table), or all page updates were globally + * visible. */ WT_ASSERT(session, - !__wt_page_is_modified(page) || - LF_ISSET(WT_EVICT_IN_MEMORY | WT_EVICT_UPDATE_RESTORE)); + !__wt_page_is_modified(page) || LF_ISSET(WT_EVICT_UPDATE_RESTORE)); WT_ASSERT(session, __wt_page_is_modified(page) || LF_ISSET(WT_EVICT_LOOKASIDE) || diff --git a/src/reconcile/rec_write.c b/src/reconcile/rec_write.c index a6a7c07ea59..6132d20c126 100644 --- a/src/reconcile/rec_write.c +++ b/src/reconcile/rec_write.c @@ -382,15 +382,6 @@ __wt_reconcile(WT_SESSION_IMPL *session, WT_ASSERT(session, WT_TXNID_LE(mod->last_oldest_id, oldest_id)); mod->last_oldest_id = oldest_id; - /* - * Evicting in-memory uses the update/restore mechanisms. - * The update/restore mechanisms use disk images. 
- */ - if (LF_ISSET(WT_EVICT_IN_MEMORY)) - LF_SET(WT_EVICT_UPDATE_RESTORE); - if (LF_ISSET(WT_EVICT_UPDATE_RESTORE)) - LF_SET(WT_EVICT_SCRUB); - /* Initialize the reconciliation structure for each new run. */ if ((ret = __rec_write_init( session, ref, flags, salvage, &session->reconcile)) != 0) { @@ -2793,8 +2784,15 @@ no_slots: /* * Optionally keep the disk image in cache. Update the initial * page-header fields to reflect the actual data being written. + * + * If updates are saved and need to be restored, we have to keep + * a copy of the disk image. Unfortunately, we don't yet know if + * there are updates to restore for the key range covered by the + * disk image just created. If there are any saved updates, take + * a copy of the disk image, it's freed later if not needed. */ - if (F_ISSET(r, WT_EVICT_SCRUB)) { + if (F_ISSET(r, WT_EVICT_SCRUB) || + (F_ISSET(r, WT_EVICT_UPDATE_RESTORE) && r->supd_next > 0)) { WT_RET(__wt_strndup(session, dsk, dsk_dst->mem_size, &last->disk_image)); disk_image = last->disk_image; @@ -3146,6 +3144,7 @@ __rec_split_write(WT_SESSION_IMPL *session, uint32_t bnd_slot, i, j; int cmp; uint8_t addr[WT_BTREE_MAX_ADDR_COOKIE]; + bool need_image; btree = S2BT(session); dsk = buf->mem; @@ -3336,12 +3335,24 @@ supd_check_complete: copy_image: /* - * Optionally keep the disk image in cache (raw compression may have - * already made a copy). + * If re-instantiating this page in memory (either because eviction + * wants to, or because we skipped updates to build the disk image), + * save a copy of the disk image. + * + * Raw compression might have already saved a copy of the disk image + * before we could know if we skipped updates to create it, and now + * we know if we're going to need it. + * + * Copy the disk image if we need a copy and don't already have one, + * discard any already saved copy we don't need. 
*/ - if (F_ISSET(r, WT_EVICT_SCRUB) && bnd->disk_image == NULL) + need_image = F_ISSET(r, WT_EVICT_SCRUB) || + (F_ISSET(r, WT_EVICT_UPDATE_RESTORE) && bnd->supd != NULL); + if (need_image && bnd->disk_image == NULL) WT_ERR(__wt_strndup( session, buf->data, buf->size, &bnd->disk_image)); + if (!need_image) + __wt_free(session, bnd->disk_image); err: __wt_scr_free(session, &key); return (ret); |