diff options
author | Michael Cahill <michael.cahill@wiredtiger.com> | 2015-02-19 15:36:01 +1100 |
---|---|---|
committer | Michael Cahill <michael.cahill@wiredtiger.com> | 2015-02-19 15:36:01 +1100 |
commit | 27a00a3919c09220ffea8a3afd78a448d50c8c51 (patch) | |
tree | 1e38c8660ec1785a450b9118bd2b0d6d4f34b2a8 | |
parent | 2597ba06643b86c9cba08860ff9a1744374081f9 (diff) | |
download | mongo-27a00a3919c09220ffea8a3afd78a448d50c8c51.tar.gz |
Changes based on @keithbostic's review
-rw-r--r-- | src/evict/evict_page.c | 52 | ||||
-rw-r--r-- | src/include/btree.i | 14 |
2 files changed, 31 insertions, 35 deletions
diff --git a/src/evict/evict_page.c b/src/evict/evict_page.c index fc007e69cb6..9ba1af897a4 100644 --- a/src/evict/evict_page.c +++ b/src/evict/evict_page.c @@ -367,7 +367,28 @@ __evict_review(WT_SESSION_IMPL *session, WT_REF *ref, WT_RET(ret); } - /* Check whether the page can be evicted. */ + /* + * Check whether the page can be evicted. + * + * If the file is being checkpointed, we can't evict dirty pages: + * if we write a page and free the previous version of the page, that + * previous version might be referenced by an internal page already + * been written in the checkpoint, leaving the checkpoint inconsistent. + * + * Don't rely on new updates being skipped by the transaction used + * for transaction reads: (1) there are paths that dirty pages for + * artificial reasons; (2) internal pages aren't transactional; and + * (3) if an update was skipped during the checkpoint (leaving the page + * dirty), then rolled back, we could still successfully overwrite a + * page and corrupt the checkpoint. + * + * Further, we can't race with the checkpoint's reconciliation of + * an internal page as we evict a clean child from the page's subtree. + * This works in the usual way: eviction locks the page and then checks + * for existing hazard pointers, the checkpoint thread reconciling an + * internal page acquires hazard pointers on child pages it reads, and + * is blocked by the exclusive lock. + */ if (!exclusive && !__wt_page_can_evict(session, page, 0)) return (EBUSY); @@ -409,29 +430,18 @@ __evict_review(WT_SESSION_IMPL *session, WT_REF *ref, * If we have an exclusive lock (we're discarding the tree), assert * there are no updates we cannot read. * - * Otherwise, if the top-level page we're evicting is a leaf page, set - * the update-restore flag, so reconciliation will write blocks it can - * write and create a list of skipped updates for blocks it cannot - * write. This is how forced eviction of huge pages works: we take a - * big page and reconcile it into blocks, some of which we write and - * discard, the rest of which we re-create as smaller in-memory pages, - * (restoring the updates that stopped us from writing the block), and - * inserting the whole mess into the page's parent. + * Otherwise, if the top-level page we're evicting is a leaf page + * marked for forced eviction, set the update-restore flag, so + * reconciliation will write blocks it can write and create a list of + * skipped updates for blocks it cannot write. This is how forced + * eviction of huge pages works: we take a big page and reconcile it + * into blocks, some of which we write and discard, the rest of which + * we re-create as smaller in-memory pages, (restoring the updates that + * stopped us from writing the block), and inserting the whole mess + * into the page's parent. * * Don't set the update-restore flag for internal pages, they don't * have updates that can be saved and restored. - * - * Don't set the update-restore flag for small pages. (If a small - * page were selected by eviction and then modified, and we configure it - * for update-restore, we'll end up splitting one or two pages into the - * parent, which is a waste of effort. If we don't set update-restore, - * eviction will return EBUSY, which makes more sense, the page was just - * modified.) - * - * Don't set the update-restore flag for any page other than the - * top one; only the reconciled top page goes through the split path - * (and child pages are pages we expect to merge into the top page, they - * they are not expected to split). */ if (__wt_page_is_modified(page)) { if (exclusive) diff --git a/src/include/btree.i b/src/include/btree.i index a4b7f0d213e..43f8eaa1bfa 100644 --- a/src/include/btree.i +++ b/src/include/btree.i @@ -978,20 +978,6 @@ __wt_page_can_evict(WT_SESSION_IMPL *session, WT_PAGE *page, int check_splits) * if we write a page and free the previous version of the page, that * previous version might be referenced by an internal page already * been written in the checkpoint, leaving the checkpoint inconsistent. - * - * Don't rely on new updates being skipped by the transaction used - * for transaction reads: (1) there are paths that dirty pages for - * artificial reasons; (2) internal pages aren't transactional; and - * (3) if an update was skipped during the checkpoint (leaving the page - * dirty), then rolled back, we could still successfully overwrite a - * page and corrupt the checkpoint. - * - * Further, we can't race with the checkpoint's reconciliation of - * an internal page as we evict a clean child from the page's subtree. - * This works in the usual way: eviction locks the page and then checks - * for existing hazard pointers, the checkpoint thread reconciling an - * internal page acquires hazard pointers on child pages it reads, and - * is blocked by the exclusive lock. */ if (btree->checkpointing && (__wt_page_is_modified(page) || |