summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichael Cahill <michael.cahill@wiredtiger.com>2015-02-19 15:36:01 +1100
committerMichael Cahill <michael.cahill@wiredtiger.com>2015-02-19 15:36:01 +1100
commit27a00a3919c09220ffea8a3afd78a448d50c8c51 (patch)
tree1e38c8660ec1785a450b9118bd2b0d6d4f34b2a8
parent2597ba06643b86c9cba08860ff9a1744374081f9 (diff)
downloadmongo-27a00a3919c09220ffea8a3afd78a448d50c8c51.tar.gz
Changes based on @keithbostic's review
-rw-r--r--src/evict/evict_page.c52
-rw-r--r--src/include/btree.i14
2 files changed, 31 insertions, 35 deletions
diff --git a/src/evict/evict_page.c b/src/evict/evict_page.c
index fc007e69cb6..9ba1af897a4 100644
--- a/src/evict/evict_page.c
+++ b/src/evict/evict_page.c
@@ -367,7 +367,28 @@ __evict_review(WT_SESSION_IMPL *session, WT_REF *ref,
WT_RET(ret);
}
- /* Check whether the page can be evicted. */
+ /*
+ * Check whether the page can be evicted.
+ *
+ * If the file is being checkpointed, we can't evict dirty pages:
+ * if we write a page and free the previous version of the page, that
+ * previous version might be referenced by an internal page already
+ * been written in the checkpoint, leaving the checkpoint inconsistent.
+ *
+ * Don't rely on new updates being skipped by the transaction used
+ * for transaction reads: (1) there are paths that dirty pages for
+ * artificial reasons; (2) internal pages aren't transactional; and
+ * (3) if an update was skipped during the checkpoint (leaving the page
+ * dirty), then rolled back, we could still successfully overwrite a
+ * page and corrupt the checkpoint.
+ *
+ * Further, we can't race with the checkpoint's reconciliation of
+ * an internal page as we evict a clean child from the page's subtree.
+ * This works in the usual way: eviction locks the page and then checks
+ * for existing hazard pointers, the checkpoint thread reconciling an
+ * internal page acquires hazard pointers on child pages it reads, and
+ * is blocked by the exclusive lock.
+ */
if (!exclusive && !__wt_page_can_evict(session, page, 0))
return (EBUSY);
@@ -409,29 +430,18 @@ __evict_review(WT_SESSION_IMPL *session, WT_REF *ref,
* If we have an exclusive lock (we're discarding the tree), assert
* there are no updates we cannot read.
*
- * Otherwise, if the top-level page we're evicting is a leaf page, set
- * the update-restore flag, so reconciliation will write blocks it can
- * write and create a list of skipped updates for blocks it cannot
- * write. This is how forced eviction of huge pages works: we take a
- * big page and reconcile it into blocks, some of which we write and
- * discard, the rest of which we re-create as smaller in-memory pages,
- * (restoring the updates that stopped us from writing the block), and
- * inserting the whole mess into the page's parent.
+ * Otherwise, if the top-level page we're evicting is a leaf page
+ * marked for forced eviction, set the update-restore flag, so
+ * reconciliation will write blocks it can write and create a list of
+ * skipped updates for blocks it cannot write. This is how forced
+ * eviction of huge pages works: we take a big page and reconcile it
+ * into blocks, some of which we write and discard, the rest of which
+ * we re-create as smaller in-memory pages, (restoring the updates that
+ * stopped us from writing the block), and inserting the whole mess
+ * into the page's parent.
*
* Don't set the update-restore flag for internal pages, they don't
* have updates that can be saved and restored.
- *
- * Don't set the update-restore flag for small pages. (If a small
- * page were selected by eviction and then modified, and we configure it
- * for update-restore, we'll end up splitting one or two pages into the
- * parent, which is a waste of effort. If we don't set update-restore,
- * eviction will return EBUSY, which makes more sense, the page was just
- * modified.)
- *
- * Don't set the update-restore flag for any page other than the
- * top one; only the reconciled top page goes through the split path
- * (and child pages are pages we expect to merge into the top page, they
- * they are not expected to split).
*/
if (__wt_page_is_modified(page)) {
if (exclusive)
diff --git a/src/include/btree.i b/src/include/btree.i
index a4b7f0d213e..43f8eaa1bfa 100644
--- a/src/include/btree.i
+++ b/src/include/btree.i
@@ -978,20 +978,6 @@ __wt_page_can_evict(WT_SESSION_IMPL *session, WT_PAGE *page, int check_splits)
* if we write a page and free the previous version of the page, that
* previous version might be referenced by an internal page already
* been written in the checkpoint, leaving the checkpoint inconsistent.
- *
- * Don't rely on new updates being skipped by the transaction used
- * for transaction reads: (1) there are paths that dirty pages for
- * artificial reasons; (2) internal pages aren't transactional; and
- * (3) if an update was skipped during the checkpoint (leaving the page
- * dirty), then rolled back, we could still successfully overwrite a
- * page and corrupt the checkpoint.
- *
- * Further, we can't race with the checkpoint's reconciliation of
- * an internal page as we evict a clean child from the page's subtree.
- * This works in the usual way: eviction locks the page and then checks
- * for existing hazard pointers, the checkpoint thread reconciling an
- * internal page acquires hazard pointers on child pages it reads, and
- * is blocked by the exclusive lock.
*/
if (btree->checkpointing &&
(__wt_page_is_modified(page) ||