summary refs log tree commit diff
diff options
context:
space:
mode:
author Alex Gorrod <alexg@wiredtiger.com> 2014-02-28 17:21:44 +1100
committer Alex Gorrod <alexg@wiredtiger.com> 2014-02-28 17:21:44 +1100
commit 3681e9cbdbed3eee7d80d89fa50bcb2c145aa1c7 (patch)
tree fc1f2abd39bf0764bb413aaaf59d592a5155f76a
parent ad9de86fe52aa32ea67e25864fa3eb6c072abc87 (diff)
parent f3cf9e7cec7268b1f84001b1fb75ab83f2ccc5ca (diff)
download mongo-3681e9cbdbed3eee7d80d89fa50bcb2c145aa1c7.tar.gz
Merge pull request #888 from wiredtiger/rec-skip-clean
Fix a bug regarding when clean pages can be evicted
-rw-r--r-- src/btree/rec_evict.c 2
-rw-r--r-- src/btree/rec_write.c 20
-rw-r--r-- src/include/btmem.h 4
-rw-r--r-- src/include/txn.i 32
4 files changed, 31 insertions, 27 deletions
diff --git a/src/btree/rec_evict.c b/src/btree/rec_evict.c
index 0713989af58..ce6c04d1283 100644
--- a/src/btree/rec_evict.c
+++ b/src/btree/rec_evict.c
@@ -498,7 +498,7 @@ ckpt: WT_STAT_FAST_CONN_INCR(session, cache_eviction_checkpoint);
* cache.
*/
if (!exclusive && mod != NULL &&
- !__wt_txn_visible_all(session, mod->disk_txn))
+ !__wt_txn_visible_all(session, mod->rec_max_txn))
return (EBUSY);
/*
diff --git a/src/btree/rec_write.c b/src/btree/rec_write.c
index 81a4ec7a025..dd237693465 100644
--- a/src/btree/rec_write.c
+++ b/src/btree/rec_write.c
@@ -668,26 +668,12 @@ static inline int
__rec_txn_read(
WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_UPDATE *upd, WT_UPDATE **updp)
{
- uint64_t txnid;
int skip, retried;
retried = 0;
-retry: *updp = __wt_txn_read_skip(session, upd, &skip);
- if (!skip) {
- /*
- * Track the largest transaction ID written to disk for this
- * page. We store this in the page at the end of
- * reconciliation if no updates are skipped. It is used to
- * avoid evicting a clean page from memory with changes that
- * are required to satisfy a snapshot read.
- */
- if (*updp != NULL) {
- txnid = (*updp)->txnid;
- if (TXNID_LT(r->max_txn, txnid))
- r->max_txn = txnid;
- }
+retry: *updp = __wt_txn_read_skip(session, upd, &r->max_txn, &skip);
+ if (!skip)
return (0);
- }
/*
* If skipping this update will cause reconciliation to quit, update
@@ -4093,7 +4079,7 @@ err: __wt_scr_free(&tkey);
* cache's dirty statistics.
*/
if (!r->upd_skipped) {
- mod->disk_txn = r->max_txn;
+ mod->rec_max_txn = r->max_txn;
if (WT_ATOMIC_CAS(mod->write_gen, r->orig_write_gen, 0))
__wt_cache_dirty_decr(session, page);
diff --git a/src/include/btmem.h b/src/include/btmem.h
index 77033f390ed..42d7ecfa9e2 100644
--- a/src/include/btmem.h
+++ b/src/include/btmem.h
@@ -197,8 +197,8 @@ struct __wt_page_modify {
*/
uint64_t disk_snap_min;
- /* The largest transaction ID written to disk for the page. */
- uint64_t disk_txn;
+ /* The largest transaction ID seen on the page by reconciliation. */
+ uint64_t rec_max_txn;
/* The largest update transaction ID (approximate). */
uint64_t update_txn;
diff --git a/src/include/txn.i b/src/include/txn.i
index 8ec1f52bff4..2543d5ff21f 100644
--- a/src/include/txn.i
+++ b/src/include/txn.i
@@ -166,21 +166,39 @@ __wt_txn_visible(WT_SESSION_IMPL *session, uint64_t id)
/*
* __wt_txn_read_skip --
- * Get the first visible update in a list (or NULL if none are visible),
- * and report whether there are an uncommitted changes in the list.
+ * Get the first visible update in a list (or NULL if none are visible).
+ * Report the maximum transaction ID in the list and whether any updates
+ * were skipped to find the visible update.
*/
static inline WT_UPDATE *
-__wt_txn_read_skip(WT_SESSION_IMPL *session, WT_UPDATE *upd, int *skipp)
+__wt_txn_read_skip(
+ WT_SESSION_IMPL *session, WT_UPDATE *upd, uint64_t *max_txn, int *skipp)
{
WT_UPDATE *first_upd;
+ /*
+ * Track the largest transaction ID on this page. We store this in the
+ * page at the end of reconciliation if no updates are skipped. It is
+ * used to avoid evicting a clean page from memory with changes that
+ * are required to satisfy a snapshot read.
+ *
+ * Record whether any updates were skipped on the way to finding the
+ * first visible update. That determines whether a future read with no
+ * intervening modifications to the page could see a different value.
+ * If not, the page can safely be marked clean, and does not need to be
+ * reconciled until it is modified again.
+ */
*skipp = 0;
for (first_upd = NULL; upd != NULL; upd = upd->next)
if (upd->txnid != WT_TXN_ABORTED) {
- if (!__wt_txn_visible(session, upd->txnid))
- *skipp = 1;
- else if (first_upd == NULL)
- first_upd = upd;
+ if (TXNID_LT(*max_txn, upd->txnid))
+ *max_txn = upd->txnid;
+ if (first_upd == NULL) {
+ if (__wt_txn_visible(session, upd->txnid))
+ first_upd = upd;
+ else
+ *skipp = 1;
+ }
}
return (first_upd);