summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichael Cahill <michael.cahill@wiredtiger.com>2013-10-08 16:24:56 +1100
committerMichael Cahill <michael.cahill@wiredtiger.com>2013-10-08 16:24:56 +1100
commit97784f731b01c4d57d0074333b71b85b5d277b65 (patch)
treee1238b9669937e3c35ba3ad5482126743af61a9d
parentd84d623911a76f7393f3e06f0fae50a14f60194e (diff)
downloadmongo-97784f731b01c4d57d0074333b71b85b5d277b65.tar.gz
Track the per-page contribution to the cache's dirty byte count. If this is non-zero when a page is being discarded, fix the cache's count.
refs #635, #699
-rw-r--r--src/btree/bt_discard.c9
-rw-r--r--src/btree/bt_evict.c3
-rw-r--r--src/btree/rec_write.c16
-rw-r--r--src/include/btmem.h2
-rw-r--r--src/include/btree.i36
5 files changed, 43 insertions, 23 deletions
diff --git a/src/btree/bt_discard.c b/src/btree/bt_discard.c
index 7a123b6f0a1..7edc4930e29 100644
--- a/src/btree/bt_discard.c
+++ b/src/btree/bt_discard.c
@@ -44,6 +44,15 @@ __wt_page_out(WT_SESSION_IMPL *session, WT_PAGE **pagep)
WT_ASSERT(session, S2BT(session)->evict_page != page);
WT_ASSERT(session, !F_ISSET_ATOMIC(page, WT_PAGE_EVICT_LRU));
+ /*
+ * In rare cases, we may race tracking a page's dirty footprint.
+ * If so, we will get here with a non-zero dirty_size in the page, and
+ * we can fix the global stats.
+ */
+ if (page->bytes_dirty != 0)
+ (void)WT_ATOMIC_SUB(
+ S2C(session)->cache->bytes_dirty, page->bytes_dirty);
+
#ifdef HAVE_DIAGNOSTIC
{
WT_HAZARD *hp;
diff --git a/src/btree/bt_evict.c b/src/btree/bt_evict.c
index 8d40b682837..9cf550cec8f 100644
--- a/src/btree/bt_evict.c
+++ b/src/btree/bt_evict.c
@@ -470,8 +470,7 @@ __wt_evict_file(WT_SESSION_IMPL *session, int syncop)
if (WT_PAGE_IS_ROOT(page))
btree->root_page = NULL;
if (__wt_page_is_modified(page))
- __wt_cache_dirty_decr(
- session, page->memory_footprint);
+ __wt_cache_dirty_decr(session, page);
__wt_page_out(session, &page);
break;
WT_ILLEGAL_VALUE_ERR(session);
diff --git a/src/btree/rec_write.c b/src/btree/rec_write.c
index 898dfc97fb3..5e4d6dc4f25 100644
--- a/src/btree/rec_write.c
+++ b/src/btree/rec_write.c
@@ -3935,22 +3935,8 @@ err: __wt_scr_free(&tkey);
if (!r->upd_skipped) {
mod->disk_txn = r->max_txn;
- /*
- * !!!
- * This code has a bug: the idea is to read the memory footprint
- * before attempting to "clean" the page, which is safe because
- * the atomic compare-and-swap is a read barrier so the read of
- * the memory footprint precedes the update of the page's write
- * generation. Since it is possible to decrement the footprint
- * of the page without making the page "dirty" (for example
- * when freeing an obsolete update list), the footprint could
- * be decremented between read and swap, and we might attempt to
- * decrement more than the bytes held by the page. Unlikely,
- * but technically possible.
- */
- size = page->memory_footprint;
if (WT_ATOMIC_CAS(mod->write_gen, r->orig_write_gen, 0))
- __wt_cache_dirty_decr(session, size);
+ __wt_cache_dirty_decr(session, page);
}
/* Record the most recent transaction ID we have *not* written. */
diff --git a/src/include/btmem.h b/src/include/btmem.h
index 0bab73bfcd0..51ff9c3dcac 100644
--- a/src/include/btmem.h
+++ b/src/include/btmem.h
@@ -351,6 +351,8 @@ struct __wt_page {
/* Memory attached to the page. */
uint32_t memory_footprint;
+ int64_t bytes_dirty; /* Dirty bytes added to cache. */
+
#define WT_PAGE_INVALID 0 /* Invalid page */
#define WT_PAGE_BLOCK_MANAGER 1 /* Block-manager page */
#define WT_PAGE_COL_FIX 2 /* Col-store fixed-len leaf */
diff --git a/src/include/btree.i b/src/include/btree.i
index 16fbf79985a..b931913e777 100644
--- a/src/include/btree.i
+++ b/src/include/btree.i
@@ -71,8 +71,10 @@ __wt_cache_page_inmem_incr(WT_SESSION_IMPL *session, WT_PAGE *page, size_t size)
cache = S2C(session)->cache;
(void)WT_ATOMIC_ADD(cache->bytes_inmem, size);
(void)WT_ATOMIC_ADD(page->memory_footprint, WT_STORE_SIZE(size));
- if (__wt_page_is_modified(page))
+ if (__wt_page_is_modified(page)) {
(void)WT_ATOMIC_ADD(cache->bytes_dirty, size);
+ (void)WT_ATOMIC_ADD(page->bytes_dirty, size);
+ }
}
/*
@@ -89,8 +91,10 @@ __wt_cache_page_inmem_decr(WT_SESSION_IMPL *session, WT_PAGE *page, size_t size)
cache = S2C(session)->cache;
(void)WT_ATOMIC_SUB(cache->bytes_inmem, size);
(void)WT_ATOMIC_SUB(page->memory_footprint, WT_STORE_SIZE(size));
- if (__wt_page_is_modified(page))
+ if (__wt_page_is_modified(page)) {
(void)WT_ATOMIC_SUB(cache->bytes_dirty, size);
+ (void)WT_ATOMIC_SUB(page->bytes_dirty, size);
+ }
}
/*
@@ -99,11 +103,14 @@ __wt_cache_page_inmem_decr(WT_SESSION_IMPL *session, WT_PAGE *page, size_t size)
* be called after a reconciliation leaves a page clean.
*/
static inline void
-__wt_cache_dirty_decr(WT_SESSION_IMPL *session, size_t size)
+__wt_cache_dirty_decr(WT_SESSION_IMPL *session, WT_PAGE *page)
{
WT_CACHE *cache;
+ size_t size;
cache = S2C(session)->cache;
+ size = page->memory_footprint;
+
if (cache->bytes_dirty < size || cache->pages_dirty == 0) {
if (WT_VERBOSE_ISSET(session, evictserver))
(void)__wt_verbose(session,
@@ -114,9 +121,22 @@ __wt_cache_dirty_decr(WT_SESSION_IMPL *session, size_t size)
cache->bytes_dirty, (uintmax_t)size);
cache->bytes_dirty = 0;
cache->pages_dirty = 0;
+ page->bytes_dirty = 0;
} else {
+ /*
+ * It is possible to decrement the footprint of the page
+ * without making the page dirty (for example when freeing an
+ * obsolete update list), so the footprint could change between
+ * read and decrement, and we might attempt to decrement by a
+ * different amount than the bytes held by the page.
+ *
+ * We catch that by maintaining a per-page dirty size, and
+ * fixing the cache stats if that is non-zero when the page is
+ * discarded.
+ */
(void)WT_ATOMIC_SUB(cache->bytes_dirty, size);
(void)WT_ATOMIC_SUB(cache->pages_dirty, 1);
+ (void)WT_ATOMIC_SUB(page->bytes_dirty, size);
}
}
@@ -214,6 +234,8 @@ __wt_page_modify_init(WT_SESSION_IMPL *session, WT_PAGE *page)
static inline void
__wt_page_only_modify_set(WT_SESSION_IMPL *session, WT_PAGE *page)
{
+ uint32_t size;
+
/*
* We depend on atomic-add being a write barrier, that is, a barrier to
* ensure all changes to the page are flushed before updating the page
@@ -221,12 +243,14 @@ __wt_page_only_modify_set(WT_SESSION_IMPL *session, WT_PAGE *page)
* and/or page reconciliation might be looking at a clean page/tree.
*
* Every time the page transitions from clean to dirty, update the cache
- * and transactional information.
+ * and transactional information. Take care to read the
+ * memory_footprint once in case we are racing with updates.
*/
if (WT_ATOMIC_ADD(page->modify->write_gen, 1) == 1) {
(void)WT_ATOMIC_ADD(S2C(session)->cache->pages_dirty, 1);
- (void)WT_ATOMIC_ADD(
- S2C(session)->cache->bytes_dirty, page->memory_footprint);
+ size = page->memory_footprint;
+ (void)WT_ATOMIC_ADD(S2C(session)->cache->bytes_dirty, size);
+ (void)WT_ATOMIC_ADD(page->bytes_dirty, size);
/*
* The page can never end up with changes older than the oldest