diff options
author | Michael Cahill <michael.cahill@wiredtiger.com> | 2013-10-08 16:24:56 +1100 |
---|---|---|
committer | Michael Cahill <michael.cahill@wiredtiger.com> | 2013-10-08 16:24:56 +1100 |
commit | 97784f731b01c4d57d0074333b71b85b5d277b65 (patch) | |
tree | e1238b9669937e3c35ba3ad5482126743af61a9d | |
parent | d84d623911a76f7393f3e06f0fae50a14f60194e (diff) | |
download | mongo-97784f731b01c4d57d0074333b71b85b5d277b65.tar.gz |
Track the per-page contribution to the cache's dirty byte count. If this is non-zero when a page is being discarded, fix the cache's count.
refs #635, #699
-rw-r--r-- | src/btree/bt_discard.c | 9 | ||||
-rw-r--r-- | src/btree/bt_evict.c | 3 | ||||
-rw-r--r-- | src/btree/rec_write.c | 16 | ||||
-rw-r--r-- | src/include/btmem.h | 2 | ||||
-rw-r--r-- | src/include/btree.i | 36 |
5 files changed, 43 insertions, 23 deletions
diff --git a/src/btree/bt_discard.c b/src/btree/bt_discard.c index 7a123b6f0a1..7edc4930e29 100644 --- a/src/btree/bt_discard.c +++ b/src/btree/bt_discard.c @@ -44,6 +44,15 @@ __wt_page_out(WT_SESSION_IMPL *session, WT_PAGE **pagep) WT_ASSERT(session, S2BT(session)->evict_page != page); WT_ASSERT(session, !F_ISSET_ATOMIC(page, WT_PAGE_EVICT_LRU)); + /* + * In rare cases, we may race tracking a page's dirty footprint. + * If so, we will get here with a non-zero dirty_size in the page, and + * we can fix the global stats. + */ + if (page->bytes_dirty != 0) + (void)WT_ATOMIC_SUB( + S2C(session)->cache->bytes_dirty, page->bytes_dirty); + #ifdef HAVE_DIAGNOSTIC { WT_HAZARD *hp; diff --git a/src/btree/bt_evict.c b/src/btree/bt_evict.c index 8d40b682837..9cf550cec8f 100644 --- a/src/btree/bt_evict.c +++ b/src/btree/bt_evict.c @@ -470,8 +470,7 @@ __wt_evict_file(WT_SESSION_IMPL *session, int syncop) if (WT_PAGE_IS_ROOT(page)) btree->root_page = NULL; if (__wt_page_is_modified(page)) - __wt_cache_dirty_decr( - session, page->memory_footprint); + __wt_cache_dirty_decr(session, page); __wt_page_out(session, &page); break; WT_ILLEGAL_VALUE_ERR(session); diff --git a/src/btree/rec_write.c b/src/btree/rec_write.c index 898dfc97fb3..5e4d6dc4f25 100644 --- a/src/btree/rec_write.c +++ b/src/btree/rec_write.c @@ -3935,22 +3935,8 @@ err: __wt_scr_free(&tkey); if (!r->upd_skipped) { mod->disk_txn = r->max_txn; - /* - * !!! - * This code has a bug: the idea is to read the memory footprint - * before attempting to "clean" the page, which is safe because - * the atomic compare-and-swap is a read barrier so the read of - * the memory footprint precedes the update of the page's write - * generation. Since it is possible to decrement the footprint - * of the page without making the page "dirty" (for example - * when freeing an obsolete update list), the footprint could - * be decremented between read and swap, and we might attempt to - * decrement more than the bytes held by the page. Unlikely, - * but technically possible. - */ - size = page->memory_footprint; if (WT_ATOMIC_CAS(mod->write_gen, r->orig_write_gen, 0)) - __wt_cache_dirty_decr(session, size); + __wt_cache_dirty_decr(session, page); } /* Record the most recent transaction ID we have *not* written. */ diff --git a/src/include/btmem.h b/src/include/btmem.h index 0bab73bfcd0..51ff9c3dcac 100644 --- a/src/include/btmem.h +++ b/src/include/btmem.h @@ -351,6 +351,8 @@ struct __wt_page { /* Memory attached to the page. */ uint32_t memory_footprint; + int64_t bytes_dirty; /* Dirty bytes added to cache. */ + #define WT_PAGE_INVALID 0 /* Invalid page */ #define WT_PAGE_BLOCK_MANAGER 1 /* Block-manager page */ #define WT_PAGE_COL_FIX 2 /* Col-store fixed-len leaf */ diff --git a/src/include/btree.i b/src/include/btree.i index 16fbf79985a..b931913e777 100644 --- a/src/include/btree.i +++ b/src/include/btree.i @@ -71,8 +71,10 @@ __wt_cache_page_inmem_incr(WT_SESSION_IMPL *session, WT_PAGE *page, size_t size) cache = S2C(session)->cache; (void)WT_ATOMIC_ADD(cache->bytes_inmem, size); (void)WT_ATOMIC_ADD(page->memory_footprint, WT_STORE_SIZE(size)); - if (__wt_page_is_modified(page)) + if (__wt_page_is_modified(page)) { (void)WT_ATOMIC_ADD(cache->bytes_dirty, size); + (void)WT_ATOMIC_ADD(page->bytes_dirty, size); + } } /* @@ -89,8 +91,10 @@ __wt_cache_page_inmem_decr(WT_SESSION_IMPL *session, WT_PAGE *page, size_t size) cache = S2C(session)->cache; (void)WT_ATOMIC_SUB(cache->bytes_inmem, size); (void)WT_ATOMIC_SUB(page->memory_footprint, WT_STORE_SIZE(size)); - if (__wt_page_is_modified(page)) + if (__wt_page_is_modified(page)) { (void)WT_ATOMIC_SUB(cache->bytes_dirty, size); + (void)WT_ATOMIC_SUB(page->bytes_dirty, size); + } } /* @@ -99,11 +103,14 @@ __wt_cache_page_inmem_decr(WT_SESSION_IMPL *session, WT_PAGE *page, size_t size) * be called after a reconciliation leaves a page clean. */ static inline void -__wt_cache_dirty_decr(WT_SESSION_IMPL *session, size_t size) +__wt_cache_dirty_decr(WT_SESSION_IMPL *session, WT_PAGE *page) { WT_CACHE *cache; + size_t size; cache = S2C(session)->cache; + size = page->memory_footprint; + if (cache->bytes_dirty < size || cache->pages_dirty == 0) { if (WT_VERBOSE_ISSET(session, evictserver)) (void)__wt_verbose(session, @@ -114,9 +121,22 @@ __wt_cache_dirty_decr(WT_SESSION_IMPL *session, size_t size) cache->bytes_dirty, (uintmax_t)size); cache->bytes_dirty = 0; cache->pages_dirty = 0; + page->bytes_dirty = 0; } else { + /* + * It is possible to decrement the footprint of the page + * without making the page dirty (for example when freeing an + * obsolete update list), so the footprint could change between + * read and decrement, and we might attempt to decrement by a + * different amount than the bytes held by the page. + * + * We catch that by maintaining a per-page dirty size, and + * fixing the cache stats if that is non-zero when the page is + * discarded. + */ (void)WT_ATOMIC_SUB(cache->bytes_dirty, size); (void)WT_ATOMIC_SUB(cache->pages_dirty, 1); + (void)WT_ATOMIC_SUB(page->bytes_dirty, size); } } @@ -214,6 +234,8 @@ __wt_page_modify_init(WT_SESSION_IMPL *session, WT_PAGE *page) static inline void __wt_page_only_modify_set(WT_SESSION_IMPL *session, WT_PAGE *page) { + uint32_t size; + /* * We depend on atomic-add being a write barrier, that is, a barrier to * ensure all changes to the page are flushed before updating the page @@ -221,12 +243,14 @@ __wt_page_only_modify_set(WT_SESSION_IMPL *session, WT_PAGE *page) * and/or page reconciliation might be looking at a clean page/tree. * * Every time the page transitions from clean to dirty, update the cache - * and transactional information. + * and transactional information. Take care to read the + * memory_footprint once in case we are racing with updates. */ if (WT_ATOMIC_ADD(page->modify->write_gen, 1) == 1) { (void)WT_ATOMIC_ADD(S2C(session)->cache->pages_dirty, 1); - (void)WT_ATOMIC_ADD( - S2C(session)->cache->bytes_dirty, page->memory_footprint); + size = page->memory_footprint; + (void)WT_ATOMIC_ADD(S2C(session)->cache->bytes_dirty, size); + (void)WT_ATOMIC_ADD(page->bytes_dirty, size); /* * The page can never end up with changes older than the oldest |