diff options
Diffstat (limited to 'src/third_party/wiredtiger/src/include/btree.i')
-rw-r--r-- | src/third_party/wiredtiger/src/include/btree.i | 257 |
1 files changed, 148 insertions, 109 deletions
diff --git a/src/third_party/wiredtiger/src/include/btree.i b/src/third_party/wiredtiger/src/include/btree.i index e0102a11511..c07966a4453 100644 --- a/src/third_party/wiredtiger/src/include/btree.i +++ b/src/third_party/wiredtiger/src/include/btree.i @@ -55,28 +55,66 @@ __wt_btree_block_free( } /* + * __wt_btree_bytes_inuse -- + * Return the number of bytes in use. + */ +static inline uint64_t +__wt_btree_bytes_inuse(WT_SESSION_IMPL *session) +{ + WT_BTREE *btree; + WT_CACHE *cache; + + btree = S2BT(session); + cache = S2C(session)->cache; + + return (__wt_cache_bytes_plus_overhead(cache, btree->bytes_inmem)); +} + +/* + * __wt_btree_dirty_leaf_inuse -- + * Return the number of bytes in use by dirty leaf pages. + */ +static inline uint64_t +__wt_btree_dirty_leaf_inuse(WT_SESSION_IMPL *session) +{ + WT_BTREE *btree; + WT_CACHE *cache; + + btree = S2BT(session); + cache = S2C(session)->cache; + + return (__wt_cache_bytes_plus_overhead(cache, btree->bytes_dirty_leaf)); +} + +/* * __wt_cache_page_inmem_incr -- * Increment a page's memory footprint in the cache. */ static inline void __wt_cache_page_inmem_incr(WT_SESSION_IMPL *session, WT_PAGE *page, size_t size) { + WT_BTREE *btree; WT_CACHE *cache; WT_ASSERT(session, size < WT_EXABYTE); - + btree = S2BT(session); cache = S2C(session)->cache; + + (void)__wt_atomic_add64(&btree->bytes_inmem, size); (void)__wt_atomic_add64(&cache->bytes_inmem, size); (void)__wt_atomic_addsize(&page->memory_footprint, size); if (__wt_page_is_modified(page)) { - (void)__wt_atomic_add64(&cache->bytes_dirty, size); (void)__wt_atomic_addsize(&page->modify->bytes_dirty, size); + if (WT_PAGE_IS_INTERNAL(page)) + (void)__wt_atomic_add64(&cache->bytes_dirty_intl, size); + else { + (void)__wt_atomic_add64(&cache->bytes_dirty_leaf, size); + (void)__wt_atomic_add64(&btree->bytes_dirty_leaf, size); + } } - /* Track internal and overflow size in cache. */ + /* Track internal size in cache. */ if (WT_PAGE_IS_INTERNAL(page)) (void)__wt_atomic_add64(&cache->bytes_internal, size); - else if (page->type == WT_PAGE_OVFL) - (void)__wt_atomic_add64(&cache->bytes_overflow, size); } /* @@ -136,6 +174,22 @@ __wt_cache_decr_check_uint64( } /* + * __wt_cache_decr_zero_uint64 -- + * Decrement a uint64_t cache value and zero it on underflow. + */ +static inline void +__wt_cache_decr_zero_uint64( + WT_SESSION_IMPL *session, uint64_t *vp, size_t v, const char *fld) +{ + if (__wt_atomic_sub64(vp, v) < WT_EXABYTE) + return; + + __wt_errx( + session, "%s went negative: decrementing %" WT_SIZET_FMT, fld, v); + *vp = 0; +} + +/* * __wt_cache_page_byte_dirty_decr -- * Decrement the page's dirty byte count, guarding from underflow. */ @@ -143,11 +197,14 @@ static inline void __wt_cache_page_byte_dirty_decr( WT_SESSION_IMPL *session, WT_PAGE *page, size_t size) { + WT_BTREE *btree; WT_CACHE *cache; size_t decr, orig; int i; + btree = S2BT(session); cache = S2C(session)->cache; + decr = 0; /* [-Wconditional-uninitialized] */ /* * We don't have exclusive access and there are ways of decrementing the @@ -174,11 +231,21 @@ __wt_cache_page_byte_dirty_decr( orig = page->modify->bytes_dirty; decr = WT_MIN(size, orig); if (__wt_atomic_cassize( - &page->modify->bytes_dirty, orig, orig - decr)) { - __wt_cache_decr_check_uint64(session, - &cache->bytes_dirty, decr, "WT_CACHE.bytes_dirty"); + &page->modify->bytes_dirty, orig, orig - decr)) break; - } + } + + if (i == 5) + return; + + if (WT_PAGE_IS_INTERNAL(page)) + __wt_cache_decr_check_uint64(session, &cache->bytes_dirty_intl, + decr, "WT_CACHE.bytes_dirty_intl"); + else { + __wt_cache_decr_check_uint64(session, &btree->bytes_dirty_leaf, + decr, "WT_BTREE.bytes_dirty_leaf"); + __wt_cache_decr_check_uint64(session, &cache->bytes_dirty_leaf, + decr, "WT_CACHE.bytes_dirty_leaf"); } } @@ -196,18 +263,17 @@ __wt_cache_page_inmem_decr(WT_SESSION_IMPL *session, WT_PAGE *page, size_t size) WT_ASSERT(session, size < WT_EXABYTE); __wt_cache_decr_check_uint64( + session, &S2BT(session)->bytes_inmem, size, "WT_BTREE.bytes_inmem"); + __wt_cache_decr_check_uint64( session, &cache->bytes_inmem, size, "WT_CACHE.bytes_inmem"); __wt_cache_decr_check_size( session, &page->memory_footprint, size, "WT_PAGE.memory_footprint"); if (__wt_page_is_modified(page)) __wt_cache_page_byte_dirty_decr(session, page, size); - /* Track internal and overflow size in cache. */ + /* Track internal size in cache. */ if (WT_PAGE_IS_INTERNAL(page)) __wt_cache_decr_check_uint64(session, &cache->bytes_internal, size, "WT_CACHE.bytes_internal"); - else if (page->type == WT_PAGE_OVFL) - __wt_cache_decr_check_uint64(session, - &cache->bytes_overflow, size, "WT_CACHE.bytes_overflow"); } /* @@ -218,18 +284,26 @@ __wt_cache_page_inmem_decr(WT_SESSION_IMPL *session, WT_PAGE *page, size_t size) static inline void __wt_cache_dirty_incr(WT_SESSION_IMPL *session, WT_PAGE *page) { + WT_BTREE *btree; WT_CACHE *cache; size_t size; + btree = S2BT(session); cache = S2C(session)->cache; - (void)__wt_atomic_add64(&cache->pages_dirty, 1); /* * Take care to read the memory_footprint once in case we are racing * with updates. */ size = page->memory_footprint; - (void)__wt_atomic_add64(&cache->bytes_dirty, size); + if (WT_PAGE_IS_INTERNAL(page)) { + (void)__wt_atomic_add64(&cache->bytes_dirty_intl, size); + (void)__wt_atomic_add64(&cache->pages_dirty_intl, 1); + } else { + (void)__wt_atomic_add64(&btree->bytes_dirty_leaf, size); + (void)__wt_atomic_add64(&cache->bytes_dirty_leaf, size); + (void)__wt_atomic_add64(&cache->pages_dirty_leaf, 1); + } (void)__wt_atomic_addsize(&page->modify->bytes_dirty, size); } @@ -246,13 +320,12 @@ __wt_cache_dirty_decr(WT_SESSION_IMPL *session, WT_PAGE *page) cache = S2C(session)->cache; - if (cache->pages_dirty < 1) { - __wt_errx(session, - "cache eviction dirty-page decrement failed: dirty page" - "count went negative"); - cache->pages_dirty = 0; - } else - (void)__wt_atomic_sub64(&cache->pages_dirty, 1); + if (WT_PAGE_IS_INTERNAL(page)) + __wt_cache_decr_zero_uint64(session, + &cache->pages_dirty_intl, 1, "dirty internal page count"); + else + __wt_cache_decr_zero_uint64(session, + &cache->pages_dirty_leaf, 1, "dirty leaf page count"); modify = page->modify; if (modify != NULL && modify->bytes_dirty != 0) @@ -261,21 +334,52 @@ __wt_cache_dirty_decr(WT_SESSION_IMPL *session, WT_PAGE *page) } /* + * __wt_cache_page_image_decr -- + * Decrement a page image's size to the cache. + */ +static inline void +__wt_cache_page_image_decr(WT_SESSION_IMPL *session, uint32_t size) +{ + WT_CACHE *cache; + + cache = S2C(session)->cache; + + __wt_cache_decr_check_uint64( + session, &cache->bytes_image, size, "WT_CACHE.image_inmem"); +} + +/* + * __wt_cache_page_image_incr -- + * Increment a page image's size to the cache. + */ +static inline void +__wt_cache_page_image_incr(WT_SESSION_IMPL *session, uint32_t size) +{ + WT_CACHE *cache; + + cache = S2C(session)->cache; + (void)__wt_atomic_add64(&cache->bytes_image, size); +} + +/* * __wt_cache_page_evict -- * Evict pages from the cache. */ static inline void __wt_cache_page_evict(WT_SESSION_IMPL *session, WT_PAGE *page) { + WT_BTREE *btree; WT_CACHE *cache; WT_PAGE_MODIFY *modify; + btree = S2BT(session); cache = S2C(session)->cache; modify = page->modify; /* Update the bytes in-memory to reflect the eviction. */ - __wt_cache_decr_check_uint64(session, - &cache->bytes_inmem, + __wt_cache_decr_check_uint64(session, &S2BT(session)->bytes_inmem, + page->memory_footprint, "WT_BTREE.bytes_inmem"); + __wt_cache_decr_check_uint64(session, &cache->bytes_inmem, page->memory_footprint, "WT_CACHE.bytes_inmem"); /* Update the bytes_internal value to reflect the eviction */ @@ -286,15 +390,18 @@ __wt_cache_page_evict(WT_SESSION_IMPL *session, WT_PAGE *page) /* Update the cache's dirty-byte count. */ if (modify != NULL && modify->bytes_dirty != 0) { - if (cache->bytes_dirty < modify->bytes_dirty) { - __wt_errx(session, - "cache eviction dirty-bytes decrement failed: " - "dirty byte count went negative"); - cache->bytes_dirty = 0; - } else - __wt_cache_decr_check_uint64(session, - &cache->bytes_dirty, - modify->bytes_dirty, "WT_CACHE.bytes_dirty"); + if (WT_PAGE_IS_INTERNAL(page)) + __wt_cache_decr_zero_uint64(session, + &cache->bytes_dirty_intl, + modify->bytes_dirty, "WT_CACHE.bytes_dirty_intl"); + else { + __wt_cache_decr_zero_uint64(session, + &cache->bytes_dirty_leaf, + modify->bytes_dirty, "WT_CACHE.bytes_dirty_leaf"); + __wt_cache_decr_zero_uint64(session, + &btree->bytes_dirty_leaf, + modify->bytes_dirty, "WT_BTREE.bytes_dirty_leaf"); + } } /* Update pages and bytes evicted. */ @@ -318,16 +425,6 @@ __wt_update_list_memsize(WT_UPDATE *upd) } /* - * __wt_page_evict_soon -- - * Set a page to be evicted as soon as possible. - */ -static inline void -__wt_page_evict_soon(WT_PAGE *page) -{ - page->read_gen = WT_READGEN_OLDEST; -} - -/* * __wt_page_modify_init -- * A page is about to be modified, allocate the modification structure. */ @@ -1099,16 +1196,14 @@ __wt_leaf_page_can_split(WT_SESSION_IMPL *session, WT_PAGE *page) * Check whether a page can be evicted. */ static inline bool -__wt_page_can_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool *inmem_splitp) +__wt_page_can_evict( + WT_SESSION_IMPL *session, WT_REF *ref, uint32_t *evict_flagsp) { WT_BTREE *btree; WT_PAGE *page; WT_PAGE_MODIFY *mod; bool modified; - if (inmem_splitp != NULL) - *inmem_splitp = false; - btree = S2BT(session); page = ref->page; mod = page->modify; @@ -1124,8 +1219,8 @@ __wt_page_can_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool *inmem_splitp) * won't be written or discarded from the cache. */ if (__wt_leaf_page_can_split(session, page)) { - if (inmem_splitp != NULL) - *inmem_splitp = true; + if (evict_flagsp != NULL) + FLD_SET(*evict_flagsp, WT_EVICT_INMEM_SPLIT); return (true); } @@ -1137,7 +1232,7 @@ __wt_page_can_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool *inmem_splitp) * previous version might be referenced by an internal page already * been written in the checkpoint, leaving the checkpoint inconsistent. */ - if (btree->checkpointing != WT_CKPT_OFF && modified) { + if (modified && btree->checkpointing != WT_CKPT_OFF) { WT_STAT_FAST_CONN_INCR(session, cache_eviction_checkpoint); WT_STAT_FAST_DATA_INCR(session, cache_eviction_checkpoint); return (false); @@ -1165,72 +1260,16 @@ __wt_page_can_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool *inmem_splitp) return (false); /* - * If the oldest transaction hasn't changed since the last time - * this page was written, it's unlikely we can make progress. - * Similarly, if the most recent update on the page is not yet - * globally visible, eviction will fail. These heuristics - * attempt to avoid repeated attempts to evict the same page. + * If the page is clean but has modifications that appear too new to + * evict, skip it. */ - if (modified && - !F_ISSET(S2C(session)->cache, WT_CACHE_STUCK) && - (mod->last_oldest_id == __wt_txn_oldest_id(session) || - !__wt_txn_visible_all(session, mod->update_txn))) + if (!modified && !__wt_txn_visible_all(session, mod->rec_max_txn)) return (false); return (true); } /* - * __wt_page_release_evict -- - * Release a reference to a page, and attempt to immediately evict it. - */ -static inline int -__wt_page_release_evict(WT_SESSION_IMPL *session, WT_REF *ref) -{ - WT_BTREE *btree; - WT_DECL_RET; - WT_PAGE *page; - bool locked, too_big; - - btree = S2BT(session); - page = ref->page; - - /* - * Take some care with order of operations: if we release the hazard - * reference without first locking the page, it could be evicted in - * between. - */ - locked = __wt_atomic_casv32( - &ref->state, WT_REF_MEM, WT_REF_LOCKED) ? true : false; - if ((ret = __wt_hazard_clear(session, page)) != 0 || !locked) { - if (locked) - ref->state = WT_REF_MEM; - return (ret == 0 ? EBUSY : ret); - } - - (void)__wt_atomic_addv32(&btree->evict_busy, 1); - - too_big = page->memory_footprint > btree->maxmempage; - if ((ret = __wt_evict(session, ref, false)) == 0) { - if (too_big) - WT_STAT_FAST_CONN_INCR(session, cache_eviction_force); - else - /* - * If the page isn't too big, we are evicting it because - * it had a chain of deleted entries that make traversal - * expensive. - */ - WT_STAT_FAST_CONN_INCR( - session, cache_eviction_force_delete); - } else - WT_STAT_FAST_CONN_INCR(session, cache_eviction_force_fail); - - (void)__wt_atomic_subv32(&btree->evict_busy, 1); - - return (ret); -} - -/* * __wt_page_release -- * Release a reference to a page. */ |