/*-
 * Copyright (c) 2014-2016 MongoDB, Inc.
 * Copyright (c) 2008-2014 WiredTiger, Inc.
 *    All rights reserved.
 *
 * See the file LICENSE for redistribution information.
 */

/*
 * __wt_cache_aggressive --
 *    Indicate if the cache is operating in aggressive mode.
 */
static inline bool
__wt_cache_aggressive(WT_SESSION_IMPL *session)
{
    return (S2C(session)->cache->evict_aggressive_score >=
        WT_EVICT_SCORE_CUTOFF);
}

/*
 * __wt_cache_read_gen --
 *    Get the current read generation number.
 */
static inline uint64_t
__wt_cache_read_gen(WT_SESSION_IMPL *session)
{
    return (S2C(session)->cache->read_gen);
}

/*
 * __wt_cache_read_gen_incr --
 *    Increment the current read generation number.
 */
static inline void
__wt_cache_read_gen_incr(WT_SESSION_IMPL *session)
{
    ++S2C(session)->cache->read_gen;
}

/*
 * __wt_cache_read_gen_bump --
 *    Update the page's read generation.
 */
static inline void
__wt_cache_read_gen_bump(WT_SESSION_IMPL *session, WT_PAGE *page)
{
    /* Ignore pages set for forcible eviction. */
    if (page->read_gen == WT_READGEN_OLDEST)
        return;

    /* Ignore pages already in the future. */
    if (page->read_gen > __wt_cache_read_gen(session))
        return;

    /*
     * We set read-generations in the future (where "the future" is measured
     * by increments of the global read generation). The reason is that when
     * acquiring a new hazard pointer for a page, we can check its read
     * generation, and if the read generation isn't less than the current
     * global generation, we don't bother updating the page. In other words,
     * the goal is to avoid some number of updates immediately after each
     * update we have to make.
     */
    page->read_gen = __wt_cache_read_gen(session) + WT_READGEN_STEP;
}

/*
 * __wt_cache_read_gen_new --
 *    Get the read generation for a new page in memory.
 */
static inline void
__wt_cache_read_gen_new(WT_SESSION_IMPL *session, WT_PAGE *page)
{
    WT_CACHE *cache;

    cache = S2C(session)->cache;
    page->read_gen =
        (__wt_cache_read_gen(session) + cache->read_gen_oldest) / 2;
}

/*
 * __wt_cache_stuck --
 *    Indicate if the cache is stuck (i.e., not making progress).
 */
static inline bool
__wt_cache_stuck(WT_SESSION_IMPL *session)
{
    WT_CACHE *cache;

    cache = S2C(session)->cache;
    return (cache->evict_aggressive_score == WT_EVICT_SCORE_MAX &&
        F_ISSET(cache,
        WT_CACHE_EVICT_CLEAN_HARD | WT_CACHE_EVICT_DIRTY_HARD));
}

/*
 * __wt_page_evict_soon --
 *    Set a page to be evicted as soon as possible.
 */
static inline void
__wt_page_evict_soon(WT_SESSION_IMPL *session, WT_REF *ref)
{
    WT_UNUSED(session);

    ref->page->read_gen = WT_READGEN_OLDEST;
}

/*
 * __wt_cache_pages_inuse --
 *    Return the number of pages in use.
 */
static inline uint64_t
__wt_cache_pages_inuse(WT_CACHE *cache)
{
    return (cache->pages_inmem - cache->pages_evict);
}

/*
 * __wt_cache_bytes_plus_overhead --
 *    Apply the cache overhead to a size in bytes.
 */
static inline uint64_t
__wt_cache_bytes_plus_overhead(WT_CACHE *cache, uint64_t sz)
{
    if (cache->overhead_pct != 0)
        sz += (sz * (uint64_t)cache->overhead_pct) / 100;

    return (sz);
}

/*
 * __wt_cache_bytes_inuse --
 *    Return the number of bytes in use.
 */
static inline uint64_t
__wt_cache_bytes_inuse(WT_CACHE *cache)
{
    return (__wt_cache_bytes_plus_overhead(cache, cache->bytes_inmem));
}

/*
 * __wt_cache_dirty_inuse --
 *    Return the number of dirty bytes in use.
 */
static inline uint64_t
__wt_cache_dirty_inuse(WT_CACHE *cache)
{
    return (__wt_cache_bytes_plus_overhead(cache,
        cache->bytes_dirty_intl + cache->bytes_dirty_leaf));
}

/*
 * __wt_cache_dirty_leaf_inuse --
 *    Return the number of dirty bytes in use by leaf pages.
 */
static inline uint64_t
__wt_cache_dirty_leaf_inuse(WT_CACHE *cache)
{
    return (__wt_cache_bytes_plus_overhead(cache, cache->bytes_dirty_leaf));
}
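/*
 * A worked example of the overhead adjustment applied by
 * __wt_cache_bytes_plus_overhead (the numbers are illustrative only; the
 * percentage comes from the connection's cache overhead configuration):
 *
 *     bytes_inmem  = 1000000
 *     overhead_pct = 8
 *     reported use = 1000000 + (1000000 * 8) / 100 = 1080000
 *
 * Every byte count returned by the __wt_cache_*_inuse functions above is
 * inflated this way, so eviction thresholds are compared against a figure
 * that roughly includes allocator and structural overhead.
 */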
/*
 * __wt_cache_bytes_image --
 *    Return the number of page image bytes in use.
 */
static inline uint64_t
__wt_cache_bytes_image(WT_CACHE *cache)
{
    return (__wt_cache_bytes_plus_overhead(cache, cache->bytes_image));
}

/*
 * __wt_cache_bytes_other --
 *    Return the number of bytes in use not for page images.
 */
static inline uint64_t
__wt_cache_bytes_other(WT_CACHE *cache)
{
    uint64_t bytes_image, bytes_inmem;

    bytes_image = cache->bytes_image;
    bytes_inmem = cache->bytes_inmem;

    /*
     * The reads above could race with changes to the values, so protect
     * against underflow.
     */
    return ((bytes_image > bytes_inmem) ? 0 :
        __wt_cache_bytes_plus_overhead(cache, bytes_inmem - bytes_image));
}

/*
 * __wt_session_can_wait --
 *    Return if a session is available for a potentially slow operation.
 */
static inline bool
__wt_session_can_wait(WT_SESSION_IMPL *session)
{
    /*
     * Return if a session is available for a potentially slow operation;
     * for example, this is used by the block manager when flushing the
     * system cache.
     */
    if (!F_ISSET(session, WT_SESSION_CAN_WAIT))
        return (false);

    /*
     * LSM sets the no-eviction flag when holding the LSM tree lock; in that
     * case, or when holding the schema lock, we don't want to hijack the
     * thread for eviction.
     */
    return (!F_ISSET(
        session, WT_SESSION_NO_EVICTION | WT_SESSION_LOCKED_SCHEMA));
}

/*
 * __wt_eviction_clean_needed --
 *    Return if an application thread should do eviction due to the total
 *    volume of data in cache.
 */
static inline bool
__wt_eviction_clean_needed(WT_SESSION_IMPL *session, u_int *pct_fullp)
{
    WT_CACHE *cache;
    uint64_t bytes_inuse, bytes_max;

    cache = S2C(session)->cache;

    /*
     * Avoid division by zero if the cache size has not yet been set in a
     * shared cache.
     */
    bytes_max = S2C(session)->cache_size + 1;
    bytes_inuse = __wt_cache_bytes_inuse(cache);

    if (pct_fullp != NULL)
        *pct_fullp = (u_int)((100 * bytes_inuse) / bytes_max);

    return (bytes_inuse > (cache->eviction_trigger * bytes_max) / 100);
}

/*
 * __wt_eviction_dirty_needed --
 *    Return if an application thread should do eviction due to the total
 *    volume of dirty data in cache.
 */
static inline bool
__wt_eviction_dirty_needed(WT_SESSION_IMPL *session, u_int *pct_fullp)
{
    WT_CACHE *cache;
    double dirty_trigger;
    uint64_t dirty_inuse, bytes_max;

    cache = S2C(session)->cache;

    /*
     * Avoid division by zero if the cache size has not yet been set in a
     * shared cache.
     */
    bytes_max = S2C(session)->cache_size + 1;
    dirty_inuse = __wt_cache_dirty_leaf_inuse(cache);

    if (pct_fullp != NULL)
        *pct_fullp = (u_int)((100 * dirty_inuse) / bytes_max);

    if ((dirty_trigger = cache->eviction_scrub_limit) < 1.0)
        dirty_trigger = (double)cache->eviction_dirty_trigger;

    return (dirty_inuse > (uint64_t)(dirty_trigger * bytes_max) / 100);
}
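/*
 * A sketch of the trigger arithmetic above, with illustrative numbers (the
 * actual eviction_trigger, eviction_dirty_trigger and eviction_scrub_limit
 * values come from the connection configuration):
 *
 *     cache_size = 1GB, eviction_trigger = 95:
 *         clean eviction is needed once __wt_cache_bytes_inuse exceeds
 *         roughly 95% of the cache.
 *     cache_size = 1GB, eviction_dirty_trigger = 20, eviction_scrub_limit < 1:
 *         dirty eviction is needed once dirty leaf bytes exceed roughly
 *         20% of the cache.
 *
 * The "+ 1" on bytes_max only guards against division by zero when a shared
 * cache has not yet assigned this connection any space; it does not
 * meaningfully change the thresholds.
 */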
/*
 * __wt_eviction_needed --
 *    Return if an application thread should do eviction, and the cache full
 *    percentage as a side-effect.
 */
static inline bool
__wt_eviction_needed(WT_SESSION_IMPL *session, bool busy, u_int *pct_fullp)
{
    WT_CACHE *cache;
    u_int pct_dirty, pct_full;
    bool clean_needed, dirty_needed;

    cache = S2C(session)->cache;

    /*
     * If the connection is closing we do not need eviction from an
     * application thread. The eviction subsystem is already closed.
     */
    if (F_ISSET(S2C(session), WT_CONN_CLOSING))
        return (false);

    clean_needed = __wt_eviction_clean_needed(session, &pct_full);
    dirty_needed = __wt_eviction_dirty_needed(session, &pct_dirty);

    /*
     * Calculate the cache full percentage; anything over the trigger means
     * we involve the application thread.
     */
    if (pct_fullp != NULL)
        *pct_fullp = (u_int)WT_MAX(0, 100 - WT_MIN(
            (int)cache->eviction_trigger - (int)pct_full,
            (int)cache->eviction_dirty_trigger - (int)pct_dirty));

    /*
     * Only check the dirty trigger when the session is not busy.
     *
     * In other words, once we are pinning resources, try to finish the
     * operation as quickly as possible without exceeding the cache size.
     * The next transaction in this session will not be able to start until
     * the cache is under the limit.
     */
    return (clean_needed || (!busy && dirty_needed));
}

/*
 * __wt_cache_full --
 *    Return if the cache is at (or over) capacity.
 */
static inline bool
__wt_cache_full(WT_SESSION_IMPL *session)
{
    WT_CONNECTION_IMPL *conn;
    WT_CACHE *cache;

    conn = S2C(session);
    cache = conn->cache;

    return (__wt_cache_bytes_inuse(cache) >= conn->cache_size);
}

/*
 * __wt_cache_eviction_check --
 *    Evict pages if the cache crosses its boundaries.
 */
static inline int
__wt_cache_eviction_check(WT_SESSION_IMPL *session, bool busy, bool *didworkp)
{
    WT_BTREE *btree;
    WT_TXN_GLOBAL *txn_global;
    WT_TXN_STATE *txn_state;
    u_int pct_full;

    if (didworkp != NULL)
        *didworkp = false;

    /*
     * If the current transaction is keeping the oldest ID pinned, it is in
     * the middle of an operation. This may prevent the oldest ID from
     * moving forward, leading to deadlock, so only evict what we can.
     * Otherwise, we are at a transaction boundary and we can work harder to
     * make sure there is free space in the cache.
     */
    txn_global = &S2C(session)->txn_global;
    txn_state = WT_SESSION_TXN_STATE(session);
    busy = busy || txn_state->id != WT_TXN_NONE ||
        session->nhazard > 0 ||
        (txn_state->pinned_id != WT_TXN_NONE &&
        txn_global->current != txn_global->oldest_id);

    /*
     * LSM sets the no-cache-check flag when holding the LSM tree lock; in
     * that case, or when holding the handle list, schema or table locks
     * (which can block checkpoints and eviction), don't block the thread
     * for eviction.
     */
    if (F_ISSET(session, WT_SESSION_NO_EVICTION |
        WT_SESSION_LOCKED_HANDLE_LIST |
        WT_SESSION_LOCKED_SCHEMA | WT_SESSION_LOCKED_TABLE))
        return (0);

    /* In-memory configurations don't block when the cache is full. */
    if (F_ISSET(S2C(session), WT_CONN_IN_MEMORY))
        return (0);

    /*
     * Threads operating on cache-resident trees are ignored because they're
     * not contributing to the problem. We also don't block while reading
     * metadata because we're likely to be holding some other resources that
     * could block checkpoints or eviction.
     */
    btree = S2BT_SAFE(session);
    if (btree != NULL && (F_ISSET(btree, WT_BTREE_IN_MEMORY) ||
        WT_IS_METADATA(session->dhandle)))
        return (0);

    /* Check if eviction is needed. */
    if (!__wt_eviction_needed(session, busy, &pct_full))
        return (0);

    /*
     * Some callers (those waiting for slow operations) will sleep if there
     * was no cache work to do. After this point, let them skip the sleep.
     */
    if (didworkp != NULL)
        *didworkp = true;

    return (__wt_cache_eviction_worker(session, busy, pct_full));
}
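/*
 * A worked example of the combined fullness calculation in
 * __wt_eviction_needed (illustrative numbers only):
 *
 *     eviction_trigger       = 95, pct_full  = 80  -> clean headroom = 15
 *     eviction_dirty_trigger = 20, pct_dirty = 18  -> dirty headroom =  2
 *
 *     *pct_fullp = WT_MAX(0, 100 - WT_MIN(15, 2)) = 98
 *
 * Whichever trigger the cache is closest to dominates the reported
 * percentage, so __wt_cache_eviction_worker is handed a value near 100
 * whenever either the clean or the dirty threshold is about to be crossed.
 */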