diff options
author | Keith Bostic <keith.bostic@mongodb.com> | 2016-09-25 23:43:01 -0400 |
---|---|---|
committer | Alex Gorrod <alexander.gorrod@mongodb.com> | 2016-09-26 13:43:01 +1000 |
commit | 7cc8f4bfbc5b45486e35072771c659e3c9e12ece (patch) | |
tree | a4249d4c75e9d4a993420a930cd74170d1a14bc9 | |
parent | 5a26a11f0da1941b38ca34c3ba2a4e1265392049 (diff) | |
download | mongo-7cc8f4bfbc5b45486e35072771c659e3c9e12ece.tar.gz |
WT-2928 Eviction failing to switch queues can lead to starvation (#3060)
There are some configurations where the eviction server waiting for the current queue to drain before switching in the new queue can lead to starvation. Specifically, if in-memory is configured, the cache is full and there is only a single eviction thread, or if an operation that does not contribute to eviction fills the cache (for example, compact).
-rw-r--r-- | src/evict/evict_lru.c | 98 |
1 files changed, 58 insertions, 40 deletions
diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c index 3be7a124a1d..ad4f3ecc2b5 100644 --- a/src/evict/evict_lru.c +++ b/src/evict/evict_lru.c @@ -152,12 +152,24 @@ __wt_evict_list_clear_page(WT_SESSION_IMPL *session, WT_REF *ref) /* * __evict_queue_empty -- * Is the queue empty? + * + * Note that the eviction server is pessimistic and treats a half full + * queue as empty. */ static inline bool -__evict_queue_empty(WT_EVICT_QUEUE *queue) +__evict_queue_empty(WT_EVICT_QUEUE *queue, bool server_check) { - return (queue->evict_current == NULL || - queue->evict_candidates == 0); + uint32_t candidates, used; + + if (queue->evict_current == NULL) + return (true); + + /* The eviction server only considers half of the candidates. */ + candidates = queue->evict_candidates; + if (server_check && candidates > 1) + candidates /= 2; + used = (uint32_t)(queue->evict_current - queue->evict_queue); + return (used >= candidates); } /* @@ -442,7 +454,7 @@ __evict_update_work(WT_SESSION_IMPL *session) if (!F_ISSET(conn, WT_CONN_EVICTION_RUN)) return (false); - if (!__evict_queue_empty(cache->evict_urgent_queue)) + if (!__evict_queue_empty(cache->evict_urgent_queue, false)) F_SET(cache, WT_CACHE_EVICT_URGENT); /* @@ -588,7 +600,7 @@ __evict_pass(WT_SESSION_IMPL *session) */ if (cache->evict_empty_score < WT_EVICT_SCORE_CUTOFF || (!WT_EVICT_HAS_WORKERS(session) && - !__evict_queue_empty(cache->evict_urgent_queue))) + !__evict_queue_empty(cache->evict_urgent_queue, false))) WT_RET(__evict_lru_pages(session, true)); if (cache->pass_intr != 0) @@ -924,7 +936,7 @@ __evict_lru_walk(WT_SESSION_IMPL *session) * If the queue we are filling is empty, pages are being requested * faster than they are being queued. 
*/ - if (__evict_queue_empty(queue)) { + if (__evict_queue_empty(queue, false)) { if (F_ISSET(cache, WT_CACHE_EVICT_CLEAN_HARD | WT_CACHE_EVICT_DIRTY_HARD)) { cache->evict_empty_score = WT_MIN( @@ -1533,7 +1545,7 @@ __evict_get_ref( WT_CACHE *cache; WT_DECL_RET; WT_EVICT_ENTRY *evict; - WT_EVICT_QUEUE *other_queue, *queue, *urgent_queue; + WT_EVICT_QUEUE *queue, *other_queue, *urgent_queue; uint32_t candidates; bool is_app, urgent_ok; @@ -1549,9 +1561,9 @@ __evict_get_ref( WT_STAT_CONN_INCR(session, cache_eviction_get_ref); /* Avoid the LRU lock if no pages are available. */ - if (__evict_queue_empty(cache->evict_current_queue) && - __evict_queue_empty(cache->evict_other_queue) && - __evict_queue_empty(urgent_queue)) { + if (__evict_queue_empty(cache->evict_current_queue, is_server) && + __evict_queue_empty(cache->evict_other_queue, is_server) && + (!urgent_ok || __evict_queue_empty(urgent_queue, false))) { WT_STAT_CONN_INCR(session, cache_eviction_get_ref_empty); return (WT_NOTFOUND); } @@ -1565,11 +1577,11 @@ __evict_get_ref( * Such cases are extremely rare in real applications. */ if (is_server && - (cache->evict_empty_score > WT_EVICT_SCORE_CUTOFF || - __evict_queue_empty(cache->evict_fill_queue))) { + (cache->evict_empty_score > WT_EVICT_SCORE_CUTOFF || + __evict_queue_empty(cache->evict_fill_queue, false))) { do { if ((!urgent_ok || - __evict_queue_empty(urgent_queue)) && + __evict_queue_empty(urgent_queue, false)) && !__evict_queue_full(cache->evict_fill_queue)) return (WT_NOTFOUND); } while ((ret = __wt_spin_trylock( @@ -1579,40 +1591,37 @@ __evict_get_ref( } else __wt_spin_lock(session, &cache->evict_queue_lock); - /* - * Check if the current queue needs to change. - * The current queue could have changed while we waited for the lock. 
- */ - queue = cache->evict_current_queue; - other_queue = cache->evict_other_queue; - if (__evict_queue_empty(queue) && !__evict_queue_empty(other_queue)) { - cache->evict_current_queue = other_queue; - cache->evict_other_queue = queue; - } - /* Check the urgent queue first. */ - queue = urgent_ok && !__evict_queue_empty(urgent_queue) ? - urgent_queue : cache->evict_current_queue; + if (urgent_ok && !__evict_queue_empty(urgent_queue, false)) + queue = urgent_queue; + else { + /* + * Check if the current queue needs to change. + * The current queue could have changed while we waited for + * the lock. + * + * The server will only evict half of the pages before looking + * for more. The remainder are left to eviction workers (if any + * configured), or application threads if necessary. + */ + queue = cache->evict_current_queue; + other_queue = cache->evict_other_queue; + if (__evict_queue_empty(queue, is_server) && + !__evict_queue_empty(other_queue, is_server)) { + cache->evict_current_queue = other_queue; + cache->evict_other_queue = queue; + } + } __wt_spin_unlock(session, &cache->evict_queue_lock); /* - * Only evict half of the pages before looking for more. The remainder - * are left to eviction workers (if configured), or application threads - * if necessary. - */ - candidates = queue->evict_candidates; - if (is_server && queue != urgent_queue && candidates > 1) - candidates /= 2; - - /* * We got the queue lock, which should be fast, and chose a queue. * Now we want to get the lock on the individual queue. */ for (;;) { /* Verify there are still pages available. */ - if (__evict_queue_empty(queue) || (uint32_t) - (queue->evict_current - queue->evict_queue) >= candidates) { + if (__evict_queue_empty(queue, is_server)) { WT_STAT_CONN_INCR( session, cache_eviction_get_ref_empty2); return (WT_NOTFOUND); @@ -1624,6 +1633,15 @@ __evict_get_ref( break; } + /* + * Only evict half of the pages before looking for more. 
The remainder + * are left to eviction workers (if configured), or application thread + * if necessary. + */ + candidates = queue->evict_candidates; + if (is_server && queue != urgent_queue && candidates > 1) + candidates /= 2; + /* Get the next page queued for eviction. */ for (evict = queue->evict_current; evict >= queue->evict_queue && @@ -1679,8 +1697,8 @@ __evict_get_ref( } /* Move to the next item. */ - if (evict != NULL && evict + 1 < - queue->evict_queue + queue->evict_candidates) + if (evict != NULL && + evict + 1 < queue->evict_queue + queue->evict_candidates) queue->evict_current = evict + 1; else /* Clear the current pointer if there are no more candidates. */ queue->evict_current = NULL; @@ -1873,7 +1891,7 @@ __wt_page_evict_urgent(WT_SESSION_IMPL *session, WT_REF *ref) goto done; __wt_spin_lock(session, &urgent_queue->evict_lock); - if (__evict_queue_empty(urgent_queue)) { + if (__evict_queue_empty(urgent_queue, false)) { urgent_queue->evict_current = urgent_queue->evict_queue; urgent_queue->evict_candidates = 0; } |