-rw-r--r--  items.c     | 280
-rw-r--r--  items.h     |   2
-rw-r--r--  memcached.c |   4
-rw-r--r--  slabs.c     |  12
-rw-r--r--  slabs.h     |   2
-rwxr-xr-x  t/binary.t  |   1
6 files changed, 82 insertions(+), 219 deletions(-)
diff --git a/items.c b/items.c
index d9920d2..689f571 100644
--- a/items.c
+++ b/items.c
@@ -37,6 +37,7 @@ typedef struct {
uint64_t moves_to_cold;
uint64_t moves_to_warm;
uint64_t moves_within_lru;
+ uint64_t direct_reclaims;
rel_time_t evicted_time;
} itemstats_t;
@@ -55,7 +56,6 @@ static int lru_maintainer_check_clsid = 0;
static pthread_mutex_t lru_crawler_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t lru_crawler_cond = PTHREAD_COND_INITIALIZER;
static pthread_mutex_t lru_maintainer_lock = PTHREAD_MUTEX_INITIALIZER;
-static pthread_cond_t lru_maintainer_cond = PTHREAD_COND_INITIALIZER;
static pthread_mutex_t cas_id_lock = PTHREAD_MUTEX_INITIALIZER;
void item_stats_reset(void) {
@@ -67,6 +67,8 @@ void item_stats_reset(void) {
}
}
+static int lru_pull_tail(const int orig_id, const int cur_lru,
+ const unsigned int total_chunks, const bool do_evict, const uint32_t cur_hv);
/* Get the next CAS id for a new item. */
/* TODO: refactor some atomics for this. */
@@ -124,9 +126,11 @@ static size_t item_make_header(const uint8_t nkey, const int flags, const int nb
item *do_item_alloc(char *key, const size_t nkey, const int flags,
const rel_time_t exptime, const int nbytes,
const uint32_t cur_hv) {
+ int i;
uint8_t nsuffix;
item *it = NULL;
char suffix[40];
+ unsigned int total_chunks;
size_t ntotal = item_make_header(nkey + 1, flags, nbytes, suffix, &nsuffix);
if (settings.use_cas) {
ntotal += sizeof(uint64_t);
@@ -136,7 +140,28 @@ item *do_item_alloc(char *key, const size_t nkey, const int flags,
if (id == 0)
return 0;
- it = slabs_alloc(ntotal, id);
+ /* If no memory is available, attempt a direct LRU juggle/eviction. */
+ /* This is a race in order to simplify lru_pull_tail; in cases where
+ * locked items are on the tail, you want them to fall out and cause
+ * occasional OOMs, rather than internally work around them.
+ * This also gives us one fewer code path for slab alloc/free.
+ */
+ for (i = 0; i < 5; i++) {
+ it = slabs_alloc(ntotal, id, &total_chunks);
+ if (it == NULL) {
+ lru_pull_tail(id, HOT_LRU, total_chunks, false, cur_hv);
+ lru_pull_tail(id, WARM_LRU, total_chunks, false, cur_hv);
+ lru_pull_tail(id, COLD_LRU, total_chunks, true, cur_hv);
+ } else {
+ break;
+ }
+ }
+
+ if (i > 0) {
+ pthread_mutex_lock(&lru_locks[id]);
+ itemstats[id].direct_reclaims += i;
+ pthread_mutex_unlock(&lru_locks[id]);
+ }
if (it == NULL) {
pthread_mutex_lock(&lru_locks[id]);
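The hunk above is the core of the change: do_item_alloc() now retries slabs_alloc() up to five times, and each failed attempt pokes the HOT and WARM LRU tails (non-evicting) and the COLD tail (evicting) before trying again, counting the passes in direct_reclaims. A minimal standalone sketch of that bounded-retry pattern; pool_alloc(), pool_reclaim_tier(), and the TIER_* names are hypothetical stand-ins, not memcached APIs:

    #include <stdio.h>
    #include <stdlib.h>

    enum tier { TIER_HOT, TIER_WARM, TIER_COLD };

    static int pool_free_chunks = 0; /* pretend pool: starts exhausted */

    static void *pool_alloc(size_t size) {
        if (pool_free_chunks > 0) {
            pool_free_chunks--;
            return malloc(size);
        }
        return NULL;
    }

    /* Only an evicting pass over COLD actually frees memory here. */
    static void pool_reclaim_tier(enum tier t, int do_evict) {
        if (t == TIER_COLD && do_evict)
            pool_free_chunks++;
    }

    static void *alloc_with_direct_reclaim(size_t size, int *passes) {
        void *p = NULL;
        int i;
        /* Bounded retries: a racing caller may steal what we reclaim,
         * so give up after five passes rather than looping forever. */
        for (i = 0; i < 5; i++) {
            p = pool_alloc(size);
            if (p != NULL)
                break;
            pool_reclaim_tier(TIER_HOT, 0);  /* shuffle only */
            pool_reclaim_tier(TIER_WARM, 0); /* shuffle only */
            pool_reclaim_tier(TIER_COLD, 1); /* may evict */
        }
        *passes = i; /* i > 0: we had to reclaim, akin to direct_reclaims */
        return p;
    }

    int main(void) {
        int passes = 0;
        void *p = alloc_with_direct_reclaim(64, &passes);
        printf("alloc %s after %d reclaim pass(es)\n",
               p ? "succeeded" : "failed", passes);
        free(p);
        return 0;
    }

Bounding the loop keeps the documented race benign: a concurrent caller can steal the chunk a reclaim pass just freed, in which case this caller gives up after five tries and reports an OOM instead of spinning.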
@@ -167,156 +192,6 @@ item *do_item_alloc(char *key, const size_t nkey, const int flags,
return it;
}
-/*@null@*/
-item *do_item_alloc_old(char *key, const size_t nkey, const int flags,
- const rel_time_t exptime, const int nbytes,
- const uint32_t cur_hv) {
- uint8_t nsuffix;
- item *it = NULL;
- char suffix[40];
- size_t ntotal = item_make_header(nkey + 1, flags, nbytes, suffix, &nsuffix);
- if (settings.use_cas) {
- ntotal += sizeof(uint64_t);
- }
-
- unsigned int id = slabs_clsid(ntotal);
- if (id == 0)
- return 0;
-
- mutex_lock(&cache_lock);
- /* do a quick check if we have any expired items in the tail.. */
- int tries = 5;
- /* Avoid hangs if a slab has nothing but refcounted stuff in it. */
- int tries_lrutail_reflocked = 1000;
- int tried_alloc = 0;
- item *search;
- item *next_it;
- void *hold_lock = NULL;
-
- search = tails[id];
- /* We walk up *only* for locked items. Never searching for expired.
- * Waste of CPU for almost all deployments */
- for (; tries > 0 && search != NULL; tries--, search=next_it) {
- /* we might relink search mid-loop, so search->prev isn't reliable */
- next_it = search->prev;
- if (search->nbytes == 0 && search->nkey == 0 && search->it_flags == 1) {
- /* We are a crawler, ignore it. */
- tries++;
- continue;
- }
- uint32_t hv = hash(ITEM_key(search), search->nkey);
- /* Attempt to hash item lock the "search" item. If locked, no
- * other callers can incr the refcount
- */
- /* Don't accidentally grab ourselves, or bail if we can't quicklock */
- if (hv == cur_hv || (hold_lock = item_trylock(hv)) == NULL)
- continue;
- /* Now see if the item is refcount locked */
- if (refcount_incr(&search->refcount) != 2) {
- /* Avoid pathological case with ref'ed items in tail */
- do_item_update_nolock(search);
- tries_lrutail_reflocked--;
- tries++;
- refcount_decr(&search->refcount);
- itemstats[id].lrutail_reflocked++;
- /* Old rare bug could cause a refcount leak. We haven't seen
- * it in years, but we leave this code in to prevent failures
- * just in case */
- if (settings.tail_repair_time &&
- search->time + settings.tail_repair_time < current_time) {
- itemstats[id].tailrepairs++;
- search->refcount = 1;
- do_item_unlink_nolock(search, hv);
- }
- if (hold_lock)
- item_trylock_unlock(hold_lock);
-
- if (tries_lrutail_reflocked < 1)
- break;
-
- continue;
- }
-
- /* Expired or flushed */
- if ((search->exptime != 0 && search->exptime < current_time)
- || is_flushed(search)) {
- itemstats[id].reclaimed++;
- if ((search->it_flags & ITEM_FETCHED) == 0) {
- itemstats[id].expired_unfetched++;
- }
- it = search;
- slabs_adjust_mem_requested(it->slabs_clsid, ITEM_ntotal(it), ntotal);
- do_item_unlink_nolock(it, hv);
- /* Initialize the item block: */
- it->slabs_clsid = 0;
- } else if ((it = slabs_alloc(ntotal, id)) == NULL) {
- tried_alloc = 1;
- if (settings.evict_to_free == 0) {
- itemstats[id].outofmemory++;
- } else {
- itemstats[id].evicted++;
- itemstats[id].evicted_time = current_time - search->time;
- if (search->exptime != 0)
- itemstats[id].evicted_nonzero++;
- if ((search->it_flags & ITEM_FETCHED) == 0) {
- itemstats[id].evicted_unfetched++;
- }
- it = search;
- slabs_adjust_mem_requested(it->slabs_clsid, ITEM_ntotal(it), ntotal);
- do_item_unlink_nolock(it, hv);
- /* Initialize the item block: */
- it->slabs_clsid = 0;
-
- /* If we've just evicted an item, and the automover is set to
- * angry bird mode, attempt to rip memory into this slab class.
- * TODO: Move valid object detection into a function, and on a
- * "successful" memory pull, look behind and see if the next alloc
- * would be an eviction. Then kick off the slab mover before the
- * eviction happens.
- */
- if (settings.slab_automove == 2)
- slabs_reassign(-1, id);
- }
- }
-
- refcount_decr(&search->refcount);
- /* If hash values were equal, we don't grab a second lock */
- if (hold_lock)
- item_trylock_unlock(hold_lock);
- break;
- }
-
- if (!tried_alloc && (tries == 0 || search == NULL))
- it = slabs_alloc(ntotal, id);
-
- if (it == NULL) {
- itemstats[id].outofmemory++;
- mutex_unlock(&cache_lock);
- return NULL;
- }
-
- assert(it->slabs_clsid == 0);
- assert(it != heads[id]);
-
- /* Item initialization can happen outside of the lock; the item's already
- * been removed from the slab LRU.
- */
- it->refcount = 1; /* the caller will have a reference */
- mutex_unlock(&cache_lock);
- it->next = it->prev = it->h_next = 0;
- it->slabs_clsid = id;
-
- DEBUG_REFCNT(it, '*');
- it->it_flags = settings.use_cas ? ITEM_CAS : 0;
- it->nkey = nkey;
- it->nbytes = nbytes;
- memcpy(ITEM_key(it), key, nkey);
- it->exptime = exptime;
- memcpy(ITEM_suffix(it), suffix, (size_t)nsuffix);
- it->nsuffix = nsuffix;
- return it;
-}
-
void item_free(item *it) {
size_t ntotal = ITEM_ntotal(it);
unsigned int clsid;
@@ -477,25 +352,23 @@ void do_item_update_nolock(item *it) {
}
}
-/* This is a no-op with the new item allocator. */
+/* Currently only bumps the accessed time. When compat mode is implemented
+ * it can also bump the LRU.
+ */
void do_item_update(item *it) {
- return;
-}
-
-/*void do_item_update(item *it) {
MEMCACHED_ITEM_UPDATE(ITEM_key(it), it->nkey, it->nbytes);
if (it->time < current_time - ITEM_UPDATE_INTERVAL) {
assert((it->it_flags & ITEM_SLABBED) == 0);
if ((it->it_flags & ITEM_LINKED) != 0) {
- mutex_lock(&cache_lock);
- item_unlink_q(it);
it->time = current_time;
+ /*
+ item_unlink_q(it);
item_link_q(it);
- mutex_unlock(&cache_lock);
+ */
}
}
-}*/
+}
int do_item_replace(item *it, item *new_it, const uint32_t hv) {
MEMCACHED_ITEM_REPLACE(ITEM_key(it), it->nkey, it->nbytes,
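With the queue relink commented out, do_item_update() reduces to a rate-limited timestamp refresh: the stamp is touched only if it is older than ITEM_UPDATE_INTERVAL, so hot keys avoid churning shared state on every hit. A self-contained sketch of that rate-limiting idea, using an illustrative struct rather than memcached's real item:

    #include <stdio.h>
    #include <time.h>

    #define UPDATE_INTERVAL 60 /* seconds; mirrors ITEM_UPDATE_INTERVAL */

    struct cached_item {
        time_t last_access;
    };

    /* Refresh the stamp only when it has gone stale; an LRU queue
     * relink could be added here once a compat mode exists. */
    static void item_touch(struct cached_item *it, time_t now) {
        if (it->last_access < now - UPDATE_INTERVAL)
            it->last_access = now;
    }

    int main(void) {
        struct cached_item it = { .last_access = 0 };
        time_t now = time(NULL);
        item_touch(&it, now);     /* stale stamp: refreshed */
        item_touch(&it, now + 1); /* fresh stamp: no-op */
        printf("last_access = %ld\n", (long)it.last_access);
        return 0;
    }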
@@ -585,6 +458,7 @@ void item_stats_totals(ADD_STAT add_stats, void *c) {
totals.moves_to_cold += itemstats[i].moves_to_cold;
totals.moves_to_warm += itemstats[i].moves_to_warm;
totals.moves_within_lru += itemstats[i].moves_within_lru;
+ totals.direct_reclaims += itemstats[i].direct_reclaims;
pthread_mutex_unlock(&lru_locks[i]);
}
APPEND_STAT("expired_unfetched", "%llu",
@@ -605,6 +479,8 @@ void item_stats_totals(ADD_STAT add_stats, void *c) {
(unsigned long long)totals.moves_to_warm);
APPEND_STAT("moves_within_lru", "%llu",
(unsigned long long)totals.moves_within_lru);
+ APPEND_STAT("direct_reclaims", "%llu",
+ (unsigned long long)totals.direct_reclaims);
}
void item_stats(ADD_STAT add_stats, void *c) {
@@ -648,6 +524,8 @@ void item_stats(ADD_STAT add_stats, void *c) {
"%llu", (unsigned long long)itemstats[i].moves_to_warm);
APPEND_NUM_FMT_STAT(fmt, i, "moves_within_lru",
"%llu", (unsigned long long)itemstats[i].moves_within_lru);
+ APPEND_NUM_FMT_STAT(fmt, i, "direct_reclaims",
+ "%llu", (unsigned long long)itemstats[i].direct_reclaims);
}
pthread_mutex_unlock(&lru_locks[i]);
}
@@ -839,9 +717,9 @@ static int lru_pull_tail(const int orig_id, const int cur_lru,
itemstats[id].expired_unfetched++;
}
/* refcnt 2 -> 1 */
- do_item_unlink_nolock(it, hv);
+ do_item_unlink_nolock(search, hv);
/* refcnt 1 -> 0 -> item_free */
- do_item_remove(it);
+ do_item_remove(search);
item_trylock_unlock(hold_lock);
removed++;
@@ -891,14 +769,14 @@ static int lru_pull_tail(const int orig_id, const int cur_lru,
if ((search->it_flags & ITEM_FETCHED) == 0) {
itemstats[id].evicted_unfetched++;
}
- do_item_unlink_nolock(it, hv);
+ do_item_unlink_nolock(search, hv);
removed++;
} else if ((search->it_flags & ITEM_ACTIVE) != 0) {
itemstats[id].moves_to_warm++;
search->it_flags &= ~ITEM_ACTIVE;
move_to_lru = WARM_LRU;
do_item_unlink_q(search);
- /* Don't tick removed, since we haven't freed anything */
+ removed++;
}
break;
}
@@ -927,63 +805,70 @@ static int lru_pull_tail(const int orig_id, const int cur_lru,
* If too many items are in COLD_LRU, poke COLD_LRU tail
* If too many items are in COLD_LRU and are below low watermark, start evicting
*/
-static void lru_maintainer_juggle(const int slabs_clsid) {
+static int lru_maintainer_juggle(const int slabs_clsid) {
int i;
- bool mem_limit_reached = false;
+ int did_moves = 0;
+ //bool mem_limit_reached = false;
unsigned int total_chunks = 0;
- unsigned int free_chunks = slabs_available_chunks(slabs_clsid,
- &mem_limit_reached, &total_chunks);
+ /* FIXME: if free_chunks below high watermark, increase aggressiveness */
+/* unsigned int free_chunks = slabs_available_chunks(slabs_clsid,
+ &mem_limit_reached, &total_chunks);*/
STATS_LOCK();
stats.lru_maintainer_juggles++;
STATS_UNLOCK();
/* Juggle HOT/WARM up to N times */
- for (i = 0; i < 50; i++) {
+ for (i = 0; i < 500; i++) {
int do_more = 0;
- int removed = 0;
if (lru_pull_tail(slabs_clsid, HOT_LRU, total_chunks, false, 0) &&
lru_pull_tail(slabs_clsid, WARM_LRU, total_chunks, false, 0)) {
do_more++;
}
- /* FIXME: evictions under low watermark is hardcoded */
- removed = lru_pull_tail(slabs_clsid, COLD_LRU, total_chunks,
- (free_chunks < 50 && mem_limit_reached) ? true : false, 0);
- if (removed != 0)
- do_more++;
- free_chunks += removed;
+ do_more += lru_pull_tail(slabs_clsid, COLD_LRU, total_chunks, false, 0);
if (do_more == 0)
break;
+ did_moves++;
}
+ return did_moves;
}
static pthread_t lru_maintainer_tid;
+#define MAX_LRU_MAINTAINER_SLEEP 1000000
+#define MIN_LRU_MAINTAINER_SLEEP 1000
+
static void *lru_maintainer_thread(void *arg) {
int i;
+ useconds_t to_sleep = MIN_LRU_MAINTAINER_SLEEP;
pthread_mutex_lock(&lru_maintainer_lock);
if (settings.verbose > 2)
fprintf(stderr, "Starting LRU maintainer background thread\n");
while (do_run_lru_maintainer_thread) {
- struct timespec ts;
- struct timeval tv;
- gettimeofday(&tv, NULL);
- /* FIXME: #define or optionize the sleep */
- ts.tv_sec = tv.tv_sec + 1;
- pthread_cond_timedwait(&lru_maintainer_cond, &lru_maintainer_lock, &ts);
-
- if (verbose > 2)
+ int did_moves = 0;
+ pthread_mutex_unlock(&lru_maintainer_lock);
+ usleep(to_sleep);
+ pthread_mutex_lock(&lru_maintainer_lock);
+
+ if (settings.verbose > 2)
fprintf(stderr, "LRU maintainer thread running\n");
/* We were asked to immediately wake up and poke a particular slab
* class due to a low watermark being hit */
if (lru_maintainer_check_clsid != 0) {
- lru_maintainer_juggle(lru_maintainer_check_clsid);
+ did_moves = lru_maintainer_juggle(lru_maintainer_check_clsid);
lru_maintainer_check_clsid = 0;
} else {
for (i = POWER_SMALLEST; i < MAX_NUMBER_OF_SLAB_CLASSES; i++) {
- lru_maintainer_juggle(i);
+ did_moves += lru_maintainer_juggle(i);
}
}
+ if (did_moves == 0) {
+ if (to_sleep < MAX_LRU_MAINTAINER_SLEEP)
+ to_sleep += 1000;
+ } else {
+ /* FIXME: slew the ramp-down instead of resetting instantly? */
+ to_sleep = MIN_LRU_MAINTAINER_SLEEP;
+ }
if (settings.verbose > 2)
fprintf(stderr, "LRU maintainer thread sleeping\n");
@@ -997,8 +882,8 @@ static void *lru_maintainer_thread(void *arg) {
int stop_lru_maintainer_thread(void) {
int ret;
pthread_mutex_lock(&lru_maintainer_lock);
+ /* LRU thread is a sleep loop, will die on its own */
do_run_lru_maintainer_thread = 0;
- pthread_cond_signal(&lru_maintainer_cond);
pthread_mutex_unlock(&lru_maintainer_lock);
if ((ret = pthread_join(lru_maintainer_tid, NULL)) != 0) {
fprintf(stderr, "Failed to stop LRU maintainer thread: %s\n", strerror(ret));
@@ -1035,27 +920,8 @@ void lru_maintainer_resume(void) {
pthread_mutex_unlock(&lru_maintainer_lock);
}
-/* Attempt to wake the LRU maintenance thread early. Can specify which slab
- * class to immediately inspect for efficiency
- */
-void lru_maintainer_wake(int slabs_clsid) {
- if (pthread_mutex_trylock(&lru_maintainer_lock) != 0) {
- return;
- }
- if (settings.verbose > 2)
- fprintf(stderr, "Signalling LRU maintainer to wake up\n");
-
- lru_maintainer_check_clsid = slabs_clsid;
- pthread_cond_signal(&lru_maintainer_cond);
- pthread_mutex_unlock(&lru_maintainer_lock);
-}
-
int init_lru_maintainer(void) {
if (lru_maintainer_initialized == 0) {
- if (pthread_cond_init(&lru_maintainer_cond, NULL) != 0) {
- fprintf(stderr, "Can't initialize lru maintainer condition\n");
- return -1;
- }
pthread_mutex_init(&lru_maintainer_lock, NULL);
lru_maintainer_initialized = 1;
}
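With lru_maintainer_cond and lru_maintainer_wake() removed, shutdown relies on the sleep loop noticing the cleared flag, as the comment in stop_lru_maintainer_thread() notes. A sketch of that cond-free stop pattern, with illustrative names (worker, run_lock, do_run) rather than the real ones:

    #include <pthread.h>
    #include <stdio.h>
    #include <unistd.h>

    static pthread_mutex_t run_lock = PTHREAD_MUTEX_INITIALIZER;
    static int do_run = 1;

    static void *worker(void *arg) {
        (void)arg;
        for (;;) {
            pthread_mutex_lock(&run_lock);
            int keep_going = do_run;
            pthread_mutex_unlock(&run_lock);
            if (!keep_going)
                break;
            usleep(1000); /* bounded sleep caps shutdown latency */
        }
        return NULL;
    }

    int main(void) {
        pthread_t tid;
        pthread_create(&tid, NULL, worker, NULL);
        pthread_mutex_lock(&run_lock);
        do_run = 0; /* no pthread_cond_signal() required */
        pthread_mutex_unlock(&run_lock);
        pthread_join(tid, NULL);
        puts("worker stopped");
        return 0;
    }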
diff --git a/items.h b/items.h
index 3eccfa3..6ec7819 100644
--- a/items.h
+++ b/items.h
@@ -2,7 +2,6 @@
uint64_t get_cas_id(void);
/*@null@*/
-item *do_item_alloc_old(char *key, const size_t nkey, const int flags, const rel_time_t exptime, const int nbytes, const uint32_t cur_hv);
item *do_item_alloc(char *key, const size_t nkey, const int flags, const rel_time_t exptime, const int nbytes, const uint32_t cur_hv);
void item_free(item *it);
bool item_size_ok(const size_t nkey, const int flags, const int nbytes);
@@ -38,7 +37,6 @@ int stop_lru_maintainer_thread(void);
int init_lru_maintainer(void);
void lru_maintainer_pause(void);
void lru_maintainer_resume(void);
-void lru_maintainer_wake(int slabs_clsid);
int start_item_crawler_thread(void);
int stop_item_crawler_thread(void);
diff --git a/memcached.c b/memcached.c
index 654c953..ad7ffb4 100644
--- a/memcached.c
+++ b/memcached.c
@@ -180,6 +180,7 @@ static void stats_init(void) {
stats.hash_power_level = stats.hash_bytes = stats.hash_is_expanding = 0;
stats.expired_unfetched = stats.evicted_unfetched = 0;
stats.slabs_moved = 0;
+ stats.lru_maintainer_juggles = 0;
stats.accepting_conns = true; /* assuming we start in this state. */
stats.slab_reassign_running = false;
stats.lru_crawler_running = false;
@@ -2628,6 +2629,9 @@ static void server_stats(ADD_STAT add_stats, conn *c) {
if (settings.lru_crawler) {
APPEND_STAT("lru_crawler_running", "%u", stats.lru_crawler_running);
}
+ if (settings.lru_maintainer_thread) {
+ APPEND_STAT("lru_maintainer_juggles", "%llu", (unsigned long long)stats.lru_maintainer_juggles);
+ }
APPEND_STAT("malloc_fails", "%llu",
(unsigned long long)stats.malloc_fails);
STATS_UNLOCK();
diff --git a/slabs.c b/slabs.c
index d762af2..821d837 100644
--- a/slabs.c
+++ b/slabs.c
@@ -225,7 +225,7 @@ static int do_slabs_newslab(const unsigned int id) {
}
/*@null@*/
-static void *do_slabs_alloc(const size_t size, unsigned int id) {
+static void *do_slabs_alloc(const size_t size, unsigned int id, unsigned int *total_chunks) {
slabclass_t *p;
void *ret = NULL;
item *it = NULL;
@@ -238,6 +238,7 @@ static void *do_slabs_alloc(const size_t size, unsigned int id) {
p = &slabclass[id];
assert(p->sl_curr == 0 || ((item *)p->slots)->slabs_clsid == 0);
+ *total_chunks = p->slabs * p->perslab;
/* fail unless we have space at the end of a recently allocated page,
we have something on our freelist, or we could allocate a new page */
if (! (p->sl_curr != 0 || do_slabs_newslab(id) != 0)) {
@@ -259,11 +260,6 @@ static void *do_slabs_alloc(const size_t size, unsigned int id) {
MEMCACHED_SLABS_ALLOCATE_FAILED(size, id);
}
- /* FIXME: needs to be a per-slab watermark. */
- if (mem_limit_reached && p->sl_curr < 50) {
- lru_maintainer_wake(id);
- }
-
return ret;
}
@@ -410,11 +406,11 @@ static void *memory_allocate(size_t size) {
return ret;
}
-void *slabs_alloc(size_t size, unsigned int id) {
+void *slabs_alloc(size_t size, unsigned int id, unsigned int *total_chunks) {
void *ret;
pthread_mutex_lock(&slabs_lock);
- ret = do_slabs_alloc(size, id);
+ ret = do_slabs_alloc(size, id, total_chunks);
pthread_mutex_unlock(&slabs_lock);
return ret;
}
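slabs_alloc() now exports the class's total chunk count through an out-parameter, set even when the allocation itself fails, so a caller that receives NULL can feed the count straight into an lru_pull_tail-style reclaim pass without a separate locked query. A caller-side sketch of that contract; alloc_from_class() is a hypothetical stand-in for slabs_alloc():

    #include <stdio.h>
    #include <stdlib.h>

    /* Reports class geometry as a side effect, whether or not the
     * allocation succeeds. */
    static void *alloc_from_class(size_t size, unsigned int id,
                                  unsigned int *total_chunks) {
        (void)id;
        *total_chunks = 2 * 1024; /* pretend: 2 slabs x 1024 chunks */
        return malloc(size);
    }

    int main(void) {
        unsigned int total_chunks = 0;
        void *p = alloc_from_class(64, 5, &total_chunks);
        if (p == NULL) {
            /* total_chunks is still valid and can size a reclaim pass */
            fprintf(stderr, "OOM in class of %u chunks\n", total_chunks);
            return 1;
        }
        printf("allocated; class holds %u chunks\n", total_chunks);
        free(p);
        return 0;
    }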
diff --git a/slabs.h b/slabs.h
index 012089d..1eac5c8 100644
--- a/slabs.h
+++ b/slabs.h
@@ -19,7 +19,7 @@ void slabs_init(const size_t limit, const double factor, const bool prealloc);
unsigned int slabs_clsid(const size_t size);
/** Allocate object of given length. 0 on error */ /*@null@*/
-void *slabs_alloc(const size_t size, unsigned int id);
+void *slabs_alloc(const size_t size, unsigned int id, unsigned int *total_chunks);
/** Free previously allocated object */
void slabs_free(void *ptr, size_t size, unsigned int id);
diff --git a/t/binary.t b/t/binary.t
index d3727b7..97137be 100755
--- a/t/binary.t
+++ b/t/binary.t
@@ -612,7 +612,6 @@ sub _handle_single_response {
my $found = length($rv);
die("Expected $remaining bytes, got $found");
}
-
if (defined $myopaque) {
Test::More::is($opaque, $myopaque, "Expected opaque");
} else {