author    dormando <dormando@rydia.net> 2015-09-29 02:48:02 -0700
committer dormando <dormando@rydia.net> 2015-11-18 23:14:35 -0800
commit    d6e96467051720197abf9eab8ca85d153ae06610 (patch)
tree      85ec27c356f2ef6fac98d30670e78ae16d84aa8d
parent    d5185f9c25e346417d0de1c8d704d945d76ea474 (diff)
download  memcached-d6e96467051720197abf9eab8ca85d153ae06610.tar.gz
first half of new slab automover
If any slab class has more than two pages' worth of free chunks, attempt to free one page back to a global pool. This creates a new concept: a slab page move destination of "0", which is a global page pool. Pages can be re-assigned out of that pool during allocation.

Combined with the item rescuing from the previous patch, we can safely shuffle pages back to the reassignment pool as chunks free up naturally. This should be a safe default going forward. Users should also be able to decide to free or move pages based on eviction pressure; that is coming in another commit.

This also fixes a calculation of the NOEXP LRU size, and completely removes the old slab automover thread. Slab automove decisions will now be part of the lru maintainer thread.
-rw-r--r--  items.c             |  28
-rw-r--r--  items.h             |   1
-rw-r--r--  memcached.h         |   1
-rw-r--r--  slabs.c             | 140
-rw-r--r--  slabs.h             |   2
-rw-r--r--  t/slabs-reassign2.t |  26
6 files changed, 73 insertions(+), 125 deletions(-)
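
At a glance, the mechanism is two small pieces: the LRU maintainer offers a page back whenever a class sits on more than two pages of free chunks, and the allocator pulls pages out of class 0 before asking for fresh memory. A condensed sketch of both sides, taken from the items.c and slabs.c hunks below (locking and error handling elided):

    /* LRU maintainer side: offer a surplus page back to the global pool. */
    if (settings.slab_automove > 0 && chunks_free > (chunks_perslab * 2)) {
        slabs_reassign(slabs_clsid, SLAB_GLOBAL_PAGE_POOL);
    }

    /* Allocation side (do_slabs_newslab): reuse a pooled page if one
     * exists, otherwise fall back to allocating fresh memory. */
    if ((ptr = get_page_from_global_pool()) == NULL)
        ptr = memory_allocate((size_t)len);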
diff --git a/items.c b/items.c
index 950ba60..98bebb0 100644
--- a/items.c
+++ b/items.c
@@ -113,6 +113,7 @@ int item_is_flushed(item *it) {
static unsigned int noexp_lru_size(int slabs_clsid) {
int id = CLEAR_LRU(slabs_clsid);
+ id |= NOEXP_LRU;
unsigned int ret;
pthread_mutex_lock(&lru_locks[id]);
ret = sizes[id];
@@ -478,20 +479,6 @@ char *item_cachedump(const unsigned int slabs_clsid, const unsigned int limit, u
return buffer;
}
-void item_stats_evictions(uint64_t *evicted) {
- int n;
- for (n = 0; n < MAX_NUMBER_OF_SLAB_CLASSES; n++) {
- int i;
- int x;
- for (x = 0; x < 4; x++) {
- i = n | lru_type_map[x];
- pthread_mutex_lock(&lru_locks[i]);
- evicted[n] += itemstats[i].evicted;
- pthread_mutex_unlock(&lru_locks[i]);
- }
- }
-}
-
void item_stats_totals(ADD_STAT add_stats, void *c) {
itemstats_t totals;
memset(&totals, 0, sizeof(itemstats_t));
@@ -907,11 +894,22 @@ static int lru_maintainer_juggle(const int slabs_clsid) {
int did_moves = 0;
bool mem_limit_reached = false;
unsigned int total_chunks = 0;
+ unsigned int chunks_perslab = 0;
+ unsigned int chunks_free = 0;
/* TODO: if free_chunks below high watermark, increase aggressiveness */
- slabs_available_chunks(slabs_clsid, &mem_limit_reached, &total_chunks);
+ chunks_free = slabs_available_chunks(slabs_clsid, &mem_limit_reached,
+ &total_chunks, &chunks_perslab);
if (settings.expirezero_does_not_evict)
total_chunks -= noexp_lru_size(slabs_clsid);
+ /* If slab automove is enabled on any level, and we have more than 2 pages
+ * worth of chunks free in this class, ask (gently) to reassign a page
+ * from this class back into the global pool (0)
+ */
+ if (settings.slab_automove > 0 && chunks_free > (chunks_perslab * 2)) {
+ slabs_reassign(slabs_clsid, SLAB_GLOBAL_PAGE_POOL);
+ }
+
/* Juggle HOT/WARM up to N times */
for (i = 0; i < 1000; i++) {
int do_more = 0;
diff --git a/items.h b/items.h
index f47de8f..4e492b4 100644
--- a/items.h
+++ b/items.h
@@ -27,7 +27,6 @@ item *do_item_get(const char *key, const size_t nkey, const uint32_t hv);
item *do_item_touch(const char *key, const size_t nkey, uint32_t exptime, const uint32_t hv);
void item_stats_reset(void);
extern pthread_mutex_t lru_locks[POWER_LARGEST];
-void item_stats_evictions(uint64_t *evicted);
enum crawler_result_type {
CRAWLER_OK=0, CRAWLER_RUNNING, CRAWLER_BADCLASS, CRAWLER_NOTSTARTED
diff --git a/memcached.h b/memcached.h
index 05eeb04..c5c348a 100644
--- a/memcached.h
+++ b/memcached.h
@@ -78,6 +78,7 @@
/* Slab sizing definitions. */
#define POWER_SMALLEST 1
#define POWER_LARGEST 256 /* actual cap is 255 */
+#define SLAB_GLOBAL_PAGE_POOL 0 /* magic slab class for storing pages for reassignment */
#define CHUNK_ALIGN_BYTES 8
/* slab class max is a 6-bit number, -1. */
#define MAX_NUMBER_OF_SLAB_CLASSES (63 + 1)
diff --git a/slabs.c b/slabs.c
index 3518098..1c88376 100644
--- a/slabs.c
+++ b/slabs.c
@@ -194,20 +194,34 @@ static void split_slab_page_into_freelist(char *ptr, const unsigned int id) {
}
}
+/* Fast FIFO queue */
+static void *get_page_from_global_pool(void) {
+ slabclass_t *p = &slabclass[SLAB_GLOBAL_PAGE_POOL];
+ if (p->slabs < 1) {
+ return NULL;
+ }
+ char *ret = p->slab_list[p->slabs - 1];
+ p->slabs--;
+ return ret;
+}
+
static int do_slabs_newslab(const unsigned int id) {
slabclass_t *p = &slabclass[id];
+ slabclass_t *g = &slabclass[SLAB_GLOBAL_PAGE_POOL];
int len = settings.slab_reassign ? settings.item_size_max
: p->size * p->perslab;
char *ptr;
- if ((mem_limit && mem_malloced + len > mem_limit && p->slabs > 0)) {
+ if ((mem_limit && mem_malloced + len > mem_limit && p->slabs > 0
+ && g->slabs == 0)) {
mem_limit_reached = true;
MEMCACHED_SLABS_SLABCLASS_ALLOCATE_FAILED(id);
return 0;
}
if ((grow_slab_list(id) == 0) ||
- ((ptr = memory_allocate((size_t)len)) == 0)) {
+ (((ptr = get_page_from_global_pool()) == NULL) &&
+ ((ptr = memory_allocate((size_t)len)) == 0))) {
MEMCACHED_SLABS_SLABCLASS_ALLOCATE_FAILED(id);
return 0;
@@ -307,6 +321,11 @@ bool get_stats(const char *stat_type, int nkey, ADD_STAT add_stats, void *c) {
APPEND_STAT("curr_items", "%u", stats.curr_items);
APPEND_STAT("total_items", "%u", stats.total_items);
STATS_UNLOCK();
+ if (settings.slab_automove > 0) {
+ pthread_mutex_lock(&slabs_lock);
+ APPEND_STAT("slab_global_page_pool", "%u", slabclass[SLAB_GLOBAL_PAGE_POOL].slabs);
+ pthread_mutex_unlock(&slabs_lock);
+ }
item_stats_totals(add_stats, c);
} else if (nz_strcmp(nkey, stat_type, "items") == 0) {
item_stats(add_stats, c);
@@ -446,7 +465,7 @@ void slabs_adjust_mem_requested(unsigned int id, size_t old, size_t ntotal)
}
unsigned int slabs_available_chunks(const unsigned int id, bool *mem_flag,
- unsigned int *total_chunks) {
+ unsigned int *total_chunks, unsigned int *chunks_perslab) {
unsigned int ret;
slabclass_t *p;
@@ -457,6 +476,8 @@ unsigned int slabs_available_chunks(const unsigned int id, bool *mem_flag,
*mem_flag = mem_limit_reached;
if (total_chunks != NULL)
*total_chunks = p->slabs * p->perslab;
+ if (chunks_perslab != NULL)
+ *chunks_perslab = p->perslab;
pthread_mutex_unlock(&slabs_lock);
return ret;
}
@@ -476,7 +497,7 @@ static int slab_rebalance_start(void) {
if (slab_rebal.s_clsid < POWER_SMALLEST ||
slab_rebal.s_clsid > power_largest ||
- slab_rebal.d_clsid < POWER_SMALLEST ||
+ slab_rebal.d_clsid < SLAB_GLOBAL_PAGE_POOL ||
slab_rebal.d_clsid > power_largest ||
slab_rebal.s_clsid == slab_rebal.d_clsid)
no_go = -2;
@@ -720,7 +741,7 @@ static void slab_rebalance_finish(void) {
pthread_mutex_lock(&slabs_lock);
s_cls = &slabclass[slab_rebal.s_clsid];
- d_cls = &slabclass[slab_rebal.d_clsid];
+ d_cls = &slabclass[slab_rebal.d_clsid];
/* At this point the stolen slab is completely clear.
* We always kill the "first"/"oldest" slab page in the slab_list, so
@@ -734,8 +755,11 @@ static void slab_rebalance_finish(void) {
memset(slab_rebal.slab_start, 0, (size_t)settings.item_size_max);
d_cls->slab_list[d_cls->slabs++] = slab_rebal.slab_start;
- split_slab_page_into_freelist(slab_rebal.slab_start,
- slab_rebal.d_clsid);
+ /* Don't need to split the page into chunks if we're just storing it */
+ if (slab_rebal.d_clsid > SLAB_GLOBAL_PAGE_POOL) {
+ split_slab_page_into_freelist(slab_rebal.slab_start,
+ slab_rebal.d_clsid);
+ }
slab_rebal.done = 0;
slab_rebal.s_clsid = 0;
@@ -758,97 +782,6 @@ static void slab_rebalance_finish(void) {
}
}
-/* Return 1 means a decision was reached.
- * Move to its own thread (created/destroyed as needed) once automover is more
- * complex.
- */
-static int slab_automove_decision(int *src, int *dst) {
- static uint64_t evicted_old[MAX_NUMBER_OF_SLAB_CLASSES];
- static unsigned int slab_zeroes[MAX_NUMBER_OF_SLAB_CLASSES];
- static unsigned int slab_winner = 0;
- static unsigned int slab_wins = 0;
- uint64_t evicted_new[MAX_NUMBER_OF_SLAB_CLASSES];
- uint64_t evicted_diff = 0;
- uint64_t evicted_max = 0;
- unsigned int highest_slab = 0;
- unsigned int total_pages[MAX_NUMBER_OF_SLAB_CLASSES];
- int i;
- int source = 0;
- int dest = 0;
- static rel_time_t next_run;
-
- /* Run less frequently than the slabmove tester. */
- if (current_time >= next_run) {
- next_run = current_time + 10;
- } else {
- return 0;
- }
-
- item_stats_evictions(evicted_new);
- pthread_mutex_lock(&slabs_lock);
- for (i = POWER_SMALLEST; i < power_largest; i++) {
- total_pages[i] = slabclass[i].slabs;
- }
- pthread_mutex_unlock(&slabs_lock);
-
- /* Find a candidate source; something with zero evicts 3+ times */
- for (i = POWER_SMALLEST; i < power_largest; i++) {
- evicted_diff = evicted_new[i] - evicted_old[i];
- if (evicted_diff == 0 && total_pages[i] > 2) {
- slab_zeroes[i]++;
- if (source == 0 && slab_zeroes[i] >= 3)
- source = i;
- } else {
- slab_zeroes[i] = 0;
- if (evicted_diff > evicted_max) {
- evicted_max = evicted_diff;
- highest_slab = i;
- }
- }
- evicted_old[i] = evicted_new[i];
- }
-
- /* Pick a valid destination */
- if (slab_winner != 0 && slab_winner == highest_slab) {
- slab_wins++;
- if (slab_wins >= 3)
- dest = slab_winner;
- } else {
- slab_wins = 1;
- slab_winner = highest_slab;
- }
-
- if (source && dest) {
- *src = source;
- *dst = dest;
- return 1;
- }
- return 0;
-}
-
-/* Slab rebalancer thread.
- * Does not use spinlocks since it is not timing sensitive. Burn less CPU and
- * go to sleep if locks are contended
- */
-static void *slab_maintenance_thread(void *arg) {
- int src, dest;
-
- while (do_run_slab_thread) {
- if (settings.slab_automove == 1) {
- if (slab_automove_decision(&src, &dest) == 1) {
- /* Blind to the return codes. It will retry on its own */
- slabs_reassign(src, dest);
- }
- sleep(1);
- } else {
- /* Don't wake as often if we're not enabled.
- * This is lazier than setting up a condition right now. */
- sleep(5);
- }
- }
- return NULL;
-}
-
/* Slab mover thread.
* Sits waiting for a condition to jump off and shovel some memory about
*/
@@ -918,8 +851,8 @@ static enum reassign_result_type do_slabs_reassign(int src, int dst) {
/* TODO: If we end up back at -1, return a new error type */
}
- if (src < POWER_SMALLEST || src > power_largest ||
- dst < POWER_SMALLEST || dst > power_largest)
+ if (src < POWER_SMALLEST || src > power_largest ||
+ dst < SLAB_GLOBAL_PAGE_POOL || dst > power_largest)
return REASSIGN_BADCLASS;
if (slabclass[src].slabs < 2)
@@ -953,7 +886,6 @@ void slabs_rebalancer_resume(void) {
pthread_mutex_unlock(&slabs_rebalance_lock);
}
-static pthread_t maintenance_tid;
static pthread_t rebalance_tid;
int start_slab_maintenance_thread(void) {
@@ -974,11 +906,6 @@ int start_slab_maintenance_thread(void) {
}
pthread_mutex_init(&slabs_rebalance_lock, NULL);
- if ((ret = pthread_create(&maintenance_tid, NULL,
- slab_maintenance_thread, NULL)) != 0) {
- fprintf(stderr, "Can't create slab maint thread: %s\n", strerror(ret));
- return -1;
- }
if ((ret = pthread_create(&rebalance_tid, NULL,
slab_rebalance_thread, NULL)) != 0) {
fprintf(stderr, "Can't create rebal thread: %s\n", strerror(ret));
@@ -997,6 +924,5 @@ void stop_slab_maintenance_thread(void) {
pthread_mutex_unlock(&slabs_rebalance_lock);
/* Wait for the maintenance thread to stop */
- pthread_join(maintenance_tid, NULL);
pthread_join(rebalance_tid, NULL);
}
diff --git a/slabs.h b/slabs.h
index 1eac5c8..fb29cfa 100644
--- a/slabs.h
+++ b/slabs.h
@@ -34,7 +34,7 @@ bool get_stats(const char *stat_type, int nkey, ADD_STAT add_stats, void *c);
void slabs_stats(ADD_STAT add_stats, void *c);
/* Hints as to freespace in slab class */
-unsigned int slabs_available_chunks(unsigned int id, bool *mem_flag, unsigned int *total_chunks);
+unsigned int slabs_available_chunks(unsigned int id, bool *mem_flag, unsigned int *total_chunks, unsigned int *chunks_perslab);
int start_slab_maintenance_thread(void);
void stop_slab_maintenance_thread(void);
diff --git a/t/slabs-reassign2.t b/t/slabs-reassign2.t
index 8de4a05..9135170 100644
--- a/t/slabs-reassign2.t
+++ b/t/slabs-reassign2.t
@@ -2,7 +2,7 @@
use strict;
use warnings;
-use Test::More tests => 5;
+use Test::More tests => 9;
use FindBin qw($Bin);
use lib "$Bin/lib";
use MemcachedTest;
@@ -62,3 +62,27 @@ cmp_ok($hits, '>', 4000, 'were able to fetch back 2/3rds of 8k keys');
my $stats_done = mem_stats($sock);
cmp_ok($stats_done->{slab_reassign_rescues}, '>', 0, 'some reassign rescues happened');
cmp_ok($stats_done->{slab_reassign_evictions}, '>', 0, 'some reassign evictions happened');
+
+print $sock "flush_all\r\n";
+is(scalar <$sock>, "OK\r\n", "did flush_all");
+my $tries;
+for ($tries = 20; $tries > 0; $tries--) {
+ sleep 1;
+ my $stats = mem_stats($sock);
+ if ($stats->{slab_global_page_pool} == 61) {
+ last;
+ }
+}
+cmp_ok($tries, '>', 0, 'reclaimed 61 pages before timeout');
+
+# Set into an entirely new class. Overload a bit to try to cause problems.
+$value = "B"x4096;
+for (1 .. $keycount * 4) {
+ print $sock "set jfoo$_ 0 0 4096 noreply\r\n$value\r\n";
+}
+
+{
+ my $stats = mem_stats($sock);
+ is($stats->{curr_items}, 14490, "stored 14490 4k items");
+ is($stats->{slab_global_page_pool}, 0, "drained the global page pool");
+}