summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--items.c28
-rw-r--r--items.h1
-rw-r--r--memcached.h1
-rw-r--r--slabs.c140
-rw-r--r--slabs.h2
-rw-r--r--t/slabs-reassign2.t26
6 files changed, 73 insertions, 125 deletions
diff --git a/items.c b/items.c
index 950ba60..98bebb0 100644
--- a/items.c
+++ b/items.c
@@ -113,6 +113,7 @@ int item_is_flushed(item *it) {
static unsigned int noexp_lru_size(int slabs_clsid) {
int id = CLEAR_LRU(slabs_clsid);
+ id |= NOEXP_LRU;
unsigned int ret;
pthread_mutex_lock(&lru_locks[id]);
ret = sizes[id];
@@ -478,20 +479,6 @@ char *item_cachedump(const unsigned int slabs_clsid, const unsigned int limit, u
return buffer;
}
-void item_stats_evictions(uint64_t *evicted) {
- int n;
- for (n = 0; n < MAX_NUMBER_OF_SLAB_CLASSES; n++) {
- int i;
- int x;
- for (x = 0; x < 4; x++) {
- i = n | lru_type_map[x];
- pthread_mutex_lock(&lru_locks[i]);
- evicted[n] += itemstats[i].evicted;
- pthread_mutex_unlock(&lru_locks[i]);
- }
- }
-}
-
void item_stats_totals(ADD_STAT add_stats, void *c) {
itemstats_t totals;
memset(&totals, 0, sizeof(itemstats_t));
@@ -907,11 +894,22 @@ static int lru_maintainer_juggle(const int slabs_clsid) {
int did_moves = 0;
bool mem_limit_reached = false;
unsigned int total_chunks = 0;
+ unsigned int chunks_perslab = 0;
+ unsigned int chunks_free = 0;
/* TODO: if free_chunks below high watermark, increase aggressiveness */
- slabs_available_chunks(slabs_clsid, &mem_limit_reached, &total_chunks);
+ chunks_free = slabs_available_chunks(slabs_clsid, &mem_limit_reached,
+ &total_chunks, &chunks_perslab);
if (settings.expirezero_does_not_evict)
total_chunks -= noexp_lru_size(slabs_clsid);
+ /* If slab automove is enabled on any level, and we have more than 2 pages
+ * worth of chunks free in this class, ask (gently) to reassign a page
+ * from this class back into the global pool (0)
+ */
+ if (settings.slab_automove > 0 && chunks_free > (chunks_perslab * 2)) {
+ slabs_reassign(slabs_clsid, SLAB_GLOBAL_PAGE_POOL);
+ }
+
/* Juggle HOT/WARM up to N times */
for (i = 0; i < 1000; i++) {
int do_more = 0;
diff --git a/items.h b/items.h
index f47de8f..4e492b4 100644
--- a/items.h
+++ b/items.h
@@ -27,7 +27,6 @@ item *do_item_get(const char *key, const size_t nkey, const uint32_t hv);
item *do_item_touch(const char *key, const size_t nkey, uint32_t exptime, const uint32_t hv);
void item_stats_reset(void);
extern pthread_mutex_t lru_locks[POWER_LARGEST];
-void item_stats_evictions(uint64_t *evicted);
enum crawler_result_type {
CRAWLER_OK=0, CRAWLER_RUNNING, CRAWLER_BADCLASS, CRAWLER_NOTSTARTED
diff --git a/memcached.h b/memcached.h
index 05eeb04..c5c348a 100644
--- a/memcached.h
+++ b/memcached.h
@@ -78,6 +78,7 @@
/* Slab sizing definitions. */
#define POWER_SMALLEST 1
#define POWER_LARGEST 256 /* actual cap is 255 */
+#define SLAB_GLOBAL_PAGE_POOL 0 /* magic slab class for storing pages for reassignment */
#define CHUNK_ALIGN_BYTES 8
/* slab class max is a 6-bit number, -1. */
#define MAX_NUMBER_OF_SLAB_CLASSES (63 + 1)
diff --git a/slabs.c b/slabs.c
index 3518098..1c88376 100644
--- a/slabs.c
+++ b/slabs.c
@@ -194,20 +194,34 @@ static void split_slab_page_into_freelist(char *ptr, const unsigned int id) {
}
}
+/* Pop a free page from the global pool (LIFO: returns the most recently stored page) */
+static void *get_page_from_global_pool(void) {
+ slabclass_t *p = &slabclass[SLAB_GLOBAL_PAGE_POOL];
+ if (p->slabs < 1) {
+ return NULL;
+ }
+ char *ret = p->slab_list[p->slabs - 1];
+ p->slabs--;
+ return ret;
+}
+
static int do_slabs_newslab(const unsigned int id) {
slabclass_t *p = &slabclass[id];
+ slabclass_t *g = &slabclass[SLAB_GLOBAL_PAGE_POOL];
int len = settings.slab_reassign ? settings.item_size_max
: p->size * p->perslab;
char *ptr;
- if ((mem_limit && mem_malloced + len > mem_limit && p->slabs > 0)) {
+ if ((mem_limit && mem_malloced + len > mem_limit && p->slabs > 0
+ && g->slabs == 0)) {
mem_limit_reached = true;
MEMCACHED_SLABS_SLABCLASS_ALLOCATE_FAILED(id);
return 0;
}
if ((grow_slab_list(id) == 0) ||
- ((ptr = memory_allocate((size_t)len)) == 0)) {
+ (((ptr = get_page_from_global_pool()) == NULL) &&
+ ((ptr = memory_allocate((size_t)len)) == 0))) {
MEMCACHED_SLABS_SLABCLASS_ALLOCATE_FAILED(id);
return 0;
@@ -307,6 +321,11 @@ bool get_stats(const char *stat_type, int nkey, ADD_STAT add_stats, void *c) {
APPEND_STAT("curr_items", "%u", stats.curr_items);
APPEND_STAT("total_items", "%u", stats.total_items);
STATS_UNLOCK();
+ if (settings.slab_automove > 0) {
+ pthread_mutex_lock(&slabs_lock);
+ APPEND_STAT("slab_global_page_pool", "%u", slabclass[SLAB_GLOBAL_PAGE_POOL].slabs);
+ pthread_mutex_unlock(&slabs_lock);
+ }
item_stats_totals(add_stats, c);
} else if (nz_strcmp(nkey, stat_type, "items") == 0) {
item_stats(add_stats, c);
@@ -446,7 +465,7 @@ void slabs_adjust_mem_requested(unsigned int id, size_t old, size_t ntotal)
}
unsigned int slabs_available_chunks(const unsigned int id, bool *mem_flag,
- unsigned int *total_chunks) {
+ unsigned int *total_chunks, unsigned int *chunks_perslab) {
unsigned int ret;
slabclass_t *p;
@@ -457,6 +476,8 @@ unsigned int slabs_available_chunks(const unsigned int id, bool *mem_flag,
*mem_flag = mem_limit_reached;
if (total_chunks != NULL)
*total_chunks = p->slabs * p->perslab;
+ if (chunks_perslab != NULL)
+ *chunks_perslab = p->perslab;
pthread_mutex_unlock(&slabs_lock);
return ret;
}
@@ -476,7 +497,7 @@ static int slab_rebalance_start(void) {
if (slab_rebal.s_clsid < POWER_SMALLEST ||
slab_rebal.s_clsid > power_largest ||
- slab_rebal.d_clsid < POWER_SMALLEST ||
+ slab_rebal.d_clsid < SLAB_GLOBAL_PAGE_POOL ||
slab_rebal.d_clsid > power_largest ||
slab_rebal.s_clsid == slab_rebal.d_clsid)
no_go = -2;
@@ -720,7 +741,7 @@ static void slab_rebalance_finish(void) {
pthread_mutex_lock(&slabs_lock);
s_cls = &slabclass[slab_rebal.s_clsid];
- d_cls = &slabclass[slab_rebal.d_clsid];
+ d_cls = &slabclass[slab_rebal.d_clsid];
/* At this point the stolen slab is completely clear.
* We always kill the "first"/"oldest" slab page in the slab_list, so
@@ -734,8 +755,11 @@ static void slab_rebalance_finish(void) {
memset(slab_rebal.slab_start, 0, (size_t)settings.item_size_max);
d_cls->slab_list[d_cls->slabs++] = slab_rebal.slab_start;
- split_slab_page_into_freelist(slab_rebal.slab_start,
- slab_rebal.d_clsid);
+ /* Don't need to split the page into chunks if we're just storing it */
+ if (slab_rebal.d_clsid > SLAB_GLOBAL_PAGE_POOL) {
+ split_slab_page_into_freelist(slab_rebal.slab_start,
+ slab_rebal.d_clsid);
+ }
slab_rebal.done = 0;
slab_rebal.s_clsid = 0;
@@ -758,97 +782,6 @@ static void slab_rebalance_finish(void) {
}
}
-/* Return 1 means a decision was reached.
- * Move to its own thread (created/destroyed as needed) once automover is more
- * complex.
- */
-static int slab_automove_decision(int *src, int *dst) {
- static uint64_t evicted_old[MAX_NUMBER_OF_SLAB_CLASSES];
- static unsigned int slab_zeroes[MAX_NUMBER_OF_SLAB_CLASSES];
- static unsigned int slab_winner = 0;
- static unsigned int slab_wins = 0;
- uint64_t evicted_new[MAX_NUMBER_OF_SLAB_CLASSES];
- uint64_t evicted_diff = 0;
- uint64_t evicted_max = 0;
- unsigned int highest_slab = 0;
- unsigned int total_pages[MAX_NUMBER_OF_SLAB_CLASSES];
- int i;
- int source = 0;
- int dest = 0;
- static rel_time_t next_run;
-
- /* Run less frequently than the slabmove tester. */
- if (current_time >= next_run) {
- next_run = current_time + 10;
- } else {
- return 0;
- }
-
- item_stats_evictions(evicted_new);
- pthread_mutex_lock(&slabs_lock);
- for (i = POWER_SMALLEST; i < power_largest; i++) {
- total_pages[i] = slabclass[i].slabs;
- }
- pthread_mutex_unlock(&slabs_lock);
-
- /* Find a candidate source; something with zero evicts 3+ times */
- for (i = POWER_SMALLEST; i < power_largest; i++) {
- evicted_diff = evicted_new[i] - evicted_old[i];
- if (evicted_diff == 0 && total_pages[i] > 2) {
- slab_zeroes[i]++;
- if (source == 0 && slab_zeroes[i] >= 3)
- source = i;
- } else {
- slab_zeroes[i] = 0;
- if (evicted_diff > evicted_max) {
- evicted_max = evicted_diff;
- highest_slab = i;
- }
- }
- evicted_old[i] = evicted_new[i];
- }
-
- /* Pick a valid destination */
- if (slab_winner != 0 && slab_winner == highest_slab) {
- slab_wins++;
- if (slab_wins >= 3)
- dest = slab_winner;
- } else {
- slab_wins = 1;
- slab_winner = highest_slab;
- }
-
- if (source && dest) {
- *src = source;
- *dst = dest;
- return 1;
- }
- return 0;
-}
-
-/* Slab rebalancer thread.
- * Does not use spinlocks since it is not timing sensitive. Burn less CPU and
- * go to sleep if locks are contended
- */
-static void *slab_maintenance_thread(void *arg) {
- int src, dest;
-
- while (do_run_slab_thread) {
- if (settings.slab_automove == 1) {
- if (slab_automove_decision(&src, &dest) == 1) {
- /* Blind to the return codes. It will retry on its own */
- slabs_reassign(src, dest);
- }
- sleep(1);
- } else {
- /* Don't wake as often if we're not enabled.
- * This is lazier than setting up a condition right now. */
- sleep(5);
- }
- }
- return NULL;
-}
-
/* Slab mover thread.
* Sits waiting for a condition to jump off and shovel some memory about
*/
@@ -918,8 +851,8 @@ static enum reassign_result_type do_slabs_reassign(int src, int dst) {
/* TODO: If we end up back at -1, return a new error type */
}
- if (src < POWER_SMALLEST || src > power_largest ||
- dst < POWER_SMALLEST || dst > power_largest)
+ if (src < POWER_SMALLEST || src > power_largest ||
+ dst < SLAB_GLOBAL_PAGE_POOL || dst > power_largest)
return REASSIGN_BADCLASS;
if (slabclass[src].slabs < 2)
@@ -953,7 +886,6 @@ void slabs_rebalancer_resume(void) {
pthread_mutex_unlock(&slabs_rebalance_lock);
}
-static pthread_t maintenance_tid;
static pthread_t rebalance_tid;
int start_slab_maintenance_thread(void) {
@@ -974,11 +906,6 @@ int start_slab_maintenance_thread(void) {
}
pthread_mutex_init(&slabs_rebalance_lock, NULL);
- if ((ret = pthread_create(&maintenance_tid, NULL,
- slab_maintenance_thread, NULL)) != 0) {
- fprintf(stderr, "Can't create slab maint thread: %s\n", strerror(ret));
- return -1;
- }
if ((ret = pthread_create(&rebalance_tid, NULL,
slab_rebalance_thread, NULL)) != 0) {
fprintf(stderr, "Can't create rebal thread: %s\n", strerror(ret));
@@ -997,6 +924,5 @@ void stop_slab_maintenance_thread(void) {
pthread_mutex_unlock(&slabs_rebalance_lock);
/* Wait for the rebalance thread to stop */
- pthread_join(maintenance_tid, NULL);
pthread_join(rebalance_tid, NULL);
}
diff --git a/slabs.h b/slabs.h
index 1eac5c8..fb29cfa 100644
--- a/slabs.h
+++ b/slabs.h
@@ -34,7 +34,7 @@ bool get_stats(const char *stat_type, int nkey, ADD_STAT add_stats, void *c);
void slabs_stats(ADD_STAT add_stats, void *c);
/* Hints as to freespace in slab class */
-unsigned int slabs_available_chunks(unsigned int id, bool *mem_flag, unsigned int *total_chunks);
+unsigned int slabs_available_chunks(unsigned int id, bool *mem_flag, unsigned int *total_chunks, unsigned int *chunks_perslab);
int start_slab_maintenance_thread(void);
void stop_slab_maintenance_thread(void);
diff --git a/t/slabs-reassign2.t b/t/slabs-reassign2.t
index 8de4a05..9135170 100644
--- a/t/slabs-reassign2.t
+++ b/t/slabs-reassign2.t
@@ -2,7 +2,7 @@
use strict;
use warnings;
-use Test::More tests => 5;
+use Test::More tests => 9;
use FindBin qw($Bin);
use lib "$Bin/lib";
use MemcachedTest;
@@ -62,3 +62,27 @@ cmp_ok($hits, '>', 4000, 'were able to fetch back 2/3rds of 8k keys');
my $stats_done = mem_stats($sock);
cmp_ok($stats_done->{slab_reassign_rescues}, '>', 0, 'some reassign rescues happened');
cmp_ok($stats_done->{slab_reassign_evictions}, '>', 0, 'some reassign evictions happened');
+
+print $sock "flush_all\r\n";
+is(scalar <$sock>, "OK\r\n", "did flush_all");
+my $tries;
+for ($tries = 20; $tries > 0; $tries--) {
+ sleep 1;
+ my $stats = mem_stats($sock);
+ if ($stats->{slab_global_page_pool} == 61) {
+ last;
+ }
+}
+cmp_ok($tries, '>', 0, 'reclaimed 61 pages before timeout');
+
+# Set into an entirely new class. Overload a bit to try to cause problems.
+$value = "B"x4096;
+for (1 .. $keycount * 4) {
+ print $sock "set jfoo$_ 0 0 4096 noreply\r\n$value\r\n";
+}
+
+{
+ my $stats = mem_stats($sock);
+ is($stats->{curr_items}, 14490, "stored 14490 4k items");
+ is($stats->{slab_global_page_pool}, 0, "drained the global page pool");
+}