summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authordormando <dormando@rydia.net>2017-12-06 19:47:00 -0800
committerdormando <dormando@rydia.net>2017-12-07 15:28:13 -0800
commit48b07bd3200a92a82b7500b10f14843204502060 (patch)
tree65e1619e4374de044ffc226119a108df4068e1e4
parentbb1080bb9f8ab5271699bb84eb90de1fcf9ed695 (diff)
downloadmemcached-48b07bd3200a92a82b7500b10f14843204502060.tar.gz
extstore: C version of automove algorithm
A couple of TODO items are left for a new issue I thought of. The memory buffer size is also hardcoded, which should be fixed. The "free and re-init" logic also needs to change to use a boolean in case any related option changes.
-rw-r--r--Makefile.am3
-rw-r--r--items.c28
-rw-r--r--memcached.c1
-rw-r--r--memcached.h1
-rw-r--r--slab_automove.c4
-rw-r--r--slab_automove.h13
-rw-r--r--slab_automove_extstore.c259
-rw-r--r--slab_automove_extstore.h8
-rw-r--r--slabs.c14
-rw-r--r--slabs.h2
-rwxr-xr-xt/binary-extstore.t2
-rw-r--r--t/chunked-extstore.t2
-rw-r--r--t/extstore-buckets.t2
-rw-r--r--t/extstore.t2
14 files changed, 327 insertions, 14 deletions
diff --git a/Makefile.am b/Makefile.am
index 27a2dac..b886145 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -49,7 +49,8 @@ endif
if ENABLE_EXTSTORE
memcached_SOURCES += extstore.c extstore.h \
- storage.c storage.h
+ storage.c storage.h \
+ slab_automove_extstore.c slab_automove_extstore.h
endif
memcached_debug_SOURCES = $(memcached_SOURCES)
diff --git a/items.c b/items.c
index 889c328..9f49897 100644
--- a/items.c
+++ b/items.c
@@ -4,6 +4,7 @@
#include "slab_automove.h"
#ifdef EXTSTORE
#include "storage.h"
+#include "slab_automove_extstore.h"
#endif
#include <sys/stat.h>
#include <sys/socket.h>
@@ -1488,14 +1489,29 @@ static void lru_maintainer_crawler_check(struct crawler_expired_data *cdata, log
}
}
+slab_automove_reg_t slab_automove_default = { /* default automove implementation; lru_maintainer_thread starts out pointing at this table */
+ .init = slab_automove_init, /* builds the automove state from the settings struct */
+ .free = slab_automove_free, /* releases the state returned by .init */
+ .run = slab_automove_run /* one pass; reports the chosen classes through src/dst */
+};
+#ifdef EXTSTORE
+slab_automove_reg_t slab_automove_extstore = { /* extstore-aware implementation; lru_maintainer_thread switches to it when its storage argument is non-NULL */
+ .init = slab_automove_extstore_init, /* builds the extstore automove state from the settings struct */
+ .free = slab_automove_extstore_free, /* releases the state returned by .init */
+ .run = slab_automove_extstore_run /* one pass; reports the chosen classes through src/dst */
+};
+#endif
static pthread_t lru_maintainer_tid;
#define MAX_LRU_MAINTAINER_SLEEP 1000000
#define MIN_LRU_MAINTAINER_SLEEP 1000
static void *lru_maintainer_thread(void *arg) {
+ slab_automove_reg_t *sam = &slab_automove_default;
#ifdef EXTSTORE
void *storage = arg;
+ if (storage != NULL)
+ sam = &slab_automove_extstore;
int x;
#endif
int i;
@@ -1520,8 +1536,7 @@ static void *lru_maintainer_thread(void *arg) {
}
double last_ratio = settings.slab_automove_ratio;
- void *am = slab_automove_init(settings.slab_automove_window,
- settings.slab_automove_ratio);
+ void *am = sam->init(&settings);
pthread_mutex_lock(&lru_maintainer_lock);
if (settings.verbose > 2)
@@ -1597,13 +1612,12 @@ static void *lru_maintainer_thread(void *arg) {
if (settings.slab_automove == 1 && last_automove_check != current_time) {
if (last_ratio != settings.slab_automove_ratio) {
- slab_automove_free(am);
- am = slab_automove_init(settings.slab_automove_window,
- settings.slab_automove_ratio);
+ sam->free(am);
+ am = sam->init(&settings);
last_ratio = settings.slab_automove_ratio;
}
int src, dst;
- slab_automove_run(am, &src, &dst);
+ sam->run(am, &src, &dst);
if (src != -1 && dst != -1) {
slabs_reassign(src, dst);
LOGGER_LOG(l, LOG_SYSEVENTS, LOGGER_SLAB_MOVE, NULL,
@@ -1619,7 +1633,7 @@ static void *lru_maintainer_thread(void *arg) {
}
}
pthread_mutex_unlock(&lru_maintainer_lock);
- slab_automove_free(am);
+ sam->free(am);
// LRU crawler *must* be stopped.
free(cdata);
if (settings.verbose > 2)
diff --git a/memcached.c b/memcached.c
index 0f2a032..0901c09 100644
--- a/memcached.c
+++ b/memcached.c
@@ -6634,6 +6634,7 @@ int main (int argc, char **argv) {
settings.ext_drop_unread = false;
settings.ext_wbuf_size = 1024 * 1024 * 4;
settings.ext_compact_under = 0;
+ settings.slab_automove_freeratio = 0.005;
ext_cf.page_size = 1024 * 1024 * 64;
ext_cf.page_count = 64;
ext_cf.wbuf_size = settings.ext_wbuf_size;
diff --git a/memcached.h b/memcached.h
index 50c8222..b3e6e54 100644
--- a/memcached.h
+++ b/memcached.h
@@ -413,6 +413,7 @@ struct settings {
unsigned int ext_wbuf_size; /* read only note for the engine */
unsigned int ext_compact_under; /* when fewer than this many pages, compact */
double ext_max_frag; /* ideal maximum page fragmentation */
+ double slab_automove_freeratio; /* % of memory to hold free as buffer */
bool ext_drop_unread; /* skip unread items during compaction */
/* per-slab-class free chunk limit */
unsigned int ext_free_memchunks[MAX_NUMBER_OF_SLAB_CLASSES];
diff --git a/slab_automove.c b/slab_automove.c
index 8a66370..25a6bdb 100644
--- a/slab_automove.c
+++ b/slab_automove.c
@@ -30,7 +30,9 @@ typedef struct {
slab_stats_automove sam_after[MAX_NUMBER_OF_SLAB_CLASSES];
} slab_automove;
-void *slab_automove_init(uint32_t window_size, double max_age_ratio) {
+void *slab_automove_init(struct settings *settings) {
+ uint32_t window_size = settings->slab_automove_window;
+ double max_age_ratio = settings->slab_automove_ratio;
slab_automove *a = calloc(1, sizeof(slab_automove));
if (a == NULL)
return NULL;
diff --git a/slab_automove.h b/slab_automove.h
index 99d8d0c..226d68d 100644
--- a/slab_automove.h
+++ b/slab_automove.h
@@ -1,8 +1,19 @@
#ifndef SLAB_AUTOMOVE_H
#define SLAB_AUTOMOVE_H
-void *slab_automove_init(uint32_t window_size, double max_age_ratio);
+/* default automove functions */
+void *slab_automove_init(struct settings *settings);
void slab_automove_free(void *arg);
void slab_automove_run(void *arg, int *src, int *dst);
+typedef void *(*slab_automove_init_func)(struct settings *settings); /* creates an opaque automove state from the settings */
+typedef void (*slab_automove_free_func)(void *arg); /* releases a state previously returned by the init function */
+typedef void (*slab_automove_run_func)(void *arg, int *src, int *dst); /* runs one pass; writes the selected source and destination class to *src / *dst */
+/* Function table describing one automove implementation. The LRU
+ * maintainer thread picks a table once and then only calls through
+ * these three pointers.
+ */
+typedef struct {
+ slab_automove_init_func init; /* called at thread start and again when the automove ratio setting changes */
+ slab_automove_free_func free; /* called before a re-init and when the thread exits */
+ slab_automove_run_func run; /* called from the maintainer loop when slab_automove == 1 */
+} slab_automove_reg_t;
+
#endif
diff --git a/slab_automove_extstore.c b/slab_automove_extstore.c
new file mode 100644
index 0000000..ce08bbe
--- /dev/null
+++ b/slab_automove_extstore.c
@@ -0,0 +1,259 @@
+/* Copyright 2017 Facebook.
+ *
+ * Use and distribution licensed under the BSD license. See
+ * the LICENSE file for full text.
+ */
+
+/* -*- Mode: C; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
+#include "memcached.h"
+#include "slab_automove_extstore.h"
+#include <stdlib.h>
+#include <string.h>
+/* Tunables for the extstore automover.
+ * MIN_PAGES_FOR_SOURCE: a class must own more than this many pages to be
+ *   picked as the "oldest" source or reclaimed as a large slab class.
+ * MIN_PAGES_FOR_RECLAIM: a class must have more free chunks than this many
+ *   pages' worth before a reclaim decision is considered.
+ * MIN_PAGES_FREE: floor, in pages' worth of chunks, for the per-class
+ *   free-chunk target computed by memcheck().
+ * MEMCHECK_PERIOD: minimum gap between memcheck() recalculations, in
+ *   current_time units (presumably seconds - confirm).
+ */
+#define MIN_PAGES_FOR_SOURCE 2
+#define MIN_PAGES_FOR_RECLAIM 2.5
+#define MIN_PAGES_FREE 1.5
+#define MEMCHECK_PERIOD 60
+/* One sample for one slab class in one slot of the sliding window. */
+struct window_data {
+ uint64_t age; /* item age copied from the class's item stats for this pass */
+ uint64_t dirty; /* 1 when evictions, out-of-memory events or page growth were seen in this pass */
+ uint64_t evicted; /* 1 when evictions or out-of-memory events were seen in this pass */
+ unsigned int excess_free; /* 1 when free chunks exceed the class target and did not decrease */
+};
+/* One sample of the global page pool state in one slot of the window.
+ * Both flags stay 0 unless the memory limit has been reached.
+ */
+struct window_global {
+ uint32_t pool_low; /* 1 when the pool holds fewer pages than half of free_mem[0] */
+ uint32_t pool_high; /* 1 when the pool holds more pages than free_mem[0] */
+};
+/* State kept between slab_automove_extstore_run() calls; created by
+ * slab_automove_extstore_init().
+ */
+typedef struct {
+ struct window_data *window_data; /* window_size * MAX_NUMBER_OF_SLAB_CLASSES samples, indexed class * window_size + slot */
+ struct window_global *window_global; /* window_size global-pool samples */
+ struct settings *settings; /* kept so ext_free_memchunks[] can be updated */
+ uint32_t window_size; /* copied from settings->slab_automove_window */
+ uint32_t window_cur; /* pass counter; slot index is window_cur % window_size */
+ uint32_t item_size; /* copied from settings->ext_item_size; classes with smaller chunks count as "small" */
+ rel_time_t last_memcheck_run; /* current_time of the last memcheck() recalculation, 0 to force one */
+ double max_age_ratio; /* copied from settings->slab_automove_ratio */
+ double free_ratio; /* copied from settings->slab_automove_freeratio */
+ unsigned int free_mem[MAX_NUMBER_OF_SLAB_CLASSES]; /* per-class free-chunk target; index 0 holds the page target used for the global pool check */
+ item_stats_automove iam_before[MAX_NUMBER_OF_SLAB_CLASSES]; /* item stats from the previous pass */
+ item_stats_automove iam_after[MAX_NUMBER_OF_SLAB_CLASSES]; /* item stats from the current pass */
+ slab_stats_automove sam_before[MAX_NUMBER_OF_SLAB_CLASSES]; /* slab stats from the previous pass */
+ slab_stats_automove sam_after[MAX_NUMBER_OF_SLAB_CLASSES]; /* slab stats from the current pass */
+} slab_automove;
+
+/* Allocate and prime the extstore automove state.
+ *
+ * Reads the window size, age ratio, free ratio and extstore item size
+ * from settings, and keeps the settings pointer for later updates of
+ * ext_free_memchunks[]. The "before" stat snapshots are filled once so
+ * the first run has something to diff against.
+ *
+ * Returns the state as an opaque pointer, or NULL if any allocation
+ * fails (nothing is leaked in that case).
+ */
+void *slab_automove_extstore_init(struct settings *settings) {
+    const uint32_t slots = settings->slab_automove_window;
+
+    slab_automove *state = calloc(1, sizeof(slab_automove));
+    if (state == NULL) {
+        return NULL;
+    }
+
+    state->window_data = calloc(slots * MAX_NUMBER_OF_SLAB_CLASSES,
+                                sizeof(struct window_data));
+    state->window_global = calloc(slots, sizeof(struct window_global));
+    if (state->window_data == NULL || state->window_global == NULL) {
+        /* free(NULL) is a no-op, so whichever half succeeded is released. */
+        free(state->window_data);
+        free(state->window_global);
+        free(state);
+        return NULL;
+    }
+
+    state->settings = settings;
+    state->window_size = slots;
+    state->max_age_ratio = settings->slab_automove_ratio;
+    state->free_ratio = settings->slab_automove_freeratio;
+    state->item_size = settings->ext_item_size;
+    state->last_memcheck_run = 0;
+
+    /* Dry run: capture the baseline item and slab stats. */
+    fill_item_stats_automove(state->iam_before);
+    fill_slab_stats_automove(state->sam_before);
+
+    return (void *)state;
+}
+
+/* Release a state object created by slab_automove_extstore_init().
+ *
+ * arg may be NULL (init returns NULL on allocation failure and the
+ * caller frees unconditionally); in that case this is a no-op.
+ * Both window arrays are released along with the state itself.
+ */
+void slab_automove_extstore_free(void *arg) {
+    slab_automove *a = (slab_automove *)arg;
+    if (a == NULL) {
+        return;
+    }
+    free(a->window_data);
+    /* window_global is allocated separately in init and must be freed too. */
+    free(a->window_global);
+    free(a);
+}
+
+/* Add every field of the first `size` samples in wd into the totals in w.
+ * The caller is expected to have zeroed *w beforehand.
+ */
+static void window_sum(struct window_data *wd, struct window_data *w,
+                       uint32_t size) {
+    const struct window_data *const end = wd + size;
+    for (const struct window_data *sample = wd; sample != end; sample++) {
+        w->age += sample->age;
+        w->dirty += sample->dirty;
+        w->evicted += sample->evicted;
+        w->excess_free += sample->excess_free;
+    }
+}
+
+/* Add the pool_low / pool_high flags of the first `size` global samples
+ * in wg into the totals in w. The caller zeroes *w beforehand.
+ * This could potentially merge with window_sum().
+ */
+static void window_global_sum(struct window_global *wg,
+                              struct window_global *w, uint32_t size) {
+    const struct window_global *const end = wg + size;
+    for (const struct window_global *sample = wg; sample != end; sample++) {
+        w->pool_high += sample->pool_high;
+        w->pool_low += sample->pool_low;
+    }
+}
+
+/* Sample the global page pool into the current window slot.
+ *
+ * The slot is always cleared. Flags are only raised once the memory
+ * limit has been reached: pool_low when the pool holds fewer pages than
+ * half the target in free_mem[0], pool_high when it holds more than the
+ * target.
+ */
+static void global_pool_check(slab_automove *a) {
+    const uint32_t target_pages = a->free_mem[0];
+    struct window_global *slot =
+        &a->window_global[a->window_cur % a->window_size];
+    bool limit_hit;
+    const unsigned int pool_pages = global_page_pool_size(&limit_hit);
+
+    memset(slot, 0, sizeof(struct window_global));
+    if (limit_hit) {
+        if (pool_pages < target_pages / 2) {
+            slot->pool_low = 1;
+        } else if (pool_pages > target_pages) {
+            slot->pool_high = 1;
+        }
+    }
+}
+
+/* Recompute how many chunks each slab class should keep free.
+ *
+ * Does nothing unless MEMCHECK_PERIOD has elapsed since the last
+ * recalculation. For every class from 1 upward the target is free_ratio
+ * of the class's total chunks, with a floor of MIN_PAGES_FREE pages'
+ * worth of chunks. The target is stored in free_mem[] and mirrored into
+ * settings->ext_free_memchunks[]. free_mem[0] receives free_ratio of
+ * the total page count across those classes.
+ */
+static void memcheck(slab_automove *a) {
+    if (current_time < a->last_memcheck_run + MEMCHECK_PERIOD) {
+        return;
+    }
+    a->last_memcheck_run = current_time;
+
+    unsigned int page_total = 0;
+    for (int cls = 1; cls < MAX_NUMBER_OF_SLAB_CLASSES; cls++) {
+        slab_stats_automove *stats = &a->sam_after[cls];
+        page_total += stats->total_pages;
+
+        unsigned int target = (stats->total_pages * stats->chunks_per_page)
+            * a->free_ratio;
+        /* Never aim below MIN_PAGES_FREE pages' worth of chunks. */
+        if (stats->chunks_per_page * MIN_PAGES_FREE > target) {
+            target = stats->chunks_per_page * MIN_PAGES_FREE;
+        }
+
+        a->free_mem[cls] = target;
+        /* Only store to the shared settings when the value changes. */
+        if (a->settings->ext_free_memchunks[cls] != target) {
+            a->settings->ext_free_memchunks[cls] = target;
+        }
+    }
+    a->free_mem[0] = page_total * a->free_ratio;
+}
+/* Run one pass of the extstore automove algorithm.
+ * arg is the state built by slab_automove_extstore_init(). *src and *dst
+ * are both set to -1 first and are only overwritten when a page move is
+ * chosen. Moves that give pages up use *dst = 0 (presumably the global
+ * page pool class - confirm against slabs.c).
+ * Side effects: advances the sampling window, refreshes the stats
+ * snapshots, and may write a->settings->ext_free_memchunks[].
+ */
+void slab_automove_extstore_run(void *arg, int *src, int *dst) {
+ slab_automove *a = (slab_automove *)arg;
+ int n;
+ struct window_data w_sum;
+ int oldest = -1;
+ uint64_t oldest_age = 0;
+ int youngest = -1;
+ uint64_t youngest_age = ~0; /* ~0 converts to the largest uint64_t, so any sampled age compares lower */
+ *src = -1;
+ *dst = -1;
+/* Sample the global pool into the current slot, then total the
+ * pool_low / pool_high flags over the whole window.
+ */
+ global_pool_check(a);
+ struct window_global wg_sum;
+ memset(&wg_sum, 0, sizeof(struct window_global));
+ window_global_sum(a->window_global, &wg_sum, a->window_size);
+ // fill after structs
+ fill_item_stats_automove(a->iam_after);
+ fill_slab_stats_automove(a->sam_after);
+ a->window_cur++; /* the per-class samples below use the slot selected by the incremented counter */
+/* Refresh the free-chunk targets; memcheck() rate-limits itself. */
+ memcheck(a);
+
+ // iterate slabs
+ for (n = POWER_SMALLEST; n < MAX_NUMBER_OF_SLAB_CLASSES; n++) {
+ bool small_slab = a->sam_before[n].chunk_size < a->item_size /* "small": chunks are smaller than the extstore item size */
+ ? true : false;
+ int w_offset = n * a->window_size; /* start of this class's samples in window_data */
+ struct window_data *wd = &a->window_data[w_offset + (a->window_cur % a->window_size)];
+ // summarize the window-up-to-now.
+ memset(&w_sum, 0, sizeof(struct window_data));
+ window_sum(&a->window_data[w_offset], &w_sum, a->window_size); /* taken before wd is cleared, so the slot about to be overwritten still counts */
+ memset(wd, 0, sizeof(struct window_data));
+
+ // if page delta, or evicted delta, mark window dirty
+ // (or outofmemory)
+ if (a->iam_after[n].evicted - a->iam_before[n].evicted > 0 ||
+ a->iam_after[n].outofmemory - a->iam_before[n].outofmemory > 0) {
+ wd->evicted = 1;
+ wd->dirty = 1;
+ }
+ if (a->sam_after[n].total_pages - a->sam_before[n].total_pages > 0) {
+ wd->dirty = 1;
+ }
+ // Mark excess free if we're over the free mem limit and the number of
+ // chunks aren't decreasing at all.
+ if (a->sam_after[n].free_chunks > a->free_mem[n]
+ && a->sam_after[n].free_chunks - a->sam_before[n].free_chunks >= 0) {
+ if (a->free_mem[n] > 0) { /* a zero target means no target has been computed for this class yet */
+ wd->excess_free = 1;
+ }
+ }
+
+ // set age into window
+ wd->age = a->iam_after[n].age;
+
+ // grab age as average of window total
+ uint64_t age = w_sum.age / a->window_size;
+
+ // if > N free chunks and not dirty, make decision.
+ if (a->sam_after[n].free_chunks > a->sam_after[n].chunks_per_page * MIN_PAGES_FOR_RECLAIM) {
+ if (small_slab && w_sum.dirty == 0) {
+ *src = n;
+ *dst = 0;
+ break; /* stops the scan: later classes are not sampled or ranked in this pass */
+ } else if (!small_slab && w_sum.excess_free >= a->window_size / 2
+ && a->sam_after[n].total_pages > MIN_PAGES_FOR_SOURCE) {
+ // If large slab and free chunks haven't decreased for a full
+ // window, reclaim pages.
+ // No break here, so a later qualifying class overwrites *src.
+ *src = n;
+ *dst = 0;
+ }
+ }
+
+ // if oldest and have enough pages, is oldest
+ if (!small_slab
+ && age > oldest_age
+ && a->sam_after[n].total_pages > MIN_PAGES_FOR_SOURCE) {
+ oldest = n;
+ oldest_age = age;
+ }
+
+ // don't count as youngest if it hasn't been using new chunks.
+ if (!small_slab && age < youngest_age && a->sam_after[n].total_pages != 0
+ && w_sum.excess_free < a->window_size) {
+ youngest = n;
+ youngest_age = age;
+ }
+ }
+/* The current snapshots become the baseline for the next pass. */
+ memcpy(a->iam_before, a->iam_after,
+ sizeof(item_stats_automove) * MAX_NUMBER_OF_SLAB_CLASSES);
+ memcpy(a->sam_before, a->sam_after,
+ sizeof(slab_stats_automove) * MAX_NUMBER_OF_SLAB_CLASSES);
+ // only make decisions if window has filled once.
+ // NOTE(review): a reclaim already written to *src / *dst inside the loop
+ // above is still returned to the caller on this early exit.
+ if (a->window_cur < a->window_size)
+ return;
+/* Window-level decisions; these overwrite any choice made in the loop. */
+ if (wg_sum.pool_high && youngest != -1) {
+ /**src = 0;
+ *dst = youngest;*/
+ /* TODO: No current way to directly assign page from 0 to elsewhere.
+ * Do a current hack by setting the youngest's free mem limiter to
+ * zero and re-running memcheck in the next second.
+ * If set rates are very high and the pool is too low, this can bottom
+ * out...
+ */
+ a->last_memcheck_run = 0; /* clears the timestamp memcheck() uses for rate limiting */
+ a->settings->ext_free_memchunks[youngest] = 0;
+ } else if (wg_sum.pool_low && oldest != -1) {
+ *src = oldest;
+ *dst = 0;
+ } else if (youngest != -1 && oldest != -1 && youngest != oldest) {
+ // if we have a youngest and oldest, and oldest is outside the ratio.
+ if (a->sam_after[youngest].free_chunks <= a->free_mem[youngest]
+ && youngest_age < ((double)oldest_age * a->max_age_ratio)) {
+ *src = oldest;
+ *dst = youngest;
+ }
+ }
+ return;
+}
diff --git a/slab_automove_extstore.h b/slab_automove_extstore.h
new file mode 100644
index 0000000..8b437b1
--- /dev/null
+++ b/slab_automove_extstore.h
@@ -0,0 +1,8 @@
+#ifndef SLAB_AUTOMOVE_EXTSTORE_H
+#define SLAB_AUTOMOVE_EXTSTORE_H
+/* Extstore-aware automove implementation. The three functions have the
+ * same signatures as the slab_automove_init_func, slab_automove_free_func
+ * and slab_automove_run_func pointer types; items.c registers them in a
+ * slab_automove_reg_t table.
+ */
+void *slab_automove_extstore_init(struct settings *settings);
+void slab_automove_extstore_free(void *arg);
+void slab_automove_extstore_run(void *arg, int *src, int *dst);
+
+#endif
diff --git a/slabs.c b/slabs.c
index 39cfbd2..c9a37aa 100644
--- a/slabs.c
+++ b/slabs.c
@@ -404,10 +404,24 @@ void fill_slab_stats_automove(slab_stats_automove *am) {
cur->chunks_per_page = p->perslab;
cur->free_chunks = p->sl_curr;
cur->total_pages = p->slabs;
+ cur->chunk_size = p->size;
}
pthread_mutex_unlock(&slabs_lock);
}
+/* Return the number of pages currently held by the global page pool.
+ *
+ * If mem_flag is non-NULL it is set to true when mem_malloced has
+ * reached mem_limit and to false otherwise. Both values are read while
+ * holding slabs_lock.
+ *
+ * TODO: slabs_available_chunks should grow up to encompass this.
+ * mem_flag is redundant with the other function.
+ */
+unsigned int global_page_pool_size(bool *mem_flag) {
+    pthread_mutex_lock(&slabs_lock);
+    const unsigned int pool_pages = slabclass[SLAB_GLOBAL_PAGE_POOL].slabs;
+    if (mem_flag != NULL) {
+        *mem_flag = (mem_malloced >= mem_limit);
+    }
+    pthread_mutex_unlock(&slabs_lock);
+    return pool_pages;
+}
+
static int nz_strcmp(int nzlength, const char *nz, const char *z) {
int zlength=strlen(z);
return (zlength == nzlength) && (strncmp(nz, z, zlength) == 0) ? 0 : -1;
diff --git a/slabs.h b/slabs.h
index ed557d6..447033c 100644
--- a/slabs.h
+++ b/slabs.h
@@ -36,10 +36,12 @@ bool get_stats(const char *stat_type, int nkey, ADD_STAT add_stats, void *c);
typedef struct { /* per-slab-class snapshot filled by fill_slab_stats_automove() */
unsigned int chunks_per_page; /* copied from the slab class's perslab */
+ unsigned int chunk_size; /* copied from the slab class's size */
long int free_chunks; /* copied from the slab class's sl_curr */
long int total_pages; /* copied from the slab class's slabs */
} slab_stats_automove;
void fill_slab_stats_automove(slab_stats_automove *am);
+unsigned int global_page_pool_size(bool *mem_flag);
/** Fill buffer with stats */ /*@null@*/
void slabs_stats(ADD_STAT add_stats, void *c);
diff --git a/t/binary-extstore.t b/t/binary-extstore.t
index 9028422..390e26a 100755
--- a/t/binary-extstore.t
+++ b/t/binary-extstore.t
@@ -17,7 +17,7 @@ if (!supports_extstore()) {
$ext_path = "/tmp/extstore.$$";
-my $server = new_memcached("-m 64 -U 0 -o ext_page_size=8,ext_page_count=8,ext_wbuf_size=2,ext_threads=1,ext_io_depth=2,ext_item_size=512,ext_item_age=2,ext_recache_rate=10000,ext_max_frag=0.9,ext_path=$ext_path,no_lru_crawler");
+my $server = new_memcached("-m 64 -U 0 -o ext_page_size=8,ext_page_count=8,ext_wbuf_size=2,ext_threads=1,ext_io_depth=2,ext_item_size=512,ext_item_age=2,ext_recache_rate=10000,ext_max_frag=0.9,ext_path=$ext_path,no_lru_crawler,slab_automove=0");
ok($server, "started the server");
# Based almost 100% off testClient.py which is:
diff --git a/t/chunked-extstore.t b/t/chunked-extstore.t
index f3cf903..ebd0f0b 100644
--- a/t/chunked-extstore.t
+++ b/t/chunked-extstore.t
@@ -18,7 +18,7 @@ if (!supports_extstore()) {
$ext_path = "/tmp/extstore.$$";
-my $server = new_memcached("-m 64 -U 0 -o ext_page_size=8,ext_page_count=8,ext_wbuf_size=2,ext_threads=1,ext_io_depth=2,ext_item_size=512,ext_item_age=2,ext_recache_rate=10000,ext_max_frag=0.9,ext_path=$ext_path,slab_chunk_max=16384");
+my $server = new_memcached("-m 64 -U 0 -o ext_page_size=8,ext_page_count=8,ext_wbuf_size=2,ext_threads=1,ext_io_depth=2,ext_item_size=512,ext_item_age=2,ext_recache_rate=10000,ext_max_frag=0.9,ext_path=$ext_path,slab_chunk_max=16384,slab_automove=0");
my $sock = $server->sock;
# We're testing to ensure item chaining doesn't corrupt or poorly overlap
diff --git a/t/extstore-buckets.t b/t/extstore-buckets.t
index f6143af..a2c1c90 100644
--- a/t/extstore-buckets.t
+++ b/t/extstore-buckets.t
@@ -17,7 +17,7 @@ if (!supports_extstore()) {
$ext_path = "/tmp/extstore.$$";
-my $server = new_memcached("-m 64 -U 0 -o ext_page_size=8,ext_page_count=8,ext_wbuf_size=2,ext_threads=1,ext_io_depth=2,ext_item_size=512,ext_item_age=2,ext_recache_rate=10000,ext_max_frag=0,ext_path=$ext_path,ext_low_ttl=60");
+my $server = new_memcached("-m 256 -U 0 -o ext_page_size=8,ext_page_count=8,ext_wbuf_size=2,ext_threads=1,ext_io_depth=2,ext_item_size=512,ext_item_age=2,ext_recache_rate=10000,ext_max_frag=0,ext_path=$ext_path,ext_low_ttl=60,slab_automove=1");
my $sock = $server->sock;
my $value;
diff --git a/t/extstore.t b/t/extstore.t
index 4963ca6..a53a5cf 100644
--- a/t/extstore.t
+++ b/t/extstore.t
@@ -17,7 +17,7 @@ if (!supports_extstore()) {
$ext_path = "/tmp/extstore.$$";
-my $server = new_memcached("-m 64 -U 0 -o ext_page_size=8,ext_page_count=8,ext_wbuf_size=2,ext_threads=1,ext_io_depth=2,ext_item_size=512,ext_item_age=2,ext_recache_rate=10000,ext_max_frag=0.9,ext_path=$ext_path");
+my $server = new_memcached("-m 64 -U 0 -o ext_page_size=8,ext_page_count=8,ext_wbuf_size=2,ext_threads=1,ext_io_depth=2,ext_item_size=512,ext_item_age=2,ext_recache_rate=10000,ext_max_frag=0.9,ext_path=$ext_path,slab_automove=0");
my $sock = $server->sock;
my $value;