diff options
author | dormando <dormando@rydia.net> | 2017-12-06 19:47:00 -0800 |
---|---|---|
committer | dormando <dormando@rydia.net> | 2017-12-07 15:28:13 -0800 |
commit | 48b07bd3200a92a82b7500b10f14843204502060 (patch) | |
tree | 65e1619e4374de044ffc226119a108df4068e1e4 | |
parent | bb1080bb9f8ab5271699bb84eb90de1fcf9ed695 (diff) | |
download | memcached-48b07bd3200a92a82b7500b10f14843204502060.tar.gz |
extstore: C version of automove algorithm
A couple of TODO items are left for a new issue I thought of. The memory
buffer size is also hardcoded, which should be fixed.
The "free and re-init" logic also needs to change to use a boolean in case any
related option changes.
-rw-r--r-- | Makefile.am | 3 | ||||
-rw-r--r-- | items.c | 28 | ||||
-rw-r--r-- | memcached.c | 1 | ||||
-rw-r--r-- | memcached.h | 1 | ||||
-rw-r--r-- | slab_automove.c | 4 | ||||
-rw-r--r-- | slab_automove.h | 13 | ||||
-rw-r--r-- | slab_automove_extstore.c | 259 | ||||
-rw-r--r-- | slab_automove_extstore.h | 8 | ||||
-rw-r--r-- | slabs.c | 14 | ||||
-rw-r--r-- | slabs.h | 2 | ||||
-rwxr-xr-x | t/binary-extstore.t | 2 | ||||
-rw-r--r-- | t/chunked-extstore.t | 2 | ||||
-rw-r--r-- | t/extstore-buckets.t | 2 | ||||
-rw-r--r-- | t/extstore.t | 2 |
14 files changed, 327 insertions, 14 deletions
diff --git a/Makefile.am b/Makefile.am index 27a2dac..b886145 100644 --- a/Makefile.am +++ b/Makefile.am @@ -49,7 +49,8 @@ endif if ENABLE_EXTSTORE memcached_SOURCES += extstore.c extstore.h \ - storage.c storage.h + storage.c storage.h \ + slab_automove_extstore.c slab_automove_extstore.h endif memcached_debug_SOURCES = $(memcached_SOURCES) @@ -4,6 +4,7 @@ #include "slab_automove.h" #ifdef EXTSTORE #include "storage.h" +#include "slab_automove_extstore.h" #endif #include <sys/stat.h> #include <sys/socket.h> @@ -1488,14 +1489,29 @@ static void lru_maintainer_crawler_check(struct crawler_expired_data *cdata, log } } +slab_automove_reg_t slab_automove_default = { + .init = slab_automove_init, + .free = slab_automove_free, + .run = slab_automove_run +}; +#ifdef EXTSTORE +slab_automove_reg_t slab_automove_extstore = { + .init = slab_automove_extstore_init, + .free = slab_automove_extstore_free, + .run = slab_automove_extstore_run +}; +#endif static pthread_t lru_maintainer_tid; #define MAX_LRU_MAINTAINER_SLEEP 1000000 #define MIN_LRU_MAINTAINER_SLEEP 1000 static void *lru_maintainer_thread(void *arg) { + slab_automove_reg_t *sam = &slab_automove_default; #ifdef EXTSTORE void *storage = arg; + if (storage != NULL) + sam = &slab_automove_extstore; int x; #endif int i; @@ -1520,8 +1536,7 @@ static void *lru_maintainer_thread(void *arg) { } double last_ratio = settings.slab_automove_ratio; - void *am = slab_automove_init(settings.slab_automove_window, - settings.slab_automove_ratio); + void *am = sam->init(&settings); pthread_mutex_lock(&lru_maintainer_lock); if (settings.verbose > 2) @@ -1597,13 +1612,12 @@ static void *lru_maintainer_thread(void *arg) { if (settings.slab_automove == 1 && last_automove_check != current_time) { if (last_ratio != settings.slab_automove_ratio) { - slab_automove_free(am); - am = slab_automove_init(settings.slab_automove_window, - settings.slab_automove_ratio); + sam->free(am); + am = sam->init(&settings); last_ratio = 
settings.slab_automove_ratio; } int src, dst; - slab_automove_run(am, &src, &dst); + sam->run(am, &src, &dst); if (src != -1 && dst != -1) { slabs_reassign(src, dst); LOGGER_LOG(l, LOG_SYSEVENTS, LOGGER_SLAB_MOVE, NULL, @@ -1619,7 +1633,7 @@ static void *lru_maintainer_thread(void *arg) { } } pthread_mutex_unlock(&lru_maintainer_lock); - slab_automove_free(am); + sam->free(am); // LRU crawler *must* be stopped. free(cdata); if (settings.verbose > 2) diff --git a/memcached.c b/memcached.c index 0f2a032..0901c09 100644 --- a/memcached.c +++ b/memcached.c @@ -6634,6 +6634,7 @@ int main (int argc, char **argv) { settings.ext_drop_unread = false; settings.ext_wbuf_size = 1024 * 1024 * 4; settings.ext_compact_under = 0; + settings.slab_automove_freeratio = 0.005; ext_cf.page_size = 1024 * 1024 * 64; ext_cf.page_count = 64; ext_cf.wbuf_size = settings.ext_wbuf_size; diff --git a/memcached.h b/memcached.h index 50c8222..b3e6e54 100644 --- a/memcached.h +++ b/memcached.h @@ -413,6 +413,7 @@ struct settings { unsigned int ext_wbuf_size; /* read only note for the engine */ unsigned int ext_compact_under; /* when fewer than this many pages, compact */ double ext_max_frag; /* ideal maximum page fragmentation */ + double slab_automove_freeratio; /* % of memory to hold free as buffer */ bool ext_drop_unread; /* skip unread items during compaction */ /* per-slab-class free chunk limit */ unsigned int ext_free_memchunks[MAX_NUMBER_OF_SLAB_CLASSES]; diff --git a/slab_automove.c b/slab_automove.c index 8a66370..25a6bdb 100644 --- a/slab_automove.c +++ b/slab_automove.c @@ -30,7 +30,9 @@ typedef struct { slab_stats_automove sam_after[MAX_NUMBER_OF_SLAB_CLASSES]; } slab_automove; -void *slab_automove_init(uint32_t window_size, double max_age_ratio) { +void *slab_automove_init(struct settings *settings) { + uint32_t window_size = settings->slab_automove_window; + double max_age_ratio = settings->slab_automove_ratio; slab_automove *a = calloc(1, sizeof(slab_automove)); if (a == NULL) 
return NULL; diff --git a/slab_automove.h b/slab_automove.h index 99d8d0c..226d68d 100644 --- a/slab_automove.h +++ b/slab_automove.h @@ -1,8 +1,19 @@ #ifndef SLAB_AUTOMOVE_H #define SLAB_AUTOMOVE_H -void *slab_automove_init(uint32_t window_size, double max_age_ratio); +/* default automove functions */ +void *slab_automove_init(struct settings *settings); void slab_automove_free(void *arg); void slab_automove_run(void *arg, int *src, int *dst); +typedef void *(*slab_automove_init_func)(struct settings *settings); +typedef void (*slab_automove_free_func)(void *arg); +typedef void (*slab_automove_run_func)(void *arg, int *src, int *dst); + +typedef struct { + slab_automove_init_func init; + slab_automove_free_func free; + slab_automove_run_func run; +} slab_automove_reg_t; + #endif diff --git a/slab_automove_extstore.c b/slab_automove_extstore.c new file mode 100644 index 0000000..ce08bbe --- /dev/null +++ b/slab_automove_extstore.c @@ -0,0 +1,259 @@ +/* Copyright 2017 Facebook. + * + * Use and distribution licensed under the BSD license. See + * the LICENSE file for full text. 
+ */ + +/* -*- Mode: C; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */ +#include "memcached.h" +#include "slab_automove_extstore.h" +#include <stdlib.h> +#include <string.h> + +#define MIN_PAGES_FOR_SOURCE 2 +#define MIN_PAGES_FOR_RECLAIM 2.5 +#define MIN_PAGES_FREE 1.5 +#define MEMCHECK_PERIOD 60 + +struct window_data { + uint64_t age; + uint64_t dirty; + uint64_t evicted; + unsigned int excess_free; +}; + +struct window_global { + uint32_t pool_low; + uint32_t pool_high; +}; + +typedef struct { + struct window_data *window_data; + struct window_global *window_global; + struct settings *settings; + uint32_t window_size; + uint32_t window_cur; + uint32_t item_size; + rel_time_t last_memcheck_run; + double max_age_ratio; + double free_ratio; + unsigned int free_mem[MAX_NUMBER_OF_SLAB_CLASSES]; + item_stats_automove iam_before[MAX_NUMBER_OF_SLAB_CLASSES]; + item_stats_automove iam_after[MAX_NUMBER_OF_SLAB_CLASSES]; + slab_stats_automove sam_before[MAX_NUMBER_OF_SLAB_CLASSES]; + slab_stats_automove sam_after[MAX_NUMBER_OF_SLAB_CLASSES]; +} slab_automove; + +void *slab_automove_extstore_init(struct settings *settings) { + uint32_t window_size = settings->slab_automove_window; + double max_age_ratio = settings->slab_automove_ratio; + slab_automove *a = calloc(1, sizeof(slab_automove)); + if (a == NULL) + return NULL; + a->window_data = calloc(window_size * MAX_NUMBER_OF_SLAB_CLASSES, sizeof(struct window_data)); + a->window_global = calloc(window_size, sizeof(struct window_global)); + a->window_size = window_size; + a->max_age_ratio = max_age_ratio; + a->free_ratio = settings->slab_automove_freeratio; + a->item_size = settings->ext_item_size; + a->last_memcheck_run = 0; + a->settings = settings; + if (a->window_data == NULL || a->window_global == NULL) { + if (a->window_data) + free(a->window_data); + if (a->window_global) + free(a->window_global); + free(a); + return NULL; + } + + // do a dry run to fill the before structs + 
fill_item_stats_automove(a->iam_before); + fill_slab_stats_automove(a->sam_before); + + return (void *)a; +} + +void slab_automove_extstore_free(void *arg) { + slab_automove *a = (slab_automove *)arg; + free(a->window_data); + free(a); +} + +static void window_sum(struct window_data *wd, struct window_data *w, + uint32_t size) { + for (int x = 0; x < size; x++) { + struct window_data *d = &wd[x]; + w->age += d->age; + w->dirty += d->dirty; + w->evicted += d->evicted; + w->excess_free += d->excess_free; + } +} + +/* This could potentially merge with above */ +static void window_global_sum(struct window_global *wg, + struct window_global *w, uint32_t size) { + for (int x = 0; x < size; x++) { + struct window_global *d = &wg[x]; + w->pool_high += d->pool_high; + w->pool_low += d->pool_low; + } +} + +static void global_pool_check(slab_automove *a) { + bool mem_limit_reached; + uint32_t free = a->free_mem[0]; + struct window_global *wg = &a->window_global[a->window_cur % a->window_size]; + unsigned int count = global_page_pool_size(&mem_limit_reached); + memset(wg, 0, sizeof(struct window_global)); + if (!mem_limit_reached) + return; + if (count < free / 2) { + wg->pool_low = 1; + } else if (count > free) { + wg->pool_high = 1; + } +} + +static void memcheck(slab_automove *a) { + unsigned int total_pages = 0; + if (current_time < a->last_memcheck_run + MEMCHECK_PERIOD) + return; + a->last_memcheck_run = current_time; + for (int n = 1; n < MAX_NUMBER_OF_SLAB_CLASSES; n++) { + slab_stats_automove *sam = &a->sam_after[n]; + total_pages += sam->total_pages; + unsigned int hold_free = (sam->total_pages * sam->chunks_per_page) + * a->free_ratio; + if (sam->chunks_per_page * MIN_PAGES_FREE > hold_free) + hold_free = sam->chunks_per_page * MIN_PAGES_FREE; + a->free_mem[n] = hold_free; + if (a->settings->ext_free_memchunks[n] != hold_free) { + a->settings->ext_free_memchunks[n] = hold_free; + } + } + a->free_mem[0] = total_pages * a->free_ratio; +} + +void 
slab_automove_extstore_run(void *arg, int *src, int *dst) { + slab_automove *a = (slab_automove *)arg; + int n; + struct window_data w_sum; + int oldest = -1; + uint64_t oldest_age = 0; + int youngest = -1; + uint64_t youngest_age = ~0; + *src = -1; + *dst = -1; + + global_pool_check(a); + struct window_global wg_sum; + memset(&wg_sum, 0, sizeof(struct window_global)); + window_global_sum(a->window_global, &wg_sum, a->window_size); + // fill after structs + fill_item_stats_automove(a->iam_after); + fill_slab_stats_automove(a->sam_after); + a->window_cur++; + + memcheck(a); + + // iterate slabs + for (n = POWER_SMALLEST; n < MAX_NUMBER_OF_SLAB_CLASSES; n++) { + bool small_slab = a->sam_before[n].chunk_size < a->item_size + ? true : false; + int w_offset = n * a->window_size; + struct window_data *wd = &a->window_data[w_offset + (a->window_cur % a->window_size)]; + // summarize the window-up-to-now. + memset(&w_sum, 0, sizeof(struct window_data)); + window_sum(&a->window_data[w_offset], &w_sum, a->window_size); + memset(wd, 0, sizeof(struct window_data)); + + // if page delta, or evicted delta, mark window dirty + // (or outofmemory) + if (a->iam_after[n].evicted - a->iam_before[n].evicted > 0 || + a->iam_after[n].outofmemory - a->iam_before[n].outofmemory > 0) { + wd->evicted = 1; + wd->dirty = 1; + } + if (a->sam_after[n].total_pages - a->sam_before[n].total_pages > 0) { + wd->dirty = 1; + } + // Mark excess free if we're over the free mem limit and the number of + // chunks aren't decreasing at all. + if (a->sam_after[n].free_chunks > a->free_mem[n] + && a->sam_after[n].free_chunks - a->sam_before[n].free_chunks >= 0) { + if (a->free_mem[n] > 0) { + wd->excess_free = 1; + } + } + + // set age into window + wd->age = a->iam_after[n].age; + + // grab age as average of window total + uint64_t age = w_sum.age / a->window_size; + + // if > N free chunks and not dirty, make decision. 
+ if (a->sam_after[n].free_chunks > a->sam_after[n].chunks_per_page * MIN_PAGES_FOR_RECLAIM) { + if (small_slab && w_sum.dirty == 0) { + *src = n; + *dst = 0; + break; + } else if (!small_slab && w_sum.excess_free >= a->window_size / 2 + && a->sam_after[n].total_pages > MIN_PAGES_FOR_SOURCE) { + // If large slab and free chunks haven't decreased for a full + // window, reclaim pages. + *src = n; + *dst = 0; + } + } + + // if oldest and have enough pages, is oldest + if (!small_slab + && age > oldest_age + && a->sam_after[n].total_pages > MIN_PAGES_FOR_SOURCE) { + oldest = n; + oldest_age = age; + } + + // don't count as youngest if it hasn't been using new chunks. + if (!small_slab && age < youngest_age && a->sam_after[n].total_pages != 0 + && w_sum.excess_free < a->window_size) { + youngest = n; + youngest_age = age; + } + } + + memcpy(a->iam_before, a->iam_after, + sizeof(item_stats_automove) * MAX_NUMBER_OF_SLAB_CLASSES); + memcpy(a->sam_before, a->sam_after, + sizeof(slab_stats_automove) * MAX_NUMBER_OF_SLAB_CLASSES); + // only make decisions if window has filled once. + if (a->window_cur < a->window_size) + return; + + if (wg_sum.pool_high && youngest != -1) { + /**src = 0; + *dst = youngest;*/ + /* TODO: No current way to directly assign page from 0 to elsewhere. + * Do a current hack by setting the youngest's free mem limiter to + * zero and re-running memcheck in the next second. + * If set rates are very high and the pool is too low, this can bottom + * out... + */ + a->last_memcheck_run = 0; + a->settings->ext_free_memchunks[youngest] = 0; + } else if (wg_sum.pool_low && oldest != -1) { + *src = oldest; + *dst = 0; + } else if (youngest != -1 && oldest != -1 && youngest != oldest) { + // if we have a youngest and oldest, and oldest is outside the ratio. 
+ if (a->sam_after[youngest].free_chunks <= a->free_mem[youngest] + && youngest_age < ((double)oldest_age * a->max_age_ratio)) { + *src = oldest; + *dst = youngest; + } + } + return; +} diff --git a/slab_automove_extstore.h b/slab_automove_extstore.h new file mode 100644 index 0000000..8b437b1 --- /dev/null +++ b/slab_automove_extstore.h @@ -0,0 +1,8 @@ +#ifndef SLAB_AUTOMOVE_EXTSTORE_H +#define SLAB_AUTOMOVE_EXTSTORE_H + +void *slab_automove_extstore_init(struct settings *settings); +void slab_automove_extstore_free(void *arg); +void slab_automove_extstore_run(void *arg, int *src, int *dst); + +#endif @@ -404,10 +404,24 @@ void fill_slab_stats_automove(slab_stats_automove *am) { cur->chunks_per_page = p->perslab; cur->free_chunks = p->sl_curr; cur->total_pages = p->slabs; + cur->chunk_size = p->size; } pthread_mutex_unlock(&slabs_lock); } +/* TODO: slabs_available_chunks should grow up to encompass this. + * mem_flag is redundant with the other function. + */ +unsigned int global_page_pool_size(bool *mem_flag) { + unsigned int ret = 0; + pthread_mutex_lock(&slabs_lock); + if (mem_flag != NULL) + *mem_flag = mem_malloced >= mem_limit ? true : false; + ret = slabclass[SLAB_GLOBAL_PAGE_POOL].slabs; + pthread_mutex_unlock(&slabs_lock); + return ret; +} + static int nz_strcmp(int nzlength, const char *nz, const char *z) { int zlength=strlen(z); return (zlength == nzlength) && (strncmp(nz, z, zlength) == 0) ? 
0 : -1; @@ -36,10 +36,12 @@ bool get_stats(const char *stat_type, int nkey, ADD_STAT add_stats, void *c); typedef struct { unsigned int chunks_per_page; + unsigned int chunk_size; long int free_chunks; long int total_pages; } slab_stats_automove; void fill_slab_stats_automove(slab_stats_automove *am); +unsigned int global_page_pool_size(bool *mem_flag); /** Fill buffer with stats */ /*@null@*/ void slabs_stats(ADD_STAT add_stats, void *c); diff --git a/t/binary-extstore.t b/t/binary-extstore.t index 9028422..390e26a 100755 --- a/t/binary-extstore.t +++ b/t/binary-extstore.t @@ -17,7 +17,7 @@ if (!supports_extstore()) { $ext_path = "/tmp/extstore.$$"; -my $server = new_memcached("-m 64 -U 0 -o ext_page_size=8,ext_page_count=8,ext_wbuf_size=2,ext_threads=1,ext_io_depth=2,ext_item_size=512,ext_item_age=2,ext_recache_rate=10000,ext_max_frag=0.9,ext_path=$ext_path,no_lru_crawler"); +my $server = new_memcached("-m 64 -U 0 -o ext_page_size=8,ext_page_count=8,ext_wbuf_size=2,ext_threads=1,ext_io_depth=2,ext_item_size=512,ext_item_age=2,ext_recache_rate=10000,ext_max_frag=0.9,ext_path=$ext_path,no_lru_crawler,slab_automove=0"); ok($server, "started the server"); # Based almost 100% off testClient.py which is: diff --git a/t/chunked-extstore.t b/t/chunked-extstore.t index f3cf903..ebd0f0b 100644 --- a/t/chunked-extstore.t +++ b/t/chunked-extstore.t @@ -18,7 +18,7 @@ if (!supports_extstore()) { $ext_path = "/tmp/extstore.$$"; -my $server = new_memcached("-m 64 -U 0 -o ext_page_size=8,ext_page_count=8,ext_wbuf_size=2,ext_threads=1,ext_io_depth=2,ext_item_size=512,ext_item_age=2,ext_recache_rate=10000,ext_max_frag=0.9,ext_path=$ext_path,slab_chunk_max=16384"); +my $server = new_memcached("-m 64 -U 0 -o ext_page_size=8,ext_page_count=8,ext_wbuf_size=2,ext_threads=1,ext_io_depth=2,ext_item_size=512,ext_item_age=2,ext_recache_rate=10000,ext_max_frag=0.9,ext_path=$ext_path,slab_chunk_max=16384,slab_automove=0"); my $sock = $server->sock; # We're testing to ensure item chaining 
doesn't corrupt or poorly overlap diff --git a/t/extstore-buckets.t b/t/extstore-buckets.t index f6143af..a2c1c90 100644 --- a/t/extstore-buckets.t +++ b/t/extstore-buckets.t @@ -17,7 +17,7 @@ if (!supports_extstore()) { $ext_path = "/tmp/extstore.$$"; -my $server = new_memcached("-m 64 -U 0 -o ext_page_size=8,ext_page_count=8,ext_wbuf_size=2,ext_threads=1,ext_io_depth=2,ext_item_size=512,ext_item_age=2,ext_recache_rate=10000,ext_max_frag=0,ext_path=$ext_path,ext_low_ttl=60"); +my $server = new_memcached("-m 256 -U 0 -o ext_page_size=8,ext_page_count=8,ext_wbuf_size=2,ext_threads=1,ext_io_depth=2,ext_item_size=512,ext_item_age=2,ext_recache_rate=10000,ext_max_frag=0,ext_path=$ext_path,ext_low_ttl=60,slab_automove=1"); my $sock = $server->sock; my $value; diff --git a/t/extstore.t b/t/extstore.t index 4963ca6..a53a5cf 100644 --- a/t/extstore.t +++ b/t/extstore.t @@ -17,7 +17,7 @@ if (!supports_extstore()) { $ext_path = "/tmp/extstore.$$"; -my $server = new_memcached("-m 64 -U 0 -o ext_page_size=8,ext_page_count=8,ext_wbuf_size=2,ext_threads=1,ext_io_depth=2,ext_item_size=512,ext_item_age=2,ext_recache_rate=10000,ext_max_frag=0.9,ext_path=$ext_path"); +my $server = new_memcached("-m 64 -U 0 -o ext_page_size=8,ext_page_count=8,ext_wbuf_size=2,ext_threads=1,ext_io_depth=2,ext_item_size=512,ext_item_age=2,ext_recache_rate=10000,ext_max_frag=0.9,ext_path=$ext_path,slab_automove=0"); my $sock = $server->sock; my $value; |