summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: dormando <dormando@rydia.net> 2018-07-16 21:58:43 -0700
committer: dormando <dormando@rydia.net> 2018-08-06 15:11:04 -0700
commit: 89bf7ab1cfea2c24d08b9de697215ac7f61a0362 (patch)
tree: 25bdbe36326abb864f02caec690c4f4893d65326
parent: 954f4e044b3f1641da66910e4564cd91dfb83712 (diff)
download: memcached-89bf7ab1cfea2c24d08b9de697215ac7f61a0362.tar.gz
extstore JBOD support
Just a Bunch Of Devices :P

Code exists for routing specific devices to specific buckets
(lowttl/compact/etc), but enabling it requires significant fixes to the
compaction algorithm. Thus it is disabled as of this writing.

Code cleanups and future work:
- pedantically freeing memory and closing fd's on exit
- unify and flatten the free_bucket code
- defines for free buckets
- page eviction adjustment (force min-free per free bucket)
- fix default calculation for compact_under and drop_under
- might require forcing this value only on default bucket
-rw-r--r-- extstore.c 124
-rw-r--r-- extstore.h 16
-rw-r--r-- memcached.c 38
-rw-r--r-- storage.c 91
-rw-r--r-- storage.h 1
-rwxr-xr-x t/binary-extstore.t 2
-rw-r--r-- t/chunked-extstore.t 2
-rw-r--r-- t/error-extstore.t 2
-rw-r--r-- t/extstore-buckets.t 2
-rw-r--r-- t/extstore-jbod.t 69
-rw-r--r-- t/extstore.t 2
-rw-r--r-- t/lib/MemcachedTest.pm 22
12 files changed, 312 insertions, 59 deletions
diff --git a/extstore.c b/extstore.c
index 23f0f14..726435c 100644
--- a/extstore.c
+++ b/extstore.c
@@ -62,6 +62,7 @@ typedef struct _store_page {
unsigned int allocated;
unsigned int written; /* item offsets can be past written if wbuf not flushed */
unsigned int bucket; /* which bucket the page is linked into */
+ unsigned int free_bucket; /* which bucket this page returns to when freed */
int fd;
unsigned short id;
bool active; /* actively being written to */
@@ -95,6 +96,7 @@ struct store_engine {
store_maint_thread *maint_thread;
store_page *page_freelist;
store_page **page_buckets; /* stack of pages currently allocated to each bucket */
+ store_page **free_page_buckets; /* stack of use-case isolated free pages */
size_t page_size;
unsigned int version; /* global version counter */
unsigned int last_io_thread; /* round robin the IO threads */
@@ -102,6 +104,7 @@ struct store_engine {
unsigned int page_count;
unsigned int page_free; /* unallocated pages */
unsigned int page_bucketcount; /* count of potential page buckets */
+ unsigned int free_page_bucketcount; /* count of free page buckets */
unsigned int io_depth; /* FIXME: Might cache into thr struct */
pthread_mutex_t stats_mutex;
struct extstore_stats stats;
@@ -192,11 +195,11 @@ const char *extstore_err(enum extstore_res res) {
return rv;
}
-void *extstore_init(char *fn, struct extstore_conf *cf,
+// TODO: #define's for DEFAULT_BUCKET, FREE_VERSION, etc
+void *extstore_init(struct extstore_conf_file *fh, struct extstore_conf *cf,
enum extstore_res *res) {
int i;
- int fd;
- uint64_t offset = 0;
+ struct extstore_conf_file *f = NULL;
pthread_t thread;
if (cf->page_size % cf->wbuf_size != 0) {
@@ -227,43 +230,72 @@ void *extstore_init(char *fn, struct extstore_conf *cf,
}
e->page_size = cf->page_size;
- fd = open(fn, O_RDWR | O_CREAT | O_TRUNC, 0644);
- if (fd < 0) {
- *res = EXTSTORE_INIT_OPEN_FAIL;
+ for (f = fh; f != NULL; f = f->next) {
+ f->fd = open(f->file, O_RDWR | O_CREAT | O_TRUNC, 0644);
+ if (f->fd < 0) {
+ *res = EXTSTORE_INIT_OPEN_FAIL;
#ifdef EXTSTORE_DEBUG
- perror("open");
+ perror("open");
#endif
- free(e);
- return NULL;
+ free(e);
+ return NULL;
+ }
+ e->page_count += f->page_count;
+ f->offset = 0;
}
- e->pages = calloc(cf->page_count, sizeof(store_page));
+ e->pages = calloc(e->page_count, sizeof(store_page));
if (e->pages == NULL) {
*res = EXTSTORE_INIT_OOM;
- close(fd);
+ // FIXME: loop-close. make error label
free(e);
return NULL;
}
- for (i = 0; i < cf->page_count; i++) {
+ // interleave the pages between devices
+ f = NULL; // start at the first device.
+ for (i = 0; i < e->page_count; i++) {
+ // find next device with available pages
+ while (1) {
+ // restart the loop
+ if (f == NULL || f->next == NULL) {
+ f = fh;
+ } else {
+ f = f->next;
+ }
+ if (f->page_count) {
+ f->page_count--;
+ break;
+ }
+ }
pthread_mutex_init(&e->pages[i].mutex, NULL);
e->pages[i].id = i;
- e->pages[i].fd = fd;
- e->pages[i].offset = offset;
+ e->pages[i].fd = f->fd;
+ e->pages[i].free_bucket = f->free_bucket;
+ e->pages[i].offset = f->offset;
e->pages[i].free = true;
- offset += e->page_size;
+ f->offset += e->page_size;
}
- for (i = cf->page_count-1; i > 0; i--) {
- e->pages[i].next = e->page_freelist;
- e->page_freelist = &e->pages[i];
+ // free page buckets allows the app to organize devices by use case
+ e->free_page_buckets = calloc(cf->page_buckets, sizeof(store_page *));
+ e->page_bucketcount = cf->page_buckets;
+
+ for (i = e->page_count-1; i > 0; i--) {
e->page_free++;
+ if (e->pages[i].free_bucket == 0) {
+ e->pages[i].next = e->page_freelist;
+ e->page_freelist = &e->pages[i];
+ } else {
+ int fb = e->pages[i].free_bucket;
+ e->pages[i].next = e->free_page_buckets[fb];
+ e->free_page_buckets[fb] = &e->pages[i];
+ }
}
// 0 is magic "page is freed" version
e->version = 1;
- e->page_count = cf->page_count;
// scratch data for stats. TODO: malloc failure handle
e->stats.page_data =
calloc(e->page_count, sizeof(struct extstore_page_data));
@@ -309,6 +341,8 @@ void *extstore_init(char *fn, struct extstore_conf *cf,
pthread_cond_init(&e->maint_thread->cond, NULL);
pthread_create(&thread, NULL, extstore_maint_thread, e->maint_thread);
+ extstore_run_maint(e);
+
return (void *)e;
}
@@ -318,13 +352,25 @@ void extstore_run_maint(void *ptr) {
}
// call with *e locked
-static store_page *_allocate_page(store_engine *e, unsigned int bucket) {
+static store_page *_allocate_page(store_engine *e, unsigned int bucket,
+ unsigned int free_bucket) {
assert(!e->page_buckets[bucket] || e->page_buckets[bucket]->allocated == e->page_size);
- store_page *tmp = e->page_freelist;
- E_DEBUG("EXTSTORE: allocating new page\n");
- if (e->page_free > 0) {
- assert(e->page_freelist != NULL);
+ store_page *tmp = NULL;
+ // if a specific free bucket was requested, check there first
+ if (free_bucket != 0 && e->free_page_buckets[free_bucket] != NULL) {
+ assert(e->page_free > 0);
+ tmp = e->free_page_buckets[free_bucket];
+ e->free_page_buckets[free_bucket] = tmp->next;
+ }
+ // failing that, try the global list.
+ if (tmp == NULL && e->page_freelist != NULL) {
+ tmp = e->page_freelist;
e->page_freelist = tmp->next;
+ }
+ E_DEBUG("EXTSTORE: allocating new page\n");
+ // page_freelist can be empty if the only free pages are specialized and
+ // we didn't just request one.
+ if (e->page_free > 0 && tmp != NULL) {
tmp->next = e->page_buckets[bucket];
e->page_buckets[bucket] = tmp;
tmp->active = true;
@@ -434,7 +480,8 @@ static void _submit_wbuf(store_engine *e, store_page *p) {
* new page. best if used from a background thread that can harmlessly retry.
*/
-int extstore_write_request(void *ptr, unsigned int bucket, obj_io *io) {
+int extstore_write_request(void *ptr, unsigned int bucket,
+ unsigned int free_bucket, obj_io *io) {
store_engine *e = (store_engine *)ptr;
store_page *p;
int ret = -1;
@@ -444,7 +491,7 @@ int extstore_write_request(void *ptr, unsigned int bucket, obj_io *io) {
pthread_mutex_lock(&e->mutex);
p = e->page_buckets[bucket];
if (!p) {
- p = _allocate_page(e, bucket);
+ p = _allocate_page(e, bucket, free_bucket);
}
pthread_mutex_unlock(&e->mutex);
if (!p)
@@ -458,7 +505,7 @@ int extstore_write_request(void *ptr, unsigned int bucket, obj_io *io) {
((!p->wbuf || p->wbuf->full) && p->allocated >= e->page_size)) {
pthread_mutex_unlock(&p->mutex);
pthread_mutex_lock(&e->mutex);
- _allocate_page(e, bucket);
+ _allocate_page(e, bucket, free_bucket);
pthread_mutex_unlock(&e->mutex);
return ret;
}
@@ -764,8 +811,14 @@ static void _free_page(store_engine *e, store_page *p) {
p->closed = false;
p->free = true;
// add to page stack
- p->next = e->page_freelist;
- e->page_freelist = p;
+ // TODO: free_page_buckets first class and remove redundancy?
+ if (p->free_bucket != 0) {
+ p->next = e->free_page_buckets[p->free_bucket];
+ e->free_page_buckets[p->free_bucket] = p;
+ } else {
+ p->next = e->page_freelist;
+ e->page_freelist = p;
+ }
e->page_free++;
pthread_mutex_unlock(&e->mutex);
}
@@ -797,7 +850,9 @@ static void *extstore_maint_thread(void *arg) {
pthread_cond_wait(&me->cond, &me->mutex);
pthread_mutex_lock(&e->mutex);
- if (e->page_free == 0) {
+ // default freelist requires at least one page free.
+ // specialized freelists fall back to default once full.
+ if (e->page_free == 0 || e->page_freelist == NULL) {
do_evict = true;
}
pthread_mutex_unlock(&e->mutex);
@@ -806,6 +861,7 @@ static void *extstore_maint_thread(void *arg) {
for (i = 0; i < e->page_count; i++) {
store_page *p = &e->pages[i];
pthread_mutex_lock(&p->mutex);
+ pd[p->id].free_bucket = p->free_bucket;
if (p->active || p->free) {
pthread_mutex_unlock(&p->mutex);
continue;
@@ -814,7 +870,13 @@ static void *extstore_maint_thread(void *arg) {
pd[p->id].version = p->version;
pd[p->id].bytes_used = p->bytes_used;
pd[p->id].bucket = p->bucket;
- if (p->version < low_version) {
+ // low_version/low_page are only used in the eviction
+ // scenario. when we evict, it's only to fill the default page
+ // bucket again.
+ // TODO: experiment with allowing evicting up to a single page
+ // for any specific free bucket. this is *probably* required
+ // since it could cause a load bias on default-only devices?
+ if (p->free_bucket == 0 && p->version < low_version) {
low_version = p->version;
low_page = i;
}
diff --git a/extstore.h b/extstore.h
index a466562..6814415 100644
--- a/extstore.h
+++ b/extstore.h
@@ -8,6 +8,7 @@ struct extstore_page_data {
uint64_t version;
uint64_t bytes_used;
unsigned int bucket;
+ unsigned int free_bucket;
};
/* Pages can have objects deleted from them at any time. This creates holes
@@ -43,12 +44,23 @@ struct extstore_conf {
unsigned int page_size; // ideally 64-256M in size
unsigned int page_count;
unsigned int page_buckets; // number of different writeable pages
+ unsigned int free_page_buckets; // buckets of dedicated pages (see code)
unsigned int wbuf_size; // must divide cleanly into page_size
unsigned int wbuf_count; // this might get locked to "2 per active page"
unsigned int io_threadcount;
unsigned int io_depth; // with normal I/O, hits locks less. req'd for AIO
};
+struct extstore_conf_file { // describes one backing file/device; entries are chained through `next`
+ unsigned int page_count; // pages this file contributes; extstore_init() sums it, then decrements it while handing out pages
+ char *file; // path that extstore_init() passes to open()
+ int fd; // internal usage: descriptor stored by extstore_init()
+ uint64_t offset; // internal usage: offset of the next page in this file, advanced by page_size per page
+ unsigned int bucket; // free page bucket (NOTE(review): not referenced by the code visible in this change -- confirm intent)
+ unsigned int free_bucket; // specialized free bucket; copied onto each page, where 0 selects the default freelist
+ struct extstore_conf_file *next; // next entry in the list; NULL terminates it
+};
+
enum obj_io_mode {
OBJ_IO_READ = 0,
OBJ_IO_WRITE,
@@ -87,8 +99,8 @@ enum extstore_res {
};
const char *extstore_err(enum extstore_res res);
-void *extstore_init(char *fn, struct extstore_conf *cf, enum extstore_res *res);
-int extstore_write_request(void *ptr, unsigned int bucket, obj_io *io);
+void *extstore_init(struct extstore_conf_file *fh, struct extstore_conf *cf, enum extstore_res *res);
+int extstore_write_request(void *ptr, unsigned int bucket, unsigned int free_bucket, obj_io *io);
void extstore_write(void *ptr, obj_io *io);
int extstore_submit(void *ptr, obj_io *io);
/* count are the number of objects being removed, bytes are the original
diff --git a/memcached.c b/memcached.c
index bacf1a7..7aea26e 100644
--- a/memcached.c
+++ b/memcached.c
@@ -3401,6 +3401,8 @@ static void process_extstore_stats(ADD_STAT add_stats, conn *c) {
(unsigned long long) st.page_data[i].bytes_used);
APPEND_NUM_STAT(i, "bucket", "%u",
st.page_data[i].bucket);
+ APPEND_NUM_STAT(i, "free_bucket", "%u",
+ st.page_data[i].free_bucket);
}
}
#endif
@@ -6262,8 +6264,8 @@ static void usage(void) {
#endif
#ifdef EXTSTORE
" - ext_path: file to write to for external storage.\n"
+ " ie: ext_path=/mnt/d1/extstore:1G\n"
" - ext_page_size: size in megabytes of storage pages.\n"
- " - ext_page_count: total number of storage pages.\n"
" - ext_wbuf_size: size in megabytes of page write buffers.\n"
" - ext_threads: number of IO threads to run.\n"
" - ext_item_size: store items larger than this (bytes)\n"
@@ -6564,7 +6566,7 @@ int main (int argc, char **argv) {
bool slab_chunk_size_changed = false;
#ifdef EXTSTORE
void *storage = NULL;
- char *storage_file = NULL;
+ struct extstore_conf_file *storage_file = NULL;
struct extstore_conf ext_cf;
#endif
char *subopts, *subopts_orig;
@@ -6611,7 +6613,6 @@ int main (int argc, char **argv) {
#endif
#ifdef EXTSTORE
EXT_PAGE_SIZE,
- EXT_PAGE_COUNT,
EXT_WBUF_SIZE,
EXT_THREADS,
EXT_IO_DEPTH,
@@ -6669,7 +6670,6 @@ int main (int argc, char **argv) {
#endif
#ifdef EXTSTORE
[EXT_PAGE_SIZE] = "ext_page_size",
- [EXT_PAGE_COUNT] = "ext_page_count",
[EXT_WBUF_SIZE] = "ext_wbuf_size",
[EXT_THREADS] = "ext_threads",
[EXT_IO_DEPTH] = "ext_io_depth",
@@ -6709,7 +6709,6 @@ int main (int argc, char **argv) {
settings.ext_drop_under = 0;
settings.slab_automove_freeratio = 0.01;
ext_cf.page_size = 1024 * 1024 * 64;
- ext_cf.page_count = 64;
ext_cf.wbuf_size = settings.ext_wbuf_size;
ext_cf.io_threadcount = 1;
ext_cf.io_depth = 1;
@@ -7235,16 +7234,6 @@ int main (int argc, char **argv) {
}
ext_cf.page_size *= 1024 * 1024; /* megabytes */
break;
- case EXT_PAGE_COUNT:
- if (subopts_value == NULL) {
- fprintf(stderr, "Missing ext_page_count argument\n");
- return 1;
- }
- if (!safe_strtoul(subopts_value, &ext_cf.page_count)) {
- fprintf(stderr, "could not parse argument to ext_page_count\n");
- return 1;
- }
- break;
case EXT_WBUF_SIZE:
if (subopts_value == NULL) {
fprintf(stderr, "Missing ext_wbuf_size argument\n");
@@ -7361,7 +7350,20 @@ int main (int argc, char **argv) {
settings.ext_drop_unread = true;
break;
case EXT_PATH:
- storage_file = strdup(subopts_value);
+ if (subopts_value) {
+ struct extstore_conf_file *tmp = storage_conf_parse(subopts_value, ext_cf.page_size);
+ if (tmp == NULL) {
+ fprintf(stderr, "failed to parse ext_path argument\n");
+ return 1;
+ }
+ if (storage_file != NULL) {
+ tmp->next = storage_file;
+ }
+ storage_file = tmp;
+ } else {
+ fprintf(stderr, "missing argument to ext_path, ie: ext_path=/d/file:5G\n");
+ return 1;
+ }
break;
#endif
case MODERN:
@@ -7635,9 +7637,9 @@ int main (int argc, char **argv) {
if (storage_file) {
enum extstore_res eres;
if (settings.ext_compact_under == 0) {
- settings.ext_compact_under = ext_cf.page_count / 4;
+ settings.ext_compact_under = storage_file->page_count / 4;
/* Only rescues non-COLD items if below this threshold */
- settings.ext_drop_under = ext_cf.page_count / 4;
+ settings.ext_drop_under = storage_file->page_count / 4;
}
crc32c_init();
/* Init free chunks to zero. */
diff --git a/storage.c b/storage.c
index 7af074d..d0fee98 100644
--- a/storage.c
+++ b/storage.c
@@ -6,6 +6,7 @@
#include <stdlib.h>
#include <string.h>
#include <limits.h>
+#include <ctype.h>
#define PAGE_BUCKET_DEFAULT 0
#define PAGE_BUCKET_COMPACT 1
@@ -50,7 +51,9 @@ static int storage_write(void *storage, const int clsid, const int item_age) {
// NOTE: when the item is read back in, the slab mover
// may see it. Important to have refcount>=2 or ~ITEM_LINKED
assert(it->refcount >= 2);
- if (extstore_write_request(storage, bucket, &io) == 0) {
+ // NOTE: write bucket vs free page bucket will disambiguate once
+ // lowttl feature is better understood.
+ if (extstore_write_request(storage, bucket, bucket, &io) == 0) {
// cuddle the hash value into the time field so we don't have
// to recalculate it.
item *buf_it = (item *) io.buf;
@@ -365,7 +368,7 @@ static void storage_compact_readback(void *storage, logger *l,
io.len = ntotal;
io.mode = OBJ_IO_WRITE;
for (tries = 10; tries > 0; tries--) {
- if (extstore_write_request(storage, PAGE_BUCKET_COMPACT, &io) == 0) {
+ if (extstore_write_request(storage, PAGE_BUCKET_COMPACT, PAGE_BUCKET_COMPACT, &io) == 0) {
memcpy(io.buf, it, io.len);
extstore_write(storage, &io);
do_update = true;
@@ -561,4 +564,88 @@ int start_storage_compact_thread(void *arg) {
return 0;
}
+/*** UTILITY ***/
+// Parses one ext_path value shaped like /path/to/file:100G[:bucket] into a newly allocated extstore_conf_file; returns NULL on a parse error.
+// FIXME: Modifies argument (strtok_r and the unit stripping below write NULs into arg). copy instead?
+struct extstore_conf_file *storage_conf_parse(char *arg, unsigned int page_size) {
+ struct extstore_conf_file *cf = NULL;
+ char *b = NULL;
+ char *p = strtok_r(arg, ":", &b);
+ char unit = 0;
+ uint64_t multiplier = 0;
+ int base_size = 0;
+ if (p == NULL)
+ goto error;
+ // First arg is the filepath.
+ cf = calloc(1, sizeof(struct extstore_conf_file)); // NOTE(review): result is not NULL-checked before cf->file is assigned
+ cf->file = strdup(p);
+
+ p = strtok_r(NULL, ":", &b);
+ if (p == NULL) {
+ fprintf(stderr, "must supply size to ext_path, ie: ext_path=/f/e:64m (M|G|T|P supported)\n");
+ goto error;
+ }
+ unit = tolower(p[strlen(p)-1]); // last character of the size token is the unit letter
+ p[strlen(p)-1] = '\0'; // strip the unit so only the number remains
+ // sigh. map the unit letter to bytes. NOTE(review): an unrecognized letter leaves multiplier at 0, so page_count becomes 0 with no error.
+ switch (unit) {
+ case 'm':
+ multiplier = 1024 * 1024;
+ break;
+ case 'g':
+ multiplier = 1024 * 1024 * 1024;
+ break;
+ case 't':
+ multiplier = 1024 * 1024;
+ multiplier *= 1024 * 1024; // 1024^4
+ break;
+ case 'p':
+ multiplier = 1024 * 1024;
+ multiplier *= 1024 * 1024 * 1024; // 1024^5
+ break;
+ }
+ base_size = atoi(p); // NOTE(review): atoi() reports no errors; non-numeric input yields 0 and a negative value wraps once multiplied into the uint64_t
+ multiplier *= base_size; // multiplier now holds the requested size in bytes
+ // page_count is the number of whole pages that fit in the requested size (rounded down). NOTE(review): page_size == 0 is not guarded here.
+ cf->page_count = multiplier / page_size;
+ assert(page_size * cf->page_count <= multiplier); // NOTE(review): the product is unsigned int arithmetic and can wrap where unsigned int is 32 bits
+
+ // final token would be a default free bucket (compact/lowttl/chunked/default)
+ p = strtok_r(NULL, ",", &b); // note: "," is the delimiter here, unlike ":" for the earlier tokens
+ // TODO: We reuse the original DEFINES for now,
+ // but if lowttl gets split up this needs to be its own set.
+ if (p != NULL) {
+ if (strcmp(p, "compact") == 0) {
+ cf->free_bucket = PAGE_BUCKET_COMPACT;
+ } else if (strcmp(p, "lowttl") == 0) {
+ cf->free_bucket = PAGE_BUCKET_LOWTTL;
+ } else if (strcmp(p, "chunked") == 0) {
+ cf->free_bucket = PAGE_BUCKET_CHUNKED;
+ } else if (strcmp(p, "default") == 0) {
+ cf->free_bucket = PAGE_BUCKET_DEFAULT;
+ } else {
+ fprintf(stderr, "Unknown extstore bucket: %s\n", p);
+ goto error;
+ }
+ } else {
+ // TODO: is this necessary? (calloc already zeroed cf, and PAGE_BUCKET_DEFAULT is defined as 0 in this file)
+ cf->free_bucket = PAGE_BUCKET_DEFAULT;
+ }
+
+ // TODO: disabling until compact algorithm is improved. Until then any non-default bucket is rejected.
+ if (cf->free_bucket != PAGE_BUCKET_DEFAULT) {
+ fprintf(stderr, "ext_path only presently supports the default bucket\n");
+ goto error;
+ }
+
+ return cf;
+error: // release the partially built entry, if any
+ if (cf) {
+ if (cf->file)
+ free(cf->file);
+ free(cf);
+ }
+ return NULL;
+}
+
#endif
diff --git a/storage.h b/storage.h
index 875962c..60f499b 100644
--- a/storage.h
+++ b/storage.h
@@ -7,6 +7,7 @@ void storage_write_resume(void);
int start_storage_compact_thread(void *arg);
void storage_compact_pause(void);
void storage_compact_resume(void);
+struct extstore_conf_file *storage_conf_parse(char *arg, unsigned int page_size);
// Ignore pointers and header bits from the CRC
#define STORE_OFFSET offsetof(item, nbytes)
diff --git a/t/binary-extstore.t b/t/binary-extstore.t
index c71a81f..676d13b 100755
--- a/t/binary-extstore.t
+++ b/t/binary-extstore.t
@@ -17,7 +17,7 @@ if (!supports_extstore()) {
$ext_path = "/tmp/extstore.$$";
-my $server = new_memcached("-m 64 -U 0 -o ext_page_size=8,ext_page_count=8,ext_wbuf_size=2,ext_threads=1,ext_io_depth=2,ext_item_size=512,ext_item_age=2,ext_recache_rate=10000,ext_max_frag=0.9,ext_path=$ext_path,no_lru_crawler,slab_automove=0");
+my $server = new_memcached("-m 64 -U 0 -o ext_page_size=8,ext_wbuf_size=2,ext_threads=1,ext_io_depth=2,ext_item_size=512,ext_item_age=2,ext_recache_rate=10000,ext_max_frag=0.9,ext_path=$ext_path:64m,no_lru_crawler,slab_automove=0");
ok($server, "started the server");
# Based almost 100% off testClient.py which is:
diff --git a/t/chunked-extstore.t b/t/chunked-extstore.t
index 30645da..3515f21 100644
--- a/t/chunked-extstore.t
+++ b/t/chunked-extstore.t
@@ -18,7 +18,7 @@ if (!supports_extstore()) {
$ext_path = "/tmp/extstore.$$";
-my $server = new_memcached("-m 64 -U 0 -o ext_page_size=8,ext_page_count=8,ext_wbuf_size=2,ext_threads=1,ext_io_depth=2,ext_item_size=512,ext_item_age=2,ext_recache_rate=10000,ext_max_frag=0.9,ext_path=$ext_path,slab_chunk_max=16384,slab_automove=0,ext_compact_under=1");
+my $server = new_memcached("-m 64 -U 0 -o ext_page_size=8,ext_wbuf_size=2,ext_threads=1,ext_io_depth=2,ext_item_size=512,ext_item_age=2,ext_recache_rate=10000,ext_max_frag=0.9,ext_path=$ext_path:64m,slab_chunk_max=16384,slab_automove=0,ext_compact_under=1");
my $sock = $server->sock;
# Wait until all items have flushed
diff --git a/t/error-extstore.t b/t/error-extstore.t
index 623ca80..6df1528 100644
--- a/t/error-extstore.t
+++ b/t/error-extstore.t
@@ -20,7 +20,7 @@ if (!supports_extstore()) {
$ext_path = "/tmp/extstore.$$";
-my $server = new_memcached("-m 64 -I 4m -U 0 -o ext_page_size=8,ext_page_count=8,ext_wbuf_size=8,ext_threads=1,ext_io_depth=2,ext_item_size=512,ext_item_age=2,ext_recache_rate=10000,ext_max_frag=0.9,ext_path=$ext_path,slab_automove=0,ext_compact_under=1");
+my $server = new_memcached("-m 64 -I 4m -U 0 -o ext_page_size=8,ext_wbuf_size=8,ext_threads=1,ext_io_depth=2,ext_item_size=512,ext_item_age=2,ext_recache_rate=10000,ext_max_frag=0.9,ext_path=$ext_path:64m,slab_automove=0,ext_compact_under=1");
my $sock = $server->sock;
# Wait until all items have flushed
diff --git a/t/extstore-buckets.t b/t/extstore-buckets.t
index a2c1c90..e3027ad 100644
--- a/t/extstore-buckets.t
+++ b/t/extstore-buckets.t
@@ -17,7 +17,7 @@ if (!supports_extstore()) {
$ext_path = "/tmp/extstore.$$";
-my $server = new_memcached("-m 256 -U 0 -o ext_page_size=8,ext_page_count=8,ext_wbuf_size=2,ext_threads=1,ext_io_depth=2,ext_item_size=512,ext_item_age=2,ext_recache_rate=10000,ext_max_frag=0,ext_path=$ext_path,ext_low_ttl=60,slab_automove=1");
+my $server = new_memcached("-m 256 -U 0 -o ext_page_size=8,ext_wbuf_size=2,ext_threads=1,ext_io_depth=2,ext_item_size=512,ext_item_age=2,ext_recache_rate=10000,ext_max_frag=0,ext_path=$ext_path:64m,ext_low_ttl=60,slab_automove=1");
my $sock = $server->sock;
my $value;
diff --git a/t/extstore-jbod.t b/t/extstore-jbod.t
new file mode 100644
index 0000000..1618803
--- /dev/null
+++ b/t/extstore-jbod.t
@@ -0,0 +1,69 @@
+#!/usr/bin/perl
+
+use strict;
+use warnings;
+use Test::More;
+use FindBin qw($Bin);
+use lib "$Bin/lib";
+use MemcachedTest;
+use Data::Dumper qw/Dumper/;
+
+my $ext_path;
+my $ext_path2;
+
+if (!supports_extstore()) {
+ plan skip_all => 'extstore not enabled';
+ exit 0;
+}
+
+$ext_path = "/tmp/extstore1.$$";
+$ext_path2 = "/tmp/extstore2.$$";
+
+my $server = new_memcached("-m 256 -U 0 -o ext_page_size=8,ext_wbuf_size=2,ext_threads=1,ext_io_depth=2,ext_item_size=512,ext_item_age=2,ext_recache_rate=10000,ext_max_frag=0.9,ext_path=$ext_path:64m,ext_path=$ext_path2:96m,slab_automove=1"); # two ext_path entries => two backing files (JBOD)
+my $sock = $server->sock;
+
+my $value;
+{
+ my @chars = ("C".."Z");
+ for (1 .. 20000) {
+ $value .= $chars[rand @chars];
+ }
+}
+
+# fill some larger objects (each value is 20000 random characters)
+{
+ # interleave two key families (nfoo*/lfoo*); both are stored with flags 0 and exptime 0.
+ my $keycount = 3700;
+ for (1 .. $keycount) {
+ print $sock "set nfoo$_ 0 0 20000 noreply\r\n$value\r\n";
+ print $sock "set lfoo$_ 0 0 20000 noreply\r\n$value\r\n";
+ }
+ # wait for a flush (wait_ext_flush is exported by MemcachedTest)
+ wait_ext_flush($sock);
+ # read one item back, then delete half (every lfoo* key)
+ mem_get_is($sock, "nfoo1", $value);
+ for (1 .. $keycount) {
+ print $sock "delete lfoo$_ noreply\r\n";
+ }
+ print $sock "lru_crawler crawl all\r\n";
+ <$sock>; # consume the one-line reply to the crawl command
+ sleep 10; # presumably gives background threads time to reclaim pages -- confirm
+ # fetch (NOTE(review): nothing is fetched after this point; the read counters below reflect the nfoo1 get above)
+ # check extstore counters
+ my $stats = mem_stats($sock);
+ is($stats->{evictions}, 0, 'no RAM evictions');
+ cmp_ok($stats->{extstore_page_allocs}, '>', 0, 'at least one page allocated');
+ cmp_ok($stats->{extstore_objects_written}, '>', $keycount / 2, 'some objects written');
+ cmp_ok($stats->{extstore_bytes_written}, '>', length($value) * 2, 'some bytes written');
+ cmp_ok($stats->{get_extstore}, '>', 0, 'one object was fetched');
+ cmp_ok($stats->{extstore_objects_read}, '>', 0, 'one object read');
+ cmp_ok($stats->{extstore_bytes_read}, '>', length($value), 'some bytes read');
+ cmp_ok($stats->{extstore_page_reclaims}, '>', 1, 'at least two pages reclaimed');
+}
+
+done_testing();
+
+END {
+ unlink $ext_path if $ext_path;
+ unlink $ext_path2 if $ext_path2;
+}
diff --git a/t/extstore.t b/t/extstore.t
index 782eaaa..1790a54 100644
--- a/t/extstore.t
+++ b/t/extstore.t
@@ -17,7 +17,7 @@ if (!supports_extstore()) {
$ext_path = "/tmp/extstore.$$";
-my $server = new_memcached("-m 64 -U 0 -o ext_page_size=8,ext_page_count=8,ext_wbuf_size=2,ext_threads=1,ext_io_depth=2,ext_item_size=512,ext_item_age=2,ext_recache_rate=10000,ext_max_frag=0.9,ext_path=$ext_path,slab_automove=0,ext_compact_under=1");
+my $server = new_memcached("-m 64 -U 0 -o ext_page_size=8,ext_wbuf_size=2,ext_threads=1,ext_io_depth=2,ext_item_size=512,ext_item_age=2,ext_recache_rate=10000,ext_max_frag=0.9,ext_path=$ext_path:64m,slab_automove=0,ext_compact_under=1");
my $sock = $server->sock;
# Wait until all items have flushed
diff --git a/t/lib/MemcachedTest.pm b/t/lib/MemcachedTest.pm
index 4e1da66..416daaf 100644
--- a/t/lib/MemcachedTest.pm
+++ b/t/lib/MemcachedTest.pm
@@ -14,13 +14,33 @@ my $builddir = getcwd;
my @unixsockets = ();
@EXPORT = qw(new_memcached sleep mem_get_is mem_gets mem_gets_is mem_stats
- supports_sasl free_port supports_drop_priv supports_extstore);
+ supports_sasl free_port supports_drop_priv supports_extstore
+ wait_ext_flush);
sub sleep {
my $n = shift;
select undef, undef, undef, $n;
}
+# Wait until all items have flushed: poll "stats items" until the summed items:N:number counts for classes N >= 3 are at or below $target (default 0).
+sub wait_ext_flush {
+ my $sock = shift;
+ my $target = shift || 0;
+ my $sum = $target + 1; # start above the target so the loop body runs at least once
+ while ($sum > $target) {
+ my $s = mem_stats($sock, "items");
+ $sum = 0;
+ for my $key (keys %$s) {
+ if ($key =~ m/items:(\d+):number/) {
+ # Ignore classes which can contain extstore items (classes 1 and 2 are skipped)
+ next if $1 < 3;
+ $sum += $s->{$key};
+ }
+ }
+ sleep 1 if $sum > $target; # pause between polls; skipped once the target is reached
+ }
+}
+
sub mem_stats {
my ($sock, $type) = @_;
$type = $type ? " $type" : "";