summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authordormando <dormando@rydia.net>2020-11-09 15:47:39 -0800
committerdormando <dormando@rydia.net>2020-11-20 12:19:56 -0800
commitb2cee81823ba32adf09a6666d92aee56a8d98056 (patch)
treec7e7fb4763106f2d8860eda60f34742e4f184f2f
parent2896cc6c49c2bcd064e00da8bdf0dcaf1a481e73 (diff)
downloadmemcached-b2cee81823ba32adf09a6666d92aee56a8d98056.tar.gz
item crawler hash table walk mode
specifying 'hash' instead of 'all' will make the LRU crawler iterate over every bucket in the hash table once, instead of what's in the LRU. This also doesn't suffer from missing items because of LRU reordering or high lock contention.
-rw-r--r--assoc.c69
-rw-r--r--assoc.h5
-rw-r--r--crawler.c101
-rw-r--r--doc/protocol.txt7
-rw-r--r--t/lru-crawler.t41
5 files changed, 208 insertions, 15 deletions
diff --git a/assoc.c b/assoc.c
index 1a6b845..64405a6 100644
--- a/assoc.c
+++ b/assoc.c
@@ -292,3 +292,72 @@ void stop_assoc_maintenance_thread() {
pthread_join(maintenance_tid, NULL);
}
+struct assoc_iterator {
+ unsigned int bucket;
+ bool bucket_locked;
+ item *it;
+ item *next;
+};
+
+void *assoc_get_iterator(void) {
+ struct assoc_iterator *iter = calloc(1, sizeof(struct assoc_iterator));
+ if (iter == NULL) {
+ return NULL;
+ }
+ // this will hang the caller while a hash table expansion is running.
+ mutex_lock(&maintenance_lock);
+ return iter;
+}
+
+bool assoc_iterate(void *iterp, item **it) {
+ struct assoc_iterator *iter = (struct assoc_iterator *) iterp;
+ *it = NULL;
+ // - if locked bucket and next, update next and return
+ if (iter->bucket_locked) {
+ if (iter->next != NULL) {
+ iter->it = iter->next;
+ iter->next = iter->it->h_next;
+ *it = iter->it;
+ } else {
+ // unlock previous bucket, if any
+ item_unlock(iter->bucket);
+ // iterate the bucket post since it starts at 0.
+ iter->bucket++;
+ iter->bucket_locked = false;
+ *it = NULL;
+ }
+ return true;
+ }
+
+ // - loop until we hit the end or find something.
+ if (iter->bucket != hashsize(hashpower)) {
+ // - lock next bucket
+ item_lock(iter->bucket);
+ iter->bucket_locked = true;
+ // - only check the primary hash table since expand is blocked.
+ iter->it = primary_hashtable[iter->bucket];
+ if (iter->it != NULL) {
+ // - set it, next and return
+ iter->next = iter->it->h_next;
+ *it = iter->it;
+ } else {
+ // - nothing found in this bucket, try next.
+ item_unlock(iter->bucket);
+ iter->bucket_locked = false;
+ iter->bucket++;
+ }
+ } else {
+ return false;
+ }
+
+ return true;
+}
+
+void assoc_iterate_final(void *iterp) {
+ struct assoc_iterator *iter = (struct assoc_iterator *) iterp;
+ if (iter->bucket_locked) {
+ item_unlock(iter->bucket);
+ }
+ mutex_unlock(&maintenance_lock);
+ free(iter);
+}
diff --git a/assoc.h b/assoc.h
index fb2c922..457ca25 100644
--- a/assoc.h
+++ b/assoc.h
@@ -7,5 +7,10 @@ void do_assoc_move_next_bucket(void);
int start_assoc_maintenance_thread(void);
void stop_assoc_maintenance_thread(void);
void assoc_start_expand(uint64_t curr_items);
+/* walk functions */
+void *assoc_get_iterator(void);
+bool assoc_iterate(void *iterp, item **it);
+void assoc_iterate_final(void *iterp);
+
extern unsigned int hashpower;
extern unsigned int item_lock_hashpower;
diff --git a/crawler.c b/crawler.c
index a546c39..5a24cf5 100644
--- a/crawler.c
+++ b/crawler.c
@@ -349,6 +349,62 @@ static void lru_crawler_class_done(int i) {
active_crawler_mod.mod->doneclass(&active_crawler_mod, i);
}
+static void item_crawl_hash(void) {
+ // get iterator from assoc. can hang for a long time.
+ // - blocks hash expansion
+ void *iter = assoc_get_iterator();
+ int crawls_persleep = settings.crawls_persleep;
+ item *it = NULL;
+
+ // loop while iterator returns something
+ // - iterator func handles bucket-walking
+ // - iterator returns with bucket locked.
+ while (assoc_iterate(iter, &it)) {
+ // if iterator returns true but no item, we're inbetween buckets and
+ // can do sleep or cleanup work without holding a lock.
+ if (it == NULL) {
+ // - sleep bits from orig loop
+ if (crawls_persleep-- <= 0 && settings.lru_crawler_sleep) {
+ pthread_mutex_unlock(&lru_crawler_lock);
+ usleep(settings.lru_crawler_sleep);
+ pthread_mutex_lock(&lru_crawler_lock);
+ crawls_persleep = settings.crawls_persleep;
+ } else if (!settings.lru_crawler_sleep) {
+ // TODO: only cycle lock every N?
+ pthread_mutex_unlock(&lru_crawler_lock);
+ pthread_mutex_lock(&lru_crawler_lock);
+ }
+ continue;
+ }
+
+ /* Get memory from bipbuf, if client has no space, flush. */
+ if (active_crawler_mod.c.c != NULL) {
+ int ret = lru_crawler_client_getbuf(&active_crawler_mod.c);
+ if (ret != 0) {
+ // fail out and finalize.
+ break;
+ }
+ } else if (active_crawler_mod.mod->needs_client) {
+ // fail out and finalize.
+ break;
+ }
+
+ // double check that the item isn't in a transitional state.
+ if (refcount_incr(it) < 2) {
+ refcount_decr(it);
+ continue;
+ }
+
+ // FIXME: missing hv and i are fine for metadump eval, but not fine
+ // for expire eval.
+ active_crawler_mod.mod->eval(&active_crawler_mod, it, 0, 0);
+ }
+
+ // must finalize or we leave the hash table expansion blocked.
+ assoc_iterate_final(iter);
+ return;
+}
+
static void *item_crawler_thread(void *arg) {
int i;
int crawls_persleep = settings.crawls_persleep;
@@ -361,6 +417,10 @@ static void *item_crawler_thread(void *arg) {
while (do_run_lru_crawler_thread) {
pthread_cond_wait(&lru_crawler_cond, &lru_crawler_lock);
+ if (crawler_count == -1) {
+ item_crawl_hash();
+ crawler_count = 0;
+ } else {
while (crawler_count) {
item *search = NULL;
void *hold_lock = NULL;
@@ -434,7 +494,8 @@ static void *item_crawler_thread(void *arg) {
pthread_mutex_lock(&lru_crawler_lock);
}
}
- }
+ } // while
+ } // if crawler_count
if (active_crawler_mod.mod != NULL) {
if (active_crawler_mod.mod->finalize != NULL)
@@ -553,12 +614,6 @@ static int do_lru_crawler_start(uint32_t id, uint32_t remaining) {
starts++;
}
pthread_mutex_unlock(&lru_locks[sid]);
- if (starts) {
- STATS_LOCK();
- stats_state.lru_crawler_running = true;
- stats.lru_crawler_starts++;
- STATS_UNLOCK();
- }
return starts;
}
@@ -604,6 +659,12 @@ int lru_crawler_start(uint8_t *ids, uint32_t remaining,
return -1;
}
+ /* hash table walk only supported with metadump for now. */
+ if (type != CRAWLER_METADUMP && ids == NULL) {
+ pthread_mutex_unlock(&lru_crawler_lock);
+ return -2;
+ }
+
/* Configure the module */
if (!is_running) {
assert(crawler_mod_regs[type] != NULL);
@@ -624,12 +685,25 @@ int lru_crawler_start(uint8_t *ids, uint32_t remaining,
}
}
- /* we allow the autocrawler to restart sub-LRU's before completion */
- for (int sid = POWER_SMALLEST; sid < POWER_LARGEST; sid++) {
- if (ids[sid])
- starts += do_lru_crawler_start(sid, remaining);
+ if (ids == NULL) {
+ /* NULL ids means to walk the hash table instead. */
+ starts = 1;
+ /* FIXME: hack to signal hash mode to the crawler thread.
+ * Something more clear would be nice.
+ */
+ crawler_count = -1;
+ } else {
+ /* we allow the autocrawler to restart sub-LRU's before completion */
+ for (int sid = POWER_SMALLEST; sid < POWER_LARGEST; sid++) {
+ if (ids[sid])
+ starts += do_lru_crawler_start(sid, remaining);
+ }
}
if (starts) {
+ STATS_LOCK();
+ stats_state.lru_crawler_running = true;
+ stats.lru_crawler_starts++;
+ STATS_UNLOCK();
pthread_cond_signal(&lru_crawler_cond);
}
pthread_mutex_unlock(&lru_crawler_lock);
@@ -645,6 +719,7 @@ enum crawler_result_type lru_crawler_crawl(char *slabs, const enum crawler_run_t
uint32_t sid = 0;
int starts = 0;
uint8_t tocrawl[POWER_LARGEST];
+ bool hash_crawl = false;
/* FIXME: I added this while debugging. Don't think it's needed? */
memset(tocrawl, 0, sizeof(uint8_t) * POWER_LARGEST);
@@ -652,6 +727,8 @@ enum crawler_result_type lru_crawler_crawl(char *slabs, const enum crawler_run_t
for (sid = 0; sid < POWER_LARGEST; sid++) {
tocrawl[sid] = 1;
}
+ } else if (strcmp(slabs, "hash") == 0) {
+ hash_crawl = true;
} else {
for (char *p = strtok_r(slabs, ",", &b);
p != NULL;
@@ -669,7 +746,7 @@ enum crawler_result_type lru_crawler_crawl(char *slabs, const enum crawler_run_t
}
}
- starts = lru_crawler_start(tocrawl, remaining, type, NULL, c, sfd);
+ starts = lru_crawler_start(hash_crawl ? NULL : tocrawl, remaining, type, NULL, c, sfd);
if (starts == -1) {
return CRAWLER_RUNNING;
} else if (starts == -2) {
diff --git a/doc/protocol.txt b/doc/protocol.txt
index c4cca71..9a48aa7 100644
--- a/doc/protocol.txt
+++ b/doc/protocol.txt
@@ -1071,13 +1071,18 @@ The response line could be one of:
- "BADCLASS [message]" to indicate an invalid class was specified.
-lru_crawler metadump <classid,classid,classid|all>
+lru_crawler metadump <classid,classid,classid|all|hash>
- Similar in function to the above "lru_crawler crawl" command, this function
outputs one line for every valid item found in the matching slab classes.
Similar to "cachedump", but does not lock the cache and can return all
items, not just 1MB worth.
+ If "hash" is specified instead of a classid or "all", the crawler will dump
+ items by directly walking the hash table instead of the LRUs. This makes it
+ more likely all items will be visited once as LRU reordering and locking can
+ cause frequently accessed items to be missed.
+
Lines are in "key=value key2=value2" format, with value being URI encoded
(ie: %20 for a space).
diff --git a/t/lru-crawler.t b/t/lru-crawler.t
index 2075236..eab2e6f 100644
--- a/t/lru-crawler.t
+++ b/t/lru-crawler.t
@@ -2,7 +2,7 @@
use strict;
use warnings;
-use Test::More tests => 224;
+use Test::More tests => 70257;
use FindBin qw($Bin);
use lib "$Bin/lib";
use MemcachedTest;
@@ -77,7 +77,7 @@ while (1) {
/^(key=) (\S+).*([^\r\n]+)/;
$count++;
}
- is ($count, 60);
+ is ($count, 60, "metadump all returns all items");
}
for (1 .. 30) {
@@ -86,6 +86,43 @@ for (1 .. 30) {
mem_get_is($sock, "sfoo$_", undef);
}
+# add a few more items into a different slab class
+my $mfdata = 'x' x 512;
+for (1 .. 30) {
+ print $sock "set mfoo$_ 0 0 512\r\n$mfdata\r\n";
+ is(scalar <$sock>, "STORED\r\n", "stored key");
+}
+
+# set enough small values to ensure bucket chaining happens
+# ... but not enough that hash table expansion happens.
+# TODO: check hash power level?
+my %bfoo = ();
+for (1 .. 70000) {
+ print $sock "set bfoo$_ 0 0 1 noreply\r\nz\r\n";
+ $bfoo{$_} = 1;
+}
+{
+ print $sock "version\r\n";
+ my $res = <$sock>;
+ like($res, qr/^VERSION/, "bulk sets completed");
+}
+
+# Check metadump hash table walk returns correct number of items.
+{
+ print $sock "lru_crawler metadump hash\r\n";
+ my $count = 0;
+ while (<$sock>) {
+ last if /^(\.|END)/;
+ if (/^key=bfoo(\S+)/) {
+ ok(exists $bfoo{$1}, "found bfoo key $1 is still in test hash");
+ delete $bfoo{$1};
+ }
+ $count++;
+ }
+ is ($count, 70090, "metadump hash returns all items");
+ is ((keys %bfoo), 0, "metadump found all bfoo keys");
+}
+
print $sock "lru_crawler disable\r\n";
is(scalar <$sock>, "OK\r\n", "disabled lru crawler");
my $settings_match = 0;