summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authordormando <dormando@rydia.net>2023-02-18 22:52:41 -0800
committerdormando <dormando@rydia.net>2023-02-27 21:32:08 -0800
commitd2e0d66899cc41b2a77ed15b715cad5dfe0f7740 (patch)
tree765af8f16a63d63cfa52f17fdccddccb87f70065
parenta22be2bdbff1bb80be87071aad1caf648d15722b (diff)
downloadmemcached-d2e0d66899cc41b2a77ed15b715cad5dfe0f7740.tar.gz
crawler: add "lru_crawler mgdump" command
The "metadump" command was designed primarily for doing analysis on what's in cache, but it's also used for pulling the data out for various reasons. The string format is a bit onerous: key=value (for futureproofing) and URI-encoded keys (which may or may not be binary internally). This adds a command "mgdump", which dumps keys in the format: "mg key\r\nmg key2\r\n". If a key is binary encoded, it uses the meta binary encoding scheme of base64-ing keys and appends a "b" flag: "mg 44OG44K544OI b\r\n". When the dump is complete it prints an "EN\r\n". Clients wishing to stream or fetch data can take the mg commands, strip the \r\n, append any flags they care about, then send the command back to the server to fetch the full key data. This seems to use 30-40% less CPU time on the server for the same key dumps.
-rw-r--r--crawler.c58
-rw-r--r--memcached.h2
-rw-r--r--proto_text.c35
3 files changed, 91 insertions, 4 deletions
diff --git a/crawler.c b/crawler.c
index f333585..e360081 100644
--- a/crawler.c
+++ b/crawler.c
@@ -22,6 +22,8 @@
#include <unistd.h>
#include <poll.h>
+#include "base64.h"
+
#define LARGEST_ID POWER_LARGEST
typedef struct {
@@ -81,10 +83,23 @@ crawler_module_reg_t crawler_metadump_mod = {
.needs_client = true
};
-crawler_module_reg_t *crawler_mod_regs[3] = {
+static void crawler_mgdump_eval(crawler_module_t *cm, item *search, uint32_t hv, int i);
+static void crawler_mgdump_finalize(crawler_module_t *cm);
+
+crawler_module_reg_t crawler_mgdump_mod = {
+ .init = NULL,
+ .eval = crawler_mgdump_eval,
+ .doneclass = NULL,
+ .finalize = crawler_mgdump_finalize,
+ .needs_lock = false,
+ .needs_client = true
+};
+
+crawler_module_reg_t *crawler_mod_regs[4] = {
&crawler_expired_mod,
&crawler_expired_mod,
- &crawler_metadump_mod
+ &crawler_metadump_mod,
+ &crawler_mgdump_mod,
};
static int lru_crawler_write(crawler_client_t *c);
@@ -282,6 +297,43 @@ static void crawler_metadump_finalize(crawler_module_t *cm) {
}
}
+static void crawler_mgdump_eval(crawler_module_t *cm, item *it, uint32_t hv, int i) {
+ int is_flushed = item_is_flushed(it);
+ /* Ignore expired content. */
+ if ((it->exptime != 0 && it->exptime < current_time)
+ || is_flushed) {
+ refcount_decr(it);
+ return;
+ }
+
+ char *p = cm->c.buf + cm->c.bufused; // buffer offset.
+ char *start = p;
+ memcpy(p, "mg ", 3);
+ p += 3;
+ if (it->it_flags & ITEM_KEY_BINARY) {
+ p += base64_encode((unsigned char *) ITEM_key(it), it->nkey, (unsigned char*) p, LRU_CRAWLER_MINBUFSPACE/2);
+ memcpy(p, " b\r\n", 4);
+ p += 4;
+ } else {
+ memcpy(p, ITEM_key(it), it->nkey);
+ p += it->nkey;
+ memcpy(p, "\r\n", 2);
+ p += 2;
+ }
+ int total = p - start;
+
+ refcount_decr(it);
+ cm->c.bufused += total;
+}
+
+static void crawler_mgdump_finalize(crawler_module_t *cm) {
+ if (cm->c.c != NULL) {
+ lru_crawler_write(&cm->c); // empty the write buffer
+ memcpy(cm->c.buf, "EN\r\n", 4);
+ cm->c.bufused += 4;
+ }
+}
+
// write the whole buffer out to the client socket.
static int lru_crawler_write(crawler_client_t *c) {
unsigned int data_size = c->bufused;
@@ -687,7 +739,7 @@ int lru_crawler_start(uint8_t *ids, uint32_t remaining,
}
/* hash table walk only supported with metadump for now. */
- if (type != CRAWLER_METADUMP && ids == NULL) {
+ if (ids == NULL && type != CRAWLER_METADUMP && type != CRAWLER_MGDUMP) {
pthread_mutex_unlock(&lru_crawler_lock);
return -2;
}
diff --git a/memcached.h b/memcached.h
index 79b9a81..f5d801e 100644
--- a/memcached.h
+++ b/memcached.h
@@ -602,7 +602,7 @@ typedef struct _stritem {
// TODO: If we eventually want user loaded modules, we can't use an enum :(
enum crawler_run_type {
- CRAWLER_AUTOEXPIRE=0, CRAWLER_EXPIRED, CRAWLER_METADUMP
+ CRAWLER_AUTOEXPIRE=0, CRAWLER_EXPIRED, CRAWLER_METADUMP, CRAWLER_MGDUMP
};
typedef struct {
diff --git a/proto_text.c b/proto_text.c
index adb38a6..94a556f 100644
--- a/proto_text.c
+++ b/proto_text.c
@@ -2649,6 +2649,41 @@ static void process_lru_crawler_command(conn *c, token_t *tokens, const size_t n
break;
}
return;
+ } else if (ntokens == 4 && strcmp(tokens[COMMAND_TOKEN + 1].value, "mgdump") == 0) {
+ if (settings.lru_crawler == false) {
+ out_string(c, "CLIENT_ERROR lru crawler disabled");
+ return;
+ }
+ if (!settings.dump_enabled) {
+ out_string(c, "ERROR key dump not allowed");
+ return;
+ }
+ if (resp_has_stack(c)) {
+ out_string(c, "ERROR cannot pipeline other commands before mgdump");
+ return;
+ }
+
+ int rv = lru_crawler_crawl(tokens[2].value, CRAWLER_MGDUMP,
+ c, c->sfd, LRU_CRAWLER_CAP_REMAINING);
+ switch(rv) {
+ case CRAWLER_OK:
+ conn_set_state(c, conn_watch);
+ event_del(&c->event);
+ break;
+ case CRAWLER_RUNNING:
+ out_string(c, "BUSY currently processing crawler request");
+ break;
+ case CRAWLER_BADCLASS:
+ out_string(c, "BADCLASS invalid class id");
+ break;
+ case CRAWLER_NOTSTARTED:
+ out_string(c, "NOTSTARTED no items to crawl");
+ break;
+ case CRAWLER_ERROR:
+ out_string(c, "ERROR an unknown error happened");
+ break;
+ }
+ return;
} else if (ntokens == 4 && strcmp(tokens[COMMAND_TOKEN + 1].value, "tocrawl") == 0) {
uint32_t tocrawl;
if (!safe_strtoul(tokens[2].value, &tocrawl)) {