summaryrefslogtreecommitdiff
path: root/crawler.c
diff options
context:
space:
mode:
authordormando <dormando@rydia.net>2023-02-18 22:52:41 -0800
committerdormando <dormando@rydia.net>2023-02-27 21:32:08 -0800
commitd2e0d66899cc41b2a77ed15b715cad5dfe0f7740 (patch)
tree765af8f16a63d63cfa52f17fdccddccb87f70065 /crawler.c
parenta22be2bdbff1bb80be87071aad1caf648d15722b (diff)
downloadmemcached-d2e0d66899cc41b2a77ed15b715cad5dfe0f7740.tar.gz
crawler: add "lru_crawler mgdump" command
The "metadump" command was designed primarily for doing analysis on what's in cache, but it's also used for pulling the data out for various reasons. The string format is a bit onerous: key=value (for futureproofing) and URI encoded keys (which may or may not be binary internally). This adds a command "mgdump", which dumps keys in the format: "mg key\r\nmg key2\r\n". If a key is binary encoded, it uses the meta binary encoding scheme of base64-ing keys and appends a "b" flag: "mg 44OG44K544OI b\r\n". When the dump is complete it prints an "EN\r\n". Clients wishing to stream or fetch data can take the mg commands, strip the \r\n, append any flags they care about, then send the command back to the server to fetch the full key data. This seems to use 30-40% less CPU time on the server for the same key dumps.
Diffstat (limited to 'crawler.c')
-rw-r--r--crawler.c58
1 files changed, 55 insertions, 3 deletions
diff --git a/crawler.c b/crawler.c
index f333585..e360081 100644
--- a/crawler.c
+++ b/crawler.c
@@ -22,6 +22,8 @@
#include <unistd.h>
#include <poll.h>
+#include "base64.h"
+
#define LARGEST_ID POWER_LARGEST
typedef struct {
@@ -81,10 +83,23 @@ crawler_module_reg_t crawler_metadump_mod = {
.needs_client = true
};
-crawler_module_reg_t *crawler_mod_regs[3] = {
+static void crawler_mgdump_eval(crawler_module_t *cm, item *search, uint32_t hv, int i);
+static void crawler_mgdump_finalize(crawler_module_t *cm);
+
+crawler_module_reg_t crawler_mgdump_mod = { // callback table for the "mgdump" crawler module
+ .init = NULL, // no init callback is registered
+ .eval = crawler_mgdump_eval, // per-item callback: appends one "mg <key>" line to the client buffer
+ .doneclass = NULL, // no doneclass callback is registered
+ .finalize = crawler_mgdump_finalize, // flushes pending output and queues the closing "EN" line
+ .needs_lock = false, // NOTE(review): which lock this refers to is not visible here - confirm
+ .needs_client = true // NOTE(review): presumably the module requires an attached client to write to - confirm
+};
+
+crawler_module_reg_t *crawler_mod_regs[4] = {
&crawler_expired_mod,
&crawler_expired_mod,
- &crawler_metadump_mod
+ &crawler_metadump_mod,
+ &crawler_mgdump_mod,
};
static int lru_crawler_write(crawler_client_t *c);
@@ -282,6 +297,43 @@ static void crawler_metadump_finalize(crawler_module_t *cm) {
}
}
+static void crawler_mgdump_eval(crawler_module_t *cm, item *it, uint32_t hv, int i) { // appends "mg <key>[ b]" + CRLF for a live item to cm->c.buf; hv and i are unused; the item reference is dropped on every path
+ int is_flushed = item_is_flushed(it); // flushed items are skipped the same way as expired ones below
+ /* Ignore expired content. */
+ if ((it->exptime != 0 && it->exptime < current_time) // an exptime of 0 is never treated as expired here
+ || is_flushed) {
+ refcount_decr(it); // release the reference before skipping; nothing is written for this item
+ return;
+ }
+
+ char *p = cm->c.buf + cm->c.bufused; // write cursor: first unused byte of the client buffer
+ char *start = p; // kept so the number of bytes appended can be computed at the end
+ memcpy(p, "mg ", 3); // command prefix, copied without a NUL terminator
+ p += 3;
+ if (it->it_flags & ITEM_KEY_BINARY) { // binary keys are emitted base64-encoded, followed by a "b" flag
+ p += base64_encode((unsigned char *) ITEM_key(it), it->nkey, (unsigned char*) p, LRU_CRAWLER_MINBUFSPACE/2); // NOTE(review): assumes the return value is the number of bytes written and that the last argument caps the output size - confirm against base64.h
+ memcpy(p, " b\r\n", 4); // flag marking the key as base64, then the line terminator
+ p += 4;
+ } else {
+ memcpy(p, ITEM_key(it), it->nkey); // non-binary keys are copied verbatim, nkey bytes
+ p += it->nkey;
+ memcpy(p, "\r\n", 2); // line terminator
+ p += 2;
+ }
+ int total = p - start; // bytes appended for this item
+
+ refcount_decr(it); // done reading the item; key bytes were already copied out
+ cm->c.bufused += total; // commit the appended bytes; NOTE(review): no free-space check is made here, presumably the caller guarantees room - confirm
+}
+
+static void crawler_mgdump_finalize(crawler_module_t *cm) { // ends the dump: flushes buffered output, then queues the "EN" terminator line
+ if (cm->c.c != NULL) { // nothing is done when c.c is NULL; NOTE(review): presumably means no client is attached - confirm
+ lru_crawler_write(&cm->c); // empty the write buffer; the return value is ignored
+ memcpy(cm->c.buf, "EN\r\n", 4); // written at offset 0 of buf; NOTE(review): assumes the write above left bufused at 0 - confirm
+ cm->c.bufused += 4; // account for the 4 terminator bytes just queued
+ }
+}
+
// write the whole buffer out to the client socket.
static int lru_crawler_write(crawler_client_t *c) {
unsigned int data_size = c->bufused;
@@ -687,7 +739,7 @@ int lru_crawler_start(uint8_t *ids, uint32_t remaining,
}
/* hash table walk only supported with metadump for now. */
- if (type != CRAWLER_METADUMP && ids == NULL) {
+ if (ids == NULL && type != CRAWLER_METADUMP && type != CRAWLER_MGDUMP) {
pthread_mutex_unlock(&lru_crawler_lock);
return -2;
}