6 files changed, 300 insertions, 29 deletions
diff --git a/Makefile.am b/Makefile.am
index 8ce0c2c..cf4b31c 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -20,6 +20,7 @@ memcached_SOURCES = memcached.c memcached.h \
                     util.c util.h \
                     trace.h cache.c cache.h sasl_defs.h \
                     bipbuffer.c bipbuffer.h \
+                    base64.c base64.h \
                     logger.c logger.h \
                     crawler.c crawler.h \
                     itoa_ljust.c itoa_ljust.h \
diff --git a/base64.c b/base64.c
new file mode 100644
index 0000000..420ac4b
--- /dev/null
+++ b/base64.c
@@ -0,0 +1,205 @@
+/*
+ * Base64 encoding/decoding (RFC1341)
+ * Copyright (c) 2005-2011, Jouni Malinen <j@w1.fi>
+ * Modified by Dormando
+ *
+ * This software may be distributed under the terms of the BSD license.
+ * Original license included below:
+ *
+License
+-------
+
+This software may be distributed, used, and modified under the terms of
+BSD license:
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+1. Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+3. Neither the name(s) of the above-listed copyright holder(s) nor the
+   names of its contributors may be used to endorse or promote products
+   derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* Changes from original code:
+ * - decode table is pre-generated
+ * - no line splitting on encoder
+ * - output buffers are passed in instead of malloc'ed
+ * - returns encoded/decoded length instead of pointer.
+ */
+
+#include <stddef.h>
+#include "base64.h"
+
+static const unsigned char base64_table[65] = /* 64 symbols + string nul */
+    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+
+/* Decode table, indexed by input byte. The original decode function built it
+ * on every call; it was printed once with that code and pre-generated here.
+ * 128 (0x80) marks a byte outside the alphabet; '=' is stored as 0. */
+static const unsigned char dtable[256] = {
+128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+62, 128, 128, 128, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60,
+61, 128, 128, 128, 0, 128, 128, 128, 0, 1, 2, 3, 4, 5,
+6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+20, 21, 22, 23, 24, 25, 128, 128, 128, 128, 128, 128, 26, 27,
+28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
+42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 128, 128, 128, 128,
+128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+128, 128, 128
+};
+
+/**
+ * base64_encode - Base64 encode
+ * @src: Data to be encoded
+ * @len: Length of the data to be encoded
+ * @out: output buffer
+ * @out_len: length of output buffer; must hold 4 * ceil(len / 3) + 1 bytes
+ * Returns: Number of actual bytes encoded into the buffer
+ * or 0 on failure (NULL out, out_len too small, or size overflow)
+ *
+ * Output buffer is nul terminated to make it easier to use as a C string.
+ * The nul terminator is not included in the return length.
+ */
+size_t base64_encode(const unsigned char *src, size_t len,
+                  unsigned char *out, size_t out_len)
+{
+    unsigned char *pos;
+    const unsigned char *end, *in;
+    size_t groups, olen;
+
+    /* No line feeds are emitted: 4 chars per started 3-byte block + nul. */
+    groups = len / 3 + (len % 3 != 0);
+    if (groups > ((size_t)-1 - 1) / 4) {
+        return 0; /* integer overflow */
+    }
+    olen = groups * 4 + 1;
+    if (olen > out_len) {
+        return 0; /* not enough space in output buffer */
+    }
+    if (out == NULL) {
+        return 0;
+    }
+
+    end = src + len;
+    in = src;
+    pos = out;
+    while (end - in >= 3) { /* full 3-byte blocks -> 4 output chars each */
+        *pos++ = base64_table[in[0] >> 2];
+        *pos++ = base64_table[((in[0] & 0x03) << 4) | (in[1] >> 4)];
+        *pos++ = base64_table[((in[1] & 0x0f) << 2) | (in[2] >> 6)];
+        *pos++ = base64_table[in[2] & 0x3f];
+        in += 3;
+    }
+
+    if (end - in) { /* 1 or 2 leftover bytes: emit a '='-padded final group */
+        *pos++ = base64_table[in[0] >> 2];
+        if (end - in == 1) {
+            *pos++ = base64_table[(in[0] & 0x03) << 4];
+            *pos++ = '=';
+        } else {
+            *pos++ = base64_table[((in[0] & 0x03) << 4) |
+                          (in[1] >> 4)];
+            *pos++ = base64_table[(in[1] & 0x0f) << 2];
+        }
+        *pos++ = '=';
+    }
+
+    *pos = '\0';
+    return pos - out;
+}
+
+
+/**
+ * base64_decode - Base64 decode
+ * @src: Data to be decoded; bytes outside the base64 alphabet are skipped
+ * @len: Length of the data to be decoded
+ * @out: Output buffer to decode into
+ * @out_len: Length of output buffer; needs 3 bytes per 4 alphabet characters
+ * Returns: Length of decoded data, or 0 on failure
+ */
+size_t base64_decode(const unsigned char *src, size_t len,
+                  unsigned char *out, size_t out_len)
+{
+    unsigned char *pos, block[4], tmp;
+    size_t i, count, olen;
+    int pad = 0;
+
+    count = 0; /* first pass: count alphabet characters, '=' included */
+    for (i = 0; i < len; i++) {
+        if (dtable[src[i]] != 0x80)
+            count++;
+    }
+
+    if (count == 0 || count % 4) /* input must form whole 4-character groups */
+        return 0;
+
+    olen = count / 4 * 3; /* upper bound: padding is not subtracted here */
+    if (olen > out_len) {
+        return 0;
+    }
+    pos = out;
+    if (out == NULL) {
+        return 0;
+    }
+
+    count = 0; /* second pass: reused as the fill level of block[] */
+    for (i = 0; i < len; i++) {
+        tmp = dtable[src[i]];
+        if (tmp == 0x80)
+            continue;
+
+        if (src[i] == '=') /* '=' maps to 0 in dtable, so count it separately */
+            pad++;
+        block[count] = tmp;
+        count++;
+        if (count == 4) { /* group fully read; writes trail reads, so out may alias src */
+            *pos++ = (block[0] << 2) | (block[1] >> 4);
+            *pos++ = (block[1] << 4) | (block[2] >> 2);
+            *pos++ = (block[2] << 6) | block[3];
+            count = 0;
+            if (pad) { /* a padded group ends the data: drop the filler bytes */
+                if (pad == 1)
+                    pos--;
+                else if (pad == 2)
+                    pos -= 2;
+                else {
+                    /* Invalid padding */
+                    return 0;
+                }
+                break;
+            }
+        }
+    }
+
+    return pos - out;
+}
diff --git a/base64.h b/base64.h
new file mode 100644
index 0000000..efbbbc8
--- /dev/null
+++ b/base64.h
@@ -0,0 +1,17 @@
+/*
+ * Base64 encoding/decoding (RFC1341)
+ * Copyright (c) 2005, Jouni Malinen <j@w1.fi>
+ *
+ * This software may be distributed under the terms of the BSD license.
+ * See base64.c for more details
+ */
+
+#ifndef BASE64_H
+#define BASE64_H
+#include <stddef.h> /* size_t; keeps this header self-contained */
+
+size_t base64_encode(const unsigned char *src, size_t len,
+                  unsigned char *out, size_t out_len); /* encoded length (excl. nul), 0 on failure */
+size_t base64_decode(const unsigned char *src, size_t len,
+                  unsigned char *out, size_t out_len); /* decoded length, 0 on failure */
+#endif /* BASE64_H */
diff --git a/memcached.h b/memcached.h
index e07119a..e569ad5 100644
--- a/memcached.h
+++ b/memcached.h
@@ -520,6 +520,8 @@ extern struct settings settings;
 #define ITEM_TOKEN_RESERVED 1024
 /* if item has been marked as a stale value */
 #define ITEM_STALE 2048
+/* if item key was sent in binary */
+#define ITEM_KEY_BINARY 4096
 
 /**
  * Structure for storing items within memcached.
diff --git a/proto_text.c b/proto_text.c
index d060b17..ce1b030 100644
--- a/proto_text.c
+++ b/proto_text.c
@@ -7,6 +7,7 @@
 #include "proto_text.h"
 #include "authfile.h"
 #include "storage.h"
+#include "base64.h"
 #ifdef TLS
 #include "tls.h"
 #endif
@@ -744,8 +745,7 @@ static void process_stat(conn *c, token_t *tokens, const size_t ntokens) {
     }
 }
 
-
-
+// TODO: if 'b' after "ME key" decode the key.
 // slow snprintf for debugging purposes.
 static void process_meta_command(conn *c, token_t *tokens, const size_t ntokens) {
     assert(c != NULL);
@@ -808,6 +808,7 @@ struct _meta_flags {
     unsigned int no_reply :1;
     unsigned int has_cas :1;
     unsigned int new_ttl :1;
+    unsigned int key_binary:1;
     char mode; // single character mode switch, common to ms/ma
     rel_time_t exptime;
     rel_time_t autoviv_exptime;
@@ -822,6 +823,7 @@ struct _meta_flags {
 static int _meta_flag_preparse(token_t *tokens, const size_t ntokens,
         struct _meta_flags *of, char **errstr) {
     unsigned int i;
+    size_t ret;
     int32_t tmp_int;
     uint8_t seen[127] = {0};
     // Start just past the key token. Look at first character of each token.
@@ -834,6 +836,19 @@ static int _meta_flag_preparse(token_t *tokens, const size_t ntokens,
         }
         seen[o] = 1;
         switch (o) {
+            // base64 decode the key in-place, as the binary should always be
+            // shorter and the conversion code buffers bytes.
+            case 'b':
+                ret = base64_decode((unsigned char *)tokens[KEY_TOKEN].value, tokens[KEY_TOKEN].length,
+                            (unsigned char *)tokens[KEY_TOKEN].value, tokens[KEY_TOKEN].length);
+                if (ret == 0) {
+                    // Failed to decode
+                    *errstr = "CLIENT_ERROR error decoding key";
+                    of->has_error = 1;
+                }
+                tokens[KEY_TOKEN].length = ret;
+                of->key_binary = 1;
+                break;
             /* Negative exptimes can underflow and end up immortal. realtime() will
                immediately expire values that are greater than REALTIME_MAXDELTA, but less
                than process_started, so lets aim for that. */
@@ -961,6 +976,20 @@ static int _meta_flag_preparse(token_t *tokens, const size_t ntokens,
     p += 2; \
 }
 
+// Emit the 'k' flag via META_CHAR, then the key at p, advancing p. When bin is
+// non-zero the key is written base64-encoded and followed by a " b" flag.
+// TODO: calc bytes remaining in buffer
+#define META_KEY(p, key, nkey, bin) do { \
+    META_CHAR(p, 'k'); \
+    if (!(bin)) { \
+        memcpy((p), (key), (nkey)); \
+        (p) += (nkey); \
+    } else { \
+        (p) += base64_encode((unsigned char *)(key), (nkey), (unsigned char *)(p), WRITE_BUFFER_SIZE); \
+        *(p)++ = ' '; *(p)++ = 'b'; \
+    } \
+} while (0)
+
 static void process_mget_command(conn *c, token_t *tokens, const size_t ntokens) {
     char *key;
     size_t nkey;
@@ -979,14 +1008,12 @@ static void process_mget_command(conn *c, token_t *tokens, const size_t ntokens)
     assert(c != NULL);
     WANT_TOKENS_MIN(ntokens, 3);
 
+    // FIXME: do we move this check to after preparse?
     if (tokens[KEY_TOKEN].length > KEY_MAX_LENGTH) {
         out_errstring(c, "CLIENT_ERROR bad command line format");
         return;
     }
 
-    key = tokens[KEY_TOKEN].value;
-    nkey = tokens[KEY_TOKEN].length;
-
     // NOTE: final token has length == 0.
     // KEY_TOKEN == 1. 0 is command.
 
@@ -1007,6 +1034,10 @@ static void process_mget_command(conn *c, token_t *tokens, const size_t ntokens)
     }
     c->noreply = of.no_reply;
 
+    // Grab key and length after meta preparsing in case it was decoded.
+    key = tokens[KEY_TOKEN].value;
+    nkey = tokens[KEY_TOKEN].length;
+
     // TODO: need to indicate if the item was overflowed or not?
     // I think we do, since an overflow shouldn't trigger an alloc/replace.
     bool overflow = false;
@@ -1128,9 +1159,7 @@ static void process_mget_command(conn *c, token_t *tokens, const size_t ntokens)
                     p += tokens[i].length;
                     break;
                 case 'k':
-                    META_CHAR(p, 'k');
-                    memcpy(p, ITEM_key(it), it->nkey);
-                    p += it->nkey;
+                    META_KEY(p, ITEM_key(it), it->nkey, (it->it_flags & ITEM_KEY_BINARY));
                     break;
             }
         }
@@ -1280,9 +1309,6 @@ static void process_mset_command(conn *c, token_t *tokens, const size_t ntokens)
         return;
     }
 
-    key = tokens[KEY_TOKEN].value;
-    nkey = tokens[KEY_TOKEN].length;
-
     if (ntokens == 3) {
         out_errstring(c, "CLIENT_ERROR bad command line format");
         return;
@@ -1294,6 +1320,7 @@ static void process_mset_command(conn *c, token_t *tokens, const size_t ntokens)
     }
 
     // leave space for the status code.
+    // FIXME: two spaces after the OK line :(
     p = resp->wbuf + 3;
 
     // We need to at least try to get the size to properly slurp bad bytes
@@ -1302,6 +1329,9 @@ static void process_mset_command(conn *c, token_t *tokens, const size_t ntokens)
         goto error;
     }
 
+    key = tokens[KEY_TOKEN].value;
+    nkey = tokens[KEY_TOKEN].length;
+
     // Set noreply after tokens are understood.
     c->noreply = of.no_reply;
 
@@ -1320,9 +1350,7 @@ static void process_mset_command(conn *c, token_t *tokens, const size_t ntokens)
                 p += tokens[i].length;
                 break;
             case 'k':
-                META_CHAR(p, 'k');
-                memcpy(p, key, nkey);
-                p += nkey;
+                META_KEY(p, key, nkey, of.key_binary);
                 break;
         }
     }
@@ -1408,6 +1436,12 @@ static void process_mset_command(conn *c, token_t *tokens, const size_t ntokens)
 #endif
     c->rlbytes = it->nbytes;
     c->cmd = comm;
+
+    // Prevent printing back the key in meta commands as garbage.
+    if (of.key_binary) {
+        it->it_flags |= ITEM_KEY_BINARY;
+    }
+
     if (of.set_stale && comm == NREAD_CAS) {
         c->set_stale = true;
     }
@@ -1452,21 +1486,22 @@ static void process_mdelete_command(conn *c, token_t *tokens, const size_t ntoke
         return;
     }
 
-    key = tokens[KEY_TOKEN].value;
-    nkey = tokens[KEY_TOKEN].length;
-
     if (ntokens > MFLAG_MAX_OPT_LENGTH) {
         out_string(c, "CLIENT_ERROR options flags too long");
         return;
     }
 
     // scrubs duplicated options and sets flags for how to load the item.
+    // FIXME: not using the preparse errstr?
     if (_meta_flag_preparse(tokens, ntokens, &of, &errstr) != 0) {
         out_errstring(c, "CLIENT_ERROR invalid or duplicate flag");
         return;
     }
     c->noreply = of.no_reply;
 
+    key = tokens[KEY_TOKEN].value;
+    nkey = tokens[KEY_TOKEN].length;
+
     assert(c != NULL);
     for (i = KEY_TOKEN+1; i < ntokens-1; i++) {
         switch (tokens[i].value[0]) {
@@ -1481,9 +1516,7 @@ static void process_mdelete_command(conn *c, token_t *tokens, const size_t ntoke
                 p += tokens[i].length;
                 break;
             case 'k':
-                META_CHAR(p, 'k');
-                memcpy(p, key, nkey);
-                p += nkey;
+                META_KEY(p, key, nkey, of.key_binary);
                 break;
         }
     }
@@ -1582,9 +1615,6 @@ static void process_marithmetic_command(conn *c, token_t *tokens, const size_t n
         return;
     }
 
-    key = tokens[KEY_TOKEN].value;
-    nkey = tokens[KEY_TOKEN].length;
-
     if (ntokens > MFLAG_MAX_OPT_LENGTH) {
         out_string(c, "CLIENT_ERROR options flags too long");
         return;
@@ -1597,6 +1627,9 @@ static void process_marithmetic_command(conn *c, token_t *tokens, const size_t n
     }
     c->noreply = of.no_reply;
 
+    key = tokens[KEY_TOKEN].value;
+    nkey = tokens[KEY_TOKEN].length;
+
     assert(c != NULL);
     // "mode switch" to alternative commands
     switch (of.mode) {
@@ -1727,9 +1760,7 @@ static void process_marithmetic_command(conn *c, token_t *tokens, const size_t n
                     p += tokens[i].length;
                     break;
                 case 'k':
-                    META_CHAR(p, 'k');
-                    memcpy(p, key, nkey);
-                    p += nkey;
+                    META_KEY(p, key, nkey, of.key_binary);
                     break;
             }
         }
@@ -1758,9 +1789,7 @@ static void process_marithmetic_command(conn *c, token_t *tokens, const size_t n
                     p += tokens[i].length;
                     break;
                 case 'k':
-                    META_CHAR(p, 'k');
-                    memcpy(p, key, nkey);
-                    p += nkey;
+                    META_KEY(p, key, nkey, of.key_binary);
                     break;
             }
         }
diff --git a/t/metaget.t b/t/metaget.t
index be85ff0..962dd3e 100644
--- a/t/metaget.t
+++ b/t/metaget.t
@@ -149,6 +149,23 @@ my $sock = $server->sock;
 }
 
 {
+    diag "encoded binary keys";
+    # 44OG44K544OI is "tesuto" in katakana
+    my $tesuto = "44OG44K544OI";
+    print $sock "ms $tesuto S2 b\r\npo\r\n";
+    like(scalar <$sock>, qr/^OK /, "set with encoded key");
+
+    my $res = mget($sock, $tesuto, 'v');
+    ok(! exists $res->{val}, "encoded key doesn't exist");
+    $res = mget($sock, $tesuto, 'b v k');
+    ok(exists $res->{val}, "decoded key exists");
+    ok(get_flag($res, 'k') eq $tesuto, "key returned encoded");
+
+    # TODO: test k is returned properly from ms.
+    # validate the store data is smaller somehow?
+}
+
+{
     diag "marithmetic tests";
     print $sock "ma mo\r\n";
     like(scalar <$sock>, qr/^NF/, "incr miss");