base64url{,d} & base32hex{,d} expansion operators (rfc4648)

We need ${base64url:....} to provide a sane, safe encoding for data that will be used in filenames (and URLs), one whose output never contains a `/` character. This comes from RFC 4648. While in there, also expand base32 to include the hex variants: this alphabet (for character inputs) preserves sort order. Document them all. Add test suites. NB: the test suites have not been run (recent reinstall).
author: Phil Pennock <pdp@exim.org> 2016-10-18 17:38:05 -0400
committer: Phil Pennock <pdp@exim.org> 2016-10-19 01:31:22 -0400
commit: 2d3164b4f2943011d062e5e9d388b1a4bc0bf78d (patch)
tree: 14ebff9264f88b620563478464707d750ab0c299
parent: 8b0fb68e04323248df1208516e8a9293af9859d8 (diff)
download: exim4-2d3164b4f2943011d062e5e9d388b1a4bc0bf78d.tar.gz
6 files changed, 136 insertions, 22 deletions
diff --git a/doc/doc-docbook/spec.xfpt b/doc/doc-docbook/spec.xfpt
index 45d845718..805303740 100644
--- a/doc/doc-docbook/spec.xfpt
+++ b/doc/doc-docbook/spec.xfpt
@@ -10134,6 +10134,15 @@ Only lowercase letters are used.
 .cindex "expansion" "conversion to base 32"
 The string must consist entirely of base-32 digits.
 The number is converted to decimal and output as a string.
+
+.vitem &*${base32hex:*&<&'digits'&>&*}*&
+.cindex "&%base32hex%& expansion item"
+This is the same as &%base32%& but using the "Extended Hex Alphabet".
+See RFC 4648 for details.
+
+.vitem &*${base32hexd:*&<&'base-32-hex&~digits'&>&*}*&
+.cindex "&%base32hexd%& expansion item"
+This is the same as &%base32d%& but decoding from the "Extended Hex Alphabet".
 .wen
 
 .vitem &*${base62:*&<&'digits'&>&*}*&
@@ -10172,6 +10181,22 @@ returns the base64 encoding of the DER form of the certificate.
 This operator converts a base64-encoded string into the un-coded form.
 
 
+.new
+.vitem &*${base64url:*&<&'string'&>&*}*&
+.cindex "&%base64url%& expansion item"
+This is the same as &%base64%& but using the "URL and Filename Safe Alphabet".
+This replaces two characters in the encoded alphabet, including the solidus
+used as a directory separator on Unix, thus the result of this encoding can
+be safely used as a filename.
+See RFC 4648 for details.
+
+.vitem &*${base64urld:*&<&'string'&>&*}*&
+.cindex "&%base64urld%& expansion item"
+This is the same as &%base64d%& but decoding from the
+"URL and Filename Safe Alphabet".
+.wen
+
+
 .vitem &*${domain:*&<&'string'&>&*}*&
 .cindex "domain" "extraction"
 .cindex "expansion" "domain extraction"
diff --git a/doc/doc-txt/ChangeLog b/doc/doc-txt/ChangeLog
index 1267d75dc..526dd8f67 100644
--- a/doc/doc-txt/ChangeLog
+++ b/doc/doc-txt/ChangeLog
@@ -120,6 +120,9 @@ PP/01 Changed default Diffie-Hellman parameters to be Exim-specific, created
 PP/02 Unbreak build via pkg-config with new hash support when crypto headers
       are not in the system include path.
 
+PP/03 Add base64url and base64urld expansion operators, to use the URL and
+      Filename Safe Alphabet encodings.  Also base32hex/base32hexd.
+
 
 Exim version 4.87
 -----------------
diff --git a/doc/doc-txt/NewStuff b/doc/doc-txt/NewStuff
index cca958f79..40bb8000e 100644
--- a/doc/doc-txt/NewStuff
+++ b/doc/doc-txt/NewStuff
@@ -26,7 +26,8 @@ Version 4.88
     the queue to be used for a message.  A $queue_name variable gives
     visibility.
 
- 6. New expansion operators base32/base32d.
+ 6. New expansion operators base32/base32d, base32hex/base32hexd and
+    base64url/base64urld.
 
  7. The CHUNKING ESMTP extension from RFC 3030.  May give some slight
     performance increase and network load decrease.  Main config option
diff --git a/src/src/expand.c b/src/src/expand.c
index cfde23610..1319a8f54 100644
--- a/src/src/expand.c
+++ b/src/src/expand.c
@@ -201,10 +201,14 @@ static uschar *op_table_main[] = {
   US"addresses",
   US"base32",
   US"base32d",
+  US"base32hex",
+  US"base32hexd",
   US"base62",
   US"base62d",
   US"base64",
   US"base64d",
+  US"base64url",
+  US"base64urld",
   US"domain",
   US"escape",
   US"escape8bit",
@@ -247,10 +251,14 @@ enum {
   EOP_ADDRESSES,
   EOP_BASE32,
   EOP_BASE32D,
+  EOP_BASE32HEX,
+  EOP_BASE32HEXD,
   EOP_BASE62,
   EOP_BASE62D,
   EOP_BASE64,
   EOP_BASE64D,
+  EOP_BASE64URL,
+  EOP_BASE64URLD,
   EOP_DOMAIN,
   EOP_ESCAPE,
   EOP_ESCAPE8BIT,
@@ -845,7 +853,8 @@ static int utf8_table2[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
 
 
 
-static uschar * base32_chars = US"abcdefghijklmnopqrstuvwxyz234567";
+static const uschar * const base32_chars    = US"abcdefghijklmnopqrstuvwxyz234567";
+static const uschar * const base32hex_chars = US"0123456789ABCDEFGHIJKLMNOPQRSTUV";
 
 /*************************************************
 *           Binary chop search on a table        *
@@ -6233,6 +6242,7 @@ while (*s != 0)
       case EOP_SHA1:
       case EOP_SHA256:
       case EOP_BASE64:
+      case EOP_BASE64URL:
 	if (s[1] == '$')
 	  {
 	  const uschar * s1 = s;
@@ -6274,40 +6284,48 @@ while (*s != 0)
     switch(c)
       {
       case EOP_BASE32:
+      case EOP_BASE32HEX:
 	{
         uschar *t;
+        const uschar *alphabet = (c == EOP_BASE32) ? base32_chars : base32hex_chars;
         unsigned long int n = Ustrtoul(sub, &t, 10);
 	uschar * s = NULL;
 	int sz = 0, i = 0;
 
         if (*t != 0)
           {
-          expand_string_message = string_sprintf("argument for base32 "
-            "operator is \"%s\", which is not a decimal number", sub);
+          expand_string_message = string_sprintf("argument for base32%s "
+            "operator is \"%s\", which is not a decimal number",
+            (c == EOP_BASE32 ? "" : "hex"),
+            sub);
           goto EXPAND_FAILED;
           }
 	for ( ; n; n >>= 5)
-	  s = string_catn(s, &sz, &i, &base32_chars[n & 0x1f], 1);
+	  s = string_catn(s, &sz, &i, &alphabet[n & 0x1f], 1);
 
 	while (i > 0) yield = string_catn(yield, &size, &ptr, &s[--i], 1);
 	continue;
 	}
 
       case EOP_BASE32D:
+      case EOP_BASE32HEXD:
         {
         uschar *tt = sub;
         unsigned long int n = 0;
-	uschar * s;
+        uschar * s;
+        const uschar *alphabet = (c == EOP_BASE32D) ? base32_chars : base32hex_chars;
         while (*tt)
           {
-          uschar * t = Ustrchr(base32_chars, *tt++);
+          uschar * t = Ustrchr(alphabet, *tt++);
           if (t == NULL)
             {
-            expand_string_message = string_sprintf("argument for base32d "
-              "operator is \"%s\", which is not a base 32 number", sub);
+            expand_string_message = string_sprintf("argument for base32%sd "
+              "operator is \"%s\", which is not a base 32 number",
+              (c == EOP_BASE32D ? "" : "hex"),
+              sub);
             goto EXPAND_FAILED;
             }
-          n = n * 32 + (t - base32_chars);
+          n = n * 32 + (t - alphabet);
           }
         s = string_sprintf("%ld", n);
         yield = string_cat(yield, &size, &ptr, s);
@@ -7185,23 +7203,46 @@ while (*s != 0)
 
       case EOP_STR2B64:
       case EOP_BASE64:
-	{
+      case EOP_BASE64URL:
+        {
+        uschar * t;
 #ifdef SUPPORT_TLS
-	uschar * s = vp && *(void **)vp->value
-	  ? tls_cert_der_b64(*(void **)vp->value)
-	  : b64encode(sub, Ustrlen(sub));
+        uschar * s = vp && *(void **)vp->value
+          ? tls_cert_der_b64(*(void **)vp->value)
+          : b64encode(sub, Ustrlen(sub));
 #else
-	uschar * s = b64encode(sub, Ustrlen(sub));
+        uschar * s = b64encode(sub, Ustrlen(sub));
 #endif
-	yield = string_cat(yield, &size, &ptr, s);
-	continue;
-	}
+        if (c == EOP_BASE64URL) {
+          for (t = s; *t != '\0'; ++t) {
+            if (*t == '+') {
+              *t = '-';
+            } else if (*t == '/') {
+              *t = '_';
+            }
+          }
+        }
+        yield = string_cat(yield, &size, &ptr, s);
+        continue;
+        }
 
       case EOP_BASE64D:
-        {
-        uschar * s;
-        int len = b64decode(sub, &s);
-	if (len < 0)
+      case EOP_BASE64URLD:
+        {
+        uschar * s, * t;
+        t = sub;
+        if (c == EOP_BASE64URLD) {
+          t = string_copy(sub);
+          for (s = t; *s != '\0'; ++s) {
+            if (*s == '-') {
+              *s = '+';
+            } else if (*s == '_') {
+              *s = '/';
+            }
+          }
+        }
+        int len = b64decode(t, &s);
+        if (len < 0)
           {
           expand_string_message = string_sprintf("string \"%s\" is not "
             "well-formed for \"%s\" operator", sub, name);
diff --git a/test/scripts/0000-Basic/0002 b/test/scripts/0000-Basic/0002
index c2dcc40d9..7b4149fa3 100644
--- a/test/scripts/0000-Basic/0002
+++ b/test/scripts/0000-Basic/0002
@@ -195,6 +195,20 @@ base32d: 32 ${base32d:${base32:32}}
 base32d: 42 ${base32d:${base32:42}}
 base32d error: ABC ${base32d:ABC}
 
+base32hex: 0  <${base32hex:0}>
+base32hex: 1  <${base32hex:1}>
+base32hex: 31 <${base32hex:31}>
+base32hex: 32 <${base32hex:32}>
+base32hex: 42 <${base32hex:42}>
+base32hex error: 0x1 ${base32hex:0x1}
+
+base32hexd: 0  ${base32hexd:${base32hex:0}}
+base32hexd: 1  ${base32hexd:${base32hex:1}}
+base32hexd: 31 ${base32hexd:${base32hex:31}}
+base32hexd: 32 ${base32hexd:${base32hex:32}}
+base32hexd: 42 ${base32hexd:${base32hex:42}}
+base32hexd error: WXY ${base32hexd:WXY}
+
 The base62 operator is actually a base36 operator in the Darwin and Cygwin
 environments. Write cunning tests that produce the same output in both cases,
 while doing a reasonable check.
@@ -279,6 +293,14 @@ base64: ${base64:abcd}
 base64: ${base64:The quick brown \n fox}
 base64d:${base64d:YWJjZA==}
 base64d:${base64d:VGhlIHF1aWNrIGJyb3duIAogZm94}
+base64:     ${base64:==>}
+base64:     ${base64:==?}
+base64url:  ${base64url:==>}
+base64url:  ${base64url:==?}
+base64d:    ${base64d:PT0+}
+base64d:    ${base64d:PT0/}
+base64urld: ${base64urld:PT0-}
+base64urld: ${base64urld:PT0_}
 strlen: ${strlen:}
 strlen: ${strlen:a}
 strlen: ${strlen:abcdefgh}
diff --git a/test/stdout/0002 b/test/stdout/0002
index b6ca54d48..6238757d9 100644
--- a/test/stdout/0002
+++ b/test/stdout/0002
@@ -186,6 +186,20 @@ newline	tab\134backslash ~tilde\177DEL\200\201.
 > base32d: 42 42
 > Failed: argument for base32d operator is "ABC", which is not a base 32 number
 > 
+> base32hex: 0  <>
+> base32hex: 1  <1>
+> base32hex: 31 <V>
+> base32hex: 32 <10>
+> base32hex: 42 <1A>
+> Failed: argument for base32hex operator is "0x1", which is not a decimal number
+> 
+> base32hexd: 0  0
+> base32hexd: 1  1
+> base32hexd: 31 31
+> base32hexd: 32 32
+> base32hexd: 42 42
+> Failed: argument for base32hexd operator is "WXY", which is not a base 32 number
+> 
 > The base62 operator is actually a base36 operator in the Darwin and Cygwin
 > environments. Write cunning tests that produce the same output in both cases,
 > while doing a reasonable check.
@@ -254,6 +268,14 @@ newline	tab\134backslash ~tilde\177DEL\200\201.
 > base64d:abcd
 > base64d:The quick brown 
  fox
+> base64:     PT0+
+> base64:     PT0/
+> base64url:  PT0-
+> base64url:  PT0_
+> base64d:    ==>
+> base64d:    ==?
+> base64urld: ==>
+> base64urld: ==?
 > strlen: 0
 > strlen: 1
 > strlen: 8
author	Phil Pennock <pdp@exim.org>	2016-10-18 17:38:05 -0400
committer	Phil Pennock <pdp@exim.org>	2016-10-19 01:31:22 -0400
commit	2d3164b4f2943011d062e5e9d388b1a4bc0bf78d (patch)
tree	14ebff9264f88b620563478464707d750ab0c299
parent	8b0fb68e04323248df1208516e8a9293af9859d8 (diff)
download	exim4-2d3164b4f2943011d062e5e9d388b1a4bc0bf78d.tar.gz