Craig's patch to the QuoteMeta function in pcrecpp.cc so that it escapes the
NUL character as backslash + 0 rather than backslash + NUL, because PCRE doesn't support NULs in patterns.

git-svn-id: svn://vcs.exim.org/pcre/code/trunk@326 2f5784b3-3f2a-0410-8824-cb99058d5e15
author: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2008-03-08 17:24:02 +0000
committer: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2008-03-08 17:24:02 +0000
commit: bda4463b94aca19bdd31e527c8b348f8fa3c7657 (patch)
tree: 9c45355b43c32e36d31729a665c79836ec10ccb8 /pcrecpp.cc
parent: 2d6ec30227035d99946e622ed3ea3b4c26d415b3 (diff)
download: pcre-bda4463b94aca19bdd31e527c8b348f8fa3c7657.tar.gz
1 files changed, 16 insertions, 10 deletions
diff --git a/pcrecpp.cc b/pcrecpp.cc
index f34f0fc..52187a7 100644
--- a/pcrecpp.cc
+++ b/pcrecpp.cc
@@ -449,21 +449,27 @@ bool RE::Extract(const StringPiece& rewrite,
   // Note that it's legal to escape a character even if it has no
   // special meaning in a regular expression -- so this function does
   // that.  (This also makes it identical to the perl function of the
-  // same name; see `perldoc -f quotemeta`.)
+  // same name; see `perldoc -f quotemeta`.)  The one exception is
+  // escaping NUL: rather than doing backslash + NUL, like perl does,
+  // we do '\0', because pcre itself doesn't take embedded NUL chars.
   for (int ii = 0; ii < unquoted.size(); ++ii) {
     // Note that using 'isalnum' here raises the benchmark time from
     // 32ns to 58ns:
-    if ((unquoted[ii] < 'a' || unquoted[ii] > 'z') &&
-        (unquoted[ii] < 'A' || unquoted[ii] > 'Z') &&
-        (unquoted[ii] < '0' || unquoted[ii] > '9') &&
-        unquoted[ii] != '_' &&
-        // If this is the part of a UTF8 or Latin1 character, we need
-        // to copy this byte without escaping.  Experimentally this is
-        // what works correctly with the regexp library.
-        !(unquoted[ii] & 128)) {
+    if (unquoted[ii] == '\0') {
+      result += "\\0";
+    } else if ((unquoted[ii] < 'a' || unquoted[ii] > 'z') &&
+               (unquoted[ii] < 'A' || unquoted[ii] > 'Z') &&
+               (unquoted[ii] < '0' || unquoted[ii] > '9') &&
+               unquoted[ii] != '_' &&
+               // If this is the part of a UTF8 or Latin1 character, we need
+               // to copy this byte without escaping.  Experimentally this is
+               // what works correctly with the regexp library.
+               !(unquoted[ii] & 128)) {
       result += '\\';
+      result += unquoted[ii];
+    } else {
+      result += unquoted[ii];
     }
-    result += unquoted[ii];
   }
 
   return result;
author	ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>	2008-03-08 17:24:02 +0000
committer	ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>	2008-03-08 17:24:02 +0000
commit	bda4463b94aca19bdd31e527c8b348f8fa3c7657 (patch)
tree	9c45355b43c32e36d31729a665c79836ec10ccb8 /pcrecpp.cc
parent	2d6ec30227035d99946e622ed3ea3b4c26d415b3 (diff)
download	pcre-bda4463b94aca19bdd31e527c8b348f8fa3c7657.tar.gz