summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2008-03-08 17:24:02 +0000
committer: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2008-03-08 17:24:02 +0000
commit: bda4463b94aca19bdd31e527c8b348f8fa3c7657 (patch)
tree: 9c45355b43c32e36d31729a665c79836ec10ccb8
parent: 2d6ec30227035d99946e622ed3ea3b4c26d415b3 (diff)
download: pcre-bda4463b94aca19bdd31e527c8b348f8fa3c7657.tar.gz
Craig's patch to the QuoteMeta function in pcrecpp.cc so that it escapes the
NUL character as backslash + 0 rather than backslash + NUL, because PCRE
doesn't support NULs in patterns.

git-svn-id: svn://vcs.exim.org/pcre/code/trunk@326 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- ChangeLog 4
-rw-r--r-- pcrecpp.cc 26
-rw-r--r-- pcrecpp.h 3
-rw-r--r-- pcrecpp_unittest.cc 1
4 files changed, 24 insertions, 10 deletions
diff --git a/ChangeLog b/ChangeLog
index 5cc4838..ddbe87b 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -29,6 +29,10 @@ Version 7.7 05-Mar-08
5. Added the --include_dir and --exclude_dir patterns to pcregrep, and used
--exclude_dir in the tests to avoid scanning .svn directories.
+
+6. Applied Craig's patch to the QuoteMeta function so that it escapes the
+ NUL character as backslash + 0 rather than backslash + NUL, because PCRE
+ doesn't support NULs in patterns.
Version 7.6 28-Jan-08
diff --git a/pcrecpp.cc b/pcrecpp.cc
index f34f0fc..52187a7 100644
--- a/pcrecpp.cc
+++ b/pcrecpp.cc
@@ -449,21 +449,27 @@ bool RE::Extract(const StringPiece& rewrite,
// Note that it's legal to escape a character even if it has no
// special meaning in a regular expression -- so this function does
// that. (This also makes it identical to the perl function of the
- // same name; see `perldoc -f quotemeta`.)
+ // same name; see `perldoc -f quotemeta`.) The one exception is
+ // escaping NUL: rather than doing backslash + NUL, like perl does,
+ // we do '\0', because pcre itself doesn't take embedded NUL chars.
for (int ii = 0; ii < unquoted.size(); ++ii) {
// Note that using 'isalnum' here raises the benchmark time from
// 32ns to 58ns:
- if ((unquoted[ii] < 'a' || unquoted[ii] > 'z') &&
- (unquoted[ii] < 'A' || unquoted[ii] > 'Z') &&
- (unquoted[ii] < '0' || unquoted[ii] > '9') &&
- unquoted[ii] != '_' &&
- // If this is the part of a UTF8 or Latin1 character, we need
- // to copy this byte without escaping. Experimentally this is
- // what works correctly with the regexp library.
- !(unquoted[ii] & 128)) {
+ if (unquoted[ii] == '\0') {
+ result += "\\0";
+ } else if ((unquoted[ii] < 'a' || unquoted[ii] > 'z') &&
+ (unquoted[ii] < 'A' || unquoted[ii] > 'Z') &&
+ (unquoted[ii] < '0' || unquoted[ii] > '9') &&
+ unquoted[ii] != '_' &&
+ // If this is the part of a UTF8 or Latin1 character, we need
+ // to copy this byte without escaping. Experimentally this is
+ // what works correctly with the regexp library.
+ !(unquoted[ii] & 128)) {
result += '\\';
+ result += unquoted[ii];
+ } else {
+ result += unquoted[ii];
}
- result += unquoted[ii];
}
return result;
diff --git a/pcrecpp.h b/pcrecpp.h
index a4638e1..a87a816 100644
--- a/pcrecpp.h
+++ b/pcrecpp.h
@@ -620,6 +620,9 @@ class PCRECPP_EXP_DEFN RE {
// 1.5-2.0?
// may become:
// 1\.5\-2\.0\?
+ // Note QuoteMeta behaves the same as perl's QuoteMeta function,
+ // *except* that it escapes the NUL character (\0) as backslash + 0,
+ // rather than backslash + NUL.
static string QuoteMeta(const StringPiece& unquoted);
diff --git a/pcrecpp_unittest.cc b/pcrecpp_unittest.cc
index 2e5f2df..44e0cc9 100644
--- a/pcrecpp_unittest.cc
+++ b/pcrecpp_unittest.cc
@@ -497,6 +497,7 @@ static void TestQuotaMetaSimple() {
TestQuoteMeta("((a|b)c?d*e+[f-h]i)");
TestQuoteMeta("((?!)xxx).*yyy");
TestQuoteMeta("([");
+ TestQuoteMeta(string("foo\0bar", 7));
}
static void TestQuoteMetaSimpleNegative() {