diff options
author | ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2008-03-08 17:24:02 +0000 |
---|---|---|
committer | ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2008-03-08 17:24:02 +0000 |
commit | bda4463b94aca19bdd31e527c8b348f8fa3c7657 (patch) | |
tree | 9c45355b43c32e36d31729a665c79836ec10ccb8 | |
parent | 2d6ec30227035d99946e622ed3ea3b4c26d415b3 (diff) | |
download | pcre-bda4463b94aca19bdd31e527c8b348f8fa3c7657.tar.gz |
Craig's patch to the QuoteMeta function in pcrecpp.cc so that it escapes the
NUL character as backslash + 0 rather than backslash + NUL, because PCRE
doesn't support NULs in patterns.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@326 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- | ChangeLog | 4 | ||||
-rw-r--r-- | pcrecpp.cc | 26 | ||||
-rw-r--r-- | pcrecpp.h | 3 | ||||
-rw-r--r-- | pcrecpp_unittest.cc | 1 |
4 files changed, 24 insertions, 10 deletions
```diff
@@ -29,6 +29,10 @@ Version 7.7 05-Mar-08
 
 5. Added the --include_dir and --exclude_dir patterns to pcregrep, and used
    --exclude_dir in the tests to avoid scanning .svn directories.
+
+6. Applied Craig's patch to the QuoteMeta function so that it escapes the
+   NUL character as backslash + 0 rather than backslash + NUL, because PCRE
+   doesn't support NULs in patterns.
 
 
 Version 7.6 28-Jan-08
@@ -449,21 +449,27 @@ bool RE::Extract(const StringPiece& rewrite,
   // Note that it's legal to escape a character even if it has no
   // special meaning in a regular expression -- so this function does
   // that. (This also makes it identical to the perl function of the
-  // same name; see `perldoc -f quotemeta`.)
+  // same name; see `perldoc -f quotemeta`.) The one exception is
+  // escaping NUL: rather than doing backslash + NUL, like perl does,
+  // we do '\0', because pcre itself doesn't take embedded NUL chars.
   for (int ii = 0; ii < unquoted.size(); ++ii) {
     // Note that using 'isalnum' here raises the benchmark time from
     // 32ns to 58ns:
-    if ((unquoted[ii] < 'a' || unquoted[ii] > 'z') &&
-        (unquoted[ii] < 'A' || unquoted[ii] > 'Z') &&
-        (unquoted[ii] < '0' || unquoted[ii] > '9') &&
-        unquoted[ii] != '_' &&
-        // If this is the part of a UTF8 or Latin1 character, we need
-        // to copy this byte without escaping. Experimentally this is
-        // what works correctly with the regexp library.
-        !(unquoted[ii] & 128)) {
+    if (unquoted[ii] == '\0') {
+      result += "\\0";
+    } else if ((unquoted[ii] < 'a' || unquoted[ii] > 'z') &&
+               (unquoted[ii] < 'A' || unquoted[ii] > 'Z') &&
+               (unquoted[ii] < '0' || unquoted[ii] > '9') &&
+               unquoted[ii] != '_' &&
+               // If this is the part of a UTF8 or Latin1 character, we need
+               // to copy this byte without escaping. Experimentally this is
+               // what works correctly with the regexp library.
+               !(unquoted[ii] & 128)) {
       result += '\\';
+      result += unquoted[ii];
+    } else {
+      result += unquoted[ii];
     }
-    result += unquoted[ii];
   }
 
   return result;
@@ -620,6 +620,9 @@ class PCRECPP_EXP_DEFN RE {
   // 1.5-2.0?
   // may become:
   // 1\.5\-2\.0\?
+  // Note QuoteMeta behaves the same as perl's QuoteMeta function,
+  // *except* that it escapes the NUL character (\0) as backslash + 0,
+  // rather than backslash + NUL.
   static string QuoteMeta(const StringPiece& unquoted);
 
 
diff --git a/pcrecpp_unittest.cc b/pcrecpp_unittest.cc
index 2e5f2df..44e0cc9 100644
--- a/pcrecpp_unittest.cc
+++ b/pcrecpp_unittest.cc
@@ -497,6 +497,7 @@ static void TestQuotaMetaSimple() {
   TestQuoteMeta("((a|b)c?d*e+[f-h]i)");
   TestQuoteMeta("((?!)xxx).*yyy");
   TestQuoteMeta("([");
+  TestQuoteMeta(string("foo\0bar", 7));
 }
 
 static void TestQuoteMetaSimpleNegative() {
```