diff options
author | ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2008-03-08 17:24:02 +0000 |
---|---|---|
committer | ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2008-03-08 17:24:02 +0000 |
commit | bda4463b94aca19bdd31e527c8b348f8fa3c7657 (patch) | |
tree | 9c45355b43c32e36d31729a665c79836ec10ccb8 /pcrecpp.cc | |
parent | 2d6ec30227035d99946e622ed3ea3b4c26d415b3 (diff) | |
download | pcre-bda4463b94aca19bdd31e527c8b348f8fa3c7657.tar.gz |
Craig's patch to the QuoteMeta function in pcrecpp.cc so that it escapes the
NUL character as backslash + 0 rather than backslash + NUL, because PCRE
doesn't support NULs in patterns.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@326 2f5784b3-3f2a-0410-8824-cb99058d5e15
Diffstat (limited to 'pcrecpp.cc')
-rw-r--r-- | pcrecpp.cc | 26 |
1 files changed, 16 insertions, 10 deletions
```diff
@@ -449,21 +449,27 @@ bool RE::Extract(const StringPiece& rewrite,
   // Note that it's legal to escape a character even if it has no
   // special meaning in a regular expression -- so this function does
   // that. (This also makes it identical to the perl function of the
-  // same name; see `perldoc -f quotemeta`.)
+  // same name; see `perldoc -f quotemeta`.) The one exception is
+  // escaping NUL: rather than doing backslash + NUL, like perl does,
+  // we do '\0', because pcre itself doesn't take embedded NUL chars.
   for (int ii = 0; ii < unquoted.size(); ++ii) {
     // Note that using 'isalnum' here raises the benchmark time from
     // 32ns to 58ns:
-    if ((unquoted[ii] < 'a' || unquoted[ii] > 'z') &&
-        (unquoted[ii] < 'A' || unquoted[ii] > 'Z') &&
-        (unquoted[ii] < '0' || unquoted[ii] > '9') &&
-        unquoted[ii] != '_' &&
-        // If this is the part of a UTF8 or Latin1 character, we need
-        // to copy this byte without escaping. Experimentally this is
-        // what works correctly with the regexp library.
-        !(unquoted[ii] & 128)) {
+    if (unquoted[ii] == '\0') {
+      result += "\\0";
+    } else if ((unquoted[ii] < 'a' || unquoted[ii] > 'z') &&
+               (unquoted[ii] < 'A' || unquoted[ii] > 'Z') &&
+               (unquoted[ii] < '0' || unquoted[ii] > '9') &&
+               unquoted[ii] != '_' &&
+               // If this is the part of a UTF8 or Latin1 character, we need
+               // to copy this byte without escaping. Experimentally this is
+               // what works correctly with the regexp library.
+               !(unquoted[ii] & 128)) {
       result += '\\';
+      result += unquoted[ii];
+    } else {
+      result += unquoted[ii];
     }
-    result += unquoted[ii];
   }
   return result;
```