summary refs log tree commit diff
path: root/pcrecpp.cc
diff options
context:
space:
mode:
author ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2008-03-08 17:24:02 +0000
committer ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2008-03-08 17:24:02 +0000
commit bda4463b94aca19bdd31e527c8b348f8fa3c7657 (patch)
tree 9c45355b43c32e36d31729a665c79836ec10ccb8 /pcrecpp.cc
parent 2d6ec30227035d99946e622ed3ea3b4c26d415b3 (diff)
download pcre-bda4463b94aca19bdd31e527c8b348f8fa3c7657.tar.gz
Craig's patch to the QuoteMeta function in pcrecpp.cc so that it escapes the
NUL character as backslash + 0 rather than backslash + NUL, because PCRE
doesn't support NULs in patterns.

git-svn-id: svn://vcs.exim.org/pcre/code/trunk@326 2f5784b3-3f2a-0410-8824-cb99058d5e15
Diffstat (limited to 'pcrecpp.cc')
-rw-r--r-- pcrecpp.cc 26
1 file changed, 16 insertions, 10 deletions
diff --git a/pcrecpp.cc b/pcrecpp.cc
index f34f0fc..52187a7 100644
--- a/pcrecpp.cc
+++ b/pcrecpp.cc
@@ -449,21 +449,27 @@ bool RE::Extract(const StringPiece& rewrite,
// Note that it's legal to escape a character even if it has no
// special meaning in a regular expression -- so this function does
// that. (This also makes it identical to the perl function of the
- // same name; see `perldoc -f quotemeta`.)
+ // same name; see `perldoc -f quotemeta`.) The one exception is
+ // escaping NUL: rather than doing backslash + NUL, like perl does,
+ // we do '\0', because pcre itself doesn't take embedded NUL chars.
for (int ii = 0; ii < unquoted.size(); ++ii) {
// Note that using 'isalnum' here raises the benchmark time from
// 32ns to 58ns:
- if ((unquoted[ii] < 'a' || unquoted[ii] > 'z') &&
- (unquoted[ii] < 'A' || unquoted[ii] > 'Z') &&
- (unquoted[ii] < '0' || unquoted[ii] > '9') &&
- unquoted[ii] != '_' &&
- // If this is the part of a UTF8 or Latin1 character, we need
- // to copy this byte without escaping. Experimentally this is
- // what works correctly with the regexp library.
- !(unquoted[ii] & 128)) {
+ if (unquoted[ii] == '\0') {
+ result += "\\0";
+ } else if ((unquoted[ii] < 'a' || unquoted[ii] > 'z') &&
+ (unquoted[ii] < 'A' || unquoted[ii] > 'Z') &&
+ (unquoted[ii] < '0' || unquoted[ii] > '9') &&
+ unquoted[ii] != '_' &&
+ // If this is the part of a UTF8 or Latin1 character, we need
+ // to copy this byte without escaping. Experimentally this is
+ // what works correctly with the regexp library.
+ !(unquoted[ii] & 128)) {
result += '\\';
+ result += unquoted[ii];
+ } else {
+ result += unquoted[ii];
}
- result += unquoted[ii];
}
return result;