diff options
author | nigel <nigel@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2007-02-24 21:41:42 +0000 |
---|---|---|
committer | nigel <nigel@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2007-02-24 21:41:42 +0000 |
commit | 876a1a775acdc16384b603754a67010ca8e80cda (patch) | |
tree | e9b25e0bf3c35e0455cdffef8f42cb72ca3c31f3 /pcrecpp.cc | |
parent | 78d9c9e331dc39ca5131981dd347b7b3aeca459f (diff) | |
download | pcre-876a1a775acdc16384b603754a67010ca8e80cda.tar.gz |
Load pcre-7.0 into code/trunk.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@93 2f5784b3-3f2a-0410-8824-cb99058d5e15
Diffstat (limited to 'pcrecpp.cc')
-rw-r--r-- | pcrecpp.cc | 56 |
1 files changed, 45 insertions, 11 deletions
@@ -61,7 +61,7 @@ static const string empty_string; // If the user doesn't ask for any options, we just use this one static RE_Options default_options; -void RE::Init(const char* pat, const RE_Options* options) { +void RE::Init(const string& pat, const RE_Options* options) { pattern_ = pat; if (options == NULL) { options_ = default_options; @@ -78,7 +78,7 @@ void RE::Init(const char* pat, const RE_Options* options) { // conservative in that it may treat some "simple" patterns // as "complex" (e.g., if the vertical bar is in a character // class or is escaped). But it seems good enough. - if (strchr(pat, '|') == NULL) { + if (strchr(pat.c_str(), '|') == NULL) { // Simple pattern: we can use position-based checks to perform // fully anchored matches re_full_ = re_partial_; @@ -89,12 +89,18 @@ void RE::Init(const char* pat, const RE_Options* options) { } } -RE::~RE() { +void RE::Cleanup() { if (re_full_ != NULL && re_full_ != re_partial_) (*pcre_free)(re_full_); if (re_partial_ != NULL) (*pcre_free)(re_partial_); if (error_ != &empty_string) delete error_; } + +RE::~RE() { + Cleanup(); +} + + pcre* RE::Compile(Anchor anchor) { // First, convert RE_Options into pcre options int pcre_options = 0; @@ -424,6 +430,34 @@ bool RE::Extract(const StringPiece& rewrite, return Rewrite(out, rewrite, text, vec, matches); } +/*static*/ string RE::QuoteMeta(const StringPiece& unquoted) { + string result; + + // Escape any ascii character not in [A-Za-z_0-9]. + // + // Note that it's legal to escape a character even if it has no + // special meaning in a regular expression -- so this function does + // that. (This also makes it identical to the perl function of the + // same name; see `perldoc -f quotemeta`.) + for (int ii = 0; ii < unquoted.size(); ++ii) { + // Note that using 'isalnum' here raises the benchmark time from + // 32ns to 58ns: + if ((unquoted[ii] < 'a' || unquoted[ii] > 'z') && + (unquoted[ii] < 'A' || unquoted[ii] > 'Z') && + (unquoted[ii] < '0' || unquoted[ii] > '9') && + unquoted[ii] != '_' && + // If this is the part of a UTF8 or Latin1 character, we need + // to copy this byte without escaping. Experimentally this is + // what works correctly with the regexp library. + !(unquoted[ii] & 128)) { + result += '\\'; + } + result += unquoted[ii]; + } + + return result; +} + /***** Actual matching and rewriting code *****/ int RE::TryMatch(const StringPiece& text, @@ -809,14 +843,14 @@ bool Arg::parse_float(const char* str, int n, void* dest) { return parse_##name##_radix(str, n, dest, 0); \ } -DEFINE_INTEGER_PARSERS(short); -DEFINE_INTEGER_PARSERS(ushort); -DEFINE_INTEGER_PARSERS(int); -DEFINE_INTEGER_PARSERS(uint); -DEFINE_INTEGER_PARSERS(long); -DEFINE_INTEGER_PARSERS(ulong); -DEFINE_INTEGER_PARSERS(longlong); -DEFINE_INTEGER_PARSERS(ulonglong); +DEFINE_INTEGER_PARSERS(short) /* */ +DEFINE_INTEGER_PARSERS(ushort) /* */ +DEFINE_INTEGER_PARSERS(int) /* Don't use semicolons after these */ +DEFINE_INTEGER_PARSERS(uint) /* statements because they can cause */ +DEFINE_INTEGER_PARSERS(long) /* compiler warnings if the checking */ +DEFINE_INTEGER_PARSERS(ulong) /* level is turned up high enough. */ +DEFINE_INTEGER_PARSERS(longlong) /* */ +DEFINE_INTEGER_PARSERS(ulonglong) /* */ #undef DEFINE_INTEGER_PARSERS |