summaryrefslogtreecommitdiff
path: root/pcrecpp.cc
diff options
context:
space:
mode:
authornigel <nigel@2f5784b3-3f2a-0410-8824-cb99058d5e15>2007-02-24 21:41:42 +0000
committernigel <nigel@2f5784b3-3f2a-0410-8824-cb99058d5e15>2007-02-24 21:41:42 +0000
commit876a1a775acdc16384b603754a67010ca8e80cda (patch)
treee9b25e0bf3c35e0455cdffef8f42cb72ca3c31f3 /pcrecpp.cc
parent78d9c9e331dc39ca5131981dd347b7b3aeca459f (diff)
downloadpcre-876a1a775acdc16384b603754a67010ca8e80cda.tar.gz
Load pcre-7.0 into code/trunk.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@93 2f5784b3-3f2a-0410-8824-cb99058d5e15
Diffstat (limited to 'pcrecpp.cc')
-rw-r--r--pcrecpp.cc56
1 files changed, 45 insertions, 11 deletions
diff --git a/pcrecpp.cc b/pcrecpp.cc
index f828de7..b75738b 100644
--- a/pcrecpp.cc
+++ b/pcrecpp.cc
@@ -61,7 +61,7 @@ static const string empty_string;
// If the user doesn't ask for any options, we just use this one
static RE_Options default_options;
-void RE::Init(const char* pat, const RE_Options* options) {
+void RE::Init(const string& pat, const RE_Options* options) {
pattern_ = pat;
if (options == NULL) {
options_ = default_options;
@@ -78,7 +78,7 @@ void RE::Init(const char* pat, const RE_Options* options) {
// conservative in that it may treat some "simple" patterns
// as "complex" (e.g., if the vertical bar is in a character
// class or is escaped). But it seems good enough.
- if (strchr(pat, '|') == NULL) {
+ if (strchr(pat.c_str(), '|') == NULL) {
// Simple pattern: we can use position-based checks to perform
// fully anchored matches
re_full_ = re_partial_;
@@ -89,12 +89,18 @@ void RE::Init(const char* pat, const RE_Options* options) {
}
}
-RE::~RE() {
+void RE::Cleanup() {
if (re_full_ != NULL && re_full_ != re_partial_) (*pcre_free)(re_full_);
if (re_partial_ != NULL) (*pcre_free)(re_partial_);
if (error_ != &empty_string) delete error_;
}
+
+RE::~RE() {
+ Cleanup();
+}
+
+
pcre* RE::Compile(Anchor anchor) {
// First, convert RE_Options into pcre options
int pcre_options = 0;
@@ -424,6 +430,34 @@ bool RE::Extract(const StringPiece& rewrite,
return Rewrite(out, rewrite, text, vec, matches);
}
+/*static*/ string RE::QuoteMeta(const StringPiece& unquoted) {
+ string result;
+
+ // Escape any ascii character not in [A-Za-z_0-9].
+ //
+ // Note that it's legal to escape a character even if it has no
+ // special meaning in a regular expression -- so this function does
+ // that. (This also makes it identical to the perl function of the
+ // same name; see `perldoc -f quotemeta`.)
+ for (int ii = 0; ii < unquoted.size(); ++ii) {
+ // Note that using 'isalnum' here raises the benchmark time from
+ // 32ns to 58ns:
+ if ((unquoted[ii] < 'a' || unquoted[ii] > 'z') &&
+ (unquoted[ii] < 'A' || unquoted[ii] > 'Z') &&
+ (unquoted[ii] < '0' || unquoted[ii] > '9') &&
+ unquoted[ii] != '_' &&
+ // If this is the part of a UTF8 or Latin1 character, we need
+ // to copy this byte without escaping. Experimentally this is
+ // what works correctly with the regexp library.
+ !(unquoted[ii] & 128)) {
+ result += '\\';
+ }
+ result += unquoted[ii];
+ }
+
+ return result;
+}
+
/***** Actual matching and rewriting code *****/
int RE::TryMatch(const StringPiece& text,
@@ -809,14 +843,14 @@ bool Arg::parse_float(const char* str, int n, void* dest) {
return parse_##name##_radix(str, n, dest, 0); \
}
-DEFINE_INTEGER_PARSERS(short);
-DEFINE_INTEGER_PARSERS(ushort);
-DEFINE_INTEGER_PARSERS(int);
-DEFINE_INTEGER_PARSERS(uint);
-DEFINE_INTEGER_PARSERS(long);
-DEFINE_INTEGER_PARSERS(ulong);
-DEFINE_INTEGER_PARSERS(longlong);
-DEFINE_INTEGER_PARSERS(ulonglong);
+DEFINE_INTEGER_PARSERS(short) /* */
+DEFINE_INTEGER_PARSERS(ushort) /* */
+DEFINE_INTEGER_PARSERS(int) /* Don't use semicolons after these */
+DEFINE_INTEGER_PARSERS(uint) /* statements because they can cause */
+DEFINE_INTEGER_PARSERS(long) /* compiler warnings if the checking */
+DEFINE_INTEGER_PARSERS(ulong) /* level is turned up high enough. */
+DEFINE_INTEGER_PARSERS(longlong) /* */
+DEFINE_INTEGER_PARSERS(ulonglong) /* */
#undef DEFINE_INTEGER_PARSERS