1 files changed, 45 insertions, 11 deletions
diff --git a/pcrecpp.cc b/pcrecpp.cc
index f828de7..b75738b 100644
--- a/pcrecpp.cc
+++ b/pcrecpp.cc
@@ -61,7 +61,7 @@ static const string empty_string;
 // If the user doesn't ask for any options, we just use this one
 static RE_Options default_options;
 
-void RE::Init(const char* pat, const RE_Options* options) {
+void RE::Init(const string& pat, const RE_Options* options) {
   pattern_ = pat;
   if (options == NULL) {
     options_ = default_options;
@@ -78,7 +78,7 @@ void RE::Init(const char* pat, const RE_Options* options) {
     // conservative in that it may treat some "simple" patterns
     // as "complex" (e.g., if the vertical bar is in a character
     // class or is escaped).  But it seems good enough.
-    if (strchr(pat, '|') == NULL) {
+    if (strchr(pat.c_str(), '|') == NULL) {
       // Simple pattern: we can use position-based checks to perform
       // fully anchored matches
       re_full_ = re_partial_;
@@ -89,12 +89,18 @@ void RE::Init(const char* pat, const RE_Options* options) {
   }
 }
 
-RE::~RE() {
+void RE::Cleanup() {
   if (re_full_ != NULL && re_full_ != re_partial_) (*pcre_free)(re_full_);
   if (re_partial_ != NULL)                         (*pcre_free)(re_partial_);
   if (error_ != &empty_string)                     delete error_;
 }
 
+
+RE::~RE() {
+  Cleanup();
+}
+
+
 pcre* RE::Compile(Anchor anchor) {
   // First, convert RE_Options into pcre options
   int pcre_options = 0;
@@ -424,6 +430,34 @@ bool RE::Extract(const StringPiece& rewrite,
   return Rewrite(out, rewrite, text, vec, matches);
 }
 
+/*static*/ string RE::QuoteMeta(const StringPiece& unquoted) {
+  string result;
+
+  // Escape any ascii character not in [A-Za-z_0-9].
+  //
+  // Note that it's legal to escape a character even if it has no
+  // special meaning in a regular expression -- so this function does
+  // that.  (This also makes it identical to the perl function of the
+  // same name; see `perldoc -f quotemeta`.)
+  for (int ii = 0; ii < unquoted.size(); ++ii) {
+    // Note that using 'isalnum' here raises the benchmark time from
+    // 32ns to 58ns:
+    if ((unquoted[ii] < 'a' || unquoted[ii] > 'z') &&
+        (unquoted[ii] < 'A' || unquoted[ii] > 'Z') &&
+        (unquoted[ii] < '0' || unquoted[ii] > '9') &&
+        unquoted[ii] != '_' &&
+        // If this is the part of a UTF8 or Latin1 character, we need
+        // to copy this byte without escaping.  Experimentally this is
+        // what works correctly with the regexp library.
+        !(unquoted[ii] & 128)) {
+      result += '\\';
+    }
+    result += unquoted[ii];
+  }
+
+  return result;
+}
+
 /***** Actual matching and rewriting code *****/
 
 int RE::TryMatch(const StringPiece& text,
@@ -809,14 +843,14 @@ bool Arg::parse_float(const char* str, int n, void* dest) {
     return parse_##name##_radix(str, n, dest, 0);                       \
   }
 
-DEFINE_INTEGER_PARSERS(short);
-DEFINE_INTEGER_PARSERS(ushort);
-DEFINE_INTEGER_PARSERS(int);
-DEFINE_INTEGER_PARSERS(uint);
-DEFINE_INTEGER_PARSERS(long);
-DEFINE_INTEGER_PARSERS(ulong);
-DEFINE_INTEGER_PARSERS(longlong);
-DEFINE_INTEGER_PARSERS(ulonglong);
+DEFINE_INTEGER_PARSERS(short)      /*                                   */
+DEFINE_INTEGER_PARSERS(ushort)     /*                                   */
+DEFINE_INTEGER_PARSERS(int)        /* Don't use semicolons after these  */
+DEFINE_INTEGER_PARSERS(uint)       /* statements because they can cause */
+DEFINE_INTEGER_PARSERS(long)       /* compiler warnings if the checking */
+DEFINE_INTEGER_PARSERS(ulong)      /* level is turned up high enough.   */
+DEFINE_INTEGER_PARSERS(longlong)   /*                                   */
+DEFINE_INTEGER_PARSERS(ulonglong)  /*                                   */
 
 #undef DEFINE_INTEGER_PARSERS