summaryrefslogtreecommitdiff
path: root/pcrecpp_unittest.cc
diff options
context:
space:
mode:
authornigel <nigel@2f5784b3-3f2a-0410-8824-cb99058d5e15>2007-02-24 21:40:45 +0000
committernigel <nigel@2f5784b3-3f2a-0410-8824-cb99058d5e15>2007-02-24 21:40:45 +0000
commit97cb05691b9cabed35f1a853c74d48c692aaabcf (patch)
treecb7c68a44f0b79c6d90d9a18a7ec640c8435a5e7 /pcrecpp_unittest.cc
parent455fcc7e13a175722acfd2cca6ab99caa9606a22 (diff)
downloadpcre-97cb05691b9cabed35f1a853c74d48c692aaabcf.tar.gz
Load pcre-6.0 into code/trunk.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@77 2f5784b3-3f2a-0410-8824-cb99058d5e15
Diffstat (limited to 'pcrecpp_unittest.cc')
-rw-r--r--pcrecpp_unittest.cc814
1 files changed, 814 insertions, 0 deletions
diff --git a/pcrecpp_unittest.cc b/pcrecpp_unittest.cc
new file mode 100644
index 0000000..000c12e
--- /dev/null
+++ b/pcrecpp_unittest.cc
@@ -0,0 +1,814 @@
+// Copyright (c) 2005, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: Sanjay Ghemawat
+//
+// TODO: Test extractions for PartialMatch/Consume
+
+#include <stdio.h>
+#include <vector>
+#include "config.h"
+#include "pcrecpp.h"
+
+using pcrecpp::StringPiece;
+using pcrecpp::RE;
+using pcrecpp::RE_Options;
+using pcrecpp::Hex;
+using pcrecpp::Octal;
+using pcrecpp::CRadix;
+
+// CHECK dies with a fatal error if condition is not true. It is *not*
+// controlled by NDEBUG, so the check will be executed regardless of
+// compilation mode. Therefore, it is safe to do things like:
+// CHECK_EQ(fp->Write(x), 4)
+#define CHECK(condition) do { \
+ if (!(condition)) { \
+ fprintf(stderr, "%s:%d: Check failed: %s\n", \
+ __FILE__, __LINE__, #condition); \
+ exit(1); \
+ } \
+} while (0)
+
+#define CHECK_EQ(a, b) CHECK(a == b)
+
+static void Timing1(int num_iters) {
+ // Same pattern lots of times
+ RE pattern("ruby:\\d+");
+ StringPiece p("ruby:1234");
+ for (int j = num_iters; j > 0; j--) {
+ CHECK(pattern.FullMatch(p));
+ }
+}
+
+static void Timing2(int num_iters) {
+ // Same pattern lots of times
+ RE pattern("ruby:(\\d+)");
+ int i;
+ for (int j = num_iters; j > 0; j--) {
+ CHECK(pattern.FullMatch("ruby:1234", &i));
+ CHECK_EQ(i, 1234);
+ }
+}
+
+static void Timing3(int num_iters) {
+ string text_string;
+ for (int j = num_iters; j > 0; j--) {
+ text_string += "this is another line\n";
+ }
+
+ RE line_matcher(".*\n");
+ string line;
+ StringPiece text(text_string);
+ int counter = 0;
+ while (line_matcher.Consume(&text)) {
+ counter++;
+ }
+ printf("Matched %d lines\n", counter);
+}
+
+#if 0 // uncomment this if you have a way of defining VirtualProcessSize()
+
+static void LeakTest() {
+ // Check for memory leaks
+ unsigned long long initial_size = 0;
+ for (int i = 0; i < 100000; i++) {
+ if (i == 50000) {
+ initial_size = VirtualProcessSize();
+ printf("Size after 50000: %llu\n", initial_size);
+ }
+ char buf[100];
+ snprintf(buf, sizeof(buf), "pat%09d", i);
+ RE newre(buf);
+ }
+ uint64 final_size = VirtualProcessSize();
+ printf("Size after 100000: %llu\n", final_size);
+ const double growth = double(final_size - initial_size) / final_size;
+ printf("Growth: %0.2f%%", growth * 100);
+ CHECK(growth < 0.02); // Allow < 2% growth
+}
+
+#endif
+
+static void RadixTests() {
+ printf("Testing hex\n");
+
+#define CHECK_HEX(type, value) \
+ do { \
+ type v; \
+ CHECK(RE("([0-9a-fA-F]+)[uUlL]*").FullMatch(#value, Hex(&v))); \
+ CHECK_EQ(v, 0x ## value); \
+ CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0x" #value, CRadix(&v))); \
+ CHECK_EQ(v, 0x ## value); \
+ } while(0)
+
+ CHECK_HEX(short, 2bad);
+ CHECK_HEX(unsigned short, 2badU);
+ CHECK_HEX(int, dead);
+ CHECK_HEX(unsigned int, deadU);
+ CHECK_HEX(long, 7eadbeefL);
+ CHECK_HEX(unsigned long, deadbeefUL);
+#ifdef HAVE_LONG_LONG
+ CHECK_HEX(long long, 12345678deadbeefLL);
+#endif
+#ifdef HAVE_UNSIGNED_LONG_LONG
+ CHECK_HEX(unsigned long long, cafebabedeadbeefULL);
+#endif
+
+#undef CHECK_HEX
+
+ printf("Testing octal\n");
+
+#define CHECK_OCTAL(type, value) \
+ do { \
+ type v; \
+ CHECK(RE("([0-7]+)[uUlL]*").FullMatch(#value, Octal(&v))); \
+ CHECK_EQ(v, 0 ## value); \
+ CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0" #value, CRadix(&v))); \
+ CHECK_EQ(v, 0 ## value); \
+ } while(0)
+
+ CHECK_OCTAL(short, 77777);
+ CHECK_OCTAL(unsigned short, 177777U);
+ CHECK_OCTAL(int, 17777777777);
+ CHECK_OCTAL(unsigned int, 37777777777U);
+ CHECK_OCTAL(long, 17777777777L);
+ CHECK_OCTAL(unsigned long, 37777777777UL);
+#ifdef HAVE_LONG_LONG
+ CHECK_OCTAL(long long, 777777777777777777777LL);
+#endif
+#ifdef HAVE_UNSIGNED_LONG_LONG
+ CHECK_OCTAL(unsigned long long, 1777777777777777777777ULL);
+#endif
+
+#undef CHECK_OCTAL
+
+ printf("Testing decimal\n");
+
+#define CHECK_DECIMAL(type, value) \
+ do { \
+ type v; \
+ CHECK(RE("(-?[0-9]+)[uUlL]*").FullMatch(#value, &v)); \
+ CHECK_EQ(v, value); \
+ CHECK(RE("(-?[0-9a-fA-FxX]+)[uUlL]*").FullMatch(#value, CRadix(&v))); \
+ CHECK_EQ(v, value); \
+ } while(0)
+
+ CHECK_DECIMAL(short, -1);
+ CHECK_DECIMAL(unsigned short, 9999);
+ CHECK_DECIMAL(int, -1000);
+ CHECK_DECIMAL(unsigned int, 12345U);
+ CHECK_DECIMAL(long, -10000000L);
+ CHECK_DECIMAL(unsigned long, 3083324652U);
+#ifdef HAVE_LONG_LONG
+ CHECK_DECIMAL(long long, -100000000000000LL);
+#endif
+#ifdef HAVE_UNSIGNED_LONG_LONG
+ CHECK_DECIMAL(unsigned long long, 1234567890987654321ULL);
+#endif
+
+#undef CHECK_DECIMAL
+
+}
+
+static void TestReplace() {
+ printf("Testing Replace\n");
+
+ struct ReplaceTest {
+ const char *regexp;
+ const char *rewrite;
+ const char *original;
+ const char *single;
+ const char *global;
+ };
+ static const ReplaceTest tests[] = {
+ { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
+ "\\2\\1ay",
+ "the quick brown fox jumps over the lazy dogs.",
+ "ethay quick brown fox jumps over the lazy dogs.",
+ "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday." },
+ { "\\w+",
+ "\\0-NOSPAM",
+ "paul.haahr@google.com",
+ "paul-NOSPAM.haahr@google.com",
+ "paul-NOSPAM.haahr-NOSPAM@google-NOSPAM.com-NOSPAM" },
+ { "^",
+ "(START)",
+ "foo",
+ "(START)foo",
+ "(START)foo" },
+ { "^",
+ "(START)",
+ "",
+ "(START)",
+ "(START)" },
+ { "$",
+ "(END)",
+ "",
+ "(END)",
+ "(END)" },
+ { "b",
+ "bb",
+ "ababababab",
+ "abbabababab",
+ "abbabbabbabbabb" },
+ { "b",
+ "bb",
+ "bbbbbb",
+ "bbbbbbb",
+ "bbbbbbbbbbbb" },
+ { "b+",
+ "bb",
+ "bbbbbb",
+ "bb",
+ "bb" },
+ { "b*",
+ "bb",
+ "bbbbbb",
+ "bb",
+ "bb" },
+ { "b*",
+ "bb",
+ "aaaaa",
+ "bbaaaaa",
+ "bbabbabbabbabbabb" },
+ { "", NULL, NULL, NULL, NULL }
+ };
+
+ for (const ReplaceTest *t = tests; t->original != NULL; ++t) {
+ string one(t->original);
+ CHECK(RE(t->regexp).Replace(t->rewrite, &one));
+ CHECK_EQ(one, t->single);
+ string all(t->original);
+ CHECK(RE(t->regexp).GlobalReplace(t->rewrite, &all) > 0);
+ CHECK_EQ(all, t->global);
+ }
+}
+
+static void TestExtract() {
+ printf("Testing Extract\n");
+
+ string s;
+
+ CHECK(RE("(.*)@([^.]*)").Extract("\\2!\\1", "boris@kremvax.ru", &s));
+ CHECK_EQ(s, "kremvax!boris");
+
+ // check the RE interface as well
+ CHECK(RE(".*").Extract("'\\0'", "foo", &s));
+ CHECK_EQ(s, "'foo'");
+ CHECK(!RE("bar").Extract("'\\0'", "baz", &s));
+ CHECK_EQ(s, "'foo'");
+}
+
+static void TestConsume() {
+ printf("Testing Consume\n");
+
+ string word;
+
+ string s(" aaa b!@#$@#$cccc");
+ StringPiece input(s);
+
+ RE r("\\s*(\\w+)"); // matches a word, possibly proceeded by whitespace
+ CHECK(r.Consume(&input, &word));
+ CHECK_EQ(word, "aaa");
+ CHECK(r.Consume(&input, &word));
+ CHECK_EQ(word, "b");
+ CHECK(! r.Consume(&input, &word));
+}
+
+static void TestFindAndConsume() {
+ printf("Testing FindAndConsume\n");
+
+ string word;
+
+ string s(" aaa b!@#$@#$cccc");
+ StringPiece input(s);
+
+ RE r("(\\w+)"); // matches a word
+ CHECK(r.FindAndConsume(&input, &word));
+ CHECK_EQ(word, "aaa");
+ CHECK(r.FindAndConsume(&input, &word));
+ CHECK_EQ(word, "b");
+ CHECK(r.FindAndConsume(&input, &word));
+ CHECK_EQ(word, "cccc");
+ CHECK(! r.FindAndConsume(&input, &word));
+}
+
+static void TestMatchNumberPeculiarity() {
+ printf("Testing match-number peculiaraity\n");
+
+ string word1;
+ string word2;
+ string word3;
+
+ RE r("(foo)|(bar)|(baz)");
+ CHECK(r.PartialMatch("foo", &word1, &word2, &word3));
+ CHECK_EQ(word1, "foo");
+ CHECK_EQ(word2, "");
+ CHECK_EQ(word3, "");
+ CHECK(r.PartialMatch("bar", &word1, &word2, &word3));
+ CHECK_EQ(word1, "");
+ CHECK_EQ(word2, "bar");
+ CHECK_EQ(word3, "");
+ CHECK(r.PartialMatch("baz", &word1, &word2, &word3));
+ CHECK_EQ(word1, "");
+ CHECK_EQ(word2, "");
+ CHECK_EQ(word3, "baz");
+ CHECK(!r.PartialMatch("f", &word1, &word2, &word3));
+
+ string a;
+ CHECK(RE("(foo)|hello").FullMatch("hello", &a));
+ CHECK_EQ(a, "");
+}
+
+static void TestRecursion(int size, const char *pattern, int match_limit) {
+ printf("Testing recursion\n");
+
+ // Fill up a string repeating the pattern given
+ string domain;
+ domain.resize(size);
+ int patlen = strlen(pattern);
+ for (int i = 0; i < size; ++i) {
+ domain[i] = pattern[i % patlen];
+ }
+ // Just make sure it doesn't crash due to too much recursion.
+ RE_Options options;
+ options.set_match_limit(match_limit);
+ RE re("([a-zA-Z0-9]|-)+(\\.([a-zA-Z0-9]|-)+)*(\\.)?", options);
+ re.FullMatch(domain);
+}
+
+
+int main(int argc, char** argv) {
+ // Treat any flag as --help
+ if (argc > 1 && argv[1][0] == '-') {
+ printf("Usage: %s [timing1|timing2|timing3 num-iters]\n"
+ " If 'timingX ###' is specified, run the given timing test\n"
+ " with the given number of iterations, rather than running\n"
+ " the default corectness test.\n", argv[0]);
+ return 0;
+ }
+
+ if (argc > 1) {
+ if ( argc == 2 || atoi(argv[2]) == 0) {
+ printf("timing mode needs a num-iters argument\n");
+ return 1;
+ }
+ if (!strcmp(argv[1], "timing1"))
+ Timing1(atoi(argv[2]));
+ else if (!strcmp(argv[1], "timing2"))
+ Timing2(atoi(argv[2]));
+ else if (!strcmp(argv[1], "timing3"))
+ Timing3(atoi(argv[2]));
+ else
+ printf("Unknown argument '%s'\n", argv[1]);
+ return 0;
+ }
+
+ printf("Testing FullMatch\n");
+
+ int i;
+ string s;
+
+ /***** FullMatch with no args *****/
+
+ CHECK(RE("h.*o").FullMatch("hello"));
+ CHECK(!RE("h.*o").FullMatch("othello"));
+ CHECK(!RE("h.*o").FullMatch("hello!"));
+
+ /***** FullMatch with args *****/
+
+ // Zero-arg
+ CHECK(RE("\\d+").FullMatch("1001"));
+
+ // Single-arg
+ CHECK(RE("(\\d+)").FullMatch("1001", &i));
+ CHECK_EQ(i, 1001);
+ CHECK(RE("(-?\\d+)").FullMatch("-123", &i));
+ CHECK_EQ(i, -123);
+ CHECK(!RE("()\\d+").FullMatch("10", &i));
+ CHECK(!RE("(\\d+)").FullMatch("1234567890123456789012345678901234567890",
+ &i));
+
+ // Digits surrounding integer-arg
+ CHECK(RE("1(\\d*)4").FullMatch("1234", &i));
+ CHECK_EQ(i, 23);
+ CHECK(RE("(\\d)\\d+").FullMatch("1234", &i));
+ CHECK_EQ(i, 1);
+ CHECK(RE("(-\\d)\\d+").FullMatch("-1234", &i));
+ CHECK_EQ(i, -1);
+ CHECK(RE("(\\d)").PartialMatch("1234", &i));
+ CHECK_EQ(i, 1);
+ CHECK(RE("(-\\d)").PartialMatch("-1234", &i));
+ CHECK_EQ(i, -1);
+
+ // String-arg
+ CHECK(RE("h(.*)o").FullMatch("hello", &s));
+ CHECK_EQ(s, string("ell"));
+
+ // StringPiece-arg
+ StringPiece sp;
+ CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &sp, &i));
+ CHECK_EQ(sp.size(), 4);
+ CHECK(memcmp(sp.data(), "ruby", 4) == 0);
+ CHECK_EQ(i, 1234);
+
+ // Multi-arg
+ CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &s, &i));
+ CHECK_EQ(s, string("ruby"));
+ CHECK_EQ(i, 1234);
+
+ // Ignored arg
+ CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s, (void*)NULL, &i));
+ CHECK_EQ(s, string("ruby"));
+ CHECK_EQ(i, 1234);
+
+ // Type tests
+ {
+ char c;
+ CHECK(RE("(H)ello").FullMatch("Hello", &c));
+ CHECK_EQ(c, 'H');
+ }
+ {
+ unsigned char c;
+ CHECK(RE("(H)ello").FullMatch("Hello", &c));
+ CHECK_EQ(c, static_cast<unsigned char>('H'));
+ }
+ {
+ short v;
+ CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
+ CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
+ CHECK(RE("(-?\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
+ CHECK(RE("(-?\\d+)").FullMatch("-32768", &v)); CHECK_EQ(v, -32768);
+ CHECK(!RE("(-?\\d+)").FullMatch("-32769", &v));
+ CHECK(!RE("(-?\\d+)").FullMatch("32768", &v));
+ }
+ {
+ unsigned short v;
+ CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
+ CHECK(RE("(\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
+ CHECK(RE("(\\d+)").FullMatch("65535", &v)); CHECK_EQ(v, 65535);
+ CHECK(!RE("(\\d+)").FullMatch("65536", &v));
+ }
+ {
+ int v;
+ static const int max_value = 0x7fffffff;
+ static const int min_value = -max_value - 1;
+ CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
+ CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
+ CHECK(RE("(-?\\d+)").FullMatch("2147483647", &v)); CHECK_EQ(v, max_value);
+ CHECK(RE("(-?\\d+)").FullMatch("-2147483648", &v)); CHECK_EQ(v, min_value);
+ CHECK(!RE("(-?\\d+)").FullMatch("-2147483649", &v));
+ CHECK(!RE("(-?\\d+)").FullMatch("2147483648", &v));
+ }
+ {
+ unsigned int v;
+ static const unsigned int max_value = 0xfffffffful;
+ CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
+ CHECK(RE("(\\d+)").FullMatch("4294967295", &v)); CHECK_EQ(v, max_value);
+ CHECK(!RE("(\\d+)").FullMatch("4294967296", &v));
+ }
+#ifdef HAVE_LONG_LONG
+ {
+ long long v;
+ static const long long max_value = 0x7fffffffffffffffLL;
+ static const long long min_value = -max_value - 1;
+ char buf[32];
+
+ CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
+ CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100);
+
+ snprintf(buf, sizeof(buf), "%lld", max_value);
+ CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
+
+ snprintf(buf, sizeof(buf), "%lld", min_value);
+ CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value);
+
+ snprintf(buf, sizeof(buf), "%lld", max_value);
+ assert(buf[strlen(buf)-1] != '9');
+ buf[strlen(buf)-1]++;
+ CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
+
+ snprintf(buf, sizeof(buf), "%lld", min_value);
+ assert(buf[strlen(buf)-1] != '9');
+ buf[strlen(buf)-1]++;
+ CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
+ }
+#endif
+#if defined HAVE_UNSIGNED_LONG_LONG && defined HAVE_LONG_LONG
+ {
+ unsigned long long v;
+ long long v2;
+ static const unsigned long long max_value = 0xffffffffffffffffULL;
+ char buf[32];
+
+ CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100);
+ CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100);
+
+ snprintf(buf, sizeof(buf), "%llu", max_value);
+ CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
+
+ assert(buf[strlen(buf)-1] != '9');
+ buf[strlen(buf)-1]++;
+ CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
+ }
+#endif
+ {
+ float v;
+ CHECK(RE("(.*)").FullMatch("100", &v));
+ CHECK(RE("(.*)").FullMatch("-100.", &v));
+ CHECK(RE("(.*)").FullMatch("1e23", &v));
+ }
+ {
+ double v;
+ CHECK(RE("(.*)").FullMatch("100", &v));
+ CHECK(RE("(.*)").FullMatch("-100.", &v));
+ CHECK(RE("(.*)").FullMatch("1e23", &v));
+ }
+
+ // Check that matching is fully anchored
+ CHECK(!RE("(\\d+)").FullMatch("x1001", &i));
+ CHECK(!RE("(\\d+)").FullMatch("1001x", &i));
+ CHECK(RE("x(\\d+)").FullMatch("x1001", &i)); CHECK_EQ(i, 1001);
+ CHECK(RE("(\\d+)x").FullMatch("1001x", &i)); CHECK_EQ(i, 1001);
+
+ // Braces
+ CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcd"));
+ CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcde"));
+ CHECK(!RE("[0-9a-f+.-]{5,}").FullMatch("0abc"));
+
+ // Complicated RE
+ CHECK(RE("foo|bar|[A-Z]").FullMatch("foo"));
+ CHECK(RE("foo|bar|[A-Z]").FullMatch("bar"));
+ CHECK(RE("foo|bar|[A-Z]").FullMatch("X"));
+ CHECK(!RE("foo|bar|[A-Z]").FullMatch("XY"));
+
+ // Check full-match handling (needs '$' tacked on internally)
+ CHECK(RE("fo|foo").FullMatch("fo"));
+ CHECK(RE("fo|foo").FullMatch("foo"));
+ CHECK(RE("fo|foo$").FullMatch("fo"));
+ CHECK(RE("fo|foo$").FullMatch("foo"));
+ CHECK(RE("foo$").FullMatch("foo"));
+ CHECK(!RE("foo\\$").FullMatch("foo$bar"));
+ CHECK(!RE("fo|bar").FullMatch("fox"));
+
+ // Uncomment the following if we change the handling of '$' to
+ // prevent it from matching a trailing newline
+ if (false) {
+ // Check that we don't get bitten by pcre's special handling of a
+ // '\n' at the end of the string matching '$'
+ CHECK(!RE("foo$").PartialMatch("foo\n"));
+ }
+
+ // Number of args
+ int a[16];
+ CHECK(RE("").FullMatch(""));
+
+ memset(a, 0, sizeof(0));
+ CHECK(RE("(\\d){1}").FullMatch("1",
+ &a[0]));
+ CHECK_EQ(a[0], 1);
+
+ memset(a, 0, sizeof(0));
+ CHECK(RE("(\\d)(\\d)").FullMatch("12",
+ &a[0], &a[1]));
+ CHECK_EQ(a[0], 1);
+ CHECK_EQ(a[1], 2);
+
+ memset(a, 0, sizeof(0));
+ CHECK(RE("(\\d)(\\d)(\\d)").FullMatch("123",
+ &a[0], &a[1], &a[2]));
+ CHECK_EQ(a[0], 1);
+ CHECK_EQ(a[1], 2);
+ CHECK_EQ(a[2], 3);
+
+ memset(a, 0, sizeof(0));
+ CHECK(RE("(\\d)(\\d)(\\d)(\\d)").FullMatch("1234",
+ &a[0], &a[1], &a[2], &a[3]));
+ CHECK_EQ(a[0], 1);
+ CHECK_EQ(a[1], 2);
+ CHECK_EQ(a[2], 3);
+ CHECK_EQ(a[3], 4);
+
+ memset(a, 0, sizeof(0));
+ CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("12345",
+ &a[0], &a[1], &a[2],
+ &a[3], &a[4]));
+ CHECK_EQ(a[0], 1);
+ CHECK_EQ(a[1], 2);
+ CHECK_EQ(a[2], 3);
+ CHECK_EQ(a[3], 4);
+ CHECK_EQ(a[4], 5);
+
+ memset(a, 0, sizeof(0));
+ CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("123456",
+ &a[0], &a[1], &a[2],
+ &a[3], &a[4], &a[5]));
+ CHECK_EQ(a[0], 1);
+ CHECK_EQ(a[1], 2);
+ CHECK_EQ(a[2], 3);
+ CHECK_EQ(a[3], 4);
+ CHECK_EQ(a[4], 5);
+ CHECK_EQ(a[5], 6);
+
+ memset(a, 0, sizeof(0));
+ CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("1234567",
+ &a[0], &a[1], &a[2], &a[3],
+ &a[4], &a[5], &a[6]));
+ CHECK_EQ(a[0], 1);
+ CHECK_EQ(a[1], 2);
+ CHECK_EQ(a[2], 3);
+ CHECK_EQ(a[3], 4);
+ CHECK_EQ(a[4], 5);
+ CHECK_EQ(a[5], 6);
+ CHECK_EQ(a[6], 7);
+
+ memset(a, 0, sizeof(0));
+ CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)"
+ "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch(
+ "1234567890123456",
+ &a[0], &a[1], &a[2], &a[3],
+ &a[4], &a[5], &a[6], &a[7],
+ &a[8], &a[9], &a[10], &a[11],
+ &a[12], &a[13], &a[14], &a[15]));
+ CHECK_EQ(a[0], 1);
+ CHECK_EQ(a[1], 2);
+ CHECK_EQ(a[2], 3);
+ CHECK_EQ(a[3], 4);
+ CHECK_EQ(a[4], 5);
+ CHECK_EQ(a[5], 6);
+ CHECK_EQ(a[6], 7);
+ CHECK_EQ(a[7], 8);
+ CHECK_EQ(a[8], 9);
+ CHECK_EQ(a[9], 0);
+ CHECK_EQ(a[10], 1);
+ CHECK_EQ(a[11], 2);
+ CHECK_EQ(a[12], 3);
+ CHECK_EQ(a[13], 4);
+ CHECK_EQ(a[14], 5);
+ CHECK_EQ(a[15], 6);
+
+ /***** PartialMatch *****/
+
+ printf("Testing PartialMatch\n");
+
+ CHECK(RE("h.*o").PartialMatch("hello"));
+ CHECK(RE("h.*o").PartialMatch("othello"));
+ CHECK(RE("h.*o").PartialMatch("hello!"));
+ CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x"));
+
+ RadixTests();
+ TestReplace();
+ TestExtract();
+ TestConsume();
+ TestFindAndConsume();
+ TestMatchNumberPeculiarity();
+
+ // Check the pattern() accessor
+ {
+ const string kPattern = "http://([^/]+)/.*";
+ const RE re(kPattern);
+ CHECK_EQ(kPattern, re.pattern());
+ }
+
+ // Check RE error field.
+ {
+ RE re("foo");
+ CHECK(re.error().empty()); // Must have no error
+ }
+
+#ifdef SUPPORT_UTF8
+ // Check UTF-8 handling
+ {
+ printf("Testing UTF-8 handling\n");
+
+ // Three Japanese characters (nihongo)
+ const char utf8_string[] = {
+ 0xe6, 0x97, 0xa5, // 65e5
+ 0xe6, 0x9c, 0xac, // 627c
+ 0xe8, 0xaa, 0x9e, // 8a9e
+ 0
+ };
+ const char utf8_pattern[] = {
+ '.',
+ 0xe6, 0x9c, 0xac, // 627c
+ '.',
+ 0
+ };
+
+ // Both should match in either mode, bytes or UTF-8
+ RE re_test1(".........");
+ CHECK(re_test1.FullMatch(utf8_string));
+ RE re_test2("...", pcrecpp::UTF8());
+ CHECK(re_test2.FullMatch(utf8_string));
+
+ // Check that '.' matches one byte or UTF-8 character
+ // according to the mode.
+ string ss;
+ RE re_test3("(.)");
+ CHECK(re_test3.PartialMatch(utf8_string, &ss));
+ CHECK_EQ(ss, string("\xe6"));
+ RE re_test4("(.)", pcrecpp::UTF8());
+ CHECK(re_test4.PartialMatch(utf8_string, &ss));
+ CHECK_EQ(ss, string("\xe6\x97\xa5"));
+
+ // Check that string matches itself in either mode
+ RE re_test5(utf8_string);
+ CHECK(re_test5.FullMatch(utf8_string));
+ RE re_test6(utf8_string, pcrecpp::UTF8());
+ CHECK(re_test6.FullMatch(utf8_string));
+
+ // Check that pattern matches string only in UTF8 mode
+ RE re_test7(utf8_pattern);
+ CHECK(!re_test7.FullMatch(utf8_string));
+ RE re_test8(utf8_pattern, pcrecpp::UTF8());
+ CHECK(re_test8.FullMatch(utf8_string));
+ }
+
+ // Check that ungreedy, UTF8 regular expressions don't match when they
+ // oughtn't -- see bug 82246.
+ {
+ // This code always worked.
+ const char* pattern = "\\w+X";
+ const string target = "a aX";
+ RE match_sentence(pattern);
+ RE match_sentence_re(pattern, pcrecpp::UTF8());
+
+ CHECK(!match_sentence.FullMatch(target));
+ CHECK(!match_sentence_re.FullMatch(target));
+ }
+
+ {
+ const char* pattern = "(?U)\\w+X";
+ const string target = "a aX";
+ RE match_sentence(pattern);
+ RE match_sentence_re(pattern, pcrecpp::UTF8());
+
+ CHECK(!match_sentence.FullMatch(target));
+ CHECK(!match_sentence_re.FullMatch(target));
+ }
+#endif /* def SUPPORT_UTF8 */
+
+ printf("Testing error reporting\n");
+
+ { RE re("a\\1"); CHECK(!re.error().empty()); }
+ {
+ RE re("a[x");
+ CHECK(!re.error().empty());
+ }
+ {
+ RE re("a[z-a]");
+ CHECK(!re.error().empty());
+ }
+ {
+ RE re("a[[:foobar:]]");
+ CHECK(!re.error().empty());
+ }
+ {
+ RE re("a(b");
+ CHECK(!re.error().empty());
+ }
+ {
+ RE re("a\\");
+ CHECK(!re.error().empty());
+ }
+
+ // Test that recursion is stopped: there will be some errors reported
+ int matchlimit = 5000;
+ int bytes = 15 * 1024; // enough to crash if there was no match limit
+ TestRecursion(bytes, ".", matchlimit);
+ TestRecursion(bytes, "a", matchlimit);
+ TestRecursion(bytes, "a.", matchlimit);
+ TestRecursion(bytes, "ab.", matchlimit);
+ TestRecursion(bytes, "abc.", matchlimit);
+
+ // Done
+ printf("OK\n");
+
+ return 0;
+}