// Source path: root/src/third_party/re2/dist/re2/testing/mimics_pcre_test.cc
// Blob: 01ab41ee38e3c60967cd530b2cdd934b7fa0ca3b

// Copyright 2008 The RE2 Authors.  All Rights Reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "util/test.h"
#include "util/logging.h"
#include "re2/prog.h"
#include "re2/regexp.h"

namespace re2 {

// One row of the test table: a regular expression together with the
// answer that Regexp::MimicsPCRE() is expected to return for it.
struct PCRETest {
  const char* regexp;  // Pattern text handed to Regexp::Parse.
  bool should_match;   // Expected result of MimicsPCRE() on the parsed regexp.
};

// Patterns paired with the value Regexp::MimicsPCRE() is expected to
// return for them.  The table is only ever read, so it is const.
static const PCRETest tests[] = {
  // Most things should behave exactly as they do in PCRE.
  { "abc",       true  },
  { "(a|b)c",    true  },
  { "(a*|b)c",   true  },
  { "(a|b*)c",   true  },
  { "a(b|c)d",   true  },
  { "a(()|())c", true  },
  { "ab*c",      true  },
  { "ab+c",      true  },
  { "a(b*|c*)d", true  },
  { "\\W",       true  },
  { "\\W{1,2}",  true  },
  { "\\d",       true  },

  // Repetition of a subexpression that can match the empty string
  // does not; repetition of a non-empty subexpression does.
  { "(a*)*",     false },
  { "x(a*)*y",   false },
  { "(a*)+",     false },
  { "(a+)*",     true  },
  { "(a+)+",     true  },

  // Of these escapes, \v is the only one that shouldn't.
  { "\\b",       true  },
  { "\\v",       false },
  { "\\d",       true  },

  // The handling of ^ in multi-line mode is different, as is
  // the handling of $ in single-line mode.  (Both involve
  // boundary cases if the string ends with \n.)
  { "\\A",       true  },
  { "\\z",       true  },
  { "(?m)^",     false },
  { "(?m)$",     true  },
  { "(?-m)^",    true  },
  { "(?-m)$",    false },  // In PCRE, == \Z
  { "(?m)\\A",   true  },
  { "(?m)\\z",   true  },
  { "(?-m)\\A",  true  },
  { "(?-m)\\z",  true  },
};

// Runs every entry of tests[] through Regexp::Parse twice -- first with the
// Latin1 flag added to LikePerl, then with LikePerl alone -- and verifies
// that MimicsPCRE() agrees with the entry's should_match field.
TEST(MimicsPCRE, SimpleTests) {
  const PCRETest* const end = tests + arraysize(tests);
  for (const PCRETest* entry = tests; entry != end; ++entry) {
    for (size_t pass = 0; pass < 2; pass++) {
      // Pass 0 is the Latin1 variant; pass 1 is the one labelled "utf".
      Regexp::ParseFlags parse_flags = Regexp::LikePerl;
      const char* encoding = "utf";
      if (pass == 0) {
        parse_flags = parse_flags | Regexp::Latin1;
        encoding = "latin1";
      }

      Regexp* parsed = Regexp::Parse(entry->regexp, parse_flags, NULL);
      ASSERT_TRUE(parsed != NULL) << " " << entry->regexp;
      ASSERT_EQ(entry->should_match, parsed->MimicsPCRE())
        << " " << entry->regexp << " " << encoding;
      // Drop the reference returned by Parse.
      parsed->Decref();
    }
  }
}

}  // namespace re2