summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authornigel <nigel@2f5784b3-3f2a-0410-8824-cb99058d5e15>2007-02-24 21:40:59 +0000
committernigel <nigel@2f5784b3-3f2a-0410-8824-cb99058d5e15>2007-02-24 21:40:59 +0000
commitf82b62380bd773b22a4a5d28d1a403ffd54c5392 (patch)
treed8fd1e5c25d0e781ca46b6b570beedaa15a81019
parent477806cfbeb607865593eb63f0216d854a2bbf6f (diff)
downloadpcre-f82b62380bd773b22a4a5d28d1a403ffd54c5392.tar.gz
Load pcre-6.2 into code/trunk.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@81 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r--ChangeLog29
-rw-r--r--Makefile.in12
-rw-r--r--RunGrepTest.in5
-rwxr-xr-xRunTest.in5
-rwxr-xr-xconfigure4
-rw-r--r--configure.in4
-rw-r--r--dftables.c2
-rw-r--r--doc/html/pcrecpp.html114
-rw-r--r--doc/pcre.txt165
-rw-r--r--doc/pcrecpp.394
-rw-r--r--pcre_compile.c59
-rw-r--r--pcrecpp.cc6
-rw-r--r--pcrecpp.h.in211
-rw-r--r--pcrecpp_unittest.cc228
-rw-r--r--testdata/grepoutput2
-rw-r--r--testdata/testinput221
-rw-r--r--testdata/testoutput12
-rw-r--r--testdata/testoutput2225
-rw-r--r--testdata/testoutput32
-rw-r--r--testdata/testoutput42
-rw-r--r--testdata/testoutput52
-rw-r--r--testdata/testoutput62
-rw-r--r--testdata/testoutput72
-rw-r--r--testdata/testoutput82
-rw-r--r--testdata/testoutput92
25 files changed, 1090 insertions, 112 deletions
diff --git a/ChangeLog b/ChangeLog
index 0513429..b9d74d3 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,6 +1,35 @@
ChangeLog for PCRE
------------------
+Version 6.2 01-Aug-05
+---------------------
+
+ 1. There was no test for integer overflow of quantifier values. A construction
+ such as {1111111111111111} would give undefined results. What is worse, if
+ a minimum quantifier for a parenthesized subpattern overflowed and became
+ negative, the calculation of the memory size went wrong. This could have
+ led to memory overwriting.
+
+ 2. Building PCRE using VPATH was broken. Hopefully it is now fixed.
+
+ 3. Added "b" to the 2nd argument of fopen() in dftables.c, for non-Unix-like
+ operating environments where this matters.
+
+ 4. Applied Giuseppe Maxia's patch to add additional features for controlling
+ PCRE options from within the C++ wrapper.
+
+ 5. Named capturing subpatterns were not being correctly counted when a pattern
+ was compiled. This caused two problems: (a) If there were more than 100
+ such subpatterns, the calculation of the memory needed for the whole
+ compiled pattern went wrong, leading to an overflow error. (b) Numerical
+ back references of the form \12, where the number was greater than 9, were
+ not recognized as back references, even though there were sufficient
+ previous subpatterns.
+
+ 6. Two minor patches to pcrecpp.cc in order to allow it to compile on older
+ versions of gcc, e.g. 2.95.4.
+
+
Version 6.1 21-Jun-05
---------------------
diff --git a/Makefile.in b/Makefile.in
index 74121ea..39a7abe 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -211,7 +211,7 @@ libpcrecpp.la: libpcre.la $(CPPOBJ)
# directory, not the source directory.
pcre_chartables.@OBJEXT@: pcre_chartables.c
- @$(LTCOMPILE) $(top_srcdir)/pcre_chartables.c
+ @$(LTCOMPILE) pcre_chartables.c
pcre_compile.@OBJEXT@: Makefile config.h pcre.h \
$(top_srcdir)/pcre_internal.h $(top_srcdir)/pcre_compile.c
@@ -329,23 +329,23 @@ pcre_stringpiece.@OBJEXT@: $(top_srcdir)/pcre_stringpiece.cc pcre_stringpiece.h
pcretest.@OBJEXT@: $(top_srcdir)/pcretest.c $(top_srcdir)/pcre_internal.h \
pcre.h config.h Makefile
- $(CC) -c $(CFLAGS) -I. $(UTF8) $(UCP) $(LINK_SIZE) $(top_srcdir)/pcretest.c
+ $(CC) -c $(CFLAGS) -I. -I$(top_srcdir) $(UTF8) $(UCP) $(LINK_SIZE) $(top_srcdir)/pcretest.c
pcrecpp_unittest.@OBJEXT@: $(top_srcdir)/pcrecpp_unittest.cc pcrecpp.h \
pcre_stringpiece.h pcre.h config.h Makefile
- $(CXX) -c $(CXXFLAGS) -I. $(UTF8) $(UCP) $(LINK_SIZE) $(top_srcdir)/pcrecpp_unittest.cc
+ $(CXX) -c $(CXXFLAGS) -I. -I$(top_srcdir) $(UTF8) $(UCP) $(LINK_SIZE) $(top_srcdir)/pcrecpp_unittest.cc
pcre_stringpiece_unittest.@OBJEXT@: $(top_srcdir)/pcre_stringpiece_unittest.cc \
pcre_stringpiece.h config.h Makefile
- $(CXX) -c $(CXXFLAGS) -I. $(UTF8) $(UCP) $(LINK_SIZE) $(top_srcdir)/pcre_stringpiece_unittest.cc
+ $(CXX) -c $(CXXFLAGS) -I. -I$(top_srcdir) $(UTF8) $(UCP) $(LINK_SIZE) $(top_srcdir)/pcre_stringpiece_unittest.cc
pcre_scanner_unittest.@OBJEXT@: $(top_srcdir)/pcre_scanner_unittest.cc \
$(top_srcdir)/pcre_scanner.h pcrecpp.h pcre_stringpiece.h \
pcre.h config.h Makefile
- $(CXX) -c $(CXXFLAGS) -I. $(UTF8) $(UCP) $(LINK_SIZE) $(top_srcdir)/pcre_scanner_unittest.cc
+ $(CXX) -c $(CXXFLAGS) -I. -I$(top_srcdir) $(UTF8) $(UCP) $(LINK_SIZE) $(top_srcdir)/pcre_scanner_unittest.cc
pcregrep.@OBJEXT@: $(top_srcdir)/pcregrep.c pcre.h Makefile config.h
- $(CC) -c $(CFLAGS) -I. $(UTF8) $(UCP) $(PCREGREP_OSTYPE) $(top_srcdir)/pcregrep.c
+ $(CC) -c $(CFLAGS) -I. -I$(top_srcdir) $(UTF8) $(UCP) $(PCREGREP_OSTYPE) $(top_srcdir)/pcregrep.c
# Some Windows-specific targets for MinGW. Do not use for Cygwin.
diff --git a/RunGrepTest.in b/RunGrepTest.in
index 57ac403..109267e 100644
--- a/RunGrepTest.in
+++ b/RunGrepTest.in
@@ -10,7 +10,10 @@ echo "Testing pcregrep"
# supported by pcregrep.
cf=diff
-testdata=@top_srcdir@/testdata
+if [ ! -d testdata ] ; then
+ ln -s @top_srcdir@/testdata testdata
+fi
+testdata=./testdata
./pcregrep -V 2>testtry
diff --git a/RunTest.in b/RunTest.in
index d3f3aa7..69f0665 100755
--- a/RunTest.in
+++ b/RunTest.in
@@ -6,7 +6,10 @@
# Run PCRE tests
cf=diff
-testdata=@top_srcdir@/testdata
+if [ ! -d testdata ] ; then
+ ln -s @top_srcdir@/testdata testdata
+fi
+testdata=./testdata
# Select which tests to run; if no selection, run all
diff --git a/configure b/configure
index c23f6ff..316ccaf 100755
--- a/configure
+++ b/configure
@@ -1505,8 +1505,8 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
PCRE_MAJOR=6
-PCRE_MINOR=1
-PCRE_DATE=21-Jun-2005
+PCRE_MINOR=2
+PCRE_DATE=01-Aug-2005
PCRE_VERSION=${PCRE_MAJOR}.${PCRE_MINOR}
diff --git a/configure.in b/configure.in
index 4f8c6ee..47fd8d5 100644
--- a/configure.in
+++ b/configure.in
@@ -21,8 +21,8 @@ dnl digits for minor numbers less than 10. There are unlikely to be
dnl that many releases anyway.
PCRE_MAJOR=6
-PCRE_MINOR=1
-PCRE_DATE=21-Jun-2005
+PCRE_MINOR=2
+PCRE_DATE=01-Aug-2005
PCRE_VERSION=${PCRE_MAJOR}.${PCRE_MINOR}
dnl Default values for miscellaneous macros
diff --git a/dftables.c b/dftables.c
index d6022f7..4807538 100644
--- a/dftables.c
+++ b/dftables.c
@@ -66,7 +66,7 @@ if (argc != 2)
return 1;
}
-f = fopen(argv[1], "w");
+f = fopen(argv[1], "wb");
if (f == NULL)
{
fprintf(stderr, "dftables: failed to open %s for writing\n", argv[1]);
diff --git a/doc/html/pcrecpp.html b/doc/html/pcrecpp.html
index 3f597b1..1d5acb7 100644
--- a/doc/html/pcrecpp.html
+++ b/doc/html/pcrecpp.html
@@ -18,10 +18,11 @@ man page, in case the conversion went wrong.
<li><a name="TOC3" href="#SEC3">MATCHING INTERFACE</a>
<li><a name="TOC4" href="#SEC4">PARTIAL MATCHES</a>
<li><a name="TOC5" href="#SEC5">UTF-8 AND THE MATCHING INTERFACE</a>
-<li><a name="TOC6" href="#SEC6">SCANNING TEXT INCREMENTALLY</a>
-<li><a name="TOC7" href="#SEC7">PARSING HEX/OCTAL/C-RADIX NUMBERS</a>
-<li><a name="TOC8" href="#SEC8">REPLACING PARTS OF STRINGS</a>
-<li><a name="TOC9" href="#SEC9">AUTHOR</a>
+<li><a name="TOC6" href="#SEC6">PASSING MODIFIERS TO THE REGULAR EXPRESSION ENGINE</a>
+<li><a name="TOC7" href="#SEC7">SCANNING TEXT INCREMENTALLY</a>
+<li><a name="TOC8" href="#SEC8">PARSING HEX/OCTAL/C-RADIX NUMBERS</a>
+<li><a name="TOC9" href="#SEC9">REPLACING PARTS OF STRINGS</a>
+<li><a name="TOC10" href="#SEC10">AUTHOR</a>
</ul>
<br><a name="SEC1" href="#TOC1">SYNOPSIS OF C++ WRAPPER</a><br>
<P>
@@ -31,9 +32,10 @@ man page, in case the conversion went wrong.
</P>
<br><a name="SEC2" href="#TOC1">DESCRIPTION</a><br>
<P>
-The C++ wrapper for PCRE was provided by Google Inc. This brief man page was
-constructed from the notes in the <i>pcrecpp.h</i> file, which should be
-consulted for further details.
+The C++ wrapper for PCRE was provided by Google Inc. Some additional
+functionality was added by Giuseppe Maxia. This brief man page was constructed
+from the notes in the <i>pcrecpp.h</i> file, which should be consulted for
+further details.
</P>
<br><a name="SEC3" href="#TOC1">MATCHING INTERFACE</a><br>
<P>
@@ -148,7 +150,97 @@ NOTE: The UTF8 flag is ignored if pcre was not configured with the
--enable-utf8 flag.
</PRE>
</P>
-<br><a name="SEC6" href="#TOC1">SCANNING TEXT INCREMENTALLY</a><br>
+<br><a name="SEC6" href="#TOC1">PASSING MODIFIERS TO THE REGULAR EXPRESSION ENGINE</a><br>
+<P>
+PCRE defines some modifiers to change the behavior of the regular expression
+engine. The C++ wrapper defines an auxiliary class, RE_Options, as a vehicle to
+pass such modifiers to a RE class. Currently, the following modifiers are
+supported:
+<pre>
+ modifier description Perl corresponding
+
+ PCRE_CASELESS case insensitive match /i
+ PCRE_MULTILINE multiple lines match /m
+ PCRE_DOTALL dot matches newlines /s
+ PCRE_DOLLAR_ENDONLY $ matches only at end N/A
+ PCRE_EXTRA strict escape parsing N/A
+ PCRE_EXTENDED ignore whitespaces /x
+ PCRE_UTF8 handles UTF8 chars built-in
+ PCRE_UNGREEDY reverses * and *? N/A
+ PCRE_NO_AUTO_CAPTURE disables capturing parens N/A (*)
+</pre>
+(*) Both Perl and PCRE allow non capturing parentheses by means of the
+"?:" modifier within the pattern itself. e.g. (?:ab|cd) does not
+capture, while (ab|cd) does.
+</P>
+<P>
+For a full account on how each modifier works, please check the
+PCRE API reference page.
+</P>
+<P>
+For each modifier, there are two member functions whose name is made
+out of the modifier in lowercase, without the "PCRE_" prefix. For
+instance, PCRE_CASELESS is handled by
+<pre>
+ bool caseless()
+</pre>
+which returns true if the modifier is set, and
+<pre>
+ RE_Options & set_caseless(bool)
+</pre>
+which sets or unsets the modifier. Moreover, PCRE_CONFIG_MATCH_LIMIT can be
+accessed through the <b>set_match_limit()</b> and <b>match_limit()</b> member
+functions. Setting <i>match_limit</i> to a non-zero value will limit the
+execution of pcre to keep it from doing bad things like blowing the stack or
+taking an eternity to return a result. A value of 5000 is good enough to stop
+stack blowup in a 2MB thread stack. Setting <i>match_limit</i> to zero disables
+match limiting.
+</P>
+<P>
+Normally, to pass one or more modifiers to a RE class, you declare
+a <i>RE_Options</i> object, set the appropriate options, and pass this
+object to a RE constructor. Example:
+<pre>
+ RE_Options opt;
+ opt.set_caseless(true);
+ if (RE("HELLO", opt).PartialMatch("hello world")) ...
+</pre>
+RE_Options has two constructors. The default constructor takes no arguments and
+creates a set of flags that are off by default. The optional parameter
+<i>option_flags</i> is to facilitate transfer of legacy code from C programs.
+This lets you do
+<pre>
+ RE(pattern,
+ RE_Options(PCRE_CASELESS|PCRE_MULTILINE)).PartialMatch(str);
+</pre>
+However, new code is better off doing
+<pre>
+ RE(pattern,
+ RE_Options().set_caseless(true).set_multiline(true))
+ .PartialMatch(str);
+</pre>
+If you are going to pass one of the most used modifiers, there are some
+convenience functions that return a RE_Options class with the
+appropriate modifier already set: <b>CASELESS()</b>, <b>UTF8()</b>,
+<b>MULTILINE()</b>, <b>DOTALL()</b>, and <b>EXTENDED()</b>.
+</P>
+<P>
+If you need to set several options at once, and you don't want to go through
+the pains of declaring a RE_Options object and setting several options, there
+is a parallel method that gives you such ability on the fly. You can concatenate
+several <b>set_xxxxx()</b> member functions, since each of them returns a
+reference to its class object. For example, to pass PCRE_CASELESS,
+PCRE_EXTENDED, and PCRE_MULTILINE to a RE with one statement, you may write:
+<pre>
+ RE(" ^ xyz \\s+ .* blah$",
+ RE_Options()
+ .set_caseless(true)
+ .set_extended(true)
+ .set_multiline(true)).PartialMatch(sometext);
+
+</PRE>
+</P>
+<br><a name="SEC7" href="#TOC1">SCANNING TEXT INCREMENTALLY</a><br>
<P>
The "Consume" operation may be useful if you want to repeatedly
match regular expressions at the front of a string and skip over
@@ -181,7 +273,7 @@ could extract all words from a string by repeatedly calling
pcrecpp::RE("(\\w+)").FindAndConsume(&input, &word)
</PRE>
</P>
-<br><a name="SEC7" href="#TOC1">PARSING HEX/OCTAL/C-RADIX NUMBERS</a><br>
+<br><a name="SEC8" href="#TOC1">PARSING HEX/OCTAL/C-RADIX NUMBERS</a><br>
<P>
By default, if you pass a pointer to a numeric value, the
corresponding text is interpreted as a base-10 number. You can
@@ -199,7 +291,7 @@ prefixes, but defaults to base-10.
</pre>
will leave 64 in a, b, c, and d.
</P>
-<br><a name="SEC8" href="#TOC1">REPLACING PARTS OF STRINGS</a><br>
+<br><a name="SEC9" href="#TOC1">REPLACING PARTS OF STRINGS</a><br>
<P>
You can replace the first match of "pattern" in "str" with "rewrite".
Within "rewrite", backslash-escaped digits (\1 to \9) can be
@@ -231,7 +323,7 @@ The non-matching portions of "text" are ignored. Returns true iff a match
occurred and the extraction happened successfully; if no match occurs, the
string is left unaffected.
</P>
-<br><a name="SEC9" href="#TOC1">AUTHOR</a><br>
+<br><a name="SEC10" href="#TOC1">AUTHOR</a><br>
<P>
The C++ wrapper was contributed by Google Inc.
<br>
diff --git a/doc/pcre.txt b/doc/pcre.txt
index 735f504..801bde2 100644
--- a/doc/pcre.txt
+++ b/doc/pcre.txt
@@ -4352,15 +4352,16 @@ SYNOPSIS OF C++ WRAPPER
DESCRIPTION
- The C++ wrapper for PCRE was provided by Google Inc. This brief man
- page was constructed from the notes in the pcrecpp.h file, which should
- be consulted for further details.
+ The C++ wrapper for PCRE was provided by Google Inc. Some additional
+ functionality was added by Giuseppe Maxia. This brief man page was con-
+ structed from the notes in the pcrecpp.h file, which should be con-
+ sulted for further details.
MATCHING INTERFACE
- The "FullMatch" operation checks that supplied text matches a supplied
- pattern exactly. If pointer arguments are supplied, it copies matched
+ The "FullMatch" operation checks that supplied text matches a supplied
+ pattern exactly. If pointer arguments are supplied, it copies matched
sub-strings that match sub-patterns into them.
Example: successful match
@@ -4374,10 +4375,10 @@ MATCHING INTERFACE
Example: creating a temporary RE object:
pcrecpp::RE("h.*o").FullMatch("hello");
- You can pass in a "const char*" or a "string" for "text". The examples
- below tend to use a const char*. You can, as in the different examples
- above, store the RE object explicitly in a variable or use a temporary
- RE object. The examples below use one mode or the other arbitrarily.
+ You can pass in a "const char*" or a "string" for "text". The examples
+ below tend to use a const char*. You can, as in the different examples
+ above, store the RE object explicitly in a variable or use a temporary
+ RE object. The examples below use one mode or the other arbitrarily.
Either could correctly be used for any of these examples.
You must supply extra pointer arguments to extract matched subpieces.
@@ -4403,7 +4404,7 @@ MATCHING INTERFACE
Example: fails because string cannot be stored in integer
!pcrecpp::RE("(.*)").FullMatch("ruby", &i);
- The provided pointer arguments can be pointers to any scalar numeric
+ The provided pointer arguments can be pointers to any scalar numeric
type, or one of:
string (matched piece is copied to string)
@@ -4411,7 +4412,7 @@ MATCHING INTERFACE
T (where "bool T::ParseFrom(const char*, int)" exists)
NULL (the corresponding matched sub-pattern is not copied)
- The function returns true iff all of the following conditions are sat-
+ The function returns true iff all of the following conditions are sat-
isfied:
a. "text" matches "pattern" exactly;
@@ -4425,14 +4426,14 @@ MATCHING INTERFACE
number of sub-patterns, "i"th captured sub-pattern is
ignored.
- The matching interface supports at most 16 arguments per call. If you
- need more, consider using the more general interface
+ The matching interface supports at most 16 arguments per call. If you
+ need more, consider using the more general interface
pcrecpp::RE::DoMatch. See pcrecpp.h for the signature for DoMatch.
PARTIAL MATCHES
- You can use the "PartialMatch" operation when you want the pattern to
+ You can use the "PartialMatch" operation when you want the pattern to
match any substring of the text.
Example: simple search for a string:
@@ -4447,13 +4448,13 @@ PARTIAL MATCHES
UTF-8 AND THE MATCHING INTERFACE
- By default, pattern and text are plain text, one byte per character.
- The UTF8 flag, passed to the constructor, causes both pattern and
+ By default, pattern and text are plain text, one byte per character.
+ The UTF8 flag, passed to the constructor, causes both pattern and
string to be treated as UTF-8 text, still a byte stream but potentially
- multiple bytes per character. In practice, the text is likelier to be
- UTF-8 than the pattern, but the match returned may depend on the UTF8
- flag, so always use it when matching UTF8 text. For example, "." will
- match one byte normally but with UTF8 set may match up to three bytes
+ multiple bytes per character. In practice, the text is likelier to be
+ UTF-8 than the pattern, but the match returned may depend on the UTF8
+ flag, so always use it when matching UTF8 text. For example, "." will
+ match one byte normally but with UTF8 set may match up to three bytes
of a multi-byte character.
Example:
@@ -4470,12 +4471,98 @@ UTF-8 AND THE MATCHING INTERFACE
--enable-utf8 flag.
+PASSING MODIFIERS TO THE REGULAR EXPRESSION ENGINE
+
+ PCRE defines some modifiers to change the behavior of the regular
+ expression engine. The C++ wrapper defines an auxiliary class,
+ RE_Options, as a vehicle to pass such modifiers to a RE class. Cur-
+ rently, the following modifiers are supported:
+
+ modifier description Perl corresponding
+
+ PCRE_CASELESS case insensitive match /i
+ PCRE_MULTILINE multiple lines match /m
+ PCRE_DOTALL dot matches newlines /s
+ PCRE_DOLLAR_ENDONLY $ matches only at end N/A
+ PCRE_EXTRA strict escape parsing N/A
+ PCRE_EXTENDED ignore whitespaces /x
+ PCRE_UTF8 handles UTF8 chars built-in
+ PCRE_UNGREEDY reverses * and *? N/A
+ PCRE_NO_AUTO_CAPTURE disables capturing parens N/A (*)
+
+ (*) Both Perl and PCRE allow non capturing parentheses by means of the
+ "?:" modifier within the pattern itself. e.g. (?:ab|cd) does not cap-
+ ture, while (ab|cd) does.
+
+ For a full account on how each modifier works, please check the PCRE
+ API reference page.
+
+ For each modifier, there are two member functions whose name is made
+ out of the modifier in lowercase, without the "PCRE_" prefix. For
+ instance, PCRE_CASELESS is handled by
+
+ bool caseless()
+
+ which returns true if the modifier is set, and
+
+ RE_Options & set_caseless(bool)
+
+ which sets or unsets the modifier. Moreover, PCRE_CONFIG_MATCH_LIMIT
+ can be accessed through the set_match_limit() and match_limit() member
+ functions. Setting match_limit to a non-zero value will limit the exe-
+ cution of pcre to keep it from doing bad things like blowing the stack
+ or taking an eternity to return a result. A value of 5000 is good
+ enough to stop stack blowup in a 2MB thread stack. Setting match_limit
+ to zero disables match limiting.
+
+ Normally, to pass one or more modifiers to a RE class, you declare a
+ RE_Options object, set the appropriate options, and pass this object to
+ a RE constructor. Example:
+
+ RE_Options opt;
+ opt.set_caseless(true);
+ if (RE("HELLO", opt).PartialMatch("hello world")) ...
+
+ RE_Options has two constructors. The default constructor takes no argu-
+ ments and creates a set of flags that are off by default. The optional
+ parameter option_flags is to facilitate transfer of legacy code from C
+ programs. This lets you do
+
+ RE(pattern,
+ RE_Options(PCRE_CASELESS|PCRE_MULTILINE)).PartialMatch(str);
+
+ However, new code is better off doing
+
+ RE(pattern,
+ RE_Options().set_caseless(true).set_multiline(true))
+ .PartialMatch(str);
+
+ If you are going to pass one of the most used modifiers, there are some
+ convenience functions that return a RE_Options class with the appropri-
+ ate modifier already set: CASELESS(), UTF8(), MULTILINE(), DOTALL(),
+ and EXTENDED().
+
+ If you need to set several options at once, and you don't want to go
+ through the pains of declaring a RE_Options object and setting several
+ options, there is a parallel method that gives you such ability on the
+ fly. You can concatenate several set_xxxxx() member functions, since
+ each of them returns a reference to its class object. For example, to
+ pass PCRE_CASELESS, PCRE_EXTENDED, and PCRE_MULTILINE to a RE with one
+ statement, you may write:
+
+ RE(" ^ xyz \\s+ .* blah$",
+ RE_Options()
+ .set_caseless(true)
+ .set_extended(true)
+ .set_multiline(true)).PartialMatch(sometext);
+
+
SCANNING TEXT INCREMENTALLY
- The "Consume" operation may be useful if you want to repeatedly match
+ The "Consume" operation may be useful if you want to repeatedly match
regular expressions at the front of a string and skip over them as they
- match. This requires use of the "StringPiece" type, which represents a
- sub-range of a real string. Like RE, StringPiece is defined in the
+ match. This requires use of the "StringPiece" type, which represents a
+ sub-range of a real string. Like RE, StringPiece is defined in the
pcrecpp namespace.
Example: read lines of the form "var = value" from a string.
@@ -4489,11 +4576,11 @@ SCANNING TEXT INCREMENTALLY
...;
}
- Each successful call to "Consume" will set "var/value", and also
+ Each successful call to "Consume" will set "var/value", and also
advance "input" so it points past the matched text.
- The "FindAndConsume" operation is similar to "Consume" but does not
- anchor your match at the beginning of the string. For example, you
+ The "FindAndConsume" operation is similar to "Consume" but does not
+ anchor your match at the beginning of the string. For example, you
could extract all words from a string by repeatedly calling
pcrecpp::RE("(\\w+)").FindAndConsume(&input, &word)
@@ -4502,10 +4589,10 @@ SCANNING TEXT INCREMENTALLY
PARSING HEX/OCTAL/C-RADIX NUMBERS
By default, if you pass a pointer to a numeric value, the corresponding
- text is interpreted as a base-10 number. You can instead wrap the
+ text is interpreted as a base-10 number. You can instead wrap the
pointer with a call to one of the operators Hex(), Octal(), or CRadix()
- to interpret the text in another base. The CRadix operator interprets
- C-style "0" (base-8) and "0x" (base-16) prefixes, but defaults to
+ to interpret the text in another base. The CRadix operator interprets
+ C-style "0" (base-8) and "0x" (base-16) prefixes, but defaults to
base-10.
Example:
@@ -4520,30 +4607,30 @@ PARSING HEX/OCTAL/C-RADIX NUMBERS
REPLACING PARTS OF STRINGS
- You can replace the first match of "pattern" in "str" with "rewrite".
- Within "rewrite", backslash-escaped digits (\1 to \9) can be used to
- insert text matching corresponding parenthesized group from the pat-
+ You can replace the first match of "pattern" in "str" with "rewrite".
+ Within "rewrite", backslash-escaped digits (\1 to \9) can be used to
+ insert text matching corresponding parenthesized group from the pat-
tern. \0 in "rewrite" refers to the entire matching text. For example:
string s = "yabba dabba doo";
pcrecpp::RE("b+").Replace("d", &s);
- will leave "s" containing "yada dabba doo". The result is true if the
+ will leave "s" containing "yada dabba doo". The result is true if the
pattern matches and a replacement occurs, false otherwise.
- GlobalReplace is like Replace except that it replaces all occurrences
- of the pattern in the string with the rewrite. Replacements are not
+ GlobalReplace is like Replace except that it replaces all occurrences
+ of the pattern in the string with the rewrite. Replacements are not
subject to re-matching. For example:
string s = "yabba dabba doo";
pcrecpp::RE("b+").GlobalReplace("d", &s);
- will leave "s" containing "yada dada doo". It returns the number of
+ will leave "s" containing "yada dada doo". It returns the number of
replacements made.
- Extract is like Replace, except that if the pattern matches, "rewrite"
- is copied into "out" (an additional argument) with substitutions. The
- non-matching portions of "text" are ignored. Returns true iff a match
+ Extract is like Replace, except that if the pattern matches, "rewrite"
+ is copied into "out" (an additional argument) with substitutions. The
+ non-matching portions of "text" are ignored. Returns true iff a match
occurred and the extraction happened successfully; if no match occurs,
the string is left unaffected.
diff --git a/doc/pcrecpp.3 b/doc/pcrecpp.3
index abf7334..78ac564 100644
--- a/doc/pcrecpp.3
+++ b/doc/pcrecpp.3
@@ -11,9 +11,10 @@ PCRE - Perl-compatible regular expressions.
.SH DESCRIPTION
.rs
.sp
-The C++ wrapper for PCRE was provided by Google Inc. This brief man page was
-constructed from the notes in the \fIpcrecpp.h\fP file, which should be
-consulted for further details.
+The C++ wrapper for PCRE was provided by Google Inc. Some additional
+functionality was added by Giuseppe Maxia. This brief man page was constructed
+from the notes in the \fIpcrecpp.h\fP file, which should be consulted for
+further details.
.
.
.SH "MATCHING INTERFACE"
@@ -130,6 +131,93 @@ NOTE: The UTF8 flag is ignored if pcre was not configured with the
--enable-utf8 flag.
.
.
+.SH "PASSING MODIFIERS TO THE REGULAR EXPRESSION ENGINE"
+.rs
+.sp
+PCRE defines some modifiers to change the behavior of the regular expression
+engine. The C++ wrapper defines an auxiliary class, RE_Options, as a vehicle to
+pass such modifiers to a RE class. Currently, the following modifiers are
+supported:
+.sp
+ modifier description Perl corresponding
+.sp
+ PCRE_CASELESS case insensitive match /i
+ PCRE_MULTILINE multiple lines match /m
+ PCRE_DOTALL dot matches newlines /s
+ PCRE_DOLLAR_ENDONLY $ matches only at end N/A
+ PCRE_EXTRA strict escape parsing N/A
+ PCRE_EXTENDED ignore whitespaces /x
+ PCRE_UTF8 handles UTF8 chars built-in
+ PCRE_UNGREEDY reverses * and *? N/A
+ PCRE_NO_AUTO_CAPTURE disables capturing parens N/A (*)
+.sp
+(*) Both Perl and PCRE allow non capturing parentheses by means of the
+"?:" modifier within the pattern itself. e.g. (?:ab|cd) does not
+capture, while (ab|cd) does.
+.P
+For a full account on how each modifier works, please check the
+PCRE API reference page.
+.P
+For each modifier, there are two member functions whose name is made
+out of the modifier in lowercase, without the "PCRE_" prefix. For
+instance, PCRE_CASELESS is handled by
+.sp
+ bool caseless()
+.sp
+which returns true if the modifier is set, and
+.sp
+ RE_Options & set_caseless(bool)
+.sp
+which sets or unsets the modifier. Moreover, PCRE_CONFIG_MATCH_LIMIT can be
+accessed through the \fBset_match_limit()\fR and \fBmatch_limit()\fR member
+functions. Setting \fImatch_limit\fR to a non-zero value will limit the
+execution of pcre to keep it from doing bad things like blowing the stack or
+taking an eternity to return a result. A value of 5000 is good enough to stop
+stack blowup in a 2MB thread stack. Setting \fImatch_limit\fR to zero disables
+match limiting.
+.P
+Normally, to pass one or more modifiers to a RE class, you declare
+a \fIRE_Options\fR object, set the appropriate options, and pass this
+object to a RE constructor. Example:
+.sp
+ RE_Options opt;
+ opt.set_caseless(true);
+ if (RE("HELLO", opt).PartialMatch("hello world")) ...
+.sp
+RE_Options has two constructors. The default constructor takes no arguments and
+creates a set of flags that are off by default. The optional parameter
+\fIoption_flags\fR is to facilitate transfer of legacy code from C programs.
+This lets you do
+.sp
+ RE(pattern,
+ RE_Options(PCRE_CASELESS|PCRE_MULTILINE)).PartialMatch(str);
+.sp
+However, new code is better off doing
+.sp
+ RE(pattern,
+ RE_Options().set_caseless(true).set_multiline(true))
+ .PartialMatch(str);
+.sp
+If you are going to pass one of the most used modifiers, there are some
+convenience functions that return a RE_Options class with the
+appropriate modifier already set: \fBCASELESS()\fR, \fBUTF8()\fR,
+\fBMULTILINE()\fR, \fBDOTALL()\fR, and \fBEXTENDED()\fR.
+.P
+If you need to set several options at once, and you don't want to go through
+the pains of declaring a RE_Options object and setting several options, there
+is a parallel method that gives you such ability on the fly. You can concatenate
+several \fBset_xxxxx()\fR member functions, since each of them returns a
+reference to its class object. For example, to pass PCRE_CASELESS,
+PCRE_EXTENDED, and PCRE_MULTILINE to a RE with one statement, you may write:
+.sp
+ RE(" ^ xyz \e\es+ .* blah$",
+ RE_Options()
+ .set_caseless(true)
+ .set_extended(true)
+ .set_multiline(true)).PartialMatch(sometext);
+.sp
+.
+.
.SH "SCANNING TEXT INCREMENTALLY"
.rs
.sp
diff --git a/pcre_compile.c b/pcre_compile.c
index c592a49..2289952 100644
--- a/pcre_compile.c
+++ b/pcre_compile.c
@@ -698,7 +698,18 @@ read_repeat_counts(const uschar *p, int *minp, int *maxp, int *errorcodeptr)
int min = 0;
int max = -1;
+/* Read the minimum value and do a paranoid check: a negative value indicates
+an integer overflow. */
+
while ((digitab[*p] & ctype_digit) != 0) min = min * 10 + *p++ - '0';
+if (min < 0 || min > 65535)
+ {
+ *errorcodeptr = ERR5;
+ return p;
+ }
+
+/* Read the maximum value if there is one, and again do a paranoid check on its size.
+Also, max must not be less than min. */
if (*p == '}') max = min; else
{
@@ -706,6 +717,11 @@ if (*p == '}') max = min; else
{
max = 0;
while((digitab[*p] & ctype_digit) != 0) max = max * 10 + *p++ - '0';
+ if (max < 0 || max > 65535)
+ {
+ *errorcodeptr = ERR5;
+ return p;
+ }
if (max < min)
{
*errorcodeptr = ERR4;
@@ -714,16 +730,11 @@ if (*p == '}') max = min; else
}
}
-/* Do paranoid checks, then fill in the required variables, and pass back the
-pointer to the terminating '}'. */
+/* Fill in the required variables, and pass back the pointer to the terminating
+'}'. */
-if (min > 65535 || max > 65535)
- *errorcodeptr = ERR5;
-else
- {
- *minp = min;
- *maxp = max;
- }
+*minp = min;
+*maxp = max;
return p;
}
@@ -3856,6 +3867,7 @@ BOOL utf8;
BOOL class_utf8;
#endif
BOOL inescq = FALSE;
+BOOL capturing;
unsigned int brastackptr = 0;
size_t size;
uschar *code;
@@ -4410,6 +4422,7 @@ while ((c = *(++ptr)) != 0)
case '(':
branch_newextra = 0;
bracket_length = 1 + LINK_SIZE;
+ capturing = FALSE;
/* Handle special forms of bracket, which all start (? */
@@ -4497,6 +4510,9 @@ while ((c = *(++ptr)) != 0)
case 'P':
ptr += 3;
+
+ /* Handle the definition of a named subpattern */
+
if (*ptr == '<')
{
const uschar *p; /* Don't amalgamate; some compilers */
@@ -4509,9 +4525,12 @@ while ((c = *(++ptr)) != 0)
}
name_count++;
if (ptr - p > max_name_size) max_name_size = (ptr - p);
+ capturing = TRUE; /* Named parentheses are always capturing */
break;
}
+ /* Handle back references and recursive calls to named subpatterns */
+
if (*ptr == '=' || *ptr == '>')
{
while ((compile_block.ctypes[*(++ptr)] & ctype_word) != 0);
@@ -4695,18 +4714,24 @@ while ((c = *(++ptr)) != 0)
continue;
}
- /* If options were terminated by ':' control comes here. Fall through
- to handle the group below. */
+ /* If options were terminated by ':' control comes here. This is a
+ non-capturing group with an options change. There is nothing more that
+ needs to be done because "capturing" is already set FALSE by default;
+ we can just fall through. */
+
}
}
- /* Extracting brackets must be counted so we can process escapes in a
- Perlish way. If the number exceeds EXTRACT_BASIC_MAX we are going to
- need an additional 3 bytes of store per extracting bracket. However, if
- PCRE_NO_AUTO)CAPTURE is set, unadorned brackets become non-capturing, so we
- must leave the count alone (it will aways be zero). */
+ /* Ordinary parentheses, not followed by '?', are capturing unless
+ PCRE_NO_AUTO_CAPTURE is set. */
+
+ else capturing = (options & PCRE_NO_AUTO_CAPTURE) == 0;
+
+ /* Capturing brackets must be counted so we can process escapes in a
+ Perlish way. If the number exceeds EXTRACT_BASIC_MAX we are going to need
+ an additional 3 bytes of memory per capturing bracket. */
- else if ((options & PCRE_NO_AUTO_CAPTURE) == 0)
+ if (capturing)
{
bracount++;
if (bracount > EXTRACT_BASIC_MAX) bracket_length += 3;
diff --git a/pcrecpp.cc b/pcrecpp.cc
index 0876abf..0ffd221 100644
--- a/pcrecpp.cc
+++ b/pcrecpp.cc
@@ -36,6 +36,7 @@
#include <assert.h>
#include <errno.h>
#include <string>
+#include <algorithm>
#include "config.h"
// We need this to compile the proper dll on windows/msys. This is copied
// from pcre_internal.h. It would probably be better just to include that.
@@ -97,8 +98,7 @@ RE::~RE() {
pcre* RE::Compile(Anchor anchor) {
// First, convert RE_Options into pcre options
int pcre_options = 0;
- if (options_.utf8())
- pcre_options |= PCRE_UTF8;
+ pcre_options = options_.all_options();
// Special treatment for anchoring. This is needed because at
// runtime pcre only provides an option for anchoring at the
@@ -378,7 +378,7 @@ bool RE::Extract(const StringPiece& rewrite,
int matches = TryMatch(text, 0, UNANCHORED, vec, kVecSize);
if (matches == 0)
return false;
- out->clear();
+ out->erase();
return Rewrite(out, rewrite, text, vec, matches);
}
diff --git a/pcrecpp.h.in b/pcrecpp.h.in
index c0d3050..c1da0c8 100644
--- a/pcrecpp.h.in
+++ b/pcrecpp.h.in
@@ -28,6 +28,7 @@
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: Sanjay Ghemawat
+// Support for PCRE_XXX modifiers added by Giuseppe Maxia, July 2005
#ifndef _PCRE_REGEXP_H
#define _PCRE_REGEXP_H
@@ -159,6 +160,90 @@
// --enable-utf8 flag.
//
// -----------------------------------------------------------------------
+// PASSING MODIFIERS TO THE REGULAR EXPRESSION ENGINE
+//
+// PCRE defines some modifiers to change the behavior of the regular
+// expression engine.
+// The C++ wrapper defines an auxiliary class, RE_Options, as a vehicle
+// to pass such modifiers to a RE class.
+//
+// Currently, the following modifiers are supported
+//
+// modifier description Perl corresponding
+//
+// PCRE_CASELESS case insensitive match /i
+// PCRE_MULTILINE multiple lines match /m
+// PCRE_DOTALL dot matches newlines /s
+// PCRE_DOLLAR_ENDONLY $ matches only at end N/A
+// PCRE_EXTRA strict escape parsing N/A
+// PCRE_EXTENDED ignore whitespaces /x
+// PCRE_UTF8 handles UTF8 chars built-in
+// PCRE_UNGREEDY reverses * and *? N/A
+// PCRE_NO_AUTO_CAPTURE disables capturing parens N/A (*)
+//
+// (For a full account on how each modifier works, please check the
+// PCRE API reference manual).
+//
+// (*) Both Perl and PCRE allow non-capturing parentheses by means of the
+// "?:" modifier within the pattern itself. e.g. (?:ab|cd) does not
+// capture, while (ab|cd) does.
+//
+// For each modifier, there are two member functions whose name is made
+// out of the modifier in lowercase, without the "PCRE_" prefix. For
+// instance, PCRE_CASELESS is handled by
+// bool caseless(),
+// which returns true if the modifier is set, and
+// RE_Options & set_caseless(bool),
+// which sets or unsets the modifier.
+//
+// Moreover, PCRE_CONFIG_MATCH_LIMIT can be accessed through the
+// set_match_limit() and match_limit() member functions.
+// Setting match_limit to a non-zero value will limit the execution of
+// pcre to keep it from doing bad things like blowing the stack or taking
+// an eternity to return a result. A value of 5000 is good enough to stop
+// stack blowup in a 2MB thread stack. Setting match_limit to zero will
+// disable match limiting.
+//
+// Normally, to pass one or more modifiers to a RE class, you declare
+// a RE_Options object, set the appropriate options, and pass this
+// object to a RE constructor. Example:
+//
+// RE_Options opt;
+// opt.set_caseless(true);
+//
+// if (RE("HELLO", opt).PartialMatch("hello world")) ...
+//
+// RE_Options has two constructors. The default constructor takes no
+// arguments and creates a set of flags that are off by default.
+//
+// The second constructor takes an 'option_flags' argument, to facilitate
+// transfer of legacy code from C programs. This lets you do
+// RE(pattern, RE_Options(PCRE_CASELESS|PCRE_MULTILINE)).PartialMatch(str);
+//
+// But new code is better off doing
+// RE(pattern,
+// RE_Options().set_caseless(true).set_multiline(true)).PartialMatch(str);
+// (See below)
+//
+// If you are going to pass one of the most used modifiers, there are some
+// convenience functions that return a RE_Options class with the
+// appropriate modifier already set:
+// CASELESS(), UTF8(), MULTILINE(), DOTALL(), EXTENDED()
+//
+// If you need to set several options at once, and you don't want to go
+// through the pains of declaring a RE_Options object and setting several
+// options, there is a parallel method that gives you such ability on the
+// fly. You can concatenate several set_xxxxx member functions, since each
+// of them returns a reference to its class object. e.g.: to pass
+// PCRE_CASELESS, PCRE_EXTENDED, and PCRE_MULTILINE to a RE with one
+// statement, you may write
+//
+// RE(" ^ xyz \\s+ .* blah$", RE_Options()
+// .set_caseless(true)
+// .set_extended(true)
+// .set_multiline(true)).PartialMatch(sometext);
+//
+// -----------------------------------------------------------------------
// SCANNING TEXT INCREMENTALLY
//
// The "Consume" operation may be useful if you want to repeatedly
@@ -245,6 +330,13 @@
namespace pcrecpp {
+#define PCRE_SET_OR_CLEAR(b, o) \
+ if (b) all_options_ |= (o); else all_options_ &= ~(o); \
+ return *this
+
+// Evaluates to true when every bit of the option mask "o" is set in
+// all_options_. The argument and the whole expansion are parenthesized so the
+// macro stays correct with compound arguments (e.g. A|B) and when it is used
+// inside a larger expression (e.g. !PCRE_IS_SET(x)).
+#define PCRE_IS_SET(o) \
+ ((all_options_ & (o)) == (o))
+
// We convert user-passed pointers into special Arg objects
class Arg;
extern Arg no_arg;
@@ -252,44 +344,128 @@ extern Arg no_arg;
/***** Compiling regular expressions: the RE class *****/
// RE_Options allow you to set options to be passed along to pcre,
-// along with other options we put on top of pcre. Only UTF and
-// match_limit are supported now. Setting match_limit
-// to a non-zero value will limit the executation of pcre to
-// keep it from doing bad things like blowing the stack or taking
-// an eternity to return a result. A value of 5000 is good enough
-// to stop stack blowup in a 2MB thread stack.
-// Setting match_limit to zero will disable match limiting.
+// along with other options we put on top of pcre.
+// Only 9 modifiers, plus match_limit are supported now.
class RE_Options {
public:
// constructor
- RE_Options() : match_limit_(0), utf8_(false) {}
+ RE_Options() : match_limit_(0), all_options_(0) {}
+
+ // alternative constructor.
+ // To facilitate transfer of legacy code from C programs
+ //
+ // This lets you do
+ // RE(pattern, RE_Options(PCRE_CASELESS|PCRE_MULTILINE)).PartialMatch(str);
+ // But new code is better off doing
+ // RE(pattern,
+ // RE_Options().set_caseless(true).set_multiline(true)).PartialMatch(str);
+ RE_Options(int option_flags) : match_limit_(0), all_options_ (option_flags) {}
// we're fine with the default destructor, copy constructor, etc.
// accessors and mutators
int match_limit() const { return match_limit_; };
- void set_match_limit(int limit) {
+ RE_Options &set_match_limit(int limit) {
match_limit_ = limit;
+ return *this;
+ }
+
+ bool caseless() const {
+ return PCRE_IS_SET(PCRE_CASELESS);
+ }
+ RE_Options &set_caseless(bool x) {
+ PCRE_SET_OR_CLEAR(x, PCRE_CASELESS);
+ }
+
+ bool multiline() const {
+ return PCRE_IS_SET(PCRE_MULTILINE);
+ }
+ RE_Options &set_multiline(bool x) {
+ PCRE_SET_OR_CLEAR(x, PCRE_MULTILINE);
}
- bool utf8() const { return utf8_; }
- void set_utf8(bool u) {
- utf8_ = u;
+ bool dotall() const {
+ return PCRE_IS_SET(PCRE_DOTALL);
+ }
+ RE_Options &set_dotall(bool x) {
+ PCRE_SET_OR_CLEAR(x,PCRE_DOTALL);
+ }
+
+ bool extended() const {
+ return PCRE_IS_SET(PCRE_EXTENDED);
+ }
+ RE_Options &set_extended(bool x) {
+ PCRE_SET_OR_CLEAR(x,PCRE_EXTENDED);
+ }
+
+ bool dollar_endonly() const {
+ return PCRE_IS_SET(PCRE_DOLLAR_ENDONLY);
+ }
+ RE_Options &set_dollar_endonly(bool x) {
+ PCRE_SET_OR_CLEAR(x,PCRE_DOLLAR_ENDONLY);
+ }
+
+ bool extra() const {
+ return PCRE_IS_SET( PCRE_EXTRA);
+ }
+ RE_Options &set_extra(bool x) {
+ PCRE_SET_OR_CLEAR(x, PCRE_EXTRA);
+ }
+
+ bool ungreedy() const {
+ return PCRE_IS_SET(PCRE_UNGREEDY);
+ }
+ RE_Options &set_ungreedy(bool x) {
+ PCRE_SET_OR_CLEAR(x, PCRE_UNGREEDY);
+ }
+
+ bool utf8() const {
+ return PCRE_IS_SET(PCRE_UTF8);
+ }
+ RE_Options &set_utf8(bool x) {
+ PCRE_SET_OR_CLEAR(x, PCRE_UTF8);
+ }
+
+ bool no_auto_capture() const {
+ return PCRE_IS_SET(PCRE_NO_AUTO_CAPTURE);
+ }
+ RE_Options &set_no_auto_capture(bool x) {
+ PCRE_SET_OR_CLEAR(x, PCRE_NO_AUTO_CAPTURE);
+ }
+
+ RE_Options &set_all_options(int opt) {
+ all_options_ = opt;
+ return *this;
+ }
+ int all_options() const {
+ return all_options_ ;
}
// TODO: add other pcre flags
private:
int match_limit_;
- bool utf8_;
+ int all_options_;
};
// These functions return some common RE_Options
static inline RE_Options UTF8() {
- RE_Options options;
- options.set_utf8(true);
- return options;
+ return RE_Options().set_utf8(true);
}
+static inline RE_Options CASELESS() {
+ return RE_Options().set_caseless(true);
+}
+static inline RE_Options MULTILINE() {
+ return RE_Options().set_multiline(true);
+}
+
+static inline RE_Options DOTALL() {
+ return RE_Options().set_dotall(true);
+}
+
+static inline RE_Options EXTENDED() {
+ return RE_Options().set_extended(true);
+}
// Interface for regular expression matching. Also corresponds to a
// pre-compiled regular expression. An "RE" object is safe for
@@ -600,8 +776,11 @@ MAKE_INTEGER_PARSER(long long, longlong);
MAKE_INTEGER_PARSER(unsigned long long, ulonglong);
#endif
+#undef PCRE_IS_SET
+#undef PCRE_SET_OR_CLEAR
#undef MAKE_INTEGER_PARSER
} // namespace pcrecpp
+
#endif /* _PCRE_REGEXP_H */
diff --git a/pcrecpp_unittest.cc b/pcrecpp_unittest.cc
index 000c12e..6a03744 100644
--- a/pcrecpp_unittest.cc
+++ b/pcrecpp_unittest.cc
@@ -43,6 +43,8 @@ using pcrecpp::Hex;
using pcrecpp::Octal;
using pcrecpp::CRadix;
+static bool VERBOSE_TEST = false;
+
// CHECK dies with a fatal error if condition is not true. It is *not*
// controlled by NDEBUG, so the check will be executed regardless of
// compilation mode. Therefore, it is safe to do things like:
@@ -363,6 +365,227 @@ static void TestRecursion(int size, const char *pattern, int match_limit) {
re.FullMatch(domain);
}
+//
+// Options tests contributed by
+// Giuseppe Maxia, CTO, Stardata s.r.l.
+// July 2005
+//
+// Test helper: builds an RE from "regex" and "options", runs it against "str"
+// with one string output argument, and checks that the string ends up equal
+// to "expected".
+//   option_name  label printed in the progress message
+//   full         true to use FullMatch, false to use PartialMatch
+// The match's return value is ignored; "captured" starts out empty, so only
+// the final contents of the output string are compared.
+static void GetOneOptionResult(
+ const char *option_name,
+ const char *regex,
+ const char *str,
+ RE_Options options,
+ bool full,
+ string expected) {
+
+ printf("Testing Option <%s>\n", option_name);
+ // Extra detail is printed only when VERBOSE_TEST is enabled.
+ if(VERBOSE_TEST)
+ printf("/%s/ finds \"%s\" within \"%s\" \n",
+ regex,
+ expected.c_str(),
+ str);
+ string captured("");
+ if (full)
+ RE(regex,options).FullMatch(str, &captured);
+ else
+ RE(regex,options).PartialMatch(str, &captured);
+ CHECK_EQ(captured, expected);
+}
+
+// Test helper: builds an RE from "regex" and "options" and checks whether it
+// matches "str".
+//   option_name  label printed in the progress message
+//   full         true to use FullMatch, false to use PartialMatch
+//   assertive    true (default) to require a match, false to require that
+//                the match fails
+static void TestOneOption(
+ const char *option_name,
+ const char *regex,
+ const char *str,
+ RE_Options options,
+ bool full,
+ bool assertive = true) {
+
+ printf("Testing Option <%s>\n", option_name);
+ // Extra detail is printed only when VERBOSE_TEST is enabled.
+ if (VERBOSE_TEST)
+ printf("'%s' %s /%s/ \n",
+ str,
+ (assertive? "matches" : "doesn't match"),
+ regex);
+ if (assertive) {
+ // Positive case: the match must succeed.
+ if (full)
+ CHECK(RE(regex,options).FullMatch(str));
+ else
+ CHECK(RE(regex,options).PartialMatch(str));
+ } else {
+ // Negative case: the match must fail.
+ if (full)
+ CHECK(!RE(regex,options).FullMatch(str));
+ else
+ CHECK(!RE(regex,options).PartialMatch(str));
+ }
+}
+
+static void Test_CASELESS() {
+ RE_Options options;
+ RE_Options options2;
+
+ options.set_caseless(true);
+ TestOneOption("CASELESS (class)", "HELLO", "hello", options, false);
+ TestOneOption("CASELESS (class2)", "HELLO", "hello", options2.set_caseless(true), false);
+ TestOneOption("CASELESS (class)", "^[A-Z]+$", "Hello", options, false);
+
+ TestOneOption("CASELESS (function)", "HELLO", "hello", pcrecpp::CASELESS(), false);
+ TestOneOption("CASELESS (function)", "^[A-Z]+$", "Hello", pcrecpp::CASELESS(), false);
+ options.set_caseless(false);
+ TestOneOption("no CASELESS", "HELLO", "hello", options, false, false);
+}
+
+static void Test_MULTILINE() {
+ RE_Options options;
+ RE_Options options2;
+ const char *str = "HELLO\n" "cruel\n" "world\n";
+
+ options.set_multiline(true);
+ TestOneOption("MULTILINE (class)", "^cruel$", str, options, false);
+ TestOneOption("MULTILINE (class2)", "^cruel$", str, options2.set_multiline(true), false);
+ TestOneOption("MULTILINE (function)", "^cruel$", str, pcrecpp::MULTILINE(), false);
+ options.set_multiline(false);
+ TestOneOption("no MULTILINE", "^cruel$", str, options, false, false);
+}
+
+static void Test_DOTALL() {
+ RE_Options options;
+ RE_Options options2;
+ const char *str = "HELLO\n" "cruel\n" "world";
+
+ options.set_dotall(true);
+ TestOneOption("DOTALL (class)", "HELLO.*world", str, options, true);
+ TestOneOption("DOTALL (class2)", "HELLO.*world", str, options2.set_dotall(true), true);
+ TestOneOption("DOTALL (function)", "HELLO.*world", str, pcrecpp::DOTALL(), true);
+ options.set_dotall(false);
+ TestOneOption("no DOTALL", "HELLO.*world", str, options, true, false);
+}
+
+static void Test_DOLLAR_ENDONLY() {
+ RE_Options options;
+ RE_Options options2;
+ const char *str = "HELLO world\n";
+
+ TestOneOption("no DOLLAR_ENDONLY", "world$", str, options, false);
+ options.set_dollar_endonly(true);
+ TestOneOption("DOLLAR_ENDONLY 1", "world$", str, options, false, false);
+ TestOneOption("DOLLAR_ENDONLY 2", "world$", str, options2.set_dollar_endonly(true), false, false);
+}
+
+static void Test_EXTRA() {
+ RE_Options options;
+ const char *str = "HELLO";
+
+ options.set_extra(true);
+ TestOneOption("EXTRA 1", "\\HELL\\O", str, options, true, false );
+ TestOneOption("EXTRA 2", "\\HELL\\O", str, RE_Options().set_extra(true), true, false );
+ options.set_extra(false);
+ TestOneOption("no EXTRA", "\\HELL\\O", str, options, true );
+}
+
+static void Test_EXTENDED() {
+ RE_Options options;
+ RE_Options options2;
+ const char *str = "HELLO world";
+
+ options.set_extended(true);
+ TestOneOption("EXTENDED (class)", "HELLO world", str, options, false, false);
+ TestOneOption("EXTENDED (class2)", "HELLO world", str, options2.set_extended(true), false, false);
+ TestOneOption("EXTENDED (class)",
+ "^ HE L{2} O "
+ "\\s+ "
+ "\\w+ $ ",
+ str,
+ options,
+ false);
+
+ TestOneOption("EXTENDED (function)", "HELLO world", str, pcrecpp::EXTENDED(), false, false);
+ TestOneOption("EXTENDED (function)",
+ "^ HE L{2} O "
+ "\\s+ "
+ "\\w+ $ ",
+ str,
+ pcrecpp::EXTENDED(),
+ false);
+
+ options.set_extended(false);
+ TestOneOption("no EXTENDED", "HELLO world", str, options, false);
+}
+
+// Checks PCRE_NO_AUTO_CAPTURE: plain parentheses capture when the option is
+// off, and no longer provide a group once an RE is built with the option on.
+static void Test_NO_AUTO_CAPTURE() {
+ RE_Options options;
+ const char *str = "HELLO world";
+ string captured;
+
+ printf("Testing Option <no NO_AUTO_CAPTURE>\n");
+ if (VERBOSE_TEST)
+ printf("parentheses capture text\n");
+ RE re("(world|universe)$", options);
+ CHECK(re.Extract("\\1", str , &captured));
+ CHECK_EQ(captured, "world");
+ options.set_no_auto_capture(true);
+ printf("testing Option <NO_AUTO_CAPTURE>\n");
+ if (VERBOSE_TEST)
+ printf("parentheses do not capture text\n");
+ // "re" above was constructed before the option was set, so reusing it would
+ // only repeat the capturing case. Build a new RE from the updated options.
+ // With no capturing group, the "\\1" rewrite has nothing to refer to, so
+ // Extract is expected to report failure.
+ RE re_no_capture("(world|universe)$", options);
+ CHECK(!re_no_capture.Extract("\\1", str, &captured));
+}
+
+static void Test_UNGREEDY() {
+ RE_Options options;
+ const char *str = "HELLO, 'this' is the 'world'";
+
+ options.set_ungreedy(true);
+ GetOneOptionResult("UNGREEDY 1", "('.*')", str, options, false, "'this'" );
+ GetOneOptionResult("UNGREEDY 2", "('.*')", str, RE_Options().set_ungreedy(true), false, "'this'" );
+ GetOneOptionResult("UNGREEDY", "('.*?')", str, options, false, "'this' is the 'world'" );
+
+ options.set_ungreedy(false);
+ GetOneOptionResult("no UNGREEDY", "('.*')", str, options, false, "'this' is the 'world'" );
+ GetOneOptionResult("no UNGREEDY", "('.*?')", str, options, false, "'this'" );
+}
+
+static void Test_all_options() {
+ const char *str = "HELLO\n" "cruel\n" "world";
+ RE_Options options;
+ options.set_all_options(PCRE_CASELESS | PCRE_DOTALL);
+
+ TestOneOption("all_options (CASELESS|DOTALL)", "^hello.*WORLD", str , options, false);
+ options.set_all_options(0);
+ TestOneOption("all_options (0)", "^hello.*WORLD", str , options, false, false);
+ options.set_all_options(PCRE_MULTILINE | PCRE_EXTENDED);
+
+ TestOneOption("all_options (MULTILINE|EXTENDED)", " ^ c r u e l $ ", str, options, false);
+ TestOneOption("all_options (MULTILINE|EXTENDED) with constructor",
+ " ^ c r u e l $ ",
+ str,
+ RE_Options(PCRE_MULTILINE | PCRE_EXTENDED),
+ false);
+
+ TestOneOption("all_options (MULTILINE|EXTENDED) with concatenation",
+ " ^ c r u e l $ ",
+ str,
+ RE_Options()
+ .set_multiline(true)
+ .set_extended(true),
+ false);
+
+ options.set_all_options(0);
+ TestOneOption("all_options (0)", "^ c r u e l $", str, options, false, false);
+
+}
+
+static void TestOptions() {
+ printf("Testing Options\n");
+ Test_CASELESS();
+ Test_MULTILINE();
+ Test_DOTALL();
+ Test_DOLLAR_ENDONLY();
+ Test_EXTENDED();
+ Test_NO_AUTO_CAPTURE();
+ Test_UNGREEDY();
+ Test_EXTRA();
+ Test_all_options();
+}
int main(int argc, char** argv) {
// Treat any flag as --help
@@ -807,6 +1030,11 @@ int main(int argc, char** argv) {
TestRecursion(bytes, "ab.", matchlimit);
TestRecursion(bytes, "abc.", matchlimit);
+ // Test Options
+ if (getenv("VERBOSE_TEST") != NULL)
+ VERBOSE_TEST = true;
+ TestOptions();
+
// Done
printf("OK\n");
diff --git a/testdata/grepoutput b/testdata/grepoutput
index 20a6f79..27ab7e4 100644
--- a/testdata/grepoutput
+++ b/testdata/grepoutput
@@ -1,4 +1,4 @@
-pcregrep version 4.0 07-Jun-2005 using PCRE version 6.1 21-Jun-2005
+pcregrep version 4.0 07-Jun-2005 using PCRE version 6.2 01-Aug-2005
---------------------------- Test 1 ------------------------------
PATTERN at the start of a line.
In the middle of a line, PATTERN appears.
diff --git a/testdata/testinput2 b/testdata/testinput2
index dcb5609..befb65a 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -1428,5 +1428,26 @@
AbCd
** Failers
abcd
+
+/a{11111111111111111111}/
+
+/(){64294967295}/
+
+/(){2,4294967295}/
+
+"(?i:a)(?i:b)(?i:c)(?i:d)(?i:e)(?i:f)(?i:g)(?i:h)(?i:i)(?i:j)(k)(?i:l)A\1B"
+ abcdefghijklAkB
+
+"(?P<n0>a)(?P<n1>b)(?P<n2>c)(?P<n3>d)(?P<n4>e)(?P<n5>f)(?P<n6>g)(?P<n7>h)(?P<n8>i)(?P<n9>j)(?P<n10>k)(?P<n11>l)A\11B"
+ abcdefghijklAkB
+
+"(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(l)A\11B"
+ abcdefghijklAkB
+
+"(?P<name0>a)(?P<name1>a)(?P<name2>a)(?P<name3>a)(?P<name4>a)(?P<name5>a)(?P<name6>a)(?P<name7>a)(?P<name8>a)(?P<name9>a)(?P<name10>a)(?P<name11>a)(?P<name12>a)(?P<name13>a)(?P<name14>a)(?P<name15>a)(?P<name16>a)(?P<name17>a)(?P<name18>a)(?P<name19>a)(?P<name20>a)(?P<name21>a)(?P<name22>a)(?P<name23>a)(?P<name24>a)(?P<name25>a)(?P<name26>a)(?P<name27>a)(?P<name28>a)(?P<name29>a)(?P<name30>a)(?P<name31>a)(?P<name32>a)(?P<name33>a)(?P<name34>a)(?P<name35>a)(?P<name36>a)(?P<name37>a)(?P<name38>a)(?P<name39>a)(?P<name40>a)(?P<name41>a)(?P<name42>a)(?P<name43>a)(?P<name44>a)(?P<name45>a)(?P<name46>a)(?P<name47>a)(?P<name48>a)(?P<name49>a)(?P<name50>a)(?P<name51>a)(?P<name52>a)(?P<name53>a)(?P<name54>a)(?P<name55>a)(?P<name56>a)(?P<name57>a)(?P<name58>a)(?P<name59>a)(?P<name60>a)(?P<name61>a)(?P<name62>a)(?P<name63>a)(?P<name64>a)(?P<name65>a)(?P<name66>a)(?P<name67>a)(?P<name68>a)(?P<name69>a)(?P<name70>a)(?P<name71>a)(?P<name72>a)(?P<name73>a)(?P<name74>a)(?P<name75>a)(?P<name76>a)(?P<name77>a)(?P<name78>a)(?P<name79>a)(?P<name80>a)(?P<name81>a)(?P<name82>a)(?P<name83>a)(?P<name84>a)(?P<name85>a)(?P<name86>a)(?P<name87>a)(?P<name88>a)(?P<name89>a)(?P<name90>a)(?P<name91>a)(?P<name92>a)(?P<name93>a)(?P<name94>a)(?P<name95>a)(?P<name96>a)(?P<name97>a)(?P<name98>a)(?P<name99>a)(?P<name100>a)"
+ aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+
+"(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)"
+ aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
/ End of testinput2 /
diff --git a/testdata/testoutput1 b/testdata/testoutput1
index 0eef50e..c823162 100644
--- a/testdata/testoutput1
+++ b/testdata/testoutput1
@@ -1,4 +1,4 @@
-PCRE version 6.1 21-Jun-2005
+PCRE version 6.2 01-Aug-2005
/the quick brown fox/
the quick brown fox
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index 823c67a..8f078b8 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -1,4 +1,4 @@
-PCRE version 6.1 21-Jun-2005
+PCRE version 6.2 01-Aug-2005
/(a)b|/
Capturing subpattern count = 1
@@ -5715,6 +5715,229 @@ Need char = 'd'
No match
abcd
No match
+
+/a{11111111111111111111}/
+Failed: number too big in {} quantifier at offset 22
+
+/(){64294967295}/
+Failed: number too big in {} quantifier at offset 14
+
+/(){2,4294967295}/
+Failed: number too big in {} quantifier at offset 15
+
+"(?i:a)(?i:b)(?i:c)(?i:d)(?i:e)(?i:f)(?i:g)(?i:h)(?i:i)(?i:j)(k)(?i:l)A\1B"
+Capturing subpattern count = 1
+Max back reference = 1
+No options
+Case state changes
+First char = 'a' (caseless)
+Need char = 'B'
+ abcdefghijklAkB
+ 0: abcdefghijklAkB
+ 1: k
+
+"(?P<n0>a)(?P<n1>b)(?P<n2>c)(?P<n3>d)(?P<n4>e)(?P<n5>f)(?P<n6>g)(?P<n7>h)(?P<n8>i)(?P<n9>j)(?P<n10>k)(?P<n11>l)A\11B"
+Capturing subpattern count = 12
+Max back reference = 11
+Named capturing subpatterns:
+ n0 1
+ n1 2
+ n10 11
+ n11 12
+ n2 3
+ n3 4
+ n4 5
+ n5 6
+ n6 7
+ n7 8
+ n8 9
+ n9 10
+No options
+First char = 'a'
+Need char = 'B'
+ abcdefghijklAkB
+ 0: abcdefghijklAkB
+ 1: a
+ 2: b
+ 3: c
+ 4: d
+ 5: e
+ 6: f
+ 7: g
+ 8: h
+ 9: i
+10: j
+11: k
+12: l
+
+"(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(l)A\11B"
+Capturing subpattern count = 12
+Max back reference = 11
+No options
+First char = 'a'
+Need char = 'B'
+ abcdefghijklAkB
+ 0: abcdefghijklAkB
+ 1: a
+ 2: b
+ 3: c
+ 4: d
+ 5: e
+ 6: f
+ 7: g
+ 8: h
+ 9: i
+10: j
+11: k
+12: l
+
+"(?P<name0>a)(?P<name1>a)(?P<name2>a)(?P<name3>a)(?P<name4>a)(?P<name5>a)(?P<name6>a)(?P<name7>a)(?P<name8>a)(?P<name9>a)(?P<name10>a)(?P<name11>a)(?P<name12>a)(?P<name13>a)(?P<name14>a)(?P<name15>a)(?P<name16>a)(?P<name17>a)(?P<name18>a)(?P<name19>a)(?P<name20>a)(?P<name21>a)(?P<name22>a)(?P<name23>a)(?P<name24>a)(?P<name25>a)(?P<name26>a)(?P<name27>a)(?P<name28>a)(?P<name29>a)(?P<name30>a)(?P<name31>a)(?P<name32>a)(?P<name33>a)(?P<name34>a)(?P<name35>a)(?P<name36>a)(?P<name37>a)(?P<name38>a)(?P<name39>a)(?P<name40>a)(?P<name41>a)(?P<name42>a)(?P<name43>a)(?P<name44>a)(?P<name45>a)(?P<name46>a)(?P<name47>a)(?P<name48>a)(?P<name49>a)(?P<name50>a)(?P<name51>a)(?P<name52>a)(?P<name53>a)(?P<name54>a)(?P<name55>a)(?P<name56>a)(?P<name57>a)(?P<name58>a)(?P<name59>a)(?P<name60>a)(?P<name61>a)(?P<name62>a)(?P<name63>a)(?P<name64>a)(?P<name65>a)(?P<name66>a)(?P<name67>a)(?P<name68>a)(?P<name69>a)(?P<name70>a)(?P<name71>a)(?P<name72>a)(?P<name73>a)(?P<name74>a)(?P<name75>a)(?P<name76>a)(?P<name77>a)(?P<name78>a)(?P<name79>a)(?P<name80>a)(?P<name81>a)(?P<name82>a)(?P<name83>a)(?P<name84>a)(?P<name85>a)(?P<name86>a)(?P<name87>a)(?P<name88>a)(?P<name89>a)(?P<name90>a)(?P<name91>a)(?P<name92>a)(?P<name93>a)(?P<name94>a)(?P<name95>a)(?P<name96>a)(?P<name97>a)(?P<name98>a)(?P<name99>a)(?P<name100>a)"
+Capturing subpattern count = 101
+Named capturing subpatterns:
+ name0 1
+ name1 2
+ name10 11
+ name100 101
+ name11 12
+ name12 13
+ name13 14
+ name14 15
+ name15 16
+ name16 17
+ name17 18
+ name18 19
+ name19 20
+ name2 3
+ name20 21
+ name21 22
+ name22 23
+ name23 24
+ name24 25
+ name25 26
+ name26 27
+ name27 28
+ name28 29
+ name29 30
+ name3 4
+ name30 31
+ name31 32
+ name32 33
+ name33 34
+ name34 35
+ name35 36
+ name36 37
+ name37 38
+ name38 39
+ name39 40
+ name4 5
+ name40 41
+ name41 42
+ name42 43
+ name43 44
+ name44 45
+ name45 46
+ name46 47
+ name47 48
+ name48 49
+ name49 50
+ name5 6
+ name50 51
+ name51 52
+ name52 53
+ name53 54
+ name54 55
+ name55 56
+ name56 57
+ name57 58
+ name58 59
+ name59 60
+ name6 7
+ name60 61
+ name61 62
+ name62 63
+ name63 64
+ name64 65
+ name65 66
+ name66 67
+ name67 68
+ name68 69
+ name69 70
+ name7 8
+ name70 71
+ name71 72
+ name72 73
+ name73 74
+ name74 75
+ name75 76
+ name76 77
+ name77 78
+ name78 79
+ name79 80
+ name8 9
+ name80 81
+ name81 82
+ name82 83
+ name83 84
+ name84 85
+ name85 86
+ name86 87
+ name87 88
+ name88 89
+ name89 90
+ name9 10
+ name90 91
+ name91 92
+ name92 93
+ name93 94
+ name94 95
+ name95 96
+ name96 97
+ name97 98
+ name98 99
+ name99 100
+No options
+First char = 'a'
+Need char = 'a'
+ aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+Matched, but too many substrings
+ 0: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+ 1: a
+ 2: a
+ 3: a
+ 4: a
+ 5: a
+ 6: a
+ 7: a
+ 8: a
+ 9: a
+10: a
+11: a
+12: a
+13: a
+14: a
+
+"(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)(a)"
+Capturing subpattern count = 101
+No options
+First char = 'a'
+Need char = 'a'
+ aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+Matched, but too many substrings
+ 0: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+ 1: a
+ 2: a
+ 3: a
+ 4: a
+ 5: a
+ 6: a
+ 7: a
+ 8: a
+ 9: a
+10: a
+11: a
+12: a
+13: a
+14: a
/ End of testinput2 /
Capturing subpattern count = 0
diff --git a/testdata/testoutput3 b/testdata/testoutput3
index e58c9c7..16ebc71 100644
--- a/testdata/testoutput3
+++ b/testdata/testoutput3
@@ -1,4 +1,4 @@
-PCRE version 6.1 21-Jun-2005
+PCRE version 6.2 01-Aug-2005
/^[\w]+/
*** Failers
diff --git a/testdata/testoutput4 b/testdata/testoutput4
index af4a821..234fd1a 100644
--- a/testdata/testoutput4
+++ b/testdata/testoutput4
@@ -1,4 +1,4 @@
-PCRE version 6.1 21-Jun-2005
+PCRE version 6.2 01-Aug-2005
/-- Do not use the \x{} construct except with patterns that have the --/
/-- /8 option set, because PCRE doesn't recognize them as UTF-8 unless --/
diff --git a/testdata/testoutput5 b/testdata/testoutput5
index 6b694a7..6e0d418 100644
--- a/testdata/testoutput5
+++ b/testdata/testoutput5
@@ -1,4 +1,4 @@
-PCRE version 6.1 21-Jun-2005
+PCRE version 6.2 01-Aug-2005
/\x{100}/8DM
Memory allocation (code space): 10
diff --git a/testdata/testoutput6 b/testdata/testoutput6
index 8889b05..9f9421e 100644
--- a/testdata/testoutput6
+++ b/testdata/testoutput6
@@ -1,4 +1,4 @@
-PCRE version 6.1 21-Jun-2005
+PCRE version 6.2 01-Aug-2005
/^\pC\pL\pM\pN\pP\pS\pZ</8
\x7f\x{c0}\x{30f}\x{660}\x{66c}\x{f01}\x{1680}<
diff --git a/testdata/testoutput7 b/testdata/testoutput7
index 421efb2..8e55069 100644
--- a/testdata/testoutput7
+++ b/testdata/testoutput7
@@ -1,4 +1,4 @@
-PCRE version 6.1 21-Jun-2005
+PCRE version 6.2 01-Aug-2005
/abc/
abc
diff --git a/testdata/testoutput8 b/testdata/testoutput8
index 6425f00..eb585ad 100644
--- a/testdata/testoutput8
+++ b/testdata/testoutput8
@@ -1,4 +1,4 @@
-PCRE version 6.1 21-Jun-2005
+PCRE version 6.2 01-Aug-2005
/-- Do not use the \x{} construct except with patterns that have the --/
/-- /8 option set, because PCRE doesn't recognize them as UTF-8 unless --/
diff --git a/testdata/testoutput9 b/testdata/testoutput9
index 4d2a41f..f7b4661 100644
--- a/testdata/testoutput9
+++ b/testdata/testoutput9
@@ -1,4 +1,4 @@
-PCRE version 6.1 21-Jun-2005
+PCRE version 6.2 01-Aug-2005
/\pL\P{Nd}/8
AB