summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author nigel <nigel@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2007-02-24 21:38:33 +0000
committer nigel <nigel@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2007-02-24 21:38:33 +0000
commit c046fc1dd3ac4d0ab9f6c77951877746f0aabbdb (patch)
tree 52fa6c90399536e750f2b028e4fab76fc43778df
parent 9dc6505b56ff9ba2f87071990a26a109dcbfa322 (diff)
download pcre-c046fc1dd3ac4d0ab9f6c77951877746f0aabbdb.tar.gz
Load pcre-1.08 into code/trunk.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@19 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- ChangeLog 9
-rw-r--r-- internal.h 4
-rw-r--r-- pcre.3 31
-rw-r--r-- pcre.c 32
-rw-r--r-- pcre.h 1
-rw-r--r-- pcretest.c 6
-rw-r--r-- testinput2 29
-rw-r--r-- testoutput 2
-rw-r--r-- testoutput2 68
9 files changed, 169 insertions, 13 deletions
diff --git a/ChangeLog b/ChangeLog
index 118a034..600a38f 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -2,6 +2,15 @@ ChangeLog for PCRE
------------------
+Version 1.08 27-Mar-98
+----------------------
+
+1. Add PCRE_UNGREEDY to invert the greediness of quantifiers.
+
+2. Add (?U) and (?X) to set PCRE_UNGREEDY and PCRE_EXTRA respectively. The
+latter must appear before anything that relies on it in the pattern.
+
+
Version 1.07 16-Feb-98
----------------------
diff --git a/internal.h b/internal.h
index fd361cb..16e0880 100644
--- a/internal.h
+++ b/internal.h
@@ -3,7 +3,7 @@
*************************************************/
-#define PCRE_VERSION "1.07 16-Feb-1998"
+#define PCRE_VERSION "1.08 27-Mar-1998"
/* This is a library of functions to support regular expressions whose syntax
@@ -78,7 +78,7 @@ only some permitted at run or study time. */
#define PUBLIC_OPTIONS \
(PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \
- PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA)
+ PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_UNGREEDY)
#define PUBLIC_EXEC_OPTIONS \
(PCRE_CASELESS|PCRE_ANCHORED|PCRE_MULTILINE|PCRE_NOTBOL|PCRE_NOTEOL| \
diff --git a/pcre.3 b/pcre.3
index 257e710..c96a9d7 100644
--- a/pcre.3
+++ b/pcre.3
@@ -104,7 +104,8 @@ The \fIoptions\fR argument contains independent bits that affect the
compilation. It should be zero if no options are required. Those options that
are compatible with Perl can also be set at compile time from within the
pattern (see the detailed description of regular expressions below) and all
-options except PCRE_EXTENDED and PCRE_EXTRA can be set at the time of matching.
+options except PCRE_EXTENDED, PCRE_EXTRA and PCRE_UNGREEDY can be set at the
+time of matching.
.PP
If \fIerrptr\fR is NULL, \fBpcre_compile()\fR returns NULL immediately.
Otherwise, if compilation of a pattern fails, \fBpcre_compile()\fR returns
@@ -182,7 +183,15 @@ influencing the progress of a match.
(2) Once a subpattern enclosed in (?>subpat) brackets has matched,
backtracking never goes back into the pattern.
-See below for further details of both of these.
+See below for further details of both of these. PCRE_EXTRA can be set by a (?X)
+option setting within the pattern, but this must precede anything in the
+pattern which relies on its being set.
+
+ PCRE_UNGREEDY
+
+This option inverts the "greediness" of the quantifiers so that they are not
+greedy by default, but become greedy if followed by "?". It is not compatible
+with Perl. It can also be set by a (?U) option setting within the pattern.
@@ -419,6 +428,10 @@ recognized, and a backslash followed by a letter with no special meaning is
faulted. There is also a new kind of parenthesized subpattern starting with (?>
which has a block on backtracking into it once it has matched.
+(c) If PCRE_UNGREEDY is set, the greediness of the repetition quantifiers is
+inverted, that is, by default they are not greedy, but if followed by a
+question mark they are.
+
.SH REGULAR EXPRESSION DETAILS
The syntax and semantics of the regular expressions supported by PCRE are
@@ -866,6 +879,11 @@ own right. Because it has two uses, it can sometimes appear doubled, as in
which matches one digit by preference, but can match two if that is the only
way the rest of the pattern matches.
+If the PCRE_UNGREEDY option is set (an option which is not available in Perl)
+then the quantifiers are not greedy by default, but individual ones can be made
+greedy by following them by a question mark. In other words, it inverts the
+default behaviour.
+
When a parenthesized subpattern is quantified with a minimum repeat count that
is greater than 1 or with a limited maximum, more store is required for the
compiled pattern, in proportion to the size of the minimum or maximum.
@@ -986,6 +1004,15 @@ the PCRE_EXTENDED option, that is, whitespace is ignored and # introduces a
comment that lasts till the next newline. The option applies to the whole
pattern, not just to the portion that follows it.
+If the sequence (?U) occurs anywhere in a pattern, it has the effect of setting
+the PCRE_UNGREEDY option which inverts the greediness of quantifiers. This is
+an extension to Perl's facilities.
+
+If the sequence (?X) occurs in a pattern, it has the effect of setting the
+PCRE_EXTRA flag, which turns on some additional features not found in Perl.
+This flag setting is special in that it must occur earlier in the pattern than
+any of the additional features. It is best put at the start.
+
If more than one option is required, they can be specified jointly, for example
as (?ix) or (?mi).
diff --git a/pcre.c b/pcre.c
index 25daa07..bbfcf9c 100644
--- a/pcre.c
+++ b/pcre.c
@@ -623,6 +623,7 @@ compile_branch(int options, int *brackets, uschar **codeptr,
int repeat_type, op_type;
int repeat_min, repeat_max;
int bravalue, length;
+int greedy_default, greedy_non_default;
register int c;
register uschar *code = *codeptr;
const uschar *ptr = *ptrptr;
@@ -630,6 +631,11 @@ const uschar *oldptr;
uschar *previous = NULL;
uschar class[32];
+/* Set up the default and non-default settings for greediness */
+
+greedy_default = ((options & PCRE_UNGREEDY) != 0);
+greedy_non_default = greedy_default ^ 1;
+
/* Switch on next character until the end of the branch */
for (;; ptr++)
@@ -907,10 +913,13 @@ for (;; ptr++)
goto FAILED;
}
- /* If the next character is '?' this is a minimizing repeat. Advance to the
+ /* If the next character is '?' this is a minimizing repeat, by default,
+ but if PCRE_UNGREEDY is set, it works the other way round. Advance to the
next character. */
- if (ptr[1] == '?') { repeat_type = 1; ptr++; } else repeat_type = 0;
+ if (ptr[1] == '?')
+ { repeat_type = greedy_non_default; ptr++; }
+ else repeat_type = greedy_default;
/* If the maximum is zero then the minimum must also be zero; Perl allows
this case, so we do too - by simply omitting the item altogether. */
@@ -1149,6 +1158,8 @@ for (;; ptr++)
case 'm':
case 's':
case 'x':
+ case 'U':
+ case 'X':
ptr++;
while (*ptr != ')') ptr++;
previous = NULL;
@@ -1752,7 +1763,7 @@ while ((c = *(++ptr)) != 0)
ptr += 2;
break;
}
- /* Else fall thourh */
+ /* Else fall through */
/* Else loop setting valid options until ) is met. Anything else is an
error. */
@@ -1782,6 +1793,16 @@ while ((c = *(++ptr)) != 0)
length -= spaces; /* Already counted spaces */
continue;
}
+ else if (c == 'X')
+ {
+ options |= PCRE_EXTRA;
+ continue;
+ }
+ else if (c == 'U')
+ {
+ options |= PCRE_UNGREEDY;
+ continue;
+ }
else if (c == ')') break;
*errorptr = ERR12;
@@ -1987,14 +2008,15 @@ printf("Length = %d top_bracket = %d top_backref=%d\n",
if (re->options != 0)
{
- printf("%s%s%s%s%s%s%s\n",
+ printf("%s%s%s%s%s%s%s%s\n",
((re->options & PCRE_ANCHORED) != 0)? "anchored " : "",
((re->options & PCRE_CASELESS) != 0)? "caseless " : "",
((re->options & PCRE_EXTENDED) != 0)? "extended " : "",
((re->options & PCRE_MULTILINE) != 0)? "multiline " : "",
((re->options & PCRE_DOTALL) != 0)? "dotall " : "",
((re->options & PCRE_DOLLAR_ENDONLY) != 0)? "endonly " : "",
- ((re->options & PCRE_EXTRA) != 0)? "extra " : "");
+ ((re->options & PCRE_EXTRA) != 0)? "extra " : "",
+ ((re->options & PCRE_UNGREEDY) != 0)? "ungreedy " : "");
}
if ((re->options & PCRE_FIRSTSET) != 0)
diff --git a/pcre.h b/pcre.h
index e4355eb..525dca4 100644
--- a/pcre.h
+++ b/pcre.h
@@ -30,6 +30,7 @@ extern "C" {
#define PCRE_EXTRA 0x0040
#define PCRE_NOTBOL 0x0080
#define PCRE_NOTEOL 0x0100
+#define PCRE_UNGREEDY 0x0200
/* Exec-time error codes */
diff --git a/pcretest.c b/pcretest.c
index 3ea4b16..5bc1f3a 100644
--- a/pcretest.c
+++ b/pcretest.c
@@ -404,6 +404,7 @@ while (!done)
case 'P': do_posix = 1; break;
case 'S': do_study = 1; break;
case 'I': study_options |= PCRE_CASELESS; break;
+ case 'U': options |= PCRE_UNGREEDY; break;
case 'X': options |= PCRE_EXTRA; break;
case '\n': case ' ': break;
default:
@@ -495,14 +496,15 @@ while (!done)
{
fprintf(outfile, "Identifying subpattern count = %d\n", count);
if (options == 0) fprintf(outfile, "No options\n");
- else fprintf(outfile, "Options:%s%s%s%s%s%s%s\n",
+ else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s\n",
((options & PCRE_ANCHORED) != 0)? " anchored" : "",
((options & PCRE_CASELESS) != 0)? " caseless" : "",
((options & PCRE_EXTENDED) != 0)? " extended" : "",
((options & PCRE_MULTILINE) != 0)? " multiline" : "",
((options & PCRE_DOTALL) != 0)? " dotall" : "",
((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
- ((options & PCRE_EXTRA) != 0)? " extra" : "");
+ ((options & PCRE_EXTRA) != 0)? " extra" : "",
+ ((options & PCRE_UNGREEDY) != 0)? " ungreedy" : "");
if (first_char == -1)
{
fprintf(outfile, "First char at start or follows \\n\n");
diff --git a/testinput2 b/testinput2
index e293aef..e922811 100644
--- a/testinput2
+++ b/testinput2
@@ -38,6 +38,8 @@
/ab\gdef/X
+/(?X)ab\gdef/X
+
/x{5,4}/
/z{65536}/
@@ -146,9 +148,15 @@
".*/\Xfoo"X
/this/is/a/very/long/line/in/deed/with/very/many/slashes/in/it/you/see/
+"(?X).*/\Xfoo"
+ /this/is/a/very/long/line/in/deed/with/very/many/slashes/in/it/you/see/
+
".*/\Xfoo"X
/this/is/a/very/long/line/in/deed/with/very/many/slashes/in/and/foo
+"(?X).*/\Xfoo"
+ /this/is/a/very/long/line/in/deed/with/very/many/slashes/in/and/foo
+
/(\.\d\d[1-9]?)\d+/
1.230003938
1.875000282
@@ -287,4 +295,25 @@
/((a|b|c)*)*/
+/<.*>/
+ abc<def>ghi<klm>nop
+
+/<.*?>/
+ abc<def>ghi<klm>nop
+
+/<.*>/U
+ abc<def>ghi<klm>nop
+
+/<.*>(?U)/
+ abc<def>ghi<klm>nop
+
+/<.*?>/U
+ abc<def>ghi<klm>nop
+
+/={3,}/U
+ abc========def
+
+/(?U)={3,}?/
+ abc========def
+
/ End of test input /
diff --git a/testoutput b/testoutput
index 653a377..92577d7 100644
--- a/testoutput
+++ b/testoutput
@@ -1,5 +1,5 @@
Testing Perl-Compatible Regular Expressions
-PCRE version 1.07 16-Feb-1998
+PCRE version 1.08 27-Mar-1998
/the quick brown fox/
the quick brown fox
diff --git a/testoutput2 b/testoutput2
index b969320..77955ae 100644
--- a/testoutput2
+++ b/testoutput2
@@ -1,5 +1,5 @@
Testing Perl-Compatible Regular Expressions
-PCRE version 1.07 16-Feb-1998
+PCRE version 1.08 27-Mar-1998
/(a)b|/
Identifying subpattern count = 1
@@ -85,6 +85,9 @@ Failed: \ at end of pattern at offset 4
/ab\gdef/X
Failed: unrecognized character follows \ at offset 3
+/(?X)ab\gdef/X
+Failed: unrecognized character follows \ at offset 7
+
/x{5,4}/
Failed: numbers out of order in {} quantifier at offset 5
@@ -353,6 +356,13 @@ No first char
/this/is/a/very/long/line/in/deed/with/very/many/slashes/in/it/you/see/
No match
+"(?X).*/\Xfoo"
+Identifying subpattern count = 0
+Options: anchored extra
+No first char
+ /this/is/a/very/long/line/in/deed/with/very/many/slashes/in/it/you/see/
+No match
+
".*/\Xfoo"X
Identifying subpattern count = 0
Options: anchored extra
@@ -360,6 +370,13 @@ No first char
/this/is/a/very/long/line/in/deed/with/very/many/slashes/in/and/foo
0: /this/is/a/very/long/line/in/deed/with/very/many/slashes/in/and/foo
+"(?X).*/\Xfoo"
+Identifying subpattern count = 0
+Options: anchored extra
+No first char
+ /this/is/a/very/long/line/in/deed/with/very/many/slashes/in/and/foo
+ 0: /this/is/a/very/long/line/in/deed/with/very/many/slashes/in/and/foo
+
/(\.\d\d[1-9]?)\d+/
Identifying subpattern count = 1
No options
@@ -647,6 +664,55 @@ Failed: operand of unlimited repeat could match the empty string at offset 6
/((a|b|c)*)*/
Failed: operand of unlimited repeat could match the empty string at offset 10
+/<.*>/
+Identifying subpattern count = 0
+No options
+First char = '<'
+ abc<def>ghi<klm>nop
+ 0: <def>ghi<klm>
+
+/<.*?>/
+Identifying subpattern count = 0
+No options
+First char = '<'
+ abc<def>ghi<klm>nop
+ 0: <def>
+
+/<.*>/U
+Identifying subpattern count = 0
+Options: ungreedy
+First char = '<'
+ abc<def>ghi<klm>nop
+ 0: <def>
+
+/<.*>(?U)/
+Identifying subpattern count = 0
+Options: ungreedy
+First char = '<'
+ abc<def>ghi<klm>nop
+ 0: <def>
+
+/<.*?>/U
+Identifying subpattern count = 0
+Options: ungreedy
+First char = '<'
+ abc<def>ghi<klm>nop
+ 0: <def>ghi<klm>
+
+/={3,}/U
+Identifying subpattern count = 0
+Options: ungreedy
+First char = '='
+ abc========def
+ 0: ===
+
+/(?U)={3,}?/
+Identifying subpattern count = 0
+Options: ungreedy
+First char = '='
+ abc========def
+ 0: ========
+
/ End of test input /
Identifying subpattern count = 0
No options