diff options
author | nigel <nigel@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2007-02-24 21:38:33 +0000 |
---|---|---|
committer | nigel <nigel@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2007-02-24 21:38:33 +0000 |
commit | c046fc1dd3ac4d0ab9f6c77951877746f0aabbdb (patch) | |
tree | 52fa6c90399536e750f2b028e4fab76fc43778df | |
parent | 9dc6505b56ff9ba2f87071990a26a109dcbfa322 (diff) | |
download | pcre-c046fc1dd3ac4d0ab9f6c77951877746f0aabbdb.tar.gz |
Load pcre-1.08 into code/trunk.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@19 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- | ChangeLog | 9 | ||||
-rw-r--r-- | internal.h | 4 | ||||
-rw-r--r-- | pcre.3 | 31 | ||||
-rw-r--r-- | pcre.c | 32 | ||||
-rw-r--r-- | pcre.h | 1 | ||||
-rw-r--r-- | pcretest.c | 6 | ||||
-rw-r--r-- | testinput2 | 29 | ||||
-rw-r--r-- | testoutput | 2 | ||||
-rw-r--r-- | testoutput2 | 68 |
9 files changed, 169 insertions, 13 deletions
@@ -2,6 +2,15 @@ ChangeLog for PCRE ------------------ +Version 1.08 27-Mar-98 +---------------------- + +1. Add PCRE_UNGREEDY to invert the greediness of quantifiers. + +2. Add (?U) and (?X) to set PCRE_UNGREEDY and PCRE_EXTRA respectively. The +latter must appear before anything that relies on it in the pattern. + + Version 1.07 16-Feb-98 ---------------------- @@ -3,7 +3,7 @@ *************************************************/ -#define PCRE_VERSION "1.07 16-Feb-1998" +#define PCRE_VERSION "1.08 27-Mar-1998" /* This is a library of functions to support regular expressions whose syntax @@ -78,7 +78,7 @@ only some permitted at run or study time. */ #define PUBLIC_OPTIONS \ (PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \ - PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA) + PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_UNGREEDY) #define PUBLIC_EXEC_OPTIONS \ (PCRE_CASELESS|PCRE_ANCHORED|PCRE_MULTILINE|PCRE_NOTBOL|PCRE_NOTEOL| \ @@ -104,7 +104,8 @@ The \fIoptions\fR argument contains independent bits that affect the compilation. It should be zero if no options are required. Those options that are compabible with Perl can also be set at compile time from within the pattern (see the detailed description of regular expressions below) and all -options except PCRE_EXTENDED and PCRE_EXTRA can be set at the time of matching. +options except PCRE_EXTENDED, PCRE_EXTRA and PCRE_UNGREEDY can be set at the +time of matching. .PP If \fIerrptr\fR is NULL, \fBpcre_compile()\fR returns NULL immediately. Otherwise, if compilation of a pattern fails, \fBpcre_compile()\fR returns @@ -182,7 +183,15 @@ influencing the progress of a match. (2) Once a subpattern enclosed in (?>subpat) brackets has matched, backtracking never goes back into the pattern. -See below for further details of both of these. +See below for further details of both of these. 
PCRE_EXTRA can be set by a (?X) +option setting within the pattern, but this must precede anything in the +pattern which relies on its being set. + + PCRE_UNGREEDY + +This option inverts the "greediness" of the quantifiers so that they are not +greedy by default, but become greedy if followed by "?". It is not compatible +with Perl. It can also be set by a (?U) option setting within the pattern. @@ -419,6 +428,10 @@ recognized, and a backslash followed by a letter with no special meaning is faulted. There is also a new kind of parenthesized subpattern starting with (?> which has a block on backtracking into it once it has matched. +(c) If PCRE_UNGREEDY is set, the greediness of the repetition quantifiers is +inverted, that is, by default they are not greedy, but if followed by a +question mark they are. + .SH REGULAR EXPRESSION DETAILS The syntax and semantics of the regular expressions supported by PCRE are @@ -866,6 +879,11 @@ own right. Because it has two uses, it can sometimes appear doubled, as in which matches one digit by preference, but can match two if that is the only way the rest of the pattern matches. +If the PCRE_UNGREEDY option is set (an option which is not available in Perl) +then the quantifiers are not greedy by default, but individual ones can be made +greedy by following them by a question mark. In other words, it inverts the +default behaviour. + When a parenthesized subpattern is quantified with a minimum repeat count that is greater than 1 or with a limited maximum, more store is required for the compiled pattern, in proportion to the size of the minimum or maximum. @@ -986,6 +1004,15 @@ the PCRE_EXTENDED option, that is, whitespace is ignored and # introduces a comment that lasts till the next newline. The option applies to the whole pattern, not just to the portion that follows it. +If the sequence (?U) occurs anywhere in a pattern, it has the effect of setting +the PCRE_UNGREEDY option which inverts the greediness of quantifiers. 
This is +an extension to Perl's facilities. + +If the sequence (?X) occurs in a pattern, it has the effect of setting the +PCRE_EXTRA flag, which turns on some additional features not found in Perl. +This flag setting is special in that it must occur earlier in the pattern than +any of the additional features. It is best put at the start. + If more than one option is required, they can be specified jointly, for example as (?ix) or (?mi). @@ -623,6 +623,7 @@ compile_branch(int options, int *brackets, uschar **codeptr, int repeat_type, op_type; int repeat_min, repeat_max; int bravalue, length; +int greedy_default, greedy_non_default; register int c; register uschar *code = *codeptr; const uschar *ptr = *ptrptr; @@ -630,6 +631,11 @@ const uschar *oldptr; uschar *previous = NULL; uschar class[32]; +/* Set up the default and non-default settings for greediness */ + +greedy_default = ((options & PCRE_UNGREEDY) != 0); +greedy_non_default = greedy_default ^ 1; + /* Switch on next character until the end of the branch */ for (;; ptr++) @@ -907,10 +913,13 @@ for (;; ptr++) goto FAILED; } - /* If the next character is '?' this is a minimizing repeat. Advance to the + /* If the next character is '?' this is a minimizing repeat, by default, + but if PCRE_UNGREEDY is set, it works the other way round. Advance to the next character. */ - if (ptr[1] == '?') { repeat_type = 1; ptr++; } else repeat_type = 0; + if (ptr[1] == '?') + { repeat_type = greedy_non_default; ptr++; } + else repeat_type = greedy_default; /* If the maximum is zero then the minimum must also be zero; Perl allows this case, so we do too - by simply omitting the item altogether. */ @@ -1149,6 +1158,8 @@ for (;; ptr++) case 'm': case 's': case 'x': + case 'U': + case 'X': ptr++; while (*ptr != ')') ptr++; previous = NULL; @@ -1752,7 +1763,7 @@ while ((c = *(++ptr)) != 0) ptr += 2; break; } - /* Else fall thourh */ + /* Else fall through */ /* Else loop setting valid options until ) is met. 
Anything else is an error. */ @@ -1782,6 +1793,16 @@ while ((c = *(++ptr)) != 0) length -= spaces; /* Already counted spaces */ continue; } + else if (c == 'X') + { + options |= PCRE_EXTRA; + continue; + } + else if (c == 'U') + { + options |= PCRE_UNGREEDY; + continue; + } else if (c == ')') break; *errorptr = ERR12; @@ -1987,14 +2008,15 @@ printf("Length = %d top_bracket = %d top_backref=%d\n", if (re->options != 0) { - printf("%s%s%s%s%s%s%s\n", + printf("%s%s%s%s%s%s%s%s\n", ((re->options & PCRE_ANCHORED) != 0)? "anchored " : "", ((re->options & PCRE_CASELESS) != 0)? "caseless " : "", ((re->options & PCRE_EXTENDED) != 0)? "extended " : "", ((re->options & PCRE_MULTILINE) != 0)? "multiline " : "", ((re->options & PCRE_DOTALL) != 0)? "dotall " : "", ((re->options & PCRE_DOLLAR_ENDONLY) != 0)? "endonly " : "", - ((re->options & PCRE_EXTRA) != 0)? "extra " : ""); + ((re->options & PCRE_EXTRA) != 0)? "extra " : "", + ((re->options & PCRE_UNGREEDY) != 0)? "ungreedy " : ""); } if ((re->options & PCRE_FIRSTSET) != 0) @@ -30,6 +30,7 @@ extern "C" { #define PCRE_EXTRA 0x0040 #define PCRE_NOTBOL 0x0080 #define PCRE_NOTEOL 0x0100 +#define PCRE_UNGREEDY 0x0200 /* Exec-time error codes */ @@ -404,6 +404,7 @@ while (!done) case 'P': do_posix = 1; break; case 'S': do_study = 1; break; case 'I': study_options |= PCRE_CASELESS; break; + case 'U': options |= PCRE_UNGREEDY; break; case 'X': options |= PCRE_EXTRA; break; case '\n': case ' ': break; default: @@ -495,14 +496,15 @@ while (!done) { fprintf(outfile, "Identifying subpattern count = %d\n", count); if (options == 0) fprintf(outfile, "No options\n"); - else fprintf(outfile, "Options:%s%s%s%s%s%s%s\n", + else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s\n", ((options & PCRE_ANCHORED) != 0)? " anchored" : "", ((options & PCRE_CASELESS) != 0)? " caseless" : "", ((options & PCRE_EXTENDED) != 0)? " extended" : "", ((options & PCRE_MULTILINE) != 0)? " multiline" : "", ((options & PCRE_DOTALL) != 0)? 
" dotall" : "", ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "", - ((options & PCRE_EXTRA) != 0)? " extra" : ""); + ((options & PCRE_EXTRA) != 0)? " extra" : "", + ((options & PCRE_UNGREEDY) != 0)? " ungreedy" : ""); if (first_char == -1) { fprintf(outfile, "First char at start or follows \\n\n"); @@ -38,6 +38,8 @@ /ab\gdef/X +/(?X)ab\gdef/X + /x{5,4}/ /z{65536}/ @@ -146,9 +148,15 @@ ".*/\Xfoo"X /this/is/a/very/long/line/in/deed/with/very/many/slashes/in/it/you/see/ +"(?X).*/\Xfoo" + /this/is/a/very/long/line/in/deed/with/very/many/slashes/in/it/you/see/ + ".*/\Xfoo"X /this/is/a/very/long/line/in/deed/with/very/many/slashes/in/and/foo +"(?X).*/\Xfoo" + /this/is/a/very/long/line/in/deed/with/very/many/slashes/in/and/foo + /(\.\d\d[1-9]?)\d+/ 1.230003938 1.875000282 @@ -287,4 +295,25 @@ /((a|b|c)*)*/ +/<.*>/ + abc<def>ghi<klm>nop + +/<.*?>/ + abc<def>ghi<klm>nop + +/<.*>/U + abc<def>ghi<klm>nop + +/<.*>(?U)/ + abc<def>ghi<klm>nop + +/<.*?>/U + abc<def>ghi<klm>nop + +/={3,}/U + abc========def + +/(?U)={3,}?/ + abc========def + / End of test input / @@ -1,5 +1,5 @@ Testing Perl-Compatible Regular Expressions -PCRE version 1.07 16-Feb-1998 +PCRE version 1.08 27-Mar-1998 /the quick brown fox/ the quick brown fox diff --git a/testoutput2 b/testoutput2 index b969320..77955ae 100644 --- a/testoutput2 +++ b/testoutput2 @@ -1,5 +1,5 @@ Testing Perl-Compatible Regular Expressions -PCRE version 1.07 16-Feb-1998 +PCRE version 1.08 27-Mar-1998 /(a)b|/ Identifying subpattern count = 1 @@ -85,6 +85,9 @@ Failed: \ at end of pattern at offset 4 /ab\gdef/X Failed: unrecognized character follows \ at offset 3 +/(?X)ab\gdef/X +Failed: unrecognized character follows \ at offset 7 + /x{5,4}/ Failed: numbers out of order in {} quantifier at offset 5 @@ -353,6 +356,13 @@ No first char /this/is/a/very/long/line/in/deed/with/very/many/slashes/in/it/you/see/ No match +"(?X).*/\Xfoo" +Identifying subpattern count = 0 +Options: anchored extra +No first char + 
/this/is/a/very/long/line/in/deed/with/very/many/slashes/in/it/you/see/ +No match + ".*/\Xfoo"X Identifying subpattern count = 0 Options: anchored extra @@ -360,6 +370,13 @@ No first char /this/is/a/very/long/line/in/deed/with/very/many/slashes/in/and/foo 0: /this/is/a/very/long/line/in/deed/with/very/many/slashes/in/and/foo +"(?X).*/\Xfoo" +Identifying subpattern count = 0 +Options: anchored extra +No first char + /this/is/a/very/long/line/in/deed/with/very/many/slashes/in/and/foo + 0: /this/is/a/very/long/line/in/deed/with/very/many/slashes/in/and/foo + /(\.\d\d[1-9]?)\d+/ Identifying subpattern count = 1 No options @@ -647,6 +664,55 @@ Failed: operand of unlimited repeat could match the empty string at offset 6 /((a|b|c)*)*/ Failed: operand of unlimited repeat could match the empty string at offset 10 +/<.*>/ +Identifying subpattern count = 0 +No options +First char = '<' + abc<def>ghi<klm>nop + 0: <def>ghi<klm> + +/<.*?>/ +Identifying subpattern count = 0 +No options +First char = '<' + abc<def>ghi<klm>nop + 0: <def> + +/<.*>/U +Identifying subpattern count = 0 +Options: ungreedy +First char = '<' + abc<def>ghi<klm>nop + 0: <def> + +/<.*>(?U)/ +Identifying subpattern count = 0 +Options: ungreedy +First char = '<' + abc<def>ghi<klm>nop + 0: <def> + +/<.*?>/U +Identifying subpattern count = 0 +Options: ungreedy +First char = '<' + abc<def>ghi<klm>nop + 0: <def>ghi<klm> + +/={3,}/U +Identifying subpattern count = 0 +Options: ungreedy +First char = '=' + abc========def + 0: === + +/(?U)={3,}?/ +Identifying subpattern count = 0 +Options: ungreedy +First char = '=' + abc========def + 0: ======== + / End of test input / Identifying subpattern count = 0 No options |