diff options
author | ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2007-05-09 14:48:28 +0000 |
---|---|---|
committer | ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2007-05-09 14:48:28 +0000 |
commit | fcde8e6080d98b4fee23600b0e59b5179af250c3 (patch) | |
tree | 0d67f4337c932fe0e88fdb75597a6a375f6d07eb | |
parent | 71cf6597fa7987b2d1920c99f69c5ed4b0497826 (diff) | |
download | pcre-fcde8e6080d98b4fee23600b0e59b5179af250c3.tar.gz |
Add (?-n) and (?+n) relative references.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@166 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- | ChangeLog | 8 |
-rw-r--r-- | NEWS | 4 |
-rw-r--r-- | configure.ac | 4 |
-rw-r--r-- | doc/pcrepattern.3 | 43 |
-rw-r--r-- | pcre_compile.c | 42 |
-rw-r--r-- | pcre_internal.h | 2 |
-rw-r--r-- | pcreposix.c | 5 |
-rw-r--r-- | testdata/testinput2 | 16 |
-rw-r--r-- | testdata/testoutput2 | 51 |
9 files changed, 157 insertions, 18 deletions
@@ -21,6 +21,14 @@ Version 7.2 01-May-07 stack recursion. This gives a massive performance boost under BSD, but just a small improvement under Linux. However, it saves one field in the frame in all cases. + + 6. Added more features from the forthcoming Perl 5.10: + + (a) (?-n) (where n is a string of digits) is a relative subroutine or + recursion call. It refers to the nth most recently opened parentheses. + + (b) (?+n) is also a relative subroutine call; it refers to the nth next + to be opened parentheses. Version 7.1 24-Apr-07 @@ -13,6 +13,10 @@ are no longer independent. They are like the Unix libraries. To use the pcreposix functions, for example, you need to link with both the pcreposix and the basic pcre library. +Some more features from Perl 5.10 have been added: + + (?-n) and (?+n) relative references for recursion and subroutines. + Release 7.1 24-Apr-07 --------------------- diff --git a/configure.ac b/configure.ac index 4ca8324..6c2fe9d 100644 --- a/configure.ac +++ b/configure.ac @@ -8,8 +8,8 @@ dnl empty. m4_define(pcre_major, [7]) m4_define(pcre_minor, [2]) -m4_define(pcre_prerelease, [-RC1]) -m4_define(pcre_date, [2007-05-02]) +m4_define(pcre_prerelease, [-RC2]) +m4_define(pcre_date, [2007-05-09]) # Libtool shared library interface versions (current:revision:age) m4_define(libpcre_version, [0:1:0]) diff --git a/doc/pcrepattern.3 b/doc/pcrepattern.3 index b53426b..ad47ae7 100644 --- a/doc/pcrepattern.3 +++ b/doc/pcrepattern.3 @@ -1674,19 +1674,33 @@ pattern, so instead you could use this: ( \e( ( (?>[^()]+) | (?1) )* \e) ) .sp We have put the pattern into parentheses, and caused the recursion to refer to -them instead of the whole pattern. In a larger pattern, keeping track of -parenthesis numbers can be tricky. It may be more convenient to use named -parentheses instead. The Perl syntax for this is (?&name); PCRE's earlier -syntax (?P>name) is also supported. We could rewrite the above example as -follows: +them instead of the whole pattern. 
+.P +In a larger pattern, keeping track of parenthesis numbers can be tricky. This +is made easier by the use of relative references. (A Perl 5.10 feature.) +Instead of (?1) in the pattern above you can write (?-2) to refer to the second +most recently opened parentheses preceding the recursion. In other words, a +negative number counts capturing parentheses leftwards from the point at which +it is encountered. +.P +It is also possible to refer to subsequently opened parentheses, by writing +references such as (?+2). However, these cannot be recursive because the +reference is not inside the parentheses that are referenced. They are always +"subroutine" calls, as described in the next section. +.P +An alternative approach is to use named parentheses instead. The Perl syntax +for this is (?&name); PCRE's earlier syntax (?P>name) is also supported. We +could rewrite the above example as follows: .sp (?<pn> \e( ( (?>[^()]+) | (?&pn) )* \e) ) .sp If there is more than one subpattern with the same name, the earliest one is -used. This particular example pattern contains nested unlimited repeats, and so -the use of atomic grouping for matching strings of non-parentheses is important -when applying the pattern to strings that do not match. For example, when this -pattern is applied to +used. +.P +This particular example pattern that we have been looking at contains nested +unlimited repeats, and so the use of atomic grouping for matching strings of +non-parentheses is important when applying the pattern to strings that do not +match. For example, when this pattern is applied to .sp (aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa() .sp @@ -1738,7 +1752,14 @@ is the actual recursive call. If the syntax for a recursive subpattern reference (either by number or by name) is used outside the parentheses to which it refers, it operates like a subroutine in a programming language. The "called" subpattern may be defined -before or after the reference. 
An earlier example pointed out that the pattern +before or after the reference. A numbered reference can be absolute or +relative, as in these examples: +.sp + (...(absolute)...)...(?2)... + (...(relative)...)...(?-1)... + (...(?+1)...(relative)... +.sp +An earlier example pointed out that the pattern .sp (sens|respons)e and \e1ibility .sp @@ -1759,7 +1780,7 @@ When a subpattern is used as a subroutine, processing options such as case-independence are fixed when the subpattern is defined. They cannot be changed for different calls. For example, consider this pattern: .sp - (abc)(?i:(?1)) + (abc)(?i:(?-1)) .sp It matches "abcabc". It does not match "abcABC" because the change of processing option does not affect the called subpattern. diff --git a/pcre_compile.c b/pcre_compile.c index dd94473..983d696 100644 --- a/pcre_compile.c +++ b/pcre_compile.c @@ -208,7 +208,7 @@ static const char *error_texts[] = { "malformed number or name after (?(", "conditional group contains more than two branches", "assertion expected after (?(", - "(?R or (?digits must be followed by )", + "(?R or (?[+-]digits must be followed by )", /* 30 */ "unknown POSIX class name", "POSIX collating elements are not supported", @@ -242,7 +242,8 @@ static const char *error_texts[] = { /* 55 */ "repeating a DEFINE group is not allowed", "inconsistent NEWLINE options", - "\\g is not followed by an (optionally braced) non-zero number" + "\\g is not followed by an (optionally braced) non-zero number", + "(?+ or (?- must be followed by a non-zero number" }; @@ -3999,18 +4000,54 @@ for (;; ptr++) /* ------------------------------------------------------------ */ + case '-': case '+': case '0': case '1': case '2': case '3': case '4': /* Recursion or */ case '5': case '6': case '7': case '8': case '9': /* subroutine */ { const uschar *called; + int sign = *ptr; + + if (sign == '+') ptr++; + else if (sign == '-') + { + if ((digitab[ptr[1]] & ctype_digit) == 0) + goto OTHER_CHAR_AFTER_QUERY; + ptr++; + } + 
recno = 0; while((digitab[*ptr] & ctype_digit) != 0) recno = recno * 10 + *ptr++ - '0'; + if (*ptr != ')') { *errorcodeptr = ERR29; goto FAILED; } + + if (sign == '-') + { + if (recno == 0) + { + *errorcodeptr = ERR58; + goto FAILED; + } + recno = cd->bracount - recno + 1; + if (recno <= 0) + { + *errorcodeptr = ERR15; + goto FAILED; + } + } + else if (sign == '+') + { + if (recno == 0) + { + *errorcodeptr = ERR58; + goto FAILED; + } + recno += cd->bracount; + } /* Come here from code above that handles a named recursion */ @@ -4084,6 +4121,7 @@ for (;; ptr++) /* ------------------------------------------------------------ */ default: /* Other characters: check option setting */ + OTHER_CHAR_AFTER_QUERY: set = unset = 0; optset = &set; diff --git a/pcre_internal.h b/pcre_internal.h index 7ce6079..35030d8 100644 --- a/pcre_internal.h +++ b/pcre_internal.h @@ -855,7 +855,7 @@ enum { ERR0, ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9, ERR20, ERR21, ERR22, ERR23, ERR24, ERR25, ERR26, ERR27, ERR28, ERR29, ERR30, ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39, ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49, - ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57 }; + ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58 }; /* The real format of the start of the pcre block; the index of names and the code vector run on as long as necessary after the end. 
We store an explicit diff --git a/pcreposix.c b/pcreposix.c index 2670346..8582fba 100644 --- a/pcreposix.c +++ b/pcreposix.c @@ -80,7 +80,7 @@ static const int eint[] = { REG_BADPAT, /* malformed number or name after (?( */ REG_BADPAT, /* conditional group contains more than two branches */ REG_BADPAT, /* assertion expected after (?( */ - REG_BADPAT, /* (?R or (?digits must be followed by ) */ + REG_BADPAT, /* (?R or (?[+-]digits must be followed by ) */ REG_ECTYPE, /* unknown POSIX class name */ REG_BADPAT, /* POSIX collating elements are not supported */ REG_INVARG, /* this version of PCRE is not compiled with PCRE_UTF8 support */ @@ -108,7 +108,8 @@ static const int eint[] = { REG_BADPAT, /* DEFINE group contains more than one branch */ REG_BADPAT, /* repeating a DEFINE group is not allowed */ REG_INVARG, /* inconsistent NEWLINE options */ - REG_BADPAT /* \g is not followed followed by an (optionally braced) non-zero number */ + REG_BADPAT, /* \g is not followed followed by an (optionally braced) non-zero number */ + REG_BADPAT /* (?+ or (?- must be followed by a non-zero number */ }; /* Table of texts corresponding to POSIX error codes */ diff --git a/testdata/testinput2 b/testdata/testinput2 index 2806846..804ce23 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -2156,4 +2156,20 @@ a random value. 
/Ix ** Failers XABC\B +/(ab|c)(?-1)/B + abc + +/xy(?+1)(abc)/B + xyabcabc + ** Failers + xyabc + +/x(?-0)y/ + +/x(?-1)y/ + +/x(?+0)y/ + +/x(?+1)y/ + / End of testinput2 / diff --git a/testdata/testoutput2 b/testdata/testoutput2 index 8684dd7..3299d35 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -8169,4 +8169,55 @@ No match XABC\B No match +/(ab|c)(?-1)/B +------------------------------------------------------------------ + 0 29 Bra 0 + 3 9 Bra 1 + 8 ab + 12 5 Alt + 15 c + 17 14 Ket + 20 6 Once + 23 3 Recurse + 26 6 Ket + 29 29 Ket + 32 End +------------------------------------------------------------------ + abc + 0: abc + 1: ab + +/xy(?+1)(abc)/B +------------------------------------------------------------------ + 0 30 Bra 0 + 3 xy + 7 6 Once + 10 16 Recurse + 13 6 Ket + 16 11 Bra 1 + 21 abc + 27 11 Ket + 30 30 Ket + 33 End +------------------------------------------------------------------ + xyabcabc + 0: xyabcabc + 1: abc + ** Failers +No match + xyabc +No match + +/x(?-0)y/ +Failed: (?+ or (?- must be followed by a non-zero number at offset 5 + +/x(?-1)y/ +Failed: reference to non-existent subpattern at offset 5 + +/x(?+0)y/ +Failed: (?+ or (?- must be followed by a non-zero number at offset 5 + +/x(?+1)y/ +Failed: reference to non-existent subpattern at offset 5 + / End of testinput2 / |