summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYves Orton <demerphq@gmail.com>2006-11-13 19:59:32 +0100
committerRafael Garcia-Suarez <rgarciasuarez@gmail.com>2006-11-14 06:59:03 +0000
commit542fa716385f08c64e1dc5ef1d9ceacf2ee69d29 (patch)
tree3b7f267de643be37d6de28ee0b22c24bd6a857e2
parentc8e599d3a7f82b1ac9916859e455d8fd113f5731 (diff)
downloadperl-542fa716385f08c64e1dc5ef1d9ceacf2ee69d29.tar.gz
Allow negative indexing in recursive patterns
Message-ID: <9b18b3110611130959k1fdd2485yd8eb1cd428de570a@mail.gmail.com> p4raw-id: //depot/perl@29267
-rw-r--r--pod/perlre.pod24
-rw-r--r--regcomp.c37
-rwxr-xr-xt/op/pat.t21
-rw-r--r--t/op/re_tests3
4 files changed, 77 insertions, 8 deletions
diff --git a/pod/perlre.pod b/pod/perlre.pod
index 0323a97405..c2b968062b 100644
--- a/pod/perlre.pod
+++ b/pod/perlre.pod
@@ -866,9 +866,10 @@ Recursing deeper than 50 times without consuming any input string will
result in a fatal error. The maximum depth is compiled into perl, so
changing it requires a custom build.
-=item C<(?PARNO)> C<(?R)> C<(?0)>
-X<(?PARNO)> X<(?1)> X<(?R)> X<(?0)>
+=item C<(?PARNO)> C<(?-PARNO)> C<(?+PARNO)> C<(?R)> C<(?0)>
+X<(?PARNO)> X<(?1)> X<(?R)> X<(?0)> X<(?-1)> X<(?+1)> X<(?-PARNO)> X<(?+PARNO)>
X<regex, recursive> X<regexp, recursive> X<regular expression, recursive>
+X<regex, relative recursion>
Similar to C<(??{ code })> except it does not involve compiling any code,
instead it treats the contents of a capture buffer as an independent
@@ -879,7 +880,10 @@ outermost recursion.
PARNO is a sequence of digits (not starting with 0) whose value reflects
the paren-number of the capture buffer to recurse to. C<(?R)> recurses to
the beginning of the whole pattern. C<(?0)> is an alternate syntax for
-C<(?R)>.
+C<(?R)>. If PARNO is preceded by a plus or minus sign then it is assumed
+to be relative, with negative numbers indicating preceding capture buffers
+and positive ones following. Thus C<(?-1)> refers to the most recently
+declared buffer, and C<(?+1)> indicates the next buffer to be declared.
The following pattern matches a function foo() which may contain
balanced parentheses as the argument.
@@ -918,11 +922,21 @@ fatal error. Recursing deeper than 50 times without consuming any input
string will also result in a fatal error. The maximum depth is compiled
into perl, so changing it requires a custom build.
+The following shows how using negative indexing can make it
+easier to embed recursive patterns inside of a C<qr//> construct
+for later use:
+
+ my $parens = qr/(\((?:[^()]++|(?-1))*+\))/;
+ if (/foo $parens \s+ + \s+ bar $parens/x) {
+ # do something here...
+ }
+
B<Note> that this pattern does not behave the same way as the equivalent
PCRE or Python construct of the same form. In perl you can backtrack into
a recursed group, in PCRE and Python the recursed into group is treated
-as atomic. Also, constructs like (?i:(?1)) or (?:(?i)(?1)) do not affect
-the pattern being recursed into.
+as atomic. Also, modifiers are resolved at compile time, so constructs
+like (?i:(?1)) or (?:(?i)(?1)) do not affect how the sub-pattern will
+be processed.
=item C<(?&NAME)>
X<(?&NAME)>
diff --git a/regcomp.c b/regcomp.c
index 3cc129506b..b077ddb1df 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -4915,17 +4915,54 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
}
goto gen_recurse_regop;
/* NOT REACHED */
+ case '+':
+ if (!(RExC_parse[0] >= '1' && RExC_parse[0] <= '9')) {
+ RExC_parse++;
+ vFAIL("Illegal pattern");
+ }
+ goto parse_recursion;
+ /* NOT REACHED*/
+ case '-': /* (?-1) */
+ if (!(RExC_parse[0] >= '1' && RExC_parse[0] <= '9')) {
+ RExC_parse--; /* rewind to let it be handled later */
+ goto parse_flags;
+ }
+ /*FALLTHROUGH */
case '1': case '2': case '3': case '4': /* (?1) */
case '5': case '6': case '7': case '8': case '9':
RExC_parse--;
+ parse_recursion:
num = atoi(RExC_parse);
parse_start = RExC_parse - 1; /* MJD */
+ if (*RExC_parse == '-')
+ RExC_parse++;
while (isDIGIT(*RExC_parse))
RExC_parse++;
if (*RExC_parse!=')')
vFAIL("Expecting close bracket");
gen_recurse_regop:
+ if ( paren == '-' ) {
+ /*
+ Diagram of capture buffer numbering.
+ Top line is the normal capture buffer numbers
+ Botton line is the negative indexing as from
+ the X (the (?-2))
+
+ + 1 2 3 4 5 X 6 7
+ /(a(x)y)(a(b(c(?-2)d)e)f)(g(h))/
+ - 5 4 3 2 1 X x x
+
+ */
+ num = RExC_npar + num;
+ if (num < 1) {
+ RExC_parse++;
+ vFAIL("Reference to nonexistent group");
+ }
+ } else if ( paren == '+' ) {
+ num = RExC_npar + num - 1;
+ }
+
ret = reganode(pRExC_state, GOSUB, num);
if (!SIZE_ONLY) {
if (num > (I32)RExC_rx->nparens) {
diff --git a/t/op/pat.t b/t/op/pat.t
index 0bc0eb675c..333165d185 100755
--- a/t/op/pat.t
+++ b/t/op/pat.t
@@ -3991,8 +3991,23 @@ for my $c ("z", "\0", "!", chr(254), chr(256)) {
for ("ABC","BAX") {
ok(/A (*THEN) X | B (*THEN) C/x,"Simple (*THEN) test");
}
-}
-
+}
+
+{
+ my $parens=qr/(\((?:[^()]++|(?-1))*+\))/;
+ local $_='foo((2*3)+4-3) + bar(2*(3+4)-1*(2-3))';
+ my ($all,$one,$two)=('','','');
+ if (/foo $parens \s* \+ \s* bar $parens/x) {
+ $all=$&;
+ $one=$1;
+ $two=$2;
+ }
+ iseq($one, '((2*3)+4-3)');
+ iseq($two, '(2*(3+4)-1*(2-3))');
+ iseq($all, 'foo((2*3)+4-3) + bar(2*(3+4)-1*(2-3))');
+ iseq($all, $_);
+}
+
#-------------------------------------------------------------------
# Keep the following tests last -- they may crash perl
@@ -4019,4 +4034,4 @@ ok((q(a)x 100) =~ /^(??{'(.)'x 100})/,
# Put new tests above the line, not here.
# Don't forget to update this!
-BEGIN { print "1..1341\n" };
+BEGIN { print "1..1345\n" };
diff --git a/t/op/re_tests b/t/op/re_tests
index 99c68243e5..078caa94be 100644
--- a/t/op/re_tests
+++ b/t/op/re_tests
@@ -1186,3 +1186,6 @@ a*(*F) aaaab n - -
(A(A|B(*ACCEPT)|C)D)(E) AB y $1 AB
(A(A|B(*ACCEPT)|C)D)(E) ACDE y $1$2$3 ACDCE
+(a)(?:(?-1)|(?+1))(b) aab y $&-$1-$2 aab-a-b
+(a)(?:(?-1)|(?+1))(b) abb y $&-$1-$2 abb-a-b
+(a)(?:(?-1)|(?+1))(b) acb n - -