summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--embedvar.h4
-rw-r--r--gv.c2
-rw-r--r--mg.c15
-rw-r--r--perlapi.h2
-rw-r--r--pod/perlretut.pod9
-rw-r--r--pod/perltoc.pod2
-rw-r--r--pod/perlvar.pod21
-rw-r--r--regexec.c7
-rw-r--r--regexp.h1
-rwxr-xr-xt/op/pat.t37
-rw-r--r--thrdvar.h1
11 files changed, 92 insertions, 9 deletions
diff --git a/embedvar.h b/embedvar.h
index a77a2738a3..82c965f09f 100644
--- a/embedvar.h
+++ b/embedvar.h
@@ -113,6 +113,7 @@
#define PL_regint_start (vTHX->Tregint_start)
#define PL_regint_string (vTHX->Tregint_string)
#define PL_reginterp_cnt (vTHX->Treginterp_cnt)
+#define PL_reglastcloseparen (vTHX->Treglastcloseparen)
#define PL_reglastparen (vTHX->Treglastparen)
#define PL_regnarrate (vTHX->Tregnarrate)
#define PL_regnaughty (vTHX->Tregnaughty)
@@ -821,6 +822,7 @@
#define PL_regint_start (aTHXo->interp.Tregint_start)
#define PL_regint_string (aTHXo->interp.Tregint_string)
#define PL_reginterp_cnt (aTHXo->interp.Treginterp_cnt)
+#define PL_reglastcloseparen (aTHXo->interp.Treglastcloseparen)
#define PL_reglastparen (aTHXo->interp.Treglastparen)
#define PL_regnarrate (aTHXo->interp.Tregnarrate)
#define PL_regnaughty (aTHXo->interp.Tregnaughty)
@@ -1518,6 +1520,7 @@
#define PL_regint_start (aTHX->Tregint_start)
#define PL_regint_string (aTHX->Tregint_string)
#define PL_reginterp_cnt (aTHX->Treginterp_cnt)
+#define PL_reglastcloseparen (aTHX->Treglastcloseparen)
#define PL_reglastparen (aTHX->Treglastparen)
#define PL_regnarrate (aTHX->Tregnarrate)
#define PL_regnaughty (aTHX->Tregnaughty)
@@ -1654,6 +1657,7 @@
#define PL_Tregint_start PL_regint_start
#define PL_Tregint_string PL_regint_string
#define PL_Treginterp_cnt PL_reginterp_cnt
+#define PL_Treglastcloseparen PL_reglastcloseparen
#define PL_Treglastparen PL_reglastparen
#define PL_Tregnarrate PL_regnarrate
#define PL_Tregnaughty PL_regnaughty
diff --git a/gv.c b/gv.c
index 86f48434a6..0af054c4f5 100644
--- a/gv.c
+++ b/gv.c
@@ -895,6 +895,7 @@ Perl_gv_fetchpv(pTHX_ const char *nambeg, I32 add, I32 sv_type)
case '\006': /* $^F */
case '\010': /* $^H */
case '\011': /* $^I, NOT \t in EBCDIC */
+ case '\016': /* $^N */
case '\020': /* $^P */
case '\024': /* $^T */
if (len > 1)
@@ -1764,6 +1765,7 @@ Perl_is_gv_magical(pTHX_ char *name, STRLEN len, U32 flags)
case '\010': /* $^H */
case '\011': /* $^I, NOT \t in EBCDIC */
case '\014': /* $^L */
+ case '\016': /* $^N */
case '\020': /* $^P */
case '\023': /* $^S */
case '\024': /* $^T */
diff --git a/mg.c b/mg.c
index 1f51e5ce58..30c8cddfdc 100644
--- a/mg.c
+++ b/mg.c
@@ -435,6 +435,13 @@ Perl_magic_len(pTHX_ SV *sv, MAGIC *mg)
goto getparen;
}
return 0;
+ case '\016': /* ^N */
+ if (PL_curpm && (rx = PM_GETRE(PL_curpm))) {
+ paren = rx->lastcloseparen;
+ if (paren)
+ goto getparen;
+ }
+ return 0;
case '`':
if (PL_curpm && (rx = PM_GETRE(PL_curpm))) {
if (rx->startp[0] != -1) {
@@ -660,6 +667,14 @@ Perl_magic_get(pTHX_ SV *sv, MAGIC *mg)
}
sv_setsv(sv,&PL_sv_undef);
break;
+ case '\016': /* ^N */
+ if (PL_curpm && (rx = PM_GETRE(PL_curpm))) {
+ paren = rx->lastcloseparen;
+ if (paren)
+ goto getparen;
+ }
+ sv_setsv(sv,&PL_sv_undef);
+ break;
case '`':
if (PL_curpm && (rx = PM_GETRE(PL_curpm))) {
if ((s = rx->subbeg) && rx->startp[0] != -1) {
diff --git a/perlapi.h b/perlapi.h
index 7085e74adc..7a8dcec618 100644
--- a/perlapi.h
+++ b/perlapi.h
@@ -802,6 +802,8 @@ START_EXTERN_C
#define PL_regint_string (*Perl_Tregint_string_ptr(aTHXo))
#undef PL_reginterp_cnt
#define PL_reginterp_cnt (*Perl_Treginterp_cnt_ptr(aTHXo))
+#undef PL_reglastcloseparen
+#define PL_reglastcloseparen (*Perl_Treglastcloseparen_ptr(aTHXo))
#undef PL_reglastparen
#define PL_reglastparen (*Perl_Treglastparen_ptr(aTHXo))
#undef PL_regnarrate
diff --git a/pod/perlretut.pod b/pod/perlretut.pod
index 45f829b2a0..3e83c1305f 100644
--- a/pod/perlretut.pod
+++ b/pod/perlretut.pod
@@ -710,9 +710,12 @@ indicated below it:
/(ab(cd|ef)((gi)|j))/;
1 2 34
-so that if the regexp matched, e.g., C<$2> would contain 'cd' or 'ef'.
-For convenience, perl sets C<$+> to the highest numbered C<$1>, C<$2>,
-... that got assigned.
+so that if the regexp matched, e.g., C<$2> would contain 'cd' or 'ef'. For
+convenience, perl sets C<$+> to the string held by the highest numbered
+C<$1>, C<$2>, ... that got assigned (and, somewhat related, C<$^N> to the
+value of the C<$1>, C<$2>, ... most-recently assigned; i.e. the C<$1>,
+C<$2>, ... associated with the rightmost closing parenthesis used in the
+match).
Closely associated with the matching variables C<$1>, C<$2>, ... are
the B<backreferences> C<\1>, C<\2>, ... . Backreferences are simply
diff --git a/pod/perltoc.pod b/pod/perltoc.pod
index 502a8f433b..98652cc60b 100644
--- a/pod/perltoc.pod
+++ b/pod/perltoc.pod
@@ -904,7 +904,7 @@ $CHILD_ERROR, $?, $OS_ERROR, $ERRNO, $!, $EXTENDED_OS_ERROR, $^E,
$EVAL_ERROR, $@, $PROCESS_ID, $PID, $$, $REAL_USER_ID, $UID, $<,
$EFFECTIVE_USER_ID, $EUID, $>, $REAL_GROUP_ID, $GID, $(,
$EFFECTIVE_GROUP_ID, $EGID, $), $PROGRAM_NAME, $0, $[, $], $COMPILING, $^C,
-$DEBUGGING, $^D, $SYSTEM_FD_MAX, $^F, $^H, %^H, $INPLACE_EDIT, $^I, $^M,
+$DEBUGGING, $^D, $SYSTEM_FD_MAX, $^F, $^H, %^H, $INPLACE_EDIT, $^I, $^M, $^N,
$OSNAME, $^O, $PERLDB, $^P, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
0x100, 0x200, $LAST_REGEXP_CODE_RESULT, $^R, $EXCEPTIONS_BEING_CAUGHT, $^S,
$BASETIME, $^T, $PERL_VERSION, $^V, $WARNING, $^W, ${^WARNING_BITS},
diff --git a/pod/perlvar.pod b/pod/perlvar.pod
index eae87c791c..d70f22d1bd 100644
--- a/pod/perlvar.pod
+++ b/pod/perlvar.pod
@@ -180,15 +180,30 @@ performance penalty on all regular expression matches. See L<BUGS>.
=item $+
-The last bracket matched by the last search pattern. This is useful if
-you don't know which one of a set of alternative patterns matched. For
-example:
+The text matched by the last bracket of the last successful search pattern.
+This is useful if you don't know which one of a set of alternative patterns
+matched. For example:
/Version: (.*)|Revision: (.*)/ && ($rev = $+);
(Mnemonic: be positive and forward looking.)
This variable is read-only and dynamically scoped to the current BLOCK.
+=item $^N
+
+The text matched by the used group most-recently closed (i.e. the group
+with the rightmost closing parenthesis) of the last successful search
+pattern. This is primarly used inside C<(?{...})> blocks for examining text
+recently matched. For example, to effectively capture text to a variable
+(in addition to C<$1>, C<$2>, etc.), replace C<(...)> with
+
+ (?:(...)(?{ $var = $^N }))
+
+By setting and then using C<$var> in this way relieves you from having to
+worry about exactly which numbered set of parentheses they are.
+
+This variable is dynamically scoped to the current BLOCK.
+
=item @LAST_MATCH_END
=item @+
diff --git a/regexec.c b/regexec.c
index 1145b602b3..b5f8f4759a 100644
--- a/regexec.c
+++ b/regexec.c
@@ -147,7 +147,7 @@ S_regcppush(pTHX_ I32 parenfloor)
if (paren_elems_to_push < 0)
Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
-#define REGCP_OTHER_ELEMS 5
+#define REGCP_OTHER_ELEMS 6
SSCHECK(paren_elems_to_push + REGCP_OTHER_ELEMS);
for (p = PL_regsize; p > parenfloor; p--) {
/* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
@@ -159,6 +159,7 @@ S_regcppush(pTHX_ I32 parenfloor)
/* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
SSPUSHINT(PL_regsize);
SSPUSHINT(*PL_reglastparen);
+ SSPUSHINT(*PL_reglastcloseparen);
SSPUSHPTR(PL_reginput);
#define REGCP_FRAME_ELEMS 2
/* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
@@ -192,6 +193,7 @@ S_regcppop(pTHX)
assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
i = SSPOPINT; /* Parentheses elements to pop. */
input = (char *) SSPOPPTR;
+ *PL_reglastcloseparen = SSPOPINT;
*PL_reglastparen = SSPOPINT;
PL_regsize = SSPOPINT;
@@ -1871,6 +1873,7 @@ S_regtry(pTHX_ regexp *prog, char *startpos)
PL_regstartp = prog->startp;
PL_regendp = prog->endp;
PL_reglastparen = &prog->lastparen;
+ PL_reglastcloseparen = &prog->lastcloseparen;
prog->lastparen = 0;
PL_regsize = 0;
DEBUG_r(PL_reg_starttry = startpos);
@@ -2562,6 +2565,7 @@ S_regmatch(pTHX_ regnode *prog)
cache_re(re);
state.ss = PL_savestack_ix;
*PL_reglastparen = 0;
+ *PL_reglastcloseparen = 0;
PL_reg_call_cc = &state;
PL_reginput = locinput;
@@ -2619,6 +2623,7 @@ S_regmatch(pTHX_ regnode *prog)
PL_regendp[n] = locinput - PL_bostr;
if (n > *PL_reglastparen)
*PL_reglastparen = n;
+ *PL_reglastcloseparen = n;
break;
case GROUPP:
n = ARG(scan); /* which paren pair */
diff --git a/regexp.h b/regexp.h
index f21d9d37c0..89537c2c6b 100644
--- a/regexp.h
+++ b/regexp.h
@@ -37,6 +37,7 @@ typedef struct regexp {
I32 prelen; /* length of precomp */
U32 nparens; /* number of parentheses */
U32 lastparen; /* last paren matched */
+ U32 lastcloseparen; /* last paren matched */
U32 reganch; /* Internal use only +
Tainted information used by regexec? */
regnode program[1]; /* Unwarranted chumminess with compiler. */
diff --git a/t/op/pat.t b/t/op/pat.t
index 9635ad9820..57f7cb7eb9 100755
--- a/t/op/pat.t
+++ b/t/op/pat.t
@@ -6,7 +6,7 @@
$| = 1;
-print "1..639\n";
+print "1..660\n";
BEGIN {
chdir 't' if -d 't';
@@ -1854,3 +1854,38 @@ print "ok 638\n";
print "not " unless " " =~ /[[:print:]]/;
print "ok 639\n";
+##
+## Test basic $^N usage outside of a regex
+##
+$x = "abcdef";
+$T="ok 640\n";if ($x =~ /cde/ and not defined $^N) {print $T} else {print "not $T"};
+$T="ok 641\n";if ($x =~ /(cde)/ and $^N eq "cde") {print $T} else {print "not $T"};
+$T="ok 642\n";if ($x =~ /(c)(d)(e)/ and $^N eq "e") {print $T} else {print "not $T"};
+$T="ok 643\n";if ($x =~ /(c(d)e)/ and $^N eq "cde") {print $T} else {print "not $T"};
+$T="ok 644\n";if ($x =~ /(foo)|(c(d)e)/ and $^N eq "cde") {print $T} else {print "not $T"};
+$T="ok 645\n";if ($x =~ /(c(d)e)|(foo)/ and $^N eq "cde") {print $T} else {print "not $T"};
+$T="ok 646\n";if ($x =~ /(c(d)e)|(abc)/ and $^N eq "abc") {print $T} else {print "not $T"};
+$T="ok 647\n";if ($x =~ /(c(d)e)|(abc)x/ and $^N eq "cde") {print $T} else {print "not $T"};
+$T="ok 648\n";if ($x =~ /(c(d)e)(abc)?/ and $^N eq "cde") {print $T} else {print "not $T"};
+$T="ok 649\n";if ($x =~ /(?:c(d)e)/ and $^N eq "d" ) {print $T} else {print "not $T"};
+$T="ok 650\n";if ($x =~ /(?:c(d)e)(?:f)/ and $^N eq "d" ) {print $T} else {print "not $T"};
+$T="ok 651\n";if ($x =~ /(?:([abc])|([def]))*/ and $^N eq "f" ){print $T} else {print "not $T"};
+$T="ok 652\n";if ($x =~ /(?:([ace])|([bdf]))*/ and $^N eq "f" ){print $T} else {print "not $T"};
+$T="ok 653\n";if ($x =~ /(([ace])|([bd]))*/ and $^N eq "e" ){print $T} else {print "not $T"};
+{
+ $T="ok 654\n";if($x =~ /(([ace])|([bdf]))*/ and $^N eq "f" ){print $T} else {print "not $T"};
+}
+## test to see if $^N is automatically localized -- it should now
+## have the value set in test 653
+$T="ok 655\n";if ($^N eq "e" ){print $T} else {print "not $T"};
+
+##
+## Now test inside (?{...})
+##
+$T="ok 656\n";if ($x =~ /a([abc])(?{$y=$^N})c/ and $y eq "b" ){print $T} else {print "not $T"};
+$T="ok 657\n";if ($x =~ /a([abc]+)(?{$y=$^N})d/ and $y eq "bc"){print $T} else {print "not $T"};
+$T="ok 658\n";if ($x =~ /a([abcdefg]+)(?{$y=$^N})d/ and $y eq "bc"){print $T} else {print "not $T"};
+$T="ok 659\n";if ($x =~ /(a([abcdefg]+)(?{$y=$^N})d)(?{$z=$^N})e/ and $y eq "bc" and $z eq "abcd")
+ {print $T} else {print "not $T"};
+$T="ok 660\n";if ($x =~ /(a([abcdefg]+)(?{$y=$^N})de)(?{$z=$^N})/ and $y eq "bc" and $z eq "abcde")
+ {print $T} else {print "not $T"};
diff --git a/thrdvar.h b/thrdvar.h
index 2cfbfa2dad..a739ecd7d6 100644
--- a/thrdvar.h
+++ b/thrdvar.h
@@ -182,6 +182,7 @@ PERLVAR(Tregeol, char *) /* End of input, for $ check. */
PERLVAR(Tregstartp, I32 *) /* Pointer to startp array. */
PERLVAR(Tregendp, I32 *) /* Ditto for endp. */
PERLVAR(Treglastparen, U32 *) /* Similarly for lastparen. */
+PERLVAR(Treglastcloseparen, U32 *) /* Similarly for lastcloseparen. */
PERLVAR(Tregtill, char *) /* How far we are required to go. */
PERLVAR(Tregcompat1, char) /* used to be regprev1 */
PERLVAR(Treg_start_tmp, char **) /* from regexec.c */