summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--MANIFEST1
-rw-r--r--ext/B/t/concise-xs.t2
-rw-r--r--gv.c6
-rw-r--r--mg.c33
-rw-r--r--op.h13
-rw-r--r--pod/perlop.pod21
-rw-r--r--pod/perlre.pod16
-rw-r--r--pod/perlvar.pod24
-rw-r--r--pp_hot.c11
-rw-r--r--regcomp.c201
-rw-r--r--regcomp.h3
-rw-r--r--regexp.h12
-rwxr-xr-xt/op/regexp.t10
-rw-r--r--t/op/regexp_kmod.t39
-rw-r--r--toke.c28
15 files changed, 281 insertions, 139 deletions
diff --git a/MANIFEST b/MANIFEST
index 9b8753ab15..7a8aa79b3f 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -3569,6 +3569,7 @@ t/op/regexp_notrie.t See if regular expressions work without trie optimisation
t/op/regexp_qr_embed.t See if regular expressions work with embedded qr//
t/op/regexp_qr.t See if regular expressions work as qr//
t/op/regexp.t See if regular expressions work
+t/op/regexp_kmod.t See if regexp /k modifier works as expected
t/op/regexp_trielist.t See if regular expressions work with trie optimisation
t/op/regmesg.t See if one can get regular expression errors
t/op/repeat.t See if x operator works
diff --git a/ext/B/t/concise-xs.t b/ext/B/t/concise-xs.t
index b4f053f67f..d3711cccbb 100644
--- a/ext/B/t/concise-xs.t
+++ b/ext/B/t/concise-xs.t
@@ -117,7 +117,7 @@ use Getopt::Std;
use Carp;
use Test::More tests => ( # per-pkg tests (function ct + require_ok)
40 + 16 # Data::Dumper, Digest::MD5
- + 517 + 238 # B::Deparse, B
+ + 517 + 239 # B::Deparse, B
+ 595 + 190 # POSIX, IO::Socket
+ 3 * ($] > 5.009)
+ 16 * ($] >= 5.009003)
diff --git a/gv.c b/gv.c
index 8630c1b059..4332df9b4e 100644
--- a/gv.c
+++ b/gv.c
@@ -1109,10 +1109,16 @@ Perl_gv_fetchpvn_flags(pTHX_ const char *nambeg, STRLEN full_len, I32 flags,
if (strEQ(name2, "NCODING"))
goto magicalize;
break;
+ case '\015': /* $^MATCH */
+ if (strEQ(name2, "ATCH"))
+ goto ro_magicalize;
case '\017': /* $^OPEN */
if (strEQ(name2, "PEN"))
goto magicalize;
break;
+ case '\020': /* $^PREMATCH $^POSTMATCH */
+ if (strEQ(name2, "REMATCH") || strEQ(name2, "OSTMATCH"))
+ goto ro_magicalize;
case '\024': /* ${^TAINT} */
if (strEQ(name2, "AINT"))
goto ro_magicalize;
diff --git a/mg.c b/mg.c
index 2bb9b664c7..86f0e12796 100644
--- a/mg.c
+++ b/mg.c
@@ -779,8 +779,14 @@ Perl_magic_get(pTHX_ SV *sv, MAGIC *mg)
}
}
break;
- case '\020': /* ^P */
- sv_setiv(sv, (IV)PL_perldb);
+ case '\020':
+ if (nextchar == '\0') { /* ^P */
+ sv_setiv(sv, (IV)PL_perldb);
+ } else if (strEQ(remaining, "REMATCH")) { /* $^PREMATCH */
+ goto do_prematch_fetch;
+ } else if (strEQ(remaining, "OSTMATCH")) { /* $^POSTMATCH */
+ goto do_postmatch_fetch;
+ }
break;
case '\023': /* ^S */
if (nextchar == '\0') {
@@ -847,18 +853,21 @@ Perl_magic_get(pTHX_ SV *sv, MAGIC *mg)
SvPOK_only(sv);
}
break;
+ case '\015': /* $^MATCH */
+ if (strEQ(remaining, "ATCH")) {
case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9': case '&':
- if (PL_curpm && (rx = PM_GETRE(PL_curpm))) {
- /*
- * Pre-threads, this was paren = atoi(GvENAME((GV*)mg->mg_obj));
- * XXX Does the new way break anything?
- */
- paren = atoi(mg->mg_ptr); /* $& is in [0] */
- reg_numbered_buff_get( paren, rx, sv, 0);
- break;
+ if (PL_curpm && (rx = PM_GETRE(PL_curpm))) {
+ /*
+ * Pre-threads, this was paren = atoi(GvENAME((GV*)mg->mg_obj));
+ * XXX Does the new way break anything?
+ */
+ paren = atoi(mg->mg_ptr); /* $& is in [0] */
+ reg_numbered_buff_get( paren, rx, sv, 0);
+ break;
+ }
+ sv_setsv(sv,&PL_sv_undef);
}
- sv_setsv(sv,&PL_sv_undef);
break;
case '+':
if (PL_curpm && (rx = PM_GETRE(PL_curpm))) {
@@ -880,6 +889,7 @@ Perl_magic_get(pTHX_ SV *sv, MAGIC *mg)
sv_setsv(sv,&PL_sv_undef);
break;
case '`':
+ do_prematch_fetch:
if (PL_curpm && (rx = PM_GETRE(PL_curpm))) {
reg_numbered_buff_get( -2, rx, sv, 0);
break;
@@ -887,6 +897,7 @@ Perl_magic_get(pTHX_ SV *sv, MAGIC *mg)
sv_setsv(sv,&PL_sv_undef);
break;
case '\'':
+ do_postmatch_fetch:
if (PL_curpm && (rx = PM_GETRE(PL_curpm))) {
reg_numbered_buff_get( -1, rx, sv, 0);
break;
diff --git a/op.h b/op.h
index 1ac4aa0c4e..d8c54781ff 100644
--- a/op.h
+++ b/op.h
@@ -371,14 +371,15 @@ struct pmop {
/* The following flags have exact equivalents in regcomp.h with the prefix RXf_
* which are stored in the regexp->extflags member.
*/
-#define PMf_LOCALE 0x0800 /* use locale for character types */
-#define PMf_MULTILINE 0x1000 /* assume multiple lines */
-#define PMf_SINGLELINE 0x2000 /* assume single line */
-#define PMf_FOLD 0x4000 /* case insensitivity */
-#define PMf_EXTENDED 0x8000 /* chuck embedded whitespace */
+#define PMf_LOCALE 0x00800 /* use locale for character types */
+#define PMf_MULTILINE 0x01000 /* assume multiple lines */
+#define PMf_SINGLELINE 0x02000 /* assume single line */
+#define PMf_FOLD 0x04000 /* case insensitivity */
+#define PMf_EXTENDED 0x08000 /* chuck embedded whitespace */
+#define PMf_KEEPCOPY 0x10000 /* copy the string when matching */
/* mask of bits that need to be transfered to re->extflags */
-#define PMf_COMPILETIME (PMf_MULTILINE|PMf_SINGLELINE|PMf_LOCALE|PMf_FOLD|PMf_EXTENDED)
+#define PMf_COMPILETIME (PMf_MULTILINE|PMf_SINGLELINE|PMf_LOCALE|PMf_FOLD|PMf_EXTENDED|PMf_KEEPCOPY)
#ifdef USE_ITHREADS
diff --git a/pod/perlop.pod b/pod/perlop.pod
index 46af19b230..7b84a683ac 100644
--- a/pod/perlop.pod
+++ b/pod/perlop.pod
@@ -1067,7 +1067,7 @@ X<m> X<operator, match>
X<regexp, options> X<regexp> X<regex, options> X<regex>
X</c> X</i> X</m> X</o> X</s> X</x>
-=item /PATTERN/cgimosx
+=item /PATTERN/cgimosxk
Searches a string for a pattern match, and in scalar context returns
true if it succeeds, false if it fails. If no string is specified
@@ -1080,13 +1080,15 @@ is in effect.
Options are:
- c Do not reset search position on a failed match when /g is in effect.
- g Match globally, i.e., find all occurrences.
i Do case-insensitive pattern matching.
m Treat string as multiple lines.
- o Compile pattern only once.
s Treat string as single line.
x Use extended regular expressions.
+ g Match globally, i.e., find all occurrences.
+ c Do not reset search position on a failed match when /g is in effect.
+ o Compile pattern only once.
+ k Keep a copy of the matched string so that ${^MATCH} and friends
+ will be defined.
If "/" is the delimiter then the initial C<m> is optional. With the C<m>
you can use any pair of non-alphanumeric, non-whitespace characters
@@ -1449,7 +1451,7 @@ put comments into a multi-line C<qw>-string. For this reason, the
C<use warnings> pragma and the B<-w> switch (that is, the C<$^W> variable)
produces warnings if the STRING contains the "," or the "#" character.
-=item s/PATTERN/REPLACEMENT/egimosx
+=item s/PATTERN/REPLACEMENT/egimosxk
X<substitute> X<substitution> X<replace> X<regexp, replace>
X<regexp, substitute> X</e> X</g> X</i> X</m> X</o> X</s> X</x>
@@ -1475,13 +1477,16 @@ when C<use locale> is in effect.
Options are:
- e Evaluate the right side as an expression.
- g Replace globally, i.e., all occurrences.
i Do case-insensitive pattern matching.
m Treat string as multiple lines.
- o Compile pattern only once.
s Treat string as single line.
x Use extended regular expressions.
+ g Replace globally, i.e., all occurrences.
+ o Compile pattern only once.
+ k Keep a copy of the original string so ${^MATCH} and friends
+ will be defined.
+ e Evaluate the right side as an expression.
+
Any non-alphanumeric, non-whitespace delimiter may replace the
slashes. If single quotes are used, no interpretation is done on the
diff --git a/pod/perlre.pod b/pod/perlre.pod
index 7133a02c96..d886d094a7 100644
--- a/pod/perlre.pod
+++ b/pod/perlre.pod
@@ -586,6 +586,15 @@ already paid the price. As of 5.005, C<$&> is not so costly as the
other two.
X<$&> X<$`> X<$'>
+As a workaround for this problem, Perl 5.10 introduces C<${^PREMATCH}>,
+C<${^MATCH}> and C<${^POSTMATCH}>, which are equivalent to C<$`>, C<$&>
+and C<$'>, B<except> that they are only guaranteed to be defined after a
+successful match that was executed with the C</k> (keep-copy) modifier.
+The use of these variables incurs no global performance penalty, unlike
+their punctuation char equivalents, however at the trade-off that you
+have to tell perl when you want to use them.
+X</k> X<k modifier>
+
Backslashed metacharacters in Perl are alphanumeric, such as C<\b>,
C<\w>, C<\n>. Unlike some other regular expression languages, there
are no backslashed symbols that aren't alphanumeric. So anything
@@ -639,7 +648,7 @@ whitespace formatting, a simple C<#> will suffice. Note that Perl closes
the comment as soon as it sees a C<)>, so there is no way to put a literal
C<)> in the comment.
-=item C<(?imsx-imsx)>
+=item C<(?kimsx-imsx)>
X<(?)>
One or more embedded pattern-match modifiers, to be turned on (or
@@ -667,6 +676,11 @@ will match a repeated (I<including the case>!) word C<blah> in any
case, assuming C<x> modifier, and no C<i> modifier outside this
group.
+Note that the C<k> modifier is special in that it can only be enabled,
+not disabled, and that its presence anywhere in a pattern has a global
+effect. Thus C<(?-k)> and C<(?-k:...)> are meaningless and will warn
+when executed under C<use warnings>.
+
=item C<(?:pattern)>
X<(?:)>
diff --git a/pod/perlvar.pod b/pod/perlvar.pod
index a211c378ae..b4db654178 100644
--- a/pod/perlvar.pod
+++ b/pod/perlvar.pod
@@ -228,6 +228,14 @@ performance penalty on all regular expression matches. See L</BUGS>.
See L</@-> for a replacement.
+=item ${^MATCH}
+X<${^MATCH}>
+
+This is similar to C<$&> (C<$POSTMATCH>) except that it does not incur the
+performance penalty associated with that variable, and is only guaranteed
+to return a defined value when the pattern was compiled or executed with
+the C</k> modifier.
+
=item $PREMATCH
=item $`
@@ -243,6 +251,14 @@ performance penalty on all regular expression matches. See L</BUGS>.
See L</@-> for a replacement.
+=item ${^PREMATCH}
+X<${^PREMATCH}>
+
+This is similar to C<$`> ($PREMATCH) except that it does not incur the
+performance penalty associated with that variable, and is only guaranteed
+to return a defined value when the pattern was compiled or executed with
+the C</k> modifier.
+
=item $POSTMATCH
=item $'
@@ -264,6 +280,14 @@ performance penalty on all regular expression matches. See L</BUGS>.
See L</@-> for a replacement.
+=item ${^POSTMATCH}
+X<${^POSTMATCH}>
+
+This is similar to C<$'> (C<$POSTMATCH>) except that it does not incur the
+performance penalty associated with that variable, and is only guaranteed
+to return a defined value when the pattern was compiled or executed with
+the C</k> modifier.
+
=item $LAST_PAREN_MATCH
=item $+
diff --git a/pp_hot.c b/pp_hot.c
index f1ad3edee5..71673111eb 100644
--- a/pp_hot.c
+++ b/pp_hot.c
@@ -1368,7 +1368,8 @@ PP(pp_match)
/* remove comment to get faster /g but possibly unsafe $1 vars after a
match. Test for the unsafe vars will fail as well*/
if (( /* !global && */ rx->nparens)
- || SvTEMP(TARG) || PL_sawampersand || (pm->op_pmflags & PMf_EVAL))
+ || SvTEMP(TARG) || PL_sawampersand ||
+ (pm->op_pmflags & (PMf_EVAL|PMf_KEEPCOPY)))
r_flags |= REXEC_COPY_STR;
if (SvSCREAM(TARG))
r_flags |= REXEC_SCREAM;
@@ -1391,6 +1392,7 @@ play_it_again:
goto nope;
if ( (rx->extflags & RXf_CHECK_ALL)
&& !PL_sawampersand
+ && !(pm->op_pmflags & PMf_KEEPCOPY)
&& ((rx->extflags & RXf_NOSCAN)
|| !((rx->extflags & RXf_INTUIT_TAIL)
&& (r_flags & REXEC_SCREAM)))
@@ -1516,7 +1518,7 @@ yup: /* Confirmed by INTUIT */
rx->sublen = strend - truebase;
goto gotcha;
}
- if (PL_sawampersand) {
+ if (PL_sawampersand || pm->op_pmflags & PMf_KEEPCOPY) {
I32 off;
#ifdef PERL_OLD_COPY_ON_WRITE
if (SvIsCOW(TARG) || (SvFLAGS(TARG) & CAN_COW_MASK) == CAN_COW_FLAGS) {
@@ -1547,6 +1549,8 @@ yup: /* Confirmed by INTUIT */
rx->startp[0] = s - truebase;
rx->endp[0] = s - truebase + rx->minlenret;
}
+ /* including rx->nparens in the below code seems highly suspicious.
+ -dmq */
rx->nparens = rx->lastparen = rx->lastcloseparen = 0; /* used by @-, @+, and $^N */
LEAVE_SCOPE(oldsave);
RETPUSHYES;
@@ -2152,7 +2156,7 @@ PP(pp_subst)
rx = PM_GETRE(pm);
}
r_flags = (rx->nparens || SvTEMP(TARG) || PL_sawampersand
- || (pm->op_pmflags & PMf_EVAL))
+ || (pm->op_pmflags & (PMf_EVAL|PMf_KEEPCOPY)) )
? REXEC_COPY_STR : 0;
if (SvSCREAM(TARG))
r_flags |= REXEC_SCREAM;
@@ -2167,6 +2171,7 @@ PP(pp_subst)
/* How to do it in subst? */
/* if ( (rx->extflags & RXf_CHECK_ALL)
&& !PL_sawampersand
+ && !(pm->op_pmflags & PMf_KEEPCOPY)
&& ((rx->extflags & RXf_NOSCAN)
|| !((rx->extflags & RXf_INTUIT_TAIL)
&& (r_flags & REXEC_SCREAM))))
diff --git a/regcomp.c b/regcomp.c
index 4d139f211a..d07f177402 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -172,6 +172,7 @@ typedef struct RExC_state_t {
#define RExC_recurse (pRExC_state->recurse)
#define RExC_recurse_count (pRExC_state->recurse_count)
+
#define ISMULT1(c) ((c) == '*' || (c) == '+' || (c) == '?')
#define ISMULT2(s) ((*s) == '*' || (*s) == '+' || (*s) == '?' || \
((*s) == '{' && regcurly(s)))
@@ -4592,8 +4593,8 @@ reStudy:
ri->name_list_idx = add_data( pRExC_state, 1, "p" );
ri->data->data[ri->name_list_idx] = (void*)SvREFCNT_inc(RExC_paren_name_list);
} else
- ri->name_list_idx = 0;
#endif
+ ri->name_list_idx = 0;
if (RExC_recurse_count) {
for ( ; RExC_recurse_count ; RExC_recurse_count-- ) {
@@ -4676,12 +4677,18 @@ Perl_reg_numbered_buff_get(pTHX_ I32 paren, const REGEXP * const rx, SV* usesv,
SV *sv = usesv ? usesv : newSVpvs("");
PERL_UNUSED_ARG(flags);
- if (paren == -2 && (s = rx->subbeg) && rx->startp[0] != -1) {
+ if (!rx->subbeg) {
+ sv_setsv(sv,&PL_sv_undef);
+ return sv;
+ }
+ else
+ if (paren == -2 && rx->startp[0] != -1) {
/* $` */
i = rx->startp[0];
+ s = rx->subbeg;
}
else
- if (paren == -1 && rx->subbeg && rx->endp[0] != -1) {
+ if (paren == -1 && rx->endp[0] != -1) {
/* $' */
s = rx->subbeg + rx->endp[0];
i = rx->sublen - rx->endp[0];
@@ -4694,47 +4701,43 @@ Perl_reg_numbered_buff_get(pTHX_ I32 paren, const REGEXP * const rx, SV* usesv,
/* $& $1 ... */
i = t1 - s1;
s = rx->subbeg + s1;
- }
-
- if (s) {
- assert(rx->subbeg);
- assert(rx->sublen >= (s - rx->subbeg) + i );
-
- if (i >= 0) {
- const int oldtainted = PL_tainted;
- TAINT_NOT;
- sv_setpvn(sv, s, i);
- PL_tainted = oldtainted;
- if ( (rx->extflags & RXf_CANY_SEEN)
- ? (RX_MATCH_UTF8(rx)
- && (!i || is_utf8_string((U8*)s, i)))
- : (RX_MATCH_UTF8(rx)) )
- {
- SvUTF8_on(sv);
- }
- else
- SvUTF8_off(sv);
- if (PL_tainting) {
- if (RX_MATCH_TAINTED(rx)) {
- if (SvTYPE(sv) >= SVt_PVMG) {
- MAGIC* const mg = SvMAGIC(sv);
- MAGIC* mgt;
- PL_tainted = 1;
- SvMAGIC_set(sv, mg->mg_moremagic);
- SvTAINT(sv);
- if ((mgt = SvMAGIC(sv))) {
- mg->mg_moremagic = mgt;
- SvMAGIC_set(sv, mg);
- }
- } else {
- PL_tainted = 1;
- SvTAINT(sv);
+ } else {
+ sv_setsv(sv,&PL_sv_undef);
+ return sv;
+ }
+ assert(rx->sublen >= (s - rx->subbeg) + i );
+ if (i >= 0) {
+ const int oldtainted = PL_tainted;
+ TAINT_NOT;
+ sv_setpvn(sv, s, i);
+ PL_tainted = oldtainted;
+ if ( (rx->extflags & RXf_CANY_SEEN)
+ ? (RX_MATCH_UTF8(rx)
+ && (!i || is_utf8_string((U8*)s, i)))
+ : (RX_MATCH_UTF8(rx)) )
+ {
+ SvUTF8_on(sv);
+ }
+ else
+ SvUTF8_off(sv);
+ if (PL_tainting) {
+ if (RX_MATCH_TAINTED(rx)) {
+ if (SvTYPE(sv) >= SVt_PVMG) {
+ MAGIC* const mg = SvMAGIC(sv);
+ MAGIC* mgt;
+ PL_tainted = 1;
+ SvMAGIC_set(sv, mg->mg_moremagic);
+ SvTAINT(sv);
+ if ((mgt = SvMAGIC(sv))) {
+ mg->mg_moremagic = mgt;
+ SvMAGIC_set(sv, mg);
}
- } else
- SvTAINTED_off(sv);
- }
- } else {
- sv_setsv(sv,&PL_sv_undef);
+ } else {
+ PL_tainted = 1;
+ SvTAINT(sv);
+ }
+ } else
+ SvTAINTED_off(sv);
}
} else {
sv_setsv(sv,&PL_sv_undef);
@@ -5006,8 +5009,6 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
return ret;
} else
if (*RExC_parse == '?') { /* (?...) */
- U32 posflags = 0, negflags = 0;
- U32 *flagsp = &posflags;
bool is_logical = 0;
const char * const seqstart = RExC_parse;
@@ -5431,13 +5432,20 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
vFAIL("Sequence (? incomplete");
break;
default:
- --RExC_parse;
- parse_flags: /* (?i) */
- while (*RExC_parse && strchr("iogcmsx", *RExC_parse)) {
+ --RExC_parse;
+ parse_flags: /* (?i) */
+ {
+ U32 posflags = 0, negflags = 0;
+ U32 *flagsp = &posflags;
+
+ while (*RExC_parse) {
+ /* && strchr("iogcmsx", *RExC_parse) */
/* (?g), (?gc) and (?o) are useless here
and must be globally applied -- japhy */
-
- if (*RExC_parse == 'o' || *RExC_parse == 'g') {
+ switch (*RExC_parse) {
+ CASE_STD_PMMOD_FLAGS_PARSE_SET(flagsp);
+ case 'o':
+ case 'g':
if (SIZE_ONLY && ckWARN(WARN_REGEXP)) {
const I32 wflagbit = *RExC_parse == 'o' ? WASTED_O : WASTED_G;
if (! (wastedflags & wflagbit) ) {
@@ -5452,8 +5460,9 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
);
}
}
- }
- else if (*RExC_parse == 'c') {
+ break;
+
+ case 'c':
if (SIZE_ONLY && ckWARN(WARN_REGEXP)) {
if (! (wastedflags & WASTED_C) ) {
wastedflags |= WASTED_GC;
@@ -5465,33 +5474,45 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
);
}
}
- }
- else { pmflag(flagsp, *RExC_parse); }
-
- ++RExC_parse;
- }
- if (*RExC_parse == '-') {
- flagsp = &negflags;
- wastedflags = 0; /* reset so (?g-c) warns twice */
+ break;
+ case 'k':
+ if (flagsp == &negflags) {
+ if (SIZE_ONLY && ckWARN(WARN_REGEXP))
+ vWARN(RExC_parse + 1,"Useless use of (?-k)");
+ } else {
+ *flagsp |= RXf_PMf_KEEPCOPY;
+ }
+ break;
+ case '-':
+ if (flagsp == &negflags)
+ goto unknown;
+ flagsp = &negflags;
+ wastedflags = 0; /* reset so (?g-c) warns twice */
+ break;
+ case ':':
+ paren = ':';
+ /*FALLTHROUGH*/
+ case ')':
+ RExC_flags |= posflags;
+ RExC_flags &= ~negflags;
+ nextchar(pRExC_state);
+ if (paren != ':') {
+ *flagp = TRYAGAIN;
+ return NULL;
+ } else {
+ ret = NULL;
+ goto parse_rest;
+ }
+ /*NOTREACHED*/
+ default:
+ unknown:
+ RExC_parse++;
+ vFAIL3("Sequence (%.*s...) not recognized", RExC_parse-seqstart, seqstart);
+ /*NOTREACHED*/
+ }
++RExC_parse;
- goto parse_flags;
}
- RExC_flags |= posflags;
- RExC_flags &= ~negflags;
- if (*RExC_parse == ':') {
- RExC_parse++;
- paren = ':';
- break;
- }
- unknown:
- if (*RExC_parse != ')') {
- RExC_parse++;
- vFAIL3("Sequence (%.*s...) not recognized", RExC_parse-seqstart, seqstart);
- }
- nextchar(pRExC_state);
- *flagp = TRYAGAIN;
- return NULL;
- }
+ }} /* one for the default block, one for the switch */
}
else { /* (...) */
capturing_parens:
@@ -5516,7 +5537,8 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
}
else /* ! paren */
ret = NULL;
-
+
+ parse_rest:
/* Pick up the branches, linking them together. */
parse_start = RExC_parse; /* MJD */
br = regbranch(pRExC_state, &flags, 1,depth+1);
@@ -8803,6 +8825,8 @@ Perl_regdupe_internal(pTHX_ const regexp *r, CLONE_PARAMS *param)
else
reti->data = NULL;
+ reti->name_list_idx = ri->name_list_idx;
+
Newx(reti->offsets, 2*len+1, U32);
Copy(ri->offsets, reti->offsets, 2*len+1, U32);
@@ -8846,13 +8870,16 @@ Perl_reg_stringify(pTHX_ MAGIC *mg, STRLEN *lp, U32 *flags, I32 *haseval ) {
if (!mg->mg_ptr) {
const char *fptr = "msix";
- char reflags[6];
+ char reflags[7];
char ch;
- int left = 0;
- int right = 4;
- bool need_newline = 0;
- U16 reganch = (U16)((re->extflags & RXf_PMf_COMPILETIME) >> 12);
-
+ bool hask = ((re->extflags & RXf_PMf_KEEPCOPY) == RXf_PMf_KEEPCOPY);
+ bool hasm = ((re->extflags & RXf_PMf_STD_PMMOD) != RXf_PMf_STD_PMMOD);
+ U16 reganch = (U16)((re->extflags & RXf_PMf_STD_PMMOD) >> 12);
+ bool need_newline = 0;
+ int left = 0;
+ int right = 4 + hask;
+ if (hask)
+ reflags[left++]='k';
while((ch = *fptr++)) {
if(reganch & 1) {
reflags[left++] = ch;
@@ -8862,11 +8889,11 @@ Perl_reg_stringify(pTHX_ MAGIC *mg, STRLEN *lp, U32 *flags, I32 *haseval ) {
}
reganch >>= 1;
}
- if(left != 4) {
+ if(hasm) {
reflags[left] = '-';
- left = 5;
+ left = 5 + hask;
}
-
+ /* printf("[%*.7s]\n",left,reflags); */
mg->mg_len = re->prelen + 4 + left;
/*
* If /x was used, we have to worry about a regex ending with a
diff --git a/regcomp.h b/regcomp.h
index ad7400fc3c..d4a5001c30 100644
--- a/regcomp.h
+++ b/regcomp.h
@@ -103,10 +103,7 @@ typedef struct regexp_paren_ofs {
} regexp_paren_ofs;
typedef struct regexp_internal {
-#ifdef DEBUGGING
int name_list_idx; /* Optional data index of an array of paren names */
-#endif
-
U32 *offsets; /* offset annotations 20001228 MJD
data about mapping the program to the
string*/
diff --git a/regexp.h b/regexp.h
index c28c78e07a..4045fbd84b 100644
--- a/regexp.h
+++ b/regexp.h
@@ -144,11 +144,18 @@ typedef struct regexp_engine {
#define RXf_PMf_SINGLELINE 0x00002000 /* /s */
#define RXf_PMf_FOLD 0x00004000 /* /i */
#define RXf_PMf_EXTENDED 0x00008000 /* /x */
+#define RXf_PMf_KEEPCOPY 0x00010000 /* /k */
/* these flags are transfered from the PMOP->op_pmflags member during compilation */
-#define RXf_PMf_COMPILETIME (RXf_PMf_MULTILINE|RXf_PMf_SINGLELINE|RXf_PMf_LOCALE|RXf_PMf_FOLD|RXf_PMf_EXTENDED)
+#define RXf_PMf_STD_PMMOD (RXf_PMf_MULTILINE|RXf_PMf_SINGLELINE|RXf_PMf_FOLD|RXf_PMf_EXTENDED)
+#define RXf_PMf_COMPILETIME (RXf_PMf_MULTILINE|RXf_PMf_SINGLELINE|RXf_PMf_LOCALE|RXf_PMf_FOLD|RXf_PMf_EXTENDED|RXf_PMf_KEEPCOPY)
+
+#define CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl) \
+ case 'i': *(pmfl) |= RXf_PMf_FOLD; break; \
+ case 'm': *(pmfl) |= RXf_PMf_MULTILINE; break; \
+ case 's': *(pmfl) |= RXf_PMf_SINGLELINE; break; \
+ case 'x': *(pmfl) |= RXf_PMf_EXTENDED; break
/* What we have seen */
-/* one bit here */
#define RXf_LOOKBEHIND_SEEN 0x00020000
#define RXf_EVAL_SEEN 0x00040000
#define RXf_CANY_SEEN 0x00080000
@@ -448,6 +455,7 @@ struct re_save_state {
#define SAVESTACK_ALLOC_FOR_RE_SAVE_STATE \
(1 + ((sizeof(struct re_save_state) - 1) / sizeof(*PL_savestack)))
+
/*
* Local variables:
* c-indentation-style: bsd
diff --git a/t/op/regexp.t b/t/op/regexp.t
index cce19fc03d..a7cd5fc7b9 100755
--- a/t/op/regexp.t
+++ b/t/op/regexp.t
@@ -125,7 +125,15 @@ EOFCODE
}
else {
if (!$match || $got ne $expect) {
- print "not ok $. ($study) $input => `$got', match=$match\n$code\n";
+ eval { require Data::Dumper };
+ if ($@) {
+ print "not ok $. ($study) $input => `$got', match=$match\n$code\n";
+ }
+ else { # better diagnostics
+ my $s = Data::Dumper->new([$subject],['subject'])->Useqq(1)->Dump;
+ my $g = Data::Dumper->new([$got],['got'])->Useqq(1)->Dump;
+ print "not ok $. ($study) $input => `$got', match=$match\n$s\n$g\n$code\n";
+ }
next TEST;
}
}
diff --git a/t/op/regexp_kmod.t b/t/op/regexp_kmod.t
new file mode 100644
index 0000000000..84efd83546
--- /dev/null
+++ b/t/op/regexp_kmod.t
@@ -0,0 +1,39 @@
+#!./perl
+
+BEGIN {
+ chdir 't' if -d 't';
+ @INC = '../lib';
+ require './test.pl';
+}
+
+use strict;
+use warnings;
+
+our @tests = (
+ # /k Pattern PRE MATCH POST
+ [ 'k', "456", "123-", "456", "-789"],
+ [ '', "(456)", "123-", "456", "-789"],
+ [ '', "456", undef, undef, undef ],
+);
+
+plan tests => 4 * @tests + 2;
+my $W = "";
+
+$SIG{__WARN__} = sub { $W.=join("",@_); };
+sub _u($$) { "$_[0] is ".(defined $_[1] ? "'$_[1]'" : "undef") }
+
+$_ = '123-456-789';
+foreach my $test (@tests) {
+ my ($k, $pat,$l,$m,$r) = @$test;
+ my $test_name = "/$pat/$k";
+ my $ok = ok($k ? /$pat/k : /$pat/, $test_name);
+ SKIP: {
+ skip "/$pat/$k failed to match", 3
+ unless $ok;
+ is(${^PREMATCH}, $l,_u "$test_name: ^PREMATCH",$l);
+ is(${^MATCH}, $m,_u "$test_name: ^MATCH",$m );
+ is(${^POSTMATCH}, $r,_u "$test_name: ^POSTMATCH",$r );
+ }
+}
+is($W,"","No warnings should be produced");
+ok(!defined ${^MATCH}, "No /k in scope so ^MATCH is undef");
diff --git a/toke.c b/toke.c
index f9f0627121..dcbf3d6360 100644
--- a/toke.c
+++ b/toke.c
@@ -10763,20 +10763,16 @@ void
Perl_pmflag(pTHX_ U32* pmfl, int ch)
{
PERL_UNUSED_CONTEXT;
- if (ch == 'i')
- *pmfl |= PMf_FOLD;
- else if (ch == 'g')
- *pmfl |= PMf_GLOBAL;
- else if (ch == 'c')
- *pmfl |= PMf_CONTINUE;
- else if (ch == 'o')
- *pmfl |= PMf_KEEP;
- else if (ch == 'm')
- *pmfl |= PMf_MULTILINE;
- else if (ch == 's')
- *pmfl |= PMf_SINGLELINE;
- else if (ch == 'x')
- *pmfl |= PMf_EXTENDED;
+ if (ch<256) {
+ char c = (char)ch;
+ switch (c) {
+ CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
+ case 'g': *pmfl |= PMf_GLOBAL; break;
+ case 'c': *pmfl |= PMf_CONTINUE; break;
+ case 'o': *pmfl |= PMf_KEEP; break;
+ case 'k': *pmfl |= PMf_KEEPCOPY; break;
+ }
+ }
}
STATIC char *
@@ -10786,7 +10782,7 @@ S_scan_pat(pTHX_ char *start, I32 type)
PMOP *pm;
char *s = scan_str(start,!!PL_madskills,FALSE);
const char * const valid_flags =
- (const char *)((type == OP_QR) ? "iomsx" : "iogcmsx");
+ (const char *)((type == OP_QR) ? "iomsxk" : "iogcmsxk");
#ifdef PERL_MAD
char *modstart;
#endif
@@ -10887,7 +10883,7 @@ S_scan_subst(pTHX_ char *start)
s++;
es++;
}
- else if (strchr("iogcmsx", *s))
+ else if (strchr("iogcmsxk", *s))
pmflag(&pm->op_pmflags,*s++);
else
break;