summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--op.c18
-rw-r--r--regcomp.c90
-rw-r--r--t/re/overload.t19
-rw-r--r--t/re/pat_re_eval.t120
-rw-r--r--toke.c5
5 files changed, 232 insertions, 20 deletions
diff --git a/op.c b/op.c
index c502d3fe0d..a46d68b181 100644
--- a/op.c
+++ b/op.c
@@ -4545,11 +4545,21 @@ Perl_pmruntime(pTHX_ OP *o, OP *expr, bool isreg, I32 floor)
LINKLIST(expr);
- /* fix up DO blocks; treat each one as a separate little sub */
+ /* fix up DO blocks; treat each one as a separate little sub;
+ * also, mark any arrays as LIST/REF */
if (expr->op_type == OP_LIST) {
OP *o;
for (o = cLISTOPx(expr)->op_first; o; o = o->op_sibling) {
+
+ if (o->op_type == OP_PADAV || o->op_type == OP_RV2AV) {
+ assert( !(o->op_flags & OPf_WANT));
+ /* push the array rather than its contents. The regex
+ * engine will retrieve and join the elements later */
+ o->op_flags |= (OPf_WANT_LIST | OPf_REF);
+ continue;
+ }
+
if (!(o->op_type == OP_NULL && (o->op_flags & OPf_SPECIAL)))
continue;
o->op_next = NULL; /* undo temporary hack from above */
@@ -4583,6 +4593,12 @@ Perl_pmruntime(pTHX_ OP *o, OP *expr, bool isreg, I32 floor)
finalize_optree(o);
}
}
+ else if (expr->op_type == OP_PADAV || expr->op_type == OP_RV2AV) {
+ assert( !(expr->op_flags & OPf_WANT));
+ /* push the array rather than its contents. The regex
+ * engine will retrieve and join the elements later */
+ expr->op_flags |= (OPf_WANT_LIST | OPf_REF);
+ }
PL_hints |= HINT_BLOCK_SCOPE;
pm = (PMOP*)o;
diff --git a/regcomp.c b/regcomp.c
index 3ada1312ce..0840778459 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -4932,31 +4932,89 @@ S_pat_upgrade_to_utf8(pTHX_ RExC_state_t * const pRExC_state,
/* S_concat_pat(): concatenate a list of args to the pattern string pat,
* while recording any code block indices, and handling overloading,
- * nested qr// objects etc.
- * Returns pat (or the first arg, if pat was null , i.e. there is only
- * one arg).
+ * nested qr// objects etc. If pat is null, it will allocate a new
+ * string, or just return the first arg, if there's only one.
+ *
+ * Returns the malloced/updated pat.
* patternp and pat_count is the array of SVs to be concatted;
* oplist is the optional list of ops that generated the SVs;
* recompile_p is a pointer to a boolean that will be set if
* the regex will need to be recompiled.
+ * delim, if non-null is an SV that will be inserted between each element
*/
static SV*
S_concat_pat(pTHX_ RExC_state_t * const pRExC_state,
SV *pat, SV ** const patternp, int pat_count,
- OP *oplist, bool *recompile_p)
+ OP *oplist, bool *recompile_p, SV *delim)
{
SV **svp;
int n = 0;
+ bool use_delim = FALSE;
+ bool alloced = FALSE;
- assert(!pat || pat_count > 1);
+ /* if we know we have at least two args, create an empty string,
+ * then concatenate args to that. For no args, return an empty string */
+ if (!pat && pat_count != 1) {
+ pat = newSVpvn("", 0);
+ SAVEFREESV(pat);
+ alloced = TRUE;
+ }
for (svp = patternp; svp < patternp + pat_count; svp++) {
SV *sv;
SV *rx = NULL;
STRLEN orig_patlen = 0;
bool code = 0;
- SV *msv = *svp;
+ SV *msv = use_delim ? delim : *svp;
+
+ /* if we've got a delimiter, we go round the loop twice for each
+ * svp slot (except the last), using the delimiter the second
+ * time round */
+ if (use_delim) {
+ svp--;
+ use_delim = FALSE;
+ }
+ else if (delim)
+ use_delim = TRUE;
+
+ if (SvTYPE(msv) == SVt_PVAV) {
+ /* we've encountered an interpolated array within
+ * the pattern, e.g. /...@a..../. Expand the list of elements,
+ * then recursively append elements.
+ * The code in this block is based on S_pushav() */
+
+ AV *const av = (AV*)msv;
+ const I32 maxarg = AvFILL(av) + 1;
+ SV **array;
+
+ if (oplist) {
+ assert(oplist->op_type == OP_PADAV
+ || oplist->op_type == OP_RV2AV);
+ oplist = oplist->op_sibling;;
+ }
+
+ if (SvRMAGICAL(av)) {
+ U32 i;
+
+ Newx(array, maxarg, SV*);
+ SAVEFREEPV(array);
+ for (i=0; i < (U32)maxarg; i++) {
+ SV ** const svp = av_fetch(av, i, FALSE);
+ array[i] = svp ? *svp : &PL_sv_undef;
+ }
+ }
+ else
+ array = AvARRAY(av);
+
+ pat = S_concat_pat(aTHX_ pRExC_state, pat,
+ array, maxarg, NULL, recompile_p,
+ /* $" */
+ GvSV((gv_fetchpvs("\"", GV_ADDMULTI, SVt_PV))));
+
+ continue;
+ }
+
/* we make the assumption here that each op in the list of
* op_siblings maps to one SV pushed onto the stack,
@@ -5024,6 +5082,7 @@ S_concat_pat(pTHX_ RExC_state_t * const pRExC_state,
}
if (SvROK(msv) && SvTYPE(SvRV(msv)) == SVt_REGEXP)
msv = SvRV(msv);
+
if (pat) {
/* this is a partially unrolled
* sv_catsv_nomg(pat, msv);
@@ -5043,6 +5102,7 @@ S_concat_pat(pTHX_ RExC_state_t * const pRExC_state,
}
else
pat = msv;
+
if (code)
pRExC_state->code_blocks[n-1].end = SvCUR(pat)-1;
}
@@ -5084,6 +5144,10 @@ S_concat_pat(pTHX_ RExC_state_t * const pRExC_state,
}
}
}
+ /* avoid calling magic multiple times on a single element e.g. =~ $qr */
+ if (alloced)
+ SvSETMAGIC(pat);
+
return pat;
}
@@ -5419,7 +5483,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
I32 flags;
I32 minlen = 0;
U32 rx_flags;
- SV *pat = NULL;
+ SV *pat;
SV *code_blocksv = NULL;
SV** new_patternp = patternp;
@@ -5579,16 +5643,8 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
expr = expr->op_sibling;
}
- if (pat_count > 1) {
- pat = newSVpvn("", 0);
- SAVEFREESV(pat);
- }
-
- pat = S_concat_pat(aTHX_ pRExC_state, pat, new_patternp, pat_count,
- expr, &recompile);
-
- if (pat_count > 1)
- SvSETMAGIC(pat);
+ pat = S_concat_pat(aTHX_ pRExC_state, NULL, new_patternp, pat_count,
+ expr, &recompile, NULL);
/* handle bare (possibly after overloading) regex: foo =~ $re */
{
diff --git a/t/re/overload.t b/t/re/overload.t
index ec0ae3d06f..dc76663fee 100644
--- a/t/re/overload.t
+++ b/t/re/overload.t
@@ -202,4 +202,23 @@ no warnings 'syntax';
}
+{
+ # [perl #115004]
+ # array interpolation within patterns should handle qr overloading
+ # (like it does for scalar vars)
+
+ {
+ package P115004;
+ use overload 'qr' => sub { return qr/a/ };
+ }
+
+ my $o = bless [], 'P115004';
+ my @a = ($o);
+
+ ok("a" =~ /^$o$/, "qr overloading with scalar var interpolation");
+ ok("a" =~ /^@a$/, "qr overloading with array var interpolation");
+
+}
+
+
done_testing();
diff --git a/t/re/pat_re_eval.t b/t/re/pat_re_eval.t
index cef15a0fbd..e47aaf3484 100644
--- a/t/re/pat_re_eval.t
+++ b/t/re/pat_re_eval.t
@@ -23,7 +23,7 @@ BEGIN {
}
-plan tests => 464; # Update this when adding/deleting tests.
+plan tests => 519; # Update this when adding/deleting tests.
run_tests() unless caller;
@@ -1057,6 +1057,124 @@ sub run_tests {
pass("cSVOPo_sv");
}
+ # [perl #115004]
+ # code blocks in qr objects that are interpolated in arrays need
+ # handling the same as if they were interpolated from scalar vars
+ # (before this code would need 'use re "eval"')
+
+ {
+ use Tie::Array;
+
+ use vars '@global';
+ local @global;
+ my @array;
+ my @refs = (0, \@array, 2);
+ my @tied;
+ tie @tied, 'Tie::StdArray';
+ {
+ my $bb = 'B';
+ my $dd = 'D';
+ @array = ('A', qr/(??{$bb})/, 'C', qr/(??{$dd})/, 'E');
+ @tied = @array;
+ @global = @array;
+ }
+ my $bb = 'X';
+ my $dd = 'Y';
+ ok("A B C D E=" =~ /@array/, 'bare interpolated array match');
+ ok("A B C D E=" =~ qr/@array/, 'qr bare interpolated array match');
+ ok("A B C D E=" =~ /@global/, 'bare interpolated global array match');
+ ok("A B C D E=" =~ qr/@global/,
+ 'qr bare interpolated global array match');
+ ok("A B C D E=" =~ /@{$refs[1]}/, 'bare interpolated ref array match');
+ ok("A B C D E=" =~ qr/@{$refs[1]}/,
+ 'qr bare interpolated ref array match');
+ ok("A B C D E=" =~ /@tied/, 'bare interpolated tied array match');
+ ok("A B C D E=" =~ qr/@tied/, 'qr bare interpolated tied array match');
+ ok("aA B C D E=" =~ /^a@array=$/, 'interpolated array match');
+ ok("aA B C D E=" =~ qr/^a@array=$/, 'qr interpolated array match');
+ ok("aA B C D E=" =~ /^a@global=$/, 'interpolated global array match');
+ ok("aA B C D E=" =~ qr/^a@global=$/,
+ 'qr interpolated global array match');
+ ok("aA B C D E=" =~ /^a@{$refs[1]}=$/, 'interpolated ref array match');
+ ok("aA B C D E=" =~ qr/^a@{$refs[1]}=$/,
+ 'qr interpolated ref array match');
+ ok("aA B C D E=" =~ /^a@tied=$/, 'interpolated tied array match');
+ ok("aA B C D E=" =~ qr/^a@tied=$/, 'qr interpolated tied array match');
+
+ {
+ local $" = '-';
+ ok("aA-B-C-D-E=" =~ /^a@{array}=$/,
+ 'interpolated array match with local sep');
+ ok("aA-B-C-D-E=" =~ qr/^a@{array}=$/,
+ 'qr interpolated array match with local sep');
+ ok("aA-B-C-D-E=" =~ /^a@{global}=$/,
+ 'interpolated global array match with local sep');
+ ok("aA-B-C-D-E=" =~ qr/^a@{global}=$/,
+ 'qr interpolated global array match with local sep');
+ ok("aA-B-C-D-E=" =~ /^a@{tied}=$/,
+ 'interpolated tied array match with local sep');
+ ok("aA-B-C-D-E=" =~ qr/^a@{tied}=$/,
+ 'qr interpolated tied array match with local sep');
+ }
+
+ # but don't handle the array ourselves in the presence of \Q etc
+
+ @array = ('A', '(?{})');
+ @global = @array;
+ @tied = @array;
+ ok("aA (?{})=" =~ /^a\Q@{array}\E=$/,
+ 'interpolated array match with \Q');
+ ok("aA (?{})=" =~ qr/^a\Q@{array}\E=$/,
+ 'qr interpolated array match with \Q');
+ ok("aA (?{})=" =~ /^a\Q@{global}\E=$/,
+ 'interpolated global array match with \Q');
+ ok("aA (?{})=" =~ qr/^a\Q@{global}\E=$/,
+ 'qr interpolated global array match with \Q');
+ ok("aA (?{})=" =~ /^a\Q@{$refs[1]}\E=$/,
+ 'interpolated ref array match with \Q');
+ ok("aA (?{})=" =~ qr/^a\Q@{$refs[1]}\E=$/,
+ 'qr interpolated ref array match with \Q');
+ ok("aA (?{})=" =~ /^a\Q@{tied}\E=$/,
+ 'interpolated tied array match with \Q');
+ ok("aA (?{})=" =~ qr/^a\Q@{tied}\E=$/,
+ 'qr interpolated tied array match with \Q');
+
+ # and check it works with an empty array
+
+ @array = ();
+ @global = ();
+ @tied = ();
+ ok("a=" =~ /^a@array=$/, 'empty array match');
+ ok("a=" =~ qr/^a@array=$/, 'qr empty array match');
+ ok("a=" =~ /^a@global=$/, 'empty global array match');
+ ok("a=" =~ qr/^a@global=$/, 'qr empty global array match');
+ ok("a=" =~ /^a@tied=$/, 'empty tied array match');
+ ok("a=" =~ qr/^a@tied=$/, 'qr empty tied array match');
+ ok("a=" =~ /^a\Q@{array}\E=$/, 'empty array match with \Q');
+ ok("a=" =~ /^a\Q@{array}\E=$/, 'empty array match with \Q');
+ ok("a=" =~ qr/^a\Q@{global}\E=$/,
+ 'qr empty global array match with \Q');
+ ok("a=" =~ /^a\Q@{tied}\E=$/, 'empty tied array match with \Q');
+ ok("a=" =~ qr/^a\Q@{tied}\E=$/, 'qr empty tied array match with \Q');
+
+ # NB: these below are empty patterns, so they happen to use the
+ # successful match from the line above
+
+ ok("a=" =~ /@array/, 'empty array pattern');
+ ok("a=" =~ qr/@array/, 'qr empty array pattern');
+ ok("a=" =~ /@global/, 'empty global array pattern');
+ ok("a=" =~ qr/@global/, 'qr empty global array pattern');
+ ok("a=" =~ /@tied/, 'empty tied pattern');
+ ok("a=" =~ qr/@tied/, 'qr empty tied pattern');
+ ok("a=" =~ /\Q@array\E/, 'empty array pattern with \Q');
+ ok("a=" =~ qr/\Q@array\E/, 'qr empty array pattern with \Q');
+ ok("a=" =~ /\Q@global\E/, 'empty global array pattern with \Q');
+ ok("a=" =~ qr/\Q@global\E/, 'qr empty global array pattern with \Q');
+ ok("a=" =~ /\Q@tied\E/, 'empty tied pattern with \Q');
+ ok("a=" =~ qr/\Q@tied\E/, 'qr empty tied pattern with \Q');
+ ok("a=" =~ //, 'completely empty pattern');
+ ok("a=" =~ qr//, 'qr completely empty pattern');
+ }
} # End of sub run_tests
diff --git a/toke.c b/toke.c
index 43adb3e4b8..08e9c4dc9c 100644
--- a/toke.c
+++ b/toke.c
@@ -4845,7 +4845,10 @@ Perl_yylex(pTHX)
DEBUG_T({ if(*PL_bufptr != '(') PerlIO_printf(Perl_debug_log,
"### Interpolated variable\n"); });
PL_expect = XTERM;
- PL_lex_dojoin = (*PL_bufptr == '@');
+ /* for /@a/, we leave the joining for the regex engine to do
+ * (unless we're within \Q etc) */
+ PL_lex_dojoin = (*PL_bufptr == '@'
+ && (!PL_lex_inpat || PL_lex_casemods));
PL_lex_state = LEX_INTERPNORMAL;
if (PL_lex_dojoin) {
start_force(PL_curforce);