diff options
-rw-r--r-- | op.c | 18 | ||||
-rw-r--r-- | regcomp.c | 90 | ||||
-rw-r--r-- | t/re/overload.t | 19 | ||||
-rw-r--r-- | t/re/pat_re_eval.t | 120 | ||||
-rw-r--r-- | toke.c | 5 |
5 files changed, 232 insertions, 20 deletions
@@ -4545,11 +4545,21 @@ Perl_pmruntime(pTHX_ OP *o, OP *expr, bool isreg, I32 floor) LINKLIST(expr); - /* fix up DO blocks; treat each one as a separate little sub */ + /* fix up DO blocks; treat each one as a separate little sub; + * also, mark any arrays as LIST/REF */ if (expr->op_type == OP_LIST) { OP *o; for (o = cLISTOPx(expr)->op_first; o; o = o->op_sibling) { + + if (o->op_type == OP_PADAV || o->op_type == OP_RV2AV) { + assert( !(o->op_flags & OPf_WANT)); + /* push the array rather than its contents. The regex + * engine will retrieve and join the elements later */ + o->op_flags |= (OPf_WANT_LIST | OPf_REF); + continue; + } + if (!(o->op_type == OP_NULL && (o->op_flags & OPf_SPECIAL))) continue; o->op_next = NULL; /* undo temporary hack from above */ @@ -4583,6 +4593,12 @@ Perl_pmruntime(pTHX_ OP *o, OP *expr, bool isreg, I32 floor) finalize_optree(o); } } + else if (expr->op_type == OP_PADAV || expr->op_type == OP_RV2AV) { + assert( !(expr->op_flags & OPf_WANT)); + /* push the array rather than its contents. The regex + * engine will retrieve and join the elements later */ + expr->op_flags |= (OPf_WANT_LIST | OPf_REF); + } PL_hints |= HINT_BLOCK_SCOPE; pm = (PMOP*)o; @@ -4932,31 +4932,89 @@ S_pat_upgrade_to_utf8(pTHX_ RExC_state_t * const pRExC_state, /* S_concat_pat(): concatenate a list of args to the pattern string pat, * while recording any code block indices, and handling overloading, - * nested qr// objects etc. - * Returns pat (or the first arg, if pat was null , i.e. there is only - * one arg). + * nested qr// objects etc. If pat is null, it will allocate a new + * string, or just return the first arg, if there's only one. + * + * Returns the malloced/updated pat. * patternp and pat_count is the array of SVs to be concatted; * oplist is the optional list of ops that generated the SVs; * recompile_p is a pointer to a boolean that will be set if * the regex will need to be recompiled. + * delim, if non-null is an SV that will be inserted between each element */ static SV* S_concat_pat(pTHX_ RExC_state_t * const pRExC_state, SV *pat, SV ** const patternp, int pat_count, - OP *oplist, bool *recompile_p) + OP *oplist, bool *recompile_p, SV *delim) { SV **svp; int n = 0; + bool use_delim = FALSE; + bool alloced = FALSE; - assert(!pat || pat_count > 1); + /* if we know we have at least two args, create an empty string, + * then concatenate args to that. For no args, return an empty string */ + if (!pat && pat_count != 1) { + pat = newSVpvn("", 0); + SAVEFREESV(pat); + alloced = TRUE; + } for (svp = patternp; svp < patternp + pat_count; svp++) { SV *sv; SV *rx = NULL; STRLEN orig_patlen = 0; bool code = 0; - SV *msv = *svp; + SV *msv = use_delim ? delim : *svp; + + /* if we've got a delimiter, we go round the loop twice for each + * svp slot (except the last), using the delimiter the second + * time round */ + if (use_delim) { + svp--; + use_delim = FALSE; + } + else if (delim) + use_delim = TRUE; + + if (SvTYPE(msv) == SVt_PVAV) { + /* we've encountered an interpolated array within + * the pattern, e.g. /...@a..../. Expand the list of elements, + * then recursively append elements. + * The code in this block is based on S_pushav() */ + + AV *const av = (AV*)msv; + const I32 maxarg = AvFILL(av) + 1; + SV **array; + + if (oplist) { + assert(oplist->op_type == OP_PADAV + || oplist->op_type == OP_RV2AV); + oplist = oplist->op_sibling;; + } + + if (SvRMAGICAL(av)) { + U32 i; + + Newx(array, maxarg, SV*); + SAVEFREEPV(array); + for (i=0; i < (U32)maxarg; i++) { + SV ** const svp = av_fetch(av, i, FALSE); + array[i] = svp ? *svp : &PL_sv_undef; + } + } + else + array = AvARRAY(av); + + pat = S_concat_pat(aTHX_ pRExC_state, pat, + array, maxarg, NULL, recompile_p, + /* $" */ + GvSV((gv_fetchpvs("\"", GV_ADDMULTI, SVt_PV)))); + + continue; + } + /* we make the assumption here that each op in the list of * op_siblings maps to one SV pushed onto the stack, @@ -5024,6 +5082,7 @@ S_concat_pat(pTHX_ RExC_state_t * const pRExC_state, } if (SvROK(msv) && SvTYPE(SvRV(msv)) == SVt_REGEXP) msv = SvRV(msv); + if (pat) { /* this is a partially unrolled * sv_catsv_nomg(pat, msv); @@ -5043,6 +5102,7 @@ S_concat_pat(pTHX_ RExC_state_t * const pRExC_state, } else pat = msv; + if (code) pRExC_state->code_blocks[n-1].end = SvCUR(pat)-1; } @@ -5084,6 +5144,10 @@ S_concat_pat(pTHX_ RExC_state_t * const pRExC_state, } } } + /* avoid calling magic multiple times on a single element e.g. =~ $qr */ + if (alloced) + SvSETMAGIC(pat); + return pat; } @@ -5419,7 +5483,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, I32 flags; I32 minlen = 0; U32 rx_flags; - SV *pat = NULL; + SV *pat; SV *code_blocksv = NULL; SV** new_patternp = patternp; @@ -5579,16 +5643,8 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, expr = expr->op_sibling; } - if (pat_count > 1) { - pat = newSVpvn("", 0); - SAVEFREESV(pat); - } - - pat = S_concat_pat(aTHX_ pRExC_state, pat, new_patternp, pat_count, - expr, &recompile); - - if (pat_count > 1) - SvSETMAGIC(pat); + pat = S_concat_pat(aTHX_ pRExC_state, NULL, new_patternp, pat_count, + expr, &recompile, NULL); /* handle bare (possibly after overloading) regex: foo =~ $re */ { diff --git a/t/re/overload.t b/t/re/overload.t index ec0ae3d06f..dc76663fee 100644 --- a/t/re/overload.t +++ b/t/re/overload.t @@ -202,4 +202,23 @@ no warnings 'syntax'; } +{ + # [perl #115004] + # array interpolation within patterns should handle qr overloading + # (like it does for scalar vars) + + { + package P115004; + use overload 'qr' => sub { return qr/a/ }; + } + + my $o = bless [], 'P115004'; + my @a = ($o); + + ok("a" =~ /^$o$/, "qr overloading with scalar var interpolation"); + ok("a" =~ /^@a$/, "qr overloading with array var interpolation"); + +} + + done_testing(); diff --git a/t/re/pat_re_eval.t b/t/re/pat_re_eval.t index cef15a0fbd..e47aaf3484 100644 --- a/t/re/pat_re_eval.t +++ b/t/re/pat_re_eval.t @@ -23,7 +23,7 @@ BEGIN { } -plan tests => 464; # Update this when adding/deleting tests. +plan tests => 519; # Update this when adding/deleting tests. run_tests() unless caller; @@ -1057,6 +1057,124 @@ sub run_tests { pass("cSVOPo_sv"); } + # [perl #115004] + # code blocks in qr objects that are interpolated in arrays need + # handling the same as if they were interpolated from scalar vars + # (before this code would need 'use re "eval"') + + { + use Tie::Array; + + use vars '@global'; + local @global; + my @array; + my @refs = (0, \@array, 2); + my @tied; + tie @tied, 'Tie::StdArray'; + { + my $bb = 'B'; + my $dd = 'D'; + @array = ('A', qr/(??{$bb})/, 'C', qr/(??{$dd})/, 'E'); + @tied = @array; + @global = @array; + } + my $bb = 'X'; + my $dd = 'Y'; + ok("A B C D E=" =~ /@array/, 'bare interpolated array match'); + ok("A B C D E=" =~ qr/@array/, 'qr bare interpolated array match'); + ok("A B C D E=" =~ /@global/, 'bare interpolated global array match'); + ok("A B C D E=" =~ qr/@global/, + 'qr bare interpolated global array match'); + ok("A B C D E=" =~ /@{$refs[1]}/, 'bare interpolated ref array match'); + ok("A B C D E=" =~ qr/@{$refs[1]}/, + 'qr bare interpolated ref array match'); + ok("A B C D E=" =~ /@tied/, 'bare interpolated tied array match'); + ok("A B C D E=" =~ qr/@tied/, 'qr bare interpolated tied array match'); + ok("aA B C D E=" =~ /^a@array=$/, 'interpolated array match'); + ok("aA B C D E=" =~ qr/^a@array=$/, 'qr interpolated array match'); + ok("aA B C D E=" =~ /^a@global=$/, 'interpolated global array match'); + ok("aA B C D E=" =~ qr/^a@global=$/, + 'qr interpolated global array match'); + ok("aA B C D E=" =~ /^a@{$refs[1]}=$/, 'interpolated ref array match'); + ok("aA B C D E=" =~ qr/^a@{$refs[1]}=$/, + 'qr interpolated ref array match'); + ok("aA B C D E=" =~ /^a@tied=$/, 'interpolated tied array match'); + ok("aA B C D E=" =~ qr/^a@tied=$/, 'qr interpolated tied array match'); + + { + local $" = '-'; + ok("aA-B-C-D-E=" =~ /^a@{array}=$/, + 'interpolated array match with local sep'); + ok("aA-B-C-D-E=" =~ qr/^a@{array}=$/, + 'qr interpolated array match with local sep'); + ok("aA-B-C-D-E=" =~ /^a@{global}=$/, + 'interpolated global array match with local sep'); + ok("aA-B-C-D-E=" =~ qr/^a@{global}=$/, + 'qr interpolated global array match with local sep'); + ok("aA-B-C-D-E=" =~ /^a@{tied}=$/, + 'interpolated tied array match with local sep'); + ok("aA-B-C-D-E=" =~ qr/^a@{tied}=$/, + 'qr interpolated tied array match with local sep'); + } + + # but don't handle the array ourselves in the presence of \Q etc + + @array = ('A', '(?{})'); + @global = @array; + @tied = @array; + ok("aA (?{})=" =~ /^a\Q@{array}\E=$/, + 'interpolated array match with \Q'); + ok("aA (?{})=" =~ qr/^a\Q@{array}\E=$/, + 'qr interpolated array match with \Q'); + ok("aA (?{})=" =~ /^a\Q@{global}\E=$/, + 'interpolated global array match with \Q'); + ok("aA (?{})=" =~ qr/^a\Q@{global}\E=$/, + 'qr interpolated global array match with \Q'); + ok("aA (?{})=" =~ /^a\Q@{$refs[1]}\E=$/, + 'interpolated ref array match with \Q'); + ok("aA (?{})=" =~ qr/^a\Q@{$refs[1]}\E=$/, + 'qr interpolated ref array match with \Q'); + ok("aA (?{})=" =~ /^a\Q@{tied}\E=$/, + 'interpolated tied array match with \Q'); + ok("aA (?{})=" =~ qr/^a\Q@{tied}\E=$/, + 'qr interpolated tied array match with \Q'); + + # and check it works with an empty array + + @array = (); + @global = (); + @tied = (); + ok("a=" =~ /^a@array=$/, 'empty array match'); + ok("a=" =~ qr/^a@array=$/, 'qr empty array match'); + ok("a=" =~ /^a@global=$/, 'empty global array match'); + ok("a=" =~ qr/^a@global=$/, 'qr empty global array match'); + ok("a=" =~ /^a@tied=$/, 'empty tied array match'); + ok("a=" =~ qr/^a@tied=$/, 'qr empty tied array match'); + ok("a=" =~ /^a\Q@{array}\E=$/, 'empty array match with \Q'); + ok("a=" =~ /^a\Q@{array}\E=$/, 'empty array match with \Q'); + ok("a=" =~ qr/^a\Q@{global}\E=$/, + 'qr empty global array match with \Q'); + ok("a=" =~ /^a\Q@{tied}\E=$/, 'empty tied array match with \Q'); + ok("a=" =~ qr/^a\Q@{tied}\E=$/, 'qr empty tied array match with \Q'); + + # NB: these below are empty patterns, so they happen to use the + # successful match from the line above + + ok("a=" =~ /@array/, 'empty array pattern'); + ok("a=" =~ qr/@array/, 'qr empty array pattern'); + ok("a=" =~ /@global/, 'empty global array pattern'); + ok("a=" =~ qr/@global/, 'qr empty global array pattern'); + ok("a=" =~ /@tied/, 'empty tied pattern'); + ok("a=" =~ qr/@tied/, 'qr empty tied pattern'); + ok("a=" =~ /\Q@array\E/, 'empty array pattern with \Q'); + ok("a=" =~ qr/\Q@array\E/, 'qr empty array pattern with \Q'); + ok("a=" =~ /\Q@global\E/, 'empty global array pattern with \Q'); + ok("a=" =~ qr/\Q@global\E/, 'qr empty global array pattern with \Q'); + ok("a=" =~ /\Q@tied\E/, 'empty tied pattern with \Q'); + ok("a=" =~ qr/\Q@tied\E/, 'qr empty tied pattern with \Q'); + ok("a=" =~ //, 'completely empty pattern'); + ok("a=" =~ qr//, 'qr completely empty pattern'); + } } # End of sub run_tests @@ -4845,7 +4845,10 @@ Perl_yylex(pTHX) DEBUG_T({ if(*PL_bufptr != '(') PerlIO_printf(Perl_debug_log, "### Interpolated variable\n"); }); PL_expect = XTERM; - PL_lex_dojoin = (*PL_bufptr == '@'); + /* for /@a/, we leave the joining for the regex engine to do + * (unless we're within \Q etc) */ + PL_lex_dojoin = (*PL_bufptr == '@' + && (!PL_lex_inpat || PL_lex_casemods)); PL_lex_state = LEX_INTERPNORMAL; if (PL_lex_dojoin) { start_force(PL_curforce); |