diff options
author | David Mitchell <davem@iabyn.com> | 2018-02-19 22:20:40 +0000 |
---|---|---|
committer | David Mitchell <davem@iabyn.com> | 2018-02-19 22:20:40 +0000 |
commit | f7675015b1fdb2eee619d34c20a279db07dcf844 (patch) | |
tree | 4be98d26838e5f349e32dcb885052f6400547372 | |
parent | 6d37ab4efc1fb099593fb29406193ae79c3be543 (diff) | |
parent | 8327fe931a244c33965f30d2ba4bbe7248016951 (diff) | |
download | perl-f7675015b1fdb2eee619d34c20a279db07dcf844.tar.gz |
[MERGE] fixups for OP_MULTICONCAT
This branch fixes three main issues caused by the OP_MULTICONCAT
optimisation:
* When dealing with magic/tie/overloading, there were many edge cases;
the way these are handled has been completely rewritten, fixing
bugs related to exactly when and how args are sringified and/or tested
for undefinedness;
* relatedly, on a stringified concat such as $x .= "$y", or
$a = "$b$c$d", the stringification is honoured under exactly the same
(buggy) set of conditions that pertained before multiconcat was
introduced; this is noticable in overloading whether the result calls the
stringify method and thus returns a plain string, or whether it skips that
and returns an overloaded object, e.g. for $a = "$b$c$d", does $a end up
as a ref or a string. Before the branch it was always a ref, now its
sometimes a string.
* concats within a runtime pattern code block could crash, e.g.
/$a(?{ $b . "c" })/
-rw-r--r-- | lib/overload.t | 129 | ||||
-rw-r--r-- | op.c | 20 | ||||
-rw-r--r-- | pp_hot.c | 627 | ||||
-rw-r--r-- | regen/op_private | 2 | ||||
-rw-r--r-- | t/opbasic/concat.t | 15 | ||||
-rw-r--r-- | t/re/pat_re_eval.t | 25 |
6 files changed, 474 insertions, 344 deletions
diff --git a/lib/overload.t b/lib/overload.t index 2afa6cf437..055daab30f 100644 --- a/lib/overload.t +++ b/lib/overload.t @@ -48,7 +48,7 @@ package main; $| = 1; BEGIN { require './test.pl'; require './charset_tools.pl' } -plan tests => 5338; +plan tests => 5362; use Scalar::Util qw(tainted); @@ -3047,3 +3047,130 @@ package RT132385 { # ditto with a mutator ::is($o .= $r1, "obj-ref1", "RT #132385 o.=r1"); } + +# the RHS of an overloaded .= should be passed as-is to the overload +# method, rather than being stringified or otherwise being processed in +# such a way that it triggers an undef warning +package RT132783 { + use warnings; + use overload '.=' => sub { return "foo" }; + my $w = 0; + local $SIG{__WARN__} = sub { $w++ }; + my $undef; + my $ov = bless []; + $ov .= $undef; + ::is($w, 0, "RT #132783 - should be no warnings"); +} + +# changing the overloaded object to a plain string within an overload +# method should be permanent. +package RT132827 { + use overload '""' => sub { $_[0] = "a" }; + my $ov = bless []; + my $b = $ov . "b"; + ::is(ref \$ov, "SCALAR", "RT #132827"); +} + +# RT #132793 +# An arg like like "$b" in $overloaded .= "$b" should be stringified +# before being passed to the method + +package RT132793 { + my $type; + my $str = 0; + use overload + '.=' => sub { $type = ref(\$_[1]); "foo"; }, + '""' => sub { $str++; "bar" }; + + my $a = bless {}; + my $b = bless {}; + $a .= "$b"; + ::is($type, "SCALAR", "RT #132793 type"); + ::is($str, 1, "RT #132793 stringify count"); +} + +# RT #132801 +# A second RHS-not-stringified bug + +package RT132801 { + my $type; + my $str = 0; + my $concat = 0; + use overload + '.' => sub { $concat++; bless []; }, + '""' => sub { $str++; "bar" }; + + my $a = "A"; + my $b = bless []; + my $c; + $c = "$a-$b"; + ::is($concat, 1, "RT #132801 concat count"); + ::is($str, 1, "RT #132801 stringify count"); +} + +# General testing of optimising away OP_STRINGIFY, and whether +# OP_MULTICONCAT emulates existing behaviour. +# +# It could well be argued that the existing behaviour is buggy, but +# for now emulate the old behaviour. +# +# In more detail: +# +# Since 5.000, any OP_STRINGIFY immediately following an OP_CONCAT +# is optimised away, on the assumption that since concat will always +# return a valid string anyway, it doesn't need stringifying. +# So in "$x", the stringify is needed, but on "$x$y" it isn't. +# This assumption is flawed once overloading has been introduced, since +# concat might return an overloaded object which still needs stringifying. +# However, this flawed behaviour is apparently needed by at least one +# module, and is tested for in opbasic/concat.t: see RT #124160. +# +# There is also a wart with the OPpTARGET_MY optimisation: specifically, +# in $lex = "...", if $lex is a lexical var, then a chain of 2 or more +# concats *doesn't* optimise away OP_STRINGIFY: +# +# $lex = "$x"; # stringifies +# $lex = "$x$y"; # doesn't stringify +# $lex = "$x$y$z..."; # stringifies + +package Stringify { + my $count; + use overload + '.' => sub { + my ($a, $b, $rev) = @_; + bless [ $rev ? "$b" . $a->[0] : $a->[0] . "$b" ]; + }, + '""' => sub { $count++; $_[0][0] }, + ; + + for my $test( + [ 1, '$pkg = "$ov"' ], + [ 1, '$lex = "$ov"' ], + [ 1, 'my $a = "$ov"' ], + [ 1, '$pkg .= "$ov"' ], + [ 1, '$lex .= "$ov"' ], + [ 1, 'my $a .= "$ov"' ], + + [ 0, '$pkg = "$ov$x"' ], + [ 0, '$lex = "$ov$x"' ], + [ 0, 'my $a = "$ov$x"' ], + [ 0, '$pkg .= "$ov$x"' ], + [ 0, '$lex .= "$ov$x"' ], + [ 0, 'my $a .= "$ov$x"' ], + + [ 0, '$pkg = "$ov$x$y"' ], + [ 1, '$lex = "$ov$x$y"' ], # XXX note the anomaly + [ 0, 'my $a = "$ov$x$y"' ], + [ 0, '$pkg .= "$ov$x$y"' ], + [ 0, '$lex .= "$ov$x$y"' ], + [ 0, 'my $a .= "$ov$x$y"' ], + ) + { + my ($stringify, $code) = @$test; + our $pkg = 'P'; + my ($ov, $x, $y, $lex) = (bless(['OV']), qw(X Y L)); + $count = 0; + eval "$code; 1" or die $@; + ::is $count, $stringify, $code; + } +} @@ -3204,16 +3204,16 @@ S_maybe_multiconcat(pTHX_ OP *o) OP *prev; /* set prev to the sibling *before* the arg to be cut out, - * e.g.: + * e.g. when cutting EXPR: * * | - * kid= CONST + * kid= CONCAT * | - * prev= CONST -- EXPR + * prev= CONCAT -- EXPR * | */ if (argp == args && kid->op_type != OP_CONCAT) { - /* in e.g. '$x . = f(1)' there's no RHS concat tree + /* in e.g. '$x .= f(1)' there's no RHS concat tree * so the expression to be cut isn't kid->op_last but * kid itself */ OP *o1, *o2; @@ -6952,9 +6952,15 @@ Perl_pmruntime(pTHX_ OP *o, OP *expr, OP *repl, UV flags, I32 floor) op_null(scope); } - if (is_compiletime) - /* runtime finalizes as part of finalizing whole tree */ - optimize_optree(o); + /* XXX optimize_optree() must be called on o before + * CALL_PEEP(), as currently S_maybe_multiconcat() can't + * currently cope with a peephole-optimised optree. + * Calling optimize_optree() here ensures that condition + * is met, but may mean optimize_optree() is applied + * to the same optree later (where hopefully it won't do any + * harm as it can't convert an op to multiconcat if it's + * already been converted */ + optimize_optree(o); /* have to peep the DOs individually as we've removed it from * the op_next chain */ @@ -253,11 +253,17 @@ PP(pp_unstack) return NORMAL; } -PP(pp_concat) + +/* The main body of pp_concat, not including the magic/overload and + * stack handling. + * It does targ = left . right. + * Moved into a separate function so that pp_multiconcat() can use it + * too. + */ + +PERL_STATIC_INLINE void +S_do_concat(pTHX_ SV *left, SV *right, SV *targ, U8 targmy) { - dSP; dATARGET; tryAMAGICbin_MG(concat_amg, AMGf_assign); - { - dPOPTOPssrl; bool lbyte; STRLEN rlen; const char *rpv = NULL; @@ -285,7 +291,7 @@ PP(pp_concat) else { /* $l .= $r and left == TARG */ if (!SvOK(left)) { if ((left == right /* $l .= $l */ - || (PL_op->op_private & OPpTARGET_MY)) /* $l = $l . $r */ + || targmy) /* $l = $l . $r */ && ckWARN(WARN_UNINITIALIZED) ) report_uninit(left); @@ -314,8 +320,17 @@ PP(pp_concat) } } sv_catpvn_nomg(TARG, rpv, rlen); + SvSETMAGIC(TARG); +} - SETTARG; + +PP(pp_concat) +{ + dSP; dATARGET; tryAMAGICbin_MG(concat_amg, AMGf_assign); + { + dPOPTOPssrl; + S_do_concat(aTHX_ left, right, targ, PL_op->op_private & OPpTARGET_MY); + SETs(TARG); RETURN; } } @@ -346,8 +361,8 @@ In addition: sprintf "...%s...". Don't call '.' overloading: only use '""' overloading. - OPpMULTICONCAT_STRINGIFY: (for Deparse's benefit) the RHS was of the - form "...$a...$b..." rather than + OPpMULTICONCAT_STRINGIFY: the RHS was of the form + "...$a...$b..." rather than "..." . $a . "..." . $b . "..." An OP_MULTICONCAT is of type UNOP_AUX. The fixed slots of the aux array are @@ -384,8 +399,7 @@ PP(pp_multiconcat) { dSP; SV *targ; /* The SV to be assigned or appended to */ - SV *dsv; /* the SV to concat args to (often == targ) */ - char *dsv_pv; /* where within SvPVX(dsv) we're writing to */ + char *targ_pv; /* where within SvPVX(targ) we're writing to */ STRLEN targ_len; /* SvCUR(targ) */ SV **toparg; /* the highest arg position on the stack */ UNOP_AUX_item *aux; /* PL_op->op_aux buffer */ @@ -393,7 +407,7 @@ PP(pp_multiconcat) const char *const_pv; /* the current segment of the const string buf */ SSize_t nargs; /* how many args were expected */ SSize_t stack_adj; /* how much to adjust SP on return */ - STRLEN grow; /* final size of destination string (dsv) */ + STRLEN grow; /* final size of destination string (targ) */ UV targ_count; /* how many times targ has appeared on the RHS */ bool is_append; /* OPpMULTICONCAT_APPEND flag is set */ bool slow_concat; /* args too complex for quick concat */ @@ -441,10 +455,6 @@ PP(pp_multiconcat) toparg = SP; SP -= (nargs - 1); - dsv = targ; /* Set the destination for all concats. This is - initially targ; later on, dsv may be switched - to point to a TEMP SV if overloading is - encountered. */ grow = 1; /* allow for '\0' at minimum */ targ_count = 0; targ_chain = NULL; @@ -465,13 +475,13 @@ PP(pp_multiconcat) * Where an arg is actually targ, the stringification is deferred: * the length is set to 0, and the slot is added to targ_chain. * - * If an overloaded arg is found, the loop is abandoned at that point, - * and dsv is set to an SvTEMP SV where the results-so-far will be - * accumulated. + * If a magic, overloaded, or otherwise weird arg is found, which + * might have side effects when stringified, the loop is abandoned and + * we goto a code block where a more basic 'emulate calling + * pp_cpncat() on each arg in turn' is done. */ for (; SP <= toparg; SP++, svpv_end++) { - bool simple_flags; U32 utf8; STRLEN len; SV *sv; @@ -479,161 +489,54 @@ PP(pp_multiconcat) assert(svpv_end - svpv_buf < PERL_MULTICONCAT_MAXARG); sv = *SP; - simple_flags = (SvFLAGS(sv) & (SVs_GMG|SVf_ROK|SVf_POK)) == SVf_POK; /* this if/else chain is arranged so that common/simple cases * take few conditionals */ - if (LIKELY(simple_flags && (sv != targ))) { - /* common case: sv is a simple PV and not the targ */ - svpv_end->pv = SvPVX(sv); + if (LIKELY((SvFLAGS(sv) & (SVs_GMG|SVf_ROK|SVf_POK)) == SVf_POK)) { + /* common case: sv is a simple non-magical PV */ + if (targ == sv) { + /* targ appears on RHS. + * Delay storing PV pointer; instead, add slot to targ_chain + * so it can be populated later, after targ has been grown and + * we know its final SvPVX() address. + */ + targ_on_rhs: + svpv_end->len = 0; /* zerojng here means we can skip + updating later if targ_len == 0 */ + svpv_end->pv = (char*)targ_chain; + targ_chain = svpv_end; + targ_count++; + continue; + } + len = SvCUR(sv); + svpv_end->pv = SvPVX(sv); } - else if (simple_flags) { - /* sv is targ (but can't be magic or overloaded). - * Delay storing PV pointer; instead, add slot to targ_chain - * so it can be populated later, after targ has been grown and - * we know its final SvPVX() address. + else if (UNLIKELY(SvFLAGS(sv) & (SVs_GMG|SVf_ROK))) + /* may have side effects: tie, overload etc. + * Abandon 'stringify everything first' and handle + * args in strict order. Note that already-stringified args + * will be reprocessed, which is safe because the each first + * stringification would have been idempotent. */ - targ_on_rhs: - svpv_end->len = 0; /* zerojng here means we can skip - updating later if targ_len == 0 */ - svpv_end->pv = (char*)targ_chain; - targ_chain = svpv_end; - targ_count++; - continue; - } - else { - if (UNLIKELY(SvFLAGS(sv) & (SVs_GMG|SVf_ROK))) { - /* its got magic, is tied, and/or is overloaded */ - SvGETMAGIC(sv); - - if (UNLIKELY(SvAMAGIC(sv)) - && !(PL_op->op_private & OPpMULTICONCAT_FAKE)) - { - /* One of the RHS args is overloaded. Abandon stringifying - * the args at this point, then in the concat loop later - * on, concat the plain args stringified so far into a - * TEMP SV. At the end of this function the remaining - * args (including the current one) will be handled - * specially, using overload calls. - * FAKE implies an optimised sprintf which doesn't use - * concat overloading, only "" overloading. - */ - - if ( svpv_end == svpv_buf + 1 - /* no const string segments */ - && aux[PERL_MULTICONCAT_IX_LENGTHS].ssize == -1 - && aux[PERL_MULTICONCAT_IX_LENGTHS + 1].ssize == -1 - ) { - /* special case: if the overloaded sv is the - * second arg in the concat chain, stop at the - * first arg rather than this, so that - * - * $arg1 . $arg2 - * - * invokes overloading as - * - * concat($arg2, $arg1, 1) - * - * rather than - * - * concat($arg2, "$arg1", 1) - * - * This means that if for example arg1 is a ref, - * it gets passed as-is to the concat method - * rather than a stringified copy. If it's not the - * first arg, it doesn't matter, as in $arg0 . - * $arg1 . $arg2, where the result of ($arg0 . - * $arg1) will already be a string. - * THis isn't perfect: we'll have already - * done SvPV($arg1) on the previous iteration; - * and are now throwing away that result and - * hoping arg1 hasn;t been affected. - */ - svpv_end--; - SP--; - } - - setup_overload: - dsv = newSVpvn_flags("", 0, SVs_TEMP); - - if (targ_chain) { - /* Get the string value of targ and populate any - * RHS slots which use it */ - char *pv = SvPV_nomg(targ, len); - dst_utf8 |= (SvFLAGS(targ) & SVf_UTF8); - grow += len * targ_count; - do { - struct multiconcat_svpv *p = targ_chain; - targ_chain = (struct multiconcat_svpv *)(p->pv); - p->pv = pv; - p->len = len; - } while (targ_chain); - } - else if (is_append) - SvGETMAGIC(targ); - - goto phase3; - } - - if (SvFLAGS(sv) & SVs_RMG) { - /* probably tied; copy it to guarantee separate values - * each time it's used, e.g. "-$tied-$tied-$tied-", - * since FETCH() isn't necessarily idempotent */ - SV *nsv = newSV(0); - sv_setsv_flags(nsv, sv, SV_NOSTEAL); - sv_2mortal(nsv); - if ( sv == targ - && is_append - && nargs == 1 - /* no const string segments */ - && aux[PERL_MULTICONCAT_IX_LENGTHS].ssize == -1 - && aux[PERL_MULTICONCAT_IX_LENGTHS+1].ssize == -1) - { - /* special-case $tied .= $tied. - * - * For something like - * sub FETCH { $i++ } - * then - * $tied .= $tied . $tied . $tied; - * will STORE "4123" - * while - * $tied .= $tied - * will STORE "12" - * - * i.e. for a single mutator concat, the LHS is - * retrieved first; in all other cases it is - * retrieved last. Whether this is sane behaviour - * is open to debate; but for now, multiconcat (as - * it is an optimisation) tries to reproduce - * existing behaviour. - */ - sv_catsv(nsv, sv); - sv_setsv(sv,nsv); - SP++; - goto phase7; /* just return targ as-is */ - } - - sv = nsv; - } - } - - if (sv == targ) { - /* must warn for each RH usage of targ, except that - * we will later get one warning when doing - * SvPV_force(targ), *except* on '.=' */ - if ( !SvOK(sv) - && (targ_chain || is_append) - && ckWARN(WARN_UNINITIALIZED) - ) - report_uninit(sv); - goto targ_on_rhs; - } - - /* stringify general SV */ + goto do_magical; + else if (SvNIOK(sv)) { + if (targ == sv) + goto targ_on_rhs; + /* stringify general valid scalar */ svpv_end->pv = sv_2pv_flags(sv, &len, 0); } + else if (!SvOK(sv)) { + if (ckWARN(WARN_UNINITIALIZED)) + /* an undef value in the presence of warnings may trigger + * side affects */ + goto do_magical; + svpv_end->pv = (char*)""; + len = 0; + } + else + goto do_magical; /* something weird */ utf8 = (SvFLAGS(sv) & SVf_UTF8); dst_utf8 |= utf8; @@ -652,31 +555,9 @@ PP(pp_multiconcat) */ if (is_append) { - if (UNLIKELY(SvFLAGS(targ) & (SVs_GMG|SVf_ROK))) { - SvGETMAGIC(targ); /* must do before SvAMAGIC() check */ - if (UNLIKELY(SvAMAGIC(targ))) { - /* $overloaded .= ....; - * accumulate RHS in a temp SV rather than targ, - * then append tmp to targ at the end using overload - */ - assert(!targ_chain); - dsv = newSVpvn_flags("", 0, SVs_TEMP); - - if ( svpv_end == svpv_buf + 1 - /* no const string segments */ - && aux[PERL_MULTICONCAT_IX_LENGTHS].ssize == -1 - ) { - /* special case $overloaded .= $arg1: - * avoid stringifying $arg1. - * Similar to the $arg1 . $arg2 case in phase1 - */ - svpv_end--; - SP--; - } - - goto phase3; - } - } + /* abandon quick route if using targ might have side effects */ + if (UNLIKELY(SvFLAGS(targ) & (SVs_GMG|SVf_ROK))) + goto do_magical; if (SvOK(targ)) { U32 targ_utf8; @@ -694,6 +575,10 @@ PP(pp_multiconcat) grow += targ_len * (targ_count + is_append); goto phase3; } + else if (ckWARN(WARN_UNINITIALIZED)) + /* warning might have side effects */ + goto do_magical; + /* the undef targ will be silently SvPVCLEAR()ed below */ } else if (UNLIKELY(SvTYPE(targ) >= SVt_REGEXP)) { /* Assigning to some weird LHS type. Don't force the LHS to be an @@ -705,28 +590,19 @@ PP(pp_multiconcat) * (which makes the 'F' typeglob an alias to the * '*main::F*main::F' typeglob). */ - goto setup_overload; + goto do_magical; } - else if (targ_chain) { + else if (targ_chain) /* targ was found on RHS. - * We don't need the SvGETMAGIC() call and SvAMAGIC() test as - * both were already done earlier in the SvPV() loop; other - * than that we can share the same code with the append - * branch below. - * Note that this goto jumps directly into the SvOK() branch - * even if targ isn't SvOK(), to force an 'uninitialised' - * warning; e.g. - * $undef .= .... targ only on LHS: don't warn - * $undef .= $undef .... targ on RHS too: warn + * Force stringify it, using the same code as the append branch + * above, except that we don't need the magic/overload/undef + * checks as these will already have been done in the phase 1 + * loop. */ - assert(!SvAMAGIC(targ)); goto stringify_targ; - } - /* unrolled SvPVCLEAR() - mostly: no need to grow or set SvCUR() to 0; * those will be done later. */ - assert(targ == dsv); SV_CHECK_THINKFIRST_COW_DROP(targ); SvUPGRADE(targ, SVt_PV); SvFLAGS(targ) &= ~(SVf_OK|SVf_IVisUV|SVf_UTF8); @@ -737,10 +613,10 @@ PP(pp_multiconcat) /* -------------------------------------------------------------- * Phase 3: * - * UTF-8 tweaks and grow dsv: + * UTF-8 tweaks and grow targ: * * Now that we know the length and utf8-ness of both the targ and - * args, grow dsv to the size needed to accumulate all the args, based + * args, grow targ to the size needed to accumulate all the args, based * on whether targ appears on the RHS, whether we're appending, and * whether any non-utf8 args expand in size if converted to utf8. * @@ -779,7 +655,7 @@ PP(pp_multiconcat) /* turn off utf8 handling if 'use bytes' is in scope */ if (UNLIKELY(dst_utf8 && IN_BYTES)) { dst_utf8 = 0; - SvUTF8_off(dsv); + SvUTF8_off(targ); /* undo all the negative lengths which flag utf8-ness */ for (svpv_p = svpv_buf; svpv_p < svpv_end; svpv_p++) { SSize_t len = svpv_p->len; @@ -837,16 +713,16 @@ PP(pp_multiconcat) /* unrolled SvGROW(), except don't check for SVf_IsCOW, which should * already have been dropped */ - assert(!SvIsCOW(dsv)); - dsv_pv = (SvLEN(dsv) < (grow) ? sv_grow(dsv,grow) : SvPVX(dsv)); + assert(!SvIsCOW(targ)); + targ_pv = (SvLEN(targ) < (grow) ? sv_grow(targ,grow) : SvPVX(targ)); /* -------------------------------------------------------------- * Phase 4: * - * Now that dsv (which is probably targ) has been grown, we know the - * final address of the targ PVX, if needed. Preserve / move targ - * contents if appending or if targ appears on RHS. + * Now that targ has been grown, we know the final address of the targ + * PVX, if needed. Preserve / move targ contents if appending or if + * targ appears on RHS. * * Also update svpv_buf slots in targ_chain. * @@ -869,7 +745,7 @@ PP(pp_multiconcat) * On exit, the targ contents will have been moved to the * earliest place they are needed (e.g. $x = "abc$x" will shift them * 3 bytes, while $x .= ... will leave them at the beginning); - * and dst_pv will point to the location within SvPVX(dsv) where the + * and dst_pv will point to the location within SvPVX(targ) where the * next arg should be copied. */ @@ -877,13 +753,12 @@ PP(pp_multiconcat) if (targ_len) { struct multiconcat_svpv *tc_stop; - char *targ_pv = dsv_pv; + char *targ_buf = targ_pv; /* ptr to original targ string */ - assert(targ == dsv); assert(is_append || targ_count); if (is_append) { - dsv_pv += targ_len; + targ_pv += targ_len; tc_stop = NULL; } else { @@ -924,8 +799,8 @@ PP(pp_multiconcat) } if (offset) { - targ_pv += offset; - Move(dsv_pv, targ_pv, targ_len, char); + targ_buf += offset; + Move(targ_pv, targ_buf, targ_len, char); /* a negative length implies don't Copy(), but do increment */ svpv_p->len = -((SSize_t)targ_len); slow_concat = TRUE; @@ -934,7 +809,7 @@ PP(pp_multiconcat) /* skip the first targ copy */ svpv_base++; const_lens++; - dsv_pv += targ_len; + targ_pv += targ_len; } /* Don't populate the first targ slot in the loop below; it's @@ -948,7 +823,7 @@ PP(pp_multiconcat) while (targ_chain != tc_stop) { struct multiconcat_svpv *p = targ_chain; targ_chain = (struct multiconcat_svpv *)(p->pv); - p->pv = targ_pv; + p->pv = targ_buf; p->len = (SSize_t)targ_len; } } @@ -957,7 +832,7 @@ PP(pp_multiconcat) /* -------------------------------------------------------------- * Phase 5: * - * Append all the args in svpv_buf, plus the const strings, to dsv. + * Append all the args in svpv_buf, plus the const strings, to targ. * * On entry to this section the (pv,len) pairs in svpv_buf have the * following meanings: @@ -965,7 +840,7 @@ PP(pp_multiconcat) * (pv, -(len+extra)) a plain string which will expand by 'extra' * bytes when converted to utf8 * (0, -len) left-most targ, whose content has already - * been copied. Just advance dsv_pv by len. + * been copied. Just advance targ_pv by len. */ /* If there are no constant strings and no special case args @@ -976,8 +851,8 @@ PP(pp_multiconcat) SSize_t len = svpv_p->len; if (!len) continue; - Copy(svpv_p->pv, dsv_pv, len, char); - dsv_pv += len; + Copy(svpv_p->pv, targ_pv, len, char); + targ_pv += len; } const_lens += (svpv_end - svpv_base + 1); } @@ -992,8 +867,8 @@ PP(pp_multiconcat) /* append next const string segment */ if (len > 0) { - Copy(const_pv, dsv_pv, len, char); - dsv_pv += len; + Copy(const_pv, targ_pv, len, char); + targ_pv += len; const_pv += len; } @@ -1004,8 +879,8 @@ PP(pp_multiconcat) len = svpv_p->len; if (LIKELY(len > 0)) { - Copy(svpv_p->pv, dsv_pv, len, char); - dsv_pv += len; + Copy(svpv_p->pv, targ_pv, len, char); + targ_pv += len; } else if (UNLIKELY(len < 0)) { /* negative length indicates two special cases */ @@ -1013,141 +888,227 @@ PP(pp_multiconcat) len = -len; if (UNLIKELY(p)) { /* copy plain-but-variant pv to a utf8 targ */ - char * end_pv = dsv_pv + len; + char * end_pv = targ_pv + len; assert(dst_utf8); - while (dsv_pv < end_pv) { + while (targ_pv < end_pv) { U8 c = (U8) *p++; - append_utf8_from_native_byte(c, (U8**)&dsv_pv); + append_utf8_from_native_byte(c, (U8**)&targ_pv); } } else /* arg is already-copied targ */ - dsv_pv += len; + targ_pv += len; } } } - *dsv_pv = '\0'; - SvCUR_set(dsv, dsv_pv - SvPVX(dsv)); - assert(grow >= SvCUR(dsv) + 1); - assert(SvLEN(dsv) >= SvCUR(dsv) + 1); + *targ_pv = '\0'; + SvCUR_set(targ, targ_pv - SvPVX(targ)); + assert(grow >= SvCUR(targ) + 1); + assert(SvLEN(targ) >= SvCUR(targ) + 1); /* -------------------------------------------------------------- * Phase 6: * - * Handle overloading. If an overloaded arg or targ was detected - * earlier, dsv will have been set to a new mortal, and any args and - * consts to the left of the first overloaded arg will have been - * accumulated to it. This section completes any further concatenation - * steps with overloading handled. + * return result */ - if (UNLIKELY(dsv != targ)) { - SV *res; + SP -= stack_adj; + SvTAINT(targ); + SETTARG; + RETURN; - SvFLAGS(dsv) |= dst_utf8; + /* -------------------------------------------------------------- + * Phase 7: + * + * We only get here if any of the args (or targ too in the case of + * append) have something which might cause side effects, such + * as magic, overload, or an undef value in the presence of warnings. + * In that case, any earlier attempt to stringify the args will have + * been abandoned, and we come here instead. + * + * Here, we concat each arg in turn the old-fashioned way: essentially + * emulating pp_concat() in a loop. This means that all the weird edge + * cases will be handled correctly, if not necessarily speedily. + * + * Note that some args may already have been stringified - those are + * processed again, which is safe, since only args without side-effects + * were stringified earlier. + */ + + do_magical: + { + SSize_t i, n; + SV *left = NULL; + SV *right; + SV* nexttarg; + bool nextappend; + U32 utf8 = 0; + SV **svp; + const char *cpv = aux[PERL_MULTICONCAT_IX_PLAIN_PV].pv; + UNOP_AUX_item *lens = aux + PERL_MULTICONCAT_IX_LENGTHS; + Size_t arg_count = 0; /* how many args have been processed */ + + if (!cpv) { + cpv = aux[PERL_MULTICONCAT_IX_UTF8_PV].pv; + utf8 = SVf_UTF8; + } - if (SP <= toparg) { - /* Stringifying the RHS was abandoned because *SP - * is overloaded. dsv contains all the concatted strings - * before *SP. Apply the rest of the args using overloading. + svp = toparg - nargs + 1; + + /* iterate for: + * nargs arguments, + * plus possible nargs+1 consts, + * plus, if appending, a final targ in an extra last iteration + */ + + n = nargs *2 + 1; + for (i = 0; i <= n; i++) { + SSize_t len; + + /* if necessary, stringify the final RHS result in + * something like $targ .= "$a$b$c" - simulating + * pp_stringify */ - SV *left, *right, *res; - int i; - bool getmg = FALSE; - /* number of args already concatted */ - SSize_t n = (nargs - 1) - (toparg - SP); - /* current arg is either the first - * or second value to be concatted - * (including constant strings), so would - * form part of the first concat */ - bool first_concat = ( n == 0 - || (n == 1 && const_lens[-2].ssize < 0 - && const_lens[-1].ssize < 0)); - int f_assign = first_concat ? 0 : AMGf_assign; - - left = dsv; - - for (; n < nargs; n++) { - /* loop twice, first applying the arg, then the const segment */ - for (i = 0; i < 2; i++) { - if (i) { - /* append next const string segment */ - STRLEN len = (STRLEN)((const_lens++)->ssize); - /* a length of -1 implies no constant string - * rather than a zero-length one, e.g. - * ($a . $b) versus ($a . "" . $b) - */ - if ((SSize_t)len < 0) - continue; + if ( i == n + && (PL_op->op_private &OPpMULTICONCAT_STRINGIFY) + && !(SvPOK(left)) + /* extra conditions for backwards compatibility: + * probably incorrect, but keep the existing behaviour + * for now. The rules are: + * $x = "$ov" single arg: stringify; + * $x = "$ov$y" multiple args: don't stringify, + * $lex = "$ov$y$z" except TARGMY with at least 2 concats + */ + && ( arg_count == 1 + || ( arg_count >= 3 + && !is_append + && (PL_op->op_private & OPpTARGET_MY) + && !(PL_op->op_private & OPpLVAL_INTRO) + ) + ) + ) + { + SV *tmp = sv_newmortal(); + sv_copypv(tmp, left); + SvSETMAGIC(tmp); + left = tmp; + } - /* set right to the next constant string segment */ - right = newSVpvn_flags(const_pv, len, - (dst_utf8 | SVs_TEMP)); - const_pv += len; - } - else { - /* append next arg */ - right = *SP++; - if (getmg) - SvGETMAGIC(right); - else - /* SvGETMAGIC already called on this SV just - * before we broke from the loop earlier */ - getmg = TRUE; - - if (first_concat && n == 0 && const_lens[-1].ssize < 0) { - /* nothing before the current arg; repeat the - * loop to get a second arg */ - left = right; - first_concat = FALSE; - continue; - } - } + /* do one extra iteration to handle $targ in $targ .= ... */ + if (i == n && !is_append) + break; + + /* get the next arg SV or regen the next const SV */ + len = lens[i >> 1].ssize; + if (i == n) { + /* handle the final targ .= (....) */ + right = left; + left = targ; + } + else if (i & 1) + right = svp[(i >> 1)]; + else if (len < 0) + continue; /* no const in this position */ + else { + right = newSVpvn_flags(cpv, len, (utf8 | SVs_TEMP)); + cpv += len; + } + + arg_count++; + + if (arg_count <= 1) { + left = right; + continue; /* need at least two SVs to concat together */ + } + + if (arg_count == 2 && i < n) { + /* for the first concat, create a mortal acting like the + * padtmp from OP_CONST. In later iterations this will + * be appended to */ + nexttarg = sv_newmortal(); + nextappend = FALSE; + } + else { + nexttarg = left; + nextappend = TRUE; + } + + /* Handle possible overloading. + * This is basically an unrolled + * tryAMAGICbin_MG(concat_amg, AMGf_assign); + * and + * Perl_try_amagic_bin() + * call, but using left and right rather than SP[-1], SP[0], + * and not relying on OPf_STACKED implying .= + */ - if ((SvAMAGIC(left) || SvAMAGIC(right)) - && (res = amagic_call(left, right, concat_amg, f_assign)) + if ((SvFLAGS(left)|SvFLAGS(right)) & (SVf_ROK|SVs_GMG)) { + SvGETMAGIC(left); + if (left != right) + SvGETMAGIC(right); + + if ((SvAMAGIC(left) || SvAMAGIC(right)) + /* sprintf doesn't do concat overloading, + * but allow for $x .= sprintf(...) + */ + && ( !(PL_op->op_private & OPpMULTICONCAT_FAKE) + || i == n) ) - left = res; - else { - if (left != dsv) { - sv_setsv(dsv, left); - left = dsv; + { + SV * const tmpsv = amagic_call(left, right, concat_amg, + (nextappend ? AMGf_assign: 0)); + if (tmpsv) { + /* NB: tryAMAGICbin_MG() includes an SvPADMY test + * here, which isn;t needed as any implicit + * assign does under OPpTARGET_MY is done after + * this loop */ + if (nextappend) { + sv_setsv(left, tmpsv); + SvSETMAGIC(left); } - sv_catsv_nomg(left, right); + else + left = tmpsv; + continue; + } + } + + /* if both args are the same magical value, make one a copy */ + if (left == right && SvGMAGICAL(left)) { + left = sv_newmortal(); + /* Print the uninitialized warning now, so it includes the + * variable name. */ + if (!SvOK(right)) { + if (ckWARN(WARN_UNINITIALIZED)) + report_uninit(right); + sv_setsv_flags(left, &PL_sv_no, 0); } - f_assign = AMGf_assign; + else + sv_setsv_flags(left, right, 0); + SvGETMAGIC(right); } } - dsv = left; - } - /* assign/append RHS (dsv) to LHS (targ) */ - if (is_append) { - if ((SvAMAGIC(targ) || SvAMAGIC(dsv)) - && (res = amagic_call(targ, dsv, concat_amg, AMGf_assign)) - ) - sv_setsv(targ, res); - else - sv_catsv_nomg(targ, dsv); + /* nexttarg = left . right */ + S_do_concat(aTHX_ left, right, nexttarg, 0); + left = nexttarg; } - else - sv_setsv(targ, dsv); - } - - /* -------------------------------------------------------------- - * Phase 7: - * - * return result - */ - phase7: + SP = toparg - stack_adj + 1; - SP -= stack_adj; - SvTAINT(targ); - SETTARG; - RETURN; + /* Assign result of all RHS concats (left) to LHS (targ). + * If we are appending, targ will already have been appended to in + * the loop */ + if (is_append) + SvTAINT(targ); + else { + sv_setsv(targ, left); + SvSETMAGIC(targ); + } + SETs(targ); + RETURN; + } } diff --git a/regen/op_private b/regen/op_private index 49cb4bc035..a94c0c38c0 100644 --- a/regen/op_private +++ b/regen/op_private @@ -824,7 +824,7 @@ addbits('multiconcat', 6 => qw(OPpMULTICONCAT_APPEND APPEND), # $x .= .... 5 => qw(OPpMULTICONCAT_FAKE FAKE), # sprintf() optimised to MC. # 4 OPpTARGET_MY - 3 => qw(OPpMULTICONCAT_STRINGIFY STRINGIFY), # "$a$b...", (for Deparse.pm) + 3 => qw(OPpMULTICONCAT_STRINGIFY STRINGIFY), # "$a$b..." ); diff --git a/t/opbasic/concat.t b/t/opbasic/concat.t index 42851d23b9..9ce9722f5c 100644 --- a/t/opbasic/concat.t +++ b/t/opbasic/concat.t @@ -39,7 +39,7 @@ sub is { return $ok; } -print "1..252\n"; +print "1..253\n"; ($a, $b, $c) = qw(foo bar); @@ -840,3 +840,16 @@ ok(ref(CORE::state $y = "a $o b") eq 'o', "AXBCXDAXBCXDAXBCXDAXBCXDAXBCXDAXBCXDAXBCXDAXBCXDAXBCXDAXBCXDAXBCXDAXBCXDAXBCXDAXBCXDAXBCXDAXBCXDAXBCXD", "RT #132646"); } + +# RT #132595 +# multiconcat shouldn't affect the order of arg evaluation +package RT132595 { + my $a = "a"; + my $i = 0; + sub TIESCALAR { bless({}, $_[0]) } + sub FETCH { ++$i; $a = "b".$i; "c".$i } + my $t; + tie $t, "RT132595"; + my $res = $a.$t.$a.$t; + ::is($res, "b1c1b1c2", "RT #132595"); +} diff --git a/t/re/pat_re_eval.t b/t/re/pat_re_eval.t index efcff4e71c..f88a8651a1 100644 --- a/t/re/pat_re_eval.t +++ b/t/re/pat_re_eval.t @@ -23,7 +23,7 @@ BEGIN { our @global; -plan tests => 497; # Update this when adding/deleting tests. +plan tests => 502; # Update this when adding/deleting tests. run_tests() unless caller; @@ -1278,6 +1278,29 @@ sub run_tests { is $max, 2, "RT #126697"; } + # RT #132772 + # + # Ensure that optimisation of OP_CONST into OP_MULTICONCAT doesn't + # leave any freed ops in the execution path. This is is associated + # with rpeep() being called before optimize_optree(), which causes + # gv/rv2sv to be prematurely optimised into gvsv, confusing + # S_maybe_multiconcat when it tries to reorganise a concat subtree + # into a multiconcat list + + { + my $a = "a"; + local $b = "b"; # not lexical, so optimised to OP_GVSV + local $_ = "abc"; + ok /^a(??{ $b."c" })$/, "RT #132772 - compile time"; + ok /^$a(??{ $b."c" })$/, "RT #132772 - run time"; + my $qr = qr/^a(??{ $b."c" })$/; + ok /$qr/, "RT #132772 - compile time time qr//"; + $qr = qr/(??{ $b."c" })$/; + ok /^a$qr$/, "RT #132772 - compile time time qr// compound"; + $qr = qr/$a(??{ $b."c" })$/; + ok /^$qr$/, "RT #132772 - run time time qr//"; + } + } # End of sub run_tests |