summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Mitchell <davem@iabyn.com>2018-02-19 22:20:40 +0000
committerDavid Mitchell <davem@iabyn.com>2018-02-19 22:20:40 +0000
commitf7675015b1fdb2eee619d34c20a279db07dcf844 (patch)
tree4be98d26838e5f349e32dcb885052f6400547372
parent6d37ab4efc1fb099593fb29406193ae79c3be543 (diff)
parent8327fe931a244c33965f30d2ba4bbe7248016951 (diff)
downloadperl-f7675015b1fdb2eee619d34c20a279db07dcf844.tar.gz
[MERGE] fixups for OP_MULTICONCAT
This branch fixes three main issues caused by the OP_MULTICONCAT optimisation: * When dealing with magic/tie/overloading, there were many edge cases; the way these are handled has been completely rewritten, fixing bugs related to exactly when and how args are sringified and/or tested for undefinedness; * relatedly, on a stringified concat such as $x .= "$y", or $a = "$b$c$d", the stringification is honoured under exactly the same (buggy) set of conditions that pertained before multiconcat was introduced; this is noticable in overloading whether the result calls the stringify method and thus returns a plain string, or whether it skips that and returns an overloaded object, e.g. for $a = "$b$c$d", does $a end up as a ref or a string. Before the branch it was always a ref, now its sometimes a string. * concats within a runtime pattern code block could crash, e.g. /$a(?{ $b . "c" })/
-rw-r--r--lib/overload.t129
-rw-r--r--op.c20
-rw-r--r--pp_hot.c627
-rw-r--r--regen/op_private2
-rw-r--r--t/opbasic/concat.t15
-rw-r--r--t/re/pat_re_eval.t25
6 files changed, 474 insertions, 344 deletions
diff --git a/lib/overload.t b/lib/overload.t
index 2afa6cf437..055daab30f 100644
--- a/lib/overload.t
+++ b/lib/overload.t
@@ -48,7 +48,7 @@ package main;
$| = 1;
BEGIN { require './test.pl'; require './charset_tools.pl' }
-plan tests => 5338;
+plan tests => 5362;
use Scalar::Util qw(tainted);
@@ -3047,3 +3047,130 @@ package RT132385 {
# ditto with a mutator
::is($o .= $r1, "obj-ref1", "RT #132385 o.=r1");
}
+
+# the RHS of an overloaded .= should be passed as-is to the overload
+# method, rather than being stringified or otherwise being processed in
+# such a way that it triggers an undef warning
+package RT132783 {
+ use warnings;
+ use overload '.=' => sub { return "foo" };
+ my $w = 0;
+ local $SIG{__WARN__} = sub { $w++ };
+ my $undef;
+ my $ov = bless [];
+ $ov .= $undef;
+ ::is($w, 0, "RT #132783 - should be no warnings");
+}
+
+# changing the overloaded object to a plain string within an overload
+# method should be permanent.
+package RT132827 {
+ use overload '""' => sub { $_[0] = "a" };
+ my $ov = bless [];
+ my $b = $ov . "b";
+ ::is(ref \$ov, "SCALAR", "RT #132827");
+}
+
+# RT #132793
+# An arg like like "$b" in $overloaded .= "$b" should be stringified
+# before being passed to the method
+
+package RT132793 {
+ my $type;
+ my $str = 0;
+ use overload
+ '.=' => sub { $type = ref(\$_[1]); "foo"; },
+ '""' => sub { $str++; "bar" };
+
+ my $a = bless {};
+ my $b = bless {};
+ $a .= "$b";
+ ::is($type, "SCALAR", "RT #132793 type");
+ ::is($str, 1, "RT #132793 stringify count");
+}
+
+# RT #132801
+# A second RHS-not-stringified bug
+
+package RT132801 {
+ my $type;
+ my $str = 0;
+ my $concat = 0;
+ use overload
+ '.' => sub { $concat++; bless []; },
+ '""' => sub { $str++; "bar" };
+
+ my $a = "A";
+ my $b = bless [];
+ my $c;
+ $c = "$a-$b";
+ ::is($concat, 1, "RT #132801 concat count");
+ ::is($str, 1, "RT #132801 stringify count");
+}
+
+# General testing of optimising away OP_STRINGIFY, and whether
+# OP_MULTICONCAT emulates existing behaviour.
+#
+# It could well be argued that the existing behaviour is buggy, but
+# for now emulate the old behaviour.
+#
+# In more detail:
+#
+# Since 5.000, any OP_STRINGIFY immediately following an OP_CONCAT
+# is optimised away, on the assumption that since concat will always
+# return a valid string anyway, it doesn't need stringifying.
+# So in "$x", the stringify is needed, but on "$x$y" it isn't.
+# This assumption is flawed once overloading has been introduced, since
+# concat might return an overloaded object which still needs stringifying.
+# However, this flawed behaviour is apparently needed by at least one
+# module, and is tested for in opbasic/concat.t: see RT #124160.
+#
+# There is also a wart with the OPpTARGET_MY optimisation: specifically,
+# in $lex = "...", if $lex is a lexical var, then a chain of 2 or more
+# concats *doesn't* optimise away OP_STRINGIFY:
+#
+# $lex = "$x"; # stringifies
+# $lex = "$x$y"; # doesn't stringify
+# $lex = "$x$y$z..."; # stringifies
+
+package Stringify {
+ my $count;
+ use overload
+ '.' => sub {
+ my ($a, $b, $rev) = @_;
+ bless [ $rev ? "$b" . $a->[0] : $a->[0] . "$b" ];
+ },
+ '""' => sub { $count++; $_[0][0] },
+ ;
+
+ for my $test(
+ [ 1, '$pkg = "$ov"' ],
+ [ 1, '$lex = "$ov"' ],
+ [ 1, 'my $a = "$ov"' ],
+ [ 1, '$pkg .= "$ov"' ],
+ [ 1, '$lex .= "$ov"' ],
+ [ 1, 'my $a .= "$ov"' ],
+
+ [ 0, '$pkg = "$ov$x"' ],
+ [ 0, '$lex = "$ov$x"' ],
+ [ 0, 'my $a = "$ov$x"' ],
+ [ 0, '$pkg .= "$ov$x"' ],
+ [ 0, '$lex .= "$ov$x"' ],
+ [ 0, 'my $a .= "$ov$x"' ],
+
+ [ 0, '$pkg = "$ov$x$y"' ],
+ [ 1, '$lex = "$ov$x$y"' ], # XXX note the anomaly
+ [ 0, 'my $a = "$ov$x$y"' ],
+ [ 0, '$pkg .= "$ov$x$y"' ],
+ [ 0, '$lex .= "$ov$x$y"' ],
+ [ 0, 'my $a .= "$ov$x$y"' ],
+ )
+ {
+ my ($stringify, $code) = @$test;
+ our $pkg = 'P';
+ my ($ov, $x, $y, $lex) = (bless(['OV']), qw(X Y L));
+ $count = 0;
+ eval "$code; 1" or die $@;
+ ::is $count, $stringify, $code;
+ }
+}
diff --git a/op.c b/op.c
index c6f228b2e0..22751775af 100644
--- a/op.c
+++ b/op.c
@@ -3204,16 +3204,16 @@ S_maybe_multiconcat(pTHX_ OP *o)
OP *prev;
/* set prev to the sibling *before* the arg to be cut out,
- * e.g.:
+ * e.g. when cutting EXPR:
*
* |
- * kid= CONST
+ * kid= CONCAT
* |
- * prev= CONST -- EXPR
+ * prev= CONCAT -- EXPR
* |
*/
if (argp == args && kid->op_type != OP_CONCAT) {
- /* in e.g. '$x . = f(1)' there's no RHS concat tree
+ /* in e.g. '$x .= f(1)' there's no RHS concat tree
* so the expression to be cut isn't kid->op_last but
* kid itself */
OP *o1, *o2;
@@ -6952,9 +6952,15 @@ Perl_pmruntime(pTHX_ OP *o, OP *expr, OP *repl, UV flags, I32 floor)
op_null(scope);
}
- if (is_compiletime)
- /* runtime finalizes as part of finalizing whole tree */
- optimize_optree(o);
+ /* XXX optimize_optree() must be called on o before
+ * CALL_PEEP(), as currently S_maybe_multiconcat() can't
+ * currently cope with a peephole-optimised optree.
+ * Calling optimize_optree() here ensures that condition
+ * is met, but may mean optimize_optree() is applied
+ * to the same optree later (where hopefully it won't do any
+ * harm as it can't convert an op to multiconcat if it's
+ * already been converted */
+ optimize_optree(o);
/* have to peep the DOs individually as we've removed it from
* the op_next chain */
diff --git a/pp_hot.c b/pp_hot.c
index 328d6f0659..1cdc90aa27 100644
--- a/pp_hot.c
+++ b/pp_hot.c
@@ -253,11 +253,17 @@ PP(pp_unstack)
return NORMAL;
}
-PP(pp_concat)
+
+/* The main body of pp_concat, not including the magic/overload and
+ * stack handling.
+ * It does targ = left . right.
+ * Moved into a separate function so that pp_multiconcat() can use it
+ * too.
+ */
+
+PERL_STATIC_INLINE void
+S_do_concat(pTHX_ SV *left, SV *right, SV *targ, U8 targmy)
{
- dSP; dATARGET; tryAMAGICbin_MG(concat_amg, AMGf_assign);
- {
- dPOPTOPssrl;
bool lbyte;
STRLEN rlen;
const char *rpv = NULL;
@@ -285,7 +291,7 @@ PP(pp_concat)
else { /* $l .= $r and left == TARG */
if (!SvOK(left)) {
if ((left == right /* $l .= $l */
- || (PL_op->op_private & OPpTARGET_MY)) /* $l = $l . $r */
+ || targmy) /* $l = $l . $r */
&& ckWARN(WARN_UNINITIALIZED)
)
report_uninit(left);
@@ -314,8 +320,17 @@ PP(pp_concat)
}
}
sv_catpvn_nomg(TARG, rpv, rlen);
+ SvSETMAGIC(TARG);
+}
- SETTARG;
+
+PP(pp_concat)
+{
+ dSP; dATARGET; tryAMAGICbin_MG(concat_amg, AMGf_assign);
+ {
+ dPOPTOPssrl;
+ S_do_concat(aTHX_ left, right, targ, PL_op->op_private & OPpTARGET_MY);
+ SETs(TARG);
RETURN;
}
}
@@ -346,8 +361,8 @@ In addition:
sprintf "...%s...". Don't call '.'
overloading: only use '""' overloading.
- OPpMULTICONCAT_STRINGIFY: (for Deparse's benefit) the RHS was of the
- form "...$a...$b..." rather than
+ OPpMULTICONCAT_STRINGIFY: the RHS was of the form
+ "...$a...$b..." rather than
"..." . $a . "..." . $b . "..."
An OP_MULTICONCAT is of type UNOP_AUX. The fixed slots of the aux array are
@@ -384,8 +399,7 @@ PP(pp_multiconcat)
{
dSP;
SV *targ; /* The SV to be assigned or appended to */
- SV *dsv; /* the SV to concat args to (often == targ) */
- char *dsv_pv; /* where within SvPVX(dsv) we're writing to */
+ char *targ_pv; /* where within SvPVX(targ) we're writing to */
STRLEN targ_len; /* SvCUR(targ) */
SV **toparg; /* the highest arg position on the stack */
UNOP_AUX_item *aux; /* PL_op->op_aux buffer */
@@ -393,7 +407,7 @@ PP(pp_multiconcat)
const char *const_pv; /* the current segment of the const string buf */
SSize_t nargs; /* how many args were expected */
SSize_t stack_adj; /* how much to adjust SP on return */
- STRLEN grow; /* final size of destination string (dsv) */
+ STRLEN grow; /* final size of destination string (targ) */
UV targ_count; /* how many times targ has appeared on the RHS */
bool is_append; /* OPpMULTICONCAT_APPEND flag is set */
bool slow_concat; /* args too complex for quick concat */
@@ -441,10 +455,6 @@ PP(pp_multiconcat)
toparg = SP;
SP -= (nargs - 1);
- dsv = targ; /* Set the destination for all concats. This is
- initially targ; later on, dsv may be switched
- to point to a TEMP SV if overloading is
- encountered. */
grow = 1; /* allow for '\0' at minimum */
targ_count = 0;
targ_chain = NULL;
@@ -465,13 +475,13 @@ PP(pp_multiconcat)
* Where an arg is actually targ, the stringification is deferred:
* the length is set to 0, and the slot is added to targ_chain.
*
- * If an overloaded arg is found, the loop is abandoned at that point,
- * and dsv is set to an SvTEMP SV where the results-so-far will be
- * accumulated.
+ * If a magic, overloaded, or otherwise weird arg is found, which
+ * might have side effects when stringified, the loop is abandoned and
+ * we goto a code block where a more basic 'emulate calling
+ * pp_cpncat() on each arg in turn' is done.
*/
for (; SP <= toparg; SP++, svpv_end++) {
- bool simple_flags;
U32 utf8;
STRLEN len;
SV *sv;
@@ -479,161 +489,54 @@ PP(pp_multiconcat)
assert(svpv_end - svpv_buf < PERL_MULTICONCAT_MAXARG);
sv = *SP;
- simple_flags = (SvFLAGS(sv) & (SVs_GMG|SVf_ROK|SVf_POK)) == SVf_POK;
/* this if/else chain is arranged so that common/simple cases
* take few conditionals */
- if (LIKELY(simple_flags && (sv != targ))) {
- /* common case: sv is a simple PV and not the targ */
- svpv_end->pv = SvPVX(sv);
+ if (LIKELY((SvFLAGS(sv) & (SVs_GMG|SVf_ROK|SVf_POK)) == SVf_POK)) {
+ /* common case: sv is a simple non-magical PV */
+ if (targ == sv) {
+ /* targ appears on RHS.
+ * Delay storing PV pointer; instead, add slot to targ_chain
+ * so it can be populated later, after targ has been grown and
+ * we know its final SvPVX() address.
+ */
+ targ_on_rhs:
+ svpv_end->len = 0; /* zerojng here means we can skip
+ updating later if targ_len == 0 */
+ svpv_end->pv = (char*)targ_chain;
+ targ_chain = svpv_end;
+ targ_count++;
+ continue;
+ }
+
len = SvCUR(sv);
+ svpv_end->pv = SvPVX(sv);
}
- else if (simple_flags) {
- /* sv is targ (but can't be magic or overloaded).
- * Delay storing PV pointer; instead, add slot to targ_chain
- * so it can be populated later, after targ has been grown and
- * we know its final SvPVX() address.
+ else if (UNLIKELY(SvFLAGS(sv) & (SVs_GMG|SVf_ROK)))
+ /* may have side effects: tie, overload etc.
+ * Abandon 'stringify everything first' and handle
+ * args in strict order. Note that already-stringified args
+ * will be reprocessed, which is safe because the each first
+ * stringification would have been idempotent.
*/
- targ_on_rhs:
- svpv_end->len = 0; /* zerojng here means we can skip
- updating later if targ_len == 0 */
- svpv_end->pv = (char*)targ_chain;
- targ_chain = svpv_end;
- targ_count++;
- continue;
- }
- else {
- if (UNLIKELY(SvFLAGS(sv) & (SVs_GMG|SVf_ROK))) {
- /* its got magic, is tied, and/or is overloaded */
- SvGETMAGIC(sv);
-
- if (UNLIKELY(SvAMAGIC(sv))
- && !(PL_op->op_private & OPpMULTICONCAT_FAKE))
- {
- /* One of the RHS args is overloaded. Abandon stringifying
- * the args at this point, then in the concat loop later
- * on, concat the plain args stringified so far into a
- * TEMP SV. At the end of this function the remaining
- * args (including the current one) will be handled
- * specially, using overload calls.
- * FAKE implies an optimised sprintf which doesn't use
- * concat overloading, only "" overloading.
- */
-
- if ( svpv_end == svpv_buf + 1
- /* no const string segments */
- && aux[PERL_MULTICONCAT_IX_LENGTHS].ssize == -1
- && aux[PERL_MULTICONCAT_IX_LENGTHS + 1].ssize == -1
- ) {
- /* special case: if the overloaded sv is the
- * second arg in the concat chain, stop at the
- * first arg rather than this, so that
- *
- * $arg1 . $arg2
- *
- * invokes overloading as
- *
- * concat($arg2, $arg1, 1)
- *
- * rather than
- *
- * concat($arg2, "$arg1", 1)
- *
- * This means that if for example arg1 is a ref,
- * it gets passed as-is to the concat method
- * rather than a stringified copy. If it's not the
- * first arg, it doesn't matter, as in $arg0 .
- * $arg1 . $arg2, where the result of ($arg0 .
- * $arg1) will already be a string.
- * THis isn't perfect: we'll have already
- * done SvPV($arg1) on the previous iteration;
- * and are now throwing away that result and
- * hoping arg1 hasn;t been affected.
- */
- svpv_end--;
- SP--;
- }
-
- setup_overload:
- dsv = newSVpvn_flags("", 0, SVs_TEMP);
-
- if (targ_chain) {
- /* Get the string value of targ and populate any
- * RHS slots which use it */
- char *pv = SvPV_nomg(targ, len);
- dst_utf8 |= (SvFLAGS(targ) & SVf_UTF8);
- grow += len * targ_count;
- do {
- struct multiconcat_svpv *p = targ_chain;
- targ_chain = (struct multiconcat_svpv *)(p->pv);
- p->pv = pv;
- p->len = len;
- } while (targ_chain);
- }
- else if (is_append)
- SvGETMAGIC(targ);
-
- goto phase3;
- }
-
- if (SvFLAGS(sv) & SVs_RMG) {
- /* probably tied; copy it to guarantee separate values
- * each time it's used, e.g. "-$tied-$tied-$tied-",
- * since FETCH() isn't necessarily idempotent */
- SV *nsv = newSV(0);
- sv_setsv_flags(nsv, sv, SV_NOSTEAL);
- sv_2mortal(nsv);
- if ( sv == targ
- && is_append
- && nargs == 1
- /* no const string segments */
- && aux[PERL_MULTICONCAT_IX_LENGTHS].ssize == -1
- && aux[PERL_MULTICONCAT_IX_LENGTHS+1].ssize == -1)
- {
- /* special-case $tied .= $tied.
- *
- * For something like
- * sub FETCH { $i++ }
- * then
- * $tied .= $tied . $tied . $tied;
- * will STORE "4123"
- * while
- * $tied .= $tied
- * will STORE "12"
- *
- * i.e. for a single mutator concat, the LHS is
- * retrieved first; in all other cases it is
- * retrieved last. Whether this is sane behaviour
- * is open to debate; but for now, multiconcat (as
- * it is an optimisation) tries to reproduce
- * existing behaviour.
- */
- sv_catsv(nsv, sv);
- sv_setsv(sv,nsv);
- SP++;
- goto phase7; /* just return targ as-is */
- }
-
- sv = nsv;
- }
- }
-
- if (sv == targ) {
- /* must warn for each RH usage of targ, except that
- * we will later get one warning when doing
- * SvPV_force(targ), *except* on '.=' */
- if ( !SvOK(sv)
- && (targ_chain || is_append)
- && ckWARN(WARN_UNINITIALIZED)
- )
- report_uninit(sv);
- goto targ_on_rhs;
- }
-
- /* stringify general SV */
+ goto do_magical;
+ else if (SvNIOK(sv)) {
+ if (targ == sv)
+ goto targ_on_rhs;
+ /* stringify general valid scalar */
svpv_end->pv = sv_2pv_flags(sv, &len, 0);
}
+ else if (!SvOK(sv)) {
+ if (ckWARN(WARN_UNINITIALIZED))
+ /* an undef value in the presence of warnings may trigger
+ * side affects */
+ goto do_magical;
+ svpv_end->pv = (char*)"";
+ len = 0;
+ }
+ else
+ goto do_magical; /* something weird */
utf8 = (SvFLAGS(sv) & SVf_UTF8);
dst_utf8 |= utf8;
@@ -652,31 +555,9 @@ PP(pp_multiconcat)
*/
if (is_append) {
- if (UNLIKELY(SvFLAGS(targ) & (SVs_GMG|SVf_ROK))) {
- SvGETMAGIC(targ); /* must do before SvAMAGIC() check */
- if (UNLIKELY(SvAMAGIC(targ))) {
- /* $overloaded .= ....;
- * accumulate RHS in a temp SV rather than targ,
- * then append tmp to targ at the end using overload
- */
- assert(!targ_chain);
- dsv = newSVpvn_flags("", 0, SVs_TEMP);
-
- if ( svpv_end == svpv_buf + 1
- /* no const string segments */
- && aux[PERL_MULTICONCAT_IX_LENGTHS].ssize == -1
- ) {
- /* special case $overloaded .= $arg1:
- * avoid stringifying $arg1.
- * Similar to the $arg1 . $arg2 case in phase1
- */
- svpv_end--;
- SP--;
- }
-
- goto phase3;
- }
- }
+ /* abandon quick route if using targ might have side effects */
+ if (UNLIKELY(SvFLAGS(targ) & (SVs_GMG|SVf_ROK)))
+ goto do_magical;
if (SvOK(targ)) {
U32 targ_utf8;
@@ -694,6 +575,10 @@ PP(pp_multiconcat)
grow += targ_len * (targ_count + is_append);
goto phase3;
}
+ else if (ckWARN(WARN_UNINITIALIZED))
+ /* warning might have side effects */
+ goto do_magical;
+ /* the undef targ will be silently SvPVCLEAR()ed below */
}
else if (UNLIKELY(SvTYPE(targ) >= SVt_REGEXP)) {
/* Assigning to some weird LHS type. Don't force the LHS to be an
@@ -705,28 +590,19 @@ PP(pp_multiconcat)
* (which makes the 'F' typeglob an alias to the
* '*main::F*main::F' typeglob).
*/
- goto setup_overload;
+ goto do_magical;
}
- else if (targ_chain) {
+ else if (targ_chain)
/* targ was found on RHS.
- * We don't need the SvGETMAGIC() call and SvAMAGIC() test as
- * both were already done earlier in the SvPV() loop; other
- * than that we can share the same code with the append
- * branch below.
- * Note that this goto jumps directly into the SvOK() branch
- * even if targ isn't SvOK(), to force an 'uninitialised'
- * warning; e.g.
- * $undef .= .... targ only on LHS: don't warn
- * $undef .= $undef .... targ on RHS too: warn
+ * Force stringify it, using the same code as the append branch
+ * above, except that we don't need the magic/overload/undef
+ * checks as these will already have been done in the phase 1
+ * loop.
*/
- assert(!SvAMAGIC(targ));
goto stringify_targ;
- }
-
/* unrolled SvPVCLEAR() - mostly: no need to grow or set SvCUR() to 0;
* those will be done later. */
- assert(targ == dsv);
SV_CHECK_THINKFIRST_COW_DROP(targ);
SvUPGRADE(targ, SVt_PV);
SvFLAGS(targ) &= ~(SVf_OK|SVf_IVisUV|SVf_UTF8);
@@ -737,10 +613,10 @@ PP(pp_multiconcat)
/* --------------------------------------------------------------
* Phase 3:
*
- * UTF-8 tweaks and grow dsv:
+ * UTF-8 tweaks and grow targ:
*
* Now that we know the length and utf8-ness of both the targ and
- * args, grow dsv to the size needed to accumulate all the args, based
+ * args, grow targ to the size needed to accumulate all the args, based
* on whether targ appears on the RHS, whether we're appending, and
* whether any non-utf8 args expand in size if converted to utf8.
*
@@ -779,7 +655,7 @@ PP(pp_multiconcat)
/* turn off utf8 handling if 'use bytes' is in scope */
if (UNLIKELY(dst_utf8 && IN_BYTES)) {
dst_utf8 = 0;
- SvUTF8_off(dsv);
+ SvUTF8_off(targ);
/* undo all the negative lengths which flag utf8-ness */
for (svpv_p = svpv_buf; svpv_p < svpv_end; svpv_p++) {
SSize_t len = svpv_p->len;
@@ -837,16 +713,16 @@ PP(pp_multiconcat)
/* unrolled SvGROW(), except don't check for SVf_IsCOW, which should
* already have been dropped */
- assert(!SvIsCOW(dsv));
- dsv_pv = (SvLEN(dsv) < (grow) ? sv_grow(dsv,grow) : SvPVX(dsv));
+ assert(!SvIsCOW(targ));
+ targ_pv = (SvLEN(targ) < (grow) ? sv_grow(targ,grow) : SvPVX(targ));
/* --------------------------------------------------------------
* Phase 4:
*
- * Now that dsv (which is probably targ) has been grown, we know the
- * final address of the targ PVX, if needed. Preserve / move targ
- * contents if appending or if targ appears on RHS.
+ * Now that targ has been grown, we know the final address of the targ
+ * PVX, if needed. Preserve / move targ contents if appending or if
+ * targ appears on RHS.
*
* Also update svpv_buf slots in targ_chain.
*
@@ -869,7 +745,7 @@ PP(pp_multiconcat)
* On exit, the targ contents will have been moved to the
* earliest place they are needed (e.g. $x = "abc$x" will shift them
* 3 bytes, while $x .= ... will leave them at the beginning);
- * and dst_pv will point to the location within SvPVX(dsv) where the
+ * and dst_pv will point to the location within SvPVX(targ) where the
* next arg should be copied.
*/
@@ -877,13 +753,12 @@ PP(pp_multiconcat)
if (targ_len) {
struct multiconcat_svpv *tc_stop;
- char *targ_pv = dsv_pv;
+ char *targ_buf = targ_pv; /* ptr to original targ string */
- assert(targ == dsv);
assert(is_append || targ_count);
if (is_append) {
- dsv_pv += targ_len;
+ targ_pv += targ_len;
tc_stop = NULL;
}
else {
@@ -924,8 +799,8 @@ PP(pp_multiconcat)
}
if (offset) {
- targ_pv += offset;
- Move(dsv_pv, targ_pv, targ_len, char);
+ targ_buf += offset;
+ Move(targ_pv, targ_buf, targ_len, char);
/* a negative length implies don't Copy(), but do increment */
svpv_p->len = -((SSize_t)targ_len);
slow_concat = TRUE;
@@ -934,7 +809,7 @@ PP(pp_multiconcat)
/* skip the first targ copy */
svpv_base++;
const_lens++;
- dsv_pv += targ_len;
+ targ_pv += targ_len;
}
/* Don't populate the first targ slot in the loop below; it's
@@ -948,7 +823,7 @@ PP(pp_multiconcat)
while (targ_chain != tc_stop) {
struct multiconcat_svpv *p = targ_chain;
targ_chain = (struct multiconcat_svpv *)(p->pv);
- p->pv = targ_pv;
+ p->pv = targ_buf;
p->len = (SSize_t)targ_len;
}
}
@@ -957,7 +832,7 @@ PP(pp_multiconcat)
/* --------------------------------------------------------------
* Phase 5:
*
- * Append all the args in svpv_buf, plus the const strings, to dsv.
+ * Append all the args in svpv_buf, plus the const strings, to targ.
*
* On entry to this section the (pv,len) pairs in svpv_buf have the
* following meanings:
@@ -965,7 +840,7 @@ PP(pp_multiconcat)
* (pv, -(len+extra)) a plain string which will expand by 'extra'
* bytes when converted to utf8
* (0, -len) left-most targ, whose content has already
- * been copied. Just advance dsv_pv by len.
+ * been copied. Just advance targ_pv by len.
*/
/* If there are no constant strings and no special case args
@@ -976,8 +851,8 @@ PP(pp_multiconcat)
SSize_t len = svpv_p->len;
if (!len)
continue;
- Copy(svpv_p->pv, dsv_pv, len, char);
- dsv_pv += len;
+ Copy(svpv_p->pv, targ_pv, len, char);
+ targ_pv += len;
}
const_lens += (svpv_end - svpv_base + 1);
}
@@ -992,8 +867,8 @@ PP(pp_multiconcat)
/* append next const string segment */
if (len > 0) {
- Copy(const_pv, dsv_pv, len, char);
- dsv_pv += len;
+ Copy(const_pv, targ_pv, len, char);
+ targ_pv += len;
const_pv += len;
}
@@ -1004,8 +879,8 @@ PP(pp_multiconcat)
len = svpv_p->len;
if (LIKELY(len > 0)) {
- Copy(svpv_p->pv, dsv_pv, len, char);
- dsv_pv += len;
+ Copy(svpv_p->pv, targ_pv, len, char);
+ targ_pv += len;
}
else if (UNLIKELY(len < 0)) {
/* negative length indicates two special cases */
@@ -1013,141 +888,227 @@ PP(pp_multiconcat)
len = -len;
if (UNLIKELY(p)) {
/* copy plain-but-variant pv to a utf8 targ */
- char * end_pv = dsv_pv + len;
+ char * end_pv = targ_pv + len;
assert(dst_utf8);
- while (dsv_pv < end_pv) {
+ while (targ_pv < end_pv) {
U8 c = (U8) *p++;
- append_utf8_from_native_byte(c, (U8**)&dsv_pv);
+ append_utf8_from_native_byte(c, (U8**)&targ_pv);
}
}
else
/* arg is already-copied targ */
- dsv_pv += len;
+ targ_pv += len;
}
}
}
- *dsv_pv = '\0';
- SvCUR_set(dsv, dsv_pv - SvPVX(dsv));
- assert(grow >= SvCUR(dsv) + 1);
- assert(SvLEN(dsv) >= SvCUR(dsv) + 1);
+ *targ_pv = '\0';
+ SvCUR_set(targ, targ_pv - SvPVX(targ));
+ assert(grow >= SvCUR(targ) + 1);
+ assert(SvLEN(targ) >= SvCUR(targ) + 1);
/* --------------------------------------------------------------
* Phase 6:
*
- * Handle overloading. If an overloaded arg or targ was detected
- * earlier, dsv will have been set to a new mortal, and any args and
- * consts to the left of the first overloaded arg will have been
- * accumulated to it. This section completes any further concatenation
- * steps with overloading handled.
+ * return result
*/
- if (UNLIKELY(dsv != targ)) {
- SV *res;
+ SP -= stack_adj;
+ SvTAINT(targ);
+ SETTARG;
+ RETURN;
- SvFLAGS(dsv) |= dst_utf8;
+ /* --------------------------------------------------------------
+ * Phase 7:
+ *
+ * We only get here if any of the args (or targ too in the case of
+ * append) have something which might cause side effects, such
+ * as magic, overload, or an undef value in the presence of warnings.
+ * In that case, any earlier attempt to stringify the args will have
+ * been abandoned, and we come here instead.
+ *
+ * Here, we concat each arg in turn the old-fashioned way: essentially
+ * emulating pp_concat() in a loop. This means that all the weird edge
+ * cases will be handled correctly, if not necessarily speedily.
+ *
+ * Note that some args may already have been stringified - those are
+ * processed again, which is safe, since only args without side-effects
+ * were stringified earlier.
+ */
+
+ do_magical:
+ {
+ SSize_t i, n;
+ SV *left = NULL;
+ SV *right;
+ SV* nexttarg;
+ bool nextappend;
+ U32 utf8 = 0;
+ SV **svp;
+ const char *cpv = aux[PERL_MULTICONCAT_IX_PLAIN_PV].pv;
+ UNOP_AUX_item *lens = aux + PERL_MULTICONCAT_IX_LENGTHS;
+ Size_t arg_count = 0; /* how many args have been processed */
+
+ if (!cpv) {
+ cpv = aux[PERL_MULTICONCAT_IX_UTF8_PV].pv;
+ utf8 = SVf_UTF8;
+ }
- if (SP <= toparg) {
- /* Stringifying the RHS was abandoned because *SP
- * is overloaded. dsv contains all the concatted strings
- * before *SP. Apply the rest of the args using overloading.
+ svp = toparg - nargs + 1;
+
+ /* iterate for:
+ * nargs arguments,
+ * plus possible nargs+1 consts,
+ * plus, if appending, a final targ in an extra last iteration
+ */
+
+ n = nargs *2 + 1;
+ for (i = 0; i <= n; i++) {
+ SSize_t len;
+
+ /* if necessary, stringify the final RHS result in
+ * something like $targ .= "$a$b$c" - simulating
+ * pp_stringify
*/
- SV *left, *right, *res;
- int i;
- bool getmg = FALSE;
- /* number of args already concatted */
- SSize_t n = (nargs - 1) - (toparg - SP);
- /* current arg is either the first
- * or second value to be concatted
- * (including constant strings), so would
- * form part of the first concat */
- bool first_concat = ( n == 0
- || (n == 1 && const_lens[-2].ssize < 0
- && const_lens[-1].ssize < 0));
- int f_assign = first_concat ? 0 : AMGf_assign;
-
- left = dsv;
-
- for (; n < nargs; n++) {
- /* loop twice, first applying the arg, then the const segment */
- for (i = 0; i < 2; i++) {
- if (i) {
- /* append next const string segment */
- STRLEN len = (STRLEN)((const_lens++)->ssize);
- /* a length of -1 implies no constant string
- * rather than a zero-length one, e.g.
- * ($a . $b) versus ($a . "" . $b)
- */
- if ((SSize_t)len < 0)
- continue;
+ if ( i == n
+ && (PL_op->op_private &OPpMULTICONCAT_STRINGIFY)
+ && !(SvPOK(left))
+ /* extra conditions for backwards compatibility:
+ * probably incorrect, but keep the existing behaviour
+ * for now. The rules are:
+ * $x = "$ov" single arg: stringify;
+ * $x = "$ov$y" multiple args: don't stringify,
+ * $lex = "$ov$y$z" except TARGMY with at least 2 concats
+ */
+ && ( arg_count == 1
+ || ( arg_count >= 3
+ && !is_append
+ && (PL_op->op_private & OPpTARGET_MY)
+ && !(PL_op->op_private & OPpLVAL_INTRO)
+ )
+ )
+ )
+ {
+ SV *tmp = sv_newmortal();
+ sv_copypv(tmp, left);
+ SvSETMAGIC(tmp);
+ left = tmp;
+ }
- /* set right to the next constant string segment */
- right = newSVpvn_flags(const_pv, len,
- (dst_utf8 | SVs_TEMP));
- const_pv += len;
- }
- else {
- /* append next arg */
- right = *SP++;
- if (getmg)
- SvGETMAGIC(right);
- else
- /* SvGETMAGIC already called on this SV just
- * before we broke from the loop earlier */
- getmg = TRUE;
-
- if (first_concat && n == 0 && const_lens[-1].ssize < 0) {
- /* nothing before the current arg; repeat the
- * loop to get a second arg */
- left = right;
- first_concat = FALSE;
- continue;
- }
- }
+ /* do one extra iteration to handle $targ in $targ .= ... */
+ if (i == n && !is_append)
+ break;
+
+ /* get the next arg SV or regen the next const SV */
+ len = lens[i >> 1].ssize;
+ if (i == n) {
+ /* handle the final targ .= (....) */
+ right = left;
+ left = targ;
+ }
+ else if (i & 1)
+ right = svp[(i >> 1)];
+ else if (len < 0)
+ continue; /* no const in this position */
+ else {
+ right = newSVpvn_flags(cpv, len, (utf8 | SVs_TEMP));
+ cpv += len;
+ }
+
+ arg_count++;
+
+ if (arg_count <= 1) {
+ left = right;
+ continue; /* need at least two SVs to concat together */
+ }
+
+ if (arg_count == 2 && i < n) {
+ /* for the first concat, create a mortal acting like the
+ * padtmp from OP_CONST. In later iterations this will
+ * be appended to */
+ nexttarg = sv_newmortal();
+ nextappend = FALSE;
+ }
+ else {
+ nexttarg = left;
+ nextappend = TRUE;
+ }
+
+ /* Handle possible overloading.
+ * This is basically an unrolled
+ * tryAMAGICbin_MG(concat_amg, AMGf_assign);
+ * and
+ * Perl_try_amagic_bin()
+ * call, but using left and right rather than SP[-1], SP[0],
+ * and not relying on OPf_STACKED implying .=
+ */
- if ((SvAMAGIC(left) || SvAMAGIC(right))
- && (res = amagic_call(left, right, concat_amg, f_assign))
+ if ((SvFLAGS(left)|SvFLAGS(right)) & (SVf_ROK|SVs_GMG)) {
+ SvGETMAGIC(left);
+ if (left != right)
+ SvGETMAGIC(right);
+
+ if ((SvAMAGIC(left) || SvAMAGIC(right))
+ /* sprintf doesn't do concat overloading,
+ * but allow for $x .= sprintf(...)
+ */
+ && ( !(PL_op->op_private & OPpMULTICONCAT_FAKE)
+ || i == n)
)
- left = res;
- else {
- if (left != dsv) {
- sv_setsv(dsv, left);
- left = dsv;
+ {
+ SV * const tmpsv = amagic_call(left, right, concat_amg,
+ (nextappend ? AMGf_assign: 0));
+ if (tmpsv) {
+ /* NB: tryAMAGICbin_MG() includes an SvPADMY test
+ * here, which isn;t needed as any implicit
+ * assign does under OPpTARGET_MY is done after
+ * this loop */
+ if (nextappend) {
+ sv_setsv(left, tmpsv);
+ SvSETMAGIC(left);
}
- sv_catsv_nomg(left, right);
+ else
+ left = tmpsv;
+ continue;
+ }
+ }
+
+ /* if both args are the same magical value, make one a copy */
+ if (left == right && SvGMAGICAL(left)) {
+ left = sv_newmortal();
+ /* Print the uninitialized warning now, so it includes the
+ * variable name. */
+ if (!SvOK(right)) {
+ if (ckWARN(WARN_UNINITIALIZED))
+ report_uninit(right);
+ sv_setsv_flags(left, &PL_sv_no, 0);
}
- f_assign = AMGf_assign;
+ else
+ sv_setsv_flags(left, right, 0);
+ SvGETMAGIC(right);
}
}
- dsv = left;
- }
- /* assign/append RHS (dsv) to LHS (targ) */
- if (is_append) {
- if ((SvAMAGIC(targ) || SvAMAGIC(dsv))
- && (res = amagic_call(targ, dsv, concat_amg, AMGf_assign))
- )
- sv_setsv(targ, res);
- else
- sv_catsv_nomg(targ, dsv);
+ /* nexttarg = left . right */
+ S_do_concat(aTHX_ left, right, nexttarg, 0);
+ left = nexttarg;
}
- else
- sv_setsv(targ, dsv);
- }
-
- /* --------------------------------------------------------------
- * Phase 7:
- *
- * return result
- */
- phase7:
+ SP = toparg - stack_adj + 1;
- SP -= stack_adj;
- SvTAINT(targ);
- SETTARG;
- RETURN;
+ /* Assign result of all RHS concats (left) to LHS (targ).
+ * If we are appending, targ will already have been appended to in
+ * the loop */
+ if (is_append)
+ SvTAINT(targ);
+ else {
+ sv_setsv(targ, left);
+ SvSETMAGIC(targ);
+ }
+ SETs(targ);
+ RETURN;
+ }
}
diff --git a/regen/op_private b/regen/op_private
index 49cb4bc035..a94c0c38c0 100644
--- a/regen/op_private
+++ b/regen/op_private
@@ -824,7 +824,7 @@ addbits('multiconcat',
6 => qw(OPpMULTICONCAT_APPEND APPEND), # $x .= ....
5 => qw(OPpMULTICONCAT_FAKE FAKE), # sprintf() optimised to MC.
# 4 OPpTARGET_MY
- 3 => qw(OPpMULTICONCAT_STRINGIFY STRINGIFY), # "$a$b...", (for Deparse.pm)
+ 3 => qw(OPpMULTICONCAT_STRINGIFY STRINGIFY), # "$a$b..."
);
diff --git a/t/opbasic/concat.t b/t/opbasic/concat.t
index 42851d23b9..9ce9722f5c 100644
--- a/t/opbasic/concat.t
+++ b/t/opbasic/concat.t
@@ -39,7 +39,7 @@ sub is {
return $ok;
}
-print "1..252\n";
+print "1..253\n";
($a, $b, $c) = qw(foo bar);
@@ -840,3 +840,16 @@ ok(ref(CORE::state $y = "a $o b") eq 'o',
"AXBCXDAXBCXDAXBCXDAXBCXDAXBCXDAXBCXDAXBCXDAXBCXDAXBCXDAXBCXDAXBCXDAXBCXDAXBCXDAXBCXDAXBCXDAXBCXDAXBCXD",
"RT #132646");
}
+
+# RT #132595
+# multiconcat shouldn't affect the order of arg evaluation
+package RT132595 {
+ my $a = "a";
+ my $i = 0;
+ sub TIESCALAR { bless({}, $_[0]) }
+ sub FETCH { ++$i; $a = "b".$i; "c".$i }
+ my $t;
+ tie $t, "RT132595";
+ my $res = $a.$t.$a.$t;
+ ::is($res, "b1c1b1c2", "RT #132595");
+}
diff --git a/t/re/pat_re_eval.t b/t/re/pat_re_eval.t
index efcff4e71c..f88a8651a1 100644
--- a/t/re/pat_re_eval.t
+++ b/t/re/pat_re_eval.t
@@ -23,7 +23,7 @@ BEGIN {
our @global;
-plan tests => 497; # Update this when adding/deleting tests.
+plan tests => 502; # Update this when adding/deleting tests.
run_tests() unless caller;
@@ -1278,6 +1278,29 @@ sub run_tests {
is $max, 2, "RT #126697";
}
+ # RT #132772
+ #
+ # Ensure that optimisation of OP_CONST into OP_MULTICONCAT doesn't
+ # leave any freed ops in the execution path. This is is associated
+ # with rpeep() being called before optimize_optree(), which causes
+ # gv/rv2sv to be prematurely optimised into gvsv, confusing
+ # S_maybe_multiconcat when it tries to reorganise a concat subtree
+ # into a multiconcat list
+
+ {
+ my $a = "a";
+ local $b = "b"; # not lexical, so optimised to OP_GVSV
+ local $_ = "abc";
+ ok /^a(??{ $b."c" })$/, "RT #132772 - compile time";
+ ok /^$a(??{ $b."c" })$/, "RT #132772 - run time";
+ my $qr = qr/^a(??{ $b."c" })$/;
+ ok /$qr/, "RT #132772 - compile time time qr//";
+ $qr = qr/(??{ $b."c" })$/;
+ ok /^a$qr$/, "RT #132772 - compile time time qr// compound";
+ $qr = qr/$a(??{ $b."c" })$/;
+ ok /^$qr$/, "RT #132772 - run time time qr//";
+ }
+
} # End of sub run_tests