summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--dist/Safe/t/safeops.t1
-rw-r--r--dump.c56
-rw-r--r--embed.fnc1
-rw-r--r--embed.h1
-rw-r--r--ext/B/B.xs59
-rw-r--r--ext/B/t/optree_samples.t186
-rw-r--r--ext/Opcode/Opcode.pm4
-rw-r--r--gv.c7
-rw-r--r--lib/B/Deparse.pm227
-rw-r--r--lib/B/Deparse.t133
-rw-r--r--lib/B/Op_private.pm18
-rw-r--r--op.c902
-rw-r--r--op.h2
-rw-r--r--opcode.h365
-rw-r--r--opnames.h661
-rw-r--r--perl.h20
-rw-r--r--pp_hot.c803
-rw-r--r--pp_proto.h1
-rw-r--r--proto.h3
-rw-r--r--regen/op_private11
-rwxr-xr-xregen/opcode.pl2
-rw-r--r--regen/opcodes1
-rw-r--r--sv.c3
-rw-r--r--t/op/gmagic.t23
-rw-r--r--t/op/sprintf2.t76
-rw-r--r--t/op/state.t9
-rw-r--r--t/opbasic/concat.t651
-rw-r--r--t/perf/benchmarks351
-rw-r--r--t/perf/opcount.t236
29 files changed, 4174 insertions, 639 deletions
diff --git a/dist/Safe/t/safeops.t b/dist/Safe/t/safeops.t
index 0b696a8149..ea15931600 100644
--- a/dist/Safe/t/safeops.t
+++ b/dist/Safe/t/safeops.t
@@ -234,6 +234,7 @@ exists exists $h{Key}
rv2hv %h
helem $h{kEy}
hslice @h{kEy}
+multiconcat SKIP (set by optimizer)
multideref SKIP (set by optimizer)
unpack unpack
pack pack
diff --git a/dump.c b/dump.c
index a2c0bbc5b5..bf01207e24 100644
--- a/dump.c
+++ b/dump.c
@@ -1141,6 +1141,15 @@ S_do_op_dump_bar(pTHX_ I32 level, UV bar, PerlIO *file, const OP *o)
break;
}
+ case OP_MULTICONCAT:
+ S_opdump_indent(aTHX_ o, level, bar, file, "NARGS = %" UVuf "\n",
+ cUNOP_AUXo->op_aux[PERL_MULTICONCAT_IX_NARGS].uv);
+ /* XXX really ought to dump each field individually,
+ * but that's too much like hard work */
+ S_opdump_indent(aTHX_ o, level, bar, file, "CONSTS = (%" SVf ")\n",
+ SVfARG(multiconcat_stringify(o)));
+ break;
+
case OP_CONST:
case OP_HINTSEVAL:
case OP_METHOD_NAMED:
@@ -2728,6 +2737,48 @@ Perl_multideref_stringify(pTHX_ const OP *o, CV *cv)
}
+/* Return a temporary SV containing a stringified representation of
+ * the op_aux field of a MULTICONCAT op. Note that if the aux contains
+ * both plain and utf8 versions of the const string and indices, only
+ * the first is displayed.
+ */
+
+SV*
+Perl_multiconcat_stringify(pTHX_ const OP *o)
+{
+ UNOP_AUX_item *aux = cUNOP_AUXo->op_aux;
+ UNOP_AUX_item *lens;
+ STRLEN len;
+ UV nargs;
+ char *s;
+ SV *out = newSVpvn_flags("", 0, SVs_TEMP);
+
+ PERL_ARGS_ASSERT_MULTICONCAT_STRINGIFY;
+
+ nargs = aux[PERL_MULTICONCAT_IX_NARGS].uv;
+ s = aux[PERL_MULTICONCAT_IX_PLAIN_PV].pv;
+ len = aux[PERL_MULTICONCAT_IX_PLAIN_LEN].size;
+ if (!s) {
+ s = aux[PERL_MULTICONCAT_IX_UTF8_PV].pv;
+ len = aux[PERL_MULTICONCAT_IX_UTF8_LEN].size;
+ sv_catpvs(out, "UTF8 ");
+ }
+ pv_pretty(out, s, len, 50,
+ NULL, NULL,
+ (PERL_PV_PRETTY_NOCLEAR
+ |PERL_PV_PRETTY_QUOTE
+ |PERL_PV_PRETTY_ELLIPSES));
+
+ lens = aux + PERL_MULTICONCAT_IX_LENGTHS;
+ nargs++;
+ while (nargs-- > 0) {
+ Perl_sv_catpvf(aTHX_ out, ",%" IVdf, (IV)lens->size);
+ lens++;
+ }
+ return out;
+}
+
+
I32
Perl_debop(pTHX_ const OP *o)
{
@@ -2772,6 +2823,11 @@ Perl_debop(pTHX_ const OP *o)
SVfARG(multideref_stringify(o, deb_curcv(cxstack_ix))));
break;
+ case OP_MULTICONCAT:
+ PerlIO_printf(Perl_debug_log, "(%" SVf ")",
+ SVfARG(multiconcat_stringify(o)));
+ break;
+
default:
break;
}
diff --git a/embed.fnc b/embed.fnc
index fdc3eca26a..434f225aae 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -395,6 +395,7 @@ Afp |void |deb |NN const char* pat|...
Ap |void |vdeb |NN const char* pat|NULLOK va_list* args
Ap |void |debprofdump
EXp |SV* |multideref_stringify |NN const OP* o|NULLOK CV *cv
+EXp |SV* |multiconcat_stringify |NN const OP* o
Ap |I32 |debop |NN const OP* o
Ap |I32 |debstack
Ap |I32 |debstackptrs
diff --git a/embed.h b/embed.h
index 39f579af24..cd5ff23e72 100644
--- a/embed.h
+++ b/embed.h
@@ -923,6 +923,7 @@
#define cv_ckproto_len_flags(a,b,c,d,e) Perl_cv_ckproto_len_flags(aTHX_ a,b,c,d,e)
#define grok_atoUV Perl_grok_atoUV
#define mg_find_mglob(a) Perl_mg_find_mglob(aTHX_ a)
+#define multiconcat_stringify(a) Perl_multiconcat_stringify(aTHX_ a)
#define multideref_stringify(a,b) Perl_multideref_stringify(aTHX_ a,b)
#define op_clear(a) Perl_op_clear(aTHX_ a)
#define qerror(a) Perl_qerror(aTHX_ a)
diff --git a/ext/B/B.xs b/ext/B/B.xs
index 74edd38720..02b78caa5d 100644
--- a/ext/B/B.xs
+++ b/ext/B/B.xs
@@ -1183,6 +1183,10 @@ string(o, cv)
PPCODE:
aux = cUNOP_AUXo->op_aux;
switch (o->op_type) {
+ case OP_MULTICONCAT:
+ ret = multiconcat_stringify(o);
+ break;
+
case OP_MULTIDEREF:
ret = multideref_stringify(o, cv);
break;
@@ -1238,6 +1242,61 @@ aux_list(o, cv)
(char)aux[2].iv) : &PL_sv_no));
break;
+ case OP_MULTICONCAT:
+ {
+ UV nargs = aux[0].uv;
+ char *p;
+ STRLEN len;
+ U32 utf8 = 0;
+ SV *sv;
+ UNOP_AUX_item *lens;
+
+ /* return (nargs, const string, segment len 0, 1, 2, ...) */
+
+ /* if this changes, this block of code probably needs fixing */
+ assert(PERL_MULTICONCAT_HEADER_SIZE == 5);
+ nargs = aux[PERL_MULTICONCAT_IX_NARGS].uv;
+ EXTEND(SP, ((SSize_t)(2 + (nargs+1))));
+ PUSHs(sv_2mortal(newSViv(nargs)));
+
+ p = aux[PERL_MULTICONCAT_IX_PLAIN_PV].pv;
+ len = aux[PERL_MULTICONCAT_IX_PLAIN_LEN].size;
+ if (!p) {
+ p = aux[PERL_MULTICONCAT_IX_UTF8_PV].pv;
+ len = aux[PERL_MULTICONCAT_IX_UTF8_LEN].size;
+ utf8 = SVf_UTF8;
+ }
+ sv = newSVpvn(p, len);
+ SvFLAGS(sv) |= utf8;
+ PUSHs(sv_2mortal(sv));
+
+ lens = aux + PERL_MULTICONCAT_IX_LENGTHS;
+ nargs++; /* loop (nargs+1) times */
+ if (utf8) {
+ U8 *p = (U8*)SvPVX(sv);
+ while (nargs--) {
+ SSize_t bytes = lens->size;
+ SSize_t chars;
+ if (bytes <= 0)
+ chars = bytes;
+ else {
+ /* return char lengths rather than byte lengths */
+ chars = utf8_length(p, p + bytes);
+ p += bytes;
+ }
+ lens++;
+ PUSHs(sv_2mortal(newSViv(chars)));
+ }
+ }
+ else {
+ while (nargs--) {
+ PUSHs(sv_2mortal(newSViv(lens->size)));
+ lens++;
+ }
+ }
+ break;
+ }
+
case OP_MULTIDEREF:
#ifdef USE_ITHREADS
# define ITEM_SV(item) *av_fetch(comppad, (item)->pad_offset, FALSE);
diff --git a/ext/B/t/optree_samples.t b/ext/B/t/optree_samples.t
index 7374626663..7b63f6f141 100644
--- a/ext/B/t/optree_samples.t
+++ b/ext/B/t/optree_samples.t
@@ -240,38 +240,36 @@ checkOptree ( name => '-exec sub { foreach (1..10) {print "foo $_"} }',
# 3 <$> const[IV 1] s
# 4 <$> const[IV 10] s
# 5 <#> gv[*_] s
-# 6 <{> enteriter(next->d last->g redo->7) KS/DEF
-# e <0> iter s
-# f <|> and(other->7) K/1
-# 7 <;> nextstate(main 442 optree.t:158) v:>,<,%
+# 6 <{> enteriter(next->c last->f redo->7) KS/DEF
+# d <0> iter s
+# e <|> and(other->7) K/1
+# 7 <;> nextstate(main 1659 optree_samples.t:234) v
# 8 <0> pushmark s
-# 9 <$> const[PV "foo "] s
-# a <#> gvsv[*_] s
-# b <2> concat[t4] sK/2
-# c <@> print vK
-# d <0> unstack s
-# goto e
-# g <2> leaveloop K/2
-# h <1> leavesub[1 ref] K/REFC,1
+# 9 <#> gvsv[*_] s
+# a <+> multiconcat("foo ",4,-1)[t5] sK/STRINGIFY
+# b <@> print vK
+# c <0> unstack s
+# goto d
+# f <2> leaveloop K/2
+# g <1> leavesub[1 ref] K/REFC,1
EOT_EOT
# 1 <;> nextstate(main 444 optree_samples.t:182) v:>,<,%
# 2 <0> pushmark s
# 3 <$> const(IV 1) s
# 4 <$> const(IV 10) s
# 5 <$> gv(*_) s
-# 6 <{> enteriter(next->d last->g redo->7) KS/DEF
-# e <0> iter s
-# f <|> and(other->7) K/1
+# 6 <{> enteriter(next->c last->f redo->7) KS/DEF
+# d <0> iter s
+# e <|> and(other->7) K/1
# 7 <;> nextstate(main 443 optree_samples.t:182) v:>,<,%
# 8 <0> pushmark s
-# 9 <$> const(PV "foo ") s
-# a <$> gvsv(*_) s
-# b <2> concat[t3] sK/2
-# c <@> print vK
-# d <0> unstack s
-# goto e
-# g <2> leaveloop K/2
-# h <1> leavesub[1 ref] K/REFC,1
+# 9 <$> gvsv(*_) s
+# a <+> multiconcat("foo ",4,-1)[t4] sK/STRINGIFY
+# b <@> print vK
+# c <0> unstack s
+# goto d
+# f <2> leaveloop K/2
+# g <1> leavesub[1 ref] K/REFC,1
EONT_EONT
checkOptree ( name => '-basic sub { print "foo $_" foreach (1..10) }',
@@ -279,55 +277,53 @@ checkOptree ( name => '-basic sub { print "foo $_" foreach (1..10) }',
bcopts => '-basic',
strip_open_hints => 1,
expect => <<'EOT_EOT', expect_nt => <<'EONT_EONT');
-# g <1> leavesub[1 ref] K/REFC,1 ->(end)
-# - <@> lineseq KP ->g
+# f <1> leavesub[1 ref] K/REFC,1 ->(end)
+# - <@> lineseq KP ->f
# 1 <;> nextstate(main 445 optree.t:167) v:>,<,% ->2
-# f <2> leaveloop K/2 ->g
-# 6 <{> enteriter(next->c last->f redo->7) KS/DEF ->d
+# e <2> leaveloop K/2 ->f
+# 6 <{> enteriter(next->b last->e redo->7) KS/DEF ->c
# - <0> ex-pushmark s ->2
# - <1> ex-list lK ->5
# 2 <0> pushmark s ->3
# 3 <$> const[IV 1] s ->4
# 4 <$> const[IV 10] s ->5
# 5 <#> gv[*_] s ->6
-# - <1> null K/1 ->f
-# e <|> and(other->7) K/1 ->f
-# d <0> iter s ->e
+# - <1> null K/1 ->e
+# d <|> and(other->7) K/1 ->e
+# c <0> iter s ->d
# - <@> lineseq sK ->-
-# b <@> print vK ->c
+# a <@> print vK ->b
# 7 <0> pushmark s ->8
-# - <1> ex-stringify sK/1 ->b
-# - <0> ex-pushmark s ->8
-# a <2> concat[t2] sK/2 ->b
-# 8 <$> const[PV "foo "] s ->9
-# - <1> ex-rv2sv sK/1 ->a
-# 9 <#> gvsv[*_] s ->a
-# c <0> unstack s ->d
+# 9 <+> multiconcat("foo ",4,-1)[t3] sK/STRINGIFY ->a
+# - <0> ex-pushmark s ->-
+# - <0> ex-const s ->8
+# - <1> ex-rv2sv sK/1 ->9
+# 8 <#> gvsv[*_] s ->9
+# b <0> unstack s ->c
EOT_EOT
-# g <1> leavesub[1 ref] K/REFC,1 ->(end)
-# - <@> lineseq KP ->g
+# f <1> leavesub[1 ref] K/REFC,1 ->(end)
+# - <@> lineseq KP ->f
# 1 <;> nextstate(main 446 optree_samples.t:192) v:>,<,% ->2
-# f <2> leaveloop K/2 ->g
-# 6 <{> enteriter(next->c last->f redo->7) KS/DEF ->d
+# e <2> leaveloop K/2 ->f
+# 6 <{> enteriter(next->b last->e redo->7) KS/DEF ->c
# - <0> ex-pushmark s ->2
# - <1> ex-list lK ->5
# 2 <0> pushmark s ->3
# 3 <$> const(IV 1) s ->4
# 4 <$> const(IV 10) s ->5
# 5 <$> gv(*_) s ->6
-# - <1> null K/1 ->f
-# e <|> and(other->7) K/1 ->f
-# d <0> iter s ->e
+# - <1> null K/1 ->e
+# d <|> and(other->7) K/1 ->e
+# c <0> iter s ->d
# - <@> lineseq sK ->-
-# b <@> print vK ->c
+# a <@> print vK ->b
# 7 <0> pushmark s ->8
-# - <1> ex-stringify sK/1 ->b
-# - <0> ex-pushmark s ->8
-# a <2> concat[t1] sK/2 ->b
-# 8 <$> const(PV "foo ") s ->9
-# - <1> ex-rv2sv sK/1 ->a
-# 9 <$> gvsv(*_) s ->a
-# c <0> unstack s ->d
+# 9 <+> multiconcat("foo ",4,-1)[t2] sK/STRINGIFY ->a
+# - <0> ex-pushmark s ->-
+# - <0> ex-const s ->8
+# - <1> ex-rv2sv sK/1 ->9
+# 8 <$> gvsv(*_) s ->9
+# b <0> unstack s ->c
EONT_EONT
checkOptree ( name => '-exec -e foreach (1..10) {print qq{foo $_}}',
@@ -341,19 +337,18 @@ checkOptree ( name => '-exec -e foreach (1..10) {print qq{foo $_}}',
# 4 <$> const[IV 1] s
# 5 <$> const[IV 10] s
# 6 <#> gv[*_] s
-# 7 <{> enteriter(next->e last->h redo->8) vKS/DEF
-# f <0> iter s
-# g <|> and(other->8) vK/1
+# 7 <{> enteriter(next->d last->g redo->8) vKS/DEF
+# e <0> iter s
+# f <|> and(other->8) vK/1
# 8 <;> nextstate(main 1 -e:1) v:>,<,%
# 9 <0> pushmark s
-# a <$> const[PV "foo "] s
-# b <#> gvsv[*_] s
-# c <2> concat[t4] sK/2
-# d <@> print vK
-# e <0> unstack v
-# goto f
-# h <2> leaveloop vK/2
-# i <@> leave[1 ref] vKP/REFC
+# a <#> gvsv[*_] s
+# b <+> multiconcat("foo ",4,-1)[t5] sK/STRINGIFY
+# c <@> print vK
+# d <0> unstack v
+# goto e
+# g <2> leaveloop vK/2
+# h <@> leave[1 ref] vKP/REFC
EOT_EOT
# 1 <0> enter
# 2 <;> nextstate(main 2 -e:1) v:>,<,%,{
@@ -361,19 +356,18 @@ EOT_EOT
# 4 <$> const(IV 1) s
# 5 <$> const(IV 10) s
# 6 <$> gv(*_) s
-# 7 <{> enteriter(next->e last->h redo->8) vKS/DEF
-# f <0> iter s
-# g <|> and(other->8) vK/1
+# 7 <{> enteriter(next->d last->g redo->8) vKS/DEF
+# e <0> iter s
+# f <|> and(other->8) vK/1
# 8 <;> nextstate(main 1 -e:1) v:>,<,%
# 9 <0> pushmark s
-# a <$> const(PV "foo ") s
-# b <$> gvsv(*_) s
-# c <2> concat[t3] sK/2
-# d <@> print vK
-# e <0> unstack v
-# goto f
-# h <2> leaveloop vK/2
-# i <@> leave[1 ref] vKP/REFC
+# a <$> gvsv(*_) s
+# b <+> multiconcat("foo ",4,-1)[t4] sK/STRINGIFY
+# c <@> print vK
+# d <0> unstack v
+# goto e
+# g <2> leaveloop vK/2
+# h <@> leave[1 ref] vKP/REFC
EONT_EONT
checkOptree ( name => '-exec sub { print "foo $_" foreach (1..10) }',
@@ -386,36 +380,34 @@ checkOptree ( name => '-exec sub { print "foo $_" foreach (1..10) }',
# 3 <$> const[IV 1] s
# 4 <$> const[IV 10] s
# 5 <#> gv[*_] s
-# 6 <{> enteriter(next->c last->f redo->7) KS/DEF
-# d <0> iter s
-# e <|> and(other->7) K/1
+# 6 <{> enteriter(next->b last->e redo->7) KS/DEF
+# c <0> iter s
+# d <|> and(other->7) K/1
# 7 <0> pushmark s
-# 8 <$> const[PV "foo "] s
-# 9 <#> gvsv[*_] s
-# a <2> concat[t2] sK/2
-# b <@> print vK
-# c <0> unstack s
-# goto d
-# f <2> leaveloop K/2
-# g <1> leavesub[1 ref] K/REFC,1
+# 8 <#> gvsv[*_] s
+# 9 <+> multiconcat("foo ",4,-1)[t3] sK/STRINGIFY
+# a <@> print vK
+# b <0> unstack s
+# goto c
+# e <2> leaveloop K/2
+# f <1> leavesub[1 ref] K/REFC,1
EOT_EOT
# 1 <;> nextstate(main 447 optree_samples.t:252) v:>,<,%
# 2 <0> pushmark s
# 3 <$> const(IV 1) s
# 4 <$> const(IV 10) s
# 5 <$> gv(*_) s
-# 6 <{> enteriter(next->c last->f redo->7) KS/DEF
-# d <0> iter s
-# e <|> and(other->7) K/1
+# 6 <{> enteriter(next->b last->e redo->7) KS/DEF
+# c <0> iter s
+# d <|> and(other->7) K/1
# 7 <0> pushmark s
-# 8 <$> const(PV "foo ") s
-# 9 <$> gvsv(*_) s
-# a <2> concat[t1] sK/2
-# b <@> print vK
-# c <0> unstack s
-# goto d
-# f <2> leaveloop K/2
-# g <1> leavesub[1 ref] K/REFC,1
+# 8 <$> gvsv(*_) s
+# 9 <+> multiconcat("foo ",4,-1)[t2] sK/STRINGIFY
+# a <@> print vK
+# b <0> unstack s
+# goto c
+# e <2> leaveloop K/2
+# f <1> leavesub[1 ref] K/REFC,1
EONT_EONT
pass("GREP: SAMPLES FROM PERLDOC -F GREP");
diff --git a/ext/Opcode/Opcode.pm b/ext/Opcode/Opcode.pm
index 7de0ac95fe..01c495d5d6 100644
--- a/ext/Opcode/Opcode.pm
+++ b/ext/Opcode/Opcode.pm
@@ -6,7 +6,7 @@ use strict;
our($VERSION, @ISA, @EXPORT_OK);
-$VERSION = "1.40";
+$VERSION = "1.41";
use Carp;
use Exporter ();
@@ -353,7 +353,7 @@ These memory related ops are not included in :base_core because they
can easily be used to implement a resource attack (e.g., consume all
available memory).
- concat repeat join range
+ concat multiconcat repeat join range
anonlist anonhash
diff --git a/gv.c b/gv.c
index fed5b7c2ee..90e8fe0f1e 100644
--- a/gv.c
+++ b/gv.c
@@ -3476,7 +3476,12 @@ Perl_amagic_call(pTHX_ SV *left, SV *right, int method, int flags)
SV* res;
const bool oldcatch = CATCH_GET;
I32 oldmark, nret;
- U8 gimme = force_scalar ? G_SCALAR : GIMME_V;
+ /* for multiconcat, we may call overload several times,
+ * with the context of individual concats being scalar,
+ * regardless of the overall context of the multiconcat op
+ */
+ U8 gimme = (force_scalar || PL_op->op_type == OP_MULTICONCAT)
+ ? G_SCALAR : GIMME_V;
CATCH_SET(TRUE);
Zero(&myop, 1, BINOP);
diff --git a/lib/B/Deparse.pm b/lib/B/Deparse.pm
index a1f7adcb6d..4ff427c51c 100644
--- a/lib/B/Deparse.pm
+++ b/lib/B/Deparse.pm
@@ -19,6 +19,7 @@ use B qw(class main_root main_start main_cv svref_2object opnumber perlstring
OPpSORT_REVERSE OPpMULTIDEREF_EXISTS OPpMULTIDEREF_DELETE
OPpSPLIT_ASSIGN OPpSPLIT_LEX
OPpPADHV_ISKEYS OPpRV2HV_ISKEYS
+ OPpMULTICONCAT_APPEND OPpMULTICONCAT_STRINGIFY OPpMULTICONCAT_FAKE
OPpTRUEBOOL OPpINDEX_BOOLNEG
SVf_IOK SVf_NOK SVf_ROK SVf_POK SVpad_OUR SVf_FAKE SVs_RMG SVs_SMG
SVs_PADTMP SVpad_TYPED
@@ -50,7 +51,7 @@ use B qw(class main_root main_start main_cv svref_2object opnumber perlstring
MDEREF_SHIFT
);
-$VERSION = '1.43';
+$VERSION = '1.44';
use strict;
use vars qw/$AUTOLOAD/;
use warnings ();
@@ -365,7 +366,7 @@ BEGIN {
BEGIN { for (qw[ const stringify rv2sv list glob pushmark null aelem
- kvaslice kvhslice
+ kvaslice kvhslice padsv
nextstate dbstate rv2av rv2hv helem custom ]) {
eval "sub OP_\U$_ () { " . opnumber($_) . "}"
}}
@@ -4404,6 +4405,142 @@ sub multideref_var_name {
}
+# deparse an OP_MULTICONCAT. If $in_dq is 1, we're within
+# a double-quoted string, so for example.
+# "abc\Qdef$x\Ebar"
+# might get compiled as
+# multiconcat("abc", metaquote(multiconcat("def", $x)), "bar")
+# and the inner multiconcat should be deparsed as C<def$x> rather than
+# the normal C<def . $x>
+# Ditto if $in_dq is 2, handle qr/...\Qdef$x\E.../.
+
+sub do_multiconcat {
+ my $self = shift;
+ my($op, $cx, $in_dq) = @_;
+
+ my $kid;
+ my @kids;
+ my $assign;
+ my $append;
+ my $lhs = "";
+
+ for ($kid = $op->first; !null $kid; $kid = $kid->sibling) {
+ # skip the consts and/or padsv we've optimised away
+ push @kids, $kid
+ unless $kid->type == OP_NULL
+ && ( $kid->targ == OP_PADSV
+ || $kid->targ == OP_CONST
+ || $kid->targ == OP_PUSHMARK);
+ }
+
+ $append = ($op->private & OPpMULTICONCAT_APPEND);
+
+ if ($op->private & OPpTARGET_MY) {
+ # '$lex = ...' or '$lex .= ....' or 'my $lex = '
+ $lhs = $self->padname($op->targ);
+ $lhs = "my $lhs" if ($op->private & OPpLVAL_INTRO);
+ $assign = 1;
+ }
+ elsif ($op->flags & OPf_STACKED) {
+ # 'expr = ...' or 'expr .= ....'
+ my $expr = $append ? shift(@kids) : pop(@kids);
+ $lhs = $self->deparse($expr, 7);
+ $assign = 1;
+ }
+
+ if ($assign) {
+ $lhs .= $append ? ' .= ' : ' = ';
+ }
+
+ my ($nargs, $const_str, @const_lens) = $op->aux_list($self->{curcv});
+
+ my @consts;
+ my $i = 0;
+ for (@const_lens) {
+ if ($_ == -1) {
+ push @consts, undef;
+ }
+ else {
+ push @consts, substr($const_str, $i, $_);
+ my @args;
+ $i += $_;
+ }
+ }
+
+ my $rhs = "";
+
+ if ( $in_dq
+ || (($op->private & OPpMULTICONCAT_STRINGIFY) && !$self->{'unquote'}))
+ {
+ # "foo=$foo bar=$bar "
+ my $not_first;
+ while (@consts) {
+ $rhs = dq_disambiguate($rhs, $self->dq(shift(@kids), 18))
+ if $not_first;
+ $not_first = 1;
+ my $c = shift @consts;
+ if (defined $c) {
+ if ($in_dq == 2) {
+ # in pattern: don't convert newline to '\n' etc etc
+ my $s = re_uninterp(escape_re(re_unback($c)));
+ $rhs = re_dq_disambiguate($rhs, $s)
+ }
+ else {
+ my $s = uninterp(escape_str(unback($c)));
+ $rhs = dq_disambiguate($rhs, $s)
+ }
+ }
+ }
+ return $rhs if $in_dq;
+ $rhs = single_delim("qq", '"', $rhs, $self);
+ }
+ elsif ($op->private & OPpMULTICONCAT_FAKE) {
+ # sprintf("foo=%s bar=%s ", $foo, $bar)
+
+ my @all;
+ @consts = map { $_ //= ''; s/%/%%/g; $_ } @consts;
+ my $fmt = join '%s', @consts;
+ push @all, $self->quoted_const_str($fmt);
+
+ # the following is a stripped down copy of sub listop {}
+ my $parens = $assign || ($cx >= 5) || $self->{'parens'};
+ my $fullname = $self->keyword('sprintf');
+ push @all, map $self->deparse($_, 6), @kids;
+
+ $rhs = $parens
+ ? "$fullname(" . join(", ", @all) . ")"
+ : "$fullname " . join(", ", @all);
+ }
+ else {
+ # "foo=" . $foo . " bar=" . $bar
+ my @all;
+ my $not_first;
+ while (@consts) {
+ push @all, $self->deparse(shift(@kids), 18) if $not_first;
+ $not_first = 1;
+ my $c = shift @consts;
+ if (defined $c) {
+ push @all, $self->quoted_const_str($c);
+ }
+ }
+ $rhs .= join ' . ', @all;
+ }
+
+ my $text = $lhs . $rhs;
+
+ $text = "($text)" if ($cx >= (($assign) ? 7 : 18+1))
+ || $self->{'parens'};
+
+ return $text;
+}
+
+
+sub pp_multiconcat {
+ my $self = shift;
+ $self->do_multiconcat(@_, 0);
+}
+
+
sub pp_multideref {
my $self = shift;
my($op, $cx) = @_;
@@ -4786,7 +4923,7 @@ sub retscalar {
|study|pos|preinc|i_preinc|predec|i_predec|postinc
|i_postinc|postdec|i_postdec|pow|multiply|i_multiply
|divide|i_divide|modulo|i_modulo|add|i_add|subtract
- |i_subtract|concat|stringify|left_shift|right_shift|lt
+ |i_subtract|concat|multiconcat|stringify|left_shift|right_shift|lt
|i_lt|gt|i_gt|le|i_le|ge|i_ge|eq|i_eq|ne|i_ne|ncmp|i_ncmp
|slt|sgt|sle|sge|seq|sne|scmp|[sn]?bit_(?:and|x?or)|negate
|i_negate|not|[sn]?complement|smartmatch|atan2|sin|cos
@@ -5162,6 +5299,20 @@ sub split_float {
return ($mantissa, $exponent);
}
+
+# suitably single- or double-quote a literal constant string
+
+sub quoted_const_str {
+ my ($self, $str) =@_;
+ if ($str =~ /[[:^print:]]/a) {
+ return single_delim("qq", '"',
+ uninterp(escape_str unback $str), $self);
+ } else {
+ return single_delim("q", "'", unback($str), $self);
+ }
+}
+
+
sub const {
my $self = shift;
my($sv, $cx) = @_;
@@ -5275,12 +5426,7 @@ sub const {
return $self->maybe_parens("\\$const", $cx, 20);
} elsif ($sv->FLAGS & SVf_POK) {
my $str = $sv->PV;
- if ($str =~ /[[:^print:]]/a) {
- return single_delim("qq", '"',
- uninterp(escape_str unback $str), $self);
- } else {
- return single_delim("q", "'", unback($str), $self);
- }
+ return $self->quoted_const_str($str);
} else {
return "undef";
}
@@ -5340,6 +5486,25 @@ sub pp_const {
return $self->const($sv, $cx);
}
+
+# Join two components of a double-quoted string, disambiguating
+# "${foo}bar", "${foo}{bar}", "${foo}[1]", "$foo\::bar"
+
+sub dq_disambiguate {
+ my ($first, $last) = @_;
+ ($last =~ /^[A-Z\\\^\[\]_?]/ &&
+ $first =~ s/([\$@])\^$/${1}{^}/) # "${^}W" etc
+ || ($last =~ /^[:'{\[\w_]/ && #'
+ $first =~ s/([\$@])([A-Za-z_]\w*)$/${1}{$2}/);
+ return $first . $last;
+}
+
+
+# Deparse a double-quoted optree. For example, "$a[0]\Q$b\Efo\"o" gets
+# compiled to concat(concat($[0],quotemeta($b)),const("fo\"o")), and this
+# sub deparses it back to $a[0]\Q$b\Efo"o
+# (It does not add delimiters)
+
sub dq {
my $self = shift;
my $op = shift;
@@ -5348,16 +5513,9 @@ sub dq {
return '$[' if $op->private & OPpCONST_ARYBASE;
return uninterp(escape_str(unback($self->const_sv($op)->as_string)));
} elsif ($type eq "concat") {
- my $first = $self->dq($op->first);
- my $last = $self->dq($op->last);
-
- # Disambiguate "${foo}bar", "${foo}{bar}", "${foo}[1]", "$foo\::bar"
- ($last =~ /^[A-Z\\\^\[\]_?]/ &&
- $first =~ s/([\$@])\^$/${1}{^}/) # "${^}W" etc
- || ($last =~ /^[:'{\[\w_]/ && #'
- $first =~ s/([\$@])([A-Za-z_]\w*)$/${1}{$2}/);
-
- return $first . $last;
+ return dq_disambiguate($self->dq($op->first), $self->dq($op->last));
+ } elsif ($type eq "multiconcat") {
+ return $self->do_multiconcat($op, 26, 1);
} elsif ($type eq "uc") {
return '\U' . $self->dq($op->first->sibling) . '\E';
} elsif ($type eq "lc") {
@@ -5682,9 +5840,11 @@ sub pp_trans {
sub pp_transr { push @_, 'r'; goto &pp_trans }
+# Join two components of a double-quoted re, disambiguating
+# "${foo}bar", "${foo}{bar}", "${foo}[1]".
+
sub re_dq_disambiguate {
my ($first, $last) = @_;
- # Disambiguate "${foo}bar", "${foo}{bar}", "${foo}[1]"
($last =~ /^[A-Z\\\^\[\]_?]/ &&
$first =~ s/([\$@])\^$/${1}{^}/) # "${^}W" etc
|| ($last =~ /^[{\[\w_]/ &&
@@ -5706,6 +5866,8 @@ sub re_dq {
my $first = $self->re_dq($op->first);
my $last = $self->re_dq($op->last);
return re_dq_disambiguate($first, $last);
+ } elsif ($type eq "multiconcat") {
+ return $self->do_multiconcat($op, 26, 2);
} elsif ($type eq "uc") {
return '\U' . $self->re_dq($op->first->sibling) . '\E';
} elsif ($type eq "lc") {
@@ -5754,6 +5916,31 @@ sub pure_string {
return $self->pure_string($op->first)
&& $self->pure_string($op->last);
}
+ elsif ($type eq 'multiconcat') {
+ my ($kid, @kids);
+ for ($kid = $op->first; !null $kid; $kid = $kid->sibling) {
+ # skip the consts and/or padsv we've optimised away
+ push @kids, $kid
+ unless $kid->type == OP_NULL
+ && ( $kid->targ == OP_PADSV
+ || $kid->targ == OP_CONST
+ || $kid->targ == OP_PUSHMARK);
+ }
+
+ if ($op->flags & OPf_STACKED) {
+ # remove expr from @kids where 'expr = ...' or 'expr .= ....'
+ if ($op->private & OPpMULTICONCAT_APPEND) {
+ shift(@kids);
+ }
+ else {
+ pop(@kids);
+ }
+ }
+ for (@kids) {
+ return 0 unless $self->pure_string($_);
+ }
+ return 1;
+ }
elsif (is_scalar($op) || $type =~ /^[ah]elem$/) {
return 1;
}
diff --git a/lib/B/Deparse.t b/lib/B/Deparse.t
index f1aae49fcb..b75a162a0e 100644
--- a/lib/B/Deparse.t
+++ b/lib/B/Deparse.t
@@ -2718,3 +2718,136 @@ $c = (index($a, $b) == -1);
$c = (rindex($a, $b) == -1);
$c = (index($a, $b) != -1);
$c = (rindex($a, $b) != -1);
+####
+# plain multiconcat
+my($a, $b, $c, $d, @a);
+$d = length $a . $b . $c;
+$d = length($a) . $b . $c;
+print '' . $a;
+push @a, ($a . '') * $b;
+unshift @a, "$a" * ($b . '');
+print $a . 'x' . $b . $c;
+print $a . 'x' . $b . $c, $d;
+print $b . $c . ($a . $b);
+print $b . $c . ($a . $b);
+print $b . $c . @a;
+print $a . "\x{100}";
+####
+# double-quoted multiconcat
+my($a, $b, $c, $d, @a);
+print "${a}x\x{100}$b$c";
+print "$a\Q$b\E$c\Ua$a\E\Lb$b\uc$c\E$a${b}c$c";
+print "A=$a[length 'b' . $c . 'd'] b=$b";
+print "A=@a B=$b";
+print "\x{101}$a\x{100}";
+$a = qr/\Q
+$b $c
+\x80
+\x{100}
+\E$c
+/;
+####
+# sprintf multiconcat
+my($a, $b, $c, $d, @a);
+print sprintf("%s%s%%%sx%s\x{100}%s", $a, $b, $c, scalar @a, $d);
+####
+# multiconcat with lexical assign
+my($a, $b, $c, $d, $e, @a);
+$d = 'foo' . $a;
+$d = "foo$a";
+$d = $a . '';
+$d = 'foo' . $a . 'bar';
+$d = $a . $b;
+$d = $a . $b . $c;
+$d = $a . $b . $c . @a;
+$e = ($d = $a . $b . $c);
+$d = !$a . $b . $c;
+$a = $b . $c . ($a . $b);
+$e = f($d = !$a . $b) . $c;
+$d = "${a}x\x{100}$b$c";
+f($d = !$a . $b . $c);
+####
+# multiconcat with lexical my
+my($a, $b, $c, $d, $e, @a);
+my $d1 = 'foo' . $a;
+my $d2 = "foo$a";
+my $d3 = $a . '';
+my $d4 = 'foo' . $a . 'bar';
+my $d5 = $a . $b;
+my $d6 = $a . $b . $c;
+my $e7 = ($d = $a . $b . $c);
+my $d8 = !$a . $b . $c;
+my $d9 = $b . $c . ($a . $b);
+my $da = f($d = !$a . $b) . $c;
+my $dc = "${a}x\x{100}$b$c";
+f(my $db = !$a . $b . $c);
+my $dd = $a . $b . $c . @a;
+####
+# multiconcat with lexical append
+my($a, $b, $c, $d, $e, @a);
+$d .= '';
+$d .= $a;
+$d .= "$a";
+$d .= 'foo' . $a;
+$d .= "foo$a";
+$d .= $a . '';
+$d .= 'foo' . $a . 'bar';
+$d .= $a . $b;
+$d .= $a . $b . $c;
+$d .= $a . $b . @a;
+$e .= ($d = $a . $b . $c);
+$d .= !$a . $b . $c;
+$a .= $b . $c . ($a . $b);
+$e .= f($d .= !$a . $b) . $c;
+f($d .= !$a . $b . $c);
+$d .= "${a}x\x{100}$b$c";
+####
+# multiconcat with expression assign
+my($a, $b, $c, @a);
+our($d, $e);
+$d = 'foo' . $a;
+$d = "foo$a";
+$d = $a . '';
+$d = 'foo' . $a . 'bar';
+$d = $a . $b;
+$d = $a . $b . $c;
+$d = $a . $b . @a;
+$e = ($d = $a . $b . $c);
+$a["-$b-"] = !$a . $b . $c;
+$a[$b]{$c}{$d ? $a : $b . $c} = !$a . $b . $c;
+$a = $b . $c . ($a . $b);
+$e = f($d = !$a . $b) . $c;
+$d = "${a}x\x{100}$b$c";
+f($d = !$a . $b . $c);
+####
+# multiconcat with expression concat
+my($a, $b, $c, @a);
+our($d, $e);
+$d .= 'foo' . $a;
+$d .= "foo$a";
+$d .= $a . '';
+$d .= 'foo' . $a . 'bar';
+$d .= $a . $b;
+$d .= $a . $b . $c;
+$d .= $a . $b . @a;
+$e .= ($d .= $a . $b . $c);
+$a["-$b-"] .= !$a . $b . $c;
+$a[$b]{$c}{$d ? $a : $b . $c} .= !$a . $b . $c;
+$a .= $b . $c . ($a . $b);
+$e .= f($d .= !$a . $b) . $c;
+$d .= "${a}x\x{100}$b$c";
+f($d .= !$a . $b . $c);
+####
+# multiconcat with CORE::sprintf
+# CONTEXT sub sprintf {}
+my($a, $b);
+my $x = CORE::sprintf('%s%s', $a, $b);
+####
+# multiconcat with backticks
+my($a, $b);
+our $x;
+$x = `$a-$b`;
+####
+# multiconcat within qr//
+my($r, $a, $b);
+$r = qr/abc\Q$a-$b\Exyz/;
diff --git a/lib/B/Op_private.pm b/lib/B/Op_private.pm
index 9d2f6155e2..6c9840e9bc 100644
--- a/lib/B/Op_private.pm
+++ b/lib/B/Op_private.pm
@@ -134,7 +134,7 @@ $bits{$_}{6} = 'OPpINDEX_BOOLNEG' for qw(index rindex);
$bits{$_}{1} = 'OPpITER_REVERSED' for qw(enteriter iter);
$bits{$_}{7} = 'OPpLVALUE' for qw(leave leaveloop);
$bits{$_}{6} = 'OPpLVAL_DEFER' for qw(aelem helem multideref);
-$bits{$_}{7} = 'OPpLVAL_INTRO' for qw(aelem aslice cond_expr delete enteriter entersub gvsv helem hslice list lvavref lvref lvrefslice multideref padav padhv padrange padsv pushmark refassign rv2av rv2gv rv2hv rv2sv split);
+$bits{$_}{7} = 'OPpLVAL_INTRO' for qw(aelem aslice cond_expr delete enteriter entersub gvsv helem hslice list lvavref lvref lvrefslice multiconcat multideref padav padhv padrange padsv pushmark refassign rv2av rv2gv rv2hv rv2sv split);
$bits{$_}{2} = 'OPpLVREF_ELEM' for qw(lvref refassign);
$bits{$_}{3} = 'OPpLVREF_ITER' for qw(lvref refassign);
$bits{$_}{3} = 'OPpMAYBE_LVSUB' for qw(aassign aelem akeys aslice av2arylen avhvswitch helem hslice keys kvaslice kvhslice multideref padav padhv pos rv2av rv2gv rv2hv substr values vec);
@@ -149,7 +149,7 @@ $bits{$_}{6} = 'OPpPAD_STATE' for qw(lvavref lvref padav padhv padsv pushmark re
$bits{$_}{7} = 'OPpPV_IS_UTF8' for qw(dump goto last next redo);
$bits{$_}{6} = 'OPpREFCOUNTED' for qw(leave leaveeval leavesub leavesublv leavewrite);
$bits{$_}{2} = 'OPpSLICEWARNING' for qw(aslice hslice padav padhv rv2av rv2hv);
-$bits{$_}{4} = 'OPpTARGET_MY' for qw(abs add atan2 chdir chmod chomp chown chr chroot concat cos crypt divide exec exp flock getpgrp getppid getpriority hex i_add i_divide i_modulo i_multiply i_subtract index int kill left_shift length link log mkdir modulo multiply nbit_and nbit_or nbit_xor ncomplement oct ord pow push rand rename right_shift rindex rmdir schomp scomplement setpgrp setpriority sin sleep sqrt srand stringify subtract symlink system time unlink unshift utime wait waitpid);
+$bits{$_}{4} = 'OPpTARGET_MY' for qw(abs add atan2 chdir chmod chomp chown chr chroot concat cos crypt divide exec exp flock getpgrp getppid getpriority hex i_add i_divide i_modulo i_multiply i_subtract index int kill left_shift length link log mkdir modulo multiconcat multiply nbit_and nbit_or nbit_xor ncomplement oct ord pow push rand rename right_shift rindex rmdir schomp scomplement setpgrp setpriority sin sleep sqrt srand stringify subtract symlink system time unlink unshift utime wait waitpid);
$bits{$_}{5} = 'OPpTRANS_COMPLEMENT' for qw(trans transr);
$bits{$_}{7} = 'OPpTRANS_DELETE' for qw(trans transr);
$bits{$_}{0} = 'OPpTRANS_FROM_UTF' for qw(trans transr);
@@ -439,6 +439,7 @@ $bits{method_super}{0} = $bf[0];
@{$bits{msgget}}{3,2,1,0} = ($bf[4], $bf[4], $bf[4], $bf[4]);
@{$bits{msgrcv}}{3,2,1,0} = ($bf[4], $bf[4], $bf[4], $bf[4]);
@{$bits{msgsnd}}{3,2,1,0} = ($bf[4], $bf[4], $bf[4], $bf[4]);
+@{$bits{multiconcat}}{6,5,3,0} = ('OPpMULTICONCAT_APPEND', 'OPpMULTICONCAT_FAKE', 'OPpMULTICONCAT_STRINGIFY', $bf[0]);
@{$bits{multideref}}{5,4,0} = ('OPpMULTIDEREF_DELETE', 'OPpMULTIDEREF_EXISTS', $bf[0]);
@{$bits{multiply}}{1,0} = ($bf[1], $bf[1]);
@{$bits{nbit_and}}{1,0} = ($bf[1], $bf[1]);
@@ -650,6 +651,9 @@ our %defines = (
OPpMAYBE_LVSUB => 8,
OPpMAYBE_TRUEBOOL => 16,
OPpMAY_RETURN_CONSTANT => 32,
+ OPpMULTICONCAT_APPEND => 64,
+ OPpMULTICONCAT_FAKE => 32,
+ OPpMULTICONCAT_STRINGIFY => 8,
OPpMULTIDEREF_DELETE => 32,
OPpMULTIDEREF_EXISTS => 16,
OPpOFFBYONE => 128,
@@ -752,6 +756,9 @@ our %labels = (
OPpMAYBE_LVSUB => 'LVSUB',
OPpMAYBE_TRUEBOOL => 'BOOL?',
OPpMAY_RETURN_CONSTANT => 'CONST',
+ OPpMULTICONCAT_APPEND => 'APPEND',
+ OPpMULTICONCAT_FAKE => 'FAKE',
+ OPpMULTICONCAT_STRINGIFY => 'STRINGIFY',
OPpMULTIDEREF_DELETE => 'DELETE',
OPpMULTIDEREF_EXISTS => 'EXISTS',
OPpOFFBYONE => '+1',
@@ -817,10 +824,11 @@ our %ops_using = (
OPpLIST_GUESSED => [qw(list)],
OPpLVALUE => [qw(leave leaveloop)],
OPpLVAL_DEFER => [qw(aelem helem multideref)],
- OPpLVAL_INTRO => [qw(aelem aslice cond_expr delete enteriter entersub gvsv helem hslice list lvavref lvref lvrefslice multideref padav padhv padrange padsv pushmark refassign rv2av rv2gv rv2hv rv2sv split)],
+ OPpLVAL_INTRO => [qw(aelem aslice cond_expr delete enteriter entersub gvsv helem hslice list lvavref lvref lvrefslice multiconcat multideref padav padhv padrange padsv pushmark refassign rv2av rv2gv rv2hv rv2sv split)],
OPpLVREF_ELEM => [qw(lvref refassign)],
OPpMAYBE_LVSUB => [qw(aassign aelem akeys aslice av2arylen avhvswitch helem hslice keys kvaslice kvhslice multideref padav padhv pos rv2av rv2gv rv2hv substr values vec)],
OPpMAYBE_TRUEBOOL => [qw(padhv ref rv2hv)],
+ OPpMULTICONCAT_APPEND => [qw(multiconcat)],
OPpMULTIDEREF_DELETE => [qw(multideref)],
OPpOFFBYONE => [qw(caller runcv wantarray)],
OPpOPEN_IN_CRLF => [qw(backtick open)],
@@ -836,7 +844,7 @@ our %ops_using = (
OPpSORT_DESCEND => [qw(sort)],
OPpSPLIT_ASSIGN => [qw(split)],
OPpSUBSTR_REPL_FIRST => [qw(substr)],
- OPpTARGET_MY => [qw(abs add atan2 chdir chmod chomp chown chr chroot concat cos crypt divide exec exp flock getpgrp getppid getpriority hex i_add i_divide i_modulo i_multiply i_subtract index int kill left_shift length link log mkdir modulo multiply nbit_and nbit_or nbit_xor ncomplement oct ord pow push rand rename right_shift rindex rmdir schomp scomplement setpgrp setpriority sin sleep sqrt srand stringify subtract symlink system time unlink unshift utime wait waitpid)],
+ OPpTARGET_MY => [qw(abs add atan2 chdir chmod chomp chown chr chroot concat cos crypt divide exec exp flock getpgrp getppid getpriority hex i_add i_divide i_modulo i_multiply i_subtract index int kill left_shift length link log mkdir modulo multiconcat multiply nbit_and nbit_or nbit_xor ncomplement oct ord pow push rand rename right_shift rindex rmdir schomp scomplement setpgrp setpriority sin sleep sqrt srand stringify subtract symlink system time unlink unshift utime wait waitpid)],
OPpTRANS_COMPLEMENT => [qw(trans transr)],
OPpTRUEBOOL => [qw(grepwhile index length padav padhv pos ref rindex rv2av rv2hv subst)],
);
@@ -863,6 +871,8 @@ $ops_using{OPpFT_STACKED} = $ops_using{OPpFT_AFTER_t};
$ops_using{OPpFT_STACKING} = $ops_using{OPpFT_AFTER_t};
$ops_using{OPpLVREF_ITER} = $ops_using{OPpLVREF_ELEM};
$ops_using{OPpMAY_RETURN_CONSTANT} = $ops_using{OPpENTERSUB_NOPAREN};
+$ops_using{OPpMULTICONCAT_FAKE} = $ops_using{OPpMULTICONCAT_APPEND};
+$ops_using{OPpMULTICONCAT_STRINGIFY} = $ops_using{OPpMULTICONCAT_APPEND};
$ops_using{OPpMULTIDEREF_EXISTS} = $ops_using{OPpMULTIDEREF_DELETE};
$ops_using{OPpOPEN_IN_RAW} = $ops_using{OPpOPEN_IN_CRLF};
$ops_using{OPpOPEN_OUT_CRLF} = $ops_using{OPpOPEN_IN_CRLF};
diff --git a/op.c b/op.c
index 416ac2db22..689f696857 100644
--- a/op.c
+++ b/op.c
@@ -1078,6 +1078,22 @@ Perl_op_clear(pTHX_ OP *o)
PerlMemShared_free(cUNOP_AUXo->op_aux);
break;
+ case OP_MULTICONCAT:
+ {
+ UNOP_AUX_item *aux = cUNOP_AUXo->op_aux;
+ /* aux[PERL_MULTICONCAT_IX_PLAIN_PV] and/or
+ * aux[PERL_MULTICONCAT_IX_UTF8_PV] point to plain and/or
+ * utf8 shared strings */
+ char *p1 = aux[PERL_MULTICONCAT_IX_PLAIN_PV].pv;
+ char *p2 = aux[PERL_MULTICONCAT_IX_UTF8_PV].pv;
+ if (p1)
+ PerlMemShared_free(p1);
+ if (p2 && p1 != p2)
+ PerlMemShared_free(p2);
+ PerlMemShared_free(aux);
+ }
+ break;
+
case OP_MULTIDEREF:
{
UNOP_AUX_item *items = cUNOP_AUXo->op_aux;
@@ -2470,6 +2486,883 @@ S_check_hash_fields_and_hekify(pTHX_ UNOP *rop, SVOP *key_op)
}
}
+/* info returned by S_sprintf_is_multiconcatable() */
+
+struct sprintf_ismc_info {
+ UV nargs; /* num of args to sprintf (not including the format) */
+ char *start; /* start of raw format string */
+ char *end; /* bytes after end of raw format string */
+ STRLEN total_len; /* total length (in bytes) of format string, not
+ including '%s' and half of '%%' */
+ STRLEN variant; /* number of bytes by which total_len_p would grow
+ if upgraded to utf8 */
+ bool utf8; /* whether the format is utf8 */
+};
+
+
+/* is the OP_SPRINTF o suitable for converting into a multiconcat op?
+ * i.e. its format argument is a const string with only '%s' and '%%'
+ * formats, and the number of args is known, e.g.
+ * sprintf "a=%s f=%s", $a[0], scalar(f());
+ * but not
+ * sprintf "i=%d a=%s f=%s", $i, @a, f();
+ *
+ * If successful, the sprintf_ismc_info struct pointed to by info will be
+ * populated.
+ */
+
+STATIC bool
+S_sprintf_is_multiconcatable(pTHX_ OP *o,struct sprintf_ismc_info *info)
+{
+ OP *pm, *constop, *kid;
+ SV *sv;
+ char *s, *e, *p;
+ UV nargs, nformats;
+ STRLEN cur, total_len, variant;
+ bool utf8;
+
+ /* if sprintf's behaviour changes, die here so that someone
+ * can decide whether to enhance this function or skip optimising
+ * under those new circumstances */
+ assert(!(o->op_flags & OPf_STACKED));
+ assert(!(PL_opargs[OP_SPRINTF] & OA_TARGLEX));
+ assert(!(o->op_private & ~OPpARG4_MASK));
+
+ pm = cUNOPo->op_first;
+ if (pm->op_type != OP_PUSHMARK) /* weird coreargs stuff */
+ return FALSE;
+ constop = OpSIBLING(pm);
+ if (!constop || constop->op_type != OP_CONST)
+ return FALSE;
+ sv = cSVOPx_sv(constop);
+ if (SvMAGICAL(sv) || !SvPOK(sv))
+ return FALSE;
+
+ s = SvPV(sv, cur);
+ e = s + cur;
+
+ /* Scan format for %% and %s and work out how many %s there are.
+ * Abandon if other format types are found.
+ */
+
+ nformats = 0;
+ total_len = 0;
+ variant = 0;
+
+ for (p = s; p < e; p++) {
+ if (*p != '%') {
+ total_len++;
+ if (UTF8_IS_INVARIANT(*p))
+ variant++;
+ continue;
+ }
+ p++;
+ if (p >= e)
+ return FALSE; /* lone % at end gives "Invalid conversion" */
+ if (*p == '%')
+ total_len++;
+ else if (*p == 's')
+ nformats++;
+ else
+ return FALSE;
+ }
+
+ if (!nformats || nformats > PERL_MULTICONCAT_MAXARG)
+ return FALSE;
+
+ utf8 = cBOOL(SvUTF8(sv));
+ if (utf8)
+ variant = 0;
+
+ /* scan args; they must all be in scalar cxt */
+
+ nargs = 0;
+ kid = OpSIBLING(constop);
+
+ while (kid) {
+ if ((kid->op_flags & OPf_WANT) != OPf_WANT_SCALAR)
+ return FALSE;
+ nargs++;
+ kid = OpSIBLING(kid);
+ }
+
+ if (nargs != nformats)
+ return FALSE; /* e.g. sprintf("%s%s", $a); */
+
+
+ info->nargs = nargs;
+ info->start = s;
+ info->end = e;
+ info->total_len = total_len;
+ info->variant = variant;
+ info->utf8 = utf8;
+
+ return TRUE;
+}
+
+
+
+/* S_maybe_multiconcat():
+ *
+ * given an OP_STRINGIFY, OP_SASSIGN, OP_CONCAT or OP_SPRINTF op, possibly
+ * convert it (and its children) into an OP_MULTICONCAT. See the code
+ * comments just before pp_multiconcat() for the full details of what
+ * OP_MULTICONCAT supports.
+ *
+ * Basically we're looking for an optree with a chain of OP_CONCATS down
+ * the LHS (or an OP_SPRINTF), with possibly an OP_SASSIGN, and/or
+ * OP_STRINGIFY, and/or OP_CONCAT acting as '.=' at its head, e.g.
+ *
+ * $x = "$a$b-$c"
+ *
+ * looks like
+ *
+ * SASSIGN
+ * |
+ * STRINGIFY -- PADSV[$x]
+ * |
+ * |
+ * ex-PUSHMARK -- CONCAT/S
+ * |
+ * CONCAT/S -- PADSV[$d]
+ * |
+ * CONCAT -- CONST["-"]
+ * |
+ * PADSV[$a] -- PADSV[$b]
+ *
+ * Note that at this stage the OP_SASSIGN may have already been optimised
+ * away with OPpTARGET_MY set on the OP_STRINGIFY or OP_CONCAT.
+ */
+
+STATIC void
+S_maybe_multiconcat(pTHX_ OP *o)
+{
+ OP *lastkidop; /* the right-most of any kids unshifted onto o */
+ OP *topop; /* the top-most op in the concat tree (often equals o,
+ unless there are assign/stringify ops above it */
+ OP *parentop; /* the parent op of topop (or itself if no parent) */
+ OP *targmyop; /* the op (if any) with the OPpTARGET_MY flag */
+ OP *targetop; /* the op corresponding to target=... or target.=... */
+ OP *stringop; /* the OP_STRINGIFY op, if any */
+ OP *nextop; /* used for recreating the op_next chain without consts */
+ OP *kid; /* general-purpose op pointer */
+ UNOP_AUX_item *aux;
+ UNOP_AUX_item *lenp;
+ char *const_str, *p;
+ struct sprintf_ismc_info sprintf_info;
+
+ /* store info about each arg in args[];
+ * toparg is the highest used slot; argp is a general
+ * pointer to args[] slots */
+ struct {
+ void *p; /* initially points to const sv (or null for op);
+ later, set to SvPV(constsv), with ... */
+ STRLEN len; /* ... len set to SvPV(..., len) */
+ } *argp, *toparg, args[PERL_MULTICONCAT_MAXARG*2 + 1];
+
+ UV nargs = 0;
+ UV nconst = 0;
+ STRLEN variant;
+ bool utf8 = FALSE;
+ bool kid_is_last = FALSE; /* most args will be the RHS kid of a concat op;
+ the last-processed arg will the LHS of one,
+ as args are processed in reverse order */
+ U8 stacked_last = 0; /* whether the last seen concat op was STACKED */
+ STRLEN total_len = 0; /* sum of the lengths of the const segments */
+ U8 flags = 0; /* what will become the op_flags and ... */
+ U8 private_flags = 0; /* ... op_private of the multiconcat op */
+ bool is_sprintf = FALSE; /* we're optimising an sprintf */
+ bool is_targable = FALSE; /* targetop is an OPpTARGET_MY candidate */
+
+ /* -----------------------------------------------------------------
+ * Phase 1:
+ *
+ * Examine the optree non-destructively to determine whether it's
+ * suitable to be converted into an OP_MULTICONCAT. Accumulate
+ * information about the optree in args[].
+ */
+
+ argp = args;
+ targmyop = NULL;
+ targetop = NULL;
+ stringop = NULL;
+ topop = o;
+ parentop = o;
+
+ assert( o->op_type == OP_SASSIGN
+ || o->op_type == OP_CONCAT
+ || o->op_type == OP_SPRINTF
+ || o->op_type == OP_STRINGIFY);
+
+ /* first see if, at the top of the tree, there is an assign,
+ * append and/or stringify */
+
+ if (topop->op_type == OP_SASSIGN) {
+ /* expr = ..... */
+ if (o->op_ppaddr != PL_ppaddr[OP_SASSIGN])
+ return;
+ if (o->op_private & (OPpASSIGN_BACKWARDS|OPpASSIGN_CV_TO_GV))
+ return;
+ assert(!(o->op_private & ~OPpARG2_MASK)); /* barf on unknown flags */
+
+ parentop = topop;
+ topop = cBINOPo->op_first;
+ targetop = OpSIBLING(topop);
+ if (!targetop) /* probably some sort of syntax error */
+ return;
+ }
+ else if ( topop->op_type == OP_CONCAT
+ && (topop->op_flags & OPf_STACKED)
+ && (cUNOPo->op_first->op_flags & OPf_MOD))
+ {
+ /* expr .= ..... */
+
+ /* OPpTARGET_MY shouldn't be able to be set here. If it is,
+ * decide what to do about it */
+ assert(!(o->op_private & OPpTARGET_MY));
+
+ /* barf on unknown flags */
+ assert(!(o->op_private & ~(OPpARG2_MASK|OPpTARGET_MY)));
+ private_flags |= OPpMULTICONCAT_APPEND;
+ targetop = cBINOPo->op_first;
+ parentop = topop;
+ topop = OpSIBLING(targetop);
+
+ /* $x .= <FOO> gets optimised to rcatline instead */
+ if (topop->op_type == OP_READLINE)
+ return;
+ }
+
+ if (targetop) {
+ /* Can targetop (the LHS) if it's a padsv, be be optimised
+ * away and use OPpTARGET_MY instead?
+ */
+ if ( (targetop->op_type == OP_PADSV)
+ && !(targetop->op_private & OPpDEREF)
+ && !(targetop->op_private & OPpPAD_STATE)
+ /* we don't support 'my $x .= ...' */
+ && ( o->op_type == OP_SASSIGN
+ || !(targetop->op_private & OPpLVAL_INTRO))
+ )
+ is_targable = TRUE;
+ }
+
+ if (topop->op_type == OP_STRINGIFY) {
+ if (topop->op_ppaddr != PL_ppaddr[OP_STRINGIFY])
+ return;
+ stringop = topop;
+
+ /* barf on unknown flags */
+ assert(!(o->op_private & ~(OPpARG4_MASK|OPpTARGET_MY)));
+
+ if ((topop->op_private & OPpTARGET_MY)) {
+ if (o->op_type == OP_SASSIGN)
+ return; /* can't have two assigns */
+ targmyop = topop;
+ }
+
+ private_flags |= OPpMULTICONCAT_STRINGIFY;
+ parentop = topop;
+ topop = cBINOPx(topop)->op_first;
+ assert(OP_TYPE_IS_OR_WAS_NN(topop, OP_PUSHMARK));
+ topop = OpSIBLING(topop);
+ }
+
+ if (topop->op_type == OP_SPRINTF) {
+ if (topop->op_ppaddr != PL_ppaddr[OP_SPRINTF])
+ return;
+ if (S_sprintf_is_multiconcatable(aTHX_ topop, &sprintf_info)) {
+ nargs = sprintf_info.nargs;
+ total_len = sprintf_info.total_len;
+ variant = sprintf_info.variant;
+ utf8 = sprintf_info.utf8;
+ is_sprintf = TRUE;
+ private_flags |= OPpMULTICONCAT_FAKE;
+ toparg = argp;
+ /* we have an sprintf op rather than a concat optree.
+ * Skip most of the code below which is associated with
+ * processing that optree. We also skip phase 2, determining
+ * whether its cost effective to optimise, since for sprintf,
+ * multiconcat is *always* faster */
+ goto create_aux;
+ }
+ /* note that even if the sprintf itself isn't multiconcatable,
+ * the expression as a whole may be, e.g. in
+ * $x .= sprintf("%d",...)
+ * the sprintf op will be left as-is, but the concat/S op may
+ * be upgraded to multiconcat
+ */
+ }
+ else if (topop->op_type == OP_CONCAT) {
+ if (topop->op_ppaddr != PL_ppaddr[OP_CONCAT])
+ return;
+
+ if ((topop->op_private & OPpTARGET_MY)) {
+ if (o->op_type == OP_SASSIGN || targmyop)
+ return; /* can't have two assigns */
+ targmyop = topop;
+ }
+ }
+
+ /* Is it safe to convert a sassign/stringify/concat op into
+ * a multiconcat? */
+ assert((PL_opargs[OP_SASSIGN] & OA_CLASS_MASK) == OA_BINOP);
+ assert((PL_opargs[OP_CONCAT] & OA_CLASS_MASK) == OA_BINOP);
+ assert((PL_opargs[OP_STRINGIFY] & OA_CLASS_MASK) == OA_LISTOP);
+ assert((PL_opargs[OP_SPRINTF] & OA_CLASS_MASK) == OA_LISTOP);
+ STATIC_ASSERT_STMT( STRUCT_OFFSET(BINOP, op_last)
+ == STRUCT_OFFSET(UNOP_AUX, op_aux));
+ STATIC_ASSERT_STMT( STRUCT_OFFSET(LISTOP, op_last)
+ == STRUCT_OFFSET(UNOP_AUX, op_aux));
+
+ /* Now scan the down the tree looking for a series of
+ * CONCAT/OPf_STACKED ops on the LHS (with the last one not
+ * stacked). For example this tree:
+ *
+ * |
+ * CONCAT/STACKED
+ * |
+ * CONCAT/STACKED -- EXPR5
+ * |
+ * CONCAT/STACKED -- EXPR4
+ * |
+ * CONCAT -- EXPR3
+ * |
+ * EXPR1 -- EXPR2
+ *
+ * corresponds to an expression like
+ *
+ * (EXPR1 . EXPR2 . EXPR3 . EXPR4 . EXPR5)
+ *
+ * Record info about each EXPR in args[]: in particular, whether it is
+ * a stringifiable OP_CONST and if so what the const sv is.
+ *
+ * The reason why the last concat can't be STACKED is the difference
+ * between
+ *
+ * ((($a .= $a) .= $a) .= $a) .= $a
+ *
+ * and
+ * $a . $a . $a . $a . $a
+ *
+ * The main difference between the optrees for those two constructs
+ * is the presence of the last STACKED. As well as modifying $a,
+ * the former sees the changed $a between each concat, so if $s is
+ * initially 'a', the first returns 'a' x 16, while the latter returns
+ * 'a' x 5. And pp_multiconcat can't handle that kind of thing.
+ */
+
+ kid = topop;
+
+ for (;;) {
+ OP *argop;
+ SV *sv;
+ bool last = FALSE;
+
+ if ( kid->op_type == OP_CONCAT
+ && !kid_is_last
+ ) {
+ OP *k1, *k2;
+ k1 = cUNOPx(kid)->op_first;
+ k2 = OpSIBLING(k1);
+ /* shouldn't happen except maybe after compile err? */
+ if (!k2)
+ return;
+
+ /* avoid turning (A . B . ($lex = C) ...) into (A . B . C ...) */
+ if (kid->op_private & OPpTARGET_MY)
+ kid_is_last = TRUE;
+
+ stacked_last = (kid->op_flags & OPf_STACKED);
+ if (!stacked_last)
+ kid_is_last = TRUE;
+
+ kid = k1;
+ argop = k2;
+ }
+ else {
+ argop = kid;
+ last = TRUE;
+ }
+
+ if ( nargs > PERL_MULTICONCAT_MAXARG - 2
+ || (argp - args + 1) > (PERL_MULTICONCAT_MAXARG*2 + 1) - 2)
+ {
+ /* At least two spare slots are needed to decompose both
+ * concat args. If there are no slots left, continue to
+ * examine the rest of the optree, but don't push new values
+ * on args[]. If the optree as a whole is legal for conversion
+ * (in particular that the last concat isn't STACKED), then
+ * the first PERL_MULTICONCAT_MAXARG elements of the optree
+ * can be converted into an OP_MULTICONCAT now, with the first
+ * child of that op being the remainder of the optree -
+ * which may itself later be converted to a multiconcat op
+ * too.
+ */
+ if (last) {
+ /* the last arg is the rest of the optree */
+ argp++->p = NULL;
+ nargs++;
+ }
+ }
+ else if ( argop->op_type == OP_CONST
+ && ((sv = cSVOPx_sv(argop)))
+ /* defer stringification until runtime of 'constant'
+ * things that might stringify variantly, e.g. the radix
+ * point of NVs, or overloaded RVs */
+ && (SvPOK(sv) || SvIOK(sv))
+ && (!SvGMAGICAL(sv))
+ ) {
+ argp++->p = sv;
+ utf8 |= cBOOL(SvUTF8(sv));
+ nconst++;
+ }
+ else {
+ argp++->p = NULL;
+ nargs++;
+ }
+
+ if (last)
+ break;
+ }
+
+ toparg = argp - 1;
+
+ if (stacked_last)
+ return; /* we don't support ((A.=B).=C)...) */
+
+ /* -----------------------------------------------------------------
+ * Phase 2:
+ *
+ * At this point we have determined that the optree *can* be converted
+ * into a multiconcat. Having gathered all the evidence, we now decide
+ * whether it *should*.
+ */
+
+
+ /* we need at least one concat action, e.g.:
+ *
+ * Y . Z
+ * X = Y . Z
+ * X .= Y
+ *
+ * otherwise we could be doing something like $x = "foo", which
+ * if treated as as a concat, would fail to COW.
+ */
+ if (nargs + nconst + cBOOL(private_flags & OPpMULTICONCAT_APPEND) < 2)
+ return;
+
+ /* Benchmarking seems to indicate that we gain if:
+ * * we optimise at least two actions into a single multiconcat
+ * (e.g concat+concat, sassign+concat);
+ * * or if we can eliminate at least 1 OP_CONST;
+ * * or if we can eliminate a padsv via OPpTARGET_MY
+ */
+
+ if (
+ /* eliminated at least one OP_CONST */
+ nconst >= 1
+ /* eliminated an OP_SASSIGN */
+ || o->op_type == OP_SASSIGN
+ /* eliminated an OP_PADSV */
+ || (!targmyop && is_targable)
+ )
+ /* definitely a net gain to optimise */
+ goto optimise;
+
+ /* ... if not, what else? */
+
+ /* special-case '$lex1 = expr . $lex1' (where expr isn't lex1):
+ * multiconcat is faster (due to not creating a temporary copy of
+ * $lex1), whereas for a general $lex1 = $lex2 . $lex3, concat is
+ * faster.
+ */
+ if ( nconst == 0
+ && nargs == 2
+ && targmyop
+ && topop->op_type == OP_CONCAT
+ ) {
+ PADOFFSET t = targmyop->op_targ;
+ OP *k1 = cBINOPx(topop)->op_first;
+ OP *k2 = cBINOPx(topop)->op_last;
+ if ( k2->op_type == OP_PADSV
+ && k2->op_targ == t
+ && ( k1->op_type != OP_PADSV
+ || k1->op_targ != t)
+ )
+ goto optimise;
+ }
+
+ /* need at least two concats */
+ if (nargs + nconst + cBOOL(private_flags & OPpMULTICONCAT_APPEND) < 3)
+ return;
+
+
+
+ /* -----------------------------------------------------------------
+ * Phase 3:
+ *
+ * At this point the optree has been verified as ok to be optimised
+ * into an OP_MULTICONCAT. Now start changing things.
+ */
+
+ optimise:
+
+ /* stringify all const args and determine utf8ness */
+
+ variant = 0;
+ for (argp = args; argp <= toparg; argp++) {
+ SV *sv = (SV*)argp->p;
+ if (!sv)
+ continue; /* not a const op */
+ if (utf8 && !SvUTF8(sv))
+ sv_utf8_upgrade_nomg(sv);
+ argp->p = SvPV_nomg(sv, argp->len);
+ total_len += argp->len;
+
+ /* see if any strings would grow if converted to utf8 */
+ if (!utf8) {
+ char *p = (char*)argp->p;
+ STRLEN len = argp->len;
+ while (len--) {
+ U8 c = *p++;
+ if (!UTF8_IS_INVARIANT(c))
+ variant++;
+ }
+ }
+ }
+
+ /* create and populate aux struct */
+
+ create_aux:
+
+ aux = (UNOP_AUX_item*)PerlMemShared_malloc(
+ sizeof(UNOP_AUX_item)
+ * (
+ PERL_MULTICONCAT_HEADER_SIZE
+ + ((nargs + 1) * (variant ? 2 : 1))
+ )
+ );
+ const_str = (char *)PerlMemShared_malloc(total_len);
+
+ /* Extract all the non-const expressions from the concat tree then
+ * dispose of the old tree, e.g. convert the tree from this:
+ *
+ * o => SASSIGN
+ * |
+ * STRINGIFY -- TARGET
+ * |
+ * ex-PUSHMARK -- CONCAT
+ * |
+ * CONCAT -- EXPR5
+ * |
+ * CONCAT -- EXPR4
+ * |
+ * CONCAT -- EXPR3
+ * |
+ * EXPR1 -- EXPR2
+ *
+ *
+ * to:
+ *
+ * o => MULTICONCAT
+ * |
+ * ex-PUSHMARK -- EXPR1 -- EXPR2 -- EXPR3 -- EXPR4 -- EXPR5 -- TARGET
+ *
+ * except that if EXPRi is an OP_CONST, it's discarded.
+ *
+ * During the conversion process, EXPR ops are stripped from the tree
+ * and unshifted onto o. Finally, any of o's remaining original
+ * childen are discarded and o is converted into an OP_MULTICONCAT.
+ *
+ * In this middle of this, o may contain both: unshifted args on the
+ * left, and some remaining original args on the right. lastkidop
+ * is set to point to the right-most unshifted arg to delineate
+ * between the two sets.
+ */
+
+
+ if (is_sprintf) {
+ /* create a copy of the format with the %'s removed, and record
+ * the sizes of the const string segments in the aux struct */
+ char *q, *oldq;
+ lenp = aux + PERL_MULTICONCAT_IX_LENGTHS;
+
+ p = sprintf_info.start;
+ q = const_str;
+ oldq = q;
+ for (; p < sprintf_info.end; p++) {
+ if (*p == '%') {
+ p++;
+ if (*p != '%') {
+ (lenp++)->uv = q - oldq;
+ oldq = q;
+ continue;
+ }
+ }
+ *q++ = *p;
+ }
+ lenp->uv = q - oldq;
+ assert((STRLEN)(q - const_str) == total_len);
+
+ /* Attach all the args (i.e. the kids of the sprintf) to o (which
+ * may or may not be topop) The pushmark and const ops need to be
+ * kept in case they're an op_next entry point.
+ */
+ lastkidop = cLISTOPx(topop)->op_last;
+ kid = cUNOPx(topop)->op_first; /* pushmark */
+ op_null(kid);
+ op_null(OpSIBLING(kid)); /* const */
+ if (o != topop) {
+ kid = op_sibling_splice(topop, NULL, -1, NULL); /* cut all args */
+ op_sibling_splice(o, NULL, 0, kid); /* and attach to o */
+ lastkidop->op_next = o;
+ }
+ }
+ else {
+ p = const_str;
+ lenp = aux + PERL_MULTICONCAT_IX_LENGTHS;
+
+ lenp->size = -1;
+
+ /* Concatenate all const strings into const_str.
+ * Note that args[] contains the RHS args in reverse order, so
+ * we scan args[] from top to bottom to get constant strings
+ * in L-R order
+ */
+ for (argp = toparg; argp >= args; argp--) {
+ if (!argp->p)
+ /* not a const op */
+ (++lenp)->size = -1;
+ else {
+ STRLEN l = argp->len;
+ Copy(argp->p, p, l, char);
+ p += l;
+ if (lenp->size == -1)
+ lenp->size = l;
+ else
+ lenp->size += l;
+ }
+ }
+
+ kid = topop;
+ nextop = o;
+ lastkidop = NULL;
+
+ for (argp = args; argp <= toparg; argp++) {
+ /* only keep non-const args, except keep the first-in-next-chain
+ * arg no matter what it is (but nulled if OP_CONST), because it
+ * may be the entry point to this subtree from the previous
+ * op_next.
+ */
+ bool last = (argp == toparg);
+ OP *prev;
+
+ /* set prev to the sibling *before* the arg to be cut out,
+ * e.g.:
+ *
+ * |
+ * kid= CONST
+ * |
+ * prev= CONST -- EXPR
+ * |
+ */
+ if (argp == args && kid->op_type != OP_CONCAT) {
+ /* in e.g. '$x . = f(1)' there's no RHS concat tree
+ * so the expression to be cut isn't kid->op_last but
+ * kid itself */
+ OP *o1, *o2;
+ /* find the op before kid */
+ o1 = NULL;
+ o2 = cUNOPx(parentop)->op_first;
+ while (o2 && o2 != kid) {
+ o1 = o2;
+ o2 = OpSIBLING(o2);
+ }
+ assert(o2 == kid);
+ prev = o1;
+ kid = parentop;
+ }
+ else if (kid == o && lastkidop)
+ prev = last ? lastkidop : OpSIBLING(lastkidop);
+ else
+ prev = last ? NULL : cUNOPx(kid)->op_first;
+
+ if (!argp->p || last) {
+ /* cut RH op */
+ OP *aop = op_sibling_splice(kid, prev, 1, NULL);
+ /* and unshift to front of o */
+ op_sibling_splice(o, NULL, 0, aop);
+ /* record the right-most op added to o: later we will
+ * free anything to the right of it */
+ if (!lastkidop)
+ lastkidop = aop;
+ aop->op_next = nextop;
+ if (last) {
+ if (argp->p)
+ /* null the const at start of op_next chain */
+ op_null(aop);
+ }
+ else if (prev)
+ nextop = prev->op_next;
+ }
+
+ /* the last two arguments are both attached to the same concat op */
+ if (argp < toparg - 1)
+ kid = prev;
+ }
+ }
+
+ /* Populate the aux struct */
+
+ aux[PERL_MULTICONCAT_IX_NARGS].uv = nargs;
+ aux[PERL_MULTICONCAT_IX_PLAIN_PV].pv = utf8 ? NULL : const_str;
+ aux[PERL_MULTICONCAT_IX_PLAIN_LEN].size = utf8 ? 0 : total_len;
+ aux[PERL_MULTICONCAT_IX_UTF8_PV].pv = const_str;
+ aux[PERL_MULTICONCAT_IX_UTF8_LEN].size = total_len;
+
+ /* if variant > 0, calculate a variant const string and lengths where
+ * the utf8 version of the string will take 'variant' more bytes than
+ * the plain one. */
+
+ if (variant) {
+ char *p = const_str;
+ STRLEN ulen = total_len + variant;
+ UNOP_AUX_item *lens = aux + PERL_MULTICONCAT_IX_LENGTHS;
+ UNOP_AUX_item *ulens = lens + (nargs + 1);
+ char *up = (char*)PerlMemShared_malloc(ulen);
+ UV n;
+
+ aux[PERL_MULTICONCAT_IX_UTF8_PV].pv = up;
+ aux[PERL_MULTICONCAT_IX_UTF8_LEN].size = ulen;
+
+ for (n = 0; n < (nargs + 1); n++) {
+ SSize_t l, ul, i;
+ l = ul = (lens++)->size;
+ for (i = 0; i < l; i++) {
+ U8 c = *p++;
+ if (UTF8_IS_INVARIANT(c))
+ *up++ = c;
+ else {
+ *up++ = UTF8_EIGHT_BIT_HI(c);
+ *up++ = UTF8_EIGHT_BIT_LO(c);
+ ul++;
+ }
+ }
+ (ulens++)->size = ul;
+ }
+ }
+
+ if (stringop) {
+ /* if there was a top(ish)-level OP_STRINGIFY, we need to keep
+ * that op's first child - an ex-PUSHMARK - because the op_next of
+ * the previous op may point to it (i.e. it's the entry point for
+ * the o optree)
+ */
+ OP *pmop =
+ (stringop == o)
+ ? op_sibling_splice(o, lastkidop, 1, NULL)
+ : op_sibling_splice(stringop, NULL, 1, NULL);
+ assert(OP_TYPE_IS_OR_WAS_NN(pmop, OP_PUSHMARK));
+ op_sibling_splice(o, NULL, 0, pmop);
+ if (!lastkidop)
+ lastkidop = pmop;
+ }
+
+ /* Optimise
+ * target = A.B.C...
+ * target .= A.B.C...
+ */
+
+ if (targetop) {
+ assert(!targmyop);
+
+ if (o->op_type == OP_SASSIGN) {
+ /* Move the target subtree from being the last of o's children
+ * to being the last of o's preserved children.
+ * Note the difference between 'target = ...' and 'target .= ...':
+ * for the former, target is executed last; for the latter,
+ * first.
+ */
+ kid = OpSIBLING(lastkidop);
+ op_sibling_splice(o, kid, 1, NULL); /* cut target op */
+ op_sibling_splice(o, lastkidop, 0, targetop); /* and paste */
+ lastkidop->op_next = kid->op_next;
+ lastkidop = targetop;
+ }
+ else {
+ /* Move the target subtree from being the first of o's
+ * original children to being the first of *all* o's children.
+ */
+ if (lastkidop) {
+ op_sibling_splice(o, lastkidop, 1, NULL); /* cut target op */
+ op_sibling_splice(o, NULL, 0, targetop); /* and paste*/
+ }
+ else {
+ /* if the RHS of .= doesn't contain a concat (e.g.
+ * $x .= "foo"), it gets missed by the "strip ops from the
+ * tree and add to o" loop earlier */
+ assert(topop->op_type != OP_CONCAT);
+ if (stringop) {
+ /* in e.g. $x .= "$y", move the $y expression
+ * from being a child of OP_STRINGIFY to being the
+ * second child of the OP_CONCAT
+ */
+ assert(cUNOPx(stringop)->op_first == topop);
+ op_sibling_splice(stringop, NULL, 1, NULL);
+ op_sibling_splice(o, cUNOPo->op_first, 0, topop);
+ }
+ assert(topop == OpSIBLING(cBINOPo->op_first));
+ if (toparg->p)
+ op_null(topop);
+ lastkidop = topop;
+ }
+ }
+
+ if (is_targable) {
+ /* optimise
+ * my $lex = A.B.C...
+ * $lex = A.B.C...
+ * $lex .= A.B.C...
+ * The original padsv op is kept but nulled in case it's the
+ * entry point for the optree (which it will be for
+ * '$lex .= ... '
+ */
+ private_flags |= OPpTARGET_MY;
+ private_flags |= (targetop->op_private & OPpLVAL_INTRO);
+ o->op_targ = targetop->op_targ;
+ targetop->op_targ = 0;
+ op_null(targetop);
+ }
+ else
+ flags |= OPf_STACKED;
+ }
+ else if (targmyop) {
+ private_flags |= OPpTARGET_MY;
+ if (o != targmyop) {
+ o->op_targ = targmyop->op_targ;
+ targmyop->op_targ = 0;
+ }
+ }
+
+ /* detach the emaciated husk of the sprintf/concat optree and free it */
+ for (;;) {
+ kid = op_sibling_splice(o, lastkidop, 1, NULL);
+ if (!kid)
+ break;
+ op_free(kid);
+ }
+
+ /* and convert o into a multiconcat */
+
+ o->op_flags = (flags|OPf_KIDS|stacked_last
+ |(o->op_flags & (OPf_WANT|OPf_PARENS)));
+ o->op_private = private_flags;
+ o->op_type = OP_MULTICONCAT;
+ o->op_ppaddr = PL_ppaddr[OP_MULTICONCAT];
+ cUNOP_AUXo->op_aux = aux;
+}
+
/* do all the final processing on an optree (e.g. running the peephole
* optimiser on it), then attach it to cv (if cv is non-null)
@@ -2549,6 +3442,13 @@ S_optimize_op(pTHX_ OP* o)
break;
+ case OP_CONCAT:
+ case OP_SASSIGN:
+ case OP_STRINGIFY:
+ case OP_SPRINTF:
+ S_maybe_multiconcat(aTHX_ o);
+ break;
+
case OP_SUBST:
if (cPMOPo->op_pmreplrootu.op_pmreplroot)
optimize_op(cPMOPo->op_pmreplrootu.op_pmreplroot);
@@ -9903,6 +10803,7 @@ Perl_ck_concat(pTHX_ OP *o)
PERL_ARGS_ASSERT_CK_CONCAT;
PERL_UNUSED_CONTEXT;
+ /* reuse the padtmp returned by the concat child */
if (kid->op_type == OP_CONCAT && !(kid->op_private & OPpTARGET_MY) &&
!(kUNOP->op_first->op_flags & OPf_MOD))
o->op_flags |= OPf_STACKED;
@@ -10858,6 +11759,7 @@ Perl_ck_sassign(pTHX_ OP *o)
return S_maybe_targlex(aTHX_ o);
}
+
OP *
Perl_ck_match(pTHX_ OP *o)
{
diff --git a/op.h b/op.h
index ef85148bfd..513fc655fd 100644
--- a/op.h
+++ b/op.h
@@ -188,6 +188,8 @@ typedef union {
SV *sv;
IV iv;
UV uv;
+ char *pv;
+ SSize_t size;
} UNOP_AUX_item;
#ifdef USE_ITHREADS
diff --git a/opcode.h b/opcode.h
index d3ff17c1a0..10e68168de 100644
--- a/opcode.h
+++ b/opcode.h
@@ -213,6 +213,7 @@ EXTCONST char* const PL_op_name[] = {
"subtract",
"i_subtract",
"concat",
+ "multiconcat",
"stringify",
"left_shift",
"right_shift",
@@ -617,6 +618,7 @@ EXTCONST char* const PL_op_desc[] = {
"subtraction (-)",
"integer subtraction (-)",
"concatenation (.) or string",
+ "concatenation (.) or string",
"string",
"left bitshift (<<)",
"right bitshift (>>)",
@@ -1033,6 +1035,7 @@ EXT Perl_ppaddr_t PL_ppaddr[] /* or perlvars.h */
Perl_pp_subtract,
Perl_pp_i_subtract,
Perl_pp_concat,
+ Perl_pp_multiconcat,
Perl_pp_stringify,
Perl_pp_left_shift,
Perl_pp_right_shift,
@@ -1445,6 +1448,7 @@ EXT Perl_check_t PL_check[] /* or perlvars.h */
Perl_ck_null, /* subtract */
Perl_ck_null, /* i_subtract */
Perl_ck_concat, /* concat */
+ Perl_ck_null, /* multiconcat */
Perl_ck_stringify, /* stringify */
Perl_ck_bitop, /* left_shift */
Perl_ck_bitop, /* right_shift */
@@ -1853,6 +1857,7 @@ EXTCONST U32 PL_opargs[] = {
0x0001123e, /* subtract */
0x0001121e, /* i_subtract */
0x0001121e, /* concat */
+ 0x00000f1c, /* multiconcat */
0x0000141e, /* stringify */
0x0001121e, /* left_shift */
0x0001121e, /* right_shift */
@@ -2232,6 +2237,7 @@ END_EXTERN_C
#define OPpITER_DEF 0x08
#define OPpLVREF_ITER 0x08
#define OPpMAYBE_LVSUB 0x08
+#define OPpMULTICONCAT_STRINGIFY 0x08
#define OPpREVERSE_INPLACE 0x08
#define OPpSORT_INPLACE 0x08
#define OPpSPLIT_LEX 0x08
@@ -2258,6 +2264,7 @@ END_EXTERN_C
#define OPpKVSLICE 0x20
#define OPpLVREF_HV 0x20
#define OPpMAY_RETURN_CONSTANT 0x20
+#define OPpMULTICONCAT_FAKE 0x20
#define OPpMULTIDEREF_DELETE 0x20
#define OPpOPEN_IN_CRLF 0x20
#define OPpSORT_QSORT 0x20
@@ -2278,6 +2285,7 @@ END_EXTERN_C
#define OPpINDEX_BOOLNEG 0x40
#define OPpLIST_GUESSED 0x40
#define OPpLVAL_DEFER 0x40
+#define OPpMULTICONCAT_APPEND 0x40
#define OPpOPEN_OUT_RAW 0x40
#define OPpOUR_INTRO 0x40
#define OPpPAD_STATE 0x40
@@ -2326,6 +2334,7 @@ EXTCONST char PL_op_private_labels[] = {
'<','U','T','F','\0',
'>','U','T','F','\0',
'A','M','P','E','R','\0',
+ 'A','P','P','E','N','D','\0',
'A','S','S','I','G','N','\0',
'A','V','\0',
'B','A','R','E','\0',
@@ -2404,6 +2413,7 @@ EXTCONST char PL_op_private_labels[] = {
'S','T','A','B','L','E','\0',
'S','T','A','T','E','\0',
'S','T','R','I','C','T','\0',
+ 'S','T','R','I','N','G','I','F','Y','\0',
'S','U','B','\0',
'S','V','\0',
'T','A','R','G','\0',
@@ -2432,14 +2442,14 @@ EXTCONST char PL_op_private_labels[] = {
EXTCONST I16 PL_op_private_bitfields[] = {
0, 8, -1,
0, 8, -1,
- 0, 565, -1,
+ 0, 582, -1,
0, 8, -1,
0, 8, -1,
- 0, 572, -1,
- 0, 561, -1,
- 1, -1, 0, 529, 1, 33, 2, 283, -1,
- 4, -1, 1, 164, 2, 171, 3, 178, -1,
- 4, -1, 0, 529, 1, 33, 2, 283, 3, 110, -1,
+ 0, 589, -1,
+ 0, 578, -1,
+ 1, -1, 0, 546, 1, 40, 2, 290, -1,
+ 4, -1, 1, 171, 2, 178, 3, 185, -1,
+ 4, -1, 0, 546, 1, 40, 2, 290, 3, 117, -1,
};
@@ -2515,7 +2525,8 @@ EXTCONST I16 PL_op_private_bitdef_ix[] = {
80, /* subtract */
80, /* i_subtract */
80, /* concat */
- 84, /* stringify */
+ 84, /* multiconcat */
+ 90, /* stringify */
80, /* left_shift */
80, /* right_shift */
12, /* lt */
@@ -2555,11 +2566,11 @@ EXTCONST I16 PL_op_private_bitdef_ix[] = {
75, /* ncomplement */
75, /* scomplement */
12, /* smartmatch */
- 84, /* atan2 */
+ 90, /* atan2 */
75, /* sin */
75, /* cos */
- 84, /* rand */
- 84, /* srand */
+ 90, /* rand */
+ 90, /* srand */
75, /* exp */
75, /* log */
75, /* sqrt */
@@ -2567,97 +2578,97 @@ EXTCONST I16 PL_op_private_bitdef_ix[] = {
75, /* hex */
75, /* oct */
75, /* abs */
- 86, /* length */
- 89, /* substr */
- 92, /* vec */
- 94, /* index */
- 94, /* rindex */
+ 92, /* length */
+ 95, /* substr */
+ 98, /* vec */
+ 100, /* index */
+ 100, /* rindex */
52, /* sprintf */
52, /* formline */
75, /* ord */
75, /* chr */
- 84, /* crypt */
+ 90, /* crypt */
0, /* ucfirst */
0, /* lcfirst */
0, /* uc */
0, /* lc */
0, /* quotemeta */
- 98, /* rv2av */
- 105, /* aelemfast */
- 105, /* aelemfast_lex */
- 106, /* aelem */
- 111, /* aslice */
- 114, /* kvaslice */
+ 104, /* rv2av */
+ 111, /* aelemfast */
+ 111, /* aelemfast_lex */
+ 112, /* aelem */
+ 117, /* aslice */
+ 120, /* kvaslice */
0, /* aeach */
0, /* avalues */
40, /* akeys */
0, /* each */
40, /* values */
40, /* keys */
- 115, /* delete */
- 119, /* exists */
- 121, /* rv2hv */
- 106, /* helem */
- 111, /* hslice */
- 114, /* kvhslice */
- 129, /* multideref */
+ 121, /* delete */
+ 125, /* exists */
+ 127, /* rv2hv */
+ 112, /* helem */
+ 117, /* hslice */
+ 120, /* kvhslice */
+ 135, /* multideref */
52, /* unpack */
52, /* pack */
- 136, /* split */
+ 142, /* split */
52, /* join */
- 141, /* list */
+ 147, /* list */
12, /* lslice */
52, /* anonlist */
52, /* anonhash */
52, /* splice */
- 84, /* push */
+ 90, /* push */
0, /* pop */
0, /* shift */
- 84, /* unshift */
- 143, /* sort */
- 151, /* reverse */
+ 90, /* unshift */
+ 149, /* sort */
+ 157, /* reverse */
0, /* grepstart */
- 153, /* grepwhile */
+ 159, /* grepwhile */
0, /* mapstart */
0, /* mapwhile */
0, /* range */
- 155, /* flip */
- 155, /* flop */
+ 161, /* flip */
+ 161, /* flop */
0, /* and */
0, /* or */
12, /* xor */
0, /* dor */
- 157, /* cond_expr */
+ 163, /* cond_expr */
0, /* andassign */
0, /* orassign */
0, /* dorassign */
- 159, /* entersub */
- 166, /* leavesub */
- 166, /* leavesublv */
+ 165, /* entersub */
+ 172, /* leavesub */
+ 172, /* leavesublv */
0, /* argcheck */
- 168, /* argelem */
+ 174, /* argelem */
0, /* argdefelem */
- 170, /* caller */
+ 176, /* caller */
52, /* warn */
52, /* die */
52, /* reset */
-1, /* lineseq */
- 172, /* nextstate */
- 172, /* dbstate */
+ 178, /* nextstate */
+ 178, /* dbstate */
-1, /* unstack */
-1, /* enter */
- 173, /* leave */
+ 179, /* leave */
-1, /* scope */
- 175, /* enteriter */
- 179, /* iter */
+ 181, /* enteriter */
+ 185, /* iter */
-1, /* enterloop */
- 180, /* leaveloop */
+ 186, /* leaveloop */
-1, /* return */
- 182, /* last */
- 182, /* next */
- 182, /* redo */
- 182, /* dump */
- 182, /* goto */
+ 188, /* last */
+ 188, /* next */
+ 188, /* redo */
+ 188, /* dump */
+ 188, /* goto */
52, /* exit */
0, /* method */
0, /* method_named */
@@ -2670,7 +2681,7 @@ EXTCONST I16 PL_op_private_bitdef_ix[] = {
0, /* leavewhen */
-1, /* break */
-1, /* continue */
- 184, /* open */
+ 190, /* open */
52, /* close */
52, /* pipe_op */
52, /* fileno */
@@ -2686,7 +2697,7 @@ EXTCONST I16 PL_op_private_bitdef_ix[] = {
52, /* getc */
52, /* read */
52, /* enterwrite */
- 166, /* leavewrite */
+ 172, /* leavewrite */
-1, /* prtf */
-1, /* print */
-1, /* say */
@@ -2700,7 +2711,7 @@ EXTCONST I16 PL_op_private_bitdef_ix[] = {
52, /* truncate */
52, /* fcntl */
52, /* ioctl */
- 84, /* flock */
+ 90, /* flock */
52, /* send */
52, /* recv */
52, /* socket */
@@ -2716,44 +2727,44 @@ EXTCONST I16 PL_op_private_bitdef_ix[] = {
0, /* getpeername */
0, /* lstat */
0, /* stat */
- 189, /* ftrread */
- 189, /* ftrwrite */
- 189, /* ftrexec */
- 189, /* fteread */
- 189, /* ftewrite */
- 189, /* fteexec */
- 194, /* ftis */
- 194, /* ftsize */
- 194, /* ftmtime */
- 194, /* ftatime */
- 194, /* ftctime */
- 194, /* ftrowned */
- 194, /* fteowned */
- 194, /* ftzero */
- 194, /* ftsock */
- 194, /* ftchr */
- 194, /* ftblk */
- 194, /* ftfile */
- 194, /* ftdir */
- 194, /* ftpipe */
- 194, /* ftsuid */
- 194, /* ftsgid */
- 194, /* ftsvtx */
- 194, /* ftlink */
- 194, /* fttty */
- 194, /* fttext */
- 194, /* ftbinary */
- 84, /* chdir */
- 84, /* chown */
+ 195, /* ftrread */
+ 195, /* ftrwrite */
+ 195, /* ftrexec */
+ 195, /* fteread */
+ 195, /* ftewrite */
+ 195, /* fteexec */
+ 200, /* ftis */
+ 200, /* ftsize */
+ 200, /* ftmtime */
+ 200, /* ftatime */
+ 200, /* ftctime */
+ 200, /* ftrowned */
+ 200, /* fteowned */
+ 200, /* ftzero */
+ 200, /* ftsock */
+ 200, /* ftchr */
+ 200, /* ftblk */
+ 200, /* ftfile */
+ 200, /* ftdir */
+ 200, /* ftpipe */
+ 200, /* ftsuid */
+ 200, /* ftsgid */
+ 200, /* ftsvtx */
+ 200, /* ftlink */
+ 200, /* fttty */
+ 200, /* fttext */
+ 200, /* ftbinary */
+ 90, /* chdir */
+ 90, /* chown */
75, /* chroot */
- 84, /* unlink */
- 84, /* chmod */
- 84, /* utime */
- 84, /* rename */
- 84, /* link */
- 84, /* symlink */
+ 90, /* unlink */
+ 90, /* chmod */
+ 90, /* utime */
+ 90, /* rename */
+ 90, /* link */
+ 90, /* symlink */
0, /* readlink */
- 84, /* mkdir */
+ 90, /* mkdir */
75, /* rmdir */
52, /* open_dir */
0, /* readdir */
@@ -2762,22 +2773,22 @@ EXTCONST I16 PL_op_private_bitdef_ix[] = {
0, /* rewinddir */
0, /* closedir */
-1, /* fork */
- 198, /* wait */
- 84, /* waitpid */
- 84, /* system */
- 84, /* exec */
- 84, /* kill */
- 198, /* getppid */
- 84, /* getpgrp */
- 84, /* setpgrp */
- 84, /* getpriority */
- 84, /* setpriority */
- 198, /* time */
+ 204, /* wait */
+ 90, /* waitpid */
+ 90, /* system */
+ 90, /* exec */
+ 90, /* kill */
+ 204, /* getppid */
+ 90, /* getpgrp */
+ 90, /* setpgrp */
+ 90, /* getpriority */
+ 90, /* setpriority */
+ 204, /* time */
-1, /* tms */
0, /* localtime */
52, /* gmtime */
0, /* alarm */
- 84, /* sleep */
+ 90, /* sleep */
52, /* shmget */
52, /* shmctl */
52, /* shmread */
@@ -2792,8 +2803,8 @@ EXTCONST I16 PL_op_private_bitdef_ix[] = {
0, /* require */
0, /* dofile */
-1, /* hintseval */
- 199, /* entereval */
- 166, /* leaveeval */
+ 205, /* entereval */
+ 172, /* leaveeval */
0, /* entertry */
-1, /* leavetry */
0, /* ghbyname */
@@ -2831,18 +2842,18 @@ EXTCONST I16 PL_op_private_bitdef_ix[] = {
0, /* lock */
0, /* once */
-1, /* custom */
- 205, /* coreargs */
- 209, /* avhvswitch */
+ 211, /* coreargs */
+ 215, /* avhvswitch */
3, /* runcv */
0, /* fc */
-1, /* padcv */
-1, /* introcv */
-1, /* clonecv */
- 211, /* padrange */
- 213, /* refassign */
- 219, /* lvref */
- 225, /* lvrefslice */
- 226, /* lvavref */
+ 217, /* padrange */
+ 219, /* refassign */
+ 225, /* lvref */
+ 231, /* lvrefslice */
+ 232, /* lvavref */
0, /* anonconst */
};
@@ -2863,73 +2874,74 @@ EXTCONST I16 PL_op_private_bitdef_ix[] = {
EXTCONST U16 PL_op_private_bitdefs[] = {
0x0003, /* scalar, prototype, refgen, srefgen, readline, regcmaybe, regcreset, regcomp, substcont, chop, schop, defined, undef, study, preinc, i_preinc, predec, i_predec, postinc, i_postinc, postdec, i_postdec, negate, i_negate, not, complement, ucfirst, lcfirst, uc, lc, quotemeta, aeach, avalues, each, pop, shift, grepstart, mapstart, mapwhile, range, and, or, dor, andassign, orassign, dorassign, argcheck, argdefelem, method, method_named, method_super, method_redir, method_redir_super, entergiven, leavegiven, enterwhen, leavewhen, untie, tied, dbmclose, getsockname, getpeername, lstat, stat, readlink, readdir, telldir, rewinddir, closedir, localtime, alarm, require, dofile, entertry, ghbyname, gnbyname, gpbyname, shostent, snetent, sprotoent, sservent, gpwnam, gpwuid, ggrnam, ggrgid, lock, once, fc, anonconst */
- 0x2e5c, 0x4019, /* pushmark */
+ 0x2f3c, 0x40f9, /* pushmark */
0x00bd, /* wantarray, runcv */
- 0x0498, 0x18d0, 0x40cc, 0x3b88, 0x32a5, /* const */
- 0x2e5c, 0x33f9, /* gvsv */
- 0x1735, /* gv */
+ 0x0578, 0x19b0, 0x41ac, 0x3c68, 0x3385, /* const */
+ 0x2f3c, 0x34d9, /* gvsv */
+ 0x1815, /* gv */
0x0067, /* gelem, lt, i_lt, gt, i_gt, le, i_le, ge, i_ge, eq, i_eq, ne, i_ne, ncmp, i_ncmp, slt, sgt, sle, sge, seq, sne, scmp, bit_and, bit_xor, bit_or, sbit_and, sbit_xor, sbit_or, smartmatch, lslice, xor */
- 0x2e5c, 0x4018, 0x03d7, /* padsv */
- 0x2e5c, 0x4018, 0x0614, 0x2f4c, 0x3d09, /* padav */
- 0x2e5c, 0x4018, 0x0614, 0x06b0, 0x2f4c, 0x3d08, 0x29c1, /* padhv */
- 0x2e5c, 0x1ab8, 0x03d6, 0x2f4c, 0x31c8, 0x40c4, 0x0003, /* rv2gv */
- 0x2e5c, 0x33f8, 0x03d6, 0x40c4, 0x0003, /* rv2sv */
- 0x2f4c, 0x0003, /* av2arylen, akeys, values, keys */
- 0x313c, 0x0ef8, 0x0c54, 0x028c, 0x4288, 0x40c4, 0x0003, /* rv2cv */
- 0x0614, 0x06b0, 0x0003, /* ref */
+ 0x2f3c, 0x40f8, 0x03d7, /* padsv */
+ 0x2f3c, 0x40f8, 0x06f4, 0x302c, 0x3de9, /* padav */
+ 0x2f3c, 0x40f8, 0x06f4, 0x0790, 0x302c, 0x3de8, 0x2aa1, /* padhv */
+ 0x2f3c, 0x1b98, 0x03d6, 0x302c, 0x32a8, 0x41a4, 0x0003, /* rv2gv */
+ 0x2f3c, 0x34d8, 0x03d6, 0x41a4, 0x0003, /* rv2sv */
+ 0x302c, 0x0003, /* av2arylen, akeys, values, keys */
+ 0x321c, 0x0fd8, 0x0d34, 0x028c, 0x44a8, 0x41a4, 0x0003, /* rv2cv */
+ 0x06f4, 0x0790, 0x0003, /* ref */
0x018f, /* bless, glob, sprintf, formline, unpack, pack, join, anonlist, anonhash, splice, warn, die, reset, exit, close, pipe_op, fileno, umask, binmode, tie, dbmopen, sselect, select, getc, read, enterwrite, sysopen, sysseek, sysread, syswrite, eof, tell, seek, truncate, fcntl, ioctl, send, recv, socket, sockpair, bind, connect, listen, accept, shutdown, gsockopt, ssockopt, open_dir, seekdir, gmtime, shmget, shmctl, shmread, shmwrite, msgget, msgctl, msgsnd, msgrcv, semop, semget, semctl, ghbyaddr, gnbyaddr, gpbynumber, gsbyname, gsbyport, syscall */
- 0x35dc, 0x34f8, 0x2714, 0x2650, 0x0003, /* backtick */
- 0x0615, /* subst */
- 0x0ffc, 0x2038, 0x0834, 0x3e4c, 0x23c8, 0x01e4, 0x0141, /* trans, transr */
- 0x0e3c, 0x0538, 0x0067, /* sassign */
- 0x0af8, 0x09f4, 0x08f0, 0x2f4c, 0x0608, 0x0067, /* aassign */
- 0x4330, 0x0003, /* chomp, schomp, ncomplement, scomplement, sin, cos, exp, log, sqrt, int, hex, oct, abs, ord, chr, chroot, rmdir */
- 0x0614, 0x2f4c, 0x0003, /* pos */
- 0x4330, 0x0067, /* pow, multiply, i_multiply, divide, i_divide, modulo, i_modulo, add, i_add, subtract, i_subtract, concat, left_shift, right_shift, nbit_and, nbit_xor, nbit_or */
- 0x13b8, 0x0067, /* repeat */
- 0x4330, 0x018f, /* stringify, atan2, rand, srand, crypt, push, unshift, flock, chdir, chown, unlink, chmod, utime, rename, link, symlink, mkdir, waitpid, system, exec, kill, getpgrp, setpgrp, getpriority, setpriority, sleep */
- 0x0614, 0x4330, 0x0003, /* length */
- 0x38f0, 0x2f4c, 0x012b, /* substr */
- 0x2f4c, 0x0067, /* vec */
- 0x30b8, 0x0614, 0x4330, 0x018f, /* index, rindex */
- 0x2e5c, 0x33f8, 0x0614, 0x2f4c, 0x3d08, 0x40c4, 0x0003, /* rv2av */
+ 0x36bc, 0x35d8, 0x27f4, 0x2730, 0x0003, /* backtick */
+ 0x06f5, /* subst */
+ 0x10dc, 0x2118, 0x0914, 0x3f2c, 0x24a8, 0x01e4, 0x0141, /* trans, transr */
+ 0x0f1c, 0x0618, 0x0067, /* sassign */
+ 0x0bd8, 0x0ad4, 0x09d0, 0x302c, 0x06e8, 0x0067, /* aassign */
+ 0x4550, 0x0003, /* chomp, schomp, ncomplement, scomplement, sin, cos, exp, log, sqrt, int, hex, oct, abs, ord, chr, chroot, rmdir */
+ 0x06f4, 0x302c, 0x0003, /* pos */
+ 0x4550, 0x0067, /* pow, multiply, i_multiply, divide, i_divide, modulo, i_modulo, add, i_add, subtract, i_subtract, concat, left_shift, right_shift, nbit_and, nbit_xor, nbit_or */
+ 0x1498, 0x0067, /* repeat */
+ 0x2f3c, 0x0358, 0x1b94, 0x4550, 0x428c, 0x0003, /* multiconcat */
+ 0x4550, 0x018f, /* stringify, atan2, rand, srand, crypt, push, unshift, flock, chdir, chown, unlink, chmod, utime, rename, link, symlink, mkdir, waitpid, system, exec, kill, getpgrp, setpgrp, getpriority, setpriority, sleep */
+ 0x06f4, 0x4550, 0x0003, /* length */
+ 0x39d0, 0x302c, 0x012b, /* substr */
+ 0x302c, 0x0067, /* vec */
+ 0x3198, 0x06f4, 0x4550, 0x018f, /* index, rindex */
+ 0x2f3c, 0x34d8, 0x06f4, 0x302c, 0x3de8, 0x41a4, 0x0003, /* rv2av */
0x025f, /* aelemfast, aelemfast_lex */
- 0x2e5c, 0x2d58, 0x03d6, 0x2f4c, 0x0067, /* aelem, helem */
- 0x2e5c, 0x2f4c, 0x3d09, /* aslice, hslice */
- 0x2f4d, /* kvaslice, kvhslice */
- 0x2e5c, 0x3c58, 0x2a74, 0x0003, /* delete */
- 0x41b8, 0x0003, /* exists */
- 0x2e5c, 0x33f8, 0x0614, 0x06b0, 0x2f4c, 0x3d08, 0x40c4, 0x29c1, /* rv2hv */
- 0x2e5c, 0x2d58, 0x1074, 0x19d0, 0x2f4c, 0x40c4, 0x0003, /* multideref */
- 0x2e5c, 0x33f8, 0x0350, 0x2b6c, 0x2489, /* split */
- 0x2e5c, 0x20f9, /* list */
- 0x449c, 0x3f38, 0x3694, 0x1310, 0x27ac, 0x39e8, 0x28a4, 0x3361, /* sort */
- 0x27ac, 0x0003, /* reverse */
- 0x0614, 0x0003, /* grepwhile */
- 0x2bf8, 0x0003, /* flip, flop */
- 0x2e5c, 0x0003, /* cond_expr */
- 0x2e5c, 0x0ef8, 0x03d6, 0x028c, 0x4288, 0x40c4, 0x2561, /* entersub */
- 0x3758, 0x0003, /* leavesub, leavesublv, leavewrite, leaveeval */
+ 0x2f3c, 0x2e38, 0x03d6, 0x302c, 0x0067, /* aelem, helem */
+ 0x2f3c, 0x302c, 0x3de9, /* aslice, hslice */
+ 0x302d, /* kvaslice, kvhslice */
+ 0x2f3c, 0x3d38, 0x2b54, 0x0003, /* delete */
+ 0x43d8, 0x0003, /* exists */
+ 0x2f3c, 0x34d8, 0x06f4, 0x0790, 0x302c, 0x3de8, 0x41a4, 0x2aa1, /* rv2hv */
+ 0x2f3c, 0x2e38, 0x1154, 0x1ab0, 0x302c, 0x41a4, 0x0003, /* multideref */
+ 0x2f3c, 0x34d8, 0x0430, 0x2c4c, 0x2569, /* split */
+ 0x2f3c, 0x21d9, /* list */
+ 0x46bc, 0x4018, 0x3774, 0x13f0, 0x288c, 0x3ac8, 0x2984, 0x3441, /* sort */
+ 0x288c, 0x0003, /* reverse */
+ 0x06f4, 0x0003, /* grepwhile */
+ 0x2cd8, 0x0003, /* flip, flop */
+ 0x2f3c, 0x0003, /* cond_expr */
+ 0x2f3c, 0x0fd8, 0x03d6, 0x028c, 0x44a8, 0x41a4, 0x2641, /* entersub */
+ 0x3838, 0x0003, /* leavesub, leavesublv, leavewrite, leaveeval */
0x02aa, 0x0003, /* argelem */
0x00bc, 0x018f, /* caller */
- 0x22d5, /* nextstate, dbstate */
- 0x2cfc, 0x3759, /* leave */
- 0x2e5c, 0x33f8, 0x0f6c, 0x3a65, /* enteriter */
- 0x3a65, /* iter */
- 0x2cfc, 0x0067, /* leaveloop */
- 0x45bc, 0x0003, /* last, next, redo, dump, goto */
- 0x35dc, 0x34f8, 0x2714, 0x2650, 0x018f, /* open */
- 0x1c70, 0x1ecc, 0x1d88, 0x1b44, 0x0003, /* ftrread, ftrwrite, ftrexec, fteread, ftewrite, fteexec */
- 0x1c70, 0x1ecc, 0x1d88, 0x0003, /* ftis, ftsize, ftmtime, ftatime, ftctime, ftrowned, fteowned, ftzero, ftsock, ftchr, ftblk, ftfile, ftdir, ftpipe, ftsuid, ftsgid, ftsvtx, ftlink, fttty, fttext, ftbinary */
- 0x4331, /* wait, getppid, time */
- 0x37f4, 0x0d10, 0x076c, 0x4408, 0x21e4, 0x0003, /* entereval */
- 0x301c, 0x0018, 0x1224, 0x1141, /* coreargs */
- 0x2f4c, 0x00c7, /* avhvswitch */
- 0x2e5c, 0x01fb, /* padrange */
- 0x2e5c, 0x4018, 0x04f6, 0x292c, 0x1828, 0x0067, /* refassign */
- 0x2e5c, 0x4018, 0x04f6, 0x292c, 0x1828, 0x0003, /* lvref */
- 0x2e5d, /* lvrefslice */
- 0x2e5c, 0x4018, 0x0003, /* lvavref */
+ 0x23b5, /* nextstate, dbstate */
+ 0x2ddc, 0x3839, /* leave */
+ 0x2f3c, 0x34d8, 0x104c, 0x3b45, /* enteriter */
+ 0x3b45, /* iter */
+ 0x2ddc, 0x0067, /* leaveloop */
+ 0x47dc, 0x0003, /* last, next, redo, dump, goto */
+ 0x36bc, 0x35d8, 0x27f4, 0x2730, 0x018f, /* open */
+ 0x1d50, 0x1fac, 0x1e68, 0x1c24, 0x0003, /* ftrread, ftrwrite, ftrexec, fteread, ftewrite, fteexec */
+ 0x1d50, 0x1fac, 0x1e68, 0x0003, /* ftis, ftsize, ftmtime, ftatime, ftctime, ftrowned, fteowned, ftzero, ftsock, ftchr, ftblk, ftfile, ftdir, ftpipe, ftsuid, ftsgid, ftsvtx, ftlink, fttty, fttext, ftbinary */
+ 0x4551, /* wait, getppid, time */
+ 0x38d4, 0x0df0, 0x084c, 0x4628, 0x22c4, 0x0003, /* entereval */
+ 0x30fc, 0x0018, 0x1304, 0x1221, /* coreargs */
+ 0x302c, 0x00c7, /* avhvswitch */
+ 0x2f3c, 0x01fb, /* padrange */
+ 0x2f3c, 0x40f8, 0x04f6, 0x2a0c, 0x1908, 0x0067, /* refassign */
+ 0x2f3c, 0x40f8, 0x04f6, 0x2a0c, 0x1908, 0x0003, /* lvref */
+ 0x2f3d, /* lvrefslice */
+ 0x2f3c, 0x40f8, 0x0003, /* lvavref */
};
@@ -3005,6 +3017,7 @@ EXTCONST U8 PL_op_private_valid[] = {
/* SUBTRACT */ (OPpARG2_MASK|OPpTARGET_MY),
/* I_SUBTRACT */ (OPpARG2_MASK|OPpTARGET_MY),
/* CONCAT */ (OPpARG2_MASK|OPpTARGET_MY),
+ /* MULTICONCAT */ (OPpARG1_MASK|OPpMULTICONCAT_STRINGIFY|OPpTARGET_MY|OPpMULTICONCAT_FAKE|OPpMULTICONCAT_APPEND|OPpLVAL_INTRO),
/* STRINGIFY */ (OPpARG4_MASK|OPpTARGET_MY),
/* LEFT_SHIFT */ (OPpARG2_MASK|OPpTARGET_MY),
/* RIGHT_SHIFT */ (OPpARG2_MASK|OPpTARGET_MY),
diff --git a/opnames.h b/opnames.h
index 55b6b4266b..d87ba88f01 100644
--- a/opnames.h
+++ b/opnames.h
@@ -81,339 +81,340 @@ typedef enum opcode {
OP_SUBTRACT = 64,
OP_I_SUBTRACT = 65,
OP_CONCAT = 66,
- OP_STRINGIFY = 67,
- OP_LEFT_SHIFT = 68,
- OP_RIGHT_SHIFT = 69,
- OP_LT = 70,
- OP_I_LT = 71,
- OP_GT = 72,
- OP_I_GT = 73,
- OP_LE = 74,
- OP_I_LE = 75,
- OP_GE = 76,
- OP_I_GE = 77,
- OP_EQ = 78,
- OP_I_EQ = 79,
- OP_NE = 80,
- OP_I_NE = 81,
- OP_NCMP = 82,
- OP_I_NCMP = 83,
- OP_SLT = 84,
- OP_SGT = 85,
- OP_SLE = 86,
- OP_SGE = 87,
- OP_SEQ = 88,
- OP_SNE = 89,
- OP_SCMP = 90,
- OP_BIT_AND = 91,
- OP_BIT_XOR = 92,
- OP_BIT_OR = 93,
- OP_NBIT_AND = 94,
- OP_NBIT_XOR = 95,
- OP_NBIT_OR = 96,
- OP_SBIT_AND = 97,
- OP_SBIT_XOR = 98,
- OP_SBIT_OR = 99,
- OP_NEGATE = 100,
- OP_I_NEGATE = 101,
- OP_NOT = 102,
- OP_COMPLEMENT = 103,
- OP_NCOMPLEMENT = 104,
- OP_SCOMPLEMENT = 105,
- OP_SMARTMATCH = 106,
- OP_ATAN2 = 107,
- OP_SIN = 108,
- OP_COS = 109,
- OP_RAND = 110,
- OP_SRAND = 111,
- OP_EXP = 112,
- OP_LOG = 113,
- OP_SQRT = 114,
- OP_INT = 115,
- OP_HEX = 116,
- OP_OCT = 117,
- OP_ABS = 118,
- OP_LENGTH = 119,
- OP_SUBSTR = 120,
- OP_VEC = 121,
- OP_INDEX = 122,
- OP_RINDEX = 123,
- OP_SPRINTF = 124,
- OP_FORMLINE = 125,
- OP_ORD = 126,
- OP_CHR = 127,
- OP_CRYPT = 128,
- OP_UCFIRST = 129,
- OP_LCFIRST = 130,
- OP_UC = 131,
- OP_LC = 132,
- OP_QUOTEMETA = 133,
- OP_RV2AV = 134,
- OP_AELEMFAST = 135,
- OP_AELEMFAST_LEX = 136,
- OP_AELEM = 137,
- OP_ASLICE = 138,
- OP_KVASLICE = 139,
- OP_AEACH = 140,
- OP_AVALUES = 141,
- OP_AKEYS = 142,
- OP_EACH = 143,
- OP_VALUES = 144,
- OP_KEYS = 145,
- OP_DELETE = 146,
- OP_EXISTS = 147,
- OP_RV2HV = 148,
- OP_HELEM = 149,
- OP_HSLICE = 150,
- OP_KVHSLICE = 151,
- OP_MULTIDEREF = 152,
- OP_UNPACK = 153,
- OP_PACK = 154,
- OP_SPLIT = 155,
- OP_JOIN = 156,
- OP_LIST = 157,
- OP_LSLICE = 158,
- OP_ANONLIST = 159,
- OP_ANONHASH = 160,
- OP_SPLICE = 161,
- OP_PUSH = 162,
- OP_POP = 163,
- OP_SHIFT = 164,
- OP_UNSHIFT = 165,
- OP_SORT = 166,
- OP_REVERSE = 167,
- OP_GREPSTART = 168,
- OP_GREPWHILE = 169,
- OP_MAPSTART = 170,
- OP_MAPWHILE = 171,
- OP_RANGE = 172,
- OP_FLIP = 173,
- OP_FLOP = 174,
- OP_AND = 175,
- OP_OR = 176,
- OP_XOR = 177,
- OP_DOR = 178,
- OP_COND_EXPR = 179,
- OP_ANDASSIGN = 180,
- OP_ORASSIGN = 181,
- OP_DORASSIGN = 182,
- OP_ENTERSUB = 183,
- OP_LEAVESUB = 184,
- OP_LEAVESUBLV = 185,
- OP_ARGCHECK = 186,
- OP_ARGELEM = 187,
- OP_ARGDEFELEM = 188,
- OP_CALLER = 189,
- OP_WARN = 190,
- OP_DIE = 191,
- OP_RESET = 192,
- OP_LINESEQ = 193,
- OP_NEXTSTATE = 194,
- OP_DBSTATE = 195,
- OP_UNSTACK = 196,
- OP_ENTER = 197,
- OP_LEAVE = 198,
- OP_SCOPE = 199,
- OP_ENTERITER = 200,
- OP_ITER = 201,
- OP_ENTERLOOP = 202,
- OP_LEAVELOOP = 203,
- OP_RETURN = 204,
- OP_LAST = 205,
- OP_NEXT = 206,
- OP_REDO = 207,
- OP_DUMP = 208,
- OP_GOTO = 209,
- OP_EXIT = 210,
- OP_METHOD = 211,
- OP_METHOD_NAMED = 212,
- OP_METHOD_SUPER = 213,
- OP_METHOD_REDIR = 214,
- OP_METHOD_REDIR_SUPER = 215,
- OP_ENTERGIVEN = 216,
- OP_LEAVEGIVEN = 217,
- OP_ENTERWHEN = 218,
- OP_LEAVEWHEN = 219,
- OP_BREAK = 220,
- OP_CONTINUE = 221,
- OP_OPEN = 222,
- OP_CLOSE = 223,
- OP_PIPE_OP = 224,
- OP_FILENO = 225,
- OP_UMASK = 226,
- OP_BINMODE = 227,
- OP_TIE = 228,
- OP_UNTIE = 229,
- OP_TIED = 230,
- OP_DBMOPEN = 231,
- OP_DBMCLOSE = 232,
- OP_SSELECT = 233,
- OP_SELECT = 234,
- OP_GETC = 235,
- OP_READ = 236,
- OP_ENTERWRITE = 237,
- OP_LEAVEWRITE = 238,
- OP_PRTF = 239,
- OP_PRINT = 240,
- OP_SAY = 241,
- OP_SYSOPEN = 242,
- OP_SYSSEEK = 243,
- OP_SYSREAD = 244,
- OP_SYSWRITE = 245,
- OP_EOF = 246,
- OP_TELL = 247,
- OP_SEEK = 248,
- OP_TRUNCATE = 249,
- OP_FCNTL = 250,
- OP_IOCTL = 251,
- OP_FLOCK = 252,
- OP_SEND = 253,
- OP_RECV = 254,
- OP_SOCKET = 255,
- OP_SOCKPAIR = 256,
- OP_BIND = 257,
- OP_CONNECT = 258,
- OP_LISTEN = 259,
- OP_ACCEPT = 260,
- OP_SHUTDOWN = 261,
- OP_GSOCKOPT = 262,
- OP_SSOCKOPT = 263,
- OP_GETSOCKNAME = 264,
- OP_GETPEERNAME = 265,
- OP_LSTAT = 266,
- OP_STAT = 267,
- OP_FTRREAD = 268,
- OP_FTRWRITE = 269,
- OP_FTREXEC = 270,
- OP_FTEREAD = 271,
- OP_FTEWRITE = 272,
- OP_FTEEXEC = 273,
- OP_FTIS = 274,
- OP_FTSIZE = 275,
- OP_FTMTIME = 276,
- OP_FTATIME = 277,
- OP_FTCTIME = 278,
- OP_FTROWNED = 279,
- OP_FTEOWNED = 280,
- OP_FTZERO = 281,
- OP_FTSOCK = 282,
- OP_FTCHR = 283,
- OP_FTBLK = 284,
- OP_FTFILE = 285,
- OP_FTDIR = 286,
- OP_FTPIPE = 287,
- OP_FTSUID = 288,
- OP_FTSGID = 289,
- OP_FTSVTX = 290,
- OP_FTLINK = 291,
- OP_FTTTY = 292,
- OP_FTTEXT = 293,
- OP_FTBINARY = 294,
- OP_CHDIR = 295,
- OP_CHOWN = 296,
- OP_CHROOT = 297,
- OP_UNLINK = 298,
- OP_CHMOD = 299,
- OP_UTIME = 300,
- OP_RENAME = 301,
- OP_LINK = 302,
- OP_SYMLINK = 303,
- OP_READLINK = 304,
- OP_MKDIR = 305,
- OP_RMDIR = 306,
- OP_OPEN_DIR = 307,
- OP_READDIR = 308,
- OP_TELLDIR = 309,
- OP_SEEKDIR = 310,
- OP_REWINDDIR = 311,
- OP_CLOSEDIR = 312,
- OP_FORK = 313,
- OP_WAIT = 314,
- OP_WAITPID = 315,
- OP_SYSTEM = 316,
- OP_EXEC = 317,
- OP_KILL = 318,
- OP_GETPPID = 319,
- OP_GETPGRP = 320,
- OP_SETPGRP = 321,
- OP_GETPRIORITY = 322,
- OP_SETPRIORITY = 323,
- OP_TIME = 324,
- OP_TMS = 325,
- OP_LOCALTIME = 326,
- OP_GMTIME = 327,
- OP_ALARM = 328,
- OP_SLEEP = 329,
- OP_SHMGET = 330,
- OP_SHMCTL = 331,
- OP_SHMREAD = 332,
- OP_SHMWRITE = 333,
- OP_MSGGET = 334,
- OP_MSGCTL = 335,
- OP_MSGSND = 336,
- OP_MSGRCV = 337,
- OP_SEMOP = 338,
- OP_SEMGET = 339,
- OP_SEMCTL = 340,
- OP_REQUIRE = 341,
- OP_DOFILE = 342,
- OP_HINTSEVAL = 343,
- OP_ENTEREVAL = 344,
- OP_LEAVEEVAL = 345,
- OP_ENTERTRY = 346,
- OP_LEAVETRY = 347,
- OP_GHBYNAME = 348,
- OP_GHBYADDR = 349,
- OP_GHOSTENT = 350,
- OP_GNBYNAME = 351,
- OP_GNBYADDR = 352,
- OP_GNETENT = 353,
- OP_GPBYNAME = 354,
- OP_GPBYNUMBER = 355,
- OP_GPROTOENT = 356,
- OP_GSBYNAME = 357,
- OP_GSBYPORT = 358,
- OP_GSERVENT = 359,
- OP_SHOSTENT = 360,
- OP_SNETENT = 361,
- OP_SPROTOENT = 362,
- OP_SSERVENT = 363,
- OP_EHOSTENT = 364,
- OP_ENETENT = 365,
- OP_EPROTOENT = 366,
- OP_ESERVENT = 367,
- OP_GPWNAM = 368,
- OP_GPWUID = 369,
- OP_GPWENT = 370,
- OP_SPWENT = 371,
- OP_EPWENT = 372,
- OP_GGRNAM = 373,
- OP_GGRGID = 374,
- OP_GGRENT = 375,
- OP_SGRENT = 376,
- OP_EGRENT = 377,
- OP_GETLOGIN = 378,
- OP_SYSCALL = 379,
- OP_LOCK = 380,
- OP_ONCE = 381,
- OP_CUSTOM = 382,
- OP_COREARGS = 383,
- OP_AVHVSWITCH = 384,
- OP_RUNCV = 385,
- OP_FC = 386,
- OP_PADCV = 387,
- OP_INTROCV = 388,
- OP_CLONECV = 389,
- OP_PADRANGE = 390,
- OP_REFASSIGN = 391,
- OP_LVREF = 392,
- OP_LVREFSLICE = 393,
- OP_LVAVREF = 394,
- OP_ANONCONST = 395,
+ OP_MULTICONCAT = 67,
+ OP_STRINGIFY = 68,
+ OP_LEFT_SHIFT = 69,
+ OP_RIGHT_SHIFT = 70,
+ OP_LT = 71,
+ OP_I_LT = 72,
+ OP_GT = 73,
+ OP_I_GT = 74,
+ OP_LE = 75,
+ OP_I_LE = 76,
+ OP_GE = 77,
+ OP_I_GE = 78,
+ OP_EQ = 79,
+ OP_I_EQ = 80,
+ OP_NE = 81,
+ OP_I_NE = 82,
+ OP_NCMP = 83,
+ OP_I_NCMP = 84,
+ OP_SLT = 85,
+ OP_SGT = 86,
+ OP_SLE = 87,
+ OP_SGE = 88,
+ OP_SEQ = 89,
+ OP_SNE = 90,
+ OP_SCMP = 91,
+ OP_BIT_AND = 92,
+ OP_BIT_XOR = 93,
+ OP_BIT_OR = 94,
+ OP_NBIT_AND = 95,
+ OP_NBIT_XOR = 96,
+ OP_NBIT_OR = 97,
+ OP_SBIT_AND = 98,
+ OP_SBIT_XOR = 99,
+ OP_SBIT_OR = 100,
+ OP_NEGATE = 101,
+ OP_I_NEGATE = 102,
+ OP_NOT = 103,
+ OP_COMPLEMENT = 104,
+ OP_NCOMPLEMENT = 105,
+ OP_SCOMPLEMENT = 106,
+ OP_SMARTMATCH = 107,
+ OP_ATAN2 = 108,
+ OP_SIN = 109,
+ OP_COS = 110,
+ OP_RAND = 111,
+ OP_SRAND = 112,
+ OP_EXP = 113,
+ OP_LOG = 114,
+ OP_SQRT = 115,
+ OP_INT = 116,
+ OP_HEX = 117,
+ OP_OCT = 118,
+ OP_ABS = 119,
+ OP_LENGTH = 120,
+ OP_SUBSTR = 121,
+ OP_VEC = 122,
+ OP_INDEX = 123,
+ OP_RINDEX = 124,
+ OP_SPRINTF = 125,
+ OP_FORMLINE = 126,
+ OP_ORD = 127,
+ OP_CHR = 128,
+ OP_CRYPT = 129,
+ OP_UCFIRST = 130,
+ OP_LCFIRST = 131,
+ OP_UC = 132,
+ OP_LC = 133,
+ OP_QUOTEMETA = 134,
+ OP_RV2AV = 135,
+ OP_AELEMFAST = 136,
+ OP_AELEMFAST_LEX = 137,
+ OP_AELEM = 138,
+ OP_ASLICE = 139,
+ OP_KVASLICE = 140,
+ OP_AEACH = 141,
+ OP_AVALUES = 142,
+ OP_AKEYS = 143,
+ OP_EACH = 144,
+ OP_VALUES = 145,
+ OP_KEYS = 146,
+ OP_DELETE = 147,
+ OP_EXISTS = 148,
+ OP_RV2HV = 149,
+ OP_HELEM = 150,
+ OP_HSLICE = 151,
+ OP_KVHSLICE = 152,
+ OP_MULTIDEREF = 153,
+ OP_UNPACK = 154,
+ OP_PACK = 155,
+ OP_SPLIT = 156,
+ OP_JOIN = 157,
+ OP_LIST = 158,
+ OP_LSLICE = 159,
+ OP_ANONLIST = 160,
+ OP_ANONHASH = 161,
+ OP_SPLICE = 162,
+ OP_PUSH = 163,
+ OP_POP = 164,
+ OP_SHIFT = 165,
+ OP_UNSHIFT = 166,
+ OP_SORT = 167,
+ OP_REVERSE = 168,
+ OP_GREPSTART = 169,
+ OP_GREPWHILE = 170,
+ OP_MAPSTART = 171,
+ OP_MAPWHILE = 172,
+ OP_RANGE = 173,
+ OP_FLIP = 174,
+ OP_FLOP = 175,
+ OP_AND = 176,
+ OP_OR = 177,
+ OP_XOR = 178,
+ OP_DOR = 179,
+ OP_COND_EXPR = 180,
+ OP_ANDASSIGN = 181,
+ OP_ORASSIGN = 182,
+ OP_DORASSIGN = 183,
+ OP_ENTERSUB = 184,
+ OP_LEAVESUB = 185,
+ OP_LEAVESUBLV = 186,
+ OP_ARGCHECK = 187,
+ OP_ARGELEM = 188,
+ OP_ARGDEFELEM = 189,
+ OP_CALLER = 190,
+ OP_WARN = 191,
+ OP_DIE = 192,
+ OP_RESET = 193,
+ OP_LINESEQ = 194,
+ OP_NEXTSTATE = 195,
+ OP_DBSTATE = 196,
+ OP_UNSTACK = 197,
+ OP_ENTER = 198,
+ OP_LEAVE = 199,
+ OP_SCOPE = 200,
+ OP_ENTERITER = 201,
+ OP_ITER = 202,
+ OP_ENTERLOOP = 203,
+ OP_LEAVELOOP = 204,
+ OP_RETURN = 205,
+ OP_LAST = 206,
+ OP_NEXT = 207,
+ OP_REDO = 208,
+ OP_DUMP = 209,
+ OP_GOTO = 210,
+ OP_EXIT = 211,
+ OP_METHOD = 212,
+ OP_METHOD_NAMED = 213,
+ OP_METHOD_SUPER = 214,
+ OP_METHOD_REDIR = 215,
+ OP_METHOD_REDIR_SUPER = 216,
+ OP_ENTERGIVEN = 217,
+ OP_LEAVEGIVEN = 218,
+ OP_ENTERWHEN = 219,
+ OP_LEAVEWHEN = 220,
+ OP_BREAK = 221,
+ OP_CONTINUE = 222,
+ OP_OPEN = 223,
+ OP_CLOSE = 224,
+ OP_PIPE_OP = 225,
+ OP_FILENO = 226,
+ OP_UMASK = 227,
+ OP_BINMODE = 228,
+ OP_TIE = 229,
+ OP_UNTIE = 230,
+ OP_TIED = 231,
+ OP_DBMOPEN = 232,
+ OP_DBMCLOSE = 233,
+ OP_SSELECT = 234,
+ OP_SELECT = 235,
+ OP_GETC = 236,
+ OP_READ = 237,
+ OP_ENTERWRITE = 238,
+ OP_LEAVEWRITE = 239,
+ OP_PRTF = 240,
+ OP_PRINT = 241,
+ OP_SAY = 242,
+ OP_SYSOPEN = 243,
+ OP_SYSSEEK = 244,
+ OP_SYSREAD = 245,
+ OP_SYSWRITE = 246,
+ OP_EOF = 247,
+ OP_TELL = 248,
+ OP_SEEK = 249,
+ OP_TRUNCATE = 250,
+ OP_FCNTL = 251,
+ OP_IOCTL = 252,
+ OP_FLOCK = 253,
+ OP_SEND = 254,
+ OP_RECV = 255,
+ OP_SOCKET = 256,
+ OP_SOCKPAIR = 257,
+ OP_BIND = 258,
+ OP_CONNECT = 259,
+ OP_LISTEN = 260,
+ OP_ACCEPT = 261,
+ OP_SHUTDOWN = 262,
+ OP_GSOCKOPT = 263,
+ OP_SSOCKOPT = 264,
+ OP_GETSOCKNAME = 265,
+ OP_GETPEERNAME = 266,
+ OP_LSTAT = 267,
+ OP_STAT = 268,
+ OP_FTRREAD = 269,
+ OP_FTRWRITE = 270,
+ OP_FTREXEC = 271,
+ OP_FTEREAD = 272,
+ OP_FTEWRITE = 273,
+ OP_FTEEXEC = 274,
+ OP_FTIS = 275,
+ OP_FTSIZE = 276,
+ OP_FTMTIME = 277,
+ OP_FTATIME = 278,
+ OP_FTCTIME = 279,
+ OP_FTROWNED = 280,
+ OP_FTEOWNED = 281,
+ OP_FTZERO = 282,
+ OP_FTSOCK = 283,
+ OP_FTCHR = 284,
+ OP_FTBLK = 285,
+ OP_FTFILE = 286,
+ OP_FTDIR = 287,
+ OP_FTPIPE = 288,
+ OP_FTSUID = 289,
+ OP_FTSGID = 290,
+ OP_FTSVTX = 291,
+ OP_FTLINK = 292,
+ OP_FTTTY = 293,
+ OP_FTTEXT = 294,
+ OP_FTBINARY = 295,
+ OP_CHDIR = 296,
+ OP_CHOWN = 297,
+ OP_CHROOT = 298,
+ OP_UNLINK = 299,
+ OP_CHMOD = 300,
+ OP_UTIME = 301,
+ OP_RENAME = 302,
+ OP_LINK = 303,
+ OP_SYMLINK = 304,
+ OP_READLINK = 305,
+ OP_MKDIR = 306,
+ OP_RMDIR = 307,
+ OP_OPEN_DIR = 308,
+ OP_READDIR = 309,
+ OP_TELLDIR = 310,
+ OP_SEEKDIR = 311,
+ OP_REWINDDIR = 312,
+ OP_CLOSEDIR = 313,
+ OP_FORK = 314,
+ OP_WAIT = 315,
+ OP_WAITPID = 316,
+ OP_SYSTEM = 317,
+ OP_EXEC = 318,
+ OP_KILL = 319,
+ OP_GETPPID = 320,
+ OP_GETPGRP = 321,
+ OP_SETPGRP = 322,
+ OP_GETPRIORITY = 323,
+ OP_SETPRIORITY = 324,
+ OP_TIME = 325,
+ OP_TMS = 326,
+ OP_LOCALTIME = 327,
+ OP_GMTIME = 328,
+ OP_ALARM = 329,
+ OP_SLEEP = 330,
+ OP_SHMGET = 331,
+ OP_SHMCTL = 332,
+ OP_SHMREAD = 333,
+ OP_SHMWRITE = 334,
+ OP_MSGGET = 335,
+ OP_MSGCTL = 336,
+ OP_MSGSND = 337,
+ OP_MSGRCV = 338,
+ OP_SEMOP = 339,
+ OP_SEMGET = 340,
+ OP_SEMCTL = 341,
+ OP_REQUIRE = 342,
+ OP_DOFILE = 343,
+ OP_HINTSEVAL = 344,
+ OP_ENTEREVAL = 345,
+ OP_LEAVEEVAL = 346,
+ OP_ENTERTRY = 347,
+ OP_LEAVETRY = 348,
+ OP_GHBYNAME = 349,
+ OP_GHBYADDR = 350,
+ OP_GHOSTENT = 351,
+ OP_GNBYNAME = 352,
+ OP_GNBYADDR = 353,
+ OP_GNETENT = 354,
+ OP_GPBYNAME = 355,
+ OP_GPBYNUMBER = 356,
+ OP_GPROTOENT = 357,
+ OP_GSBYNAME = 358,
+ OP_GSBYPORT = 359,
+ OP_GSERVENT = 360,
+ OP_SHOSTENT = 361,
+ OP_SNETENT = 362,
+ OP_SPROTOENT = 363,
+ OP_SSERVENT = 364,
+ OP_EHOSTENT = 365,
+ OP_ENETENT = 366,
+ OP_EPROTOENT = 367,
+ OP_ESERVENT = 368,
+ OP_GPWNAM = 369,
+ OP_GPWUID = 370,
+ OP_GPWENT = 371,
+ OP_SPWENT = 372,
+ OP_EPWENT = 373,
+ OP_GGRNAM = 374,
+ OP_GGRGID = 375,
+ OP_GGRENT = 376,
+ OP_SGRENT = 377,
+ OP_EGRENT = 378,
+ OP_GETLOGIN = 379,
+ OP_SYSCALL = 380,
+ OP_LOCK = 381,
+ OP_ONCE = 382,
+ OP_CUSTOM = 383,
+ OP_COREARGS = 384,
+ OP_AVHVSWITCH = 385,
+ OP_RUNCV = 386,
+ OP_FC = 387,
+ OP_PADCV = 388,
+ OP_INTROCV = 389,
+ OP_CLONECV = 390,
+ OP_PADRANGE = 391,
+ OP_REFASSIGN = 392,
+ OP_LVREF = 393,
+ OP_LVREFSLICE = 394,
+ OP_LVAVREF = 395,
+ OP_ANONCONST = 396,
OP_max
} opcode;
-#define MAXO 396
+#define MAXO 397
#define OP_FREED MAXO
/* the OP_IS_* macros are optimized to a simple range check because
diff --git a/perl.h b/perl.h
index f433dc7f43..f299835eef 100644
--- a/perl.h
+++ b/perl.h
@@ -863,6 +863,26 @@ EXTERN_C int usleep(unsigned int);
#endif /* PERL_CORE */
+/* Maximum number of args that may be passed to an OP_MULTICONCAT op.
+ * It determines the size of local arrays in S_maybe_multiconcat() and
+ * pp_multiconcat().
+ */
+#define PERL_MULTICONCAT_MAXARG 64
+
+/* The indexes of fields of a multiconcat aux struct.
+ * The fixed fields are followed by nargs+1 const segment lengths,
+ * and if utf8 and non-utf8 differ, a second nargs+1 set for utf8.
+ */
+
+#define PERL_MULTICONCAT_IX_NARGS 0 /* number of arguments */
+#define PERL_MULTICONCAT_IX_PLAIN_PV 1 /* non-utf8 constant string */
+#define PERL_MULTICONCAT_IX_PLAIN_LEN 2 /* non-utf8 constant string length */
+#define PERL_MULTICONCAT_IX_UTF8_PV 3 /* utf8 constant string */
+#define PERL_MULTICONCAT_IX_UTF8_LEN 4 /* utf8 constant string length */
+#define PERL_MULTICONCAT_IX_LENGTHS 5 /* first of nargs+1 const segment lens */
+#define PERL_MULTICONCAT_HEADER_SIZE 5 /* The number of fields of a
+ multiconcat header */
+
/* We no longer default to creating a new SV for GvSV.
Do this before embed. */
#ifndef PERL_CREATE_GVSV
diff --git a/pp_hot.c b/pp_hot.c
index 558214d6ab..fff91396ff 100644
--- a/pp_hot.c
+++ b/pp_hot.c
@@ -320,6 +320,809 @@ PP(pp_concat)
}
}
+
+/* pp_multiconcat()
+
+Concatenate one or more args, possibly interleaved with constant string
+segments. The result may be assigned to, or appended to, a variable or
+expression.
+
+Several op_flags and/or op_private bits indicate what the target is, and
+whether it's appended to. Valid permutations are:
+
+ - (PADTMP) = (A.B.C....)
+ OPpTARGET_MY $lex = (A.B.C....)
+ OPpTARGET_MY,OPpLVAL_INTRO my $lex = (A.B.C....)
+ OPpTARGET_MY,OPpMULTICONCAT_APPEND $lex .= (A.B.C....)
+ OPf_STACKED expr = (A.B.C....)
+ OPf_STACKED,OPpMULTICONCAT_APPEND expr .= (A.B.C....)
+
+Other combinations like (A.B).(C.D) are not optimised into a multiconcat
+op, as it's too hard to get the correct ordering of ties, overload etc.
+
+In addition:
+
+ OPpMULTICONCAT_FAKE: not a real concat, instead an optimised
+ sprintf "...%s...". Don't call '.'
+ overloading: only use '""' overloading.
+
+ OPpMULTICONCAT_STRINGIFY: (for Deparse's benefit) the RHS was of the
+ form "...$a...$b..." rather than
+ "..." . $a . "..." . $b . "..."
+
+An OP_MULTICONCAT is of type UNOP_AUX. The fixed slots of the aux array are
+defined with PERL_MULTICONCAT_IX_FOO constants, where:
+
+
+ FOO index description
+ -------- ----- ----------------------------------
+ NARGS 0 number of arguments
+ PLAIN_PV 1 non-utf8 constant string
+ PLAIN_LEN 2 non-utf8 constant string length
+ UTF8_PV 3 utf8 constant string
+ UTF8_LEN 4 utf8 constant string length
+ LENGTHS 5 first of nargs+1 const segment lengths
+
+The idea is that a general string concatenation will have a fixed (known
+at compile time) number of variable args, interspersed with constant
+strings, e.g. "a=$a b=$b\n"
+
+All the constant string segments "a=", " b=" and "\n" are stored as a
+single string "a= b=\n", pointed to from the PLAIN_PV/UTF8_PV slot, along
+with a series of segment lengths: e.g. 2,3,1. In the case where the
+constant string is plain but has a different utf8 representation, both
+variants are stored, and two sets of (nargs+1) segments lengths are stored
+in the slots beginning at PERL_MULTICONCAT_IX_LENGTHS.
+
+A segment length of -1 indicates that there is no constant string at that
+point; this distinguishes between e.g. ($a . $b) and ($a . "" . $b), which
+have differing overloading behaviour.
+
+*/
+
+PP(pp_multiconcat)
+{
+ dSP;
+ SV *targ; /* The SV to be assigned or appended to */
+ SV *dsv; /* the SV to concat args to (often == targ) */
+ char *dsv_pv; /* where within SvPVX(dsv) we're writing to */
+ STRLEN targ_len; /* SvCUR(targ) */
+ SV **toparg; /* the highest arg position on the stack */
+ UNOP_AUX_item *aux; /* PL_op->op_aux buffer */
+ UNOP_AUX_item *const_lens; /* the segment length array part of aux */
+ const char *const_pv; /* the current segment of the const string buf */
+ UV nargs; /* how many args were expected */
+ UV stack_adj; /* how much to adjust SP on return */
+ STRLEN grow; /* final size of destination string (dsv) */
+ UV targ_count; /* how many times targ has appeared on the RHS */
+ bool is_append; /* OPpMULTICONCAT_APPEND flag is set */
+ bool slow_concat; /* args too complex for quick concat */
+ U32 dst_utf8; /* the result will be utf8 (indicate this with
+ SVf_UTF8 in a U32, rather than using bool,
+ for ease of testing and setting) */
+ /* for each arg, holds the result of an SvPV() call */
+ struct multiconcat_svpv {
+ char *pv;
+ SSize_t len;
+ }
+ *targ_chain, /* chain of slots where targ has appeared on RHS */
+ *svpv_p, /* ptr for looping through svpv_buf */
+ *svpv_base, /* first slot (may be greater than svpv_buf), */
+ *svpv_end, /* and slot after highest result so far, of: */
+ svpv_buf[PERL_MULTICONCAT_MAXARG]; /* buf for storing SvPV() results */
+
+ aux = cUNOP_AUXx(PL_op)->op_aux;
+ stack_adj = nargs = aux[PERL_MULTICONCAT_IX_NARGS].uv;
+ is_append = cBOOL(PL_op->op_private & OPpMULTICONCAT_APPEND);
+
+ /* get targ from the stack or pad */
+
+ if (PL_op->op_flags & OPf_STACKED) {
+ if (is_append) {
+ /* for 'expr .= ...', expr is the bottom item on the stack */
+ targ = SP[-nargs];
+ stack_adj++;
+ }
+ else
+ /* for 'expr = ...', expr is the top item on the stack */
+ targ = POPs;
+ }
+ else {
+ SV **svp = &(PAD_SVl(PL_op->op_targ));
+ targ = *svp;
+ if (PL_op->op_private & OPpLVAL_INTRO) {
+ assert(PL_op->op_private & OPpTARGET_MY);
+ save_clearsv(svp);
+ }
+ if (!nargs)
+ /* $lex .= "const" doesn't cause anything to be pushed */
+ EXTEND(SP,1);
+ }
+
+ toparg = SP;
+ SP -= (nargs - 1);
+ dsv = targ; /* Set the destination for all concats. This is
+ initially targ; later on, dsv may be switched
+ to point to a TEMP SV if overloading is
+ encountered. */
+ grow = 1; /* allow for '\0' at minimum */
+ targ_count = 0;
+ targ_chain = NULL;
+ targ_len = 0;
+ svpv_end = svpv_buf;
+ /* only utf8 variants of the const strings? */
+ dst_utf8 = aux[PERL_MULTICONCAT_IX_PLAIN_PV].pv ? 0 : SVf_UTF8;
+
+
+ /* --------------------------------------------------------------
+ * Phase 1:
+ *
+ * stringify (i.e. SvPV()) every arg and store the resultant pv/len/utf8
+ * triplets in svpv_buf[]. Also increment 'grow' by the args' lengths.
+ *
+ * utf8 is indicated by storing a negative length.
+ *
+ * Where an arg is actually targ, the stringification is deferred:
+ * the length is set to 0, and the slot is added to targ_chain.
+ *
+ * If an overloaded arg is found, the loop is abandoned at that point,
+ * and dsv is set to an SvTEMP SV where the results-so-far will be
+ * accumulated.
+ */
+
+ for (; SP <= toparg; SP++, svpv_end++) {
+ bool simple_flags;
+ U32 utf8;
+ STRLEN len;
+ SV *sv;
+
+ assert(svpv_end - svpv_buf < PERL_MULTICONCAT_MAXARG);
+
+ sv = *SP;
+ simple_flags = (SvFLAGS(sv) & (SVs_GMG|SVf_ROK|SVf_POK)) == SVf_POK;
+
+ /* this if/else chain is arranged so that common/simple cases
+ * take few conditionals */
+
+ if (LIKELY(simple_flags && (sv != targ))) {
+ /* common case: sv is a simple PV and not the targ */
+ svpv_end->pv = SvPVX(sv);
+ len = SvCUR(sv);
+ }
+ else if (simple_flags) {
+ /* sv is targ (but can't be magic or overloaded).
+ * Delay storing PV pointer; instead, add slot to targ_chain
+ * so it can be populated later, after targ has been grown and
+ * we know its final SvPVX() address.
+ */
+ targ_on_rhs:
+ svpv_end->len = 0; /* zerojng here means we can skip
+ updating later if targ_len == 0 */
+ svpv_end->pv = (char*)targ_chain;
+ targ_chain = svpv_end;
+ targ_count++;
+ continue;
+ }
+ else {
+ if (UNLIKELY(SvFLAGS(sv) & (SVs_GMG|SVf_ROK))) {
+ /* its got magic, is tied, and/or is overloaded */
+ SvGETMAGIC(sv);
+
+ if (UNLIKELY(SvAMAGIC(sv))
+ && !(PL_op->op_private & OPpMULTICONCAT_FAKE))
+ {
+ /* One of the RHS args is overloaded. Abandon stringifying
+ * the args at this point, then in the concat loop later
+ * on, concat the plain args stringified so far into a
+ * TEMP SV. At the end of this function the remaining
+ * args (including the current one) will be handled
+ * specially, using overload calls.
+ * FAKE implies an optimised sprintf which doesn't use
+ * concat overloading, only "" overloading.
+ */
+ setup_overload:
+ dsv = newSVpvn_flags("", 0, SVs_TEMP);
+
+ if (targ_chain) {
+ /* Get the string value of targ and populate any
+ * RHS slots which use it */
+ char *pv = SvPV_nomg(targ, len);
+ dst_utf8 |= (SvFLAGS(targ) & SVf_UTF8);
+ grow += len * targ_count;
+ do {
+ struct multiconcat_svpv *p = targ_chain;
+ targ_chain = (struct multiconcat_svpv *)(p->pv);
+ p->pv = pv;
+ p->len = len;
+ } while (targ_chain);
+ }
+ else if (is_append)
+ SvGETMAGIC(targ);
+
+ goto phase3;
+ }
+
+ if (SvFLAGS(sv) & SVs_RMG) {
+ /* probably tied; copy it to guarantee separate values
+ * each time it's used, e.g. "-$tied-$tied-$tied-",
+ * since FETCH() isn't necessarily idempotent */
+ SV *nsv = newSV(0);
+ sv_setsv_flags(nsv, sv, SV_NOSTEAL);
+ sv_2mortal(nsv);
+ if ( sv == targ
+ && is_append
+ && nargs == 1
+ /* no const string segments */
+ && aux[PERL_MULTICONCAT_IX_LENGTHS].size == -1
+ && aux[PERL_MULTICONCAT_IX_LENGTHS+1].size == -1)
+ {
+ /* special-case $tied .= $tied.
+ *
+ * For something like
+ * sub FETCH { $i++ }
+ * then
+ * $tied .= $tied . $tied . $tied;
+ * will STORE "4123"
+ * while
+ * $tied .= $tied
+ * will STORE "12"
+ *
+ * i.e. for a single mutator concat, the LHS is
+ * retrieved first; in all other cases it is
+ * retrieved last. Whether this is sane behaviour
+ * is open to debate; but for now, multiconcat (as
+ * it is an optimisation) tries to reproduce
+ * existing behaviour.
+ */
+ sv_catsv(nsv, sv);
+ sv_setsv(sv,nsv);
+ SP++;
+ goto phase7; /* just return targ as-is */
+ }
+
+ sv = nsv;
+ }
+ }
+
+ if (sv == targ) {
+ /* must warn for each RH usage of targ, except that
+ * we will later get one warning when doing
+ * SvPV_force(targ), *except* on '.=' */
+ if ( !SvOK(sv)
+ && (targ_chain || is_append)
+ && ckWARN(WARN_UNINITIALIZED)
+ )
+ report_uninit(sv);
+ goto targ_on_rhs;
+ }
+
+ /* stringify general SV */
+ svpv_end->pv = sv_2pv_flags(sv, &len, 0);
+ }
+
+ utf8 = (SvFLAGS(sv) & SVf_UTF8);
+ dst_utf8 |= utf8;
+ ASSUME(len < SSize_t_MAX);
+ svpv_end->len = utf8 ? -(SSize_t)len : (SSize_t)len;
+ grow += len;
+ }
+
+ /* --------------------------------------------------------------
+ * Phase 2:
+ *
+ * Stringify targ:
+ *
+ * if targ appears on the RHS or is appended to, force stringify it;
+ * otherwise set it to "". Then set targ_len.
+ */
+
+ if (is_append) {
+ if (UNLIKELY(SvFLAGS(targ) & (SVs_GMG|SVf_ROK))) {
+ SvGETMAGIC(targ); /* must do before SvAMAGIC() check */
+ if (UNLIKELY(SvAMAGIC(targ))) {
+ /* $overloaded .= ....;
+ * accumulate RHS in a temp SV rather than targ,
+ * then append tmp to targ at the end using overload
+ */
+ assert(!targ_chain);
+ dsv = newSVpvn_flags("", 0, SVs_TEMP);
+ goto phase3;
+ }
+ }
+
+ if (SvOK(targ)) {
+ U32 targ_utf8;
+ stringify_targ:
+ SvPV_force_nomg_nolen(targ);
+ targ_utf8 = SvFLAGS(targ) & SVf_UTF8;
+ if (UNLIKELY(dst_utf8 & ~targ_utf8)) {
+ if (LIKELY(!IN_BYTES))
+ sv_utf8_upgrade_nomg(targ);
+ }
+ else
+ dst_utf8 |= targ_utf8;
+
+ targ_len = SvCUR(targ);
+ grow += targ_len * (targ_count + is_append);
+ goto phase3;
+ }
+ }
+ else if (UNLIKELY(SvTYPE(targ) >= SVt_REGEXP)) {
+ /* Assigning to some weird LHS type. Don't force the LHS to be an
+ * empty string; instead, do things 'long hand' by using the
+ * overload code path, which concats to a TEMP sv and does
+ * sv_catsv() calls rather than COPY()s. This ensures that even
+ * bizarre code like this doesn't break or crash:
+ * *F = *F . *F.
+ * (which makes the 'F' typeglob an alias to the
+ * '*main::F*main::F' typeglob).
+ */
+ goto setup_overload;
+ }
+ else if (targ_chain) {
+ /* targ was found on RHS.
+ * We don't need the SvGETMAGIC() call and SvAMAGIC() test as
+ * both were already done earlier in the SvPV() loop; other
+ * than that we can share the same code with the append
+ * branch below.
+ * Note that this goto jumps directly into the SvOK() branch
+ * even if targ isn't SvOK(), to force an 'uninitialised'
+ * warning; e.g.
+ * $undef .= .... targ only on LHS: don't warn
+ * $undef .= $undef .... targ on RHS too: warn
+ */
+ assert(!SvAMAGIC(targ));
+ goto stringify_targ;
+ }
+
+
+ /* unrolled SvPVCLEAR() - mostly: no need to grow or set SvCUR() to 0;
+ * those will be done later. */
+ assert(targ == dsv);
+ SV_CHECK_THINKFIRST_COW_DROP(targ);
+ SvUPGRADE(targ, SVt_PV);
+ SvFLAGS(targ) &= ~(SVf_OK|SVf_IVisUV|SVf_UTF8);
+ SvFLAGS(targ) |= (SVf_POK|SVp_POK|dst_utf8);
+
+ phase3:
+
+ /* --------------------------------------------------------------
+ * Phase 3:
+ *
+ * UTF-8 tweaks and grow dsv:
+ *
+ * Now that we know the length and utf8-ness of both the targ and
+ * args, grow dsv to the size needed to accumulate all the args, based
+ * on whether targ appears on the RHS, whether we're appending, and
+ * whether any non-utf8 args expand in size if converted to utf8.
+ *
+ * For the latter, if dst_utf8 we scan non-utf8 args looking for
+ * variant chars, and adjust the svpv->len value of those args to the
+ * utf8 size and negate it to flag them. At the same time we un-negate
+ * the lens of any utf8 args since after this phase we no longer care
+ * whether an arg is utf8 or not.
+ *
+ * Finally, initialise const_lens and const_pv based on utf8ness.
+ * Note that there are 3 permutations:
+ *
+ * * If the constant string is invariant whether utf8 or not (e.g. "abc"),
+ * then aux[PERL_MULTICONCAT_IX_PLAIN_PV/LEN] are the same as
+ * aux[PERL_MULTICONCAT_IX_UTF8_PV/LEN] and there is one set of
+ * segment lengths.
+ *
+ * * If the string is fully utf8, e.g. "\x{100}", then
+ * aux[PERL_MULTICONCAT_IX_PLAIN_PV/LEN] == (NULL,0) and there is
+ * one set of segment lengths.
+ *
+ * * If the string has different plain and utf8 representations
+ * (e.g. "\x80"), then then aux[PERL_MULTICONCAT_IX_PLAIN_PV/LEN]]
+ * holds the plain rep, while aux[PERL_MULTICONCAT_IX_UTF8_PV/LEN]
+ * holds the utf8 rep, and there are 2 sets of segment lengths,
+ * with the utf8 set following after the plain set.
+ *
+ * On entry to this section the (pv,len) pairs in svpv_buf have the
+ * following meanings:
+ * (pv, len) a plain string
+ * (pv, -len) a utf8 string
+ * (NULL, 0) left-most targ \ linked together R-to-L
+ * (next, 0) other targ / in targ_chain
+ */
+
+ /* turn off utf8 handling if 'use bytes' is in scope */
+ if (UNLIKELY(dst_utf8 && IN_BYTES)) {
+ dst_utf8 = 0;
+ SvUTF8_off(dsv);
+ /* undo all the negative lengths which flag utf8-ness */
+ for (svpv_p = svpv_buf; svpv_p < svpv_end; svpv_p++) {
+ SSize_t len = svpv_p->len;
+ if (len < 0)
+ svpv_p->len = -len;
+ }
+ }
+
+ /* grow += total of lengths of constant string segments */
+ {
+ SSize_t len;
+ len = aux[dst_utf8 ? PERL_MULTICONCAT_IX_UTF8_LEN
+ : PERL_MULTICONCAT_IX_PLAIN_LEN].size;
+ slow_concat = cBOOL(len);
+ grow += len;
+ }
+
+ const_lens = aux + PERL_MULTICONCAT_IX_LENGTHS;
+
+ if (dst_utf8) {
+ const_pv = aux[PERL_MULTICONCAT_IX_UTF8_PV].pv;
+ if ( aux[PERL_MULTICONCAT_IX_PLAIN_PV].pv
+ && const_pv != aux[PERL_MULTICONCAT_IX_PLAIN_PV].pv)
+ /* separate sets of lengths for plain and utf8 */
+ const_lens += nargs + 1;
+
+ /* If the result is utf8 but some of the args aren't,
+ * calculate how much extra growth is needed for all the chars
+ * which will expand to two utf8 bytes.
+ * Also, if the growth is non-zero, negate the length to indicate
+ * that this this is a variant string. Conversely, un-negate the
+ * length on utf8 args (which was only needed to flag non-utf8
+ * args in this loop */
+ for (svpv_p = svpv_buf; svpv_p < svpv_end; svpv_p++) {
+ char *p;
+ SSize_t len, l, extra;
+
+ len = svpv_p->len;
+ if (len <= 0) {
+ svpv_p->len = -len;
+ continue;
+ }
+
+ p = svpv_p->pv;
+ extra = 0;
+ l = len;
+ while (l--)
+ extra += !UTF8_IS_INVARIANT(*p++);
+ if (UNLIKELY(extra)) {
+ grow += extra;
+ /* -ve len indicates special handling */
+ svpv_p->len = -(len + extra);
+ slow_concat = TRUE;
+ }
+ }
+ }
+ else
+ const_pv = aux[PERL_MULTICONCAT_IX_PLAIN_PV].pv;
+
+ /* unrolled SvGROW(), except don't check for SVf_IsCOW, which should
+ * already have been dropped */
+ assert(!SvIsCOW(dsv));
+ dsv_pv = (SvLEN(dsv) < (grow) ? sv_grow(dsv,grow) : SvPVX(dsv));
+
+
+ /* --------------------------------------------------------------
+ * Phase 4:
+ *
+ * Now that dsv (which is probably targ) has been grown, we know the
+ * final address of the targ PVX, if needed. Preserve / move targ
+ * contents if appending or if targ appears on RHS.
+ *
+ * Also update svpv_buf slots in targ_chain.
+ *
+ * Don't bother with any of this if the target length is zero:
+ * targ_len is set to zero unless we're appending or targ appears on
+ * RHS. And even if it is, we can optimise by skipping this chunk of
+ * code for zero targ_len. In the latter case, we don't need to update
+ * the slots in targ_chain with the (zero length) target string, since
+ * we set the len in such slots to 0 earlier, and since the Copy() is
+ * skipped on zero length, it doesn't matter what svpv_p->pv contains.
+ *
+ * On entry to this section the (pv,len) pairs in svpv_buf have the
+ * following meanings:
+ * (pv, len) a pure-plain or utf8 string
+ * (pv, -(len+extra)) a plain string which will expand by 'extra'
+ * bytes when converted to utf8
+ * (NULL, 0) left-most targ \ linked together R-to-L
+ * (next, 0) other targ / in targ_chain
+ *
+ * On exit, the targ contents will have been moved to the
+ * earliest place they are needed (e.g. $x = "abc$x" will shift them
+ * 3 bytes, while $x .= ... will leave them at the beginning);
+ * and dst_pv will point to the location within SvPVX(dsv) where the
+ * next arg should be copied.
+ */
+
+ svpv_base = svpv_buf;
+
+ if (targ_len) {
+ struct multiconcat_svpv *tc_stop;
+ char *targ_pv = dsv_pv;
+
+ assert(targ == dsv);
+ assert(is_append || targ_count);
+
+ if (is_append) {
+ dsv_pv += targ_len;
+ tc_stop = NULL;
+ }
+ else {
+ /* The targ appears on RHS, e.g. '$t = $a . $t . $t'.
+ * Move the current contents of targ to the first
+ * position where it's needed, and use that as the src buffer
+ * for any further uses (such as the second RHS $t above).
+ * In calculating the first position, we need to sum the
+ * lengths of all consts and args before that.
+ */
+
+ UNOP_AUX_item *lens = const_lens;
+ /* length of first const string segment */
+ STRLEN offset = lens->size > 0 ? lens->size : 0;
+
+ assert(targ_chain);
+ svpv_p = svpv_base;
+
+ for (;;) {
+ SSize_t len;
+ if (!svpv_p->pv)
+ break; /* the first targ argument */
+ /* add lengths of the next arg and const string segment */
+ len = svpv_p->len;
+ if (len < 0) /* variant args have this */
+ len = -len;
+ offset += (STRLEN)len;
+ len = (++lens)->size;
+ offset += (len >= 0) ? (STRLEN)len : 0;
+ if (!offset) {
+ /* all args and consts so far are empty; update
+ * the start position for the concat later */
+ svpv_base++;
+ const_lens++;
+ }
+ svpv_p++;
+ assert(svpv_p < svpv_end);
+ }
+
+ if (offset) {
+ targ_pv += offset;
+ Move(dsv_pv, targ_pv, targ_len, char);
+ /* a negative length implies don't Copy(), but do increment */
+ svpv_p->len = -targ_len;
+ slow_concat = TRUE;
+ }
+ else {
+ /* skip the first targ copy */
+ svpv_base++;
+ const_lens++;
+ dsv_pv += targ_len;
+ }
+
+ /* Don't populate the first targ slot in the loop below; it's
+ * either not used because we advanced svpv_base beyond it, or
+ * we already stored the special -targ_len value in it
+ */
+ tc_stop = svpv_p;
+ }
+
+ /* populate slots in svpv_buf representing targ on RHS */
+ while (targ_chain != tc_stop) {
+ struct multiconcat_svpv *p = targ_chain;
+ targ_chain = (struct multiconcat_svpv *)(p->pv);
+ p->pv = targ_pv;
+ p->len = (SSize_t)targ_len;
+ }
+ }
+
+
+ /* --------------------------------------------------------------
+ * Phase 5:
+ *
+ * Append all the args in svpv_buf, plus the const strings, to dsv.
+ *
+ * On entry to this section the (pv,len) pairs in svpv_buf have the
+ * following meanings:
+ * (pv, len) a pure-plain or utf8 string (which may be targ)
+ * (pv, -(len+extra)) a plain string which will expand by 'extra'
+ * bytes when converted to utf8
+ * (0, -len) left-most targ, whose content has already
+ * been copied. Just advance dsv_pv by len.
+ */
+
+ /* If there are no constant strings and no special case args
+ * (svpv_p->len < 0), use a simpler, more efficient concat loop
+ */
+ if (!slow_concat) {
+ for (svpv_p = svpv_base; svpv_p < svpv_end; svpv_p++) {
+ SSize_t len = svpv_p->len;
+ if (!len)
+ continue;
+ Copy(svpv_p->pv, dsv_pv, len, char);
+ dsv_pv += len;
+ }
+ const_lens += (svpv_end - svpv_base + 1);
+ }
+ else {
+ /* Note that we iterate the loop nargs+1 times: to append nargs
+ * arguments and nargs+1 constant strings. For example, "-$a-$b-"
+ */
+ svpv_p = svpv_base - 1;
+
+ for (;;) {
+ SSize_t len = (const_lens++)->size;
+
+ /* append next const string segment */
+ if (len > 0) {
+ Copy(const_pv, dsv_pv, len, char);
+ dsv_pv += len;
+ const_pv += len;
+ }
+
+ if (++svpv_p == svpv_end)
+ break;
+
+ /* append next arg */
+ len = svpv_p->len;
+
+ if (LIKELY(len > 0)) {
+ Copy(svpv_p->pv, dsv_pv, len, char);
+ dsv_pv += len;
+ }
+ else if (UNLIKELY(len < 0)) {
+ /* negative length indicates two special cases */
+ const char *p = svpv_p->pv;
+ len = -len;
+ if (UNLIKELY(p)) {
+ /* copy plain-but-variant pv to a utf8 targ */
+ assert(dst_utf8);
+ while (len--) {
+ U8 c = (U8) *p++;
+ if (UTF8_IS_INVARIANT(c))
+ *dsv_pv++ = c;
+ else {
+ *dsv_pv++ = UTF8_EIGHT_BIT_HI(c);
+ *dsv_pv++ = UTF8_EIGHT_BIT_LO(c);
+ len--;
+ }
+ }
+ }
+ else
+ /* arg is already-copied targ */
+ dsv_pv += len;
+ }
+
+ }
+ }
+
+ *dsv_pv = '\0';
+ SvCUR_set(dsv, dsv_pv - SvPVX(dsv));
+ assert(grow >= SvCUR(dsv) + 1);
+ assert(SvLEN(dsv) >= SvCUR(dsv) + 1);
+
+ /* --------------------------------------------------------------
+ * Phase 6:
+ *
+ * Handle overloading. If an overloaded arg or targ was detected
+ * earlier, dsv will have been set to a new mortal, and any args and
+ * consts to the left of the first overloaded arg will have been
+ * accumulated to it. This section completes any further concatenation
+ * steps with overloading handled.
+ */
+
+ if (UNLIKELY(dsv != targ)) {
+ SV *res;
+
+ SvFLAGS(dsv) |= dst_utf8;
+
+ if (SP <= toparg) {
+ /* Stringifying the RHS was abandoned because *SP
+ * is overloaded. dsv contains all the concatted strings
+ * before *SP. Apply the rest of the args using overloading.
+ */
+ SV *left, *right, *res;
+ int i;
+ bool getmg = FALSE;
+ SV *constsv = NULL;
+ /* number of args already concatted */
+ STRLEN n = (nargs - 1) - (toparg - SP);
+ /* current arg is either the first
+ * or second value to be concatted
+ * (including constant strings), so would
+ * form part of the first concat */
+ bool first_concat = ( n == 0
+ || (n == 1 && const_lens[-2].size < 0
+ && const_lens[-1].size < 0));
+ int f_assign = first_concat ? 0 : AMGf_assign;
+
+ left = dsv;
+
+ for (; n < nargs; n++) {
+ /* loop twice, first applying the arg, then the const segment */
+ for (i = 0; i < 2; i++) {
+ if (i) {
+ /* append next const string segment */
+ STRLEN len = (STRLEN)((const_lens++)->size);
+ /* a length of -1 implies no constant string
+ * rather than a zero-length one, e.g.
+ * ($a . $b) versus ($a . "" . $b)
+ */
+ if ((SSize_t)len < 0)
+ continue;
+
+ /* set constsv to the next constant string segment */
+ if (constsv) {
+ sv_setpvn(constsv, const_pv, len);
+ if (dst_utf8)
+ SvUTF8_on(constsv);
+ else
+ SvUTF8_off(constsv);
+ }
+ else
+ constsv = newSVpvn_flags(const_pv, len,
+ (dst_utf8 | SVs_TEMP));
+
+ right = constsv;
+ const_pv += len;
+ }
+ else {
+ /* append next arg */
+ right = *SP++;
+ if (getmg)
+ SvGETMAGIC(right);
+ else
+ /* SvGETMAGIC already called on this SV just
+ * before we broke from the loop earlier */
+ getmg = TRUE;
+
+ if (first_concat && n == 0 && const_lens[-1].size < 0) {
+ /* nothing before the current arg; repeat the
+ * loop to get a second arg */
+ left = right;
+ first_concat = FALSE;
+ continue;
+ }
+ }
+
+ if ((SvAMAGIC(left) || SvAMAGIC(right))
+ && (res = amagic_call(left, right, concat_amg, f_assign))
+ )
+ left = res;
+ else {
+ if (left != dsv) {
+ sv_setsv(dsv, left);
+ left = dsv;
+ }
+ sv_catsv_nomg(left, right);
+ }
+ f_assign = AMGf_assign;
+ }
+ }
+ dsv = left;
+ }
+
+ /* assign/append RHS (dsv) to LHS (targ) */
+ if (is_append) {
+ if ((SvAMAGIC(targ) || SvAMAGIC(dsv))
+ && (res = amagic_call(targ, dsv, concat_amg, AMGf_assign))
+ )
+ sv_setsv(targ, res);
+ else
+ sv_catsv_nomg(targ, dsv);
+ }
+ else
+ sv_setsv(targ, dsv);
+ }
+
+ /* --------------------------------------------------------------
+ * Phase 7:
+ *
+ * return result
+ */
+
+ phase7:
+
+ SP -= stack_adj;
+ SvTAINT(targ);
+ SETTARG;
+ RETURN;
+}
+
+
/* push the elements of av onto the stack.
* Returns PL_op->op_next to allow tail-call optimisation of its callers */
diff --git a/pp_proto.h b/pp_proto.h
index e931546799..407cbd14a3 100644
--- a/pp_proto.h
+++ b/pp_proto.h
@@ -162,6 +162,7 @@ PERL_CALLCONV OP *Perl_pp_method_redir_super(pTHX);
PERL_CALLCONV OP *Perl_pp_method_super(pTHX);
PERL_CALLCONV OP *Perl_pp_mkdir(pTHX);
PERL_CALLCONV OP *Perl_pp_modulo(pTHX);
+PERL_CALLCONV OP *Perl_pp_multiconcat(pTHX);
PERL_CALLCONV OP *Perl_pp_multideref(pTHX);
PERL_CALLCONV OP *Perl_pp_multiply(pTHX);
PERL_CALLCONV OP *Perl_pp_nbit_and(pTHX);
diff --git a/proto.h b/proto.h
index 9efca653d9..c6a9b366c3 100644
--- a/proto.h
+++ b/proto.h
@@ -2067,6 +2067,9 @@ PERL_CALLCONV void Perl_mro_set_mro(pTHX_ struct mro_meta *const meta, SV *const
PERL_CALLCONV SV* Perl_mro_set_private_data(pTHX_ struct mro_meta *const smeta, const struct mro_alg *const which, SV *const data);
#define PERL_ARGS_ASSERT_MRO_SET_PRIVATE_DATA \
assert(smeta); assert(which); assert(data)
+PERL_CALLCONV SV* Perl_multiconcat_stringify(pTHX_ const OP* o);
+#define PERL_ARGS_ASSERT_MULTICONCAT_STRINGIFY \
+ assert(o)
PERL_CALLCONV SV* Perl_multideref_stringify(pTHX_ const OP* o, CV *cv);
#define PERL_ARGS_ASSERT_MULTIDEREF_STRINGIFY \
assert(o)
diff --git a/regen/op_private b/regen/op_private
index 94e0009425..d9082e7709 100644
--- a/regen/op_private
+++ b/regen/op_private
@@ -300,7 +300,8 @@ for (qw(nextstate dbstate)) {
addbits($_, 7 => qw(OPpLVAL_INTRO LVINTRO))
for qw(gvsv rv2sv rv2hv rv2gv rv2av aelem helem aslice split
hslice delete padsv padav padhv enteriter entersub padrange
- pushmark cond_expr refassign lvref lvrefslice lvavref multideref),
+ pushmark cond_expr refassign lvref lvrefslice lvavref multideref
+ multiconcat),
'list', # this gets set in my_attrs() for some reason
;
@@ -807,6 +808,14 @@ for (qw(index rindex)) {
addbits($_, 6 => qw(OPpINDEX_BOOLNEG NEG));
}
+addbits('multiconcat',
+ # 7 OPpLVAL_INTRO
+ 6 => qw(OPpMULTICONCAT_APPEND APPEND), # $x .= ....
+ 5 => qw(OPpMULTICONCAT_FAKE FAKE), # sprintf() optimised to MC.
+ # 4 OPpTARGET_MY
+ 3 => qw(OPpMULTICONCAT_STRINGIFY STRINGIFY), # "$a$b...", (for Deparse.pm)
+);
+
1;
diff --git a/regen/opcode.pl b/regen/opcode.pl
index d1c0faf6dc..01178668cd 100755
--- a/regen/opcode.pl
+++ b/regen/opcode.pl
@@ -55,7 +55,7 @@ while (<OPS>) {
$args = '' unless defined $args;
warn qq[Description "$desc" duplicates $seen{$desc}\n]
- if $seen{$desc} and $key !~ "transr|(?:intro|clone)cv|lvref";
+ if $seen{$desc} and $key !~ "concat|transr|(?:intro|clone)cv|lvref";
die qq[Opcode "$key" duplicates $seen{$key}\n] if $seen{$key};
die qq[Opcode "freed" is reserved for the slab allocator\n]
if $key eq 'freed';
diff --git a/regen/opcodes b/regen/opcodes
index 096c6fe823..5aa8a94fa5 100644
--- a/regen/opcodes
+++ b/regen/opcodes
@@ -131,6 +131,7 @@ i_add integer addition (+) ck_null ifsT2 S S
subtract subtraction (-) ck_null IfsT2 S S
i_subtract integer subtraction (-) ck_null ifsT2 S S
concat concatenation (.) or string ck_concat fsT2 S S
+multiconcat concatenation (.) or string ck_null sT+
stringify string ck_stringify fsT@ S
left_shift left bitshift (<<) ck_bitop fsT2 S S
diff --git a/sv.c b/sv.c
index 06168501ee..8e54364dc9 100644
--- a/sv.c
+++ b/sv.c
@@ -16996,6 +16996,9 @@ Perl_report_uninit(pTHX_ const SV *uninit_sv)
if (PL_op) {
desc = PL_op->op_type == OP_STRINGIFY && PL_op->op_folded
? "join or string"
+ : PL_op->op_type == OP_MULTICONCAT
+ && (PL_op->op_private & OPpMULTICONCAT_FAKE)
+ ? "sprintf"
: OP_DESC(PL_op);
if (uninit_sv && PL_curpad) {
varname = find_uninit_var(PL_op, uninit_sv, 0, &desc);
diff --git a/t/op/gmagic.t b/t/op/gmagic.t
index 5b2845bab4..210e8e5cc9 100644
--- a/t/op/gmagic.t
+++ b/t/op/gmagic.t
@@ -192,6 +192,29 @@ ok($wgot == 0, 'a plain *foo causes no set-magic');
'mortal magic var from do is copied';
}
+# For better or worse, the order in which concat args are fetched varies
+# depending on their number. In A .= B.C.D, they are fetched in the order
+# BCDA, while for A .= B, the order is AB (so for a single concat, the LHS
+# tied arg is FETCH()ed first). Make sure multiconcat preserves current
+# behaviour.
+
+package Increment {
+ sub TIESCALAR { bless [0, 0] }
+ # returns a new value for each FETCH, until the first STORE
+ sub FETCH { my $x = $_[0][0]; $_[0][0]++ unless $_[0][1]; $x }
+ sub STORE { @{$_[0]} = ($_[1],1) }
+
+ my $t;
+ tie $t, 'Increment';
+ my $r;
+ $r = $t . $t;
+ ::is $r, '01', 'Increment 01';
+ $r = "-$t-$t-$t-";
+ ::is $r, '-2-3-4-', 'Increment 234';
+ $t .= "-$t-$t-$t-";
+ ::is $t, '8-5-6-7-', 'Increment 8567';
+}
+
done_testing();
# adapted from Tie::Counter by Abigail
diff --git a/t/op/sprintf2.t b/t/op/sprintf2.t
index 6d32004a9d..eb90c763d8 100644
--- a/t/op/sprintf2.t
+++ b/t/op/sprintf2.t
@@ -1039,5 +1039,81 @@ like sprintf("%p", 0+'NaN'), qr/^[0-9a-f]+$/, "%p and NaN";
}
}
+# multiconcat: only one scalar assign at most should be optimised away
+
+{
+ local our $x1 = '';
+ local our $x2 = '';
+ my ($a, $b) = qw(abcd wxyz);
+ $x1 = ($x2 = sprintf("%s%s", $a, $b));
+ is $x1, "abcdwxyz", "\$x1 = \$x2 = sprintf(): x1";
+ is $x2, "abcdwxyz", "\$x1 = \$x2 = sprintf(): x2";
+
+ my $y1 = '';
+ my $y2 = '';
+ $y1 = ($y2 = sprintf("%s%s", $a, $b));
+ is $y1, "abcdwxyz", "\$y1 = \$y2 = sprintf(): y1";
+ is $y2, "abcdwxyz", "\$y1 = \$y2 = sprintf(): y2";
+}
+
+# multiconcat: mutator optimisation
+
+{
+ my $lex = 'abc';
+ my $a1 = 'pqr';
+ my $a2 = 'xyz';
+ $lex .= sprintf "(%s,%s)", $a1, $a2;
+ is $lex, "abc(pqr,xyz)", "\$lex .= sprintf ...";
+
+ local our $pkg = "def";
+ $pkg .= sprintf "(%s,%s)", $a1, $a2;
+ is $pkg, "def(pqr,xyz)", "\$pkg .= sprintf ...";
+
+ my @ary;
+ $ary[3] = "ghi";
+ $ary[3] .= sprintf "(%s,%s)", $a1, $a2;
+ is $ary[3], "ghi(pqr,xyz)", "\$ary[3] .= sprintf ...";
+}
+
+# multiconcat: strings with 0x80..0xff chars and/or utf8 chars
+
+{
+ my $plain = "abc";
+ my $s80 = "d\x{80}e";
+ my $s81 = "h\x{81}i";
+ my $utf8 = "f\x{100}g";
+ my $res;
+
+ $res = sprintf "-%s-%s-\x{90}-%s-\x{91}-%s-\x{92}",
+ $plain, $s80, $utf8, $s81;
+ is $res, "-abc-d\x{80}e-\x{90}-f\x{100}g-\x{91}-h\x{81}i-\x{92}",
+ "multiconcat 80.ff handling";
+
+ $res = sprintf "%s \x{101} %s", $plain, $plain;
+ is $res, "abc \x{101} abc", "multiconcat p u p";
+
+ $res = sprintf "%s \x{101} %s", $plain, $utf8;
+ is $res, "abc \x{101} f\x{100}g", "multiconcat p u u";
+}
+
+# check /INTRO flag set correctly on multiconcat
+
+{
+ my $a = "a";
+ my $b = "b";
+ my $x;
+ {
+ $x = sprintf "-%s-%s-", $a, $b;
+ }
+ is $x, "-a-b-", "no INTRO flag on non-my";
+ for (1,2) {
+ my $y;
+ is $y, undef, "INTRO flag on my: $_";
+ $y = sprintf "-%s-%s-", $b, $a;
+ is $y, "-b-a-", "INTRO flag on my - result: $_";
+ }
+}
+
+
done_testing();
diff --git a/t/op/state.t b/t/op/state.t
index 92f1f60887..39eeecd7c5 100644
--- a/t/op/state.t
+++ b/t/op/state.t
@@ -9,7 +9,7 @@ BEGIN {
use strict;
-plan tests => 124;
+plan tests => 126;
# Before loading feature.pm, test it with CORE::
ok eval 'CORE::state $x = 1;', 'CORE::state outside of feature.pm scope';
@@ -439,6 +439,13 @@ sub rt_123029 {
}
ok(rt_123029(), "state variables don't surprisingly disappear when accessed");
+# make sure multiconcat doesn't break state
+
+for (1,2) {
+ state $s = "-$_-";
+ is($s, "-1-", "state with multiconcat pass $_");
+}
+
__DATA__
state ($a) = 1;
(state $a) = 1;
diff --git a/t/opbasic/concat.t b/t/opbasic/concat.t
index 7802fc98ce..55965c1702 100644
--- a/t/opbasic/concat.t
+++ b/t/opbasic/concat.t
@@ -5,12 +5,13 @@ BEGIN {
@INC = '../lib';
}
-# ok() functions from other sources (e.g., t/test.pl) may use concatenation,
-# but that is what is being tested in this file. Hence, we place this file
-# in the directory where do not use t/test.pl, and we write an ok() function
-# specially written to avoid any concatenation.
+# ok()/is() functions from other sources (e.g., t/test.pl) may use
+# concatenation, but that is what is being tested in this file. Hence, we
+# place this file in the directory where do not use t/test.pl, and we
+# write functions specially written to avoid any concatenation.
my $test = 1;
+
sub ok {
my($ok, $name) = @_;
@@ -22,7 +23,23 @@ sub ok {
return $ok;
}
-print "1..31\n";
+sub is {
+ my($got, $expected, $name) = @_;
+
+ my $ok = $got eq $expected;
+
+ printf "%sok %d - %s\n", ($ok ? "" : "not "), $test, $name;
+
+ if (!$ok) {
+ printf "# Failed test at line %d\n", (caller)[2];
+ printf "# got: %s\n#expected: %s\n", $got, $expected;
+ }
+
+ $test++;
+ return $ok;
+}
+
+print "1..251\n";
($a, $b, $c) = qw(foo bar);
@@ -132,6 +149,7 @@ sub beq { use bytes; $_[0] eq $_[1]; }
my $up = "\x{100}\xB6";
my $x1 = $p;
my $y1 = $u;
+ my ($x2, $x3, $x4, $y2);
use bytes;
ok(beq($p.$u, $p.$b), "perl #26905, left eq bytes");
@@ -144,11 +162,15 @@ sub beq { use bytes; $_[0] eq $_[1]; }
$y1 .= $p;
$y2 = $u . $p;
+ $x3 = $p; $x3 .= $u . $u;
+ $x4 = $p . $u . $u;
+
no bytes;
ok(beq($x1, $x2), "perl #26905, left, .= vs = . in bytes");
ok(beq($y1, $y2), "perl #26905, right, .= vs = . in bytes");
ok(($x1 eq $x2), "perl #26905, left, .= vs = . in chars");
ok(($y1 eq $y2), "perl #26905, right, .= vs = . in chars");
+ ok(($x3 eq $x4), "perl #26905, twin, .= vs = . in chars");
}
{
@@ -164,8 +186,627 @@ sub beq { use bytes; $_[0] eq $_[1]; }
ok($x eq "ab-append-", "Appending to something initialized using constant folding");
}
+# non-POK consts
+
+{
+ my $a = "a";
+ my $b;
+ $b = $a . $a . 1;
+ ok($b eq "aa1", "aa1");
+ $b = 2 . $a . $a;
+ ok($b eq "2aa", "2aa");
+}
+
# [perl #124160]
package o { use overload "." => sub { $_[0] }, fallback => 1 }
$o = bless [], "o";
ok(ref(CORE::state $y = "a $o b") eq 'o',
'state $y = "foo $bar baz" does not stringify; only concats');
+
+
+# multiconcat: utf8 dest with non-utf8 args should grow dest sufficiently.
+# This is mainly for valgrind or ASAN to detect problems with.
+
+{
+ my $s = "\x{100}";
+ my $t = "\x80" x 1024;
+ $s .= "-$t-";
+ ok length($s) == 1027, "utf8 dest with non-utf8 args";
+}
+
+# target on RHS
+
+{
+ my $a = "abc";
+ $a .= $a;
+ ok($a eq 'abcabc', 'append self');
+
+ $a = "abc";
+ $a = $a . $a;
+ ok($a eq 'abcabc', 'double self');
+
+ $a = "abc";
+ $a .= $a . $a;
+ ok($a eq 'abcabcabc', 'append double self');
+
+ $a = "abc";
+ $a = "$a-$a";
+ ok($a eq 'abc-abc', 'double self with const');
+
+ $a = "abc";
+ $a .= "$a-$a";
+ ok($a eq 'abcabc-abc', 'append double self with const');
+
+ $a = "abc";
+ $a .= $a . $a . $a;
+ ok($a eq 'abcabcabcabc', 'append triple self');
+
+ $a = "abc";
+ $a = "$a-$a=$a";
+ ok($a eq 'abc-abc=abc', 'triple self with const');
+
+ $a = "abc";
+ $a .= "$a-$a=$a";
+ ok($a eq 'abcabc-abc=abc', 'append triple self with const');
+}
+
+# test the sorts of optree which may (or may not) get optimised into
+# a single MULTICONCAT op. It's based on a loop in t/perf/opcount.t,
+# but here the loop is unwound as we would need to use concat to
+# generate the expected results to compare with the actual results,
+# which would rather defeat the object.
+
+{
+ my ($a1, $a2, $a3) = qw(1 2 3);
+ our $pkg;
+ my $lex;
+
+ is("-", '-', '"-"');
+ is("-", '-', '"-"');
+ is("-", '-', '"-"');
+ is("-", '-', '"-"');
+ is($a1, '1', '$a1');
+ is("-".$a1, '-1', '"-".$a1');
+ is($a1."-", '1-', '$a1."-"');
+ is("-".$a1."-", '-1-', '"-".$a1."-"');
+ is("$a1", '1', '"$a1"');
+ is("-$a1", '-1', '"-$a1"');
+ is("$a1-", '1-', '"$a1-"');
+ is("-$a1-", '-1-', '"-$a1-"');
+ is($a1.$a2, '12', '$a1.$a2');
+ is($a1."-".$a2, '1-2', '$a1."-".$a2');
+ is("-".$a1."-".$a2, '-1-2', '"-".$a1."-".$a2');
+ is($a1."-".$a2."-", '1-2-', '$a1."-".$a2."-"');
+ is("-".$a1."-".$a2."-", '-1-2-', '"-".$a1."-".$a2."-"');
+ is("$a1$a2", '12', '"$a1$a2"');
+ is("$a1-$a2", '1-2', '"$a1-$a2"');
+ is("-$a1-$a2", '-1-2', '"-$a1-$a2"');
+ is("$a1-$a2-", '1-2-', '"$a1-$a2-"');
+ is("-$a1-$a2-", '-1-2-', '"-$a1-$a2-"');
+ is($a1.$a2.$a3, '123', '$a1.$a2.$a3');
+ is($a1."-".$a2."-".$a3, '1-2-3', '$a1."-".$a2."-".$a3');
+ is("-".$a1."-".$a2."-".$a3, '-1-2-3', '"-".$a1."-".$a2."-".$a3');
+ is($a1."-".$a2."-".$a3."-", '1-2-3-', '$a1."-".$a2."-".$a3."-"');
+ is("-".$a1."-".$a2."-".$a3."-", '-1-2-3-', '"-".$a1."-".$a2."-".$a3."-"');
+ is("$a1$a2$a3", '123', '"$a1$a2$a3"');
+ is("$a1-$a2-$a3", '1-2-3', '"$a1-$a2-$a3"');
+ is("-$a1-$a2-$a3", '-1-2-3', '"-$a1-$a2-$a3"');
+ is("$a1-$a2-$a3-", '1-2-3-', '"$a1-$a2-$a3-"');
+ is("-$a1-$a2-$a3-", '-1-2-3-', '"-$a1-$a2-$a3-"');
+ $pkg = "-";
+ is($pkg, '-', '$pkg = "-"');
+ $pkg = "-";
+ is($pkg, '-', '$pkg = "-"');
+ $pkg = "-";
+ is($pkg, '-', '$pkg = "-"');
+ $pkg = "-";
+ is($pkg, '-', '$pkg = "-"');
+ $pkg = $a1;
+ is($pkg, '1', '$pkg = $a1');
+ $pkg = "-".$a1;
+ is($pkg, '-1', '$pkg = "-".$a1');
+ $pkg = $a1."-";
+ is($pkg, '1-', '$pkg = $a1."-"');
+ $pkg = "-".$a1."-";
+ is($pkg, '-1-', '$pkg = "-".$a1."-"');
+ $pkg = "$a1";
+ is($pkg, '1', '$pkg = "$a1"');
+ $pkg = "-$a1";
+ is($pkg, '-1', '$pkg = "-$a1"');
+ $pkg = "$a1-";
+ is($pkg, '1-', '$pkg = "$a1-"');
+ $pkg = "-$a1-";
+ is($pkg, '-1-', '$pkg = "-$a1-"');
+ $pkg = $a1.$a2;
+ is($pkg, '12', '$pkg = $a1.$a2');
+ $pkg = $a1."-".$a2;
+ is($pkg, '1-2', '$pkg = $a1."-".$a2');
+ $pkg = "-".$a1."-".$a2;
+ is($pkg, '-1-2', '$pkg = "-".$a1."-".$a2');
+ $pkg = $a1."-".$a2."-";
+ is($pkg, '1-2-', '$pkg = $a1."-".$a2."-"');
+ $pkg = "-".$a1."-".$a2."-";
+ is($pkg, '-1-2-', '$pkg = "-".$a1."-".$a2."-"');
+ $pkg = "$a1$a2";
+ is($pkg, '12', '$pkg = "$a1$a2"');
+ $pkg = "$a1-$a2";
+ is($pkg, '1-2', '$pkg = "$a1-$a2"');
+ $pkg = "-$a1-$a2";
+ is($pkg, '-1-2', '$pkg = "-$a1-$a2"');
+ $pkg = "$a1-$a2-";
+ is($pkg, '1-2-', '$pkg = "$a1-$a2-"');
+ $pkg = "-$a1-$a2-";
+ is($pkg, '-1-2-', '$pkg = "-$a1-$a2-"');
+ $pkg = $a1.$a2.$a3;
+ is($pkg, '123', '$pkg = $a1.$a2.$a3');
+ $pkg = $a1."-".$a2."-".$a3;
+ is($pkg, '1-2-3', '$pkg = $a1."-".$a2."-".$a3');
+ $pkg = "-".$a1."-".$a2."-".$a3;
+ is($pkg, '-1-2-3', '$pkg = "-".$a1."-".$a2."-".$a3');
+ $pkg = $a1."-".$a2."-".$a3."-";
+ is($pkg, '1-2-3-', '$pkg = $a1."-".$a2."-".$a3."-"');
+ $pkg = "-".$a1."-".$a2."-".$a3."-";
+ is($pkg, '-1-2-3-', '$pkg = "-".$a1."-".$a2."-".$a3."-"');
+ $pkg = "$a1$a2$a3";
+ is($pkg, '123', '$pkg = "$a1$a2$a3"');
+ $pkg = "$a1-$a2-$a3";
+ is($pkg, '1-2-3', '$pkg = "$a1-$a2-$a3"');
+ $pkg = "-$a1-$a2-$a3";
+ is($pkg, '-1-2-3', '$pkg = "-$a1-$a2-$a3"');
+ $pkg = "$a1-$a2-$a3-";
+ is($pkg, '1-2-3-', '$pkg = "$a1-$a2-$a3-"');
+ $pkg = "-$a1-$a2-$a3-";
+ is($pkg, '-1-2-3-', '$pkg = "-$a1-$a2-$a3-"');
+ $pkg = 'P';
+ $pkg .= "-";
+ is($pkg, 'P-', '$pkg .= "-"');
+ $pkg = 'P';
+ $pkg .= "-";
+ is($pkg, 'P-', '$pkg .= "-"');
+ $pkg = 'P';
+ $pkg .= "-";
+ is($pkg, 'P-', '$pkg .= "-"');
+ $pkg = 'P';
+ $pkg .= "-";
+ is($pkg, 'P-', '$pkg .= "-"');
+ $pkg = 'P';
+ $pkg .= $a1;
+ is($pkg, 'P1', '$pkg .= $a1');
+ $pkg = 'P';
+ $pkg .= "-".$a1;
+ is($pkg, 'P-1', '$pkg .= "-".$a1');
+ $pkg = 'P';
+ $pkg .= $a1."-";
+ is($pkg, 'P1-', '$pkg .= $a1."-"');
+ $pkg = 'P';
+ $pkg .= "-".$a1."-";
+ is($pkg, 'P-1-', '$pkg .= "-".$a1."-"');
+ $pkg = 'P';
+ $pkg .= "$a1";
+ is($pkg, 'P1', '$pkg .= "$a1"');
+ $pkg = 'P';
+ $pkg .= "-$a1";
+ is($pkg, 'P-1', '$pkg .= "-$a1"');
+ $pkg = 'P';
+ $pkg .= "$a1-";
+ is($pkg, 'P1-', '$pkg .= "$a1-"');
+ $pkg = 'P';
+ $pkg .= "-$a1-";
+ is($pkg, 'P-1-', '$pkg .= "-$a1-"');
+ $pkg = 'P';
+ $pkg .= $a1.$a2;
+ is($pkg, 'P12', '$pkg .= $a1.$a2');
+ $pkg = 'P';
+ $pkg .= $a1."-".$a2;
+ is($pkg, 'P1-2', '$pkg .= $a1."-".$a2');
+ $pkg = 'P';
+ $pkg .= "-".$a1."-".$a2;
+ is($pkg, 'P-1-2', '$pkg .= "-".$a1."-".$a2');
+ $pkg = 'P';
+ $pkg .= $a1."-".$a2."-";
+ is($pkg, 'P1-2-', '$pkg .= $a1."-".$a2."-"');
+ $pkg = 'P';
+ $pkg .= "-".$a1."-".$a2."-";
+ is($pkg, 'P-1-2-', '$pkg .= "-".$a1."-".$a2."-"');
+ $pkg = 'P';
+ $pkg .= "$a1$a2";
+ is($pkg, 'P12', '$pkg .= "$a1$a2"');
+ $pkg = 'P';
+ $pkg .= "$a1-$a2";
+ is($pkg, 'P1-2', '$pkg .= "$a1-$a2"');
+ $pkg = 'P';
+ $pkg .= "-$a1-$a2";
+ is($pkg, 'P-1-2', '$pkg .= "-$a1-$a2"');
+ $pkg = 'P';
+ $pkg .= "$a1-$a2-";
+ is($pkg, 'P1-2-', '$pkg .= "$a1-$a2-"');
+ $pkg = 'P';
+ $pkg .= "-$a1-$a2-";
+ is($pkg, 'P-1-2-', '$pkg .= "-$a1-$a2-"');
+ $pkg = 'P';
+ $pkg .= $a1.$a2.$a3;
+ is($pkg, 'P123', '$pkg .= $a1.$a2.$a3');
+ $pkg = 'P';
+ $pkg .= $a1."-".$a2."-".$a3;
+ is($pkg, 'P1-2-3', '$pkg .= $a1."-".$a2."-".$a3');
+ $pkg = 'P';
+ $pkg .= "-".$a1."-".$a2."-".$a3;
+ is($pkg, 'P-1-2-3', '$pkg .= "-".$a1."-".$a2."-".$a3');
+ $pkg = 'P';
+ $pkg .= $a1."-".$a2."-".$a3."-";
+ is($pkg, 'P1-2-3-', '$pkg .= $a1."-".$a2."-".$a3."-"');
+ $pkg = 'P';
+ $pkg .= "-".$a1."-".$a2."-".$a3."-";
+ is($pkg, 'P-1-2-3-', '$pkg .= "-".$a1."-".$a2."-".$a3."-"');
+ $pkg = 'P';
+ $pkg .= "$a1$a2$a3";
+ is($pkg, 'P123', '$pkg .= "$a1$a2$a3"');
+ $pkg = 'P';
+ $pkg .= "$a1-$a2-$a3";
+ is($pkg, 'P1-2-3', '$pkg .= "$a1-$a2-$a3"');
+ $pkg = 'P';
+ $pkg .= "-$a1-$a2-$a3";
+ is($pkg, 'P-1-2-3', '$pkg .= "-$a1-$a2-$a3"');
+ $pkg = 'P';
+ $pkg .= "$a1-$a2-$a3-";
+ is($pkg, 'P1-2-3-', '$pkg .= "$a1-$a2-$a3-"');
+ $pkg = 'P';
+ $pkg .= "-$a1-$a2-$a3-";
+ is($pkg, 'P-1-2-3-', '$pkg .= "-$a1-$a2-$a3-"');
+ $lex = "-";
+ is($lex, '-', '$lex = "-"');
+ $lex = "-";
+ is($lex, '-', '$lex = "-"');
+ $lex = "-";
+ is($lex, '-', '$lex = "-"');
+ $lex = "-";
+ is($lex, '-', '$lex = "-"');
+ $lex = $a1;
+ is($lex, '1', '$lex = $a1');
+ $lex = "-".$a1;
+ is($lex, '-1', '$lex = "-".$a1');
+ $lex = $a1."-";
+ is($lex, '1-', '$lex = $a1."-"');
+ $lex = "-".$a1."-";
+ is($lex, '-1-', '$lex = "-".$a1."-"');
+ $lex = "$a1";
+ is($lex, '1', '$lex = "$a1"');
+ $lex = "-$a1";
+ is($lex, '-1', '$lex = "-$a1"');
+ $lex = "$a1-";
+ is($lex, '1-', '$lex = "$a1-"');
+ $lex = "-$a1-";
+ is($lex, '-1-', '$lex = "-$a1-"');
+ $lex = $a1.$a2;
+ is($lex, '12', '$lex = $a1.$a2');
+ $lex = $a1."-".$a2;
+ is($lex, '1-2', '$lex = $a1."-".$a2');
+ $lex = "-".$a1."-".$a2;
+ is($lex, '-1-2', '$lex = "-".$a1."-".$a2');
+ $lex = $a1."-".$a2."-";
+ is($lex, '1-2-', '$lex = $a1."-".$a2."-"');
+ $lex = "-".$a1."-".$a2."-";
+ is($lex, '-1-2-', '$lex = "-".$a1."-".$a2."-"');
+ $lex = "$a1$a2";
+ is($lex, '12', '$lex = "$a1$a2"');
+ $lex = "$a1-$a2";
+ is($lex, '1-2', '$lex = "$a1-$a2"');
+ $lex = "-$a1-$a2";
+ is($lex, '-1-2', '$lex = "-$a1-$a2"');
+ $lex = "$a1-$a2-";
+ is($lex, '1-2-', '$lex = "$a1-$a2-"');
+ $lex = "-$a1-$a2-";
+ is($lex, '-1-2-', '$lex = "-$a1-$a2-"');
+ $lex = $a1.$a2.$a3;
+ is($lex, '123', '$lex = $a1.$a2.$a3');
+ $lex = $a1."-".$a2."-".$a3;
+ is($lex, '1-2-3', '$lex = $a1."-".$a2."-".$a3');
+ $lex = "-".$a1."-".$a2."-".$a3;
+ is($lex, '-1-2-3', '$lex = "-".$a1."-".$a2."-".$a3');
+ $lex = $a1."-".$a2."-".$a3."-";
+ is($lex, '1-2-3-', '$lex = $a1."-".$a2."-".$a3."-"');
+ $lex = "-".$a1."-".$a2."-".$a3."-";
+ is($lex, '-1-2-3-', '$lex = "-".$a1."-".$a2."-".$a3."-"');
+ $lex = "$a1$a2$a3";
+ is($lex, '123', '$lex = "$a1$a2$a3"');
+ $lex = "$a1-$a2-$a3";
+ is($lex, '1-2-3', '$lex = "$a1-$a2-$a3"');
+ $lex = "-$a1-$a2-$a3";
+ is($lex, '-1-2-3', '$lex = "-$a1-$a2-$a3"');
+ $lex = "$a1-$a2-$a3-";
+ is($lex, '1-2-3-', '$lex = "$a1-$a2-$a3-"');
+ $lex = "-$a1-$a2-$a3-";
+ is($lex, '-1-2-3-', '$lex = "-$a1-$a2-$a3-"');
+ $lex = 'L';
+ $lex .= "-";
+ is($lex, 'L-', '$lex .= "-"');
+ $lex = 'L';
+ $lex .= "-";
+ is($lex, 'L-', '$lex .= "-"');
+ $lex = 'L';
+ $lex .= "-";
+ is($lex, 'L-', '$lex .= "-"');
+ $lex = 'L';
+ $lex .= "-";
+ is($lex, 'L-', '$lex .= "-"');
+ $lex = 'L';
+ $lex .= $a1;
+ is($lex, 'L1', '$lex .= $a1');
+ $lex = 'L';
+ $lex .= "-".$a1;
+ is($lex, 'L-1', '$lex .= "-".$a1');
+ $lex = 'L';
+ $lex .= $a1."-";
+ is($lex, 'L1-', '$lex .= $a1."-"');
+ $lex = 'L';
+ $lex .= "-".$a1."-";
+ is($lex, 'L-1-', '$lex .= "-".$a1."-"');
+ $lex = 'L';
+ $lex .= "$a1";
+ is($lex, 'L1', '$lex .= "$a1"');
+ $lex = 'L';
+ $lex .= "-$a1";
+ is($lex, 'L-1', '$lex .= "-$a1"');
+ $lex = 'L';
+ $lex .= "$a1-";
+ is($lex, 'L1-', '$lex .= "$a1-"');
+ $lex = 'L';
+ $lex .= "-$a1-";
+ is($lex, 'L-1-', '$lex .= "-$a1-"');
+ $lex = 'L';
+ $lex .= $a1.$a2;
+ is($lex, 'L12', '$lex .= $a1.$a2');
+ $lex = 'L';
+ $lex .= $a1."-".$a2;
+ is($lex, 'L1-2', '$lex .= $a1."-".$a2');
+ $lex = 'L';
+ $lex .= "-".$a1."-".$a2;
+ is($lex, 'L-1-2', '$lex .= "-".$a1."-".$a2');
+ $lex = 'L';
+ $lex .= $a1."-".$a2."-";
+ is($lex, 'L1-2-', '$lex .= $a1."-".$a2."-"');
+ $lex = 'L';
+ $lex .= "-".$a1."-".$a2."-";
+ is($lex, 'L-1-2-', '$lex .= "-".$a1."-".$a2."-"');
+ $lex = 'L';
+ $lex .= "$a1$a2";
+ is($lex, 'L12', '$lex .= "$a1$a2"');
+ $lex = 'L';
+ $lex .= "$a1-$a2";
+ is($lex, 'L1-2', '$lex .= "$a1-$a2"');
+ $lex = 'L';
+ $lex .= "-$a1-$a2";
+ is($lex, 'L-1-2', '$lex .= "-$a1-$a2"');
+ $lex = 'L';
+ $lex .= "$a1-$a2-";
+ is($lex, 'L1-2-', '$lex .= "$a1-$a2-"');
+ $lex = 'L';
+ $lex .= "-$a1-$a2-";
+ is($lex, 'L-1-2-', '$lex .= "-$a1-$a2-"');
+ $lex = 'L';
+ $lex .= $a1.$a2.$a3;
+ is($lex, 'L123', '$lex .= $a1.$a2.$a3');
+ $lex = 'L';
+ $lex .= $a1."-".$a2."-".$a3;
+ is($lex, 'L1-2-3', '$lex .= $a1."-".$a2."-".$a3');
+ $lex = 'L';
+ $lex .= "-".$a1."-".$a2."-".$a3;
+ is($lex, 'L-1-2-3', '$lex .= "-".$a1."-".$a2."-".$a3');
+ $lex = 'L';
+ $lex .= $a1."-".$a2."-".$a3."-";
+ is($lex, 'L1-2-3-', '$lex .= $a1."-".$a2."-".$a3."-"');
+ $lex = 'L';
+ $lex .= "-".$a1."-".$a2."-".$a3."-";
+ is($lex, 'L-1-2-3-', '$lex .= "-".$a1."-".$a2."-".$a3."-"');
+ $lex = 'L';
+ $lex .= "$a1$a2$a3";
+ is($lex, 'L123', '$lex .= "$a1$a2$a3"');
+ $lex = 'L';
+ $lex .= "$a1-$a2-$a3";
+ is($lex, 'L1-2-3', '$lex .= "$a1-$a2-$a3"');
+ $lex = 'L';
+ $lex .= "-$a1-$a2-$a3";
+ is($lex, 'L-1-2-3', '$lex .= "-$a1-$a2-$a3"');
+ $lex = 'L';
+ $lex .= "$a1-$a2-$a3-";
+ is($lex, 'L1-2-3-', '$lex .= "$a1-$a2-$a3-"');
+ $lex = 'L';
+ $lex .= "-$a1-$a2-$a3-";
+ is($lex, 'L-1-2-3-', '$lex .= "-$a1-$a2-$a3-"');
+ {
+ my $l = "-";
+ is($l, '-', 'my $l = "-"');
+ }
+ {
+ my $l = "-";
+ is($l, '-', 'my $l = "-"');
+ }
+ {
+ my $l = "-";
+ is($l, '-', 'my $l = "-"');
+ }
+ {
+ my $l = "-";
+ is($l, '-', 'my $l = "-"');
+ }
+ {
+ my $l = $a1;
+ is($l, '1', 'my $l = $a1');
+ }
+ {
+ my $l = "-".$a1;
+ is($l, '-1', 'my $l = "-".$a1');
+ }
+ {
+ my $l = $a1."-";
+ is($l, '1-', 'my $l = $a1."-"');
+ }
+ {
+ my $l = "-".$a1."-";
+ is($l, '-1-', 'my $l = "-".$a1."-"');
+ }
+ {
+ my $l = "$a1";
+ is($l, '1', 'my $l = "$a1"');
+ }
+ {
+ my $l = "-$a1";
+ is($l, '-1', 'my $l = "-$a1"');
+ }
+ {
+ my $l = "$a1-";
+ is($l, '1-', 'my $l = "$a1-"');
+ }
+ {
+ my $l = "-$a1-";
+ is($l, '-1-', 'my $l = "-$a1-"');
+ }
+ {
+ my $l = $a1.$a2;
+ is($l, '12', 'my $l = $a1.$a2');
+ }
+ {
+ my $l = $a1."-".$a2;
+ is($l, '1-2', 'my $l = $a1."-".$a2');
+ }
+ {
+ my $l = "-".$a1."-".$a2;
+ is($l, '-1-2', 'my $l = "-".$a1."-".$a2');
+ }
+ {
+ my $l = $a1."-".$a2."-";
+ is($l, '1-2-', 'my $l = $a1."-".$a2."-"');
+ }
+ {
+ my $l = "-".$a1."-".$a2."-";
+ is($l, '-1-2-', 'my $l = "-".$a1."-".$a2."-"');
+ }
+ {
+ my $l = "$a1$a2";
+ is($l, '12', 'my $l = "$a1$a2"');
+ }
+ {
+ my $l = "$a1-$a2";
+ is($l, '1-2', 'my $l = "$a1-$a2"');
+ }
+ {
+ my $l = "-$a1-$a2";
+ is($l, '-1-2', 'my $l = "-$a1-$a2"');
+ }
+ {
+ my $l = "$a1-$a2-";
+ is($l, '1-2-', 'my $l = "$a1-$a2-"');
+ }
+ {
+ my $l = "-$a1-$a2-";
+ is($l, '-1-2-', 'my $l = "-$a1-$a2-"');
+ }
+ {
+ my $l = $a1.$a2.$a3;
+ is($l, '123', 'my $l = $a1.$a2.$a3');
+ }
+ {
+ my $l = $a1."-".$a2."-".$a3;
+ is($l, '1-2-3', 'my $l = $a1."-".$a2."-".$a3');
+ }
+ {
+ my $l = "-".$a1."-".$a2."-".$a3;
+ is($l, '-1-2-3', 'my $l = "-".$a1."-".$a2."-".$a3');
+ }
+ {
+ my $l = $a1."-".$a2."-".$a3."-";
+ is($l, '1-2-3-', 'my $l = $a1."-".$a2."-".$a3."-"');
+ }
+ {
+ my $l = "-".$a1."-".$a2."-".$a3."-";
+ is($l, '-1-2-3-', 'my $l = "-".$a1."-".$a2."-".$a3."-"');
+ }
+ {
+ my $l = "$a1$a2$a3";
+ is($l, '123', 'my $l = "$a1$a2$a3"');
+ }
+ {
+ my $l = "$a1-$a2-$a3";
+ is($l, '1-2-3', 'my $l = "$a1-$a2-$a3"');
+ }
+ {
+ my $l = "-$a1-$a2-$a3";
+ is($l, '-1-2-3', 'my $l = "-$a1-$a2-$a3"');
+ }
+ {
+ my $l = "$a1-$a2-$a3-";
+ is($l, '1-2-3-', 'my $l = "$a1-$a2-$a3-"');
+ }
+ {
+ my $l = "-$a1-$a2-$a3-";
+ is($l, '-1-2-3-', 'my $l = "-$a1-$a2-$a3-"');
+ }
+}
+
+# multiconcat optimises away scalar assign, and is responsible
+# for handling the assign itself. If the LHS is something weird,
+# make sure it's handled ok
+
+{
+ my $a = 'a';
+ my $b = 'b';
+ my $o = 'o';
+
+ my $re = qr/abc/;
+ $$re = $a . $b;
+ is($$re, "ab", '$$re = $a . $b');
+
+ #passing a hash elem to a sub creates a PVLV
+ my $s = sub { $_[0] = $a . $b; };
+ my %h;
+ $s->($h{foo});
+ is($h{foo}, "ab", "PVLV");
+
+ # assigning a string to a typeglob creates an alias
+ $Foo = 'myfoo';
+ *Bar = ("F" . $o . $o);
+ is($Bar, "myfoo", '*Bar = "Foo"');
+
+ # while that same typeglob also appearing on the RHS returns
+ # a stringified value
+
+ package QPR {
+ ${'*QPR::Bar*QPR::BarBaz'} = 'myfoobarbaz';
+ *Bar = (*Bar . *Bar . "Baz");
+ ::is($Bar, "myfoobarbaz", '*Bar = (*Bar . *Bar . "Baz")');
+ }
+}
+
+# distinguish between '=' and '.=' where the LHS has the OPf_MOD flag
+
+{
+ my $foo = "foo";
+ my $a . $foo; # weird but legal
+ is($a, '', 'my $a . $foo');
+ my $b; $b .= $foo;
+ is($b, 'foo', 'my $b; $b .= $foo');
+}
+
+# distinguish between nested appends and concats; the former is
+# affected by the change of value of the target on each concat.
+# This is why multiconcat shouldn't be used in that case
+
+{
+ my $a = "a";
+ (($a .= $a) .= $a) .= $a;
+ is($a, "aaaaaaaa", '(($a .= $a) .= $a) .= $a;');
+}
+
+# check everything works ok near the max arg size of a multiconcat
+
+{
+ my @a = map "<$_>", 0..99;
+ for my $i (60..68) { # check each side of 64 threshold
+ my $c = join '.', map "\$a[$_]", 0..$i;
+ my $got = eval $c or die $@;
+ my $empty = ''; # don't use a const string in case join'' ever
+ # gets optimised into a multiconcat
+ my $expected = join $empty, @a[0..$i];
+ is($got, $expected, "long concat chain $i");
+ }
+}
diff --git a/t/perf/benchmarks b/t/perf/benchmarks
index 0bebf54ccc..7795079c78 100644
--- a/t/perf/benchmarks
+++ b/t/perf/benchmarks
@@ -1096,10 +1096,257 @@
},
+ # concatenation; quite possibly optimised to OP_MULTICONCAT
+
+ 'expr::concat::cl' => {
+ setup => 'my $lex = "abcd"',
+ code => '"foo" . $lex',
+ },
+ 'expr::concat::lc' => {
+ setup => 'my $lex = "abcd"',
+ code => '$lex . "foo"',
+ },
+ 'expr::concat::ll' => {
+ setup => 'my $lex1 = "abcd"; my $lex2 = "wxyz"',
+ code => '$lex1 . $lex2',
+ },
+
+ 'expr::concat::l_append_c' => {
+ setup => 'my $lex',
+ pre => '$lex = "abcd"',
+ code => '$lex .= "foo"',
+ },
+ 'expr::concat::l_append_l' => {
+ setup => 'my $lex1; my $lex2 = "wxyz"',
+ pre => '$lex1 = "abcd"',
+ code => '$lex1 .= $lex2',
+ },
+ 'expr::concat::l_append_ll' => {
+ setup => 'my $lex1; my $lex2 = "pqrs"; my $lex3 = "wxyz"',
+ pre => '$lex1 = "abcd"',
+ code => '$lex1 .= $lex2 . $lex3',
+ },
+ 'expr::concat::l_append_clclc' => {
+ setup => 'my $lex1; my $lex2 = "pqrs"; my $lex3 = "wxyz"',
+ pre => '$lex1 = "abcd"',
+ code => '$lex1 .= "-foo-$lex2-foo-$lex3-foo"',
+ },
+ 'expr::concat::l_append_lll' => {
+ setup => 'my $lex1; my ($lex2, $lex3, $lex4) = qw(pqrs wxyz 1234)',
+ pre => '$lex1 = "abcd"',
+ code => '$lex1 .= $lex2 . $lex3 . $lex4',
+ },
+
+ 'expr::concat::m_ll' => {
+ setup => 'my $lex1 = "abcd"; my $lex2 = "wxyz"',
+ code => 'my $lex = $lex1 . $lex2',
+ },
+ 'expr::concat::m_lll' => {
+ setup => 'my $lex1 = "abcd"; my $lex2 = "pqrs"; my $lex3 = "wxyz"',
+ code => 'my $lex = $lex1 . $lex2 . $lex3',
+ },
+ 'expr::concat::m_cl' => {
+ setup => 'my $lex1 = "abcd"',
+ code => 'my $lex = "const$lex1"',
+ },
+ 'expr::concat::m_clclc' => {
+ setup => 'my $lex1 = "abcd"; my $lex2 = "wxyz"',
+ code => 'my $lex = "foo=$lex1 bar=$lex2\n"',
+ },
+ 'expr::concat::m_clclc_long' => {
+ desc => 'my $lex = "foooooooooo=$lex1 baaaaaaaaar=$lex2\n" where lex1/2 are 400 chars',
+ setup => 'my $lex1 = "abcd" x 100; my $lex2 = "wxyz" x 100',
+ code => 'my $lex = "foooooooooo=$lex1 baaaaaaaaar=$lex2\n"',
+ },
+
+ 'expr::concat::l_ll' => {
+ setup => 'my $lex; my $lex1 = "abcd"; my $lex2 = "wxyz"',
+ code => '$lex = $lex1 . $lex2',
+ },
+ 'expr::concat::l_ll_ldup' => {
+ setup => 'my $lex1; my $lex2 = "wxyz"',
+ pre => '$lex1 = "abcd"',
+ code => '$lex1 = $lex1 . $lex2',
+ },
+ 'expr::concat::l_ll_rdup' => {
+ setup => 'my $lex1; my $lex2 = "wxyz"',
+ pre => '$lex1 = "abcd"',
+ code => '$lex1 = $lex2 . $lex1',
+ },
+ 'expr::concat::l_ll_lrdup' => {
+ setup => 'my $lex1',
+ pre => '$lex1 = "abcd"',
+ code => '$lex1 = $lex1 . $lex1',
+ },
+ 'expr::concat::l_lll' => {
+ setup => 'my $lex; my $lex1 = "abcd"; my $lex2 = "pqrs"; my $lex3 = "wxyz"',
+ code => '$lex = $lex1 . $lex2 . $lex3',
+ },
+ 'expr::concat::l_lllll' => {
+ setup => 'my $lex; my $lex1 = "abcd"; my $lex2 = "pqrs"; my $lex3 = "wxyz"; my $lex4 = "the quick brown fox"; my $lex5 = "to be, or not to be..."',
+ code => '$lex = $lex1 . $lex2 . $lex3 . $lex4 . $lex5',
+ },
+ 'expr::concat::l_cl' => {
+ setup => 'my $lex; my $lex1 = "abcd"',
+ code => '$lex = "const$lex1"',
+ },
+ 'expr::concat::l_clclc' => {
+ setup => 'my $lex; my $lex1 = "abcd"; my $lex2 = "wxyz"',
+ code => '$lex = "foo=$lex1 bar=$lex2\n"',
+ },
+ 'expr::concat::l_clclc_long' => {
+ desc => '$lex = "foooooooooo=$lex1 baaaaaaaaar=$lex2\n" where lex1/2 are 400 chars',
+ setup => 'my $lex; my $lex1 = "abcd" x 100; my $lex2 = "wxyz" x 100',
+ code => '$lex = "foooooooooo=$lex1 baaaaaaaaar=$lex2\n"',
+ },
+ 'expr::concat::l_clclclclclc' => {
+ setup => 'my $lex; my $lex1 = "abcd"; my $lex2 = "pqrs"; my $lex3 = "the quick brown fox"; my $lex4 = "to be, or not to be..."',
+ code => '$lex = "foo1=$lex1 foo2=$lex2 foo3=$lex3 foo4=$lex4\n"',
+ },
+
+ 'expr::concat::g_append_c' => {
+ setup => 'our $pkg',
+ pre => '$pkg = "abcd"',
+ code => '$pkg .= "foo"',
+ },
+ 'expr::concat::g_append_l' => {
+ setup => 'our $pkg; my $lex1 = "wxyz"',
+ pre => '$pkg = "abcd"',
+ code => '$pkg .= $lex1',
+ },
+ 'expr::concat::g_append_ll' => {
+ setup => 'our $pkg; my $lex1 = "pqrs"; my $lex2 = "wxyz"',
+ pre => '$pkg = "abcd"',
+ code => '$pkg .= $lex1 . $lex2',
+ },
+ 'expr::concat::g_append_clclc' => {
+ setup => 'our $pkg; my $lex1 = "pqrs"; my $lex2 = "wxyz"',
+ pre => '$pkg = "abcd"',
+ code => '$pkg .= "-foo-$lex1-foo-$lex2-foo-"',
+ },
+
+ 'expr::concat::g_ll' => {
+ setup => 'our $pkg; my $lex1 = "abcd"; my $lex2 = "wxyz"',
+ code => '$pkg = $lex1 . $lex2',
+ },
+ 'expr::concat::g_gl_ldup' => {
+ setup => 'our $pkg; my $lex2 = "wxyz"',
+ pre => '$pkg = "abcd"',
+ code => '$pkg = $pkg . $lex2',
+ },
+ 'expr::concat::g_lg_rdup' => {
+ setup => 'our $pkg; my $lex1 = "wxyz"',
+ pre => '$pkg = "abcd"',
+ code => '$pkg = $lex1 . $pkg',
+ },
+ 'expr::concat::g_gg_lrdup' => {
+ setup => 'our $pkg',
+ pre => '$pkg = "abcd"',
+ code => '$pkg = $pkg . $pkg',
+ },
+ 'expr::concat::g_lll' => {
+ setup => 'our $pkg; my $lex1 = "abcd"; my $lex2 = "pqrs"; my $lex3 = "wxyz"',
+ code => '$pkg = $lex1 . $lex2 . $lex3',
+ },
+ 'expr::concat::g_cl' => {
+ setup => 'our $pkg; my $lex1 = "abcd"',
+ code => '$pkg = "const$lex1"',
+ },
+ 'expr::concat::g_clclc' => {
+ setup => 'our $pkg; my $lex1 = "abcd"; my $lex2 = "wxyz"',
+ code => '$pkg = "foo=$lex1 bar=$lex2\n"',
+ },
+ 'expr::concat::g_clclc_long' => {
+ desc => '$pkg = "foooooooooo=$lex1 baaaaaaaaar=$lex2\n" where lex1/2 are 400 chars',
+ setup => 'our $pkg; my $lex1 = "abcd" x 100; my $lex2 = "wxyz" x 100',
+ code => '$pkg = "foooooooooo=$lex1 baaaaaaaaar=$lex2\n"',
+ },
+
+ 'expr::concat::utf8_uuu' => {
+ desc => 'my $s = $a.$b.$c where all args are utf8',
+ setup => 'my $s; my $a = "ab\x{100}cde"; my $b = "\x{101}fghij"; my $c = "\x{102}klmn"',
+ code => '$s = $a.$b.$c',
+ },
+ 'expr::concat::utf8_suu' => {
+ desc => 'my $s = "foo=$a bar=$b baz=$c" where $b,$c are utf8',
+ setup => 'my $s; my $a = "abcde"; my $b = "\x{100}fghij"; my $c = "\x{101}klmn"',
+ code => '$s = "foo=$a bar=$b baz=$c"',
+ },
+ 'expr::concat::utf8_usu' => {
+ desc => 'my $s = "foo=$a bar=$b baz=$c" where $a,$c are utf8',
+ setup => 'my $s; my $a = "ab\x{100}cde"; my $b = "fghij"; my $c = "\x{101}klmn"',
+ code => '$s = "foo=$a bar=$b baz=$c"',
+ },
+ 'expr::concat::utf8_usx' => {
+ desc => 'my $s = "foo=$a bar=$b baz=$c" where $a is utf8, $c has 0x80',
+ setup => 'my $s; my $a = "ab\x{100}cde"; my $b = "fghij"; my $c = "\x80\x81klmn"',
+ code => '$s = "foo=$a bar=$b baz=$c"',
+ },
+
+ 'expr::concat::utf8_s_append_uuu' => {
+ desc => '$s .= $a.$b.$c where all RH args are utf8',
+ setup => 'my $s; my $a = "ab\x{100}cde"; my $b = "\x{101}fghij"; my $c = "\x{102}klmn"',
+ pre => '$s = "abcd"',
+ code => '$s .= $a.$b.$c',
+ },
+ 'expr::concat::utf8_s_append_suu' => {
+ desc => '$s .= "foo=$a bar=$b baz=$c" where $b,$c are utf8',
+ setup => 'my $s; my $a = "abcde"; my $b = "\x{100}fghij"; my $c = "\x{101}klmn"',
+ pre => '$s = "abcd"',
+ code => '$s .= "foo=$a bar=$b baz=$c"',
+ },
+ 'expr::concat::utf8_s_append_usu' => {
+ desc => '$s .= "foo=$a bar=$b baz=$c" where $a,$c are utf8',
+ setup => 'my $s; my $a = "ab\x{100}cde"; my $b = "fghij"; my $c = "\x{101}klmn"',
+ pre => '$s = "abcd"',
+ code => '$s .= "foo=$a bar=$b baz=$c"',
+ },
+ 'expr::concat::utf8_s_append_usx' => {
+ desc => '$s .= "foo=$a bar=$b baz=$c" where $a is utf8, $c has 0x80',
+ setup => 'my $s; my $a = "ab\x{100}cde"; my $b = "fghij"; my $c = "\x80\x81klmn"',
+ pre => '$s = "abcd"',
+ code => '$s .= "foo=$a bar=$b baz=$c"',
+ },
+
+ 'expr::concat::utf8_u_append_uuu' => {
+ desc => '$s .= $a.$b.$c where all args are utf8',
+ setup => 'my $s; my $a = "ab\x{100}cde"; my $b = "\x{101}fghij"; my $c = "\x{102}klmn"',
+ pre => '$s = "\x{100}wxyz"',
+ code => '$s .= $a.$b.$c',
+ },
+ 'expr::concat::utf8_u_append_suu' => {
+ desc => '$s .= "foo=$a bar=$b baz=$c" where $s,$b,$c are utf8',
+ setup => 'my $s; my $a = "abcde"; my $b = "\x{100}fghij"; my $c = "\x{101}klmn"',
+ pre => '$s = "\x{100}wxyz"',
+ code => '$s .= "foo=$a bar=$b baz=$c"',
+ },
+ 'expr::concat::utf8_u_append_usu' => {
+ desc => '$s .= "foo=$a bar=$b baz=$c" where $s,$a,$c are utf8',
+ setup => 'my $s; my $a = "ab\x{100}cde"; my $b = "fghij"; my $c = "\x{101}klmn"',
+ pre => '$s = "\x{100}wxyz"',
+ code => '$s .= "foo=$a bar=$b baz=$c"',
+ },
+ 'expr::concat::utf8_u_append_usx' => {
+ desc => '$s .= "foo=$a bar=$b baz=$c" where $s,$a are utf8, $c has 0x80',
+ setup => 'my $s; my $a = "ab\x{100}cde"; my $b = "fghij"; my $c = "\x80\x81klmn"',
+ pre => '$s = "\x{100}wxyz"',
+ code => '$s .= "foo=$a bar=$b baz=$c"',
+ },
+
+ 'expr::concat::nested_mutator' => {
+ setup => 'my $lex1; my ($lex2, $lex3, $lex4) = qw(abcd pqrs wxyz)',
+ pre => '$lex1 = "QPR"',
+ code => '(($lex1 .= $lex2) .= $lex3) .= $lex4',
+ },
+
# scalar assign, OP_SASSIGN
+ 'expr::sassign::my_conststr' => {
+ setup => '',
+ code => 'my $x = "abc"',
+ },
'expr::sassign::scalar_lex_int' => {
desc => 'lexical $x = 1',
setup => 'my $x',
@@ -1271,6 +1518,18 @@
},
+ # JOIN
+
+
+ 'func::join::empty_l_ll' => {
+ setup => 'my $lex; my $lex1 = "abcd"; my $lex2 = "wxyz"',
+ code => '$lex = join "", $lex1, $lex2',
+ },
+
+
+ # KEYS
+
+
'func::keys::lex::void_cxt_empty' => {
desc => ' keys() on an empty lexical hash in void context',
setup => 'my %h = ()',
@@ -1525,6 +1784,8 @@
code => '@a = (split(/:/, $s, 2), 1);',
},
+ # SPRINTF
+
'func::sprintf::d' => {
desc => '%d',
@@ -1594,6 +1855,96 @@
code => '$s = sprintf "foo=%s bar=%s baz=%s", $a, $b, $c',
},
+ # sprint that's likely to be optimised to an OP_MULTICONCAT
+
+ 'func::sprintf::l' => {
+ setup => 'my $lex1 = "abcd"',
+ code => 'sprintf "%s", $lex1',
+ },
+ 'func::sprintf::g_l' => {
+ setup => 'our $pkg; my $lex1 = "abcd"',
+ code => '$pkg = sprintf "%s", $lex1',
+ },
+ 'func::sprintf::g_append_l' => {
+ setup => 'our $pkg; my $lex1 = "abcd"',
+ pre => '$pkg = "pqrs"',
+ code => '$pkg .= sprintf "%s", $lex1',
+ },
+ 'func::sprintf::g_ll' => {
+ setup => 'our $pkg; my $lex1 = "abcd"; my $lex2 = "wxyz"',
+ code => '$pkg = sprintf "%s%s", $lex1, $lex2',
+ },
+ 'func::sprintf::g_append_ll' => {
+ setup => 'our $pkg; my $lex1 = "abcd"; my $lex2 = "wxyz"',
+ pre => '$pkg = "pqrs"',
+ code => '$pkg .= sprintf "%s%s", $lex1, $lex2',
+ },
+ 'func::sprintf::g_cl' => {
+ setup => 'our $pkg; my $lex1 = "abcd"',
+ code => '$pkg = sprintf "foo=%s", $lex1',
+ },
+ 'func::sprintf::g_clclc' => {
+ setup => 'our $pkg; my $lex1 = "abcd"; my $lex2 = "wxyz"',
+ code => '$pkg = sprintf "foo=%s bar=%s\n", $lex1, $lex2',
+ },
+
+ 'func::sprintf::l_l' => {
+ setup => 'my $lex; my $lex1 = "abcd"',
+ code => '$lex = sprintf "%s", $lex1',
+ },
+ 'func::sprintf::l_append_l' => {
+ setup => 'my $lex; my $lex1 = "abcd"',
+ pre => '$lex = "pqrs"',
+ code => '$lex .= sprintf "%s", $lex1',
+ },
+ 'func::sprintf::ll' => {
+ setup => 'my $lex1 = "abcd"; my $lex2 = "wxyz"',
+ code => 'sprintf "%s%s", $lex1, $lex2',
+ },
+ 'func::sprintf::l_ll' => {
+ setup => 'my $lex; my $lex1 = "abcd"; my $lex2 = "wxyz"',
+ code => '$lex = sprintf "%s%s", $lex1, $lex2',
+ },
+ 'func::sprintf::l_append_ll' => {
+ setup => 'my $lex; my $lex1 = "abcd"; my $lex2 = "wxyz"',
+ pre => '$lex = "pqrs"',
+ code => '$lex .= sprintf "%s%s", $lex1, $lex2',
+ },
+ 'func::sprintf::l_cl' => {
+ setup => 'my $lex; my $lex1 = "abcd"',
+ code => '$lex = sprintf "foo=%s", $lex1',
+ },
+ 'func::sprintf::l_clclc' => {
+ setup => 'my $lex; my $lex1 = "abcd"; my $lex2 = "wxyz"',
+ code => '$lex = sprintf "foo=%s bar=%s\n", $lex1, $lex2',
+ },
+
+ 'func::sprintf::m_l' => {
+ setup => 'my $lex1 = "abcd"',
+ code => 'my $lex = sprintf "%s", $lex1',
+ },
+ 'func::sprintf::m_ll' => {
+ setup => 'my $lex1 = "abcd"; my $lex2 = "wxyz"',
+ code => 'my $lex = sprintf "%s%s", $lex1, $lex2',
+ },
+ 'func::sprintf::m_cl' => {
+ setup => 'my $lex1 = "abcd"',
+ code => 'my $lex = sprintf "foo=%s", $lex1',
+ },
+ 'func::sprintf::m_clclc' => {
+ setup => 'my $lex1 = "abcd"; my $lex2 = "wxyz"',
+ code => 'my $lex = sprintf "foo=%s bar=%s\n", $lex1, $lex2',
+ },
+
+ 'func::sprintf::utf8__l_lll' => {
+ desc => '$s = sprintf("foo=%s bar=%s baz=%s", $a, $b, $c) where $a,$c are utf8',
+ setup => 'my $s; my $a = "ab\x{100}cde"; my $b = "fghij"; my $c = "\x{101}klmn"',
+ code => '$s = sprintf "foo=%s bar=%s baz=%s", $a, $b, $c',
+ },
+
+
+ # S///
+
'func::subst::bool' => {
desc => 's/// in boolean context',
setup => '',
diff --git a/t/perf/opcount.t b/t/perf/opcount.t
index 0ff4b7246c..0ded6cd360 100644
--- a/t/perf/opcount.t
+++ b/t/perf/opcount.t
@@ -20,7 +20,7 @@ BEGIN {
use warnings;
use strict;
-plan 2309;
+plan 2579;
use B ();
@@ -400,3 +400,237 @@ test_opcount(0, 'barewords can be constant-folded',
}
}
}
+
+
+# a sprintf that can't be optimised shouldn't stop the .= concat being
+# optimised
+
+{
+ my ($i,$j,$s);
+ test_opcount(0, "sprintf pessimised",
+ sub { $s .= sprintf "%d%d",$i, $j },
+ {
+ const => 1,
+ sprintf => 1,
+ concat => 0,
+ multiconcat => 1,
+ padsv => 2,
+ });
+}
+
+
+# sprintf with constant args should be constant folded
+
+test_opcount(0, "sprintf constant args",
+ sub { sprintf "%s%s", "abc", "def" },
+ {
+ const => 1,
+ sprintf => 0,
+ multiconcat => 0.
+ });
+
+#
+# concats and assigns that should be optimised into a single multiconcat
+# op
+
+{
+
+ my %seen; # weed out duplicate combinations
+
+ # these are the ones where using multiconcat isn't a gain, so should
+ # be pessimised
+ my %pessimise = map { $_ => 1 }
+ '$a1.$a2',
+ '"$a1$a2"',
+ '$pkg .= $a1',
+ '$pkg .= "$a1"',
+ '$lex = $a1.$a2',
+ '$lex = "$a1$a2"',
+ # these already constant folded
+ 'sprintf("-")',
+ '$pkg = sprintf("-")',
+ '$lex = sprintf("-")',
+ 'my $l = sprintf("-")',
+ ;
+
+ for my $lhs (
+ '',
+ '$pkg = ',
+ '$pkg .= ',
+ '$lex = ',
+ '$lex .= ',
+ 'my $l = ',
+ ) {
+ for my $nargs (0..3) {
+ for my $type (0..2) {
+ # 0: $a . $b
+ # 1: "$a$b"
+ # 2: sprintf("%s%s", $a, $b)
+
+ for my $const (0..4) {
+ # 0: no consts: "$a1$a2"
+ # 1: interior consts: "$a1-$a2"
+ # 2: + LH edge: "-$a1-$a2"
+ # 3: + RH edge: "$a1-$a2-"
+ # 4: + both edge: "-$a1-$a2-"
+
+ my @args;
+ my @sprintf_args;
+ my $c = $type == 0 ? '"-"' : '-';
+ push @args, $c if $const == 2 || $const == 4;
+ for my $n (1..$nargs) {
+ if ($type == 2) {
+ # sprintf
+ push @sprintf_args, "\$a$n";
+ push @args, '%s';
+ }
+ else {
+ push @args, "\$a$n";
+ }
+ push @args, $c if $const;
+ }
+ pop @args if $const == 1 || $const == 2;
+
+ push @args, $c if $nargs == 0 && $const == 1;
+
+
+ if ($type == 2) {
+ # sprintf
+ next unless @args;
+ }
+ else {
+ # To ensure that there's at least once concat
+ # action, if appending, need at least one RHS arg;
+ # else least 2 args:
+ # $x = $a . $b
+ # $x .= $a
+ next unless @args >= ($lhs =~ /\./ ? 1 : 2);
+ }
+
+ my $rhs;
+ if ($type == 0) {
+ $rhs = join('.', @args);
+ }
+ elsif ($type == 1) {
+ $rhs = '"' . join('', @args) . '"'
+ }
+ else {
+ $rhs = 'sprintf("'
+ . join('', @args)
+ . '"'
+ . join('', map ",$_", @sprintf_args)
+ . ')';
+ }
+
+ my $expr = $lhs . $rhs;
+
+ next if exists $seen{$expr};
+ $seen{$expr} = 1;
+
+ my ($a1, $a2, $a3);
+ my $lex;
+ our $pkg;
+ my $sub = eval qq{sub { $expr }};
+ die "eval(sub { $expr }: $@" if $@;
+
+ my $pm = $pessimise{$expr};
+ test_opcount(0, ($pm ? "concat " : "multiconcat")
+ . ": $expr",
+ $sub,
+ $pm
+ ? { multiconcat => 0 }
+ : {
+ multiconcat => 1,
+ padsv => $nargs,
+ concat => 0,
+ sprintf => 0,
+ const => 0,
+ sassign => 0,
+ stringify => 0,
+ gv => 0, # optimised to gvsv
+ });
+ }
+ }
+ }
+ }
+}
+
+# $lex = "foo" should *not* get converted into a multiconcat - there's
+# no actual concatenation involved, and treating it as a degnerate concat
+# would forego any COW copy efficiency
+
+test_opcount(0, '$lex = "foo"', sub { my $x; $x = "foo"; },
+ {
+ multiconcat => 0,
+ });
+
+# for '$lex1 = $lex2 . $lex3', multiconcat is normally slower than
+# concat, except in the specific case of '$lex1 = $lex2 . $lex1'
+
+test_opcount(0, '$lex1 = $lex2 . $lex1', sub { my ($x,$y); $x = $y . $x },
+ {
+ multiconcat => 1,
+ padsv => 4, # 2 are from the my()
+ concat => 0,
+ sassign => 0,
+ stringify => 0,
+ });
+test_opcount(0, '$lex1 = "$lex2$lex1"', sub { my ($x,$y); $x = "$y$x" },
+ {
+ multiconcat => 1,
+ padsv => 4, # 2 are from the my()
+ concat => 0,
+ sassign => 0,
+ stringify => 0,
+ });
+test_opcount(0, '$lex1 = $lex1 . $lex1', sub { my $x; $x = $x . $x },
+ {
+ multiconcat => 0,
+ });
+
+# 'my $x .= ...' doesn't make a lot of sense and so isn't optimised
+test_opcount(0, 'my $a .= $b.$c.$d', sub { our ($b,$c,$d); my $a .= $b.$c.$d },
+ {
+ padsv => 1,
+ });
+
+# prefer rcatline optimisation over multiconcat
+
+test_opcount(0, "rcatline", sub { my ($x,$y); open FOO, "xxx"; $x .= <FOO> },
+ {
+ rcatline => 1,
+ readline => 0,
+ multiconcat => 0,
+ concat => 0,
+ });
+
+# long chains of concats should be converted into chained multiconcats
+
+{
+ my @a;
+ for my $i (60..68) { # check each side of 64 threshold
+ my $c = join '.', map "\$a[$_]", 1..$i;
+ my $sub = eval qq{sub { $c }} or die $@;
+ test_opcount(0, "long chain $i", $sub,
+ {
+ multiconcat => $i > 65 ? 2 : 1,
+ concat => $i == 65 ? 1 : 0,
+ aelem => 0,
+ aelemfast => 0,
+ });
+ }
+}
+
+# with C<$state $s = $a . $b . ....>, the assign is optimised away,
+# but the padsv isn't (it's treated like a general LHS expression rather
+# than using OPpTARGET_MY).
+
+test_opcount(0, "state works with multiconcat",
+ sub { use feature 'state'; our ($a, $b, $c); state $s = $a . $b . $c },
+ {
+ multiconcat => 1,
+ concat => 0,
+ sassign => 0,
+ once => 1,
+ padsv => 2, # one each for the next/once branches
+ });