diff options
-rw-r--r-- | dist/Safe/t/safeops.t | 1 | ||||
-rw-r--r-- | dump.c | 56 | ||||
-rw-r--r-- | embed.fnc | 1 | ||||
-rw-r--r-- | embed.h | 1 | ||||
-rw-r--r-- | ext/B/B.xs | 59 | ||||
-rw-r--r-- | ext/B/t/optree_samples.t | 186 | ||||
-rw-r--r-- | ext/Opcode/Opcode.pm | 4 | ||||
-rw-r--r-- | gv.c | 7 | ||||
-rw-r--r-- | lib/B/Deparse.pm | 227 | ||||
-rw-r--r-- | lib/B/Deparse.t | 133 | ||||
-rw-r--r-- | lib/B/Op_private.pm | 18 | ||||
-rw-r--r-- | op.c | 902 | ||||
-rw-r--r-- | op.h | 2 | ||||
-rw-r--r-- | opcode.h | 365 | ||||
-rw-r--r-- | opnames.h | 661 | ||||
-rw-r--r-- | perl.h | 20 | ||||
-rw-r--r-- | pp_hot.c | 803 | ||||
-rw-r--r-- | pp_proto.h | 1 | ||||
-rw-r--r-- | proto.h | 3 | ||||
-rw-r--r-- | regen/op_private | 11 | ||||
-rwxr-xr-x | regen/opcode.pl | 2 | ||||
-rw-r--r-- | regen/opcodes | 1 | ||||
-rw-r--r-- | sv.c | 3 | ||||
-rw-r--r-- | t/op/gmagic.t | 23 | ||||
-rw-r--r-- | t/op/sprintf2.t | 76 | ||||
-rw-r--r-- | t/op/state.t | 9 | ||||
-rw-r--r-- | t/opbasic/concat.t | 651 | ||||
-rw-r--r-- | t/perf/benchmarks | 351 | ||||
-rw-r--r-- | t/perf/opcount.t | 236 |
29 files changed, 4174 insertions, 639 deletions
diff --git a/dist/Safe/t/safeops.t b/dist/Safe/t/safeops.t index 0b696a8149..ea15931600 100644 --- a/dist/Safe/t/safeops.t +++ b/dist/Safe/t/safeops.t @@ -234,6 +234,7 @@ exists exists $h{Key} rv2hv %h helem $h{kEy} hslice @h{kEy} +multiconcat SKIP (set by optimizer) multideref SKIP (set by optimizer) unpack unpack pack pack @@ -1141,6 +1141,15 @@ S_do_op_dump_bar(pTHX_ I32 level, UV bar, PerlIO *file, const OP *o) break; } + case OP_MULTICONCAT: + S_opdump_indent(aTHX_ o, level, bar, file, "NARGS = %" UVuf "\n", + cUNOP_AUXo->op_aux[PERL_MULTICONCAT_IX_NARGS].uv); + /* XXX really ought to dump each field individually, + * but that's too much like hard work */ + S_opdump_indent(aTHX_ o, level, bar, file, "CONSTS = (%" SVf ")\n", + SVfARG(multiconcat_stringify(o))); + break; + case OP_CONST: case OP_HINTSEVAL: case OP_METHOD_NAMED: @@ -2728,6 +2737,48 @@ Perl_multideref_stringify(pTHX_ const OP *o, CV *cv) } +/* Return a temporary SV containing a stringified representation of + * the op_aux field of a MULTICONCAT op. Note that if the aux contains + * both plain and utf8 versions of the const string and indices, only + * the first is displayed. + */ + +SV* +Perl_multiconcat_stringify(pTHX_ const OP *o) +{ + UNOP_AUX_item *aux = cUNOP_AUXo->op_aux; + UNOP_AUX_item *lens; + STRLEN len; + UV nargs; + char *s; + SV *out = newSVpvn_flags("", 0, SVs_TEMP); + + PERL_ARGS_ASSERT_MULTICONCAT_STRINGIFY; + + nargs = aux[PERL_MULTICONCAT_IX_NARGS].uv; + s = aux[PERL_MULTICONCAT_IX_PLAIN_PV].pv; + len = aux[PERL_MULTICONCAT_IX_PLAIN_LEN].size; + if (!s) { + s = aux[PERL_MULTICONCAT_IX_UTF8_PV].pv; + len = aux[PERL_MULTICONCAT_IX_UTF8_LEN].size; + sv_catpvs(out, "UTF8 "); + } + pv_pretty(out, s, len, 50, + NULL, NULL, + (PERL_PV_PRETTY_NOCLEAR + |PERL_PV_PRETTY_QUOTE + |PERL_PV_PRETTY_ELLIPSES)); + + lens = aux + PERL_MULTICONCAT_IX_LENGTHS; + nargs++; + while (nargs-- > 0) { + Perl_sv_catpvf(aTHX_ out, ",%" IVdf, (IV)lens->size); + lens++; + } + return out; +} + + I32 Perl_debop(pTHX_ const OP *o) { @@ -2772,6 +2823,11 @@ Perl_debop(pTHX_ const OP *o) SVfARG(multideref_stringify(o, deb_curcv(cxstack_ix)))); break; + case OP_MULTICONCAT: + PerlIO_printf(Perl_debug_log, "(%" SVf ")", + SVfARG(multiconcat_stringify(o))); + break; + default: break; } @@ -395,6 +395,7 @@ Afp |void |deb |NN const char* pat|... Ap |void |vdeb |NN const char* pat|NULLOK va_list* args Ap |void |debprofdump EXp |SV* |multideref_stringify |NN const OP* o|NULLOK CV *cv +EXp |SV* |multiconcat_stringify |NN const OP* o Ap |I32 |debop |NN const OP* o Ap |I32 |debstack Ap |I32 |debstackptrs @@ -923,6 +923,7 @@ #define cv_ckproto_len_flags(a,b,c,d,e) Perl_cv_ckproto_len_flags(aTHX_ a,b,c,d,e) #define grok_atoUV Perl_grok_atoUV #define mg_find_mglob(a) Perl_mg_find_mglob(aTHX_ a) +#define multiconcat_stringify(a) Perl_multiconcat_stringify(aTHX_ a) #define multideref_stringify(a,b) Perl_multideref_stringify(aTHX_ a,b) #define op_clear(a) Perl_op_clear(aTHX_ a) #define qerror(a) Perl_qerror(aTHX_ a) diff --git a/ext/B/B.xs b/ext/B/B.xs index 74edd38720..02b78caa5d 100644 --- a/ext/B/B.xs +++ b/ext/B/B.xs @@ -1183,6 +1183,10 @@ string(o, cv) PPCODE: aux = cUNOP_AUXo->op_aux; switch (o->op_type) { + case OP_MULTICONCAT: + ret = multiconcat_stringify(o); + break; + case OP_MULTIDEREF: ret = multideref_stringify(o, cv); break; @@ -1238,6 +1242,61 @@ aux_list(o, cv) (char)aux[2].iv) : &PL_sv_no)); break; + case OP_MULTICONCAT: + { + UV nargs = aux[0].uv; + char *p; + STRLEN len; + U32 utf8 = 0; + SV *sv; + UNOP_AUX_item *lens; + + /* return (nargs, const string, segment len 0, 1, 2, ...) */ + + /* if this changes, this block of code probably needs fixing */ + assert(PERL_MULTICONCAT_HEADER_SIZE == 5); + nargs = aux[PERL_MULTICONCAT_IX_NARGS].uv; + EXTEND(SP, ((SSize_t)(2 + (nargs+1)))); + PUSHs(sv_2mortal(newSViv(nargs))); + + p = aux[PERL_MULTICONCAT_IX_PLAIN_PV].pv; + len = aux[PERL_MULTICONCAT_IX_PLAIN_LEN].size; + if (!p) { + p = aux[PERL_MULTICONCAT_IX_UTF8_PV].pv; + len = aux[PERL_MULTICONCAT_IX_UTF8_LEN].size; + utf8 = SVf_UTF8; + } + sv = newSVpvn(p, len); + SvFLAGS(sv) |= utf8; + PUSHs(sv_2mortal(sv)); + + lens = aux + PERL_MULTICONCAT_IX_LENGTHS; + nargs++; /* loop (nargs+1) times */ + if (utf8) { + U8 *p = (U8*)SvPVX(sv); + while (nargs--) { + SSize_t bytes = lens->size; + SSize_t chars; + if (bytes <= 0) + chars = bytes; + else { + /* return char lengths rather than byte lengths */ + chars = utf8_length(p, p + bytes); + p += bytes; + } + lens++; + PUSHs(sv_2mortal(newSViv(chars))); + } + } + else { + while (nargs--) { + PUSHs(sv_2mortal(newSViv(lens->size))); + lens++; + } + } + break; + } + case OP_MULTIDEREF: #ifdef USE_ITHREADS # define ITEM_SV(item) *av_fetch(comppad, (item)->pad_offset, FALSE); diff --git a/ext/B/t/optree_samples.t b/ext/B/t/optree_samples.t index 7374626663..7b63f6f141 100644 --- a/ext/B/t/optree_samples.t +++ b/ext/B/t/optree_samples.t @@ -240,38 +240,36 @@ checkOptree ( name => '-exec sub { foreach (1..10) {print "foo $_"} }', # 3 <$> const[IV 1] s # 4 <$> const[IV 10] s # 5 <#> gv[*_] s -# 6 <{> enteriter(next->d last->g redo->7) KS/DEF -# e <0> iter s -# f <|> and(other->7) K/1 -# 7 <;> nextstate(main 442 optree.t:158) v:>,<,% +# 6 <{> enteriter(next->c last->f redo->7) KS/DEF +# d <0> iter s +# e <|> and(other->7) K/1 +# 7 <;> nextstate(main 1659 optree_samples.t:234) v # 8 <0> pushmark s -# 9 <$> const[PV "foo "] s -# a <#> gvsv[*_] s -# b <2> concat[t4] sK/2 -# c <@> print vK -# d <0> unstack s -# goto e -# g <2> leaveloop K/2 -# h <1> leavesub[1 ref] K/REFC,1 +# 9 <#> gvsv[*_] s +# a <+> multiconcat("foo ",4,-1)[t5] sK/STRINGIFY +# b <@> print vK +# c <0> unstack s +# goto d +# f <2> leaveloop K/2 +# g <1> leavesub[1 ref] K/REFC,1 EOT_EOT # 1 <;> nextstate(main 444 optree_samples.t:182) v:>,<,% # 2 <0> pushmark s # 3 <$> const(IV 1) s # 4 <$> const(IV 10) s # 5 <$> gv(*_) s -# 6 <{> enteriter(next->d last->g redo->7) KS/DEF -# e <0> iter s -# f <|> and(other->7) K/1 +# 6 <{> enteriter(next->c last->f redo->7) KS/DEF +# d <0> iter s +# e <|> and(other->7) K/1 # 7 <;> nextstate(main 443 optree_samples.t:182) v:>,<,% # 8 <0> pushmark s -# 9 <$> const(PV "foo ") s -# a <$> gvsv(*_) s -# b <2> concat[t3] sK/2 -# c <@> print vK -# d <0> unstack s -# goto e -# g <2> leaveloop K/2 -# h <1> leavesub[1 ref] K/REFC,1 +# 9 <$> gvsv(*_) s +# a <+> multiconcat("foo ",4,-1)[t4] sK/STRINGIFY +# b <@> print vK +# c <0> unstack s +# goto d +# f <2> leaveloop K/2 +# g <1> leavesub[1 ref] K/REFC,1 EONT_EONT checkOptree ( name => '-basic sub { print "foo $_" foreach (1..10) }', @@ -279,55 +277,53 @@ checkOptree ( name => '-basic sub { print "foo $_" foreach (1..10) }', bcopts => '-basic', strip_open_hints => 1, expect => <<'EOT_EOT', expect_nt => <<'EONT_EONT'); -# g <1> leavesub[1 ref] K/REFC,1 ->(end) -# - <@> lineseq KP ->g +# f <1> leavesub[1 ref] K/REFC,1 ->(end) +# - <@> lineseq KP ->f # 1 <;> nextstate(main 445 optree.t:167) v:>,<,% ->2 -# f <2> leaveloop K/2 ->g -# 6 <{> enteriter(next->c last->f redo->7) KS/DEF ->d +# e <2> leaveloop K/2 ->f +# 6 <{> enteriter(next->b last->e redo->7) KS/DEF ->c # - <0> ex-pushmark s ->2 # - <1> ex-list lK ->5 # 2 <0> pushmark s ->3 # 3 <$> const[IV 1] s ->4 # 4 <$> const[IV 10] s ->5 # 5 <#> gv[*_] s ->6 -# - <1> null K/1 ->f -# e <|> and(other->7) K/1 ->f -# d <0> iter s ->e +# - <1> null K/1 ->e +# d <|> and(other->7) K/1 ->e +# c <0> iter s ->d # - <@> lineseq sK ->- -# b <@> print vK ->c +# a <@> print vK ->b # 7 <0> pushmark s ->8 -# - <1> ex-stringify sK/1 ->b -# - <0> ex-pushmark s ->8 -# a <2> concat[t2] sK/2 ->b -# 8 <$> const[PV "foo "] s ->9 -# - <1> ex-rv2sv sK/1 ->a -# 9 <#> gvsv[*_] s ->a -# c <0> unstack s ->d +# 9 <+> multiconcat("foo ",4,-1)[t3] sK/STRINGIFY ->a +# - <0> ex-pushmark s ->- +# - <0> ex-const s ->8 +# - <1> ex-rv2sv sK/1 ->9 +# 8 <#> gvsv[*_] s ->9 +# b <0> unstack s ->c EOT_EOT -# g <1> leavesub[1 ref] K/REFC,1 ->(end) -# - <@> lineseq KP ->g +# f <1> leavesub[1 ref] K/REFC,1 ->(end) +# - <@> lineseq KP ->f # 1 <;> nextstate(main 446 optree_samples.t:192) v:>,<,% ->2 -# f <2> leaveloop K/2 ->g -# 6 <{> enteriter(next->c last->f redo->7) KS/DEF ->d +# e <2> leaveloop K/2 ->f +# 6 <{> enteriter(next->b last->e redo->7) KS/DEF ->c # - <0> ex-pushmark s ->2 # - <1> ex-list lK ->5 # 2 <0> pushmark s ->3 # 3 <$> const(IV 1) s ->4 # 4 <$> const(IV 10) s ->5 # 5 <$> gv(*_) s ->6 -# - <1> null K/1 ->f -# e <|> and(other->7) K/1 ->f -# d <0> iter s ->e +# - <1> null K/1 ->e +# d <|> and(other->7) K/1 ->e +# c <0> iter s ->d # - <@> lineseq sK ->- -# b <@> print vK ->c +# a <@> print vK ->b # 7 <0> pushmark s ->8 -# - <1> ex-stringify sK/1 ->b -# - <0> ex-pushmark s ->8 -# a <2> concat[t1] sK/2 ->b -# 8 <$> const(PV "foo ") s ->9 -# - <1> ex-rv2sv sK/1 ->a -# 9 <$> gvsv(*_) s ->a -# c <0> unstack s ->d +# 9 <+> multiconcat("foo ",4,-1)[t2] sK/STRINGIFY ->a +# - <0> ex-pushmark s ->- +# - <0> ex-const s ->8 +# - <1> ex-rv2sv sK/1 ->9 +# 8 <$> gvsv(*_) s ->9 +# b <0> unstack s ->c EONT_EONT checkOptree ( name => '-exec -e foreach (1..10) {print qq{foo $_}}', @@ -341,19 +337,18 @@ checkOptree ( name => '-exec -e foreach (1..10) {print qq{foo $_}}', # 4 <$> const[IV 1] s # 5 <$> const[IV 10] s # 6 <#> gv[*_] s -# 7 <{> enteriter(next->e last->h redo->8) vKS/DEF -# f <0> iter s -# g <|> and(other->8) vK/1 +# 7 <{> enteriter(next->d last->g redo->8) vKS/DEF +# e <0> iter s +# f <|> and(other->8) vK/1 # 8 <;> nextstate(main 1 -e:1) v:>,<,% # 9 <0> pushmark s -# a <$> const[PV "foo "] s -# b <#> gvsv[*_] s -# c <2> concat[t4] sK/2 -# d <@> print vK -# e <0> unstack v -# goto f -# h <2> leaveloop vK/2 -# i <@> leave[1 ref] vKP/REFC +# a <#> gvsv[*_] s +# b <+> multiconcat("foo ",4,-1)[t5] sK/STRINGIFY +# c <@> print vK +# d <0> unstack v +# goto e +# g <2> leaveloop vK/2 +# h <@> leave[1 ref] vKP/REFC EOT_EOT # 1 <0> enter # 2 <;> nextstate(main 2 -e:1) v:>,<,%,{ @@ -361,19 +356,18 @@ EOT_EOT # 4 <$> const(IV 1) s # 5 <$> const(IV 10) s # 6 <$> gv(*_) s -# 7 <{> enteriter(next->e last->h redo->8) vKS/DEF -# f <0> iter s -# g <|> and(other->8) vK/1 +# 7 <{> enteriter(next->d last->g redo->8) vKS/DEF +# e <0> iter s +# f <|> and(other->8) vK/1 # 8 <;> nextstate(main 1 -e:1) v:>,<,% # 9 <0> pushmark s -# a <$> const(PV "foo ") s -# b <$> gvsv(*_) s -# c <2> concat[t3] sK/2 -# d <@> print vK -# e <0> unstack v -# goto f -# h <2> leaveloop vK/2 -# i <@> leave[1 ref] vKP/REFC +# a <$> gvsv(*_) s +# b <+> multiconcat("foo ",4,-1)[t4] sK/STRINGIFY +# c <@> print vK +# d <0> unstack v +# goto e +# g <2> leaveloop vK/2 +# h <@> leave[1 ref] vKP/REFC EONT_EONT checkOptree ( name => '-exec sub { print "foo $_" foreach (1..10) }', @@ -386,36 +380,34 @@ checkOptree ( name => '-exec sub { print "foo $_" foreach (1..10) }', # 3 <$> const[IV 1] s # 4 <$> const[IV 10] s # 5 <#> gv[*_] s -# 6 <{> enteriter(next->c last->f redo->7) KS/DEF -# d <0> iter s -# e <|> and(other->7) K/1 +# 6 <{> enteriter(next->b last->e redo->7) KS/DEF +# c <0> iter s +# d <|> and(other->7) K/1 # 7 <0> pushmark s -# 8 <$> const[PV "foo "] s -# 9 <#> gvsv[*_] s -# a <2> concat[t2] sK/2 -# b <@> print vK -# c <0> unstack s -# goto d -# f <2> leaveloop K/2 -# g <1> leavesub[1 ref] K/REFC,1 +# 8 <#> gvsv[*_] s +# 9 <+> multiconcat("foo ",4,-1)[t3] sK/STRINGIFY +# a <@> print vK +# b <0> unstack s +# goto c +# e <2> leaveloop K/2 +# f <1> leavesub[1 ref] K/REFC,1 EOT_EOT # 1 <;> nextstate(main 447 optree_samples.t:252) v:>,<,% # 2 <0> pushmark s # 3 <$> const(IV 1) s # 4 <$> const(IV 10) s # 5 <$> gv(*_) s -# 6 <{> enteriter(next->c last->f redo->7) KS/DEF -# d <0> iter s -# e <|> and(other->7) K/1 +# 6 <{> enteriter(next->b last->e redo->7) KS/DEF +# c <0> iter s +# d <|> and(other->7) K/1 # 7 <0> pushmark s -# 8 <$> const(PV "foo ") s -# 9 <$> gvsv(*_) s -# a <2> concat[t1] sK/2 -# b <@> print vK -# c <0> unstack s -# goto d -# f <2> leaveloop K/2 -# g <1> leavesub[1 ref] K/REFC,1 +# 8 <$> gvsv(*_) s +# 9 <+> multiconcat("foo ",4,-1)[t2] sK/STRINGIFY +# a <@> print vK +# b <0> unstack s +# goto c +# e <2> leaveloop K/2 +# f <1> leavesub[1 ref] K/REFC,1 EONT_EONT pass("GREP: SAMPLES FROM PERLDOC -F GREP"); diff --git a/ext/Opcode/Opcode.pm b/ext/Opcode/Opcode.pm index 7de0ac95fe..01c495d5d6 100644 --- a/ext/Opcode/Opcode.pm +++ b/ext/Opcode/Opcode.pm @@ -6,7 +6,7 @@ use strict; our($VERSION, @ISA, @EXPORT_OK); -$VERSION = "1.40"; +$VERSION = "1.41"; use Carp; use Exporter (); @@ -353,7 +353,7 @@ These memory related ops are not included in :base_core because they can easily be used to implement a resource attack (e.g., consume all available memory). - concat repeat join range + concat multiconcat repeat join range anonlist anonhash @@ -3476,7 +3476,12 @@ Perl_amagic_call(pTHX_ SV *left, SV *right, int method, int flags) SV* res; const bool oldcatch = CATCH_GET; I32 oldmark, nret; - U8 gimme = force_scalar ? G_SCALAR : GIMME_V; + /* for multiconcat, we may call overload several times, + * with the context of individual concats being scalar, + * regardless of the overall context of the multiconcat op + */ + U8 gimme = (force_scalar || PL_op->op_type == OP_MULTICONCAT) + ? G_SCALAR : GIMME_V; CATCH_SET(TRUE); Zero(&myop, 1, BINOP); diff --git a/lib/B/Deparse.pm b/lib/B/Deparse.pm index a1f7adcb6d..4ff427c51c 100644 --- a/lib/B/Deparse.pm +++ b/lib/B/Deparse.pm @@ -19,6 +19,7 @@ use B qw(class main_root main_start main_cv svref_2object opnumber perlstring OPpSORT_REVERSE OPpMULTIDEREF_EXISTS OPpMULTIDEREF_DELETE OPpSPLIT_ASSIGN OPpSPLIT_LEX OPpPADHV_ISKEYS OPpRV2HV_ISKEYS + OPpMULTICONCAT_APPEND OPpMULTICONCAT_STRINGIFY OPpMULTICONCAT_FAKE OPpTRUEBOOL OPpINDEX_BOOLNEG SVf_IOK SVf_NOK SVf_ROK SVf_POK SVpad_OUR SVf_FAKE SVs_RMG SVs_SMG SVs_PADTMP SVpad_TYPED @@ -50,7 +51,7 @@ use B qw(class main_root main_start main_cv svref_2object opnumber perlstring MDEREF_SHIFT ); -$VERSION = '1.43'; +$VERSION = '1.44'; use strict; use vars qw/$AUTOLOAD/; use warnings (); @@ -365,7 +366,7 @@ BEGIN { BEGIN { for (qw[ const stringify rv2sv list glob pushmark null aelem - kvaslice kvhslice + kvaslice kvhslice padsv nextstate dbstate rv2av rv2hv helem custom ]) { eval "sub OP_\U$_ () { " . opnumber($_) . "}" }} @@ -4404,6 +4405,142 @@ sub multideref_var_name { } +# deparse an OP_MULTICONCAT. If $in_dq is 1, we're within +# a double-quoted string, so for example. +# "abc\Qdef$x\Ebar" +# might get compiled as +# multiconcat("abc", metaquote(multiconcat("def", $x)), "bar") +# and the inner multiconcat should be deparsed as C<def$x> rather than +# the normal C<def . $x> +# Ditto if $in_dq is 2, handle qr/...\Qdef$x\E.../. + +sub do_multiconcat { + my $self = shift; + my($op, $cx, $in_dq) = @_; + + my $kid; + my @kids; + my $assign; + my $append; + my $lhs = ""; + + for ($kid = $op->first; !null $kid; $kid = $kid->sibling) { + # skip the consts and/or padsv we've optimised away + push @kids, $kid + unless $kid->type == OP_NULL + && ( $kid->targ == OP_PADSV + || $kid->targ == OP_CONST + || $kid->targ == OP_PUSHMARK); + } + + $append = ($op->private & OPpMULTICONCAT_APPEND); + + if ($op->private & OPpTARGET_MY) { + # '$lex = ...' or '$lex .= ....' or 'my $lex = ' + $lhs = $self->padname($op->targ); + $lhs = "my $lhs" if ($op->private & OPpLVAL_INTRO); + $assign = 1; + } + elsif ($op->flags & OPf_STACKED) { + # 'expr = ...' or 'expr .= ....' + my $expr = $append ? shift(@kids) : pop(@kids); + $lhs = $self->deparse($expr, 7); + $assign = 1; + } + + if ($assign) { + $lhs .= $append ? ' .= ' : ' = '; + } + + my ($nargs, $const_str, @const_lens) = $op->aux_list($self->{curcv}); + + my @consts; + my $i = 0; + for (@const_lens) { + if ($_ == -1) { + push @consts, undef; + } + else { + push @consts, substr($const_str, $i, $_); + my @args; + $i += $_; + } + } + + my $rhs = ""; + + if ( $in_dq + || (($op->private & OPpMULTICONCAT_STRINGIFY) && !$self->{'unquote'})) + { + # "foo=$foo bar=$bar " + my $not_first; + while (@consts) { + $rhs = dq_disambiguate($rhs, $self->dq(shift(@kids), 18)) + if $not_first; + $not_first = 1; + my $c = shift @consts; + if (defined $c) { + if ($in_dq == 2) { + # in pattern: don't convert newline to '\n' etc etc + my $s = re_uninterp(escape_re(re_unback($c))); + $rhs = re_dq_disambiguate($rhs, $s) + } + else { + my $s = uninterp(escape_str(unback($c))); + $rhs = dq_disambiguate($rhs, $s) + } + } + } + return $rhs if $in_dq; + $rhs = single_delim("qq", '"', $rhs, $self); + } + elsif ($op->private & OPpMULTICONCAT_FAKE) { + # sprintf("foo=%s bar=%s ", $foo, $bar) + + my @all; + @consts = map { $_ //= ''; s/%/%%/g; $_ } @consts; + my $fmt = join '%s', @consts; + push @all, $self->quoted_const_str($fmt); + + # the following is a stripped down copy of sub listop {} + my $parens = $assign || ($cx >= 5) || $self->{'parens'}; + my $fullname = $self->keyword('sprintf'); + push @all, map $self->deparse($_, 6), @kids; + + $rhs = $parens + ? "$fullname(" . join(", ", @all) . ")" + : "$fullname " . join(", ", @all); + } + else { + # "foo=" . $foo . " bar=" . $bar + my @all; + my $not_first; + while (@consts) { + push @all, $self->deparse(shift(@kids), 18) if $not_first; + $not_first = 1; + my $c = shift @consts; + if (defined $c) { + push @all, $self->quoted_const_str($c); + } + } + $rhs .= join ' . ', @all; + } + + my $text = $lhs . $rhs; + + $text = "($text)" if ($cx >= (($assign) ? 7 : 18+1)) + || $self->{'parens'}; + + return $text; +} + + +sub pp_multiconcat { + my $self = shift; + $self->do_multiconcat(@_, 0); +} + + sub pp_multideref { my $self = shift; my($op, $cx) = @_; @@ -4786,7 +4923,7 @@ sub retscalar { |study|pos|preinc|i_preinc|predec|i_predec|postinc |i_postinc|postdec|i_postdec|pow|multiply|i_multiply |divide|i_divide|modulo|i_modulo|add|i_add|subtract - |i_subtract|concat|stringify|left_shift|right_shift|lt + |i_subtract|concat|multiconcat|stringify|left_shift|right_shift|lt |i_lt|gt|i_gt|le|i_le|ge|i_ge|eq|i_eq|ne|i_ne|ncmp|i_ncmp |slt|sgt|sle|sge|seq|sne|scmp|[sn]?bit_(?:and|x?or)|negate |i_negate|not|[sn]?complement|smartmatch|atan2|sin|cos @@ -5162,6 +5299,20 @@ sub split_float { return ($mantissa, $exponent); } + +# suitably single- or double-quote a literal constant string + +sub quoted_const_str { + my ($self, $str) =@_; + if ($str =~ /[[:^print:]]/a) { + return single_delim("qq", '"', + uninterp(escape_str unback $str), $self); + } else { + return single_delim("q", "'", unback($str), $self); + } +} + + sub const { my $self = shift; my($sv, $cx) = @_; @@ -5275,12 +5426,7 @@ sub const { return $self->maybe_parens("\\$const", $cx, 20); } elsif ($sv->FLAGS & SVf_POK) { my $str = $sv->PV; - if ($str =~ /[[:^print:]]/a) { - return single_delim("qq", '"', - uninterp(escape_str unback $str), $self); - } else { - return single_delim("q", "'", unback($str), $self); - } + return $self->quoted_const_str($str); } else { return "undef"; } @@ -5340,6 +5486,25 @@ sub pp_const { return $self->const($sv, $cx); } + +# Join two components of a double-quoted string, disambiguating +# "${foo}bar", "${foo}{bar}", "${foo}[1]", "$foo\::bar" + +sub dq_disambiguate { + my ($first, $last) = @_; + ($last =~ /^[A-Z\\\^\[\]_?]/ && + $first =~ s/([\$@])\^$/${1}{^}/) # "${^}W" etc + || ($last =~ /^[:'{\[\w_]/ && #' + $first =~ s/([\$@])([A-Za-z_]\w*)$/${1}{$2}/); + return $first . $last; +} + + +# Deparse a double-quoted optree. For example, "$a[0]\Q$b\Efo\"o" gets +# compiled to concat(concat($[0],quotemeta($b)),const("fo\"o")), and this +# sub deparses it back to $a[0]\Q$b\Efo"o +# (It does not add delimiters) + sub dq { my $self = shift; my $op = shift; @@ -5348,16 +5513,9 @@ sub dq { return '$[' if $op->private & OPpCONST_ARYBASE; return uninterp(escape_str(unback($self->const_sv($op)->as_string))); } elsif ($type eq "concat") { - my $first = $self->dq($op->first); - my $last = $self->dq($op->last); - - # Disambiguate "${foo}bar", "${foo}{bar}", "${foo}[1]", "$foo\::bar" - ($last =~ /^[A-Z\\\^\[\]_?]/ && - $first =~ s/([\$@])\^$/${1}{^}/) # "${^}W" etc - || ($last =~ /^[:'{\[\w_]/ && #' - $first =~ s/([\$@])([A-Za-z_]\w*)$/${1}{$2}/); - - return $first . $last; + return dq_disambiguate($self->dq($op->first), $self->dq($op->last)); + } elsif ($type eq "multiconcat") { + return $self->do_multiconcat($op, 26, 1); } elsif ($type eq "uc") { return '\U' . $self->dq($op->first->sibling) . '\E'; } elsif ($type eq "lc") { @@ -5682,9 +5840,11 @@ sub pp_trans { sub pp_transr { push @_, 'r'; goto &pp_trans } +# Join two components of a double-quoted re, disambiguating +# "${foo}bar", "${foo}{bar}", "${foo}[1]". + sub re_dq_disambiguate { my ($first, $last) = @_; - # Disambiguate "${foo}bar", "${foo}{bar}", "${foo}[1]" ($last =~ /^[A-Z\\\^\[\]_?]/ && $first =~ s/([\$@])\^$/${1}{^}/) # "${^}W" etc || ($last =~ /^[{\[\w_]/ && @@ -5706,6 +5866,8 @@ sub re_dq { my $first = $self->re_dq($op->first); my $last = $self->re_dq($op->last); return re_dq_disambiguate($first, $last); + } elsif ($type eq "multiconcat") { + return $self->do_multiconcat($op, 26, 2); } elsif ($type eq "uc") { return '\U' . $self->re_dq($op->first->sibling) . '\E'; } elsif ($type eq "lc") { @@ -5754,6 +5916,31 @@ sub pure_string { return $self->pure_string($op->first) && $self->pure_string($op->last); } + elsif ($type eq 'multiconcat') { + my ($kid, @kids); + for ($kid = $op->first; !null $kid; $kid = $kid->sibling) { + # skip the consts and/or padsv we've optimised away + push @kids, $kid + unless $kid->type == OP_NULL + && ( $kid->targ == OP_PADSV + || $kid->targ == OP_CONST + || $kid->targ == OP_PUSHMARK); + } + + if ($op->flags & OPf_STACKED) { + # remove expr from @kids where 'expr = ...' or 'expr .= ....' + if ($op->private & OPpMULTICONCAT_APPEND) { + shift(@kids); + } + else { + pop(@kids); + } + } + for (@kids) { + return 0 unless $self->pure_string($_); + } + return 1; + } elsif (is_scalar($op) || $type =~ /^[ah]elem$/) { return 1; } diff --git a/lib/B/Deparse.t b/lib/B/Deparse.t index f1aae49fcb..b75a162a0e 100644 --- a/lib/B/Deparse.t +++ b/lib/B/Deparse.t @@ -2718,3 +2718,136 @@ $c = (index($a, $b) == -1); $c = (rindex($a, $b) == -1); $c = (index($a, $b) != -1); $c = (rindex($a, $b) != -1); +#### +# plain multiconcat +my($a, $b, $c, $d, @a); +$d = length $a . $b . $c; +$d = length($a) . $b . $c; +print '' . $a; +push @a, ($a . '') * $b; +unshift @a, "$a" * ($b . ''); +print $a . 'x' . $b . $c; +print $a . 'x' . $b . $c, $d; +print $b . $c . ($a . $b); +print $b . $c . ($a . $b); +print $b . $c . @a; +print $a . "\x{100}"; +#### +# double-quoted multiconcat +my($a, $b, $c, $d, @a); +print "${a}x\x{100}$b$c"; +print "$a\Q$b\E$c\Ua$a\E\Lb$b\uc$c\E$a${b}c$c"; +print "A=$a[length 'b' . $c . 'd'] b=$b"; +print "A=@a B=$b"; +print "\x{101}$a\x{100}"; +$a = qr/\Q +$b $c +\x80 +\x{100} +\E$c +/; +#### +# sprintf multiconcat +my($a, $b, $c, $d, @a); +print sprintf("%s%s%%%sx%s\x{100}%s", $a, $b, $c, scalar @a, $d); +#### +# multiconcat with lexical assign +my($a, $b, $c, $d, $e, @a); +$d = 'foo' . $a; +$d = "foo$a"; +$d = $a . ''; +$d = 'foo' . $a . 'bar'; +$d = $a . $b; +$d = $a . $b . $c; +$d = $a . $b . $c . @a; +$e = ($d = $a . $b . $c); +$d = !$a . $b . $c; +$a = $b . $c . ($a . $b); +$e = f($d = !$a . $b) . $c; +$d = "${a}x\x{100}$b$c"; +f($d = !$a . $b . $c); +#### +# multiconcat with lexical my +my($a, $b, $c, $d, $e, @a); +my $d1 = 'foo' . $a; +my $d2 = "foo$a"; +my $d3 = $a . ''; +my $d4 = 'foo' . $a . 'bar'; +my $d5 = $a . $b; +my $d6 = $a . $b . $c; +my $e7 = ($d = $a . $b . $c); +my $d8 = !$a . $b . $c; +my $d9 = $b . $c . ($a . $b); +my $da = f($d = !$a . $b) . $c; +my $dc = "${a}x\x{100}$b$c"; +f(my $db = !$a . $b . $c); +my $dd = $a . $b . $c . @a; +#### +# multiconcat with lexical append +my($a, $b, $c, $d, $e, @a); +$d .= ''; +$d .= $a; +$d .= "$a"; +$d .= 'foo' . $a; +$d .= "foo$a"; +$d .= $a . ''; +$d .= 'foo' . $a . 'bar'; +$d .= $a . $b; +$d .= $a . $b . $c; +$d .= $a . $b . @a; +$e .= ($d = $a . $b . $c); +$d .= !$a . $b . $c; +$a .= $b . $c . ($a . $b); +$e .= f($d .= !$a . $b) . $c; +f($d .= !$a . $b . $c); +$d .= "${a}x\x{100}$b$c"; +#### +# multiconcat with expression assign +my($a, $b, $c, @a); +our($d, $e); +$d = 'foo' . $a; +$d = "foo$a"; +$d = $a . ''; +$d = 'foo' . $a . 'bar'; +$d = $a . $b; +$d = $a . $b . $c; +$d = $a . $b . @a; +$e = ($d = $a . $b . $c); +$a["-$b-"] = !$a . $b . $c; +$a[$b]{$c}{$d ? $a : $b . $c} = !$a . $b . $c; +$a = $b . $c . ($a . $b); +$e = f($d = !$a . $b) . $c; +$d = "${a}x\x{100}$b$c"; +f($d = !$a . $b . $c); +#### +# multiconcat with expression concat +my($a, $b, $c, @a); +our($d, $e); +$d .= 'foo' . $a; +$d .= "foo$a"; +$d .= $a . ''; +$d .= 'foo' . $a . 'bar'; +$d .= $a . $b; +$d .= $a . $b . $c; +$d .= $a . $b . @a; +$e .= ($d .= $a . $b . $c); +$a["-$b-"] .= !$a . $b . $c; +$a[$b]{$c}{$d ? $a : $b . $c} .= !$a . $b . $c; +$a .= $b . $c . ($a . $b); +$e .= f($d .= !$a . $b) . $c; +$d .= "${a}x\x{100}$b$c"; +f($d .= !$a . $b . $c); +#### +# multiconcat with CORE::sprintf +# CONTEXT sub sprintf {} +my($a, $b); +my $x = CORE::sprintf('%s%s', $a, $b); +#### +# multiconcat with backticks +my($a, $b); +our $x; +$x = `$a-$b`; +#### +# multiconcat within qr// +my($r, $a, $b); +$r = qr/abc\Q$a-$b\Exyz/; diff --git a/lib/B/Op_private.pm b/lib/B/Op_private.pm index 9d2f6155e2..6c9840e9bc 100644 --- a/lib/B/Op_private.pm +++ b/lib/B/Op_private.pm @@ -134,7 +134,7 @@ $bits{$_}{6} = 'OPpINDEX_BOOLNEG' for qw(index rindex); $bits{$_}{1} = 'OPpITER_REVERSED' for qw(enteriter iter); $bits{$_}{7} = 'OPpLVALUE' for qw(leave leaveloop); $bits{$_}{6} = 'OPpLVAL_DEFER' for qw(aelem helem multideref); -$bits{$_}{7} = 'OPpLVAL_INTRO' for qw(aelem aslice cond_expr delete enteriter entersub gvsv helem hslice list lvavref lvref lvrefslice multideref padav padhv padrange padsv pushmark refassign rv2av rv2gv rv2hv rv2sv split); +$bits{$_}{7} = 'OPpLVAL_INTRO' for qw(aelem aslice cond_expr delete enteriter entersub gvsv helem hslice list lvavref lvref lvrefslice multiconcat multideref padav padhv padrange padsv pushmark refassign rv2av rv2gv rv2hv rv2sv split); $bits{$_}{2} = 'OPpLVREF_ELEM' for qw(lvref refassign); $bits{$_}{3} = 'OPpLVREF_ITER' for qw(lvref refassign); $bits{$_}{3} = 'OPpMAYBE_LVSUB' for qw(aassign aelem akeys aslice av2arylen avhvswitch helem hslice keys kvaslice kvhslice multideref padav padhv pos rv2av rv2gv rv2hv substr values vec); @@ -149,7 +149,7 @@ $bits{$_}{6} = 'OPpPAD_STATE' for qw(lvavref lvref padav padhv padsv pushmark re $bits{$_}{7} = 'OPpPV_IS_UTF8' for qw(dump goto last next redo); $bits{$_}{6} = 'OPpREFCOUNTED' for qw(leave leaveeval leavesub leavesublv leavewrite); $bits{$_}{2} = 'OPpSLICEWARNING' for qw(aslice hslice padav padhv rv2av rv2hv); -$bits{$_}{4} = 'OPpTARGET_MY' for qw(abs add atan2 chdir chmod chomp chown chr chroot concat cos crypt divide exec exp flock getpgrp getppid getpriority hex i_add i_divide i_modulo i_multiply i_subtract index int kill left_shift length link log mkdir modulo multiply nbit_and nbit_or nbit_xor ncomplement oct ord pow push rand rename right_shift rindex rmdir schomp scomplement setpgrp setpriority sin sleep sqrt srand stringify subtract symlink system time unlink unshift utime wait waitpid); +$bits{$_}{4} = 'OPpTARGET_MY' for qw(abs add atan2 chdir chmod chomp chown chr chroot concat cos crypt divide exec exp flock getpgrp getppid getpriority hex i_add i_divide i_modulo i_multiply i_subtract index int kill left_shift length link log mkdir modulo multiconcat multiply nbit_and nbit_or nbit_xor ncomplement oct ord pow push rand rename right_shift rindex rmdir schomp scomplement setpgrp setpriority sin sleep sqrt srand stringify subtract symlink system time unlink unshift utime wait waitpid); $bits{$_}{5} = 'OPpTRANS_COMPLEMENT' for qw(trans transr); $bits{$_}{7} = 'OPpTRANS_DELETE' for qw(trans transr); $bits{$_}{0} = 'OPpTRANS_FROM_UTF' for qw(trans transr); @@ -439,6 +439,7 @@ $bits{method_super}{0} = $bf[0]; @{$bits{msgget}}{3,2,1,0} = ($bf[4], $bf[4], $bf[4], $bf[4]); @{$bits{msgrcv}}{3,2,1,0} = ($bf[4], $bf[4], $bf[4], $bf[4]); @{$bits{msgsnd}}{3,2,1,0} = ($bf[4], $bf[4], $bf[4], $bf[4]); +@{$bits{multiconcat}}{6,5,3,0} = ('OPpMULTICONCAT_APPEND', 'OPpMULTICONCAT_FAKE', 'OPpMULTICONCAT_STRINGIFY', $bf[0]); @{$bits{multideref}}{5,4,0} = ('OPpMULTIDEREF_DELETE', 'OPpMULTIDEREF_EXISTS', $bf[0]); @{$bits{multiply}}{1,0} = ($bf[1], $bf[1]); @{$bits{nbit_and}}{1,0} = ($bf[1], $bf[1]); @@ -650,6 +651,9 @@ our %defines = ( OPpMAYBE_LVSUB => 8, OPpMAYBE_TRUEBOOL => 16, OPpMAY_RETURN_CONSTANT => 32, + OPpMULTICONCAT_APPEND => 64, + OPpMULTICONCAT_FAKE => 32, + OPpMULTICONCAT_STRINGIFY => 8, OPpMULTIDEREF_DELETE => 32, OPpMULTIDEREF_EXISTS => 16, OPpOFFBYONE => 128, @@ -752,6 +756,9 @@ our %labels = ( OPpMAYBE_LVSUB => 'LVSUB', OPpMAYBE_TRUEBOOL => 'BOOL?', OPpMAY_RETURN_CONSTANT => 'CONST', + OPpMULTICONCAT_APPEND => 'APPEND', + OPpMULTICONCAT_FAKE => 'FAKE', + OPpMULTICONCAT_STRINGIFY => 'STRINGIFY', OPpMULTIDEREF_DELETE => 'DELETE', OPpMULTIDEREF_EXISTS => 'EXISTS', OPpOFFBYONE => '+1', @@ -817,10 +824,11 @@ our %ops_using = ( OPpLIST_GUESSED => [qw(list)], OPpLVALUE => [qw(leave leaveloop)], OPpLVAL_DEFER => [qw(aelem helem multideref)], - OPpLVAL_INTRO => [qw(aelem aslice cond_expr delete enteriter entersub gvsv helem hslice list lvavref lvref lvrefslice multideref padav padhv padrange padsv pushmark refassign rv2av rv2gv rv2hv rv2sv split)], + OPpLVAL_INTRO => [qw(aelem aslice cond_expr delete enteriter entersub gvsv helem hslice list lvavref lvref lvrefslice multiconcat multideref padav padhv padrange padsv pushmark refassign rv2av rv2gv rv2hv rv2sv split)], OPpLVREF_ELEM => [qw(lvref refassign)], OPpMAYBE_LVSUB => [qw(aassign aelem akeys aslice av2arylen avhvswitch helem hslice keys kvaslice kvhslice multideref padav padhv pos rv2av rv2gv rv2hv substr values vec)], OPpMAYBE_TRUEBOOL => [qw(padhv ref rv2hv)], + OPpMULTICONCAT_APPEND => [qw(multiconcat)], OPpMULTIDEREF_DELETE => [qw(multideref)], OPpOFFBYONE => [qw(caller runcv wantarray)], OPpOPEN_IN_CRLF => [qw(backtick open)], @@ -836,7 +844,7 @@ our %ops_using = ( OPpSORT_DESCEND => [qw(sort)], OPpSPLIT_ASSIGN => [qw(split)], OPpSUBSTR_REPL_FIRST => [qw(substr)], - OPpTARGET_MY => [qw(abs add atan2 chdir chmod chomp chown chr chroot concat cos crypt divide exec exp flock getpgrp getppid getpriority hex i_add i_divide i_modulo i_multiply i_subtract index int kill left_shift length link log mkdir modulo multiply nbit_and nbit_or nbit_xor ncomplement oct ord pow push rand rename right_shift rindex rmdir schomp scomplement setpgrp setpriority sin sleep sqrt srand stringify subtract symlink system time unlink unshift utime wait waitpid)], + OPpTARGET_MY => [qw(abs add atan2 chdir chmod chomp chown chr chroot concat cos crypt divide exec exp flock getpgrp getppid getpriority hex i_add i_divide i_modulo i_multiply i_subtract index int kill left_shift length link log mkdir modulo multiconcat multiply nbit_and nbit_or nbit_xor ncomplement oct ord pow push rand rename right_shift rindex rmdir schomp scomplement setpgrp setpriority sin sleep sqrt srand stringify subtract symlink system time unlink unshift utime wait waitpid)], OPpTRANS_COMPLEMENT => [qw(trans transr)], OPpTRUEBOOL => [qw(grepwhile index length padav padhv pos ref rindex rv2av rv2hv subst)], ); @@ -863,6 +871,8 @@ $ops_using{OPpFT_STACKED} = $ops_using{OPpFT_AFTER_t}; $ops_using{OPpFT_STACKING} = $ops_using{OPpFT_AFTER_t}; $ops_using{OPpLVREF_ITER} = $ops_using{OPpLVREF_ELEM}; $ops_using{OPpMAY_RETURN_CONSTANT} = $ops_using{OPpENTERSUB_NOPAREN}; +$ops_using{OPpMULTICONCAT_FAKE} = $ops_using{OPpMULTICONCAT_APPEND}; +$ops_using{OPpMULTICONCAT_STRINGIFY} = $ops_using{OPpMULTICONCAT_APPEND}; $ops_using{OPpMULTIDEREF_EXISTS} = $ops_using{OPpMULTIDEREF_DELETE}; $ops_using{OPpOPEN_IN_RAW} = $ops_using{OPpOPEN_IN_CRLF}; $ops_using{OPpOPEN_OUT_CRLF} = $ops_using{OPpOPEN_IN_CRLF}; @@ -1078,6 +1078,22 @@ Perl_op_clear(pTHX_ OP *o) PerlMemShared_free(cUNOP_AUXo->op_aux); break; + case OP_MULTICONCAT: + { + UNOP_AUX_item *aux = cUNOP_AUXo->op_aux; + /* aux[PERL_MULTICONCAT_IX_PLAIN_PV] and/or + * aux[PERL_MULTICONCAT_IX_UTF8_PV] point to plain and/or + * utf8 shared strings */ + char *p1 = aux[PERL_MULTICONCAT_IX_PLAIN_PV].pv; + char *p2 = aux[PERL_MULTICONCAT_IX_UTF8_PV].pv; + if (p1) + PerlMemShared_free(p1); + if (p2 && p1 != p2) + PerlMemShared_free(p2); + PerlMemShared_free(aux); + } + break; + case OP_MULTIDEREF: { UNOP_AUX_item *items = cUNOP_AUXo->op_aux; @@ -2470,6 +2486,883 @@ S_check_hash_fields_and_hekify(pTHX_ UNOP *rop, SVOP *key_op) } } +/* info returned by S_sprintf_is_multiconcatable() */ + +struct sprintf_ismc_info { + UV nargs; /* num of args to sprintf (not including the format) */ + char *start; /* start of raw format string */ + char *end; /* bytes after end of raw format string */ + STRLEN total_len; /* total length (in bytes) of format string, not + including '%s' and half of '%%' */ + STRLEN variant; /* number of bytes by which total_len_p would grow + if upgraded to utf8 */ + bool utf8; /* whether the format is utf8 */ +}; + + +/* is the OP_SPRINTF o suitable for converting into a multiconcat op? + * i.e. its format argument is a const string with only '%s' and '%%' + * formats, and the number of args is known, e.g. + * sprintf "a=%s f=%s", $a[0], scalar(f()); + * but not + * sprintf "i=%d a=%s f=%s", $i, @a, f(); + * + * If successful, the sprintf_ismc_info struct pointed to by info will be + * populated. + */ + +STATIC bool +S_sprintf_is_multiconcatable(pTHX_ OP *o,struct sprintf_ismc_info *info) +{ + OP *pm, *constop, *kid; + SV *sv; + char *s, *e, *p; + UV nargs, nformats; + STRLEN cur, total_len, variant; + bool utf8; + + /* if sprintf's behaviour changes, die here so that someone + * can decide whether to enhance this function or skip optimising + * under those new circumstances */ + assert(!(o->op_flags & OPf_STACKED)); + assert(!(PL_opargs[OP_SPRINTF] & OA_TARGLEX)); + assert(!(o->op_private & ~OPpARG4_MASK)); + + pm = cUNOPo->op_first; + if (pm->op_type != OP_PUSHMARK) /* weird coreargs stuff */ + return FALSE; + constop = OpSIBLING(pm); + if (!constop || constop->op_type != OP_CONST) + return FALSE; + sv = cSVOPx_sv(constop); + if (SvMAGICAL(sv) || !SvPOK(sv)) + return FALSE; + + s = SvPV(sv, cur); + e = s + cur; + + /* Scan format for %% and %s and work out how many %s there are. + * Abandon if other format types are found. + */ + + nformats = 0; + total_len = 0; + variant = 0; + + for (p = s; p < e; p++) { + if (*p != '%') { + total_len++; + if (UTF8_IS_INVARIANT(*p)) + variant++; + continue; + } + p++; + if (p >= e) + return FALSE; /* lone % at end gives "Invalid conversion" */ + if (*p == '%') + total_len++; + else if (*p == 's') + nformats++; + else + return FALSE; + } + + if (!nformats || nformats > PERL_MULTICONCAT_MAXARG) + return FALSE; + + utf8 = cBOOL(SvUTF8(sv)); + if (utf8) + variant = 0; + + /* scan args; they must all be in scalar cxt */ + + nargs = 0; + kid = OpSIBLING(constop); + + while (kid) { + if ((kid->op_flags & OPf_WANT) != OPf_WANT_SCALAR) + return FALSE; + nargs++; + kid = OpSIBLING(kid); + } + + if (nargs != nformats) + return FALSE; /* e.g. sprintf("%s%s", $a); */ + + + info->nargs = nargs; + info->start = s; + info->end = e; + info->total_len = total_len; + info->variant = variant; + info->utf8 = utf8; + + return TRUE; +} + + + +/* S_maybe_multiconcat(): + * + * given an OP_STRINGIFY, OP_SASSIGN, OP_CONCAT or OP_SPRINTF op, possibly + * convert it (and its children) into an OP_MULTICONCAT. See the code + * comments just before pp_multiconcat() for the full details of what + * OP_MULTICONCAT supports. + * + * Basically we're looking for an optree with a chain of OP_CONCATS down + * the LHS (or an OP_SPRINTF), with possibly an OP_SASSIGN, and/or + * OP_STRINGIFY, and/or OP_CONCAT acting as '.=' at its head, e.g. + * + * $x = "$a$b-$c" + * + * looks like + * + * SASSIGN + * | + * STRINGIFY -- PADSV[$x] + * | + * | + * ex-PUSHMARK -- CONCAT/S + * | + * CONCAT/S -- PADSV[$d] + * | + * CONCAT -- CONST["-"] + * | + * PADSV[$a] -- PADSV[$b] + * + * Note that at this stage the OP_SASSIGN may have already been optimised + * away with OPpTARGET_MY set on the OP_STRINGIFY or OP_CONCAT. + */ + +STATIC void +S_maybe_multiconcat(pTHX_ OP *o) +{ + OP *lastkidop; /* the right-most of any kids unshifted onto o */ + OP *topop; /* the top-most op in the concat tree (often equals o, + unless there are assign/stringify ops above it */ + OP *parentop; /* the parent op of topop (or itself if no parent) */ + OP *targmyop; /* the op (if any) with the OPpTARGET_MY flag */ + OP *targetop; /* the op corresponding to target=... or target.=... */ + OP *stringop; /* the OP_STRINGIFY op, if any */ + OP *nextop; /* used for recreating the op_next chain without consts */ + OP *kid; /* general-purpose op pointer */ + UNOP_AUX_item *aux; + UNOP_AUX_item *lenp; + char *const_str, *p; + struct sprintf_ismc_info sprintf_info; + + /* store info about each arg in args[]; + * toparg is the highest used slot; argp is a general + * pointer to args[] slots */ + struct { + void *p; /* initially points to const sv (or null for op); + later, set to SvPV(constsv), with ... */ + STRLEN len; /* ... len set to SvPV(..., len) */ + } *argp, *toparg, args[PERL_MULTICONCAT_MAXARG*2 + 1]; + + UV nargs = 0; + UV nconst = 0; + STRLEN variant; + bool utf8 = FALSE; + bool kid_is_last = FALSE; /* most args will be the RHS kid of a concat op; + the last-processed arg will the LHS of one, + as args are processed in reverse order */ + U8 stacked_last = 0; /* whether the last seen concat op was STACKED */ + STRLEN total_len = 0; /* sum of the lengths of the const segments */ + U8 flags = 0; /* what will become the op_flags and ... */ + U8 private_flags = 0; /* ... op_private of the multiconcat op */ + bool is_sprintf = FALSE; /* we're optimising an sprintf */ + bool is_targable = FALSE; /* targetop is an OPpTARGET_MY candidate */ + + /* ----------------------------------------------------------------- + * Phase 1: + * + * Examine the optree non-destructively to determine whether it's + * suitable to be converted into an OP_MULTICONCAT. Accumulate + * information about the optree in args[]. + */ + + argp = args; + targmyop = NULL; + targetop = NULL; + stringop = NULL; + topop = o; + parentop = o; + + assert( o->op_type == OP_SASSIGN + || o->op_type == OP_CONCAT + || o->op_type == OP_SPRINTF + || o->op_type == OP_STRINGIFY); + + /* first see if, at the top of the tree, there is an assign, + * append and/or stringify */ + + if (topop->op_type == OP_SASSIGN) { + /* expr = ..... */ + if (o->op_ppaddr != PL_ppaddr[OP_SASSIGN]) + return; + if (o->op_private & (OPpASSIGN_BACKWARDS|OPpASSIGN_CV_TO_GV)) + return; + assert(!(o->op_private & ~OPpARG2_MASK)); /* barf on unknown flags */ + + parentop = topop; + topop = cBINOPo->op_first; + targetop = OpSIBLING(topop); + if (!targetop) /* probably some sort of syntax error */ + return; + } + else if ( topop->op_type == OP_CONCAT + && (topop->op_flags & OPf_STACKED) + && (cUNOPo->op_first->op_flags & OPf_MOD)) + { + /* expr .= ..... */ + + /* OPpTARGET_MY shouldn't be able to be set here. If it is, + * decide what to do about it */ + assert(!(o->op_private & OPpTARGET_MY)); + + /* barf on unknown flags */ + assert(!(o->op_private & ~(OPpARG2_MASK|OPpTARGET_MY))); + private_flags |= OPpMULTICONCAT_APPEND; + targetop = cBINOPo->op_first; + parentop = topop; + topop = OpSIBLING(targetop); + + /* $x .= <FOO> gets optimised to rcatline instead */ + if (topop->op_type == OP_READLINE) + return; + } + + if (targetop) { + /* Can targetop (the LHS) if it's a padsv, be be optimised + * away and use OPpTARGET_MY instead? + */ + if ( (targetop->op_type == OP_PADSV) + && !(targetop->op_private & OPpDEREF) + && !(targetop->op_private & OPpPAD_STATE) + /* we don't support 'my $x .= ...' */ + && ( o->op_type == OP_SASSIGN + || !(targetop->op_private & OPpLVAL_INTRO)) + ) + is_targable = TRUE; + } + + if (topop->op_type == OP_STRINGIFY) { + if (topop->op_ppaddr != PL_ppaddr[OP_STRINGIFY]) + return; + stringop = topop; + + /* barf on unknown flags */ + assert(!(o->op_private & ~(OPpARG4_MASK|OPpTARGET_MY))); + + if ((topop->op_private & OPpTARGET_MY)) { + if (o->op_type == OP_SASSIGN) + return; /* can't have two assigns */ + targmyop = topop; + } + + private_flags |= OPpMULTICONCAT_STRINGIFY; + parentop = topop; + topop = cBINOPx(topop)->op_first; + assert(OP_TYPE_IS_OR_WAS_NN(topop, OP_PUSHMARK)); + topop = OpSIBLING(topop); + } + + if (topop->op_type == OP_SPRINTF) { + if (topop->op_ppaddr != PL_ppaddr[OP_SPRINTF]) + return; + if (S_sprintf_is_multiconcatable(aTHX_ topop, &sprintf_info)) { + nargs = sprintf_info.nargs; + total_len = sprintf_info.total_len; + variant = sprintf_info.variant; + utf8 = sprintf_info.utf8; + is_sprintf = TRUE; + private_flags |= OPpMULTICONCAT_FAKE; + toparg = argp; + /* we have an sprintf op rather than a concat optree. + * Skip most of the code below which is associated with + * processing that optree. We also skip phase 2, determining + * whether its cost effective to optimise, since for sprintf, + * multiconcat is *always* faster */ + goto create_aux; + } + /* note that even if the sprintf itself isn't multiconcatable, + * the expression as a whole may be, e.g. in + * $x .= sprintf("%d",...) + * the sprintf op will be left as-is, but the concat/S op may + * be upgraded to multiconcat + */ + } + else if (topop->op_type == OP_CONCAT) { + if (topop->op_ppaddr != PL_ppaddr[OP_CONCAT]) + return; + + if ((topop->op_private & OPpTARGET_MY)) { + if (o->op_type == OP_SASSIGN || targmyop) + return; /* can't have two assigns */ + targmyop = topop; + } + } + + /* Is it safe to convert a sassign/stringify/concat op into + * a multiconcat? */ + assert((PL_opargs[OP_SASSIGN] & OA_CLASS_MASK) == OA_BINOP); + assert((PL_opargs[OP_CONCAT] & OA_CLASS_MASK) == OA_BINOP); + assert((PL_opargs[OP_STRINGIFY] & OA_CLASS_MASK) == OA_LISTOP); + assert((PL_opargs[OP_SPRINTF] & OA_CLASS_MASK) == OA_LISTOP); + STATIC_ASSERT_STMT( STRUCT_OFFSET(BINOP, op_last) + == STRUCT_OFFSET(UNOP_AUX, op_aux)); + STATIC_ASSERT_STMT( STRUCT_OFFSET(LISTOP, op_last) + == STRUCT_OFFSET(UNOP_AUX, op_aux)); + + /* Now scan the down the tree looking for a series of + * CONCAT/OPf_STACKED ops on the LHS (with the last one not + * stacked). For example this tree: + * + * | + * CONCAT/STACKED + * | + * CONCAT/STACKED -- EXPR5 + * | + * CONCAT/STACKED -- EXPR4 + * | + * CONCAT -- EXPR3 + * | + * EXPR1 -- EXPR2 + * + * corresponds to an expression like + * + * (EXPR1 . EXPR2 . EXPR3 . EXPR4 . EXPR5) + * + * Record info about each EXPR in args[]: in particular, whether it is + * a stringifiable OP_CONST and if so what the const sv is. + * + * The reason why the last concat can't be STACKED is the difference + * between + * + * ((($a .= $a) .= $a) .= $a) .= $a + * + * and + * $a . $a . $a . $a . $a + * + * The main difference between the optrees for those two constructs + * is the presence of the last STACKED. As well as modifying $a, + * the former sees the changed $a between each concat, so if $s is + * initially 'a', the first returns 'a' x 16, while the latter returns + * 'a' x 5. And pp_multiconcat can't handle that kind of thing. + */ + + kid = topop; + + for (;;) { + OP *argop; + SV *sv; + bool last = FALSE; + + if ( kid->op_type == OP_CONCAT + && !kid_is_last + ) { + OP *k1, *k2; + k1 = cUNOPx(kid)->op_first; + k2 = OpSIBLING(k1); + /* shouldn't happen except maybe after compile err? */ + if (!k2) + return; + + /* avoid turning (A . B . ($lex = C) ...) into (A . B . C ...) */ + if (kid->op_private & OPpTARGET_MY) + kid_is_last = TRUE; + + stacked_last = (kid->op_flags & OPf_STACKED); + if (!stacked_last) + kid_is_last = TRUE; + + kid = k1; + argop = k2; + } + else { + argop = kid; + last = TRUE; + } + + if ( nargs > PERL_MULTICONCAT_MAXARG - 2 + || (argp - args + 1) > (PERL_MULTICONCAT_MAXARG*2 + 1) - 2) + { + /* At least two spare slots are needed to decompose both + * concat args. If there are no slots left, continue to + * examine the rest of the optree, but don't push new values + * on args[]. If the optree as a whole is legal for conversion + * (in particular that the last concat isn't STACKED), then + * the first PERL_MULTICONCAT_MAXARG elements of the optree + * can be converted into an OP_MULTICONCAT now, with the first + * child of that op being the remainder of the optree - + * which may itself later be converted to a multiconcat op + * too. + */ + if (last) { + /* the last arg is the rest of the optree */ + argp++->p = NULL; + nargs++; + } + } + else if ( argop->op_type == OP_CONST + && ((sv = cSVOPx_sv(argop))) + /* defer stringification until runtime of 'constant' + * things that might stringify variantly, e.g. the radix + * point of NVs, or overloaded RVs */ + && (SvPOK(sv) || SvIOK(sv)) + && (!SvGMAGICAL(sv)) + ) { + argp++->p = sv; + utf8 |= cBOOL(SvUTF8(sv)); + nconst++; + } + else { + argp++->p = NULL; + nargs++; + } + + if (last) + break; + } + + toparg = argp - 1; + + if (stacked_last) + return; /* we don't support ((A.=B).=C)...) */ + + /* ----------------------------------------------------------------- + * Phase 2: + * + * At this point we have determined that the optree *can* be converted + * into a multiconcat. Having gathered all the evidence, we now decide + * whether it *should*. + */ + + + /* we need at least one concat action, e.g.: + * + * Y . Z + * X = Y . Z + * X .= Y + * + * otherwise we could be doing something like $x = "foo", which + * if treated as as a concat, would fail to COW. + */ + if (nargs + nconst + cBOOL(private_flags & OPpMULTICONCAT_APPEND) < 2) + return; + + /* Benchmarking seems to indicate that we gain if: + * * we optimise at least two actions into a single multiconcat + * (e.g concat+concat, sassign+concat); + * * or if we can eliminate at least 1 OP_CONST; + * * or if we can eliminate a padsv via OPpTARGET_MY + */ + + if ( + /* eliminated at least one OP_CONST */ + nconst >= 1 + /* eliminated an OP_SASSIGN */ + || o->op_type == OP_SASSIGN + /* eliminated an OP_PADSV */ + || (!targmyop && is_targable) + ) + /* definitely a net gain to optimise */ + goto optimise; + + /* ... if not, what else? */ + + /* special-case '$lex1 = expr . $lex1' (where expr isn't lex1): + * multiconcat is faster (due to not creating a temporary copy of + * $lex1), whereas for a general $lex1 = $lex2 . $lex3, concat is + * faster. + */ + if ( nconst == 0 + && nargs == 2 + && targmyop + && topop->op_type == OP_CONCAT + ) { + PADOFFSET t = targmyop->op_targ; + OP *k1 = cBINOPx(topop)->op_first; + OP *k2 = cBINOPx(topop)->op_last; + if ( k2->op_type == OP_PADSV + && k2->op_targ == t + && ( k1->op_type != OP_PADSV + || k1->op_targ != t) + ) + goto optimise; + } + + /* need at least two concats */ + if (nargs + nconst + cBOOL(private_flags & OPpMULTICONCAT_APPEND) < 3) + return; + + + + /* ----------------------------------------------------------------- + * Phase 3: + * + * At this point the optree has been verified as ok to be optimised + * into an OP_MULTICONCAT. Now start changing things. + */ + + optimise: + + /* stringify all const args and determine utf8ness */ + + variant = 0; + for (argp = args; argp <= toparg; argp++) { + SV *sv = (SV*)argp->p; + if (!sv) + continue; /* not a const op */ + if (utf8 && !SvUTF8(sv)) + sv_utf8_upgrade_nomg(sv); + argp->p = SvPV_nomg(sv, argp->len); + total_len += argp->len; + + /* see if any strings would grow if converted to utf8 */ + if (!utf8) { + char *p = (char*)argp->p; + STRLEN len = argp->len; + while (len--) { + U8 c = *p++; + if (!UTF8_IS_INVARIANT(c)) + variant++; + } + } + } + + /* create and populate aux struct */ + + create_aux: + + aux = (UNOP_AUX_item*)PerlMemShared_malloc( + sizeof(UNOP_AUX_item) + * ( + PERL_MULTICONCAT_HEADER_SIZE + + ((nargs + 1) * (variant ? 2 : 1)) + ) + ); + const_str = (char *)PerlMemShared_malloc(total_len); + + /* Extract all the non-const expressions from the concat tree then + * dispose of the old tree, e.g. convert the tree from this: + * + * o => SASSIGN + * | + * STRINGIFY -- TARGET + * | + * ex-PUSHMARK -- CONCAT + * | + * CONCAT -- EXPR5 + * | + * CONCAT -- EXPR4 + * | + * CONCAT -- EXPR3 + * | + * EXPR1 -- EXPR2 + * + * + * to: + * + * o => MULTICONCAT + * | + * ex-PUSHMARK -- EXPR1 -- EXPR2 -- EXPR3 -- EXPR4 -- EXPR5 -- TARGET + * + * except that if EXPRi is an OP_CONST, it's discarded. + * + * During the conversion process, EXPR ops are stripped from the tree + * and unshifted onto o. Finally, any of o's remaining original + * childen are discarded and o is converted into an OP_MULTICONCAT. + * + * In this middle of this, o may contain both: unshifted args on the + * left, and some remaining original args on the right. lastkidop + * is set to point to the right-most unshifted arg to delineate + * between the two sets. + */ + + + if (is_sprintf) { + /* create a copy of the format with the %'s removed, and record + * the sizes of the const string segments in the aux struct */ + char *q, *oldq; + lenp = aux + PERL_MULTICONCAT_IX_LENGTHS; + + p = sprintf_info.start; + q = const_str; + oldq = q; + for (; p < sprintf_info.end; p++) { + if (*p == '%') { + p++; + if (*p != '%') { + (lenp++)->uv = q - oldq; + oldq = q; + continue; + } + } + *q++ = *p; + } + lenp->uv = q - oldq; + assert((STRLEN)(q - const_str) == total_len); + + /* Attach all the args (i.e. the kids of the sprintf) to o (which + * may or may not be topop) The pushmark and const ops need to be + * kept in case they're an op_next entry point. + */ + lastkidop = cLISTOPx(topop)->op_last; + kid = cUNOPx(topop)->op_first; /* pushmark */ + op_null(kid); + op_null(OpSIBLING(kid)); /* const */ + if (o != topop) { + kid = op_sibling_splice(topop, NULL, -1, NULL); /* cut all args */ + op_sibling_splice(o, NULL, 0, kid); /* and attach to o */ + lastkidop->op_next = o; + } + } + else { + p = const_str; + lenp = aux + PERL_MULTICONCAT_IX_LENGTHS; + + lenp->size = -1; + + /* Concatenate all const strings into const_str. + * Note that args[] contains the RHS args in reverse order, so + * we scan args[] from top to bottom to get constant strings + * in L-R order + */ + for (argp = toparg; argp >= args; argp--) { + if (!argp->p) + /* not a const op */ + (++lenp)->size = -1; + else { + STRLEN l = argp->len; + Copy(argp->p, p, l, char); + p += l; + if (lenp->size == -1) + lenp->size = l; + else + lenp->size += l; + } + } + + kid = topop; + nextop = o; + lastkidop = NULL; + + for (argp = args; argp <= toparg; argp++) { + /* only keep non-const args, except keep the first-in-next-chain + * arg no matter what it is (but nulled if OP_CONST), because it + * may be the entry point to this subtree from the previous + * op_next. + */ + bool last = (argp == toparg); + OP *prev; + + /* set prev to the sibling *before* the arg to be cut out, + * e.g.: + * + * | + * kid= CONST + * | + * prev= CONST -- EXPR + * | + */ + if (argp == args && kid->op_type != OP_CONCAT) { + /* in e.g. '$x . = f(1)' there's no RHS concat tree + * so the expression to be cut isn't kid->op_last but + * kid itself */ + OP *o1, *o2; + /* find the op before kid */ + o1 = NULL; + o2 = cUNOPx(parentop)->op_first; + while (o2 && o2 != kid) { + o1 = o2; + o2 = OpSIBLING(o2); + } + assert(o2 == kid); + prev = o1; + kid = parentop; + } + else if (kid == o && lastkidop) + prev = last ? lastkidop : OpSIBLING(lastkidop); + else + prev = last ? NULL : cUNOPx(kid)->op_first; + + if (!argp->p || last) { + /* cut RH op */ + OP *aop = op_sibling_splice(kid, prev, 1, NULL); + /* and unshift to front of o */ + op_sibling_splice(o, NULL, 0, aop); + /* record the right-most op added to o: later we will + * free anything to the right of it */ + if (!lastkidop) + lastkidop = aop; + aop->op_next = nextop; + if (last) { + if (argp->p) + /* null the const at start of op_next chain */ + op_null(aop); + } + else if (prev) + nextop = prev->op_next; + } + + /* the last two arguments are both attached to the same concat op */ + if (argp < toparg - 1) + kid = prev; + } + } + + /* Populate the aux struct */ + + aux[PERL_MULTICONCAT_IX_NARGS].uv = nargs; + aux[PERL_MULTICONCAT_IX_PLAIN_PV].pv = utf8 ? NULL : const_str; + aux[PERL_MULTICONCAT_IX_PLAIN_LEN].size = utf8 ? 0 : total_len; + aux[PERL_MULTICONCAT_IX_UTF8_PV].pv = const_str; + aux[PERL_MULTICONCAT_IX_UTF8_LEN].size = total_len; + + /* if variant > 0, calculate a variant const string and lengths where + * the utf8 version of the string will take 'variant' more bytes than + * the plain one. */ + + if (variant) { + char *p = const_str; + STRLEN ulen = total_len + variant; + UNOP_AUX_item *lens = aux + PERL_MULTICONCAT_IX_LENGTHS; + UNOP_AUX_item *ulens = lens + (nargs + 1); + char *up = (char*)PerlMemShared_malloc(ulen); + UV n; + + aux[PERL_MULTICONCAT_IX_UTF8_PV].pv = up; + aux[PERL_MULTICONCAT_IX_UTF8_LEN].size = ulen; + + for (n = 0; n < (nargs + 1); n++) { + SSize_t l, ul, i; + l = ul = (lens++)->size; + for (i = 0; i < l; i++) { + U8 c = *p++; + if (UTF8_IS_INVARIANT(c)) + *up++ = c; + else { + *up++ = UTF8_EIGHT_BIT_HI(c); + *up++ = UTF8_EIGHT_BIT_LO(c); + ul++; + } + } + (ulens++)->size = ul; + } + } + + if (stringop) { + /* if there was a top(ish)-level OP_STRINGIFY, we need to keep + * that op's first child - an ex-PUSHMARK - because the op_next of + * the previous op may point to it (i.e. it's the entry point for + * the o optree) + */ + OP *pmop = + (stringop == o) + ? op_sibling_splice(o, lastkidop, 1, NULL) + : op_sibling_splice(stringop, NULL, 1, NULL); + assert(OP_TYPE_IS_OR_WAS_NN(pmop, OP_PUSHMARK)); + op_sibling_splice(o, NULL, 0, pmop); + if (!lastkidop) + lastkidop = pmop; + } + + /* Optimise + * target = A.B.C... + * target .= A.B.C... + */ + + if (targetop) { + assert(!targmyop); + + if (o->op_type == OP_SASSIGN) { + /* Move the target subtree from being the last of o's children + * to being the last of o's preserved children. + * Note the difference between 'target = ...' and 'target .= ...': + * for the former, target is executed last; for the latter, + * first. + */ + kid = OpSIBLING(lastkidop); + op_sibling_splice(o, kid, 1, NULL); /* cut target op */ + op_sibling_splice(o, lastkidop, 0, targetop); /* and paste */ + lastkidop->op_next = kid->op_next; + lastkidop = targetop; + } + else { + /* Move the target subtree from being the first of o's + * original children to being the first of *all* o's children. + */ + if (lastkidop) { + op_sibling_splice(o, lastkidop, 1, NULL); /* cut target op */ + op_sibling_splice(o, NULL, 0, targetop); /* and paste*/ + } + else { + /* if the RHS of .= doesn't contain a concat (e.g. + * $x .= "foo"), it gets missed by the "strip ops from the + * tree and add to o" loop earlier */ + assert(topop->op_type != OP_CONCAT); + if (stringop) { + /* in e.g. $x .= "$y", move the $y expression + * from being a child of OP_STRINGIFY to being the + * second child of the OP_CONCAT + */ + assert(cUNOPx(stringop)->op_first == topop); + op_sibling_splice(stringop, NULL, 1, NULL); + op_sibling_splice(o, cUNOPo->op_first, 0, topop); + } + assert(topop == OpSIBLING(cBINOPo->op_first)); + if (toparg->p) + op_null(topop); + lastkidop = topop; + } + } + + if (is_targable) { + /* optimise + * my $lex = A.B.C... + * $lex = A.B.C... + * $lex .= A.B.C... + * The original padsv op is kept but nulled in case it's the + * entry point for the optree (which it will be for + * '$lex .= ... ' + */ + private_flags |= OPpTARGET_MY; + private_flags |= (targetop->op_private & OPpLVAL_INTRO); + o->op_targ = targetop->op_targ; + targetop->op_targ = 0; + op_null(targetop); + } + else + flags |= OPf_STACKED; + } + else if (targmyop) { + private_flags |= OPpTARGET_MY; + if (o != targmyop) { + o->op_targ = targmyop->op_targ; + targmyop->op_targ = 0; + } + } + + /* detach the emaciated husk of the sprintf/concat optree and free it */ + for (;;) { + kid = op_sibling_splice(o, lastkidop, 1, NULL); + if (!kid) + break; + op_free(kid); + } + + /* and convert o into a multiconcat */ + + o->op_flags = (flags|OPf_KIDS|stacked_last + |(o->op_flags & (OPf_WANT|OPf_PARENS))); + o->op_private = private_flags; + o->op_type = OP_MULTICONCAT; + o->op_ppaddr = PL_ppaddr[OP_MULTICONCAT]; + cUNOP_AUXo->op_aux = aux; +} + /* do all the final processing on an optree (e.g. running the peephole * optimiser on it), then attach it to cv (if cv is non-null) @@ -2549,6 +3442,13 @@ S_optimize_op(pTHX_ OP* o) break; + case OP_CONCAT: + case OP_SASSIGN: + case OP_STRINGIFY: + case OP_SPRINTF: + S_maybe_multiconcat(aTHX_ o); + break; + case OP_SUBST: if (cPMOPo->op_pmreplrootu.op_pmreplroot) optimize_op(cPMOPo->op_pmreplrootu.op_pmreplroot); @@ -9903,6 +10803,7 @@ Perl_ck_concat(pTHX_ OP *o) PERL_ARGS_ASSERT_CK_CONCAT; PERL_UNUSED_CONTEXT; + /* reuse the padtmp returned by the concat child */ if (kid->op_type == OP_CONCAT && !(kid->op_private & OPpTARGET_MY) && !(kUNOP->op_first->op_flags & OPf_MOD)) o->op_flags |= OPf_STACKED; @@ -10858,6 +11759,7 @@ Perl_ck_sassign(pTHX_ OP *o) return S_maybe_targlex(aTHX_ o); } + OP * Perl_ck_match(pTHX_ OP *o) { @@ -188,6 +188,8 @@ typedef union { SV *sv; IV iv; UV uv; + char *pv; + SSize_t size; } UNOP_AUX_item; #ifdef USE_ITHREADS @@ -213,6 +213,7 @@ EXTCONST char* const PL_op_name[] = { "subtract", "i_subtract", "concat", + "multiconcat", "stringify", "left_shift", "right_shift", @@ -617,6 +618,7 @@ EXTCONST char* const PL_op_desc[] = { "subtraction (-)", "integer subtraction (-)", "concatenation (.) or string", + "concatenation (.) or string", "string", "left bitshift (<<)", "right bitshift (>>)", @@ -1033,6 +1035,7 @@ EXT Perl_ppaddr_t PL_ppaddr[] /* or perlvars.h */ Perl_pp_subtract, Perl_pp_i_subtract, Perl_pp_concat, + Perl_pp_multiconcat, Perl_pp_stringify, Perl_pp_left_shift, Perl_pp_right_shift, @@ -1445,6 +1448,7 @@ EXT Perl_check_t PL_check[] /* or perlvars.h */ Perl_ck_null, /* subtract */ Perl_ck_null, /* i_subtract */ Perl_ck_concat, /* concat */ + Perl_ck_null, /* multiconcat */ Perl_ck_stringify, /* stringify */ Perl_ck_bitop, /* left_shift */ Perl_ck_bitop, /* right_shift */ @@ -1853,6 +1857,7 @@ EXTCONST U32 PL_opargs[] = { 0x0001123e, /* subtract */ 0x0001121e, /* i_subtract */ 0x0001121e, /* concat */ + 0x00000f1c, /* multiconcat */ 0x0000141e, /* stringify */ 0x0001121e, /* left_shift */ 0x0001121e, /* right_shift */ @@ -2232,6 +2237,7 @@ END_EXTERN_C #define OPpITER_DEF 0x08 #define OPpLVREF_ITER 0x08 #define OPpMAYBE_LVSUB 0x08 +#define OPpMULTICONCAT_STRINGIFY 0x08 #define OPpREVERSE_INPLACE 0x08 #define OPpSORT_INPLACE 0x08 #define OPpSPLIT_LEX 0x08 @@ -2258,6 +2264,7 @@ END_EXTERN_C #define OPpKVSLICE 0x20 #define OPpLVREF_HV 0x20 #define OPpMAY_RETURN_CONSTANT 0x20 +#define OPpMULTICONCAT_FAKE 0x20 #define OPpMULTIDEREF_DELETE 0x20 #define OPpOPEN_IN_CRLF 0x20 #define OPpSORT_QSORT 0x20 @@ -2278,6 +2285,7 @@ END_EXTERN_C #define OPpINDEX_BOOLNEG 0x40 #define OPpLIST_GUESSED 0x40 #define OPpLVAL_DEFER 0x40 +#define OPpMULTICONCAT_APPEND 0x40 #define OPpOPEN_OUT_RAW 0x40 #define OPpOUR_INTRO 0x40 #define OPpPAD_STATE 0x40 @@ -2326,6 +2334,7 @@ EXTCONST char PL_op_private_labels[] = { '<','U','T','F','\0', '>','U','T','F','\0', 'A','M','P','E','R','\0', + 'A','P','P','E','N','D','\0', 'A','S','S','I','G','N','\0', 'A','V','\0', 'B','A','R','E','\0', @@ -2404,6 +2413,7 @@ EXTCONST char PL_op_private_labels[] = { 'S','T','A','B','L','E','\0', 'S','T','A','T','E','\0', 'S','T','R','I','C','T','\0', + 'S','T','R','I','N','G','I','F','Y','\0', 'S','U','B','\0', 'S','V','\0', 'T','A','R','G','\0', @@ -2432,14 +2442,14 @@ EXTCONST char PL_op_private_labels[] = { EXTCONST I16 PL_op_private_bitfields[] = { 0, 8, -1, 0, 8, -1, - 0, 565, -1, + 0, 582, -1, 0, 8, -1, 0, 8, -1, - 0, 572, -1, - 0, 561, -1, - 1, -1, 0, 529, 1, 33, 2, 283, -1, - 4, -1, 1, 164, 2, 171, 3, 178, -1, - 4, -1, 0, 529, 1, 33, 2, 283, 3, 110, -1, + 0, 589, -1, + 0, 578, -1, + 1, -1, 0, 546, 1, 40, 2, 290, -1, + 4, -1, 1, 171, 2, 178, 3, 185, -1, + 4, -1, 0, 546, 1, 40, 2, 290, 3, 117, -1, }; @@ -2515,7 +2525,8 @@ EXTCONST I16 PL_op_private_bitdef_ix[] = { 80, /* subtract */ 80, /* i_subtract */ 80, /* concat */ - 84, /* stringify */ + 84, /* multiconcat */ + 90, /* stringify */ 80, /* left_shift */ 80, /* right_shift */ 12, /* lt */ @@ -2555,11 +2566,11 @@ EXTCONST I16 PL_op_private_bitdef_ix[] = { 75, /* ncomplement */ 75, /* scomplement */ 12, /* smartmatch */ - 84, /* atan2 */ + 90, /* atan2 */ 75, /* sin */ 75, /* cos */ - 84, /* rand */ - 84, /* srand */ + 90, /* rand */ + 90, /* srand */ 75, /* exp */ 75, /* log */ 75, /* sqrt */ @@ -2567,97 +2578,97 @@ EXTCONST I16 PL_op_private_bitdef_ix[] = { 75, /* hex */ 75, /* oct */ 75, /* abs */ - 86, /* length */ - 89, /* substr */ - 92, /* vec */ - 94, /* index */ - 94, /* rindex */ + 92, /* length */ + 95, /* substr */ + 98, /* vec */ + 100, /* index */ + 100, /* rindex */ 52, /* sprintf */ 52, /* formline */ 75, /* ord */ 75, /* chr */ - 84, /* crypt */ + 90, /* crypt */ 0, /* ucfirst */ 0, /* lcfirst */ 0, /* uc */ 0, /* lc */ 0, /* quotemeta */ - 98, /* rv2av */ - 105, /* aelemfast */ - 105, /* aelemfast_lex */ - 106, /* aelem */ - 111, /* aslice */ - 114, /* kvaslice */ + 104, /* rv2av */ + 111, /* aelemfast */ + 111, /* aelemfast_lex */ + 112, /* aelem */ + 117, /* aslice */ + 120, /* kvaslice */ 0, /* aeach */ 0, /* avalues */ 40, /* akeys */ 0, /* each */ 40, /* values */ 40, /* keys */ - 115, /* delete */ - 119, /* exists */ - 121, /* rv2hv */ - 106, /* helem */ - 111, /* hslice */ - 114, /* kvhslice */ - 129, /* multideref */ + 121, /* delete */ + 125, /* exists */ + 127, /* rv2hv */ + 112, /* helem */ + 117, /* hslice */ + 120, /* kvhslice */ + 135, /* multideref */ 52, /* unpack */ 52, /* pack */ - 136, /* split */ + 142, /* split */ 52, /* join */ - 141, /* list */ + 147, /* list */ 12, /* lslice */ 52, /* anonlist */ 52, /* anonhash */ 52, /* splice */ - 84, /* push */ + 90, /* push */ 0, /* pop */ 0, /* shift */ - 84, /* unshift */ - 143, /* sort */ - 151, /* reverse */ + 90, /* unshift */ + 149, /* sort */ + 157, /* reverse */ 0, /* grepstart */ - 153, /* grepwhile */ + 159, /* grepwhile */ 0, /* mapstart */ 0, /* mapwhile */ 0, /* range */ - 155, /* flip */ - 155, /* flop */ + 161, /* flip */ + 161, /* flop */ 0, /* and */ 0, /* or */ 12, /* xor */ 0, /* dor */ - 157, /* cond_expr */ + 163, /* cond_expr */ 0, /* andassign */ 0, /* orassign */ 0, /* dorassign */ - 159, /* entersub */ - 166, /* leavesub */ - 166, /* leavesublv */ + 165, /* entersub */ + 172, /* leavesub */ + 172, /* leavesublv */ 0, /* argcheck */ - 168, /* argelem */ + 174, /* argelem */ 0, /* argdefelem */ - 170, /* caller */ + 176, /* caller */ 52, /* warn */ 52, /* die */ 52, /* reset */ -1, /* lineseq */ - 172, /* nextstate */ - 172, /* dbstate */ + 178, /* nextstate */ + 178, /* dbstate */ -1, /* unstack */ -1, /* enter */ - 173, /* leave */ + 179, /* leave */ -1, /* scope */ - 175, /* enteriter */ - 179, /* iter */ + 181, /* enteriter */ + 185, /* iter */ -1, /* enterloop */ - 180, /* leaveloop */ + 186, /* leaveloop */ -1, /* return */ - 182, /* last */ - 182, /* next */ - 182, /* redo */ - 182, /* dump */ - 182, /* goto */ + 188, /* last */ + 188, /* next */ + 188, /* redo */ + 188, /* dump */ + 188, /* goto */ 52, /* exit */ 0, /* method */ 0, /* method_named */ @@ -2670,7 +2681,7 @@ EXTCONST I16 PL_op_private_bitdef_ix[] = { 0, /* leavewhen */ -1, /* break */ -1, /* continue */ - 184, /* open */ + 190, /* open */ 52, /* close */ 52, /* pipe_op */ 52, /* fileno */ @@ -2686,7 +2697,7 @@ EXTCONST I16 PL_op_private_bitdef_ix[] = { 52, /* getc */ 52, /* read */ 52, /* enterwrite */ - 166, /* leavewrite */ + 172, /* leavewrite */ -1, /* prtf */ -1, /* print */ -1, /* say */ @@ -2700,7 +2711,7 @@ EXTCONST I16 PL_op_private_bitdef_ix[] = { 52, /* truncate */ 52, /* fcntl */ 52, /* ioctl */ - 84, /* flock */ + 90, /* flock */ 52, /* send */ 52, /* recv */ 52, /* socket */ @@ -2716,44 +2727,44 @@ EXTCONST I16 PL_op_private_bitdef_ix[] = { 0, /* getpeername */ 0, /* lstat */ 0, /* stat */ - 189, /* ftrread */ - 189, /* ftrwrite */ - 189, /* ftrexec */ - 189, /* fteread */ - 189, /* ftewrite */ - 189, /* fteexec */ - 194, /* ftis */ - 194, /* ftsize */ - 194, /* ftmtime */ - 194, /* ftatime */ - 194, /* ftctime */ - 194, /* ftrowned */ - 194, /* fteowned */ - 194, /* ftzero */ - 194, /* ftsock */ - 194, /* ftchr */ - 194, /* ftblk */ - 194, /* ftfile */ - 194, /* ftdir */ - 194, /* ftpipe */ - 194, /* ftsuid */ - 194, /* ftsgid */ - 194, /* ftsvtx */ - 194, /* ftlink */ - 194, /* fttty */ - 194, /* fttext */ - 194, /* ftbinary */ - 84, /* chdir */ - 84, /* chown */ + 195, /* ftrread */ + 195, /* ftrwrite */ + 195, /* ftrexec */ + 195, /* fteread */ + 195, /* ftewrite */ + 195, /* fteexec */ + 200, /* ftis */ + 200, /* ftsize */ + 200, /* ftmtime */ + 200, /* ftatime */ + 200, /* ftctime */ + 200, /* ftrowned */ + 200, /* fteowned */ + 200, /* ftzero */ + 200, /* ftsock */ + 200, /* ftchr */ + 200, /* ftblk */ + 200, /* ftfile */ + 200, /* ftdir */ + 200, /* ftpipe */ + 200, /* ftsuid */ + 200, /* ftsgid */ + 200, /* ftsvtx */ + 200, /* ftlink */ + 200, /* fttty */ + 200, /* fttext */ + 200, /* ftbinary */ + 90, /* chdir */ + 90, /* chown */ 75, /* chroot */ - 84, /* unlink */ - 84, /* chmod */ - 84, /* utime */ - 84, /* rename */ - 84, /* link */ - 84, /* symlink */ + 90, /* unlink */ + 90, /* chmod */ + 90, /* utime */ + 90, /* rename */ + 90, /* link */ + 90, /* symlink */ 0, /* readlink */ - 84, /* mkdir */ + 90, /* mkdir */ 75, /* rmdir */ 52, /* open_dir */ 0, /* readdir */ @@ -2762,22 +2773,22 @@ EXTCONST I16 PL_op_private_bitdef_ix[] = { 0, /* rewinddir */ 0, /* closedir */ -1, /* fork */ - 198, /* wait */ - 84, /* waitpid */ - 84, /* system */ - 84, /* exec */ - 84, /* kill */ - 198, /* getppid */ - 84, /* getpgrp */ - 84, /* setpgrp */ - 84, /* getpriority */ - 84, /* setpriority */ - 198, /* time */ + 204, /* wait */ + 90, /* waitpid */ + 90, /* system */ + 90, /* exec */ + 90, /* kill */ + 204, /* getppid */ + 90, /* getpgrp */ + 90, /* setpgrp */ + 90, /* getpriority */ + 90, /* setpriority */ + 204, /* time */ -1, /* tms */ 0, /* localtime */ 52, /* gmtime */ 0, /* alarm */ - 84, /* sleep */ + 90, /* sleep */ 52, /* shmget */ 52, /* shmctl */ 52, /* shmread */ @@ -2792,8 +2803,8 @@ EXTCONST I16 PL_op_private_bitdef_ix[] = { 0, /* require */ 0, /* dofile */ -1, /* hintseval */ - 199, /* entereval */ - 166, /* leaveeval */ + 205, /* entereval */ + 172, /* leaveeval */ 0, /* entertry */ -1, /* leavetry */ 0, /* ghbyname */ @@ -2831,18 +2842,18 @@ EXTCONST I16 PL_op_private_bitdef_ix[] = { 0, /* lock */ 0, /* once */ -1, /* custom */ - 205, /* coreargs */ - 209, /* avhvswitch */ + 211, /* coreargs */ + 215, /* avhvswitch */ 3, /* runcv */ 0, /* fc */ -1, /* padcv */ -1, /* introcv */ -1, /* clonecv */ - 211, /* padrange */ - 213, /* refassign */ - 219, /* lvref */ - 225, /* lvrefslice */ - 226, /* lvavref */ + 217, /* padrange */ + 219, /* refassign */ + 225, /* lvref */ + 231, /* lvrefslice */ + 232, /* lvavref */ 0, /* anonconst */ }; @@ -2863,73 +2874,74 @@ EXTCONST I16 PL_op_private_bitdef_ix[] = { EXTCONST U16 PL_op_private_bitdefs[] = { 0x0003, /* scalar, prototype, refgen, srefgen, readline, regcmaybe, regcreset, regcomp, substcont, chop, schop, defined, undef, study, preinc, i_preinc, predec, i_predec, postinc, i_postinc, postdec, i_postdec, negate, i_negate, not, complement, ucfirst, lcfirst, uc, lc, quotemeta, aeach, avalues, each, pop, shift, grepstart, mapstart, mapwhile, range, and, or, dor, andassign, orassign, dorassign, argcheck, argdefelem, method, method_named, method_super, method_redir, method_redir_super, entergiven, leavegiven, enterwhen, leavewhen, untie, tied, dbmclose, getsockname, getpeername, lstat, stat, readlink, readdir, telldir, rewinddir, closedir, localtime, alarm, require, dofile, entertry, ghbyname, gnbyname, gpbyname, shostent, snetent, sprotoent, sservent, gpwnam, gpwuid, ggrnam, ggrgid, lock, once, fc, anonconst */ - 0x2e5c, 0x4019, /* pushmark */ + 0x2f3c, 0x40f9, /* pushmark */ 0x00bd, /* wantarray, runcv */ - 0x0498, 0x18d0, 0x40cc, 0x3b88, 0x32a5, /* const */ - 0x2e5c, 0x33f9, /* gvsv */ - 0x1735, /* gv */ + 0x0578, 0x19b0, 0x41ac, 0x3c68, 0x3385, /* const */ + 0x2f3c, 0x34d9, /* gvsv */ + 0x1815, /* gv */ 0x0067, /* gelem, lt, i_lt, gt, i_gt, le, i_le, ge, i_ge, eq, i_eq, ne, i_ne, ncmp, i_ncmp, slt, sgt, sle, sge, seq, sne, scmp, bit_and, bit_xor, bit_or, sbit_and, sbit_xor, sbit_or, smartmatch, lslice, xor */ - 0x2e5c, 0x4018, 0x03d7, /* padsv */ - 0x2e5c, 0x4018, 0x0614, 0x2f4c, 0x3d09, /* padav */ - 0x2e5c, 0x4018, 0x0614, 0x06b0, 0x2f4c, 0x3d08, 0x29c1, /* padhv */ - 0x2e5c, 0x1ab8, 0x03d6, 0x2f4c, 0x31c8, 0x40c4, 0x0003, /* rv2gv */ - 0x2e5c, 0x33f8, 0x03d6, 0x40c4, 0x0003, /* rv2sv */ - 0x2f4c, 0x0003, /* av2arylen, akeys, values, keys */ - 0x313c, 0x0ef8, 0x0c54, 0x028c, 0x4288, 0x40c4, 0x0003, /* rv2cv */ - 0x0614, 0x06b0, 0x0003, /* ref */ + 0x2f3c, 0x40f8, 0x03d7, /* padsv */ + 0x2f3c, 0x40f8, 0x06f4, 0x302c, 0x3de9, /* padav */ + 0x2f3c, 0x40f8, 0x06f4, 0x0790, 0x302c, 0x3de8, 0x2aa1, /* padhv */ + 0x2f3c, 0x1b98, 0x03d6, 0x302c, 0x32a8, 0x41a4, 0x0003, /* rv2gv */ + 0x2f3c, 0x34d8, 0x03d6, 0x41a4, 0x0003, /* rv2sv */ + 0x302c, 0x0003, /* av2arylen, akeys, values, keys */ + 0x321c, 0x0fd8, 0x0d34, 0x028c, 0x44a8, 0x41a4, 0x0003, /* rv2cv */ + 0x06f4, 0x0790, 0x0003, /* ref */ 0x018f, /* bless, glob, sprintf, formline, unpack, pack, join, anonlist, anonhash, splice, warn, die, reset, exit, close, pipe_op, fileno, umask, binmode, tie, dbmopen, sselect, select, getc, read, enterwrite, sysopen, sysseek, sysread, syswrite, eof, tell, seek, truncate, fcntl, ioctl, send, recv, socket, sockpair, bind, connect, listen, accept, shutdown, gsockopt, ssockopt, open_dir, seekdir, gmtime, shmget, shmctl, shmread, shmwrite, msgget, msgctl, msgsnd, msgrcv, semop, semget, semctl, ghbyaddr, gnbyaddr, gpbynumber, gsbyname, gsbyport, syscall */ - 0x35dc, 0x34f8, 0x2714, 0x2650, 0x0003, /* backtick */ - 0x0615, /* subst */ - 0x0ffc, 0x2038, 0x0834, 0x3e4c, 0x23c8, 0x01e4, 0x0141, /* trans, transr */ - 0x0e3c, 0x0538, 0x0067, /* sassign */ - 0x0af8, 0x09f4, 0x08f0, 0x2f4c, 0x0608, 0x0067, /* aassign */ - 0x4330, 0x0003, /* chomp, schomp, ncomplement, scomplement, sin, cos, exp, log, sqrt, int, hex, oct, abs, ord, chr, chroot, rmdir */ - 0x0614, 0x2f4c, 0x0003, /* pos */ - 0x4330, 0x0067, /* pow, multiply, i_multiply, divide, i_divide, modulo, i_modulo, add, i_add, subtract, i_subtract, concat, left_shift, right_shift, nbit_and, nbit_xor, nbit_or */ - 0x13b8, 0x0067, /* repeat */ - 0x4330, 0x018f, /* stringify, atan2, rand, srand, crypt, push, unshift, flock, chdir, chown, unlink, chmod, utime, rename, link, symlink, mkdir, waitpid, system, exec, kill, getpgrp, setpgrp, getpriority, setpriority, sleep */ - 0x0614, 0x4330, 0x0003, /* length */ - 0x38f0, 0x2f4c, 0x012b, /* substr */ - 0x2f4c, 0x0067, /* vec */ - 0x30b8, 0x0614, 0x4330, 0x018f, /* index, rindex */ - 0x2e5c, 0x33f8, 0x0614, 0x2f4c, 0x3d08, 0x40c4, 0x0003, /* rv2av */ + 0x36bc, 0x35d8, 0x27f4, 0x2730, 0x0003, /* backtick */ + 0x06f5, /* subst */ + 0x10dc, 0x2118, 0x0914, 0x3f2c, 0x24a8, 0x01e4, 0x0141, /* trans, transr */ + 0x0f1c, 0x0618, 0x0067, /* sassign */ + 0x0bd8, 0x0ad4, 0x09d0, 0x302c, 0x06e8, 0x0067, /* aassign */ + 0x4550, 0x0003, /* chomp, schomp, ncomplement, scomplement, sin, cos, exp, log, sqrt, int, hex, oct, abs, ord, chr, chroot, rmdir */ + 0x06f4, 0x302c, 0x0003, /* pos */ + 0x4550, 0x0067, /* pow, multiply, i_multiply, divide, i_divide, modulo, i_modulo, add, i_add, subtract, i_subtract, concat, left_shift, right_shift, nbit_and, nbit_xor, nbit_or */ + 0x1498, 0x0067, /* repeat */ + 0x2f3c, 0x0358, 0x1b94, 0x4550, 0x428c, 0x0003, /* multiconcat */ + 0x4550, 0x018f, /* stringify, atan2, rand, srand, crypt, push, unshift, flock, chdir, chown, unlink, chmod, utime, rename, link, symlink, mkdir, waitpid, system, exec, kill, getpgrp, setpgrp, getpriority, setpriority, sleep */ + 0x06f4, 0x4550, 0x0003, /* length */ + 0x39d0, 0x302c, 0x012b, /* substr */ + 0x302c, 0x0067, /* vec */ + 0x3198, 0x06f4, 0x4550, 0x018f, /* index, rindex */ + 0x2f3c, 0x34d8, 0x06f4, 0x302c, 0x3de8, 0x41a4, 0x0003, /* rv2av */ 0x025f, /* aelemfast, aelemfast_lex */ - 0x2e5c, 0x2d58, 0x03d6, 0x2f4c, 0x0067, /* aelem, helem */ - 0x2e5c, 0x2f4c, 0x3d09, /* aslice, hslice */ - 0x2f4d, /* kvaslice, kvhslice */ - 0x2e5c, 0x3c58, 0x2a74, 0x0003, /* delete */ - 0x41b8, 0x0003, /* exists */ - 0x2e5c, 0x33f8, 0x0614, 0x06b0, 0x2f4c, 0x3d08, 0x40c4, 0x29c1, /* rv2hv */ - 0x2e5c, 0x2d58, 0x1074, 0x19d0, 0x2f4c, 0x40c4, 0x0003, /* multideref */ - 0x2e5c, 0x33f8, 0x0350, 0x2b6c, 0x2489, /* split */ - 0x2e5c, 0x20f9, /* list */ - 0x449c, 0x3f38, 0x3694, 0x1310, 0x27ac, 0x39e8, 0x28a4, 0x3361, /* sort */ - 0x27ac, 0x0003, /* reverse */ - 0x0614, 0x0003, /* grepwhile */ - 0x2bf8, 0x0003, /* flip, flop */ - 0x2e5c, 0x0003, /* cond_expr */ - 0x2e5c, 0x0ef8, 0x03d6, 0x028c, 0x4288, 0x40c4, 0x2561, /* entersub */ - 0x3758, 0x0003, /* leavesub, leavesublv, leavewrite, leaveeval */ + 0x2f3c, 0x2e38, 0x03d6, 0x302c, 0x0067, /* aelem, helem */ + 0x2f3c, 0x302c, 0x3de9, /* aslice, hslice */ + 0x302d, /* kvaslice, kvhslice */ + 0x2f3c, 0x3d38, 0x2b54, 0x0003, /* delete */ + 0x43d8, 0x0003, /* exists */ + 0x2f3c, 0x34d8, 0x06f4, 0x0790, 0x302c, 0x3de8, 0x41a4, 0x2aa1, /* rv2hv */ + 0x2f3c, 0x2e38, 0x1154, 0x1ab0, 0x302c, 0x41a4, 0x0003, /* multideref */ + 0x2f3c, 0x34d8, 0x0430, 0x2c4c, 0x2569, /* split */ + 0x2f3c, 0x21d9, /* list */ + 0x46bc, 0x4018, 0x3774, 0x13f0, 0x288c, 0x3ac8, 0x2984, 0x3441, /* sort */ + 0x288c, 0x0003, /* reverse */ + 0x06f4, 0x0003, /* grepwhile */ + 0x2cd8, 0x0003, /* flip, flop */ + 0x2f3c, 0x0003, /* cond_expr */ + 0x2f3c, 0x0fd8, 0x03d6, 0x028c, 0x44a8, 0x41a4, 0x2641, /* entersub */ + 0x3838, 0x0003, /* leavesub, leavesublv, leavewrite, leaveeval */ 0x02aa, 0x0003, /* argelem */ 0x00bc, 0x018f, /* caller */ - 0x22d5, /* nextstate, dbstate */ - 0x2cfc, 0x3759, /* leave */ - 0x2e5c, 0x33f8, 0x0f6c, 0x3a65, /* enteriter */ - 0x3a65, /* iter */ - 0x2cfc, 0x0067, /* leaveloop */ - 0x45bc, 0x0003, /* last, next, redo, dump, goto */ - 0x35dc, 0x34f8, 0x2714, 0x2650, 0x018f, /* open */ - 0x1c70, 0x1ecc, 0x1d88, 0x1b44, 0x0003, /* ftrread, ftrwrite, ftrexec, fteread, ftewrite, fteexec */ - 0x1c70, 0x1ecc, 0x1d88, 0x0003, /* ftis, ftsize, ftmtime, ftatime, ftctime, ftrowned, fteowned, ftzero, ftsock, ftchr, ftblk, ftfile, ftdir, ftpipe, ftsuid, ftsgid, ftsvtx, ftlink, fttty, fttext, ftbinary */ - 0x4331, /* wait, getppid, time */ - 0x37f4, 0x0d10, 0x076c, 0x4408, 0x21e4, 0x0003, /* entereval */ - 0x301c, 0x0018, 0x1224, 0x1141, /* coreargs */ - 0x2f4c, 0x00c7, /* avhvswitch */ - 0x2e5c, 0x01fb, /* padrange */ - 0x2e5c, 0x4018, 0x04f6, 0x292c, 0x1828, 0x0067, /* refassign */ - 0x2e5c, 0x4018, 0x04f6, 0x292c, 0x1828, 0x0003, /* lvref */ - 0x2e5d, /* lvrefslice */ - 0x2e5c, 0x4018, 0x0003, /* lvavref */ + 0x23b5, /* nextstate, dbstate */ + 0x2ddc, 0x3839, /* leave */ + 0x2f3c, 0x34d8, 0x104c, 0x3b45, /* enteriter */ + 0x3b45, /* iter */ + 0x2ddc, 0x0067, /* leaveloop */ + 0x47dc, 0x0003, /* last, next, redo, dump, goto */ + 0x36bc, 0x35d8, 0x27f4, 0x2730, 0x018f, /* open */ + 0x1d50, 0x1fac, 0x1e68, 0x1c24, 0x0003, /* ftrread, ftrwrite, ftrexec, fteread, ftewrite, fteexec */ + 0x1d50, 0x1fac, 0x1e68, 0x0003, /* ftis, ftsize, ftmtime, ftatime, ftctime, ftrowned, fteowned, ftzero, ftsock, ftchr, ftblk, ftfile, ftdir, ftpipe, ftsuid, ftsgid, ftsvtx, ftlink, fttty, fttext, ftbinary */ + 0x4551, /* wait, getppid, time */ + 0x38d4, 0x0df0, 0x084c, 0x4628, 0x22c4, 0x0003, /* entereval */ + 0x30fc, 0x0018, 0x1304, 0x1221, /* coreargs */ + 0x302c, 0x00c7, /* avhvswitch */ + 0x2f3c, 0x01fb, /* padrange */ + 0x2f3c, 0x40f8, 0x04f6, 0x2a0c, 0x1908, 0x0067, /* refassign */ + 0x2f3c, 0x40f8, 0x04f6, 0x2a0c, 0x1908, 0x0003, /* lvref */ + 0x2f3d, /* lvrefslice */ + 0x2f3c, 0x40f8, 0x0003, /* lvavref */ }; @@ -3005,6 +3017,7 @@ EXTCONST U8 PL_op_private_valid[] = { /* SUBTRACT */ (OPpARG2_MASK|OPpTARGET_MY), /* I_SUBTRACT */ (OPpARG2_MASK|OPpTARGET_MY), /* CONCAT */ (OPpARG2_MASK|OPpTARGET_MY), + /* MULTICONCAT */ (OPpARG1_MASK|OPpMULTICONCAT_STRINGIFY|OPpTARGET_MY|OPpMULTICONCAT_FAKE|OPpMULTICONCAT_APPEND|OPpLVAL_INTRO), /* STRINGIFY */ (OPpARG4_MASK|OPpTARGET_MY), /* LEFT_SHIFT */ (OPpARG2_MASK|OPpTARGET_MY), /* RIGHT_SHIFT */ (OPpARG2_MASK|OPpTARGET_MY), @@ -81,339 +81,340 @@ typedef enum opcode { OP_SUBTRACT = 64, OP_I_SUBTRACT = 65, OP_CONCAT = 66, - OP_STRINGIFY = 67, - OP_LEFT_SHIFT = 68, - OP_RIGHT_SHIFT = 69, - OP_LT = 70, - OP_I_LT = 71, - OP_GT = 72, - OP_I_GT = 73, - OP_LE = 74, - OP_I_LE = 75, - OP_GE = 76, - OP_I_GE = 77, - OP_EQ = 78, - OP_I_EQ = 79, - OP_NE = 80, - OP_I_NE = 81, - OP_NCMP = 82, - OP_I_NCMP = 83, - OP_SLT = 84, - OP_SGT = 85, - OP_SLE = 86, - OP_SGE = 87, - OP_SEQ = 88, - OP_SNE = 89, - OP_SCMP = 90, - OP_BIT_AND = 91, - OP_BIT_XOR = 92, - OP_BIT_OR = 93, - OP_NBIT_AND = 94, - OP_NBIT_XOR = 95, - OP_NBIT_OR = 96, - OP_SBIT_AND = 97, - OP_SBIT_XOR = 98, - OP_SBIT_OR = 99, - OP_NEGATE = 100, - OP_I_NEGATE = 101, - OP_NOT = 102, - OP_COMPLEMENT = 103, - OP_NCOMPLEMENT = 104, - OP_SCOMPLEMENT = 105, - OP_SMARTMATCH = 106, - OP_ATAN2 = 107, - OP_SIN = 108, - OP_COS = 109, - OP_RAND = 110, - OP_SRAND = 111, - OP_EXP = 112, - OP_LOG = 113, - OP_SQRT = 114, - OP_INT = 115, - OP_HEX = 116, - OP_OCT = 117, - OP_ABS = 118, - OP_LENGTH = 119, - OP_SUBSTR = 120, - OP_VEC = 121, - OP_INDEX = 122, - OP_RINDEX = 123, - OP_SPRINTF = 124, - OP_FORMLINE = 125, - OP_ORD = 126, - OP_CHR = 127, - OP_CRYPT = 128, - OP_UCFIRST = 129, - OP_LCFIRST = 130, - OP_UC = 131, - OP_LC = 132, - OP_QUOTEMETA = 133, - OP_RV2AV = 134, - OP_AELEMFAST = 135, - OP_AELEMFAST_LEX = 136, - OP_AELEM = 137, - OP_ASLICE = 138, - OP_KVASLICE = 139, - OP_AEACH = 140, - OP_AVALUES = 141, - OP_AKEYS = 142, - OP_EACH = 143, - OP_VALUES = 144, - OP_KEYS = 145, - OP_DELETE = 146, - OP_EXISTS = 147, - OP_RV2HV = 148, - OP_HELEM = 149, - OP_HSLICE = 150, - OP_KVHSLICE = 151, - OP_MULTIDEREF = 152, - OP_UNPACK = 153, - OP_PACK = 154, - OP_SPLIT = 155, - OP_JOIN = 156, - OP_LIST = 157, - OP_LSLICE = 158, - OP_ANONLIST = 159, - OP_ANONHASH = 160, - OP_SPLICE = 161, - OP_PUSH = 162, - OP_POP = 163, - OP_SHIFT = 164, - OP_UNSHIFT = 165, - OP_SORT = 166, - OP_REVERSE = 167, - OP_GREPSTART = 168, - OP_GREPWHILE = 169, - OP_MAPSTART = 170, - OP_MAPWHILE = 171, - OP_RANGE = 172, - OP_FLIP = 173, - OP_FLOP = 174, - OP_AND = 175, - OP_OR = 176, - OP_XOR = 177, - OP_DOR = 178, - OP_COND_EXPR = 179, - OP_ANDASSIGN = 180, - OP_ORASSIGN = 181, - OP_DORASSIGN = 182, - OP_ENTERSUB = 183, - OP_LEAVESUB = 184, - OP_LEAVESUBLV = 185, - OP_ARGCHECK = 186, - OP_ARGELEM = 187, - OP_ARGDEFELEM = 188, - OP_CALLER = 189, - OP_WARN = 190, - OP_DIE = 191, - OP_RESET = 192, - OP_LINESEQ = 193, - OP_NEXTSTATE = 194, - OP_DBSTATE = 195, - OP_UNSTACK = 196, - OP_ENTER = 197, - OP_LEAVE = 198, - OP_SCOPE = 199, - OP_ENTERITER = 200, - OP_ITER = 201, - OP_ENTERLOOP = 202, - OP_LEAVELOOP = 203, - OP_RETURN = 204, - OP_LAST = 205, - OP_NEXT = 206, - OP_REDO = 207, - OP_DUMP = 208, - OP_GOTO = 209, - OP_EXIT = 210, - OP_METHOD = 211, - OP_METHOD_NAMED = 212, - OP_METHOD_SUPER = 213, - OP_METHOD_REDIR = 214, - OP_METHOD_REDIR_SUPER = 215, - OP_ENTERGIVEN = 216, - OP_LEAVEGIVEN = 217, - OP_ENTERWHEN = 218, - OP_LEAVEWHEN = 219, - OP_BREAK = 220, - OP_CONTINUE = 221, - OP_OPEN = 222, - OP_CLOSE = 223, - OP_PIPE_OP = 224, - OP_FILENO = 225, - OP_UMASK = 226, - OP_BINMODE = 227, - OP_TIE = 228, - OP_UNTIE = 229, - OP_TIED = 230, - OP_DBMOPEN = 231, - OP_DBMCLOSE = 232, - OP_SSELECT = 233, - OP_SELECT = 234, - OP_GETC = 235, - OP_READ = 236, - OP_ENTERWRITE = 237, - OP_LEAVEWRITE = 238, - OP_PRTF = 239, - OP_PRINT = 240, - OP_SAY = 241, - OP_SYSOPEN = 242, - OP_SYSSEEK = 243, - OP_SYSREAD = 244, - OP_SYSWRITE = 245, - OP_EOF = 246, - OP_TELL = 247, - OP_SEEK = 248, - OP_TRUNCATE = 249, - OP_FCNTL = 250, - OP_IOCTL = 251, - OP_FLOCK = 252, - OP_SEND = 253, - OP_RECV = 254, - OP_SOCKET = 255, - OP_SOCKPAIR = 256, - OP_BIND = 257, - OP_CONNECT = 258, - OP_LISTEN = 259, - OP_ACCEPT = 260, - OP_SHUTDOWN = 261, - OP_GSOCKOPT = 262, - OP_SSOCKOPT = 263, - OP_GETSOCKNAME = 264, - OP_GETPEERNAME = 265, - OP_LSTAT = 266, - OP_STAT = 267, - OP_FTRREAD = 268, - OP_FTRWRITE = 269, - OP_FTREXEC = 270, - OP_FTEREAD = 271, - OP_FTEWRITE = 272, - OP_FTEEXEC = 273, - OP_FTIS = 274, - OP_FTSIZE = 275, - OP_FTMTIME = 276, - OP_FTATIME = 277, - OP_FTCTIME = 278, - OP_FTROWNED = 279, - OP_FTEOWNED = 280, - OP_FTZERO = 281, - OP_FTSOCK = 282, - OP_FTCHR = 283, - OP_FTBLK = 284, - OP_FTFILE = 285, - OP_FTDIR = 286, - OP_FTPIPE = 287, - OP_FTSUID = 288, - OP_FTSGID = 289, - OP_FTSVTX = 290, - OP_FTLINK = 291, - OP_FTTTY = 292, - OP_FTTEXT = 293, - OP_FTBINARY = 294, - OP_CHDIR = 295, - OP_CHOWN = 296, - OP_CHROOT = 297, - OP_UNLINK = 298, - OP_CHMOD = 299, - OP_UTIME = 300, - OP_RENAME = 301, - OP_LINK = 302, - OP_SYMLINK = 303, - OP_READLINK = 304, - OP_MKDIR = 305, - OP_RMDIR = 306, - OP_OPEN_DIR = 307, - OP_READDIR = 308, - OP_TELLDIR = 309, - OP_SEEKDIR = 310, - OP_REWINDDIR = 311, - OP_CLOSEDIR = 312, - OP_FORK = 313, - OP_WAIT = 314, - OP_WAITPID = 315, - OP_SYSTEM = 316, - OP_EXEC = 317, - OP_KILL = 318, - OP_GETPPID = 319, - OP_GETPGRP = 320, - OP_SETPGRP = 321, - OP_GETPRIORITY = 322, - OP_SETPRIORITY = 323, - OP_TIME = 324, - OP_TMS = 325, - OP_LOCALTIME = 326, - OP_GMTIME = 327, - OP_ALARM = 328, - OP_SLEEP = 329, - OP_SHMGET = 330, - OP_SHMCTL = 331, - OP_SHMREAD = 332, - OP_SHMWRITE = 333, - OP_MSGGET = 334, - OP_MSGCTL = 335, - OP_MSGSND = 336, - OP_MSGRCV = 337, - OP_SEMOP = 338, - OP_SEMGET = 339, - OP_SEMCTL = 340, - OP_REQUIRE = 341, - OP_DOFILE = 342, - OP_HINTSEVAL = 343, - OP_ENTEREVAL = 344, - OP_LEAVEEVAL = 345, - OP_ENTERTRY = 346, - OP_LEAVETRY = 347, - OP_GHBYNAME = 348, - OP_GHBYADDR = 349, - OP_GHOSTENT = 350, - OP_GNBYNAME = 351, - OP_GNBYADDR = 352, - OP_GNETENT = 353, - OP_GPBYNAME = 354, - OP_GPBYNUMBER = 355, - OP_GPROTOENT = 356, - OP_GSBYNAME = 357, - OP_GSBYPORT = 358, - OP_GSERVENT = 359, - OP_SHOSTENT = 360, - OP_SNETENT = 361, - OP_SPROTOENT = 362, - OP_SSERVENT = 363, - OP_EHOSTENT = 364, - OP_ENETENT = 365, - OP_EPROTOENT = 366, - OP_ESERVENT = 367, - OP_GPWNAM = 368, - OP_GPWUID = 369, - OP_GPWENT = 370, - OP_SPWENT = 371, - OP_EPWENT = 372, - OP_GGRNAM = 373, - OP_GGRGID = 374, - OP_GGRENT = 375, - OP_SGRENT = 376, - OP_EGRENT = 377, - OP_GETLOGIN = 378, - OP_SYSCALL = 379, - OP_LOCK = 380, - OP_ONCE = 381, - OP_CUSTOM = 382, - OP_COREARGS = 383, - OP_AVHVSWITCH = 384, - OP_RUNCV = 385, - OP_FC = 386, - OP_PADCV = 387, - OP_INTROCV = 388, - OP_CLONECV = 389, - OP_PADRANGE = 390, - OP_REFASSIGN = 391, - OP_LVREF = 392, - OP_LVREFSLICE = 393, - OP_LVAVREF = 394, - OP_ANONCONST = 395, + OP_MULTICONCAT = 67, + OP_STRINGIFY = 68, + OP_LEFT_SHIFT = 69, + OP_RIGHT_SHIFT = 70, + OP_LT = 71, + OP_I_LT = 72, + OP_GT = 73, + OP_I_GT = 74, + OP_LE = 75, + OP_I_LE = 76, + OP_GE = 77, + OP_I_GE = 78, + OP_EQ = 79, + OP_I_EQ = 80, + OP_NE = 81, + OP_I_NE = 82, + OP_NCMP = 83, + OP_I_NCMP = 84, + OP_SLT = 85, + OP_SGT = 86, + OP_SLE = 87, + OP_SGE = 88, + OP_SEQ = 89, + OP_SNE = 90, + OP_SCMP = 91, + OP_BIT_AND = 92, + OP_BIT_XOR = 93, + OP_BIT_OR = 94, + OP_NBIT_AND = 95, + OP_NBIT_XOR = 96, + OP_NBIT_OR = 97, + OP_SBIT_AND = 98, + OP_SBIT_XOR = 99, + OP_SBIT_OR = 100, + OP_NEGATE = 101, + OP_I_NEGATE = 102, + OP_NOT = 103, + OP_COMPLEMENT = 104, + OP_NCOMPLEMENT = 105, + OP_SCOMPLEMENT = 106, + OP_SMARTMATCH = 107, + OP_ATAN2 = 108, + OP_SIN = 109, + OP_COS = 110, + OP_RAND = 111, + OP_SRAND = 112, + OP_EXP = 113, + OP_LOG = 114, + OP_SQRT = 115, + OP_INT = 116, + OP_HEX = 117, + OP_OCT = 118, + OP_ABS = 119, + OP_LENGTH = 120, + OP_SUBSTR = 121, + OP_VEC = 122, + OP_INDEX = 123, + OP_RINDEX = 124, + OP_SPRINTF = 125, + OP_FORMLINE = 126, + OP_ORD = 127, + OP_CHR = 128, + OP_CRYPT = 129, + OP_UCFIRST = 130, + OP_LCFIRST = 131, + OP_UC = 132, + OP_LC = 133, + OP_QUOTEMETA = 134, + OP_RV2AV = 135, + OP_AELEMFAST = 136, + OP_AELEMFAST_LEX = 137, + OP_AELEM = 138, + OP_ASLICE = 139, + OP_KVASLICE = 140, + OP_AEACH = 141, + OP_AVALUES = 142, + OP_AKEYS = 143, + OP_EACH = 144, + OP_VALUES = 145, + OP_KEYS = 146, + OP_DELETE = 147, + OP_EXISTS = 148, + OP_RV2HV = 149, + OP_HELEM = 150, + OP_HSLICE = 151, + OP_KVHSLICE = 152, + OP_MULTIDEREF = 153, + OP_UNPACK = 154, + OP_PACK = 155, + OP_SPLIT = 156, + OP_JOIN = 157, + OP_LIST = 158, + OP_LSLICE = 159, + OP_ANONLIST = 160, + OP_ANONHASH = 161, + OP_SPLICE = 162, + OP_PUSH = 163, + OP_POP = 164, + OP_SHIFT = 165, + OP_UNSHIFT = 166, + OP_SORT = 167, + OP_REVERSE = 168, + OP_GREPSTART = 169, + OP_GREPWHILE = 170, + OP_MAPSTART = 171, + OP_MAPWHILE = 172, + OP_RANGE = 173, + OP_FLIP = 174, + OP_FLOP = 175, + OP_AND = 176, + OP_OR = 177, + OP_XOR = 178, + OP_DOR = 179, + OP_COND_EXPR = 180, + OP_ANDASSIGN = 181, + OP_ORASSIGN = 182, + OP_DORASSIGN = 183, + OP_ENTERSUB = 184, + OP_LEAVESUB = 185, + OP_LEAVESUBLV = 186, + OP_ARGCHECK = 187, + OP_ARGELEM = 188, + OP_ARGDEFELEM = 189, + OP_CALLER = 190, + OP_WARN = 191, + OP_DIE = 192, + OP_RESET = 193, + OP_LINESEQ = 194, + OP_NEXTSTATE = 195, + OP_DBSTATE = 196, + OP_UNSTACK = 197, + OP_ENTER = 198, + OP_LEAVE = 199, + OP_SCOPE = 200, + OP_ENTERITER = 201, + OP_ITER = 202, + OP_ENTERLOOP = 203, + OP_LEAVELOOP = 204, + OP_RETURN = 205, + OP_LAST = 206, + OP_NEXT = 207, + OP_REDO = 208, + OP_DUMP = 209, + OP_GOTO = 210, + OP_EXIT = 211, + OP_METHOD = 212, + OP_METHOD_NAMED = 213, + OP_METHOD_SUPER = 214, + OP_METHOD_REDIR = 215, + OP_METHOD_REDIR_SUPER = 216, + OP_ENTERGIVEN = 217, + OP_LEAVEGIVEN = 218, + OP_ENTERWHEN = 219, + OP_LEAVEWHEN = 220, + OP_BREAK = 221, + OP_CONTINUE = 222, + OP_OPEN = 223, + OP_CLOSE = 224, + OP_PIPE_OP = 225, + OP_FILENO = 226, + OP_UMASK = 227, + OP_BINMODE = 228, + OP_TIE = 229, + OP_UNTIE = 230, + OP_TIED = 231, + OP_DBMOPEN = 232, + OP_DBMCLOSE = 233, + OP_SSELECT = 234, + OP_SELECT = 235, + OP_GETC = 236, + OP_READ = 237, + OP_ENTERWRITE = 238, + OP_LEAVEWRITE = 239, + OP_PRTF = 240, + OP_PRINT = 241, + OP_SAY = 242, + OP_SYSOPEN = 243, + OP_SYSSEEK = 244, + OP_SYSREAD = 245, + OP_SYSWRITE = 246, + OP_EOF = 247, + OP_TELL = 248, + OP_SEEK = 249, + OP_TRUNCATE = 250, + OP_FCNTL = 251, + OP_IOCTL = 252, + OP_FLOCK = 253, + OP_SEND = 254, + OP_RECV = 255, + OP_SOCKET = 256, + OP_SOCKPAIR = 257, + OP_BIND = 258, + OP_CONNECT = 259, + OP_LISTEN = 260, + OP_ACCEPT = 261, + OP_SHUTDOWN = 262, + OP_GSOCKOPT = 263, + OP_SSOCKOPT = 264, + OP_GETSOCKNAME = 265, + OP_GETPEERNAME = 266, + OP_LSTAT = 267, + OP_STAT = 268, + OP_FTRREAD = 269, + OP_FTRWRITE = 270, + OP_FTREXEC = 271, + OP_FTEREAD = 272, + OP_FTEWRITE = 273, + OP_FTEEXEC = 274, + OP_FTIS = 275, + OP_FTSIZE = 276, + OP_FTMTIME = 277, + OP_FTATIME = 278, + OP_FTCTIME = 279, + OP_FTROWNED = 280, + OP_FTEOWNED = 281, + OP_FTZERO = 282, + OP_FTSOCK = 283, + OP_FTCHR = 284, + OP_FTBLK = 285, + OP_FTFILE = 286, + OP_FTDIR = 287, + OP_FTPIPE = 288, + OP_FTSUID = 289, + OP_FTSGID = 290, + OP_FTSVTX = 291, + OP_FTLINK = 292, + OP_FTTTY = 293, + OP_FTTEXT = 294, + OP_FTBINARY = 295, + OP_CHDIR = 296, + OP_CHOWN = 297, + OP_CHROOT = 298, + OP_UNLINK = 299, + OP_CHMOD = 300, + OP_UTIME = 301, + OP_RENAME = 302, + OP_LINK = 303, + OP_SYMLINK = 304, + OP_READLINK = 305, + OP_MKDIR = 306, + OP_RMDIR = 307, + OP_OPEN_DIR = 308, + OP_READDIR = 309, + OP_TELLDIR = 310, + OP_SEEKDIR = 311, + OP_REWINDDIR = 312, + OP_CLOSEDIR = 313, + OP_FORK = 314, + OP_WAIT = 315, + OP_WAITPID = 316, + OP_SYSTEM = 317, + OP_EXEC = 318, + OP_KILL = 319, + OP_GETPPID = 320, + OP_GETPGRP = 321, + OP_SETPGRP = 322, + OP_GETPRIORITY = 323, + OP_SETPRIORITY = 324, + OP_TIME = 325, + OP_TMS = 326, + OP_LOCALTIME = 327, + OP_GMTIME = 328, + OP_ALARM = 329, + OP_SLEEP = 330, + OP_SHMGET = 331, + OP_SHMCTL = 332, + OP_SHMREAD = 333, + OP_SHMWRITE = 334, + OP_MSGGET = 335, + OP_MSGCTL = 336, + OP_MSGSND = 337, + OP_MSGRCV = 338, + OP_SEMOP = 339, + OP_SEMGET = 340, + OP_SEMCTL = 341, + OP_REQUIRE = 342, + OP_DOFILE = 343, + OP_HINTSEVAL = 344, + OP_ENTEREVAL = 345, + OP_LEAVEEVAL = 346, + OP_ENTERTRY = 347, + OP_LEAVETRY = 348, + OP_GHBYNAME = 349, + OP_GHBYADDR = 350, + OP_GHOSTENT = 351, + OP_GNBYNAME = 352, + OP_GNBYADDR = 353, + OP_GNETENT = 354, + OP_GPBYNAME = 355, + OP_GPBYNUMBER = 356, + OP_GPROTOENT = 357, + OP_GSBYNAME = 358, + OP_GSBYPORT = 359, + OP_GSERVENT = 360, + OP_SHOSTENT = 361, + OP_SNETENT = 362, + OP_SPROTOENT = 363, + OP_SSERVENT = 364, + OP_EHOSTENT = 365, + OP_ENETENT = 366, + OP_EPROTOENT = 367, + OP_ESERVENT = 368, + OP_GPWNAM = 369, + OP_GPWUID = 370, + OP_GPWENT = 371, + OP_SPWENT = 372, + OP_EPWENT = 373, + OP_GGRNAM = 374, + OP_GGRGID = 375, + OP_GGRENT = 376, + OP_SGRENT = 377, + OP_EGRENT = 378, + OP_GETLOGIN = 379, + OP_SYSCALL = 380, + OP_LOCK = 381, + OP_ONCE = 382, + OP_CUSTOM = 383, + OP_COREARGS = 384, + OP_AVHVSWITCH = 385, + OP_RUNCV = 386, + OP_FC = 387, + OP_PADCV = 388, + OP_INTROCV = 389, + OP_CLONECV = 390, + OP_PADRANGE = 391, + OP_REFASSIGN = 392, + OP_LVREF = 393, + OP_LVREFSLICE = 394, + OP_LVAVREF = 395, + OP_ANONCONST = 396, OP_max } opcode; -#define MAXO 396 +#define MAXO 397 #define OP_FREED MAXO /* the OP_IS_* macros are optimized to a simple range check because @@ -863,6 +863,26 @@ EXTERN_C int usleep(unsigned int); #endif /* PERL_CORE */ +/* Maximum number of args that may be passed to an OP_MULTICONCAT op. + * It determines the size of local arrays in S_maybe_multiconcat() and + * pp_multiconcat(). + */ +#define PERL_MULTICONCAT_MAXARG 64 + +/* The indexes of fields of a multiconcat aux struct. + * The fixed fields are followed by nargs+1 const segment lengths, + * and if utf8 and non-utf8 differ, a second nargs+1 set for utf8. + */ + +#define PERL_MULTICONCAT_IX_NARGS 0 /* number of arguments */ +#define PERL_MULTICONCAT_IX_PLAIN_PV 1 /* non-utf8 constant string */ +#define PERL_MULTICONCAT_IX_PLAIN_LEN 2 /* non-utf8 constant string length */ +#define PERL_MULTICONCAT_IX_UTF8_PV 3 /* utf8 constant string */ +#define PERL_MULTICONCAT_IX_UTF8_LEN 4 /* utf8 constant string length */ +#define PERL_MULTICONCAT_IX_LENGTHS 5 /* first of nargs+1 const segment lens */ +#define PERL_MULTICONCAT_HEADER_SIZE 5 /* The number of fields of a + multiconcat header */ + /* We no longer default to creating a new SV for GvSV. Do this before embed. */ #ifndef PERL_CREATE_GVSV @@ -320,6 +320,809 @@ PP(pp_concat) } } + +/* pp_multiconcat() + +Concatenate one or more args, possibly interleaved with constant string +segments. The result may be assigned to, or appended to, a variable or +expression. + +Several op_flags and/or op_private bits indicate what the target is, and +whether it's appended to. Valid permutations are: + + - (PADTMP) = (A.B.C....) + OPpTARGET_MY $lex = (A.B.C....) + OPpTARGET_MY,OPpLVAL_INTRO my $lex = (A.B.C....) + OPpTARGET_MY,OPpMULTICONCAT_APPEND $lex .= (A.B.C....) + OPf_STACKED expr = (A.B.C....) + OPf_STACKED,OPpMULTICONCAT_APPEND expr .= (A.B.C....) + +Other combinations like (A.B).(C.D) are not optimised into a multiconcat +op, as it's too hard to get the correct ordering of ties, overload etc. + +In addition: + + OPpMULTICONCAT_FAKE: not a real concat, instead an optimised + sprintf "...%s...". Don't call '.' + overloading: only use '""' overloading. + + OPpMULTICONCAT_STRINGIFY: (for Deparse's benefit) the RHS was of the + form "...$a...$b..." rather than + "..." . $a . "..." . $b . "..." + +An OP_MULTICONCAT is of type UNOP_AUX. The fixed slots of the aux array are +defined with PERL_MULTICONCAT_IX_FOO constants, where: + + + FOO index description + -------- ----- ---------------------------------- + NARGS 0 number of arguments + PLAIN_PV 1 non-utf8 constant string + PLAIN_LEN 2 non-utf8 constant string length + UTF8_PV 3 utf8 constant string + UTF8_LEN 4 utf8 constant string length + LENGTHS 5 first of nargs+1 const segment lengths + +The idea is that a general string concatenation will have a fixed (known +at compile time) number of variable args, interspersed with constant +strings, e.g. "a=$a b=$b\n" + +All the constant string segments "a=", " b=" and "\n" are stored as a +single string "a= b=\n", pointed to from the PLAIN_PV/UTF8_PV slot, along +with a series of segment lengths: e.g. 2,3,1. In the case where the +constant string is plain but has a different utf8 representation, both +variants are stored, and two sets of (nargs+1) segments lengths are stored +in the slots beginning at PERL_MULTICONCAT_IX_LENGTHS. + +A segment length of -1 indicates that there is no constant string at that +point; this distinguishes between e.g. ($a . $b) and ($a . "" . $b), which +have differing overloading behaviour. + +*/ + +PP(pp_multiconcat) +{ + dSP; + SV *targ; /* The SV to be assigned or appended to */ + SV *dsv; /* the SV to concat args to (often == targ) */ + char *dsv_pv; /* where within SvPVX(dsv) we're writing to */ + STRLEN targ_len; /* SvCUR(targ) */ + SV **toparg; /* the highest arg position on the stack */ + UNOP_AUX_item *aux; /* PL_op->op_aux buffer */ + UNOP_AUX_item *const_lens; /* the segment length array part of aux */ + const char *const_pv; /* the current segment of the const string buf */ + UV nargs; /* how many args were expected */ + UV stack_adj; /* how much to adjust SP on return */ + STRLEN grow; /* final size of destination string (dsv) */ + UV targ_count; /* how many times targ has appeared on the RHS */ + bool is_append; /* OPpMULTICONCAT_APPEND flag is set */ + bool slow_concat; /* args too complex for quick concat */ + U32 dst_utf8; /* the result will be utf8 (indicate this with + SVf_UTF8 in a U32, rather than using bool, + for ease of testing and setting) */ + /* for each arg, holds the result of an SvPV() call */ + struct multiconcat_svpv { + char *pv; + SSize_t len; + } + *targ_chain, /* chain of slots where targ has appeared on RHS */ + *svpv_p, /* ptr for looping through svpv_buf */ + *svpv_base, /* first slot (may be greater than svpv_buf), */ + *svpv_end, /* and slot after highest result so far, of: */ + svpv_buf[PERL_MULTICONCAT_MAXARG]; /* buf for storing SvPV() results */ + + aux = cUNOP_AUXx(PL_op)->op_aux; + stack_adj = nargs = aux[PERL_MULTICONCAT_IX_NARGS].uv; + is_append = cBOOL(PL_op->op_private & OPpMULTICONCAT_APPEND); + + /* get targ from the stack or pad */ + + if (PL_op->op_flags & OPf_STACKED) { + if (is_append) { + /* for 'expr .= ...', expr is the bottom item on the stack */ + targ = SP[-nargs]; + stack_adj++; + } + else + /* for 'expr = ...', expr is the top item on the stack */ + targ = POPs; + } + else { + SV **svp = &(PAD_SVl(PL_op->op_targ)); + targ = *svp; + if (PL_op->op_private & OPpLVAL_INTRO) { + assert(PL_op->op_private & OPpTARGET_MY); + save_clearsv(svp); + } + if (!nargs) + /* $lex .= "const" doesn't cause anything to be pushed */ + EXTEND(SP,1); + } + + toparg = SP; + SP -= (nargs - 1); + dsv = targ; /* Set the destination for all concats. This is + initially targ; later on, dsv may be switched + to point to a TEMP SV if overloading is + encountered. */ + grow = 1; /* allow for '\0' at minimum */ + targ_count = 0; + targ_chain = NULL; + targ_len = 0; + svpv_end = svpv_buf; + /* only utf8 variants of the const strings? */ + dst_utf8 = aux[PERL_MULTICONCAT_IX_PLAIN_PV].pv ? 0 : SVf_UTF8; + + + /* -------------------------------------------------------------- + * Phase 1: + * + * stringify (i.e. SvPV()) every arg and store the resultant pv/len/utf8 + * triplets in svpv_buf[]. Also increment 'grow' by the args' lengths. + * + * utf8 is indicated by storing a negative length. + * + * Where an arg is actually targ, the stringification is deferred: + * the length is set to 0, and the slot is added to targ_chain. + * + * If an overloaded arg is found, the loop is abandoned at that point, + * and dsv is set to an SvTEMP SV where the results-so-far will be + * accumulated. + */ + + for (; SP <= toparg; SP++, svpv_end++) { + bool simple_flags; + U32 utf8; + STRLEN len; + SV *sv; + + assert(svpv_end - svpv_buf < PERL_MULTICONCAT_MAXARG); + + sv = *SP; + simple_flags = (SvFLAGS(sv) & (SVs_GMG|SVf_ROK|SVf_POK)) == SVf_POK; + + /* this if/else chain is arranged so that common/simple cases + * take few conditionals */ + + if (LIKELY(simple_flags && (sv != targ))) { + /* common case: sv is a simple PV and not the targ */ + svpv_end->pv = SvPVX(sv); + len = SvCUR(sv); + } + else if (simple_flags) { + /* sv is targ (but can't be magic or overloaded). + * Delay storing PV pointer; instead, add slot to targ_chain + * so it can be populated later, after targ has been grown and + * we know its final SvPVX() address. + */ + targ_on_rhs: + svpv_end->len = 0; /* zerojng here means we can skip + updating later if targ_len == 0 */ + svpv_end->pv = (char*)targ_chain; + targ_chain = svpv_end; + targ_count++; + continue; + } + else { + if (UNLIKELY(SvFLAGS(sv) & (SVs_GMG|SVf_ROK))) { + /* its got magic, is tied, and/or is overloaded */ + SvGETMAGIC(sv); + + if (UNLIKELY(SvAMAGIC(sv)) + && !(PL_op->op_private & OPpMULTICONCAT_FAKE)) + { + /* One of the RHS args is overloaded. Abandon stringifying + * the args at this point, then in the concat loop later + * on, concat the plain args stringified so far into a + * TEMP SV. At the end of this function the remaining + * args (including the current one) will be handled + * specially, using overload calls. + * FAKE implies an optimised sprintf which doesn't use + * concat overloading, only "" overloading. + */ + setup_overload: + dsv = newSVpvn_flags("", 0, SVs_TEMP); + + if (targ_chain) { + /* Get the string value of targ and populate any + * RHS slots which use it */ + char *pv = SvPV_nomg(targ, len); + dst_utf8 |= (SvFLAGS(targ) & SVf_UTF8); + grow += len * targ_count; + do { + struct multiconcat_svpv *p = targ_chain; + targ_chain = (struct multiconcat_svpv *)(p->pv); + p->pv = pv; + p->len = len; + } while (targ_chain); + } + else if (is_append) + SvGETMAGIC(targ); + + goto phase3; + } + + if (SvFLAGS(sv) & SVs_RMG) { + /* probably tied; copy it to guarantee separate values + * each time it's used, e.g. "-$tied-$tied-$tied-", + * since FETCH() isn't necessarily idempotent */ + SV *nsv = newSV(0); + sv_setsv_flags(nsv, sv, SV_NOSTEAL); + sv_2mortal(nsv); + if ( sv == targ + && is_append + && nargs == 1 + /* no const string segments */ + && aux[PERL_MULTICONCAT_IX_LENGTHS].size == -1 + && aux[PERL_MULTICONCAT_IX_LENGTHS+1].size == -1) + { + /* special-case $tied .= $tied. + * + * For something like + * sub FETCH { $i++ } + * then + * $tied .= $tied . $tied . $tied; + * will STORE "4123" + * while + * $tied .= $tied + * will STORE "12" + * + * i.e. for a single mutator concat, the LHS is + * retrieved first; in all other cases it is + * retrieved last. Whether this is sane behaviour + * is open to debate; but for now, multiconcat (as + * it is an optimisation) tries to reproduce + * existing behaviour. + */ + sv_catsv(nsv, sv); + sv_setsv(sv,nsv); + SP++; + goto phase7; /* just return targ as-is */ + } + + sv = nsv; + } + } + + if (sv == targ) { + /* must warn for each RH usage of targ, except that + * we will later get one warning when doing + * SvPV_force(targ), *except* on '.=' */ + if ( !SvOK(sv) + && (targ_chain || is_append) + && ckWARN(WARN_UNINITIALIZED) + ) + report_uninit(sv); + goto targ_on_rhs; + } + + /* stringify general SV */ + svpv_end->pv = sv_2pv_flags(sv, &len, 0); + } + + utf8 = (SvFLAGS(sv) & SVf_UTF8); + dst_utf8 |= utf8; + ASSUME(len < SSize_t_MAX); + svpv_end->len = utf8 ? -(SSize_t)len : (SSize_t)len; + grow += len; + } + + /* -------------------------------------------------------------- + * Phase 2: + * + * Stringify targ: + * + * if targ appears on the RHS or is appended to, force stringify it; + * otherwise set it to "". Then set targ_len. + */ + + if (is_append) { + if (UNLIKELY(SvFLAGS(targ) & (SVs_GMG|SVf_ROK))) { + SvGETMAGIC(targ); /* must do before SvAMAGIC() check */ + if (UNLIKELY(SvAMAGIC(targ))) { + /* $overloaded .= ....; + * accumulate RHS in a temp SV rather than targ, + * then append tmp to targ at the end using overload + */ + assert(!targ_chain); + dsv = newSVpvn_flags("", 0, SVs_TEMP); + goto phase3; + } + } + + if (SvOK(targ)) { + U32 targ_utf8; + stringify_targ: + SvPV_force_nomg_nolen(targ); + targ_utf8 = SvFLAGS(targ) & SVf_UTF8; + if (UNLIKELY(dst_utf8 & ~targ_utf8)) { + if (LIKELY(!IN_BYTES)) + sv_utf8_upgrade_nomg(targ); + } + else + dst_utf8 |= targ_utf8; + + targ_len = SvCUR(targ); + grow += targ_len * (targ_count + is_append); + goto phase3; + } + } + else if (UNLIKELY(SvTYPE(targ) >= SVt_REGEXP)) { + /* Assigning to some weird LHS type. Don't force the LHS to be an + * empty string; instead, do things 'long hand' by using the + * overload code path, which concats to a TEMP sv and does + * sv_catsv() calls rather than COPY()s. This ensures that even + * bizarre code like this doesn't break or crash: + * *F = *F . *F. + * (which makes the 'F' typeglob an alias to the + * '*main::F*main::F' typeglob). + */ + goto setup_overload; + } + else if (targ_chain) { + /* targ was found on RHS. + * We don't need the SvGETMAGIC() call and SvAMAGIC() test as + * both were already done earlier in the SvPV() loop; other + * than that we can share the same code with the append + * branch below. + * Note that this goto jumps directly into the SvOK() branch + * even if targ isn't SvOK(), to force an 'uninitialised' + * warning; e.g. + * $undef .= .... targ only on LHS: don't warn + * $undef .= $undef .... targ on RHS too: warn + */ + assert(!SvAMAGIC(targ)); + goto stringify_targ; + } + + + /* unrolled SvPVCLEAR() - mostly: no need to grow or set SvCUR() to 0; + * those will be done later. */ + assert(targ == dsv); + SV_CHECK_THINKFIRST_COW_DROP(targ); + SvUPGRADE(targ, SVt_PV); + SvFLAGS(targ) &= ~(SVf_OK|SVf_IVisUV|SVf_UTF8); + SvFLAGS(targ) |= (SVf_POK|SVp_POK|dst_utf8); + + phase3: + + /* -------------------------------------------------------------- + * Phase 3: + * + * UTF-8 tweaks and grow dsv: + * + * Now that we know the length and utf8-ness of both the targ and + * args, grow dsv to the size needed to accumulate all the args, based + * on whether targ appears on the RHS, whether we're appending, and + * whether any non-utf8 args expand in size if converted to utf8. + * + * For the latter, if dst_utf8 we scan non-utf8 args looking for + * variant chars, and adjust the svpv->len value of those args to the + * utf8 size and negate it to flag them. At the same time we un-negate + * the lens of any utf8 args since after this phase we no longer care + * whether an arg is utf8 or not. + * + * Finally, initialise const_lens and const_pv based on utf8ness. + * Note that there are 3 permutations: + * + * * If the constant string is invariant whether utf8 or not (e.g. "abc"), + * then aux[PERL_MULTICONCAT_IX_PLAIN_PV/LEN] are the same as + * aux[PERL_MULTICONCAT_IX_UTF8_PV/LEN] and there is one set of + * segment lengths. + * + * * If the string is fully utf8, e.g. "\x{100}", then + * aux[PERL_MULTICONCAT_IX_PLAIN_PV/LEN] == (NULL,0) and there is + * one set of segment lengths. + * + * * If the string has different plain and utf8 representations + * (e.g. "\x80"), then then aux[PERL_MULTICONCAT_IX_PLAIN_PV/LEN]] + * holds the plain rep, while aux[PERL_MULTICONCAT_IX_UTF8_PV/LEN] + * holds the utf8 rep, and there are 2 sets of segment lengths, + * with the utf8 set following after the plain set. + * + * On entry to this section the (pv,len) pairs in svpv_buf have the + * following meanings: + * (pv, len) a plain string + * (pv, -len) a utf8 string + * (NULL, 0) left-most targ \ linked together R-to-L + * (next, 0) other targ / in targ_chain + */ + + /* turn off utf8 handling if 'use bytes' is in scope */ + if (UNLIKELY(dst_utf8 && IN_BYTES)) { + dst_utf8 = 0; + SvUTF8_off(dsv); + /* undo all the negative lengths which flag utf8-ness */ + for (svpv_p = svpv_buf; svpv_p < svpv_end; svpv_p++) { + SSize_t len = svpv_p->len; + if (len < 0) + svpv_p->len = -len; + } + } + + /* grow += total of lengths of constant string segments */ + { + SSize_t len; + len = aux[dst_utf8 ? PERL_MULTICONCAT_IX_UTF8_LEN + : PERL_MULTICONCAT_IX_PLAIN_LEN].size; + slow_concat = cBOOL(len); + grow += len; + } + + const_lens = aux + PERL_MULTICONCAT_IX_LENGTHS; + + if (dst_utf8) { + const_pv = aux[PERL_MULTICONCAT_IX_UTF8_PV].pv; + if ( aux[PERL_MULTICONCAT_IX_PLAIN_PV].pv + && const_pv != aux[PERL_MULTICONCAT_IX_PLAIN_PV].pv) + /* separate sets of lengths for plain and utf8 */ + const_lens += nargs + 1; + + /* If the result is utf8 but some of the args aren't, + * calculate how much extra growth is needed for all the chars + * which will expand to two utf8 bytes. + * Also, if the growth is non-zero, negate the length to indicate + * that this this is a variant string. Conversely, un-negate the + * length on utf8 args (which was only needed to flag non-utf8 + * args in this loop */ + for (svpv_p = svpv_buf; svpv_p < svpv_end; svpv_p++) { + char *p; + SSize_t len, l, extra; + + len = svpv_p->len; + if (len <= 0) { + svpv_p->len = -len; + continue; + } + + p = svpv_p->pv; + extra = 0; + l = len; + while (l--) + extra += !UTF8_IS_INVARIANT(*p++); + if (UNLIKELY(extra)) { + grow += extra; + /* -ve len indicates special handling */ + svpv_p->len = -(len + extra); + slow_concat = TRUE; + } + } + } + else + const_pv = aux[PERL_MULTICONCAT_IX_PLAIN_PV].pv; + + /* unrolled SvGROW(), except don't check for SVf_IsCOW, which should + * already have been dropped */ + assert(!SvIsCOW(dsv)); + dsv_pv = (SvLEN(dsv) < (grow) ? sv_grow(dsv,grow) : SvPVX(dsv)); + + + /* -------------------------------------------------------------- + * Phase 4: + * + * Now that dsv (which is probably targ) has been grown, we know the + * final address of the targ PVX, if needed. Preserve / move targ + * contents if appending or if targ appears on RHS. + * + * Also update svpv_buf slots in targ_chain. + * + * Don't bother with any of this if the target length is zero: + * targ_len is set to zero unless we're appending or targ appears on + * RHS. And even if it is, we can optimise by skipping this chunk of + * code for zero targ_len. In the latter case, we don't need to update + * the slots in targ_chain with the (zero length) target string, since + * we set the len in such slots to 0 earlier, and since the Copy() is + * skipped on zero length, it doesn't matter what svpv_p->pv contains. + * + * On entry to this section the (pv,len) pairs in svpv_buf have the + * following meanings: + * (pv, len) a pure-plain or utf8 string + * (pv, -(len+extra)) a plain string which will expand by 'extra' + * bytes when converted to utf8 + * (NULL, 0) left-most targ \ linked together R-to-L + * (next, 0) other targ / in targ_chain + * + * On exit, the targ contents will have been moved to the + * earliest place they are needed (e.g. $x = "abc$x" will shift them + * 3 bytes, while $x .= ... will leave them at the beginning); + * and dst_pv will point to the location within SvPVX(dsv) where the + * next arg should be copied. + */ + + svpv_base = svpv_buf; + + if (targ_len) { + struct multiconcat_svpv *tc_stop; + char *targ_pv = dsv_pv; + + assert(targ == dsv); + assert(is_append || targ_count); + + if (is_append) { + dsv_pv += targ_len; + tc_stop = NULL; + } + else { + /* The targ appears on RHS, e.g. '$t = $a . $t . $t'. + * Move the current contents of targ to the first + * position where it's needed, and use that as the src buffer + * for any further uses (such as the second RHS $t above). + * In calculating the first position, we need to sum the + * lengths of all consts and args before that. + */ + + UNOP_AUX_item *lens = const_lens; + /* length of first const string segment */ + STRLEN offset = lens->size > 0 ? lens->size : 0; + + assert(targ_chain); + svpv_p = svpv_base; + + for (;;) { + SSize_t len; + if (!svpv_p->pv) + break; /* the first targ argument */ + /* add lengths of the next arg and const string segment */ + len = svpv_p->len; + if (len < 0) /* variant args have this */ + len = -len; + offset += (STRLEN)len; + len = (++lens)->size; + offset += (len >= 0) ? (STRLEN)len : 0; + if (!offset) { + /* all args and consts so far are empty; update + * the start position for the concat later */ + svpv_base++; + const_lens++; + } + svpv_p++; + assert(svpv_p < svpv_end); + } + + if (offset) { + targ_pv += offset; + Move(dsv_pv, targ_pv, targ_len, char); + /* a negative length implies don't Copy(), but do increment */ + svpv_p->len = -targ_len; + slow_concat = TRUE; + } + else { + /* skip the first targ copy */ + svpv_base++; + const_lens++; + dsv_pv += targ_len; + } + + /* Don't populate the first targ slot in the loop below; it's + * either not used because we advanced svpv_base beyond it, or + * we already stored the special -targ_len value in it + */ + tc_stop = svpv_p; + } + + /* populate slots in svpv_buf representing targ on RHS */ + while (targ_chain != tc_stop) { + struct multiconcat_svpv *p = targ_chain; + targ_chain = (struct multiconcat_svpv *)(p->pv); + p->pv = targ_pv; + p->len = (SSize_t)targ_len; + } + } + + + /* -------------------------------------------------------------- + * Phase 5: + * + * Append all the args in svpv_buf, plus the const strings, to dsv. + * + * On entry to this section the (pv,len) pairs in svpv_buf have the + * following meanings: + * (pv, len) a pure-plain or utf8 string (which may be targ) + * (pv, -(len+extra)) a plain string which will expand by 'extra' + * bytes when converted to utf8 + * (0, -len) left-most targ, whose content has already + * been copied. Just advance dsv_pv by len. + */ + + /* If there are no constant strings and no special case args + * (svpv_p->len < 0), use a simpler, more efficient concat loop + */ + if (!slow_concat) { + for (svpv_p = svpv_base; svpv_p < svpv_end; svpv_p++) { + SSize_t len = svpv_p->len; + if (!len) + continue; + Copy(svpv_p->pv, dsv_pv, len, char); + dsv_pv += len; + } + const_lens += (svpv_end - svpv_base + 1); + } + else { + /* Note that we iterate the loop nargs+1 times: to append nargs + * arguments and nargs+1 constant strings. For example, "-$a-$b-" + */ + svpv_p = svpv_base - 1; + + for (;;) { + SSize_t len = (const_lens++)->size; + + /* append next const string segment */ + if (len > 0) { + Copy(const_pv, dsv_pv, len, char); + dsv_pv += len; + const_pv += len; + } + + if (++svpv_p == svpv_end) + break; + + /* append next arg */ + len = svpv_p->len; + + if (LIKELY(len > 0)) { + Copy(svpv_p->pv, dsv_pv, len, char); + dsv_pv += len; + } + else if (UNLIKELY(len < 0)) { + /* negative length indicates two special cases */ + const char *p = svpv_p->pv; + len = -len; + if (UNLIKELY(p)) { + /* copy plain-but-variant pv to a utf8 targ */ + assert(dst_utf8); + while (len--) { + U8 c = (U8) *p++; + if (UTF8_IS_INVARIANT(c)) + *dsv_pv++ = c; + else { + *dsv_pv++ = UTF8_EIGHT_BIT_HI(c); + *dsv_pv++ = UTF8_EIGHT_BIT_LO(c); + len--; + } + } + } + else + /* arg is already-copied targ */ + dsv_pv += len; + } + + } + } + + *dsv_pv = '\0'; + SvCUR_set(dsv, dsv_pv - SvPVX(dsv)); + assert(grow >= SvCUR(dsv) + 1); + assert(SvLEN(dsv) >= SvCUR(dsv) + 1); + + /* -------------------------------------------------------------- + * Phase 6: + * + * Handle overloading. If an overloaded arg or targ was detected + * earlier, dsv will have been set to a new mortal, and any args and + * consts to the left of the first overloaded arg will have been + * accumulated to it. This section completes any further concatenation + * steps with overloading handled. + */ + + if (UNLIKELY(dsv != targ)) { + SV *res; + + SvFLAGS(dsv) |= dst_utf8; + + if (SP <= toparg) { + /* Stringifying the RHS was abandoned because *SP + * is overloaded. dsv contains all the concatted strings + * before *SP. Apply the rest of the args using overloading. + */ + SV *left, *right, *res; + int i; + bool getmg = FALSE; + SV *constsv = NULL; + /* number of args already concatted */ + STRLEN n = (nargs - 1) - (toparg - SP); + /* current arg is either the first + * or second value to be concatted + * (including constant strings), so would + * form part of the first concat */ + bool first_concat = ( n == 0 + || (n == 1 && const_lens[-2].size < 0 + && const_lens[-1].size < 0)); + int f_assign = first_concat ? 0 : AMGf_assign; + + left = dsv; + + for (; n < nargs; n++) { + /* loop twice, first applying the arg, then the const segment */ + for (i = 0; i < 2; i++) { + if (i) { + /* append next const string segment */ + STRLEN len = (STRLEN)((const_lens++)->size); + /* a length of -1 implies no constant string + * rather than a zero-length one, e.g. + * ($a . $b) versus ($a . "" . $b) + */ + if ((SSize_t)len < 0) + continue; + + /* set constsv to the next constant string segment */ + if (constsv) { + sv_setpvn(constsv, const_pv, len); + if (dst_utf8) + SvUTF8_on(constsv); + else + SvUTF8_off(constsv); + } + else + constsv = newSVpvn_flags(const_pv, len, + (dst_utf8 | SVs_TEMP)); + + right = constsv; + const_pv += len; + } + else { + /* append next arg */ + right = *SP++; + if (getmg) + SvGETMAGIC(right); + else + /* SvGETMAGIC already called on this SV just + * before we broke from the loop earlier */ + getmg = TRUE; + + if (first_concat && n == 0 && const_lens[-1].size < 0) { + /* nothing before the current arg; repeat the + * loop to get a second arg */ + left = right; + first_concat = FALSE; + continue; + } + } + + if ((SvAMAGIC(left) || SvAMAGIC(right)) + && (res = amagic_call(left, right, concat_amg, f_assign)) + ) + left = res; + else { + if (left != dsv) { + sv_setsv(dsv, left); + left = dsv; + } + sv_catsv_nomg(left, right); + } + f_assign = AMGf_assign; + } + } + dsv = left; + } + + /* assign/append RHS (dsv) to LHS (targ) */ + if (is_append) { + if ((SvAMAGIC(targ) || SvAMAGIC(dsv)) + && (res = amagic_call(targ, dsv, concat_amg, AMGf_assign)) + ) + sv_setsv(targ, res); + else + sv_catsv_nomg(targ, dsv); + } + else + sv_setsv(targ, dsv); + } + + /* -------------------------------------------------------------- + * Phase 7: + * + * return result + */ + + phase7: + + SP -= stack_adj; + SvTAINT(targ); + SETTARG; + RETURN; +} + + /* push the elements of av onto the stack. * Returns PL_op->op_next to allow tail-call optimisation of its callers */ diff --git a/pp_proto.h b/pp_proto.h index e931546799..407cbd14a3 100644 --- a/pp_proto.h +++ b/pp_proto.h @@ -162,6 +162,7 @@ PERL_CALLCONV OP *Perl_pp_method_redir_super(pTHX); PERL_CALLCONV OP *Perl_pp_method_super(pTHX); PERL_CALLCONV OP *Perl_pp_mkdir(pTHX); PERL_CALLCONV OP *Perl_pp_modulo(pTHX); +PERL_CALLCONV OP *Perl_pp_multiconcat(pTHX); PERL_CALLCONV OP *Perl_pp_multideref(pTHX); PERL_CALLCONV OP *Perl_pp_multiply(pTHX); PERL_CALLCONV OP *Perl_pp_nbit_and(pTHX); @@ -2067,6 +2067,9 @@ PERL_CALLCONV void Perl_mro_set_mro(pTHX_ struct mro_meta *const meta, SV *const PERL_CALLCONV SV* Perl_mro_set_private_data(pTHX_ struct mro_meta *const smeta, const struct mro_alg *const which, SV *const data); #define PERL_ARGS_ASSERT_MRO_SET_PRIVATE_DATA \ assert(smeta); assert(which); assert(data) +PERL_CALLCONV SV* Perl_multiconcat_stringify(pTHX_ const OP* o); +#define PERL_ARGS_ASSERT_MULTICONCAT_STRINGIFY \ + assert(o) PERL_CALLCONV SV* Perl_multideref_stringify(pTHX_ const OP* o, CV *cv); #define PERL_ARGS_ASSERT_MULTIDEREF_STRINGIFY \ assert(o) diff --git a/regen/op_private b/regen/op_private index 94e0009425..d9082e7709 100644 --- a/regen/op_private +++ b/regen/op_private @@ -300,7 +300,8 @@ for (qw(nextstate dbstate)) { addbits($_, 7 => qw(OPpLVAL_INTRO LVINTRO)) for qw(gvsv rv2sv rv2hv rv2gv rv2av aelem helem aslice split hslice delete padsv padav padhv enteriter entersub padrange - pushmark cond_expr refassign lvref lvrefslice lvavref multideref), + pushmark cond_expr refassign lvref lvrefslice lvavref multideref + multiconcat), 'list', # this gets set in my_attrs() for some reason ; @@ -807,6 +808,14 @@ for (qw(index rindex)) { addbits($_, 6 => qw(OPpINDEX_BOOLNEG NEG)); } +addbits('multiconcat', + # 7 OPpLVAL_INTRO + 6 => qw(OPpMULTICONCAT_APPEND APPEND), # $x .= .... + 5 => qw(OPpMULTICONCAT_FAKE FAKE), # sprintf() optimised to MC. + # 4 OPpTARGET_MY + 3 => qw(OPpMULTICONCAT_STRINGIFY STRINGIFY), # "$a$b...", (for Deparse.pm) +); + 1; diff --git a/regen/opcode.pl b/regen/opcode.pl index d1c0faf6dc..01178668cd 100755 --- a/regen/opcode.pl +++ b/regen/opcode.pl @@ -55,7 +55,7 @@ while (<OPS>) { $args = '' unless defined $args; warn qq[Description "$desc" duplicates $seen{$desc}\n] - if $seen{$desc} and $key !~ "transr|(?:intro|clone)cv|lvref"; + if $seen{$desc} and $key !~ "concat|transr|(?:intro|clone)cv|lvref"; die qq[Opcode "$key" duplicates $seen{$key}\n] if $seen{$key}; die qq[Opcode "freed" is reserved for the slab allocator\n] if $key eq 'freed'; diff --git a/regen/opcodes b/regen/opcodes index 096c6fe823..5aa8a94fa5 100644 --- a/regen/opcodes +++ b/regen/opcodes @@ -131,6 +131,7 @@ i_add integer addition (+) ck_null ifsT2 S S subtract subtraction (-) ck_null IfsT2 S S i_subtract integer subtraction (-) ck_null ifsT2 S S concat concatenation (.) or string ck_concat fsT2 S S +multiconcat concatenation (.) or string ck_null sT+ stringify string ck_stringify fsT@ S left_shift left bitshift (<<) ck_bitop fsT2 S S @@ -16996,6 +16996,9 @@ Perl_report_uninit(pTHX_ const SV *uninit_sv) if (PL_op) { desc = PL_op->op_type == OP_STRINGIFY && PL_op->op_folded ? "join or string" + : PL_op->op_type == OP_MULTICONCAT + && (PL_op->op_private & OPpMULTICONCAT_FAKE) + ? "sprintf" : OP_DESC(PL_op); if (uninit_sv && PL_curpad) { varname = find_uninit_var(PL_op, uninit_sv, 0, &desc); diff --git a/t/op/gmagic.t b/t/op/gmagic.t index 5b2845bab4..210e8e5cc9 100644 --- a/t/op/gmagic.t +++ b/t/op/gmagic.t @@ -192,6 +192,29 @@ ok($wgot == 0, 'a plain *foo causes no set-magic'); 'mortal magic var from do is copied'; } +# For better or worse, the order in which concat args are fetched varies +# depending on their number. In A .= B.C.D, they are fetched in the order +# BCDA, while for A .= B, the order is AB (so for a single concat, the LHS +# tied arg is FETCH()ed first). Make sure multiconcat preserves current +# behaviour. + +package Increment { + sub TIESCALAR { bless [0, 0] } + # returns a new value for each FETCH, until the first STORE + sub FETCH { my $x = $_[0][0]; $_[0][0]++ unless $_[0][1]; $x } + sub STORE { @{$_[0]} = ($_[1],1) } + + my $t; + tie $t, 'Increment'; + my $r; + $r = $t . $t; + ::is $r, '01', 'Increment 01'; + $r = "-$t-$t-$t-"; + ::is $r, '-2-3-4-', 'Increment 234'; + $t .= "-$t-$t-$t-"; + ::is $t, '8-5-6-7-', 'Increment 8567'; +} + done_testing(); # adapted from Tie::Counter by Abigail diff --git a/t/op/sprintf2.t b/t/op/sprintf2.t index 6d32004a9d..eb90c763d8 100644 --- a/t/op/sprintf2.t +++ b/t/op/sprintf2.t @@ -1039,5 +1039,81 @@ like sprintf("%p", 0+'NaN'), qr/^[0-9a-f]+$/, "%p and NaN"; } } +# multiconcat: only one scalar assign at most should be optimised away + +{ + local our $x1 = ''; + local our $x2 = ''; + my ($a, $b) = qw(abcd wxyz); + $x1 = ($x2 = sprintf("%s%s", $a, $b)); + is $x1, "abcdwxyz", "\$x1 = \$x2 = sprintf(): x1"; + is $x2, "abcdwxyz", "\$x1 = \$x2 = sprintf(): x2"; + + my $y1 = ''; + my $y2 = ''; + $y1 = ($y2 = sprintf("%s%s", $a, $b)); + is $y1, "abcdwxyz", "\$y1 = \$y2 = sprintf(): y1"; + is $y2, "abcdwxyz", "\$y1 = \$y2 = sprintf(): y2"; +} + +# multiconcat: mutator optimisation + +{ + my $lex = 'abc'; + my $a1 = 'pqr'; + my $a2 = 'xyz'; + $lex .= sprintf "(%s,%s)", $a1, $a2; + is $lex, "abc(pqr,xyz)", "\$lex .= sprintf ..."; + + local our $pkg = "def"; + $pkg .= sprintf "(%s,%s)", $a1, $a2; + is $pkg, "def(pqr,xyz)", "\$pkg .= sprintf ..."; + + my @ary; + $ary[3] = "ghi"; + $ary[3] .= sprintf "(%s,%s)", $a1, $a2; + is $ary[3], "ghi(pqr,xyz)", "\$ary[3] .= sprintf ..."; +} + +# multiconcat: strings with 0x80..0xff chars and/or utf8 chars + +{ + my $plain = "abc"; + my $s80 = "d\x{80}e"; + my $s81 = "h\x{81}i"; + my $utf8 = "f\x{100}g"; + my $res; + + $res = sprintf "-%s-%s-\x{90}-%s-\x{91}-%s-\x{92}", + $plain, $s80, $utf8, $s81; + is $res, "-abc-d\x{80}e-\x{90}-f\x{100}g-\x{91}-h\x{81}i-\x{92}", + "multiconcat 80.ff handling"; + + $res = sprintf "%s \x{101} %s", $plain, $plain; + is $res, "abc \x{101} abc", "multiconcat p u p"; + + $res = sprintf "%s \x{101} %s", $plain, $utf8; + is $res, "abc \x{101} f\x{100}g", "multiconcat p u u"; +} + +# check /INTRO flag set correctly on multiconcat + +{ + my $a = "a"; + my $b = "b"; + my $x; + { + $x = sprintf "-%s-%s-", $a, $b; + } + is $x, "-a-b-", "no INTRO flag on non-my"; + for (1,2) { + my $y; + is $y, undef, "INTRO flag on my: $_"; + $y = sprintf "-%s-%s-", $b, $a; + is $y, "-b-a-", "INTRO flag on my - result: $_"; + } +} + + done_testing(); diff --git a/t/op/state.t b/t/op/state.t index 92f1f60887..39eeecd7c5 100644 --- a/t/op/state.t +++ b/t/op/state.t @@ -9,7 +9,7 @@ BEGIN { use strict; -plan tests => 124; +plan tests => 126; # Before loading feature.pm, test it with CORE:: ok eval 'CORE::state $x = 1;', 'CORE::state outside of feature.pm scope'; @@ -439,6 +439,13 @@ sub rt_123029 { } ok(rt_123029(), "state variables don't surprisingly disappear when accessed"); +# make sure multiconcat doesn't break state + +for (1,2) { + state $s = "-$_-"; + is($s, "-1-", "state with multiconcat pass $_"); +} + __DATA__ state ($a) = 1; (state $a) = 1; diff --git a/t/opbasic/concat.t b/t/opbasic/concat.t index 7802fc98ce..55965c1702 100644 --- a/t/opbasic/concat.t +++ b/t/opbasic/concat.t @@ -5,12 +5,13 @@ BEGIN { @INC = '../lib'; } -# ok() functions from other sources (e.g., t/test.pl) may use concatenation, -# but that is what is being tested in this file. Hence, we place this file -# in the directory where do not use t/test.pl, and we write an ok() function -# specially written to avoid any concatenation. +# ok()/is() functions from other sources (e.g., t/test.pl) may use +# concatenation, but that is what is being tested in this file. Hence, we +# place this file in the directory where do not use t/test.pl, and we +# write functions specially written to avoid any concatenation. my $test = 1; + sub ok { my($ok, $name) = @_; @@ -22,7 +23,23 @@ sub ok { return $ok; } -print "1..31\n"; +sub is { + my($got, $expected, $name) = @_; + + my $ok = $got eq $expected; + + printf "%sok %d - %s\n", ($ok ? "" : "not "), $test, $name; + + if (!$ok) { + printf "# Failed test at line %d\n", (caller)[2]; + printf "# got: %s\n#expected: %s\n", $got, $expected; + } + + $test++; + return $ok; +} + +print "1..251\n"; ($a, $b, $c) = qw(foo bar); @@ -132,6 +149,7 @@ sub beq { use bytes; $_[0] eq $_[1]; } my $up = "\x{100}\xB6"; my $x1 = $p; my $y1 = $u; + my ($x2, $x3, $x4, $y2); use bytes; ok(beq($p.$u, $p.$b), "perl #26905, left eq bytes"); @@ -144,11 +162,15 @@ sub beq { use bytes; $_[0] eq $_[1]; } $y1 .= $p; $y2 = $u . $p; + $x3 = $p; $x3 .= $u . $u; + $x4 = $p . $u . $u; + no bytes; ok(beq($x1, $x2), "perl #26905, left, .= vs = . in bytes"); ok(beq($y1, $y2), "perl #26905, right, .= vs = . in bytes"); ok(($x1 eq $x2), "perl #26905, left, .= vs = . in chars"); ok(($y1 eq $y2), "perl #26905, right, .= vs = . in chars"); + ok(($x3 eq $x4), "perl #26905, twin, .= vs = . in chars"); } { @@ -164,8 +186,627 @@ sub beq { use bytes; $_[0] eq $_[1]; } ok($x eq "ab-append-", "Appending to something initialized using constant folding"); } +# non-POK consts + +{ + my $a = "a"; + my $b; + $b = $a . $a . 1; + ok($b eq "aa1", "aa1"); + $b = 2 . $a . $a; + ok($b eq "2aa", "2aa"); +} + # [perl #124160] package o { use overload "." => sub { $_[0] }, fallback => 1 } $o = bless [], "o"; ok(ref(CORE::state $y = "a $o b") eq 'o', 'state $y = "foo $bar baz" does not stringify; only concats'); + + +# multiconcat: utf8 dest with non-utf8 args should grow dest sufficiently. +# This is mainly for valgrind or ASAN to detect problems with. + +{ + my $s = "\x{100}"; + my $t = "\x80" x 1024; + $s .= "-$t-"; + ok length($s) == 1027, "utf8 dest with non-utf8 args"; +} + +# target on RHS + +{ + my $a = "abc"; + $a .= $a; + ok($a eq 'abcabc', 'append self'); + + $a = "abc"; + $a = $a . $a; + ok($a eq 'abcabc', 'double self'); + + $a = "abc"; + $a .= $a . $a; + ok($a eq 'abcabcabc', 'append double self'); + + $a = "abc"; + $a = "$a-$a"; + ok($a eq 'abc-abc', 'double self with const'); + + $a = "abc"; + $a .= "$a-$a"; + ok($a eq 'abcabc-abc', 'append double self with const'); + + $a = "abc"; + $a .= $a . $a . $a; + ok($a eq 'abcabcabcabc', 'append triple self'); + + $a = "abc"; + $a = "$a-$a=$a"; + ok($a eq 'abc-abc=abc', 'triple self with const'); + + $a = "abc"; + $a .= "$a-$a=$a"; + ok($a eq 'abcabc-abc=abc', 'append triple self with const'); +} + +# test the sorts of optree which may (or may not) get optimised into +# a single MULTICONCAT op. It's based on a loop in t/perf/opcount.t, +# but here the loop is unwound as we would need to use concat to +# generate the expected results to compare with the actual results, +# which would rather defeat the object. + +{ + my ($a1, $a2, $a3) = qw(1 2 3); + our $pkg; + my $lex; + + is("-", '-', '"-"'); + is("-", '-', '"-"'); + is("-", '-', '"-"'); + is("-", '-', '"-"'); + is($a1, '1', '$a1'); + is("-".$a1, '-1', '"-".$a1'); + is($a1."-", '1-', '$a1."-"'); + is("-".$a1."-", '-1-', '"-".$a1."-"'); + is("$a1", '1', '"$a1"'); + is("-$a1", '-1', '"-$a1"'); + is("$a1-", '1-', '"$a1-"'); + is("-$a1-", '-1-', '"-$a1-"'); + is($a1.$a2, '12', '$a1.$a2'); + is($a1."-".$a2, '1-2', '$a1."-".$a2'); + is("-".$a1."-".$a2, '-1-2', '"-".$a1."-".$a2'); + is($a1."-".$a2."-", '1-2-', '$a1."-".$a2."-"'); + is("-".$a1."-".$a2."-", '-1-2-', '"-".$a1."-".$a2."-"'); + is("$a1$a2", '12', '"$a1$a2"'); + is("$a1-$a2", '1-2', '"$a1-$a2"'); + is("-$a1-$a2", '-1-2', '"-$a1-$a2"'); + is("$a1-$a2-", '1-2-', '"$a1-$a2-"'); + is("-$a1-$a2-", '-1-2-', '"-$a1-$a2-"'); + is($a1.$a2.$a3, '123', '$a1.$a2.$a3'); + is($a1."-".$a2."-".$a3, '1-2-3', '$a1."-".$a2."-".$a3'); + is("-".$a1."-".$a2."-".$a3, '-1-2-3', '"-".$a1."-".$a2."-".$a3'); + is($a1."-".$a2."-".$a3."-", '1-2-3-', '$a1."-".$a2."-".$a3."-"'); + is("-".$a1."-".$a2."-".$a3."-", '-1-2-3-', '"-".$a1."-".$a2."-".$a3."-"'); + is("$a1$a2$a3", '123', '"$a1$a2$a3"'); + is("$a1-$a2-$a3", '1-2-3', '"$a1-$a2-$a3"'); + is("-$a1-$a2-$a3", '-1-2-3', '"-$a1-$a2-$a3"'); + is("$a1-$a2-$a3-", '1-2-3-', '"$a1-$a2-$a3-"'); + is("-$a1-$a2-$a3-", '-1-2-3-', '"-$a1-$a2-$a3-"'); + $pkg = "-"; + is($pkg, '-', '$pkg = "-"'); + $pkg = "-"; + is($pkg, '-', '$pkg = "-"'); + $pkg = "-"; + is($pkg, '-', '$pkg = "-"'); + $pkg = "-"; + is($pkg, '-', '$pkg = "-"'); + $pkg = $a1; + is($pkg, '1', '$pkg = $a1'); + $pkg = "-".$a1; + is($pkg, '-1', '$pkg = "-".$a1'); + $pkg = $a1."-"; + is($pkg, '1-', '$pkg = $a1."-"'); + $pkg = "-".$a1."-"; + is($pkg, '-1-', '$pkg = "-".$a1."-"'); + $pkg = "$a1"; + is($pkg, '1', '$pkg = "$a1"'); + $pkg = "-$a1"; + is($pkg, '-1', '$pkg = "-$a1"'); + $pkg = "$a1-"; + is($pkg, '1-', '$pkg = "$a1-"'); + $pkg = "-$a1-"; + is($pkg, '-1-', '$pkg = "-$a1-"'); + $pkg = $a1.$a2; + is($pkg, '12', '$pkg = $a1.$a2'); + $pkg = $a1."-".$a2; + is($pkg, '1-2', '$pkg = $a1."-".$a2'); + $pkg = "-".$a1."-".$a2; + is($pkg, '-1-2', '$pkg = "-".$a1."-".$a2'); + $pkg = $a1."-".$a2."-"; + is($pkg, '1-2-', '$pkg = $a1."-".$a2."-"'); + $pkg = "-".$a1."-".$a2."-"; + is($pkg, '-1-2-', '$pkg = "-".$a1."-".$a2."-"'); + $pkg = "$a1$a2"; + is($pkg, '12', '$pkg = "$a1$a2"'); + $pkg = "$a1-$a2"; + is($pkg, '1-2', '$pkg = "$a1-$a2"'); + $pkg = "-$a1-$a2"; + is($pkg, '-1-2', '$pkg = "-$a1-$a2"'); + $pkg = "$a1-$a2-"; + is($pkg, '1-2-', '$pkg = "$a1-$a2-"'); + $pkg = "-$a1-$a2-"; + is($pkg, '-1-2-', '$pkg = "-$a1-$a2-"'); + $pkg = $a1.$a2.$a3; + is($pkg, '123', '$pkg = $a1.$a2.$a3'); + $pkg = $a1."-".$a2."-".$a3; + is($pkg, '1-2-3', '$pkg = $a1."-".$a2."-".$a3'); + $pkg = "-".$a1."-".$a2."-".$a3; + is($pkg, '-1-2-3', '$pkg = "-".$a1."-".$a2."-".$a3'); + $pkg = $a1."-".$a2."-".$a3."-"; + is($pkg, '1-2-3-', '$pkg = $a1."-".$a2."-".$a3."-"'); + $pkg = "-".$a1."-".$a2."-".$a3."-"; + is($pkg, '-1-2-3-', '$pkg = "-".$a1."-".$a2."-".$a3."-"'); + $pkg = "$a1$a2$a3"; + is($pkg, '123', '$pkg = "$a1$a2$a3"'); + $pkg = "$a1-$a2-$a3"; + is($pkg, '1-2-3', '$pkg = "$a1-$a2-$a3"'); + $pkg = "-$a1-$a2-$a3"; + is($pkg, '-1-2-3', '$pkg = "-$a1-$a2-$a3"'); + $pkg = "$a1-$a2-$a3-"; + is($pkg, '1-2-3-', '$pkg = "$a1-$a2-$a3-"'); + $pkg = "-$a1-$a2-$a3-"; + is($pkg, '-1-2-3-', '$pkg = "-$a1-$a2-$a3-"'); + $pkg = 'P'; + $pkg .= "-"; + is($pkg, 'P-', '$pkg .= "-"'); + $pkg = 'P'; + $pkg .= "-"; + is($pkg, 'P-', '$pkg .= "-"'); + $pkg = 'P'; + $pkg .= "-"; + is($pkg, 'P-', '$pkg .= "-"'); + $pkg = 'P'; + $pkg .= "-"; + is($pkg, 'P-', '$pkg .= "-"'); + $pkg = 'P'; + $pkg .= $a1; + is($pkg, 'P1', '$pkg .= $a1'); + $pkg = 'P'; + $pkg .= "-".$a1; + is($pkg, 'P-1', '$pkg .= "-".$a1'); + $pkg = 'P'; + $pkg .= $a1."-"; + is($pkg, 'P1-', '$pkg .= $a1."-"'); + $pkg = 'P'; + $pkg .= "-".$a1."-"; + is($pkg, 'P-1-', '$pkg .= "-".$a1."-"'); + $pkg = 'P'; + $pkg .= "$a1"; + is($pkg, 'P1', '$pkg .= "$a1"'); + $pkg = 'P'; + $pkg .= "-$a1"; + is($pkg, 'P-1', '$pkg .= "-$a1"'); + $pkg = 'P'; + $pkg .= "$a1-"; + is($pkg, 'P1-', '$pkg .= "$a1-"'); + $pkg = 'P'; + $pkg .= "-$a1-"; + is($pkg, 'P-1-', '$pkg .= "-$a1-"'); + $pkg = 'P'; + $pkg .= $a1.$a2; + is($pkg, 'P12', '$pkg .= $a1.$a2'); + $pkg = 'P'; + $pkg .= $a1."-".$a2; + is($pkg, 'P1-2', '$pkg .= $a1."-".$a2'); + $pkg = 'P'; + $pkg .= "-".$a1."-".$a2; + is($pkg, 'P-1-2', '$pkg .= "-".$a1."-".$a2'); + $pkg = 'P'; + $pkg .= $a1."-".$a2."-"; + is($pkg, 'P1-2-', '$pkg .= $a1."-".$a2."-"'); + $pkg = 'P'; + $pkg .= "-".$a1."-".$a2."-"; + is($pkg, 'P-1-2-', '$pkg .= "-".$a1."-".$a2."-"'); + $pkg = 'P'; + $pkg .= "$a1$a2"; + is($pkg, 'P12', '$pkg .= "$a1$a2"'); + $pkg = 'P'; + $pkg .= "$a1-$a2"; + is($pkg, 'P1-2', '$pkg .= "$a1-$a2"'); + $pkg = 'P'; + $pkg .= "-$a1-$a2"; + is($pkg, 'P-1-2', '$pkg .= "-$a1-$a2"'); + $pkg = 'P'; + $pkg .= "$a1-$a2-"; + is($pkg, 'P1-2-', '$pkg .= "$a1-$a2-"'); + $pkg = 'P'; + $pkg .= "-$a1-$a2-"; + is($pkg, 'P-1-2-', '$pkg .= "-$a1-$a2-"'); + $pkg = 'P'; + $pkg .= $a1.$a2.$a3; + is($pkg, 'P123', '$pkg .= $a1.$a2.$a3'); + $pkg = 'P'; + $pkg .= $a1."-".$a2."-".$a3; + is($pkg, 'P1-2-3', '$pkg .= $a1."-".$a2."-".$a3'); + $pkg = 'P'; + $pkg .= "-".$a1."-".$a2."-".$a3; + is($pkg, 'P-1-2-3', '$pkg .= "-".$a1."-".$a2."-".$a3'); + $pkg = 'P'; + $pkg .= $a1."-".$a2."-".$a3."-"; + is($pkg, 'P1-2-3-', '$pkg .= $a1."-".$a2."-".$a3."-"'); + $pkg = 'P'; + $pkg .= "-".$a1."-".$a2."-".$a3."-"; + is($pkg, 'P-1-2-3-', '$pkg .= "-".$a1."-".$a2."-".$a3."-"'); + $pkg = 'P'; + $pkg .= "$a1$a2$a3"; + is($pkg, 'P123', '$pkg .= "$a1$a2$a3"'); + $pkg = 'P'; + $pkg .= "$a1-$a2-$a3"; + is($pkg, 'P1-2-3', '$pkg .= "$a1-$a2-$a3"'); + $pkg = 'P'; + $pkg .= "-$a1-$a2-$a3"; + is($pkg, 'P-1-2-3', '$pkg .= "-$a1-$a2-$a3"'); + $pkg = 'P'; + $pkg .= "$a1-$a2-$a3-"; + is($pkg, 'P1-2-3-', '$pkg .= "$a1-$a2-$a3-"'); + $pkg = 'P'; + $pkg .= "-$a1-$a2-$a3-"; + is($pkg, 'P-1-2-3-', '$pkg .= "-$a1-$a2-$a3-"'); + $lex = "-"; + is($lex, '-', '$lex = "-"'); + $lex = "-"; + is($lex, '-', '$lex = "-"'); + $lex = "-"; + is($lex, '-', '$lex = "-"'); + $lex = "-"; + is($lex, '-', '$lex = "-"'); + $lex = $a1; + is($lex, '1', '$lex = $a1'); + $lex = "-".$a1; + is($lex, '-1', '$lex = "-".$a1'); + $lex = $a1."-"; + is($lex, '1-', '$lex = $a1."-"'); + $lex = "-".$a1."-"; + is($lex, '-1-', '$lex = "-".$a1."-"'); + $lex = "$a1"; + is($lex, '1', '$lex = "$a1"'); + $lex = "-$a1"; + is($lex, '-1', '$lex = "-$a1"'); + $lex = "$a1-"; + is($lex, '1-', '$lex = "$a1-"'); + $lex = "-$a1-"; + is($lex, '-1-', '$lex = "-$a1-"'); + $lex = $a1.$a2; + is($lex, '12', '$lex = $a1.$a2'); + $lex = $a1."-".$a2; + is($lex, '1-2', '$lex = $a1."-".$a2'); + $lex = "-".$a1."-".$a2; + is($lex, '-1-2', '$lex = "-".$a1."-".$a2'); + $lex = $a1."-".$a2."-"; + is($lex, '1-2-', '$lex = $a1."-".$a2."-"'); + $lex = "-".$a1."-".$a2."-"; + is($lex, '-1-2-', '$lex = "-".$a1."-".$a2."-"'); + $lex = "$a1$a2"; + is($lex, '12', '$lex = "$a1$a2"'); + $lex = "$a1-$a2"; + is($lex, '1-2', '$lex = "$a1-$a2"'); + $lex = "-$a1-$a2"; + is($lex, '-1-2', '$lex = "-$a1-$a2"'); + $lex = "$a1-$a2-"; + is($lex, '1-2-', '$lex = "$a1-$a2-"'); + $lex = "-$a1-$a2-"; + is($lex, '-1-2-', '$lex = "-$a1-$a2-"'); + $lex = $a1.$a2.$a3; + is($lex, '123', '$lex = $a1.$a2.$a3'); + $lex = $a1."-".$a2."-".$a3; + is($lex, '1-2-3', '$lex = $a1."-".$a2."-".$a3'); + $lex = "-".$a1."-".$a2."-".$a3; + is($lex, '-1-2-3', '$lex = "-".$a1."-".$a2."-".$a3'); + $lex = $a1."-".$a2."-".$a3."-"; + is($lex, '1-2-3-', '$lex = $a1."-".$a2."-".$a3."-"'); + $lex = "-".$a1."-".$a2."-".$a3."-"; + is($lex, '-1-2-3-', '$lex = "-".$a1."-".$a2."-".$a3."-"'); + $lex = "$a1$a2$a3"; + is($lex, '123', '$lex = "$a1$a2$a3"'); + $lex = "$a1-$a2-$a3"; + is($lex, '1-2-3', '$lex = "$a1-$a2-$a3"'); + $lex = "-$a1-$a2-$a3"; + is($lex, '-1-2-3', '$lex = "-$a1-$a2-$a3"'); + $lex = "$a1-$a2-$a3-"; + is($lex, '1-2-3-', '$lex = "$a1-$a2-$a3-"'); + $lex = "-$a1-$a2-$a3-"; + is($lex, '-1-2-3-', '$lex = "-$a1-$a2-$a3-"'); + $lex = 'L'; + $lex .= "-"; + is($lex, 'L-', '$lex .= "-"'); + $lex = 'L'; + $lex .= "-"; + is($lex, 'L-', '$lex .= "-"'); + $lex = 'L'; + $lex .= "-"; + is($lex, 'L-', '$lex .= "-"'); + $lex = 'L'; + $lex .= "-"; + is($lex, 'L-', '$lex .= "-"'); + $lex = 'L'; + $lex .= $a1; + is($lex, 'L1', '$lex .= $a1'); + $lex = 'L'; + $lex .= "-".$a1; + is($lex, 'L-1', '$lex .= "-".$a1'); + $lex = 'L'; + $lex .= $a1."-"; + is($lex, 'L1-', '$lex .= $a1."-"'); + $lex = 'L'; + $lex .= "-".$a1."-"; + is($lex, 'L-1-', '$lex .= "-".$a1."-"'); + $lex = 'L'; + $lex .= "$a1"; + is($lex, 'L1', '$lex .= "$a1"'); + $lex = 'L'; + $lex .= "-$a1"; + is($lex, 'L-1', '$lex .= "-$a1"'); + $lex = 'L'; + $lex .= "$a1-"; + is($lex, 'L1-', '$lex .= "$a1-"'); + $lex = 'L'; + $lex .= "-$a1-"; + is($lex, 'L-1-', '$lex .= "-$a1-"'); + $lex = 'L'; + $lex .= $a1.$a2; + is($lex, 'L12', '$lex .= $a1.$a2'); + $lex = 'L'; + $lex .= $a1."-".$a2; + is($lex, 'L1-2', '$lex .= $a1."-".$a2'); + $lex = 'L'; + $lex .= "-".$a1."-".$a2; + is($lex, 'L-1-2', '$lex .= "-".$a1."-".$a2'); + $lex = 'L'; + $lex .= $a1."-".$a2."-"; + is($lex, 'L1-2-', '$lex .= $a1."-".$a2."-"'); + $lex = 'L'; + $lex .= "-".$a1."-".$a2."-"; + is($lex, 'L-1-2-', '$lex .= "-".$a1."-".$a2."-"'); + $lex = 'L'; + $lex .= "$a1$a2"; + is($lex, 'L12', '$lex .= "$a1$a2"'); + $lex = 'L'; + $lex .= "$a1-$a2"; + is($lex, 'L1-2', '$lex .= "$a1-$a2"'); + $lex = 'L'; + $lex .= "-$a1-$a2"; + is($lex, 'L-1-2', '$lex .= "-$a1-$a2"'); + $lex = 'L'; + $lex .= "$a1-$a2-"; + is($lex, 'L1-2-', '$lex .= "$a1-$a2-"'); + $lex = 'L'; + $lex .= "-$a1-$a2-"; + is($lex, 'L-1-2-', '$lex .= "-$a1-$a2-"'); + $lex = 'L'; + $lex .= $a1.$a2.$a3; + is($lex, 'L123', '$lex .= $a1.$a2.$a3'); + $lex = 'L'; + $lex .= $a1."-".$a2."-".$a3; + is($lex, 'L1-2-3', '$lex .= $a1."-".$a2."-".$a3'); + $lex = 'L'; + $lex .= "-".$a1."-".$a2."-".$a3; + is($lex, 'L-1-2-3', '$lex .= "-".$a1."-".$a2."-".$a3'); + $lex = 'L'; + $lex .= $a1."-".$a2."-".$a3."-"; + is($lex, 'L1-2-3-', '$lex .= $a1."-".$a2."-".$a3."-"'); + $lex = 'L'; + $lex .= "-".$a1."-".$a2."-".$a3."-"; + is($lex, 'L-1-2-3-', '$lex .= "-".$a1."-".$a2."-".$a3."-"'); + $lex = 'L'; + $lex .= "$a1$a2$a3"; + is($lex, 'L123', '$lex .= "$a1$a2$a3"'); + $lex = 'L'; + $lex .= "$a1-$a2-$a3"; + is($lex, 'L1-2-3', '$lex .= "$a1-$a2-$a3"'); + $lex = 'L'; + $lex .= "-$a1-$a2-$a3"; + is($lex, 'L-1-2-3', '$lex .= "-$a1-$a2-$a3"'); + $lex = 'L'; + $lex .= "$a1-$a2-$a3-"; + is($lex, 'L1-2-3-', '$lex .= "$a1-$a2-$a3-"'); + $lex = 'L'; + $lex .= "-$a1-$a2-$a3-"; + is($lex, 'L-1-2-3-', '$lex .= "-$a1-$a2-$a3-"'); + { + my $l = "-"; + is($l, '-', 'my $l = "-"'); + } + { + my $l = "-"; + is($l, '-', 'my $l = "-"'); + } + { + my $l = "-"; + is($l, '-', 'my $l = "-"'); + } + { + my $l = "-"; + is($l, '-', 'my $l = "-"'); + } + { + my $l = $a1; + is($l, '1', 'my $l = $a1'); + } + { + my $l = "-".$a1; + is($l, '-1', 'my $l = "-".$a1'); + } + { + my $l = $a1."-"; + is($l, '1-', 'my $l = $a1."-"'); + } + { + my $l = "-".$a1."-"; + is($l, '-1-', 'my $l = "-".$a1."-"'); + } + { + my $l = "$a1"; + is($l, '1', 'my $l = "$a1"'); + } + { + my $l = "-$a1"; + is($l, '-1', 'my $l = "-$a1"'); + } + { + my $l = "$a1-"; + is($l, '1-', 'my $l = "$a1-"'); + } + { + my $l = "-$a1-"; + is($l, '-1-', 'my $l = "-$a1-"'); + } + { + my $l = $a1.$a2; + is($l, '12', 'my $l = $a1.$a2'); + } + { + my $l = $a1."-".$a2; + is($l, '1-2', 'my $l = $a1."-".$a2'); + } + { + my $l = "-".$a1."-".$a2; + is($l, '-1-2', 'my $l = "-".$a1."-".$a2'); + } + { + my $l = $a1."-".$a2."-"; + is($l, '1-2-', 'my $l = $a1."-".$a2."-"'); + } + { + my $l = "-".$a1."-".$a2."-"; + is($l, '-1-2-', 'my $l = "-".$a1."-".$a2."-"'); + } + { + my $l = "$a1$a2"; + is($l, '12', 'my $l = "$a1$a2"'); + } + { + my $l = "$a1-$a2"; + is($l, '1-2', 'my $l = "$a1-$a2"'); + } + { + my $l = "-$a1-$a2"; + is($l, '-1-2', 'my $l = "-$a1-$a2"'); + } + { + my $l = "$a1-$a2-"; + is($l, '1-2-', 'my $l = "$a1-$a2-"'); + } + { + my $l = "-$a1-$a2-"; + is($l, '-1-2-', 'my $l = "-$a1-$a2-"'); + } + { + my $l = $a1.$a2.$a3; + is($l, '123', 'my $l = $a1.$a2.$a3'); + } + { + my $l = $a1."-".$a2."-".$a3; + is($l, '1-2-3', 'my $l = $a1."-".$a2."-".$a3'); + } + { + my $l = "-".$a1."-".$a2."-".$a3; + is($l, '-1-2-3', 'my $l = "-".$a1."-".$a2."-".$a3'); + } + { + my $l = $a1."-".$a2."-".$a3."-"; + is($l, '1-2-3-', 'my $l = $a1."-".$a2."-".$a3."-"'); + } + { + my $l = "-".$a1."-".$a2."-".$a3."-"; + is($l, '-1-2-3-', 'my $l = "-".$a1."-".$a2."-".$a3."-"'); + } + { + my $l = "$a1$a2$a3"; + is($l, '123', 'my $l = "$a1$a2$a3"'); + } + { + my $l = "$a1-$a2-$a3"; + is($l, '1-2-3', 'my $l = "$a1-$a2-$a3"'); + } + { + my $l = "-$a1-$a2-$a3"; + is($l, '-1-2-3', 'my $l = "-$a1-$a2-$a3"'); + } + { + my $l = "$a1-$a2-$a3-"; + is($l, '1-2-3-', 'my $l = "$a1-$a2-$a3-"'); + } + { + my $l = "-$a1-$a2-$a3-"; + is($l, '-1-2-3-', 'my $l = "-$a1-$a2-$a3-"'); + } +} + +# multiconcat optimises away scalar assign, and is responsible +# for handling the assign itself. If the LHS is something weird, +# make sure it's handled ok + +{ + my $a = 'a'; + my $b = 'b'; + my $o = 'o'; + + my $re = qr/abc/; + $$re = $a . $b; + is($$re, "ab", '$$re = $a . $b'); + + #passing a hash elem to a sub creates a PVLV + my $s = sub { $_[0] = $a . $b; }; + my %h; + $s->($h{foo}); + is($h{foo}, "ab", "PVLV"); + + # assigning a string to a typeglob creates an alias + $Foo = 'myfoo'; + *Bar = ("F" . $o . $o); + is($Bar, "myfoo", '*Bar = "Foo"'); + + # while that same typeglob also appearing on the RHS returns + # a stringified value + + package QPR { + ${'*QPR::Bar*QPR::BarBaz'} = 'myfoobarbaz'; + *Bar = (*Bar . *Bar . "Baz"); + ::is($Bar, "myfoobarbaz", '*Bar = (*Bar . *Bar . "Baz")'); + } +} + +# distinguish between '=' and '.=' where the LHS has the OPf_MOD flag + +{ + my $foo = "foo"; + my $a . $foo; # weird but legal + is($a, '', 'my $a . $foo'); + my $b; $b .= $foo; + is($b, 'foo', 'my $b; $b .= $foo'); +} + +# distinguish between nested appends and concats; the former is +# affected by the change of value of the target on each concat. +# This is why multiconcat shouldn't be used in that case + +{ + my $a = "a"; + (($a .= $a) .= $a) .= $a; + is($a, "aaaaaaaa", '(($a .= $a) .= $a) .= $a;'); +} + +# check everything works ok near the max arg size of a multiconcat + +{ + my @a = map "<$_>", 0..99; + for my $i (60..68) { # check each side of 64 threshold + my $c = join '.', map "\$a[$_]", 0..$i; + my $got = eval $c or die $@; + my $empty = ''; # don't use a const string in case join'' ever + # gets optimised into a multiconcat + my $expected = join $empty, @a[0..$i]; + is($got, $expected, "long concat chain $i"); + } +} diff --git a/t/perf/benchmarks b/t/perf/benchmarks index 0bebf54ccc..7795079c78 100644 --- a/t/perf/benchmarks +++ b/t/perf/benchmarks @@ -1096,10 +1096,257 @@ }, + # concatenation; quite possibly optimised to OP_MULTICONCAT + + 'expr::concat::cl' => { + setup => 'my $lex = "abcd"', + code => '"foo" . $lex', + }, + 'expr::concat::lc' => { + setup => 'my $lex = "abcd"', + code => '$lex . "foo"', + }, + 'expr::concat::ll' => { + setup => 'my $lex1 = "abcd"; my $lex2 = "wxyz"', + code => '$lex1 . $lex2', + }, + + 'expr::concat::l_append_c' => { + setup => 'my $lex', + pre => '$lex = "abcd"', + code => '$lex .= "foo"', + }, + 'expr::concat::l_append_l' => { + setup => 'my $lex1; my $lex2 = "wxyz"', + pre => '$lex1 = "abcd"', + code => '$lex1 .= $lex2', + }, + 'expr::concat::l_append_ll' => { + setup => 'my $lex1; my $lex2 = "pqrs"; my $lex3 = "wxyz"', + pre => '$lex1 = "abcd"', + code => '$lex1 .= $lex2 . $lex3', + }, + 'expr::concat::l_append_clclc' => { + setup => 'my $lex1; my $lex2 = "pqrs"; my $lex3 = "wxyz"', + pre => '$lex1 = "abcd"', + code => '$lex1 .= "-foo-$lex2-foo-$lex3-foo"', + }, + 'expr::concat::l_append_lll' => { + setup => 'my $lex1; my ($lex2, $lex3, $lex4) = qw(pqrs wxyz 1234)', + pre => '$lex1 = "abcd"', + code => '$lex1 .= $lex2 . $lex3 . $lex4', + }, + + 'expr::concat::m_ll' => { + setup => 'my $lex1 = "abcd"; my $lex2 = "wxyz"', + code => 'my $lex = $lex1 . $lex2', + }, + 'expr::concat::m_lll' => { + setup => 'my $lex1 = "abcd"; my $lex2 = "pqrs"; my $lex3 = "wxyz"', + code => 'my $lex = $lex1 . $lex2 . $lex3', + }, + 'expr::concat::m_cl' => { + setup => 'my $lex1 = "abcd"', + code => 'my $lex = "const$lex1"', + }, + 'expr::concat::m_clclc' => { + setup => 'my $lex1 = "abcd"; my $lex2 = "wxyz"', + code => 'my $lex = "foo=$lex1 bar=$lex2\n"', + }, + 'expr::concat::m_clclc_long' => { + desc => 'my $lex = "foooooooooo=$lex1 baaaaaaaaar=$lex2\n" where lex1/2 are 400 chars', + setup => 'my $lex1 = "abcd" x 100; my $lex2 = "wxyz" x 100', + code => 'my $lex = "foooooooooo=$lex1 baaaaaaaaar=$lex2\n"', + }, + + 'expr::concat::l_ll' => { + setup => 'my $lex; my $lex1 = "abcd"; my $lex2 = "wxyz"', + code => '$lex = $lex1 . $lex2', + }, + 'expr::concat::l_ll_ldup' => { + setup => 'my $lex1; my $lex2 = "wxyz"', + pre => '$lex1 = "abcd"', + code => '$lex1 = $lex1 . $lex2', + }, + 'expr::concat::l_ll_rdup' => { + setup => 'my $lex1; my $lex2 = "wxyz"', + pre => '$lex1 = "abcd"', + code => '$lex1 = $lex2 . $lex1', + }, + 'expr::concat::l_ll_lrdup' => { + setup => 'my $lex1', + pre => '$lex1 = "abcd"', + code => '$lex1 = $lex1 . $lex1', + }, + 'expr::concat::l_lll' => { + setup => 'my $lex; my $lex1 = "abcd"; my $lex2 = "pqrs"; my $lex3 = "wxyz"', + code => '$lex = $lex1 . $lex2 . $lex3', + }, + 'expr::concat::l_lllll' => { + setup => 'my $lex; my $lex1 = "abcd"; my $lex2 = "pqrs"; my $lex3 = "wxyz"; my $lex4 = "the quick brown fox"; my $lex5 = "to be, or not to be..."', + code => '$lex = $lex1 . $lex2 . $lex3 . $lex4 . $lex5', + }, + 'expr::concat::l_cl' => { + setup => 'my $lex; my $lex1 = "abcd"', + code => '$lex = "const$lex1"', + }, + 'expr::concat::l_clclc' => { + setup => 'my $lex; my $lex1 = "abcd"; my $lex2 = "wxyz"', + code => '$lex = "foo=$lex1 bar=$lex2\n"', + }, + 'expr::concat::l_clclc_long' => { + desc => '$lex = "foooooooooo=$lex1 baaaaaaaaar=$lex2\n" where lex1/2 are 400 chars', + setup => 'my $lex; my $lex1 = "abcd" x 100; my $lex2 = "wxyz" x 100', + code => '$lex = "foooooooooo=$lex1 baaaaaaaaar=$lex2\n"', + }, + 'expr::concat::l_clclclclclc' => { + setup => 'my $lex; my $lex1 = "abcd"; my $lex2 = "pqrs"; my $lex3 = "the quick brown fox"; my $lex4 = "to be, or not to be..."', + code => '$lex = "foo1=$lex1 foo2=$lex2 foo3=$lex3 foo4=$lex4\n"', + }, + + 'expr::concat::g_append_c' => { + setup => 'our $pkg', + pre => '$pkg = "abcd"', + code => '$pkg .= "foo"', + }, + 'expr::concat::g_append_l' => { + setup => 'our $pkg; my $lex1 = "wxyz"', + pre => '$pkg = "abcd"', + code => '$pkg .= $lex1', + }, + 'expr::concat::g_append_ll' => { + setup => 'our $pkg; my $lex1 = "pqrs"; my $lex2 = "wxyz"', + pre => '$pkg = "abcd"', + code => '$pkg .= $lex1 . $lex2', + }, + 'expr::concat::g_append_clclc' => { + setup => 'our $pkg; my $lex1 = "pqrs"; my $lex2 = "wxyz"', + pre => '$pkg = "abcd"', + code => '$pkg .= "-foo-$lex1-foo-$lex2-foo-"', + }, + + 'expr::concat::g_ll' => { + setup => 'our $pkg; my $lex1 = "abcd"; my $lex2 = "wxyz"', + code => '$pkg = $lex1 . $lex2', + }, + 'expr::concat::g_gl_ldup' => { + setup => 'our $pkg; my $lex2 = "wxyz"', + pre => '$pkg = "abcd"', + code => '$pkg = $pkg . $lex2', + }, + 'expr::concat::g_lg_rdup' => { + setup => 'our $pkg; my $lex1 = "wxyz"', + pre => '$pkg = "abcd"', + code => '$pkg = $lex1 . $pkg', + }, + 'expr::concat::g_gg_lrdup' => { + setup => 'our $pkg', + pre => '$pkg = "abcd"', + code => '$pkg = $pkg . $pkg', + }, + 'expr::concat::g_lll' => { + setup => 'our $pkg; my $lex1 = "abcd"; my $lex2 = "pqrs"; my $lex3 = "wxyz"', + code => '$pkg = $lex1 . $lex2 . $lex3', + }, + 'expr::concat::g_cl' => { + setup => 'our $pkg; my $lex1 = "abcd"', + code => '$pkg = "const$lex1"', + }, + 'expr::concat::g_clclc' => { + setup => 'our $pkg; my $lex1 = "abcd"; my $lex2 = "wxyz"', + code => '$pkg = "foo=$lex1 bar=$lex2\n"', + }, + 'expr::concat::g_clclc_long' => { + desc => '$pkg = "foooooooooo=$lex1 baaaaaaaaar=$lex2\n" where lex1/2 are 400 chars', + setup => 'our $pkg; my $lex1 = "abcd" x 100; my $lex2 = "wxyz" x 100', + code => '$pkg = "foooooooooo=$lex1 baaaaaaaaar=$lex2\n"', + }, + + 'expr::concat::utf8_uuu' => { + desc => 'my $s = $a.$b.$c where all args are utf8', + setup => 'my $s; my $a = "ab\x{100}cde"; my $b = "\x{101}fghij"; my $c = "\x{102}klmn"', + code => '$s = $a.$b.$c', + }, + 'expr::concat::utf8_suu' => { + desc => 'my $s = "foo=$a bar=$b baz=$c" where $b,$c are utf8', + setup => 'my $s; my $a = "abcde"; my $b = "\x{100}fghij"; my $c = "\x{101}klmn"', + code => '$s = "foo=$a bar=$b baz=$c"', + }, + 'expr::concat::utf8_usu' => { + desc => 'my $s = "foo=$a bar=$b baz=$c" where $a,$c are utf8', + setup => 'my $s; my $a = "ab\x{100}cde"; my $b = "fghij"; my $c = "\x{101}klmn"', + code => '$s = "foo=$a bar=$b baz=$c"', + }, + 'expr::concat::utf8_usx' => { + desc => 'my $s = "foo=$a bar=$b baz=$c" where $a is utf8, $c has 0x80', + setup => 'my $s; my $a = "ab\x{100}cde"; my $b = "fghij"; my $c = "\x80\x81klmn"', + code => '$s = "foo=$a bar=$b baz=$c"', + }, + + 'expr::concat::utf8_s_append_uuu' => { + desc => '$s .= $a.$b.$c where all RH args are utf8', + setup => 'my $s; my $a = "ab\x{100}cde"; my $b = "\x{101}fghij"; my $c = "\x{102}klmn"', + pre => '$s = "abcd"', + code => '$s .= $a.$b.$c', + }, + 'expr::concat::utf8_s_append_suu' => { + desc => '$s .= "foo=$a bar=$b baz=$c" where $b,$c are utf8', + setup => 'my $s; my $a = "abcde"; my $b = "\x{100}fghij"; my $c = "\x{101}klmn"', + pre => '$s = "abcd"', + code => '$s .= "foo=$a bar=$b baz=$c"', + }, + 'expr::concat::utf8_s_append_usu' => { + desc => '$s .= "foo=$a bar=$b baz=$c" where $a,$c are utf8', + setup => 'my $s; my $a = "ab\x{100}cde"; my $b = "fghij"; my $c = "\x{101}klmn"', + pre => '$s = "abcd"', + code => '$s .= "foo=$a bar=$b baz=$c"', + }, + 'expr::concat::utf8_s_append_usx' => { + desc => '$s .= "foo=$a bar=$b baz=$c" where $a is utf8, $c has 0x80', + setup => 'my $s; my $a = "ab\x{100}cde"; my $b = "fghij"; my $c = "\x80\x81klmn"', + pre => '$s = "abcd"', + code => '$s .= "foo=$a bar=$b baz=$c"', + }, + + 'expr::concat::utf8_u_append_uuu' => { + desc => '$s .= $a.$b.$c where all args are utf8', + setup => 'my $s; my $a = "ab\x{100}cde"; my $b = "\x{101}fghij"; my $c = "\x{102}klmn"', + pre => '$s = "\x{100}wxyz"', + code => '$s .= $a.$b.$c', + }, + 'expr::concat::utf8_u_append_suu' => { + desc => '$s .= "foo=$a bar=$b baz=$c" where $s,$b,$c are utf8', + setup => 'my $s; my $a = "abcde"; my $b = "\x{100}fghij"; my $c = "\x{101}klmn"', + pre => '$s = "\x{100}wxyz"', + code => '$s .= "foo=$a bar=$b baz=$c"', + }, + 'expr::concat::utf8_u_append_usu' => { + desc => '$s .= "foo=$a bar=$b baz=$c" where $s,$a,$c are utf8', + setup => 'my $s; my $a = "ab\x{100}cde"; my $b = "fghij"; my $c = "\x{101}klmn"', + pre => '$s = "\x{100}wxyz"', + code => '$s .= "foo=$a bar=$b baz=$c"', + }, + 'expr::concat::utf8_u_append_usx' => { + desc => '$s .= "foo=$a bar=$b baz=$c" where $s,$a are utf8, $c has 0x80', + setup => 'my $s; my $a = "ab\x{100}cde"; my $b = "fghij"; my $c = "\x80\x81klmn"', + pre => '$s = "\x{100}wxyz"', + code => '$s .= "foo=$a bar=$b baz=$c"', + }, + + 'expr::concat::nested_mutator' => { + setup => 'my $lex1; my ($lex2, $lex3, $lex4) = qw(abcd pqrs wxyz)', + pre => '$lex1 = "QPR"', + code => '(($lex1 .= $lex2) .= $lex3) .= $lex4', + }, + # scalar assign, OP_SASSIGN + 'expr::sassign::my_conststr' => { + setup => '', + code => 'my $x = "abc"', + }, 'expr::sassign::scalar_lex_int' => { desc => 'lexical $x = 1', setup => 'my $x', @@ -1271,6 +1518,18 @@ }, + # JOIN + + + 'func::join::empty_l_ll' => { + setup => 'my $lex; my $lex1 = "abcd"; my $lex2 = "wxyz"', + code => '$lex = join "", $lex1, $lex2', + }, + + + # KEYS + + 'func::keys::lex::void_cxt_empty' => { desc => ' keys() on an empty lexical hash in void context', setup => 'my %h = ()', @@ -1525,6 +1784,8 @@ code => '@a = (split(/:/, $s, 2), 1);', }, + # SPRINTF + 'func::sprintf::d' => { desc => '%d', @@ -1594,6 +1855,96 @@ code => '$s = sprintf "foo=%s bar=%s baz=%s", $a, $b, $c', }, + # sprint that's likely to be optimised to an OP_MULTICONCAT + + 'func::sprintf::l' => { + setup => 'my $lex1 = "abcd"', + code => 'sprintf "%s", $lex1', + }, + 'func::sprintf::g_l' => { + setup => 'our $pkg; my $lex1 = "abcd"', + code => '$pkg = sprintf "%s", $lex1', + }, + 'func::sprintf::g_append_l' => { + setup => 'our $pkg; my $lex1 = "abcd"', + pre => '$pkg = "pqrs"', + code => '$pkg .= sprintf "%s", $lex1', + }, + 'func::sprintf::g_ll' => { + setup => 'our $pkg; my $lex1 = "abcd"; my $lex2 = "wxyz"', + code => '$pkg = sprintf "%s%s", $lex1, $lex2', + }, + 'func::sprintf::g_append_ll' => { + setup => 'our $pkg; my $lex1 = "abcd"; my $lex2 = "wxyz"', + pre => '$pkg = "pqrs"', + code => '$pkg .= sprintf "%s%s", $lex1, $lex2', + }, + 'func::sprintf::g_cl' => { + setup => 'our $pkg; my $lex1 = "abcd"', + code => '$pkg = sprintf "foo=%s", $lex1', + }, + 'func::sprintf::g_clclc' => { + setup => 'our $pkg; my $lex1 = "abcd"; my $lex2 = "wxyz"', + code => '$pkg = sprintf "foo=%s bar=%s\n", $lex1, $lex2', + }, + + 'func::sprintf::l_l' => { + setup => 'my $lex; my $lex1 = "abcd"', + code => '$lex = sprintf "%s", $lex1', + }, + 'func::sprintf::l_append_l' => { + setup => 'my $lex; my $lex1 = "abcd"', + pre => '$lex = "pqrs"', + code => '$lex .= sprintf "%s", $lex1', + }, + 'func::sprintf::ll' => { + setup => 'my $lex1 = "abcd"; my $lex2 = "wxyz"', + code => 'sprintf "%s%s", $lex1, $lex2', + }, + 'func::sprintf::l_ll' => { + setup => 'my $lex; my $lex1 = "abcd"; my $lex2 = "wxyz"', + code => '$lex = sprintf "%s%s", $lex1, $lex2', + }, + 'func::sprintf::l_append_ll' => { + setup => 'my $lex; my $lex1 = "abcd"; my $lex2 = "wxyz"', + pre => '$lex = "pqrs"', + code => '$lex .= sprintf "%s%s", $lex1, $lex2', + }, + 'func::sprintf::l_cl' => { + setup => 'my $lex; my $lex1 = "abcd"', + code => '$lex = sprintf "foo=%s", $lex1', + }, + 'func::sprintf::l_clclc' => { + setup => 'my $lex; my $lex1 = "abcd"; my $lex2 = "wxyz"', + code => '$lex = sprintf "foo=%s bar=%s\n", $lex1, $lex2', + }, + + 'func::sprintf::m_l' => { + setup => 'my $lex1 = "abcd"', + code => 'my $lex = sprintf "%s", $lex1', + }, + 'func::sprintf::m_ll' => { + setup => 'my $lex1 = "abcd"; my $lex2 = "wxyz"', + code => 'my $lex = sprintf "%s%s", $lex1, $lex2', + }, + 'func::sprintf::m_cl' => { + setup => 'my $lex1 = "abcd"', + code => 'my $lex = sprintf "foo=%s", $lex1', + }, + 'func::sprintf::m_clclc' => { + setup => 'my $lex1 = "abcd"; my $lex2 = "wxyz"', + code => 'my $lex = sprintf "foo=%s bar=%s\n", $lex1, $lex2', + }, + + 'func::sprintf::utf8__l_lll' => { + desc => '$s = sprintf("foo=%s bar=%s baz=%s", $a, $b, $c) where $a,$c are utf8', + setup => 'my $s; my $a = "ab\x{100}cde"; my $b = "fghij"; my $c = "\x{101}klmn"', + code => '$s = sprintf "foo=%s bar=%s baz=%s", $a, $b, $c', + }, + + + # S/// + 'func::subst::bool' => { desc => 's/// in boolean context', setup => '', diff --git a/t/perf/opcount.t b/t/perf/opcount.t index 0ff4b7246c..0ded6cd360 100644 --- a/t/perf/opcount.t +++ b/t/perf/opcount.t @@ -20,7 +20,7 @@ BEGIN { use warnings; use strict; -plan 2309; +plan 2579; use B (); @@ -400,3 +400,237 @@ test_opcount(0, 'barewords can be constant-folded', } } } + + +# a sprintf that can't be optimised shouldn't stop the .= concat being +# optimised + +{ + my ($i,$j,$s); + test_opcount(0, "sprintf pessimised", + sub { $s .= sprintf "%d%d",$i, $j }, + { + const => 1, + sprintf => 1, + concat => 0, + multiconcat => 1, + padsv => 2, + }); +} + + +# sprintf with constant args should be constant folded + +test_opcount(0, "sprintf constant args", + sub { sprintf "%s%s", "abc", "def" }, + { + const => 1, + sprintf => 0, + multiconcat => 0. + }); + +# +# concats and assigns that should be optimised into a single multiconcat +# op + +{ + + my %seen; # weed out duplicate combinations + + # these are the ones where using multiconcat isn't a gain, so should + # be pessimised + my %pessimise = map { $_ => 1 } + '$a1.$a2', + '"$a1$a2"', + '$pkg .= $a1', + '$pkg .= "$a1"', + '$lex = $a1.$a2', + '$lex = "$a1$a2"', + # these already constant folded + 'sprintf("-")', + '$pkg = sprintf("-")', + '$lex = sprintf("-")', + 'my $l = sprintf("-")', + ; + + for my $lhs ( + '', + '$pkg = ', + '$pkg .= ', + '$lex = ', + '$lex .= ', + 'my $l = ', + ) { + for my $nargs (0..3) { + for my $type (0..2) { + # 0: $a . $b + # 1: "$a$b" + # 2: sprintf("%s%s", $a, $b) + + for my $const (0..4) { + # 0: no consts: "$a1$a2" + # 1: interior consts: "$a1-$a2" + # 2: + LH edge: "-$a1-$a2" + # 3: + RH edge: "$a1-$a2-" + # 4: + both edge: "-$a1-$a2-" + + my @args; + my @sprintf_args; + my $c = $type == 0 ? '"-"' : '-'; + push @args, $c if $const == 2 || $const == 4; + for my $n (1..$nargs) { + if ($type == 2) { + # sprintf + push @sprintf_args, "\$a$n"; + push @args, '%s'; + } + else { + push @args, "\$a$n"; + } + push @args, $c if $const; + } + pop @args if $const == 1 || $const == 2; + + push @args, $c if $nargs == 0 && $const == 1; + + + if ($type == 2) { + # sprintf + next unless @args; + } + else { + # To ensure that there's at least once concat + # action, if appending, need at least one RHS arg; + # else least 2 args: + # $x = $a . $b + # $x .= $a + next unless @args >= ($lhs =~ /\./ ? 1 : 2); + } + + my $rhs; + if ($type == 0) { + $rhs = join('.', @args); + } + elsif ($type == 1) { + $rhs = '"' . join('', @args) . '"' + } + else { + $rhs = 'sprintf("' + . join('', @args) + . '"' + . join('', map ",$_", @sprintf_args) + . ')'; + } + + my $expr = $lhs . $rhs; + + next if exists $seen{$expr}; + $seen{$expr} = 1; + + my ($a1, $a2, $a3); + my $lex; + our $pkg; + my $sub = eval qq{sub { $expr }}; + die "eval(sub { $expr }: $@" if $@; + + my $pm = $pessimise{$expr}; + test_opcount(0, ($pm ? "concat " : "multiconcat") + . ": $expr", + $sub, + $pm + ? { multiconcat => 0 } + : { + multiconcat => 1, + padsv => $nargs, + concat => 0, + sprintf => 0, + const => 0, + sassign => 0, + stringify => 0, + gv => 0, # optimised to gvsv + }); + } + } + } + } +} + +# $lex = "foo" should *not* get converted into a multiconcat - there's +# no actual concatenation involved, and treating it as a degnerate concat +# would forego any COW copy efficiency + +test_opcount(0, '$lex = "foo"', sub { my $x; $x = "foo"; }, + { + multiconcat => 0, + }); + +# for '$lex1 = $lex2 . $lex3', multiconcat is normally slower than +# concat, except in the specific case of '$lex1 = $lex2 . $lex1' + +test_opcount(0, '$lex1 = $lex2 . $lex1', sub { my ($x,$y); $x = $y . $x }, + { + multiconcat => 1, + padsv => 4, # 2 are from the my() + concat => 0, + sassign => 0, + stringify => 0, + }); +test_opcount(0, '$lex1 = "$lex2$lex1"', sub { my ($x,$y); $x = "$y$x" }, + { + multiconcat => 1, + padsv => 4, # 2 are from the my() + concat => 0, + sassign => 0, + stringify => 0, + }); +test_opcount(0, '$lex1 = $lex1 . $lex1', sub { my $x; $x = $x . $x }, + { + multiconcat => 0, + }); + +# 'my $x .= ...' doesn't make a lot of sense and so isn't optimised +test_opcount(0, 'my $a .= $b.$c.$d', sub { our ($b,$c,$d); my $a .= $b.$c.$d }, + { + padsv => 1, + }); + +# prefer rcatline optimisation over multiconcat + +test_opcount(0, "rcatline", sub { my ($x,$y); open FOO, "xxx"; $x .= <FOO> }, + { + rcatline => 1, + readline => 0, + multiconcat => 0, + concat => 0, + }); + +# long chains of concats should be converted into chained multiconcats + +{ + my @a; + for my $i (60..68) { # check each side of 64 threshold + my $c = join '.', map "\$a[$_]", 1..$i; + my $sub = eval qq{sub { $c }} or die $@; + test_opcount(0, "long chain $i", $sub, + { + multiconcat => $i > 65 ? 2 : 1, + concat => $i == 65 ? 1 : 0, + aelem => 0, + aelemfast => 0, + }); + } +} + +# with C<$state $s = $a . $b . ....>, the assign is optimised away, +# but the padsv isn't (it's treated like a general LHS expression rather +# than using OPpTARGET_MY). + +test_opcount(0, "state works with multiconcat", + sub { use feature 'state'; our ($a, $b, $c); state $s = $a . $b . $c }, + { + multiconcat => 1, + concat => 0, + sassign => 0, + once => 1, + padsv => 2, # one each for the next/once branches + }); |