From 24fcb59fcc2fac06ec8b42aeedb3a16987d87db4 Mon Sep 17 00:00:00 2001 From: Father Chrysostomos Date: Fri, 25 Nov 2011 23:04:22 -0800 Subject: Optimise substr assignment in void context MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In void context we can optimise substr($foo, $bar, $baz) = $replacement; to something like substr($foo, $bar, $baz, $replacement); except that the execution order must be preserved. So what we actu- ally do is substr($replacement, $foo, $bar, $baz); with a flag to indicate that the replacement comes first. This means we can also optimise assignment to two-argument substr the same way. Although optimisations are not supposed to change behaviour, this one does. • It stops substr assignment from calling get-magic twice, which means the optimisation makes things less buggy than usual. • It causes the uninitialized warning (for an undefined first argu- ment) to mention the substr operator, as it did before the previous commit, rather than the assignment operator. I think that sort of detail is minor enough. I had to make the warning about clobbering references apply whenever substr does a replacement, and not only when used as an lvalue. So four-argument substr now emits that warning. I would consider that a bug fix, too. Also, if the numeric arguments to four-argument substr and the replacement string are undefined, the order of the uninitialized warn- ings is slightly different, but is consistent regardless of whether the optimisation is in effect. I believe this will make 95% of substr assignments run faster. So there is less incentive to use what I consider the less readable form (the four-argument form, which is not self-documenting). Since I like naïve benchmarks, here are Before and After: $ time ./miniperl -le 'do{$x="hello"; substr ($x,0,0) = 34;0}for 1..1000000' real 0m2.391s user 0m2.381s sys 0m0.005s $ time ./miniperl -le 'do{$x="hello"; substr ($x,0,0) = 34;0}for 1..1000000' real 0m0.936s user 0m0.927s sys 0m0.005s --- dist/B-Deparse/Deparse.pm | 17 +++++++++++++---- dist/B-Deparse/t/deparse.t | 4 ++++ ext/B/B/Concise.pm | 1 + ext/B/t/concise-xs.t | 2 +- op.c | 18 ++++++++++++++++++ op.h | 4 ++++ pp.c | 23 +++++++++++++++-------- t/lib/warnings/9uninit | 10 +++++----- 8 files changed, 61 insertions(+), 18 deletions(-) diff --git a/dist/B-Deparse/Deparse.pm b/dist/B-Deparse/Deparse.pm index b9381a61e5..f203a534ca 100644 --- a/dist/B-Deparse/Deparse.pm +++ b/dist/B-Deparse/Deparse.pm @@ -31,7 +31,7 @@ BEGIN { # be to fake up a dummy constant that will never actually be true. foreach (qw(OPpSORT_INPLACE OPpSORT_DESCEND OPpITER_REVERSED OPpCONST_NOVER OPpPAD_STATE PMf_SKIPWHITE RXf_SKIPWHITE - CVf_LOCKED OPpREVERSE_INPLACE + CVf_LOCKED OPpREVERSE_INPLACE OPpSUBSTR_REPL_FIRST PMf_NONDESTRUCT OPpCONST_ARYBASE OPpEVAL_BYTES)) { eval { import B $_ }; no strict 'refs'; @@ -2334,10 +2334,10 @@ sub pp_dorassign { logassignop(@_, "//=") } sub listop { my $self = shift; - my($op, $cx, $name) = @_; + my($op, $cx, $name, $kid) = @_; my(@exprs); my $parens = ($cx >= 5) || $self->{'parens'}; - my $kid = $op->first->sibling; + $kid ||= $op->first->sibling; return $self->keyword($name) if null $kid; my $first; $name = "socketpair" if $name eq "sockpair"; @@ -2377,7 +2377,16 @@ sub listop { sub pp_bless { listop(@_, "bless") } sub pp_atan2 { maybe_targmy(@_, \&listop, "atan2") } -sub pp_substr { maybe_local(@_, listop(@_, "substr")) } +sub pp_substr { + my ($self,$op,$cx) = @_; + if ($op->private & OPpSUBSTR_REPL_FIRST) { + return + listop($self, $op, 7, "substr", $op->first->sibling->sibling) + . " = " + . $self->deparse($op->first->sibling, 7); + } + maybe_local(@_, listop(@_, "substr")) +} sub pp_vec { maybe_local(@_, listop(@_, "vec")) } sub pp_index { maybe_targmy(@_, \&listop, "index") } sub pp_rindex { maybe_targmy(@_, \&listop, "rindex") } diff --git a/dist/B-Deparse/t/deparse.t b/dist/B-Deparse/t/deparse.t index 84f5f6a586..84b99252d1 100644 --- a/dist/B-Deparse/t/deparse.t +++ b/dist/B-Deparse/t/deparse.t @@ -789,3 +789,7 @@ my(@a) = ()[()]; print sort(foo('bar')); >>>> print sort(foo('bar')); +#### +# substr assignment +substr(my $a, 0, 0) = (foo(), bar()); +$a++; diff --git a/ext/B/B/Concise.pm b/ext/B/B/Concise.pm index d5c869695c..cc2c87debb 100644 --- a/ext/B/B/Concise.pm +++ b/ext/B/B/Concise.pm @@ -621,6 +621,7 @@ $priv{$_}{16} = "OURINTR" for ("gvsv", "rv2sv", "rv2av", "rv2hv", "r2gv", "enteriter"); $priv{$_}{8} = 'LVSUB' for qw(rv2av rv2gv rv2hv padav padhv aelem helem aslice hslice av2arylen keys rkeys substr pos vec); +$priv{substr}{16} = 'REPL1ST'; $priv{$_}{16} = "TARGMY" for (map(($_,"s$_"),"chop", "chomp"), map(($_,"i_$_"), "postinc", "postdec", "multiply", "divide", "modulo", diff --git a/ext/B/t/concise-xs.t b/ext/B/t/concise-xs.t index 41a2ad815a..56d2d57962 100644 --- a/ext/B/t/concise-xs.t +++ b/ext/B/t/concise-xs.t @@ -169,7 +169,7 @@ my $testpkgs = { PMf_MULTILINE PMf_ONCE PMf_SINGLELINE POSTFIX SVf_FAKE SVf_IOK SVf_NOK SVf_POK SVf_ROK SVpad_OUR SVs_RMG SVs_SMG SWAP_CHILDREN OPpPAD_STATE - OPpCONST_ARYBASE OPpEVAL_BYTES + OPpCONST_ARYBASE OPpEVAL_BYTES OPpSUBSTR_REPL_FIRST /, $] > 5.009 ? ('RXf_SKIPWHITE') : ('PMf_SKIPWHITE'), 'CVf_LOCKED', # This ends up as a constant, pre or post 5.10 ], diff --git a/op.c b/op.c index eb3dffe4af..f3088ed545 100644 --- a/op.c +++ b/op.c @@ -10300,6 +10300,24 @@ Perl_rpeep(pTHX_ register OP *o) } break; + case OP_SASSIGN: + if (OP_GIMME(o,0) == G_VOID) { + OP *right = cBINOP->op_first; + if (right) { + OP *left = right->op_sibling; + if (left->op_type == OP_SUBSTR + && (left->op_private & 7) < 4) { + op_null(o); + cBINOP->op_first = left; + right->op_sibling = + cBINOPx(left)->op_first->op_sibling; + cBINOPx(left)->op_first->op_sibling = right; + left->op_private |= OPpSUBSTR_REPL_FIRST; + } + } + } + break; + case OP_CUSTOM: { Perl_cpeep_t cpeep = XopENTRY(Perl_custom_op_xop(aTHX_ o), xop_peep); diff --git a/op.h b/op.h index 958529e306..d61198f28c 100644 --- a/op.h +++ b/op.h @@ -229,6 +229,10 @@ Deprecated. Use C instead. /* OP_RV2[AGH]V, OP_PAD[AH]V, OP_[AH]ELEM, OP_[AH]SLICE OP_AV2ARYLEN, OP_R?KEYS, OP_SUBSTR, OP_POS, OP_VEC */ #define OPpMAYBE_LVSUB 8 /* We might be an lvalue to return */ + + /* OP_SUBSTR only */ +#define OPpSUBSTR_REPL_FIRST 16 /* 1st arg is replacement string */ + /* OP_PADSV only */ #define OPpPAD_STATE 16 /* is a "state" pad */ /* for OP_RV2?V, lower bits carry hints (currently only HINT_STRICT_REFS) */ diff --git a/pp.c b/pp.c index 329ed17e34..0ecd144c99 100644 --- a/pp.c +++ b/pp.c @@ -2968,7 +2968,7 @@ PP(pp_substr) SV * len_sv; IV len_iv = 0; int len_is_uv = 1; - const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET; + I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET; const bool rvalue = (GIMME_V != G_VOID); const char *tmps; SV *repl_sv = NULL; @@ -2980,11 +2980,7 @@ PP(pp_substr) if (num_args > 2) { if (num_args > 3) { - if((repl_sv = POPs)) { - repl = SvPV_const(repl_sv, repl_len); - repl_is_utf8 = DO_UTF8(repl_sv) && repl_len; - } - else num_args--; + if(!(repl_sv = POPs)) num_args--; } if ((len_sv = POPs)) { len_iv = SvIV(len_sv); @@ -2996,16 +2992,23 @@ PP(pp_substr) pos1_iv = SvIV(pos_sv); pos1_is_uv = SvIOK_UV(pos_sv); sv = POPs; + if (PL_op->op_private & OPpSUBSTR_REPL_FIRST) { + assert(!repl_sv); + repl_sv = POPs; + } PUTBACK; if (repl_sv) { + repl = SvPV_const(repl_sv, repl_len); + repl_is_utf8 = DO_UTF8(repl_sv) && repl_len; if (repl_is_utf8) { if (!DO_UTF8(sv)) sv_utf8_upgrade(sv); } else if (DO_UTF8(sv)) repl_need_utf8_upgrade = TRUE; + lvalue = 0; } - if (lvalue && !repl) { + if (lvalue) { tmps = NULL; /* unused */ SvGETMAGIC(sv); if (SvOK(sv)) (void)SvPV_nomg_const(sv, curlen); @@ -3075,7 +3078,7 @@ PP(pp_substr) STRLEN byte_pos = utf8_curlen ? sv_pos_u2b_flags(sv, pos, &byte_len, SV_CONST_RETURN) : pos; - if (lvalue && !repl) { + if (lvalue) { SV * ret; ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */ sv_magic(ret, NULL, PERL_MAGIC_substr, NULL, 0); @@ -3111,6 +3114,10 @@ PP(pp_substr) repl = SvPV_const(repl_sv_copy, repl_len); repl_is_utf8 = DO_UTF8(repl_sv_copy) && repl_len; } + if (SvROK(sv)) + Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR), + "Attempt to use reference as lvalue in substr" + ); if (!SvOK(sv)) sv_setpvs(sv, ""); sv_insert_flags(sv, byte_pos, byte_len, repl, repl_len, 0); diff --git a/t/lib/warnings/9uninit b/t/lib/warnings/9uninit index b76c2ef370..0d2d84141b 100644 --- a/t/lib/warnings/9uninit +++ b/t/lib/warnings/9uninit @@ -1019,17 +1019,17 @@ Use of uninitialized value $m1 in substr at - line 5. Use of uninitialized value $m2 in substr at - line 6. Use of uninitialized value $g1 in substr at - line 6. Use of uninitialized value $m1 in substr at - line 6. -Use of uninitialized value $g2 in substr at - line 7. Use of uninitialized value $m2 in substr at - line 7. Use of uninitialized value $g1 in substr at - line 7. +Use of uninitialized value $g2 in substr at - line 7. Use of uninitialized value $m1 in substr at - line 7. Use of uninitialized value $g1 in substr at - line 8. -Use of uninitialized value in scalar assignment at - line 8. -Use of uninitialized value $m1 in scalar assignment at - line 8. +Use of uninitialized value $g2 in substr at - line 8. +Use of uninitialized value $m1 in substr at - line 8. Use of uninitialized value $m2 in substr at - line 9. Use of uninitialized value $g1 in substr at - line 9. -Use of uninitialized value in scalar assignment at - line 9. -Use of uninitialized value $m1 in scalar assignment at - line 9. +Use of uninitialized value $g2 in substr at - line 9. +Use of uninitialized value $m1 in substr at - line 9. Use of uninitialized value $m2 in vec at - line 11. Use of uninitialized value $g1 in vec at - line 11. Use of uninitialized value $m1 in vec at - line 11. -- cgit v1.2.1