summaryrefslogtreecommitdiff
path: root/pp.c
diff options
context:
space:
mode:
authorFather Chrysostomos <sprout@cpan.org>2014-01-01 05:51:36 -0800
committerFather Chrysostomos <sprout@cpan.org>2014-01-01 08:22:41 -0800
commit5cd5e2d6301836ca9b0f94e9a100e697bd374cd8 (patch)
treecd0d8c2712c4903c31a47bf87f896d5b60fe72fc /pp.c
parent4b3427080c3b11e528bd84c7509af7fc8a60dcb1 (diff)
downloadperl-5cd5e2d6301836ca9b0f94e9a100e697bd374cd8.tar.gz
Reënable in-place lc/uc
It used to be that this code: for("$foo") { lc $_; ... } would modify $_, allowing other code in the ‘for’ block to see the changes (bug #43207). Commit 17fa077605 fixed that by changing the logic that determined whether lc/uc(first) could modify the scalar in place. In doing so, it stopped in-place modification from happening at all, because the condition became SvPADTMP && SvTEMP, which never happens. (SvPADTMP usually indicates an operator return value stored in a pad; i.e., a scalar that will next be used by the same operator again to return another value. SvTEMP indicates that the REFCNT will go down shortly, usually a temporary value created solely for the sake of returning something.) Now that bug #78194 is fixed, for("$foo") no longer exposes a PADTMP to the following code, so we *can* now assume (as was done erroneously before) that PADTMP indicates something like lc("$foo$bar") and modify pp_stringify’s return value in place. Also, we can extend this to apply to TEMP variables that have a reference count of 1, since they cannot be in use elsewhere. We skip TEMP variables with set-magic, because they could be tied, and SvSETMAGIC would have a side effect. (That could happen with lc(delete $h{tied_elem}).) Previously, this was skipped for uc and lc for overloaded references, since stringification could change the utf8ness. That is no longer sufficient. As of Perl 5.16, typeglobs and non-overloaded blessed references can also enable their utf8 flag upon stringification, if the stash or glob names contain wide characters. So I changed the !SvAMAGIC (not overloaded) to SvPOK (is a string already), which will cover most cases where this optimisation helps. The two tests added to the end of lc.t fail with !SvAMAGIC.
Diffstat (limited to 'pp.c')
-rw-r--r--pp.c20
1 files changed, 15 insertions, 5 deletions
diff --git a/pp.c b/pp.c
index 4175808db8..cbe2df33d5 100644
--- a/pp.c
+++ b/pp.c
@@ -3493,7 +3493,10 @@ PP(pp_ucfirst)
/* We may be able to get away with changing only the first character, in
* place, but not if read-only, etc. Later we may discover more reasons to
* not convert in-place. */
- inplace = SvPADTMP(source) && !SvREADONLY(source) && SvTEMP(source);
+ inplace = !SvREADONLY(source)
+ && ( SvPADTMP(source)
+ || ( SvTEMP(source) && !SvSMAGICAL(source)
+ && SvREFCNT(source) == 1));
/* First calculate what the changed first character should be. This affects
* whether we can just swap it out, leaving the rest of the string unchanged,
@@ -3706,8 +3709,11 @@ PP(pp_uc)
SvGETMAGIC(source);
- if (SvPADTMP(source) && !SvREADONLY(source) && !SvAMAGIC(source)
- && SvTEMP(source) && !DO_UTF8(source)
+ if ((SvPADTMP(source)
+ ||
+ (SvTEMP(source) && !SvSMAGICAL(source) && SvREFCNT(source) == 1))
+ && !SvREADONLY(source) && SvPOK(source)
+ && !DO_UTF8(source)
&& (IN_LOCALE_RUNTIME || ! IN_UNI_8_BIT)) {
/* We can convert in place. The reason we can't if in UNI_8_BIT is to
@@ -3952,8 +3958,12 @@ PP(pp_lc)
SvGETMAGIC(source);
- if (SvPADTMP(source) && !SvREADONLY(source) && !SvAMAGIC(source)
- && SvTEMP(source) && !DO_UTF8(source)) {
+ if ( ( SvPADTMP(source)
+ || ( SvTEMP(source) && !SvSMAGICAL(source)
+ && SvREFCNT(source) == 1 )
+ )
+ && !SvREADONLY(source) && SvPOK(source)
+ && !DO_UTF8(source)) {
/* We can convert in place, as lowercasing anything in the latin1 range
* (or else DO_UTF8 would have been on) doesn't lengthen it */