diff options
author | Nicholas Clark <nick@ccl4.org> | 2006-04-29 14:05:55 +0000 |
---|---|---|
committer | Nicholas Clark <nick@ccl4.org> | 2006-04-29 14:05:55 +0000 |
commit | 673061948a634568cc156286d219e7f736c6048a (patch) | |
tree | c6ab92b0e28e52c35da0991f7ae94be9074850ba /pp.c | |
parent | ec9af7d430b6660eff7240fa20757fa5feb233a8 (diff) | |
download | perl-673061948a634568cc156286d219e7f736c6048a.tar.gz |
uc under an 8-bit locale could get confused by UTF-8 values returned by
overloaded stringification.
p4raw-id: //depot/perl@28012
Diffstat (limited to 'pp.c')
-rw-r--r-- | pp.c | 144 |
1 file changed, 80 insertions, 64 deletions
@@ -3479,90 +3479,106 @@ PP(pp_ucfirst) RETURN; } +/* There's so much setup/teardown code common between uc and lc, I wonder if + it would be worth merging the two, and just having a switch outside each + of the three tight loops. */ PP(pp_uc) { dVAR; dSP; - SV *sv = TOPs; + SV *source = TOPs; STRLEN len; + STRLEN min; + SV *dest; + const U8 *s; + U8 *d; - SvGETMAGIC(sv); - if (DO_UTF8(sv)) { + SvGETMAGIC(source); + + if (SvPADTMP(source) && !SvREADONLY(source) && !SvAMAGIC(source) + && !DO_UTF8(source)) { + /* We can convert in place. */ + + dest = source; + s = d = (U8*)SvPV_force_nomg(source, len); + min = len + 1; + } else { dTARGET; - STRLEN ulen; - register U8 *d; - const U8 *s; - const U8 *send; - U8 tmpbuf[UTF8_MAXBYTES+1]; - s = (const U8*)SvPV_nomg_const(sv,len); - if (!len) { - SvUTF8_off(TARG); /* decontaminate */ - sv_setpvn(TARG, "", 0); - sv = TARG; - SETs(sv); - } - else { - STRLEN min = len + 1; + dest = TARG; - SvUPGRADE(TARG, SVt_PV); - SvGROW(TARG, min); - (void)SvPOK_only(TARG); - d = (U8*)SvPVX(TARG); - send = s + len; - while (s < send) { - STRLEN u = UTF8SKIP(s); - - toUPPER_utf8(s, tmpbuf, &ulen); - if (ulen > u && (SvLEN(TARG) < (min += ulen - u))) { - /* If the eventually required minimum size outgrows - * the available space, we need to grow. */ - const UV o = d - (U8*)SvPVX_const(TARG); - - /* If someone uppercases one million U+03B0s we - * SvGROW() one million times. Or we could try - * guessing how much to allocate without allocating - * too much. Such is life. */ - SvGROW(TARG, min); - d = (U8*)SvPVX(TARG) + o; - } - Copy(tmpbuf, d, ulen, U8); - d += ulen; - s += u; - } - *d = '\0'; - SvUTF8_on(TARG); - SvCUR_set(TARG, d - (U8*)SvPVX_const(TARG)); - sv = TARG; - SETs(sv); + /* The old implementation would copy source into TARG at this point. + This had the side effect that if source was undef, TARG was now + an undefined SV with PADTMP set, and they don't warn inside + sv_2pv_flags(). 
However, we're now getting the PV direct from + source, which doesn't have PADTMP set, so it would warn. Hence the + little games. */ + + if (SvOK(source)) { + s = (const U8*)SvPV_nomg_const(source, len); + } else { + s = ""; + len = 0; } + min = len + 1; + + SvUPGRADE(dest, SVt_PV); + d = SvGROW(dest, min); + (void)SvPOK_only(dest); + + SETs(dest); } - else { - U8 *s; - if (!SvPADTMP(sv) || SvREADONLY(sv)) { - dTARGET; - SvUTF8_off(TARG); /* decontaminate */ - sv_setsv_nomg(TARG, sv); - sv = TARG; - SETs(sv); + + /* Overloaded values may have toggled the UTF-8 flag on source, so we need + to check DO_UTF8 again here. */ + + if (DO_UTF8(source)) { + const U8 *const send = s + len; + U8 tmpbuf[UTF8_MAXBYTES+1]; + + while (s < send) { + const STRLEN u = UTF8SKIP(s); + STRLEN ulen; + + toUPPER_utf8(s, tmpbuf, &ulen); + if (ulen > u && (SvLEN(dest) < (min += ulen - u))) { + /* If the eventually required minimum size outgrows + * the available space, we need to grow. */ + const UV o = d - (U8*)SvPVX_const(dest); + + /* If someone uppercases one million U+03B0s we SvGROW() one + * million times. Or we could try guessing how much to + allocate without allocating too much. Such is life. */ + SvGROW(dest, min); + d = (U8*)SvPVX(dest) + o; + } + Copy(tmpbuf, d, ulen, U8); + d += ulen; + s += u; } - s = (U8*)SvPV_force_nomg(sv, len); + SvUTF8_on(dest); + *d = '\0'; + SvCUR_set(dest, d - (U8*)SvPVX_const(dest)); + } else { if (len) { - register const U8 *send = s + len; - + const U8 *const send = s + len; if (IN_LOCALE_RUNTIME) { TAINT; - SvTAINTED_on(sv); - for (; s < send; s++) - *s = toUPPER_LC(*s); + SvTAINTED_on(dest); + for (; s < send; d++, s++) + *d = toUPPER_LC(*s); } else { - for (; s < send; s++) - *s = toUPPER(*s); + for (; s < send; d++, s++) + *d = toUPPER(*s); } } + if (source != dest) { + *d = '\0'; + SvCUR_set(dest, d - (U8*)SvPVX_const(dest)); + } } - SvSETMAGIC(sv); + SvSETMAGIC(dest); RETURN; } |