diff options
author | Inaba Hiroto <inaba@st.rim.or.jp> | 2003-02-10 09:04:51 +0900 |
---|---|---|
committer | Jarkko Hietaniemi <jhi@iki.fi> | 2003-02-26 20:38:55 +0000 |
commit | 1bd51a4ce2ce8ac9d8d3503b73fce0e0fc8f8426 (patch) | |
tree | 13df2a5182a6b0a84b4a07e7b8305a34cea4e79d /pp_ctl.c | |
parent | 351208f1b69ff774788ddf0fb62571002594640e (diff) | |
download | perl-1bd51a4ce2ce8ac9d8d3503b73fce0e0fc8f8426.tar.gz |
Re: [PATCH] Re: format, PerlIO and utf8
Message-ID: <3E466E12.E5349D84@st.rim.or.jp>
p4raw-id: //depot/perl@18785
Diffstat (limited to 'pp_ctl.c')
-rw-r--r-- | pp_ctl.c | 66 |
1 files changed, 55 insertions, 11 deletions
@@ -355,7 +355,8 @@ PP(pp_formline) bool gotsome = FALSE; STRLEN len; STRLEN fudge = SvCUR(tmpForm) * (IN_BYTES ? 1 : 3) + 1; - bool item_is_utf = FALSE; + bool item_is_utf8 = FALSE; + bool targ_is_utf8 = FALSE; if (!SvMAGICAL(tmpForm) || !SvCOMPILED(tmpForm)) { if (SvREADONLY(tmpForm)) { @@ -366,8 +367,9 @@ PP(pp_formline) else doparseform(tmpForm); } - SvPV_force(PL_formtarget, len); + if (DO_UTF8(PL_formtarget)) + targ_is_utf8 = TRUE; t = SvGROW(PL_formtarget, len + fudge + 1); /* XXX SvCUR bad */ t += len; f = SvPV(tmpForm, len); @@ -414,6 +416,26 @@ PP(pp_formline) case FF_LITERAL: arg = *fpc++; + if (targ_is_utf8 && !SvUTF8(tmpForm)) { + while (arg--) { + if (!NATIVE_IS_INVARIANT(*f)) { + U8 ch = NATIVE_TO_ASCII(*f++); + *t++ = (U8)UTF8_EIGHT_BIT_HI(ch); + *t++ = (U8)UTF8_EIGHT_BIT_LO(ch); + } + else + *t++ = *f++; + } + break; + } + if (!targ_is_utf8 && DO_UTF8(tmpForm)) { + SvCUR_set(PL_formtarget, t - SvPVX(PL_formtarget)); + *t = '\0'; + sv_utf8_upgrade(PL_formtarget); + SvGROW(PL_formtarget, SvCUR(PL_formtarget) + fudge + 1); + t = SvEND(PL_formtarget); + targ_is_utf8 = TRUE; + } while (arg--) *t++ = *f++; break; @@ -458,13 +480,13 @@ PP(pp_formline) break; s++; } - item_is_utf = TRUE; + item_is_utf8 = TRUE; itemsize = s - item; sv_pos_b2u(sv, &itemsize); break; } } - item_is_utf = FALSE; + item_is_utf8 = FALSE; if (itemsize > fieldsize) itemsize = fieldsize; send = chophere = s + itemsize; @@ -519,11 +541,11 @@ PP(pp_formline) itemsize = chophere - item; sv_pos_b2u(sv, &itemsize); } - item_is_utf = TRUE; + item_is_utf8 = TRUE; break; } } - item_is_utf = FALSE; + item_is_utf8 = FALSE; if (itemsize <= fieldsize) { send = chophere = s + itemsize; while (s < send) { @@ -579,7 +601,15 @@ PP(pp_formline) case FF_ITEM: arg = itemsize; s = item; - if (item_is_utf) { + if (item_is_utf8) { + if (!targ_is_utf8) { + SvCUR_set(PL_formtarget, t - SvPVX(PL_formtarget)); + *t = '\0'; + sv_utf8_upgrade(PL_formtarget); + SvGROW(PL_formtarget, SvCUR(PL_formtarget) + fudge + 1); + t = SvEND(PL_formtarget); + targ_is_utf8 = TRUE; + } while (arg--) { if (UTF8_IS_CONTINUED(*s)) { STRLEN skip = UTF8SKIP(s); @@ -628,22 +658,32 @@ PP(pp_formline) case FF_LINEGLOB: item = s = SvPV(sv, len); itemsize = len; - item_is_utf = FALSE; /* XXX is this correct? */ + if ((item_is_utf8 = DO_UTF8(sv))) + itemsize = sv_len_utf8(sv); if (itemsize) { + bool chopped = FALSE; gotsome = TRUE; - send = s + itemsize; + send = s + len; while (s < send) { if (*s++ == '\n') { - if (s == send) + if (s == send) { itemsize--; + chopped = TRUE; + } else lines++; } } SvCUR_set(PL_formtarget, t - SvPVX(PL_formtarget)); - sv_catpvn(PL_formtarget, item, itemsize); + if (targ_is_utf8) + SvUTF8_on(PL_formtarget); + sv_catsv(PL_formtarget, sv); + if (chopped) + SvCUR_set(PL_formtarget, SvCUR(PL_formtarget) - 1); SvGROW(PL_formtarget, SvCUR(PL_formtarget) + fudge + 1); t = SvPVX(PL_formtarget) + SvCUR(PL_formtarget); + if (item_is_utf8) + targ_is_utf8 = TRUE; } break; @@ -739,6 +779,8 @@ PP(pp_formline) if (strnEQ(linemark, linemark - arg, arg)) DIE(aTHX_ "Runaway format"); } + if (targ_is_utf8) + SvUTF8_on(PL_formtarget); FmLINES(PL_formtarget) = lines; SP = ORIGMARK; RETURNOP(cLISTOP->op_first); @@ -778,6 +820,8 @@ PP(pp_formline) case FF_END: *t = '\0'; SvCUR_set(PL_formtarget, t - SvPVX(PL_formtarget)); + if (targ_is_utf8) + SvUTF8_on(PL_formtarget); FmLINES(PL_formtarget) += lines; SP = ORIGMARK; RETPUSHYES; |