summaryrefslogtreecommitdiff
path: root/pp_ctl.c
diff options
context:
space:
mode:
authorInaba Hiroto <inaba@st.rim.or.jp>2003-02-10 09:04:51 +0900
committerJarkko Hietaniemi <jhi@iki.fi>2003-02-26 20:38:55 +0000
commit1bd51a4ce2ce8ac9d8d3503b73fce0e0fc8f8426 (patch)
tree13df2a5182a6b0a84b4a07e7b8305a34cea4e79d /pp_ctl.c
parent351208f1b69ff774788ddf0fb62571002594640e (diff)
downloadperl-1bd51a4ce2ce8ac9d8d3503b73fce0e0fc8f8426.tar.gz
Re: [PATCH] Re: format, PerlIO and utf8
Message-ID: <3E466E12.E5349D84@st.rim.or.jp> p4raw-id: //depot/perl@18785
Diffstat (limited to 'pp_ctl.c')
-rw-r--r--pp_ctl.c66
1 files changed, 55 insertions, 11 deletions
diff --git a/pp_ctl.c b/pp_ctl.c
index 15281689fb..e22297eeb8 100644
--- a/pp_ctl.c
+++ b/pp_ctl.c
@@ -355,7 +355,8 @@ PP(pp_formline)
bool gotsome = FALSE;
STRLEN len;
STRLEN fudge = SvCUR(tmpForm) * (IN_BYTES ? 1 : 3) + 1;
- bool item_is_utf = FALSE;
+ bool item_is_utf8 = FALSE;
+ bool targ_is_utf8 = FALSE;
if (!SvMAGICAL(tmpForm) || !SvCOMPILED(tmpForm)) {
if (SvREADONLY(tmpForm)) {
@@ -366,8 +367,9 @@ PP(pp_formline)
else
doparseform(tmpForm);
}
-
SvPV_force(PL_formtarget, len);
+ if (DO_UTF8(PL_formtarget))
+ targ_is_utf8 = TRUE;
t = SvGROW(PL_formtarget, len + fudge + 1); /* XXX SvCUR bad */
t += len;
f = SvPV(tmpForm, len);
@@ -414,6 +416,26 @@ PP(pp_formline)
case FF_LITERAL:
arg = *fpc++;
+ if (targ_is_utf8 && !SvUTF8(tmpForm)) {
+ while (arg--) {
+ if (!NATIVE_IS_INVARIANT(*f)) {
+ U8 ch = NATIVE_TO_ASCII(*f++);
+ *t++ = (U8)UTF8_EIGHT_BIT_HI(ch);
+ *t++ = (U8)UTF8_EIGHT_BIT_LO(ch);
+ }
+ else
+ *t++ = *f++;
+ }
+ break;
+ }
+ if (!targ_is_utf8 && DO_UTF8(tmpForm)) {
+ SvCUR_set(PL_formtarget, t - SvPVX(PL_formtarget));
+ *t = '\0';
+ sv_utf8_upgrade(PL_formtarget);
+ SvGROW(PL_formtarget, SvCUR(PL_formtarget) + fudge + 1);
+ t = SvEND(PL_formtarget);
+ targ_is_utf8 = TRUE;
+ }
while (arg--)
*t++ = *f++;
break;
@@ -458,13 +480,13 @@ PP(pp_formline)
break;
s++;
}
- item_is_utf = TRUE;
+ item_is_utf8 = TRUE;
itemsize = s - item;
sv_pos_b2u(sv, &itemsize);
break;
}
}
- item_is_utf = FALSE;
+ item_is_utf8 = FALSE;
if (itemsize > fieldsize)
itemsize = fieldsize;
send = chophere = s + itemsize;
@@ -519,11 +541,11 @@ PP(pp_formline)
itemsize = chophere - item;
sv_pos_b2u(sv, &itemsize);
}
- item_is_utf = TRUE;
+ item_is_utf8 = TRUE;
break;
}
}
- item_is_utf = FALSE;
+ item_is_utf8 = FALSE;
if (itemsize <= fieldsize) {
send = chophere = s + itemsize;
while (s < send) {
@@ -579,7 +601,15 @@ PP(pp_formline)
case FF_ITEM:
arg = itemsize;
s = item;
- if (item_is_utf) {
+ if (item_is_utf8) {
+ if (!targ_is_utf8) {
+ SvCUR_set(PL_formtarget, t - SvPVX(PL_formtarget));
+ *t = '\0';
+ sv_utf8_upgrade(PL_formtarget);
+ SvGROW(PL_formtarget, SvCUR(PL_formtarget) + fudge + 1);
+ t = SvEND(PL_formtarget);
+ targ_is_utf8 = TRUE;
+ }
while (arg--) {
if (UTF8_IS_CONTINUED(*s)) {
STRLEN skip = UTF8SKIP(s);
@@ -628,22 +658,32 @@ PP(pp_formline)
case FF_LINEGLOB:
item = s = SvPV(sv, len);
itemsize = len;
- item_is_utf = FALSE; /* XXX is this correct? */
+ if ((item_is_utf8 = DO_UTF8(sv)))
+ itemsize = sv_len_utf8(sv);
if (itemsize) {
+ bool chopped = FALSE;
gotsome = TRUE;
- send = s + itemsize;
+ send = s + len;
while (s < send) {
if (*s++ == '\n') {
- if (s == send)
+ if (s == send) {
itemsize--;
+ chopped = TRUE;
+ }
else
lines++;
}
}
SvCUR_set(PL_formtarget, t - SvPVX(PL_formtarget));
- sv_catpvn(PL_formtarget, item, itemsize);
+ if (targ_is_utf8)
+ SvUTF8_on(PL_formtarget);
+ sv_catsv(PL_formtarget, sv);
+ if (chopped)
+ SvCUR_set(PL_formtarget, SvCUR(PL_formtarget) - 1);
SvGROW(PL_formtarget, SvCUR(PL_formtarget) + fudge + 1);
t = SvPVX(PL_formtarget) + SvCUR(PL_formtarget);
+ if (item_is_utf8)
+ targ_is_utf8 = TRUE;
}
break;
@@ -739,6 +779,8 @@ PP(pp_formline)
if (strnEQ(linemark, linemark - arg, arg))
DIE(aTHX_ "Runaway format");
}
+ if (targ_is_utf8)
+ SvUTF8_on(PL_formtarget);
FmLINES(PL_formtarget) = lines;
SP = ORIGMARK;
RETURNOP(cLISTOP->op_first);
@@ -778,6 +820,8 @@ PP(pp_formline)
case FF_END:
*t = '\0';
SvCUR_set(PL_formtarget, t - SvPVX(PL_formtarget));
+ if (targ_is_utf8)
+ SvUTF8_on(PL_formtarget);
FmLINES(PL_formtarget) += lines;
SP = ORIGMARK;
RETPUSHYES;