summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJarkko Hietaniemi <jhi@iki.fi>2003-03-02 07:21:36 +0000
committerJarkko Hietaniemi <jhi@iki.fi>2003-03-02 07:21:36 +0000
commit78da4d13e9515c4d820a5423a160a2c81889d633 (patch)
tree57f8664b2617e20db45689b9ba3a801f9357ea4c
parent2bfba5f0e6f81fa3f5e291fd35e61c9d1112415c (diff)
downloadperl-78da4d13e9515c4d820a5423a160a2c81889d633.tar.gz
Further Unicode formats patching from Inaba Hiroto.
p4raw-id: //depot/perl@18794
-rw-r--r--MANIFEST1
-rw-r--r--pp_ctl.c29
-rw-r--r--t/uni/write.t96
3 files changed, 117 insertions, 9 deletions
diff --git a/MANIFEST b/MANIFEST
index 341365f692..6d583a2226 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -2722,6 +2722,7 @@ t/uni/tr_eucjp.t See if Unicode tr/// works
t/uni/tr_sjis.t See if Unicode tr/// works
t/uni/tr_utf8.t See if Unicode tr/// works
t/uni/upper.t See if Unicode casing works
+t/uni/write.t See if Unicode formats work
t/win32/longpath.t Test if Win32::GetLongPathName() works
t/win32/system.t See if system works in Win*
t/win32/system_tests Test runner for system.t
diff --git a/pp_ctl.c b/pp_ctl.c
index e22297eeb8..763da0662a 100644
--- a/pp_ctl.c
+++ b/pp_ctl.c
@@ -357,6 +357,7 @@ PP(pp_formline)
STRLEN fudge = SvCUR(tmpForm) * (IN_BYTES ? 1 : 3) + 1;
bool item_is_utf8 = FALSE;
bool targ_is_utf8 = FALSE;
+ SV * nsv = Nullsv;
if (!SvMAGICAL(tmpForm) || !SvCOMPILED(tmpForm)) {
if (SvREADONLY(tmpForm)) {
@@ -417,15 +418,10 @@ PP(pp_formline)
case FF_LITERAL:
arg = *fpc++;
if (targ_is_utf8 && !SvUTF8(tmpForm)) {
- while (arg--) {
- if (!NATIVE_IS_INVARIANT(*f)) {
- U8 ch = NATIVE_TO_ASCII(*f++);
- *t++ = (U8)UTF8_EIGHT_BIT_HI(ch);
- *t++ = (U8)UTF8_EIGHT_BIT_LO(ch);
- }
- else
- *t++ = *f++;
- }
+ SvCUR_set(PL_formtarget, t - SvPVX(PL_formtarget));
+ *t = '\0';
+ sv_catpvn_utf8_upgrade(PL_formtarget, f, arg, nsv);
+ t = SvEND(PL_formtarget);
break;
}
if (!targ_is_utf8 && DO_UTF8(tmpForm)) {
@@ -635,6 +631,21 @@ PP(pp_formline)
}
break;
}
+ if (targ_is_utf8 && !item_is_utf8) {
+ SvCUR_set(PL_formtarget, t - SvPVX(PL_formtarget));
+ *t = '\0';
+ sv_catpvn_utf8_upgrade(PL_formtarget, s, arg, nsv);
+ for (; t < SvEND(PL_formtarget); t++) {
+#ifdef EBCDIC
+ int ch = *t++ = *s++;
+ if (iscntrl(ch))
+#else
+ if (!(*t & ~31))
+#endif
+ *t = ' ';
+ }
+ break;
+ }
while (arg--) {
#ifdef EBCDIC
int ch = *t++ = *s++;
diff --git a/t/uni/write.t b/t/uni/write.t
new file mode 100644
index 0000000000..95c3bbb36a
--- /dev/null
+++ b/t/uni/write.t
@@ -0,0 +1,96 @@
+#!./perl -w
+use strict;
+
+BEGIN {
+ chdir 't' if -d 't';
+ @INC = qw(../lib .);
+ require "test.pl";
+}
+
+plan tests => 6;
+
+# Some tests for UTF8 and format/write
+
+our ($bitem1, $uitem1) = ("\x{ff}", "\x{100}");
+our ($bitem2, $uitem2) = ("\x{fe}", "\x{101}");
+our ($blite1, $ulite1) = ("\x{fd}", "\x{102}");
+our ($blite2, $ulite2) = ("\x{fc}", "\x{103}");
+our ($bmulti, $umulti) = ("\x{fb}\n\x{fa}\n\x{f9}\n",
+ "\x{104}\n\x{105}\n\x{106}\n");
+
+sub fmwrtest {
+ no strict 'refs';
+ my ($out, $format, $expect, $name) = @_;
+ eval "format $out =\n$format.\n"; die $@ if $@;
+ open $out, '>:utf8', 'Uni_write.tmp' or die "Can't create Uni_write.tmp";
+ write $out;
+ close $out or die "Could not close $out: $!";
+
+ open UIN, '<:utf8', 'Uni_write.tmp' or die "Can't open Uni_write.tmp";;
+ my $result = do { local $/; <UIN>; };
+ close UIN;
+
+ is($result, $expect, $name);
+}
+
+fmwrtest OUT1 => <<EOFORMAT, <<EOEXPECT, "non-UTF8 literal / UTF8 item (1)";
+$blite1 @<<
+\$uitem1
+$blite2 @<<
+\$bitem2
+EOFORMAT
+$blite1 $uitem1
+$blite2 $bitem2
+EOEXPECT
+
+fmwrtest OUT2 => <<EOFORMAT, <<EOEXPECT, "non-UTF8 literal / UTF8 item (2)";
+$blite1 @<<
+\$bitem1
+$blite2 @<<
+\$uitem2
+EOFORMAT
+$blite1 $bitem1
+$blite2 $uitem2
+EOEXPECT
+
+fmwrtest OUT3 => <<EOFORMAT, <<EOEXPECT, "UTF8 literal / non-UTF8 item (1)";
+$ulite1 @<<
+\$bitem1
+$blite2 @<<
+\$bitem2
+EOFORMAT
+$ulite1 $bitem1
+$blite2 $bitem2
+EOEXPECT
+
+fmwrtest OUT4 => <<EOFORMAT, <<EOEXPECT, "UTF8 literal / non-UTF8 item (2)";
+$blite1 @<<
+\$bitem1
+$ulite2 @<<
+\$bitem2
+EOFORMAT
+$blite1 $bitem1
+$ulite2 $bitem2
+EOEXPECT
+
+fmwrtest OUT5 => <<EOFORMAT, <<EOEXPECT, "non-UTF8 literal / UTF8 multiline";
+$blite1
+@*
+\$umulti
+$blite2
+EOFORMAT
+$blite1
+$umulti$blite2
+EOEXPECT
+
+fmwrtest OUT6 => <<EOFORMAT, <<EOEXPECT, "UTF8 literal / non-UTF8 multiline";
+$ulite1
+@*
+\$bmulti
+$blite2
+EOFORMAT
+$ulite1
+$bmulti$blite2
+EOEXPECT
+
+unlink 'Uni_write.tmp';