summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJarkko Hietaniemi <jhi@iki.fi>2002-01-31 15:26:41 +0000
committerJarkko Hietaniemi <jhi@iki.fi>2002-01-31 15:26:41 +0000
commit62961d2e50d22e7ae5a679eac7bf6d593193e108 (patch)
treef95115fdf1647a324092e9463ab667a475f4036d
parent725d232a70026302b7898692e2e17c43b9ad4e76 (diff)
downloadperl-62961d2e50d22e7ae5a679eac7bf6d593193e108.tar.gz
Turn the I/O Unicode error by default on, but the
character-generating Unicode error by default off, as Larry suggested. p4raw-id: //depot/perl@14505
-rw-r--r--doio.c2
-rw-r--r--pod/perldiag.pod5
-rw-r--r--pp.c3
-rwxr-xr-xt/io/utf8.t51
-rw-r--r--t/lib/warnings/utf891
-rw-r--r--utf8.c2
6 files changed, 137 insertions, 17 deletions
diff --git a/doio.c b/doio.c
index ab74d4a21c..3c06585356 100644
--- a/doio.c
+++ b/doio.c
@@ -1223,7 +1223,7 @@ Perl_do_print(pTHX_ register SV *sv, PerlIO *fp)
}
else if (DO_UTF8(sv)) {
if (!sv_utf8_downgrade((sv = sv_mortalcopy(sv)), TRUE)
- && ckWARN(WARN_UTF8))
+ && ckWARN_d(WARN_UTF8))
{
Perl_warner(aTHX_ WARN_UTF8, "Wide character in print");
}
diff --git a/pod/perldiag.pod b/pod/perldiag.pod
index 2a3f5d097a..56b6950e9f 100644
--- a/pod/perldiag.pod
+++ b/pod/perldiag.pod
@@ -4184,7 +4184,10 @@ So put in parentheses to say what you really mean.
=item Wide character in %s
-(W utf8) Perl met a wide character (>255) when it wasn't expecting one.
+(W utf8) Perl met a wide character (>255) when it wasn't expecting
+one. This warning is by default on for I/O (like print) but can be
+turned off by C<no warnings 'utf8';>. You are supposed to explicitly
+mark the filehandle with an encoding, see L<open> and L<perlfunc/binmode>.
=item write() on closed filehandle %s
diff --git a/pp.c b/pp.c
index 2d462c45a3..51facc0730 100644
--- a/pp.c
+++ b/pp.c
@@ -3168,8 +3168,7 @@ PP(pp_chr)
if (value > 255 && !IN_BYTES) {
SvGROW(TARG, UNISKIP(value)+1);
- tmps = (char*)uvchr_to_utf8_flags((U8*)SvPVX(TARG), value,
- UNICODE_ALLOW_SUPER);
+ tmps = (char*)uvchr_to_utf8_flags((U8*)SvPVX(TARG), value, 0);
SvCUR_set(TARG, tmps - SvPVX(TARG));
*tmps = '\0';
(void)SvPOK_only(TARG);
diff --git a/t/io/utf8.t b/t/io/utf8.t
index e8caf722f2..d0fe0f1462 100755
--- a/t/io/utf8.t
+++ b/t/io/utf8.t
@@ -12,7 +12,7 @@ BEGIN {
no utf8; # needed for use utf8 not griping about the raw octets
$| = 1;
-print "1..26\n";
+print "1..31\n";
open(F,"+>:utf8",'a');
print F chr(0x100).'£';
@@ -186,7 +186,7 @@ if (ord('A') == 193) {
close F;
unlink('a');
-open F, ">a";
+open F, ">:utf8", "a";
@a = map { chr(1 << ($_ << 2)) } 0..5; # 0x1, 0x10, .., 0x100000
unshift @a, chr(0); # ... and a null byte in front just for fun
print F @a;
@@ -216,6 +216,52 @@ for (@a) {
close F;
print "ok 26\n";
+{
+ # Check that warnings are on on I/O, and that they can be muffled.
+
+ local $SIG{__WARN__} = sub { $@ = shift };
+
+ undef $@;
+ open F, ">a";
+ print F chr(0x100);
+ close(F);
+
+ print $@ =~ /Wide character in print/ ? "ok 27\n" : "not ok 27\n";
+
+ undef $@;
+ open F, ">:utf8", ">a";
+ print F chr(0x100);
+ close(F);
+
+ print defined $@ ? "not ok 28\n" : "ok 28\n";
+
+ undef $@;
+ open F, ">a";
+ binmode(F, ":utf8");
+ print F chr(0x100);
+ close(F);
+
+ print defined $@ ? "not ok 29\n" : "ok 29\n";
+
+ no warnings 'utf8';
+
+ undef $@;
+ open F, ">a";
+ print F chr(0x100);
+ close(F);
+
+ print defined $@ ? "not ok 30\n" : "ok 30\n";
+
+ use warnings 'utf8';
+
+ undef $@;
+ open F, ">a";
+ print F chr(0x100);
+ close(F);
+
+ print $@ =~ /Wide character in print/ ? "ok 31\n" : "not ok 31\n";
+}
+
# sysread() and syswrite() tested in lib/open.t since Fnctl is used
END {
@@ -223,4 +269,3 @@ END {
1 while unlink "b";
}
-
diff --git a/t/lib/warnings/utf8 b/t/lib/warnings/utf8
index d2ac06f0fa..747436ab27 100644
--- a/t/lib/warnings/utf8
+++ b/t/lib/warnings/utf8
@@ -34,15 +34,88 @@ Malformed UTF-8 character (unexpected non-continuation byte 0x73, immediately af
Malformed UTF-8 character (unexpected non-continuation byte 0x73, immediately after start byte 0xf8) at - line 14.
########
use warnings 'utf8';
-my $surr = chr(0xD800);
-my $fff3 = chr(0xFFFE);
-my $ffff = chr(0xFFFF);
+my $d7ff = chr(0xD7FF);
+my $d800 = chr(0xD800);
+my $dfff = chr(0xDFFF);
+my $e000 = chr(0xE000);
+my $fffd = chr(0xFFFD);
+my $fffe = chr(0xFFFE);
+my $ffff = chr(0xFFFF);
+my $hex4 = chr(0x10000);
+my $hex5 = chr(0x100000);
+my $max = chr(0x10FFFF);
no warnings 'utf8';
-$surr = chr(0xD800);
-$fffe = chr(0xFFFE);
-$ffff = chr(0xFFFF);
+my $d7ff = chr(0xD7FF);
+my $d800 = chr(0xD800);
+my $dfff = chr(0xDFFF);
+my $e000 = chr(0xE000);
+my $fffd = chr(0xFFFD);
+my $fffe = chr(0xFFFE);
+my $ffff = chr(0xFFFF);
+my $hex4 = chr(0x10000);
+my $hex5 = chr(0x100000);
+my $max = chr(0x10FFFF);
EXPECT
-UTF-16 surrogate 0xd800 at - line 2.
-Unicode character 0xfffe is illegal at - line 3.
-Unicode character 0xffff is illegal at - line 4.
+UTF-16 surrogate 0xd800 at - line 3.
+UTF-16 surrogate 0xdfff at - line 4.
+Unicode character 0xfffe is illegal at - line 7.
+Unicode character 0xffff is illegal at - line 8.
+Unicode character 0x10ffff is illegal at - line 11.
########
+use warnings 'utf8';
+my $d7ff = pack("U", 0xD7FF);
+my $d800 = pack("U", 0xD800);
+my $dfff = pack("U", 0xDFFF);
+my $e000 = pack("U", 0xE000);
+my $fffd = pack("U", 0xFFFD);
+my $fffe = pack("U", 0xFFFE);
+my $ffff = pack("U", 0xFFFF);
+my $hex4 = pack("U", 0x10000);
+my $hex5 = pack("U", 0x100000);
+my $max = pack("U", 0x10FFFF);
+no warnings 'utf8';
+my $d7ff = pack("U", 0xD7FF);
+my $d800 = pack("U", 0xD800);
+my $dfff = pack("U", 0xDFFF);
+my $e000 = pack("U", 0xE000);
+my $fffd = pack("U", 0xFFFD);
+my $fffe = pack("U", 0xFFFE);
+my $ffff = pack("U", 0xFFFF);
+my $hex4 = pack("U", 0x10000);
+my $hex5 = pack("U", 0x100000);
+my $max = pack("U", 0x10FFFF);
+EXPECT
+UTF-16 surrogate 0xd800 at - line 3.
+UTF-16 surrogate 0xdfff at - line 4.
+Unicode character 0xfffe is illegal at - line 7.
+Unicode character 0xffff is illegal at - line 8.
+Unicode character 0x10ffff is illegal at - line 11.
+########
+use warnings 'utf8';
+my $d7ff = "\x{D7FF}";
+my $d800 = "\x{D800}";
+my $dfff = "\x{DFFF}";
+my $e000 = "\x{E000}";
+my $fffd = "\x{FFFD}";
+my $fffe = "\x{FFFE}";
+my $ffff = "\x{FFFF}";
+my $hex4 = "\x{10000}";
+my $hex5 = "\x{100000}";
+my $max = "\x{10FFFF}";
+no warnings 'utf8';
+my $d7ff = "\x{D7FF}";
+my $d800 = "\x{D800}";
+my $dfff = "\x{DFFF}";
+my $e000 = "\x{E000}";
+my $fffd = "\x{FFFD}";
+my $fffe = "\x{FFFE}";
+my $ffff = "\x{FFFF}";
+my $hex4 = "\x{10000}";
+my $hex5 = "\x{100000}";
+my $max = "\x{10FFFF}";
+EXPECT
+UTF-16 surrogate 0xd800 at - line 3.
+UTF-16 surrogate 0xdfff at - line 4.
+Unicode character 0xfffe is illegal at - line 7.
+Unicode character 0xffff is illegal at - line 8.
+Unicode character 0x10ffff is illegal at - line 11.
diff --git a/utf8.c b/utf8.c
index 5d124ba5eb..cf3f48dde3 100644
--- a/utf8.c
+++ b/utf8.c
@@ -54,7 +54,7 @@ is the recommended Unicode-aware way of saying
U8 *
Perl_uvuni_to_utf8_flags(pTHX_ U8 *d, UV uv, UV flags)
{
- if (ckWARN_d(WARN_UTF8)) {
+ if (ckWARN(WARN_UTF8)) {
if (UNICODE_IS_SURROGATE(uv) &&
!(flags & UNICODE_ALLOW_SURROGATE))
Perl_warner(aTHX_ WARN_UTF8, "UTF-16 surrogate 0x%04"UVxf, uv);