strange encodings upset pp_chr

Message-Id: <20060514195532.5422.BQW10602@nifty.com>
p4raw-id: //depot/perl@28193
author: SADAHIRO Tomoyuki <BQW10602@nifty.com> 2006-05-15 04:57:28 +0900
committer: Rafael Garcia-Suarez <rgarciasuarez@gmail.com> 2006-05-15 08:33:53 +0000
commit: 4c5ed6e2fe45844ca952edb0ad5be618e204247b (patch)
tree: 13f79404b1e916a7fc9aed964369c986ece9d08d /t
parent: 9c8d215a6f7e1098759dbf707ed95f4895bc227c (diff)
download: perl-4c5ed6e2fe45844ca952edb0ad5be618e204247b.tar.gz
3 files changed, 100 insertions, 24 deletions
diff --git a/t/op/chr.t b/t/op/chr.t
index e63c3b56ad..056f11ab92 100644
--- a/t/op/chr.t
+++ b/t/op/chr.t
@@ -31,7 +31,9 @@ is(chr(-3.0), "\x{FFFD}");
     is(chr(-3.0), "\xFD");
 }
 
-# Check UTF-8.
+# Check UTF-8 (not UTF-EBCDIC).
+SKIP: {
+    skip "no UTF-8 on EBCDIC", 21 if chr(193) eq 'A';
 
 sub hexes {
     no warnings 'utf8'; # avoid surrogate and beyond Unicode warnings
@@ -39,25 +41,25 @@ sub hexes {
 }
 
 # The following code points are some interesting steps in UTF-8.
-is(hexes(   0x100), "c4 80");
-is(hexes(   0x7FF), "df bf");
-is(hexes(   0x800), "e0 a0 80");
-is(hexes(   0xFFF), "e0 bf bf");
-is(hexes(  0x1000), "e1 80 80");
-is(hexes(  0xCFFF), "ec bf bf");
-is(hexes(  0xD000), "ed 80 80");
-is(hexes(  0xD7FF), "ed 9f bf");
-is(hexes(  0xD800), "ed a0 80"); # not strict utf-8 (surrogate area begin)
-is(hexes(  0xDFFF), "ed bf bf"); # not strict utf-8 (surrogate area end)
-is(hexes(  0xE000), "ee 80 80");
-is(hexes(  0xFFFF), "ef bf bf");
-is(hexes( 0x10000), "f0 90 80 80");
-is(hexes( 0x3FFFF), "f0 bf bf bf");
-is(hexes( 0x40000), "f1 80 80 80");
-is(hexes( 0xFFFFF), "f3 bf bf bf");
-is(hexes(0x100000), "f4 80 80 80");
-is(hexes(0x10FFFF), "f4 8f bf bf"); # Unicode (4.1) last code point
-is(hexes(0x110000), "f4 90 80 80");
-is(hexes(0x1FFFFF), "f7 bf bf bf"); # last four byte encoding
-is(hexes(0x200000), "f8 88 80 80 80");
-
+    is(hexes(   0x100), "c4 80");
+    is(hexes(   0x7FF), "df bf");
+    is(hexes(   0x800), "e0 a0 80");
+    is(hexes(   0xFFF), "e0 bf bf");
+    is(hexes(  0x1000), "e1 80 80");
+    is(hexes(  0xCFFF), "ec bf bf");
+    is(hexes(  0xD000), "ed 80 80");
+    is(hexes(  0xD7FF), "ed 9f bf");
+    is(hexes(  0xD800), "ed a0 80"); # not strict utf-8 (surrogate area begin)
+    is(hexes(  0xDFFF), "ed bf bf"); # not strict utf-8 (surrogate area end)
+    is(hexes(  0xE000), "ee 80 80");
+    is(hexes(  0xFFFF), "ef bf bf");
+    is(hexes( 0x10000), "f0 90 80 80");
+    is(hexes( 0x3FFFF), "f0 bf bf bf");
+    is(hexes( 0x40000), "f1 80 80 80");
+    is(hexes( 0xFFFFF), "f3 bf bf bf");
+    is(hexes(0x100000), "f4 80 80 80");
+    is(hexes(0x10FFFF), "f4 8f bf bf"); # Unicode (4.1) last code point
+    is(hexes(0x110000), "f4 90 80 80");
+    is(hexes(0x1FFFFF), "f7 bf bf bf"); # last four byte encoding
+    is(hexes(0x200000), "f8 88 80 80 80");
+}
diff --git a/t/op/ord.t b/t/op/ord.t
index 455666417f..1c82262b05 100755
--- a/t/op/ord.t
+++ b/t/op/ord.t
@@ -6,7 +6,7 @@ BEGIN {
     require "test.pl";
 }
 
-plan tests => 7;
+plan tests => 35;
 
 # compile time evaluation
 
@@ -33,3 +33,36 @@ is(ord("\x{1234}"), 0x1234, 'compile time ord \x{....}');
 $x = "\x{1234}";
 is(ord($x), 0x1234, 'runtime ord \x{....}');
 
+{
+    no warnings 'utf8'; # avoid Unicode warnings
+
+# The following code points are some interesting steps.
+    is(ord(chr(   0x100)),    0x100, '0x0100');
+    is(ord(chr(   0x3FF)),    0x3FF, 'last two-byte char in UTF-EBCDIC');
+    is(ord(chr(   0x400)),    0x400, 'first three-byte char in UTF-EBCDIC');
+    is(ord(chr(   0x7FF)),    0x7FF, 'last two-byte char in UTF-8');
+    is(ord(chr(   0x800)),    0x800, 'first three-byte char in UTF-8');
+    is(ord(chr(   0xFFF)),    0xFFF, '0x0FFF');
+    is(ord(chr(  0x1000)),   0x1000, '0x1000');
+    is(ord(chr(  0x3FFF)),   0x3FFF, 'last three-byte char in UTF-EBCDIC');
+    is(ord(chr(  0x4000)),   0x4000, 'first four-byte char in UTF-EBCDIC');
+    is(ord(chr(  0xCFFF)),   0xCFFF, '0xCFFF');
+    is(ord(chr(  0xD000)),   0xD000, '0xD000');
+    is(ord(chr(  0xD7FF)),   0xD7FF, '0xD7FF');
+    is(ord(chr(  0xD800)),   0xD800, 'surrogate begin (not strict utf-8)');
+    is(ord(chr(  0xDFFF)),   0xDFFF, 'surrogate end (not strict utf-8)');
+    is(ord(chr(  0xE000)),   0xE000, '0xE000');
+    is(ord(chr(  0xFDD0)),   0xFDD0, 'first additional noncharacter in BMP');
+    is(ord(chr(  0xFDEF)),   0xFDEF, 'last additional noncharacter in BMP');
+    is(ord(chr(  0xFFFE)),   0xFFFE, '0xFFFE');
+    is(ord(chr(  0xFFFF)),   0xFFFF, 'last three-byte char in UTF-8');
+    is(ord(chr( 0x10000)),  0x10000, 'first four-byte char in UTF-8');
+    is(ord(chr( 0x3FFFF)),  0x3FFFF, 'last four-byte char in UTF-EBCDIC');
+    is(ord(chr( 0x40000)),  0x40000, 'first five-byte char in UTF-EBCDIC');
+    is(ord(chr( 0xFFFFF)),  0xFFFFF, '0xFFFFF');
+    is(ord(chr(0x100000)), 0x100000, '0x100000');
+    is(ord(chr(0x10FFFF)), 0x10FFFF, 'Unicode last code point');
+    is(ord(chr(0x110000)), 0x110000, '0x110000');
+    is(ord(chr(0x1FFFFF)), 0x1FFFFF, 'last four-byte char in UTF-8');
+    is(ord(chr(0x200000)), 0x200000, 'first five-byte char in UTF-8');
+}
diff --git a/t/uni/chr.t b/t/uni/chr.t
new file mode 100644
index 0000000000..ab710d9e35
--- /dev/null
+++ b/t/uni/chr.t
@@ -0,0 +1,41 @@
+
+BEGIN {
+    if ($ENV{'PERL_CORE'}){
+        chdir 't';
+        @INC = '../lib';
+    }
+    require Config; import Config;
+    if ($Config{'extensions'} !~ /\bEncode\b/) {
+      print "1..0 # Skip: Encode was not built\n";
+      exit 0;
+    }
+    if (ord("A") == 193) {
+        print "1..0 # Skip: EBCDIC\n";
+        exit 0;
+    }
+    unless (PerlIO::Layer->find('perlio')){
+        print "1..0 # Skip: PerlIO required\n";
+        exit 0;
+    }
+    if ($ENV{PERL_CORE_MINITEST}) {
+        print "1..0 # Skip: no dynamic loading on miniperl, no Encode\n";
+        exit 0;
+    }
+    $| = 1;
+}
+
+use strict;
+use Test::More tests => 6;
+use Encode;
+
+use encoding 'johab';
+
+ok(chr(0x7f) eq "\x7f");
+ok(chr(0x80) eq "\x80");
+ok(chr(0xff) eq "\xff");
+
+for my $i (127, 128, 255) {
+    ok(chr($i) eq pack('C', $i));
+}
+
+__END__
author	SADAHIRO Tomoyuki <BQW10602@nifty.com>	2006-05-15 04:57:28 +0900
committer	Rafael Garcia-Suarez <rgarciasuarez@gmail.com>	2006-05-15 08:33:53 +0000
commit	4c5ed6e2fe45844ca952edb0ad5be618e204247b (patch)
tree	13f79404b1e916a7fc9aed964369c986ece9d08d /t
parent	9c8d215a6f7e1098759dbf707ed95f4895bc227c (diff)
download	perl-4c5ed6e2fe45844ca952edb0ad5be618e204247b.tar.gz