diff options
author | Nick Ing-Simmons <nik@tiuk.ti.com> | 2001-03-28 14:38:24 +0000 |
---|---|---|
committer | Nick Ing-Simmons <nik@tiuk.ti.com> | 2001-03-28 14:38:24 +0000 |
commit | daf0f78e031c718c75590ef9ef573756f805776e (patch) | |
tree | ef46cd2cf7cc422e43d555f69d103115c21cfce8 /t | |
parent | 30ff31546a6570f575cb35e80f834c872c5abdb1 (diff) | |
download | perl-daf0f78e031c718c75590ef9ef573756f805776e.tar.gz |
More EBCDIC tweaks:
- one more swash issue: &~(0xA0-1) did not do the right thing
for UTF-EBCDIC, where &~(0x80-1) does for UTF-8.
- add "use re 'asciirange'" to make [!-~] etc. work;
use it in MIME::QuotedPrint, t/op/regexp.t and t/op/pat.t.
- Choose a key for the t/op/each.t test that needs encoding on EBCDIC too.
- Skip utf8decode if this is UTF-EBCDIC.
p4raw-id: //depot/perlio@9400
Diffstat (limited to 't')
-rwxr-xr-x | t/op/each.t | 6 | ||||
-rwxr-xr-x | t/op/pat.t | 3 | ||||
-rwxr-xr-x | t/op/regexp.t | 2 | ||||
-rw-r--r-- | t/op/utf8decode.t | 20 |
4 files changed, 26 insertions, 5 deletions
diff --git a/t/op/each.t b/t/op/each.t index daddc9c3c1..6dd1ceae8c 100755 --- a/t/op/each.t +++ b/t/op/each.t @@ -165,14 +165,16 @@ print "ok 24\n"; use bytes (); -$d = pack("U*", 0xe3, 0x81, 0x82); +# on EBCDIC chars are mapped differently so pick something that needs encoding +# there too. +$d = pack("U*", 0xe3, 0x81, 0xAF); $ol = bytes::length($d); print "not " unless $ol > 3; print "ok 25\n"; %u = ($d => "downgrade"); for (keys %u) { use bytes; - print "not " if length ne 3 or $_ ne "\xe3\x81\x82"; + print "not " if length ne 3 or $_ ne "\xe3\x81\xAF"; print "ok 26\n"; } { diff --git a/t/op/pat.t b/t/op/pat.t index 4c48c33d8e..c3024a2f2d 100755 --- a/t/op/pat.t +++ b/t/op/pat.t @@ -11,6 +11,9 @@ BEGIN { chdir 't' if -d 't'; @INC = '../lib'; } + +use re 'asciirange'; # Compute ranges in ASCII space + eval 'use Config'; # Defaults assumed if this fails $x = "abc\ndef\n"; diff --git a/t/op/regexp.t b/t/op/regexp.t index 4a4d42fd98..0b81e714a9 100755 --- a/t/op/regexp.t +++ b/t/op/regexp.t @@ -36,6 +36,8 @@ BEGIN { @INC = '../lib'; } +use re 'asciirange'; # ranges are computed in ASCII + $iters = shift || 1; # Poor man performance suite, 10000 is OK. open(TESTS,'op/re_tests') || open(TESTS,'t/op/re_tests') || diff --git a/t/op/utf8decode.t b/t/op/utf8decode.t index 4d05a6b8d3..824805d5df 100644 --- a/t/op/utf8decode.t +++ b/t/op/utf8decode.t @@ -3,6 +3,20 @@ BEGIN { chdir 't' if -d 't'; @INC = '../lib'; + +} + +{ + my $wide = v256; + use bytes; + print STDERR ord($wide),"\n"; + if (ord($wide) == 140) { + print "1..0 # Skip: UTF-EBCDIC (not UTF-8) used here\n"; + exit 0; + } + elsif (ord($wide) != 196) { + warn sprintf("v256 starts with %02X\n",ord($wide)); + } } no utf8; @@ -13,7 +27,7 @@ my $test = 1; # This table is based on Markus Kuhn's UTF-8 Decode Stress Tester, # http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt, -# version dated 2000-09-02. +# version dated 2000-09-02. 
# We use the \x notation instead of raw binary bytes for \x00-\x1f\x7f-\xff # because e.g. many patch programs have issues with binary data. @@ -21,7 +35,7 @@ my $test = 1; my @MK = split(/\n/, <<__EOMK__); 1 Correct UTF-8 1.1.1 y "\xce\xba\xe1\xbd\xb9\xcf\x83\xce\xbc\xce\xb5" - 11 ce:ba:e1:bd:b9:cf:83:ce:bc:ce:b5 5 -2 Boundary conditions +2 Boundary conditions 2.1 First possible sequence of certain length 2.1.1 y "\x00" 0 1 00 1 2.1.2 y "\xc2\x80" 80 2 c2:80 1 @@ -135,7 +149,7 @@ __EOMK__ sub moan { print "$id: @_"; } - + sub test_unpack_U { $WARNCNT = 0; $WARNMSG = ""; |