diff options
Diffstat (limited to 't/op')
-rwxr-xr-x | t/op/each.t | 6 | ||||
-rwxr-xr-x | t/op/pat.t | 3 | ||||
-rwxr-xr-x | t/op/regexp.t | 2 | ||||
-rw-r--r-- | t/op/utf8decode.t | 21 |
4 files changed, 27 insertions, 5 deletions
diff --git a/t/op/each.t b/t/op/each.t index daddc9c3c1..6dd1ceae8c 100755 --- a/t/op/each.t +++ b/t/op/each.t @@ -165,14 +165,16 @@ print "ok 24\n"; use bytes (); -$d = pack("U*", 0xe3, 0x81, 0x82); +# on EBCDIC chars are mapped differently so pick something that needs encoding +# there too. +$d = pack("U*", 0xe3, 0x81, 0xAF); $ol = bytes::length($d); print "not " unless $ol > 3; print "ok 25\n"; %u = ($d => "downgrade"); for (keys %u) { use bytes; - print "not " if length ne 3 or $_ ne "\xe3\x81\x82"; + print "not " if length ne 3 or $_ ne "\xe3\x81\xAF"; print "ok 26\n"; } { diff --git a/t/op/pat.t b/t/op/pat.t index 4c48c33d8e..c3024a2f2d 100755 --- a/t/op/pat.t +++ b/t/op/pat.t @@ -11,6 +11,9 @@ BEGIN { chdir 't' if -d 't'; @INC = '../lib'; } + +use re 'asciirange'; # Compute ranges in ASCII space + eval 'use Config'; # Defaults assumed if this fails $x = "abc\ndef\n"; diff --git a/t/op/regexp.t b/t/op/regexp.t index 4a4d42fd98..0b81e714a9 100755 --- a/t/op/regexp.t +++ b/t/op/regexp.t @@ -36,6 +36,8 @@ BEGIN { @INC = '../lib'; } +use re 'asciirange'; # ranges are computed in ASCII + $iters = shift || 1; # Poor man performance suite, 10000 is OK. open(TESTS,'op/re_tests') || open(TESTS,'t/op/re_tests') || diff --git a/t/op/utf8decode.t b/t/op/utf8decode.t index 4d05a6b8d3..494aa8cfb8 100644 --- a/t/op/utf8decode.t +++ b/t/op/utf8decode.t @@ -3,6 +3,21 @@ BEGIN { chdir 't' if -d 't'; @INC = '../lib'; + +} + +{ + my $wide = v256; + use bytes; + my $ordwide = ord($wide); + printf "# under use bytes ord(v256) = 0x%02x\n", $ordwide; + if ($ordwide == 140) { + print "1..0 # Skip: UTF-EBCDIC (not UTF-8) used here\n"; + exit 0; + } + elsif ($ordwide != 196) { + printf "# v256 starts with 0x%02x\n", $ordwide; + } } no utf8; @@ -13,7 +28,7 @@ my $test = 1; # This table is based on Markus Kuhn's UTF-8 Decode Stress Tester, # http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt, -# version dated 2000-09-02. +# version dated 2000-09-02. # We use the \x notation instead of raw binary bytes for \x00-\x1f\x7f-\xff # because e.g. many patch programs have issues with binary data. @@ -21,7 +36,7 @@ my $test = 1; my @MK = split(/\n/, <<__EOMK__); 1 Correct UTF-8 1.1.1 y "\xce\xba\xe1\xbd\xb9\xcf\x83\xce\xbc\xce\xb5" - 11 ce:ba:e1:bd:b9:cf:83:ce:bc:ce:b5 5 -2 Boundary conditions +2 Boundary conditions 2.1 First possible sequence of certain length 2.1.1 y "\x00" 0 1 00 1 2.1.2 y "\xc2\x80" 80 2 c2:80 1 @@ -135,7 +150,7 @@ __EOMK__ sub moan { print "$id: @_"; } - + sub test_unpack_U { $WARNCNT = 0; $WARNMSG = ""; |