diff options
author | Karl Williamson <khw@cpan.org> | 2021-07-10 14:02:13 -0600 |
---|---|---|
committer | Karl Williamson <khw@cpan.org> | 2021-07-23 15:33:27 -0700 |
commit | 907eab3fe07b8209285bae8aea08c8592f05fd2b (patch) | |
tree | 6092c76873435b3211625ce53e5a734911c1e0d2 /dist | |
parent | c0605d31d02511d881ffebfc287e1faae9303491 (diff) | |
download | perl-907eab3fe07b8209285bae8aea08c8592f05fd2b.tar.gz |
U:N: Generalize to work on EBCDIC
Diffstat (limited to 'dist')
-rw-r--r-- | dist/Unicode-Normalize/Normalize.pm | 27 | ||||
-rw-r--r-- | dist/Unicode-Normalize/mkheader | 20 | ||||
-rw-r--r-- | dist/Unicode-Normalize/t/fcdc.t | 13 | ||||
-rw-r--r-- | dist/Unicode-Normalize/t/form.t | 11 | ||||
-rw-r--r-- | dist/Unicode-Normalize/t/func.t | 13 | ||||
-rw-r--r-- | dist/Unicode-Normalize/t/illegal.t | 13 | ||||
-rw-r--r-- | dist/Unicode-Normalize/t/norm.t | 15 | ||||
-rw-r--r-- | dist/Unicode-Normalize/t/null.t | 11 | ||||
-rw-r--r-- | dist/Unicode-Normalize/t/partial1.t | 11 | ||||
-rw-r--r-- | dist/Unicode-Normalize/t/partial2.t | 15 | ||||
-rw-r--r-- | dist/Unicode-Normalize/t/proto.t | 11 | ||||
-rw-r--r-- | dist/Unicode-Normalize/t/split.t | 15 | ||||
-rw-r--r-- | dist/Unicode-Normalize/t/test.t | 15 | ||||
-rw-r--r-- | dist/Unicode-Normalize/t/tie.t | 11 |
14 files changed, 29 insertions, 172 deletions
diff --git a/dist/Unicode-Normalize/Normalize.pm b/dist/Unicode-Normalize/Normalize.pm index 5980b7dccd..7bebfadf54 100644 --- a/dist/Unicode-Normalize/Normalize.pm +++ b/dist/Unicode-Normalize/Normalize.pm @@ -1,14 +1,5 @@ package Unicode::Normalize; -BEGIN { - unless ('A' eq pack('U', 0x41)) { - die "Unicode::Normalize cannot stringify a Unicode code point\n"; - } - unless (0x41 == unpack('U', 'A')) { - die "Unicode::Normalize cannot get Unicode code point\n"; - } -} - use 5.006; use strict; use warnings; @@ -40,16 +31,26 @@ our %EXPORT_TAGS = ( ## utilities for tests ## -sub pack_U { - return pack('U*', @_); + # No EBCDIC support on early perls +*to_native = ($::IS_ASCII || $] < 5.008) + ? sub { return shift } + : sub { utf8::unicode_to_native(shift) }; + +*from_native = ($::IS_ASCII || $] < 5.008) + ? sub { return shift } + : sub { utf8::native_to_unicode(shift) }; + +# The .t files are all in terms of Unicode, so xlate to/from native +sub dot_t_pack_U { + return pack('U*', map { to_native($_) } @_); } -sub unpack_U { +sub dot_t_unpack_U { # The empty pack returns an empty UTF-8 string, so the effect is to force # the shifted parameter into being UTF-8. This allows this to work on # Perl 5.6, where there is no utf8::upgrade(). - return unpack('U*', shift(@_).pack('U*')); + return map { from_native($_) } unpack('U*', shift(@_).pack('U*')); } sub get_printable_string ($) { diff --git a/dist/Unicode-Normalize/mkheader b/dist/Unicode-Normalize/mkheader index 8d4c1b8e8d..669a8a21f9 100644 --- a/dist/Unicode-Normalize/mkheader +++ b/dist/Unicode-Normalize/mkheader @@ -24,28 +24,16 @@ use Carp; use File::Spec; use SelectSaver; -BEGIN { - unless ('A' eq pack('U', 0x41)) { - die "Unicode::Normalize cannot stringify a Unicode code point\n"; - } - unless (0x41 == unpack('U', 'A')) { - die "Unicode::Normalize cannot get Unicode code point\n"; - } -} - our $PACKAGE = 'Unicode::Normalize, mkheader'; our $prefix = "UNF_"; our $structname = "${prefix}complist"; # Starting in v5.20, the tables in lib/unicore are built using the platform's -# native character set for code points 0-255. -*pack_U = ($] ge 5.020) - ? sub { return pack('W*', @_).pack('U*'); } # The empty pack returns - # an empty UTF-8 string, - # so the effect is to - # force the return into - # being UTF-8. +# native character set for code points 0-255. But in v5.35, pack U stopped +# trying to compensate +*pack_U = ($] ge 5.020 && $] lt 5.035) + ? sub { return pack('U*', map { utf8::unicode_to_native($_) } @_); } : sub { return pack('U*', @_); }; # %Canon and %Compat will be ($codepoint => $hexstring) after _U_stringify() diff --git a/dist/Unicode-Normalize/t/fcdc.t b/dist/Unicode-Normalize/t/fcdc.t index 348f5e8ce0..b5dc195f7c 100644 --- a/dist/Unicode-Normalize/t/fcdc.t +++ b/dist/Unicode-Normalize/t/fcdc.t @@ -1,16 +1,5 @@ BEGIN { - unless ('A' eq pack('U', 0x41)) { - print "1..0 # Unicode::Normalize cannot pack a Unicode code point\n"; - exit 0; - } - unless (0x41 == unpack('U', 'A')) { - print "1..0 # Unicode::Normalize cannot get a Unicode code point\n"; - exit 0; - } -} - -BEGIN { if ($ENV{PERL_CORE}) { chdir('t') if -d 't'; @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); @@ -29,7 +18,7 @@ use Unicode::Normalize qw(:all); ok(1); -sub _pack_U { Unicode::Normalize::pack_U(@_) } +sub _pack_U { Unicode::Normalize::dot_t_pack_U(@_) } sub hexU { _pack_U map hex, split ' ', shift } sub answer { defined $_[0] ? $_[0] ? "YES" : "NO" : "MAYBE" } diff --git a/dist/Unicode-Normalize/t/form.t b/dist/Unicode-Normalize/t/form.t index 1e7a96e8aa..9a0850c211 100644 --- a/dist/Unicode-Normalize/t/form.t +++ b/dist/Unicode-Normalize/t/form.t @@ -1,16 +1,5 @@ BEGIN { - unless ('A' eq pack('U', 0x41)) { - print "1..0 # Unicode::Normalize cannot pack a Unicode code point\n"; - exit 0; - } - unless (0x41 == unpack('U', 'A')) { - print "1..0 # Unicode::Normalize cannot get a Unicode code point\n"; - exit 0; - } -} - -BEGIN { if ($ENV{PERL_CORE}) { chdir('t') if -d 't'; @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); diff --git a/dist/Unicode-Normalize/t/func.t b/dist/Unicode-Normalize/t/func.t index b59ef31693..a5eb0b34b7 100644 --- a/dist/Unicode-Normalize/t/func.t +++ b/dist/Unicode-Normalize/t/func.t @@ -1,16 +1,5 @@ BEGIN { - unless ('A' eq pack('U', 0x41)) { - print "1..0 # Unicode::Normalize cannot pack a Unicode code point\n"; - exit 0; - } - unless (0x41 == unpack('U', 'A')) { - print "1..0 # Unicode::Normalize cannot get a Unicode code point\n"; - exit 0; - } -} - -BEGIN { if ($ENV{PERL_CORE}) { chdir('t') if -d 't'; @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); @@ -29,7 +18,7 @@ use Unicode::Normalize qw(:all); ok(1); -sub _pack_U { Unicode::Normalize::pack_U(@_) } +sub _pack_U { Unicode::Normalize::dot_t_pack_U(@_) } sub hexU { _pack_U map hex, split ' ', shift } # This won't work on EBCDIC platforms prior to v5.8.0, which is when this diff --git a/dist/Unicode-Normalize/t/illegal.t b/dist/Unicode-Normalize/t/illegal.t index 4325c681c1..6a1160c013 100644 --- a/dist/Unicode-Normalize/t/illegal.t +++ b/dist/Unicode-Normalize/t/illegal.t @@ -1,16 +1,5 @@ BEGIN { - unless ('A' eq pack('U', 0x41)) { - print "1..0 # Unicode::Normalize cannot pack a Unicode code point\n"; - exit 0; - } - unless (0x41 == unpack('U', 'A')) { - print "1..0 # Unicode::Normalize cannot get a Unicode code point\n"; - exit 0; - } -} - -BEGIN { if ($ENV{PERL_CORE}) { chdir('t') if -d 't'; @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); @@ -70,7 +59,7 @@ for my $u (0xD800, 0xDFFF, 0xFDD0, 0xFDEF, 0xFEFF, 0xFFFE, 0xFFFF, our $proc; # before the last starter our $unproc; # the last starter and after -sub _pack_U { Unicode::Normalize::pack_U(@_) } +sub _pack_U { Unicode::Normalize::dot_t_pack_U(@_) } ($proc, $unproc) = splitOnLastStarter(_pack_U(0x41, 0x300, 0x327, 0xFFFF)); ok($proc eq _pack_U(0x41, 0x300, 0x327)); diff --git a/dist/Unicode-Normalize/t/norm.t b/dist/Unicode-Normalize/t/norm.t index d9685e675f..ffb7449984 100644 --- a/dist/Unicode-Normalize/t/norm.t +++ b/dist/Unicode-Normalize/t/norm.t @@ -1,16 +1,5 @@ BEGIN { - unless ('A' eq pack('U', 0x41)) { - print "1..0 # Unicode::Normalize cannot pack a Unicode code point\n"; - exit 0; - } - unless (0x41 == unpack('U', 'A')) { - print "1..0 # Unicode::Normalize cannot get a Unicode code point\n"; - exit 0; - } -} - -BEGIN { if ($ENV{PERL_CORE}) { chdir('t') if -d 't'; @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); @@ -29,8 +18,8 @@ use Unicode::Normalize qw(normalize); ok(1); -sub _pack_U { Unicode::Normalize::pack_U(@_) } -sub _unpack_U { Unicode::Normalize::unpack_U(@_) } +sub _pack_U { Unicode::Normalize::dot_t_pack_U(@_) } +sub _unpack_U { Unicode::Normalize::dot_t_unpack_U(@_) } ######################### diff --git a/dist/Unicode-Normalize/t/null.t b/dist/Unicode-Normalize/t/null.t index 9a0008708e..93d1bbcd9a 100644 --- a/dist/Unicode-Normalize/t/null.t +++ b/dist/Unicode-Normalize/t/null.t @@ -1,16 +1,5 @@ BEGIN { - unless ('A' eq pack('U', 0x41)) { - print "1..0 # Unicode::Normalize cannot pack a Unicode code point\n"; - exit 0; - } - unless (0x41 == unpack('U', 'A')) { - print "1..0 # Unicode::Normalize cannot get a Unicode code point\n"; - exit 0; - } -} - -BEGIN { if ($ENV{PERL_CORE}) { chdir('t') if -d 't'; @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); diff --git a/dist/Unicode-Normalize/t/partial1.t b/dist/Unicode-Normalize/t/partial1.t index aa02705fcc..97d834352e 100644 --- a/dist/Unicode-Normalize/t/partial1.t +++ b/dist/Unicode-Normalize/t/partial1.t @@ -1,16 +1,5 @@ BEGIN { - unless ('A' eq pack('U', 0x41)) { - print "1..0 # Unicode::Normalize cannot pack a Unicode code point\n"; - exit 0; - } - unless (0x41 == unpack('U', 'A')) { - print "1..0 # Unicode::Normalize cannot get a Unicode code point\n"; - exit 0; - } -} - -BEGIN { if ($ENV{PERL_CORE}) { chdir('t') if -d 't'; @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); diff --git a/dist/Unicode-Normalize/t/partial2.t b/dist/Unicode-Normalize/t/partial2.t index d3c24c9cf9..1cf510dbf9 100644 --- a/dist/Unicode-Normalize/t/partial2.t +++ b/dist/Unicode-Normalize/t/partial2.t @@ -1,16 +1,5 @@ BEGIN { - unless ('A' eq pack('U', 0x41)) { - print "1..0 # Unicode::Normalize cannot pack a Unicode code point\n"; - exit 0; - } - unless (0x41 == unpack('U', 'A')) { - print "1..0 # Unicode::Normalize cannot get a Unicode code point\n"; - exit 0; - } -} - -BEGIN { if ($ENV{PERL_CORE}) { chdir('t') if -d 't'; @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); @@ -37,8 +26,8 @@ use Unicode::Normalize qw(:all); ok(1); -sub _pack_U { Unicode::Normalize::pack_U(@_) } -sub _unpack_U { Unicode::Normalize::unpack_U(@_) } +sub _pack_U { Unicode::Normalize::dot_t_pack_U(@_) } +sub _unpack_U { Unicode::Normalize::undot_t_pack_U(@_) } ######################### diff --git a/dist/Unicode-Normalize/t/proto.t b/dist/Unicode-Normalize/t/proto.t index 7a180c8945..f90777c671 100644 --- a/dist/Unicode-Normalize/t/proto.t +++ b/dist/Unicode-Normalize/t/proto.t @@ -1,16 +1,5 @@ BEGIN { - unless ('A' eq pack('U', 0x41)) { - print "1..0 # Unicode::Normalize cannot pack a Unicode code point\n"; - exit 0; - } - unless (0x41 == unpack('U', 'A')) { - print "1..0 # Unicode::Normalize cannot get a Unicode code point\n"; - exit 0; - } -} - -BEGIN { if ($ENV{PERL_CORE}) { chdir('t') if -d 't'; @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); diff --git a/dist/Unicode-Normalize/t/split.t b/dist/Unicode-Normalize/t/split.t index 6c4d31129a..ae166f4fed 100644 --- a/dist/Unicode-Normalize/t/split.t +++ b/dist/Unicode-Normalize/t/split.t @@ -1,16 +1,5 @@ BEGIN { - unless ('A' eq pack('U', 0x41)) { - print "1..0 # Unicode::Normalize cannot pack a Unicode code point\n"; - exit 0; - } - unless (0x41 == unpack('U', 'A')) { - print "1..0 # Unicode::Normalize cannot get a Unicode code point\n"; - exit 0; - } -} - -BEGIN { if ($ENV{PERL_CORE}) { chdir('t') if -d 't'; @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); @@ -37,8 +26,8 @@ use Unicode::Normalize qw(:all); ok(1); -sub _pack_U { Unicode::Normalize::pack_U(@_) } -sub _unpack_U { Unicode::Normalize::unpack_U(@_) } +sub _pack_U { Unicode::Normalize::dot_t_pack_U(@_) } +sub _unpack_U { Unicode::Normalize::dot_t_unpack_U(@_) } ######################### diff --git a/dist/Unicode-Normalize/t/test.t b/dist/Unicode-Normalize/t/test.t index 15ff055d93..cec3ca7bb8 100644 --- a/dist/Unicode-Normalize/t/test.t +++ b/dist/Unicode-Normalize/t/test.t @@ -1,16 +1,5 @@ BEGIN { - unless ('A' eq pack('U', 0x41)) { - print "1..0 # Unicode::Normalize cannot pack a Unicode code point\n"; - exit 0; - } - unless (0x41 == unpack('U', 'A')) { - print "1..0 # Unicode::Normalize cannot get a Unicode code point\n"; - exit 0; - } -} - -BEGIN { if ($ENV{PERL_CORE}) { chdir('t') if -d 't'; @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); @@ -29,8 +18,8 @@ use Unicode::Normalize; ok(1); -sub _pack_U { Unicode::Normalize::pack_U(@_) } -sub _unpack_U { Unicode::Normalize::unpack_U(@_) } +sub _pack_U { Unicode::Normalize::dot_t_pack_U(@_) } +sub _unpack_U { Unicode::Normalize::dot_t_unpack_U(@_) } ######################### diff --git a/dist/Unicode-Normalize/t/tie.t b/dist/Unicode-Normalize/t/tie.t index ea050e89a5..1282405001 100644 --- a/dist/Unicode-Normalize/t/tie.t +++ b/dist/Unicode-Normalize/t/tie.t @@ -1,16 +1,5 @@ BEGIN { - unless ('A' eq pack('U', 0x41)) { - print "1..0 # Unicode::Normalize cannot pack a Unicode code point\n"; - exit 0; - } - unless (0x41 == unpack('U', 'A')) { - print "1..0 # Unicode::Normalize cannot get a Unicode code point\n"; - exit 0; - } -} - -BEGIN { if ($ENV{PERL_CORE}) { chdir('t') if -d 't'; @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib); |