summaryrefslogtreecommitdiff
path: root/dist/Unicode-Normalize
diff options
context:
space:
mode:
authorKarl Williamson <khw@cpan.org>2021-07-10 14:02:13 -0600
committerKarl Williamson <khw@cpan.org>2021-07-23 15:33:27 -0700
commit907eab3fe07b8209285bae8aea08c8592f05fd2b (patch)
tree6092c76873435b3211625ce53e5a734911c1e0d2 /dist/Unicode-Normalize
parentc0605d31d02511d881ffebfc287e1faae9303491 (diff)
downloadperl-907eab3fe07b8209285bae8aea08c8592f05fd2b.tar.gz
U:N: Generalize to work on EBCDIC
Diffstat (limited to 'dist/Unicode-Normalize')
-rw-r--r--dist/Unicode-Normalize/Normalize.pm27
-rw-r--r--dist/Unicode-Normalize/mkheader20
-rw-r--r--dist/Unicode-Normalize/t/fcdc.t13
-rw-r--r--dist/Unicode-Normalize/t/form.t11
-rw-r--r--dist/Unicode-Normalize/t/func.t13
-rw-r--r--dist/Unicode-Normalize/t/illegal.t13
-rw-r--r--dist/Unicode-Normalize/t/norm.t15
-rw-r--r--dist/Unicode-Normalize/t/null.t11
-rw-r--r--dist/Unicode-Normalize/t/partial1.t11
-rw-r--r--dist/Unicode-Normalize/t/partial2.t15
-rw-r--r--dist/Unicode-Normalize/t/proto.t11
-rw-r--r--dist/Unicode-Normalize/t/split.t15
-rw-r--r--dist/Unicode-Normalize/t/test.t15
-rw-r--r--dist/Unicode-Normalize/t/tie.t11
14 files changed, 29 insertions, 172 deletions
diff --git a/dist/Unicode-Normalize/Normalize.pm b/dist/Unicode-Normalize/Normalize.pm
index 5980b7dccd..7bebfadf54 100644
--- a/dist/Unicode-Normalize/Normalize.pm
+++ b/dist/Unicode-Normalize/Normalize.pm
@@ -1,14 +1,5 @@
package Unicode::Normalize;
-BEGIN {
- unless ('A' eq pack('U', 0x41)) {
- die "Unicode::Normalize cannot stringify a Unicode code point\n";
- }
- unless (0x41 == unpack('U', 'A')) {
- die "Unicode::Normalize cannot get Unicode code point\n";
- }
-}
-
use 5.006;
use strict;
use warnings;
@@ -40,16 +31,26 @@ our %EXPORT_TAGS = (
## utilities for tests
##
-sub pack_U {
- return pack('U*', @_);
+ # No EBCDIC support on early perls
+*to_native = ($::IS_ASCII || $] < 5.008)
+ ? sub { return shift }
+ : sub { utf8::unicode_to_native(shift) };
+
+*from_native = ($::IS_ASCII || $] < 5.008)
+ ? sub { return shift }
+ : sub { utf8::native_to_unicode(shift) };
+
+# The .t files are all in terms of Unicode, so xlate to/from native
+sub dot_t_pack_U {
+ return pack('U*', map { to_native($_) } @_);
}
-sub unpack_U {
+sub dot_t_unpack_U {
# The empty pack returns an empty UTF-8 string, so the effect is to force
# the shifted parameter into being UTF-8. This allows this to work on
# Perl 5.6, where there is no utf8::upgrade().
- return unpack('U*', shift(@_).pack('U*'));
+ return map { from_native($_) } unpack('U*', shift(@_).pack('U*'));
}
sub get_printable_string ($) {
diff --git a/dist/Unicode-Normalize/mkheader b/dist/Unicode-Normalize/mkheader
index 8d4c1b8e8d..669a8a21f9 100644
--- a/dist/Unicode-Normalize/mkheader
+++ b/dist/Unicode-Normalize/mkheader
@@ -24,28 +24,16 @@ use Carp;
use File::Spec;
use SelectSaver;
-BEGIN {
- unless ('A' eq pack('U', 0x41)) {
- die "Unicode::Normalize cannot stringify a Unicode code point\n";
- }
- unless (0x41 == unpack('U', 'A')) {
- die "Unicode::Normalize cannot get Unicode code point\n";
- }
-}
-
our $PACKAGE = 'Unicode::Normalize, mkheader';
our $prefix = "UNF_";
our $structname = "${prefix}complist";
# Starting in v5.20, the tables in lib/unicore are built using the platform's
-# native character set for code points 0-255.
-*pack_U = ($] ge 5.020)
- ? sub { return pack('W*', @_).pack('U*'); } # The empty pack returns
- # an empty UTF-8 string,
- # so the effect is to
- # force the return into
- # being UTF-8.
+# native character set for code points 0-255. But in v5.35, pack U stopped
+# trying to compensate
+*pack_U = ($] ge 5.020 && $] lt 5.035)
+ ? sub { return pack('U*', map { utf8::unicode_to_native($_) } @_); }
: sub { return pack('U*', @_); };
# %Canon and %Compat will be ($codepoint => $hexstring) after _U_stringify()
diff --git a/dist/Unicode-Normalize/t/fcdc.t b/dist/Unicode-Normalize/t/fcdc.t
index 348f5e8ce0..b5dc195f7c 100644
--- a/dist/Unicode-Normalize/t/fcdc.t
+++ b/dist/Unicode-Normalize/t/fcdc.t
@@ -1,16 +1,5 @@
BEGIN {
- unless ('A' eq pack('U', 0x41)) {
- print "1..0 # Unicode::Normalize cannot pack a Unicode code point\n";
- exit 0;
- }
- unless (0x41 == unpack('U', 'A')) {
- print "1..0 # Unicode::Normalize cannot get a Unicode code point\n";
- exit 0;
- }
-}
-
-BEGIN {
if ($ENV{PERL_CORE}) {
chdir('t') if -d 't';
@INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
@@ -29,7 +18,7 @@ use Unicode::Normalize qw(:all);
ok(1);
-sub _pack_U { Unicode::Normalize::pack_U(@_) }
+sub _pack_U { Unicode::Normalize::dot_t_pack_U(@_) }
sub hexU { _pack_U map hex, split ' ', shift }
sub answer { defined $_[0] ? $_[0] ? "YES" : "NO" : "MAYBE" }
diff --git a/dist/Unicode-Normalize/t/form.t b/dist/Unicode-Normalize/t/form.t
index 1e7a96e8aa..9a0850c211 100644
--- a/dist/Unicode-Normalize/t/form.t
+++ b/dist/Unicode-Normalize/t/form.t
@@ -1,16 +1,5 @@
BEGIN {
- unless ('A' eq pack('U', 0x41)) {
- print "1..0 # Unicode::Normalize cannot pack a Unicode code point\n";
- exit 0;
- }
- unless (0x41 == unpack('U', 'A')) {
- print "1..0 # Unicode::Normalize cannot get a Unicode code point\n";
- exit 0;
- }
-}
-
-BEGIN {
if ($ENV{PERL_CORE}) {
chdir('t') if -d 't';
@INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
diff --git a/dist/Unicode-Normalize/t/func.t b/dist/Unicode-Normalize/t/func.t
index b59ef31693..a5eb0b34b7 100644
--- a/dist/Unicode-Normalize/t/func.t
+++ b/dist/Unicode-Normalize/t/func.t
@@ -1,16 +1,5 @@
BEGIN {
- unless ('A' eq pack('U', 0x41)) {
- print "1..0 # Unicode::Normalize cannot pack a Unicode code point\n";
- exit 0;
- }
- unless (0x41 == unpack('U', 'A')) {
- print "1..0 # Unicode::Normalize cannot get a Unicode code point\n";
- exit 0;
- }
-}
-
-BEGIN {
if ($ENV{PERL_CORE}) {
chdir('t') if -d 't';
@INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
@@ -29,7 +18,7 @@ use Unicode::Normalize qw(:all);
ok(1);
-sub _pack_U { Unicode::Normalize::pack_U(@_) }
+sub _pack_U { Unicode::Normalize::dot_t_pack_U(@_) }
sub hexU { _pack_U map hex, split ' ', shift }
# This won't work on EBCDIC platforms prior to v5.8.0, which is when this
diff --git a/dist/Unicode-Normalize/t/illegal.t b/dist/Unicode-Normalize/t/illegal.t
index 4325c681c1..6a1160c013 100644
--- a/dist/Unicode-Normalize/t/illegal.t
+++ b/dist/Unicode-Normalize/t/illegal.t
@@ -1,16 +1,5 @@
BEGIN {
- unless ('A' eq pack('U', 0x41)) {
- print "1..0 # Unicode::Normalize cannot pack a Unicode code point\n";
- exit 0;
- }
- unless (0x41 == unpack('U', 'A')) {
- print "1..0 # Unicode::Normalize cannot get a Unicode code point\n";
- exit 0;
- }
-}
-
-BEGIN {
if ($ENV{PERL_CORE}) {
chdir('t') if -d 't';
@INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
@@ -70,7 +59,7 @@ for my $u (0xD800, 0xDFFF, 0xFDD0, 0xFDEF, 0xFEFF, 0xFFFE, 0xFFFF,
our $proc; # before the last starter
our $unproc; # the last starter and after
-sub _pack_U { Unicode::Normalize::pack_U(@_) }
+sub _pack_U { Unicode::Normalize::dot_t_pack_U(@_) }
($proc, $unproc) = splitOnLastStarter(_pack_U(0x41, 0x300, 0x327, 0xFFFF));
ok($proc eq _pack_U(0x41, 0x300, 0x327));
diff --git a/dist/Unicode-Normalize/t/norm.t b/dist/Unicode-Normalize/t/norm.t
index d9685e675f..ffb7449984 100644
--- a/dist/Unicode-Normalize/t/norm.t
+++ b/dist/Unicode-Normalize/t/norm.t
@@ -1,16 +1,5 @@
BEGIN {
- unless ('A' eq pack('U', 0x41)) {
- print "1..0 # Unicode::Normalize cannot pack a Unicode code point\n";
- exit 0;
- }
- unless (0x41 == unpack('U', 'A')) {
- print "1..0 # Unicode::Normalize cannot get a Unicode code point\n";
- exit 0;
- }
-}
-
-BEGIN {
if ($ENV{PERL_CORE}) {
chdir('t') if -d 't';
@INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
@@ -29,8 +18,8 @@ use Unicode::Normalize qw(normalize);
ok(1);
-sub _pack_U { Unicode::Normalize::pack_U(@_) }
-sub _unpack_U { Unicode::Normalize::unpack_U(@_) }
+sub _pack_U { Unicode::Normalize::dot_t_pack_U(@_) }
+sub _unpack_U { Unicode::Normalize::dot_t_unpack_U(@_) }
#########################
diff --git a/dist/Unicode-Normalize/t/null.t b/dist/Unicode-Normalize/t/null.t
index 9a0008708e..93d1bbcd9a 100644
--- a/dist/Unicode-Normalize/t/null.t
+++ b/dist/Unicode-Normalize/t/null.t
@@ -1,16 +1,5 @@
BEGIN {
- unless ('A' eq pack('U', 0x41)) {
- print "1..0 # Unicode::Normalize cannot pack a Unicode code point\n";
- exit 0;
- }
- unless (0x41 == unpack('U', 'A')) {
- print "1..0 # Unicode::Normalize cannot get a Unicode code point\n";
- exit 0;
- }
-}
-
-BEGIN {
if ($ENV{PERL_CORE}) {
chdir('t') if -d 't';
@INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
diff --git a/dist/Unicode-Normalize/t/partial1.t b/dist/Unicode-Normalize/t/partial1.t
index aa02705fcc..97d834352e 100644
--- a/dist/Unicode-Normalize/t/partial1.t
+++ b/dist/Unicode-Normalize/t/partial1.t
@@ -1,16 +1,5 @@
BEGIN {
- unless ('A' eq pack('U', 0x41)) {
- print "1..0 # Unicode::Normalize cannot pack a Unicode code point\n";
- exit 0;
- }
- unless (0x41 == unpack('U', 'A')) {
- print "1..0 # Unicode::Normalize cannot get a Unicode code point\n";
- exit 0;
- }
-}
-
-BEGIN {
if ($ENV{PERL_CORE}) {
chdir('t') if -d 't';
@INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
diff --git a/dist/Unicode-Normalize/t/partial2.t b/dist/Unicode-Normalize/t/partial2.t
index d3c24c9cf9..1cf510dbf9 100644
--- a/dist/Unicode-Normalize/t/partial2.t
+++ b/dist/Unicode-Normalize/t/partial2.t
@@ -1,16 +1,5 @@
BEGIN {
- unless ('A' eq pack('U', 0x41)) {
- print "1..0 # Unicode::Normalize cannot pack a Unicode code point\n";
- exit 0;
- }
- unless (0x41 == unpack('U', 'A')) {
- print "1..0 # Unicode::Normalize cannot get a Unicode code point\n";
- exit 0;
- }
-}
-
-BEGIN {
if ($ENV{PERL_CORE}) {
chdir('t') if -d 't';
@INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
@@ -37,8 +26,8 @@ use Unicode::Normalize qw(:all);
ok(1);
-sub _pack_U { Unicode::Normalize::pack_U(@_) }
-sub _unpack_U { Unicode::Normalize::unpack_U(@_) }
+sub _pack_U { Unicode::Normalize::dot_t_pack_U(@_) }
+sub _unpack_U { Unicode::Normalize::undot_t_pack_U(@_) }
#########################
diff --git a/dist/Unicode-Normalize/t/proto.t b/dist/Unicode-Normalize/t/proto.t
index 7a180c8945..f90777c671 100644
--- a/dist/Unicode-Normalize/t/proto.t
+++ b/dist/Unicode-Normalize/t/proto.t
@@ -1,16 +1,5 @@
BEGIN {
- unless ('A' eq pack('U', 0x41)) {
- print "1..0 # Unicode::Normalize cannot pack a Unicode code point\n";
- exit 0;
- }
- unless (0x41 == unpack('U', 'A')) {
- print "1..0 # Unicode::Normalize cannot get a Unicode code point\n";
- exit 0;
- }
-}
-
-BEGIN {
if ($ENV{PERL_CORE}) {
chdir('t') if -d 't';
@INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
diff --git a/dist/Unicode-Normalize/t/split.t b/dist/Unicode-Normalize/t/split.t
index 6c4d31129a..ae166f4fed 100644
--- a/dist/Unicode-Normalize/t/split.t
+++ b/dist/Unicode-Normalize/t/split.t
@@ -1,16 +1,5 @@
BEGIN {
- unless ('A' eq pack('U', 0x41)) {
- print "1..0 # Unicode::Normalize cannot pack a Unicode code point\n";
- exit 0;
- }
- unless (0x41 == unpack('U', 'A')) {
- print "1..0 # Unicode::Normalize cannot get a Unicode code point\n";
- exit 0;
- }
-}
-
-BEGIN {
if ($ENV{PERL_CORE}) {
chdir('t') if -d 't';
@INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
@@ -37,8 +26,8 @@ use Unicode::Normalize qw(:all);
ok(1);
-sub _pack_U { Unicode::Normalize::pack_U(@_) }
-sub _unpack_U { Unicode::Normalize::unpack_U(@_) }
+sub _pack_U { Unicode::Normalize::dot_t_pack_U(@_) }
+sub _unpack_U { Unicode::Normalize::dot_t_unpack_U(@_) }
#########################
diff --git a/dist/Unicode-Normalize/t/test.t b/dist/Unicode-Normalize/t/test.t
index 15ff055d93..cec3ca7bb8 100644
--- a/dist/Unicode-Normalize/t/test.t
+++ b/dist/Unicode-Normalize/t/test.t
@@ -1,16 +1,5 @@
BEGIN {
- unless ('A' eq pack('U', 0x41)) {
- print "1..0 # Unicode::Normalize cannot pack a Unicode code point\n";
- exit 0;
- }
- unless (0x41 == unpack('U', 'A')) {
- print "1..0 # Unicode::Normalize cannot get a Unicode code point\n";
- exit 0;
- }
-}
-
-BEGIN {
if ($ENV{PERL_CORE}) {
chdir('t') if -d 't';
@INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
@@ -29,8 +18,8 @@ use Unicode::Normalize;
ok(1);
-sub _pack_U { Unicode::Normalize::pack_U(@_) }
-sub _unpack_U { Unicode::Normalize::unpack_U(@_) }
+sub _pack_U { Unicode::Normalize::dot_t_pack_U(@_) }
+sub _unpack_U { Unicode::Normalize::dot_t_unpack_U(@_) }
#########################
diff --git a/dist/Unicode-Normalize/t/tie.t b/dist/Unicode-Normalize/t/tie.t
index ea050e89a5..1282405001 100644
--- a/dist/Unicode-Normalize/t/tie.t
+++ b/dist/Unicode-Normalize/t/tie.t
@@ -1,16 +1,5 @@
BEGIN {
- unless ('A' eq pack('U', 0x41)) {
- print "1..0 # Unicode::Normalize cannot pack a Unicode code point\n";
- exit 0;
- }
- unless (0x41 == unpack('U', 'A')) {
- print "1..0 # Unicode::Normalize cannot get a Unicode code point\n";
- exit 0;
- }
-}
-
-BEGIN {
if ($ENV{PERL_CORE}) {
chdir('t') if -d 't';
@INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);