summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ext/XS-APItest/t/utf16_to_utf8.t2
-rw-r--r--pod/perlfunc.pod5
-rw-r--r--pp_pack.c18
-rw-r--r--t/io/bom.t3
-rw-r--r--t/op/pack.t14
-rw-r--r--utf8.c6
6 files changed, 21 insertions, 27 deletions
diff --git a/ext/XS-APItest/t/utf16_to_utf8.t b/ext/XS-APItest/t/utf16_to_utf8.t
index 612b146104..4cc63e18e3 100644
--- a/ext/XS-APItest/t/utf16_to_utf8.t
+++ b/ext/XS-APItest/t/utf16_to_utf8.t
@@ -4,8 +4,6 @@ use strict;
use Test::More;
use Encode;
-plan skip_all => 'Unclear how EBCIDC should behave' if ord "A" != 65;
-
# Bug in Encode, non chars are rejected
use XS::APItest qw(utf16_to_utf8 utf16_to_utf8_reversed
utf8_to_utf16 utf8_to_utf16_reversed);
diff --git a/pod/perlfunc.pod b/pod/perlfunc.pod
index fbe465bbf5..762f12cb72 100644
--- a/pod/perlfunc.pod
+++ b/pod/perlfunc.pod
@@ -5247,7 +5247,10 @@ of values, as follows:
u A uuencoded string.
U A Unicode character number. Encodes to a character in char-
acter mode and UTF-8 (or UTF-EBCDIC in EBCDIC platforms) in
- byte mode.
+ byte mode. Also on EBCDIC platforms, the character number will
+ be the native EBCDIC value for character numbers below 256.
+ This allows most programs using this feature to not have to
+ care which type of platform they are running on.
w A BER compressed integer (not an ASN.1 BER, see perlpacktut
for details). Its bytes represent an unsigned integer in
diff --git a/pp_pack.c b/pp_pack.c
index 78104a978c..41f72b798f 100644
--- a/pp_pack.c
+++ b/pp_pack.c
@@ -1320,16 +1320,12 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, const char *strbeg, const c
len = UTF8SKIP(result);
if (!S_utf8_to_bytes(aTHX_ &ptr, strend,
(char *) &result[1], len-1, 'U')) break;
- auv = NATIVE_TO_UNI(utf8n_to_uvchr(result,
- len,
- &retlen,
- UTF8_ALLOW_DEFAULT));
+ auv = utf8n_to_uvchr(result, len, &retlen,
+ UTF8_ALLOW_DEFAULT);
s = ptr;
} else {
- auv = NATIVE_TO_UNI(utf8n_to_uvchr((U8*)s,
- strend - s,
- &retlen,
- UTF8_ALLOW_DEFAULT));
+ auv = utf8n_to_uvchr((U8*)s, strend - s, &retlen,
+ UTF8_ALLOW_DEFAULT);
if (retlen == (STRLEN) -1)
Perl_croak(aTHX_ "Malformed UTF-8 string in unpack");
s += retlen;
@@ -2668,7 +2664,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
auv = SvUV_no_inf(fromstr, datumtype);
if (utf8) {
U8 buffer[UTF8_MAXLEN+1], *endb;
- endb = uvchr_to_utf8_flags(buffer, UNI_TO_NATIVE(auv), 0);
+ endb = uvchr_to_utf8_flags(buffer, auv, 0);
if (cur+(endb-buffer)*UTF8_EXPAND >= end) {
*cur = '\0';
SvCUR_set(cat, cur - start);
@@ -2684,9 +2680,7 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
GROWING(0, cat, start, cur, len+UTF8_MAXLEN);
end = start+SvLEN(cat)-UTF8_MAXLEN;
}
- cur = (char *) uvchr_to_utf8_flags((U8 *) cur,
- UNI_TO_NATIVE(auv),
- 0);
+ cur = (char *) uvchr_to_utf8_flags((U8 *) cur, auv, 0);
}
}
break;
diff --git a/t/io/bom.t b/t/io/bom.t
index 0a79a056a9..482ae9bce4 100644
--- a/t/io/bom.t
+++ b/t/io/bom.t
@@ -20,8 +20,7 @@ for my $end (0, 1) {
$end ? @$_[0, 1] : @$_[1, 0]
} (
# Create UTF-16.
- [ 0xFE, 0xFF ], map [ 0, utf8::native_to_unicode(ord($_)) ],
- split //, "print 1;\nprint 2"
+ [ 0xFE, 0xFF ], map [ 0, ord($_) ], split //, "print 1;\nprint 2"
);
fresh_perl_is($prog, "12", {}, "BOM indicates $encoding");
}
diff --git a/t/op/pack.t b/t/op/pack.t
index cd5f1913b3..949daa34a3 100644
--- a/t/op/pack.t
+++ b/t/op/pack.t
@@ -879,10 +879,10 @@ EOP
{
- is("1.20.300.4000", sprintf "%vd", pack("U*",utf8::native_to_unicode(1),utf8::native_to_unicode(20),300,4000));
- is("1.20.300.4000", sprintf "%vd", pack(" U*",utf8::native_to_unicode(1),utf8::native_to_unicode(20),300,4000));
+ is("1.20.300.4000", sprintf "%vd", pack("U*",1,20,300,4000));
+ is("1.20.300.4000", sprintf "%vd", pack(" U*",1,20,300,4000));
}
-isnt(v1.20.300.4000, sprintf "%vd", pack("C0U*",utf8::native_to_unicode(1),utf8::native_to_unicode(20),300,4000));
+isnt(v1.20.300.4000, sprintf "%vd", pack("C0U*",1,20,300,4000));
my $rslt = join " ", map { ord } split "", byte_utf8a_to_utf8n("\xc7\xa2");
# The ASCII UTF-8 of U+1E2 is "\xc7\xa2"
@@ -916,7 +916,7 @@ is("@{[unpack('U*', pack('U*', 100, 200))]}", "100 200");
# does pack C0U create characters?
# The U* is expecting Unicode, so convert to that.
- is("@{[pack('C0U*', map { utf8::native_to_unicode($_) } 64, 202)]}",
+ is("@{[pack('C0U*', map { $_ } 64, 202)]}",
pack("C*", 64, @bytes202));
# does unpack U0U on byte data fail?
@@ -1511,7 +1511,7 @@ my $U_1FFC_bytes = byte_utf8a_to_utf8n("\341\277\274");
is(join(',', unpack("aC/CU", "b\0$U_1FFC_bytes")), 'b,8188');
# The U expects Unicode, so convert from native
- my $first_byte = utf8::native_to_unicode(ord substr($U_1FFC_bytes, 0, 1));
+ my $first_byte = ord substr($U_1FFC_bytes, 0, 1);
is(join(',', unpack("aU0C/UU", "b\0$U_1FFC_bytes")), "b,$first_byte");
is(join(',', unpack("aU0C/CU", "b\0$U_1FFC_bytes")), "b,$first_byte");
@@ -1794,9 +1794,9 @@ my $U_1FFC_bytes = byte_utf8a_to_utf8n("\341\277\274");
'normal A* strip leaves \xa0');
is(unpack("U0C0A*", "ab \n" . uni_to_native("\xa0") . " \0"), "ab \n" . uni_to_native("\xa0"),
'normal A* strip leaves \xa0 even if it got upgraded for technical reasons');
- is(unpack("A*", pack("a*(U0U)a*", "ab \n", 0xa0, " \0")), "ab",
+ is(unpack("A*", pack("a*(U0U)a*", "ab \n", utf8::unicode_to_native(0xa0), " \0")), "ab",
'upgraded strings A* removes \xa0');
- is(unpack("A*", pack("a*(U0UU)a*", "ab \n", 0xa0, 0x1680, " \0")), "ab",
+ is(unpack("A*", pack("a*(U0UU)a*", "ab \n", utf8::unicode_to_native(0xa0), 0x1680, " \0")), "ab",
'upgraded strings A* removes all unicode whitespace');
is(unpack("A5", pack("a*(U0U)a*", "ab \n", 0x1680, "def", "ab")), "ab",
'upgraded strings A5 removes all unicode whitespace');
diff --git a/utf8.c b/utf8.c
index 3c73714062..afd05065c1 100644
--- a/utf8.c
+++ b/utf8.c
@@ -2659,7 +2659,7 @@ Perl_utf16_to_utf8_base(pTHX_ U8* p, U8* d, Size_t bytelen, Size_t *newlen,
}
/* Here, 'uv' is the real U32 we want to find the UTF-8 of */
- d = uvoffuni_to_utf8_flags(d, uv, 0);
+ d = uvchr_to_utf8(d, uv);
}
*newlen = d - dstart;
@@ -2712,9 +2712,9 @@ Perl_utf8_to_utf16_base(pTHX_ U8* s, U8* d, Size_t bytelen, Size_t *newlen,
while (s < send) {
STRLEN retlen;
- UV uv = NATIVE_TO_UNI(utf8n_to_uvchr(s, send - s, &retlen,
+ UV uv = utf8n_to_uvchr(s, send - s, &retlen,
/* No surrogates nor above-Unicode */
- UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE));
+ UTF8_DISALLOW_ILLEGAL_C9_INTERCHANGE);
/* The modern method is to keep going with malformed input,
* substituting the REPLACEMENT CHARACTER */