diff options
author | Nick Ing-Simmons <nik@tiuk.ti.com> | 2001-03-18 14:18:12 +0000 |
---|---|---|
committer | Nick Ing-Simmons <nik@tiuk.ti.com> | 2001-03-18 14:18:12 +0000 |
commit | 1b026014ba0f5424fabe070eda050db5e7df518a (patch) | |
tree | 60a3c121ee35a593bb71b0cfe1bf6288be335630 /ext/Encode | |
parent | be4731d2ab91c4f6213bf88a0084f6128a0db383 (diff) | |
download | perl-1b026014ba0f5424fabe070eda050db5e7df518a.tar.gz |
UTF-X encoding invariance for Encode:
- move Encode::utf8_encode to utf8::encode (likewise decode, upgrade, downgrade, valid)
- move the XS code for those to universal.c (so in miniperl)
- add utf8::unicode_to_native and its inverse to allow EBCDIC to work in true Unicode.
- change ext/Encode/compile to use above.
- Fix t/lib/encode.t for above
- Teach t/lib/b.t to expect -uutf8
- In utf8.c, look for SWASHNEW rather than just the utf8:: package to see whether
utf8.pm is needed.
p4raw-id: //depot/perlio@9198
Diffstat (limited to 'ext/Encode')
-rw-r--r-- | ext/Encode/Encode.pm | 19 | ||||
-rw-r--r-- | ext/Encode/Encode.xs | 32 | ||||
-rwxr-xr-x | ext/Encode/compile | 19 |
3 files changed, 10 insertions, 60 deletions
diff --git a/ext/Encode/Encode.pm b/ext/Encode/Encode.pm index b5ba929a54..fd85520311 100644 --- a/ext/Encode/Encode.pm +++ b/ext/Encode/Encode.pm @@ -188,14 +188,14 @@ sub from_to sub encode_utf8 { my ($str) = @_; - utf8_encode($str); + utf8::encode($str); return $str; } sub decode_utf8 { my ($str) = @_; - return undef unless utf8_decode($str); + return undef unless utf8::decode($str); return $str; } @@ -226,14 +226,14 @@ package Encode::Unicode; use base 'Encode::Encoding'; # Dummy package that provides the encode interface but leaves data -# as UTF-8 encoded. It is here so that from_to() works. +# as UTF-X encoded. It is here so that from_to() works. __PACKAGE__->Define('Unicode'); sub decode { my ($obj,$str,$chk) = @_; - Encode::utf8_upgrade($str); + utf8::upgrade($str); $_[1] = '' if $chk; return $str; } @@ -717,17 +717,6 @@ As such they are efficient, but may change. =over 4 -=item * - - $num_octets = utf8_upgrade($string); - -Converts internal representation of string to the UTF-8 form. -Returns the number of octets necessary to represent the string as UTF-8. - -=item * utf8_downgrade($string[, CHECK]) - -Converts internal representation of string to be un-encoded bytes. - =item * is_utf8(STRING [, CHECK]) [INTERNAL] Test whether the UTF-8 flag is turned on in the STRING. 
diff --git a/ext/Encode/Encode.xs b/ext/Encode/Encode.xs index cdb1965bda..4d62501775 100644 --- a/ext/Encode/Encode.xs +++ b/ext/Encode/Encode.xs @@ -433,38 +433,6 @@ encode_method(pTHX_ encode_t *enc, encpage_t *dir, SV *src, int check) return dst; } -MODULE = Encode PACKAGE = Encode PREFIX = sv_ - -void -valid_utf8(sv) -SV * sv -CODE: - { - STRLEN len; - char *s = SvPV(sv,len); - if (!SvUTF8(sv) || is_utf8_string((U8*)s,len)) - XSRETURN_YES; - else - XSRETURN_NO; - } - -void -sv_utf8_encode(sv) -SV * sv - -bool -sv_utf8_decode(sv) -SV * sv - -STRLEN -sv_utf8_upgrade(sv) -SV * sv - -bool -sv_utf8_downgrade(sv,failok=0) -SV * sv -bool failok - MODULE = Encode PACKAGE = Encode::XS PREFIX = Method_ PROTOTYPES: ENABLE diff --git a/ext/Encode/compile b/ext/Encode/compile index 8201043fde..d0611f719f 100755 --- a/ext/Encode/compile +++ b/ext/Encode/compile @@ -8,23 +8,16 @@ use Getopt::Std; my @orig_ARGV = @ARGV; my $perforce = '$Id$'; - sub encode_U { # UTF-8 encode long hand - only covers part of perl's range my $uv = shift; - if ($uv < 0x80) - { - return chr($uv) - } - if ($uv < 0x800) - { - return chr(($uv >> 6) | 0xC0). - chr(($uv & 0x3F) | 0x80); - } - return chr(($uv >> 12) | 0xE0). - chr((($uv >> 6) & 0x3F) | 0x80). - chr(($uv & 0x3F) | 0x80); + # chr() works in native space so convert value from table + # into that space before using chr(). + my $ch = chr(utf8::unicode_to_native($uv)); + # Now get core perl to encode that the way it likes. + utf8::encode($ch); + return $ch; } sub encode_S |