UTF-X encoding invariance for Encode:

- Move Encode::utf8_encode to utf8::encode (likewise decode, upgrade, downgrade, valid).
- Move the XS code for those to universal.c (so they are available in miniperl).
- Add utf8::unicode_to_native and its inverse to allow EBCDIC to work in true Unicode.
- Change ext/Encode/compile to use the above.
- Fix t/lib/encode.t for the above.
- Teach t/lib/b.t to expect -uutf8.
- In utf8.c, look for SWASHNEW rather than just the utf8:: package to see if utf8.pm is needed.

p4raw-id: //depot/perlio@9198
author: Nick Ing-Simmons <nik@tiuk.ti.com> 2001-03-18 14:18:12 +0000
committer: Nick Ing-Simmons <nik@tiuk.ti.com> 2001-03-18 14:18:12 +0000
commit: 1b026014ba0f5424fabe070eda050db5e7df518a (patch)
tree: 60a3c121ee35a593bb71b0cfe1bf6288be335630 /ext/Encode
parent: be4731d2ab91c4f6213bf88a0084f6128a0db383 (diff)
download: perl-1b026014ba0f5424fabe070eda050db5e7df518a.tar.gz
3 files changed, 10 insertions, 60 deletions
diff --git a/ext/Encode/Encode.pm b/ext/Encode/Encode.pm
index b5ba929a54..fd85520311 100644
--- a/ext/Encode/Encode.pm
+++ b/ext/Encode/Encode.pm
@@ -188,14 +188,14 @@ sub from_to
 sub encode_utf8
 {
  my ($str) = @_;
- utf8_encode($str);
+ utf8::encode($str);
  return $str;
 }
 
 sub decode_utf8
 {
  my ($str) = @_;
- return undef unless utf8_decode($str);
+ return undef unless utf8::decode($str);
  return $str;
 }
 
@@ -226,14 +226,14 @@ package Encode::Unicode;
 use base 'Encode::Encoding';
 
 # Dummy package that provides the encode interface but leaves data
-# as UTF-8 encoded. It is here so that from_to() works.
+# as UTF-X encoded. It is here so that from_to() works.
 
 __PACKAGE__->Define('Unicode');
 
 sub decode
 {
  my ($obj,$str,$chk) = @_;
- Encode::utf8_upgrade($str);
+ utf8::upgrade($str);
  $_[1] = '' if $chk;
  return $str;
 }
@@ -717,17 +717,6 @@ As such they are efficient, but may change.
 
 =over 4
 
-=item *
-
-        $num_octets = utf8_upgrade($string);
-
-Converts internal representation of string to the UTF-8 form.
-Returns the number of octets necessary to represent the string as UTF-8.
-
-=item * utf8_downgrade($string[, CHECK])
-
-Converts internal representation of string to be un-encoded bytes.
-
 =item * is_utf8(STRING [, CHECK])
 
 [INTERNAL] Test whether the UTF-8 flag is turned on in the STRING.
diff --git a/ext/Encode/Encode.xs b/ext/Encode/Encode.xs
index cdb1965bda..4d62501775 100644
--- a/ext/Encode/Encode.xs
+++ b/ext/Encode/Encode.xs
@@ -433,38 +433,6 @@ encode_method(pTHX_ encode_t *enc, encpage_t *dir, SV *src, int check)
  return dst;
 }
 
-MODULE = Encode		PACKAGE = Encode	PREFIX = sv_
-
-void
-valid_utf8(sv)
-SV *	sv
-CODE:
- {
-  STRLEN len;
-  char *s = SvPV(sv,len);
-  if (!SvUTF8(sv) || is_utf8_string((U8*)s,len))
-   XSRETURN_YES;
-  else
-   XSRETURN_NO;
- }
-
-void
-sv_utf8_encode(sv)
-SV *	sv
-
-bool
-sv_utf8_decode(sv)
-SV *	sv
-
-STRLEN
-sv_utf8_upgrade(sv)
-SV *	sv
-
-bool
-sv_utf8_downgrade(sv,failok=0)
-SV *	sv
-bool	failok
-
 MODULE = Encode		PACKAGE = Encode::XS	PREFIX = Method_
 
 PROTOTYPES: ENABLE
diff --git a/ext/Encode/compile b/ext/Encode/compile
index 8201043fde..d0611f719f 100755
--- a/ext/Encode/compile
+++ b/ext/Encode/compile
@@ -8,23 +8,16 @@ use Getopt::Std;
 my @orig_ARGV = @ARGV;
 my $perforce  = '$Id$';
 
-
 sub encode_U
 {
  # UTF-8 encode long hand - only covers part of perl's range
  my $uv = shift;
- if ($uv < 0x80)
-  {
-   return chr($uv)
-  }
- if ($uv < 0x800)
-  {
-   return chr(($uv >> 6)        | 0xC0).
-          chr(($uv & 0x3F)      | 0x80);
-  }
- return chr(($uv >> 12)         | 0xE0).
-        chr((($uv >> 6) & 0x3F) | 0x80).
-        chr(($uv & 0x3F)        | 0x80);
+ # chr() works in native space so convert value from table
+ # into that space before using chr().
+ my $ch = chr(utf8::unicode_to_native($uv));
+ # Now get core perl to encode that the way it likes.
+ utf8::encode($ch);
+ return $ch;
 }
 
 sub encode_S
author	Nick Ing-Simmons <nik@tiuk.ti.com>	2001-03-18 14:18:12 +0000
committer	Nick Ing-Simmons <nik@tiuk.ti.com>	2001-03-18 14:18:12 +0000
commit	1b026014ba0f5424fabe070eda050db5e7df518a (patch)
tree	60a3c121ee35a593bb71b0cfe1bf6288be335630 /ext/Encode
parent	be4731d2ab91c4f6213bf88a0084f6128a0db383 (diff)
download	perl-1b026014ba0f5424fabe070eda050db5e7df518a.tar.gz