Upgrade to Encode 1.11, from Dan Kogai.

p4raw-id: //depot/perl@15638
author: Jarkko Hietaniemi <jhi@iki.fi> 2002-03-31 22:31:29 +0000
committer: Jarkko Hietaniemi <jhi@iki.fi> 2002-03-31 22:31:29 +0000
commit: 3ef515df8b968e34c050cfc9602902de0efbefd6 (patch)
tree: 856419a8a26ba37abef58ee10f916805a1c23b71 /lib
parent: a23c04e41e8e54701da113d9309ef79fb888afe6 (diff)
download: perl-3ef515df8b968e34c050cfc9602902de0efbefd6.tar.gz
2 files changed, 0 insertions, 308 deletions
diff --git a/lib/encoding.pm b/lib/encoding.pm
deleted file mode 100644
index 441be3340a..0000000000
--- a/lib/encoding.pm
+++ /dev/null
@@ -1,122 +0,0 @@
-package encoding;
-
-our $VERSION = '1.00';
-
-use Encode;
-
-BEGIN {
-    if (ord("A") == 193) {
-	require Carp;
-	Carp::croak "encoding pragma does not support EBCDIC platforms";
-    }
-}
-
-sub import {
-    my ($class, $name) = @_;
-    $name = $ENV{PERL_ENCODING} if @_ < 2;
-    $name = "latin1" unless defined $name;
-    my $enc = find_encoding($name);
-    unless (defined $enc) {
-	require Carp;
-	Carp::croak "Unknown encoding '$name'";
-    }
-    ${^ENCODING} = $enc;
-}
-
-=pod
-
-=head1 NAME
-
-encoding - pragma to control the conversion of legacy data into Unicode
-
-=head1 SYNOPSIS
-
-    use encoding "iso 8859-7";
-
-    # The \xDF of ISO 8859-7 (Greek) is \x{3af} in Unicode.
-
-    $a = "\xDF";
-    $b = "\x{100}";
-
-    printf "%#x\n", ord($a); # will print 0x3af, not 0xdf
-
-    $c = $a . $b;
-
-    # $c will be "\x{3af}\x{100}", not "\x{df}\x{100}".
-
-    # chr() is affected, and ...
-
-    print "mega\n"  if ord(chr(0xdf)) == 0x3af;
-
-    # ... ord() is affected by the encoding pragma ...
-
-    print "tera\n" if ord(pack("C", 0xdf)) == 0x3af;
-
-    # ... as are eq and cmp ...
-
-    print "peta\n" if "\x{3af}" eq  pack("C", 0xdf);
-    print "exa\n"  if "\x{3af}" cmp pack("C", 0xdf) == 0;
-
-    # ... but pack/unpack C are not affected, in case you still
-    # want back to your native encoding
-
-    print "zetta\n" if unpack("C", (pack("C", 0xdf))) == 0xdf;
-
-=head1 DESCRIPTION
-
-Normally when legacy 8-bit data is converted to Unicode the data is
-expected to be Latin-1 (or EBCDIC in EBCDIC platforms).  With the
-encoding pragma you can change this default.
-
-The pragma is a per script, not a per block lexical.  Only the last
-C<use encoding> matters, and it affects B<the whole script>.
-
-Notice that only literals (string or regular expression) having only
-legacy code points are affected: if you mix data like this
-
-	\xDF\x{100}
-
-the data is assumed to be in (Latin 1 and) Unicode, not in your native
-encoding.  In other words, this will match in "greek":
-
-	"\xDF" =~ /\x{3af}/
-
-but this will not
-
-	"\xDF\x{100}" =~ /\x{3af}\x{100}/
-
-since the C<\xDF> on the left will B<not> be upgraded to C<\x{3af}>
-because of the C<\x{100}> on the left.  You should not be mixing your
-legacy data and Unicode in the same string.
-
-This pragma also affects encoding of the 0x80..0xFF code point range:
-normally characters in that range are left as eight-bit bytes (unless
-they are combined with characters with code points 0x100 or larger,
-in which case all characters need to become UTF-8 encoded), but if
-the C<encoding> pragma is present, even the 0x80..0xFF range always
-gets UTF-8 encoded.
-
-If no encoding is specified, the environment variable L<PERL_ENCODING>
-is consulted.  If that fails, "latin1" (ISO 8859-1) is assumed.  If no
-encoding can be found, C<Unknown encoding '...'> error will be thrown.
-
-Note if you want to get back to the original byte encoding, you need
-to use things like I/O with encoding discplines (see L<open>) or the
-Encode module, since C<no encoding> (or re-C<encoding>) do not work.
-
-=head1 KNOWN PROBLEMS
-
-For native multibyte encodings (either fixed or variable length)
-the current implementation of the regular expressions may introduce
-recoding errors for longer regular expression literals than 127 bytes.
-
-The encoding pragma is not supported on EBCDIC platforms.
-(Porters wanted.)
-
-=head1 SEE ALSO
-
-L<perlunicode>, L<Encode>
-
-=cut
-
-1;
diff --git a/lib/encoding.t b/lib/encoding.t
deleted file mode 100644
index ec21c1b3a6..0000000000
--- a/lib/encoding.t
+++ /dev/null
@@ -1,186 +0,0 @@
-BEGIN {
-    require Config; import Config;
-    if ($Config{'extensions'} !~ /\bEncode\b/) {
-      print "1..0 # Skip: Encode was not built\n";
-      exit 0;
-    }
-    if (ord("A") == 193) {
-	print "1..0 # encoding pragma does not support EBCDIC platforms\n";
-	exit(0);
-    }
-}
-
-print "1..29\n";
-
-use encoding "latin1"; # ignored (overwritten by the next line)
-use encoding "greek";  # iso 8859-7 (no "latin" alias, surprise...)
-
-# "greek" is "ISO 8859-7", and \xDF in ISO 8859-7 is
-# \x{3AF} in Unicode (GREEK SMALL LETTER IOTA WITH TONOS),
-# instead of \xDF in Unicode (LATIN SMALL LETTER SHARP S)
-
-$a = "\xDF";
-$b = "\x{100}";
-
-print "not " unless ord($a) == 0x3af;
-print "ok 1\n";
-
-print "not " unless ord($b) == 0x100;
-print "ok 2\n";
-
-my $c;
-
-$c = $a . $b;
-
-print "not " unless ord($c) == 0x3af;
-print "ok 3\n";
-
-print "not " unless length($c) == 2;
-print "ok 4\n";
-
-print "not " unless ord(substr($c, 1, 1)) == 0x100;
-print "ok 5\n";
-
-print "not " unless ord(chr(0xdf)) == 0x3af; # spooky
-print "ok 6\n";
-
-print "not " unless ord(pack("C", 0xdf)) == 0x3af;
-print "ok 7\n";
-
-# we didn't break pack/unpack, I hope
-
-print "not " unless unpack("C", pack("C", 0xdf)) == 0xdf;
-print "ok 8\n";
-
-# the first octet of UTF-8 encoded 0x3af 
-print "not " unless unpack("C", chr(0xdf)) == 0xce;
-print "ok 9\n";
-
-print "not " unless unpack("U", pack("U", 0xdf)) == 0xdf;
-print "ok 10\n";
-
-print "not " unless unpack("U", chr(0xdf)) == 0x3af;
-print "ok 11\n";
-
-# charnames must still work
-use charnames ':full';
-print "not " unless ord("\N{LATIN SMALL LETTER SHARP S}") == 0xdf;
-print "ok 12\n";
-
-# combine
-
-$c = "\xDF\N{LATIN SMALL LETTER SHARP S}" . chr(0xdf);
-
-print "not " unless ord($c) == 0x3af;
-print "ok 13\n";
-
-print "not " unless ord(substr($c, 1, 1)) == 0xdf;
-print "ok 14\n";
-
-print "not " unless ord(substr($c, 2, 1)) == 0x3af;
-print "ok 15\n";
-
-# regex literals
-
-print "not " unless "\xDF"    =~ /\x{3AF}/;
-print "ok 16\n";
-
-print "not " unless "\x{3AF}" =~ /\xDF/;
-print "ok 17\n";
-
-print "not " unless "\xDF"    =~ /\xDF/;
-print "ok 18\n";
-
-print "not " unless "\x{3AF}" =~ /\x{3AF}/;
-print "ok 19\n";
-
-# eq, cmp
-
-my ($byte,$bytes,$U,$Ub,$g1,$g2,$l) = ( 
-    pack("C*", 0xDF ),       # byte
-    pack("C*", 0xDF, 0x20),  # ($bytes2 cmp $U) > 0
-    pack("U*", 0x3AF),       # $U eq $byte
-    pack("U*", 0xDF ),       # $Ub would eq $bytev w/o use encoding
-    pack("U*", 0x3B1),       # ($g1 cmp $byte) > 0; === chr(0xe1)
-    pack("U*", 0x3AF, 0x20), # ($g2 cmp $byte) > 0;
-    pack("U*", 0x3AB),       # ($l  cmp $byte) < 0; === chr(0xdb)
-);
-
-# all the tests in this section that compare a byte encoded string 
-# ato UTF-8 encoded are run in all possible vairants 
-# all of the eq, ne, cmp operations tested,
-# $v z $u tested as well as $u z $v
-
-sub alleq($$){
-    my ($a,$b)    =    (shift, shift);
-     $a  eq  $b        &&     $b  eq  $a         && 
-  !( $a  ne  $b )      &&  !( $b  ne  $a )       &&
-   ( $a  cmp $b ) == 0 &&   ( $b  cmp $a ) == 0;
-}
-   
-sub anyeq($$){
-    my ($a,$b)    =    (shift, shift);
-     $a  eq  $b        ||     $b  eq  $a         ||
-  !( $a  ne  $b )      ||  !( $b  ne  $a )       ||
-   ( $a  cmp $b ) == 0 ||   ( $b  cmp $a ) == 0;
-}
-
-sub allgt($$){
-    my ($a,$b)    =    (shift, shift);
-    ( $a cmp $b ) == 1 && ( $b cmp $a ) == -1;
-}
-#match the correct UTF-8 string
-print "not " unless  alleq($byte, $U);
-print "ok 20\n";
-
-#do not match a wrong UTF-8 string
-print "not " if anyeq($byte, $Ub);
-print "ok 21\n";
-
-#string ordering
-print "not " unless allgt ( $g1,    $byte  )  &&
-                    allgt ( $g2,    $byte  )  &&
-                    allgt ( $byte,  $l     )  &&
-                    allgt ( $bytes, $U     );
-print "ok 22\n";
-
-# upgrade, downgrade
-
-my ($u,$v,$v2);
-$u = $v = $v2 = pack("C*", 0xDF);
-utf8::upgrade($v);                   #explicit upgrade
-$v2 = substr( $v2."\x{410}", 0, -1); #implicit upgrade
-
-# implicit upgrade === explicit upgrade
-print "not "  if do{{use bytes; $v ne $v2}} || $v ne $v2;
-print "ok 23\n";
-
-# utf8::upgrade is transparent and does not break equality
-print "not " unless alleq( $u, $v );
-print "ok 24\n";
-
-$u = $v = pack("C*", 0xDF);
-utf8::upgrade($v);
-#test for a roundtrip, we should get back from where we left
-eval {utf8::downgrade( $v )};
-print "not " if $@ !~ /^Wide / || do{{use bytes; $u eq $v}} || $u ne $v;
-print "ok 25\n";
-
-# some more eq, cmp
-
-my $byte=pack("C*", 0xDF);
-
-print "not " unless pack("U*", 0x3AF) eq $byte;
-print "ok 26\n";
-
-print "not " if chr(0xDF) cmp $byte;
-print "ok 27\n";
-
-print "not " unless ((pack("U*", 0x3B0)       cmp $byte) ==  1) &&
-                    ((pack("U*", 0x3AE)       cmp $byte) == -1) &&
-                    ((pack("U*", 0x3AF, 0x20) cmp $byte) ==  1) &&
-	            ((pack("U*", 0x3AF) cmp pack("C*",0xDF,0x20))==-1);
-print "ok 28\n";
-
-# Used to core dump in 5.7.3
-print ord undef == 0 ? "ok 29\n" : "not ok 29\n";
author	Jarkko Hietaniemi <jhi@iki.fi>	2002-03-31 22:31:29 +0000
committer	Jarkko Hietaniemi <jhi@iki.fi>	2002-03-31 22:31:29 +0000
commit	3ef515df8b968e34c050cfc9602902de0efbefd6 (patch)
tree	856419a8a26ba37abef58ee10f916805a1c23b71 /lib
parent	a23c04e41e8e54701da113d9309ef79fb888afe6 (diff)
download	perl-3ef515df8b968e34c050cfc9602902de0efbefd6.tar.gz