diff options
author | Jarkko Hietaniemi <jhi@iki.fi> | 2002-03-31 22:31:29 +0000 |
---|---|---|
committer | Jarkko Hietaniemi <jhi@iki.fi> | 2002-03-31 22:31:29 +0000 |
commit | 3ef515df8b968e34c050cfc9602902de0efbefd6 (patch) | |
tree | 856419a8a26ba37abef58ee10f916805a1c23b71 /ext | |
parent | a23c04e41e8e54701da113d9309ef79fb888afe6 (diff) | |
download | perl-3ef515df8b968e34c050cfc9602902de0efbefd6.tar.gz |
Upgrade to Encode 1.11, from Dan Kogai.
p4raw-id: //depot/perl@15638
Diffstat (limited to 'ext')
-rw-r--r-- | ext/Encode/AUTHORS | 1 | ||||
-rw-r--r-- | ext/Encode/Byte/Byte.pm | 4 | ||||
-rw-r--r-- | ext/Encode/Byte/Makefile.PL | 4 | ||||
-rw-r--r-- | ext/Encode/CN/Makefile.PL | 2 | ||||
-rw-r--r-- | ext/Encode/Changes | 49 | ||||
-rw-r--r-- | ext/Encode/EBCDIC/Makefile.PL | 2 | ||||
-rw-r--r-- | ext/Encode/Encode.pm | 23 | ||||
-rw-r--r-- | ext/Encode/Encode/Changes.e2x | 7 | ||||
-rw-r--r-- | ext/Encode/Encode/Makefile_PL.e2x | 150 | ||||
-rw-r--r-- | ext/Encode/Encode/README.e2x | 31 | ||||
-rw-r--r-- | ext/Encode/Encode/_PM.e2x | 23 | ||||
-rw-r--r-- | ext/Encode/Encode/_T.e2x | 7 | ||||
-rw-r--r-- | ext/Encode/JP/Makefile.PL | 2 | ||||
-rw-r--r-- | ext/Encode/KR/Makefile.PL | 2 | ||||
-rw-r--r-- | ext/Encode/MANIFEST | 144 | ||||
-rw-r--r-- | ext/Encode/Makefile.PL | 21 | ||||
-rw-r--r-- | ext/Encode/README | 15 | ||||
-rw-r--r-- | ext/Encode/Symbol/Makefile.PL | 2 | ||||
-rw-r--r-- | ext/Encode/TW/Makefile.PL | 2 | ||||
-rw-r--r-- | ext/Encode/bin/enc2xs | 251 | ||||
-rw-r--r-- | ext/Encode/bin/piconv | 4 | ||||
-rw-r--r-- | ext/Encode/bin/ucm2table (renamed from ext/Encode/ucm2table) | 0 | ||||
-rw-r--r-- | ext/Encode/encoding.pm | 233 | ||||
-rw-r--r-- | ext/Encode/lib/Encode/Alias.pm | 29 | ||||
-rw-r--r-- | ext/Encode/lib/Encode/Supported.pod | 4 | ||||
-rw-r--r-- | ext/Encode/t/encoding.t | 190 | ||||
-rw-r--r-- | ext/Encode/t/jperl.t | 57 | ||||
-rw-r--r-- | ext/Encode/ucm/8859-1.ucm (renamed from ext/Encode/Encode/8859-1.ucm) | 0 | ||||
-rw-r--r-- | ext/Encode/ucm/8859-10.ucm (renamed from ext/Encode/Encode/8859-10.ucm) | 0 | ||||
-rw-r--r-- | ext/Encode/ucm/8859-11.ucm (renamed from ext/Encode/Encode/8859-11.ucm) | 0 | ||||
-rw-r--r-- | ext/Encode/ucm/8859-13.ucm (renamed from ext/Encode/Encode/8859-13.ucm) | 0 | ||||
-rw-r--r-- | ext/Encode/ucm/8859-14.ucm (renamed from ext/Encode/Encode/8859-14.ucm) | 0 | ||||
-rw-r--r-- | ext/Encode/ucm/8859-15.ucm (renamed from ext/Encode/Encode/8859-15.ucm) | 0 | ||||
-rw-r--r-- | ext/Encode/ucm/8859-16.ucm (renamed from ext/Encode/Encode/8859-16.ucm) | 0 | ||||
-rw-r--r-- | ext/Encode/ucm/8859-2.ucm (renamed from ext/Encode/Encode/8859-2.ucm) | 0 | ||||
-rw-r--r-- | ext/Encode/ucm/8859-3.ucm (renamed from ext/Encode/Encode/8859-3.ucm) | 0 | ||||
-rw-r--r-- | ext/Encode/ucm/8859-4.ucm (renamed from ext/Encode/Encode/8859-4.ucm) | 0 | ||||
-rw-r--r-- | ext/Encode/ucm/8859-5.ucm (renamed from ext/Encode/Encode/8859-5.ucm) | 0 | ||||
-rw-r--r-- | ext/Encode/ucm/8859-6.ucm (renamed from ext/Encode/Encode/8859-6.ucm) | 0 | ||||
-rw-r--r-- | ext/Encode/ucm/8859-7.ucm (renamed from ext/Encode/Encode/8859-7.ucm) | 0 | ||||
-rw-r--r-- | ext/Encode/ucm/8859-8.ucm (renamed from ext/Encode/Encode/8859-8.ucm) | 0 | ||||
-rw-r--r-- | ext/Encode/ucm/8859-9.ucm (renamed from ext/Encode/Encode/8859-9.ucm) | 0 | ||||
-rw-r--r-- | ext/Encode/ucm/ascii.ucm (renamed from ext/Encode/Encode/ascii.ucm) | 2 | ||||
-rw-r--r-- | ext/Encode/ucm/big5-hkscs.ucm (renamed from ext/Encode/Encode/big5-hkscs.ucm) | 0 | ||||
-rw-r--r-- | ext/Encode/ucm/big5.ucm (renamed from ext/Encode/Encode/big5.ucm) | 0 | ||||
-rw-r--r-- | ext/Encode/ucm/cp1047.ucm (renamed from ext/Encode/Encode/cp1047.ucm) | 0 | ||||
-rw-r--r-- | ext/Encode/ucm/cp1250.ucm (renamed from ext/Encode/Encode/cp1250.ucm) | 0 | ||||
-rw-r--r-- | ext/Encode/ucm/cp37.ucm (renamed from ext/Encode/Encode/cp37.ucm) | 0 | ||||
-rw-r--r-- | ext/Encode/ucm/cp932.ucm (renamed from ext/Encode/Encode/cp932.ucm) | 0 | ||||
-rw-r--r-- | ext/Encode/ucm/cp936.ucm (renamed from ext/Encode/Encode/cp936.ucm) | 2 | ||||
-rw-r--r-- | ext/Encode/ucm/cp949.ucm (renamed from ext/Encode/Encode/cp949.ucm) | 0 | ||||
-rw-r--r-- | ext/Encode/ucm/cp950.ucm (renamed from ext/Encode/Encode/cp950.ucm) | 0 | ||||
-rw-r--r-- | ext/Encode/ucm/dingbats.ucm (renamed from ext/Encode/Encode/dingbats.ucm) | 2 | ||||
-rw-r--r-- | ext/Encode/ucm/euc-cn.ucm (renamed from ext/Encode/Encode/euc-cn.ucm) | 0 | ||||
-rw-r--r-- | ext/Encode/ucm/euc-jp.ucm (renamed from ext/Encode/Encode/euc-jp.ucm) | 0 | ||||
-rw-r--r-- | ext/Encode/ucm/euc-kr.ucm (renamed from ext/Encode/Encode/euc-kr.ucm) | 0 | ||||
-rw-r--r-- | ext/Encode/ucm/gb12345.ucm (renamed from ext/Encode/Encode/gb12345.ucm) | 0 | ||||
-rw-r--r-- | ext/Encode/ucm/gb2312.ucm (renamed from ext/Encode/Encode/gb2312.ucm) | 0 | ||||
-rw-r--r-- | ext/Encode/ucm/gsm0338.ucm (renamed from ext/Encode/Encode/gsm0338.ucm) | 0 | ||||
-rw-r--r-- | ext/Encode/ucm/hp-roman8.ucm (renamed from ext/Encode/Encode/hp-roman8.ucm) | 0 | ||||
-rw-r--r-- | ext/Encode/ucm/ibm-1250.ucm (renamed from ext/Encode/Encode/ibm-1250.ucm) | 0 | ||||
-rw-r--r-- | ext/Encode/ucm/ibm-1251.ucm (renamed from ext/Encode/Encode/ibm-1251.ucm) | 0 | ||||
-rw-r--r-- | ext/Encode/ucm/ibm-1252.ucm (renamed from ext/Encode/Encode/ibm-1252.ucm) | 0 | ||||
-rw-r--r-- | ext/Encode/ucm/ibm-1253.ucm (renamed from ext/Encode/Encode/ibm-1253.ucm) | 0 | ||||
-rw-r--r-- | ext/Encode/ucm/ibm-1254.ucm (renamed from ext/Encode/Encode/ibm-1254.ucm) | 0 | ||||
-rw-r--r-- | ext/Encode/ucm/ibm-1255.ucm (renamed from ext/Encode/Encode/ibm-1255.ucm) | 0 | ||||
-rw-r--r-- | ext/Encode/ucm/ibm-1256.ucm (renamed from ext/Encode/Encode/ibm-1256.ucm) | 0 | ||||
-rw-r--r-- | ext/Encode/ucm/ibm-1257.ucm (renamed from ext/Encode/Encode/ibm-1257.ucm) | 0 | ||||
-rw-r--r-- | ext/Encode/ucm/ibm-1258.ucm (renamed from ext/Encode/Encode/ibm-1258.ucm) | 0 | ||||
-rw-r--r-- | ext/Encode/ucm/ir-165.ucm (renamed from ext/Encode/Encode/ir-165.ucm) | 0 | ||||
-rw-r--r-- | ext/Encode/ucm/jis0201.ucm (renamed from ext/Encode/Encode/jis0201.ucm) | 0 | ||||
-rw-r--r-- | ext/Encode/ucm/johab.ucm (renamed from ext/Encode/Encode/johab.ucm) | 0 | ||||
-rw-r--r-- | ext/Encode/ucm/koi8-f.ucm (renamed from ext/Encode/Encode/koi8-f.ucm) | 0 | ||||
-rw-r--r-- | ext/Encode/ucm/koi8-r.ucm (renamed from ext/Encode/Encode/koi8-r.ucm) | 2 | ||||
-rw-r--r-- | ext/Encode/ucm/koi8-u.ucm (renamed from ext/Encode/Encode/koi8-u.ucm) | 0 | ||||
-rw-r--r-- | ext/Encode/ucm/ksc5601.ucm (renamed from ext/Encode/Encode/ksc5601.ucm) | 0 | ||||
-rw-r--r-- | ext/Encode/ucm/macCentEuro.ucm (renamed from ext/Encode/Encode/macCentEuro.ucm) | 0 | ||||
-rw-r--r-- | ext/Encode/ucm/macCroatian.ucm (renamed from ext/Encode/Encode/macCroatian.ucm) | 0 | ||||
-rw-r--r-- | ext/Encode/ucm/macCyrillic.ucm (renamed from ext/Encode/Encode/macCyrillic.ucm) | 0 | ||||
-rw-r--r-- | ext/Encode/ucm/macDingbats.ucm (renamed from ext/Encode/Encode/macDingbats.ucm) | 0 | ||||
-rw-r--r-- | ext/Encode/ucm/macGreek.ucm (renamed from ext/Encode/Encode/macGreek.ucm) | 0 | ||||
-rw-r--r-- | ext/Encode/ucm/macIceland.ucm (renamed from ext/Encode/Encode/macIceland.ucm) | 4 | ||||
-rw-r--r-- | ext/Encode/ucm/macJapan.ucm (renamed from ext/Encode/Encode/macJapan.ucm) | 0 | ||||
-rw-r--r-- | ext/Encode/ucm/macRoman.ucm (renamed from ext/Encode/Encode/macRoman.ucm) | 0 | ||||
-rw-r--r-- | ext/Encode/ucm/macRumanian.ucm (renamed from ext/Encode/Encode/macRumanian.ucm) | 0 | ||||
-rw-r--r-- | ext/Encode/ucm/macSami.ucm (renamed from ext/Encode/Encode/macSami.ucm) | 0 | ||||
-rw-r--r-- | ext/Encode/ucm/macThai.ucm (renamed from ext/Encode/Encode/macThai.ucm) | 0 | ||||
-rw-r--r-- | ext/Encode/ucm/macTurkish.ucm (renamed from ext/Encode/Encode/macTurkish.ucm) | 2 | ||||
-rw-r--r-- | ext/Encode/ucm/macUkraine.ucm (renamed from ext/Encode/Encode/macUkraine.ucm) | 0 | ||||
-rw-r--r-- | ext/Encode/ucm/nextstep.ucm (renamed from ext/Encode/Encode/nextstep.ucm) | 0 | ||||
-rw-r--r-- | ext/Encode/ucm/posix-bc.ucm (renamed from ext/Encode/Encode/posix-bc.ucm) | 0 | ||||
-rw-r--r-- | ext/Encode/ucm/shiftjis.ucm (renamed from ext/Encode/Encode/shiftjis.ucm) | 0 | ||||
-rw-r--r-- | ext/Encode/ucm/symbol.ucm (renamed from ext/Encode/Encode/symbol.ucm) | 0 | ||||
-rw-r--r-- | ext/Encode/ucm/viscii.ucm (renamed from ext/Encode/Encode/viscii.ucm) | 0 |
94 files changed, 946 insertions, 327 deletions
diff --git a/ext/Encode/AUTHORS b/ext/Encode/AUTHORS index f0d3c543d4..5831af2507 100644 --- a/ext/Encode/AUTHORS +++ b/ext/Encode/AUTHORS @@ -10,6 +10,7 @@ # This list is in alphabetical order. -- Anton Tagunov <tagunov@motor.ru> +Andreas J. Koenig <andreas.koenig@anima.de> Autrijus Tang <autrijus@autrijus.org> Dan Kogai <dankogai@dan.co.jp> Gerrit P. Haase <gp@familiehaase.de> diff --git a/ext/Encode/Byte/Byte.pm b/ext/Encode/Byte/Byte.pm index af83d00c78..9c425b93f8 100644 --- a/ext/Encode/Byte/Byte.pm +++ b/ext/Encode/Byte/Byte.pm @@ -1,6 +1,6 @@ package Encode::Byte; use Encode; -our $VERSION = do { my @r = (q$Revision: 1.0 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; +our $VERSION = do { my @r = (q$Revision: 1.2 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; use XSLoader; XSLoader::load('Encode::Byte',$VERSION); @@ -74,7 +74,7 @@ supported are as follows. MacSami MacGreek MacThai - MacIcelandic + MacIceland MacTurkish MacUkrainian diff --git a/ext/Encode/Byte/Makefile.PL b/ext/Encode/Byte/Makefile.PL index 57ee63e63d..641317ab1b 100644 --- a/ext/Encode/Byte/Makefile.PL +++ b/ext/Encode/Byte/Makefile.PL @@ -30,7 +30,7 @@ my %tables = ( ], ); -opendir(ENC,'../Encode'); +opendir(ENC,'../ucm'); while (defined(my $file = readdir(ENC))) { if ($file =~ /(8859|ibm).*\.ucm/io) @@ -131,7 +131,7 @@ END sub postamble { my $self = shift; - my $dir = $self->catdir($self->updir,'Encode'); + my $dir = $self->catdir($self->updir,'ucm'); my $str = "# $name\$(OBJ_EXT) depends on .h and _def.h files not .c files - but all written by enc2xs\n"; $str .= "$name.c : $name.xs "; foreach my $table (keys %tables) diff --git a/ext/Encode/CN/Makefile.PL b/ext/Encode/CN/Makefile.PL index 3545a1d0ee..d0630b89a7 100644 --- a/ext/Encode/CN/Makefile.PL +++ b/ext/Encode/CN/Makefile.PL @@ -103,7 +103,7 @@ END sub postamble { my $self = shift; - my $dir = $self->catdir($self->updir,'Encode'); + my $dir = $self->catdir($self->updir,'ucm'); my $str = "# $name\$(OBJ_EXT) depends on .h and 
_def.h files not .c files - but all written by enc2xs\n"; $str .= "$name.c : $name.xs "; foreach my $table (keys %tables) diff --git a/ext/Encode/Changes b/ext/Encode/Changes index 3cf2226d57..613ef8c6cf 100644 --- a/ext/Encode/Changes +++ b/ext/Encode/Changes @@ -1,8 +1,53 @@ # Revision history for Perl extension Encode. # -# $Id: Changes,v 1.1 2002/03/29 20:59:39 dankogai Exp dankogai $ +# $Id: Changes,v 1.11 2002/03/31 22:12:13 dankogai Exp dankogai $ # -1.01 $Date: 2002/03/29 20:59:39 $ + +1.11 $Date: 2002/03/31 22:12:13 $ ++ t/encoding.t ++ t/jperl.t +! MANIFEST + Missing files from the MANIFEST fixed. + Message-Id: <20020401010156.H10509@alpha.hut.fi> + Version incremented just to make CPAN happy. + +1.10 2002/03/31 21:32:42 +! Makefile.PL +! README + INSTALL_UCM option added to Makefile.PL so you can install *.ucm + if you want. This should make Autrijus happy. Also, piconv + is added to default install. ++ Encode/*.e2x +! bin/enc2xs + Here-documented files that enc2xs generates are now exported + to *.e2x. Much cleaner and easier to debug. +! encoding.pm + encoding enhances so you can make it act more like such + (now prehistoric ) "localized" variations of perl like Jperl. ++ t/jperl.t + Further test for encoding.pm. Written in euc-jp ++ encoding.pm ++ t/encoding.t + Taken over form jhi. + Message-Id: <20020330174618.B10154@alpha.hut.fi> +- Encode/*.ucm ++ ucm/*.ucm +! Makefile.PL +! */Makefile.PL + *.ucm relocated to ucm/ so MakeMaker will not install'em by default. +- ucm2table ++ bin/ucm2table + *** +! AUTHORS +! Byte/Byte.pm +! Encode.pm +! Encode/macIceland.ucm +! lib/Encode/Alias.pm +! lib/Encode/Supported.pod + MacIceland fixes and Pod Typo fixes. This adds Andreas to AUTHORS. + Message-Id: <m3lmcavhjt.fsf@anima.de> + +1.01 2002/03/29 20:59:39 ! Makefile.PL ! 
README s/USE_SCRIPTS/MORE_SCRIPTS/ diff --git a/ext/Encode/EBCDIC/Makefile.PL b/ext/Encode/EBCDIC/Makefile.PL index 508df23da4..adf59b0116 100644 --- a/ext/Encode/EBCDIC/Makefile.PL +++ b/ext/Encode/EBCDIC/Makefile.PL @@ -98,7 +98,7 @@ END sub postamble { my $self = shift; - my $dir = $self->catdir($self->updir,'Encode'); + my $dir = $self->catdir($self->updir,'ucm'); my $str = "# $name\$(OBJ_EXT) depends on .h and _def.h files not .c files - but all written by enc2xs\n"; $str .= "$name.c : $name.xs "; foreach my $table (keys %tables) diff --git a/ext/Encode/Encode.pm b/ext/Encode/Encode.pm index d07bfea9e3..9add728ffe 100644 --- a/ext/Encode/Encode.pm +++ b/ext/Encode/Encode.pm @@ -1,6 +1,6 @@ package Encode; use strict; -our $VERSION = do { my @r = (q$Revision: 1.1 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; +our $VERSION = do { my @r = (q$Revision: 1.11 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; our $DEBUG = 0; require DynaLoader; @@ -90,14 +90,14 @@ unless ($ON_EBCDIC) { # CJK added to autoload unless EBCDIC env ); } -for my $k (qw(centeuro croatian cyrillic dingbats greek - iceland roman rumanian sami - thai turkish ukraine)) +for my $k (qw{ CentralEurRoman Croatian Cyrillic Greek + Iceland Roman Rumanian Sami + Thai Turkish Ukrainian + }) { $ExtModule{"mac$k"} = 'Encode/Byte.pm'; } - sub encodings { my $class = shift; @@ -199,7 +199,7 @@ sub from_to return undef if ($check && length($string)); $string = $t->encode($uni,$check); return undef if ($check && length($uni)); - return length($_[0] = $string); + return defined($_[0] = $string) ? length($string) : undef ; } sub encode_utf8 @@ -333,7 +333,7 @@ For example to convert ISO-8859-1 data to UTF-8: $utf8 = decode("iso-8859-1", $latin1); -=item from_to($string, FROM_ENCODING, TO_ENCODING[, CHECK]) +=item [$length =] from_to($string, FROM_ENCODING, TO_ENCODING[, CHECK]) Convert B<in-place> the data between two encodings. How did the data in $string originally get to be in FROM_ENCODING? 
Either using @@ -352,6 +352,9 @@ and to convert it back: Note that because the conversion happens in place, the data to be converted cannot be a string constant, it must be a scalar variable. +from_to() return the length of the converted string on success, undef +otherwise. + =back =head2 Listing available encodings @@ -384,9 +387,9 @@ To add new alias to a given encoding, Use; use Encode::Alias; define_alias(newName => ENCODING); -After that, newName can be to be used as am alias for ENCODING. -ENCODING may be either the name of an encoding or and I<encoding -object> +After that, newName can be used as an alias for ENCODING. +ENCODING may be either the name of an encoding or an I<encoding + object> See L<Encode::Alias> on details. diff --git a/ext/Encode/Encode/Changes.e2x b/ext/Encode/Encode/Changes.e2x new file mode 100644 index 0000000000..f6b5a776f9 --- /dev/null +++ b/ext/Encode/Encode/Changes.e2x @@ -0,0 +1,7 @@ +# +# $Id: Changes.e2x,v 1.10 2002/03/31 21:00:50 dankogai Exp $ +# Revision history for Perl extension Encode::$_Name_. +# + +0.01 $_Now_ + Autogenerated by enc2xs version $_Version_. diff --git a/ext/Encode/Encode/Makefile_PL.e2x b/ext/Encode/Encode/Makefile_PL.e2x new file mode 100644 index 0000000000..000ce92002 --- /dev/null +++ b/ext/Encode/Encode/Makefile_PL.e2x @@ -0,0 +1,150 @@ +# +# This file is auto-generated by: +# enc2xs version $_Version_ +# $_Now_ +# +use 5.7.2; +use strict; +use ExtUtils::MakeMaker; + +# Please edit the following to the taste! +my $name = '$_Name_'; +my %tables = ( + $_Name__t => [ $_TableFiles_ ], + ); + +#### DO NOT EDIT BEYOND THIS POINT! +my $enc2xs = '$_Enc2xs_'; +WriteMakefile( + INC => "-I$_Inc_", +#### END_OF_HEADER -- DO NOT EDIT THIS LINE BY HAND! #### + NAME => 'Encode::'.$name, + VERSION_FROM => "$name.pm", + OBJECT => '$(O_FILES)', + 'dist' => { + COMPRESS => 'gzip -9f', + SUFFIX => 'gz', + DIST_DEFAULT => 'all tardist', + }, + MAN3PODS => {}, + # OS 390 winges about line numbers > 64K ??? 
+ XSOPT => '-nolinenumbers', + ); + +package MY; + +sub post_initialize +{ + my ($self) = @_; + my %o; + my $x = $self->{'OBJ_EXT'}; + # Add the table O_FILES + foreach my $e (keys %tables) + { + $o{$e.$x} = 1; + } + $o{"$name$x"} = 1; + $self->{'O_FILES'} = [sort keys %o]; + my @files = ("$name.xs"); + $self->{'C'} = ["$name.c"]; + # $self->{'H'} = [$self->catfile($self->updir,'encode.h')]; + my %xs; + foreach my $table (keys %tables) { + push (@{$self->{'C'}},"$table.c"); + # Do NOT add $table.h etc. to H_FILES unless we own up as to how they + # get built. + foreach my $ext (qw($(OBJ_EXT) .c .h _def.h .fnm)) { + push (@files,$table.$ext); + } + } + $self->{'XS'} = { "$name.xs" => "$name.c" }; + $self->{'clean'}{'FILES'} .= join(' ',@files); + open(XS,">$name.xs") || die "Cannot open $name.xs:$!"; + print XS <<'END'; +#include <EXTERN.h> +#include <perl.h> +#include <XSUB.h> +#define U8 U8 +#include "encode.h" +END + foreach my $table (keys %tables) { + print XS qq[#include "${table}.h"\n]; + } + print XS <<"END"; + +static void +Encode_XSEncoding(pTHX_ encode_t *enc) +{ + dSP; + HV *stash = gv_stashpv("Encode::XS", TRUE); + SV *sv = sv_bless(newRV_noinc(newSViv(PTR2IV(enc))),stash); + int i = 0; + PUSHMARK(sp); + XPUSHs(sv); + while (enc->name[i]) + { + const char *name = enc->name[i++]; + XPUSHs(sv_2mortal(newSVpvn(name,strlen(name)))); + } + PUTBACK; + call_pv("Encode::define_encoding",G_DISCARD); + SvREFCNT_dec(sv); +} + +MODULE = Encode::$name PACKAGE = Encode::$name +PROTOTYPES: DISABLE +BOOT: +{ +END + foreach my $table (keys %tables) { + print XS qq[#include "${table}_def.h"\n]; + } + print XS "}\n"; + close(XS); + return "# Built $name.xs\n\n"; +} + +sub postamble +{ + my $self = shift; + my $dir = "."; # $self->catdir('Encode'); + my $str = "# $name\$(OBJ_EXT) depends on .h and _def.h files not .c files - but all written by enc2xs\n"; + $str .= "$name.c : $name.xs "; + foreach my $table (keys %tables) + { + $str .= " $table.c"; + } + $str .= "\n\n"; + 
$str .= "$name\$(OBJ_EXT) : $name.c\n\n"; + + foreach my $table (keys %tables) + { + my $numlines = 1; + my $lengthsofar = length($str); + my $continuator = ''; + $str .= "$table.c : Makefile.PL"; + foreach my $file (@{$tables{$table}}) + { + $str .= $continuator.' '.$self->catfile($dir,$file); + if ( length($str)-$lengthsofar > 128*$numlines ) + { + $continuator .= " \\\n\t"; + $numlines++; + } else { + $continuator = ''; + } + } + $str .= $^O eq 'VMS' # In VMS quote to preserve case + ? qq{\n\t\$(PERL) $enc2xs -"Q" -"O" -o \$\@ -f $table.fnm\n\n} + : qq{\n\t\$(PERL) $enc2xs -Q -O -o \$\@ -f $table.fnm\n\n}; + open (FILELIST, ">$table.fnm") + || die "Could not open $table.fnm: $!"; + foreach my $file (@{$tables{$table}}) + { + print FILELIST $self->catfile($dir,$file) . "\n"; + } + close(FILELIST); + } + return $str; +} + diff --git a/ext/Encode/Encode/README.e2x b/ext/Encode/Encode/README.e2x new file mode 100644 index 0000000000..28a31a655c --- /dev/null +++ b/ext/Encode/Encode/README.e2x @@ -0,0 +1,31 @@ +Encode::$_Name_ version 0.1 +======== + +NAME + Encode::$_Name_ - <describe encoding> + +SYNOPSIS + use Encode::$_Name_; + #<put more words here> +ABSTRACT + <fill this in> +INSTALLATION + +To install this module type the following: + + perl Makefile.PL + make + make test + make install + +DEPENDENCIES + +This module requires perl version 5.7.3 or later. + +COPYRIGHT AND LICENCE + +Copyright (C) 2002 Your Name <your@address.domain> + +This library is free software; you can redistribute it and/or modify +it under the same terms as Perl itself. + diff --git a/ext/Encode/Encode/_PM.e2x b/ext/Encode/Encode/_PM.e2x new file mode 100644 index 0000000000..208b87ee08 --- /dev/null +++ b/ext/Encode/Encode/_PM.e2x @@ -0,0 +1,23 @@ +package Encode::$_Name_; +our $VERSION = "0.01"; + +use Encode; +use XSLoader; +XSLoader::load('Encode::$_Name_', $VERSION); + +1; +__END__ + +=head1 NAME + +Encode::$_Name_ - New Encoding + +=head1 SYNOPSIS + +You got to fill this in! 
+ +=head1 SEE ALSO + +L<Encode> + +=cut diff --git a/ext/Encode/Encode/_T.e2x b/ext/Encode/Encode/_T.e2x new file mode 100644 index 0000000000..6cf5f293d5 --- /dev/null +++ b/ext/Encode/Encode/_T.e2x @@ -0,0 +1,7 @@ +use strict; +# Adjust the number here! +use Test::More tests => 2; + +use_ok('Encode'); +use_ok('Encode::$_Name_'); +# Add more test here! diff --git a/ext/Encode/JP/Makefile.PL b/ext/Encode/JP/Makefile.PL index 5ce6ef6b66..4c0fbd396c 100644 --- a/ext/Encode/JP/Makefile.PL +++ b/ext/Encode/JP/Makefile.PL @@ -102,7 +102,7 @@ END sub postamble { my $self = shift; - my $dir = $self->catdir($self->updir,'Encode'); + my $dir = $self->catdir($self->updir,'ucm'); my $str = "# $name\$(OBJ_EXT) depends on .h and _def.h files not .c files - but all written by enc2xs\n"; $str .= "$name.c : $name.xs "; foreach my $table (keys %tables) diff --git a/ext/Encode/KR/Makefile.PL b/ext/Encode/KR/Makefile.PL index df7534d0d0..db6ef25fb5 100644 --- a/ext/Encode/KR/Makefile.PL +++ b/ext/Encode/KR/Makefile.PL @@ -102,7 +102,7 @@ END sub postamble { my $self = shift; - my $dir = $self->catdir($self->updir,'Encode'); + my $dir = $self->catdir($self->updir,'ucm'); my $str = "# $name\$(OBJ_EXT) depends on .h and _def.h files not .c files - but all written by enc2xs\n"; $str .= "$name.c : $name.xs "; foreach my $table (keys %tables) diff --git a/ext/Encode/MANIFEST b/ext/Encode/MANIFEST index df9e51a7e9..bde0b22f72 100644 --- a/ext/Encode/MANIFEST +++ b/ext/Encode/MANIFEST @@ -8,74 +8,12 @@ EBCDIC/EBCDIC.pm Encode extension EBCDIC/Makefile.PL Encode extension Encode.pm Encode extension Encode.xs Encode extension -Encode/8859-1.ucm Unicode Character Map -Encode/8859-10.ucm Unicode Character Map -Encode/8859-11.ucm Unicode Character Map -Encode/8859-13.ucm Unicode Character Map -Encode/8859-14.ucm Unicode Character Map -Encode/8859-15.ucm Unicode Character Map -Encode/8859-16.ucm Unicode Character Map -Encode/8859-2.ucm Unicode Character Map -Encode/8859-3.ucm Unicode Character Map 
-Encode/8859-4.ucm Unicode Character Map -Encode/8859-5.ucm Unicode Character Map -Encode/8859-6.ucm Unicode Character Map -Encode/8859-7.ucm Unicode Character Map -Encode/8859-8.ucm Unicode Character Map -Encode/8859-9.ucm Unicode Character Map -Encode/ascii.ucm Unicode Character Map -Encode/big5-hkscs.ucm Unicode Character Map -Encode/big5.ucm Unicode Character Map -Encode/cp1047.ucm Unicode Character Map -Encode/cp1250.ucm Unicode Character Map -Encode/cp37.ucm Unicode Character Map -Encode/cp932.ucm Unicode Character Map -Encode/cp936.ucm Unicode Character Map -Encode/cp949.ucm Unicode Character Map -Encode/cp950.ucm Unicode Character Map -Encode/dingbats.ucm Unicode Character Map +Encode/Changes.e2x Skelton file for enc2xs +Encode/Makefile_PL.e2x Skelton file for enc2xs +Encode/README.e2x Skelton file for enc2xs +Encode/_PM.e2x Skelton file for enc2xs +Encode/_T.e2x Skelton file for enc2xs Encode/encode.h Encode extension header file -Encode/euc-cn.ucm Unicode Character Map -Encode/euc-jp.ucm Unicode Character Map -Encode/euc-kr.ucm Unicode Character Map -Encode/gb12345.ucm Unicode Character Map -Encode/gb2312.ucm Unicode Character Map -Encode/gsm0338.ucm Unicode Character Map -Encode/hp-roman8.ucm Unicode Character Map -Encode/ibm-1250.ucm Unicode Character Map -Encode/ibm-1251.ucm Unicode Character Map -Encode/ibm-1252.ucm Unicode Character Map -Encode/ibm-1253.ucm Unicode Character Map -Encode/ibm-1254.ucm Unicode Character Map -Encode/ibm-1255.ucm Unicode Character Map -Encode/ibm-1256.ucm Unicode Character Map -Encode/ibm-1257.ucm Unicode Character Map -Encode/ibm-1258.ucm Unicode Character Map -Encode/ir-165.ucm Unicode Character Map -Encode/jis0201.ucm Unicode Character Map -Encode/johab.ucm Unicode Character Map -Encode/koi8-f.ucm Unicode Character Map -Encode/koi8-r.ucm Unicode Character Map -Encode/koi8-u.ucm Unicode Character Map -Encode/ksc5601.ucm Unicode Character Map -Encode/macCentEuro.ucm Unicode Character Map -Encode/macCroatian.ucm Unicode 
Character Map -Encode/macCyrillic.ucm Unicode Character Map -Encode/macDingbats.ucm Unicode Character Map -Encode/macGreek.ucm Unicode Character Map -Encode/macIceland.ucm Unicode Character Map -Encode/macJapan.ucm Unicode Character Map -Encode/macRoman.ucm Unicode Character Map -Encode/macRumanian.ucm Unicode Character Map -Encode/macSami.ucm Unicode Character Map -Encode/macThai.ucm Unicode Character Map -Encode/macTurkish.ucm Unicode Character Map -Encode/macUkraine.ucm Unicode Character Map -Encode/nextstep.ucm Unicode Character Map -Encode/posix-bc.ucm Unicode Character Map -Encode/shiftjis.ucm Unicode Character Map -Encode/symbol.ucm Unicode Character Map -Encode/viscii.ucm Unicode Character Map JP/JP.pm Encode extension JP/Makefile.PL Encode extension KR/KR.pm Encode extension @@ -89,7 +27,9 @@ TW/Makefile.PL Encode extension TW/TW.pm Encode extension bin/enc2xs Encode module generator bin/piconv iconv by perl +bin/ucm2table Table Generator for testing encengine.c Encode extension +encoding.pm Perl Pragmactic Module lib/Encode/10646_1.pm Encode extension lib/Encode/Alias.pm Encode extension lib/Encode/CJKConstants.pm Encode extension @@ -112,12 +52,80 @@ t/Encode.t Encode extension test t/JP.t Encode extension test t/KR.t Encode extension test t/TW.t Encode extension test +t/encoding.t encoding extension test t/gb2312.euc test data t/gb2312.ref test data t/jisx0208.euc test data t/jisx0208.ref test data t/jisx0212.euc test data t/jisx0212.ref test data +t/jperl.t encoding extension test t/ksc5601.euc test data t/ksc5601.ref test data -ucm2table Table Generator for testing +ucm/8859-1.ucm Unicode Character Map +ucm/8859-10.ucm Unicode Character Map +ucm/8859-11.ucm Unicode Character Map +ucm/8859-13.ucm Unicode Character Map +ucm/8859-14.ucm Unicode Character Map +ucm/8859-15.ucm Unicode Character Map +ucm/8859-16.ucm Unicode Character Map +ucm/8859-2.ucm Unicode Character Map +ucm/8859-3.ucm Unicode Character Map +ucm/8859-4.ucm Unicode Character Map 
+ucm/8859-5.ucm Unicode Character Map +ucm/8859-6.ucm Unicode Character Map +ucm/8859-7.ucm Unicode Character Map +ucm/8859-8.ucm Unicode Character Map +ucm/8859-9.ucm Unicode Character Map +ucm/ascii.ucm Unicode Character Map +ucm/big5-hkscs.ucm Unicode Character Map +ucm/big5.ucm Unicode Character Map +ucm/cp1047.ucm Unicode Character Map +ucm/cp1250.ucm Unicode Character Map +ucm/cp37.ucm Unicode Character Map +ucm/cp932.ucm Unicode Character Map +ucm/cp936.ucm Unicode Character Map +ucm/cp949.ucm Unicode Character Map +ucm/cp950.ucm Unicode Character Map +ucm/dingbats.ucm Unicode Character Map +ucm/euc-cn.ucm Unicode Character Map +ucm/euc-jp.ucm Unicode Character Map +ucm/euc-kr.ucm Unicode Character Map +ucm/gb12345.ucm Unicode Character Map +ucm/gb2312.ucm Unicode Character Map +ucm/gsm0338.ucm Unicode Character Map +ucm/hp-roman8.ucm Unicode Character Map +ucm/ibm-1250.ucm Unicode Character Map +ucm/ibm-1251.ucm Unicode Character Map +ucm/ibm-1252.ucm Unicode Character Map +ucm/ibm-1253.ucm Unicode Character Map +ucm/ibm-1254.ucm Unicode Character Map +ucm/ibm-1255.ucm Unicode Character Map +ucm/ibm-1256.ucm Unicode Character Map +ucm/ibm-1257.ucm Unicode Character Map +ucm/ibm-1258.ucm Unicode Character Map +ucm/ir-165.ucm Unicode Character Map +ucm/jis0201.ucm Unicode Character Map +ucm/johab.ucm Unicode Character Map +ucm/koi8-f.ucm Unicode Character Map +ucm/koi8-r.ucm Unicode Character Map +ucm/koi8-u.ucm Unicode Character Map +ucm/ksc5601.ucm Unicode Character Map +ucm/macCentEuro.ucm Unicode Character Map +ucm/macCroatian.ucm Unicode Character Map +ucm/macCyrillic.ucm Unicode Character Map +ucm/macDingbats.ucm Unicode Character Map +ucm/macGreek.ucm Unicode Character Map +ucm/macIceland.ucm Unicode Character Map +ucm/macJapan.ucm Unicode Character Map +ucm/macRoman.ucm Unicode Character Map +ucm/macRumanian.ucm Unicode Character Map +ucm/macSami.ucm Unicode Character Map +ucm/macThai.ucm Unicode Character Map +ucm/macTurkish.ucm Unicode Character Map 
+ucm/macUkraine.ucm Unicode Character Map +ucm/nextstep.ucm Unicode Character Map +ucm/posix-bc.ucm Unicode Character Map +ucm/shiftjis.ucm Unicode Character Map +ucm/symbol.ucm Unicode Character Map +ucm/viscii.ucm Unicode Character Map diff --git a/ext/Encode/Makefile.PL b/ext/Encode/Makefile.PL index 0f0ed78517..71bc5fbdbe 100644 --- a/ext/Encode/Makefile.PL +++ b/ext/Encode/Makefile.PL @@ -1,5 +1,4 @@ -use 5.7.2; -use strict; +use 5.007003; use ExtUtils::MakeMaker; my %tables = @@ -9,13 +8,20 @@ my %tables = ] ); -my @exe_files = qw(bin/enc2xs); -my @more_exe_files = qw(bin/piconv); - +my @exe_files = qw(bin/enc2xs + bin/piconv + ); +my @more_exe_files = qw( + ucm2table + ); +my @pmlibdirs = qw(lib Encode); for my $arg (@ARGV){ if ($arg eq "MORE_SCRIPTS"){ push @exe_files, @more_exe_files; } + if ($arg eq "INSTALL_UCM"){ + push @pmlibdirs, "ucm"; + } } WriteMakefile( @@ -29,7 +35,8 @@ WriteMakefile( DIST_DEFAULT => 'all tardist', }, MAN3PODS => {}, - INC => "-I./Encode" + INC => "-I./Encode", + PMLIBDIRS => \@pmlibdirs, ); package MY; @@ -70,7 +77,7 @@ return ''; sub postamble { my $self = shift; - my $dir = $self->catdir($self->curdir,'Encode'); + my $dir = $self->catdir($self->curdir,'ucm'); my $str = "# Encode\$(OBJ_EXT) depends on .h and _def.h files not .c files - but all written by enc2xs\n"; $str .= 'Encode$(OBJ_EXT) :'; foreach my $table (keys %tables) diff --git a/ext/Encode/README b/ext/Encode/README index 590834a336..4ff4622027 100644 --- a/ext/Encode/README +++ b/ext/Encode/README @@ -23,15 +23,20 @@ To install this module type the following: To install scripts under bin/ directories also, perl Makefile.PL MORE_SCRIPTS - make - make test - make install + make && make test && make install + +by default, only enc2xs and piconv are installed. + +To install *.ucm files also, say + + perl Makefile.PL INSTALL_UCM + make && make test && make install -by default, only bin/enc2xs is installed. +by default, *.ucm are not installed. 
DEPENDENCIES -This module requires perl5.7.2 or later. +This module requires perl5.7.3 or later. QUESTIONS? diff --git a/ext/Encode/Symbol/Makefile.PL b/ext/Encode/Symbol/Makefile.PL index 063369908f..f5772be7a2 100644 --- a/ext/Encode/Symbol/Makefile.PL +++ b/ext/Encode/Symbol/Makefile.PL @@ -98,7 +98,7 @@ END sub postamble { my $self = shift; - my $dir = $self->catdir($self->updir,'Encode'); + my $dir = $self->catdir($self->updir,'ucm'); my $str = "# $name\$(OBJ_EXT) depends on .h and _def.h files not .c files - but all written by enc2xs\n"; $str .= "$name.c : $name.xs "; foreach my $table (keys %tables) diff --git a/ext/Encode/TW/Makefile.PL b/ext/Encode/TW/Makefile.PL index 9ff1203c19..2b100ee3e9 100644 --- a/ext/Encode/TW/Makefile.PL +++ b/ext/Encode/TW/Makefile.PL @@ -100,7 +100,7 @@ END sub postamble { my $self = shift; - my $dir = $self->catdir($self->updir,'Encode'); + my $dir = $self->catdir($self->updir,'ucm'); my $str = "# $name\$(OBJ_EXT) depends on .h and _def.h files not .c files - but all written by enc2xs\n"; $str .= "$name.c : $name.xs "; foreach my $table (keys %tables) diff --git a/ext/Encode/bin/enc2xs b/ext/Encode/bin/enc2xs index 803c2649f9..3a9780baf2 100644 --- a/ext/Encode/bin/enc2xs +++ b/ext/Encode/bin/enc2xs @@ -1,4 +1,4 @@ -#!../../../perl -w +#!./perl BEGIN { # fiddle with @INC iff I am a part of perl dist if ($^X =~ m/\bminiperl$/o){ @@ -10,7 +10,7 @@ BEGIN { use strict; use Getopt::Std; my @orig_ARGV = @ARGV; -our $VERSION = do { my @r = (q$Revision: 1.1 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; +our $VERSION = do { my @r = (q$Revision: 1.10 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; # These may get re-ordered. 
@@ -832,215 +832,58 @@ sub output_ucm print $fh "END CHARMAP\n"; } +use vars qw( + $_Enc2xs + $_Version + $_Inc + $_Name + $_TableFiles + $_Now +); + sub make_makefile_pl { eval { require Encode; }; $@ and die "You need to install Encode to use enc2xs -M\nerror: $@\n"; + # package variables (see "use vars" above), used for variable expansion in the .e2x templates + $_Enc2xs = $0; + $_Version = $VERSION; + $_Inc = $INC{"Encode.pm"}; $_Inc =~ s/\.pm$//o; + $_Name = shift; + $_TableFiles = join(",", map {qq('$_')} @_); + $_Now = scalar localtime(); + warn "Generating Makefile.PL\n"; + _print_expand("$_Inc/Makefile_PL.e2x", "Makefile.PL"); + warn "Generating $_Name.pm\n"; + _print_expand("$_Inc/_PM.e2x", "$_Name.pm"); + warn "Generating t/$_Name.t\n"; + _print_expand("$_Inc/_T.e2x", "t/$_Name.t"); + warn "Generating README\n"; + _print_expand("$_Inc/README.e2x", "README"); + warn "Generating Changes\n"; + _print_expand("$_Inc/Changes.e2x", "Changes"); + exit; +} + +sub _print_expand{ eval { require File::Basename; }; $@ and die "File::Basename needed. Are you on miniperl?;\nerror: $@\n"; File::Basename->import(); - my $inc = dirname($INC{"Encode/Internal.pm"}); - my $name = shift; - my $table_files = join(",", map {qq('$_')} @_); - my $now = scalar localtime(); - open my $fh, ">Makefile.PL" or die "$!"; - print $fh <<"END_OF_HEADER"; -# -# This file is auto-generated by: -# $0 -# $now -# -use 5.7.2; -use strict; -use ExtUtils::MakeMaker; - -# Please edit the following to the taste! -my \$name = '$name'; -my \%tables = ( - encode_t => [ $table_files ], - ); - -# And leave the rest! -my \$enc2xs = '$0'; -WriteMakefile( - INC => "-I$inc", -END_OF_HEADER - - print $fh <<'END_OF_MAKEFILE_PL'; - NAME => 'Encode::'.$name, - VERSION_FROM => "$name.pm", - OBJECT => '$(O_FILES)', - 'dist' => { - COMPRESS => 'gzip -9f', - SUFFIX => 'gz', - DIST_DEFAULT => 'all tardist', - }, - MAN3PODS => {}, - # OS 390 winges about line numbers > 64K ??? 
- XSOPT => '-nolinenumbers', - ); - -package MY; - -sub post_initialize -{ - my ($self) = @_; - my %o; - my $x = $self->{'OBJ_EXT'}; - # Add the table O_FILES - foreach my $e (keys %tables) - { - $o{$e.$x} = 1; - } - $o{"$name$x"} = 1; - $self->{'O_FILES'} = [sort keys %o]; - my @files = ("$name.xs"); - $self->{'C'} = ["$name.c"]; - # $self->{'H'} = [$self->catfile($self->updir,'encode.h')]; - my %xs; - foreach my $table (keys %tables) { - push (@{$self->{'C'}},"$table.c"); - # Do NOT add $table.h etc. to H_FILES unless we own up as to how they - # get built. - foreach my $ext (qw($(OBJ_EXT) .c .h _def.h .fnm)) { - push (@files,$table.$ext); - } - } - $self->{'XS'} = { "$name.xs" => "$name.c" }; - $self->{'clean'}{'FILES'} .= join(' ',@files); - open(XS,">$name.xs") || die "Cannot open $name.xs:$!"; - print XS <<'END'; -#include <EXTERN.h> -#include <perl.h> -#include <XSUB.h> -#define U8 U8 -#include "encode.h" -END - foreach my $table (keys %tables) { - print XS qq[#include "${table}.h"\n]; + my ($src, $dst) = @_; + open my $in, $src or die "$src : $!"; + if ((my $d = dirname($dst)) ne '.'){ + -d $d or mkdir $d, 0755 or die "mkdir $d : $!"; + } + open my $out, ">$dst" or die "$!"; + my $asis = 0; + while (<$in>){ + if (/^#### END_OF_HEADER/){ + $asis = 1; next; + } + s/(\$_[A-Z][A-Za-z0-9]+)_/$1/gee unless $asis; + print $out $_; } - print XS <<"END"; - -static void -Encode_XSEncoding(pTHX_ encode_t *enc) -{ - dSP; - HV *stash = gv_stashpv("Encode::XS", TRUE); - SV *sv = sv_bless(newRV_noinc(newSViv(PTR2IV(enc))),stash); - int i = 0; - PUSHMARK(sp); - XPUSHs(sv); - while (enc->name[i]) - { - const char *name = enc->name[i++]; - XPUSHs(sv_2mortal(newSVpvn(name,strlen(name)))); - } - PUTBACK; - call_pv("Encode::define_encoding",G_DISCARD); - SvREFCNT_dec(sv); -} - -MODULE = Encode::$name PACKAGE = Encode::$name -PROTOTYPES: DISABLE -BOOT: -{ -END - foreach my $table (keys %tables) { - print XS qq[#include "${table}_def.h"\n]; - } - print XS "}\n"; - close(XS); - 
return "# Built $name.xs\n\n"; -} - -sub postamble -{ - my $self = shift; - my $dir = "."; # $self->catdir('Encode'); - my $str = "# $name\$(OBJ_EXT) depends on .h and _def.h files not .c files - but all written by enc2xs\n"; - $str .= "$name.c : $name.xs "; - foreach my $table (keys %tables) - { - $str .= " $table.c"; - } - $str .= "\n\n"; - $str .= "$name\$(OBJ_EXT) : $name.c\n\n"; - - foreach my $table (keys %tables) - { - my $numlines = 1; - my $lengthsofar = length($str); - my $continuator = ''; - $str .= "$table.c : Makefile.PL"; - foreach my $file (@{$tables{$table}}) - { - $str .= $continuator.' '.$self->catfile($dir,$file); - if ( length($str)-$lengthsofar > 128*$numlines ) - { - $continuator .= " \\\n\t"; - $numlines++; - } else { - $continuator = ''; - } - } - $str .= $^O eq 'VMS' # In VMS quote to preserve case - ? qq{\n\t\$(PERL) $enc2xs -"Q" -"O" -o \$\@ -f $table.fnm\n\n} - : qq{\n\t\$(PERL) $enc2xs -Q -O -o \$\@ -f $table.fnm\n\n}; - open (FILELIST, ">$table.fnm") - || die "Could not open $table.fnm: $!"; - foreach my $file (@{$tables{$table}}) - { - print FILELIST $self->catfile($dir,$file) . "\n"; - } - close(FILELIST); - } - return $str; } -END_OF_MAKEFILE_PL - close $fh; - (my $pm =<<"END_OF_PM") =~ s/^# //gm; -# package Encode::$name; -# our \$VERSION = "0.01"; -# -# use Encode; -# use XSLoader; -# XSLoader::load('Encode::$name', \$VERSION); -# -# 1; -# __END__ -# -# =head1 NAME -# -# Encode::$name - New Encoding -# -# =head1 SYNOPSIS -# -# You got to fill this in! -# -# =head1 SEE ALSO -# -# L<Encode> -# -# =cut -END_OF_PM - open $fh, ">$name.pm" or die "$name.pm:$!"; - print $fh $pm; - close $fh; - -d 't' or mkdir 't', 0755 or die "mkdir t:$!"; - open $fh, ">t/$name.t" or die "t/$name.t:$!"; -print $fh <<"END_OF_TEST"; -use strict; -# Adjust the number here! -use Test::More tests => 2; - -use_ok('Encode'); -use_ok('Encode::$name'); -# Add more test here! 
-END_OF_TEST - close $fh; - exit; -} - __END__ =head1 NAME @@ -1083,6 +926,10 @@ defined inI<my.ucm>. C<$> is a shell prompt. Issue a command as follows; $ enc2xs -M My my.ucm + generating Makefile.PL + generating My.pm + generating README + generating Changes Now take a look at your current directory. It should look like this. diff --git a/ext/Encode/bin/piconv b/ext/Encode/bin/piconv index c7f08aec51..3958dfa8c3 100644 --- a/ext/Encode/bin/piconv +++ b/ext/Encode/bin/piconv @@ -1,5 +1,5 @@ -#!/usr/bin/perl -# $Id: piconv,v 1.0 2002/03/28 23:26:28 dankogai Exp $ +#!./perl +# $Id: piconv,v 1.10 2002/03/31 21:00:50 dankogai Exp $ # use 5.7.3; use strict; diff --git a/ext/Encode/ucm2table b/ext/Encode/bin/ucm2table index 04966133c6..04966133c6 100644 --- a/ext/Encode/ucm2table +++ b/ext/Encode/bin/ucm2table diff --git a/ext/Encode/encoding.pm b/ext/Encode/encoding.pm new file mode 100644 index 0000000000..1b3dd840cc --- /dev/null +++ b/ext/Encode/encoding.pm @@ -0,0 +1,233 @@ +package encoding; +our $VERSION = do { my @r = (q$Revision: 1.2 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; + +use Encode; + +BEGIN { + if (ord("A") == 193) { + require Carp; + Carp::croak "encoding pragma does not support EBCDIC platforms"; + } +} + +sub import { + my $class = shift; + my $name = shift; + my %arg = @_; + $name ||= $ENV{PERL_ENCODING}; + + my $enc = find_encoding($name); + unless (defined $enc) { + require Carp; + Carp::croak "Unknown encoding '$name'"; + } + ${^ENCODING} = $enc; # this is all you need, actually. 
+ + # $_OPEN_ORIG = ${^OPEN}; + for my $h (qw(STDIN STDOUT STDERR)){ + if ($arg{$h}){ # a per-handle encoding overrides $name for this handle + unless (defined find_encoding($arg{$h})) { + require Carp; + Carp::croak "Unknown encoding for $h, '$arg{$h}'"; + } + eval qq{ binmode($h, ":encoding($arg{$h})") }; + }else{ + eval qq{ binmode($h, ":encoding($name)") }; + } + if ($@){ + require Carp; + Carp::croak($@); + } + } + return 1; # I doubt if we need it, though +} + +sub unimport{ + no warnings; + undef ${^ENCODING}; + binmode(STDIN, ":raw"); + binmode(STDOUT, ":raw"); + binmode(STDERR, ":raw"); +} + +1; +__END__ +=pod + +=head1 NAME + +encoding - allows you to write your script in non-ascii or non-utf8 + +=head1 SYNOPSIS + + use encoding "euc-jp"; # Jperl! + + # or you can even do this if your shell supports euc-jp + + > perl -Mencoding=euc-jp -e '...' + + # or from the shebang line + + #!/your/path/to/perl -Mencoding=euc-jp + + # more control + + # A simple euc-jp => utf-8 converter + use encoding "euc-jp", STDOUT => "utf8"; while(<>){print}; + + # "no encoding;" supported (but not scoped!) + no encoding; + +=head1 ABSTRACT + +Perl 5.6.0 has introduced Unicode support. You could apply +C<substr()> and regexes even to complex CJK characters -- so long as +the script was written in UTF-8. But back then text editors that +support UTF-8 were still rare and many users rather chose to write +scripts in legacy encodings, giving up a whole new feature of Perl 5.6. + +With B<encoding> pragma, you can write your script in any encoding you like +(so long as the C<Encode> module supports it) and still enjoy Unicode +support. You can write a code in EUC-JP as follows; + + my $Rakuda = "\xF1\xD1\xF1\xCC"; # Camel in Kanji + #<-char-><-char-> # 4 octets + s/\bCamel\b/$Rakuda/; + +And with C<use encoding "euc-jp"> in effect, it is the same thing as +the code in UTF-8 as follows. 
+ + my $Rakuda = "\x{99F1}\x{99DD}"; # two Unicode Characters + s/\bCamel\b/$Rakuda/; + +The B<encoding> pragma also modifies the file handle disciplines of +STDIN, STDOUT, and STDERR to the specified encoding. Therefore, + + use encoding "euc-jp"; + my $message = "Camel is the symbol of perl.\n"; + my $Rakuda = "\xF1\xD1\xF1\xCC"; # Camel in Kanji + $message =~ s/\bCamel\b/$Rakuda/; + print $message; + +Will print "\xF1\xD1\xF1\xCC is the symbol of perl.\n", not +"\x{99F1}\x{99DD} is the symbol of perl.\n". + +You can override this by giving extra arguments. See below. + +=head1 USAGE + +=over 4 + +=item use encoding [I<ENCNAME>] ; + +Sets the script encoding to I<ENCNAME> and file handle disciplines of +STDIN, STDOUT, and STDERR are set to ":encoding(I<ENCNAME>)". + +If no encoding is specified, the environment variable L<PERL_ENCODING> +is consulted. If no encoding can be found, C<Unknown encoding 'I<ENCNAME>'> +error will be thrown. + +Note that non-STD file handles remain unaffected. Use C<use open> or +C<binmode> to change disciplines of those. + +=item use encoding I<ENCNAME> [ STDIN => I<ENCNAME_IN> ...] ; + +You can also individually set encodings of STDIN, STDOUT, and STDERR +via STDI<FH> => I<ENCNAME_FH> form. In this case, you cannot omit the +first I<ENCNAME>. + +=item no encoding; + +Unsets the script encoding and the disciplines of STDIN, STDOUT, and +STDERR are reset to ":raw". + +=back + +=head1 CAVEATS + +=head2 NOT SCOPED + +The pragma is a per script, not a per block lexical. Only the last +C<use encoding> or C<no encoding> matters, and it affects B<the whole script>. +Though the C<no encoding> pragma is supported and C<use encoding> can +appear as many times as you want in a given script, the multiple use +of this pragma is discouraged. 
+ +=head2 DO NOT MIX MULTIPLE ENCODINGS + +Notice that only literals (string or regular expression) having only +legacy code points are affected: if you mix data like this + + \xDF\x{100} + +the data is assumed to be in (Latin 1 and) Unicode, not in your native +encoding. In other words, this will match in "greek": + + "\xDF" =~ /\x{3af}/ + +but this will not + + "\xDF\x{100}" =~ /\x{3af}\x{100}/ + +since the C<\xDF> on the left will B<not> be upgraded to C<\x{3af}> +because of the C<\x{100}> on the left. You should not be mixing your +legacy data and Unicode in the same string. + +This pragma also affects encoding of the 0x80..0xFF code point range: +normally characters in that range are left as eight-bit bytes (unless +they are combined with characters with code points 0x100 or larger, +in which case all characters need to become UTF-8 encoded), but if +the C<encoding> pragma is present, even the 0x80..0xFF range always +gets UTF-8 encoded. + +After all, the best thing about this pragma is that you don't have to +resort to \x... just to spell your name in native encoding. So feel +free to put your strings in your encoding in quotes and regexes. + +=head1 EXAMPLE - Greekperl + + use encoding "iso 8859-7"; + + # The \xDF of ISO 8859-7 (Greek) is \x{3af} in Unicode. + + $a = "\xDF"; + $b = "\x{100}"; + + printf "%#x\n", ord($a); # will print 0x3af, not 0xdf + + $c = $a . $b; + + # $c will be "\x{3af}\x{100}", not "\x{df}\x{100}". + + # chr() is affected, and ... + + print "mega\n" if ord(chr(0xdf)) == 0x3af; + + # ... ord() is affected by the encoding pragma ... + + print "tera\n" if ord(pack("C", 0xdf)) == 0x3af; + + # ... as are eq and cmp ... + + print "peta\n" if "\x{3af}" eq pack("C", 0xdf); + print "exa\n" if "\x{3af}" cmp pack("C", 0xdf) == 0; + + # ... 
but pack/unpack C are not affected, in case you still + # want back to your native encoding + + print "zetta\n" if unpack("C", (pack("C", 0xdf))) == 0xdf; + +=head1 KNOWN PROBLEMS + +For native multibyte encodings (either fixed or variable length) +the current implementation of the regular expressions may introduce +recoding errors for longer regular expression literals than 127 bytes. + +The encoding pragma is not supported on EBCDIC platforms. +(Porters wanted.) + +=head1 SEE ALSO + +L<perlunicode>, L<Encode>, L<open> + +=cut diff --git a/ext/Encode/lib/Encode/Alias.pm b/ext/Encode/lib/Encode/Alias.pm index 76a995ee9e..d00d2bfaa5 100644 --- a/ext/Encode/lib/Encode/Alias.pm +++ b/ext/Encode/lib/Encode/Alias.pm @@ -1,7 +1,7 @@ package Encode::Alias; use strict; use Encode; -our $VERSION = do { my @r = (q$Revision: 1.0 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; +our $VERSION = do { my @r = (q$Revision: 1.2 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; our $DEBUG = 0; require Exporter; @@ -167,6 +167,9 @@ sub init_aliases # Sometimes seen with a leading zero. define_alias( qr/\bcp037\b/i => '"cp37"'); + # Mac Mappings + define_alias( qr/\bmacIcelandic$/i => '"macIceland"'); + define_alias( qr/^mac_(.*)$/i => '"mac$1"'); # Ououououou. define_alias( qr/\bmacRomanian$/i => '"macRumanian"'); @@ -235,8 +238,9 @@ Encode::Alias - alias defintions to encodings =head1 DESCRIPTION -Allows newName to be used as am alias for ENCODING. ENCODING may be -either the name of an encoding or and encoding object (as described in L<Encode>). +Allows newName to be used as an alias for ENCODING. ENCODING may be +either the name of an encoding or an encoding object (as described +in L<Encode>). Currently I<newName> can be specified in the following ways: @@ -249,28 +253,29 @@ Currently I<newName> can be specified in the following ways: define_alias( qr/^iso8859-(\d+)$/i => '"iso-8859-$1"' ); In this case if I<ENCODING> is not a reference it is C<eval>-ed to -allow C<$1> etc. 
to be subsituted. The example is one way to names as -used in X11 font names to alias the MIME names for the iso-8859-* -family. Note the double quote inside the single quote. +allow C<$1> etc. to be substituted. The example is one way to alias +names as used in X11 fonts to the MIME names for the iso-8859-* +family. Note the double quote inside the single quote. -If you are using regex here, you have to do so or it won't work in -this case. Also not regex is tricky even for the experienced. Use it -with caution. +If you are using a regex here, you have to use the quotes as shown or +it won't work. Also note that regex handling is tricky even for the +experienced. Use it with caution. =item As a code reference, e.g.: define_alias( sub { return /^iso8859-(\d+)$/i ? "iso-8859-$1" : undef } , ''); + In this case C<$_> will be set to the name that is being looked up and I<ENCODING> is passed to the sub as its first argument. The example -is another way to names as used in X11 font names to alias the MIME -names for the iso-8859-* family. +is another way to alias names as used in X11 fonts to the MIME names +for the iso-8859-* family. =back =head2 Alias overloading -You can override predefined aliases by simply applying define_alias(). +You can override predefined aliases by simply applying define_alias(). New alias is always evaluated first and when neccessary define_alias() flushes internal cache to make new definition available. diff --git a/ext/Encode/lib/Encode/Supported.pod b/ext/Encode/lib/Encode/Supported.pod index 1a8d076469..9f9892872c 100644 --- a/ext/Encode/lib/Encode/Supported.pod +++ b/ext/Encode/lib/Encode/Supported.pod @@ -97,9 +97,9 @@ non-ASCII characters. 
MacCyrillic MacRomanian MacSami - MacGreek + MacGreek MacThai - MacIcelandic + MacIceland MacTurkish MacUkrainian diff --git a/ext/Encode/t/encoding.t b/ext/Encode/t/encoding.t new file mode 100644 index 0000000000..e23820aee4 --- /dev/null +++ b/ext/Encode/t/encoding.t @@ -0,0 +1,190 @@ +BEGIN { + require Config; import Config; + if ($Config{'extensions'} !~ /\bEncode\b/) { + print "1..0 # Skip: Encode was not built\n"; + exit 0; + } + if (ord("A") == 193) { + print "1..0 # encoding pragma does not support EBCDIC platforms\n"; + exit(0); + } +} + +print "1..29\n"; + +use encoding "latin1"; # ignored (overwritten by the next line) +use encoding "greek"; # iso 8859-7 (no "latin" alias, surprise...) + +# "greek" is "ISO 8859-7", and \xDF in ISO 8859-7 is +# \x{3AF} in Unicode (GREEK SMALL LETTER IOTA WITH TONOS), +# instead of \xDF in Unicode (LATIN SMALL LETTER SHARP S) + +$a = "\xDF"; +$b = "\x{100}"; + +print "not " unless ord($a) == 0x3af; +print "ok 1\n"; + +print "not " unless ord($b) == 0x100; +print "ok 2\n"; + +my $c; + +$c = $a . $b; + +print "not " unless ord($c) == 0x3af; +print "ok 3\n"; + +print "not " unless length($c) == 2; +print "ok 4\n"; + +print "not " unless ord(substr($c, 1, 1)) == 0x100; +print "ok 5\n"; + +print "not " unless ord(chr(0xdf)) == 0x3af; # spooky +print "ok 6\n"; + +print "not " unless ord(pack("C", 0xdf)) == 0x3af; +print "ok 7\n"; + +# we didn't break pack/unpack, I hope + +print "not " unless unpack("C", pack("C", 0xdf)) == 0xdf; +print "ok 8\n"; + +# the first octet of UTF-8 encoded 0x3af +print "not " unless unpack("C", chr(0xdf)) == 0xce; +print "ok 9\n"; + +print "not " unless unpack("U", pack("U", 0xdf)) == 0xdf; +print "ok 10\n"; + +print "not " unless unpack("U", chr(0xdf)) == 0x3af; +print "ok 11\n"; + +# charnames must still work +use charnames ':full'; +print "not " unless ord("\N{LATIN SMALL LETTER SHARP S}") == 0xdf; +print "ok 12\n"; + +# combine + +$c = "\xDF\N{LATIN SMALL LETTER SHARP S}" . 
chr(0xdf); + +print "not " unless ord($c) == 0x3af; +print "ok 13\n"; + +print "not " unless ord(substr($c, 1, 1)) == 0xdf; +print "ok 14\n"; + +print "not " unless ord(substr($c, 2, 1)) == 0x3af; +print "ok 15\n"; + +# regex literals + +print "not " unless "\xDF" =~ /\x{3AF}/; +print "ok 16\n"; + +print "not " unless "\x{3AF}" =~ /\xDF/; +print "ok 17\n"; + +print "not " unless "\xDF" =~ /\xDF/; +print "ok 18\n"; + +print "not " unless "\x{3AF}" =~ /\x{3AF}/; +print "ok 19\n"; + +# eq, cmp + +my ($byte,$bytes,$U,$Ub,$g1,$g2,$l) = ( + pack("C*", 0xDF ), # byte + pack("C*", 0xDF, 0x20), # ($bytes cmp $U) > 0 + pack("U*", 0x3AF), # $U eq $byte + pack("U*", 0xDF ), # $Ub would eq $byte w/o use encoding + pack("U*", 0x3B1), # ($g1 cmp $byte) > 0; === chr(0xe1) + pack("U*", 0x3AF, 0x20), # ($g2 cmp $byte) > 0; + pack("U*", 0x3AB), # ($l cmp $byte) < 0; === chr(0xdb) +); + +# all the tests in this section that compare a byte encoded string +# to a UTF-8 encoded one are run in all possible variants +# all of the eq, ne, cmp operations tested, +# $v z $u tested as well as $u z $v + +sub alleq($$){ # true only if eq, ne and cmp all report equality, in both operand orders + my ($a,$b) = (shift, shift); + $a eq $b && $b eq $a && + !( $a ne $b ) && !( $b ne $a ) && + ( $a cmp $b ) == 0 && ( $b cmp $a ) == 0; +} + +sub anyeq($$){ # true if any of eq, ne or cmp reports equality, in either operand order + my ($a,$b) = (shift, shift); + $a eq $b || $b eq $a || + !( $a ne $b ) || !( $b ne $a ) || + ( $a cmp $b ) == 0 || ( $b cmp $a ) == 0; +} + +sub allgt($$){ # true if cmp yields 1 for ($a,$b) and -1 for ($b,$a) + my ($a,$b) = (shift, shift); + ( $a cmp $b ) == 1 && ( $b cmp $a ) == -1; +} +#match the correct UTF-8 string +print "not " unless alleq($byte, $U); +print "ok 20\n"; + +#do not match a wrong UTF-8 string +print "not " if anyeq($byte, $Ub); +print "ok 21\n"; + +#string ordering +print "not " unless allgt ( $g1, $byte ) && + allgt ( $g2, $byte ) && + allgt ( $byte, $l ) && + allgt ( $bytes, $U ); +print "ok 22\n"; + +# upgrade, downgrade + +my ($u,$v,$v2); +$u = $v = $v2 = pack("C*", 0xDF); +utf8::upgrade($v); #explicit upgrade +$v2 = substr( $v2."\x{410}", 0, -1); 
#implicit upgrade + +# implicit upgrade === explicit upgrade +print "not " if do{{use bytes; $v ne $v2}} || $v ne $v2; +print "ok 23\n"; + +# utf8::upgrade is transparent and does not break equality +print "not " unless alleq( $u, $v ); +print "ok 24\n"; + +$u = $v = pack("C*", 0xDF); +utf8::upgrade($v); +#test for a roundtrip, we should get back from where we left +eval {utf8::downgrade( $v )}; +print "not " if $@ !~ /^Wide / || do{{use bytes; $u eq $v}} || $u ne $v; +print "ok 25\n"; + +# some more eq, cmp + +$byte=pack("C*", 0xDF); + +print "not " unless pack("U*", 0x3AF) eq $byte; +print "ok 26\n"; + +print "not " if chr(0xDF) cmp $byte; +print "ok 27\n"; + +print "not " unless ((pack("U*", 0x3B0) cmp $byte) == 1) && + ((pack("U*", 0x3AE) cmp $byte) == -1) && + ((pack("U*", 0x3AF, 0x20) cmp $byte) == 1) && + ((pack("U*", 0x3AF) cmp pack("C*",0xDF,0x20))==-1); +print "ok 28\n"; + + +{ + # Used to core dump in 5.7.3 + no warnings; # so test goes noiselessly + print ord(undef) == 0 ? "ok 29\n" : "not ok 29\n"; +} diff --git a/ext/Encode/t/jperl.t b/ext/Encode/t/jperl.t new file mode 100644 index 0000000000..333e1881c2 --- /dev/null +++ b/ext/Encode/t/jperl.t @@ -0,0 +1,57 @@ +# +# $Id: jperl.t,v 1.11 2002/03/31 22:12:13 dankogai Exp dankogai $ +# +# This script is written in euc-jp + +use strict; +use Test::More tests => 15; +my $Debug = shift; + +no encoding; # ensure +my $Enamae = "\xbe\xae\xbb\xf4\x20\xc3\xc6"; # euc-jp, with \x escapes +use encoding "euc-jp"; + +my $Namae = "¾®»ô ÃÆ"; # in Japanese, in euc-jp +my $Name = "Dan Kogai"; # in English +# euc-jp in \x format but after the pragma. But this one will be converted! 
+my $Ynamae = "\xbe\xae\xbb\xf4\x20\xc3\xc6"; + + +my $str = $Namae; $str =~ s/¾®»ô ÃÆ/Dan Kogai/o; +is($str, $Name, q{regex}); +$str = $Namae; $str =~ s/$Namae/Dan Kogai/o; +is($str, $Name, q{regex - with variable}); +is(length($Namae), 4, q{utf8:length}); +{ + use bytes; + # converted to UTF-8 so 3*3+1 + is(length($Namae), 10, q{bytes:length}); + # + is(length($Enamae), 7, q{euc:length}); # 2*3+1 + is ($Namae, $Ynamae, q{literal conversions}); + isnt($Enamae, $Ynamae, q{before and after}); + is($Enamae, Encode::encode('euc-jp', $Namae)); +} +# let's test the scope as well. Must be in utf8 realm +is(length($Namae), 4, q{utf8:length}); + +{ + no encoding; + ok(! defined(${^ENCODING}), q{no encoding;}); +} +# should've been isnt() but no scoping is suported -- yet +ok(! defined(${^ENCODING}), q{not scoped yet}); +{ + # now let's try some real black magic! + local(${^ENCODING}) = Encode::find_encoding("euc-jp"); + my $str = "\xbe\xae\xbb\xf4\x20\xc3\xc6"; + is (length($str), 4, q{black magic:length}); + is ($str, $Enamae, q{black magic:eq}); +} +ok(! 
defined(${^ENCODING}), q{out of black magic}); +use bytes; +is (length($Namae), 10); +1; +__END__ + + diff --git a/ext/Encode/Encode/8859-1.ucm b/ext/Encode/ucm/8859-1.ucm index 6e1caba220..6e1caba220 100644 --- a/ext/Encode/Encode/8859-1.ucm +++ b/ext/Encode/ucm/8859-1.ucm diff --git a/ext/Encode/Encode/8859-10.ucm b/ext/Encode/ucm/8859-10.ucm index dcf79fec8d..dcf79fec8d 100644 --- a/ext/Encode/Encode/8859-10.ucm +++ b/ext/Encode/ucm/8859-10.ucm diff --git a/ext/Encode/Encode/8859-11.ucm b/ext/Encode/ucm/8859-11.ucm index 5047598d8d..5047598d8d 100644 --- a/ext/Encode/Encode/8859-11.ucm +++ b/ext/Encode/ucm/8859-11.ucm diff --git a/ext/Encode/Encode/8859-13.ucm b/ext/Encode/ucm/8859-13.ucm index f3a5eb44da..f3a5eb44da 100644 --- a/ext/Encode/Encode/8859-13.ucm +++ b/ext/Encode/ucm/8859-13.ucm diff --git a/ext/Encode/Encode/8859-14.ucm b/ext/Encode/ucm/8859-14.ucm index a427b82bd3..a427b82bd3 100644 --- a/ext/Encode/Encode/8859-14.ucm +++ b/ext/Encode/ucm/8859-14.ucm diff --git a/ext/Encode/Encode/8859-15.ucm b/ext/Encode/ucm/8859-15.ucm index 69cf924bdb..69cf924bdb 100644 --- a/ext/Encode/Encode/8859-15.ucm +++ b/ext/Encode/ucm/8859-15.ucm diff --git a/ext/Encode/Encode/8859-16.ucm b/ext/Encode/ucm/8859-16.ucm index 8cc38991a7..8cc38991a7 100644 --- a/ext/Encode/Encode/8859-16.ucm +++ b/ext/Encode/ucm/8859-16.ucm diff --git a/ext/Encode/Encode/8859-2.ucm b/ext/Encode/ucm/8859-2.ucm index eeb8ee3aa7..eeb8ee3aa7 100644 --- a/ext/Encode/Encode/8859-2.ucm +++ b/ext/Encode/ucm/8859-2.ucm diff --git a/ext/Encode/Encode/8859-3.ucm b/ext/Encode/ucm/8859-3.ucm index 1c2e80f8f5..1c2e80f8f5 100644 --- a/ext/Encode/Encode/8859-3.ucm +++ b/ext/Encode/ucm/8859-3.ucm diff --git a/ext/Encode/Encode/8859-4.ucm b/ext/Encode/ucm/8859-4.ucm index 66c2b83955..66c2b83955 100644 --- a/ext/Encode/Encode/8859-4.ucm +++ b/ext/Encode/ucm/8859-4.ucm diff --git a/ext/Encode/Encode/8859-5.ucm b/ext/Encode/ucm/8859-5.ucm index c3b9898dc4..c3b9898dc4 100644 --- a/ext/Encode/Encode/8859-5.ucm 
+++ b/ext/Encode/ucm/8859-5.ucm diff --git a/ext/Encode/Encode/8859-6.ucm b/ext/Encode/ucm/8859-6.ucm index 0a970cdd47..0a970cdd47 100644 --- a/ext/Encode/Encode/8859-6.ucm +++ b/ext/Encode/ucm/8859-6.ucm diff --git a/ext/Encode/Encode/8859-7.ucm b/ext/Encode/ucm/8859-7.ucm index f92a11a198..f92a11a198 100644 --- a/ext/Encode/Encode/8859-7.ucm +++ b/ext/Encode/ucm/8859-7.ucm diff --git a/ext/Encode/Encode/8859-8.ucm b/ext/Encode/ucm/8859-8.ucm index b29179accc..b29179accc 100644 --- a/ext/Encode/Encode/8859-8.ucm +++ b/ext/Encode/ucm/8859-8.ucm diff --git a/ext/Encode/Encode/8859-9.ucm b/ext/Encode/ucm/8859-9.ucm index c763763b1b..c763763b1b 100644 --- a/ext/Encode/Encode/8859-9.ucm +++ b/ext/Encode/ucm/8859-9.ucm diff --git a/ext/Encode/Encode/ascii.ucm b/ext/Encode/ucm/ascii.ucm index 4a0d9d899d..2d44b5184b 100644 --- a/ext/Encode/Encode/ascii.ucm +++ b/ext/Encode/ucm/ascii.ucm @@ -1,5 +1,5 @@ # -# $Id: ascii.ucm,v 1.0 2002/03/28 23:26:25 dankogai Exp $ +# $Id: ascii.ucm,v 1.0 2002/03/31 21:09:59 dankogai Exp $ # # ./compile -n US-ascii -o Encode/ascii.ucm Encode/ascii.enc <code_set_name> "US-ascii" diff --git a/ext/Encode/Encode/big5-hkscs.ucm b/ext/Encode/ucm/big5-hkscs.ucm index 0fd50cec78..0fd50cec78 100644 --- a/ext/Encode/Encode/big5-hkscs.ucm +++ b/ext/Encode/ucm/big5-hkscs.ucm diff --git a/ext/Encode/Encode/big5.ucm b/ext/Encode/ucm/big5.ucm index bea897bc8a..bea897bc8a 100644 --- a/ext/Encode/Encode/big5.ucm +++ b/ext/Encode/ucm/big5.ucm diff --git a/ext/Encode/Encode/cp1047.ucm b/ext/Encode/ucm/cp1047.ucm index 649ee77d7a..649ee77d7a 100644 --- a/ext/Encode/Encode/cp1047.ucm +++ b/ext/Encode/ucm/cp1047.ucm diff --git a/ext/Encode/Encode/cp1250.ucm b/ext/Encode/ucm/cp1250.ucm index 79453f3dee..79453f3dee 100644 --- a/ext/Encode/Encode/cp1250.ucm +++ b/ext/Encode/ucm/cp1250.ucm diff --git a/ext/Encode/Encode/cp37.ucm b/ext/Encode/ucm/cp37.ucm index a9a3dfa947..a9a3dfa947 100644 --- a/ext/Encode/Encode/cp37.ucm +++ b/ext/Encode/ucm/cp37.ucm diff --git 
a/ext/Encode/Encode/cp932.ucm b/ext/Encode/ucm/cp932.ucm index 0669bef39a..0669bef39a 100644 --- a/ext/Encode/Encode/cp932.ucm +++ b/ext/Encode/ucm/cp932.ucm diff --git a/ext/Encode/Encode/cp936.ucm b/ext/Encode/ucm/cp936.ucm index 81c1f8d514..31903ecfb0 100644 --- a/ext/Encode/Encode/cp936.ucm +++ b/ext/Encode/ucm/cp936.ucm @@ -1,5 +1,5 @@ # -# $Id: cp936.ucm,v 1.0 2002/03/28 23:26:25 dankogai Exp $ +# $Id: cp936.ucm,v 1.0 2002/03/31 21:09:59 dankogai Exp $ # # ./compile -n cp936 -o Encode/cp936.ucm Encode/cp936.enc <code_set_name> "cp936" diff --git a/ext/Encode/Encode/cp949.ucm b/ext/Encode/ucm/cp949.ucm index b2ce365aa9..b2ce365aa9 100644 --- a/ext/Encode/Encode/cp949.ucm +++ b/ext/Encode/ucm/cp949.ucm diff --git a/ext/Encode/Encode/cp950.ucm b/ext/Encode/ucm/cp950.ucm index 63798b83d1..63798b83d1 100644 --- a/ext/Encode/Encode/cp950.ucm +++ b/ext/Encode/ucm/cp950.ucm diff --git a/ext/Encode/Encode/dingbats.ucm b/ext/Encode/ucm/dingbats.ucm index 63df1c26bb..aafe6ff85c 100644 --- a/ext/Encode/Encode/dingbats.ucm +++ b/ext/Encode/ucm/dingbats.ucm @@ -1,5 +1,5 @@ # -# $Id: dingbats.ucm,v 1.0 2002/03/28 23:26:26 dankogai Exp $ +# $Id: dingbats.ucm,v 1.0 2002/03/31 21:09:59 dankogai Exp $ # <code_set_name> "dingbats" <mb_cur_min> 1 diff --git a/ext/Encode/Encode/euc-cn.ucm b/ext/Encode/ucm/euc-cn.ucm index a6e03fd057..a6e03fd057 100644 --- a/ext/Encode/Encode/euc-cn.ucm +++ b/ext/Encode/ucm/euc-cn.ucm diff --git a/ext/Encode/Encode/euc-jp.ucm b/ext/Encode/ucm/euc-jp.ucm index e0fddcbb1b..e0fddcbb1b 100644 --- a/ext/Encode/Encode/euc-jp.ucm +++ b/ext/Encode/ucm/euc-jp.ucm diff --git a/ext/Encode/Encode/euc-kr.ucm b/ext/Encode/ucm/euc-kr.ucm index 9c38a9be81..9c38a9be81 100644 --- a/ext/Encode/Encode/euc-kr.ucm +++ b/ext/Encode/ucm/euc-kr.ucm diff --git a/ext/Encode/Encode/gb12345.ucm b/ext/Encode/ucm/gb12345.ucm index 9fe6e6d4e8..9fe6e6d4e8 100644 --- a/ext/Encode/Encode/gb12345.ucm +++ b/ext/Encode/ucm/gb12345.ucm diff --git a/ext/Encode/Encode/gb2312.ucm 
b/ext/Encode/ucm/gb2312.ucm index d38ccacac1..d38ccacac1 100644 --- a/ext/Encode/Encode/gb2312.ucm +++ b/ext/Encode/ucm/gb2312.ucm diff --git a/ext/Encode/Encode/gsm0338.ucm b/ext/Encode/ucm/gsm0338.ucm index f321fb77ce..f321fb77ce 100644 --- a/ext/Encode/Encode/gsm0338.ucm +++ b/ext/Encode/ucm/gsm0338.ucm diff --git a/ext/Encode/Encode/hp-roman8.ucm b/ext/Encode/ucm/hp-roman8.ucm index 3897892f8b..3897892f8b 100644 --- a/ext/Encode/Encode/hp-roman8.ucm +++ b/ext/Encode/ucm/hp-roman8.ucm diff --git a/ext/Encode/Encode/ibm-1250.ucm b/ext/Encode/ucm/ibm-1250.ucm index 773c5fa945..773c5fa945 100644 --- a/ext/Encode/Encode/ibm-1250.ucm +++ b/ext/Encode/ucm/ibm-1250.ucm diff --git a/ext/Encode/Encode/ibm-1251.ucm b/ext/Encode/ucm/ibm-1251.ucm index 476a46679e..476a46679e 100644 --- a/ext/Encode/Encode/ibm-1251.ucm +++ b/ext/Encode/ucm/ibm-1251.ucm diff --git a/ext/Encode/Encode/ibm-1252.ucm b/ext/Encode/ucm/ibm-1252.ucm index 859989dd6a..859989dd6a 100644 --- a/ext/Encode/Encode/ibm-1252.ucm +++ b/ext/Encode/ucm/ibm-1252.ucm diff --git a/ext/Encode/Encode/ibm-1253.ucm b/ext/Encode/ucm/ibm-1253.ucm index 500b704f15..500b704f15 100644 --- a/ext/Encode/Encode/ibm-1253.ucm +++ b/ext/Encode/ucm/ibm-1253.ucm diff --git a/ext/Encode/Encode/ibm-1254.ucm b/ext/Encode/ucm/ibm-1254.ucm index ff6edbe941..ff6edbe941 100644 --- a/ext/Encode/Encode/ibm-1254.ucm +++ b/ext/Encode/ucm/ibm-1254.ucm diff --git a/ext/Encode/Encode/ibm-1255.ucm b/ext/Encode/ucm/ibm-1255.ucm index 61a5c674f3..61a5c674f3 100644 --- a/ext/Encode/Encode/ibm-1255.ucm +++ b/ext/Encode/ucm/ibm-1255.ucm diff --git a/ext/Encode/Encode/ibm-1256.ucm b/ext/Encode/ucm/ibm-1256.ucm index cc1fd7a8dd..cc1fd7a8dd 100644 --- a/ext/Encode/Encode/ibm-1256.ucm +++ b/ext/Encode/ucm/ibm-1256.ucm diff --git a/ext/Encode/Encode/ibm-1257.ucm b/ext/Encode/ucm/ibm-1257.ucm index 1867b7de37..1867b7de37 100644 --- a/ext/Encode/Encode/ibm-1257.ucm +++ b/ext/Encode/ucm/ibm-1257.ucm diff --git a/ext/Encode/Encode/ibm-1258.ucm 
b/ext/Encode/ucm/ibm-1258.ucm index fa5363c1b7..fa5363c1b7 100644 --- a/ext/Encode/Encode/ibm-1258.ucm +++ b/ext/Encode/ucm/ibm-1258.ucm diff --git a/ext/Encode/Encode/ir-165.ucm b/ext/Encode/ucm/ir-165.ucm index 50aeb6598b..50aeb6598b 100644 --- a/ext/Encode/Encode/ir-165.ucm +++ b/ext/Encode/ucm/ir-165.ucm diff --git a/ext/Encode/Encode/jis0201.ucm b/ext/Encode/ucm/jis0201.ucm index 979b1c830a..979b1c830a 100644 --- a/ext/Encode/Encode/jis0201.ucm +++ b/ext/Encode/ucm/jis0201.ucm diff --git a/ext/Encode/Encode/johab.ucm b/ext/Encode/ucm/johab.ucm index 6d034cec35..6d034cec35 100644 --- a/ext/Encode/Encode/johab.ucm +++ b/ext/Encode/ucm/johab.ucm diff --git a/ext/Encode/Encode/koi8-f.ucm b/ext/Encode/ucm/koi8-f.ucm index 4eee360222..4eee360222 100644 --- a/ext/Encode/Encode/koi8-f.ucm +++ b/ext/Encode/ucm/koi8-f.ucm diff --git a/ext/Encode/Encode/koi8-r.ucm b/ext/Encode/ucm/koi8-r.ucm index 11a2bf7f4a..b9f413e8d1 100644 --- a/ext/Encode/Encode/koi8-r.ucm +++ b/ext/Encode/ucm/koi8-r.ucm @@ -1,5 +1,5 @@ # -# $Id: koi8-r.ucm,v 1.0 2002/03/28 23:26:27 dankogai Exp $ +# $Id: koi8-r.ucm,v 1.0 2002/03/31 21:09:59 dankogai Exp $ # # ./compile -n koi8-r -o Encode/koi8-r.ucm Encode/koi8-r.enc <code_set_name> "koi8-r" diff --git a/ext/Encode/Encode/koi8-u.ucm b/ext/Encode/ucm/koi8-u.ucm index 7c022d6936..7c022d6936 100644 --- a/ext/Encode/Encode/koi8-u.ucm +++ b/ext/Encode/ucm/koi8-u.ucm diff --git a/ext/Encode/Encode/ksc5601.ucm b/ext/Encode/ucm/ksc5601.ucm index db94cdfb35..db94cdfb35 100644 --- a/ext/Encode/Encode/ksc5601.ucm +++ b/ext/Encode/ucm/ksc5601.ucm diff --git a/ext/Encode/Encode/macCentEuro.ucm b/ext/Encode/ucm/macCentEuro.ucm index 09c7c854ac..09c7c854ac 100644 --- a/ext/Encode/Encode/macCentEuro.ucm +++ b/ext/Encode/ucm/macCentEuro.ucm diff --git a/ext/Encode/Encode/macCroatian.ucm b/ext/Encode/ucm/macCroatian.ucm index 8dfa69d696..8dfa69d696 100644 --- a/ext/Encode/Encode/macCroatian.ucm +++ b/ext/Encode/ucm/macCroatian.ucm diff --git 
a/ext/Encode/Encode/macCyrillic.ucm b/ext/Encode/ucm/macCyrillic.ucm index 4a4578c729..4a4578c729 100644 --- a/ext/Encode/Encode/macCyrillic.ucm +++ b/ext/Encode/ucm/macCyrillic.ucm diff --git a/ext/Encode/Encode/macDingbats.ucm b/ext/Encode/ucm/macDingbats.ucm index dfa5e9142b..dfa5e9142b 100644 --- a/ext/Encode/Encode/macDingbats.ucm +++ b/ext/Encode/ucm/macDingbats.ucm diff --git a/ext/Encode/Encode/macGreek.ucm b/ext/Encode/ucm/macGreek.ucm index 995a52bd9f..995a52bd9f 100644 --- a/ext/Encode/Encode/macGreek.ucm +++ b/ext/Encode/ucm/macGreek.ucm diff --git a/ext/Encode/Encode/macIceland.ucm b/ext/Encode/ucm/macIceland.ucm index 4c744a5a73..38dd7880ef 100644 --- a/ext/Encode/Encode/macIceland.ucm +++ b/ext/Encode/ucm/macIceland.ucm @@ -1,8 +1,8 @@ # -# $Id: macIceland.ucm,v 1.0 2002/03/28 23:26:27 dankogai Exp $ +# $Id: macIceland.ucm,v 1.2 2002/03/31 18:22:36 dankogai Exp $ # # ./compile -n macIceland -o Encode/macIceland.ucm Encode/macIceland.enc -<code_set_name> "MacIcelandic" +<code_set_name> "MacIceland" <mb_cur_min> 1 <mb_cur_max> 1 <subchar> \x3F diff --git a/ext/Encode/Encode/macJapan.ucm b/ext/Encode/ucm/macJapan.ucm index 07cf9a1b67..07cf9a1b67 100644 --- a/ext/Encode/Encode/macJapan.ucm +++ b/ext/Encode/ucm/macJapan.ucm diff --git a/ext/Encode/Encode/macRoman.ucm b/ext/Encode/ucm/macRoman.ucm index 1fabc5a051..1fabc5a051 100644 --- a/ext/Encode/Encode/macRoman.ucm +++ b/ext/Encode/ucm/macRoman.ucm diff --git a/ext/Encode/Encode/macRumanian.ucm b/ext/Encode/ucm/macRumanian.ucm index 360d55cac5..360d55cac5 100644 --- a/ext/Encode/Encode/macRumanian.ucm +++ b/ext/Encode/ucm/macRumanian.ucm diff --git a/ext/Encode/Encode/macSami.ucm b/ext/Encode/ucm/macSami.ucm index aabb0a07ad..aabb0a07ad 100644 --- a/ext/Encode/Encode/macSami.ucm +++ b/ext/Encode/ucm/macSami.ucm diff --git a/ext/Encode/Encode/macThai.ucm b/ext/Encode/ucm/macThai.ucm index 0a7b6aa22c..0a7b6aa22c 100644 --- a/ext/Encode/Encode/macThai.ucm +++ b/ext/Encode/ucm/macThai.ucm diff --git 
a/ext/Encode/Encode/macTurkish.ucm b/ext/Encode/ucm/macTurkish.ucm index 858d200d89..f7df090aa6 100644 --- a/ext/Encode/Encode/macTurkish.ucm +++ b/ext/Encode/ucm/macTurkish.ucm @@ -1,5 +1,5 @@ # -# $Id: macTurkish.ucm,v 1.0 2002/03/28 23:26:27 dankogai Exp $ +# $Id: macTurkish.ucm,v 1.0 2002/03/31 21:09:59 dankogai Exp $ # # ./compile -n macTurkish -o Encode/macTurkish.ucm Encode/macTurkish.enc <code_set_name> "MacTurkish" diff --git a/ext/Encode/Encode/macUkraine.ucm b/ext/Encode/ucm/macUkraine.ucm index c64f7043eb..c64f7043eb 100644 --- a/ext/Encode/Encode/macUkraine.ucm +++ b/ext/Encode/ucm/macUkraine.ucm diff --git a/ext/Encode/Encode/nextstep.ucm b/ext/Encode/ucm/nextstep.ucm index 6e258117c5..6e258117c5 100644 --- a/ext/Encode/Encode/nextstep.ucm +++ b/ext/Encode/ucm/nextstep.ucm diff --git a/ext/Encode/Encode/posix-bc.ucm b/ext/Encode/ucm/posix-bc.ucm index 632af71dd9..632af71dd9 100644 --- a/ext/Encode/Encode/posix-bc.ucm +++ b/ext/Encode/ucm/posix-bc.ucm diff --git a/ext/Encode/Encode/shiftjis.ucm b/ext/Encode/ucm/shiftjis.ucm index 9aa27948ec..9aa27948ec 100644 --- a/ext/Encode/Encode/shiftjis.ucm +++ b/ext/Encode/ucm/shiftjis.ucm diff --git a/ext/Encode/Encode/symbol.ucm b/ext/Encode/ucm/symbol.ucm index a003d99ba4..a003d99ba4 100644 --- a/ext/Encode/Encode/symbol.ucm +++ b/ext/Encode/ucm/symbol.ucm diff --git a/ext/Encode/Encode/viscii.ucm b/ext/Encode/ucm/viscii.ucm index 0f02f01adc..0f02f01adc 100644 --- a/ext/Encode/Encode/viscii.ucm +++ b/ext/Encode/ucm/viscii.ucm |