summaryrefslogtreecommitdiff
path: root/ext
diff options
context:
space:
mode:
authorNick Ing-Simmons <nik@tiuk.ti.com>2001-03-05 14:51:50 +0000
committerNick Ing-Simmons <nik@tiuk.ti.com>2001-03-05 14:51:50 +0000
commit51ef4e1196e74554150c2d1993b5a0e37f6709c9 (patch)
treeb3c7f984d3bbe182b93da70fc63c2e7f0e7d55e1 /ext
parent26c1551e60cd5bd52d80b74e7d16ea4a8437d156 (diff)
downloadperl-51ef4e1196e74554150c2d1993b5a0e37f6709c9.tar.gz
Encode implementation "completion"
Implement and document define_encoding() Implement and document encoding aliases including define_alias() Make Encode::XS use define_encoding() rather than back-door. Move run-time *.enc to separate Encode::Tcl module. Make 'compile' honour <codeset_name> Change canonical names of to iso-8859-* and US-ascii. p4raw-id: //depot/perlio@9032
Diffstat (limited to 'ext')
-rw-r--r--ext/Encode/Encode.pm359
-rw-r--r--ext/Encode/Encode.xs13
-rw-r--r--ext/Encode/Encode/Tcl.pm247
-rw-r--r--ext/Encode/Encode/ascii.ucm7
-rw-r--r--ext/Encode/Encode/cp1250.ucm4
-rw-r--r--ext/Encode/Encode/iso8859-1.ucm6
-rw-r--r--ext/Encode/Encode/iso8859-10.ucm6
-rw-r--r--ext/Encode/Encode/iso8859-13.ucm6
-rw-r--r--ext/Encode/Encode/iso8859-14.ucm6
-rw-r--r--ext/Encode/Encode/iso8859-15.ucm6
-rw-r--r--ext/Encode/Encode/iso8859-16.ucm6
-rw-r--r--ext/Encode/Encode/iso8859-2.ucm6
-rw-r--r--ext/Encode/Encode/iso8859-3.ucm6
-rw-r--r--ext/Encode/Encode/iso8859-4.ucm6
-rw-r--r--ext/Encode/Encode/iso8859-5.ucm6
-rw-r--r--ext/Encode/Encode/iso8859-6.ucm6
-rw-r--r--ext/Encode/Encode/iso8859-7.ucm6
-rw-r--r--ext/Encode/Encode/iso8859-8.ucm6
-rw-r--r--ext/Encode/Encode/iso8859-9.ucm6
-rwxr-xr-xext/Encode/compile4
20 files changed, 457 insertions, 261 deletions
diff --git a/ext/Encode/Encode.pm b/ext/Encode/Encode.pm
index 72d6cc0fcc..38c30ad1ba 100644
--- a/ext/Encode/Encode.pm
+++ b/ext/Encode/Encode.pm
@@ -1,24 +1,27 @@
package Encode;
+use strict;
-$VERSION = 0.01;
+our $VERSION = 0.02;
require DynaLoader;
require Exporter;
-@ISA = qw(Exporter DynaLoader);
+our @ISA = qw(Exporter DynaLoader);
# Public, encouraged API is exported by default
-@EXPORT = qw (
+our @EXPORT = qw (
encode
decode
encode_utf8
decode_utf8
find_encoding
+ encodings
);
-@EXPORT_OK =
+our @EXPORT_OK =
qw(
- encodings
+ define_encoding
+ define_alias
from_to
is_utf8
is_8bit
@@ -35,71 +38,97 @@ bootstrap Encode ();
use Carp;
-# The global hash is declared in XS code
-$encoding{Unicode} = bless({},'Encode::Unicode');
-$encoding{utf8} = bless({},'Encode::utf8');
-$encoding{'iso10646-1'} = bless({},'Encode::iso10646_1');
+# Make a %encoding package variable to allow a certain amount of cheating
+our %encoding;
+my @alias; # ordered matching list
+my %alias; # cached known aliases
sub encodings
{
my ($class) = @_;
- foreach my $dir (@INC)
+ return keys %encoding;
+}
+
+sub findAlias
+{
+ my $class = shift;
+ local $_ = shift;
+ unless (exists $alias{$_})
{
- if (opendir(my $dh,"$dir/Encode"))
+ for (my $i=0; $i < @alias; $i += 2)
{
- while (defined(my $name = readdir($dh)))
+ my $alias = $alias[$i];
+ my $val = $alias[$i+1];
+ my $new;
+ if (ref($alias) eq 'Regexp' && $_ =~ $alias)
{
- if ($name =~ /^(.*)\.enc$/)
+ $new = eval $val;
+ }
+ elsif (ref($alias) eq 'CODE')
+ {
+ $new = &{$alias}($val)
+ }
+ elsif (lc($_) eq $alias)
+ {
+ $new = $val;
+ }
+ if (defined($new))
+ {
+ next if $new eq $_; # avoid (direct) recursion on bugs
+ my $enc = (ref($new)) ? $new : find_encoding($new);
+ if ($enc)
{
- next if exists $encoding{$1};
- $encoding{$1} = "$dir/$name";
+ $alias{$_} = $enc;
+ last;
}
}
- closedir($dh);
}
}
- return keys %encoding;
+ return $alias{$_};
}
-sub loadEncoding
+sub define_alias
{
- my ($class,$name,$file) = @_;
- if (open(my $fh,$file))
+ while (@_)
{
- my $type;
- while (1)
- {
- my $line = <$fh>;
- $type = substr($line,0,1);
- last unless $type eq '#';
- }
- $class .= ('::'.(($type eq 'E') ? 'Escape' : 'Table'));
- #warn "Loading $file";
- return $class->read($fh,$name,$type);
+ my ($alias,$name) = splice(@_,0,2);
+ push(@alias, $alias => $name);
}
- else
+}
+
+define_alias( qr/^iso(\d+-\d+)$/i => '"iso-$1"' );
+define_alias( qr/^(\S+)\s+(.*)$/i => '"$1-$2"' );
+#define_alias( sub { return /^iso-(\d+-\d+)$/i ? "iso$1" : '' } );
+define_alias( 'ascii' => 'US-ascii');
+define_alias( 'ibm-1047' => 'cp1047');
+
+sub define_encoding
+{
+ my $obj = shift;
+ my $name = shift;
+ $encoding{$name} = $obj;
+ my $lc = lc($name);
+ define_alias($lc => $obj) unless $lc eq $name;
+ while (@_)
{
- return undef;
+ my $alias = shift;
+ define_alias($alias,$obj);
}
+ return $obj;
}
sub getEncoding
{
my ($class,$name) = @_;
my $enc;
- unless (ref($enc = $encoding{$name}))
+ if (exists $encoding{$name})
{
- $enc = $class->loadEncoding($name,$enc) if defined $enc;
- unless (ref($enc))
- {
- foreach my $dir (@INC)
- {
- last if ($enc = $class->loadEncoding($name,"$dir/Encode/$name.enc"));
- }
- }
- $encoding{$name} = $enc;
+ return $encoding{$name};
+ }
+ else
+ {
+ return $class->findAlias($name);
}
- return $enc;
}
sub find_encoding
@@ -159,6 +188,17 @@ sub decode_utf8
package Encode::Encoding;
# Base class for classes which implement encodings
+sub Define
+{
+ my $obj = shift;
+ my $canonical = shift;
+ $obj = bless { Name => $canonical },$obj unless ref $obj;
+ # warn "$canonical => $obj\n";
+ Encode::define_encoding($obj, $canonical, @_);
+}
+
+sub name { shift->{'Name'} }
+
# Temporary legacy methods
sub toUnicode { shift->decode(@_) }
sub fromUnicode { shift->encode(@_) }
@@ -174,7 +214,7 @@ use base 'Encode::Encoding';
# Dummy package that provides the encode interface but leaves data
# as UTF-8 encoded. It is here so that from_to() works.
-sub name { 'Unicode' }
+__PACKAGE__->Define('Unicode');
sub decode
{
@@ -188,12 +228,11 @@ sub decode
package Encode::utf8;
use base 'Encode::Encoding';
-
# package to allow long-hand
# $octets = encode( utf8 => $string );
#
-sub name { 'utf8' }
+__PACKAGE__->Define(qw(UTF-8 utf8));
sub decode
{
@@ -215,131 +254,12 @@ sub encode
return $octets;
}
-package Encode::Table;
-use base 'Encode::Encoding';
-
-sub read
-{
- my ($class,$fh,$name,$type) = @_;
- my $rep = $class->can("rep_$type");
- my ($def,$sym,$pages) = split(/\s+/,scalar(<$fh>));
- my @touni;
- my %fmuni;
- my $count = 0;
- $def = hex($def);
- while ($pages--)
- {
- my $line = <$fh>;
- chomp($line);
- my $page = hex($line);
- my @page;
- my $ch = $page * 256;
- for (my $i = 0; $i < 16; $i++)
- {
- my $line = <$fh>;
- for (my $j = 0; $j < 16; $j++)
- {
- my $val = hex(substr($line,0,4,''));
- if ($val || !$ch)
- {
- my $uch = chr($val);
- push(@page,$uch);
- $fmuni{$uch} = $ch;
- $count++;
- }
- else
- {
- push(@page,undef);
- }
- $ch++;
- }
- }
- $touni[$page] = \@page;
- }
-
- return bless {Name => $name,
- Rep => $rep,
- ToUni => \@touni,
- FmUni => \%fmuni,
- Def => $def,
- Num => $count,
- },$class;
-}
-
-sub name { shift->{'Name'} }
-
-sub rep_S { 'C' }
-
-sub rep_D { 'n' }
-
-sub rep_M { ($_[0] > 255) ? 'n' : 'C' }
-
-sub representation
-{
- my ($obj,$ch) = @_;
- $ch = 0 unless @_ > 1;
- $obj-{'Rep'}->($ch);
-}
-
-sub decode
-{
- my ($obj,$str,$chk) = @_;
- my $rep = $obj->{'Rep'};
- my $touni = $obj->{'ToUni'};
- my $uni = '';
- while (length($str))
- {
- my $ch = ord(substr($str,0,1,''));
- my $x;
- if (&$rep($ch) eq 'C')
- {
- $x = $touni->[0][$ch];
- }
- else
- {
- $x = $touni->[$ch][ord(substr($str,0,1,''))];
- }
- unless (defined $x)
- {
- last if $chk;
- # What do we do here ?
- $x = '';
- }
- $uni .= $x;
- }
- $_[1] = $str if $chk;
- return $uni;
-}
-
-sub encode
-{
- my ($obj,$uni,$chk) = @_;
- my $fmuni = $obj->{'FmUni'};
- my $str = '';
- my $def = $obj->{'Def'};
- my $rep = $obj->{'Rep'};
- while (length($uni))
- {
- my $ch = substr($uni,0,1,'');
- my $x = $fmuni->{chr(ord($ch))};
- unless (defined $x)
- {
- last if ($chk);
- $x = $def;
- }
- $str .= pack(&$rep($x),$x);
- }
- $_[1] = $uni if $chk;
- return $str;
-}
-
package Encode::iso10646_1;
use base 'Encode::Encoding';
-
-# Encoding is 16-bit network order Unicode
+# Encoding is 16-bit network order Unicode (no surogates)
# Used for X font encodings
-sub name { 'iso10646-1' }
+__PACKAGE__->Define(qw(UCS-2 iso10646-1));
sub decode
{
@@ -374,38 +294,6 @@ sub encode
return $str;
}
-
-package Encode::Escape;
-use base 'Encode::Encoding';
-
-use Carp;
-
-sub read
-{
- my ($class,$fh,$name) = @_;
- my %self = (Name => $name, Num => 0);
- while (<$fh>)
- {
- my ($key,$val) = /^(\S+)\s+(.*)$/;
- $val =~ s/^\{(.*?)\}/$1/g;
- $val =~ s/\\x([0-9a-f]{2})/chr(hex($1))/ge;
- $self{$key} = $val;
- }
- return bless \%self,$class;
-}
-
-sub name { shift->{'Name'} }
-
-sub decode
-{
- croak("Not implemented yet");
-}
-
-sub encode
-{
- croak("Not implemented yet");
-}
-
# switch back to Encode package in case we ever add AutoLoader
package Encode;
@@ -564,8 +452,6 @@ Because of all the alias issues, and because in the general case
encodings have state C<Encode> uses the encoding object internally
once an operation is in progress.
-I<Aliasing is not yet implemented.>
-
=head1 PERL ENCODING API
=head2 Generic Encoding Interface
@@ -686,7 +572,7 @@ UTF-16 is similar to UCS-2, 16 bit or 2-byte chunks.
UCS-2 can only represent 0..0xFFFF, while UTF-16 has a "surogate pair"
scheme which allows it to cover the whole Unicode range.
-Encode implements big-endian UCS-2 as the encoding "iso10646-1" as that
+Encode implements big-endian UCS-2 aliased to "iso10646-1" as that
happens to be the name used by that representation when used with X11 fonts.
UTF-32 or UCS-4 is 32-bit or 4-byte chunks. Perl's logical characters
@@ -701,11 +587,62 @@ to transfer strings in this form (e.g. to write them to a file) would need to
depending on the endian required.
-No UTF-32 encodings are not yet implemented.
+No UTF-32 encodings are implemented yet.
Both UCS-2 and UCS-4 style encodings can have "byte order marks" by representing
the code point 0xFFFE as the very first thing in a file.
+=head2 Listing available encodings
+
+ use Encode qw(encodings);
+ @list = encodings();
+
+Returns a list of the canonical names of the available encodings.
+
+=head2 Defining Aliases
+
+ use Encode qw(define_alias);
+ define_alias( newName => ENCODING);
+
+Allows newName to be used as am alias for ENCODING. ENCODING may be either the
+name of an encoding or and encoding object (as above).
+
+Currently I<newName> can be specified in the following ways:
+
+=over 4
+
+=item As a simple string.
+
+=item As a qr// compiled regular expression, e.g.:
+
+ define_alias( qr/^iso8859-(\d+)$/i => '"iso-8859-$1"' );
+
+In this case if I<ENCODING> is not a reference it is C<eval>-ed to allow
+C<$1> etc. to be subsituted.
+The example is one way to names as used in X11 font names to alias the MIME names for the
+iso-8859-* family.
+
+=item As a code reference, e.g.:
+
+ define_alias( sub { return /^iso8859-(\d+)$/i ? "iso-8859-$1" : undef } , '');
+
+In this case C<$_> will be set to the name that is being looked up and
+I<ENCODING> is passed to the sub as its first argument.
+The example is another way to names as used in X11 font names to alias the MIME names for
+the iso-8859-* family.
+
+=back
+
+=head2 Defining Encodings
+
+ use Encode qw(define_alias);
+ define_encoding( $object, 'canonicalName' [,alias...]);
+
+Causes I<canonicalName> to be associated with I<$object>.
+The object should provide the interface described in L</"IMPLEMENTATION CLASSES"> below.
+If more than two arguments are provided then additional arguments are taken
+as aliases for I<$object> as for C<define_alias>.
+
=head1 Encoding and IO
It is very common to want to do encoding transformations when
@@ -714,7 +651,7 @@ If perl is configured to use the new 'perlio' IO system then
C<Encode> provides a "layer" (See L<perliol>) which can transform
data as it is read or written.
- open(my $ilyad,'>:encoding(iso8859-7)','ilyad.greek');
+ open(my $ilyad,'>:encoding(iso-8859-7)','ilyad.greek');
print $ilyad @epic;
In addition the new IO system can also be configured to read/write
@@ -816,8 +753,7 @@ not a string.
As mentioned above encodings are (in the current implementation at least)
defined by objects. The mapping of encoding name to object is via the
-C<%Encode::encodings> hash. (It is a package hash to allow XS code to get
-at it.)
+C<%encodings> hash.
The values of the hash can currently be either strings or objects.
The string form may go away in the future. The string form occurs
@@ -883,7 +819,16 @@ and additional parameter.
It is also highly desirable that encoding classes inherit from C<Encode::Encoding>
as a base class. This allows that class to define additional behaviour for
-all encoding objects.
+all encoding objects. For example built in Unicode, UCS-2 and UTF-8 classes
+use :
+
+ package Encode::MyEncoding;
+ use base qw(Encode::Encoding);
+
+ __PACKAGE__->Define(qw(myCanonical myAlias));
+
+To create an object with bless {Name => ...},$class, and call define_encoding.
+They inherit their C<name> method from C<Encode::Encoding>.
=head2 Compiled Encodings
diff --git a/ext/Encode/Encode.xs b/ext/Encode/Encode.xs
index 40c3dc7f70..584849ac47 100644
--- a/ext/Encode/Encode.xs
+++ b/ext/Encode/Encode.xs
@@ -60,13 +60,12 @@ PerlIOEncode_pushed(PerlIO *f, const char *mode,const char *arg,STRLEN len)
ENTER;
SAVETMPS;
PUSHMARK(sp);
- XPUSHs(sv_2mortal(newSVpv("Encode",0)));
XPUSHs(sv_2mortal(newSVpvn(arg,len)));
PUTBACK;
- if (perl_call_method("getEncoding",G_SCALAR) != 1)
+ if (perl_call_pv("Encode::find_encoding",G_SCALAR) != 1)
{
/* should never happen */
- Perl_die(aTHX_ "Encode::getEncoding did not return a value");
+ Perl_die(aTHX_ "Encode::find_encoding did not return a value");
return -1;
}
SPAGAIN;
@@ -330,15 +329,19 @@ PerlIO_funcs PerlIO_encode = {
void
Encode_Define(pTHX_ encode_t *enc)
{
- HV *hash = get_hv("Encode::encoding",GV_ADD|GV_ADDMULTI);
+ dSP;
HV *stash = gv_stashpv("Encode::XS", TRUE);
SV *sv = sv_bless(newRV_noinc(newSViv(PTR2IV(enc))),stash);
int i = 0;
+ PUSHMARK(sp);
+ XPUSHs(sv);
while (enc->name[i])
{
const char *name = enc->name[i++];
- hv_store(hash,name,strlen(name),SvREFCNT_inc(sv),0);
+ XPUSHs(sv_2mortal(newSVpvn(name,strlen(name))));
}
+ PUTBACK;
+ call_pv("Encode::define_encoding",G_DISCARD);
SvREFCNT_dec(sv);
}
diff --git a/ext/Encode/Encode/Tcl.pm b/ext/Encode/Encode/Tcl.pm
new file mode 100644
index 0000000000..8c956ffb7e
--- /dev/null
+++ b/ext/Encode/Encode/Tcl.pm
@@ -0,0 +1,247 @@
+package Encode::Tcl;
+use strict;
+use Encode qw(find_encoding);
+use base 'Encode::Encoding';
+use Carp;
+
+
+sub INC_search
+{
+ foreach my $dir (@INC)
+ {
+ if (opendir(my $dh,"$dir/Encode"))
+ {
+ while (defined(my $name = readdir($dh)))
+ {
+ if ($name =~ /^(.*)\.enc$/)
+ {
+ my $canon = $1;
+ my $obj = find_encoding($canon);
+ if (!defined($obj))
+ {
+ my $obj = bless { Name => $canon, File => "$dir/Encode/$name"},__PACKAGE__;
+ $obj->Define( $canon );
+ # warn "$canon => $obj\n";
+ }
+ }
+ }
+ closedir($dh);
+ }
+ }
+}
+
+sub import
+{
+ INC_search();
+}
+
+sub encode
+{
+ my $obj = shift;
+ my $new = $obj->loadEncoding;
+ return undef unless (defined $new);
+ return $new->encode(@_);
+}
+
+sub new_sequence
+{
+ my $obj = shift;
+ my $new = $obj->loadEncoding;
+ return undef unless (defined $new);
+ return $new->new_sequence(@_);
+}
+
+sub decode
+{
+ my $obj = shift;
+ my $new = $obj->loadEncoding;
+ return undef unless (defined $new);
+ return $new->decode(@_);
+}
+
+sub loadEncoding
+{
+ my $obj = shift;
+ my $file = $obj->{'File'};
+ my $name = $obj->name;
+ if (open(my $fh,$file))
+ {
+ my $type;
+ while (1)
+ {
+ my $line = <$fh>;
+ $type = substr($line,0,1);
+ last unless $type eq '#';
+ }
+ my $class = ref($obj).('::'.(($type eq 'E') ? 'Escape' : 'Table'));
+ carp "Loading $file";
+ bless $obj,$class;
+ return $obj if $obj->read($fh,$obj->name,$type);
+ }
+ else
+ {
+ croak("Cannot open $file for ".$obj->name);
+ }
+ $obj->Undefine($name);
+ return undef;
+}
+
+sub INC_find
+{
+ my ($class,$name) = @_;
+ my $enc;
+ foreach my $dir (@INC)
+ {
+ last if ($enc = $class->loadEncoding($name,"$dir/Encode/$name.enc"));
+ }
+ return $enc;
+}
+
+package Encode::Tcl::Table;
+use base 'Encode::Encoding';
+
+use Data::Dumper;
+
+sub read
+{
+ my ($obj,$fh,$name,$type) = @_;
+ my $rep = $obj->can("rep_$type");
+ my ($def,$sym,$pages) = split(/\s+/,scalar(<$fh>));
+ my @touni;
+ my %fmuni;
+ my $count = 0;
+ $def = hex($def);
+ while ($pages--)
+ {
+ my $line = <$fh>;
+ chomp($line);
+ my $page = hex($line);
+ my @page;
+ my $ch = $page * 256;
+ for (my $i = 0; $i < 16; $i++)
+ {
+ my $line = <$fh>;
+ for (my $j = 0; $j < 16; $j++)
+ {
+ my $val = hex(substr($line,0,4,''));
+ if ($val || !$ch)
+ {
+ my $uch = chr($val);
+ push(@page,$uch);
+ $fmuni{$uch} = $ch;
+ $count++;
+ }
+ else
+ {
+ push(@page,undef);
+ }
+ $ch++;
+ }
+ }
+ $touni[$page] = \@page;
+ }
+ $obj->{'Rep'} = $rep;
+ $obj->{'ToUni'} = \@touni;
+ $obj->{'FmUni'} = \%fmuni;
+ $obj->{'Def'} = $def;
+ $obj->{'Num'} = $count;
+ return $obj;
+}
+
+sub rep_S { 'C' }
+
+sub rep_D { 'n' }
+
+sub rep_M { ($_[0] > 255) ? 'n' : 'C' }
+
+sub representation
+{
+ my ($obj,$ch) = @_;
+ $ch = 0 unless @_ > 1;
+ $obj-{'Rep'}->($ch);
+}
+
+sub decode
+{
+ my ($obj,$str,$chk) = @_;
+ my $rep = $obj->{'Rep'};
+ my $touni = $obj->{'ToUni'};
+ my $uni = '';
+ while (length($str))
+ {
+ my $ch = ord(substr($str,0,1,''));
+ my $x;
+ if (&$rep($ch) eq 'C')
+ {
+ $x = $touni->[0][$ch];
+ }
+ else
+ {
+ $x = $touni->[$ch][ord(substr($str,0,1,''))];
+ }
+ unless (defined $x)
+ {
+ last if $chk;
+ # What do we do here ?
+ $x = '';
+ }
+ $uni .= $x;
+ }
+ $_[1] = $str if $chk;
+ return $uni;
+}
+
+
+sub encode
+{
+ my ($obj,$uni,$chk) = @_;
+ my $fmuni = $obj->{'FmUni'};
+ my $str = '';
+ my $def = $obj->{'Def'};
+ my $rep = $obj->{'Rep'};
+ while (length($uni))
+ {
+ my $ch = substr($uni,0,1,'');
+ my $x = $fmuni->{chr(ord($ch))};
+ unless (defined $x)
+ {
+ last if ($chk);
+ $x = $def;
+ }
+ $str .= pack(&$rep($x),$x);
+ }
+ $_[1] = $uni if $chk;
+ return $str;
+}
+
+package Encode::Tcl::Escape;
+use base 'Encode::Encoding';
+
+use Carp;
+
+sub read
+{
+ my ($class,$fh,$name) = @_;
+ my %self = (Name => $name, Num => 0);
+ while (<$fh>)
+ {
+ my ($key,$val) = /^(\S+)\s+(.*)$/;
+ $val =~ s/^\{(.*?)\}/$1/g;
+ $val =~ s/\\x([0-9a-f]{2})/chr(hex($1))/ge;
+ $self{$key} = $val;
+ }
+ return bless \%self,$class;
+}
+
+sub decode
+{
+ croak("Not implemented yet");
+}
+
+sub encode
+{
+ croak("Not implemented yet");
+}
+
+1;
+__END__
diff --git a/ext/Encode/Encode/ascii.ucm b/ext/Encode/Encode/ascii.ucm
index 71e2dd1244..344423e223 100644
--- a/ext/Encode/Encode/ascii.ucm
+++ b/ext/Encode/Encode/ascii.ucm
@@ -1,6 +1,7 @@
-# Written $Id: //depot/perlio/ext/Encode/compile#14 $
-# compile -n ascii -o Encode/ascii.ucm Encode/ascii.enc
-<code_set_name> "ascii"
+# Written $Id: //depot/perlio/ext/Encode/compile#15 $
+# ./compile -n US-ascii -o Encode/ascii.ucm Encode/ascii.enc
+<code_set_name> "US-ascii"
+<code_set_alias> "ascii"
<mb_cur_min> 1
<mb_cur_max> 1
<subchar> \x3F
diff --git a/ext/Encode/Encode/cp1250.ucm b/ext/Encode/Encode/cp1250.ucm
index 6acc1d109a..bc3cedc56d 100644
--- a/ext/Encode/Encode/cp1250.ucm
+++ b/ext/Encode/Encode/cp1250.ucm
@@ -1,5 +1,5 @@
-# Written $Id: //depot/perlio/ext/Encode/compile#14 $
-# compile -n cp1250 -o Encode/cp1250.ucm Encode/cp1250.enc
+# Written $Id: //depot/perlio/ext/Encode/compile#15 $
+# ./compile -n cp1250 -o Encode/cp1250.ucm Encode/cp1250.enc
<code_set_name> "cp1250"
<mb_cur_min> 1
<mb_cur_max> 1
diff --git a/ext/Encode/Encode/iso8859-1.ucm b/ext/Encode/Encode/iso8859-1.ucm
index 1366f60e73..6f139fb7e7 100644
--- a/ext/Encode/Encode/iso8859-1.ucm
+++ b/ext/Encode/Encode/iso8859-1.ucm
@@ -1,6 +1,6 @@
-# Written $Id: //depot/perlio/ext/Encode/compile#14 $
-# compile -n iso8859-1 -o Encode/iso8859-1.ucm Encode/iso8859-1.enc
-<code_set_name> "iso8859-1"
+# Written $Id: //depot/perlio/ext/Encode/compile#15 $
+# ./compile -n iso-8859-1 -o Encode/iso8859-1.ucm Encode/iso8859-1.enc
+<code_set_name> "iso-8859-1"
<mb_cur_min> 1
<mb_cur_max> 1
<subchar> \x3F
diff --git a/ext/Encode/Encode/iso8859-10.ucm b/ext/Encode/Encode/iso8859-10.ucm
index a326352e7c..2bcc2b015e 100644
--- a/ext/Encode/Encode/iso8859-10.ucm
+++ b/ext/Encode/Encode/iso8859-10.ucm
@@ -1,6 +1,6 @@
-# Written $Id: //depot/perlio/ext/Encode/compile#14 $
-# compile -n iso8859-10 -o Encode/iso8859-10.ucm Encode/iso8859-10.enc
-<code_set_name> "iso8859-10"
+# Written $Id: //depot/perlio/ext/Encode/compile#15 $
+# ./compile -n iso-8859-10 -o Encode/iso8859-10.ucm Encode/iso8859-10.enc
+<code_set_name> "iso-8859-10"
<mb_cur_min> 1
<mb_cur_max> 1
<subchar> \x3F
diff --git a/ext/Encode/Encode/iso8859-13.ucm b/ext/Encode/Encode/iso8859-13.ucm
index 435c4926d3..ff3e75ca4e 100644
--- a/ext/Encode/Encode/iso8859-13.ucm
+++ b/ext/Encode/Encode/iso8859-13.ucm
@@ -1,6 +1,6 @@
-# Written $Id: //depot/perlio/ext/Encode/compile#14 $
-# compile -n iso8859-13 -o Encode/iso8859-13.ucm Encode/iso8859-13.enc
-<code_set_name> "iso8859-13"
+# Written $Id: //depot/perlio/ext/Encode/compile#15 $
+# ./compile -n iso-8859-13 -o Encode/iso8859-13.ucm Encode/iso8859-13.enc
+<code_set_name> "iso-8859-13"
<mb_cur_min> 1
<mb_cur_max> 1
<subchar> \x3F
diff --git a/ext/Encode/Encode/iso8859-14.ucm b/ext/Encode/Encode/iso8859-14.ucm
index c069f83c8d..76a2bbaef4 100644
--- a/ext/Encode/Encode/iso8859-14.ucm
+++ b/ext/Encode/Encode/iso8859-14.ucm
@@ -1,6 +1,6 @@
-# Written $Id: //depot/perlio/ext/Encode/compile#14 $
-# compile -n iso8859-14 -o Encode/iso8859-14.ucm Encode/iso8859-14.enc
-<code_set_name> "iso8859-14"
+# Written $Id: //depot/perlio/ext/Encode/compile#15 $
+# ./compile -n iso-8859-14 -o Encode/iso8859-14.ucm Encode/iso8859-14.enc
+<code_set_name> "iso-8859-14"
<mb_cur_min> 1
<mb_cur_max> 1
<subchar> \x3F
diff --git a/ext/Encode/Encode/iso8859-15.ucm b/ext/Encode/Encode/iso8859-15.ucm
index da41e2dd84..40538ac44c 100644
--- a/ext/Encode/Encode/iso8859-15.ucm
+++ b/ext/Encode/Encode/iso8859-15.ucm
@@ -1,6 +1,6 @@
-# Written $Id: //depot/perlio/ext/Encode/compile#14 $
-# compile -n iso8859-15 -o Encode/iso8859-15.ucm Encode/iso8859-15.enc
-<code_set_name> "iso8859-15"
+# Written $Id: //depot/perlio/ext/Encode/compile#15 $
+# ./compile -n iso-8859-15 -o Encode/iso8859-15.ucm Encode/iso8859-15.enc
+<code_set_name> "iso-8859-15"
<mb_cur_min> 1
<mb_cur_max> 1
<subchar> \x3F
diff --git a/ext/Encode/Encode/iso8859-16.ucm b/ext/Encode/Encode/iso8859-16.ucm
index b49d9754c0..2ff7cb881f 100644
--- a/ext/Encode/Encode/iso8859-16.ucm
+++ b/ext/Encode/Encode/iso8859-16.ucm
@@ -1,6 +1,6 @@
-# Written $Id: //depot/perlio/ext/Encode/compile#14 $
-# compile -n iso8859-16 -o Encode/iso8859-16.ucm Encode/iso8859-16.enc
-<code_set_name> "iso8859-16"
+# Written $Id: //depot/perlio/ext/Encode/compile#15 $
+# ./compile -n iso-8859-16 -o Encode/iso8859-16.ucm Encode/iso8859-16.enc
+<code_set_name> "iso-8859-16"
<mb_cur_min> 1
<mb_cur_max> 1
<subchar> \x3F
diff --git a/ext/Encode/Encode/iso8859-2.ucm b/ext/Encode/Encode/iso8859-2.ucm
index c93deb2552..b55c8dcace 100644
--- a/ext/Encode/Encode/iso8859-2.ucm
+++ b/ext/Encode/Encode/iso8859-2.ucm
@@ -1,6 +1,6 @@
-# Written $Id: //depot/perlio/ext/Encode/compile#14 $
-# compile -n iso8859-2 -o Encode/iso8859-2.ucm Encode/iso8859-2.enc
-<code_set_name> "iso8859-2"
+# Written $Id: //depot/perlio/ext/Encode/compile#15 $
+# ./compile -n iso-8859-2 -o Encode/iso8859-2.ucm Encode/iso8859-2.enc
+<code_set_name> "iso-8859-2"
<mb_cur_min> 1
<mb_cur_max> 1
<subchar> \x3F
diff --git a/ext/Encode/Encode/iso8859-3.ucm b/ext/Encode/Encode/iso8859-3.ucm
index 31fa1d6251..ec68ed1e60 100644
--- a/ext/Encode/Encode/iso8859-3.ucm
+++ b/ext/Encode/Encode/iso8859-3.ucm
@@ -1,6 +1,6 @@
-# Written $Id: //depot/perlio/ext/Encode/compile#14 $
-# compile -n iso8859-3 -o Encode/iso8859-3.ucm Encode/iso8859-3.enc
-<code_set_name> "iso8859-3"
+# Written $Id: //depot/perlio/ext/Encode/compile#15 $
+# ./compile -n iso-8859-3 -o Encode/iso8859-3.ucm Encode/iso8859-3.enc
+<code_set_name> "iso-8859-3"
<mb_cur_min> 1
<mb_cur_max> 1
<subchar> \x3F
diff --git a/ext/Encode/Encode/iso8859-4.ucm b/ext/Encode/Encode/iso8859-4.ucm
index eb9e6facf7..3d430823d5 100644
--- a/ext/Encode/Encode/iso8859-4.ucm
+++ b/ext/Encode/Encode/iso8859-4.ucm
@@ -1,6 +1,6 @@
-# Written $Id: //depot/perlio/ext/Encode/compile#14 $
-# compile -n iso8859-4 -o Encode/iso8859-4.ucm Encode/iso8859-4.enc
-<code_set_name> "iso8859-4"
+# Written $Id: //depot/perlio/ext/Encode/compile#15 $
+# ./compile -n iso-8859-4 -o Encode/iso8859-4.ucm Encode/iso8859-4.enc
+<code_set_name> "iso-8859-4"
<mb_cur_min> 1
<mb_cur_max> 1
<subchar> \x3F
diff --git a/ext/Encode/Encode/iso8859-5.ucm b/ext/Encode/Encode/iso8859-5.ucm
index 67daf5686b..86235a84d7 100644
--- a/ext/Encode/Encode/iso8859-5.ucm
+++ b/ext/Encode/Encode/iso8859-5.ucm
@@ -1,6 +1,6 @@
-# Written $Id: //depot/perlio/ext/Encode/compile#14 $
-# compile -n iso8859-5 -o Encode/iso8859-5.ucm Encode/iso8859-5.enc
-<code_set_name> "iso8859-5"
+# Written $Id: //depot/perlio/ext/Encode/compile#15 $
+# ./compile -n iso-8859-5 -o Encode/iso8859-5.ucm Encode/iso8859-5.enc
+<code_set_name> "iso-8859-5"
<mb_cur_min> 1
<mb_cur_max> 1
<subchar> \x3F
diff --git a/ext/Encode/Encode/iso8859-6.ucm b/ext/Encode/Encode/iso8859-6.ucm
index e0d5c934d9..fbeb228ce0 100644
--- a/ext/Encode/Encode/iso8859-6.ucm
+++ b/ext/Encode/Encode/iso8859-6.ucm
@@ -1,6 +1,6 @@
-# Written $Id: //depot/perlio/ext/Encode/compile#14 $
-# compile -n iso8859-6 -o Encode/iso8859-6.ucm Encode/iso8859-6.enc
-<code_set_name> "iso8859-6"
+# Written $Id: //depot/perlio/ext/Encode/compile#15 $
+# ./compile -n iso-8859-6 -o Encode/iso8859-6.ucm Encode/iso8859-6.enc
+<code_set_name> "iso-8859-6"
<mb_cur_min> 1
<mb_cur_max> 1
<subchar> \x3F
diff --git a/ext/Encode/Encode/iso8859-7.ucm b/ext/Encode/Encode/iso8859-7.ucm
index 6a4cb63f48..ba405dba23 100644
--- a/ext/Encode/Encode/iso8859-7.ucm
+++ b/ext/Encode/Encode/iso8859-7.ucm
@@ -1,6 +1,6 @@
-# Written $Id: //depot/perlio/ext/Encode/compile#14 $
-# compile -n iso8859-7 -o Encode/iso8859-7.ucm Encode/iso8859-7.enc
-<code_set_name> "iso8859-7"
+# Written $Id: //depot/perlio/ext/Encode/compile#15 $
+# ./compile -n iso-8859-7 -o Encode/iso8859-7.ucm Encode/iso8859-7.enc
+<code_set_name> "iso-8859-7"
<mb_cur_min> 1
<mb_cur_max> 1
<subchar> \x3F
diff --git a/ext/Encode/Encode/iso8859-8.ucm b/ext/Encode/Encode/iso8859-8.ucm
index 0f7146f0ff..574abfda62 100644
--- a/ext/Encode/Encode/iso8859-8.ucm
+++ b/ext/Encode/Encode/iso8859-8.ucm
@@ -1,6 +1,6 @@
-# Written $Id: //depot/perlio/ext/Encode/compile#14 $
-# compile -n iso8859-8 -o Encode/iso8859-8.ucm Encode/iso8859-8.enc
-<code_set_name> "iso8859-8"
+# Written $Id: //depot/perlio/ext/Encode/compile#15 $
+# ./compile -n iso-8859-8 -o Encode/iso8859-8.ucm Encode/iso8859-8.enc
+<code_set_name> "iso-8859-8"
<mb_cur_min> 1
<mb_cur_max> 1
<subchar> \x3F
diff --git a/ext/Encode/Encode/iso8859-9.ucm b/ext/Encode/Encode/iso8859-9.ucm
index f1a308f52b..24d7d4b598 100644
--- a/ext/Encode/Encode/iso8859-9.ucm
+++ b/ext/Encode/Encode/iso8859-9.ucm
@@ -1,6 +1,6 @@
-# Written $Id: //depot/perlio/ext/Encode/compile#14 $
-# compile -n iso8859-9 -o Encode/iso8859-9.ucm Encode/iso8859-9.enc
-<code_set_name> "iso8859-9"
+# Written $Id: //depot/perlio/ext/Encode/compile#15 $
+# ./compile -n iso-8859-9 -o Encode/iso8859-9.ucm Encode/iso8859-9.enc
+<code_set_name> "iso-8859-9"
<mb_cur_min> 1
<mb_cur_max> 1
<subchar> \x3F
diff --git a/ext/Encode/compile b/ext/Encode/compile
index 5e3e645502..8201043fde 100755
--- a/ext/Encode/compile
+++ b/ext/Encode/compile
@@ -143,7 +143,7 @@ sub cmp_name
foreach my $enc (sort cmp_name @encfiles)
{
my ($name,$sfx) = $enc =~ /^.*?([\w-]+)\.(enc|ucm)$/;
- $name = delete $opt{'n'} if exists $opt{'n'};
+ $name = $opt{'n'} if exists $opt{'n'};
if (open(E,$enc))
{
if ($sfx eq 'enc')
@@ -241,7 +241,7 @@ sub compile_ucm
}
else
{
- # $name = lc($cs);
+ $name = $cs unless exists $opt{'n'};
}
my $erep;
my $urep;