summaryrefslogtreecommitdiff
path: root/ext
diff options
context:
space:
mode:
authorNick Ing-Simmons <nik@tiuk.ti.com>2001-01-22 21:44:00 +0000
committerNick Ing-Simmons <nik@tiuk.ti.com>2001-01-22 21:44:00 +0000
commitdcda1f94ccf07b68dc3a74b49b117aa6026f2557 (patch)
tree739f895275470c2ec9d0533459380b7207b15b1f /ext
parent3865e8e16e47e0d7cb9fff45bba4e84e396abd42 (diff)
downloadperl-dcda1f94ccf07b68dc3a74b49b117aa6026f2557.tar.gz
More messing with Encode:
Extra fields in header to allow multiple names and to record other things "compile" knows. Re-organise compile to factor out common output routines. p4raw-id: //depot/perlio@8520
Diffstat (limited to 'ext')
-rw-r--r--ext/Encode/Encode.xs16
-rwxr-xr-xext/Encode/compile80
-rw-r--r--ext/Encode/encode.h4
3 files changed, 57 insertions, 43 deletions
diff --git a/ext/Encode/Encode.xs b/ext/Encode/Encode.xs
index a4670cddf1..8aa51ff33d 100644
--- a/ext/Encode/Encode.xs
+++ b/ext/Encode/Encode.xs
@@ -333,7 +333,13 @@ Encode_Define(pTHX_ encode_t *enc)
HV *hash = get_hv("Encode::encoding",GV_ADD|GV_ADDMULTI);
HV *stash = gv_stashpv("Encode::XS", TRUE);
SV *sv = sv_bless(newRV_noinc(newSViv(PTR2IV(enc))),stash);
- hv_store(hash,enc->name,strlen(enc->name),sv,0);
+ int i = 0;
+ while (enc->name[i])
+ {
+ const char *name = enc->name[i++];
+ hv_store(hash,name,strlen(name),SvREFCNT_inc(sv),0);
+ }
+ SvREFCNT_dec(sv);
}
void call_failure (SV *routine, U8* done, U8* dest, U8* orig) {}
@@ -377,7 +383,7 @@ encode_method(pTHX_ encode_t *enc, encpage_t *dir, SV *src, int check)
{
STRLEN clen;
UV ch = utf8_to_uv(s+slen,(SvCUR(src)-slen),&clen,0);
- Perl_warner(aTHX_ WARN_UTF8, "\"\\x{%"UVxf"}\" does not map to %s", ch, enc->name);
+ Perl_warner(aTHX_ WARN_UTF8, "\"\\x{%"UVxf"}\" does not map to %s", ch, enc->name[0]);
/* FIXME: Skip over the character, copy in replacement and continue
* but that is messy so for now just fail.
*/
@@ -392,7 +398,7 @@ encode_method(pTHX_ encode_t *enc, encpage_t *dir, SV *src, int check)
{
/* UTF-8 is supposed to be "Universal" so should not happen */
Perl_croak(aTHX_ "%s '%.*s' does not map to UTF-8",
- enc->name, (int)(SvCUR(src)-slen),s+slen);
+ enc->name[0], (int)(SvCUR(src)-slen),s+slen);
}
break;
@@ -400,13 +406,13 @@ encode_method(pTHX_ encode_t *enc, encpage_t *dir, SV *src, int check)
if (!check && ckWARN_d(WARN_UTF8))
{
Perl_warner(aTHX_ WARN_UTF8, "Partial %s character",
- (dir == enc->f_utf8) ? "UTF-8" : enc->name);
+ (dir == enc->f_utf8) ? "UTF-8" : enc->name[0]);
}
return &PL_sv_undef;
default:
Perl_croak(aTHX_ "Unexpected code %d converting %s %s",
- code, (dir == enc->f_utf8) ? "to" : "from",enc->name);
+ code, (dir == enc->f_utf8) ? "to" : "from",enc->name[0]);
return &PL_sv_undef;
}
}
diff --git a/ext/Encode/compile b/ext/Encode/compile
index 755b78ca14..b1d68a298b 100755
--- a/ext/Encode/compile
+++ b/ext/Encode/compile
@@ -3,6 +3,8 @@ BEGIN { @INC = '../../lib' };
use strict;
use Getopt::Std;
my @orig_ARGV = @ARGV;
+my $perforce = '$Id$';
+
sub encode_U
{
@@ -57,7 +59,7 @@ open(C,">$cname") || die "Cannot open $cname:$!";
my $dname = $cname;
$dname =~ s/(\.[^\.]*)?$/.def/;
-my ($doC,$doEnc,$doUcm);
+my ($doC,$doEnc,$doUcm,$doPet);
if ($cname =~ /\.(c|xs)$/)
{
@@ -76,6 +78,7 @@ if ($cname =~ /\.(c|xs)$/)
!!!!!!! DO NOT EDIT THIS FILE !!!!!!!
This file was autogenerated by:
$^X $0 $cname @orig_ARGV
+ (Repository $perforce)
*/
END
}
@@ -97,6 +100,10 @@ elsif ($cname =~ /\.ucm$/)
{
$doUcm = 1;
}
+elsif ($cname =~ /\.pet$/)
+ {
+ $doPet = 1;
+ }
my @encfiles;
if (exists $opt{'f'})
@@ -129,6 +136,7 @@ sub cmp_name
return $a cmp $b;
}
+
foreach my $enc (sort cmp_name @encfiles)
{
my ($name,$sfx) = $enc =~ /^.*?([\w-]+)\.(enc|ucm)$/;
@@ -137,11 +145,11 @@ foreach my $enc (sort cmp_name @encfiles)
{
if ($sfx eq 'enc')
{
- compile_enc(\*E,lc($name),\*C);
+ compile_enc(\*E,lc($name));
}
else
{
- compile_ucm(\*E,lc($name),\*C);
+ compile_ucm(\*E,lc($name));
}
}
else
@@ -152,12 +160,21 @@ foreach my $enc (sort cmp_name @encfiles)
if ($doC)
{
+ foreach my $name (sort cmp_name keys %encoding)
+ {
+ my ($e2u,$u2e,$erep,$min_el,$max_el) = @{$encoding{$name}};
+ output(\*C,$name.'_utf8',$e2u);
+ output(\*C,'utf8_'.$name,$u2e);
+ push(@{$encoding{$name}},outstring(\*C,$e2u->{Cname}.'_def',$erep));
+ }
foreach my $enc (sort cmp_name keys %encoding)
{
+ my ($e2u,$u2e,$rep,$min_el,$max_el,$rsym) = @{$encoding{$enc}};
+ my @info = ($e2u->{Cname},$u2e->{Cname},$rsym,length($rep),$min_el,$max_el);
my $sym = "${enc}_encoding";
$sym =~ s/\W+/_/g;
print C "encode_t $sym = \n";
- print C " {",join(',',"\"$enc\"",@{$encoding{$enc}}),"};\n\n";
+ print C " {",join(',',@info,"{\"$enc\",(const char *)0}"),"};\n\n";
}
foreach my $enc (sort cmp_name keys %encoding)
@@ -179,12 +196,29 @@ if ($doC)
close(D);
close(H);
}
+elsif ($doEnc)
+ {
+ foreach my $name (sort cmp_name keys %encoding)
+ {
+ my ($e2u,$u2e,$erep,$min_el,$max_el) = @{$encoding{$name}};
+ output_enc(\*C,$name,$e2u);
+ }
+ }
+elsif ($doUcm)
+ {
+ foreach my $name (sort cmp_name keys %encoding)
+ {
+ my ($e2u,$u2e,$erep,$min_el,$max_el) = @{$encoding{$name}};
+ output_ucm(\*C,$name,$u2e,$erep,$min_el,$max_el);
+ }
+ }
+
close(C);
sub compile_ucm
{
- my ($fh,$name,$ch) = @_;
+ my ($fh,$name) = @_;
my $e2u = {};
my $u2e = {};
my $cs;
@@ -266,26 +300,12 @@ sub compile_ucm
{
die "$nfb entries without fallback, $hfb entries with\n";
}
- if ($doC)
- {
- output($ch,$name.'_utf8',$e2u);
- output($ch,'utf8_'.$name,$u2e);
- $encoding{$name} = [$e2u->{Cname},$u2e->{Cname},
- outstring($ch,$e2u->{Cname}.'_def',$erep),length($erep)];
- }
- elsif ($doEnc)
- {
- output_enc($ch,$name,$e2u);
- }
- elsif ($doUcm)
- {
- output_ucm($ch,$name,$u2e,$erep,$min_el,$max_el);
- }
+ $encoding{$name} = [$e2u,$u2e,$erep,$min_el,$max_el];
}
sub compile_enc
{
- my ($fh,$name,$ch) = @_;
+ my ($fh,$name) = @_;
my $e2u = {};
my $u2e = {};
@@ -349,21 +369,7 @@ sub compile_enc
}
}
}
- if ($doC)
- {
- output($ch,$name.'_utf8',$e2u);
- output($ch,'utf8_'.$name,$u2e);
- $encoding{$name} = [$e2u->{Cname},$u2e->{Cname},
- outstring($ch,$e2u->{Cname}.'_def',$rep),length($rep)];
- }
- elsif ($doEnc)
- {
- output_enc($ch,$name,$e2u);
- }
- elsif ($doUcm)
- {
- output_ucm($ch,$name,$u2e,$rep,$min_el,$max_el);
- }
+ $encoding{$name} = [$e2u,$u2e,$rep,$min_el,$max_el];
}
sub enter
@@ -569,7 +575,7 @@ sub output_ucm_page
sub output_ucm
{
my ($fh,$name,$a,$rep,$min_el,$max_el) = @_;
- print $fh "# Written by $0 @orig_ARGV\n" unless $opt{'q'};
+ print $fh "# Written $perforce\n# $0 @orig_ARGV\n" unless $opt{'q'};
print $fh "<code_set_name> \"$name\"\n";
if (defined $min_el)
{
diff --git a/ext/Encode/encode.h b/ext/Encode/encode.h
index 853ad041b4..aecc66eafe 100644
--- a/ext/Encode/encode.h
+++ b/ext/Encode/encode.h
@@ -19,11 +19,13 @@ struct encpage_s
typedef struct encode_s encode_t;
struct encode_s
{
- const char *name;
encpage_t *t_utf8;
encpage_t *f_utf8;
const U8 *rep;
int replen;
+ U8 min_el;
+ U8 max_el;
+ const char *name[2];
};
#ifdef U8