diff options
author | Nick Ing-Simmons <nik@tiuk.ti.com> | 2001-01-22 21:44:00 +0000 |
---|---|---|
committer | Nick Ing-Simmons <nik@tiuk.ti.com> | 2001-01-22 21:44:00 +0000 |
commit | dcda1f94ccf07b68dc3a74b49b117aa6026f2557 (patch) | |
tree | 739f895275470c2ec9d0533459380b7207b15b1f /ext | |
parent | 3865e8e16e47e0d7cb9fff45bba4e84e396abd42 (diff) | |
download | perl-dcda1f94ccf07b68dc3a74b49b117aa6026f2557.tar.gz |
More messing with Encode:
Extra fields in header to allow multiple names and to record
other things "compile" knows.
Re-organise compile to factor out common output routines.
p4raw-id: //depot/perlio@8520
Diffstat (limited to 'ext')
-rw-r--r-- | ext/Encode/Encode.xs | 16 | ||||
-rwxr-xr-x | ext/Encode/compile | 80 | ||||
-rw-r--r-- | ext/Encode/encode.h | 4 |
3 files changed, 57 insertions, 43 deletions
diff --git a/ext/Encode/Encode.xs b/ext/Encode/Encode.xs index a4670cddf1..8aa51ff33d 100644 --- a/ext/Encode/Encode.xs +++ b/ext/Encode/Encode.xs @@ -333,7 +333,13 @@ Encode_Define(pTHX_ encode_t *enc) HV *hash = get_hv("Encode::encoding",GV_ADD|GV_ADDMULTI); HV *stash = gv_stashpv("Encode::XS", TRUE); SV *sv = sv_bless(newRV_noinc(newSViv(PTR2IV(enc))),stash); - hv_store(hash,enc->name,strlen(enc->name),sv,0); + int i = 0; + while (enc->name[i]) + { + const char *name = enc->name[i++]; + hv_store(hash,name,strlen(name),SvREFCNT_inc(sv),0); + } + SvREFCNT_dec(sv); } void call_failure (SV *routine, U8* done, U8* dest, U8* orig) {} @@ -377,7 +383,7 @@ encode_method(pTHX_ encode_t *enc, encpage_t *dir, SV *src, int check) { STRLEN clen; UV ch = utf8_to_uv(s+slen,(SvCUR(src)-slen),&clen,0); - Perl_warner(aTHX_ WARN_UTF8, "\"\\x{%"UVxf"}\" does not map to %s", ch, enc->name); + Perl_warner(aTHX_ WARN_UTF8, "\"\\x{%"UVxf"}\" does not map to %s", ch, enc->name[0]); /* FIXME: Skip over the character, copy in replacement and continue * but that is messy so for now just fail. */ @@ -392,7 +398,7 @@ encode_method(pTHX_ encode_t *enc, encpage_t *dir, SV *src, int check) { /* UTF-8 is supposed to be "Universal" so should not happen */ Perl_croak(aTHX_ "%s '%.*s' does not map to UTF-8", - enc->name, (int)(SvCUR(src)-slen),s+slen); + enc->name[0], (int)(SvCUR(src)-slen),s+slen); } break; @@ -400,13 +406,13 @@ encode_method(pTHX_ encode_t *enc, encpage_t *dir, SV *src, int check) if (!check && ckWARN_d(WARN_UTF8)) { Perl_warner(aTHX_ WARN_UTF8, "Partial %s character", - (dir == enc->f_utf8) ? "UTF-8" : enc->name); + (dir == enc->f_utf8) ? "UTF-8" : enc->name[0]); } return &PL_sv_undef; default: Perl_croak(aTHX_ "Unexpected code %d converting %s %s", - code, (dir == enc->f_utf8) ? "to" : "from",enc->name); + code, (dir == enc->f_utf8) ? "to" : "from",enc->name[0]); return &PL_sv_undef; } } diff --git a/ext/Encode/compile b/ext/Encode/compile index 755b78ca14..b1d68a298b 100755 --- a/ext/Encode/compile +++ b/ext/Encode/compile @@ -3,6 +3,8 @@ BEGIN { @INC = '../../lib' }; use strict; use Getopt::Std; my @orig_ARGV = @ARGV; +my $perforce = '$Id$'; + sub encode_U { @@ -57,7 +59,7 @@ open(C,">$cname") || die "Cannot open $cname:$!"; my $dname = $cname; $dname =~ s/(\.[^\.]*)?$/.def/; -my ($doC,$doEnc,$doUcm); +my ($doC,$doEnc,$doUcm,$doPet); if ($cname =~ /\.(c|xs)$/) { @@ -76,6 +78,7 @@ if ($cname =~ /\.(c|xs)$/) !!!!!!! DO NOT EDIT THIS FILE !!!!!!! This file was autogenerated by: $^X $0 $cname @orig_ARGV + (Repository $perforce) */ END } @@ -97,6 +100,10 @@ elsif ($cname =~ /\.ucm$/) { $doUcm = 1; } +elsif ($cname =~ /\.pet$/) + { + $doPet = 1; + } my @encfiles; if (exists $opt{'f'}) @@ -129,6 +136,7 @@ sub cmp_name return $a cmp $b; } + foreach my $enc (sort cmp_name @encfiles) { my ($name,$sfx) = $enc =~ /^.*?([\w-]+)\.(enc|ucm)$/; @@ -137,11 +145,11 @@ foreach my $enc (sort cmp_name @encfiles) { if ($sfx eq 'enc') { - compile_enc(\*E,lc($name),\*C); + compile_enc(\*E,lc($name)); } else { - compile_ucm(\*E,lc($name),\*C); + compile_ucm(\*E,lc($name)); } } else @@ -152,12 +160,21 @@ foreach my $enc (sort cmp_name @encfiles) if ($doC) { + foreach my $name (sort cmp_name keys %encoding) + { + my ($e2u,$u2e,$erep,$min_el,$max_el) = @{$encoding{$name}}; + output(\*C,$name.'_utf8',$e2u); + output(\*C,'utf8_'.$name,$u2e); + push(@{$encoding{$name}},outstring(\*C,$e2u->{Cname}.'_def',$erep)); + } foreach my $enc (sort cmp_name keys %encoding) { + my ($e2u,$u2e,$rep,$min_el,$max_el,$rsym) = @{$encoding{$enc}}; + my @info = ($e2u->{Cname},$u2e->{Cname},$rsym,length($rep),$min_el,$max_el); my $sym = "${enc}_encoding"; $sym =~ s/\W+/_/g; print C "encode_t $sym = \n"; - print C " {",join(',',"\"$enc\"",@{$encoding{$enc}}),"};\n\n"; + print C " {",join(',',@info,"{\"$enc\",(const char *)0}"),"};\n\n"; } foreach my $enc (sort cmp_name keys %encoding) @@ -179,12 +196,29 @@ if ($doC) close(D); close(H); } +elsif ($doEnc) + { + foreach my $name (sort cmp_name keys %encoding) + { + my ($e2u,$u2e,$erep,$min_el,$max_el) = @{$encoding{$name}}; + output_enc(\*C,$name,$e2u); + } + } +elsif ($doUcm) + { + foreach my $name (sort cmp_name keys %encoding) + { + my ($e2u,$u2e,$erep,$min_el,$max_el) = @{$encoding{$name}}; + output_ucm(\*C,$name,$u2e,$erep,$min_el,$max_el); + } + } + close(C); sub compile_ucm { - my ($fh,$name,$ch) = @_; + my ($fh,$name) = @_; my $e2u = {}; my $u2e = {}; my $cs; @@ -266,26 +300,12 @@ sub compile_ucm { die "$nfb entries without fallback, $hfb entries with\n"; } - if ($doC) - { - output($ch,$name.'_utf8',$e2u); - output($ch,'utf8_'.$name,$u2e); - $encoding{$name} = [$e2u->{Cname},$u2e->{Cname}, - outstring($ch,$e2u->{Cname}.'_def',$erep),length($erep)]; - } - elsif ($doEnc) - { - output_enc($ch,$name,$e2u); - } - elsif ($doUcm) - { - output_ucm($ch,$name,$u2e,$erep,$min_el,$max_el); - } + $encoding{$name} = [$e2u,$u2e,$erep,$min_el,$max_el]; } sub compile_enc { - my ($fh,$name,$ch) = @_; + my ($fh,$name) = @_; my $e2u = {}; my $u2e = {}; @@ -349,21 +369,7 @@ sub compile_enc } } } - if ($doC) - { - output($ch,$name.'_utf8',$e2u); - output($ch,'utf8_'.$name,$u2e); - $encoding{$name} = [$e2u->{Cname},$u2e->{Cname}, - outstring($ch,$e2u->{Cname}.'_def',$rep),length($rep)]; - } - elsif ($doEnc) - { - output_enc($ch,$name,$e2u); - } - elsif ($doUcm) - { - output_ucm($ch,$name,$u2e,$rep,$min_el,$max_el); - } + $encoding{$name} = [$e2u,$u2e,$rep,$min_el,$max_el]; } sub enter @@ -569,7 +575,7 @@ sub output_ucm_page sub output_ucm { my ($fh,$name,$a,$rep,$min_el,$max_el) = @_; - print $fh "# Written by $0 @orig_ARGV\n" unless $opt{'q'}; + print $fh "# Written $perforce\n# $0 @orig_ARGV\n" unless $opt{'q'}; print $fh "<code_set_name> \"$name\"\n"; if (defined $min_el) { diff --git a/ext/Encode/encode.h b/ext/Encode/encode.h index 853ad041b4..aecc66eafe 100644 --- a/ext/Encode/encode.h +++ b/ext/Encode/encode.h @@ -19,11 +19,13 @@ struct encpage_s typedef struct encode_s encode_t; struct encode_s { - const char *name; encpage_t *t_utf8; encpage_t *f_utf8; const U8 *rep; int replen; + U8 min_el; + U8 max_el; + const char *name[2]; }; #ifdef U8 |