diff options
-rw-r--r-- | MANIFEST | 45 | ||||
-rw-r--r-- | ext/Encode/Byte/Byte.pm | 4 | ||||
-rw-r--r-- | ext/Encode/CN/CN.pm | 5 | ||||
-rw-r--r-- | ext/Encode/Changes | 45 | ||||
-rw-r--r-- | ext/Encode/EBCDIC/EBCDIC.pm | 4 | ||||
-rw-r--r-- | ext/Encode/Encode.pm | 18 | ||||
-rw-r--r-- | ext/Encode/Encode.xs | 546 | ||||
-rw-r--r-- | ext/Encode/Encode/Makefile_PL.e2x | 2 | ||||
-rw-r--r-- | ext/Encode/Encode/_PM.e2x | 2 | ||||
-rw-r--r-- | ext/Encode/JP/JP.pm | 4 | ||||
-rw-r--r-- | ext/Encode/KR/KR.pm | 5 | ||||
-rw-r--r-- | ext/Encode/MANIFEST | 1 | ||||
-rw-r--r-- | ext/Encode/Symbol/Symbol.pm | 4 | ||||
-rw-r--r-- | ext/Encode/TW/TW.pm | 4 | ||||
-rw-r--r-- | ext/Encode/bin/enc2xs | 70 | ||||
-rw-r--r-- | ext/Encode/bin/piconv | 23 | ||||
-rw-r--r-- | ext/Encode/encoding.pm | 23 | ||||
-rw-r--r-- | ext/Encode/lib/Encode/CN/HZ.pm | 11 | ||||
-rw-r--r-- | ext/Encode/lib/Encode/Config.pm | 9 | ||||
-rw-r--r-- | ext/Encode/lib/Encode/KR/2022_KR.pm | 8 | ||||
-rw-r--r-- | ext/Encode/lib/Encode/XS.pm | 13 | ||||
-rw-r--r-- | ext/Encode/t/CN.t | 4 | ||||
-rw-r--r-- | ext/Encode/t/Encoder.t | 12 | ||||
-rw-r--r-- | ext/Encode/t/JP.t | 86 | ||||
-rw-r--r-- | ext/Encode/t/KR.t | 82 | ||||
-rw-r--r-- | ext/Encode/t/TW.t | 4 | ||||
-rw-r--r-- | ext/Encode/t/Unicode.t | 13 | ||||
-rw-r--r-- | ext/Encode/t/encoding.t | 4 |
28 files changed, 352 insertions, 699 deletions
@@ -197,33 +197,40 @@ ext/DynaLoader/Makefile.PL Dynamic Loader makefile writer ext/DynaLoader/README Dynamic Loader notes and intro ext/DynaLoader/XSLoader_pm.PL Simple XS Loader perl module ext/Encode/AUTHORS List of authors -ext/Encode/bin/enc2xs Encode module generator -ext/Encode/bin/piconv iconv by perl -ext/Encode/bin/ucm2table Table Generator for testing -ext/Encode/bin/ucmlint A UCM Lint utility -ext/Encode/bin/unidump Unicode Dump like hexdump(1) ext/Encode/Byte/Byte.pm Encode extension ext/Encode/Byte/Makefile.PL Encode extension -ext/Encode/Changes Change Log ext/Encode/CN/CN.pm Encode extension ext/Encode/CN/Makefile.PL Encode extension +ext/Encode/Changes Change Log ext/Encode/EBCDIC/EBCDIC.pm Encode extension ext/Encode/EBCDIC/Makefile.PL Encode extension -ext/Encode/encengine.c Encode extension ext/Encode/Encode.pm Mother of all Encode extensions ext/Encode/Encode.xs Encode extension ext/Encode/Encode/Changes.e2x Skeleton file for enc2xs ext/Encode/Encode/ConfigLocal_PM.e2x Skeleton file for enc2xs -ext/Encode/Encode/encode.h Encode extension header file ext/Encode/Encode/Makefile_PL.e2x Skeleton file for enc2xs ext/Encode/Encode/README.e2x Skeleton file for enc2xs ext/Encode/Encode/_PM.e2x Skeleton file for enc2xs ext/Encode/Encode/_T.e2x Skeleton file for enc2xs -ext/Encode/encoding.pm Perl Pragmactic Module +ext/Encode/Encode/encode.h Encode extension header file ext/Encode/JP/JP.pm Encode extension ext/Encode/JP/Makefile.PL Encode extension ext/Encode/KR/KR.pm Encode extension ext/Encode/KR/Makefile.PL Encode extension +ext/Encode/MANIFEST Encode extension +ext/Encode/Makefile.PL Encode extension makefile writer +ext/Encode/README Encode extension +ext/Encode/Symbol/Makefile.PL Encode extension +ext/Encode/Symbol/Symbol.pm Encode extension +ext/Encode/TW/Makefile.PL Encode extension +ext/Encode/TW/TW.pm Encode extension +ext/Encode/bin/enc2xs Encode module generator +ext/Encode/bin/piconv iconv by perl +ext/Encode/bin/ucm2table Table Generator for testing +ext/Encode/bin/ucmlint A UCM Lint utility +ext/Encode/bin/unidump Unicode Dump like hexdump(1) +ext/Encode/encengine.c Encode extension +ext/Encode/encoding.pm Perl Pragmactic Module ext/Encode/lib/Encode/Alias.pm Encode extension ext/Encode/lib/Encode/CJKConstants.pm Encode extension ext/Encode/lib/Encode/CN/HZ.pm Encode extension @@ -235,17 +242,15 @@ ext/Encode/lib/Encode/JP/JIS7.pm Encode extension ext/Encode/lib/Encode/KR/2022_KR.pm Encode extension ext/Encode/lib/Encode/Supported.pod Documents supported encodings ext/Encode/lib/Encode/Unicode.pm Encode extension -ext/Encode/lib/Encode/XS.pm Encode extension -ext/Encode/Makefile.PL Encode extension makefile writer -ext/Encode/MANIFEST Encode extension -ext/Encode/README Encode extension -ext/Encode/Symbol/Makefile.PL Encode extension -ext/Encode/Symbol/Symbol.pm Encode extension ext/Encode/t/Aliases.t Encode extension test -ext/Encode/t/bogus.ucm Sample data for ucmlint ext/Encode/t/CN.t Encode extension test ext/Encode/t/Encode.t Encode extension test ext/Encode/t/Encoder.t Encode::Encoder test +ext/Encode/t/JP.t Encode extension test +ext/Encode/t/KR.t Encode extension test +ext/Encode/t/TW.t Encode extension test +ext/Encode/t/Unicode.t Encode extension test +ext/Encode/t/bogus.ucm Sample data for ucmlint ext/Encode/t/encoding.t encoding extension test ext/Encode/t/gb2312.euc test data ext/Encode/t/gb2312.ref test data @@ -254,16 +259,10 @@ ext/Encode/t/jisx0208.euc test data ext/Encode/t/jisx0208.ref test data ext/Encode/t/jisx0212.euc test data ext/Encode/t/jisx0212.ref test data -ext/Encode/t/JP.t Encode extension test ext/Encode/t/jperl.t encoding extension test -ext/Encode/t/KR.t Encode extension test ext/Encode/t/ksc5601.euc test data ext/Encode/t/ksc5601.ref test data -ext/Encode/t/TW.t Encode extension test ext/Encode/t/unibench.pl Unicode benchmark -ext/Encode/t/Unicode.t Encode extension test -ext/Encode/TW/Makefile.PL Encode extension -ext/Encode/TW/TW.pm Encode extension ext/Encode/ucm/8859-1.ucm Unicode Character Map ext/Encode/ucm/8859-10.ucm Unicode Character Map ext/Encode/ucm/8859-11.ucm Unicode Character Map @@ -352,9 +351,9 @@ ext/Encode/ucm/macHebrew.ucm Unicode Character Map ext/Encode/ucm/macIceland.ucm Unicode Character Map ext/Encode/ucm/macJapanese.ucm Unicode Character Map ext/Encode/ucm/macKorean.ucm Unicode Character Map -ext/Encode/ucm/macRoman.ucm Unicode Character Map ext/Encode/ucm/macROMnn.ucm Unicode Character Map ext/Encode/ucm/macRUMnn.ucm Unicode Character Map +ext/Encode/ucm/macRoman.ucm Unicode Character Map ext/Encode/ucm/macSami.ucm Unicode Character Map ext/Encode/ucm/macSymbol.ucm Unicode Character Map ext/Encode/ucm/macThai.ucm Unicode Character Map diff --git a/ext/Encode/Byte/Byte.pm b/ext/Encode/Byte/Byte.pm index a163c92e1d..e570505f8e 100644 --- a/ext/Encode/Byte/Byte.pm +++ b/ext/Encode/Byte/Byte.pm @@ -1,9 +1,9 @@ package Encode::Byte; use Encode; -our $VERSION = do { my @r = (q$Revision: 1.20 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; +our $VERSION = do { my @r = (q$Revision: 1.21 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; use XSLoader; -XSLoader::load('Encode::Byte',$VERSION); +XSLoader::load(__PACKAGE__,$VERSION); 1; __END__ diff --git a/ext/Encode/CN/CN.pm b/ext/Encode/CN/CN.pm index 2cdf969db9..5952cab69b 100644 --- a/ext/Encode/CN/CN.pm +++ b/ext/Encode/CN/CN.pm @@ -4,12 +4,11 @@ BEGIN { die "Encode::CN not supported on EBCDIC\n"; } } -our $VERSION = do { my @r = (q$Revision: 1.21 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; +our $VERSION = do { my @r = (q$Revision: 1.22 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; use Encode; -use Encode::CN::HZ; use XSLoader; -XSLoader::load('Encode::CN',$VERSION); +XSLoader::load(__PACKAGE__,$VERSION); # Relocated from Encode.pm diff --git a/ext/Encode/Changes b/ext/Encode/Changes index 88023bf851..06cc9b6d4c 100644 --- a/ext/Encode/Changes +++ b/ext/Encode/Changes @@ -1,9 +1,48 @@ # Revision history for Perl extension Encode. # -# $Id: Changes,v 1.40 2002/04/14 22:27:14 dankogai Exp $ +# $Id: Changes,v 1.42 2002/04/17 03:01:20 dankogai Exp dankogai $ # -1.40 $Date: 2002/04/14 22:27:14 $ +1.42 $Date: 2002/04/17 03:01:20 $ +- lib/Encode/XS.pm + no-op module; Thought of adding a pod there but enc2xs has + one so gone. +! encoding.pm +! t/JP.pm +! t/KR.pm + correct mechanism to detect Perlio::encoding layar installed. +! Encode.xs + PerlIO Layer detached. + +1.41 2002/04/16 23:35:00 +! encoding.pm + binmode(STDIN|STDOUT ...) done iff PerlIO is available +! t/*.t + Cleaned up PerlIO skip conditions to prepare for the upcoming + Encode - PerlIO forking. +! Encode.pm + exported functions are now prototyped. +! lib/Encode/CN/HZ.pm +! bin/enc2xs +! Encode.xs + fallback implemented # was /* FIXME */ + affected programs revised to fit (only HZ was using the try-catch + approach which needed to be fixed for API-compliance). +! Encode/Config.pm +! Encode/KR/2022_KR.pm +! Encode/KR/KR.pm + can find =head1 NAME now, jhi + Message-Id: <20020416083059.V30639@alpha.hut.fi> +! encoding.pm + s/\{h\}/{$h}/g ;) +! Encode.xs + now complies with less warnings with the pickest compilers. + Suggested by Craig, fixed by Dan. + ! Encode/Makefile_PL.e2x +! bin/enc2xs + A bug that fails to find *.e2x in certain conditions fixed + +1.40 2002/04/14 22:27:14 + Encode/ConfigLocal_PM.e2x ! lib/Encode/Config.pm ! bin/enc2xs @@ -296,7 +335,7 @@ Typo fixes and improvements by jhi Message-Id: <200204010201.FAA03564@alpha.hut.fi>, et al. -1.11 $Date: 2002/04/14 22:27:14 $ +1.11 $Date: 2002/04/17 03:01:20 $ + t/encoding.t + t/jperl.t ! MANIFEST diff --git a/ext/Encode/EBCDIC/EBCDIC.pm b/ext/Encode/EBCDIC/EBCDIC.pm index 92a17561f0..4eb674a753 100644 --- a/ext/Encode/EBCDIC/EBCDIC.pm +++ b/ext/Encode/EBCDIC/EBCDIC.pm @@ -1,9 +1,9 @@ package Encode::EBCDIC; use Encode; -our $VERSION = do { my @r = (q$Revision: 1.20 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; +our $VERSION = do { my @r = (q$Revision: 1.21 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; use XSLoader; -XSLoader::load('Encode::EBCDIC',$VERSION); +XSLoader::load(__PACKAGE__,$VERSION); 1; __END__ diff --git a/ext/Encode/Encode.pm b/ext/Encode/Encode.pm index e6a2048051..3dd63a8800 100644 --- a/ext/Encode/Encode.pm +++ b/ext/Encode/Encode.pm @@ -1,6 +1,6 @@ package Encode; use strict; -our $VERSION = do { my @r = (q$Revision: 1.40 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; +our $VERSION = do { my @r = (q$Revision: 1.42 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; our $DEBUG = 0; require DynaLoader; @@ -126,9 +126,10 @@ sub resolve_alias { return; } -sub encode +sub encode($$;$) { my ($name,$string,$check) = @_; + $check ||=0; my $enc = find_encoding($name); croak("Unknown encoding '$name'") unless defined $enc; my $octets = $enc->encode($string,$check); @@ -136,9 +137,10 @@ sub encode return $octets; } -sub decode +sub decode($$;$) { my ($name,$octets,$check) = @_; + $check ||=0; my $enc = find_encoding($name); croak("Unknown encoding '$name'") unless defined $enc; my $string = $enc->decode($octets,$check); @@ -146,9 +148,10 @@ sub decode return $string; } -sub from_to +sub from_to($$$;$) { my ($string,$from,$to,$check) = @_; + $check ||=0; my $f = find_encoding($from); croak("Unknown encoding '$from'") unless defined $f; my $t = find_encoding($to); @@ -160,14 +163,14 @@ sub from_to return defined($_[0] = $string) ? length($string) : undef ; } -sub encode_utf8 +sub encode_utf8($) { my ($str) = @_; utf8::encode($str); return $str; } -sub decode_utf8 +sub decode_utf8($) { my ($str) = @_; return undef unless utf8::decode($str); @@ -249,7 +252,8 @@ sub predefine_encodings{ } require Encode::Encoding; -require Encode::XS; + +eval { require PerlIO::encoding }; 1; diff --git a/ext/Encode/Encode.xs b/ext/Encode/Encode.xs index 229359e7f2..9c30c4a9d7 100644 --- a/ext/Encode/Encode.xs +++ b/ext/Encode/Encode.xs @@ -7,15 +7,14 @@ #include "def_t.h" #define FBCHAR 0xFFFd +#define FBCHAR_UTF8 "\xEF\xBF\xBD" #define BOM_BE 0xFeFF #define BOM16LE 0xFFFe #define BOM32LE 0xFFFe0000 - -#define valid_ucs2(x) ((0 <= (x) && (x) < 0xD800) || (0xDFFF < (x) && (x) <= 0xFFFF)) - #define issurrogate(x) (0xD800 <= (x) && (x) <= 0xDFFF ) #define isHiSurrogate(x) (0xD800 <= (x) && (x) < 0xDC00 ) #define isLoSurrogate(x) (0xDC00 <= (x) && (x) <= 0xDFFF ) +#define invalid_ucs2(x) ( issurrogate(x) || 0xFFFF < (x) ) static UV enc_unpack(pTHX_ U8 **sp,U8 *e,STRLEN size,U8 endian) @@ -53,7 +52,7 @@ enc_unpack(pTHX_ U8 **sp,U8 *e,STRLEN size,U8 endian) void enc_pack(pTHX_ SV *result,STRLEN size,U8 endian,UV value) { - U8 *d = SvGROW(result,SvCUR(result)+size); + U8 *d = (U8 *)SvGROW(result,SvCUR(result)+size); switch(endian) { case 'v': case 'V': @@ -93,452 +92,6 @@ enc_pack(pTHX_ SV *result,STRLEN size,U8 endian,UV value) } UNIMPLEMENTED(_encoded_utf8_to_bytes, I32) UNIMPLEMENTED(_encoded_bytes_to_utf8, I32) -#if defined(USE_PERLIO) && !defined(USE_SFIO) -/* Define an encoding "layer" in the perliol.h sense. - The layer defined here "inherits" in an object-oriented sense from the - "perlio" layer with its PerlIOBuf_* "methods". - The implementation is particularly efficient as until Encode settles down - there is no point in tryint to tune it. - - The layer works by overloading the "fill" and "flush" methods. - - "fill" calls "SUPER::fill" in perl terms, then calls the encode OO perl API - to convert the encoded data to UTF-8 form, then copies it back to the - buffer. The "base class's" read methods then see the UTF-8 data. - - "flush" transforms the UTF-8 data deposited by the "base class's write - method in the buffer back into the encoded form using the encode OO perl API, - then copies data back into the buffer and calls "SUPER::flush. - - Note that "flush" is _also_ called for read mode - we still do the (back)-translate - so that the the base class's "flush" sees the correct number of encoded chars - for positioning the seek pointer. (This double translation is the worst performance - issue - particularly with all-perl encode engine.) - -*/ -#include "perliol.h" -typedef struct { - PerlIOBuf base; /* PerlIOBuf stuff */ - SV *bufsv; /* buffer seen by layers above */ - SV *dataSV; /* data we have read from layer below */ - SV *enc; /* the encoding object */ -} PerlIOEncode; - -SV * -PerlIOEncode_getarg(pTHX_ PerlIO * f, CLONE_PARAMS * param, int flags) -{ - PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode); - SV *sv = &PL_sv_undef; - if (e->enc) { - dSP; - ENTER; - SAVETMPS; - PUSHMARK(sp); - XPUSHs(e->enc); - PUTBACK; - if (perl_call_method("name", G_SCALAR) == 1) { - SPAGAIN; - sv = newSVsv(POPs); - PUTBACK; - } - } - return sv; -} - -IV -PerlIOEncode_pushed(pTHX_ PerlIO * f, const char *mode, SV * arg) -{ - PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode); - dSP; - IV code; - code = PerlIOBuf_pushed(aTHX_ f, mode, Nullsv); - ENTER; - SAVETMPS; - PUSHMARK(sp); - XPUSHs(arg); - PUTBACK; - if (perl_call_pv("Encode::find_encoding", G_SCALAR) != 1) { - /* should never happen */ - Perl_die(aTHX_ "Encode::find_encoding did not return a value"); - return -1; - } - SPAGAIN; - e->enc = POPs; - PUTBACK; - if (!SvROK(e->enc)) { - e->enc = Nullsv; - errno = EINVAL; - Perl_warner(aTHX_ packWARN(WARN_IO), "Cannot find encoding \"%" SVf "\"", - arg); - code = -1; - } - else { - SvREFCNT_inc(e->enc); - PerlIOBase(f)->flags |= PERLIO_F_UTF8; - } - FREETMPS; - LEAVE; - return code; -} - -IV -PerlIOEncode_popped(pTHX_ PerlIO * f) -{ - PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode); - if (e->enc) { - SvREFCNT_dec(e->enc); - e->enc = Nullsv; - } - if (e->bufsv) { - SvREFCNT_dec(e->bufsv); - e->bufsv = Nullsv; - } - if (e->dataSV) { - SvREFCNT_dec(e->dataSV); - e->dataSV = Nullsv; - } - return 0; -} - -STDCHAR * -PerlIOEncode_get_base(pTHX_ PerlIO * f) -{ - PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode); - if (!e->base.bufsiz) - e->base.bufsiz = 1024; - if (!e->bufsv) { - e->bufsv = newSV(e->base.bufsiz); - sv_setpvn(e->bufsv, "", 0); - } - e->base.buf = (STDCHAR *) SvPVX(e->bufsv); - if (!e->base.ptr) - e->base.ptr = e->base.buf; - if (!e->base.end) - e->base.end = e->base.buf; - if (e->base.ptr < e->base.buf - || e->base.ptr > e->base.buf + SvLEN(e->bufsv)) { - Perl_warn(aTHX_ " ptr %p(%p)%p", e->base.buf, e->base.ptr, - e->base.buf + SvLEN(e->bufsv)); - abort(); - } - if (SvLEN(e->bufsv) < e->base.bufsiz) { - SSize_t poff = e->base.ptr - e->base.buf; - SSize_t eoff = e->base.end - e->base.buf; - e->base.buf = (STDCHAR *) SvGROW(e->bufsv, e->base.bufsiz); - e->base.ptr = e->base.buf + poff; - e->base.end = e->base.buf + eoff; - } - if (e->base.ptr < e->base.buf - || e->base.ptr > e->base.buf + SvLEN(e->bufsv)) { - Perl_warn(aTHX_ " ptr %p(%p)%p", e->base.buf, e->base.ptr, - e->base.buf + SvLEN(e->bufsv)); - abort(); - } - return e->base.buf; -} - -IV -PerlIOEncode_fill(pTHX_ PerlIO * f) -{ - PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode); - dSP; - IV code = 0; - PerlIO *n; - SSize_t avail; - if (PerlIO_flush(f) != 0) - return -1; - n = PerlIONext(f); - if (!PerlIO_fast_gets(n)) { - /* Things get too messy if we don't have a buffer layer - push a :perlio to do the job */ - char mode[8]; - n = PerlIO_push(aTHX_ n, &PerlIO_perlio, PerlIO_modestr(f,mode), Nullsv); - if (!n) { - Perl_die(aTHX_ "panic: cannot push :perlio for %p",f); - } - } - ENTER; - SAVETMPS; - retry: - avail = PerlIO_get_cnt(n); - if (avail <= 0) { - avail = PerlIO_fill(n); - if (avail == 0) { - avail = PerlIO_get_cnt(n); - } - else { - if (!PerlIO_error(n) && PerlIO_eof(n)) - avail = 0; - } - } - if (avail > 0) { - STDCHAR *ptr = PerlIO_get_ptr(n); - SSize_t use = avail; - SV *uni; - char *s; - STRLEN len = 0; - e->base.ptr = e->base.end = (STDCHAR *) Nullch; - (void) PerlIOEncode_get_base(aTHX_ f); - if (!e->dataSV) - e->dataSV = newSV(0); - if (SvTYPE(e->dataSV) < SVt_PV) { - sv_upgrade(e->dataSV,SVt_PV); - } - if (SvCUR(e->dataSV)) { - /* something left over from last time - create a normal - SV with new data appended - */ - if (use + SvCUR(e->dataSV) > e->base.bufsiz) { - use = e->base.bufsiz - SvCUR(e->dataSV); - } - sv_catpvn(e->dataSV,(char*)ptr,use); - } - else { - /* Create a "dummy" SV to represent the available data from layer below */ - if (SvLEN(e->dataSV) && SvPVX(e->dataSV)) { - Safefree(SvPVX(e->dataSV)); - } - if (use > e->base.bufsiz) { - use = e->base.bufsiz; - } - SvPVX(e->dataSV) = (char *) ptr; - SvLEN(e->dataSV) = 0; /* Hands off sv.c - it isn't yours */ - SvCUR_set(e->dataSV,use); - SvPOK_only(e->dataSV); - } - SvUTF8_off(e->dataSV); - PUSHMARK(sp); - XPUSHs(e->enc); - XPUSHs(e->dataSV); - XPUSHs(&PL_sv_yes); - PUTBACK; - if (perl_call_method("decode", G_SCALAR) != 1) { - Perl_die(aTHX_ "panic: decode did not return a value"); - } - SPAGAIN; - uni = POPs; - PUTBACK; - /* Now get translated string (forced to UTF-8) and use as buffer */ - if (SvPOK(uni)) { - s = SvPVutf8(uni, len); -#ifdef PARANOID_ENCODE_CHECKS - if (len && !is_utf8_string((U8*)s,len)) { - Perl_warn(aTHX_ "panic: decode did not return UTF-8 '%.*s'",(int) len,s); - } -#endif - } - if (len > 0) { - /* Got _something */ - /* if decode gave us back dataSV then data may vanish when - we do ptrcnt adjust - so take our copy now. - (The copy is a pain - need a put-it-here option for decode.) - */ - sv_setpvn(e->bufsv,s,len); - e->base.ptr = e->base.buf = (STDCHAR*)SvPVX(e->bufsv); - e->base.end = e->base.ptr + SvCUR(e->bufsv); - PerlIOBase(f)->flags |= PERLIO_F_RDBUF; - SvUTF8_on(e->bufsv); - - /* Adjust ptr/cnt not taking anything which - did not translate - not clear this is a win */ - /* compute amount we took */ - use -= SvCUR(e->dataSV); - PerlIO_set_ptrcnt(n, ptr+use, (avail-use)); - /* and as we did not take it it isn't pending */ - SvCUR_set(e->dataSV,0); - } else { - /* Got nothing - assume partial character so we need some more */ - /* Make sure e->dataSV is a normal SV before re-filling as - buffer alias will change under us - */ - s = SvPV(e->dataSV,len); - sv_setpvn(e->dataSV,s,len); - PerlIO_set_ptrcnt(n, ptr+use, (avail-use)); - goto retry; - } - FREETMPS; - LEAVE; - return code; - } - else { - if (avail == 0) - PerlIOBase(f)->flags |= PERLIO_F_EOF; - else - PerlIOBase(f)->flags |= PERLIO_F_ERROR; - return -1; - } -} - -IV -PerlIOEncode_flush(pTHX_ PerlIO * f) -{ - PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode); - IV code = 0; - if (e->bufsv && (e->base.ptr > e->base.buf)) { - dSP; - SV *str; - char *s; - STRLEN len; - SSize_t count = 0; - if (PerlIOBase(f)->flags & PERLIO_F_WRBUF) { - /* Write case encode the buffer and write() to layer below */ - ENTER; - SAVETMPS; - PUSHMARK(sp); - XPUSHs(e->enc); - SvCUR_set(e->bufsv, e->base.ptr - e->base.buf); - SvUTF8_on(e->bufsv); - XPUSHs(e->bufsv); - XPUSHs(&PL_sv_yes); - PUTBACK; - if (perl_call_method("encode", G_SCALAR) != 1) { - Perl_die(aTHX_ "panic: encode did not return a value"); - } - SPAGAIN; - str = POPs; - PUTBACK; - s = SvPV(str, len); - count = PerlIO_write(PerlIONext(f),s,len); - if (count != len) { - code = -1; - } - FREETMPS; - LEAVE; - if (PerlIO_flush(PerlIONext(f)) != 0) { - code = -1; - } - if (SvCUR(e->bufsv)) { - /* Did not all translate */ - e->base.ptr = e->base.buf+SvCUR(e->bufsv); - return code; - } - } - else if (PerlIOBase(f)->flags & PERLIO_F_RDBUF) { - /* read case */ - /* if we have any untranslated stuff then unread that first */ - if (e->dataSV && SvCUR(e->dataSV)) { - s = SvPV(e->dataSV, len); - count = PerlIO_unread(PerlIONext(f),s,len); - if (count != len) { - code = -1; - } - } - /* See if there is anything left in the buffer */ - if (e->base.ptr < e->base.end) { - /* Bother - have unread data. - re-encode and unread() to layer below - */ - ENTER; - SAVETMPS; - str = sv_newmortal(); - sv_upgrade(str, SVt_PV); - SvPVX(str) = (char*)e->base.ptr; - SvLEN(str) = 0; - SvCUR_set(str, e->base.end - e->base.ptr); - SvPOK_only(str); - SvUTF8_on(str); - PUSHMARK(sp); - XPUSHs(e->enc); - XPUSHs(str); - XPUSHs(&PL_sv_yes); - PUTBACK; - if (perl_call_method("encode", G_SCALAR) != 1) { - Perl_die(aTHX_ "panic: encode did not return a value"); - } - SPAGAIN; - str = POPs; - PUTBACK; - s = SvPV(str, len); - count = PerlIO_unread(PerlIONext(f),s,len); - if (count != len) { - code = -1; - } - FREETMPS; - LEAVE; - } - } - e->base.ptr = e->base.end = e->base.buf; - PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF | PERLIO_F_WRBUF); - } - return code; -} - -IV -PerlIOEncode_close(pTHX_ PerlIO * f) -{ - PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode); - IV code = PerlIOBase_close(aTHX_ f); - if (e->bufsv) { - if (e->base.buf && e->base.ptr > e->base.buf) { - Perl_croak(aTHX_ "Close with partial character"); - } - SvREFCNT_dec(e->bufsv); - e->bufsv = Nullsv; - } - e->base.buf = NULL; - e->base.ptr = NULL; - e->base.end = NULL; - PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF | PERLIO_F_WRBUF); - return code; -} - -Off_t -PerlIOEncode_tell(pTHX_ PerlIO * f) -{ - PerlIOBuf *b = PerlIOSelf(f, PerlIOBuf); - /* Unfortunately the only way to get a postion is to (re-)translate, - the UTF8 we have in bufefr and then ask layer below - */ - PerlIO_flush(f); - if (b->buf && b->ptr > b->buf) { - Perl_croak(aTHX_ "Cannot tell at partial character"); - } - return PerlIO_tell(PerlIONext(f)); -} - -PerlIO * -PerlIOEncode_dup(pTHX_ PerlIO * f, PerlIO * o, - CLONE_PARAMS * params, int flags) -{ - if ((f = PerlIOBase_dup(aTHX_ f, o, params, flags))) { - PerlIOEncode *fe = PerlIOSelf(f, PerlIOEncode); - PerlIOEncode *oe = PerlIOSelf(o, PerlIOEncode); - if (oe->enc) { - fe->enc = PerlIO_sv_dup(aTHX_ oe->enc, params); - } - } - return f; -} - -PerlIO_funcs PerlIO_encode = { - "encoding", - sizeof(PerlIOEncode), - PERLIO_K_BUFFERED|PERLIO_K_DESTRUCT, - PerlIOEncode_pushed, - PerlIOEncode_popped, - PerlIOBuf_open, - PerlIOEncode_getarg, - PerlIOBase_fileno, - PerlIOEncode_dup, - PerlIOBuf_read, - PerlIOBuf_unread, - PerlIOBuf_write, - PerlIOBuf_seek, - PerlIOEncode_tell, - PerlIOEncode_close, - PerlIOEncode_flush, - PerlIOEncode_fill, - PerlIOBase_eof, - PerlIOBase_error, - PerlIOBase_clearerr, - PerlIOBase_setlinebuf, - PerlIOEncode_get_base, - PerlIOBuf_bufsiz, - PerlIOBuf_get_ptr, - PerlIOBuf_get_cnt, - PerlIOBuf_set_ptrcnt, -}; -#endif /* encode layer */ void Encode_XSEncoding(pTHX_ encode_t * enc) @@ -636,33 +189,56 @@ encode_method(pTHX_ encode_t * enc, encpage_t * dir, SV * src, case ENCODE_NOREP: if (dir == enc->f_utf8) { - if (!check && ckWARN_d(WARN_UTF8)) { - STRLEN clen; - UV ch = - utf8n_to_uvuni(s + slen, (SvCUR(src) - slen), - &clen, 0); - Perl_warner(aTHX_ packWARN(WARN_UTF8), - "\"\\N{U+%" UVxf - "}\" does not map to %s", ch, - enc->name[0]); - /* FIXME: Skip over the character, copy in replacement and continue - * but that is messy so for now just fail. - */ - return &PL_sv_undef; + STRLEN clen; + UV ch = + utf8n_to_uvuni(s + slen, (SvCUR(src) - slen), + &clen, 0); + if (!check) { /* fallback char */ + sdone += slen + clen; + ddone += dlen + enc->replen; + sv_catpvn(dst, enc->rep, enc->replen); } - else { - return &PL_sv_undef; + else if (check == -1){ /* perlqq */ + SV* perlqq = + sv_2mortal(newSVpvf("\\x{%x}", ch)); + sdone += slen + clen; + ddone += dlen + SvLEN(perlqq); + sv_catsv(dst, perlqq); + } + else { + Perl_croak(aTHX_ + "\"\\N{U+%" UVxf + "}\" does not map to %s", ch, + enc->name[0]); } + } + else { + if (!check){ /* fallback char */ + sdone += slen + 1; + ddone += dlen + strlen(FBCHAR_UTF8); + sv_catpv(dst, FBCHAR_UTF8); } + else if (check == -1){ /* perlqq */ + SV* perlqq = + sv_2mortal(newSVpvf("\\x%02X", s[slen])); + sdone += slen + 1; + ddone += dlen + SvLEN(perlqq); + sv_catsv(dst, perlqq); + } else { - /* UTF-8 is supposed to be "Universal" so should not happen - for real characters, but some encodings have non-assigned - codes which may occur. - */ - Perl_croak(aTHX_ "%s \"\\x%02X\" does not map to Unicode (%d)", - enc->name[0], (U8) s[slen], code); + /* UTF-8 is supposed to be "Universal" so should not + happen for real characters, but some encodings + have non-assigned codes which may occur. */ + Perl_croak(aTHX_ "%s \"\\x%02X\" " + "does not map to Unicode (%d)", + enc->name[0], (U8) s[slen], code); } - break; + } + dlen = SvCUR(dst); + d = SvPVX(dst) + dlen; + s = SvPVX(src) + sdone; + slen = tlen - sdone; + break; default: Perl_croak(aTHX_ "Unexpected code %d converting %s %s", @@ -722,10 +298,10 @@ CODE: } void -Method_decode(obj,src,check = FALSE) +Method_decode(obj,src,check = 0) SV * obj SV * src -bool check +int check CODE: { encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); @@ -735,10 +311,10 @@ CODE: } void -Method_encode(obj,src,check = FALSE) +Method_encode(obj,src,check = 0) SV * obj SV * src -bool check +int check CODE: { encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); @@ -761,8 +337,8 @@ CODE: int ucs2 = SvTRUE(*hv_fetch((HV *)SvRV(obj),"ucs2",4,0)); SV *result = newSVpvn("",0); STRLEN ulen; - U8 *s = SvPVbyte(str,ulen); - U8 *e = SvEND(str); + U8 *s = (U8 *)SvPVbyte(str,ulen); + U8 *e = (U8 *)SvEND(str); ST(0) = sv_2mortal(result); SvUTF8_on(result); @@ -790,7 +366,7 @@ CODE: while (s < e && s+size <= e) { UV ord = enc_unpack(aTHX_ &s,e,size,endian); U8 *d; - if (size != 4 && !valid_ucs2(ord)) { + if (size != 4 && invalid_ucs2(ord)) { if (ucs2) { if (SvTRUE(chk)) { croak("%s:no surrogates allowed %"UVxf, @@ -851,8 +427,8 @@ CODE: int ucs2 = SvTRUE(*hv_fetch((HV *)SvRV(obj),"ucs2",4,0)); SV *result = newSVpvn("",0); STRLEN ulen; - U8 *s = SvPVutf8(utf8,ulen); - U8 *e = SvEND(utf8); + U8 *s = (U8 *)SvPVutf8(utf8,ulen); + U8 *e = (U8 *)SvEND(utf8); ST(0) = sv_2mortal(result); if (!endian) { endian = (size == 4) ? 'N' : 'n'; @@ -866,7 +442,7 @@ CODE: STRLEN len; UV ord = utf8n_to_uvuni(s, e-s, &len, 0); s += len; - if (size != 4 && !valid_ucs2(ord)) { + if (size != 4 && invalid_ucs2(ord)) { if (!issurrogate(ord)){ if (ucs2) { if (SvTRUE(chk)) { @@ -999,9 +575,9 @@ _utf8_to_bytes(sv, ...) RETVAL bool -is_utf8(sv, check = FALSE) +is_utf8(sv, check = 0) SV * sv -bool check +int check CODE: { if (SvGMAGICAL(sv)) /* it could be $1, for example */ @@ -1056,7 +632,7 @@ _utf8_off(sv) BOOT: { #if defined(USE_PERLIO) && !defined(USE_SFIO) - PerlIO_define_layer(aTHX_ &PerlIO_encode); +/* PerlIO_define_layer(aTHX_ &PerlIO_encode); */ #endif #include "def_t.exh" } diff --git a/ext/Encode/Encode/Makefile_PL.e2x b/ext/Encode/Encode/Makefile_PL.e2x index 59b5149353..8571033f70 100644 --- a/ext/Encode/Encode/Makefile_PL.e2x +++ b/ext/Encode/Encode/Makefile_PL.e2x @@ -16,7 +16,7 @@ my %tables = ( #### DO NOT EDIT BEYOND THIS POINT! my $enc2xs = '$_Enc2xs_'; WriteMakefile( - INC => "-I$_Inc_", + INC => "-I$_E2X_", #### END_OF_HEADER -- DO NOT EDIT THIS LINE BY HAND! #### NAME => 'Encode::'.$name, VERSION_FROM => "$name.pm", diff --git a/ext/Encode/Encode/_PM.e2x b/ext/Encode/Encode/_PM.e2x index 208b87ee08..eb59cd1b52 100644 --- a/ext/Encode/Encode/_PM.e2x +++ b/ext/Encode/Encode/_PM.e2x @@ -3,7 +3,7 @@ our $VERSION = "0.01"; use Encode; use XSLoader; -XSLoader::load('Encode::$_Name_', $VERSION); +XSLoader::load(__PACKAGE__,$VERSION); 1; __END__ diff --git a/ext/Encode/JP/JP.pm b/ext/Encode/JP/JP.pm index 10eb59b2d4..1a4d42e92b 100644 --- a/ext/Encode/JP/JP.pm +++ b/ext/Encode/JP/JP.pm @@ -5,10 +5,10 @@ BEGIN { } } use Encode; -our $VERSION = do { my @r = (q$Revision: 1.22 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; +our $VERSION = do { my @r = (q$Revision: 1.23 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; use XSLoader; -XSLoader::load('Encode::JP',$VERSION); +XSLoader::load(__PACKAGE__,$VERSION); use Encode::JP::JIS7; diff --git a/ext/Encode/KR/KR.pm b/ext/Encode/KR/KR.pm index 662f6c09bf..f7c9a82fd3 100644 --- a/ext/Encode/KR/KR.pm +++ b/ext/Encode/KR/KR.pm @@ -4,14 +4,15 @@ BEGIN { die "Encode::KR not supported on EBCDIC\n"; } } -our $VERSION = do { my @r = (q$Revision: 1.20 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; +our $VERSION = do { my @r = (q$Revision: 1.21 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; use Encode; use XSLoader; -XSLoader::load('Encode::KR',$VERSION); +XSLoader::load(__PACKAGE__,$VERSION); 1; __END__ + =head1 NAME Encode::KR - Korean Encodings diff --git a/ext/Encode/MANIFEST b/ext/Encode/MANIFEST index 22f12c6f74..499998b063 100644 --- a/ext/Encode/MANIFEST +++ b/ext/Encode/MANIFEST @@ -44,7 +44,6 @@ lib/Encode/JP/JIS7.pm Encode extension lib/Encode/KR/2022_KR.pm Encode extension lib/Encode/Supported.pod Documents supported encodings lib/Encode/Unicode.pm Encode extension -lib/Encode/XS.pm Encode extension t/Aliases.t Encode extension test t/CN.t Encode extension test t/Encode.t Encode extension test diff --git a/ext/Encode/Symbol/Symbol.pm b/ext/Encode/Symbol/Symbol.pm index 33ef710d0b..9aed69d7e6 100644 --- a/ext/Encode/Symbol/Symbol.pm +++ b/ext/Encode/Symbol/Symbol.pm @@ -1,9 +1,9 @@ package Encode::Symbol; use Encode; -our $VERSION = do { my @r = (q$Revision: 1.20 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; +our $VERSION = do { my @r = (q$Revision: 1.21 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; use XSLoader; -XSLoader::load('Encode::Symbol',$VERSION); +XSLoader::load(__PACKAGE__,$VERSION); 1; __END__ diff --git a/ext/Encode/TW/TW.pm b/ext/Encode/TW/TW.pm index 46a4bfb273..294144aad2 100644 --- a/ext/Encode/TW/TW.pm +++ b/ext/Encode/TW/TW.pm @@ -4,11 +4,11 @@ BEGIN { die "Encode::TW not supported on EBCDIC\n"; } } -our $VERSION = do { my @r = (q$Revision: 1.20 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; +our $VERSION = do { my @r = (q$Revision: 1.21 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; use Encode; use XSLoader; -XSLoader::load('Encode::TW',$VERSION); +XSLoader::load(__PACKAGE__,$VERSION); 1; __END__ diff --git a/ext/Encode/bin/enc2xs b/ext/Encode/bin/enc2xs index bc03b82091..10feaf8bfa 100644 --- a/ext/Encode/bin/enc2xs +++ b/ext/Encode/bin/enc2xs @@ -8,7 +8,7 @@ BEGIN { use strict; use Getopt::Std; my @orig_ARGV = @ARGV; -our $VERSION = do { my @r = (q$Revision: 1.21 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; +our $VERSION = do { my @r = (q$Revision: 1.22 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; # These may get re-ordered. # RAW is a do_now as inserted by &enter @@ -252,12 +252,16 @@ if ($doC) my ($e2u,$u2e,$erep,$min_el,$max_el) = @{$encoding{$name}}; output(\*C,$name.'_utf8',$e2u); output(\*C,'utf8_'.$name,$u2e); - push(@{$encoding{$name}},outstring(\*C,$e2u->{Cname}.'_def',$erep)); + # push(@{$encoding{$name}},outstring(\*C,$e2u->{Cname}.'_def',$erep)); } foreach my $enc (sort cmp_name keys %encoding) { - my ($e2u,$u2e,$rep,$min_el,$max_el,$rsym) = @{$encoding{$enc}}; - my @info = ($e2u->{Cname},$u2e->{Cname},$rsym,length($rep),$min_el,$max_el); + # my ($e2u,$u2e,$rep,$min_el,$max_el,$rsym) = @{$encoding{$enc}}; + my ($e2u,$u2e,$rep,$min_el,$max_el) = @{$encoding{$enc}}; + #my @info = ($e2u->{Cname},$u2e->{Cname},$rsym,length($rep),$min_el,$max_el); + my $replen = 0; + $replen++ while($rep =~ /\G\\x[0-9A-Fa-f]/g); + my @info = ($e2u->{Cname},$u2e->{Cname},qq("$rep"),$replen,$min_el,$max_el); my $sym = "${enc}_encoding"; $sym =~ s/\W+/_/g; print C "encode_t $sym = \n"; @@ -368,10 +372,12 @@ sub compile_ucm my $min_el; if (exists $attr{'subchar'}) { - my @byte; - $attr{'subchar'} =~ /^\s*/cg; - push(@byte,$1) while $attr{'subchar'} =~ /\G\\x([0-9a-f]+)/icg; - $erep = join('',map(chr(hex($_)),@byte)); + #my @byte; + #$attr{'subchar'} =~ /^\s*/cg; + #push(@byte,$1) while $attr{'subchar'} =~ /\G\\x([0-9a-f]+)/icg; + #$erep = join('',map(chr(hex($_)),@byte)); + $erep = $attr{'subchar'}; + $erep =~ s/^\s+//; $erep =~ s/\s+$//; } print "Reading $name ($cs)\n"; my $nfb = 0; @@ -838,11 +844,37 @@ use vars qw( $_Enc2xs $_Version $_Inc + $_E2X $_Name $_TableFiles $_Now ); +sub find_e2x{ + eval { require File::Find }; + my (@inc, %e2x_dir); + for my $inc (@INC){ + push @inc, $inc unless $inc eq '.'; #skip current dir + } + File::Find::find( + sub { + my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size, + $atime,$mtime,$ctime,$blksize,$blocks) + = lstat($_) or return; + -f _ or return; + if (/^.*\.e2x$/o){ + $e2x_dir{$File::Find::dir} ||= $mtime; + } + return; + }, @inc); + warn join("\n", keys %e2x_dir), "\n"; + for my $d (sort {$e2x_dir{$a} <=> $e2x_dir{$b}} keys %e2x_dir){ + $_E2X = $d; + # warn "$_E2X => ", scalar localtime($e2x_dir{$d}); + return $_E2X; + } +} + sub make_makefile_pl { eval { require Encode; }; @@ -850,21 +882,22 @@ sub make_makefile_pl # our used for variable expanstion $_Enc2xs = $0; $_Version = $VERSION; - $_Inc = $INC{"Encode.pm"}; $_Inc =~ s/\.pm$//o; + $_E2X = find_e2x(); $_Name = shift; $_TableFiles = join(",", map {qq('$_')} @_); $_Now = scalar localtime(); + eval { require File::Spec; }; warn "Generating Makefile.PL\n"; - _print_expand(File::Spec->catfile($_Inc,"Makefile_PL.e2x"),"Makefile.PL"); + _print_expand(File::Spec->catfile($_E2X,"Makefile_PL.e2x"),"Makefile.PL"); warn "Generating $_Name.pm\n"; - _print_expand(File::Spec->catfile($_Inc,"_PM.e2x"), "$_Name.pm"); + _print_expand(File::Spec->catfile($_E2X,"_PM.e2x"), "$_Name.pm"); warn "Generating t/$_Name.t\n"; - _print_expand(File::Spec->catfile($_Inc,"_T.e2x"), "t/$_Name.t"); + _print_expand(File::Spec->catfile($_E2X,"_T.e2x"), "t/$_Name.t"); warn "Generating README\n"; - _print_expand(File::Spec->catfile($_Inc,"README.e2x"), "README"); + _print_expand(File::Spec->catfile($_E2X,"README.e2x"), "README"); warn "Generating t/$_Name.t\n"; - _print_expand(File::Spec->catfile($_Inc,"Changes.e2x"), "Changes"); + _print_expand(File::Spec->catfile($_E2X,"Changes.e2x"), "Changes"); exit; } @@ -897,8 +930,7 @@ sub make_configlocal_pm $Encode::Config::ExtModule{$enc} and next; my $mod = "Encode/$f"; $mod =~ s/\.pm$//o; $mod =~ s,/,::,og; - warn "$enc => $mod\n"; - $LocalMod{$enc} = $mod; + $LocalMod{$enc} ||= $mod; } } } @@ -907,10 +939,12 @@ sub make_configlocal_pm $_ModLines .= qq(\$Encode::ExtModule{'$enc'} =\t"$LocalMod{$enc}";\n); } + warn $_ModLines; $_LocalVer = _mkversion(); + $_E2X = find_e2x(); $_Inc = $INC{"Encode.pm"}; $_Inc =~ s/\.pm$//o; - warn "Writing Encode::ConfigLocal\n"; - _print_expand(File::Spec->catfile($_Inc,"ConfigLocal_PM.e2x"), + warn "Writing ", File::Spec->catfile($_Inc,"ConfigLocal.pm"), "\n"; + _print_expand(File::Spec->catfile($_E2X,"ConfigLocal_PM.e2x"), File::Spec->catfile($_Inc,"ConfigLocal.pm")); exit; } diff --git a/ext/Encode/bin/piconv b/ext/Encode/bin/piconv index b70a1a801f..3880dea288 100644 --- a/ext/Encode/bin/piconv +++ b/ext/Encode/bin/piconv @@ -1,5 +1,5 @@ #!./perl -# $Id: piconv,v 1.21 2002/04/09 20:06:15 dankogai Exp $ +# $Id: piconv,v 1.22 2002/04/16 23:35:00 dankogai Exp $ # use 5.7.3; use strict; @@ -9,7 +9,7 @@ my %Scheme = map {$_ => 1} qw(from_to decode_encode perlio); use Getopt::Std; -my %Opt; getopts("hDS:lf:t:s:", \%Opt); +my %Opt; getopts("pcC:hDS:lf:t:s:", \%Opt); $Opt{h} and help(); $Opt{l} and list_encodings(); my $locale = $ENV{LC_CTYPE} || $ENV{LC_ALL} || $ENV{LANG}; @@ -18,6 +18,8 @@ my $from = $Opt{f} || $locale or help("from_encoding unspecified"); my $to = $Opt{t} || $locale or help("to_encoding unspecified"); $Opt{s} and Encode::from_to($Opt{s}, $from, $to) and print $Opt{s} and exit; my $scheme = exists $Scheme{$Opt{S}} ? $Opt{S} : 'from_to'; +$Opt{C} ||= $Opt{c}; +$Opt{p} and $Opt{C} = -1; if ($Opt{D}){ my $cfrom = Encode->getEncoding($from)->name; @@ -32,12 +34,12 @@ EOT # default if ($scheme eq 'from_to'){ while(<>){ - Encode::from_to($_, $from, $to); print; + Encode::from_to($_, $from, $to, $Opt{C}); print; }; # step-by-step }elsif ($scheme eq 'decode_encode'){ while(<>){ - my $decoded = decode($from, $_); + my $decoded = decode($from, $_, $Opt{C}); my $encoded = encode($to, $decoded); print $encoded; }; @@ -121,6 +123,19 @@ and common aliases work, like "latin1" for "ISO 8859-1", or "ibm850" instead of "cp850", or "winlatin1" for "cp1252". See L<Encode::Supported> for the full discussion. +=item -C I<N> + +Check the validity of the stream if I<N> = 1. When I<N> = -1, something +interesting happens when it encounters an invalid character. + +=item -c + +Same as C<-C 1>. + +=item -p + +Same as C<-C -1>. + =item -h Show usage. diff --git a/ext/Encode/encoding.pm b/ext/Encode/encoding.pm index d5b32c7b5b..fd8ae1abac 100644 --- a/ext/Encode/encoding.pm +++ b/ext/Encode/encoding.pm @@ -1,5 +1,5 @@ package encoding; -our $VERSION = do { my @r = (q$Revision: 1.26 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; +our $VERSION = do { my @r = (q$Revision: 1.28 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; use Encode; use strict; @@ -11,6 +11,16 @@ BEGIN { } } +our $HAS_PERLIO_ENCODING; + +eval { require PerlIO::encoding; }; +if ($@){ + $HAS_PERLIO_ENCODING = 0; +}else{ + $HAS_PERLIO_ENCODING = 1; + binmode(STDIN); +} + sub import { my $class = shift; my $name = shift; @@ -24,9 +34,10 @@ sub import { } unless ($arg{Filter}){ ${^ENCODING} = $enc; # this is all you need, actually. + $HAS_PERLIO_ENCODING or return 1; for my $h (qw(STDIN STDOUT)){ if ($arg{$h}){ - unless (defined find_encoding($arg{h})) { + unless (defined find_encoding($arg{$h})) { require Carp; Carp::croak "Unknown encoding for $h, '$arg{$h}'"; } @@ -46,8 +57,8 @@ sub import { eval { require Filter::Util::Call ; Filter::Util::Call->import ; - binmode(STDIN, ":raw"); - binmode(STDOUT, ":raw"); + binmode(STDIN); + binmode(STDOUT); filter_add(sub{ my $status; if (($status = filter_read()) > 0){ @@ -65,8 +76,8 @@ sub import { sub unimport{ no warnings; undef ${^ENCODING}; - binmode(STDIN, ":raw"); - binmode(STDOUT, ":raw"); + binmode(STDIN); + binmode(STDOUT); if ($INC{"Filter/Util/Call.pm"}){ eval { filter_del() }; } diff --git a/ext/Encode/lib/Encode/CN/HZ.pm b/ext/Encode/lib/Encode/CN/HZ.pm index d9c261ef1f..c599928f6e 100644 --- a/ext/Encode/lib/Encode/CN/HZ.pm +++ b/ext/Encode/lib/Encode/CN/HZ.pm @@ -3,7 +3,7 @@ package Encode::CN::HZ; use strict; use vars qw($VERSION); -$VERSION = do { my @r = (q$Revision: 1.0 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; +$VERSION = do { my @r = (q$Revision: 1.1 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; use Encode (); use Encode::CN; @@ -64,8 +64,13 @@ sub encode no warnings 'utf8'; my $char = substr($str, $index, 1); - my $try = $gb->encode($char); # try to encode this character - + # try to encode this character + # with CHECK on so it stops at proper place. + # also note that the assignement was braced in eval + # -- dankogai + my $try; + eval{ $try = $gb->encode($char, 1) }; + if (defined($try)) { # is a GB character: if ($in_gb) { $out .= $try; # in GB mode - just append it diff --git a/ext/Encode/lib/Encode/Config.pm b/ext/Encode/lib/Encode/Config.pm index 34f7b18220..ff81c2a251 100644 --- a/ext/Encode/lib/Encode/Config.pm +++ b/ext/Encode/lib/Encode/Config.pm @@ -2,7 +2,7 @@ # Demand-load module list # package Encode::Config; -our $VERSION = do { my @r = (q$Revision: 1.2 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; +our $VERSION = do { my @r = (q$Revision: 1.3 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; use strict; @@ -150,3 +150,10 @@ while (my ($enc,$mod) = each %ExtModule){ } 1; +__END__ + +=head1 NAME + +Encode::Config -- internally used by Encode + +=cut diff --git a/ext/Encode/lib/Encode/KR/2022_KR.pm b/ext/Encode/lib/Encode/KR/2022_KR.pm index 4a3b1d086a..c71f0e480c 100644 --- a/ext/Encode/lib/Encode/KR/2022_KR.pm +++ b/ext/Encode/lib/Encode/KR/2022_KR.pm @@ -4,7 +4,7 @@ use base 'Encode::Encoding'; use strict; -our $VERSION = do { my @r = (q$Revision: 1.0 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; +our $VERSION = do { my @r = (q$Revision: 1.1 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; my $canon = 'iso-2022-kr'; @@ -64,3 +64,9 @@ sub euc_iso{ 1; __END__ + +=head1 NAME + +Encode::KR::2022_KR -- internally used by Encode::KR + +=cut diff --git a/ext/Encode/lib/Encode/XS.pm b/ext/Encode/lib/Encode/XS.pm deleted file mode 100644 index 368ab0c561..0000000000 --- a/ext/Encode/lib/Encode/XS.pm +++ /dev/null @@ -1,13 +0,0 @@ -package Encode::XS; -use strict; -our $VERSION = do { my @r = (q$Revision: 1.0 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; - -use base 'Encode::Encoding'; -1; -__END__ - -=head1 NAME - -Encode::XS -- for internal use only - -=cut diff --git a/ext/Encode/t/CN.t b/ext/Encode/t/CN.t index 749f9139c5..893c29fa6d 100644 --- a/ext/Encode/t/CN.t +++ b/ext/Encode/t/CN.t @@ -8,10 +8,6 @@ BEGIN { print "1..0 # Skip: Encode was not built\n"; exit 0; } - unless (find PerlIO::Layer 'perlio') { - print "1..0 # Skip: PerlIO was not built\n"; - exit 0; - } if (ord("A") == 193) { print "1..0 # Skip: EBCDIC\n"; exit 0; diff --git a/ext/Encode/t/Encoder.t b/ext/Encode/t/Encoder.t index 22c240be45..af83cf60cf 100644 --- a/ext/Encode/t/Encoder.t +++ b/ext/Encode/t/Encoder.t @@ -1,5 +1,5 @@ # -# $Id: Encoder.t,v 1.2 2002/04/10 22:28:40 dankogai Exp $ +# $Id: Encoder.t,v 1.3 2002/04/16 23:35:00 dankogai Exp $ # BEGIN { @@ -8,16 +8,6 @@ BEGIN { print "1..0 # Skip: Encode was not built\n"; exit 0; } -# should work without perlio -# unless (find PerlIO::Layer 'perlio') { -# print "1..0 # Skip: PerlIO was not built\n"; -# exit 0; -# } -# should work on EBCDIC -# if (ord("A") == 193) { -# print "1..0 # Skip: EBCDIC\n"; -# exit 0; -# } $| = 1; } diff --git a/ext/Encode/t/JP.t b/ext/Encode/t/JP.t index c9b1dde332..89238b58d0 100644 --- a/ext/Encode/t/JP.t +++ b/ext/Encode/t/JP.t @@ -8,10 +8,6 @@ BEGIN { print "1..0 # Skip: Encode was not built\n"; exit 0; } - unless (find PerlIO::Layer 'perlio') { - print "1..0 # Skip: PerlIO was not built\n"; - exit 0; - } if (ord("A") == 193) { print "1..0 # Skip: EBCDIC\n"; exit 0; @@ -77,47 +73,51 @@ ok(compare($euc,$rnd) == 0); is($enc->name,'euc-jp'); -print "# src :encoding test\n"; -open($src,"<encoding(euc-jp)",$euc) || die "Cannot open $euc:$!"; -binmode($src); -ok(defined($src) && fileno($src)); -open($dst,">:utf8",$utf) || die "Cannot open $utf:$!"; -binmode($dst); -ok(defined($dst) || fileno($dst)); -my $out = select($dst); -while (<$src>) - { - print; - } -close($dst); -close($src); - -TODO: -{ - local $TODO = 'needs debugging on VMS' if $^O eq 'VMS'; - ok(compare($utf,$ref) == 0); +my $skip_perlio; +eval { require PerlIO::encoding; }; +if ($@){ + $skip_perlio = 1; +}else{ + $skip_perlio = 0; + binmode(STDIN); } -select($out); -SKIP: -{ - #skip "Multi-byte write is broken",3; - print "# dst :encoding test\n"; - open($src,"<:utf8",$ref) || die "Cannot open $ref:$!"; - binmode($src); - ok(defined($src) || fileno($src)); - open($dst,">encoding(euc-jp)",$rnd) || die "Cannot open $rnd:$!"; - binmode($dst); - ok(defined($dst) || fileno($dst)); - my $out = select($dst); - while (<$src>) - { - print; - } - close($dst); - close($src); - ok(compare($euc,$rnd) == 0); - select($out); +$skip_perlio ||= (@ARGV and shift eq 'perlio'); + +SKIP: { + skip "PerlIO Encoding Needed", 6 if $skip_perlio; + print "# src :encoding test\n"; + open($src,"<encoding(euc-jp)",$euc) || die "Cannot open $euc:$!"; + binmode($src); + ok(defined($src) && fileno($src)); + open($dst,">:utf8",$utf) || die "Cannot open $utf:$!"; + binmode($dst); + ok(defined($dst) || fileno($dst)); + my $out = select($dst); + while (<$src>){ print; } + close($dst); + close($src); + + TODO: + { + local $TODO = 'needs debugging on VMS' if $^O eq 'VMS'; + ok(compare($utf,$ref) == 0); + } + select($out); + + print "# dst :encoding test\n"; + open($src,"<:utf8",$ref) || die "Cannot open $ref:$!"; + binmode($src); + ok(defined($src) || fileno($src)); + open($dst,">encoding(euc-jp)",$rnd) || die "Cannot open $rnd:$!"; + binmode($dst); + ok(defined($dst) || fileno($dst)); + $out = select($dst); + while (<$src>) { print; } + close($dst); + close($src); + ok(compare($euc,$rnd) == 0); + select($out); } is($enc->name,'euc-jp'); diff --git a/ext/Encode/t/KR.t b/ext/Encode/t/KR.t index fd1c503bad..e42271b4fc 100644 --- a/ext/Encode/t/KR.t +++ b/ext/Encode/t/KR.t @@ -77,47 +77,51 @@ ok(compare($euc,$rnd) == 0); is($enc->name,'euc-kr'); -print "# src :encoding test\n"; -open($src,"<encoding(euc-kr)",$euc) || die "Cannot open $euc:$!"; -binmode($src); -ok(defined($src) && fileno($src)); -open($dst,">:utf8",$utf) || die "Cannot open $utf:$!"; -binmode($dst); -ok(defined($dst) || fileno($dst)); -my $out = select($dst); -while (<$src>) - { - print; - } -close($dst); -close($src); - -TODO: -{ - local $TODO = 'needs debugging on VMS' if $^O eq 'VMS'; - ok(compare($utf,$ref) == 0); +my $skip_perlio; +eval { require PerlIO::encoding; }; +if ($@){ + $skip_perlio = 1; +}else{ + $skip_perlio = 0; + binmode(STDIN); } -select($out); -SKIP: -{ - #skip "Multi-byte write is broken",3; - print "# dst :encoding test\n"; - open($src,"<:utf8",$ref) || die "Cannot open $ref:$!"; - binmode($src); - ok(defined($src) || fileno($src)); - open($dst,">encoding(euc-kr)",$rnd) || die "Cannot open $rnd:$!"; - binmode($dst); - ok(defined($dst) || fileno($dst)); - my $out = select($dst); - while (<$src>) - { - print; - } - close($dst); - close($src); - ok(compare($euc,$rnd) == 0); - select($out); +$skip_perlio ||= (@ARGV and shift eq 'perlio'); + +SKIP: { + skip "PerlIO Encoding Needed", 6 if $skip_perlio; + print "# src :encoding test\n"; + open($src,"<encoding(euc-kr)",$euc) || die "Cannot open $euc:$!"; + binmode($src); + ok(defined($src) && fileno($src)); + open($dst,">:utf8",$utf) || die "Cannot open $utf:$!"; + binmode($dst); + ok(defined($dst) || fileno($dst)); + my $out = select($dst); + while (<$src>) { print; } + close($dst); + close($src); + + TODO: + { + local $TODO = 'needs debugging on VMS' if $^O eq 'VMS'; + ok(compare($utf,$ref) == 0); + } + select($out); + + print "# dst :encoding test\n"; + open($src,"<:utf8",$ref) || die "Cannot open $ref:$!"; + binmode($src); + ok(defined($src) || fileno($src)); + open($dst,">encoding(euc-kr)",$rnd) || die "Cannot open $rnd:$!"; + binmode($dst); + ok(defined($dst) || fileno($dst)); + $out = select($dst); + while (<$src>) { print; } + close($dst); + close($src); + ok(compare($euc,$rnd) == 0); + select($out); } is($enc->name,'euc-kr'); diff --git a/ext/Encode/t/TW.t b/ext/Encode/t/TW.t index a51017abd5..5ce2c41156 100644 --- a/ext/Encode/t/TW.t +++ b/ext/Encode/t/TW.t @@ -8,10 +8,6 @@ BEGIN { print "1..0 # Skip: Encode was not built\n"; exit 0; } - unless (find PerlIO::Layer 'perlio') { - print "1..0 # Skip: PerlIO was not built\n"; - exit 0; - } if (ord("A") == 193) { print "1..0 # Skip: EBCDIC\n"; exit 0; diff --git a/ext/Encode/t/Unicode.t b/ext/Encode/t/Unicode.t index 02eac86d44..bc15aaf216 100644 --- a/ext/Encode/t/Unicode.t +++ b/ext/Encode/t/Unicode.t @@ -1,5 +1,5 @@ # -# $Id: Unicode.t,v 1.7 2002/04/14 22:05:20 dankogai Exp $ +# $Id: Unicode.t,v 1.8 2002/04/16 23:35:00 dankogai Exp $ # # This script is written entirely in ASCII, even though quoted literals # do include non-BMP unicode characters -- Are you happy, jhi? @@ -12,17 +12,6 @@ BEGIN { print "1..0 # Skip: Encode was not built\n"; exit 0; } -# should work without perlio -# unless (find PerlIO::Layer 'perlio') { -# print "1..0 # Skip: PerlIO was not built\n"; -# exit 0; -# } - -# should work on EBCDIC -# if (ord("A") == 193) { -# print "1..0 # Skip: EBCDIC\n"; -# exit 0; -# } $ON_EBCDIC = (ord("A") == 193) || $ARGV[0]; $| = 1; } diff --git a/ext/Encode/t/encoding.t b/ext/Encode/t/encoding.t index 85127ffec0..a51bb669f3 100644 --- a/ext/Encode/t/encoding.t +++ b/ext/Encode/t/encoding.t @@ -4,10 +4,6 @@ BEGIN { print "1..0 # Skip: Encode was not built\n"; exit 0; } - unless (find PerlIO::Layer 'perlio') { - print "1..0 # Skip: PerlIO was not built\n"; - exit 0; - } if (ord("A") == 193) { print "1..0 # encoding pragma does not support EBCDIC platforms\n"; exit(0); |