summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--MANIFEST45
-rw-r--r--ext/Encode/Byte/Byte.pm4
-rw-r--r--ext/Encode/CN/CN.pm5
-rw-r--r--ext/Encode/Changes45
-rw-r--r--ext/Encode/EBCDIC/EBCDIC.pm4
-rw-r--r--ext/Encode/Encode.pm18
-rw-r--r--ext/Encode/Encode.xs546
-rw-r--r--ext/Encode/Encode/Makefile_PL.e2x2
-rw-r--r--ext/Encode/Encode/_PM.e2x2
-rw-r--r--ext/Encode/JP/JP.pm4
-rw-r--r--ext/Encode/KR/KR.pm5
-rw-r--r--ext/Encode/MANIFEST1
-rw-r--r--ext/Encode/Symbol/Symbol.pm4
-rw-r--r--ext/Encode/TW/TW.pm4
-rw-r--r--ext/Encode/bin/enc2xs70
-rw-r--r--ext/Encode/bin/piconv23
-rw-r--r--ext/Encode/encoding.pm23
-rw-r--r--ext/Encode/lib/Encode/CN/HZ.pm11
-rw-r--r--ext/Encode/lib/Encode/Config.pm9
-rw-r--r--ext/Encode/lib/Encode/KR/2022_KR.pm8
-rw-r--r--ext/Encode/lib/Encode/XS.pm13
-rw-r--r--ext/Encode/t/CN.t4
-rw-r--r--ext/Encode/t/Encoder.t12
-rw-r--r--ext/Encode/t/JP.t86
-rw-r--r--ext/Encode/t/KR.t82
-rw-r--r--ext/Encode/t/TW.t4
-rw-r--r--ext/Encode/t/Unicode.t13
-rw-r--r--ext/Encode/t/encoding.t4
28 files changed, 352 insertions, 699 deletions
diff --git a/MANIFEST b/MANIFEST
index 4594ce474c..bbd35b6b0d 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -197,33 +197,40 @@ ext/DynaLoader/Makefile.PL Dynamic Loader makefile writer
ext/DynaLoader/README Dynamic Loader notes and intro
ext/DynaLoader/XSLoader_pm.PL Simple XS Loader perl module
ext/Encode/AUTHORS List of authors
-ext/Encode/bin/enc2xs Encode module generator
-ext/Encode/bin/piconv iconv by perl
-ext/Encode/bin/ucm2table Table Generator for testing
-ext/Encode/bin/ucmlint A UCM Lint utility
-ext/Encode/bin/unidump Unicode Dump like hexdump(1)
ext/Encode/Byte/Byte.pm Encode extension
ext/Encode/Byte/Makefile.PL Encode extension
-ext/Encode/Changes Change Log
ext/Encode/CN/CN.pm Encode extension
ext/Encode/CN/Makefile.PL Encode extension
+ext/Encode/Changes Change Log
ext/Encode/EBCDIC/EBCDIC.pm Encode extension
ext/Encode/EBCDIC/Makefile.PL Encode extension
-ext/Encode/encengine.c Encode extension
ext/Encode/Encode.pm Mother of all Encode extensions
ext/Encode/Encode.xs Encode extension
ext/Encode/Encode/Changes.e2x Skeleton file for enc2xs
ext/Encode/Encode/ConfigLocal_PM.e2x Skeleton file for enc2xs
-ext/Encode/Encode/encode.h Encode extension header file
ext/Encode/Encode/Makefile_PL.e2x Skeleton file for enc2xs
ext/Encode/Encode/README.e2x Skeleton file for enc2xs
ext/Encode/Encode/_PM.e2x Skeleton file for enc2xs
ext/Encode/Encode/_T.e2x Skeleton file for enc2xs
-ext/Encode/encoding.pm Perl Pragmactic Module
+ext/Encode/Encode/encode.h Encode extension header file
ext/Encode/JP/JP.pm Encode extension
ext/Encode/JP/Makefile.PL Encode extension
ext/Encode/KR/KR.pm Encode extension
ext/Encode/KR/Makefile.PL Encode extension
+ext/Encode/MANIFEST Encode extension
+ext/Encode/Makefile.PL Encode extension makefile writer
+ext/Encode/README Encode extension
+ext/Encode/Symbol/Makefile.PL Encode extension
+ext/Encode/Symbol/Symbol.pm Encode extension
+ext/Encode/TW/Makefile.PL Encode extension
+ext/Encode/TW/TW.pm Encode extension
+ext/Encode/bin/enc2xs Encode module generator
+ext/Encode/bin/piconv iconv by perl
+ext/Encode/bin/ucm2table Table Generator for testing
+ext/Encode/bin/ucmlint A UCM Lint utility
+ext/Encode/bin/unidump Unicode Dump like hexdump(1)
+ext/Encode/encengine.c Encode extension
+ext/Encode/encoding.pm Perl Pragmactic Module
ext/Encode/lib/Encode/Alias.pm Encode extension
ext/Encode/lib/Encode/CJKConstants.pm Encode extension
ext/Encode/lib/Encode/CN/HZ.pm Encode extension
@@ -235,17 +242,15 @@ ext/Encode/lib/Encode/JP/JIS7.pm Encode extension
ext/Encode/lib/Encode/KR/2022_KR.pm Encode extension
ext/Encode/lib/Encode/Supported.pod Documents supported encodings
ext/Encode/lib/Encode/Unicode.pm Encode extension
-ext/Encode/lib/Encode/XS.pm Encode extension
-ext/Encode/Makefile.PL Encode extension makefile writer
-ext/Encode/MANIFEST Encode extension
-ext/Encode/README Encode extension
-ext/Encode/Symbol/Makefile.PL Encode extension
-ext/Encode/Symbol/Symbol.pm Encode extension
ext/Encode/t/Aliases.t Encode extension test
-ext/Encode/t/bogus.ucm Sample data for ucmlint
ext/Encode/t/CN.t Encode extension test
ext/Encode/t/Encode.t Encode extension test
ext/Encode/t/Encoder.t Encode::Encoder test
+ext/Encode/t/JP.t Encode extension test
+ext/Encode/t/KR.t Encode extension test
+ext/Encode/t/TW.t Encode extension test
+ext/Encode/t/Unicode.t Encode extension test
+ext/Encode/t/bogus.ucm Sample data for ucmlint
ext/Encode/t/encoding.t encoding extension test
ext/Encode/t/gb2312.euc test data
ext/Encode/t/gb2312.ref test data
@@ -254,16 +259,10 @@ ext/Encode/t/jisx0208.euc test data
ext/Encode/t/jisx0208.ref test data
ext/Encode/t/jisx0212.euc test data
ext/Encode/t/jisx0212.ref test data
-ext/Encode/t/JP.t Encode extension test
ext/Encode/t/jperl.t encoding extension test
-ext/Encode/t/KR.t Encode extension test
ext/Encode/t/ksc5601.euc test data
ext/Encode/t/ksc5601.ref test data
-ext/Encode/t/TW.t Encode extension test
ext/Encode/t/unibench.pl Unicode benchmark
-ext/Encode/t/Unicode.t Encode extension test
-ext/Encode/TW/Makefile.PL Encode extension
-ext/Encode/TW/TW.pm Encode extension
ext/Encode/ucm/8859-1.ucm Unicode Character Map
ext/Encode/ucm/8859-10.ucm Unicode Character Map
ext/Encode/ucm/8859-11.ucm Unicode Character Map
@@ -352,9 +351,9 @@ ext/Encode/ucm/macHebrew.ucm Unicode Character Map
ext/Encode/ucm/macIceland.ucm Unicode Character Map
ext/Encode/ucm/macJapanese.ucm Unicode Character Map
ext/Encode/ucm/macKorean.ucm Unicode Character Map
-ext/Encode/ucm/macRoman.ucm Unicode Character Map
ext/Encode/ucm/macROMnn.ucm Unicode Character Map
ext/Encode/ucm/macRUMnn.ucm Unicode Character Map
+ext/Encode/ucm/macRoman.ucm Unicode Character Map
ext/Encode/ucm/macSami.ucm Unicode Character Map
ext/Encode/ucm/macSymbol.ucm Unicode Character Map
ext/Encode/ucm/macThai.ucm Unicode Character Map
diff --git a/ext/Encode/Byte/Byte.pm b/ext/Encode/Byte/Byte.pm
index a163c92e1d..e570505f8e 100644
--- a/ext/Encode/Byte/Byte.pm
+++ b/ext/Encode/Byte/Byte.pm
@@ -1,9 +1,9 @@
package Encode::Byte;
use Encode;
-our $VERSION = do { my @r = (q$Revision: 1.20 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = (q$Revision: 1.21 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
use XSLoader;
-XSLoader::load('Encode::Byte',$VERSION);
+XSLoader::load(__PACKAGE__,$VERSION);
1;
__END__
diff --git a/ext/Encode/CN/CN.pm b/ext/Encode/CN/CN.pm
index 2cdf969db9..5952cab69b 100644
--- a/ext/Encode/CN/CN.pm
+++ b/ext/Encode/CN/CN.pm
@@ -4,12 +4,11 @@ BEGIN {
die "Encode::CN not supported on EBCDIC\n";
}
}
-our $VERSION = do { my @r = (q$Revision: 1.21 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = (q$Revision: 1.22 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
use Encode;
-use Encode::CN::HZ;
use XSLoader;
-XSLoader::load('Encode::CN',$VERSION);
+XSLoader::load(__PACKAGE__,$VERSION);
# Relocated from Encode.pm
diff --git a/ext/Encode/Changes b/ext/Encode/Changes
index 88023bf851..06cc9b6d4c 100644
--- a/ext/Encode/Changes
+++ b/ext/Encode/Changes
@@ -1,9 +1,48 @@
# Revision history for Perl extension Encode.
#
-# $Id: Changes,v 1.40 2002/04/14 22:27:14 dankogai Exp $
+# $Id: Changes,v 1.42 2002/04/17 03:01:20 dankogai Exp dankogai $
#
-1.40 $Date: 2002/04/14 22:27:14 $
+1.42 $Date: 2002/04/17 03:01:20 $
+- lib/Encode/XS.pm
+ no-op module; Thought of adding a pod there but enc2xs has
+ one so gone.
+! encoding.pm
+! t/JP.pm
+! t/KR.pm
+ correct mechanism to detect Perlio::encoding layar installed.
+! Encode.xs
+ PerlIO Layer detached.
+
+1.41 2002/04/16 23:35:00
+! encoding.pm
+ binmode(STDIN|STDOUT ...) done iff PerlIO is available
+! t/*.t
+ Cleaned up PerlIO skip conditions to prepare for the upcoming
+ Encode - PerlIO forking.
+! Encode.pm
+ exported functions are now prototyped.
+! lib/Encode/CN/HZ.pm
+! bin/enc2xs
+! Encode.xs
+ fallback implemented # was /* FIXME */
+ affected programs revised to fit (only HZ was using the try-catch
+ approach which needed to be fixed for API-compliance).
+! Encode/Config.pm
+! Encode/KR/2022_KR.pm
+! Encode/KR/KR.pm
+ can find =head1 NAME now, jhi
+ Message-Id: <20020416083059.V30639@alpha.hut.fi>
+! encoding.pm
+ s/\{h\}/{$h}/g ;)
+! Encode.xs
+ now complies with less warnings with the pickest compilers.
+ Suggested by Craig, fixed by Dan.
+ ! Encode/Makefile_PL.e2x
+! bin/enc2xs
+ A bug that fails to find *.e2x in certain conditions fixed
+
+1.40 2002/04/14 22:27:14
+ Encode/ConfigLocal_PM.e2x
! lib/Encode/Config.pm
! bin/enc2xs
@@ -296,7 +335,7 @@
Typo fixes and improvements by jhi
Message-Id: <200204010201.FAA03564@alpha.hut.fi>, et al.
-1.11 $Date: 2002/04/14 22:27:14 $
+1.11 $Date: 2002/04/17 03:01:20 $
+ t/encoding.t
+ t/jperl.t
! MANIFEST
diff --git a/ext/Encode/EBCDIC/EBCDIC.pm b/ext/Encode/EBCDIC/EBCDIC.pm
index 92a17561f0..4eb674a753 100644
--- a/ext/Encode/EBCDIC/EBCDIC.pm
+++ b/ext/Encode/EBCDIC/EBCDIC.pm
@@ -1,9 +1,9 @@
package Encode::EBCDIC;
use Encode;
-our $VERSION = do { my @r = (q$Revision: 1.20 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = (q$Revision: 1.21 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
use XSLoader;
-XSLoader::load('Encode::EBCDIC',$VERSION);
+XSLoader::load(__PACKAGE__,$VERSION);
1;
__END__
diff --git a/ext/Encode/Encode.pm b/ext/Encode/Encode.pm
index e6a2048051..3dd63a8800 100644
--- a/ext/Encode/Encode.pm
+++ b/ext/Encode/Encode.pm
@@ -1,6 +1,6 @@
package Encode;
use strict;
-our $VERSION = do { my @r = (q$Revision: 1.40 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = (q$Revision: 1.42 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
our $DEBUG = 0;
require DynaLoader;
@@ -126,9 +126,10 @@ sub resolve_alias {
return;
}
-sub encode
+sub encode($$;$)
{
my ($name,$string,$check) = @_;
+ $check ||=0;
my $enc = find_encoding($name);
croak("Unknown encoding '$name'") unless defined $enc;
my $octets = $enc->encode($string,$check);
@@ -136,9 +137,10 @@ sub encode
return $octets;
}
-sub decode
+sub decode($$;$)
{
my ($name,$octets,$check) = @_;
+ $check ||=0;
my $enc = find_encoding($name);
croak("Unknown encoding '$name'") unless defined $enc;
my $string = $enc->decode($octets,$check);
@@ -146,9 +148,10 @@ sub decode
return $string;
}
-sub from_to
+sub from_to($$$;$)
{
my ($string,$from,$to,$check) = @_;
+ $check ||=0;
my $f = find_encoding($from);
croak("Unknown encoding '$from'") unless defined $f;
my $t = find_encoding($to);
@@ -160,14 +163,14 @@ sub from_to
return defined($_[0] = $string) ? length($string) : undef ;
}
-sub encode_utf8
+sub encode_utf8($)
{
my ($str) = @_;
utf8::encode($str);
return $str;
}
-sub decode_utf8
+sub decode_utf8($)
{
my ($str) = @_;
return undef unless utf8::decode($str);
@@ -249,7 +252,8 @@ sub predefine_encodings{
}
require Encode::Encoding;
-require Encode::XS;
+
+eval { require PerlIO::encoding };
1;
diff --git a/ext/Encode/Encode.xs b/ext/Encode/Encode.xs
index 229359e7f2..9c30c4a9d7 100644
--- a/ext/Encode/Encode.xs
+++ b/ext/Encode/Encode.xs
@@ -7,15 +7,14 @@
#include "def_t.h"
#define FBCHAR 0xFFFd
+#define FBCHAR_UTF8 "\xEF\xBF\xBD"
#define BOM_BE 0xFeFF
#define BOM16LE 0xFFFe
#define BOM32LE 0xFFFe0000
-
-#define valid_ucs2(x) ((0 <= (x) && (x) < 0xD800) || (0xDFFF < (x) && (x) <= 0xFFFF))
-
#define issurrogate(x) (0xD800 <= (x) && (x) <= 0xDFFF )
#define isHiSurrogate(x) (0xD800 <= (x) && (x) < 0xDC00 )
#define isLoSurrogate(x) (0xDC00 <= (x) && (x) <= 0xDFFF )
+#define invalid_ucs2(x) ( issurrogate(x) || 0xFFFF < (x) )
static UV
enc_unpack(pTHX_ U8 **sp,U8 *e,STRLEN size,U8 endian)
@@ -53,7 +52,7 @@ enc_unpack(pTHX_ U8 **sp,U8 *e,STRLEN size,U8 endian)
void
enc_pack(pTHX_ SV *result,STRLEN size,U8 endian,UV value)
{
- U8 *d = SvGROW(result,SvCUR(result)+size);
+ U8 *d = (U8 *)SvGROW(result,SvCUR(result)+size);
switch(endian) {
case 'v':
case 'V':
@@ -93,452 +92,6 @@ enc_pack(pTHX_ SV *result,STRLEN size,U8 endian,UV value)
}
UNIMPLEMENTED(_encoded_utf8_to_bytes, I32)
UNIMPLEMENTED(_encoded_bytes_to_utf8, I32)
-#if defined(USE_PERLIO) && !defined(USE_SFIO)
-/* Define an encoding "layer" in the perliol.h sense.
- The layer defined here "inherits" in an object-oriented sense from the
- "perlio" layer with its PerlIOBuf_* "methods".
- The implementation is particularly efficient as until Encode settles down
- there is no point in tryint to tune it.
-
- The layer works by overloading the "fill" and "flush" methods.
-
- "fill" calls "SUPER::fill" in perl terms, then calls the encode OO perl API
- to convert the encoded data to UTF-8 form, then copies it back to the
- buffer. The "base class's" read methods then see the UTF-8 data.
-
- "flush" transforms the UTF-8 data deposited by the "base class's write
- method in the buffer back into the encoded form using the encode OO perl API,
- then copies data back into the buffer and calls "SUPER::flush.
-
- Note that "flush" is _also_ called for read mode - we still do the (back)-translate
- so that the the base class's "flush" sees the correct number of encoded chars
- for positioning the seek pointer. (This double translation is the worst performance
- issue - particularly with all-perl encode engine.)
-
-*/
-#include "perliol.h"
-typedef struct {
- PerlIOBuf base; /* PerlIOBuf stuff */
- SV *bufsv; /* buffer seen by layers above */
- SV *dataSV; /* data we have read from layer below */
- SV *enc; /* the encoding object */
-} PerlIOEncode;
-
-SV *
-PerlIOEncode_getarg(pTHX_ PerlIO * f, CLONE_PARAMS * param, int flags)
-{
- PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
- SV *sv = &PL_sv_undef;
- if (e->enc) {
- dSP;
- ENTER;
- SAVETMPS;
- PUSHMARK(sp);
- XPUSHs(e->enc);
- PUTBACK;
- if (perl_call_method("name", G_SCALAR) == 1) {
- SPAGAIN;
- sv = newSVsv(POPs);
- PUTBACK;
- }
- }
- return sv;
-}
-
-IV
-PerlIOEncode_pushed(pTHX_ PerlIO * f, const char *mode, SV * arg)
-{
- PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
- dSP;
- IV code;
- code = PerlIOBuf_pushed(aTHX_ f, mode, Nullsv);
- ENTER;
- SAVETMPS;
- PUSHMARK(sp);
- XPUSHs(arg);
- PUTBACK;
- if (perl_call_pv("Encode::find_encoding", G_SCALAR) != 1) {
- /* should never happen */
- Perl_die(aTHX_ "Encode::find_encoding did not return a value");
- return -1;
- }
- SPAGAIN;
- e->enc = POPs;
- PUTBACK;
- if (!SvROK(e->enc)) {
- e->enc = Nullsv;
- errno = EINVAL;
- Perl_warner(aTHX_ packWARN(WARN_IO), "Cannot find encoding \"%" SVf "\"",
- arg);
- code = -1;
- }
- else {
- SvREFCNT_inc(e->enc);
- PerlIOBase(f)->flags |= PERLIO_F_UTF8;
- }
- FREETMPS;
- LEAVE;
- return code;
-}
-
-IV
-PerlIOEncode_popped(pTHX_ PerlIO * f)
-{
- PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
- if (e->enc) {
- SvREFCNT_dec(e->enc);
- e->enc = Nullsv;
- }
- if (e->bufsv) {
- SvREFCNT_dec(e->bufsv);
- e->bufsv = Nullsv;
- }
- if (e->dataSV) {
- SvREFCNT_dec(e->dataSV);
- e->dataSV = Nullsv;
- }
- return 0;
-}
-
-STDCHAR *
-PerlIOEncode_get_base(pTHX_ PerlIO * f)
-{
- PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
- if (!e->base.bufsiz)
- e->base.bufsiz = 1024;
- if (!e->bufsv) {
- e->bufsv = newSV(e->base.bufsiz);
- sv_setpvn(e->bufsv, "", 0);
- }
- e->base.buf = (STDCHAR *) SvPVX(e->bufsv);
- if (!e->base.ptr)
- e->base.ptr = e->base.buf;
- if (!e->base.end)
- e->base.end = e->base.buf;
- if (e->base.ptr < e->base.buf
- || e->base.ptr > e->base.buf + SvLEN(e->bufsv)) {
- Perl_warn(aTHX_ " ptr %p(%p)%p", e->base.buf, e->base.ptr,
- e->base.buf + SvLEN(e->bufsv));
- abort();
- }
- if (SvLEN(e->bufsv) < e->base.bufsiz) {
- SSize_t poff = e->base.ptr - e->base.buf;
- SSize_t eoff = e->base.end - e->base.buf;
- e->base.buf = (STDCHAR *) SvGROW(e->bufsv, e->base.bufsiz);
- e->base.ptr = e->base.buf + poff;
- e->base.end = e->base.buf + eoff;
- }
- if (e->base.ptr < e->base.buf
- || e->base.ptr > e->base.buf + SvLEN(e->bufsv)) {
- Perl_warn(aTHX_ " ptr %p(%p)%p", e->base.buf, e->base.ptr,
- e->base.buf + SvLEN(e->bufsv));
- abort();
- }
- return e->base.buf;
-}
-
-IV
-PerlIOEncode_fill(pTHX_ PerlIO * f)
-{
- PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
- dSP;
- IV code = 0;
- PerlIO *n;
- SSize_t avail;
- if (PerlIO_flush(f) != 0)
- return -1;
- n = PerlIONext(f);
- if (!PerlIO_fast_gets(n)) {
- /* Things get too messy if we don't have a buffer layer
- push a :perlio to do the job */
- char mode[8];
- n = PerlIO_push(aTHX_ n, &PerlIO_perlio, PerlIO_modestr(f,mode), Nullsv);
- if (!n) {
- Perl_die(aTHX_ "panic: cannot push :perlio for %p",f);
- }
- }
- ENTER;
- SAVETMPS;
- retry:
- avail = PerlIO_get_cnt(n);
- if (avail <= 0) {
- avail = PerlIO_fill(n);
- if (avail == 0) {
- avail = PerlIO_get_cnt(n);
- }
- else {
- if (!PerlIO_error(n) && PerlIO_eof(n))
- avail = 0;
- }
- }
- if (avail > 0) {
- STDCHAR *ptr = PerlIO_get_ptr(n);
- SSize_t use = avail;
- SV *uni;
- char *s;
- STRLEN len = 0;
- e->base.ptr = e->base.end = (STDCHAR *) Nullch;
- (void) PerlIOEncode_get_base(aTHX_ f);
- if (!e->dataSV)
- e->dataSV = newSV(0);
- if (SvTYPE(e->dataSV) < SVt_PV) {
- sv_upgrade(e->dataSV,SVt_PV);
- }
- if (SvCUR(e->dataSV)) {
- /* something left over from last time - create a normal
- SV with new data appended
- */
- if (use + SvCUR(e->dataSV) > e->base.bufsiz) {
- use = e->base.bufsiz - SvCUR(e->dataSV);
- }
- sv_catpvn(e->dataSV,(char*)ptr,use);
- }
- else {
- /* Create a "dummy" SV to represent the available data from layer below */
- if (SvLEN(e->dataSV) && SvPVX(e->dataSV)) {
- Safefree(SvPVX(e->dataSV));
- }
- if (use > e->base.bufsiz) {
- use = e->base.bufsiz;
- }
- SvPVX(e->dataSV) = (char *) ptr;
- SvLEN(e->dataSV) = 0; /* Hands off sv.c - it isn't yours */
- SvCUR_set(e->dataSV,use);
- SvPOK_only(e->dataSV);
- }
- SvUTF8_off(e->dataSV);
- PUSHMARK(sp);
- XPUSHs(e->enc);
- XPUSHs(e->dataSV);
- XPUSHs(&PL_sv_yes);
- PUTBACK;
- if (perl_call_method("decode", G_SCALAR) != 1) {
- Perl_die(aTHX_ "panic: decode did not return a value");
- }
- SPAGAIN;
- uni = POPs;
- PUTBACK;
- /* Now get translated string (forced to UTF-8) and use as buffer */
- if (SvPOK(uni)) {
- s = SvPVutf8(uni, len);
-#ifdef PARANOID_ENCODE_CHECKS
- if (len && !is_utf8_string((U8*)s,len)) {
- Perl_warn(aTHX_ "panic: decode did not return UTF-8 '%.*s'",(int) len,s);
- }
-#endif
- }
- if (len > 0) {
- /* Got _something */
- /* if decode gave us back dataSV then data may vanish when
- we do ptrcnt adjust - so take our copy now.
- (The copy is a pain - need a put-it-here option for decode.)
- */
- sv_setpvn(e->bufsv,s,len);
- e->base.ptr = e->base.buf = (STDCHAR*)SvPVX(e->bufsv);
- e->base.end = e->base.ptr + SvCUR(e->bufsv);
- PerlIOBase(f)->flags |= PERLIO_F_RDBUF;
- SvUTF8_on(e->bufsv);
-
- /* Adjust ptr/cnt not taking anything which
- did not translate - not clear this is a win */
- /* compute amount we took */
- use -= SvCUR(e->dataSV);
- PerlIO_set_ptrcnt(n, ptr+use, (avail-use));
- /* and as we did not take it it isn't pending */
- SvCUR_set(e->dataSV,0);
- } else {
- /* Got nothing - assume partial character so we need some more */
- /* Make sure e->dataSV is a normal SV before re-filling as
- buffer alias will change under us
- */
- s = SvPV(e->dataSV,len);
- sv_setpvn(e->dataSV,s,len);
- PerlIO_set_ptrcnt(n, ptr+use, (avail-use));
- goto retry;
- }
- FREETMPS;
- LEAVE;
- return code;
- }
- else {
- if (avail == 0)
- PerlIOBase(f)->flags |= PERLIO_F_EOF;
- else
- PerlIOBase(f)->flags |= PERLIO_F_ERROR;
- return -1;
- }
-}
-
-IV
-PerlIOEncode_flush(pTHX_ PerlIO * f)
-{
- PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
- IV code = 0;
- if (e->bufsv && (e->base.ptr > e->base.buf)) {
- dSP;
- SV *str;
- char *s;
- STRLEN len;
- SSize_t count = 0;
- if (PerlIOBase(f)->flags & PERLIO_F_WRBUF) {
- /* Write case encode the buffer and write() to layer below */
- ENTER;
- SAVETMPS;
- PUSHMARK(sp);
- XPUSHs(e->enc);
- SvCUR_set(e->bufsv, e->base.ptr - e->base.buf);
- SvUTF8_on(e->bufsv);
- XPUSHs(e->bufsv);
- XPUSHs(&PL_sv_yes);
- PUTBACK;
- if (perl_call_method("encode", G_SCALAR) != 1) {
- Perl_die(aTHX_ "panic: encode did not return a value");
- }
- SPAGAIN;
- str = POPs;
- PUTBACK;
- s = SvPV(str, len);
- count = PerlIO_write(PerlIONext(f),s,len);
- if (count != len) {
- code = -1;
- }
- FREETMPS;
- LEAVE;
- if (PerlIO_flush(PerlIONext(f)) != 0) {
- code = -1;
- }
- if (SvCUR(e->bufsv)) {
- /* Did not all translate */
- e->base.ptr = e->base.buf+SvCUR(e->bufsv);
- return code;
- }
- }
- else if (PerlIOBase(f)->flags & PERLIO_F_RDBUF) {
- /* read case */
- /* if we have any untranslated stuff then unread that first */
- if (e->dataSV && SvCUR(e->dataSV)) {
- s = SvPV(e->dataSV, len);
- count = PerlIO_unread(PerlIONext(f),s,len);
- if (count != len) {
- code = -1;
- }
- }
- /* See if there is anything left in the buffer */
- if (e->base.ptr < e->base.end) {
- /* Bother - have unread data.
- re-encode and unread() to layer below
- */
- ENTER;
- SAVETMPS;
- str = sv_newmortal();
- sv_upgrade(str, SVt_PV);
- SvPVX(str) = (char*)e->base.ptr;
- SvLEN(str) = 0;
- SvCUR_set(str, e->base.end - e->base.ptr);
- SvPOK_only(str);
- SvUTF8_on(str);
- PUSHMARK(sp);
- XPUSHs(e->enc);
- XPUSHs(str);
- XPUSHs(&PL_sv_yes);
- PUTBACK;
- if (perl_call_method("encode", G_SCALAR) != 1) {
- Perl_die(aTHX_ "panic: encode did not return a value");
- }
- SPAGAIN;
- str = POPs;
- PUTBACK;
- s = SvPV(str, len);
- count = PerlIO_unread(PerlIONext(f),s,len);
- if (count != len) {
- code = -1;
- }
- FREETMPS;
- LEAVE;
- }
- }
- e->base.ptr = e->base.end = e->base.buf;
- PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF | PERLIO_F_WRBUF);
- }
- return code;
-}
-
-IV
-PerlIOEncode_close(pTHX_ PerlIO * f)
-{
- PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
- IV code = PerlIOBase_close(aTHX_ f);
- if (e->bufsv) {
- if (e->base.buf && e->base.ptr > e->base.buf) {
- Perl_croak(aTHX_ "Close with partial character");
- }
- SvREFCNT_dec(e->bufsv);
- e->bufsv = Nullsv;
- }
- e->base.buf = NULL;
- e->base.ptr = NULL;
- e->base.end = NULL;
- PerlIOBase(f)->flags &= ~(PERLIO_F_RDBUF | PERLIO_F_WRBUF);
- return code;
-}
-
-Off_t
-PerlIOEncode_tell(pTHX_ PerlIO * f)
-{
- PerlIOBuf *b = PerlIOSelf(f, PerlIOBuf);
- /* Unfortunately the only way to get a postion is to (re-)translate,
- the UTF8 we have in bufefr and then ask layer below
- */
- PerlIO_flush(f);
- if (b->buf && b->ptr > b->buf) {
- Perl_croak(aTHX_ "Cannot tell at partial character");
- }
- return PerlIO_tell(PerlIONext(f));
-}
-
-PerlIO *
-PerlIOEncode_dup(pTHX_ PerlIO * f, PerlIO * o,
- CLONE_PARAMS * params, int flags)
-{
- if ((f = PerlIOBase_dup(aTHX_ f, o, params, flags))) {
- PerlIOEncode *fe = PerlIOSelf(f, PerlIOEncode);
- PerlIOEncode *oe = PerlIOSelf(o, PerlIOEncode);
- if (oe->enc) {
- fe->enc = PerlIO_sv_dup(aTHX_ oe->enc, params);
- }
- }
- return f;
-}
-
-PerlIO_funcs PerlIO_encode = {
- "encoding",
- sizeof(PerlIOEncode),
- PERLIO_K_BUFFERED|PERLIO_K_DESTRUCT,
- PerlIOEncode_pushed,
- PerlIOEncode_popped,
- PerlIOBuf_open,
- PerlIOEncode_getarg,
- PerlIOBase_fileno,
- PerlIOEncode_dup,
- PerlIOBuf_read,
- PerlIOBuf_unread,
- PerlIOBuf_write,
- PerlIOBuf_seek,
- PerlIOEncode_tell,
- PerlIOEncode_close,
- PerlIOEncode_flush,
- PerlIOEncode_fill,
- PerlIOBase_eof,
- PerlIOBase_error,
- PerlIOBase_clearerr,
- PerlIOBase_setlinebuf,
- PerlIOEncode_get_base,
- PerlIOBuf_bufsiz,
- PerlIOBuf_get_ptr,
- PerlIOBuf_get_cnt,
- PerlIOBuf_set_ptrcnt,
-};
-#endif /* encode layer */
void
Encode_XSEncoding(pTHX_ encode_t * enc)
@@ -636,33 +189,56 @@ encode_method(pTHX_ encode_t * enc, encpage_t * dir, SV * src,
case ENCODE_NOREP:
if (dir == enc->f_utf8) {
- if (!check && ckWARN_d(WARN_UTF8)) {
- STRLEN clen;
- UV ch =
- utf8n_to_uvuni(s + slen, (SvCUR(src) - slen),
- &clen, 0);
- Perl_warner(aTHX_ packWARN(WARN_UTF8),
- "\"\\N{U+%" UVxf
- "}\" does not map to %s", ch,
- enc->name[0]);
- /* FIXME: Skip over the character, copy in replacement and continue
- * but that is messy so for now just fail.
- */
- return &PL_sv_undef;
+ STRLEN clen;
+ UV ch =
+ utf8n_to_uvuni(s + slen, (SvCUR(src) - slen),
+ &clen, 0);
+ if (!check) { /* fallback char */
+ sdone += slen + clen;
+ ddone += dlen + enc->replen;
+ sv_catpvn(dst, enc->rep, enc->replen);
}
- else {
- return &PL_sv_undef;
+ else if (check == -1){ /* perlqq */
+ SV* perlqq =
+ sv_2mortal(newSVpvf("\\x{%x}", ch));
+ sdone += slen + clen;
+ ddone += dlen + SvLEN(perlqq);
+ sv_catsv(dst, perlqq);
+ }
+ else {
+ Perl_croak(aTHX_
+ "\"\\N{U+%" UVxf
+ "}\" does not map to %s", ch,
+ enc->name[0]);
}
+ }
+ else {
+ if (!check){ /* fallback char */
+ sdone += slen + 1;
+ ddone += dlen + strlen(FBCHAR_UTF8);
+ sv_catpv(dst, FBCHAR_UTF8);
}
+ else if (check == -1){ /* perlqq */
+ SV* perlqq =
+ sv_2mortal(newSVpvf("\\x%02X", s[slen]));
+ sdone += slen + 1;
+ ddone += dlen + SvLEN(perlqq);
+ sv_catsv(dst, perlqq);
+ }
else {
- /* UTF-8 is supposed to be "Universal" so should not happen
- for real characters, but some encodings have non-assigned
- codes which may occur.
- */
- Perl_croak(aTHX_ "%s \"\\x%02X\" does not map to Unicode (%d)",
- enc->name[0], (U8) s[slen], code);
+ /* UTF-8 is supposed to be "Universal" so should not
+ happen for real characters, but some encodings
+ have non-assigned codes which may occur. */
+ Perl_croak(aTHX_ "%s \"\\x%02X\" "
+ "does not map to Unicode (%d)",
+ enc->name[0], (U8) s[slen], code);
}
- break;
+ }
+ dlen = SvCUR(dst);
+ d = SvPVX(dst) + dlen;
+ s = SvPVX(src) + sdone;
+ slen = tlen - sdone;
+ break;
default:
Perl_croak(aTHX_ "Unexpected code %d converting %s %s",
@@ -722,10 +298,10 @@ CODE:
}
void
-Method_decode(obj,src,check = FALSE)
+Method_decode(obj,src,check = 0)
SV * obj
SV * src
-bool check
+int check
CODE:
{
encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
@@ -735,10 +311,10 @@ CODE:
}
void
-Method_encode(obj,src,check = FALSE)
+Method_encode(obj,src,check = 0)
SV * obj
SV * src
-bool check
+int check
CODE:
{
encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
@@ -761,8 +337,8 @@ CODE:
int ucs2 = SvTRUE(*hv_fetch((HV *)SvRV(obj),"ucs2",4,0));
SV *result = newSVpvn("",0);
STRLEN ulen;
- U8 *s = SvPVbyte(str,ulen);
- U8 *e = SvEND(str);
+ U8 *s = (U8 *)SvPVbyte(str,ulen);
+ U8 *e = (U8 *)SvEND(str);
ST(0) = sv_2mortal(result);
SvUTF8_on(result);
@@ -790,7 +366,7 @@ CODE:
while (s < e && s+size <= e) {
UV ord = enc_unpack(aTHX_ &s,e,size,endian);
U8 *d;
- if (size != 4 && !valid_ucs2(ord)) {
+ if (size != 4 && invalid_ucs2(ord)) {
if (ucs2) {
if (SvTRUE(chk)) {
croak("%s:no surrogates allowed %"UVxf,
@@ -851,8 +427,8 @@ CODE:
int ucs2 = SvTRUE(*hv_fetch((HV *)SvRV(obj),"ucs2",4,0));
SV *result = newSVpvn("",0);
STRLEN ulen;
- U8 *s = SvPVutf8(utf8,ulen);
- U8 *e = SvEND(utf8);
+ U8 *s = (U8 *)SvPVutf8(utf8,ulen);
+ U8 *e = (U8 *)SvEND(utf8);
ST(0) = sv_2mortal(result);
if (!endian) {
endian = (size == 4) ? 'N' : 'n';
@@ -866,7 +442,7 @@ CODE:
STRLEN len;
UV ord = utf8n_to_uvuni(s, e-s, &len, 0);
s += len;
- if (size != 4 && !valid_ucs2(ord)) {
+ if (size != 4 && invalid_ucs2(ord)) {
if (!issurrogate(ord)){
if (ucs2) {
if (SvTRUE(chk)) {
@@ -999,9 +575,9 @@ _utf8_to_bytes(sv, ...)
RETVAL
bool
-is_utf8(sv, check = FALSE)
+is_utf8(sv, check = 0)
SV * sv
-bool check
+int check
CODE:
{
if (SvGMAGICAL(sv)) /* it could be $1, for example */
@@ -1056,7 +632,7 @@ _utf8_off(sv)
BOOT:
{
#if defined(USE_PERLIO) && !defined(USE_SFIO)
- PerlIO_define_layer(aTHX_ &PerlIO_encode);
+/* PerlIO_define_layer(aTHX_ &PerlIO_encode); */
#endif
#include "def_t.exh"
}
diff --git a/ext/Encode/Encode/Makefile_PL.e2x b/ext/Encode/Encode/Makefile_PL.e2x
index 59b5149353..8571033f70 100644
--- a/ext/Encode/Encode/Makefile_PL.e2x
+++ b/ext/Encode/Encode/Makefile_PL.e2x
@@ -16,7 +16,7 @@ my %tables = (
#### DO NOT EDIT BEYOND THIS POINT!
my $enc2xs = '$_Enc2xs_';
WriteMakefile(
- INC => "-I$_Inc_",
+ INC => "-I$_E2X_",
#### END_OF_HEADER -- DO NOT EDIT THIS LINE BY HAND! ####
NAME => 'Encode::'.$name,
VERSION_FROM => "$name.pm",
diff --git a/ext/Encode/Encode/_PM.e2x b/ext/Encode/Encode/_PM.e2x
index 208b87ee08..eb59cd1b52 100644
--- a/ext/Encode/Encode/_PM.e2x
+++ b/ext/Encode/Encode/_PM.e2x
@@ -3,7 +3,7 @@ our $VERSION = "0.01";
use Encode;
use XSLoader;
-XSLoader::load('Encode::$_Name_', $VERSION);
+XSLoader::load(__PACKAGE__,$VERSION);
1;
__END__
diff --git a/ext/Encode/JP/JP.pm b/ext/Encode/JP/JP.pm
index 10eb59b2d4..1a4d42e92b 100644
--- a/ext/Encode/JP/JP.pm
+++ b/ext/Encode/JP/JP.pm
@@ -5,10 +5,10 @@ BEGIN {
}
}
use Encode;
-our $VERSION = do { my @r = (q$Revision: 1.22 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = (q$Revision: 1.23 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
use XSLoader;
-XSLoader::load('Encode::JP',$VERSION);
+XSLoader::load(__PACKAGE__,$VERSION);
use Encode::JP::JIS7;
diff --git a/ext/Encode/KR/KR.pm b/ext/Encode/KR/KR.pm
index 662f6c09bf..f7c9a82fd3 100644
--- a/ext/Encode/KR/KR.pm
+++ b/ext/Encode/KR/KR.pm
@@ -4,14 +4,15 @@ BEGIN {
die "Encode::KR not supported on EBCDIC\n";
}
}
-our $VERSION = do { my @r = (q$Revision: 1.20 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = (q$Revision: 1.21 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
use Encode;
use XSLoader;
-XSLoader::load('Encode::KR',$VERSION);
+XSLoader::load(__PACKAGE__,$VERSION);
1;
__END__
+
=head1 NAME
Encode::KR - Korean Encodings
diff --git a/ext/Encode/MANIFEST b/ext/Encode/MANIFEST
index 22f12c6f74..499998b063 100644
--- a/ext/Encode/MANIFEST
+++ b/ext/Encode/MANIFEST
@@ -44,7 +44,6 @@ lib/Encode/JP/JIS7.pm Encode extension
lib/Encode/KR/2022_KR.pm Encode extension
lib/Encode/Supported.pod Documents supported encodings
lib/Encode/Unicode.pm Encode extension
-lib/Encode/XS.pm Encode extension
t/Aliases.t Encode extension test
t/CN.t Encode extension test
t/Encode.t Encode extension test
diff --git a/ext/Encode/Symbol/Symbol.pm b/ext/Encode/Symbol/Symbol.pm
index 33ef710d0b..9aed69d7e6 100644
--- a/ext/Encode/Symbol/Symbol.pm
+++ b/ext/Encode/Symbol/Symbol.pm
@@ -1,9 +1,9 @@
package Encode::Symbol;
use Encode;
-our $VERSION = do { my @r = (q$Revision: 1.20 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = (q$Revision: 1.21 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
use XSLoader;
-XSLoader::load('Encode::Symbol',$VERSION);
+XSLoader::load(__PACKAGE__,$VERSION);
1;
__END__
diff --git a/ext/Encode/TW/TW.pm b/ext/Encode/TW/TW.pm
index 46a4bfb273..294144aad2 100644
--- a/ext/Encode/TW/TW.pm
+++ b/ext/Encode/TW/TW.pm
@@ -4,11 +4,11 @@ BEGIN {
die "Encode::TW not supported on EBCDIC\n";
}
}
-our $VERSION = do { my @r = (q$Revision: 1.20 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = (q$Revision: 1.21 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
use Encode;
use XSLoader;
-XSLoader::load('Encode::TW',$VERSION);
+XSLoader::load(__PACKAGE__,$VERSION);
1;
__END__
diff --git a/ext/Encode/bin/enc2xs b/ext/Encode/bin/enc2xs
index bc03b82091..10feaf8bfa 100644
--- a/ext/Encode/bin/enc2xs
+++ b/ext/Encode/bin/enc2xs
@@ -8,7 +8,7 @@ BEGIN {
use strict;
use Getopt::Std;
my @orig_ARGV = @ARGV;
-our $VERSION = do { my @r = (q$Revision: 1.21 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = (q$Revision: 1.22 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
# These may get re-ordered.
# RAW is a do_now as inserted by &enter
@@ -252,12 +252,16 @@ if ($doC)
my ($e2u,$u2e,$erep,$min_el,$max_el) = @{$encoding{$name}};
output(\*C,$name.'_utf8',$e2u);
output(\*C,'utf8_'.$name,$u2e);
- push(@{$encoding{$name}},outstring(\*C,$e2u->{Cname}.'_def',$erep));
+ # push(@{$encoding{$name}},outstring(\*C,$e2u->{Cname}.'_def',$erep));
}
foreach my $enc (sort cmp_name keys %encoding)
{
- my ($e2u,$u2e,$rep,$min_el,$max_el,$rsym) = @{$encoding{$enc}};
- my @info = ($e2u->{Cname},$u2e->{Cname},$rsym,length($rep),$min_el,$max_el);
+ # my ($e2u,$u2e,$rep,$min_el,$max_el,$rsym) = @{$encoding{$enc}};
+ my ($e2u,$u2e,$rep,$min_el,$max_el) = @{$encoding{$enc}};
+ #my @info = ($e2u->{Cname},$u2e->{Cname},$rsym,length($rep),$min_el,$max_el);
+ my $replen = 0;
+ $replen++ while($rep =~ /\G\\x[0-9A-Fa-f]/g);
+ my @info = ($e2u->{Cname},$u2e->{Cname},qq("$rep"),$replen,$min_el,$max_el);
my $sym = "${enc}_encoding";
$sym =~ s/\W+/_/g;
print C "encode_t $sym = \n";
@@ -368,10 +372,12 @@ sub compile_ucm
my $min_el;
if (exists $attr{'subchar'})
{
- my @byte;
- $attr{'subchar'} =~ /^\s*/cg;
- push(@byte,$1) while $attr{'subchar'} =~ /\G\\x([0-9a-f]+)/icg;
- $erep = join('',map(chr(hex($_)),@byte));
+ #my @byte;
+ #$attr{'subchar'} =~ /^\s*/cg;
+ #push(@byte,$1) while $attr{'subchar'} =~ /\G\\x([0-9a-f]+)/icg;
+ #$erep = join('',map(chr(hex($_)),@byte));
+ $erep = $attr{'subchar'};
+ $erep =~ s/^\s+//; $erep =~ s/\s+$//;
}
print "Reading $name ($cs)\n";
my $nfb = 0;
@@ -838,11 +844,37 @@ use vars qw(
$_Enc2xs
$_Version
$_Inc
+ $_E2X
$_Name
$_TableFiles
$_Now
);
+sub find_e2x{
+ eval { require File::Find };
+ my (@inc, %e2x_dir);
+ for my $inc (@INC){
+ push @inc, $inc unless $inc eq '.'; #skip current dir
+ }
+ File::Find::find(
+ sub {
+ my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,
+ $atime,$mtime,$ctime,$blksize,$blocks)
+ = lstat($_) or return;
+ -f _ or return;
+ if (/^.*\.e2x$/o){
+ $e2x_dir{$File::Find::dir} ||= $mtime;
+ }
+ return;
+ }, @inc);
+ warn join("\n", keys %e2x_dir), "\n";
+ for my $d (sort {$e2x_dir{$a} <=> $e2x_dir{$b}} keys %e2x_dir){
+ $_E2X = $d;
+ # warn "$_E2X => ", scalar localtime($e2x_dir{$d});
+ return $_E2X;
+ }
+}
+
sub make_makefile_pl
{
eval { require Encode; };
@@ -850,21 +882,22 @@ sub make_makefile_pl
# our used for variable expanstion
$_Enc2xs = $0;
$_Version = $VERSION;
- $_Inc = $INC{"Encode.pm"}; $_Inc =~ s/\.pm$//o;
+ $_E2X = find_e2x();
$_Name = shift;
$_TableFiles = join(",", map {qq('$_')} @_);
$_Now = scalar localtime();
+
eval { require File::Spec; };
warn "Generating Makefile.PL\n";
- _print_expand(File::Spec->catfile($_Inc,"Makefile_PL.e2x"),"Makefile.PL");
+ _print_expand(File::Spec->catfile($_E2X,"Makefile_PL.e2x"),"Makefile.PL");
warn "Generating $_Name.pm\n";
- _print_expand(File::Spec->catfile($_Inc,"_PM.e2x"), "$_Name.pm");
+ _print_expand(File::Spec->catfile($_E2X,"_PM.e2x"), "$_Name.pm");
warn "Generating t/$_Name.t\n";
- _print_expand(File::Spec->catfile($_Inc,"_T.e2x"), "t/$_Name.t");
+ _print_expand(File::Spec->catfile($_E2X,"_T.e2x"), "t/$_Name.t");
warn "Generating README\n";
- _print_expand(File::Spec->catfile($_Inc,"README.e2x"), "README");
+ _print_expand(File::Spec->catfile($_E2X,"README.e2x"), "README");
warn "Generating t/$_Name.t\n";
- _print_expand(File::Spec->catfile($_Inc,"Changes.e2x"), "Changes");
+ _print_expand(File::Spec->catfile($_E2X,"Changes.e2x"), "Changes");
exit;
}
@@ -897,8 +930,7 @@ sub make_configlocal_pm
$Encode::Config::ExtModule{$enc} and next;
my $mod = "Encode/$f";
$mod =~ s/\.pm$//o; $mod =~ s,/,::,og;
- warn "$enc => $mod\n";
- $LocalMod{$enc} = $mod;
+ $LocalMod{$enc} ||= $mod;
}
}
}
@@ -907,10 +939,12 @@ sub make_configlocal_pm
$_ModLines .=
qq(\$Encode::ExtModule{'$enc'} =\t"$LocalMod{$enc}";\n);
}
+ warn $_ModLines;
$_LocalVer = _mkversion();
+ $_E2X = find_e2x();
$_Inc = $INC{"Encode.pm"}; $_Inc =~ s/\.pm$//o;
- warn "Writing Encode::ConfigLocal\n";
- _print_expand(File::Spec->catfile($_Inc,"ConfigLocal_PM.e2x"),
+ warn "Writing ", File::Spec->catfile($_Inc,"ConfigLocal.pm"), "\n";
+ _print_expand(File::Spec->catfile($_E2X,"ConfigLocal_PM.e2x"),
File::Spec->catfile($_Inc,"ConfigLocal.pm"));
exit;
}
diff --git a/ext/Encode/bin/piconv b/ext/Encode/bin/piconv
index b70a1a801f..3880dea288 100644
--- a/ext/Encode/bin/piconv
+++ b/ext/Encode/bin/piconv
@@ -1,5 +1,5 @@
#!./perl
-# $Id: piconv,v 1.21 2002/04/09 20:06:15 dankogai Exp $
+# $Id: piconv,v 1.22 2002/04/16 23:35:00 dankogai Exp $
#
use 5.7.3;
use strict;
@@ -9,7 +9,7 @@ my %Scheme = map {$_ => 1} qw(from_to decode_encode perlio);
use Getopt::Std;
-my %Opt; getopts("hDS:lf:t:s:", \%Opt);
+my %Opt; getopts("pcC:hDS:lf:t:s:", \%Opt);
$Opt{h} and help();
$Opt{l} and list_encodings();
my $locale = $ENV{LC_CTYPE} || $ENV{LC_ALL} || $ENV{LANG};
@@ -18,6 +18,8 @@ my $from = $Opt{f} || $locale or help("from_encoding unspecified");
my $to = $Opt{t} || $locale or help("to_encoding unspecified");
$Opt{s} and Encode::from_to($Opt{s}, $from, $to) and print $Opt{s} and exit;
my $scheme = exists $Scheme{$Opt{S}} ? $Opt{S} : 'from_to';
+$Opt{C} ||= $Opt{c};
+$Opt{p} and $Opt{C} = -1;
if ($Opt{D}){
my $cfrom = Encode->getEncoding($from)->name;
@@ -32,12 +34,12 @@ EOT
# default
if ($scheme eq 'from_to'){
while(<>){
- Encode::from_to($_, $from, $to); print;
+ Encode::from_to($_, $from, $to, $Opt{C}); print;
};
# step-by-step
}elsif ($scheme eq 'decode_encode'){
while(<>){
- my $decoded = decode($from, $_);
+ my $decoded = decode($from, $_, $Opt{C});
my $encoded = encode($to, $decoded);
print $encoded;
};
@@ -121,6 +123,19 @@ and common aliases work, like "latin1" for "ISO 8859-1", or "ibm850"
instead of "cp850", or "winlatin1" for "cp1252". See L<Encode::Supported>
for the full discussion.
+=item -C I<N>
+
+Check the validity of the stream if I<N> = 1. When I<N> = -1, something
+interesting happens when it encounters an invalid character.
+
+=item -c
+
+Same as C<-C 1>.
+
+=item -p
+
+Same as C<-C -1>.
+
=item -h
Show usage.
diff --git a/ext/Encode/encoding.pm b/ext/Encode/encoding.pm
index d5b32c7b5b..fd8ae1abac 100644
--- a/ext/Encode/encoding.pm
+++ b/ext/Encode/encoding.pm
@@ -1,5 +1,5 @@
package encoding;
-our $VERSION = do { my @r = (q$Revision: 1.26 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = (q$Revision: 1.28 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
use Encode;
use strict;
@@ -11,6 +11,16 @@ BEGIN {
}
}
+our $HAS_PERLIO_ENCODING;
+
+eval { require PerlIO::encoding; };
+if ($@){
+ $HAS_PERLIO_ENCODING = 0;
+}else{
+ $HAS_PERLIO_ENCODING = 1;
+ binmode(STDIN);
+}
+
sub import {
my $class = shift;
my $name = shift;
@@ -24,9 +34,10 @@ sub import {
}
unless ($arg{Filter}){
${^ENCODING} = $enc; # this is all you need, actually.
+ $HAS_PERLIO_ENCODING or return 1;
for my $h (qw(STDIN STDOUT)){
if ($arg{$h}){
- unless (defined find_encoding($arg{h})) {
+ unless (defined find_encoding($arg{$h})) {
require Carp;
Carp::croak "Unknown encoding for $h, '$arg{$h}'";
}
@@ -46,8 +57,8 @@ sub import {
eval {
require Filter::Util::Call ;
Filter::Util::Call->import ;
- binmode(STDIN, ":raw");
- binmode(STDOUT, ":raw");
+ binmode(STDIN);
+ binmode(STDOUT);
filter_add(sub{
my $status;
if (($status = filter_read()) > 0){
@@ -65,8 +76,8 @@ sub import {
sub unimport{
no warnings;
undef ${^ENCODING};
- binmode(STDIN, ":raw");
- binmode(STDOUT, ":raw");
+ binmode(STDIN);
+ binmode(STDOUT);
if ($INC{"Filter/Util/Call.pm"}){
eval { filter_del() };
}
diff --git a/ext/Encode/lib/Encode/CN/HZ.pm b/ext/Encode/lib/Encode/CN/HZ.pm
index d9c261ef1f..c599928f6e 100644
--- a/ext/Encode/lib/Encode/CN/HZ.pm
+++ b/ext/Encode/lib/Encode/CN/HZ.pm
@@ -3,7 +3,7 @@ package Encode::CN::HZ;
use strict;
use vars qw($VERSION);
-$VERSION = do { my @r = (q$Revision: 1.0 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+$VERSION = do { my @r = (q$Revision: 1.1 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
use Encode ();
use Encode::CN;
@@ -64,8 +64,13 @@ sub encode
no warnings 'utf8';
my $char = substr($str, $index, 1);
- my $try = $gb->encode($char); # try to encode this character
-
+ # try to encode this character
+ # with CHECK on so it stops at proper place.
+ # also note that the assignement was braced in eval
+ # -- dankogai
+ my $try;
+ eval{ $try = $gb->encode($char, 1) };
+
if (defined($try)) { # is a GB character:
if ($in_gb) {
$out .= $try; # in GB mode - just append it
diff --git a/ext/Encode/lib/Encode/Config.pm b/ext/Encode/lib/Encode/Config.pm
index 34f7b18220..ff81c2a251 100644
--- a/ext/Encode/lib/Encode/Config.pm
+++ b/ext/Encode/lib/Encode/Config.pm
@@ -2,7 +2,7 @@
# Demand-load module list
#
package Encode::Config;
-our $VERSION = do { my @r = (q$Revision: 1.2 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = (q$Revision: 1.3 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
use strict;
@@ -150,3 +150,10 @@ while (my ($enc,$mod) = each %ExtModule){
}
1;
+__END__
+
+=head1 NAME
+
+Encode::Config -- internally used by Encode
+
+=cut
diff --git a/ext/Encode/lib/Encode/KR/2022_KR.pm b/ext/Encode/lib/Encode/KR/2022_KR.pm
index 4a3b1d086a..c71f0e480c 100644
--- a/ext/Encode/lib/Encode/KR/2022_KR.pm
+++ b/ext/Encode/lib/Encode/KR/2022_KR.pm
@@ -4,7 +4,7 @@ use base 'Encode::Encoding';
use strict;
-our $VERSION = do { my @r = (q$Revision: 1.0 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = (q$Revision: 1.1 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
my $canon = 'iso-2022-kr';
@@ -64,3 +64,9 @@ sub euc_iso{
1;
__END__
+
+=head1 NAME
+
+Encode::KR::2022_KR -- internally used by Encode::KR
+
+=cut
diff --git a/ext/Encode/lib/Encode/XS.pm b/ext/Encode/lib/Encode/XS.pm
deleted file mode 100644
index 368ab0c561..0000000000
--- a/ext/Encode/lib/Encode/XS.pm
+++ /dev/null
@@ -1,13 +0,0 @@
-package Encode::XS;
-use strict;
-our $VERSION = do { my @r = (q$Revision: 1.0 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
-
-use base 'Encode::Encoding';
-1;
-__END__
-
-=head1 NAME
-
-Encode::XS -- for internal use only
-
-=cut
diff --git a/ext/Encode/t/CN.t b/ext/Encode/t/CN.t
index 749f9139c5..893c29fa6d 100644
--- a/ext/Encode/t/CN.t
+++ b/ext/Encode/t/CN.t
@@ -8,10 +8,6 @@ BEGIN {
print "1..0 # Skip: Encode was not built\n";
exit 0;
}
- unless (find PerlIO::Layer 'perlio') {
- print "1..0 # Skip: PerlIO was not built\n";
- exit 0;
- }
if (ord("A") == 193) {
print "1..0 # Skip: EBCDIC\n";
exit 0;
diff --git a/ext/Encode/t/Encoder.t b/ext/Encode/t/Encoder.t
index 22c240be45..af83cf60cf 100644
--- a/ext/Encode/t/Encoder.t
+++ b/ext/Encode/t/Encoder.t
@@ -1,5 +1,5 @@
#
-# $Id: Encoder.t,v 1.2 2002/04/10 22:28:40 dankogai Exp $
+# $Id: Encoder.t,v 1.3 2002/04/16 23:35:00 dankogai Exp $
#
BEGIN {
@@ -8,16 +8,6 @@ BEGIN {
print "1..0 # Skip: Encode was not built\n";
exit 0;
}
-# should work without perlio
-# unless (find PerlIO::Layer 'perlio') {
-# print "1..0 # Skip: PerlIO was not built\n";
-# exit 0;
-# }
-# should work on EBCDIC
-# if (ord("A") == 193) {
-# print "1..0 # Skip: EBCDIC\n";
-# exit 0;
-# }
$| = 1;
}
diff --git a/ext/Encode/t/JP.t b/ext/Encode/t/JP.t
index c9b1dde332..89238b58d0 100644
--- a/ext/Encode/t/JP.t
+++ b/ext/Encode/t/JP.t
@@ -8,10 +8,6 @@ BEGIN {
print "1..0 # Skip: Encode was not built\n";
exit 0;
}
- unless (find PerlIO::Layer 'perlio') {
- print "1..0 # Skip: PerlIO was not built\n";
- exit 0;
- }
if (ord("A") == 193) {
print "1..0 # Skip: EBCDIC\n";
exit 0;
@@ -77,47 +73,51 @@ ok(compare($euc,$rnd) == 0);
is($enc->name,'euc-jp');
-print "# src :encoding test\n";
-open($src,"<encoding(euc-jp)",$euc) || die "Cannot open $euc:$!";
-binmode($src);
-ok(defined($src) && fileno($src));
-open($dst,">:utf8",$utf) || die "Cannot open $utf:$!";
-binmode($dst);
-ok(defined($dst) || fileno($dst));
-my $out = select($dst);
-while (<$src>)
- {
- print;
- }
-close($dst);
-close($src);
-
-TODO:
-{
- local $TODO = 'needs debugging on VMS' if $^O eq 'VMS';
- ok(compare($utf,$ref) == 0);
+my $skip_perlio;
+eval { require PerlIO::encoding; };
+if ($@){
+ $skip_perlio = 1;
+}else{
+ $skip_perlio = 0;
+ binmode(STDIN);
}
-select($out);
-SKIP:
-{
- #skip "Multi-byte write is broken",3;
- print "# dst :encoding test\n";
- open($src,"<:utf8",$ref) || die "Cannot open $ref:$!";
- binmode($src);
- ok(defined($src) || fileno($src));
- open($dst,">encoding(euc-jp)",$rnd) || die "Cannot open $rnd:$!";
- binmode($dst);
- ok(defined($dst) || fileno($dst));
- my $out = select($dst);
- while (<$src>)
- {
- print;
- }
- close($dst);
- close($src);
- ok(compare($euc,$rnd) == 0);
- select($out);
+$skip_perlio ||= (@ARGV and shift eq 'perlio');
+
+SKIP: {
+ skip "PerlIO Encoding Needed", 6 if $skip_perlio;
+ print "# src :encoding test\n";
+ open($src,"<encoding(euc-jp)",$euc) || die "Cannot open $euc:$!";
+ binmode($src);
+ ok(defined($src) && fileno($src));
+ open($dst,">:utf8",$utf) || die "Cannot open $utf:$!";
+ binmode($dst);
+ ok(defined($dst) || fileno($dst));
+ my $out = select($dst);
+ while (<$src>){ print; }
+ close($dst);
+ close($src);
+
+ TODO:
+ {
+ local $TODO = 'needs debugging on VMS' if $^O eq 'VMS';
+ ok(compare($utf,$ref) == 0);
+ }
+ select($out);
+
+ print "# dst :encoding test\n";
+ open($src,"<:utf8",$ref) || die "Cannot open $ref:$!";
+ binmode($src);
+ ok(defined($src) || fileno($src));
+ open($dst,">encoding(euc-jp)",$rnd) || die "Cannot open $rnd:$!";
+ binmode($dst);
+ ok(defined($dst) || fileno($dst));
+ $out = select($dst);
+ while (<$src>) { print; }
+ close($dst);
+ close($src);
+ ok(compare($euc,$rnd) == 0);
+ select($out);
}
is($enc->name,'euc-jp');
diff --git a/ext/Encode/t/KR.t b/ext/Encode/t/KR.t
index fd1c503bad..e42271b4fc 100644
--- a/ext/Encode/t/KR.t
+++ b/ext/Encode/t/KR.t
@@ -77,47 +77,51 @@ ok(compare($euc,$rnd) == 0);
is($enc->name,'euc-kr');
-print "# src :encoding test\n";
-open($src,"<encoding(euc-kr)",$euc) || die "Cannot open $euc:$!";
-binmode($src);
-ok(defined($src) && fileno($src));
-open($dst,">:utf8",$utf) || die "Cannot open $utf:$!";
-binmode($dst);
-ok(defined($dst) || fileno($dst));
-my $out = select($dst);
-while (<$src>)
- {
- print;
- }
-close($dst);
-close($src);
-
-TODO:
-{
- local $TODO = 'needs debugging on VMS' if $^O eq 'VMS';
- ok(compare($utf,$ref) == 0);
+my $skip_perlio;
+eval { require PerlIO::encoding; };
+if ($@){
+ $skip_perlio = 1;
+}else{
+ $skip_perlio = 0;
+ binmode(STDIN);
}
-select($out);
-SKIP:
-{
- #skip "Multi-byte write is broken",3;
- print "# dst :encoding test\n";
- open($src,"<:utf8",$ref) || die "Cannot open $ref:$!";
- binmode($src);
- ok(defined($src) || fileno($src));
- open($dst,">encoding(euc-kr)",$rnd) || die "Cannot open $rnd:$!";
- binmode($dst);
- ok(defined($dst) || fileno($dst));
- my $out = select($dst);
- while (<$src>)
- {
- print;
- }
- close($dst);
- close($src);
- ok(compare($euc,$rnd) == 0);
- select($out);
+$skip_perlio ||= (@ARGV and shift eq 'perlio');
+
+SKIP: {
+ skip "PerlIO Encoding Needed", 6 if $skip_perlio;
+ print "# src :encoding test\n";
+ open($src,"<encoding(euc-kr)",$euc) || die "Cannot open $euc:$!";
+ binmode($src);
+ ok(defined($src) && fileno($src));
+ open($dst,">:utf8",$utf) || die "Cannot open $utf:$!";
+ binmode($dst);
+ ok(defined($dst) || fileno($dst));
+ my $out = select($dst);
+ while (<$src>) { print; }
+ close($dst);
+ close($src);
+
+ TODO:
+ {
+ local $TODO = 'needs debugging on VMS' if $^O eq 'VMS';
+ ok(compare($utf,$ref) == 0);
+ }
+ select($out);
+
+ print "# dst :encoding test\n";
+ open($src,"<:utf8",$ref) || die "Cannot open $ref:$!";
+ binmode($src);
+ ok(defined($src) || fileno($src));
+ open($dst,">encoding(euc-kr)",$rnd) || die "Cannot open $rnd:$!";
+ binmode($dst);
+ ok(defined($dst) || fileno($dst));
+ $out = select($dst);
+ while (<$src>) { print; }
+ close($dst);
+ close($src);
+ ok(compare($euc,$rnd) == 0);
+ select($out);
}
is($enc->name,'euc-kr');
diff --git a/ext/Encode/t/TW.t b/ext/Encode/t/TW.t
index a51017abd5..5ce2c41156 100644
--- a/ext/Encode/t/TW.t
+++ b/ext/Encode/t/TW.t
@@ -8,10 +8,6 @@ BEGIN {
print "1..0 # Skip: Encode was not built\n";
exit 0;
}
- unless (find PerlIO::Layer 'perlio') {
- print "1..0 # Skip: PerlIO was not built\n";
- exit 0;
- }
if (ord("A") == 193) {
print "1..0 # Skip: EBCDIC\n";
exit 0;
diff --git a/ext/Encode/t/Unicode.t b/ext/Encode/t/Unicode.t
index 02eac86d44..bc15aaf216 100644
--- a/ext/Encode/t/Unicode.t
+++ b/ext/Encode/t/Unicode.t
@@ -1,5 +1,5 @@
#
-# $Id: Unicode.t,v 1.7 2002/04/14 22:05:20 dankogai Exp $
+# $Id: Unicode.t,v 1.8 2002/04/16 23:35:00 dankogai Exp $
#
# This script is written entirely in ASCII, even though quoted literals
# do include non-BMP unicode characters -- Are you happy, jhi?
@@ -12,17 +12,6 @@ BEGIN {
print "1..0 # Skip: Encode was not built\n";
exit 0;
}
-# should work without perlio
-# unless (find PerlIO::Layer 'perlio') {
-# print "1..0 # Skip: PerlIO was not built\n";
-# exit 0;
-# }
-
-# should work on EBCDIC
-# if (ord("A") == 193) {
-# print "1..0 # Skip: EBCDIC\n";
-# exit 0;
-# }
$ON_EBCDIC = (ord("A") == 193) || $ARGV[0];
$| = 1;
}
diff --git a/ext/Encode/t/encoding.t b/ext/Encode/t/encoding.t
index 85127ffec0..a51bb669f3 100644
--- a/ext/Encode/t/encoding.t
+++ b/ext/Encode/t/encoding.t
@@ -4,10 +4,6 @@ BEGIN {
print "1..0 # Skip: Encode was not built\n";
exit 0;
}
- unless (find PerlIO::Layer 'perlio') {
- print "1..0 # Skip: PerlIO was not built\n";
- exit 0;
- }
if (ord("A") == 193) {
print "1..0 # encoding pragma does not support EBCDIC platforms\n";
exit(0);