diff options
-rw-r--r-- | ext/Encode/Encode.pm | 8 | ||||
-rw-r--r-- | ext/Encode/Encode.xs | 50 | ||||
-rw-r--r-- | ext/Encode/t/perlio.t | 2 | ||||
-rw-r--r-- | ext/PerlIO/encoding/encoding.pm | 12 | ||||
-rw-r--r-- | ext/PerlIO/encoding/encoding.xs | 162 |
5 files changed, 182 insertions, 52 deletions
diff --git a/ext/Encode/Encode.pm b/ext/Encode/Encode.pm index 0bf6a2489f..fb80200d2c 100644 --- a/ext/Encode/Encode.pm +++ b/ext/Encode/Encode.pm @@ -253,14 +253,18 @@ sub predefine_encodings{ $_[1] = '' if $chk; return $octets; }; - $Encode::Encoding{utf8} = + $Encode::Encoding{utf8} = bless {Name => "utf8"} => "Encode::utf8"; } } require Encode::Encoding; +@Encode::XS::ISA = qw(Encode::Encoding); -eval { +# This is very dodgy - PerlIO::encoding does "use Encode" and _BEFORE_ it gets a +# chance to set its VERSION we potentially delete it from %INC so it will be re-loaded +# NI-S +eval { require PerlIO::encoding; unless (PerlIO::encoding->VERSION >= 0.02){ delete $INC{"PerlIO/encoding.pm"}; diff --git a/ext/Encode/Encode.xs b/ext/Encode/Encode.xs index c208af0e16..b898780e73 100644 --- a/ext/Encode/Encode.xs +++ b/ext/Encode/Encode.xs @@ -11,11 +11,11 @@ /* set 1 or more to profile. t/encoding.t dumps core because of Perl_warner and PerlIO don't work well */ -#define ENCODE_XS_PROFILE 0 +#define ENCODE_XS_PROFILE 0 /* set 0 to disable floating point to calculate buffer size for encode_method(). 1 is recommended. 2 restores NI-S original */ -#define ENCODE_XS_USEFP 1 +#define ENCODE_XS_USEFP 1 #define UNIMPLEMENTED(x,y) y x (SV *sv, char *encoding) {dTHX; \ Perl_croak(aTHX_ "panic_unimplemented"); \ @@ -119,40 +119,40 @@ encode_method(pTHX_ encode_t * enc, encpage_t * dir, SV * src, } case ENCODE_NOREP: /* encoding */ - if (dir == enc->f_utf8) { + if (dir == enc->f_utf8) { STRLEN clen; UV ch = - utf8n_to_uvuni(s+slen, (SvCUR(src)-slen), + utf8n_to_uvuni(s+slen, (SvCUR(src)-slen), &clen, UTF8_ALLOW_ANY|UTF8_CHECK_ONLY); if (check & ENCODE_DIE_ON_ERR) { Perl_croak( - aTHX_ "\"\\N{U+%" UVxf "}\" does not map to %s, %d", + aTHX_ "\"\\N{U+%" UVxf "}\" does not map to %s, %d", ch, enc->name[0], __LINE__); }else{ if (check & ENCODE_RETURN_ON_ERR){ if (check & ENCODE_WARN_ON_ERR){ Perl_warner( aTHX_ packWARN(WARN_UTF8), - "\"\\N{U+%" UVxf "}\" does not map to %s", + "\"\\N{U+%" UVxf "}\" does not map to %s", ch,enc->name[0]); } goto ENCODE_SET_SRC; }else if (check & ENCODE_PERLQQ){ - SV* perlqq = + SV* perlqq = sv_2mortal(newSVpvf("\\x{%04x}", ch)); sdone += slen + clen; ddone += dlen + SvCUR(perlqq); sv_catsv(dst, perlqq); - } else { + } else { /* fallback char */ sdone += slen + clen; - ddone += dlen + enc->replen; - sv_catpvn(dst, (char*)enc->rep, enc->replen); + ddone += dlen + enc->replen; + sv_catpvn(dst, (char*)enc->rep, enc->replen); } - } + } } /* decoding */ - else { + else { if (check & ENCODE_DIE_ON_ERR){ Perl_croak( aTHX_ "%s \"\\x%02X\" does not map to Unicode (%d)", @@ -167,22 +167,22 @@ encode_method(pTHX_ encode_t * enc, encpage_t * dir, SV * src, } goto ENCODE_SET_SRC; }else if (check & ENCODE_PERLQQ){ - SV* perlqq = + SV* perlqq = sv_2mortal(newSVpvf("\\x%02X", s[slen])); sdone += slen + 1; ddone += dlen + SvCUR(perlqq); sv_catsv(dst, perlqq); } else { sdone += slen + 1; - ddone += dlen + strlen(FBCHAR_UTF8); - sv_catpv(dst, FBCHAR_UTF8); + ddone += dlen + strlen(FBCHAR_UTF8); + sv_catpv(dst, FBCHAR_UTF8); } } } /* settle variables when fallback */ d = (U8 *)SvEND(dst); - dlen = SvLEN(dst) - ddone - 1; - s = (U8*)SvPVX(src) + sdone; + dlen = SvLEN(dst) - ddone - 1; + s = (U8*)SvPVX(src) + sdone; slen = tlen - sdone; break; @@ -205,10 +205,10 @@ encode_method(pTHX_ encode_t * enc, encpage_t * dir, SV * src, if (code && !(check & ENCODE_RETURN_ON_ERR)) { return &PL_sv_undef; } - + SvCUR_set(dst, dlen+ddone); SvPOK_only(dst); - + #if ENCODE_XS_PROFILE if (SvCUR(dst) > SvCUR(src)){ Perl_warn(aTHX_ @@ -217,7 +217,7 @@ encode_method(pTHX_ encode_t * enc, encpage_t * dir, SV * src, (SvLEN(dst) - SvCUR(dst))*1.0/SvLEN(dst)*100.0); } #endif - + ENCODE_END: *SvEND(dst) = '\0'; return dst; @@ -273,7 +273,7 @@ SV * sv CODE: { SV * encoding = items == 2 ? ST(1) : Nullsv; - + if (encoding) RETVAL = _encoded_bytes_to_utf8(sv, SvPV_nolen(encoding)); else { @@ -310,7 +310,7 @@ CODE: /* Must do things the slow way */ U8 *dest; /* We need a copy to pass to check() */ - U8 *src = (U8*)savepv((char *)s); + U8 *src = (U8*)savepv((char *)s); U8 *send = s + len; New(83, dest, len, U8); /* I think */ @@ -335,8 +335,8 @@ CODE: /* Note change to utf8.c variable naming, for variety */ while (ulen--) { - if ((*s & 0xc0) != 0x80){ - goto failure; + if ((*s & 0xc0) != 0x80){ + goto failure; } else { uv = (uv << 6) | (*s++ & 0x3f); } @@ -422,7 +422,7 @@ CODE: OUTPUT: RETVAL -int +int WARN_ON_ERR() CODE: RETVAL = ENCODE_WARN_ON_ERR; diff --git a/ext/Encode/t/perlio.t b/ext/Encode/t/perlio.t index 936eeb0b63..3381a12e13 100644 --- a/ext/Encode/t/perlio.t +++ b/ext/Encode/t/perlio.t @@ -59,7 +59,7 @@ for my $e (qw/euc-jp shiftjis 7bit-jis iso-2022-jp iso-2022-jp-1/){ # first create a file without perlio dump2file($sfile, &encode($e, $utext, 0)); - + # then create a file via perlio without autoflush SKIP:{ diff --git a/ext/PerlIO/encoding/encoding.pm b/ext/PerlIO/encoding/encoding.pm index 9aa0e9a8b1..1d9c73f242 100644 --- a/ext/PerlIO/encoding/encoding.pm +++ b/ext/PerlIO/encoding/encoding.pm @@ -1,7 +1,8 @@ package PerlIO::encoding; -our $VERSION = '0.02'; +our $VERSION = '0.03'; use XSLoader (); -use Encode; +use Encode (); # Load but do not import anything. +our $check; XSLoader::load 'PerlIO::encoding'; 1; __END__ @@ -15,6 +16,9 @@ PerlIO::encoding - encoding layer open($f, "<:encoding(foo)", "infoo"); open($f, ">:encoding(bar)", "outbar"); + use Encode; + $PerlIO::encoding::check = Encode::FB_PERLQQ(); + =head1 DESCRIPTION Open a filehandle with a transparent encoding filter. @@ -24,6 +28,10 @@ character set and encoding to Perl string data (Unicode and Perl's internal Unicode encoding, UTF-8). On output, convert Perl string data into the specified character set and encoding. +When the layer is pushed the current value of C<$PerlIO::encoding::check> +is saved and used as the check argument when calling the Encodings +encode and decode. + =head1 SEE ALSO L<open>, L<Encode>, L<perlfunc/binmode>, L<perluniintro> diff --git a/ext/PerlIO/encoding/encoding.xs b/ext/PerlIO/encoding/encoding.xs index 23de989514..a864c8aa18 100644 --- a/ext/PerlIO/encoding/encoding.xs +++ b/ext/PerlIO/encoding/encoding.xs @@ -45,11 +45,16 @@ typedef struct { SV *dataSV; /* data we have read from layer below */ SV *enc; /* the encoding object */ SV *chk; /* CHECK in Encode methods */ + int flags; /* Flags currently just needs lines */ } PerlIOEncode; +#define NEEDS_LINES 1 -#define ENCODE_FB_QUIET "Encode::FB_QUIET" - +#if 0 +#define OUR_ENCODE_FB "Encode::FB_PERLQQ" +#else +#define OUR_ENCODE_FB "Encode::FB_QUIET" +#endif SV * PerlIOEncode_getarg(pTHX_ PerlIO * f, CLONE_PARAMS * param, int flags) @@ -78,21 +83,12 @@ PerlIOEncode_pushed(pTHX_ PerlIO * f, const char *mode, SV * arg) PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode); dSP; IV code; + SV *result = Nullsv; code = PerlIOBuf_pushed(aTHX_ f, mode, Nullsv); ENTER; SAVETMPS; PUSHMARK(sp); - PUTBACK; - if (call_pv(ENCODE_FB_QUIET, G_SCALAR|G_NOARGS) != 1) { - Perl_die(aTHX_ "Call to Encode::FB_QUIET failed!"); - code = -1; - } - SPAGAIN; - e->chk = newSVsv(POPs); - PUTBACK; - - PUSHMARK(sp); XPUSHs(arg); PUTBACK; if (call_pv("Encode::find_encoding", G_SCALAR) != 1) { @@ -101,20 +97,52 @@ PerlIOEncode_pushed(pTHX_ PerlIO * f, const char *mode, SV * arg) return -1; } SPAGAIN; - e->enc = POPs; + result = POPs; PUTBACK; - if (!SvROK(e->enc)) { + if (!SvROK(result) || !SvOBJECT(SvRV(result))) { e->enc = Nullsv; - errno = EINVAL; Perl_warner(aTHX_ packWARN(WARN_IO), "Cannot find encoding \"%" SVf "\"", - arg); + arg); + errno = EINVAL; code = -1; } else { - SvREFCNT_inc(e->enc); +#ifdef USE_NEW_SEQUENCE + PUSHMARK(sp); + XPUSHs(result); + PUTBACK; + if (call_method("new_sequence",G_SCALAR|G_EVAL) != 1 || SvTRUE(ERRSV)) { + Perl_warner(aTHX_ packWARN(WARN_IO), "\"%" SVf "\" does not support new_sequence", + arg); + } + else { + SPAGAIN; + result = POPs; + PUTBACK; + } +#endif + e->enc = newSVsv(result); + PUSHMARK(sp); + XPUSHs(e->enc); + PUTBACK; + if (call_method("needs_lines",G_SCALAR|G_EVAL) != 1 || SvTRUE(ERRSV)) { + Perl_warner(aTHX_ packWARN(WARN_IO), "\"%" SVf "\" does not support needs_lines", + arg); + } + else { + SPAGAIN; + result = POPs; + PUTBACK; + if (SvTRUE(result)) { + e->flags |= NEEDS_LINES; + } + } PerlIOBase(f)->flags |= PERLIO_F_UTF8; } + + e->chk = newSVsv(get_sv("PerlIO::encoding::check",0)); + FREETMPS; LEAVE; return code; @@ -136,6 +164,10 @@ PerlIOEncode_popped(pTHX_ PerlIO * f) SvREFCNT_dec(e->dataSV); e->dataSV = Nullsv; } + if (e->chk) { + SvREFCNT_dec(e->chk); + e->dataSV = Nullsv; + } return 0; } @@ -210,9 +242,9 @@ PerlIOEncode_fill(pTHX_ PerlIO * f) avail = 0; } } - if (avail > 0) { + if (avail > 0 || (e->flags & NEEDS_LINES)) { STDCHAR *ptr = PerlIO_get_ptr(n); - SSize_t use = avail; + SSize_t use = (avail >= 0) ? avail : 0; SV *uni; char *s; STRLEN len = 0; @@ -223,12 +255,45 @@ PerlIOEncode_fill(pTHX_ PerlIO * f) if (SvTYPE(e->dataSV) < SVt_PV) { sv_upgrade(e->dataSV,SVt_PV); } + if (e->flags & NEEDS_LINES) { + /* Encoding needs whole lines (e.g. iso-2022-*) + search back from end of available data for + and line marker + */ + STDCHAR *nl = ptr+use-1; + while (nl >= ptr) { + if (*nl == '\n') { + break; + } + nl--; + } + if (nl >= ptr && *nl == '\n') { + /* found a line - take up to and including that */ + use = (nl+1)-ptr; + } + else if (avail > 0) { + /* No line, but not EOF - append avail to the pending data */ + sv_catpvn(e->dataSV, ptr, use); + PerlIO_set_ptrcnt(n, ptr+use, 0); + goto retry; + } + else if (!SvCUR(e->dataSV)) { + goto end_of_file; + } + } if (SvCUR(e->dataSV)) { /* something left over from last time - create a normal SV with new data appended */ if (use + SvCUR(e->dataSV) > e->base.bufsiz) { - use = e->base.bufsiz - SvCUR(e->dataSV); + if (e->flags & NEEDS_LINES) { + /* Have to grow buffer */ + e->base.bufsiz = use + SvCUR(e->dataSV); + PerlIOEncode_get_base(aTHX_ f); + } + else { + use = e->base.bufsiz - SvCUR(e->dataSV); + } } sv_catpvn(e->dataSV,(char*)ptr,use); } @@ -238,7 +303,14 @@ PerlIOEncode_fill(pTHX_ PerlIO * f) Safefree(SvPVX(e->dataSV)); } if (use > (SSize_t)e->base.bufsiz) { - use = e->base.bufsiz; + if (e->flags & NEEDS_LINES) { + /* Have to grow buffer */ + e->base.bufsiz = use; + PerlIOEncode_get_base(aTHX_ f); + } + else { + use = e->base.bufsiz; + } } SvPVX(e->dataSV) = (char *) ptr; SvLEN(e->dataSV) = 0; /* Hands off sv.c - it isn't yours */ @@ -300,6 +372,7 @@ PerlIOEncode_fill(pTHX_ PerlIO * f) return code; } else { + end_of_file: if (avail == 0) PerlIOBase(f)->flags |= PERLIO_F_EOF; else @@ -449,6 +522,38 @@ PerlIOEncode_dup(pTHX_ PerlIO * f, PerlIO * o, return f; } +SSize_t +PerlIOEncode_write(pTHX_ PerlIO *f, const void *vbuf, Size_t count) +{ + PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode); + if (e->flags & NEEDS_LINES) { + SSize_t done = 0; + const char *ptr = (const char *) vbuf; + const char *end = ptr+count; + while (ptr < end) { + const char *nl = ptr; + while (nl < end && *nl++ != '\n') /* empty body */; + done = PerlIOBuf_write(aTHX_ f, ptr, nl-ptr); + if (done != nl-ptr) { + if (done > 0) { + ptr += done; + } + break; + } + ptr += done; + if (ptr[-1] == '\n') { + if (PerlIOEncode_flush(aTHX_ f) != 0) { + break; + } + } + } + return (SSize_t) (ptr - (const char *) vbuf); + } + else { + return PerlIOBuf_write(aTHX_ f, vbuf, count); + } +} + PerlIO_funcs PerlIO_encode = { "encoding", sizeof(PerlIOEncode), @@ -461,7 +566,7 @@ PerlIO_funcs PerlIO_encode = { PerlIOEncode_dup, PerlIOBuf_read, PerlIOBuf_unread, - PerlIOBuf_write, + PerlIOEncode_write, PerlIOBuf_seek, PerlIOEncode_tell, PerlIOEncode_close, @@ -485,6 +590,19 @@ PROTOTYPES: ENABLE BOOT: { + SV *sv = get_sv("PerlIO::encoding::check", GV_ADD|GV_ADDMULTI); + sv_setiv(sv,0); + PUSHMARK(sp); + PUTBACK; + if (call_pv(OUR_ENCODE_FB, G_SCALAR) != 1) { + Perl_warner(aTHX_ packWARN(WARN_IO), + "Call to %s failed!",OUR_ENCODE_FB); + } + else { + SPAGAIN; + sv_setsv(sv,POPs); + PUTBACK; + } #ifdef PERLIO_LAYERS PerlIO_define_layer(aTHX_ &PerlIO_encode); #endif |