summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ext/Encode/Encode.pm8
-rw-r--r--ext/Encode/Encode.xs50
-rw-r--r--ext/Encode/t/perlio.t2
-rw-r--r--ext/PerlIO/encoding/encoding.pm12
-rw-r--r--ext/PerlIO/encoding/encoding.xs162
5 files changed, 182 insertions, 52 deletions
diff --git a/ext/Encode/Encode.pm b/ext/Encode/Encode.pm
index 0bf6a2489f..fb80200d2c 100644
--- a/ext/Encode/Encode.pm
+++ b/ext/Encode/Encode.pm
@@ -253,14 +253,18 @@ sub predefine_encodings{
$_[1] = '' if $chk;
return $octets;
};
- $Encode::Encoding{utf8} =
+ $Encode::Encoding{utf8} =
bless {Name => "utf8"} => "Encode::utf8";
}
}
require Encode::Encoding;
+@Encode::XS::ISA = qw(Encode::Encoding);
-eval {
+# This is very dodgy - PerlIO::encoding does "use Encode" and _BEFORE_ it gets a
+# chance to set its VERSION we potentially delete it from %INC so it will be re-loaded
+# NI-S
+eval {
require PerlIO::encoding;
unless (PerlIO::encoding->VERSION >= 0.02){
delete $INC{"PerlIO/encoding.pm"};
diff --git a/ext/Encode/Encode.xs b/ext/Encode/Encode.xs
index c208af0e16..b898780e73 100644
--- a/ext/Encode/Encode.xs
+++ b/ext/Encode/Encode.xs
@@ -11,11 +11,11 @@
/* set 1 or more to profile. t/encoding.t dumps core because of
Perl_warner and PerlIO don't work well */
-#define ENCODE_XS_PROFILE 0
+#define ENCODE_XS_PROFILE 0
/* set 0 to disable floating point to calculate buffer size for
encode_method(). 1 is recommended. 2 restores NI-S original */
-#define ENCODE_XS_USEFP 1
+#define ENCODE_XS_USEFP 1
#define UNIMPLEMENTED(x,y) y x (SV *sv, char *encoding) {dTHX; \
Perl_croak(aTHX_ "panic_unimplemented"); \
@@ -119,40 +119,40 @@ encode_method(pTHX_ encode_t * enc, encpage_t * dir, SV * src,
}
case ENCODE_NOREP:
/* encoding */
- if (dir == enc->f_utf8) {
+ if (dir == enc->f_utf8) {
STRLEN clen;
UV ch =
- utf8n_to_uvuni(s+slen, (SvCUR(src)-slen),
+ utf8n_to_uvuni(s+slen, (SvCUR(src)-slen),
&clen, UTF8_ALLOW_ANY|UTF8_CHECK_ONLY);
if (check & ENCODE_DIE_ON_ERR) {
Perl_croak(
- aTHX_ "\"\\N{U+%" UVxf "}\" does not map to %s, %d",
+ aTHX_ "\"\\N{U+%" UVxf "}\" does not map to %s, %d",
ch, enc->name[0], __LINE__);
}else{
if (check & ENCODE_RETURN_ON_ERR){
if (check & ENCODE_WARN_ON_ERR){
Perl_warner(
aTHX_ packWARN(WARN_UTF8),
- "\"\\N{U+%" UVxf "}\" does not map to %s",
+ "\"\\N{U+%" UVxf "}\" does not map to %s",
ch,enc->name[0]);
}
goto ENCODE_SET_SRC;
}else if (check & ENCODE_PERLQQ){
- SV* perlqq =
+ SV* perlqq =
sv_2mortal(newSVpvf("\\x{%04x}", ch));
sdone += slen + clen;
ddone += dlen + SvCUR(perlqq);
sv_catsv(dst, perlqq);
- } else {
+ } else {
/* fallback char */
sdone += slen + clen;
- ddone += dlen + enc->replen;
- sv_catpvn(dst, (char*)enc->rep, enc->replen);
+ ddone += dlen + enc->replen;
+ sv_catpvn(dst, (char*)enc->rep, enc->replen);
}
- }
+ }
}
/* decoding */
- else {
+ else {
if (check & ENCODE_DIE_ON_ERR){
Perl_croak(
aTHX_ "%s \"\\x%02X\" does not map to Unicode (%d)",
@@ -167,22 +167,22 @@ encode_method(pTHX_ encode_t * enc, encpage_t * dir, SV * src,
}
goto ENCODE_SET_SRC;
}else if (check & ENCODE_PERLQQ){
- SV* perlqq =
+ SV* perlqq =
sv_2mortal(newSVpvf("\\x%02X", s[slen]));
sdone += slen + 1;
ddone += dlen + SvCUR(perlqq);
sv_catsv(dst, perlqq);
} else {
sdone += slen + 1;
- ddone += dlen + strlen(FBCHAR_UTF8);
- sv_catpv(dst, FBCHAR_UTF8);
+ ddone += dlen + strlen(FBCHAR_UTF8);
+ sv_catpv(dst, FBCHAR_UTF8);
}
}
}
/* settle variables when fallback */
d = (U8 *)SvEND(dst);
- dlen = SvLEN(dst) - ddone - 1;
- s = (U8*)SvPVX(src) + sdone;
+ dlen = SvLEN(dst) - ddone - 1;
+ s = (U8*)SvPVX(src) + sdone;
slen = tlen - sdone;
break;
@@ -205,10 +205,10 @@ encode_method(pTHX_ encode_t * enc, encpage_t * dir, SV * src,
if (code && !(check & ENCODE_RETURN_ON_ERR)) {
return &PL_sv_undef;
}
-
+
SvCUR_set(dst, dlen+ddone);
SvPOK_only(dst);
-
+
#if ENCODE_XS_PROFILE
if (SvCUR(dst) > SvCUR(src)){
Perl_warn(aTHX_
@@ -217,7 +217,7 @@ encode_method(pTHX_ encode_t * enc, encpage_t * dir, SV * src,
(SvLEN(dst) - SvCUR(dst))*1.0/SvLEN(dst)*100.0);
}
#endif
-
+
ENCODE_END:
*SvEND(dst) = '\0';
return dst;
@@ -273,7 +273,7 @@ SV * sv
CODE:
{
SV * encoding = items == 2 ? ST(1) : Nullsv;
-
+
if (encoding)
RETVAL = _encoded_bytes_to_utf8(sv, SvPV_nolen(encoding));
else {
@@ -310,7 +310,7 @@ CODE:
/* Must do things the slow way */
U8 *dest;
/* We need a copy to pass to check() */
- U8 *src = (U8*)savepv((char *)s);
+ U8 *src = (U8*)savepv((char *)s);
U8 *send = s + len;
New(83, dest, len, U8); /* I think */
@@ -335,8 +335,8 @@ CODE:
/* Note change to utf8.c variable naming, for variety */
while (ulen--) {
- if ((*s & 0xc0) != 0x80){
- goto failure;
+ if ((*s & 0xc0) != 0x80){
+ goto failure;
} else {
uv = (uv << 6) | (*s++ & 0x3f);
}
@@ -422,7 +422,7 @@ CODE:
OUTPUT:
RETVAL
-int
+int
WARN_ON_ERR()
CODE:
RETVAL = ENCODE_WARN_ON_ERR;
diff --git a/ext/Encode/t/perlio.t b/ext/Encode/t/perlio.t
index 936eeb0b63..3381a12e13 100644
--- a/ext/Encode/t/perlio.t
+++ b/ext/Encode/t/perlio.t
@@ -59,7 +59,7 @@ for my $e (qw/euc-jp shiftjis 7bit-jis iso-2022-jp iso-2022-jp-1/){
# first create a file without perlio
dump2file($sfile, &encode($e, $utext, 0));
-
+
# then create a file via perlio without autoflush
SKIP:{
diff --git a/ext/PerlIO/encoding/encoding.pm b/ext/PerlIO/encoding/encoding.pm
index 9aa0e9a8b1..1d9c73f242 100644
--- a/ext/PerlIO/encoding/encoding.pm
+++ b/ext/PerlIO/encoding/encoding.pm
@@ -1,7 +1,8 @@
package PerlIO::encoding;
-our $VERSION = '0.02';
+our $VERSION = '0.03';
use XSLoader ();
-use Encode;
+use Encode (); # Load but do not import anything.
+our $check;
XSLoader::load 'PerlIO::encoding';
1;
__END__
@@ -15,6 +16,9 @@ PerlIO::encoding - encoding layer
open($f, "<:encoding(foo)", "infoo");
open($f, ">:encoding(bar)", "outbar");
+ use Encode;
+ $PerlIO::encoding::check = Encode::FB_PERLQQ();
+
=head1 DESCRIPTION
Open a filehandle with a transparent encoding filter.
@@ -24,6 +28,10 @@ character set and encoding to Perl string data (Unicode and
Perl's internal Unicode encoding, UTF-8). On output, convert
Perl string data into the specified character set and encoding.
+When the layer is pushed the current value of C<$PerlIO::encoding::check>
+is saved and used as the check argument when calling the Encodings
+encode and decode.
+
=head1 SEE ALSO
L<open>, L<Encode>, L<perlfunc/binmode>, L<perluniintro>
diff --git a/ext/PerlIO/encoding/encoding.xs b/ext/PerlIO/encoding/encoding.xs
index 23de989514..a864c8aa18 100644
--- a/ext/PerlIO/encoding/encoding.xs
+++ b/ext/PerlIO/encoding/encoding.xs
@@ -45,11 +45,16 @@ typedef struct {
SV *dataSV; /* data we have read from layer below */
SV *enc; /* the encoding object */
SV *chk; /* CHECK in Encode methods */
+ int flags; /* Flags currently just needs lines */
} PerlIOEncode;
+#define NEEDS_LINES 1
-#define ENCODE_FB_QUIET "Encode::FB_QUIET"
-
+#if 0
+#define OUR_ENCODE_FB "Encode::FB_PERLQQ"
+#else
+#define OUR_ENCODE_FB "Encode::FB_QUIET"
+#endif
SV *
PerlIOEncode_getarg(pTHX_ PerlIO * f, CLONE_PARAMS * param, int flags)
@@ -78,21 +83,12 @@ PerlIOEncode_pushed(pTHX_ PerlIO * f, const char *mode, SV * arg)
PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
dSP;
IV code;
+ SV *result = Nullsv;
code = PerlIOBuf_pushed(aTHX_ f, mode, Nullsv);
ENTER;
SAVETMPS;
PUSHMARK(sp);
- PUTBACK;
- if (call_pv(ENCODE_FB_QUIET, G_SCALAR|G_NOARGS) != 1) {
- Perl_die(aTHX_ "Call to Encode::FB_QUIET failed!");
- code = -1;
- }
- SPAGAIN;
- e->chk = newSVsv(POPs);
- PUTBACK;
-
- PUSHMARK(sp);
XPUSHs(arg);
PUTBACK;
if (call_pv("Encode::find_encoding", G_SCALAR) != 1) {
@@ -101,20 +97,52 @@ PerlIOEncode_pushed(pTHX_ PerlIO * f, const char *mode, SV * arg)
return -1;
}
SPAGAIN;
- e->enc = POPs;
+ result = POPs;
PUTBACK;
- if (!SvROK(e->enc)) {
+ if (!SvROK(result) || !SvOBJECT(SvRV(result))) {
e->enc = Nullsv;
- errno = EINVAL;
Perl_warner(aTHX_ packWARN(WARN_IO), "Cannot find encoding \"%" SVf "\"",
- arg);
+ arg);
+ errno = EINVAL;
code = -1;
}
else {
- SvREFCNT_inc(e->enc);
+#ifdef USE_NEW_SEQUENCE
+ PUSHMARK(sp);
+ XPUSHs(result);
+ PUTBACK;
+ if (call_method("new_sequence",G_SCALAR|G_EVAL) != 1 || SvTRUE(ERRSV)) {
+ Perl_warner(aTHX_ packWARN(WARN_IO), "\"%" SVf "\" does not support new_sequence",
+ arg);
+ }
+ else {
+ SPAGAIN;
+ result = POPs;
+ PUTBACK;
+ }
+#endif
+ e->enc = newSVsv(result);
+ PUSHMARK(sp);
+ XPUSHs(e->enc);
+ PUTBACK;
+ if (call_method("needs_lines",G_SCALAR|G_EVAL) != 1 || SvTRUE(ERRSV)) {
+ Perl_warner(aTHX_ packWARN(WARN_IO), "\"%" SVf "\" does not support needs_lines",
+ arg);
+ }
+ else {
+ SPAGAIN;
+ result = POPs;
+ PUTBACK;
+ if (SvTRUE(result)) {
+ e->flags |= NEEDS_LINES;
+ }
+ }
PerlIOBase(f)->flags |= PERLIO_F_UTF8;
}
+
+ e->chk = newSVsv(get_sv("PerlIO::encoding::check",0));
+
FREETMPS;
LEAVE;
return code;
@@ -136,6 +164,10 @@ PerlIOEncode_popped(pTHX_ PerlIO * f)
SvREFCNT_dec(e->dataSV);
e->dataSV = Nullsv;
}
+ if (e->chk) {
+ SvREFCNT_dec(e->chk);
+ e->dataSV = Nullsv;
+ }
return 0;
}
@@ -210,9 +242,9 @@ PerlIOEncode_fill(pTHX_ PerlIO * f)
avail = 0;
}
}
- if (avail > 0) {
+ if (avail > 0 || (e->flags & NEEDS_LINES)) {
STDCHAR *ptr = PerlIO_get_ptr(n);
- SSize_t use = avail;
+ SSize_t use = (avail >= 0) ? avail : 0;
SV *uni;
char *s;
STRLEN len = 0;
@@ -223,12 +255,45 @@ PerlIOEncode_fill(pTHX_ PerlIO * f)
if (SvTYPE(e->dataSV) < SVt_PV) {
sv_upgrade(e->dataSV,SVt_PV);
}
+ if (e->flags & NEEDS_LINES) {
+ /* Encoding needs whole lines (e.g. iso-2022-*)
+ search back from end of available data for
+ and line marker
+ */
+ STDCHAR *nl = ptr+use-1;
+ while (nl >= ptr) {
+ if (*nl == '\n') {
+ break;
+ }
+ nl--;
+ }
+ if (nl >= ptr && *nl == '\n') {
+ /* found a line - take up to and including that */
+ use = (nl+1)-ptr;
+ }
+ else if (avail > 0) {
+ /* No line, but not EOF - append avail to the pending data */
+ sv_catpvn(e->dataSV, ptr, use);
+ PerlIO_set_ptrcnt(n, ptr+use, 0);
+ goto retry;
+ }
+ else if (!SvCUR(e->dataSV)) {
+ goto end_of_file;
+ }
+ }
if (SvCUR(e->dataSV)) {
/* something left over from last time - create a normal
SV with new data appended
*/
if (use + SvCUR(e->dataSV) > e->base.bufsiz) {
- use = e->base.bufsiz - SvCUR(e->dataSV);
+ if (e->flags & NEEDS_LINES) {
+ /* Have to grow buffer */
+ e->base.bufsiz = use + SvCUR(e->dataSV);
+ PerlIOEncode_get_base(aTHX_ f);
+ }
+ else {
+ use = e->base.bufsiz - SvCUR(e->dataSV);
+ }
}
sv_catpvn(e->dataSV,(char*)ptr,use);
}
@@ -238,7 +303,14 @@ PerlIOEncode_fill(pTHX_ PerlIO * f)
Safefree(SvPVX(e->dataSV));
}
if (use > (SSize_t)e->base.bufsiz) {
- use = e->base.bufsiz;
+ if (e->flags & NEEDS_LINES) {
+ /* Have to grow buffer */
+ e->base.bufsiz = use;
+ PerlIOEncode_get_base(aTHX_ f);
+ }
+ else {
+ use = e->base.bufsiz;
+ }
}
SvPVX(e->dataSV) = (char *) ptr;
SvLEN(e->dataSV) = 0; /* Hands off sv.c - it isn't yours */
@@ -300,6 +372,7 @@ PerlIOEncode_fill(pTHX_ PerlIO * f)
return code;
}
else {
+ end_of_file:
if (avail == 0)
PerlIOBase(f)->flags |= PERLIO_F_EOF;
else
@@ -449,6 +522,38 @@ PerlIOEncode_dup(pTHX_ PerlIO * f, PerlIO * o,
return f;
}
+SSize_t
+PerlIOEncode_write(pTHX_ PerlIO *f, const void *vbuf, Size_t count)
+{
+ PerlIOEncode *e = PerlIOSelf(f, PerlIOEncode);
+ if (e->flags & NEEDS_LINES) {
+ SSize_t done = 0;
+ const char *ptr = (const char *) vbuf;
+ const char *end = ptr+count;
+ while (ptr < end) {
+ const char *nl = ptr;
+ while (nl < end && *nl++ != '\n') /* empty body */;
+ done = PerlIOBuf_write(aTHX_ f, ptr, nl-ptr);
+ if (done != nl-ptr) {
+ if (done > 0) {
+ ptr += done;
+ }
+ break;
+ }
+ ptr += done;
+ if (ptr[-1] == '\n') {
+ if (PerlIOEncode_flush(aTHX_ f) != 0) {
+ break;
+ }
+ }
+ }
+ return (SSize_t) (ptr - (const char *) vbuf);
+ }
+ else {
+ return PerlIOBuf_write(aTHX_ f, vbuf, count);
+ }
+}
+
PerlIO_funcs PerlIO_encode = {
"encoding",
sizeof(PerlIOEncode),
@@ -461,7 +566,7 @@ PerlIO_funcs PerlIO_encode = {
PerlIOEncode_dup,
PerlIOBuf_read,
PerlIOBuf_unread,
- PerlIOBuf_write,
+ PerlIOEncode_write,
PerlIOBuf_seek,
PerlIOEncode_tell,
PerlIOEncode_close,
@@ -485,6 +590,19 @@ PROTOTYPES: ENABLE
BOOT:
{
+ SV *sv = get_sv("PerlIO::encoding::check", GV_ADD|GV_ADDMULTI);
+ sv_setiv(sv,0);
+ PUSHMARK(sp);
+ PUTBACK;
+ if (call_pv(OUR_ENCODE_FB, G_SCALAR) != 1) {
+ Perl_warner(aTHX_ packWARN(WARN_IO),
+ "Call to %s failed!",OUR_ENCODE_FB);
+ }
+ else {
+ SPAGAIN;
+ sv_setsv(sv,POPs);
+ PUTBACK;
+ }
#ifdef PERLIO_LAYERS
PerlIO_define_layer(aTHX_ &PerlIO_encode);
#endif