summaryrefslogtreecommitdiff
path: root/cpan/Encode
diff options
context:
space:
mode:
authorSteve Hay <steve.m.hay@googlemail.com>2016-12-06 08:41:46 +0000
committerSteve Hay <steve.m.hay@googlemail.com>2016-12-06 08:41:46 +0000
commit15f5e486022b574631307e6a27ca1b961591e561 (patch)
treed14808feabe2cd097bd8b331234338c37d1d16cd /cpan/Encode
parent5aa240eab7dbaa91f98c2fee1f04b6c0b5a9b9e3 (diff)
downloadperl-15f5e486022b574631307e6a27ca1b961591e561.tar.gz
Upgrade Encode from version 2.86 to 2.88
(Unicode.pm is customized for a version-bump only, to silence t/porting/cmp_version.t since Unicode.xs has changed.)
Diffstat (limited to 'cpan/Encode')
-rw-r--r--cpan/Encode/Encode.pm31
-rw-r--r--cpan/Encode/Encode.xs519
-rw-r--r--cpan/Encode/Encode/_T.e2x6
-rw-r--r--cpan/Encode/Makefile.PL13
-rw-r--r--cpan/Encode/Unicode/Makefile.PL2
-rw-r--r--cpan/Encode/Unicode/Unicode.pm2
-rw-r--r--cpan/Encode/Unicode/Unicode.xs110
-rw-r--r--cpan/Encode/bin/enc2xs58
-rw-r--r--cpan/Encode/encoding.pm4
-rw-r--r--cpan/Encode/lib/Encode/Alias.pm14
-rw-r--r--cpan/Encode/lib/Encode/CN/HZ.pm5
-rw-r--r--cpan/Encode/lib/Encode/MIME/Header.pm471
-rw-r--r--cpan/Encode/lib/Encode/MIME/Name.pm14
-rw-r--r--cpan/Encode/t/Aliases.t2
-rw-r--r--cpan/Encode/t/Encode.t54
-rw-r--r--cpan/Encode/t/at-cn.t4
-rw-r--r--cpan/Encode/t/at-tw.t4
-rw-r--r--cpan/Encode/t/decode.t56
-rw-r--r--cpan/Encode/t/enc_data.t8
-rw-r--r--cpan/Encode/t/enc_eucjp.t2
-rw-r--r--cpan/Encode/t/enc_module.t8
-rw-r--r--cpan/Encode/t/enc_utf8.t2
-rw-r--r--cpan/Encode/t/encoding-locale.t2
-rw-r--r--cpan/Encode/t/encoding.t6
-rw-r--r--cpan/Encode/t/fallback.t2
-rw-r--r--cpan/Encode/t/jperl.t6
-rw-r--r--cpan/Encode/t/magic.t144
-rw-r--r--cpan/Encode/t/mime-header.t215
-rw-r--r--cpan/Encode/t/mime-name.t34
-rw-r--r--cpan/Encode/t/rt113164.t38
-rw-r--r--cpan/Encode/t/rt65541.t29
-rw-r--r--cpan/Encode/t/rt76824.t60
-rw-r--r--cpan/Encode/t/rt85489.t48
-rw-r--r--cpan/Encode/t/rt86327.t33
-rw-r--r--cpan/Encode/t/taint.t28
-rw-r--r--cpan/Encode/t/utf8ref.t21
-rw-r--r--cpan/Encode/t/utf8strict.t51
37 files changed, 1559 insertions, 547 deletions
diff --git a/cpan/Encode/Encode.pm b/cpan/Encode/Encode.pm
index bda8e1b316..57b4292279 100644
--- a/cpan/Encode/Encode.pm
+++ b/cpan/Encode/Encode.pm
@@ -1,10 +1,10 @@
#
-# $Id: Encode.pm,v 2.86 2016/08/10 18:08:01 dankogai Exp $
+# $Id: Encode.pm,v 2.88 2016/11/29 23:30:30 dankogai Exp dankogai $
#
package Encode;
use strict;
use warnings;
-our $VERSION = sprintf "%d.%02d", q$Revision: 2.86 $ =~ /(\d+)/g;
+our $VERSION = sprintf "%d.%02d", q$Revision: 2.88 $ =~ /(\d+)/g;
use constant DEBUG => !!$ENV{PERL_ENCODE_DEBUG};
use XSLoader ();
XSLoader::load( __PACKAGE__, $VERSION );
@@ -15,7 +15,7 @@ use Exporter 5.57 'import';
our @EXPORT = qw(
decode decode_utf8 encode encode_utf8 str2bytes bytes2str
- encodings find_encoding clone_encoding
+ encodings find_encoding find_mime_encoding clone_encoding
);
our @FB_FLAGS = qw(
DIE_ON_ERR WARN_ON_ERR RETURN_ON_ERR LEAVE_SRC
@@ -102,6 +102,8 @@ sub define_encoding {
sub getEncoding {
my ( $class, $name, $skip_external ) = @_;
+ defined($name) or return;
+
$name =~ s/\s+//g; # https://rt.cpan.org/Ticket/Display.html?id=65796
ref($name) && $name->can('renew') and return $name;
@@ -130,6 +132,14 @@ sub find_encoding($;$) {
return __PACKAGE__->getEncoding( $name, $skip_external );
}
+sub find_mime_encoding($;$) {
+ my ( $mime_name, $skip_external ) = @_;
+ eval { require Encode::MIME::Name; };
+ $@ and return;
+ my $name = Encode::MIME::Name::get_encode_name( $mime_name );
+ return find_encoding( $name, $skip_external );
+}
+
sub resolve_alias($) {
my $obj = find_encoding(shift);
defined $obj and return $obj->name;
@@ -254,6 +264,7 @@ sub from_to($$$;$) {
sub encode_utf8($) {
my ($str) = @_;
+ return undef unless defined $str;
utf8::encode($str);
return $str;
}
@@ -576,6 +587,20 @@ name of the encoding object.
See L<Encode::Encoding> for details.
+=head3 find_mime_encoding
+
+ [$obj =] find_mime_encoding(MIME_ENCODING)
+
+Returns the I<encoding object> corresponding to I<MIME_ENCODING>. Acts
+same as C<find_encoding()> but C<mime_name()> of returned object must
+match to I<MIME_ENCODING>. So as opposite of C<find_encoding()>
+canonical names and aliases are not used when searching for object.
+
+ find_mime_encoding("utf8"); # returns undef because "utf8" is not valid I<MIME_ENCODING>
+ find_mime_encoding("utf-8"); # returns encode object "utf-8-strict"
+ find_mime_encoding("UTF-8"); # same as "utf-8" because I<MIME_ENCODING> is case insensitive
+ find_mime_encoding("utf-8-strict"); returns undef because "utf-8-strict" is not valid I<MIME_ENCODING>
+
=head3 from_to
[$length =] from_to($octets, FROM_ENC, TO_ENC [, CHECK])
diff --git a/cpan/Encode/Encode.xs b/cpan/Encode/Encode.xs
index 222f39b2ea..b5160d2516 100644
--- a/cpan/Encode/Encode.xs
+++ b/cpan/Encode/Encode.xs
@@ -1,5 +1,5 @@
/*
- $Id: Encode.xs,v 2.37 2016/08/10 18:08:45 dankogai Exp dankogai $
+ $Id: Encode.xs,v 2.39 2016/11/29 23:29:23 dankogai Exp dankogai $
*/
#define PERL_NO_GET_CONTEXT
@@ -31,6 +31,10 @@
UNIMPLEMENTED(_encoded_utf8_to_bytes, I32)
UNIMPLEMENTED(_encoded_bytes_to_utf8, I32)
+#ifndef SvIV_nomg
+#define SvIV_nomg SvIV
+#endif
+
#ifdef UTF8_DISALLOW_ILLEGAL_INTERCHANGE
# define UTF8_ALLOW_STRICT UTF8_DISALLOW_ILLEGAL_INTERCHANGE
#else
@@ -76,6 +80,37 @@ call_failure(SV * routine, U8 * done, U8 * dest, U8 * orig)
PERL_UNUSED_VAR(orig);
}
+static void
+utf8_safe_downgrade(pTHX_ SV ** src, U8 ** s, STRLEN * slen, bool modify)
+{
+ if (!modify) {
+ SV *tmp = sv_2mortal(newSVpvn((char *)*s, *slen));
+ SvUTF8_on(tmp);
+ if (SvTAINTED(*src))
+ SvTAINTED_on(tmp);
+ *src = tmp;
+ *s = (U8 *)SvPVX(*src);
+ }
+ if (*slen) {
+ if (!utf8_to_bytes(*s, slen))
+ croak("Wide character");
+ SvCUR_set(*src, *slen);
+ }
+ SvUTF8_off(*src);
+}
+
+static void
+utf8_safe_upgrade(pTHX_ SV ** src, U8 ** s, STRLEN * slen, bool modify)
+{
+ if (!modify) {
+ SV *tmp = sv_2mortal(newSVpvn((char *)*s, *slen));
+ if (SvTAINTED(*src))
+ SvTAINTED_on(tmp);
+ *src = tmp;
+ }
+ sv_utf8_upgrade_nomg(*src);
+ *s = (U8 *)SvPV_nomg(*src, *slen);
+}
#define ERR_ENCODE_NOMAP "\"\\x{%04" UVxf "}\" does not map to %s"
#define ERR_DECODE_NOMAP "%s \"\\x%02" UVXf "\" does not map to Unicode"
@@ -104,18 +139,16 @@ do_fallback_cb(pTHX_ UV ch, SV *fallback_cb)
}
static SV *
-encode_method(pTHX_ const encode_t * enc, const encpage_t * dir, SV * src,
+encode_method(pTHX_ const encode_t * enc, const encpage_t * dir, SV * src, U8 * s, STRLEN slen,
int check, STRLEN * offset, SV * term, int * retcode,
SV *fallback_cb)
{
- STRLEN slen;
- U8 *s = (U8 *) SvPV(src, slen);
STRLEN tlen = slen;
STRLEN ddone = 0;
STRLEN sdone = 0;
/* We allocate slen+1.
PerlIO dumps core if this value is smaller than this. */
- SV *dst = sv_2mortal(newSV(slen+1));
+ SV *dst = newSV(slen+1);
U8 *d = (U8 *)SvPVX(dst);
STRLEN dlen = SvLEN(dst)-1;
int code = 0;
@@ -191,10 +224,10 @@ encode_method(pTHX_ const encode_t * enc, const encpage_t * dir, SV * src,
if (dir == enc->f_utf8) {
STRLEN clen;
UV ch =
- utf8n_to_uvuni(s+slen, (SvCUR(src)-slen),
+ utf8n_to_uvuni(s+slen, (tlen-sdone-slen),
&clen, UTF8_ALLOW_ANY|UTF8_CHECK_ONLY);
/* if non-representable multibyte prefix at end of current buffer - break*/
- if (clen > tlen - sdone) break;
+ if (clen > tlen - sdone - slen) break;
if (check & ENCODE_DIE_ON_ERR) {
Perl_croak(aTHX_ ERR_ENCODE_NOMAP,
(UV)ch, enc->name[0]);
@@ -211,7 +244,7 @@ encode_method(pTHX_ const encode_t * enc, const encpage_t * dir, SV * src,
SV* subchar =
(fallback_cb != &PL_sv_undef)
? do_fallback_cb(aTHX_ ch, fallback_cb)
- : newSVpvf(check & ENCODE_PERLQQ ? "\\x{%04"UVxf"}" :
+ : newSVpvf(check & ENCODE_PERLQQ ? "\\x{%04" UVxf "}" :
check & ENCODE_HTMLCREF ? "&#%" UVuf ";" :
"&#x%" UVxf ";", (UV)ch);
SvUTF8_off(subchar); /* make sure no decoded string gets in */
@@ -279,6 +312,7 @@ encode_method(pTHX_ const encode_t * enc, const encpage_t * dir, SV * src,
sv_setpvn(src, (char*)s+slen, sdone);
}
SvCUR_set(src, sdone);
+ SvSETMAGIC(src);
}
/* warn("check = 0x%X, code = 0x%d\n", check, code); */
@@ -318,6 +352,62 @@ strict_utf8(pTHX_ SV* sv)
return SvTRUE(*svp);
}
+/*
+ * https://github.com/dankogai/p5-encode/pull/56#issuecomment-231959126
+ */
+#ifndef UNICODE_IS_NONCHAR
+#define UNICODE_IS_NONCHAR(c) ((c >= 0xFDD0 && c <= 0xFDEF) || (c & 0xFFFE) == 0xFFFE)
+#endif
+
+#ifndef UNICODE_IS_SUPER
+#define UNICODE_IS_SUPER(c) (c > PERL_UNICODE_MAX)
+#endif
+
+#define UNICODE_IS_STRICT(c) (!UNICODE_IS_SURROGATE(c) && !UNICODE_IS_NONCHAR(c) && !UNICODE_IS_SUPER(c))
+
+#ifndef UTF_ACCUMULATION_OVERFLOW_MASK
+#ifndef CHARBITS
+#define CHARBITS CHAR_BIT
+#endif
+#define UTF_ACCUMULATION_OVERFLOW_MASK (((UV) UTF_CONTINUATION_MASK) << ((sizeof(UV) * CHARBITS) - UTF_ACCUMULATION_SHIFT))
+#endif
+
+/*
+ * Convert non strict utf8 sequence of len >= 2 to unicode codepoint
+ */
+static UV
+convert_utf8_multi_seq(U8* s, STRLEN len, STRLEN *rlen)
+{
+ UV uv;
+ U8 *ptr = s;
+ bool overflowed = 0;
+
+ uv = NATIVE_TO_UTF(*s) & UTF_START_MASK(len);
+
+ len--;
+ s++;
+
+ while (len--) {
+ if (!UTF8_IS_CONTINUATION(*s)) {
+ *rlen = s-ptr;
+ return 0;
+ }
+ if (uv & UTF_ACCUMULATION_OVERFLOW_MASK)
+ overflowed = 1;
+ uv = UTF8_ACCUMULATE(uv, *s);
+ s++;
+ }
+
+ *rlen = s-ptr;
+
+ if (overflowed || *rlen > (STRLEN)UNISKIP(uv)) {
+ *rlen = 1;
+ return 0;
+ }
+
+ return uv;
+}
+
static U8*
process_utf8(pTHX_ SV* dst, U8* s, U8* e, SV *check_sv,
bool encode, bool strict, bool stop_at_partial)
@@ -336,7 +426,7 @@ process_utf8(pTHX_ SV* dst, U8* s, U8* e, SV *check_sv,
}
else {
fallback_cb = &PL_sv_undef;
- check = SvIV(check_sv);
+ check = SvIV_nomg(check_sv);
}
SvPOK_only(dst);
@@ -351,39 +441,30 @@ process_utf8(pTHX_ SV* dst, U8* s, U8* e, SV *check_sv,
continue;
}
+ ulen = 1;
if (UTF8_IS_START(*s)) {
U8 skip = UTF8SKIP(s);
if ((s + skip) > e) {
if (stop_at_partial || (check & ENCODE_STOP_AT_PARTIAL)) {
const U8 *p = s + 1;
for (; p < e; p++) {
- if (!UTF8_IS_CONTINUATION(*p))
+ if (!UTF8_IS_CONTINUATION(*p)) {
+ ulen = p-s;
goto malformed_byte;
+ }
}
break;
}
+ ulen = e-s;
goto malformed_byte;
}
- uv = utf8n_to_uvuni(s, e - s, &ulen,
- UTF8_CHECK_ONLY | (strict ? UTF8_ALLOW_STRICT :
- UTF8_ALLOW_NONSTRICT)
- );
-#if 1 /* perl-5.8.6 and older do not check UTF8_ALLOW_LONG */
- if (strict && uv > PERL_UNICODE_MAX)
- ulen = (STRLEN) -1;
-#endif
- if (ulen == (STRLEN) -1) {
- if (strict) {
- uv = utf8n_to_uvuni(s, e - s, &ulen,
- UTF8_CHECK_ONLY | UTF8_ALLOW_NONSTRICT);
- if (ulen == (STRLEN) -1)
- goto malformed_byte;
- goto malformed;
- }
+ uv = convert_utf8_multi_seq(s, skip, &ulen);
+ if (uv == 0)
goto malformed_byte;
- }
+ else if (strict && !UNICODE_IS_STRICT(uv))
+ goto malformed;
/* Whole char is good */
@@ -396,7 +477,8 @@ process_utf8(pTHX_ SV* dst, U8* s, U8* e, SV *check_sv,
/* If we get here there is something wrong with alleged UTF-8 */
malformed_byte:
uv = (UV)*s;
- ulen = 1;
+ if (ulen == 0)
+ ulen = 1;
malformed:
if (check & ENCODE_DIE_ON_ERR){
@@ -456,10 +538,6 @@ MODULE = Encode PACKAGE = Encode::utf8 PREFIX = Method_
PROTOTYPES: DISABLE
-#ifndef SvIsCOW
-# define SvIsCOW(sv) (SvREADONLY(sv) && SvFAKE(sv))
-#endif
-
void
Method_decode_xs(obj,src,check_sv = &PL_sv_no)
SV * obj
@@ -472,23 +550,26 @@ PREINIT:
SV *dst;
bool renewed = 0;
int check;
+ bool modify;
+INIT:
+ SvGETMAGIC(src);
+ SvGETMAGIC(check_sv);
+ check = SvROK(check_sv) ? ENCODE_PERLQQ|ENCODE_LEAVE_SRC : SvIV_nomg(check_sv);
+ modify = (check && !(check & ENCODE_LEAVE_SRC));
CODE:
{
- dSP; ENTER; SAVETMPS;
- if (src == &PL_sv_undef || SvROK(src)) src = sv_2mortal(newSV(0));
- check = SvROK(check_sv) ? ENCODE_PERLQQ|ENCODE_LEAVE_SRC : SvIV(check_sv);
- if (!(check & ENCODE_LEAVE_SRC) && SvIsCOW(src)) {
- /*
- * disassociate from any other scalars before doing
- * in-place modifications
- */
- sv_force_normal(src);
- }
- s = (U8 *) SvPV(src, slen);
- e = (U8 *) SvEND(src);
+ dSP;
+ if (!SvOK(src))
+ XSRETURN_UNDEF;
+ s = modify ? (U8 *)SvPV_force_nomg(src, slen) : (U8 *)SvPV_nomg(src, slen);
+ if (SvUTF8(src))
+ utf8_safe_downgrade(aTHX_ &src, &s, &slen, modify);
+ e = s+slen;
+
/*
* PerlIO check -- we assume the object is of PerlIO if renewed
*/
+ ENTER; SAVETMPS;
PUSHMARK(sp);
XPUSHs(obj);
PUTBACK;
@@ -503,28 +584,17 @@ CODE:
FREETMPS; LEAVE;
/* end PerlIO check */
- if (SvUTF8(src)) {
- s = utf8_to_bytes(s,&slen);
- if (s) {
- SvCUR_set(src,slen);
- SvUTF8_off(src);
- e = s+slen;
- }
- else {
- croak("Cannot decode string with wide characters");
- }
- }
-
dst = sv_2mortal(newSV(slen>0?slen:1)); /* newSV() abhors 0 -- inaba */
s = process_utf8(aTHX_ dst, s, e, check_sv, 0, strict_utf8(aTHX_ obj), renewed);
/* Clear out translated part of source unless asked not to */
- if (check && !(check & ENCODE_LEAVE_SRC)){
+ if (modify) {
slen = e-s;
if (slen) {
sv_setpvn(src, (char*)s, slen);
}
SvCUR_set(src, slen);
+ SvSETMAGIC(src);
}
SvUTF8_on(dst);
if (SvTAINTED(src)) SvTAINTED_on(dst); /* propagate taintedness */
@@ -543,12 +613,18 @@ PREINIT:
U8 *e;
SV *dst;
int check;
+ bool modify;
+INIT:
+ SvGETMAGIC(src);
+ SvGETMAGIC(check_sv);
+ check = SvROK(check_sv) ? ENCODE_PERLQQ|ENCODE_LEAVE_SRC : SvIV_nomg(check_sv);
+ modify = (check && !(check & ENCODE_LEAVE_SRC));
CODE:
{
- check = SvROK(check_sv) ? ENCODE_PERLQQ|ENCODE_LEAVE_SRC : SvIV(check_sv);
- if (src == &PL_sv_undef || SvROK(src)) src = sv_2mortal(newSV(0));
- s = (U8 *) SvPV(src, slen);
- e = (U8 *) SvEND(src);
+ if (!SvOK(src))
+ XSRETURN_UNDEF;
+ s = modify ? (U8 *)SvPV_force_nomg(src, slen) : (U8 *)SvPV_nomg(src, slen);
+ e = s+slen;
dst = sv_2mortal(newSV(slen>0?slen:1)); /* newSV() abhors 0 -- inaba */
if (SvUTF8(src)) {
/* Already encoded */
@@ -584,12 +660,13 @@ CODE:
}
/* Clear out translated part of source unless asked not to */
- if (check && !(check & ENCODE_LEAVE_SRC)){
+ if (modify) {
slen = e-s;
if (slen) {
sv_setpvn(src, (char*)s, slen);
}
SvCUR_set(src, slen);
+ SvSETMAGIC(src);
}
SvPOK_only(dst);
SvUTF8_off(dst);
@@ -638,24 +715,35 @@ SV * src
SV * off
SV * term
SV * check_sv
-CODE:
-{
+PREINIT:
int check;
- SV *fallback_cb = &PL_sv_undef;
- encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
- STRLEN offset = (STRLEN)SvIV(off);
+ SV *fallback_cb;
+ bool modify;
+ encode_t *enc;
+ STRLEN offset;
int code = 0;
- if (SvUTF8(src)) {
- sv_utf8_downgrade(src, FALSE);
- }
- if (SvROK(check_sv)){
- fallback_cb = check_sv;
- check = ENCODE_PERLQQ|ENCODE_LEAVE_SRC; /* same as FB_PERLQQ */
- }else{
- check = SvIV(check_sv);
- }
- sv_catsv(dst, encode_method(aTHX_ enc, enc->t_utf8, src, check,
- &offset, term, &code, fallback_cb));
+ U8 *s;
+ STRLEN slen;
+ SV *tmp;
+INIT:
+ SvGETMAGIC(src);
+ SvGETMAGIC(check_sv);
+ check = SvROK(check_sv) ? ENCODE_PERLQQ|ENCODE_LEAVE_SRC : SvIV_nomg(check_sv);
+ fallback_cb = SvROK(check_sv) ? check_sv : &PL_sv_undef;
+ modify = (check && !(check & ENCODE_LEAVE_SRC));
+ enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
+ offset = (STRLEN)SvIV(off);
+CODE:
+{
+ if (!SvOK(src))
+ XSRETURN_NO;
+ s = modify ? (U8 *)SvPV_force_nomg(src, slen) : (U8 *)SvPV_nomg(src, slen);
+ if (SvUTF8(src))
+ utf8_safe_downgrade(aTHX_ &src, &s, &slen, modify);
+ tmp = encode_method(aTHX_ enc, enc->t_utf8, src, s, slen, check,
+ &offset, term, &code, fallback_cb);
+ sv_catsv(dst, tmp);
+ SvREFCNT_dec(tmp);
SvIV_set(off, (IV)offset);
if (code == ENCODE_FOUND_TERM) {
ST(0) = &PL_sv_yes;
@@ -665,79 +753,70 @@ CODE:
XSRETURN(1);
}
-void
+SV *
Method_decode(obj,src,check_sv = &PL_sv_no)
SV * obj
SV * src
SV * check_sv
+PREINIT:
+ int check;
+ SV *fallback_cb;
+ bool modify;
+ encode_t *enc;
+ U8 *s;
+ STRLEN slen;
+INIT:
+ SvGETMAGIC(src);
+ SvGETMAGIC(check_sv);
+ check = SvROK(check_sv) ? ENCODE_PERLQQ|ENCODE_LEAVE_SRC : SvIV_nomg(check_sv);
+ fallback_cb = SvROK(check_sv) ? check_sv : &PL_sv_undef;
+ modify = (check && !(check & ENCODE_LEAVE_SRC));
+ enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
CODE:
{
- int check;
- SV *fallback_cb = &PL_sv_undef;
- encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
- if (SvREADONLY(src) || SvSMAGICAL(src) || SvGMAGICAL(src) || !SvPOK(src)) {
- SV *tmp;
- tmp = sv_newmortal();
- sv_copypv(tmp, src);
- src = tmp;
- }
- if (SvUTF8(src)) {
- sv_utf8_downgrade(src, FALSE);
- }
- if (SvROK(check_sv)){
- fallback_cb = check_sv;
- check = ENCODE_PERLQQ|ENCODE_LEAVE_SRC; /* same as FB_PERLQQ */
- }else{
- check = SvIV(check_sv);
- }
- ST(0) = encode_method(aTHX_ enc, enc->t_utf8, src, check,
+ if (!SvOK(src))
+ XSRETURN_UNDEF;
+ s = modify ? (U8 *)SvPV_force_nomg(src, slen) : (U8 *)SvPV_nomg(src, slen);
+ if (SvUTF8(src))
+ utf8_safe_downgrade(aTHX_ &src, &s, &slen, modify);
+ RETVAL = encode_method(aTHX_ enc, enc->t_utf8, src, s, slen, check,
NULL, Nullsv, NULL, fallback_cb);
- SvUTF8_on(ST(0));
- XSRETURN(1);
+ SvUTF8_on(RETVAL);
}
+OUTPUT:
+ RETVAL
-
-#ifndef SvPV_force_nolen
-# define SvPV_force_nolen(sv) SvPV_force_flags_nolen(sv, SV_GMAGIC)
-#endif
-
-#ifndef SvPV_force_flags_nolen
-# define SvPV_force_flags_nolen(sv, flags) \
- ((SvFLAGS(sv) & (SVf_POK|SVf_THINKFIRST)) == SVf_POK \
- ? SvPVX(sv) : sv_pvn_force_flags(sv, &PL_na, flags))
-#endif
-
-void
+SV *
Method_encode(obj,src,check_sv = &PL_sv_no)
SV * obj
SV * src
SV * check_sv
+PREINIT:
+ int check;
+ SV *fallback_cb;
+ bool modify;
+ encode_t *enc;
+ U8 *s;
+ STRLEN slen;
+INIT:
+ SvGETMAGIC(src);
+ SvGETMAGIC(check_sv);
+ check = SvROK(check_sv) ? ENCODE_PERLQQ|ENCODE_LEAVE_SRC : SvIV_nomg(check_sv);
+ fallback_cb = SvROK(check_sv) ? check_sv : &PL_sv_undef;
+ modify = (check && !(check & ENCODE_LEAVE_SRC));
+ enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
CODE:
{
- int check;
- SV *fallback_cb = &PL_sv_undef;
- encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
- if (SvREADONLY(src) || SvSMAGICAL(src) || SvGMAGICAL(src) || !SvPOK(src)) {
- /*
- SV *tmp;
- tmp = sv_newmortal();
- sv_copypv(tmp, src);
- src = tmp;
- */
- src = sv_mortalcopy(src);
- SvPV_force_nolen(src);
- }
- sv_utf8_upgrade(src);
- if (SvROK(check_sv)){
- fallback_cb = check_sv;
- check = ENCODE_PERLQQ|ENCODE_LEAVE_SRC; /* same as FB_PERLQQ */
- }else{
- check = SvIV(check_sv);
- }
- ST(0) = encode_method(aTHX_ enc, enc->f_utf8, src, check,
+ if (!SvOK(src))
+ XSRETURN_UNDEF;
+ s = modify ? (U8 *)SvPV_force_nomg(src, slen) : (U8 *)SvPV_nomg(src, slen);
+ if (!SvUTF8(src))
+ utf8_safe_upgrade(aTHX_ &src, &s, &slen, modify);
+ RETVAL = encode_method(aTHX_ enc, enc->f_utf8, src, s, slen, check,
NULL, Nullsv, NULL, fallback_cb);
- XSRETURN(1);
}
+OUTPUT:
+ RETVAL
void
Method_needs_lines(obj)
@@ -753,6 +832,8 @@ CODE:
void
Method_perlio_ok(obj)
SV * obj
+PREINIT:
+ SV *sv;
CODE:
{
/* encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); */
@@ -762,7 +843,8 @@ CODE:
eval_pv("require PerlIO::encoding", 0);
SPAGAIN;
- if (SvTRUE(get_sv("@", 0))) {
+ sv = get_sv("@", 0);
+ if (SvTRUE(sv)) {
ST(0) = &PL_sv_no;
}else{
ST(0) = &PL_sv_yes;
@@ -773,6 +855,8 @@ CODE:
void
Method_mime_name(obj)
SV * obj
+PREINIT:
+ SV *sv;
CODE:
{
encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj)));
@@ -780,7 +864,8 @@ CODE:
eval_pv("require Encode::MIME::Name", 0);
SPAGAIN;
- if (SvTRUE(get_sv("@", 0))) {
+ sv = get_sv("@", 0);
+ if (SvTRUE(sv)) {
ST(0) = &PL_sv_undef;
}else{
ENTER;
@@ -903,17 +988,16 @@ bool
is_utf8(sv, check = 0)
SV * sv
int check
+PREINIT:
+ char *str;
+ STRLEN len;
CODE:
{
- if (SvGMAGICAL(sv)) /* it could be $1, for example */
- sv = newSVsv(sv); /* GMAGIG will be done */
+ SvGETMAGIC(sv); /* SvGETMAGIC() can modify SvOK flag */
+ str = SvOK(sv) ? SvPV_nomg(sv, len) : NULL; /* SvPV() can modify SvUTF8 flag */
RETVAL = SvUTF8(sv) ? TRUE : FALSE;
- if (RETVAL &&
- check &&
- !is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
+ if (RETVAL && check && (!str || !is_utf8_string((U8 *)str, len)))
RETVAL = FALSE;
- if (sv != ST(0))
- SvREFCNT_dec(sv); /* it was a temp copy */
}
OUTPUT:
RETVAL
@@ -923,13 +1007,14 @@ _utf8_on(sv)
SV * sv
CODE:
{
- if (SvPOK(sv)) {
- SV *rsv = newSViv(SvUTF8(sv));
- RETVAL = rsv;
- if (SvIsCOW(sv)) sv_force_normal(sv);
- SvUTF8_on(sv);
+ SvGETMAGIC(sv);
+ if (!SvTAINTED(sv) && SvPOKp(sv)) {
+ if (SvTHINKFIRST(sv)) sv_force_normal(sv);
+ RETVAL = newSViv(SvUTF8(sv));
+ SvUTF8_on(sv);
+ SvSETMAGIC(sv);
} else {
- RETVAL = &PL_sv_undef;
+ RETVAL = &PL_sv_undef;
}
}
OUTPUT:
@@ -940,124 +1025,38 @@ _utf8_off(sv)
SV * sv
CODE:
{
- if (SvPOK(sv)) {
- SV *rsv = newSViv(SvUTF8(sv));
- RETVAL = rsv;
- if (SvIsCOW(sv)) sv_force_normal(sv);
- SvUTF8_off(sv);
+ SvGETMAGIC(sv);
+ if (!SvTAINTED(sv) && SvPOKp(sv)) {
+ if (SvTHINKFIRST(sv)) sv_force_normal(sv);
+ RETVAL = newSViv(SvUTF8(sv));
+ SvUTF8_off(sv);
+ SvSETMAGIC(sv);
} else {
- RETVAL = &PL_sv_undef;
+ RETVAL = &PL_sv_undef;
}
}
OUTPUT:
RETVAL
-int
-DIE_ON_ERR()
-CODE:
- RETVAL = ENCODE_DIE_ON_ERR;
-OUTPUT:
- RETVAL
-
-int
-WARN_ON_ERR()
-CODE:
- RETVAL = ENCODE_WARN_ON_ERR;
-OUTPUT:
- RETVAL
-
-int
-LEAVE_SRC()
-CODE:
- RETVAL = ENCODE_LEAVE_SRC;
-OUTPUT:
- RETVAL
-
-int
-RETURN_ON_ERR()
-CODE:
- RETVAL = ENCODE_RETURN_ON_ERR;
-OUTPUT:
- RETVAL
-
-int
-PERLQQ()
-CODE:
- RETVAL = ENCODE_PERLQQ;
-OUTPUT:
- RETVAL
-
-int
-HTMLCREF()
-CODE:
- RETVAL = ENCODE_HTMLCREF;
-OUTPUT:
- RETVAL
-
-int
-XMLCREF()
-CODE:
- RETVAL = ENCODE_XMLCREF;
-OUTPUT:
- RETVAL
-
-int
-STOP_AT_PARTIAL()
-CODE:
- RETVAL = ENCODE_STOP_AT_PARTIAL;
-OUTPUT:
- RETVAL
-
-int
-FB_DEFAULT()
-CODE:
- RETVAL = ENCODE_FB_DEFAULT;
-OUTPUT:
- RETVAL
-
-int
-FB_CROAK()
-CODE:
- RETVAL = ENCODE_FB_CROAK;
-OUTPUT:
- RETVAL
-
-int
-FB_QUIET()
-CODE:
- RETVAL = ENCODE_FB_QUIET;
-OUTPUT:
- RETVAL
-
-int
-FB_WARN()
-CODE:
- RETVAL = ENCODE_FB_WARN;
-OUTPUT:
- RETVAL
-
-int
-FB_PERLQQ()
-CODE:
- RETVAL = ENCODE_FB_PERLQQ;
-OUTPUT:
- RETVAL
-
-int
-FB_HTMLCREF()
-CODE:
- RETVAL = ENCODE_FB_HTMLCREF;
-OUTPUT:
- RETVAL
-
-int
-FB_XMLCREF()
-CODE:
- RETVAL = ENCODE_FB_XMLCREF;
-OUTPUT:
- RETVAL
-
BOOT:
{
+ HV *stash = gv_stashpvn("Encode", strlen("Encode"), GV_ADD);
+ newCONSTSUB(stash, "DIE_ON_ERR", newSViv(ENCODE_DIE_ON_ERR));
+ newCONSTSUB(stash, "WARN_ON_ERR", newSViv(ENCODE_WARN_ON_ERR));
+ newCONSTSUB(stash, "RETURN_ON_ERR", newSViv(ENCODE_RETURN_ON_ERR));
+ newCONSTSUB(stash, "LEAVE_SRC", newSViv(ENCODE_LEAVE_SRC));
+ newCONSTSUB(stash, "PERLQQ", newSViv(ENCODE_PERLQQ));
+ newCONSTSUB(stash, "HTMLCREF", newSViv(ENCODE_HTMLCREF));
+ newCONSTSUB(stash, "XMLCREF", newSViv(ENCODE_XMLCREF));
+ newCONSTSUB(stash, "STOP_AT_PARTIAL", newSViv(ENCODE_STOP_AT_PARTIAL));
+ newCONSTSUB(stash, "FB_DEFAULT", newSViv(ENCODE_FB_DEFAULT));
+ newCONSTSUB(stash, "FB_CROAK", newSViv(ENCODE_FB_CROAK));
+ newCONSTSUB(stash, "FB_QUIET", newSViv(ENCODE_FB_QUIET));
+ newCONSTSUB(stash, "FB_WARN", newSViv(ENCODE_FB_WARN));
+ newCONSTSUB(stash, "FB_PERLQQ", newSViv(ENCODE_FB_PERLQQ));
+ newCONSTSUB(stash, "FB_HTMLCREF", newSViv(ENCODE_FB_HTMLCREF));
+ newCONSTSUB(stash, "FB_XMLCREF", newSViv(ENCODE_FB_XMLCREF));
+}
+{
#include "def_t.exh"
}
diff --git a/cpan/Encode/Encode/_T.e2x b/cpan/Encode/Encode/_T.e2x
index 6cf5f293d5..7b9a67e43d 100644
--- a/cpan/Encode/Encode/_T.e2x
+++ b/cpan/Encode/Encode/_T.e2x
@@ -2,6 +2,8 @@ use strict;
# Adjust the number here!
use Test::More tests => 2;
-use_ok('Encode');
-use_ok('Encode::$_Name_');
+BEGIN {
+ use_ok('Encode');
+ use_ok('Encode::$_Name_');
+}
# Add more test here!
diff --git a/cpan/Encode/Makefile.PL b/cpan/Encode/Makefile.PL
index c87153bbb3..8203105247 100644
--- a/cpan/Encode/Makefile.PL
+++ b/cpan/Encode/Makefile.PL
@@ -1,16 +1,26 @@
#
-# $Id: Makefile.PL,v 2.17 2016/08/04 03:15:58 dankogai Exp $
+# $Id: Makefile.PL,v 2.18 2016/11/29 23:29:23 dankogai Exp dankogai $
#
use 5.007003;
use strict;
use warnings;
use ExtUtils::MakeMaker;
use File::Spec;
+use Config;
# Just for sure :)
my %ARGV = map { my @r = split /=/,$_; defined $r[1] or $r[1]=1; @r } @ARGV;
$ARGV{DEBUG} and warn "$_ => $ARGV{$_}\n" for sort keys %ARGV;
$ENV{PERL_CORE} ||= $ARGV{PERL_CORE} if $ARGV{PERL_CORE};
+# similar strictness as in core
+my $ccflags = $Config{ccflags};
+if (!$ENV{PERL_CORE}) {
+ if ($Config{gccversion}) {
+ $ccflags .= ' -Werror=declaration-after-statement';
+ $ccflags .= ' -Wpointer-sign' unless $Config{d_cplusplus};
+ $ccflags .= ' -fpermissive' if $Config{d_cplusplus};
+ }
+}
my %tables =
(
@@ -45,6 +55,7 @@ WriteMakefile(
SUFFIX => 'gz',
DIST_DEFAULT => 'all tardist',
},
+ CCFLAGS => $ccflags,
INC => '-I' . File::Spec->catfile( '.', 'Encode' ),
LICENSE => 'perl',
PREREQ_PM => {
diff --git a/cpan/Encode/Unicode/Makefile.PL b/cpan/Encode/Unicode/Makefile.PL
index ce48b7aace..b28d16bb96 100644
--- a/cpan/Encode/Unicode/Makefile.PL
+++ b/cpan/Encode/Unicode/Makefile.PL
@@ -3,7 +3,7 @@ use strict;
use ExtUtils::MakeMaker;
WriteMakefile(
- INC => "-I../Encode",
+ INC => "-I../Encode",
NAME => 'Encode::Unicode',
VERSION_FROM => "Unicode.pm",
MAN3PODS => {},
diff --git a/cpan/Encode/Unicode/Unicode.pm b/cpan/Encode/Unicode/Unicode.pm
index 7dec3e3815..fc1d3d1382 100644
--- a/cpan/Encode/Unicode/Unicode.pm
+++ b/cpan/Encode/Unicode/Unicode.pm
@@ -4,7 +4,7 @@ use strict;
use warnings;
no warnings 'redefine';
-our $VERSION = do { my @r = ( q$Revision: 2.15 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
+our $VERSION = do { my @r = ( q$Revision: 2.15_01 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
use XSLoader;
XSLoader::load( __PACKAGE__, $VERSION );
diff --git a/cpan/Encode/Unicode/Unicode.xs b/cpan/Encode/Unicode/Unicode.xs
index 3bad2adae0..117e14d83f 100644
--- a/cpan/Encode/Unicode/Unicode.xs
+++ b/cpan/Encode/Unicode/Unicode.xs
@@ -1,5 +1,5 @@
/*
- $Id: Unicode.xs,v 2.14 2016/01/22 06:33:07 dankogai Exp $
+ $Id: Unicode.xs,v 2.15 2016/11/29 23:29:23 dankogai Exp dankogai $
*/
#define PERL_NO_GET_CONTEXT
@@ -125,8 +125,6 @@ PROTOTYPES: DISABLE
#define attr(k, l) (hv_exists((HV *)SvRV(obj),k,l) ? \
*hv_fetch((HV *)SvRV(obj),k,l,0) : &PL_sv_undef)
-#define attr_true(k, l) (hv_exists((HV *)SvRV(obj),k,l) ? \
- SvTRUE(*hv_fetch((HV *)SvRV(obj),k,l,0)) : FALSE)
void
decode_xs(obj, str, check = 0)
@@ -135,26 +133,54 @@ SV * str
IV check
CODE:
{
- U8 endian = *((U8 *)SvPV_nolen(attr("endian", 6)));
- int size = SvIV(attr("size", 4));
+ SV *sve = attr("endian", 6);
+ U8 endian = *((U8 *)SvPV_nolen(sve));
+ SV *svs = attr("size", 4);
+ int size = SvIV(svs);
int ucs2 = -1; /* only needed in the event of surrogate pairs */
SV *result = newSVpvn("",0);
STRLEN usize = (size > 0 ? size : 1); /* protect against rogue size<=0 */
STRLEN ulen;
STRLEN resultbuflen;
U8 *resultbuf;
- U8 *s = (U8 *)SvPVbyte(str,ulen);
- U8 *e = (U8 *)SvEND(str);
+ U8 *s;
+ U8 *e;
+ bool modify = (check && !(check & ENCODE_LEAVE_SRC));
+ bool temp_result;
+
+ SvGETMAGIC(str);
+ if (!SvOK(str))
+ XSRETURN_UNDEF;
+ s = modify ? (U8 *)SvPV_force_nomg(str, ulen) : (U8 *)SvPV_nomg(str, ulen);
+ if (SvUTF8(str)) {
+ if (!modify) {
+ SV *tmp = sv_2mortal(newSVpvn((char *)s, ulen));
+ SvUTF8_on(tmp);
+ if (SvTAINTED(str))
+ SvTAINTED_on(tmp);
+ str = tmp;
+ s = (U8 *)SvPVX(str);
+ }
+ if (ulen) {
+ if (!utf8_to_bytes(s, &ulen))
+ croak("Wide character");
+ SvCUR_set(str, ulen);
+ }
+ SvUTF8_off(str);
+ }
+ e = s+ulen;
+
/* Optimise for the common case of being called from PerlIOEncode_fill()
with a standard length buffer. In this case the result SV's buffer is
only used temporarily, so we can afford to allocate the maximum needed
and not care about unused space. */
- const bool temp_result = (ulen == PERLIO_BUFSIZ);
+ temp_result = (ulen == PERLIO_BUFSIZ);
ST(0) = sv_2mortal(result);
SvUTF8_on(result);
if (!endian && s+size <= e) {
+ SV *sv;
UV bom;
endian = (size == 4) ? 'N' : 'n';
bom = enc_unpack(aTHX_ &s,e,size,endian);
@@ -183,8 +209,9 @@ CODE:
}
#if 1
/* Update endian for next sequence */
- if (attr_true("renewed", 7)) {
- hv_store((HV *)SvRV(obj),"endian",6,newSVpv((char *)&endian,1),0);
+ sv = attr("renewed", 7);
+ if (SvTRUE(sv)) {
+ (void)hv_store((HV *)SvRV(obj),"endian",6,newSVpv((char *)&endian,1),0);
}
#endif
}
@@ -202,11 +229,12 @@ CODE:
U8 *d;
if (issurrogate(ord)) {
if (ucs2 == -1) {
- ucs2 = attr_true("ucs2", 4);
+ SV *sv = attr("ucs2", 4);
+ ucs2 = SvTRUE(sv);
}
if (ucs2 || size == 4) {
if (check) {
- croak("%"SVf":no surrogates allowed %"UVxf,
+ croak("%" SVf ":no surrogates allowed %" UVxf,
*hv_fetch((HV *)SvRV(obj),"Name",4,0),
ord);
}
@@ -216,7 +244,7 @@ CODE:
UV lo;
if (!isHiSurrogate(ord)) {
if (check) {
- croak("%"SVf":Malformed HI surrogate %"UVxf,
+ croak("%" SVf ":Malformed HI surrogate %" UVxf,
*hv_fetch((HV *)SvRV(obj),"Name",4,0),
ord);
}
@@ -231,7 +259,7 @@ CODE:
break;
}
else {
- croak("%"SVf":Malformed HI surrogate %"UVxf,
+ croak("%" SVf ":Malformed HI surrogate %" UVxf,
*hv_fetch((HV *)SvRV(obj),"Name",4,0),
ord);
}
@@ -244,7 +272,7 @@ CODE:
lo = enc_unpack(aTHX_ &s,e,size,endian);
if (!isLoSurrogate(lo)) {
if (check) {
- croak("%"SVf":Malformed LO surrogate %"UVxf,
+ croak("%" SVf ":Malformed LO surrogate %" UVxf,
*hv_fetch((HV *)SvRV(obj),"Name",4,0),
ord);
}
@@ -262,7 +290,7 @@ CODE:
if ((ord & 0xFFFE) == 0xFFFE || (ord >= 0xFDD0 && ord <= 0xFDEF)) {
if (check) {
- croak("%"SVf":Unicode character %"UVxf" is illegal",
+ croak("%" SVf ":Unicode character %" UVxf " is illegal",
*hv_fetch((HV *)SvRV(obj),"Name",4,0),
ord);
} else {
@@ -295,7 +323,7 @@ CODE:
if (s < e) {
/* unlikely to happen because it's fixed-length -- dankogai */
if (check & ENCODE_WARN_ON_ERR) {
- Perl_warner(aTHX_ packWARN(WARN_UTF8),"%"SVf":Partial character",
+ Perl_warner(aTHX_ packWARN(WARN_UTF8),"%" SVf ":Partial character",
*hv_fetch((HV *)SvRV(obj),"Name",4,0));
}
}
@@ -308,6 +336,7 @@ CODE:
SvCUR_set(str,0);
}
*SvEND(str) = '\0';
+ SvSETMAGIC(str);
}
if (!temp_result) shrink_buffer(result);
@@ -322,19 +351,40 @@ SV * utf8
IV check
CODE:
{
- U8 endian = *((U8 *)SvPV_nolen(attr("endian", 6)));
- const int size = SvIV(attr("size", 4));
+ SV *sve = attr("endian", 6);
+ U8 endian = *((U8 *)SvPV_nolen(sve));
+ SV *svs = attr("size", 4);
+ const int size = SvIV(svs);
int ucs2 = -1; /* only needed if there is invalid_ucs2 input */
const STRLEN usize = (size > 0 ? size : 1);
SV *result = newSVpvn("", 0);
STRLEN ulen;
- U8 *s = (U8 *) SvPVutf8(utf8, ulen);
- const U8 *e = (U8 *) SvEND(utf8);
+ U8 *s;
+ U8 *e;
+ bool modify = (check && !(check & ENCODE_LEAVE_SRC));
+ bool temp_result;
+
+ SvGETMAGIC(utf8);
+ if (!SvOK(utf8))
+ XSRETURN_UNDEF;
+ s = modify ? (U8 *)SvPV_force_nomg(utf8, ulen) : (U8 *)SvPV_nomg(utf8, ulen);
+ if (!SvUTF8(utf8)) {
+ if (!modify) {
+ SV *tmp = sv_2mortal(newSVpvn((char *)s, ulen));
+ if (SvTAINTED(utf8))
+ SvTAINTED_on(tmp);
+ utf8 = tmp;
+ }
+ sv_utf8_upgrade_nomg(utf8);
+ s = (U8 *)SvPV_nomg(utf8, ulen);
+ }
+ e = s+ulen;
+
/* Optimise for the common case of being called from PerlIOEncode_flush()
with a standard length buffer. In this case the result SV's buffer is
only used temporarily, so we can afford to allocate the maximum needed
and not care about unused space. */
- const bool temp_result = (ulen == PERLIO_BUFSIZ);
+ temp_result = (ulen == PERLIO_BUFSIZ);
ST(0) = sv_2mortal(result);
@@ -344,12 +394,14 @@ CODE:
SvGROW(result, ((ulen+1) * usize));
if (!endian) {
+ SV *sv;
endian = (size == 4) ? 'N' : 'n';
enc_pack(aTHX_ result,size,endian,BOM_BE);
#if 1
/* Update endian for next sequence */
- if (attr_true("renewed", 7)) {
- hv_store((HV *)SvRV(obj),"endian",6,newSVpv((char *)&endian,1),0);
+ sv = attr("renewed", 7);
+ if (SvTRUE(sv)) {
+ (void)hv_store((HV *)SvRV(obj),"endian",6,newSVpv((char *)&endian,1),0);
}
#endif
}
@@ -364,11 +416,12 @@ CODE:
if (size != 4 && invalid_ucs2(ord)) {
if (!issurrogate(ord)) {
if (ucs2 == -1) {
- ucs2 = attr_true("ucs2", 4);
+ SV *sv = attr("ucs2", 4);
+ ucs2 = SvTRUE(sv);
}
if (ucs2 || ord > 0x10FFFF) {
if (check) {
- croak("%"SVf":code point \"\\x{%"UVxf"}\" too high",
+ croak("%" SVf ":code point \"\\x{%" UVxf "}\" too high",
*hv_fetch((HV *)SvRV(obj),"Name",4,0),ord);
}
enc_pack(aTHX_ result,size,endian,FBCHAR);
@@ -394,7 +447,7 @@ CODE:
But this is critical when you choose to LEAVE_SRC
in which case we die */
if (check & (ENCODE_DIE_ON_ERR|ENCODE_LEAVE_SRC)) {
- Perl_croak(aTHX_ "%"SVf":partial character is not allowed "
+ Perl_croak(aTHX_ "%" SVf ":partial character is not allowed "
"when CHECK = 0x%" UVuf,
*hv_fetch((HV *)SvRV(obj),"Name",4,0), check);
}
@@ -408,12 +461,11 @@ CODE:
SvCUR_set(utf8,0);
}
*SvEND(utf8) = '\0';
+ SvSETMAGIC(utf8);
}
if (!temp_result) shrink_buffer(result);
if (SvTAINTED(utf8)) SvTAINTED_on(result); /* propagate taintedness */
- SvSETMAGIC(utf8);
-
XSRETURN(1);
}
diff --git a/cpan/Encode/bin/enc2xs b/cpan/Encode/bin/enc2xs
index f2a228f68b..bd39639ae8 100644
--- a/cpan/Encode/bin/enc2xs
+++ b/cpan/Encode/bin/enc2xs
@@ -11,7 +11,7 @@ use warnings;
use Getopt::Std;
use Config;
my @orig_ARGV = @ARGV;
-our $VERSION = do { my @r = (q$Revision: 2.19 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = (q$Revision: 2.20 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
# These may get re-ordered.
# RAW is a do_now as inserted by &enter
@@ -123,7 +123,10 @@ my %encode_types = (U => \&encode_U,
);
# Win32 does not expand globs on command line
-eval "\@ARGV = map(glob(\$_),\@ARGV)" if ($^O eq 'MSWin32');
+if ($^O eq 'MSWin32' and !$ENV{PERL_CORE}) {
+ eval "\@ARGV = map(glob(\$_),\@ARGV)";
+ @ARGV = @orig_ARGV unless @ARGV;
+}
my %opt;
# I think these are:
@@ -134,6 +137,8 @@ my %opt;
# -o <output> to specify the output file name (else it's the first arg)
# -f <inlist> to give a file with a list of input files (else use the args)
# -n <name> to name the encoding (else use the basename of the input file.
+#Getopt::Long::Configure("bundling");
+#GetOptions(\%opt, qw(C M=s S Q q O o=s f=s n=s v));
getopts('CM:SQqOo:f:n:v',\%opt);
$opt{M} and make_makefile_pl($opt{M}, @ARGV);
@@ -196,9 +201,9 @@ sub compiler_info {
# This really should go first, else the die here causes empty (non-erroneous)
# output files to be written.
my @encfiles;
-if (exists $opt{'f'}) {
+if (exists $opt{f}) {
# -F is followed by name of file containing list of filenames
- my $flist = $opt{'f'};
+ my $flist = $opt{f};
open(FLIST,$flist) || die "Cannot open $flist:$!";
chomp(@encfiles = <FLIST>);
close(FLIST);
@@ -206,9 +211,15 @@ if (exists $opt{'f'}) {
@encfiles = @ARGV;
}
-my $cname = (exists $opt{'o'}) ? $opt{'o'} : shift(@ARGV);
+my $cname = $opt{o} ? $opt{o} : shift(@ARGV);
+unless ($cname) { #debuging a win32 nmake error-only. works via cmdline
+ print "\nARGV:";
+ print "$_ " for @ARGV;
+ print "\nopt:";
+ print " $_ => ",defined $opt{$_}?$opt{$_}:"undef","\n" for keys %opt;
+}
chmod(0666,$cname) if -f $cname && !-w $cname;
-open(C,">$cname") || die "Cannot open $cname:$!";
+open(C,">", $cname) || die "Cannot open $cname:$!";
my $dname = $cname;
my $hname = $cname;
@@ -220,10 +231,10 @@ if ($cname =~ /\.(c|xs)$/i) # VMS may have upcased filenames with DECC$ARGV_PARS
$doC = 1;
$dname =~ s/(\.[^\.]*)?$/.exh/;
chmod(0666,$dname) if -f $cname && !-w $dname;
- open(D,">$dname") || die "Cannot open $dname:$!";
+ open(D,">", $dname) || die "Cannot open $dname:$!";
$hname =~ s/(\.[^\.]*)?$/.h/;
chmod(0666,$hname) if -f $cname && !-w $hname;
- open(H,">$hname") || die "Cannot open $hname:$!";
+ open(H,">", $hname) || die "Cannot open $hname:$!";
foreach my $fh (\*C,\*D,\*H)
{
@@ -469,7 +480,9 @@ sub compile_ucm
$erep = $attr{'subchar'};
$erep =~ s/^\s+//; $erep =~ s/\s+$//;
}
- print "Reading $name ($cs)\n";
+ print "Reading $name ($cs)\n"
+ unless defined $ENV{MAKEFLAGS}
+ and $ENV{MAKEFLAGS} =~ /\b(s|silent|quiet)\b/;
my $nfb = 0;
my $hfb = 0;
while (<$fh>)
@@ -755,9 +768,17 @@ sub addstrings
if ($a->{'Forward'})
{
my ($cpp, $static, $sized) = compiler_info(1);
- my $var = $static ? 'static const' : 'extern';
my $count = $sized ? scalar(@{$a->{'Entries'}}) : '';
- print $fh "$var encpage_t $name\[$count];\n";
+ if ($static) {
+ # we cannot ask Config for d_plusplus since we can override CC=g++-6 on the cmdline
+ print $fh "#ifdef __cplusplus\n"; # -fpermissive since g++-6
+ print $fh "extern encpage_t $name\[$count];\n";
+ print $fh "#else\n";
+ print $fh "static const encpage_t $name\[$count];\n";
+ print $fh "#endif\n";
+ } else {
+ print $fh "extern encpage_t $name\[$count];\n";
+ }
}
$a->{'DoneStrings'} = 1;
foreach my $b (@{$a->{'Entries'}})
@@ -848,9 +869,16 @@ sub outtable
outtable($fh,$t,$bigname) unless $t->{'Done'};
}
my ($cpp, $static) = compiler_info(0);
- my $var = $static ? 'static const ' : '';
- print $fh "\n${var}encpage_t $name\[",
- scalar(@{$a->{'Entries'}}), "] = {\n";
+ my $count = scalar(@{$a->{'Entries'}});
+ if ($static) {
+ print $fh "#ifdef __cplusplus\n"; # -fpermissive since g++-6
+ print $fh "encpage_t $name\[$count] = {\n";
+ print $fh "#else\n";
+ print $fh "static const encpage_t $name\[$count] = {\n";
+ print $fh "#endif\n";
+ } else {
+ print $fh "\nencpage_t $name\[$count] = {\n";
+ }
foreach my $b (@{$a->{'Entries'}})
{
my ($sc,$ec,$out,$t,$end,$l,$fb) = @$b;
@@ -1104,7 +1132,7 @@ sub _print_expand{
if ((my $d = dirname($dst)) ne '.'){
-d $d or mkdir $d, 0755 or die "mkdir $d : $!";
}
- open my $out, ">$dst" or die "$!";
+ open my $out, ">", $dst or die "$!";
my $asis = 0;
while (<$in>){
if (/^#### END_OF_HEADER/){
diff --git a/cpan/Encode/encoding.pm b/cpan/Encode/encoding.pm
index 754b3acb03..dc342683ee 100644
--- a/cpan/Encode/encoding.pm
+++ b/cpan/Encode/encoding.pm
@@ -1,6 +1,6 @@
-# $Id: encoding.pm,v 2.18 2016/08/10 18:08:45 dankogai Exp dankogai $
+# $Id: encoding.pm,v 2.19 2016/11/01 13:30:38 dankogai Exp $
package encoding;
-our $VERSION = sprintf "%d.%02d", q$Revision: 2.18 $ =~ /(\d+)/g;
+our $VERSION = sprintf "%d.%02d", q$Revision: 2.19 $ =~ /(\d+)/g;
use Encode;
use strict;
diff --git a/cpan/Encode/lib/Encode/Alias.pm b/cpan/Encode/lib/Encode/Alias.pm
index 04ad4967c9..0a252560f5 100644
--- a/cpan/Encode/lib/Encode/Alias.pm
+++ b/cpan/Encode/lib/Encode/Alias.pm
@@ -2,7 +2,7 @@ package Encode::Alias;
use strict;
use warnings;
no warnings 'redefine';
-our $VERSION = do { my @r = ( q$Revision: 2.20 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
+our $VERSION = do { my @r = ( q$Revision: 2.21 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
use constant DEBUG => !!$ENV{PERL_ENCODE_DEBUG};
use Exporter 'import';
@@ -79,8 +79,10 @@ sub find_alias {
sub define_alias {
while (@_) {
- my ( $alias, $name ) = splice( @_, 0, 2 );
- unshift( @Alias, $alias => $name ); # newer one has precedence
+ my $alias = shift;
+ my $name = shift;
+ unshift( @Alias, $alias => $name ) # newer one has precedence
+ if defined $alias;
if ( ref($alias) ) {
# clear %Alias cache to allow overrides
@@ -96,10 +98,14 @@ sub define_alias {
}
}
}
- else {
+ elsif (defined $alias) {
DEBUG and warn "delete \$Alias\{$alias\}";
delete $Alias{$alias};
}
+ elsif (DEBUG) {
+ require Carp;
+ Carp::croak("undef \$alias");
+ }
}
}
diff --git a/cpan/Encode/lib/Encode/CN/HZ.pm b/cpan/Encode/lib/Encode/CN/HZ.pm
index f035d821f5..4510b0b400 100644
--- a/cpan/Encode/lib/Encode/CN/HZ.pm
+++ b/cpan/Encode/lib/Encode/CN/HZ.pm
@@ -5,7 +5,7 @@ use warnings;
use utf8 ();
use vars qw($VERSION);
-$VERSION = do { my @r = ( q$Revision: 2.7 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
+$VERSION = do { my @r = ( q$Revision: 2.8 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
use Encode qw(:fallbacks);
@@ -49,7 +49,8 @@ sub decode ($$;$) {
else { # GB mode; the byte ranges are as in RFC 1843.
no warnings 'uninitialized';
if ( $str =~ s/^((?:[\x21-\x77][\x21-\x7E])+)// ) {
- $ret .= $GB->decode( $1, $chk );
+ my $prefix = $1;
+ $ret .= $GB->decode( $prefix, $chk );
}
elsif ( $str =~ s/^\x7E\x7D// ) { # '~}'
$in_ascii = 1;
diff --git a/cpan/Encode/lib/Encode/MIME/Header.pm b/cpan/Encode/lib/Encode/MIME/Header.pm
index d74d453b8b..ad14dba374 100644
--- a/cpan/Encode/lib/Encode/MIME/Header.pm
+++ b/cpan/Encode/lib/Encode/MIME/Header.pm
@@ -1,22 +1,25 @@
package Encode::MIME::Header;
use strict;
use warnings;
-no warnings 'redefine';
-our $VERSION = do { my @r = ( q$Revision: 2.23 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
-use Encode qw(find_encoding encode_utf8 decode_utf8);
-use MIME::Base64;
-use Carp;
+our $VERSION = do { my @r = ( q$Revision: 2.24 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
+
+use Carp ();
+use Encode ();
+use MIME::Base64 ();
my %seed = (
- decode_b => '1', # decodes 'B' encoding ?
- decode_q => '1', # decodes 'Q' encoding ?
- encode => 'B', # encode with 'B' or 'Q' ?
- bpl => 75, # bytes per line
+ decode_b => 1, # decodes 'B' encoding ?
+ decode_q => 1, # decodes 'Q' encoding ?
+ encode => 'B', # encode with 'B' or 'Q' ?
+ charset => 'UTF-8', # encode charset
+ bpl => 75, # bytes per line
);
-$Encode::Encoding{'MIME-Header'} =
- bless { %seed, Name => 'MIME-Header', } => __PACKAGE__;
+$Encode::Encoding{'MIME-Header'} = bless {
+ %seed,
+ Name => 'MIME-Header',
+} => __PACKAGE__;
$Encode::Encoding{'MIME-B'} = bless {
%seed,
@@ -37,107 +40,186 @@ sub needs_lines { 1 }
sub perlio_ok { 0 }
# RFC 2047 and RFC 2231 grammar
-my $re_charset = qr/[-0-9A-Za-z_]+/;
-my $re_language = qr/[A-Za-z]{1,8}(?:-[A-Za-z]{1,8})*/;
+my $re_charset = qr/[!"#\$%&'+\-0-9A-Z\\\^_`a-z\{\|\}~]+/;
+my $re_language = qr/[A-Za-z]{1,8}(?:-[0-9A-Za-z]{1,8})*/;
my $re_encoding = qr/[QqBb]/;
-my $re_encoded_text = qr/[^\?\s]*/;
+my $re_encoded_text = qr/[^\?]*/;
my $re_encoded_word = qr/=\?$re_charset(?:\*$re_language)?\?$re_encoding\?$re_encoded_text\?=/;
-my $re_capture_encoded_word = qr/=\?($re_charset)((?:\*$re_language)?)\?($re_encoding)\?($re_encoded_text)\?=/;
+my $re_capture_encoded_word = qr/=\?($re_charset)((?:\*$re_language)?)\?($re_encoding\?$re_encoded_text)\?=/;
+my $re_capture_encoded_word_split = qr/=\?($re_charset)((?:\*$re_language)?)\?($re_encoding)\?($re_encoded_text)\?=/;
+
+# in strict mode check also for valid base64 characters and also for valid quoted printable codes
+my $re_encoding_strict_b = qr/[Bb]/;
+my $re_encoding_strict_q = qr/[Qq]/;
+my $re_encoded_text_strict_b = qr/[0-9A-Za-z\+\/]*={0,2}/;
+my $re_encoded_text_strict_q = qr/(?:[^\?\s=]|=[0-9A-Fa-f]{2})*/;
+my $re_encoded_word_strict = qr/=\?$re_charset(?:\*$re_language)?\?(?:$re_encoding_strict_b\?$re_encoded_text_strict_b|$re_encoding_strict_q\?$re_encoded_text_strict_q)\?=/;
+my $re_capture_encoded_word_strict = qr/=\?($re_charset)((?:\*$re_language)?)\?($re_encoding_strict_b\?$re_encoded_text_strict_b|$re_encoding_strict_q\?$re_encoded_text_strict_q)\?=/;
+
+my $re_newline = qr/(?:\r\n|[\r\n])/;
+
+# in strict mode encoded words must be always separated by spaces or tabs (or folded newline)
+# except in comments when separator between words and comment round brackets can be omitted
+my $re_word_begin_strict = qr/(?:(?:[ \t]|\A)\(?|(?:[^\\]|\A)\)\()/;
+my $re_word_sep_strict = qr/(?:$re_newline?[ \t])+/;
+my $re_word_end_strict = qr/(?:\)\(|\)?(?:$re_newline?[ \t]|\z))/;
+
+my $re_match = qr/()((?:$re_encoded_word\s*)*$re_encoded_word)()/;
+my $re_match_strict = qr/($re_word_begin_strict)((?:$re_encoded_word_strict$re_word_sep_strict)*$re_encoded_word_strict)(?=$re_word_end_strict)/;
+
+my $re_capture = qr/$re_capture_encoded_word(?:\s*)?/;
+my $re_capture_strict = qr/$re_capture_encoded_word_strict$re_word_sep_strict?/;
our $STRICT_DECODE = 0;
sub decode($$;$) {
- use utf8;
- my ( $obj, $str, $chk ) = @_;
+ my ($obj, $str, $chk) = @_;
- # multi-line header to single line
- $str =~ s/(?:\r\n|[\r\n])([ \t])/$1/gos;
+ my $re_match_decode = $STRICT_DECODE ? $re_match_strict : $re_match;
+ my $re_capture_decode = $STRICT_DECODE ? $re_capture_strict : $re_capture;
- # decode each line separately
- my @input = split /(\r\n|\r|\n)/o, $str;
+ my $stop = 0;
my $output = substr($str, 0, 0); # to propagate taintedness
- while ( @input ) {
+ # decode each line separately, match whole continuous folded line at one call
+ 1 while not $stop and $str =~ s{^((?:[^\r\n]*(?:$re_newline[ \t])?)*)($re_newline)?}{
- my $line = shift @input;
- my $sep = shift @input;
+ my $line = $1;
+ my $sep = defined $2 ? $2 : '';
- # in strict mode encoded words must be always separated by spaces or tabs
- # except in comments when separator between words and comment round brackets can be omitted
- my $re_word_begin = $STRICT_DECODE ? qr/(?:[ \t\n]|\A)\(?/ : qr//;
- my $re_word_sep = $STRICT_DECODE ? qr/[ \t]+/ : qr/\s*/;
- my $re_word_end = $STRICT_DECODE ? qr/\)?(?:[ \t\n]|\z)/ : qr//;
+ $stop = 1 unless length($line) or length($sep);
- # concat consecutive encoded mime words with same charset, language and encoding
+ # NOTE: this code partially could break $chk support
+ # in non strict mode concat consecutive encoded mime words with same charset, language and encoding
# fixes breaking inside multi-byte characters
- 1 while $line =~ s/($re_word_begin)$re_capture_encoded_word$re_word_sep=\?\2\3\?\4\?($re_encoded_text)\?=(?=$re_word_end)/$1=\?$2$3\?$4\?$5$6\?=/;
-
- $line =~ s{($re_word_begin)((?:$re_encoded_word$re_word_sep)*$re_encoded_word)(?=$re_word_end)}{
- my $begin = $1;
- my $words = $2;
- $words =~ s{$re_capture_encoded_word$re_word_sep?}{
- if (uc($3) eq 'B') {
- $obj->{decode_b} or croak qq(MIME "B" unsupported);
- decode_b($1, $4, $chk);
- } elsif (uc($3) eq 'Q') {
- $obj->{decode_q} or croak qq(MIME "Q" unsupported);
- decode_q($1, $4, $chk);
+ 1 while not $STRICT_DECODE and $line =~ s/$re_capture_encoded_word_split\s*=\?\1\2\?\3\?($re_encoded_text)\?=/=\?$1$2\?$3\?$4$5\?=/so;
+
+ # process sequence of encoded MIME words at once
+ 1 while not $stop and $line =~ s{^(.*?)$re_match_decode}{
+
+ my $begin = $1 . $2;
+ my $words = $3;
+
+ $begin =~ tr/\r\n//d;
+ $output .= $begin;
+
+ # decode one MIME word
+ 1 while not $stop and $words =~ s{^(.*?)($re_capture_decode)}{
+
+ $output .= $1;
+ my $orig = $2;
+ my $charset = $3;
+ my ($mime_enc, $text) = split /\?/, $5;
+
+ $text =~ tr/\r\n//d;
+
+ my $enc = Encode::find_mime_encoding($charset);
+
+ # in non strict mode allow also perl encoding aliases
+ if ( not defined $enc and not $STRICT_DECODE ) {
+ # make sure that decoded string will be always strict UTF-8
+ $charset = 'UTF-8' if lc($charset) eq 'utf8';
+ $enc = Encode::find_encoding($charset);
+ }
+
+ if ( not defined $enc ) {
+ Carp::croak qq(Unknown charset "$charset") if not ref $chk and $chk & Encode::DIE_ON_ERR;
+ Carp::carp qq(Unknown charset "$charset") if not ref $chk and $chk & Encode::WARN_ON_ERR;
+ $stop = 1 if not ref $chk and $chk & Encode::RETURN_ON_ERR;
+ $output .= ($output =~ /(?:\A|[ \t])$/ ? '' : ' ') . $orig unless $stop; # $orig mime word is separated by whitespace
+ $stop ? $orig : '';
} else {
- croak qq(MIME "$3" encoding is nonexistent!);
+ if ( uc($mime_enc) eq 'B' and $obj->{decode_b} ) {
+ my $decoded = _decode_b($enc, $text, $chk);
+ $stop = 1 if not defined $decoded and not ref $chk and $chk & Encode::RETURN_ON_ERR;
+ $output .= (defined $decoded ? $decoded : $text) unless $stop;
+ $stop ? $orig : '';
+ } elsif ( uc($mime_enc) eq 'Q' and $obj->{decode_q} ) {
+ my $decoded = _decode_q($enc, $text, $chk);
+ $stop = 1 if not defined $decoded and not ref $chk and $chk & Encode::RETURN_ON_ERR;
+ $output .= (defined $decoded ? $decoded : $text) unless $stop;
+ $stop ? $orig : '';
+ } else {
+ Carp::croak qq(MIME "$mime_enc" unsupported) if not ref $chk and $chk & Encode::DIE_ON_ERR;
+ Carp::carp qq(MIME "$mime_enc" unsupported) if not ref $chk and $chk & Encode::WARN_ON_ERR;
+ $stop = 1 if not ref $chk and $chk & Encode::RETURN_ON_ERR;
+ $output .= ($output =~ /(?:\A|[ \t])$/ ? '' : ' ') . $orig unless $stop; # $orig mime word is separated by whitespace
+ $stop ? $orig : '';
+ }
}
- }eg;
- $begin . $words;
- }eg;
- $output .= $line;
- $output .= $sep if defined $sep;
+ }se;
- }
+ if ( not $stop ) {
+ $output .= $words;
+ $words = '';
+ }
+
+ $words;
+
+ }se;
+
+ if ( not $stop ) {
+ $line =~ tr/\r\n//d;
+ $output .= $line . $sep;
+ $line = '';
+ $sep = '';
+ }
+
+ $line . $sep;
- $_[1] = '' if $chk; # empty the input string in the stack so perlio is ok
+ }se;
+
+ $_[1] = $str if not ref $chk and $chk and !($chk & Encode::LEAVE_SRC);
return $output;
}
-sub decode_b {
- my ( $enc, $b, $chk ) = @_;
- my $d = find_encoding($enc) or croak qq(Unknown encoding "$enc");
- # MIME::Base64::decode_base64 ignores everything after a '=' padding character
- # split string after each sequence of padding characters and decode each substring
- my $db64 = join('', map { decode_base64($_) } split /(?<==)(?=[^=])/, $b);
- return $d->name eq 'utf8'
- ? Encode::decode_utf8($db64)
- : $d->decode( $db64, $chk || Encode::FB_PERLQQ );
+sub _decode_b {
+ my ($enc, $text, $chk) = @_;
+ # MIME::Base64::decode ignores everything after a '=' padding character
+ # in non strict mode split string after each sequence of padding characters and decode each substring
+ my $octets = $STRICT_DECODE ?
+ MIME::Base64::decode($text) :
+ join('', map { MIME::Base64::decode($_) } split /(?<==)(?=[^=])/, $text);
+ return _decode_octets($enc, $octets, $chk);
+}
+
+sub _decode_q {
+ my ($enc, $text, $chk) = @_;
+ $text =~ s/_/ /go;
+ $text =~ s/=([0-9A-Fa-f]{2})/pack('C', hex($1))/ego;
+ return _decode_octets($enc, $text, $chk);
}
-sub decode_q {
- my ( $enc, $q, $chk ) = @_;
- my $d = find_encoding($enc) or croak qq(Unknown encoding "$enc");
- $q =~ s/_/ /go;
- $q =~ s/=([0-9A-Fa-f]{2})/pack("C", hex($1))/ego;
- return $d->name eq 'utf8'
- ? Encode::decode_utf8($q)
- : $d->decode( $q, $chk || Encode::FB_PERLQQ );
+sub _decode_octets {
+ my ($enc, $octets, $chk) = @_;
+ $chk &= ~Encode::LEAVE_SRC if not ref $chk and $chk;
+ local $Carp::CarpLevel = $Carp::CarpLevel + 1; # propagate Carp messages back to caller
+ my $output = $enc->decode($octets, $chk);
+ return undef if not ref $chk and $chk and $octets ne '';
+ return $output;
}
sub encode($$;$) {
- my ( $obj, $str, $chk ) = @_;
- $_[1] = '' if $chk; # empty the input string in the stack so perlio is ok
- return $obj->_fold_line($obj->_encode_line($str));
+ my ($obj, $str, $chk) = @_;
+ my $output = $obj->_fold_line($obj->_encode_string($str, $chk));
+ $_[1] = $str if not ref $chk and $chk and !($chk & Encode::LEAVE_SRC);
+ return $output . substr($str, 0, 0); # to propagate taintedness
}
sub _fold_line {
- my ( $obj, $line ) = @_;
+ my ($obj, $line) = @_;
my $bpl = $obj->{bpl};
- my $output = substr($line, 0, 0); # to propagate taintedness
+ my $output = '';
- while ( length $line ) {
+ while ( length($line) ) {
if ( $line =~ s/^(.{0,$bpl})(\s|\z)// ) {
$output .= $1;
- $output .= "\r\n" . $2 if length $line;
+ $output .= "\r\n" . $2 if length($line);
} elsif ( $line =~ s/(\s)(.*)$// ) {
$output .= $line;
$line = $2;
- $output .= "\r\n" . $1 if length $line;
+ $output .= "\r\n" . $1 if length($line);
} else {
$output .= $line;
last;
@@ -147,56 +229,75 @@ sub _fold_line {
return $output;
}
-use constant HEAD => '=?UTF-8?';
-use constant TAIL => '?=';
-use constant SINGLE => { B => \&_encode_b, Q => \&_encode_q, B_len => \&_encode_b_len, Q_len => \&_encode_q_len };
-
-sub _encode_line {
- my ( $o, $str ) = @_;
- my $enc = $o->{encode};
- my $enc_len = $enc . '_len';
- my $llen = ( $o->{bpl} - length(HEAD) - 2 - length(TAIL) );
-
+sub _encode_string {
+ my ($obj, $str, $chk) = @_;
+ my $wordlen = $obj->{bpl} > 76 ? 76 : $obj->{bpl};
+ my $enc = Encode::find_mime_encoding($obj->{charset});
+ my $enc_chk = (not ref $chk and $chk) ? ($chk | Encode::LEAVE_SRC) : $chk;
my @result = ();
- my $chunk = '';
- while ( length( my $chr = substr( $str, 0, 1, '' ) ) ) {
- if ( SINGLE->{$enc_len}($chunk . $chr) > $llen ) {
- push @result, SINGLE->{$enc}($chunk);
- $chunk = '';
+ my $octets = '';
+ while ( length( my $chr = substr($str, 0, 1, '') ) ) {
+ my $seq;
+ {
+ local $Carp::CarpLevel = $Carp::CarpLevel + 1; # propagate Carp messages back to caller
+ $seq = $enc->encode($chr, $enc_chk);
}
- $chunk .= $chr;
+ if ( not length($seq) ) {
+ substr($str, 0, 0, $chr);
+ last;
+ }
+ if ( $obj->_encoded_word_len($octets . $seq) > $wordlen ) {
+ push @result, $obj->_encode_word($octets);
+ $octets = '';
+ }
+ $octets .= $seq;
}
- length($chunk) and push @result, SINGLE->{$enc}($chunk);
+ length($octets) and push @result, $obj->_encode_word($octets);
+ $_[1] = $str if not ref $chk and $chk and !($chk & Encode::LEAVE_SRC);
return join(' ', @result);
}
+sub _encode_word {
+ my ($obj, $octets) = @_;
+ my $charset = $obj->{charset};
+ my $encode = $obj->{encode};
+ my $text = $encode eq 'B' ? _encode_b($octets) : _encode_q($octets);
+ return "=?$charset?$encode?$text?=";
+}
+
+sub _encoded_word_len {
+ my ($obj, $octets) = @_;
+ my $charset = $obj->{charset};
+ my $encode = $obj->{encode};
+ my $text_len = $encode eq 'B' ? _encoded_b_len($octets) : _encoded_q_len($octets);
+ return length("=?$charset?$encode??=") + $text_len;
+}
+
sub _encode_b {
- HEAD . 'B?' . encode_base64( encode_utf8(shift), '' ) . TAIL;
+ my ($octets) = @_;
+ return MIME::Base64::encode($octets, '');
}
-sub _encode_b_len {
- my ( $chunk ) = @_;
- use bytes ();
- return bytes::length($chunk) * 4 / 3;
+sub _encoded_b_len {
+ my ($octets) = @_;
+ return ( length($octets) + 2 ) / 3 * 4;
}
-my $valid_q_chars = '0-9A-Za-z !*+\-/';
+my $re_invalid_q_char = qr/[^0-9A-Za-z !*+\-\/]/;
sub _encode_q {
- my ( $chunk ) = @_;
- $chunk = encode_utf8($chunk);
- $chunk =~ s{([^$valid_q_chars])}{
- join("" => map {sprintf "=%02X", $_} unpack("C*", $1))
+ my ($octets) = @_;
+ $octets =~ s{($re_invalid_q_char)}{
+ join('', map { sprintf('=%02X', $_) } unpack('C*', $1))
}egox;
- $chunk =~ s/ /_/go;
- return HEAD . 'Q?' . $chunk . TAIL;
+ $octets =~ s/ /_/go;
+ return $octets;
}
-sub _encode_q_len {
- my ( $chunk ) = @_;
- use bytes ();
- my $valid_count =()= $chunk =~ /[$valid_q_chars]/sgo;
- return ( bytes::length($chunk) - $valid_count ) * 3 + $valid_count;
+sub _encoded_q_len {
+ my ($octets) = @_;
+ my $invalid_count = () = $octets =~ /$re_invalid_q_char/sgo;
+ return ( $invalid_count * 3 ) + ( length($octets) - $invalid_count );
}
1;
@@ -204,75 +305,119 @@ __END__
=head1 NAME
-Encode::MIME::Header -- MIME 'B' and 'Q' encoding for unstructured header
+Encode::MIME::Header -- MIME encoding for an unstructured email header
=head1 SYNOPSIS
- use Encode qw/encode decode/;
- $utf8 = decode('MIME-Header', $header);
- $header = encode('MIME-Header', $utf8);
-
-=head1 ABSTRACT
-
-This module implements RFC 2047 MIME encoding for unstructured header.
-It cannot be used for structured headers like From or To. There are 3
-variant encoding names; C<MIME-Header>, C<MIME-B> and C<MIME-Q>. The
-difference is described below
+ use Encode qw(encode decode);
- decode() encode()
- ----------------------------------------------
- MIME-Header Both B and Q =?UTF-8?B?....?=
- MIME-B B only; Q croaks =?UTF-8?B?....?=
- MIME-Q Q only; B croaks =?UTF-8?Q?....?=
+ my $mime_str = encode("MIME-Header", "Sample:Text \N{U+263A}");
+ # $mime_str is "=?UTF-8?B?U2FtcGxlOlRleHQg4pi6?="
-=head1 DESCRIPTION
+ my $mime_q_str = encode("MIME-Q", "Sample:Text \N{U+263A}");
+ # $mime_q_str is "=?UTF-8?Q?Sample=3AText_=E2=98=BA?="
-When you decode(=?I<encoding>?I<X>?I<ENCODED WORD>?=), I<ENCODED WORD>
-is extracted and decoded for I<X> encoding (B for Base64, Q for
-Quoted-Printable). Then the decoded chunk is fed to
-decode(I<encoding>). So long as I<encoding> is supported by Encode,
-any source encoding is fine.
+ my $str = decode("MIME-Header",
+ "=?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=\r\n " .
+ "=?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?="
+ );
+ # $str is "If you can read this you understand the example."
-When you encode, it just encodes UTF-8 string with I<X> encoding then
-quoted with =?UTF-8?I<X>?....?= . The parts that RFC 2047 forbids to
-encode are left as is and long lines are folded within 76 bytes per
-line.
+ use Encode qw(decode :fallbacks);
+ use Encode::MIME::Header;
+ local $Encode::MIME::Header::STRICT_DECODE = 1;
+ my $strict_string = decode("MIME-Header", $mime_string, FB_CROAK);
+ # use strict decoding and croak on errors
-=head1 BUGS
-
-Before version 2.83 this module had broken both decoder and encoder.
-Encoder inserted additional spaces, incorrectly encoded input data
-and produced invalid MIME strings. Decoder lot of times discarded
-white space characters, incorrectly interpreted data or decoded
-Base64 string as Quoted-Printable.
+=head1 ABSTRACT
-As of version 2.83 encoder should be fully compliant of RFC 2047.
-Due to bugs in previous versions of encoder, decoder is by default in
-less strict compatible mode. It should be able to decode strings
-encoded by pre 2.83 version of this module. But this default mode is
-not correct according to RFC 2047.
+This module implements L<RFC 2047|https://tools.ietf.org/html/rfc2047> MIME
+encoding for an unstructured field body of the email header. It can also be
+used for L<RFC 822|https://tools.ietf.org/html/rfc822> 'text' token. However,
+it cannot be used directly for the whole header with the field name or for the
+structured header fields like From, To, Cc, Message-Id, etc... There are 3
+encoding names supported by this module: C<MIME-Header>, C<MIME-B> and
+C<MIME-Q>.
-In default mode decoder try to decode every substring which looks like
-MIME encoded data. So it means that MIME data does not need to be
-separated by white space. To enforce correct strict mode, set package
-variable $Encode::MIME::Header::STRICT_DECODE to 1, e.g. by localizing:
+=head1 DESCRIPTION
-C<require Encode::MIME::Header; local $Encode::MIME::Header::STRICT_DECODE = 1;>
+Decode method takes an unstructured field body of the email header (or
+L<RFC 822|https://tools.ietf.org/html/rfc822> 'text' token) as its input and
+decodes each MIME encoded-word from input string to a sequence of bytes
+according to L<RFC 2047|https://tools.ietf.org/html/rfc2047> and
+L<RFC 2231|https://tools.ietf.org/html/rfc2231>. Subsequently, each sequence
+of bytes with the corresponding MIME charset is decoded with
+L<the Encode module|Encode> and finally, one output string is returned. Text
+parts of the input string which do not contain MIME encoded-word stay
+unmodified in the output string. Folded newlines between two consecutive MIME
+encoded-words are discarded, others are preserved in the output string.
+C<MIME-B> can decode Base64 variant, C<MIME-Q> can decode Quoted-Printable
+variant and C<MIME-Header> can decode both of them. If L<Encode module|Encode>
+does not support particular MIME charset or chosen variant then an action based
+on L<CHECK flags|Encode/Handling Malformed Data> is performed (by default, the
+MIME encoded-word is not decoded).
+
+Encode method takes a scalar string as its input and uses
+L<strict UTF-8|Encode/UTF-8 vs. utf8 vs. UTF8> encoder for encoding it to UTF-8
+bytes. Then a sequence of UTF-8 bytes is encoded into MIME encoded-words
+(C<MIME-Header> and C<MIME-B> use a Base64 variant while C<MIME-Q> uses a
+Quoted-Printable variant) where each MIME encoded-word is limited to 75
+characters. MIME encoded-words are separated by C<CRLF SPACE> and joined to
+one output string. Output string is suitable for unstructured field body of
+the email header.
+
+Both encode and decode methods propagate
+L<CHECK flags|Encode/Handling Malformed Data> when encoding and decoding the
+MIME charset.
-It would be nice to support encoding to non-UTF8, such as =?ISO-2022-JP?
-and =?ISO-8859-1?= but that makes the implementation too complicated.
-These days major mail agents all support =?UTF-8? so I think it is
-just good enough.
+=head1 BUGS
-Due to popular demand, 'MIME-Header-ISO_2022_JP' was introduced by
-Makamaka. Thre are still too many MUAs especially cellular phone
-handsets which does not grok UTF-8.
+Versions prior to 2.22 (part of Encode 2.83) have a malfunctioning decoder
+and encoder. The MIME encoder infamously inserted additional spaces or
+discarded white spaces between consecutive MIME encoded-words, which led to
+invalid MIME headers produced by this module. The MIME decoder had a tendency
+to discard white spaces, incorrectly interpret data or attempt to decode Base64
+MIME encoded-words as Quoted-Printable. These problems were fixed in version
+2.22. It is highly recommended not to use any version prior 2.22!
+
+Versions prior to 2.24 (part of Encode 2.87) ignored
+L<CHECK flags|Encode/Handling Malformed Data>. The MIME encoder used
+L<not strict utf8|Encode/UTF-8 vs. utf8 vs. UTF8> encoder for input Unicode
+strings which could lead to invalid UTF-8 sequences. MIME decoder used also
+L<not strict utf8|Encode/UTF-8 vs. utf8 vs. UTF8> decoder and additionally
+called the decode method with a C<Encode::FB_PERLQQ> flag (thus user-specified
+L<CHECK flags|Encode/Handling Malformed Data> were ignored). Moreover, it
+automatically croaked when a MIME encoded-word contained unknown encoding.
+Since version 2.24, this module uses
+L<strict UTF-8|Encode/UTF-8 vs. utf8 vs. UTF8> encoder and decoder. And
+L<CHECK flags|Encode/Handling Malformed Data> are correctly propagated.
+
+Since version 2.22 (part of Encode 2.83), the MIME encoder should be fully
+compliant to L<RFC 2047|https://tools.ietf.org/html/rfc2047> and
+L<RFC 2231|https://tools.ietf.org/html/rfc2231>. Due to the aforementioned
+bugs in previous versions of the MIME encoder, there is a I<less strict>
+compatible mode for the MIME decoder which is used by default. It should be
+able to decode MIME encoded-words encoded by pre 2.22 versions of this module.
+However, note that this is not correct according to
+L<RFC 2047|https://tools.ietf.org/html/rfc2047>.
+
+In default I<not strict> mode the MIME decoder attempts to decode every substring
+which looks like a MIME encoded-word. Therefore, the MIME encoded-words do not
+need to be separated by white space. To enforce a correct I<strict> mode, set
+variable C<$Encode::MIME::Header::STRICT_DECODE> to 1 e.g. by localizing:
+
+ use Encode::MIME::Header;
+ local $Encode::MIME::Header::STRICT_DECODE = 1;
+
+=head1 AUTHORS
+
+Pali E<lt>pali@cpan.orgE<gt>
=head1 SEE ALSO
-L<Encode>
-
-RFC 2047, L<http://www.faqs.org/rfcs/rfc2047.html> and many other
-locations.
+L<Encode>,
+L<RFC 822|https://tools.ietf.org/html/rfc822>,
+L<RFC 2047|https://tools.ietf.org/html/rfc2047>,
+L<RFC 2231|https://tools.ietf.org/html/rfc2231>
=cut
diff --git a/cpan/Encode/lib/Encode/MIME/Name.pm b/cpan/Encode/lib/Encode/MIME/Name.pm
index 10d86a746d..1a8d788aec 100644
--- a/cpan/Encode/lib/Encode/MIME/Name.pm
+++ b/cpan/Encode/lib/Encode/MIME/Name.pm
@@ -1,8 +1,9 @@
package Encode::MIME::Name;
use strict;
use warnings;
-our $VERSION = do { my @r = ( q$Revision: 1.1 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
+our $VERSION = do { my @r = ( q$Revision: 1.2 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
+# NOTE: This table must be 1:1 mapping
our %MIME_NAME_OF = (
'AdobeStandardEncoding' => 'Adobe-Standard-Encoding',
'AdobeSymbol' => 'Adobe-Symbol-Encoding',
@@ -43,7 +44,7 @@ our %MIME_NAME_OF = (
'hp-roman8' => 'hp-roman8',
'hz' => 'HZ-GB-2312',
'iso-2022-jp' => 'ISO-2022-JP',
- 'iso-2022-jp-1' => 'ISO-2022-JP',
+ 'iso-2022-jp-1' => 'ISO-2022-JP-1',
'iso-2022-kr' => 'ISO-2022-KR',
'iso-8859-1' => 'ISO-8859-1',
'iso-8859-10' => 'ISO-8859-10',
@@ -73,13 +74,20 @@ our %MIME_NAME_OF = (
'UTF-32BE' => 'UTF-32BE',
'UTF-32LE' => 'UTF-32LE',
'UTF-7' => 'UTF-7',
- 'utf8' => 'UTF-8',
'utf-8-strict' => 'UTF-8',
'viscii' => 'VISCII',
);
+# NOTE: %MIME_NAME_OF is still 1:1 mapping
+our %ENCODE_NAME_OF = map { uc $MIME_NAME_OF{$_} => $_ } keys %MIME_NAME_OF;
+
+# Add additional 1:N mapping
+$MIME_NAME_OF{'utf8'} = 'UTF-8';
+
sub get_mime_name($) { $MIME_NAME_OF{$_[0]} };
+sub get_encode_name($) { $ENCODE_NAME_OF{uc $_[0]} };
+
1;
__END__
diff --git a/cpan/Encode/t/Aliases.t b/cpan/Encode/t/Aliases.t
index 2fc14cc114..8d4752bddb 100644
--- a/cpan/Encode/t/Aliases.t
+++ b/cpan/Encode/t/Aliases.t
@@ -159,7 +159,7 @@ define_alias( sub {
return "iso-8859-2" if $enc =~ /hebrew/i;
return "does-not-exist" if $enc =~ /arabic/i; # should then use other override alias
return "utf-8" if $enc =~ /eight/i;
- return;
+ return "unknown";
});
print "# alias test with alias overrides\n";
diff --git a/cpan/Encode/t/Encode.t b/cpan/Encode/t/Encode.t
index d12b2fa232..0536b4b714 100644
--- a/cpan/Encode/t/Encode.t
+++ b/cpan/Encode/t/Encode.t
@@ -25,7 +25,7 @@ my @character_set = ('0'..'9', 'A'..'Z', 'a'..'z');
my @source = qw(ascii iso8859-1 cp1250);
my @destiny = qw(cp1047 cp37 posix-bc);
my @ebcdic_sets = qw(cp1047 cp37 posix-bc);
-plan tests => 38+$n*@encodings + 2*@source*@destiny*@character_set + 2*@ebcdic_sets*256 + 6 + 5 + 2;
+plan tests => 38+$n*@encodings + 2*@source*@destiny*@character_set + 2*@ebcdic_sets*256 + 6 + 3 + 3*8 + 2;
my $str = join('',map(chr($_),0x20..0x7E));
my $cpy = $str;
@@ -156,15 +156,49 @@ ok(encode(utf8 => Encode::Dummy->new("foobar")), "foobar");
ok(decode_utf8(*1), "*main::1");
# hash keys
-my $key = (keys %{{ "whatever\x{100}" => '' }})[0];
-my $kopy = $key;
-encode("UTF-16LE", $kopy, Encode::FB_CROAK);
-is $key, "whatever\x{100}", 'encode with shared hash key scalars';
-undef $key;
-$key = (keys %{{ "whatever" => '' }})[0];
-$kopy = $key;
-decode("UTF-16LE", $kopy, Encode::FB_CROAK);
-is $key, "whatever", 'decode with shared hash key scalars';
+foreach my $name ("UTF-16LE", "UTF-8", "Latin1") {
+ my $key = (keys %{{ "whatever\x{CA}" => '' }})[0];
+ my $kopy = $key;
+ encode($name, $kopy, Encode::FB_CROAK);
+ is $key, "whatever\x{CA}", "encode $name with shared hash key scalars";
+ undef $key;
+ $key = (keys %{{ "whatever\x{CA}" => '' }})[0];
+ $kopy = $key;
+ encode($name, $kopy, Encode::FB_CROAK | Encode::LEAVE_SRC);
+ is $key, "whatever\x{CA}", "encode $name with LEAVE_SRC and shared hash key scalars";
+ undef $key;
+ $key = (keys %{{ "whatever" => '' }})[0];
+ $kopy = $key;
+ decode($name, $kopy, Encode::FB_CROAK);
+ is $key, "whatever", "decode $name with shared hash key scalars";
+ undef $key;
+ $key = (keys %{{ "whatever" => '' }})[0];
+ $kopy = $key;
+ decode($name, $kopy, Encode::FB_CROAK | Encode::LEAVE_SRC);
+ is $key, "whatever", "decode $name with LEAVE_SRC and shared hash key scalars";
+
+ my $enc = find_encoding($name);
+ undef $key;
+ $key = (keys %{{ "whatever\x{CA}" => '' }})[0];
+ $kopy = $key;
+ $enc->encode($kopy, Encode::FB_CROAK);
+ is $key, "whatever\x{CA}", "encode obj $name with shared hash key scalars";
+ undef $key;
+ $key = (keys %{{ "whatever\x{CA}" => '' }})[0];
+ $kopy = $key;
+ $enc->encode($kopy, Encode::FB_CROAK | Encode::LEAVE_SRC);
+ is $key, "whatever\x{CA}", "encode obj $name with LEAVE_SRC and shared hash key scalars";
+ undef $key;
+ $key = (keys %{{ "whatever" => '' }})[0];
+ $kopy = $key;
+ $enc->decode($kopy, Encode::FB_CROAK);
+ is $key, "whatever", "decode obj $name with shared hash key scalars";
+ undef $key;
+ $key = (keys %{{ "whatever" => '' }})[0];
+ $kopy = $key;
+ $enc->decode($kopy, Encode::FB_CROAK | Encode::LEAVE_SRC);
+ is $key, "whatever", "decode obj $name with LEAVE_SRC and shared hash key scalars";
+}
my $latin1 = find_encoding('latin1');
my $orig = "\316";
diff --git a/cpan/Encode/t/at-cn.t b/cpan/Encode/t/at-cn.t
index 03ba10955a..c82225ecae 100644
--- a/cpan/Encode/t/at-cn.t
+++ b/cpan/Encode/t/at-cn.t
@@ -21,7 +21,9 @@ use Encode;
no utf8; # we have raw Chinese encodings here
-use_ok('Encode::CN');
+BEGIN {
+ use_ok('Encode::CN');
+}
# Since JP.t already tests basic file IO, we will just focus on
# internal encode / decode test here. Unfortunately, to test
diff --git a/cpan/Encode/t/at-tw.t b/cpan/Encode/t/at-tw.t
index e6a559b0a1..203fc34bc9 100644
--- a/cpan/Encode/t/at-tw.t
+++ b/cpan/Encode/t/at-tw.t
@@ -23,7 +23,9 @@ use Encode;
no utf8; # we have raw Chinese encodings here
-use_ok('Encode::TW');
+BEGIN {
+ use_ok('Encode::TW');
+}
# Since JP.t already tests basic file IO, we will just focus on
# internal encode / decode test here. Unfortunately, to test
diff --git a/cpan/Encode/t/decode.t b/cpan/Encode/t/decode.t
index 6b24a8fa8c..3995412895 100644
--- a/cpan/Encode/t/decode.t
+++ b/cpan/Encode/t/decode.t
@@ -1,9 +1,9 @@
#
-# $Id: decode.t,v 1.2 2016/08/04 03:15:58 dankogai Exp $
+# $Id: decode.t,v 1.3 2016/10/28 05:03:52 dankogai Exp $
#
use strict;
use Encode qw(decode_utf8 FB_CROAK find_encoding decode);
-use Test::More tests => 5;
+use Test::More tests => 17;
sub croak_ok(&) {
my $code = shift;
@@ -32,3 +32,55 @@ SKIP: {
*a = $orig;
is($latin1->decode(*a), '*main::'.$orig, '[cpan #115168] passing typeglobs to decode');
}
+
+$orig = "\x80";
+$orig =~ /(.)/;
+is($latin1->decode($1), "\N{U+0080}", 'passing magic regex to latin1 decode');
+
+$orig = "\x80";
+*a = $orig;
+is($latin1->decode(*a), "*main::\N{U+0080}", 'passing typeglob to latin1 decode');
+
+$orig = "\N{U+0080}";
+$orig =~ /(.)/;
+is($latin1->encode($1), "\x80", 'passing magic regex to latin1 encode');
+
+$orig = "\xC3\x80";
+$orig =~ /(..)/;
+is(Encode::decode_utf8($1), "\N{U+C0}", 'passing magic regex to Encode::decode_utf8');
+
+$orig = "\xC3\x80";
+*a = $orig;
+is(Encode::decode_utf8(*a), "*main::\N{U+C0}", 'passing typeglob to Encode::decode_utf8');
+
+$orig = "\N{U+C0}";
+$orig =~ /(.)/;
+is(Encode::encode_utf8($1), "\xC3\x80", 'passing magic regex to Encode::encode_utf8');
+
+$orig = "\xC3\x80";
+$orig =~ /(..)/;
+is(Encode::decode('utf-8', $1), "\N{U+C0}", 'passing magic regex to UTF-8 decode');
+
+$orig = "\xC3\x80";
+*a = $orig;
+is(Encode::decode('utf-8', *a), "*main::\N{U+C0}", 'passing typeglob to UTF-8 decode');
+
+$orig = "\N{U+C0}";
+$orig =~ /(.)/;
+is(Encode::encode('utf-8', $1), "\xC3\x80", 'passing magic regex to UTF-8 encode');
+
+SKIP: {
+ skip "Perl Version ($]) is older than v5.16", 3 if $] < 5.016;
+
+ $orig = "\N{U+0080}";
+ *a = $orig;
+ is($latin1->encode(*a), "*main::\x80", 'passing typeglob to latin1 encode');
+
+ $orig = "\N{U+C0}";
+ *a = $orig;
+ is(Encode::encode_utf8(*a), "*main::\xC3\x80", 'passing typeglob to Encode::encode_utf8');
+
+ $orig = "\N{U+C0}";
+ *a = $orig;
+ is(Encode::encode('utf-8', *a), "*main::\xC3\x80", 'passing typeglob to UTF-8 encode');
+}
diff --git a/cpan/Encode/t/enc_data.t b/cpan/Encode/t/enc_data.t
index 99ea78d94c..2ead16ea95 100644
--- a/cpan/Encode/t/enc_data.t
+++ b/cpan/Encode/t/enc_data.t
@@ -1,4 +1,4 @@
-# $Id: enc_data.t,v 2.3 2016/08/10 18:08:45 dankogai Exp dankogai $
+# $Id: enc_data.t,v 2.5 2016/11/29 23:29:23 dankogai Exp dankogai $
BEGIN {
require Config; import Config;
@@ -11,11 +11,11 @@ BEGIN {
exit 0;
}
if (ord("A") == 193) {
- print "1..0 # encoding pragma does not support EBCDIC platforms\n";
+ print "1..0 # Skip: encoding pragma does not support EBCDIC platforms\n";
exit(0);
}
- if ("$]" >= 5.025) {
- print "1..0 # encoding pragma not supported in Perl 5.26\n";
+ if ($] >= 5.025 and !$Config{usecperl}) {
+ print "1..0 # Skip: encoding pragma not supported in Perl 5.26\n";
exit(0);
}
if ($] <= 5.008 and !$Config{perl_patchlevel}){
diff --git a/cpan/Encode/t/enc_eucjp.t b/cpan/Encode/t/enc_eucjp.t
index 952a8ae7bc..9b32459792 100644
--- a/cpan/Encode/t/enc_eucjp.t
+++ b/cpan/Encode/t/enc_eucjp.t
@@ -1,4 +1,4 @@
-# $Id: enc_eucjp.t,v 2.3 2016/08/10 18:08:45 dankogai Exp dankogai $
+# $Id: enc_eucjp.t,v 2.3 2016/08/10 18:08:45 dankogai Exp $
# This is the twin of enc_utf8.t .
BEGIN {
diff --git a/cpan/Encode/t/enc_module.t b/cpan/Encode/t/enc_module.t
index 8796a9b343..7d7382b903 100644
--- a/cpan/Encode/t/enc_module.t
+++ b/cpan/Encode/t/enc_module.t
@@ -1,4 +1,4 @@
-# $Id: enc_module.t,v 2.3 2016/08/10 18:08:45 dankogai Exp dankogai $
+# $Id: enc_module.t,v 2.5 2016/11/29 23:29:23 dankogai Exp dankogai $
# This file is in euc-jp
BEGIN {
require Config; import Config;
@@ -15,11 +15,11 @@ BEGIN {
exit 0;
}
if (ord("A") == 193) {
- print "1..0 # encoding pragma does not support EBCDIC platforms\n";
+ print "1..0 # Skip: encoding pragma does not support EBCDIC platforms\n";
exit(0);
}
- if ("$]" >= 5.025) {
- print "1..0 # encoding pragma not supported in Perl 5.26\n";
+ if ($] >= 5.025 and !$Config{usecperl}) {
+ print "1..0 # Skip: encoding pragma not supported in Perl 5.26\n";
exit(0);
}
}
diff --git a/cpan/Encode/t/enc_utf8.t b/cpan/Encode/t/enc_utf8.t
index 7ffaac0f3f..b07c573960 100644
--- a/cpan/Encode/t/enc_utf8.t
+++ b/cpan/Encode/t/enc_utf8.t
@@ -1,4 +1,4 @@
-# $Id: enc_utf8.t,v 2.3 2016/08/10 18:08:45 dankogai Exp dankogai $
+# $Id: enc_utf8.t,v 2.3 2016/08/10 18:08:45 dankogai Exp $
# This is the twin of enc_eucjp.t .
BEGIN {
diff --git a/cpan/Encode/t/encoding-locale.t b/cpan/Encode/t/encoding-locale.t
index 7a305a0dcf..87e7ecb45f 100644
--- a/cpan/Encode/t/encoding-locale.t
+++ b/cpan/Encode/t/encoding-locale.t
@@ -22,5 +22,5 @@ SKIP: {
ok(defined $enc, 'encoding returned is supported')
or diag("Encoding: ", explain($locale_encoding));
isa_ok($enc, 'Encode::Encoding');
- note($locale_encoding, ' => ', $enc->name);
+ eval { note($locale_encoding, ' => ', $enc->name); };
}
diff --git a/cpan/Encode/t/encoding.t b/cpan/Encode/t/encoding.t
index 18d1921428..33010e74b5 100644
--- a/cpan/Encode/t/encoding.t
+++ b/cpan/Encode/t/encoding.t
@@ -9,11 +9,11 @@ BEGIN {
exit 0;
}
if (ord("A") == 193) {
- print "1..0 # encoding pragma does not support EBCDIC platforms\n";
+ print "1..0 # Skip: encoding pragma does not support EBCDIC platforms\n";
exit(0);
}
- if ("$]" >= 5.025) {
- print "1..0 # encoding pragma not supported in Perl 5.26\n";
+ if ($] >= 5.025 and !$Config{usecperl}) {
+ print "1..0 # Skip: encoding pragma not supported in Perl 5.26\n";
exit(0);
}
}
diff --git a/cpan/Encode/t/fallback.t b/cpan/Encode/t/fallback.t
index 8ef8ab38df..86605ef3b8 100644
--- a/cpan/Encode/t/fallback.t
+++ b/cpan/Encode/t/fallback.t
@@ -35,7 +35,7 @@ for my $i (0x80..0xff){
$uo .= chr($i);
$residue .= chr($i);
$af .= '?';
- $uf .= "\x{FFFD}";
+ $uf .= "\x{FFFD}" if $i < 0xfd;
$ap .= sprintf("\\x{%04x}", $i);
$up .= sprintf("\\x%02X", $i);
$ah .= sprintf("&#%d;", $i);
diff --git a/cpan/Encode/t/jperl.t b/cpan/Encode/t/jperl.t
index 475d8bc0db..a0e7a379f6 100644
--- a/cpan/Encode/t/jperl.t
+++ b/cpan/Encode/t/jperl.t
@@ -1,5 +1,5 @@
#
-# $Id: jperl.t,v 2.3 2016/08/10 18:08:45 dankogai Exp dankogai $
+# $Id: jperl.t,v 2.5 2016/11/29 23:29:23 dankogai Exp dankogai $
#
# This script is written in euc-jp
@@ -17,8 +17,8 @@ BEGIN {
print "1..0 # Skip: EBCDIC\n";
exit 0;
}
- if ("$]" >= 5.025) {
- print "1..0 # encoding pragma not supported in Perl 5.26\n";
+ if ($] >= 5.025 and !$Config{usecperl}) {
+ print "1..0 # Skip: encoding pragma not supported in Perl 5.26\n";
exit(0);
}
$| = 1;
diff --git a/cpan/Encode/t/magic.t b/cpan/Encode/t/magic.t
new file mode 100644
index 0000000000..8295152247
--- /dev/null
+++ b/cpan/Encode/t/magic.t
@@ -0,0 +1,144 @@
+BEGIN {
+ if ($ENV{'PERL_CORE'}) {
+ chdir 't';
+ unshift @INC, '../lib';
+ }
+ require Config; import Config;
+ if ($Config{'extensions'} !~ /\bEncode\b/) {
+ print "1..0 # Skip: Encode was not built\n";
+ exit 0;
+ }
+ if (ord("A") == 193) {
+ print "1..0 # Skip: EBCDIC\n";
+ exit 0;
+ }
+ $| = 1;
+}
+
+use strict;
+use warnings;
+
+use Encode qw(find_encoding encode decode encode_utf8 decode_utf8 is_utf8 _utf8_on _utf8_off FB_CROAK);
+
+use Test::More tests => 3*(2*(4*(4*4)+4)+4+3*3);
+
+my $ascii = find_encoding('ASCII');
+my $latin1 = find_encoding('Latin1');
+my $utf8 = find_encoding('UTF-8');
+my $utf16 = find_encoding('UTF-16LE');
+
+my $undef = undef;
+my $ascii_str = 'ascii_str';
+my $utf8_str = 'utf8_str';
+_utf8_on($utf8_str);
+
+{
+ foreach my $str ($undef, $ascii_str, $utf8_str) {
+ foreach my $croak (0, 1) {
+ foreach my $enc ('ASCII', 'Latin1', 'UTF-8', 'UTF-16LE') {
+ my $mod = defined $str && $croak;
+ my $func = "encode('" . $enc . "', " . (!defined $str ? 'undef' : is_utf8($str) ? '$utf8_str' : '$ascii_str') . ($croak ? ', FB_CROAK' : '') . ')';
+ tie my $input, 'TieScalarCounter', $str;
+ my $output = encode($enc, $input, $croak ? FB_CROAK : 0);
+ is(tied($input)->{fetch}, 1, "$func processes get magic only once");
+ is(tied($input)->{store}, $mod ? 1 : 0, "$func " . ($mod ? 'processes set magic only once' : 'does not process set magic'));
+ is($input, $mod ? '' : $str, "$func " . ($mod ? 'modifies' : 'does not modify') . ' $input string');
+ is($output, ((defined $str and $enc eq 'UTF-16LE') ? encode("UTF-16LE", $str) : $str), "$func returns correct \$output string");
+ }
+ foreach my $enc ('ASCII', 'Latin1', 'UTF-8', 'UTF-16LE') {
+ my $mod = defined $str && $croak;
+ my $func = "decode('" . $enc . "', " . (!defined $str ? 'undef' : is_utf8($str) ? '$utf8_str' : '$ascii_str') . ($croak ? ', FB_CROAK' : '') . ')';
+ my $input_str = ((defined $str and $enc eq 'UTF-16LE') ? encode("UTF-16LE", $str) : $str);
+ tie my $input, 'TieScalarCounter', $input_str;
+ my $output = decode($enc, $input, $croak ? FB_CROAK : 0);
+ is(tied($input)->{fetch}, 1, "$func processes get magic only once");
+ is(tied($input)->{store}, $mod ? 1 : 0, "$func " . ($mod ? 'processes set magic only once' : 'does not process set magic'));
+ is($input, $mod ? '' : $input_str, "$func " . ($mod ? 'modifies' : 'does not modify') . ' $input string');
+ is($output, $str, "$func returns correct \$output string");
+ }
+ foreach my $obj ($ascii, $latin1, $utf8, $utf16) {
+ my $mod = defined $str && $croak;
+ my $func = '$' . $obj->name() . '->encode(' . (!defined $str ? 'undef' : is_utf8($str) ? '$utf8_str' : '$ascii_str') . ($croak ? ', FB_CROAK' : '') . ')';
+ tie my $input, 'TieScalarCounter', $str;
+ my $output = $obj->encode($input, $croak ? FB_CROAK : 0);
+ is(tied($input)->{fetch}, 1, "$func processes get magic only once");
+ is(tied($input)->{store}, $mod ? 1 : 0, "$func " . ($mod ? 'processes set magic only once' : 'does not process set magic'));
+ is($input, $mod ? '' : $str, "$func " . ($mod ? 'modifies' : 'does not modify') . ' $input string');
+ is($output, ((defined $str and $obj == $utf16) ? encode("UTF-16LE", $str) : $str), "$func returns correct \$output string");
+ }
+ foreach my $obj ($ascii, $latin1, $utf8, $utf16) {
+ my $mod = defined $str && $croak;
+ my $func = '$' . $obj->name() . '->decode(' . (!defined $str ? 'undef' : is_utf8($str) ? '$utf8_str' : '$ascii_str') . ($croak ? ', FB_CROAK' : '') . ')';
+ my $input_str = ((defined $str and $obj == $utf16) ? encode("UTF-16LE", $str) : $str);
+ tie my $input, 'TieScalarCounter', $input_str;
+ my $output = $obj->decode($input, $croak ? FB_CROAK : 0);
+ is(tied($input)->{fetch}, 1, "$func processes get magic only once");
+ is(tied($input)->{store}, $mod ? 1 : 0, "$func " . ($mod ? 'processes set magic only once' : 'does not process set magic'));
+ is($input, $mod ? '' : $input_str, "$func " . ($mod ? 'modifies' : 'does not modify') . ' $input string');
+ is($output, $str, "$func returns correct \$output string");
+ }
+ {
+ my $mod = defined $str && $croak;
+ my $func = 'decode_utf8(' . (!defined $str ? 'undef' : is_utf8($str) ? '$utf8_str' : '$ascii_str') . ($croak ? ', FB_CROAK' : '') . ')';
+ tie my $input, 'TieScalarCounter', $str;
+ my $output = decode_utf8($input, $croak ? FB_CROAK : 0);
+ is(tied($input)->{fetch}, 1, "$func processes get magic only once");
+ is(tied($input)->{store}, $mod ? 1 : 0, "$func " . ($mod ? 'processes set magic only once' : 'does not process set magic'));
+ is($input, $mod ? '' : $str, "$func " . ($mod ? 'modifies' : 'does not modify') . ' $input string');
+ is($output, $str, "$func returns correct \$output string");
+ }
+ }
+ {
+ my $func = 'encode_utf8(' . (!defined $str ? 'undef' : is_utf8($str) ? '$utf8_str' : '$ascii_str') . ')';
+ tie my $input, 'TieScalarCounter', $str;
+ my $output = encode_utf8($input);
+ is(tied($input)->{fetch}, 1, "$func processes get magic only once");
+ is(tied($input)->{store}, 0, "$func does not process set magic");
+ is($input, $str, "$func does not modify \$input string");
+ is($output, $str, "$func returns correct \$output string");
+ }
+ {
+ my $func = '_utf8_on(' . (!defined $str ? 'undef' : is_utf8($str) ? '$utf8_str' : '$ascii_str') . ')';
+ tie my $input, 'TieScalarCounter', $str;
+ _utf8_on($input);
+ is(tied($input)->{fetch}, 1, "$func processes get magic only once");
+ is(tied($input)->{store}, defined $str ? 1 : 0, "$func " . (defined $str ? 'processes set magic only once' : 'does not process set magic'));
+ defined $str ? ok(is_utf8($input), "$func sets UTF8 status flag") : ok(!is_utf8($input), "$func does not set UTF8 status flag");
+ }
+ {
+ my $func = '_utf8_off(' . (!defined $str ? 'undef' : is_utf8($str) ? '$utf8_str' : '$ascii_str') . ')';
+ tie my $input, 'TieScalarCounter', $str;
+ _utf8_off($input);
+ is(tied($input)->{fetch}, 1, "$func processes get magic only once");
+ is(tied($input)->{store}, defined $str ? 1 : 0, "$func " . (defined $str ? 'processes set magic only once' : 'does not process set magic'));
+ ok(!is_utf8($input), "$func unsets UTF8 status flag");
+ }
+ {
+ my $func = 'is_utf8(' . (!defined $str ? 'undef' : is_utf8($str) ? '$utf8_str' : '$ascii_str') . ')';
+ tie my $input, 'TieScalarCounter', $str;
+ my $utf8 = is_utf8($input);
+ is(tied($input)->{fetch}, 1, "$func processes get magic only once");
+ is(tied($input)->{store}, 0, "$func does not process set magic");
+ is($utf8, is_utf8($str), "$func returned correct state");
+ }
+ }
+}
+
+package TieScalarCounter;
+
+sub TIESCALAR {
+ my ($class, $value) = @_;
+ return bless { fetch => 0, store => 0, value => $value }, $class;
+}
+
+sub FETCH {
+ my ($self) = @_;
+ $self->{fetch}++;
+ return $self->{value};
+}
+
+sub STORE {
+ my ($self, $value) = @_;
+ $self->{store}++;
+ $self->{value} = $value;
+}
diff --git a/cpan/Encode/t/mime-header.t b/cpan/Encode/t/mime-header.t
index 4477a4eb87..a997dffb41 100644
--- a/cpan/Encode/t/mime-header.t
+++ b/cpan/Encode/t/mime-header.t
@@ -1,5 +1,5 @@
#
-# $Id: mime-header.t,v 2.12 2016/04/11 07:17:02 dankogai Exp $
+# $Id: mime-header.t,v 2.14 2016/11/29 23:29:23 dankogai Exp dankogai $
# This script is written in utf8
#
BEGIN {
@@ -24,8 +24,22 @@ use strict;
use utf8;
use charnames ":full";
-use Test::More tests => 130;
-use_ok("Encode::MIME::Header");
+use Test::More tests => 264;
+
+BEGIN {
+ use_ok("Encode::MIME::Header");
+}
+
+my @decode_long_tests;
+if ($] < 5.009004) { # perl versions without Regular expressions Engine de-recursivised which cause stack overflow
+ push(@decode_long_tests, "a" x 1000000 => "a" x 1000000);
+ push(@decode_long_tests, "=?utf-8?Q?a?= " x 400 => "a" x 400 . " ");
+ push(@decode_long_tests, "=?utf-8?Q?a?= =?US-ASCII?Q?b?= " x 200 => "ab" x 200 . " ");
+} else {
+ push(@decode_long_tests, "a" x 1000000 => "a" x 1000000);
+ push(@decode_long_tests, "=?utf-8?Q?a?= " x 10000 => "a" x 10000 . " ");
+ push(@decode_long_tests, "=?utf-8?Q?a?= =?US-ASCII?Q?b?= " x 10000 => "ab" x 10000 . " ");
+}
my @decode_tests = (
# RFC2047 p.5
@@ -54,6 +68,14 @@ my @decode_tests = (
"=?ISO-8859-1*da-DK?Q?Keld_J=F8rn_Simonsen?=" => "Keld Jørn Simonsen",
"=?ISO-8859-1*fr-BE?Q?Andr=E9?= Pirard" => "André Pirard",
"=?ISO-8859-1*en?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?= =?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=" => "If you can read this you understand the example.",
+ # multiple (separated by CRLF)
+ "=?US-ASCII?Q?a?=\r\n=?US-ASCII?Q?b?=" => "a\r\nb",
+ "a\r\nb" => "a\r\nb",
+ "a\r\n\r\nb" => "a\r\n\r\nb",
+ "a\r\n\r\nb\r\n" => "a\r\n\r\nb\r\n",
+ # multiple multiline (separated by CRLF)
+ "=?US-ASCII?Q?a?=\r\n =?US-ASCII?Q?b?=\r\n=?US-ASCII?Q?c?=" => "ab\r\nc",
+ "a\r\n b\r\nc" => "a b\r\nc",
# RT67569
"foo =?us-ascii?q?bar?=" => "foo bar",
"foo\r\n =?us-ascii?q?bar?=" => "foo bar",
@@ -63,16 +85,38 @@ my @decode_tests = (
"foo\r\n bar" => "foo bar",
"=?us-ascii?q?foo?= =?us-ascii?q?bar?=" => "foobar",
"=?us-ascii?q?foo?=\r\n =?us-ascii?q?bar?=" => "foobar",
- "=?us-ascii?q?foo bar?=" => "=?us-ascii?q?foo bar?=",
- "=?us-ascii?q?foo\r\n bar?=" => "=?us-ascii?q?foo bar?=",
# RT40027
"a: b\r\n c" => "a: b c",
# RT104422
"=?utf-8?Q?pre?= =?utf-8?B?IGZvbw==?=\r\n =?utf-8?Q?bar?=" => "pre foobar",
+ # RT114034 - replace invalid UTF-8 sequence with unicode replacement character
+ "=?utf-8?Q?=f9=80=80=80=80?=" => "�",
+ "=?utf-8?Q?=28=c3=29?=" => "(�)",
+ # decode only known MIME charsets, do not crash on invalid
+ "prefix =?unknown?Q?a=20b=20c?= middle =?US-ASCII?Q?d=20e=20f?= suffix" => "prefix =?unknown?Q?a=20b=20c?= middle d e f suffix",
+ "prefix =?US-ASCII?Q?a_b_c?= =?unknown?Q?d_e_f?= suffix" => "prefix a b c =?unknown?Q?d_e_f?= suffix",
+ "prefix =?US-ASCII?Q?a_b_c?= =?unknown?Q?d_e_f?= =?US-ASCII?Q?g_h_i?= suffix" => "prefix a b c =?unknown?Q?d_e_f?= g h i suffix",
+ # long strings
+ @decode_long_tests,
+ # separators around encoded words
+ "\r\n =?US-ASCII?Q?a?=" => " a",
+ "\r\n (=?US-ASCII?Q?a?=)" => " (a)",
+ "\r\n (=?US-ASCII?Q?a?=)\r\n " => " (a) ",
+ "(=?US-ASCII?Q?a?=)\r\n " => "(a) ",
+ " (=?US-ASCII?Q?a?=) " => " (a) ",
+ "(=?US-ASCII?Q?a?=) " => "(a) ",
+ " (=?US-ASCII?Q?a?=)" => " (a)",
+ "(=?US-ASCII?Q?a?=)(=?US-ASCII?Q?b?=)" => "(a)(b)",
+ "(=?US-ASCII?Q?a?=) (=?US-ASCII?Q?b?=)" => "(a) (b)",
+ "(=?US-ASCII?Q?a?=)\r\n (=?US-ASCII?Q?b?=)" => "(a) (b)",
+ "\r\n (=?US-ASCII?Q?a?=)\r\n (=?US-ASCII?Q?b?=)\r\n " => " (a) (b) ",
+ "\r\n(=?US-ASCII?Q?a?=)\r\n(=?US-ASCII?Q?b?=)" => "\r\n(a)\r\n(b)",
);
my @decode_default_tests = (
@decode_tests,
+ "=?us-ascii?q?foo bar?=" => "foo bar",
+ "=?us-ascii?q?foo\r\n bar?=" => "foo bar",
'=?us-ascii?q?foo=20=3cbar=40baz=2efoo=3e=20bar?=' => 'foo <bar@baz.foo> bar',
'"=?us-ascii?q?foo=20=3cbar=40baz=2efoo=3e=20bar?="' => '"foo <bar@baz.foo> bar"',
"=?us-ascii?q?foo?==?us-ascii?q?bar?=" => "foobar",
@@ -82,12 +126,35 @@ my @decode_default_tests = (
"[=?UTF-8?B?ZsOzcnVt?=]=?UTF-8?B?IHNwcsOhdmE=?=" => "[fórum] správa",
"test:=?UTF-8?B?IHNwcsOhdmE=?=" => "test: správa",
"=?UTF-8?B?dMOpc3Q=?=:=?UTF-8?B?IHNwcsOhdmE=?=", "tést: správa",
+ # multiple base64 parts in one b word
+ "=?us-ascii?b?Zg==Zg==?=" => "ff",
+ # b word with invalid characters
+ "=?us-ascii?b?Zm!!9!v?=" => "foo",
+ # concat consecutive words (with same parameters) and join them into one utf-8 symbol
+ "=?UTF-8?Q?=C3?= =?UTF-8?Q?=A1?=" => "á",
+ # RT114034 - use strict UTF-8 decoder for invalid MIME charsets utf8, UTF8 and utf-8-strict
+ "=?utf8?Q?=C3=A1=f9=80=80=80=80?=" => "á�",
+ "=?UTF8?Q?=C3=A1=f9=80=80=80=80?=" => "á�",
+ "=?utf-8-strict?Q?=C3=A1=f9=80=80=80=80?=" => "á�",
);
my @decode_strict_tests = (
@decode_tests,
+ "=?us-ascii?q?foo bar?=" => "=?us-ascii?q?foo bar?=",
+ "=?us-ascii?q?foo\r\n bar?=" => "=?us-ascii?q?foo bar?=",
'=?us-ascii?q?foo=20=3cbar=40baz=2efoo=3e=20bar?=' => 'foo <bar@baz.foo> bar',
'"=?us-ascii?q?foo=20=3cbar=40baz=2efoo=3e=20bar?="' => '"=?us-ascii?q?foo=20=3cbar=40baz=2efoo=3e=20bar?="',
+ # do not decode invalid q words
+ "=?us-ascii?q?foo=?=" => "=?us-ascii?q?foo=?=",
+ "=?us-ascii?q?foo=?= =?us-ascii?q?foo?=" => "=?us-ascii?q?foo=?= foo",
+ # do not decode invalid b words
+ "=?us-ascii?b?----?=" => "=?us-ascii?b?----?=",
+ "=?us-ascii?b?Zm8=-?= =?us-ascii?b?Zm9v?= and =?us-ascii?b?Zg==?=" => "=?us-ascii?b?Zm8=-?= foo and f",
+ "=?us-ascii?b?----?= =?us-ascii?b?Zm9v?= and =?us-ascii?b?Zg==?=" => "=?us-ascii?b?----?= foo and f",
+ # RT114034 - utf8, UTF8 and also utf-8-strict are invalid MIME charset, do not decode it
+ "=?utf8?Q?=C3=A1?=" => "=?utf8?Q?=C3=A1?=",
+ "=?UTF8?Q?=C3=A1?=" => "=?UTF8?Q?=C3=A1?=",
+ "=?utf-8-strict?Q?=C3=A1?=" => "=?utf-8-strict?Q?=C3=A1?=",
);
my @encode_tests = (
@@ -106,41 +173,161 @@ my @encode_tests = (
# RT88717
"Hey foo\x{2024}bar:whee" => "=?UTF-8?B?SGV5IGZvb+KApGJhcjp3aGVl?=", "=?UTF-8?Q?Hey_foo=E2=80=A4bar=3Awhee?=",
# valid q chars
- "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz !*+-/" => "=?UTF-8?B?MDEyMzQ1Njc4OUFCQ0RFRkdISUpLTE1OT1BRUlNUVVZXWFlaYWJjZGVmZ2hpams=?=\r\n =?UTF-8?B?bG1ub3BxcnN0dXZ3eHl6ICEqKy0v?=", "=?UTF-8?Q?0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_?=\r\n =?UTF-8?Q?!*+-/?=",
+ "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz !*+-/" => "=?UTF-8?B?MDEyMzQ1Njc4OUFCQ0RFRkdISUpLTE1OT1BRUlNUVVZXWFlaYWJjZGVmZ2hp?=\r\n =?UTF-8?B?amtsbW5vcHFyc3R1dnd4eXogISorLS8=?=", "=?UTF-8?Q?0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_?=\r\n =?UTF-8?Q?!*+-/?=",
# invalid q chars
"." => "=?UTF-8?B?Lg==?=", "=?UTF-8?Q?=2E?=",
"," => "=?UTF-8?B?LA==?=", "=?UTF-8?Q?=2C?=",
+ # long ascii sequence
+ "a" x 100 => "=?UTF-8?B?YWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFh?=\r\n =?UTF-8?B?YWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFhYWFh?=\r\n =?UTF-8?B?YWFhYWFhYWFhYQ==?=", "=?UTF-8?Q?aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa?=\r\n =?UTF-8?Q?aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa?=",
+ # long unicode sequence
+ "😀" x 100 => "=?UTF-8?B?8J+YgPCfmIDwn5iA8J+YgPCfmIDwn5iA8J+YgPCfmIDwn5iA8J+YgPCfmIA=?=\r\n " x 9 . "=?UTF-8?B?8J+YgA==?=", join("\r\n ", ("=?UTF-8?Q?=F0=9F=98=80=F0=9F=98=80=F0=9F=98=80=F0=9F=98=80=F0=9F=98=80?=") x 20),
);
sub info {
- my ($str) = @_;
+ my ($str, $str1, $str2) = @_;
+ substr $str1, 1000, -3, "..." if defined $str1 and length $str1 > 1000;
+ substr $str2, 1000, -3, "..." if defined $str2 and length $str2 > 1000;
+ $str .= ": $str1" if defined $str1;
+ $str .= " => $str2" if defined $str2;
$str = Encode::encode_utf8($str);
$str =~ s/\r/\\r/gs;
$str =~ s/\n/\\n/gs;
return $str;
}
+sub check_length {
+ my ($str) = @_;
+ my @lines = split /\r\n /, $str;
+ my @long = grep { length($_) > 75 } @lines;
+ return scalar @long == 0;
+}
+
my @splice;
@splice = @encode_tests;
while (my ($d, $b, $q) = splice @splice, 0, 3) {
- is Encode::encode('MIME-Header', $d) => $b, info("encode default: $d => $b");
- is Encode::encode('MIME-B', $d) => $b, info("encode base64: $d => $b");
- is Encode::encode('MIME-Q', $d) => $q, info("encode qp: $d => $q");
- is Encode::decode('MIME-B', $b) => $d, info("decode base64: $b => $d");
- is Encode::decode('MIME-Q', $q) => $d, info("decode qp: $b => $d");
+ is Encode::encode("MIME-Header", $d) => $b, info("encode default", $d => $b);
+ is Encode::encode("MIME-B", $d) => $b, info("encode base64", $d => $b);
+ is Encode::encode("MIME-Q", $d) => $q, info("encode qp", $d => $q);
+ is Encode::decode("MIME-B", $b) => $d, info("decode base64", $b => $d);
+ is Encode::decode("MIME-Q", $q) => $d, info("decode qp", $b => $d);
+ ok check_length($b), info("correct encoded length base64", $b);
+ ok check_length($q), info("correct encoded length qp", $q);
}
@splice = @decode_default_tests;
while (my ($e, $d) = splice @splice, 0, 2) {
- is Encode::decode('MIME-Header', $e) => $d, info("decode default: $e => $d");
+ is Encode::decode("MIME-Header", $e) => $d, info("decode default", $e => $d);
}
local $Encode::MIME::Header::STRICT_DECODE = 1;
@splice = @decode_strict_tests;
while (my ($e, $d) = splice @splice, 0, 2) {
- is Encode::decode('MIME-Header', $e) => $d, info("decode strict: $e => $d");
+ is Encode::decode("MIME-Header", $e) => $d, info("decode strict", $e => $d);
+}
+
+my $valid_unicode = "á";
+my $invalid_unicode = "\x{1000000}";
+{
+ my $input = $valid_unicode;
+ my $output = Encode::encode("MIME-Header", $input, Encode::FB_QUIET);
+ is $output => Encode::encode("MIME-Header", $valid_unicode), "encode valid with FB_QUIET flag: output string is valid";
+ is $input => "", "encode valid with FB_QUIET flag: input string is modified and empty";
+}
+{
+ my $input = $valid_unicode . $invalid_unicode;
+ my $output = Encode::encode("MIME-Header", $input, Encode::FB_QUIET);
+ is $output => Encode::encode("MIME-Header", $valid_unicode), "encode with FB_QUIET flag: output string stops before first invalid character";
+ is $input => $invalid_unicode, "encode with FB_QUIET flag: input string is modified and starts with first invalid character";
+}
+{
+ my $input = $valid_unicode . $invalid_unicode;
+ my $output = Encode::encode("MIME-Header", $input, Encode::FB_QUIET | Encode::LEAVE_SRC);
+ is $output => Encode::encode("MIME-Header", $valid_unicode), "encode with FB_QUIET and LEAVE_SRC flags: output string stops before first invalid character";
+ is $input => $valid_unicode . $invalid_unicode, "encode with FB_QUIET and LEAVE_SRC flags: input string is not modified";
+}
+{
+ my $input = $valid_unicode . $invalid_unicode;
+ my $output = Encode::encode("MIME-Header", $input, Encode::FB_PERLQQ);
+ is $output => Encode::encode("MIME-Header", $valid_unicode . '\x{1000000}'), "encode with FB_PERLQQ flag: output string contains perl qq representation of invalid character";
+ is $input => $valid_unicode . $invalid_unicode, "encode with FB_PERLQQ flag: input string is not modified";
+}
+{
+ my $input = $valid_unicode;
+ my $output = Encode::encode("MIME-Header", $input, sub { sprintf("!0x%X!", $_[0]) });
+ is $output => Encode::encode("MIME-Header", $valid_unicode), "encode valid with coderef check: output string is valid";
+ is $input => $valid_unicode, "encode valid with coderef check: input string is not modified";
+}
+{
+ my $input = $valid_unicode . $invalid_unicode;
+ my $output = Encode::encode("MIME-Header", $input, sub { sprintf("!0x%X!", $_[0]) });
+ is $output => Encode::encode("MIME-Header", $valid_unicode . '!0x1000000!'), "encode with coderef check: output string contains output from coderef";
+ is $input => $valid_unicode . $invalid_unicode, "encode with coderef check: input string is not modified";
+}
+
+my $valid_mime = "=?US-ASCII?Q?d=20e=20f?=";
+my $invalid_mime = "=?unknown?Q?a=20b=20c?=";
+my $invalid_mime_unicode = "=?utf-8?Q?=28=c3=29?=";
+{
+ my $input = $valid_mime;
+ my $output = Encode::decode("MIME-Header", $input, Encode::FB_QUIET);
+ is $output => Encode::decode("MIME-Header", $valid_mime), "decode valid with FB_QUIET flag: output string is valid";
+ is $input => "", "decode valid with FB_QUIET flag: input string is modified and empty";
+}
+{
+ my $input = $valid_mime . " " . $invalid_mime;
+ my $output = Encode::decode("MIME-Header", $input, Encode::FB_QUIET);
+ is $output => Encode::decode("MIME-Header", $valid_mime), "decode with FB_QUIET flag: output string stops before first mime word with unknown charset";
+ is $input => $invalid_mime, "decode with FB_QUIET flag: input string is modified and starts with first mime word with unknown charset";
+}
+{
+ my $input = $valid_mime . " " . $invalid_mime_unicode;
+ my $output = Encode::decode("MIME-Header", $input, Encode::FB_QUIET);
+ is $output => Encode::decode("MIME-Header", $valid_mime), "decode with FB_QUIET flag: output string stops before first mime word with invalid unicode character";
+ is $input => $invalid_mime_unicode, "decode with FB_QUIET flag: input string is modified and starts with first mime word with invalid unicode character";
+}
+{
+ my $input = $valid_mime . " " . $invalid_mime;
+ my $output = Encode::decode("MIME-Header", $input, Encode::FB_QUIET | Encode::LEAVE_SRC);
+ is $output => Encode::decode("MIME-Header", $valid_mime), "decode with FB_QUIET and LEAVE_SRC flags: output string stops before first mime word with unknown charset";
+ is $input => $valid_mime . " " . $invalid_mime, "decode with FB_QUIET flag: input string is not modified";
+}
+{
+ my $input = $valid_mime . " " . $invalid_mime_unicode;
+ my $output = Encode::decode("MIME-Header", $input, Encode::FB_QUIET | Encode::LEAVE_SRC);
+ is $output => Encode::decode("MIME-Header", $valid_mime), "decode with FB_QUIET and LEAVE_SRC flags: output string stops before first mime word with invalid unicode character";
+ is $input => $valid_mime . " " . $invalid_mime_unicode, "decode with FB_QUIET flag: input string is not modified";
+}
+{
+ my $input = $valid_mime . " " . $invalid_mime;
+ my $output = Encode::decode("MIME-Header", $input, Encode::FB_PERLQQ);
+ is $output => Encode::decode("MIME-Header", $valid_mime) . " " . $invalid_mime, "decode with FB_PERLQQ flag: output string contains unmodified mime word with unknown charset";
+ is $input => $valid_mime . " " . $invalid_mime, "decode with FB_QUIET flag: input string is not modified";
+}
+{
+ my $input = $valid_mime . " " . $invalid_mime_unicode;
+ my $output = Encode::decode("MIME-Header", $input, Encode::FB_PERLQQ);
+ is $output => Encode::decode("MIME-Header", $valid_mime) . '(\xC3)', "decode with FB_PERLQQ flag: output string contains perl qq representation of invalid unicode character";
+ is $input => $valid_mime . " " . $invalid_mime_unicode, "decode with FB_QUIET flag: input string is not modified";
+}
+{
+ my $input = $valid_mime;
+ my $output = Encode::decode("MIME-Header", $input, sub { sprintf("!0x%X!", $_[0]) });
+ is $output => Encode::decode("MIME-Header", $valid_mime), "decode valid with coderef check: output string is valid";
+ is $input => $valid_mime, "decode valid with coderef check: input string is not modified";
+}
+{
+ my $input = $valid_mime . " " . $invalid_mime;
+ my $output = Encode::decode("MIME-Header", $input, sub { sprintf("!0x%X!", $_[0]) });
+ is $output => Encode::decode("MIME-Header", $valid_mime) . " " . $invalid_mime, "decode with coderef check: output string contains unmodified mime word with unknown charset";
+ is $input => $valid_mime . " " . $invalid_mime, "decode with coderef check: input string is not modified";
+}
+{
+ my $input = $valid_mime . " " . $invalid_mime_unicode;
+ my $output = Encode::decode("MIME-Header", $input, sub { sprintf("!0x%X!", $_[0]) });
+ is $output => Encode::decode("MIME-Header", $valid_mime) . '(!0xC3!)', "decode with coderef check: output string contains output from coderef for invalid unicode character";
+ is $input => $valid_mime . " " . $invalid_mime_unicode, "decode with coderef check: input string is not modified";
}
__END__
diff --git a/cpan/Encode/t/mime-name.t b/cpan/Encode/t/mime-name.t
index 02ff49053a..ced4e7c031 100644
--- a/cpan/Encode/t/mime-name.t
+++ b/cpan/Encode/t/mime-name.t
@@ -1,5 +1,5 @@
#
-# $Id: mime-name.t,v 1.1 2007/05/12 06:42:19 dankogai Exp $
+# $Id: mime-name.t,v 1.2 2016/10/28 05:03:52 dankogai Exp $
# This script is written in utf8
#
BEGIN {
@@ -23,14 +23,40 @@ use strict;
use warnings;
use Encode;
#use Test::More qw(no_plan);
-use Test::More tests => 68;
+use Test::More tests => 277;
+
+BEGIN {
+ use_ok("Encode::MIME::Name");
+}
-use_ok("Encode::MIME::Name");
for my $canon ( sort keys %Encode::MIME::Name::MIME_NAME_OF ) {
my $enc = find_encoding($canon);
my $mime_name = $Encode::MIME::Name::MIME_NAME_OF{$canon};
is $enc->mime_name, $mime_name,
- qq(\$enc->mime_name("$canon") eq $mime_name);
+ qq(find_encoding($canon)->mime_name eq $mime_name);
+ is $enc->name, $canon,
+ qq(find_encoding($canon)->name eq $canon);
+}
+for my $mime_name ( sort keys %Encode::MIME::Name::ENCODE_NAME_OF ) {
+ my $enc = find_mime_encoding($mime_name);
+ my $canon = $Encode::MIME::Name::ENCODE_NAME_OF{$mime_name};
+ my $mime_name = $Encode::MIME::Name::MIME_NAME_OF{$canon};
+ is $enc->mime_name, $mime_name,
+ qq(find_mime_encoding($mime_name)->mime_name eq $mime_name);
+ is $enc->name, $canon,
+ qq(find_mime_encoding($mime_name)->name eq $canon);
}
+ok find_encoding("utf8");
+ok find_encoding("UTF8");
+ok find_encoding("utf-8-strict");
+ok find_encoding("utf-8");
+ok find_encoding("UTF-8");
+
+ok not find_mime_encoding("utf8");
+ok not find_mime_encoding("UTF8");
+ok not find_mime_encoding("utf-8-strict");
+ok find_mime_encoding("utf-8");
+ok find_mime_encoding("UTF-8");
+
__END__;
diff --git a/cpan/Encode/t/rt113164.t b/cpan/Encode/t/rt113164.t
new file mode 100644
index 0000000000..f0a94ea066
--- /dev/null
+++ b/cpan/Encode/t/rt113164.t
@@ -0,0 +1,38 @@
+BEGIN {
+ if ($ENV{'PERL_CORE'}) {
+ chdir 't';
+ unshift @INC, '../lib';
+ }
+ require Config; import Config;
+ if ($Config{'extensions'} !~ /\bEncode\b/) {
+ print "1..0 # Skip: Encode was not built\n";
+ exit 0;
+ }
+ if (ord("A") == 193) {
+ print "1..0 # Skip: EBCDIC\n";
+ exit 0;
+ }
+ $| = 1;
+}
+
+use strict;
+use warnings;
+
+use Test::More tests => 2;
+
+use Encode;
+
+my $str = "You" . chr(8217) . "re doomed!";
+
+my $data;
+
+my $cb = sub {
+ $data = [ ('?') x 12_500 ];
+ return ";";
+};
+
+my $octets = encode('iso-8859-1', $str, $cb);
+is $octets, "You;re doomed!", "stack was not overwritten";
+
+$octets = encode('iso-8859-1', $str, $cb);
+is $octets, "You;re doomed!", "stack was not overwritten";
diff --git a/cpan/Encode/t/rt65541.t b/cpan/Encode/t/rt65541.t
new file mode 100644
index 0000000000..4a75ce3c7b
--- /dev/null
+++ b/cpan/Encode/t/rt65541.t
@@ -0,0 +1,29 @@
+BEGIN {
+ if ($ENV{'PERL_CORE'}) {
+ chdir 't';
+ unshift @INC, '../lib';
+ }
+ require Config; import Config;
+ if ($Config{'extensions'} !~ /\bEncode\b/) {
+ print "1..0 # Skip: Encode was not built\n";
+ exit 0;
+ }
+ if (ord("A") == 193) {
+ print "1..0 # Skip: EBCDIC\n";
+ exit 0;
+ }
+ $| = 1;
+}
+
+use strict;
+use warnings;
+
+use Encode;
+use PerlIO::encoding;
+$PerlIO::encoding::fallback &= ~Encode::WARN_ON_ERR;
+
+use Test::More tests => 3;
+
+ok open my $fh, ">:encoding(cp1250)", do{\(my $str)};
+ok print $fh ("a" x 1023) . "\x{0378}";
+ok close $fh;
diff --git a/cpan/Encode/t/rt76824.t b/cpan/Encode/t/rt76824.t
new file mode 100644
index 0000000000..5d057f607e
--- /dev/null
+++ b/cpan/Encode/t/rt76824.t
@@ -0,0 +1,60 @@
+BEGIN {
+ if ($ENV{'PERL_CORE'}) {
+ chdir 't';
+ unshift @INC, '../lib';
+ }
+ require Config; import Config;
+ if ($Config{'extensions'} !~ /\bEncode\b/) {
+ print "1..0 # Skip: Encode was not built\n";
+ exit 0;
+ }
+ if (ord("A") == 193) {
+ print "1..0 # Skip: EBCDIC\n";
+ exit 0;
+ }
+ $| = 1;
+}
+
+use strict;
+use warnings;
+
+use Encode;
+use PerlIO::encoding;
+$PerlIO::encoding::fallback &= ~Encode::WARN_ON_ERR;
+
+use Test::More tests => 2;
+
+my $out;
+my @arr = (
+ "\x{feff}\x{39f}\x{3af} \x{3a3}\x{3c5}\x{3bd}\x{3ad}\x{3bd}\x{3bf}\x{3c7}\x{3bf}\x{3b9}\n",
+ "\x{39f}\x{3b9} \x{393}\x{3b5}\x{3bd}\x{3bd}\x{3b1}\x{3af}\x{3bf}\x{3b9} \x{3c4}\x{3b7}\x{3c2} \x{3a3}\x{3b1}\x{3bc}\x{3bf}\x{3b8}\x{3c1}\x{3ac}\x{3ba}\x{3b7}\x{3c2}\n",
+ "\x{39f}\x{3b9} \x{393}\x{3b5}\x{3c1}\x{3bc}\x{3b1}\x{3bd}\x{3bf}\x{3af} \x{3be}\x{3b1}\x{3bd}\x{3ac}\x{3c1}\x{3c7}\x{3bf}\x{3bd}\x{3c4}\x{3b1}\x{3b9}...\n",
+ "\x{39f}\x{3b9} \x{395}\x{3c1}\x{3b1}\x{3c3}\x{3c4}\x{3ad}\x{3c2} \x{3a4}\x{3bf}\x{3c5} \x{391}\x{3b9}\x{3b3}\x{3b1}\x{3af}\x{3bf}\x{3c5}\n",
+ "\x{39f}\x{3b9} \x{39a}\x{3c5}\x{3bd}\x{3b7}\x{3b3}\x{3bf}\x{3af}\n",
+ "\x{39f}\x{3b9} \x{3a0}\x{3b1}\x{3bd}\x{3ba}\x{3c2} \x{3a4}\x{3b1} \x{39a}\x{3ac}\x{3bd}\x{3bf}\x{3c5}\x{3bd} \x{38c}\x{3bb}\x{3b1}\n",
+ "\x{39f}\x{3b9} \x{3a6}\x{3b1}\x{3bd}\x{3c4}\x{3b1}\x{3c1}\x{3af}\x{3bd}\x{3b5}\x{3c2}\n",
+ "\x{39f}\x{3b9}\x{3ba}\x{3bf}\x{3b3}\x{3ad}\x{3bd}\x{3b5}\x{3b9}\x{3b1} \x{3a0}\x{3b1}\x{3bd}\x{3c4}\x{3c1}\x{3b5}\x{3c5}\x{3cc}\x{3bc}\x{3b1}\x{3c3}\x{3c4}\x{3b5}\n",
+ "\x{39f}\x{3bb}\x{3b1} \x{3b5}\x{3af}\x{3bd}\x{3b1}\x{3b9} \x{3b4}\x{3c1}\x{3cc}\x{3bc}\x{3bf}\x{3c2}\n",
+ "\x{39f}\x{3bc}\x{3b7}\x{3c1}\x{3bf}\x{3c2}\n",
+ "\x{39f}\x{3be}\x{3c5}\x{3b3}\x{3cc}\x{3bd}\x{3bf}\n",
+ "\x{39f}\x{3c1}\x{3b1}\x{3c4}\x{3cc}\x{3c4}\x{3b7}\x{3c2} \x{3bc}\x{3b7}\x{3b4}\x{3ad}\x{3bd}\n",
+ "\x{3c0}\n",
+ "\x{3c0}\x{3ac}\x{3bd}\x{3c9}, \x{3ba}\x{3ac}\x{3c4}\x{3c9} \x{3ba}\x{3b1}\x{3b9} \x{3c0}\x{3bb}\x{3b1}\x{3b3}\x{3af}\x{3c9}\x{3c2}\n",
+ "\x{3a4}\x{3bf} \x{39a}\x{3b1}\x{3ba}\x{3cc}\n",
+ "\x{3a4}\x{3bf} \x{39a}\x{3b1}\x{3ba}\x{3cc} - \x{3a3}\x{3c4}\x{3b7}\x{3bd} \x{395}\x{3c0}\x{3bf}\x{3c7}\x{3ae} \x{3c4}\x{3c9}\x{3bd} \x{397}\x{3c1}\x{3ce}\x{3c9}\x{3bd}\n",
+ "\x{3a4}\x{3bf} \x{3ba}\x{3bb}\x{3ac}\x{3bc}\x{3b1} \x{3b2}\x{3b3}\x{3ae}\x{3ba}\x{3b5} \x{3b1}\x{3c0}'\x{3c4}\x{3bf}\x{3bd} \x{3c0}\x{3b1}\x{3c1}\x{3ac}\x{3b4}\x{3b5}\x{3b9}\x{3c3}\x{3bf}\n",
+ "\x{3a4}\x{3bf} \x{3ba}\x{3bf}\x{3c1}\x{3af}\x{3c4}\x{3c3}\x{3b9} \x{3bc}\x{3b5} \x{3c4}\x{3b1} \x{3bc}\x{3b1}\x{3cd}\x{3c1}\x{3b1}\n",
+ "\x{3a4}\x{3bf} \x{3ba}\x{3bf}\x{3c1}\x{3af}\x{3c4}\x{3c3}\x{3b9} \x{3c4}\x{3bf}\x{3c5} \x{3bb}\x{3bf}\x{3cd}\x{3bd}\x{3b1} \x{3c0}\x{3b1}\x{3c1}\x{3ba}\n",
+ "\x{3a4}\x{3bf} \x{39e}\x{3cd}\x{3bb}\x{3bf} \x{3b2}\x{3b3}\x{3ae}\x{3ba}\x{3b5} \x{3b1}\x{3c0}\x{3cc} \x{3c4}\x{3bf}\x{3bd} \x{3c0}\x{3b1}\x{3c1}\x{3ac}\x{3b4}\x{3b5}\x{3b9}\x{3c3}\x{3bf}\n",
+ "\x{3a4}\x{3bf} \x{3c0}\x{3b9}\x{3bf} \x{3bb}\x{3b1}\x{3bc}\x{3c0}\x{3c1}\x{3cc} \x{3b1}\x{3c3}\x{3c4}\x{3ad}\x{3c1}\x{3b9}\n",
+ "\x{3a4}\x{3bf} \x{3a1}\x{3b5}\x{3bc}\x{3b1}\x{3bb}\x{3b9} \x{3a4}\x{3b7}\x{3c2} \x{391}\x{3b8}\x{3b7}\x{3bd}\x{3b1}\x{3c2}\n",
+ "\x{3a4}\x{3bf} \x{3a4}\x{3b1}\x{3bd}\x{3b3}\x{3ba}\x{3cc} \x{3c4}\x{3c9}\x{3bd} \x{3a7}\x{3c1}\x{3b9}\x{3c3}\x{3c4}\x{3bf}\x{3c5}\x{3b3}\x{3ad}\x{3bd}\x{3bd}\x{3c9}\x{3bd}\n",
+ "\x{3a4}\x{3bf} \x{3c4}\x{3b5}\x{3bb}\x{3b5}\x{3c5}\x{3c4}\x{3b1}\x{3af}\x{3bf} \x{3c8}\x{3ad}\x{3bc}\x{3bc}\x{3b1}\n",
+ "\x{3a4}\x{3bf} \x{3c6}\x{3b9}\x{3bb}\x{3af} \x{3c4}\x{3b7}\x{3c2}... \x{396}\x{3c9}\x{3ae}\x{3c2}\n",
+ "\x{3a4}\x{3bf} \x{3c7}\x{3ce}\x{3bc}\x{3b1} \x{3b2}\x{3ac}\x{3c6}\x{3c4}\x{3b7}\x{3ba}\x{3b5} \x{3ba}\x{3cc}\x{3ba}\x{3ba}\x{3b9}\x{3bd}\x{3bf}\n",
+ "\x{3a4}\x{3bf}\x{3c0}\x{3af}\x{3bf} \x{3c3}\x{3c4}\x{3b7}\x{3bd} \x{3bf}\x{3bc}\x{3af}\x{3c7}\x{3bb}\x{3b7}\n",
+ "\x{3a4}\x{3c1}\x{3b9}\x{3bb}\x{3bf}\x{3b3}\x{3af}\x{3b1} 1: \x{3a4}\x{3bf} \x{39b}\x{3b9}\x{3b2}\x{3ac}\x{3b4}\x{3b9} \x{3c0}\x{3bf}\x{3c5} \x{3b4}\x{3b1}\x{3ba}\x{3c1}\x{3cd}\x{3b6}\x{3b5}\x{3b9}\n"
+ );
+ok open my $wh, '>:crlf:encoding(ISO-8859-1)', \$out;
+print $wh $_ for @arr;
+ok close $wh;
diff --git a/cpan/Encode/t/rt85489.t b/cpan/Encode/t/rt85489.t
new file mode 100644
index 0000000000..3b28e35af6
--- /dev/null
+++ b/cpan/Encode/t/rt85489.t
@@ -0,0 +1,48 @@
+BEGIN {
+ if ($ENV{'PERL_CORE'}) {
+ chdir 't';
+ unshift @INC, '../lib';
+ }
+ require Config; import Config;
+ if ($Config{'extensions'} !~ /\bEncode\b/) {
+ print "1..0 # Skip: Encode was not built\n";
+ exit 0;
+ }
+ if (ord("A") == 193) {
+ print "1..0 # Skip: EBCDIC\n";
+ exit 0;
+ }
+ $| = 1;
+}
+
+use strict;
+use warnings;
+
+use Test::More tests => 8;
+
+use Encode;
+
+my $ascii = Encode::find_encoding("ascii");
+my $orig = "str";
+
+my $str = $orig;
+ok !Encode::is_utf8($str), "UTF8 flag is not set on input string before ascii encode";
+$ascii->encode($str);
+ok !Encode::is_utf8($str), "UTF8 flag is not set on input string after ascii encode";
+
+$str = $orig;
+ok !Encode::is_utf8($str), "UTF8 flag is not set on input string before Encode::encode ascii";
+Encode::encode("ascii", $str);
+ok !Encode::is_utf8($str), "UTF8 flag is not set on input string after Encode::encode ascii";
+
+$str = $orig;
+Encode::_utf8_on($str);
+ok Encode::is_utf8($str), "UTF8 flag is set on input string before ascii decode";
+$ascii->decode($str);
+ok Encode::is_utf8($str), "UTF8 flag is set on input string after ascii decode";
+
+$str = $orig;
+Encode::_utf8_on($str);
+ok Encode::is_utf8($str), "UTF8 flag is set on input string before Encode::decode ascii";
+Encode::decode("ascii", $str);
+ok Encode::is_utf8($str), "UTF8 flag is set on input string after Encode::decode ascii";
diff --git a/cpan/Encode/t/rt86327.t b/cpan/Encode/t/rt86327.t
new file mode 100644
index 0000000000..91527f849c
--- /dev/null
+++ b/cpan/Encode/t/rt86327.t
@@ -0,0 +1,33 @@
+BEGIN {
+ if ($ENV{'PERL_CORE'}) {
+ chdir 't';
+ unshift @INC, '../lib';
+ }
+ require Config; import Config;
+ if ($Config{'extensions'} !~ /\bEncode\b/) {
+ print "1..0 # Skip: Encode was not built\n";
+ exit 0;
+ }
+ if (ord("A") == 193) {
+ print "1..0 # Skip: EBCDIC\n";
+ exit 0;
+ }
+ $| = 1;
+}
+
+use strict;
+use warnings;
+
+use Encode;
+use PerlIO::encoding;
+$PerlIO::encoding::fallback &= ~Encode::WARN_ON_ERR;
+
+use Test::More tests => 3;
+
+my @t = qw/230 13 90 65 34 239 86 15 8 26 181 25 305 123 22 139 111 6 3
+100 37 1 20 1 166 1 300 19 1 42 153 81 106 114 67 1 32 34/;
+my $str;
+ok open OUT, '>:encoding(iso-8859-1)', \$str;
+my $string = join "\x{fffd}", map { '.'x$_ } @t;
+ok print OUT $string;
+ok close OUT;
diff --git a/cpan/Encode/t/taint.t b/cpan/Encode/t/taint.t
index 2446dd76d3..6fa46bd957 100644
--- a/cpan/Encode/t/taint.t
+++ b/cpan/Encode/t/taint.t
@@ -1,13 +1,17 @@
#!/usr/bin/perl -T
use strict;
use Encode qw(encode decode);
+local %Encode::ExtModule = %Encode::Config::ExtModule;
use Scalar::Util qw(tainted);
use Test::More;
my $taint = substr($ENV{PATH},0,0);
my $str = "dan\x{5f3e}" . $taint; # tainted string to encode
my $bin = encode('UTF-8', $str); # tainted binary to decode
+my $notaint = "";
+my $notaint_str = "dan\x{5f3e}" . $notaint;
+my $notaint_bin = encode('UTF-8', $notaint_str);
my @names = Encode->encodings(':all');
-plan tests => 2 * @names;
+plan tests => 4 * @names + 2;
for my $name (@names) {
my ($d, $e, $s);
eval {
@@ -26,3 +30,25 @@ for my $name (@names) {
ok tainted($d), "decode $name";
}
}
+for my $name (@names) {
+ my ($d, $e, $s);
+ eval {
+ $e = encode($name, $notaint_str);
+ };
+ SKIP: {
+ skip $@, 1 if $@;
+ ok ! tainted($e), "encode $name";
+ }
+ $notaint_bin = $e.$notaint if $e;
+ eval {
+ $d = decode($name, $notaint_bin);
+ };
+ SKIP: {
+ skip $@, 1 if $@;
+ ok ! tainted($d), "decode $name";
+ }
+}
+Encode::_utf8_on($bin);
+ok(!Encode::is_utf8($bin), "Encode::_utf8_on does not work on tainted values");
+Encode::_utf8_off($str);
+ok(Encode::is_utf8($str), "Encode::_utf8_off does not work on tainted values");
diff --git a/cpan/Encode/t/utf8ref.t b/cpan/Encode/t/utf8ref.t
index 3253e08639..288f15b44f 100644
--- a/cpan/Encode/t/utf8ref.t
+++ b/cpan/Encode/t/utf8ref.t
@@ -1,12 +1,12 @@
#
-# $Id: utf8ref.t,v 1.1 2010/09/18 18:39:51 dankogai Exp $
+# $Id: utf8ref.t,v 1.2 2016/10/28 05:03:52 dankogai Exp $
#
use strict;
use warnings;
use Encode;
use Test::More;
-plan tests => 4;
+plan tests => 12;
#plan 'no_plan';
# my $a = find_encoding('ASCII');
@@ -14,7 +14,20 @@ my $u = find_encoding('UTF-8');
my $r = [];
no warnings 'uninitialized';
is encode_utf8($r), ''.$r;
-is $u->encode($r), '';
+is $u->encode($r), ''.$r;
$r = {};
is decode_utf8($r), ''.$r;
-is $u->decode($r), '';
+is $u->decode($r), ''.$r;
+use warnings 'uninitialized';
+
+is encode_utf8(undef), undef;
+is decode_utf8(undef), undef;
+
+is encode_utf8(''), '';
+is decode_utf8(''), '';
+
+is Encode::encode('utf8', undef), undef;
+is Encode::decode('utf8', undef), undef;
+
+is Encode::encode('utf8', ''), '';
+is Encode::decode('utf8', ''), '';
diff --git a/cpan/Encode/t/utf8strict.t b/cpan/Encode/t/utf8strict.t
index 3f362f4981..39293d3067 100644
--- a/cpan/Encode/t/utf8strict.t
+++ b/cpan/Encode/t/utf8strict.t
@@ -47,8 +47,8 @@ BEGIN {
qq/dd 67 41 41/ => 0, # 2.3.2
qq/ee 42 73 73 71/ => 0, # 2.3.3
qq/f4 90 80 80/ => 1, # 2.3.4 -- out of range so NG
- # "3 Malformed sequences" are checked by perl.
- # "4 Overlong sequences" are checked by perl.
+ # EBCDIC TODO: "3 Malformed sequences"
+ # EBCDIC TODO: "4 Overlong sequences"
);
} else {
%SEQ = (
@@ -56,8 +56,49 @@ BEGIN {
qq/ee 80 80/ => 0, # 2.3.2
qq/f4 8f bf bd/ => 0, # 2.3.3
qq/f4 90 80 80/ => 1, # 2.3.4 -- out of range so NG
- # "3 Malformed sequences" are checked by perl.
- # "4 Overlong sequences" are checked by perl.
+ qq/80/ => 1, # 3.1.1
+ qq/bf/ => 1, # 3.1.2
+ qq/80 bf/ => 1, # 3.1.3
+ qq/80 bf 80/ => 1, # 3.1.4
+ qq/80 bf 80 bf/ => 1, # 3.1.5
+ qq/80 bf 80 bf 80/ => 1, # 3.1.6
+ qq/80 bf 80 bf 80 bf/ => 1, # 3.1.7
+ qq/80 bf 80 bf 80 bf 80/ => 1, # 3.1.8
+ qq/80 81 82 83 84 85 86 87 88 89 8a 8b 8c 8d 8e 8f 90 91 92 93 94 95 96 97 98 99 9a 9b 9c 9d 9e 9f a0 a1 a2 a3 a4 a5 a6 a7 a8 a9 aa ab ac ad ae af b0 b1 b2 b3 b4 b5 b6 b7 b8 b9 ba bb bc bd be bf/ => 1, # 3.1.9
+ qq/c0 20 c1 20 c2 20 c3 20 c4 20 c5 20 c6 20 c7 20 c8 20 c9 20 ca 20 cb 20 cc 20 cd 20 ce 20 cf 20 d0 20 d1 20 d2 20 d3 20 d4 20 d5 20 d6 20 d7 20 d8 20 d9 20 da 20 db 20 dc 20 dd 20 de 20 df 20/ => 1, # 3.2.1
+ qq/e0 20 e1 20 e2 20 e3 20 e4 20 e5 20 e6 20 e7 20 e8 20 e9 20 ea 20 eb 20 ec 20 ed 20 ee 20 ef 20/ => 1, # 3.2.2
+ qq/f0 20 f1 20 f2 20 f3 20 f4 20 f5 20 f6 20 f7 20/ => 1, # 3.2.3
+ qq/f8 20 f9 20 fa 20 fb 20/ => 1, # 3.2.4
+ qq/fc 20 fd 20/ => 1, # 3.2.5
+ qq/c0/ => 1, # 3.3.1
+ qq/e0 80/ => 1, # 3.3.2
+ qq/f0 80 80/ => 1, # 3.3.3
+ qq/f8 80 80 80/ => 1, # 3.3.4
+ qq/fc 80 80 80 80/ => 1, # 3.3.5
+ qq/df/ => 1, # 3.3.6
+ qq/ef bf/ => 1, # 3.3.7
+ qq/f7 bf bf/ => 1, # 3.3.8
+ qq/fb bf bf bf/ => 1, # 3.3.9
+ qq/fd bf bf bf bf/ => 1, # 3.3.10
+ qq/c0 e0 80 f0 80 80 f8 80 80 80 fc 80 80 80 80 df ef bf f7 bf bf fb bf bf bf fd bf bf bf bf/ => 1, # 3.4.1
+ qq/fe/ => 1, # 3.5.1
+ qq/ff/ => 1, # 3.5.2
+ qq/fe fe ff ff/ => 1, # 3.5.3
+ qq/c0 af/ => 1, # 4.1.1
+ qq/e0 80 af/ => 1, # 4.1.2
+ qq/f0 80 80 af/ => 1, # 4.1.3
+ qq/f8 80 80 80 af/ => 1, # 4.1.4
+ qq/fc 80 80 80 80 af/ => 1, # 4.1.5
+ qq/c1 bf/ => 1, # 4.2.1
+ qq/e0 9f bf/ => 1, # 4.2.2
+ qq/f0 8f bf bf/ => 1, # 4.2.3
+ qq/f8 87 bf bf bf/ => 1, # 4.2.4
+ qq/fc 83 bf bf bf bf/ => 1, # 4.2.5
+ qq/c0 80/ => 1, # 4.3.1
+ qq/e0 80 80/ => 1, # 4.3.2
+ qq/f0 80 80 80/ => 1, # 4.3.3
+ qq/f8 80 80 80 80/ => 1, # 4.3.4
+ qq/fc 80 80 80 80 80/ => 1, # 4.3.5
);
}
$NTESTS += scalar keys %SEQ;
@@ -82,7 +123,7 @@ for my $s (sort keys %SEQ){
eval { $d->decode($o,1) };
$DEBUG and $@ and warn $@;
my $t = $@ ? 1 : 0;
- is($t, $SEQ{$s}, $s);
+ is($t, $SEQ{$s}, "sequence: $s");
}
__END__