summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--mg.c33
-rw-r--r--regcomp.c40
-rw-r--r--sv.c62
-rw-r--r--toke.c12
4 files changed, 84 insertions, 63 deletions
diff --git a/mg.c b/mg.c
index 8881f10f79..64f64978a2 100644
--- a/mg.c
+++ b/mg.c
@@ -1846,32 +1846,37 @@ Perl_magic_set(pTHX_ SV *sv, MAGIC *mg)
DEBUG_x(dump_all());
break;
case '\005': /* ^E */
- if (*(mg->mg_ptr+1) == '\0') {
+ if (*(mg->mg_ptr+1) == '\0') {
#ifdef MACOS_TRADITIONAL
- gMacPerl_OSErr = SvIOK(sv) ? SvIVX(sv) : sv_2iv(sv);
+ gMacPerl_OSErr = SvIOK(sv) ? SvIVX(sv) : sv_2iv(sv);
#else
# ifdef VMS
- set_vaxc_errno(SvIOK(sv) ? SvIVX(sv) : sv_2iv(sv));
+ set_vaxc_errno(SvIOK(sv) ? SvIVX(sv) : sv_2iv(sv));
# else
# ifdef WIN32
- SetLastError( SvIV(sv) );
+ SetLastError( SvIV(sv) );
# else
# ifdef OS2
- os2_setsyserrno(SvIOK(sv) ? SvIVX(sv) : sv_2iv(sv));
+ os2_setsyserrno(SvIOK(sv) ? SvIVX(sv) : sv_2iv(sv));
# else
- /* will anyone ever use this? */
- SETERRNO(SvIOK(sv) ? SvIVX(sv) : sv_2iv(sv), 4);
+ /* will anyone ever use this? */
+ SETERRNO(SvIOK(sv) ? SvIVX(sv) : sv_2iv(sv), 4);
# endif
# endif
# endif
#endif
- }
- else if (strEQ(mg->mg_ptr+1, "NCODING")) {
- if (PL_encoding)
- sv_setsv(PL_encoding, sv);
- else
- PL_encoding = newSVsv(sv);
- }
+ }
+ else if (strEQ(mg->mg_ptr+1, "NCODING")) {
+ if (PL_encoding)
+ SvREFCNT_dec(PL_encoding);
+ if (SvOK(sv) || SvGMAGICAL(sv)) {
+ PL_encoding = newSVsv(sv);
+ }
+ else {
+ PL_encoding = Nullsv;
+ }
+ }
+ break;
case '\006': /* ^F */
PL_maxsysfd = SvIOK(sv) ? SvIVX(sv) : sv_2iv(sv);
break;
diff --git a/regcomp.c b/regcomp.c
index c8b5d7634b..a4c2d43f6a 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -3304,25 +3304,27 @@ tryagain:
/* If the encoding pragma is in effect recode the text of
* any EXACT-kind nodes. */
if (PL_encoding && PL_regkind[(U8)OP(ret)] == EXACT) {
- STRLEN oldlen = STR_LEN(ret);
- SV *sv = sv_2mortal(newSVpvn(STRING(ret), oldlen));
-
- if (RExC_utf8)
- SvUTF8_on(sv);
- if (sv_utf8_downgrade(sv, TRUE)) {
- char *s = sv_recode_to_utf8(sv, PL_encoding);
- STRLEN newlen = SvCUR(sv);
-
- if (!SIZE_ONLY) {
- DEBUG_r(PerlIO_printf(Perl_debug_log, "recode %*s to %*s\n",
- (int)oldlen, STRING(ret),
- (int)newlen, s));
- Copy(s, STRING(ret), newlen, char);
- STR_LEN(ret) += newlen - oldlen;
- RExC_emit += STR_SZ(newlen) - STR_SZ(oldlen);
- } else
- RExC_size += STR_SZ(newlen) - STR_SZ(oldlen);
- }
+ STRLEN oldlen = STR_LEN(ret);
+ SV *sv = sv_2mortal(newSVpvn(STRING(ret), oldlen));
+
+ if (RExC_utf8)
+ SvUTF8_on(sv);
+ if (sv_utf8_downgrade(sv, TRUE)) {
+ char *s = sv_recode_to_utf8(sv, PL_encoding);
+ STRLEN newlen = SvCUR(sv);
+
+ if (SvUTF8(sv))
+ RExC_utf8 = 1;
+ if (!SIZE_ONLY) {
+ DEBUG_r(PerlIO_printf(Perl_debug_log, "recode %*s to %*s\n",
+ (int)oldlen, STRING(ret),
+ (int)newlen, s));
+ Copy(s, STRING(ret), newlen, char);
+ STR_LEN(ret) += newlen - oldlen;
+ RExC_emit += STR_SZ(newlen) - STR_SZ(oldlen);
+ } else
+ RExC_size += STR_SZ(newlen) - STR_SZ(oldlen);
+ }
}
return(ret);
diff --git a/sv.c b/sv.c
index 35a7bd8bb2..14dbc1e5b1 100644
--- a/sv.c
+++ b/sv.c
@@ -10815,16 +10815,17 @@ char *
Perl_sv_recode_to_utf8(pTHX_ SV *sv, SV *encoding)
{
if (SvPOK(sv) && !DO_UTF8(sv) && SvROK(encoding)) {
- SV *uni;
- STRLEN len;
- char *s;
- dSP;
- ENTER;
- SAVETMPS;
- PUSHMARK(sp);
- EXTEND(SP, 3);
- XPUSHs(encoding);
- XPUSHs(sv);
+ int vary = FALSE;
+ SV *uni;
+ STRLEN len;
+ char *s;
+ dSP;
+ ENTER;
+ SAVETMPS;
+ PUSHMARK(sp);
+ EXTEND(SP, 3);
+ XPUSHs(encoding);
+ XPUSHs(sv);
/*
NI-S 2002/07/09
Passing sv_yes is wrong - it needs to be or'ed set of constants
@@ -10833,23 +10834,32 @@ Perl_sv_recode_to_utf8(pTHX_ SV *sv, SV *encoding)
Both will default the value - let them.
- XPUSHs(&PL_sv_yes);
+ XPUSHs(&PL_sv_yes);
*/
- PUTBACK;
- call_method("decode", G_SCALAR);
- SPAGAIN;
- uni = POPs;
- PUTBACK;
- s = SvPV(uni, len);
- if (s != SvPVX(sv)) {
- SvGROW(sv, len + 1);
- Move(s, SvPVX(sv), len, char);
- SvCUR_set(sv, len);
- SvPVX(sv)[len] = 0;
- }
- FREETMPS;
- LEAVE;
- SvUTF8_on(sv);
+ PUTBACK;
+ call_method("decode", G_SCALAR);
+ SPAGAIN;
+ uni = POPs;
+ PUTBACK;
+ s = SvPV(uni, len);
+ {
+ U8 *t = (U8 *)s, *e = (U8 *)s + len;
+ while (t < e) {
+ if ((vary = !UTF8_IS_INVARIANT(*t++)))
+ break;
+ }
+ }
+ if (s != SvPVX(sv)) {
+ SvGROW(sv, len + 1);
+ Move(s, SvPVX(sv), len, char);
+ SvCUR_set(sv, len);
+ SvPVX(sv)[len] = 0;
+ }
+ FREETMPS;
+ LEAVE;
+ if (vary)
+ SvUTF8_on(sv);
+ SvUTF8_on(sv);
}
return SvPVX(sv);
}
diff --git a/toke.c b/toke.c
index b4c886fd85..d95b0a72bb 100644
--- a/toke.c
+++ b/toke.c
@@ -1674,17 +1674,18 @@ S_scan_const(pTHX_ char *start)
*d = '\0';
SvCUR_set(sv, d - SvPVX(sv));
if (SvCUR(sv) >= SvLEN(sv))
- Perl_croak(aTHX_ "panic: constant overflowed allocated space");
+ Perl_croak(aTHX_ "panic: constant overflowed allocated space");
SvPOK_on(sv);
if (PL_encoding && !has_utf8) {
- sv_recode_to_utf8(sv, PL_encoding);
- has_utf8 = TRUE;
+ sv_recode_to_utf8(sv, PL_encoding);
+ if (SvUTF8(sv))
+ has_utf8 = TRUE;
}
if (has_utf8) {
SvUTF8_on(sv);
if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
- PL_sublex_info.sub_op->op_private |=
+ PL_sublex_info.sub_op->op_private |=
(PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
}
}
@@ -7032,6 +7033,9 @@ S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
sv_catpvn(sv, s, 1);
if (has_utf8)
SvUTF8_on(sv);
+ else if (PL_encoding)
+ sv_recode_to_utf8(sv, PL_encoding);
+
PL_multi_end = CopLINE(PL_curcop);
s++;