diff options
Diffstat (limited to 'toke.c')
-rw-r--r-- | toke.c | 36 |
1 files changed, 29 insertions, 7 deletions
@@ -1336,6 +1336,8 @@ S_scan_const(pTHX_ char *start) /* backslashes */ if (*s == '\\' && s+1 < send) { + bool to_be_utf8 = FALSE; + s++; /* some backslashes we leave behind */ @@ -1404,7 +1406,7 @@ S_scan_const(pTHX_ char *start) else { STRLEN len = 1; /* allow underscores */ uv = (UV)scan_hex(s + 1, e - s - 1, &len); - has_utf8 = TRUE; + to_be_utf8 = TRUE; } s = e + 1; } @@ -1421,8 +1423,8 @@ S_scan_const(pTHX_ char *start) * There will always enough room in sv since such escapes will * be longer than any utf8 sequence they can end up as */ - if (uv > 127 || has_utf8) { - if (!this_utf8 && !has_utf8 && uv > 255) { + if (uv > 127) { + if (!has_utf8 && (to_be_utf8 || uv > 255)) { /* might need to recode whatever we have accumulated so far * if it contains any hibit chars */ @@ -1454,7 +1456,7 @@ S_scan_const(pTHX_ char *start) } } - if (has_utf8 || uv > 255) { + if (to_be_utf8 || uv > 255) { d = (char*)uv_to_utf8((U8*)d, uv); has_utf8 = TRUE; } @@ -3120,6 +3122,9 @@ Perl_yylex(pTHX) if (*d == '}') { char minus = (PL_tokenbuf[0] == '-'); s = force_word(s + minus, WORD, FALSE, TRUE, FALSE); + if (UTF && !IN_BYTE && is_utf8_string((U8*)PL_tokenbuf, 0) && + PL_nextval[PL_nexttoke-1].opval) + SvUTF8_on(((SVOP*)PL_nextval[PL_nexttoke-1].opval)->op_sv); if (minus) force_next('-'); } @@ -3774,6 +3779,8 @@ Perl_yylex(pTHX) CLINE; yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf,0)); yylval.opval->op_private = OPpCONST_BARE; + if (UTF && !IN_BYTE && is_utf8_string((U8*)PL_tokenbuf, len)) + SvUTF8_on(((SVOP*)yylval.opval)->op_sv); TERM(WORD); } @@ -3933,6 +3940,8 @@ Perl_yylex(pTHX) if (*s == '=' && s[1] == '>') { CLINE; sv_setpv(((SVOP*)yylval.opval)->op_sv, PL_tokenbuf); + if (UTF && !IN_BYTE && is_utf8_string((U8*)PL_tokenbuf, len)) + SvUTF8_on(((SVOP*)yylval.opval)->op_sv); TERM(WORD); } @@ -4104,6 +4113,10 @@ Perl_yylex(pTHX) } } #endif +#ifdef PERLIO_LAYERS + if (UTF && !IN_BYTE) + PerlIO_apply_layers(PL_rsfp, NULL, ":utf8"); +#endif PL_rsfp = Nullfp; } goto fake_eof; @@ -4634,6 +4647,7 @@ Perl_yylex(pTHX) int warned = 0; d = SvPV_force(PL_lex_stuff, len); while (len) { + SV *sv; for (; isSPACE(*d) && len; --len, ++d) ; if (len) { char *b = d; @@ -4654,8 +4668,11 @@ Perl_yylex(pTHX) else { for (; !isSPACE(*d) && len; --len, ++d) ; } + sv = newSVpvn(b, d-b); + if (DO_UTF8(PL_lex_stuff)) + SvUTF8_on(sv); words = append_elem(OP_LIST, words, - newSVOP(OP_CONST, 0, tokeq(newSVpvn(b, d-b)))); + newSVOP(OP_CONST, 0, tokeq(sv))); } } if (words) { @@ -6248,7 +6265,9 @@ S_scan_trans(pTHX_ char *start) squash = OPpTRANS_SQUASH; s++; } - o->op_private = del|squash|complement; + o->op_private = del|squash|complement| + (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)| + (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0); PL_lex_op = o; yylval.ival = OP_TRANS; @@ -6442,6 +6461,8 @@ retval: Renew(SvPVX(tmpstr), SvLEN(tmpstr), char); } SvREFCNT_dec(herewas); + if (UTF && !IN_BYTE && is_utf8_string((U8*)SvPVX(tmpstr), SvCUR(tmpstr))) + SvUTF8_on(tmpstr); PL_lex_stuff = tmpstr; yylval.ival = op_type; return s; @@ -7205,7 +7226,8 @@ vstring: SvREADONLY_on(sv); if (utf8) { SvUTF8_on(sv); - sv_utf8_downgrade(sv, TRUE); + if (!UTF||IN_BYTE) + sv_utf8_downgrade(sv, TRUE); } } } |