summaryrefslogtreecommitdiff
path: root/toke.c
diff options
context:
space:
mode:
Diffstat (limited to 'toke.c')
-rw-r--r--toke.c36
1 files changed, 29 insertions, 7 deletions
diff --git a/toke.c b/toke.c
index 46278e8be4..07c86b8b05 100644
--- a/toke.c
+++ b/toke.c
@@ -1336,6 +1336,8 @@ S_scan_const(pTHX_ char *start)
/* backslashes */
if (*s == '\\' && s+1 < send) {
+ bool to_be_utf8 = FALSE;
+
s++;
/* some backslashes we leave behind */
@@ -1404,7 +1406,7 @@ S_scan_const(pTHX_ char *start)
else {
STRLEN len = 1; /* allow underscores */
uv = (UV)scan_hex(s + 1, e - s - 1, &len);
- has_utf8 = TRUE;
+ to_be_utf8 = TRUE;
}
s = e + 1;
}
@@ -1421,8 +1423,8 @@ S_scan_const(pTHX_ char *start)
* There will always enough room in sv since such escapes will
* be longer than any utf8 sequence they can end up as
*/
- if (uv > 127 || has_utf8) {
- if (!this_utf8 && !has_utf8 && uv > 255) {
+ if (uv > 127) {
+ if (!has_utf8 && (to_be_utf8 || uv > 255)) {
/* might need to recode whatever we have accumulated so far
* if it contains any hibit chars
*/
@@ -1454,7 +1456,7 @@ S_scan_const(pTHX_ char *start)
}
}
- if (has_utf8 || uv > 255) {
+ if (to_be_utf8 || uv > 255) {
d = (char*)uv_to_utf8((U8*)d, uv);
has_utf8 = TRUE;
}
@@ -3120,6 +3122,9 @@ Perl_yylex(pTHX)
if (*d == '}') {
char minus = (PL_tokenbuf[0] == '-');
s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
+ if (UTF && !IN_BYTE && is_utf8_string((U8*)PL_tokenbuf, 0) &&
+ PL_nextval[PL_nexttoke-1].opval)
+ SvUTF8_on(((SVOP*)PL_nextval[PL_nexttoke-1].opval)->op_sv);
if (minus)
force_next('-');
}
@@ -3774,6 +3779,8 @@ Perl_yylex(pTHX)
CLINE;
yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpv(PL_tokenbuf,0));
yylval.opval->op_private = OPpCONST_BARE;
+ if (UTF && !IN_BYTE && is_utf8_string((U8*)PL_tokenbuf, len))
+ SvUTF8_on(((SVOP*)yylval.opval)->op_sv);
TERM(WORD);
}
@@ -3933,6 +3940,8 @@ Perl_yylex(pTHX)
if (*s == '=' && s[1] == '>') {
CLINE;
sv_setpv(((SVOP*)yylval.opval)->op_sv, PL_tokenbuf);
+ if (UTF && !IN_BYTE && is_utf8_string((U8*)PL_tokenbuf, len))
+ SvUTF8_on(((SVOP*)yylval.opval)->op_sv);
TERM(WORD);
}
@@ -4104,6 +4113,10 @@ Perl_yylex(pTHX)
}
}
#endif
+#ifdef PERLIO_LAYERS
+ if (UTF && !IN_BYTE)
+ PerlIO_apply_layers(PL_rsfp, NULL, ":utf8");
+#endif
PL_rsfp = Nullfp;
}
goto fake_eof;
@@ -4634,6 +4647,7 @@ Perl_yylex(pTHX)
int warned = 0;
d = SvPV_force(PL_lex_stuff, len);
while (len) {
+ SV *sv;
for (; isSPACE(*d) && len; --len, ++d) ;
if (len) {
char *b = d;
@@ -4654,8 +4668,11 @@ Perl_yylex(pTHX)
else {
for (; !isSPACE(*d) && len; --len, ++d) ;
}
+ sv = newSVpvn(b, d-b);
+ if (DO_UTF8(PL_lex_stuff))
+ SvUTF8_on(sv);
words = append_elem(OP_LIST, words,
- newSVOP(OP_CONST, 0, tokeq(newSVpvn(b, d-b))));
+ newSVOP(OP_CONST, 0, tokeq(sv)));
}
}
if (words) {
@@ -6248,7 +6265,9 @@ S_scan_trans(pTHX_ char *start)
squash = OPpTRANS_SQUASH;
s++;
}
- o->op_private = del|squash|complement;
+ o->op_private = del|squash|complement|
+ (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
+ (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
PL_lex_op = o;
yylval.ival = OP_TRANS;
@@ -6442,6 +6461,8 @@ retval:
Renew(SvPVX(tmpstr), SvLEN(tmpstr), char);
}
SvREFCNT_dec(herewas);
+ if (UTF && !IN_BYTE && is_utf8_string((U8*)SvPVX(tmpstr), SvCUR(tmpstr)))
+ SvUTF8_on(tmpstr);
PL_lex_stuff = tmpstr;
yylval.ival = op_type;
return s;
@@ -7205,7 +7226,8 @@ vstring:
SvREADONLY_on(sv);
if (utf8) {
SvUTF8_on(sv);
- sv_utf8_downgrade(sv, TRUE);
+ if (!UTF||IN_BYTE)
+ sv_utf8_downgrade(sv, TRUE);
}
}
}