diff options
author | Jarkko Hietaniemi <jhi@iki.fi> | 2001-01-28 19:28:40 +0000 |
---|---|---|
committer | Jarkko Hietaniemi <jhi@iki.fi> | 2001-01-28 19:28:40 +0000 |
commit | f9a6324217cffea75ff769ddd313748c0613a128 (patch) | |
tree | 9fb5b4ade5877ba969d093cfe37ec605de62d8dc /toke.c | |
parent | 9ee2bb1a7c54b1866ff07ab9c157254810ee5205 (diff) | |
download | perl-f9a6324217cffea75ff769ddd313748c0613a128.tar.gz |
Patch from Inaba Hiroto:
- canonical UTF-8 hash keys: if a key string for a hash is
UTF8-on, try downgrade the string and use it if
unicode::distinct is not in effect.
For the task, I added a function bytes_from_utf8() to utf8.c.
It might resemble utf8_to_bytes() but it is not convenient
to the task.
Made a test for it and added to t/op/each.t
- Changed do_print in doio.c to apply sv_utf8_(downgrade|upgrade) to
the mortal copy of the argument SV.
And changed t/io/utf8.t test 18 which expects print() to
upgrade its argument.
- re-implement sv_eq with bytes_from_utf8()
- some bug fixes
- tr/// does not handle UTF8 range (\x{}-\x{})
- \ before raw UTF8 character produced
"Malformed UTF-8 character" warning.
- "\x{100}\N{CENT SIGN}" is Malformed.
Added tests for these 3.
- and one silly bug (by me) with qu operator.
p4raw-id: //depot/perl@8583
Diffstat (limited to 'toke.c')
-rw-r--r-- | toke.c | 25 |
1 files changed, 19 insertions, 6 deletions
@@ -1388,8 +1388,7 @@ S_scan_const(pTHX_ char *start) "Unrecognized escape \\%c passed through", *s); /* default action is to copy the quoted character */ - *d++ = *s++; - continue; + goto default_action; } /* \132 indicates an octal constant */ @@ -1479,6 +1478,13 @@ S_scan_const(pTHX_ char *start) if (has_utf8 || uv > 255) { d = (char*)uv_to_utf8((U8*)d, uv); has_utf8 = TRUE; + if (PL_lex_inwhat == OP_TRANS && + PL_sublex_info.sub_op) { + PL_sublex_info.sub_op->op_private |= + (PL_lex_repl ? OPpTRANS_FROM_UTF + : OPpTRANS_TO_UTF); + utf = TRUE; + } } else { *d++ = (char)uv; @@ -1506,6 +1512,8 @@ S_scan_const(pTHX_ char *start) res = newSVpvn(s + 1, e - s - 1); res = new_constant( Nullch, 0, "charnames", res, Nullsv, "\\N{...}" ); + if (has_utf8) + sv_utf8_upgrade(res); str = SvPV(res,len); if (!has_utf8 && SvUTF8(res)) { char *ostart = SvPVX(sv); @@ -1588,8 +1596,7 @@ S_scan_const(pTHX_ char *start) continue; } /* end if (backslash) */ - /* (now in tr/// code again) */ - + default_action: if (UTF8_IS_CONTINUED(*s) && (this_utf8 || has_utf8)) { STRLEN len = (STRLEN) -1; UV uv; @@ -1608,10 +1615,15 @@ S_scan_const(pTHX_ char *start) *d++ = *s++; } has_utf8 = TRUE; + if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) { + PL_sublex_info.sub_op->op_private |= + (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF); + utf = TRUE; + } continue; } - *d++ = *s++; + *d++ = *s++; } /* while loop to process each character */ /* terminate the string and set up the sv */ @@ -4742,7 +4754,8 @@ Perl_yylex(pTHX) case KEY_qq: case KEY_qu: s = scan_str(s,FALSE,FALSE); - if (tmp == KEY_qu && is_utf8_string((U8*)s, SvCUR(PL_lex_stuff))) + if (tmp == KEY_qu && + is_utf8_string((U8*)SvPVX(PL_lex_stuff), SvCUR(PL_lex_stuff))) SvUTF8_on(PL_lex_stuff); if (!s) missingterm((char*)0); |