diff options
author | Jarkko Hietaniemi <jhi@iki.fi> | 2001-01-15 05:02:24 +0000 |
---|---|---|
committer | Jarkko Hietaniemi <jhi@iki.fi> | 2001-01-15 05:02:24 +0000 |
commit | 9aa983d27b0af31badfcbbb76567f6e557076b41 (patch) | |
tree | a3290ebe9e4a9773e967a8beb2895428f7e717c7 /mg.c | |
parent | 78d8f6e05211de1a60b4bb9b795b8ff72f179ebe (diff) | |
download | perl-9aa983d27b0af31badfcbbb76567f6e557076b41.tar.gz |
More UTF-8 patches from Inaba Hiroto.
- The substr lval was still not okay.
- Now pp_stringify and sv_setsv copy the source's UTF8 flag
even if IN_BYTE. pp_stringify is called from fold_constants
at the optimization phase, and "\x{100}" was being made SvUTF8_off
under use bytes (the bytes pragma is for "byte semantics" and not
for "do not produce UTF8 data").
- New `qu' operator to generate UTF8 string explicitly.
Though I agree with the policy "0x00-0xff always produce bytes",
I sometimes want such a string to be encoded in UTF8.
I can use pack"U0a*", but it requires more typing and has
runtime overhead.
- Fix a pp_regcomp bug uncovered by the "0x00-0xff always produce bytes"
change; the bug appears if a pm has the PMdf_UTF8 flag but the interpolated
string is not UTF8_on and contains a char in the range 0x80-0xff.
TODO: document and test qu.
p4raw-id: //depot/perl@8439
Diffstat (limited to 'mg.c')
-rw-r--r-- | mg.c | 35 |
1 files changed, 19 insertions, 16 deletions
@@ -1404,12 +1404,14 @@ Perl_magic_getsubstr(pTHX_ SV *sv, MAGIC *mg) I32 offs = LvTARGOFF(sv); I32 rem = LvTARGLEN(sv); + if (SvUTF8(lsv)) + sv_pos_u2b(lsv, &offs, &rem); if (offs > len) offs = len; if (rem + offs > len) rem = len - offs; sv_setpvn(sv, tmps + offs, (STRLEN)rem); - if (DO_UTF8(lsv)) + if (SvUTF8(lsv)) SvUTF8_on(sv); return 0; } @@ -1417,25 +1419,26 @@ Perl_magic_getsubstr(pTHX_ SV *sv, MAGIC *mg) int Perl_magic_setsubstr(pTHX_ SV *sv, MAGIC *mg) { - STRLEN littlelen; - char *tmps = SvPV(sv, littlelen); + STRLEN len; + char *tmps = SvPV(sv, len); + SV *lsv = LvTARG(sv); + I32 lvoff = LvTARGOFF(sv); + I32 lvlen = LvTARGLEN(sv); if (DO_UTF8(sv)) { - I32 bigoff = LvTARGOFF(sv); - I32 biglen = LvTARGLEN(sv); - U8 *s, *a, *b; - - sv_utf8_upgrade(LvTARG(sv)); - /* sv_utf8_upgrade() might have moved and/or resized - * the string to be replaced, we must rediscover it. --jhi */ - s = (U8*)SvPVX(LvTARG(sv)); - a = utf8_hop(s, bigoff); - b = utf8_hop(a, biglen); - sv_insert(LvTARG(sv), a - s, b - a, tmps, littlelen); - SvUTF8_on(LvTARG(sv)); + sv_utf8_upgrade(lsv); + sv_pos_u2b(lsv, &lvoff, &lvlen); + sv_insert(lsv, lvoff, lvlen, tmps, len); + SvUTF8_on(lsv); + } + else if (SvUTF8(lsv)) { + sv_pos_u2b(lsv, &lvoff, &lvlen); + tmps = bytes_to_utf8(tmps, &len); + sv_insert(lsv, lvoff, lvlen, tmps, len); + Safefree(tmps); } else - sv_insert(LvTARG(sv), LvTARGOFF(sv), LvTARGLEN(sv), tmps, littlelen); + sv_insert(lsv, lvoff, lvlen, tmps, len); return 0; } |