summaryrefslogtreecommitdiff
path: root/mg.c
diff options
context:
space:
mode:
author: Jarkko Hietaniemi <jhi@iki.fi> 2001-01-15 05:02:24 +0000
committer: Jarkko Hietaniemi <jhi@iki.fi> 2001-01-15 05:02:24 +0000
commit: 9aa983d27b0af31badfcbbb76567f6e557076b41 (patch)
tree: a3290ebe9e4a9773e967a8beb2895428f7e717c7 /mg.c
parent: 78d8f6e05211de1a60b4bb9b795b8ff72f179ebe (diff)
download: perl-9aa983d27b0af31badfcbbb76567f6e557076b41.tar.gz
More UTF-8 patches from Inaba Hiroto.
- The substr lvalue was still not okay. - Now pp_stringify and sv_setsv copy the source's UTF8 flag even if IN_BYTE. pp_stringify is called from fold_constants at the optimization phase, and "\x{100}" was made SvUTF8_off under use bytes (the bytes pragma is for "byte semantics", not for "do not produce UTF8 data"). - New `qu' operator to generate a UTF8 string explicitly. Though I agree with the policy "0x00-0xff always produce bytes", I sometimes want such a string to be encoded in UTF8. I can use pack "U0a*", but it requires more typing and has runtime overhead. - Fix a pp_regcomp bug uncovered by the "0x00-0xff always produce bytes" change; the bug appears if a pm has the PMdf_UTF8 flag but the interpolated string is not UTF8_on and contains a char in the range 0x80-0xff. TODO: document and test qu. p4raw-id: //depot/perl@8439
Diffstat (limited to 'mg.c')
-rw-r--r--mg.c35
1 files changed, 19 insertions, 16 deletions
diff --git a/mg.c b/mg.c
index b5cae86de6..4f183b02c5 100644
--- a/mg.c
+++ b/mg.c
@@ -1404,12 +1404,14 @@ Perl_magic_getsubstr(pTHX_ SV *sv, MAGIC *mg)
I32 offs = LvTARGOFF(sv);
I32 rem = LvTARGLEN(sv);
+ if (SvUTF8(lsv))
+ sv_pos_u2b(lsv, &offs, &rem);
if (offs > len)
offs = len;
if (rem + offs > len)
rem = len - offs;
sv_setpvn(sv, tmps + offs, (STRLEN)rem);
- if (DO_UTF8(lsv))
+ if (SvUTF8(lsv))
SvUTF8_on(sv);
return 0;
}
@@ -1417,25 +1419,26 @@ Perl_magic_getsubstr(pTHX_ SV *sv, MAGIC *mg)
int
Perl_magic_setsubstr(pTHX_ SV *sv, MAGIC *mg)
{
- STRLEN littlelen;
- char *tmps = SvPV(sv, littlelen);
+ STRLEN len;
+ char *tmps = SvPV(sv, len);
+ SV *lsv = LvTARG(sv);
+ I32 lvoff = LvTARGOFF(sv);
+ I32 lvlen = LvTARGLEN(sv);
if (DO_UTF8(sv)) {
- I32 bigoff = LvTARGOFF(sv);
- I32 biglen = LvTARGLEN(sv);
- U8 *s, *a, *b;
-
- sv_utf8_upgrade(LvTARG(sv));
- /* sv_utf8_upgrade() might have moved and/or resized
- * the string to be replaced, we must rediscover it. --jhi */
- s = (U8*)SvPVX(LvTARG(sv));
- a = utf8_hop(s, bigoff);
- b = utf8_hop(a, biglen);
- sv_insert(LvTARG(sv), a - s, b - a, tmps, littlelen);
- SvUTF8_on(LvTARG(sv));
+ sv_utf8_upgrade(lsv);
+ sv_pos_u2b(lsv, &lvoff, &lvlen);
+ sv_insert(lsv, lvoff, lvlen, tmps, len);
+ SvUTF8_on(lsv);
+ }
+ else if (SvUTF8(lsv)) {
+ sv_pos_u2b(lsv, &lvoff, &lvlen);
+ tmps = bytes_to_utf8(tmps, &len);
+ sv_insert(lsv, lvoff, lvlen, tmps, len);
+ Safefree(tmps);
}
else
- sv_insert(LvTARG(sv), LvTARGOFF(sv), LvTARGLEN(sv), tmps, littlelen);
+ sv_insert(lsv, lvoff, lvlen, tmps, len);
return 0;
}