summary | refs | log | tree | commit | diff
path: root/pp.c
diff options
context:
space:
mode:
author: Jarkko Hietaniemi <jhi@iki.fi> 2001-01-15 05:02:24 +0000
committer: Jarkko Hietaniemi <jhi@iki.fi> 2001-01-15 05:02:24 +0000
commit: 9aa983d27b0af31badfcbbb76567f6e557076b41 (patch)
tree: a3290ebe9e4a9773e967a8beb2895428f7e717c7 /pp.c
parent: 78d8f6e05211de1a60b4bb9b795b8ff72f179ebe (diff)
download: perl-9aa983d27b0af31badfcbbb76567f6e557076b41.tar.gz
More UTF-8 patches from Inaba Hiroto.
- The substr lvalue was still not okay.
- pp_stringify and sv_setsv now copy the source's UTF8 flag even if IN_BYTE. pp_stringify is called from fold_constants during the optimization phase, and "\x{100}" was being made SvUTF8_off under `use bytes` (the bytes pragma is for "byte semantics", not for "do not produce UTF8 data").
- New `qu' operator to generate a UTF8 string explicitly. Though I agree with the policy "0x00-0xff always produce bytes", one sometimes wants such a string to be encoded in UTF8. I can use pack"U0a*", but it requires more typing and has runtime overhead.
- Fix a pp_regcomp bug uncovered by the "0x00-0xff always produce bytes" change: the bug appears if a pm has the PMdf_UTF8 flag but the interpolated string is not UTF8_on and contains a char in the range 0x80-0xff.

TODO: document and test qu.

p4raw-id: //depot/perl@8439
Diffstat (limited to 'pp.c')
-rw-r--r-- pp.c 17
1 files changed, 7 insertions, 10 deletions
diff --git a/pp.c b/pp.c
index ba6c17a773..87e459e169 100644
--- a/pp.c
+++ b/pp.c
@@ -2792,6 +2792,8 @@ PP(pp_substr)
RETPUSHUNDEF;
}
else {
+ I32 upos = pos;
+ I32 urem = rem;
if (utfcurlen)
sv_pos_u2b(sv, &pos, &rem);
tmps += pos;
@@ -2826,8 +2828,8 @@ PP(pp_substr)
SvREFCNT_dec(LvTARG(TARG));
LvTARG(TARG) = SvREFCNT_inc(sv);
}
- LvTARGOFF(TARG) = pos;
- LvTARGLEN(TARG) = rem;
+ LvTARGOFF(TARG) = upos;
+ LvTARGLEN(TARG) = urem;
}
}
SPAGAIN;
@@ -2970,11 +2972,9 @@ PP(pp_chr)
(void)SvUPGRADE(TARG,SVt_PV);
- if ((value > 255 && !IN_BYTE) ||
- (UTF8_IS_CONTINUED(value) && (PL_hints & HINT_UTF8)) ) {
- SvGROW(TARG, UTF8_MAXLEN+1);
- tmps = SvPVX(TARG);
- tmps = (char*)uv_to_utf8((U8*)tmps, (UV)value);
+ if (value > 255 && !IN_BYTE) {
+ SvGROW(TARG, UNISKIP(value)+1);
+ tmps = (char*)uv_to_utf8((U8*)SvPVX(TARG), value);
SvCUR_set(TARG, tmps - SvPVX(TARG));
*tmps = '\0';
(void)SvPOK_only(TARG);
@@ -2982,9 +2982,6 @@ PP(pp_chr)
XPUSHs(TARG);
RETURN;
}
- else {
- SvUTF8_off(TARG);
- }
SvGROW(TARG,2);
SvCUR_set(TARG, 1);