summary | refs | log | tree | commit | diff
path: root/pp_ctl.c
diff options
context:
space:
mode:
author Gurusamy Sarathy <gsar@cpan.org> 2000-01-31 04:57:42 +0000
committer Gurusamy Sarathy <gsar@cpan.org> 2000-01-31 04:57:42 +0000
commit 7e2040f0b7c6fc88ec07b6e169aa2f75fc0130a4 (patch)
tree de43e349e9f70e27ef30b2a0de9de2df628cc1c3 /pp_ctl.c
parent 8004f2ac219abdd8660c02a4a46ed97695dc379d (diff)
download perl-7e2040f0b7c6fc88ec07b6e169aa2f75fc0130a4.tar.gz
The runtime now looks at the SVf_UTF8 bit on the SV to decide
whether to use widechar semantics. The lexer and RE engine continue to need "use utf8" to enable Unicode awareness in literals and patterns (TODO: this needs to be fixed). $1 et al. are marked SvUTF8 if the pattern was compiled for utf8 (TODO: propagating it from the data is probably better). p4raw-id: //depot/perl@4930
Diffstat (limited to 'pp_ctl.c')
-rw-r--r-- pp_ctl.c 18
1 file changed, 14 insertions, 4 deletions
diff --git a/pp_ctl.c b/pp_ctl.c
index 8e41646ccd..fd725a347c 100644
--- a/pp_ctl.c
+++ b/pp_ctl.c
@@ -114,6 +114,10 @@ PP(pp_regcomp)
PL_reginterp_cnt = I32_MAX; /* Mark as safe. */
pm->op_pmflags = pm->op_pmpermflags; /* reset case sensitivity */
+ if (DO_UTF8(tmpstr))
+ pm->op_pmdynflags |= PMdf_UTF8;
+ else
+ pm->op_pmdynflags &= ~PMdf_UTF8;
pm->op_pmregexp = CALLREGCOMP(aTHX_ t, t + len, pm);
PL_reginterp_cnt = 0; /* XXXX Be extra paranoid - needed
inside tie/overload accessors. */
@@ -296,7 +300,8 @@ PP(pp_formline)
NV value;
bool gotsome;
STRLEN len;
- STRLEN fudge = SvCUR(tmpForm) * (IN_UTF8 ? 3 : 1) + 1;
+ STRLEN fudge = SvCUR(tmpForm) * (IN_BYTE ? 1 : 3) + 1;
+ bool item_is_utf = FALSE;
if (!SvMAGICAL(tmpForm) || !SvCOMPILED(tmpForm)) {
SvREADONLY_off(tmpForm);
@@ -374,7 +379,7 @@ PP(pp_formline)
case FF_CHECKNL:
item = s = SvPV(sv, len);
itemsize = len;
- if (IN_UTF8) {
+ if (DO_UTF8(sv)) {
itemsize = sv_len_utf8(sv);
if (itemsize != len) {
I32 itembytes;
@@ -393,11 +398,13 @@ PP(pp_formline)
break;
s++;
}
+ item_is_utf = TRUE;
itemsize = s - item;
sv_pos_b2u(sv, &itemsize);
break;
}
}
+ item_is_utf = FALSE;
if (itemsize > fieldsize)
itemsize = fieldsize;
send = chophere = s + itemsize;
@@ -414,7 +421,7 @@ PP(pp_formline)
case FF_CHECKCHOP:
item = s = SvPV(sv, len);
itemsize = len;
- if (IN_UTF8) {
+ if (DO_UTF8(sv)) {
itemsize = sv_len_utf8(sv);
if (itemsize != len) {
I32 itembytes;
@@ -452,9 +459,11 @@ PP(pp_formline)
itemsize = chophere - item;
sv_pos_b2u(sv, &itemsize);
}
+ item_is_utf = TRUE;
break;
}
}
+ item_is_utf = FALSE;
if (itemsize <= fieldsize) {
send = chophere = s + itemsize;
while (s < send) {
@@ -510,7 +519,7 @@ PP(pp_formline)
case FF_ITEM:
arg = itemsize;
s = item;
- if (IN_UTF8) {
+ if (item_is_utf) {
while (arg--) {
if (*s & 0x80) {
switch (UTF8SKIP(s)) {
@@ -553,6 +562,7 @@ PP(pp_formline)
case FF_LINEGLOB:
item = s = SvPV(sv, len);
itemsize = len;
+ item_is_utf = FALSE; /* XXX is this correct? */
if (itemsize) {
gotsome = TRUE;
send = s + itemsize;