summaryrefslogtreecommitdiff
path: root/pp.c
diff options
context:
space:
mode:
authorGurusamy Sarathy <gsar@cpan.org>2000-01-31 04:57:42 +0000
committerGurusamy Sarathy <gsar@cpan.org>2000-01-31 04:57:42 +0000
commit54f798b400d5c3ae39dadde4ef04ebb423aee8f6 (patch)
treede43e349e9f70e27ef30b2a0de9de2df628cc1c3 /pp.c
parent058b9f0e8f687141e133030332ab6c7136647be2 (diff)
downloadperl-54f798b400d5c3ae39dadde4ef04ebb423aee8f6.tar.gz
runtime now looks at the SVf_UTF8 bit on the SV to decide
whether to use widechar semantics; lexer and RE engine continue to need "use utf8" to enable unicode awareness in literals and patterns (TODO: this needs to be fixed); $1 et al are marked SvUTF8 if the pattern was compiled for utf8 (TODO: propagating it from the data is probably better) p4raw-id: //depot/perl@4930
Diffstat (limited to 'pp.c')
-rw-r--r--pp.c65
1 files changed, 41 insertions, 24 deletions
diff --git a/pp.c b/pp.c
index 13fbec8faa..45654a9445 100644
--- a/pp.c
+++ b/pp.c
@@ -389,7 +389,7 @@ PP(pp_pos)
mg = mg_find(sv, 'g');
if (mg && mg->mg_len >= 0) {
I32 i = mg->mg_len;
- if (IN_UTF8)
+ if (DO_UTF8(sv))
sv_pos_b2u(sv, &i);
PUSHi(i + PL_curcop->cop_arybase);
RETURN;
@@ -1426,7 +1426,7 @@ PP(pp_negate)
sv_setsv(TARG, sv);
*SvPV_force(TARG, len) = *s == '-' ? '+' : '-';
}
- else if (IN_UTF8 && *(U8*)s >= 0xc0 && isIDFIRST_utf8((U8*)s)) {
+ else if (DO_UTF8(sv) && *(U8*)s >= 0xc0 && isIDFIRST_utf8((U8*)s)) {
sv_setpvn(TARG, "-", 1);
sv_catsv(TARG, sv);
}
@@ -1930,13 +1930,12 @@ PP(pp_oct)
PP(pp_length)
{
djSP; dTARGET;
+ SV *sv = TOPs;
- if (IN_UTF8) {
- SETi( sv_len_utf8(TOPs) );
- RETURN;
- }
-
- SETi( sv_len(TOPs) );
+ if (DO_UTF8(sv))
+ SETi(sv_len_utf8(sv));
+ else
+ SETi(sv_len(sv));
RETURN;
}
@@ -1957,6 +1956,7 @@ PP(pp_substr)
STRLEN repl_len;
SvTAINTED_off(TARG); /* decontaminate */
+ SvUTF8_off(TARG); /* decontaminate */
if (MAXARG > 2) {
if (MAXARG > 3) {
sv = POPs;
@@ -1968,7 +1968,7 @@ PP(pp_substr)
sv = POPs;
PUTBACK;
tmps = SvPV(sv, curlen);
- if (IN_UTF8) {
+ if (DO_UTF8(sv)) {
utfcurlen = sv_len_utf8(sv);
if (utfcurlen == curlen)
utfcurlen = 0;
@@ -2017,8 +2017,10 @@ PP(pp_substr)
RETPUSHUNDEF;
}
else {
- if (utfcurlen)
+ if (utfcurlen) {
sv_pos_u2b(sv, &pos, &rem);
+ SvUTF8_on(TARG);
+ }
tmps += pos;
sv_setpvn(TARG, tmps, rem);
if (repl)
@@ -2106,7 +2108,7 @@ PP(pp_index)
little = POPs;
big = POPs;
tmps = SvPV(big, biglen);
- if (IN_UTF8 && offset > 0)
+ if (offset > 0 && DO_UTF8(big))
sv_pos_u2b(big, &offset, 0);
if (offset < 0)
offset = 0;
@@ -2117,7 +2119,7 @@ PP(pp_index)
retval = -1;
else
retval = tmps2 - tmps;
- if (IN_UTF8 && retval > 0)
+ if (retval > 0 && DO_UTF8(big))
sv_pos_b2u(big, &retval);
PUSHi(retval + arybase);
RETURN;
@@ -2145,7 +2147,7 @@ PP(pp_rindex)
if (MAXARG < 3)
offset = blen;
else {
- if (IN_UTF8 && offset > 0)
+ if (offset > 0 && DO_UTF8(big))
sv_pos_u2b(big, &offset, 0);
offset = offset - arybase + llen;
}
@@ -2158,7 +2160,7 @@ PP(pp_rindex)
retval = -1;
else
retval = tmps2 - tmps;
- if (IN_UTF8 && retval > 0)
+ if (retval > 0 && DO_UTF8(big))
sv_pos_b2u(big, &retval);
PUSHi(retval + arybase);
RETURN;
@@ -2179,10 +2181,11 @@ PP(pp_ord)
djSP; dTARGET;
UV value;
STRLEN n_a;
- U8 *tmps = (U8*)POPpx;
+ SV *tmpsv = POPs;
+ U8 *tmps = (U8*)SvPVx(tmpsv,n_a);
I32 retlen;
- if (IN_UTF8 && (*tmps & 0x80))
+ if ((*tmps & 0x80) && DO_UTF8(tmpsv))
value = utf8_to_uv(tmps, &retlen);
else
value = (UV)(*tmps & 255);
@@ -2196,14 +2199,16 @@ PP(pp_chr)
char *tmps;
U32 value = POPu;
+ SvUTF8_off(TARG); /* decontaminate */
(void)SvUPGRADE(TARG,SVt_PV);
- if (IN_UTF8 && value >= 128) {
+ if (value >= 128 && !IN_BYTE) {
SvGROW(TARG,8);
tmps = SvPVX(TARG);
tmps = (char*)uv_to_utf8((U8*)tmps, (UV)value);
SvCUR_set(TARG, tmps - SvPVX(TARG));
*tmps = '\0';
+ SvUTF8_on(TARG);
(void)SvPOK_only(TARG);
XPUSHs(TARG);
RETURN;
@@ -2245,7 +2250,7 @@ PP(pp_ucfirst)
register U8 *s;
STRLEN slen;
- if (IN_UTF8 && (s = (U8*)SvPV(sv, slen)) && slen && (*s & 0xc0) == 0xc0) {
+ if (DO_UTF8(sv) && (s = (U8*)SvPV(sv, slen)) && slen && (*s & 0xc0) == 0xc0) {
I32 ulen;
U8 tmpbuf[10];
U8 *tend;
@@ -2265,6 +2270,7 @@ PP(pp_ucfirst)
dTARGET;
sv_setpvn(TARG, (char*)tmpbuf, tend - tmpbuf);
sv_catpvn(TARG, (char*)(s + ulen), slen - ulen);
+ SvUTF8_on(TARG);
SETs(TARG);
}
else {
@@ -2275,6 +2281,7 @@ PP(pp_ucfirst)
else {
if (!SvPADTMP(sv) || SvREADONLY(sv)) {
dTARGET;
+ SvUTF8_off(TARG); /* decontaminate */
sv_setsv(TARG, sv);
sv = TARG;
SETs(sv);
@@ -2302,7 +2309,7 @@ PP(pp_lcfirst)
register U8 *s;
STRLEN slen;
- if (IN_UTF8 && (s = (U8*)SvPV(sv, slen)) && slen && (*s & 0xc0) == 0xc0) {
+ if (DO_UTF8(sv) && (s = (U8*)SvPV(sv, slen)) && slen && (*s & 0xc0) == 0xc0) {
I32 ulen;
U8 tmpbuf[10];
U8 *tend;
@@ -2322,6 +2329,7 @@ PP(pp_lcfirst)
dTARGET;
sv_setpvn(TARG, (char*)tmpbuf, tend - tmpbuf);
sv_catpvn(TARG, (char*)(s + ulen), slen - ulen);
+ SvUTF8_on(TARG);
SETs(TARG);
}
else {
@@ -2332,6 +2340,7 @@ PP(pp_lcfirst)
else {
if (!SvPADTMP(sv) || SvREADONLY(sv)) {
dTARGET;
+ SvUTF8_off(TARG); /* decontaminate */
sv_setsv(TARG, sv);
sv = TARG;
SETs(sv);
@@ -2346,7 +2355,6 @@ PP(pp_lcfirst)
else
*s = toLOWER(*s);
}
- SETs(sv);
}
if (SvSMAGICAL(sv))
mg_set(sv);
@@ -2360,7 +2368,7 @@ PP(pp_uc)
register U8 *s;
STRLEN len;
- if (IN_UTF8) {
+ if (DO_UTF8(sv)) {
dTARGET;
I32 ulen;
register U8 *d;
@@ -2368,6 +2376,7 @@ PP(pp_uc)
s = (U8*)SvPV(sv,len);
if (!len) {
+ SvUTF8_off(TARG); /* decontaminate */
sv_setpvn(TARG, "", 0);
SETs(TARG);
}
@@ -2392,6 +2401,7 @@ PP(pp_uc)
}
}
*d = '\0';
+ SvUTF8_on(TARG);
SvCUR_set(TARG, d - (U8*)SvPVX(TARG));
SETs(TARG);
}
@@ -2399,6 +2409,7 @@ PP(pp_uc)
else {
if (!SvPADTMP(sv) || SvREADONLY(sv)) {
dTARGET;
+ SvUTF8_off(TARG); /* decontaminate */
sv_setsv(TARG, sv);
sv = TARG;
SETs(sv);
@@ -2431,7 +2442,7 @@ PP(pp_lc)
register U8 *s;
STRLEN len;
- if (IN_UTF8) {
+ if (DO_UTF8(sv)) {
dTARGET;
I32 ulen;
register U8 *d;
@@ -2439,6 +2450,7 @@ PP(pp_lc)
s = (U8*)SvPV(sv,len);
if (!len) {
+ SvUTF8_off(TARG); /* decontaminate */
sv_setpvn(TARG, "", 0);
SETs(TARG);
}
@@ -2463,6 +2475,7 @@ PP(pp_lc)
}
}
*d = '\0';
+ SvUTF8_on(TARG);
SvCUR_set(TARG, d - (U8*)SvPVX(TARG));
SETs(TARG);
}
@@ -2470,6 +2483,7 @@ PP(pp_lc)
else {
if (!SvPADTMP(sv) || SvREADONLY(sv)) {
dTARGET;
+ SvUTF8_off(TARG); /* decontaminate */
sv_setsv(TARG, sv);
sv = TARG;
SETs(sv);
@@ -2504,11 +2518,12 @@ PP(pp_quotemeta)
register char *s = SvPV(sv,len);
register char *d;
+ SvUTF8_off(TARG); /* decontaminate */
if (len) {
(void)SvUPGRADE(TARG, SVt_PV);
SvGROW(TARG, (len * 2) + 1);
d = SvPVX(TARG);
- if (IN_UTF8) {
+ if (DO_UTF8(sv)) {
while (len) {
if (*s & 0x80) {
STRLEN ulen = UTF8SKIP(s);
@@ -2525,6 +2540,7 @@ PP(pp_quotemeta)
len--;
}
}
+ SvUTF8_on(TARG);
}
else {
while (len--) {
@@ -3184,13 +3200,14 @@ PP(pp_reverse)
dTARGET;
STRLEN len;
+ SvUTF8_off(TARG); /* decontaminate */
if (SP - MARK > 1)
do_join(TARG, &PL_sv_no, MARK, SP);
else
sv_setsv(TARG, (SP > MARK) ? *SP : DEFSV);
up = SvPV_force(TARG, len);
if (len > 1) {
- if (IN_UTF8) { /* first reverse each character */
+ if (DO_UTF8(TARG)) { /* first reverse each character */
U8* s = (U8*)SvPVX(TARG);
U8* send = (U8*)(s + len);
while (s < send) {