diff options
author | Gurusamy Sarathy <gsar@cpan.org> | 2000-01-31 04:57:42 +0000 |
---|---|---|
committer | Gurusamy Sarathy <gsar@cpan.org> | 2000-01-31 04:57:42 +0000 |
commit | 54f798b400d5c3ae39dadde4ef04ebb423aee8f6 (patch) | |
tree | de43e349e9f70e27ef30b2a0de9de2df628cc1c3 /pp.c | |
parent | 058b9f0e8f687141e133030332ab6c7136647be2 (diff) | |
download | perl-54f798b400d5c3ae39dadde4ef04ebb423aee8f6.tar.gz |
runtime now looks at the SVf_UTF8 bit on the SV to decide
whether to use widechar semantics; lexer and RE engine continue
to need "use utf8" to enable unicode awareness in literals
and patterns (TODO: this needs to be fixed); $1 et al are marked
SvUTF8 if the pattern was compiled for utf8 (TODO: propagating
it from the data is probably better)
p4raw-id: //depot/perl@4930
Diffstat (limited to 'pp.c')
-rw-r--r-- | pp.c | 65 |
1 files changed, 41 insertions, 24 deletions
@@ -389,7 +389,7 @@ PP(pp_pos) mg = mg_find(sv, 'g'); if (mg && mg->mg_len >= 0) { I32 i = mg->mg_len; - if (IN_UTF8) + if (DO_UTF8(sv)) sv_pos_b2u(sv, &i); PUSHi(i + PL_curcop->cop_arybase); RETURN; @@ -1426,7 +1426,7 @@ PP(pp_negate) sv_setsv(TARG, sv); *SvPV_force(TARG, len) = *s == '-' ? '+' : '-'; } - else if (IN_UTF8 && *(U8*)s >= 0xc0 && isIDFIRST_utf8((U8*)s)) { + else if (DO_UTF8(sv) && *(U8*)s >= 0xc0 && isIDFIRST_utf8((U8*)s)) { sv_setpvn(TARG, "-", 1); sv_catsv(TARG, sv); } @@ -1930,13 +1930,12 @@ PP(pp_oct) PP(pp_length) { djSP; dTARGET; + SV *sv = TOPs; - if (IN_UTF8) { - SETi( sv_len_utf8(TOPs) ); - RETURN; - } - - SETi( sv_len(TOPs) ); + if (DO_UTF8(sv)) + SETi(sv_len_utf8(sv)); + else + SETi(sv_len(sv)); RETURN; } @@ -1957,6 +1956,7 @@ PP(pp_substr) STRLEN repl_len; SvTAINTED_off(TARG); /* decontaminate */ + SvUTF8_off(TARG); /* decontaminate */ if (MAXARG > 2) { if (MAXARG > 3) { sv = POPs; @@ -1968,7 +1968,7 @@ PP(pp_substr) sv = POPs; PUTBACK; tmps = SvPV(sv, curlen); - if (IN_UTF8) { + if (DO_UTF8(sv)) { utfcurlen = sv_len_utf8(sv); if (utfcurlen == curlen) utfcurlen = 0; @@ -2017,8 +2017,10 @@ PP(pp_substr) RETPUSHUNDEF; } else { - if (utfcurlen) + if (utfcurlen) { sv_pos_u2b(sv, &pos, &rem); + SvUTF8_on(TARG); + } tmps += pos; sv_setpvn(TARG, tmps, rem); if (repl) @@ -2106,7 +2108,7 @@ PP(pp_index) little = POPs; big = POPs; tmps = SvPV(big, biglen); - if (IN_UTF8 && offset > 0) + if (offset > 0 && DO_UTF8(big)) sv_pos_u2b(big, &offset, 0); if (offset < 0) offset = 0; @@ -2117,7 +2119,7 @@ PP(pp_index) retval = -1; else retval = tmps2 - tmps; - if (IN_UTF8 && retval > 0) + if (retval > 0 && DO_UTF8(big)) sv_pos_b2u(big, &retval); PUSHi(retval + arybase); RETURN; @@ -2145,7 +2147,7 @@ PP(pp_rindex) if (MAXARG < 3) offset = blen; else { - if (IN_UTF8 && offset > 0) + if (offset > 0 && DO_UTF8(big)) sv_pos_u2b(big, &offset, 0); offset = offset - arybase + llen; } @@ -2158,7 +2160,7 @@ PP(pp_rindex) retval = -1; else retval = tmps2 - tmps; - if (IN_UTF8 && retval > 0) + if (retval > 0 && DO_UTF8(big)) sv_pos_b2u(big, &retval); PUSHi(retval + arybase); RETURN; @@ -2179,10 +2181,11 @@ PP(pp_ord) djSP; dTARGET; UV value; STRLEN n_a; - U8 *tmps = (U8*)POPpx; + SV *tmpsv = POPs; + U8 *tmps = (U8*)SvPVx(tmpsv,n_a); I32 retlen; - if (IN_UTF8 && (*tmps & 0x80)) + if ((*tmps & 0x80) && DO_UTF8(tmpsv)) value = utf8_to_uv(tmps, &retlen); else value = (UV)(*tmps & 255); @@ -2196,14 +2199,16 @@ PP(pp_chr) char *tmps; U32 value = POPu; + SvUTF8_off(TARG); /* decontaminate */ (void)SvUPGRADE(TARG,SVt_PV); - if (IN_UTF8 && value >= 128) { + if (value >= 128 && !IN_BYTE) { SvGROW(TARG,8); tmps = SvPVX(TARG); tmps = (char*)uv_to_utf8((U8*)tmps, (UV)value); SvCUR_set(TARG, tmps - SvPVX(TARG)); *tmps = '\0'; + SvUTF8_on(TARG); (void)SvPOK_only(TARG); XPUSHs(TARG); RETURN; @@ -2245,7 +2250,7 @@ PP(pp_ucfirst) register U8 *s; STRLEN slen; - if (IN_UTF8 && (s = (U8*)SvPV(sv, slen)) && slen && (*s & 0xc0) == 0xc0) { + if (DO_UTF8(sv) && (s = (U8*)SvPV(sv, slen)) && slen && (*s & 0xc0) == 0xc0) { I32 ulen; U8 tmpbuf[10]; U8 *tend; @@ -2265,6 +2270,7 @@ PP(pp_ucfirst) dTARGET; sv_setpvn(TARG, (char*)tmpbuf, tend - tmpbuf); sv_catpvn(TARG, (char*)(s + ulen), slen - ulen); + SvUTF8_on(TARG); SETs(TARG); } else { @@ -2275,6 +2281,7 @@ PP(pp_ucfirst) else { if (!SvPADTMP(sv) || SvREADONLY(sv)) { dTARGET; + SvUTF8_off(TARG); /* decontaminate */ sv_setsv(TARG, sv); sv = TARG; SETs(sv); @@ -2302,7 +2309,7 @@ PP(pp_lcfirst) register U8 *s; STRLEN slen; - if (IN_UTF8 && (s = (U8*)SvPV(sv, slen)) && slen && (*s & 0xc0) == 0xc0) { + if (DO_UTF8(sv) && (s = (U8*)SvPV(sv, slen)) && slen && (*s & 0xc0) == 0xc0) { I32 ulen; U8 tmpbuf[10]; U8 *tend; @@ -2322,6 +2329,7 @@ PP(pp_lcfirst) dTARGET; sv_setpvn(TARG, (char*)tmpbuf, tend - tmpbuf); sv_catpvn(TARG, (char*)(s + ulen), slen - ulen); + SvUTF8_on(TARG); SETs(TARG); } else { @@ -2332,6 +2340,7 @@ PP(pp_lcfirst) else { if (!SvPADTMP(sv) || SvREADONLY(sv)) { dTARGET; + SvUTF8_off(TARG); /* decontaminate */ sv_setsv(TARG, sv); sv = TARG; SETs(sv); @@ -2346,7 +2355,6 @@ PP(pp_lcfirst) else *s = toLOWER(*s); } - SETs(sv); } if (SvSMAGICAL(sv)) mg_set(sv); @@ -2360,7 +2368,7 @@ PP(pp_uc) register U8 *s; STRLEN len; - if (IN_UTF8) { + if (DO_UTF8(sv)) { dTARGET; I32 ulen; register U8 *d; @@ -2368,6 +2376,7 @@ PP(pp_uc) s = (U8*)SvPV(sv,len); if (!len) { + SvUTF8_off(TARG); /* decontaminate */ sv_setpvn(TARG, "", 0); SETs(TARG); } @@ -2392,6 +2401,7 @@ PP(pp_uc) } } *d = '\0'; + SvUTF8_on(TARG); SvCUR_set(TARG, d - (U8*)SvPVX(TARG)); SETs(TARG); } @@ -2399,6 +2409,7 @@ PP(pp_uc) else { if (!SvPADTMP(sv) || SvREADONLY(sv)) { dTARGET; + SvUTF8_off(TARG); /* decontaminate */ sv_setsv(TARG, sv); sv = TARG; SETs(sv); @@ -2431,7 +2442,7 @@ PP(pp_lc) register U8 *s; STRLEN len; - if (IN_UTF8) { + if (DO_UTF8(sv)) { dTARGET; I32 ulen; register U8 *d; @@ -2439,6 +2450,7 @@ PP(pp_lc) s = (U8*)SvPV(sv,len); if (!len) { + SvUTF8_off(TARG); /* decontaminate */ sv_setpvn(TARG, "", 0); SETs(TARG); } @@ -2463,6 +2475,7 @@ PP(pp_lc) } } *d = '\0'; + SvUTF8_on(TARG); SvCUR_set(TARG, d - (U8*)SvPVX(TARG)); SETs(TARG); } @@ -2470,6 +2483,7 @@ PP(pp_lc) else { if (!SvPADTMP(sv) || SvREADONLY(sv)) { dTARGET; + SvUTF8_off(TARG); /* decontaminate */ sv_setsv(TARG, sv); sv = TARG; SETs(sv); @@ -2504,11 +2518,12 @@ PP(pp_quotemeta) register char *s = SvPV(sv,len); register char *d; + SvUTF8_off(TARG); /* decontaminate */ if (len) { (void)SvUPGRADE(TARG, SVt_PV); SvGROW(TARG, (len * 2) + 1); d = SvPVX(TARG); - if (IN_UTF8) { + if (DO_UTF8(sv)) { while (len) { if (*s & 0x80) { STRLEN ulen = UTF8SKIP(s); @@ -2525,6 +2540,7 @@ PP(pp_quotemeta) len--; } } + SvUTF8_on(TARG); } else { while (len--) { @@ -3184,13 +3200,14 @@ PP(pp_reverse) dTARGET; STRLEN len; + SvUTF8_off(TARG); /* decontaminate */ if (SP - MARK > 1) do_join(TARG, &PL_sv_no, MARK, SP); else sv_setsv(TARG, (SP > MARK) ? *SP : DEFSV); up = SvPV_force(TARG, len); if (len > 1) { - if (IN_UTF8) { /* first reverse each character */ + if (DO_UTF8(TARG)) { /* first reverse each character */ U8* s = (U8*)SvPVX(TARG); U8* send = (U8*)(s + len); while (s < send) { |