diff options
author | Gurusamy Sarathy <gsar@cpan.org> | 2000-01-31 04:57:42 +0000 |
---|---|---|
committer | Gurusamy Sarathy <gsar@cpan.org> | 2000-01-31 04:57:42 +0000 |
commit | 7e2040f0b7c6fc88ec07b6e169aa2f75fc0130a4 (patch) | |
tree | de43e349e9f70e27ef30b2a0de9de2df628cc1c3 /sv.c | |
parent | 8004f2ac219abdd8660c02a4a46ed97695dc379d (diff) | |
download | perl-7e2040f0b7c6fc88ec07b6e169aa2f75fc0130a4.tar.gz |
runtime now looks at the SVf_UTF8 bit on the SV to decide
whether to use widechar semantics; lexer and RE engine continue
to need "use utf8" to enable unicode awareness in literals
and patterns (TODO: this needs to be fixed); $1 et al are marked
SvUTF8 if the pattern was compiled for utf8 (TODO: propagating
it from the data is probably better)
p4raw-id: //depot/perl@4930
Diffstat (limited to 'sv.c')
-rw-r--r-- | sv.c | 55 |
1 files changed, 34 insertions, 21 deletions
@@ -2769,7 +2769,7 @@ Perl_sv_setsv(pTHX_ SV *dstr, register SV *sstr) *SvEND(dstr) = '\0'; (void)SvPOK_only(dstr); } - if (SvUTF8(sstr)) + if (DO_UTF8(sstr)) SvUTF8_on(dstr); /*SUPPRESS 560*/ if (sflags & SVp_NOK) { @@ -5638,6 +5638,7 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV STRLEN origlen; I32 svix = 0; static char nullstr[] = "(null)"; + SV *argsv; /* no matter what, this is a string now */ (void)SvPV_force(sv, origlen); @@ -5652,12 +5653,18 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV char *s = va_arg(*args, char*); sv_catpv(sv, s ? s : nullstr); } - else if (svix < svmax) + else if (svix < svmax) { sv_catsv(sv, *svargs); + if (DO_UTF8(*svargs)) + SvUTF8_on(sv); + } return; case '_': if (args) { - sv_catsv(sv, va_arg(*args, SV*)); + argsv = va_arg(*args, SV*); + sv_catsv(sv, argsv); + if (DO_UTF8(argsv)) + SvUTF8_on(sv); return; } /* See comment on '_' below */ @@ -5676,6 +5683,7 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV STRLEN zeros = 0; bool has_precis = FALSE; STRLEN precis = 0; + bool is_utf = FALSE; char esignbuf[4]; U8 utf8buf[10]; @@ -5816,22 +5824,20 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV goto string; case 'c': - if (IN_UTF8) { - if (args) - uv = va_arg(*args, int); - else - uv = (svix < svmax) ? SvIVx(svargs[svix++]) : 0; - + if (args) + uv = va_arg(*args, int); + else + uv = (svix < svmax) ? SvIVx(svargs[svix++]) : 0; + if (uv >= 128 && !IN_BYTE) { eptr = (char*)utf8buf; elen = uv_to_utf8((U8*)eptr, uv) - utf8buf; - goto string; + is_utf = TRUE; + } + else { + c = (char)uv; + eptr = &c; + elen = 1; } - if (args) - c = va_arg(*args, int); - else - c = (svix < svmax) ? SvIVx(svargs[svix++]) : 0; - eptr = &c; - elen = 1; goto string; case 's': @@ -5851,16 +5857,18 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV } } else if (svix < svmax) { - eptr = SvPVx(svargs[svix++], elen); - if (IN_UTF8) { + argsv = svargs[svix++]; + eptr = SvPVx(argsv, elen); + if (DO_UTF8(argsv)) { if (has_precis && precis < elen) { I32 p = precis; - sv_pos_u2b(svargs[svix - 1], &p, 0); /* sticks at end */ + sv_pos_u2b(argsv, &p, 0); /* sticks at end */ precis = p; } if (width) { /* fudge width (can't fudge elen) */ - width += elen - sv_len_utf8(svargs[svix - 1]); + width += elen - sv_len_utf8(argsv); } + is_utf = TRUE; } } goto string; @@ -5873,7 +5881,10 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV */ if (!args) goto unknown; - eptr = SvPVx(va_arg(*args, SV*), elen); + argsv = va_arg(*args,SV*); + eptr = SvPVx(argsv, elen); + if (DO_UTF8(argsv)) + is_utf = TRUE; string: if (has_precis && elen > precis) @@ -6216,6 +6227,8 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV memset(p, ' ', gap); p += gap; } + if (is_utf) + SvUTF8_on(sv); *p = '\0'; SvCUR(sv) = p - SvPVX(sv); } |