summaryrefslogtreecommitdiff
path: root/sv.c
diff options
context:
space:
mode:
authorGurusamy Sarathy <gsar@cpan.org>2000-01-31 04:57:42 +0000
committerGurusamy Sarathy <gsar@cpan.org>2000-01-31 04:57:42 +0000
commit7e2040f0b7c6fc88ec07b6e169aa2f75fc0130a4 (patch)
treede43e349e9f70e27ef30b2a0de9de2df628cc1c3 /sv.c
parent8004f2ac219abdd8660c02a4a46ed97695dc379d (diff)
downloadperl-7e2040f0b7c6fc88ec07b6e169aa2f75fc0130a4.tar.gz
runtime now looks at the SVf_UTF8 bit on the SV to decide
whether to use widechar semantics; lexer and RE engine continue to need "use utf8" to enable unicode awareness in literals and patterns (TODO: this needs to be fixed); $1 et al are marked SvUTF8 if the pattern was compiled for utf8 (TODO: propagating it from the data is probably better) p4raw-id: //depot/perl@4930
Diffstat (limited to 'sv.c')
-rw-r--r--sv.c55
1 files changed, 34 insertions, 21 deletions
diff --git a/sv.c b/sv.c
index 834dac3bd1..d76752fcf9 100644
--- a/sv.c
+++ b/sv.c
@@ -2769,7 +2769,7 @@ Perl_sv_setsv(pTHX_ SV *dstr, register SV *sstr)
*SvEND(dstr) = '\0';
(void)SvPOK_only(dstr);
}
- if (SvUTF8(sstr))
+ if (DO_UTF8(sstr))
SvUTF8_on(dstr);
/*SUPPRESS 560*/
if (sflags & SVp_NOK) {
@@ -5638,6 +5638,7 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV
STRLEN origlen;
I32 svix = 0;
static char nullstr[] = "(null)";
+ SV *argsv;
/* no matter what, this is a string now */
(void)SvPV_force(sv, origlen);
@@ -5652,12 +5653,18 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV
char *s = va_arg(*args, char*);
sv_catpv(sv, s ? s : nullstr);
}
- else if (svix < svmax)
+ else if (svix < svmax) {
sv_catsv(sv, *svargs);
+ if (DO_UTF8(*svargs))
+ SvUTF8_on(sv);
+ }
return;
case '_':
if (args) {
- sv_catsv(sv, va_arg(*args, SV*));
+ argsv = va_arg(*args, SV*);
+ sv_catsv(sv, argsv);
+ if (DO_UTF8(argsv))
+ SvUTF8_on(sv);
return;
}
/* See comment on '_' below */
@@ -5676,6 +5683,7 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV
STRLEN zeros = 0;
bool has_precis = FALSE;
STRLEN precis = 0;
+ bool is_utf = FALSE;
char esignbuf[4];
U8 utf8buf[10];
@@ -5816,22 +5824,20 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV
goto string;
case 'c':
- if (IN_UTF8) {
- if (args)
- uv = va_arg(*args, int);
- else
- uv = (svix < svmax) ? SvIVx(svargs[svix++]) : 0;
-
+ if (args)
+ uv = va_arg(*args, int);
+ else
+ uv = (svix < svmax) ? SvIVx(svargs[svix++]) : 0;
+ if (uv >= 128 && !IN_BYTE) {
eptr = (char*)utf8buf;
elen = uv_to_utf8((U8*)eptr, uv) - utf8buf;
- goto string;
+ is_utf = TRUE;
+ }
+ else {
+ c = (char)uv;
+ eptr = &c;
+ elen = 1;
}
- if (args)
- c = va_arg(*args, int);
- else
- c = (svix < svmax) ? SvIVx(svargs[svix++]) : 0;
- eptr = &c;
- elen = 1;
goto string;
case 's':
@@ -5851,16 +5857,18 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV
}
}
else if (svix < svmax) {
- eptr = SvPVx(svargs[svix++], elen);
- if (IN_UTF8) {
+ argsv = svargs[svix++];
+ eptr = SvPVx(argsv, elen);
+ if (DO_UTF8(argsv)) {
if (has_precis && precis < elen) {
I32 p = precis;
- sv_pos_u2b(svargs[svix - 1], &p, 0); /* sticks at end */
+ sv_pos_u2b(argsv, &p, 0); /* sticks at end */
precis = p;
}
if (width) { /* fudge width (can't fudge elen) */
- width += elen - sv_len_utf8(svargs[svix - 1]);
+ width += elen - sv_len_utf8(argsv);
}
+ is_utf = TRUE;
}
}
goto string;
@@ -5873,7 +5881,10 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV
*/
if (!args)
goto unknown;
- eptr = SvPVx(va_arg(*args, SV*), elen);
+ argsv = va_arg(*args,SV*);
+ eptr = SvPVx(argsv, elen);
+ if (DO_UTF8(argsv))
+ is_utf = TRUE;
string:
if (has_precis && elen > precis)
@@ -6216,6 +6227,8 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV
memset(p, ' ', gap);
p += gap;
}
+ if (is_utf)
+ SvUTF8_on(sv);
*p = '\0';
SvCUR(sv) = p - SvPVX(sv);
}