diff options
author | Jarkko Hietaniemi <jhi@iki.fi> | 2000-10-15 15:19:29 +0000 |
---|---|---|
committer | Jarkko Hietaniemi <jhi@iki.fi> | 2000-10-15 15:19:29 +0000 |
commit | 93f04dac2ed878fbeae5ba64df628ed3ab0b6e21 (patch) | |
tree | 6a8cdf1a14fe7c3cf774c2445401be66cff399aa /pp.c | |
parent | 2a9c8e45b384ebf49f1c7a8f39efef187bdbfb6f (diff) | |
download | perl-93f04dac2ed878fbeae5ba64df628ed3ab0b6e21.tar.gz |
split() utf8 fixes. Should fix both 20001014.001 and 20000426.003.
The problem was that rx->minlen was in chars while pp_split()
thought it would be in bytes.
p4raw-id: //depot/perl@7234
Diffstat (limited to 'pp.c')
-rw-r--r-- | pp.c | 33 |
1 file changed, 19 insertions, 14 deletions
```diff
@@ -4975,7 +4975,7 @@ PP(pp_split)
     AV *ary;
     register I32 limit = POPi; /* note, negative is forever */
     SV *sv = POPs;
-    bool isutf = DO_UTF8(sv);
+    bool doutf8 = DO_UTF8(sv);
     STRLEN len;
     register char *s = SvPV(sv, len);
     char *strend = s + len;
@@ -5078,7 +5078,7 @@ PP(pp_split)
             sv_setpvn(dstr, s, m-s);
             if (make_mortal)
                 sv_2mortal(dstr);
-            if (isutf)
+            if (doutf8)
                 (void)SvUTF8_on(dstr);
             XPUSHs(dstr);
 
@@ -5100,7 +5100,7 @@ PP(pp_split)
             sv_setpvn(dstr, s, m-s);
             if (make_mortal)
                 sv_2mortal(dstr);
-            if (isutf)
+            if (doutf8)
                 (void)SvUTF8_on(dstr);
             XPUSHs(dstr);
             s = m;
@@ -5111,11 +5111,11 @@ PP(pp_split)
              && !(rx->reganch & ROPT_ANCH)) {
         int tail = (rx->reganch & RE_INTUIT_TAIL);
         SV *csv = CALLREG_INTUIT_STRING(aTHX_ rx);
-        char c;
 
         len = rx->minlen;
         if (len == 1 && !tail) {
-            c = *SvPV(csv,len);
+            STRLEN n_a;
+            char c = *SvPV(csv, n_a);
             while (--limit) {
                 /*SUPPRESS 530*/
                 for (m = s; m < strend && *m != c; m++) ;
@@ -5125,10 +5125,12 @@ PP(pp_split)
                 sv_setpvn(dstr, s, m-s);
                 if (make_mortal)
                     sv_2mortal(dstr);
-                if (isutf)
+                if (doutf8)
                     (void)SvUTF8_on(dstr);
                 XPUSHs(dstr);
-                s = m + 1;
+                /* The rx->minlen is in characters but we want to step
+                 * s ahead by bytes. */
+                s = m + (doutf8 ? SvCUR(csv) : len);
             }
         }
         else {
@@ -5142,10 +5144,12 @@ PP(pp_split)
                 sv_setpvn(dstr, s, m-s);
                 if (make_mortal)
                     sv_2mortal(dstr);
-                if (isutf)
+                if (doutf8)
                     (void)SvUTF8_on(dstr);
                 XPUSHs(dstr);
-                s = m + len; /* Fake \n at the end */
+                /* The rx->minlen is in characters but we want to step
+                 * s ahead by bytes. */
+                s = m + (doutf8 ? SvCUR(csv) : len); /* Fake \n at the end */
             }
         }
     }
@@ -5171,7 +5175,7 @@ PP(pp_split)
             sv_setpvn(dstr, s, m-s);
             if (make_mortal)
                 sv_2mortal(dstr);
-            if (isutf)
+            if (doutf8)
                 (void)SvUTF8_on(dstr);
             XPUSHs(dstr);
             if (rx->nparens) {
@@ -5186,7 +5190,7 @@ PP(pp_split)
                         dstr = NEWSV(33, 0);
                     if (make_mortal)
                         sv_2mortal(dstr);
-                    if (isutf)
+                    if (doutf8)
                         (void)SvUTF8_on(dstr);
                     XPUSHs(dstr);
                 }
@@ -5202,11 +5206,12 @@ PP(pp_split)
 
     /* keep field after final delim? */
     if (s < strend || (iters && origlimit)) {
-        dstr = NEWSV(34, strend-s);
-        sv_setpvn(dstr, s, strend-s);
+        STRLEN l = strend - s;
+        dstr = NEWSV(34, l);
+        sv_setpvn(dstr, s, l);
         if (make_mortal)
             sv_2mortal(dstr);
-        if (isutf)
+        if (doutf8)
             (void)SvUTF8_on(dstr);
         XPUSHs(dstr);
         iters++;
```