summary refs log tree commit diff
path: root/pp.c
diff options
context:
space:
mode:
author Inaba Hiroto <inaba@st.rim.or.jp> 2000-12-30 23:27:10 +0900
committer Jarkko Hietaniemi <jhi@iki.fi> 2000-12-30 17:14:19 +0000
commit 1aa99e6b6d14c469ac825dde483d9c9f913a3ee2 (patch)
tree 76ac8dd0d9473e84a19f6184baa4eddb6337e7b1 /pp.c
parent 13e8c8e316d3839d0834fb8b851566b00d81e876 (diff)
download perl-1aa99e6b6d14c469ac825dde483d9c9f913a3ee2.tar.gz
more UTF8 test suites and an UTF8 patch
Message-ID: <3A4D722D.243AFD88@st.rim.or.jp>
Just the patch part for now, and the pragma renamed as unicode::distinct.
p4raw-id: //depot/perl@8267
Diffstat (limited to 'pp.c')
-rw-r--r-- pp.c 14
1 files changed, 10 insertions, 4 deletions
diff --git a/pp.c b/pp.c
index 11506977b2..ff4508c24f 100644
--- a/pp.c
+++ b/pp.c
@@ -5739,9 +5739,9 @@ PP(pp_split)
AV *ary;
register IV limit = POPi; /* note, negative is forever */
SV *sv = POPs;
- bool do_utf8 = DO_UTF8(sv);
STRLEN len;
register char *s = SvPV(sv, len);
+ bool do_utf8 = DO_UTF8(sv);
char *strend = s + len;
register PMOP *pm;
register REGEXP *rx;
@@ -5878,7 +5878,7 @@ PP(pp_split)
SV *csv = CALLREG_INTUIT_STRING(aTHX_ rx);
len = rx->minlen;
- if (len == 1 && !tail) {
+ if (len == 1 && !(rx->reganch & ROPT_UTF8) && !tail) {
STRLEN n_a;
char c = *SvPV(csv, n_a);
while (--limit) {
@@ -5895,7 +5895,10 @@ PP(pp_split)
XPUSHs(dstr);
/* The rx->minlen is in characters but we want to step
* s ahead by bytes. */
- s = m + (do_utf8 ? SvCUR(csv) : len);
+ if (do_utf8)
+ s = (char*)utf8_hop((U8*)m, len);
+ else
+ s = m + len; /* Fake \n at the end */
}
}
else {
@@ -5914,7 +5917,10 @@ PP(pp_split)
XPUSHs(dstr);
/* The rx->minlen is in characters but we want to step
* s ahead by bytes. */
- s = m + (do_utf8 ? SvCUR(csv) : len); /* Fake \n at the end */
+ if (do_utf8)
+ s = (char*)utf8_hop((U8*)m, len);
+ else
+ s = m + len; /* Fake \n at the end */
}
}
}