summary | refs | log | tree | commit | diff
path: root/pp.c
diff options
context:
space:
mode:
author Yves Orton <demerphq@gmail.com> 2007-01-19 03:14:06 +0100
committer Rafael Garcia-Suarez <rgarciasuarez@gmail.com> 2007-01-19 09:33:00 +0000
commit 8727f688bf9bab57862da9dd9073020b13c82940 (patch)
tree 5a3990bb74f985536ce5a1f7e19b51329f5419b8 /pp.c
parent 89086707de1a3d9012ea80f0c19441d3352a2f73 (diff)
download perl-8727f688bf9bab57862da9dd9073020b13c82940.tar.gz
fix unicode split /\s+/
Message-ID: <9b18b3110701181714r4f3bc9ebq9ba462eba8338734@mail.gmail.com>
p4raw-id: //depot/perl@29880
Diffstat (limited to 'pp.c')
-rw-r--r-- pp.c 41
1 files changed, 33 insertions, 8 deletions
diff --git a/pp.c b/pp.c
index 4523584068..4b021c0d6d 100644
--- a/pp.c
+++ b/pp.c
@@ -4606,12 +4606,29 @@ PP(pp_split)
if (!limit)
limit = maxiters + 2;
if (pm->op_pmflags & PMf_WHITE) {
+ if (do_utf8 && !PL_utf8_space) {
+ /* force PL_utf8_space to be loaded */
+ bool ok;
+ ENTER;
+ ok = is_utf8_space((const U8*)" ");
+ assert(ok);
+ LEAVE;
+ }
while (--limit) {
m = s;
- while (m < strend &&
- !((pm->op_pmflags & PMf_LOCALE)
- ? isSPACE_LC(*m) : isSPACE(*m)))
- ++m;
+ /* this one uses 'm' and is a negative test */
+ if (do_utf8) {
+ STRLEN uskip;
+ while (m < strend &&
+ !( *m == ' ' || swash_fetch(PL_utf8_space,(U8*)m, do_utf8) ))
+ m += UTF8SKIP(m);
+ } else if (pm->op_pmflags & PMf_LOCALE) {
+ while (m < strend && !isSPACE_LC(*m))
+ ++m;
+ } else {
+ while (m < strend && !isSPACE(*m))
+ ++m;
+ }
if (m >= strend)
break;
@@ -4623,10 +4640,18 @@ PP(pp_split)
XPUSHs(dstr);
s = m + 1;
- while (s < strend &&
- ((pm->op_pmflags & PMf_LOCALE)
- ? isSPACE_LC(*s) : isSPACE(*s)))
- ++s;
+ /* this one uses 's' and is a positive test */
+ if (do_utf8) {
+ while (s < strend &&
+ ( *s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8) ))
+ s += UTF8SKIP(s);
+ } else if (pm->op_pmflags & PMf_LOCALE) {
+ while (s < strend && isSPACE_LC(*s))
+ ++s;
+ } else {
+ while (s < strend && isSPACE(*s))
+ ++s;
+ }
}
}
else if (rx->extflags & RXf_START_ONLY) {