diff options
author | Yves Orton <demerphq@gmail.com> | 2007-01-19 03:14:06 +0100 |
---|---|---|
committer | Rafael Garcia-Suarez <rgarciasuarez@gmail.com> | 2007-01-19 09:33:00 +0000 |
commit | 8727f688bf9bab57862da9dd9073020b13c82940 (patch) | |
tree | 5a3990bb74f985536ce5a1f7e19b51329f5419b8 /pp.c | |
parent | 89086707de1a3d9012ea80f0c19441d3352a2f73 (diff) | |
download | perl-8727f688bf9bab57862da9dd9073020b13c82940.tar.gz |
fix unicode split /\s+/
Message-ID: <9b18b3110701181714r4f3bc9ebq9ba462eba8338734@mail.gmail.com>
p4raw-id: //depot/perl@29880
Diffstat (limited to 'pp.c')
-rw-r--r-- | pp.c | 41 |
1 files changed, 33 insertions, 8 deletions
```diff
@@ -4606,12 +4606,29 @@ PP(pp_split)
     if (!limit)
         limit = maxiters + 2;
     if (pm->op_pmflags & PMf_WHITE) {
+        if (do_utf8 && !PL_utf8_space) {
+            /* force PL_utf8_space to be loaded */
+            bool ok;
+            ENTER;
+            ok = is_utf8_space((const U8*)" ");
+            assert(ok);
+            LEAVE;
+        }
         while (--limit) {
             m = s;
-            while (m < strend &&
-                   !((pm->op_pmflags & PMf_LOCALE)
-                     ? isSPACE_LC(*m) : isSPACE(*m)))
-                ++m;
+            /* this one uses 'm' and is a negative test */
+            if (do_utf8) {
+                STRLEN uskip;
+                while (m < strend &&
+                       !( *m == ' ' || swash_fetch(PL_utf8_space,(U8*)m, do_utf8) ))
+                    m += UTF8SKIP(m);
+            } else if (pm->op_pmflags & PMf_LOCALE) {
+                while (m < strend && !isSPACE_LC(*m))
+                    ++m;
+            } else {
+                while (m < strend && !isSPACE(*m))
+                    ++m;
+            }
             if (m >= strend)
                 break;

@@ -4623,10 +4640,18 @@ PP(pp_split)
             XPUSHs(dstr);

             s = m + 1;
-            while (s < strend &&
-                   ((pm->op_pmflags & PMf_LOCALE)
-                    ? isSPACE_LC(*s) : isSPACE(*s)))
-                ++s;
+            /* this one uses 's' and is a positive test */
+            if (do_utf8) {
+                while (s < strend &&
+                       ( *s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8) ))
+                    s += UTF8SKIP(s);
+            } else if (pm->op_pmflags & PMf_LOCALE) {
+                while (s < strend && isSPACE_LC(*s))
+                    ++s;
+            } else {
+                while (s < strend && isSPACE(*s))
+                    ++s;
+            }
         }
     }
     else if (rx->extflags & RXf_START_ONLY) {
```