summary | refs | log | tree | commit | diff
path: root/pp.c
diff options
context:
space:
mode:
author: SADAHIRO Tomoyuki <BQW10602@nifty.com>, 2007-01-20 09:52:42 +0900
committer: Nicholas Clark <nick@ccl4.org>, 2007-01-19 21:11:40 +0000
commit: 613f191e047db38572099edee06ae7ede4343b5b (patch)
tree: 10c80a637c6af8e44e27d83efcbf6f6f15cd7d97 /pp.c
parent: 36dfb0728c176423708a8d381586c8e380e47696 (diff)
download: perl-613f191e047db38572099edee06ae7ede4343b5b.tar.gz
Re: [PATCH] fix unicode split /\s+/
Message-Id: <20070120005232.D9CC.BQW10602@nifty.com>
Date: Sat, 20 Jan 2007 00:52:42 +0900
p4raw-id: //depot/perl@29887
Diffstat (limited to 'pp.c')
-rw-r--r-- pp.c 36
1 files changed, 20 insertions, 16 deletions
diff --git a/pp.c b/pp.c
index 4b021c0d6d..977c2b71f8 100644
--- a/pp.c
+++ b/pp.c
@@ -4590,7 +4590,11 @@ PP(pp_split)
base = SP - PL_stack_base;
orig = s;
if (pm->op_pmflags & PMf_SKIPWHITE) {
- if (pm->op_pmflags & PMf_LOCALE) {
+ if (do_utf8) {
+ while (*s == ' ' || is_utf8_space((U8*)s))
+ s += UTF8SKIP(s);
+ }
+ else if (pm->op_pmflags & PMf_LOCALE) {
while (isSPACE_LC(*s))
s++;
}
@@ -4606,22 +4610,18 @@ PP(pp_split)
if (!limit)
limit = maxiters + 2;
if (pm->op_pmflags & PMf_WHITE) {
- if (do_utf8 && !PL_utf8_space) {
- /* force PL_utf8_space to be loaded */
- bool ok;
- ENTER;
- ok = is_utf8_space((const U8*)" ");
- assert(ok);
- LEAVE;
- }
while (--limit) {
m = s;
/* this one uses 'm' and is a negative test */
if (do_utf8) {
- STRLEN uskip;
- while (m < strend &&
- !( *m == ' ' || swash_fetch(PL_utf8_space,(U8*)m, do_utf8) ))
- m += UTF8SKIP(m);
+ while (m < strend && !( *m == ' ' || is_utf8_space((U8*)m) )) {
+ const int t = UTF8SKIP(m);
+ /* is_utf8_space returns FALSE for malform utf8 */
+ if (strend - m < t)
+ m = strend;
+ else
+ m += t;
+ }
} else if (pm->op_pmflags & PMf_LOCALE) {
while (m < strend && !isSPACE_LC(*m))
++m;
@@ -4639,11 +4639,15 @@ PP(pp_split)
(void)SvUTF8_on(dstr);
XPUSHs(dstr);
- s = m + 1;
+ /* skip the whitespace found last */
+ if (do_utf8)
+ s = m + UTF8SKIP(m);
+ else
+ s = m + 1;
+
/* this one uses 's' and is a positive test */
if (do_utf8) {
- while (s < strend &&
- ( *s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8) ))
+ while (s < strend && ( *s == ' ' || is_utf8_space((U8*)s) ))
s += UTF8SKIP(s);
} else if (pm->op_pmflags & PMf_LOCALE) {
while (s < strend && isSPACE_LC(*s))