diff options
author | Ævar Arnfjörð Bjarmason <avar@cpan.org> | 2007-08-09 07:49:16 +0000 |
---|---|---|
committer | Rafael Garcia-Suarez <rgarciasuarez@gmail.com> | 2007-08-09 10:10:37 +0000 |
commit | 640f820da331e7bc688f3f8820b2760fa5a09de6 (patch) | |
tree | cb0dcb9ac4b8e2fc5c49fcd2c6db441d7469a37c | |
parent | d3bf4b0e5207a91e509350df0ebcce4b2c9908f0 (diff) | |
download | perl-640f820da331e7bc688f3f8820b2760fa5a09de6.tar.gz |
Optimize split //
From: "Ævar Arnfjörð Bjarmason" <avarab@gmail.com>
Message-ID: <51dd1af80708090049p2cf4810ep5a437ad53f64fa78@mail.gmail.com>
p4raw-id: //depot/perl@31693
-rw-r--r-- | pod/perlreapi.pod | 10 | ||||
-rw-r--r-- | pp.c | 37 | ||||
-rw-r--r-- | regcomp.c | 6 | ||||
-rw-r--r-- | regexp.h | 1 |
4 files changed, 53 insertions, 1 deletions
diff --git a/pod/perlreapi.pod b/pod/perlreapi.pod index 1e3586942c..54257408ca 100644 --- a/pod/perlreapi.pod +++ b/pod/perlreapi.pod @@ -188,6 +188,16 @@ whether RXf_PMf_LOCALE is set. Perl's engine sets this flag if the pattern is C<\s+>. +=item RXf_NULL + +Tells the split operator to split the target string on +characters. The definition of character varies depending on whether +the target string is a UTF-8 string. + +Perl's engine sets this flag on empty patterns. This optimization +makes C<split //> much faster than it would otherwise be; it's even +faster than C<unpack>. + =back =head2 exec @@ -4711,6 +4711,43 @@ PP(pp_split) s = m; } } + else if (rx->extflags & RXf_NULL && !(s >= strend)) { + /* + Pre-extend the stack, either the number of bytes or + characters in the string or a limited amount, triggered by: + + my ($x, $y) = split //, $str; + or + split //, $str, $i; + */ + const U32 items = limit - 1; + if (items < slen) + EXTEND(SP, items); + else + EXTEND(SP, slen); + + while (--limit) { + m = s; + + if (do_utf8) + s += UTF8SKIP(s); + else + ++s; + + dstr = newSVpvn(m, s-m); + + if (make_mortal) + sv_2mortal(dstr); + if (do_utf8) + (void)SvUTF8_on(dstr); + + PUSHs(dstr); + + /* are we there yet? 
*/ + if (s >= strend) + break; + } + } else if (do_utf8 == ((rx->extflags & RXf_UTF8) != 0) && (rx->extflags & RXf_USE_INTUIT) && !rx->nparens && (rx->extflags & RXf_CHECK_ALL) @@ -4753,6 +4753,8 @@ reStudy: r->paren_names = NULL; #ifdef STUPID_PATTERN_CHECKS + if (r->prelen == 0) + r->extflags |= RXf_NULL; if (r->extflags & RXf_SPLIT && r->prelen == 1 && r->precomp[0] == ' ') /* XXX: this should happen BEFORE we compile */ r->extflags |= (RXf_SKIPWHITE|RXf_WHITE); @@ -4769,7 +4771,9 @@ reStudy: U8 fop = OP(first); U8 nop = OP(NEXTOPER(first)); - if (PL_regkind[fop] == BOL && nop == END) + if (PL_regkind[fop] == NOTHING && nop == END) + r->extflags |= RXf_NULL; + else if (PL_regkind[fop] == BOL && nop == END) r->extflags |= RXf_START_ONLY; else if (fop == PLUS && nop ==SPACE && OP(regnext(first))==END) r->extflags |= RXf_WHITE; @@ -240,6 +240,7 @@ and check for NULL. #define RXf_SKIPWHITE 0x00000100 /* Pattern is for a split / / */ #define RXf_START_ONLY 0x00000200 /* Pattern is /^/ */ #define RXf_WHITE 0x00000400 /* Pattern is /\s+/ */ +#define RXf_NULL 0x40000000 /* Pattern is // */ /* 0x1F800 of extflags is used by (RXf_)PMf_COMPILETIME */ #define RXf_PMf_LOCALE 0x00000800 /* use locale */ |