summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--pod/perlreapi.pod10
-rw-r--r--pp.c37
-rw-r--r--regcomp.c6
-rw-r--r--regexp.h1
4 files changed, 53 insertions, 1 deletions
diff --git a/pod/perlreapi.pod b/pod/perlreapi.pod
index 1e3586942c..54257408ca 100644
--- a/pod/perlreapi.pod
+++ b/pod/perlreapi.pod
@@ -188,6 +188,16 @@ whether RXf_PMf_LOCALE is set.
Perl's engine sets this flag if the pattern is C<\s+>.
+=item RXf_NULL
+
+Tells the split operator to split the target string on
+characters. The definition of character varies depending on whether
+the target string is a UTF-8 string.
+
+Perl's engine sets this flag on empty patterns. This optimization
+makes C<split //> much faster than it would otherwise be; it's even
+faster than C<unpack>.
+
=back
=head2 exec
diff --git a/pp.c b/pp.c
index 51af7d8c41..02e530f974 100644
--- a/pp.c
+++ b/pp.c
@@ -4711,6 +4711,43 @@ PP(pp_split)
s = m;
}
}
+ else if (rx->extflags & RXf_NULL && !(s >= strend)) {
+ /*
+ Pre-extend the stack, either the number of bytes or
+ characters in the string or a limited amount, triggered by:
+
+ my ($x, $y) = split //, $str;
+ or
+ split //, $str, $i;
+ */
+ const U32 items = limit - 1;
+ if (items < slen)
+ EXTEND(SP, items);
+ else
+ EXTEND(SP, slen);
+
+ while (--limit) {
+ m = s;
+
+ if (do_utf8)
+ s += UTF8SKIP(s);
+ else
+ ++s;
+
+ dstr = newSVpvn(m, s-m);
+
+ if (make_mortal)
+ sv_2mortal(dstr);
+ if (do_utf8)
+ (void)SvUTF8_on(dstr);
+
+ PUSHs(dstr);
+
+ /* are we there yet? */
+ if (s >= strend)
+ break;
+ }
+ }
else if (do_utf8 == ((rx->extflags & RXf_UTF8) != 0) &&
(rx->extflags & RXf_USE_INTUIT) && !rx->nparens
&& (rx->extflags & RXf_CHECK_ALL)
diff --git a/regcomp.c b/regcomp.c
index cada4cdd26..f06fb6848c 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -4753,6 +4753,8 @@ reStudy:
r->paren_names = NULL;
#ifdef STUPID_PATTERN_CHECKS
+ if (r->prelen == 0)
+ r->extflags |= RXf_NULL;
if (r->extflags & RXf_SPLIT && r->prelen == 1 && r->precomp[0] == ' ')
/* XXX: this should happen BEFORE we compile */
r->extflags |= (RXf_SKIPWHITE|RXf_WHITE);
@@ -4769,7 +4771,9 @@ reStudy:
U8 fop = OP(first);
U8 nop = OP(NEXTOPER(first));
- if (PL_regkind[fop] == BOL && nop == END)
+ if (PL_regkind[fop] == NOTHING && nop == END)
+ r->extflags |= RXf_NULL;
+ else if (PL_regkind[fop] == BOL && nop == END)
r->extflags |= RXf_START_ONLY;
else if (fop == PLUS && nop ==SPACE && OP(regnext(first))==END)
r->extflags |= RXf_WHITE;
diff --git a/regexp.h b/regexp.h
index 27f17e71c9..6de89c9fd2 100644
--- a/regexp.h
+++ b/regexp.h
@@ -240,6 +240,7 @@ and check for NULL.
#define RXf_SKIPWHITE 0x00000100 /* Pattern is for a split / / */
#define RXf_START_ONLY 0x00000200 /* Pattern is /^/ */
#define RXf_WHITE 0x00000400 /* Pattern is /\s+/ */
+#define RXf_NULL 0x40000000 /* Pattern is // */
/* 0x1F800 of extflags is used by (RXf_)PMf_COMPILETIME */
#define RXf_PMf_LOCALE 0x00000800 /* use locale */