summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorFather Chrysostomos <sprout@cpan.org>2012-09-22 17:54:12 -0700
committerFather Chrysostomos <sprout@cpan.org>2012-09-22 18:50:50 -0700
commit5255171e6cd0accee6f76ea2980e32b3b5b8e171 (patch)
treef198188a949f401983d4493ed514365d2c09213c
parentd8e299374017857bcc055c31b6d4a808fb862100 (diff)
downloadperl-5255171e6cd0accee6f76ea2980e32b3b5b8e171.tar.gz
[perl #94490] const fold should not trigger special split " "
The easiest way to fix this was to move the special handling out of the regexp engine. Now a flag is set on the split op itself for this case. A real regexp is still created, as that is the most convenient way to propagate locale settings, and it prevents the need to rework pp_split to handle a null regexp. This also means that custom regexp plugins no longer need to handle split specially (which they all do currently).
-rw-r--r--dist/B-Deparse/Deparse.pm5
-rw-r--r--op.c10
-rw-r--r--op.h2
-rw-r--r--pp.c7
-rw-r--r--regcomp.c10
-rw-r--r--regen/regcomp.pl2
-rw-r--r--regexp.h16
-rw-r--r--t/op/split.t7
8 files changed, 35 insertions, 24 deletions
diff --git a/dist/B-Deparse/Deparse.pm b/dist/B-Deparse/Deparse.pm
index fd351409db..42f2645bcc 100644
--- a/dist/B-Deparse/Deparse.pm
+++ b/dist/B-Deparse/Deparse.pm
@@ -4657,8 +4657,11 @@ sub pp_split {
# handle special case of split(), and split(' ') that compiles to /\s+/
# Under 5.10, the reflags may be undef if the split regexp isn't a constant
+ # Under 5.17.5+, the special flag is on split itself.
$kid = $op->first;
- if ( $kid->flags & OPf_SPECIAL
+ if ( $op->flags & OPf_SPECIAL
+ or
+ $kid->flags & OPf_SPECIAL
and ( $] < 5.009 ? $kid->pmflags & PMf_SKIPWHITE()
: ($kid->reflags || 0) & RXf_SKIPWHITE() ) ) {
$exprs[0] = "' '";
diff --git a/op.c b/op.c
index 1406ffc653..9e4dd3004c 100644
--- a/op.c
+++ b/op.c
@@ -4594,9 +4594,6 @@ Perl_pmruntime(pTHX_ OP *o, OP *expr, bool isreg, I32 floor)
U32 rx_flags = pm->op_pmflags & RXf_PMf_COMPILETIME;
regexp_engine const *eng = current_re_engine();
- if (o->op_flags & OPf_SPECIAL)
- rx_flags |= RXf_SPLIT;
-
if (!has_code || !eng->op_comp) {
/* compile-time simple constant pattern */
@@ -9783,10 +9780,15 @@ Perl_ck_split(pTHX_ OP *o)
cLISTOPo->op_last = kid; /* There was only one element previously */
}
+ if (kid->op_type == OP_CONST && !(kid->op_private & OPpCONST_FOLDED)) {
+ SV * const sv = kSVOP->op_sv;
+ if (SvPOK(sv) && SvCUR(sv) == 1 && *SvPVX(sv) == ' ')
+ o->op_flags |= OPf_SPECIAL;
+ }
if (kid->op_type != OP_MATCH || kid->op_flags & OPf_STACKED) {
OP * const sibl = kid->op_sibling;
kid->op_sibling = 0;
- kid = pmruntime( newPMOP(OP_MATCH, OPf_SPECIAL), kid, 0, 0);
+ kid = pmruntime( newPMOP(OP_MATCH, 0), kid, 0, 0);
if (cLISTOPo->op_first == cLISTOPo->op_last)
cLISTOPo->op_last = kid;
cLISTOPo->op_first = kid;
diff --git a/op.h b/op.h
index 2bfaa0d9dd..3ddce78ba2 100644
--- a/op.h
+++ b/op.h
@@ -114,7 +114,7 @@ Deprecated. Use C<GIMME_V> instead.
/* On OP_ENTERSUB || OP_NULL, saw a "do". */
/* On OP_EXISTS, treat av as av, not avhv. */
/* On OP_(ENTER|LEAVE)EVAL, don't clear $@ */
- /* On pushre, rx is used as part of split, e.g. split " " */
+ /* On OP_SPLIT, special split " " */
/* On regcomp, "use re 'eval'" was in scope */
/* On OP_READLINE, was <$filehandle> */
/* On RV2[ACGHS]V, don't create GV--in
diff --git a/pp.c b/pp.c
index f99c460a8d..b57ee8492d 100644
--- a/pp.c
+++ b/pp.c
@@ -5299,6 +5299,7 @@ PP(pp_split)
STRLEN len;
const char *s = SvPV_const(sv, len);
const bool do_utf8 = DO_UTF8(sv);
+ const bool skipwhite = PL_op->op_flags & OPf_SPECIAL;
const char *strend = s + len;
PMOP *pm;
REGEXP *rx;
@@ -5329,7 +5330,7 @@ PP(pp_split)
rx = PM_GETRE(pm);
TAINT_IF(get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET &&
- (RX_EXTFLAGS(rx) & (RXf_WHITE | RXf_SKIPWHITE)));
+ (RX_EXTFLAGS(rx) & RXf_WHITE || skipwhite));
RX_MATCH_UTF8_set(rx, do_utf8);
@@ -5369,7 +5370,7 @@ PP(pp_split)
}
base = SP - PL_stack_base;
orig = s;
- if (RX_EXTFLAGS(rx) & RXf_SKIPWHITE) {
+ if (skipwhite) {
if (do_utf8) {
while (*s == ' ' || is_utf8_space((U8*)s))
s += UTF8SKIP(s);
@@ -5391,7 +5392,7 @@ PP(pp_split)
if (!limit)
limit = maxiters + 2;
- if (RX_EXTFLAGS(rx) & RXf_WHITE) {
+ if (RX_EXTFLAGS(rx) & RXf_WHITE || skipwhite) {
while (--limit) {
m = s;
/* this one uses 'm' and is a negative test */
diff --git a/regcomp.c b/regcomp.c
index 1236f53a96..61b52c91d9 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -6401,18 +6401,12 @@ reStudy:
#ifdef STUPID_PATTERN_CHECKS
if (RX_PRELEN(rx) == 0)
r->extflags |= RXf_NULL;
- if (r->extflags & RXf_SPLIT && RX_PRELEN(rx) == 1 && RX_PRECOMP(rx)[0] == ' ')
- /* XXX: this should happen BEFORE we compile */
- r->extflags |= (RXf_SKIPWHITE|RXf_WHITE);
- else if (RX_PRELEN(rx) == 3 && memEQ("\\s+", RX_PRECOMP(rx), 3))
+ if (RX_PRELEN(rx) == 3 && memEQ("\\s+", RX_PRECOMP(rx), 3))
r->extflags |= RXf_WHITE;
else if (RX_PRELEN(rx) == 1 && RXp_PRECOMP(rx)[0] == '^')
r->extflags |= RXf_START_ONLY;
#else
- if (r->extflags & RXf_SPLIT && RX_PRELEN(rx) == 1 && RX_PRECOMP(rx)[0] == ' ')
- /* XXX: this should happen BEFORE we compile */
- r->extflags |= (RXf_SKIPWHITE|RXf_WHITE);
- else {
+ {
regnode *first = ri->program + 1;
U8 fop = OP(first);
diff --git a/regen/regcomp.pl b/regen/regcomp.pl
index eef553311b..16091caae1 100644
--- a/regen/regcomp.pl
+++ b/regen/regcomp.pl
@@ -253,7 +253,7 @@ foreach my $file ("op_reg_common.h", "regexp.h") {
# optional leading '_'. Return symbol in $1, and strip it from
# rest of line
- if (s/ \#define \s+ ( _? RXf_ \w+ ) \s+ //xi) {
+ if (s/ \# \s* define \s+ ( _? RXf_ \w+ ) \s+ //xi) {
chomp;
my $define = $1;
s: / \s* \* .*? \* \s* / : :x; # Replace comments by a blank
diff --git a/regexp.h b/regexp.h
index 1fe4c3f468..c515667b00 100644
--- a/regexp.h
+++ b/regexp.h
@@ -395,11 +395,14 @@ get_regex_charset_name(const U32 flags, STRLEN* const lenp)
#define RXf_INTUIT_TAIL (1<<(RXf_BASE_SHIFT+14))
/*
- Set in Perl_pmruntime if op_flags & OPf_SPECIAL, i.e. split. Will
- be used by regex engines to check whether they should set
- RXf_SKIPWHITE
+ This used to be set in Perl_pmruntime if op_flags & OPf_SPECIAL, i.e.
+ split. It was used by the regex engine to check whether it should set
+ RXf_SKIPWHITE. Regexp plugins on CPAN also have done the same thing
+ historically, so we leave this flag defined, even though it is never set.
*/
-#define RXf_SPLIT (1<<(RXf_BASE_SHIFT+15))
+#if !defined(PERL_CORE) || defined(PERL_IN_DUMP_C)
+# define RXf_SPLIT (1<<(RXf_BASE_SHIFT+15))
+#endif
#define RXf_USE_INTUIT (RXf_USE_INTUIT_NOML|RXf_USE_INTUIT_ML)
@@ -414,7 +417,10 @@ get_regex_charset_name(const U32 flags, STRLEN* const lenp)
/* Flags indicating special patterns */
#define RXf_START_ONLY (1<<(RXf_BASE_SHIFT+19)) /* Pattern is /^/ */
-#define RXf_SKIPWHITE (1<<(RXf_BASE_SHIFT+20)) /* Pattern is for a split " " */
+/* No longer used, but CPAN modules still mention it. */
+#if !defined(PERL_CORE) || defined(PERL_IN_DUMP_C)
+# define RXf_SKIPWHITE (1<<(RXf_BASE_SHIFT+20)) /* Pattern is for a split " " */
+#endif
#define RXf_WHITE (1<<(RXf_BASE_SHIFT+21)) /* Pattern is /\s+/ */
#define RXf_NULL (1U<<(RXf_BASE_SHIFT+22)) /* Pattern is // */
#if RXf_BASE_SHIFT+22 > 31
diff --git a/t/op/split.t b/t/op/split.t
index 69035030d7..76836d94c5 100644
--- a/t/op/split.t
+++ b/t/op/split.t
@@ -6,7 +6,7 @@ BEGIN {
require './test.pl';
}
-plan tests => 102;
+plan tests => 103;
$FS = ':';
@@ -417,3 +417,8 @@ is($cnt, scalar(@ary));
# 'my' doesn't trigger the bug
is "@PATH", "Font GlyphNames", "hybrid scalar-and-array context";
}
+
+# [perl #94490] constant folding should not invoke special split " "
+# behaviour.
+@_=split(0||" ","foo bar");
+is @_, 3, 'split(0||" ") is not treated like split(" ")';