diff options
author | Father Chrysostomos <sprout@cpan.org> | 2012-09-22 17:54:12 -0700 |
---|---|---|
committer | Father Chrysostomos <sprout@cpan.org> | 2012-09-22 18:50:50 -0700 |
commit | 5255171e6cd0accee6f76ea2980e32b3b5b8e171 (patch) | |
tree | f198188a949f401983d4493ed514365d2c09213c | |
parent | d8e299374017857bcc055c31b6d4a808fb862100 (diff) | |
download | perl-5255171e6cd0accee6f76ea2980e32b3b5b8e171.tar.gz |
[perl #94490] const fold should not trigger special split " "
The easiest way to fix this was to move the special handling out of
the regexp engine. Now a flag is set on the split op itself for
this case. A real regexp is still created, as that is the most
convenient way to propagate locale settings, and it prevents the
need to rework pp_split to handle a null regexp.
This also means that custom regexp plugins no longer need to handle
split specially (which they all do currently).
-rw-r--r-- | dist/B-Deparse/Deparse.pm | 5 | ||||
-rw-r--r-- | op.c | 10 | ||||
-rw-r--r-- | op.h | 2 | ||||
-rw-r--r-- | pp.c | 7 | ||||
-rw-r--r-- | regcomp.c | 10 | ||||
-rw-r--r-- | regen/regcomp.pl | 2 | ||||
-rw-r--r-- | regexp.h | 16 | ||||
-rw-r--r-- | t/op/split.t | 7 |
8 files changed, 35 insertions, 24 deletions
diff --git a/dist/B-Deparse/Deparse.pm b/dist/B-Deparse/Deparse.pm index fd351409db..42f2645bcc 100644 --- a/dist/B-Deparse/Deparse.pm +++ b/dist/B-Deparse/Deparse.pm @@ -4657,8 +4657,11 @@ sub pp_split { # handle special case of split(), and split(' ') that compiles to /\s+/ # Under 5.10, the reflags may be undef if the split regexp isn't a constant + # Under 5.17.5+, the special flag is on split itself. $kid = $op->first; - if ( $kid->flags & OPf_SPECIAL + if ( $op->flags & OPf_SPECIAL + or + $kid->flags & OPf_SPECIAL and ( $] < 5.009 ? $kid->pmflags & PMf_SKIPWHITE() : ($kid->reflags || 0) & RXf_SKIPWHITE() ) ) { $exprs[0] = "' '"; @@ -4594,9 +4594,6 @@ Perl_pmruntime(pTHX_ OP *o, OP *expr, bool isreg, I32 floor) U32 rx_flags = pm->op_pmflags & RXf_PMf_COMPILETIME; regexp_engine const *eng = current_re_engine(); - if (o->op_flags & OPf_SPECIAL) - rx_flags |= RXf_SPLIT; - if (!has_code || !eng->op_comp) { /* compile-time simple constant pattern */ @@ -9783,10 +9780,15 @@ Perl_ck_split(pTHX_ OP *o) cLISTOPo->op_last = kid; /* There was only one element previously */ } + if (kid->op_type == OP_CONST && !(kid->op_private & OPpCONST_FOLDED)) { + SV * const sv = kSVOP->op_sv; + if (SvPOK(sv) && SvCUR(sv) == 1 && *SvPVX(sv) == ' ') + o->op_flags |= OPf_SPECIAL; + } if (kid->op_type != OP_MATCH || kid->op_flags & OPf_STACKED) { OP * const sibl = kid->op_sibling; kid->op_sibling = 0; - kid = pmruntime( newPMOP(OP_MATCH, OPf_SPECIAL), kid, 0, 0); + kid = pmruntime( newPMOP(OP_MATCH, 0), kid, 0, 0); if (cLISTOPo->op_first == cLISTOPo->op_last) cLISTOPo->op_last = kid; cLISTOPo->op_first = kid; @@ -114,7 +114,7 @@ Deprecated. Use C<GIMME_V> instead. /* On OP_ENTERSUB || OP_NULL, saw a "do". */ /* On OP_EXISTS, treat av as av, not avhv. */ /* On OP_(ENTER|LEAVE)EVAL, don't clear $@ */ - /* On pushre, rx is used as part of split, e.g. split " " */ + /* On OP_SPLIT, special split " " */ /* On regcomp, "use re 'eval'" was in scope */ /* On OP_READLINE, was <$filehandle> */ /* On RV2[ACGHS]V, don't create GV--in @@ -5299,6 +5299,7 @@ PP(pp_split) STRLEN len; const char *s = SvPV_const(sv, len); const bool do_utf8 = DO_UTF8(sv); + const bool skipwhite = PL_op->op_flags & OPf_SPECIAL; const char *strend = s + len; PMOP *pm; REGEXP *rx; @@ -5329,7 +5330,7 @@ PP(pp_split) rx = PM_GETRE(pm); TAINT_IF(get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET && - (RX_EXTFLAGS(rx) & (RXf_WHITE | RXf_SKIPWHITE))); + (RX_EXTFLAGS(rx) & RXf_WHITE || skipwhite)); RX_MATCH_UTF8_set(rx, do_utf8); @@ -5369,7 +5370,7 @@ PP(pp_split) } base = SP - PL_stack_base; orig = s; - if (RX_EXTFLAGS(rx) & RXf_SKIPWHITE) { + if (skipwhite) { if (do_utf8) { while (*s == ' ' || is_utf8_space((U8*)s)) s += UTF8SKIP(s); @@ -5391,7 +5392,7 @@ PP(pp_split) if (!limit) limit = maxiters + 2; - if (RX_EXTFLAGS(rx) & RXf_WHITE) { + if (RX_EXTFLAGS(rx) & RXf_WHITE || skipwhite) { while (--limit) { m = s; /* this one uses 'm' and is a negative test */ @@ -6401,18 +6401,12 @@ reStudy: #ifdef STUPID_PATTERN_CHECKS if (RX_PRELEN(rx) == 0) r->extflags |= RXf_NULL; - if (r->extflags & RXf_SPLIT && RX_PRELEN(rx) == 1 && RX_PRECOMP(rx)[0] == ' ') - /* XXX: this should happen BEFORE we compile */ - r->extflags |= (RXf_SKIPWHITE|RXf_WHITE); - else if (RX_PRELEN(rx) == 3 && memEQ("\\s+", RX_PRECOMP(rx), 3)) + if (RX_PRELEN(rx) == 3 && memEQ("\\s+", RX_PRECOMP(rx), 3)) r->extflags |= RXf_WHITE; else if (RX_PRELEN(rx) == 1 && RXp_PRECOMP(rx)[0] == '^') r->extflags |= RXf_START_ONLY; #else - if (r->extflags & RXf_SPLIT && RX_PRELEN(rx) == 1 && RX_PRECOMP(rx)[0] == ' ') - /* XXX: this should happen BEFORE we compile */ - r->extflags |= (RXf_SKIPWHITE|RXf_WHITE); - else { + { regnode *first = ri->program + 1; U8 fop = OP(first); diff --git a/regen/regcomp.pl b/regen/regcomp.pl index eef553311b..16091caae1 100644 --- a/regen/regcomp.pl +++ b/regen/regcomp.pl @@ -253,7 +253,7 @@ foreach my $file ("op_reg_common.h", "regexp.h") { # optional leading '_'. Return symbol in $1, and strip it from # rest of line - if (s/ \#define \s+ ( _? RXf_ \w+ ) \s+ //xi) { + if (s/ \# \s* define \s+ ( _? RXf_ \w+ ) \s+ //xi) { chomp; my $define = $1; s: / \s* \* .*? \* \s* / : :x; # Replace comments by a blank @@ -395,11 +395,14 @@ get_regex_charset_name(const U32 flags, STRLEN* const lenp) #define RXf_INTUIT_TAIL (1<<(RXf_BASE_SHIFT+14)) /* - Set in Perl_pmruntime if op_flags & OPf_SPECIAL, i.e. split. Will - be used by regex engines to check whether they should set - RXf_SKIPWHITE + This used to be set in Perl_pmruntime if op_flags & OPf_SPECIAL, i.e. + split. It was used by the regex engine to check whether it should set + RXf_SKIPWHITE. Regexp plugins on CPAN also have done the same thing + historically, so we leave this flag defined, even though it is never set. */ -#define RXf_SPLIT (1<<(RXf_BASE_SHIFT+15)) +#if !defined(PERL_CORE) || defined(PERL_IN_DUMP_C) +# define RXf_SPLIT (1<<(RXf_BASE_SHIFT+15)) +#endif #define RXf_USE_INTUIT (RXf_USE_INTUIT_NOML|RXf_USE_INTUIT_ML) @@ -414,7 +417,10 @@ get_regex_charset_name(const U32 flags, STRLEN* const lenp) /* Flags indicating special patterns */ #define RXf_START_ONLY (1<<(RXf_BASE_SHIFT+19)) /* Pattern is /^/ */ -#define RXf_SKIPWHITE (1<<(RXf_BASE_SHIFT+20)) /* Pattern is for a split " " */ +/* No longer used, but CPAN modules still mention it. */ +#if !defined(PERL_CORE) || defined(PERL_IN_DUMP_C) +# define RXf_SKIPWHITE (1<<(RXf_BASE_SHIFT+20)) /* Pattern is for a split " " */ +#endif #define RXf_WHITE (1<<(RXf_BASE_SHIFT+21)) /* Pattern is /\s+/ */ #define RXf_NULL (1U<<(RXf_BASE_SHIFT+22)) /* Pattern is // */ #if RXf_BASE_SHIFT+22 > 31 diff --git a/t/op/split.t b/t/op/split.t index 69035030d7..76836d94c5 100644 --- a/t/op/split.t +++ b/t/op/split.t @@ -6,7 +6,7 @@ BEGIN { require './test.pl'; } -plan tests => 102; +plan tests => 103; $FS = ':'; @@ -417,3 +417,8 @@ is($cnt, scalar(@ary)); # 'my' doesn't trigger the bug is "@PATH", "Font GlyphNames", "hybrid scalar-and-array context"; } + +# [perl #94490] constant folding should not invoke special split " " +# behaviour. +@_=split(0||" ","foo bar"); +is @_, 3, 'split(0||" ") is not treated like split(" ")'; |