diff options
-rw-r--r-- | ext/B/t/concise-xs.t | 2 | ||||
-rw-r--r-- | op.c | 25 | ||||
-rw-r--r-- | op.h | 2 | ||||
-rw-r--r-- | pod/perlreapi.pod | 38 | ||||
-rw-r--r-- | regcomp.c | 8 | ||||
-rw-r--r-- | regexp.h | 9 |
6 files changed, 41 insertions, 43 deletions
diff --git a/ext/B/t/concise-xs.t b/ext/B/t/concise-xs.t index 94308301d3..e4a0241793 100644 --- a/ext/B/t/concise-xs.t +++ b/ext/B/t/concise-xs.t @@ -117,7 +117,7 @@ use Getopt::Std; use Carp; use Test::More tests => ( # per-pkg tests (function ct + require_ok) 40 + 16 # Data::Dumper, Digest::MD5 - + 521 + 276 # B::Deparse, B + + 521 + 277 # B::Deparse, B + 595 + 190 # POSIX, IO::Socket - 6); # fudge @@ -3424,32 +3424,15 @@ Perl_pmruntime(pTHX_ OP *o, OP *expr, bool isreg) pm = (PMOP*)o; if (expr->op_type == OP_CONST) { - STRLEN plen; SV * const pat = ((SVOP*)expr)->op_sv; - const char *p = SvPV_const(pat, plen); U32 pm_flags = pm->op_pmflags & PMf_COMPILETIME; - if ((o->op_flags & OPf_SPECIAL) && (plen == 1 && *p == ' ')) { - U32 was_readonly = SvREADONLY(pat); - if (was_readonly) { - if (SvFAKE(pat)) { - sv_force_normal_flags(pat, 0); - assert(!SvREADONLY(pat)); - was_readonly = 0; - } else { - SvREADONLY_off(pat); - } - } + if (o->op_flags & OPf_SPECIAL) + pm_flags |= RXf_SPLIT; - sv_setpvn(pat, "\\s+", 3); - - SvFLAGS(pat) |= was_readonly; - - p = SvPV_const(pat, plen); - pm_flags |= RXf_SKIPWHITE; - } - if (DO_UTF8(pat)) + if (DO_UTF8(pat)) pm_flags |= RXf_UTF8; + PM_SETRE(pm, CALLREGCOMP(pat, pm_flags)); #ifdef PERL_MAD @@ -120,7 +120,7 @@ Deprecated. Use C<GIMME_V> instead. /* On OP_EXISTS, treat av as av, not avhv. */ /* On OP_(ENTER|LEAVE)EVAL, don't clear $@ */ /* On OP_ENTERITER, loop var is per-thread */ - /* On pushre, re is /\s+/ imp. by split " " */ + /* On pushre, rx is used as part of split, e.g. split " " */ /* On regcomp, "use re 'eval'" was in scope */ /* On OP_READLINE, was <$filehandle> */ /* On RV2[ACGHS]V, don't create GV--in diff --git a/pod/perlreapi.pod b/pod/perlreapi.pod index 3b5dc852fd..084762dd94 100644 --- a/pod/perlreapi.pod +++ b/pod/perlreapi.pod @@ -124,20 +124,6 @@ Additional flags: =over 4 -=item RXf_SKIPWHITE - -If C<split> is invoked as C<split ' '> or with no arguments (which -really means C<split(' ', $_>, see L<split|perlfunc/split>), perl will set -this flag and change the pattern from C<" "> to C<"\s+"> before it's -passed to the comp routine. - -If the flag is present in C<< rx->extflags >> C<split> to delete -whitespace from the start of the subject string before it's operated -on. What is considered whitespace depends on whether the subject is a -UTF-8 string and whether the C<RXf_PMf_LOCALE> flag is set. - -This probably always be preserved verbatim in C<< rx->extflags >>. - =item RXf_PMf_LOCALE Set if C<use locale> is in effect. If present in C<< rx->extflags >> @@ -156,6 +142,16 @@ compilation. The perl engine for instance may upgrade non-UTF-8 strings to UTF-8 if the pattern includes constructs such as C<\x{...}> that can only match Unicode values. +=item RXf_SPLIT + +If C<split> is invoked as C<split ' '> or with no arguments (which +really means C<split(' ', $_>, see L<split|perlfunc/split>), perl will +set this flag. The regex engine can then check for it and set the +SKIPWHITE and WHITE extflags. To do this the perl engine does: + + if (flags & RXf_SPLIT && r->prelen == 1 && r->precomp[0] == ' ') + r->extflags |= (RXf_SKIPWHITE|RXf_WHITE); + =back These flags can be set during compilation to enable optimizations in @@ -163,6 +159,16 @@ the C<split> operator. =over 4 +=item RXf_SKIPWHITE + +If the flag is present in C<< rx->extflags >> C<split> will delete +whitespace from the start of the subject string before it's operated +on. What is considered whitespace depends on whether the subject is a +UTF-8 string and whether the C<RXf_PMf_LOCALE> flag is set. + +If RXf_WHITE is set in addition to this flag C<split> will behave like +C<split " "> under the perl engine. + =item RXf_START_ONLY Tells the split operator to split the target string on newlines @@ -180,9 +186,7 @@ without invoking the regex engine. The definition of whitespace varies depending on whether the target string is a UTF-8 string and on whether RXf_PMf_LOCALE is set. -Perl's engine sets this flag if the pattern is C<\s+>, which it will be if -the pattern actually was C<\s+> or if it was originally C<" "> (see -C<RXf_SKIPWHITE> above). +Perl's engine sets this flag if the pattern is C<\s+>. =back @@ -4751,8 +4751,12 @@ reStudy: r->paren_names = (HV*)SvREFCNT_inc(RExC_paren_names); else r->paren_names = NULL; - if (r->prelen == 3 && strnEQ("\\s+", r->precomp, 3)) /* precomp = "\\s+)" */ - r->extflags |= RXf_WHITE; + + if (r->extflags & RXf_SPLIT && r->prelen == 1 && r->precomp[0] == ' ') + /* XXX: this should happen BEFORE we compile */ + r->extflags |= (RXf_SKIPWHITE|RXf_WHITE); + else if (r->prelen == 3 && memEQ("\\s+", r->precomp, 3)) + r->extflags |= RXf_WHITE; else if (r->prelen == 1 && r->precomp[0] == '^') r->extflags |= RXf_START_ONLY; @@ -307,7 +307,14 @@ and check for NULL. #define RXf_USE_INTUIT_NOML 0x01000000 #define RXf_USE_INTUIT_ML 0x02000000 #define RXf_INTUIT_TAIL 0x04000000 -/* one bit here */ + +/* + Set in Perl_pmruntime if op_flags & OPf_SPECIAL, i.e. split. Will + be used by regex engines to check whether they should set + RXf_SKIPWHITE +*/ +#define RXf_SPLIT 0x08000000 + #define RXf_USE_INTUIT (RXf_USE_INTUIT_NOML|RXf_USE_INTUIT_ML) /* Copy and tainted info */ |