summaryrefslogtreecommitdiff
path: root/op_reg_common.h
diff options
context:
space:
mode:
authorYves Orton <demerphq@gmail.com>2013-03-25 23:23:40 +0100
committerYves Orton <demerphq@gmail.com>2013-03-27 08:38:00 +0100
commitdbc200c5a1d3ae1d9360435a384c19883bf5f4f6 (patch)
tree2312197f897140b952835ad4d7864c03d9fcd791 /op_reg_common.h
parentc9d98c4e542a0779fb34f107a15def6ed7ff3f98 (diff)
downloadperl-dbc200c5a1d3ae1d9360435a384c19883bf5f4f6.tar.gz
rework split() special case interaction with regex engine
This patch resolves several issues at once. The parts are sufficiently interconnected that it is hard to break it down into smaller commits. The tickets open for these issues are: RT #94490 - split and constant folding RT #116086 - split "\x20" doesn't work as documented It additionally corrects some issues with cached regexes that were exposed by the split changes (and applied to them). It effectively reverts 5255171e6cd0accee6f76ea2980e32b3b5b8e171 and cccd1425414e6518c1fc8b7bcaccfb119320c513. Prior to this patch the special RXf_SKIPWHITE behavior of split(" ", $thing) was only available if Perl could resolve the first argument to split at compile time, meaning under various arcane situations. This manifested as oddities like my $delim = $cond ? " " : qr/\s+/; split $delim, $string; and split $cond ? " ", qr/\s+/, $string not behaving the same as: ($cond ? split(" ", $string) : split(/\s+/, $string)) which isn't very convenient. This patch changes this by adding a new flag to the op_pmflags, PMf_SPLIT which enables pp_regcomp() to know whether it was called as part of split, which allows the RXf_SPLIT to be passed into run time regex compilation. We also preserve the original flags so pattern caching works properly, by adding a new property to the regexp structure, "compflags", and related macros for accessing it. We preserve the original flags passed into the compilation process, so we can compare when we are trying to decide if we need to recompile. Note that this essentially the opposite fix from the one applied originally to fix #94490 in 5255171e6cd0accee6f76ea2980e32b3b5b8e171. The reverted patch was meant to make: split( 0 || " ", $thing ) #1 consistent with my $x=0; split( $x || " ", $thing ) #2 and not with split( " ", $thing ) #3 This was reverted because it broke C<split("\x{20}", $thing)>, and because one might argue that is not that #1 does the wrong thing, but rather that the behavior of #2 that is wrong. In other words we might expect that all three should behave the same as #3, and that instead of "fixing" the behavior of #1 to be like #2, we should really fix the behavior of #2 to behave like #3. (Which is what we did.) Also, it doesn't make sense to move the special case detection logic further from the regex engine. We really want the regex engine to decide this stuff itself, otherwise split " ", ... wouldn't work properly with an alternate engine. (Imagine we add a special regexp meta pattern that behaves the same as " " does in a split /.../. For instance we might make split /(*SPLITWHITE)/ trigger the same behavior as split " ". The other major change as result of this patch is it effectively reverts commit cccd1425414e6518c1fc8b7bcaccfb119320c513, which was intended to get rid of RXf_SPLIT and RXf_SKIPWHITE, which and free up bits in the regex flags structure. But we dont want to get rid of these vars, and it turns out that RXf_SEEN_LOOKBEHIND is used only in the same situation as the new RXf_MODIFIES_VARS. So I have renamed RXf_SEEN_LOOKBEHIND to RXf_NO_INPLACE_SUBST, and then instead of using two vars we use only the one. Which in turn allows RXf_SPLIT and RXf_SKIPWHITE to have their bits back.
Diffstat (limited to 'op_reg_common.h')
-rw-r--r--op_reg_common.h23
1 files changed, 17 insertions, 6 deletions
diff --git a/op_reg_common.h b/op_reg_common.h
index eed483b75b..9dcdaaec63 100644
--- a/op_reg_common.h
+++ b/op_reg_common.h
@@ -27,6 +27,7 @@
* RXf_PMf_STD_PMMOD_SHIFT, followed by the p. See STD_PAT_MODS and
* INT_PAT_MODS in regexp.h for the reason contiguity is needed */
/* Make sure to update lib/re.pm when changing these! */
+/* Make sure you keep the pure PMf_ versions below in sync */
#define RXf_PMf_MULTILINE (1 << (RXf_PMf_STD_PMMOD_SHIFT+0)) /* /m */
#define RXf_PMf_SINGLELINE (1 << (RXf_PMf_STD_PMMOD_SHIFT+1)) /* /s */
#define RXf_PMf_FOLD (1 << (RXf_PMf_STD_PMMOD_SHIFT+2)) /* /i */
@@ -79,13 +80,23 @@ get_regex_charset(const U32 flags)
return (regex_charset) ((flags & RXf_PMf_CHARSET) >> _RXf_PMf_CHARSET_SHIFT);
}
+#define _RXf_PMf_SHIFT_COMPILETIME (RXf_PMf_STD_PMMOD_SHIFT+8)
+
+/*
+ Set in Perl_pmruntime if op_flags & OPf_SPECIAL, i.e. split. Will
+ be used by regex engines to check whether they should set
+ RXf_SKIPWHITE
+*/
+#define RXf_PMf_SPLIT (1<<(RXf_PMf_STD_PMMOD_SHIFT+8))
+
/* Next available bit after the above. Name begins with '_' so won't be
* exported by B */
-#define _RXf_PMf_SHIFT_NEXT (RXf_PMf_STD_PMMOD_SHIFT+8)
+#define _RXf_PMf_SHIFT_NEXT (RXf_PMf_STD_PMMOD_SHIFT+9)
/* Mask of the above bits. These need to be transferred from op_pmflags to
* re->extflags during compilation */
-#define RXf_PMf_COMPILETIME (RXf_PMf_MULTILINE|RXf_PMf_SINGLELINE|RXf_PMf_CHARSET|RXf_PMf_FOLD|RXf_PMf_EXTENDED|RXf_PMf_KEEPCOPY)
+#define RXf_PMf_COMPILETIME (RXf_PMf_MULTILINE|RXf_PMf_SINGLELINE|RXf_PMf_FOLD|RXf_PMf_EXTENDED|RXf_PMf_KEEPCOPY|RXf_PMf_CHARSET)
+#define RXf_PMf_FLAGCOPYMASK (RXf_PMf_MULTILINE|RXf_PMf_SINGLELINE|RXf_PMf_FOLD|RXf_PMf_EXTENDED|RXf_PMf_KEEPCOPY|RXf_PMf_CHARSET|RXf_PMf_SPLIT)
#if RXf_PMf_COMPILETIME > 255
# error RXf_PMf_COMPILETIME wont fit in U8 flags field of eval node
@@ -97,18 +108,18 @@ get_regex_charset(const U32 flags)
#define PMf_FOLD 1<<2
#define PMf_EXTENDED 1<<3
#define PMf_KEEPCOPY 1<<4
+#define PMf_CHARSET 7<<5
+#define PMf_SPLIT 1<<8
-#if PMf_MULTILINE != RXf_PMf_MULTILINE || PMf_SINGLELINE != RXf_PMf_SINGLELINE || PMf_FOLD != RXf_PMf_FOLD || PMf_EXTENDED != RXf_PMf_EXTENDED || PMf_KEEPCOPY != RXf_PMf_KEEPCOPY
+#if PMf_MULTILINE != RXf_PMf_MULTILINE || PMf_SINGLELINE != RXf_PMf_SINGLELINE || PMf_FOLD != RXf_PMf_FOLD || PMf_EXTENDED != RXf_PMf_EXTENDED || PMf_KEEPCOPY != RXf_PMf_KEEPCOPY || PMf_SPLIT != RXf_PMf_SPLIT || PMf_CHARSET != RXf_PMf_CHARSET
# error RXf_PMf defines are wrong
#endif
-#define PMf_COMPILETIME RXf_PMf_COMPILETIME
-
/* Error check that haven't left something out of this. This isn't done
* directly in the #define because doing so confuses regcomp.pl.
* (2**n - 1) is n 1 bits, so the below gets the contiguous bits between the
* beginning and ending shifts */
-#if RXf_PMf_COMPILETIME != (((1 << (_RXf_PMf_SHIFT_NEXT))-1) \
+#if RXf_PMf_COMPILETIME != (((1 << (_RXf_PMf_SHIFT_COMPILETIME))-1) \
& (~((1 << RXf_PMf_STD_PMMOD_SHIFT)-1)))
# error RXf_PMf_COMPILETIME is invalid
#endif