summaryrefslogtreecommitdiff
path: root/regcomp.h
diff options
context:
space:
mode:
author: Karl Williamson <public@khwilliamson.com> 2014-02-17 15:39:12 -0700
committer: Karl Williamson <public@khwilliamson.com> 2014-02-19 08:32:59 -0700
commit: 3b04b210101efbbbdf1d8095e181c4218cdf59c2 (patch)
tree: 0ab4999ede9e0c313c5d3b2500d6cd81d6446656 /regcomp.h
parent: 4afbae25415a29a2ea66f300c95436267450769c (diff)
download: perl-3b04b210101efbbbdf1d8095e181c4218cdf59c2.tar.gz
Change method of passing some info from regcomp to regexec
For the last several releases, the fact that an ANYOF node could match something outside its bitmap has been passed to regexec.c by having its ARG field not be -1 (appropriately cast). A bit was set if the match could occur even if the target string was not UTF-8 encoded. This design was used to save a bit, as previously there was a bit also for it matching UTF-8 strings. That design is no longer tenable, as a future commit will have a third (independent) reason for something to match outside the bitmap. This commit uses the current spare bit flag to indicate if the match can only occur if the target string is UTF-8.
Diffstat (limited to 'regcomp.h')
-rw-r--r--regcomp.h20
1 files changed, 6 insertions, 14 deletions
diff --git a/regcomp.h b/regcomp.h
index ad688d2545..1b00c201f7 100644
--- a/regcomp.h
+++ b/regcomp.h
@@ -347,17 +347,7 @@ struct regnode_ssc {
* reach this high). */
#define ANYOF_NONBITMAP_EMPTY ((U32) -1)
-/* The information used to be stored as as combination of the ANYOF_UTF8 and
- * ANYOF_NONBITMAP_NON_UTF8 bits in the flags field, but was moved out of there
- * to free up a bit for other uses. This tries to hide the change from
- * existing code as much as possible. Now, the data structure that goes in ARG
- * is not allocated unless it is needed, and that is what is used to determine
- * if there is something outside the bitmap. The code now assumes that if
- * that structure exists, that any UTF-8 encoded string should be tried against
- * it, but a non-UTF8-encoded string will be tried only if the
- * ANYOF_NONBITMAP_NON_UTF8 bit is also set. */
-#define ANYOF_NONBITMAP(node) (ARG(node) != ANYOF_NONBITMAP_EMPTY)
-/* Flags for node->flags of ANYOF. These are in short supply, with one
+/* Flags for node->flags of ANYOF. These are in short supply, with none
* currently available. The ABOVE_LATIN1_ALL bit could be freed up
* by resorting to creating a swash containing everything above 255. This
* introduces a performance penalty. An option that wouldn't slow things down
@@ -380,7 +370,9 @@ struct regnode_ssc {
* regex compilation. */
#define ANYOF_EMPTY_STRING ANYOF_INVERT
-/* spare 0x02 */
+/* Are there things that will match only if the target string is encoded in
+ * UTF-8? (This is not set if ANYOF_ABOVE_LATIN1_ALL is set) */
+#define ANYOF_UTF8 0x02
/* The fold is calculated and stored in the bitmap where possible at compile
* time. However under locale, the actual folding varies depending on
@@ -411,14 +403,14 @@ struct regnode_ssc {
* in utf8. */
#define ANYOF_NON_UTF8_NON_ASCII_ALL 0x80
-#define ANYOF_FLAGS_ALL (0xf5)
+#define ANYOF_FLAGS_ALL (0xff)
#define ANYOF_LOCALE_FLAGS (ANYOF_LOC_FOLD | ANYOF_POSIXL)
/* These are the flags that apply to both regular ANYOF nodes and synthetic
* start class nodes during construction of the SSC. During finalization of
* the SSC, other of the flags could be added to it */
-#define ANYOF_COMMON_FLAGS (ANYOF_WARN_SUPER)
+#define ANYOF_COMMON_FLAGS (ANYOF_WARN_SUPER|ANYOF_UTF8)
/* Character classes for node->classflags of ANYOF */
/* Should be synchronized with a table in regprop() */