diff options
author | Karl Williamson <public@khwilliamson.com> | 2013-09-24 10:32:37 -0600 |
---|---|---|
committer | Karl Williamson <public@khwilliamson.com> | 2013-09-24 11:36:21 -0600 |
commit | a0dd42312a1f26356d2fdf49656e45b77c2cefb5 (patch) | |
tree | a874f9b63db48c6840cb5b189f3623c63079c4c9 /regcomp.h | |
parent | 43a64b8b430604bd4d76fd256a5babdccaf0ab2b (diff) | |
download | perl-a0dd42312a1f26356d2fdf49656e45b77c2cefb5.tar.gz |
regcomp.c: Move bit to different data structure
Commit 899d20b99829f8ecdc14e1351b533bc62a354dea was used to free up a
bit in a flags field that had run out of bits at the time. Further work
has made that unnecessary, and this commit moves it back to the flags
field, which even after this commit has a spare bit (which is intended
to be used in a future commit).
Doing so makes this bit "just one of the guys", so it can be operated on
en masse with the others. This allows a little code to be removed, and
the knowledge of this flag to be mostly confined to lower-level subroutines.
Diffstat (limited to 'regcomp.h')
-rw-r--r-- | regcomp.h | 25 |
1 file changed, 17 insertions, 8 deletions
@@ -191,7 +191,9 @@ struct regnode_charclass_class { U32 classflags; /* and run-time */ }; -/* Synthetic start class, is a regnode_charclass_class plus an SV* */ +/* Synthetic start class; is a regnode_charclass_class plus an SV*. Note that + * the 'next_off' field is unused, as the SSC stands alone, so there is never a + * next node. */ struct regnode_ssc { U8 flags; /* ANYOF_POSIXL bit must go here */ U8 type; @@ -314,14 +316,16 @@ struct regnode_ssc { /* Flags for node->flags of ANYOF. These are in short supply, but there is one * currently available. If more than this are needed, the ANYOF_LOCALE and - * ANYOF_POSIXL bits could be shared, making a space penalty for all locale nodes. - * Also, the ABOVE_LATIN1_ALL bit could be freed up by resorting to creating a - * swash containing everything above 255. This introduces a performance - * penalty. Better would be to split it off into a separate node, which - * actually would improve performance a bit by allowing regexec.c to test for a - * UTF-8 character being above 255 without having to call a function nor + * ANYOF_POSIXL bits could be shared, making a space penalty for all locale + * nodes. Also, the ABOVE_LATIN1_ALL bit could be freed up by resorting to + * creating a swash containing everything above 255. This introduces a + * performance penalty. Better would be to split it off into a separate node, + * which actually would improve performance a bit by allowing regexec.c to test + * for a UTF-8 character being above 255 without having to call a function nor * calculate its code point value. However, this solution might need to have a - * second node type, ANYOF_SYNTHETIC_ABOVE_LATIN1_ALL */ + * second node type, ANYOF_SYNTHETIC_ABOVE_LATIN1_ALL. Several flags are not + * used in synthetic start class (SSC) nodes, so could be shared should new + * flags be needed for SSCs. 
*/ #define ANYOF_LOCALE 0x01 /* /l modifier */ @@ -333,6 +337,11 @@ struct regnode_ssc { #define ANYOF_INVERT 0x04 +/* For the SSC node only, which cannot be inverted, so is shared with that bit. + * This means "Does this SSC match an empty string?" This is used only during + * regex compilation. */ +#define ANYOF_EMPTY_STRING ANYOF_INVERT + /* Set if this is a regnode_charclass_posixl vs a regnode_charclass. This * is used for runtime \d, \w, [:posix:], ..., which are used only in locale * and the optimizer's synthetic start class. Non-locale \d, etc are resolved |