diff options
-rw-r--r-- | regcomp.c | 10 | ||||
-rw-r--r-- | regcomp.h | 10 | ||||
-rw-r--r-- | regexec.c | 4 |
3 files changed, 19 insertions, 5 deletions
@@ -8618,7 +8618,8 @@ parseit: ANYOF_FLAGS(ret) |= ANYOF_CLASS; } - /* a bad range like a-\d, a-[:digit:] ? */ + /* a bad range like a-\d, a-[:digit:]. The '-' is taken as a + * literal */ if (range) { if (!SIZE_ONLY) { const int w = @@ -8821,7 +8822,9 @@ parseit: ANYOF_FLAGS(ret) |= (FOLD || value < 256) ? ANYOF_NONBITMAP : ANYOF_UTF8; - if (prevnatvalue < natvalue) { /* what about > ? */ + if (prevnatvalue < natvalue) { /* '>' case is fatal error above */ + + /* The \t sets the whole range */ Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\t%04"UVxf"\n", prevnatvalue, natvalue); } @@ -8925,7 +8928,8 @@ parseit: ANYOF_BITMAP(ret)[value] ^= 0xFF; stored = 256 - stored; - /* The inversion means that everything above 255 is matched */ + /* The inversion means that everything above 255 is matched; and at the + * same time we clear the invert flag */ ANYOF_FLAGS(ret) = ANYOF_UTF8|ANYOF_UNICODE_ALL; } @@ -312,7 +312,12 @@ struct regnode_charclass_class { /* Flags for node->flags of several of the node types */ #define USE_UNI 0x01 -/* Flags for node->flags of ANYOF */ +/* Flags for node->flags of ANYOF. These are in short supply, so some games + * are done to share them, as described below. For flags that are applicable + * to the synthetic start class (stc) only, with some work, they could be put + * in the next-node field, or in an unused bit of the classflags field. Once + * the planned change to compile all the above-latin1 code points is done, then + * the UNICODE_ALL bit can be freed up */ #define ANYOF_LOCALE 0x01 @@ -327,7 +332,8 @@ struct regnode_charclass_class { #define ANYOF_INVERT 0x04 -/* CLASS is never set unless LOCALE is too: has runtime \d, \w, [:posix:], ... */ +/* CLASS is never set unless LOCALE is too: has runtime \d, \w, [:posix:], ... 
+ * The non-locale ones are resolved at compile-time */ #define ANYOF_CLASS 0x08 #define ANYOF_LARGE ANYOF_CLASS /* Same; name retained for back compat */ @@ -6295,6 +6295,10 @@ Perl_regclass_swash(pTHX_ const regexp *prog, register const regnode* node, bool bytes in p were matched. If there was no match, the value is undefined, possibly changed from the input. + Note that this can be a synthetic start class, a combination of various + nodes, so things you think might be mutually exclusive, such as locale, + aren't. It can match both locale and non-locale. + */ STATIC bool |