summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- regcomp.c 10
-rw-r--r-- regcomp.h 10
-rw-r--r-- regexec.c 4
3 files changed, 19 insertions, 5 deletions
diff --git a/regcomp.c b/regcomp.c
index 6d13681cda..0d91c5057e 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -8618,7 +8618,8 @@ parseit:
ANYOF_FLAGS(ret) |= ANYOF_CLASS;
}
- /* a bad range like a-\d, a-[:digit:] ? */
+ /* a bad range like a-\d, a-[:digit:]. The '-' is taken as a
+ * literal */
if (range) {
if (!SIZE_ONLY) {
const int w =
@@ -8821,7 +8822,9 @@ parseit:
ANYOF_FLAGS(ret) |= (FOLD || value < 256)
? ANYOF_NONBITMAP
: ANYOF_UTF8;
- if (prevnatvalue < natvalue) { /* what about > ? */
+ if (prevnatvalue < natvalue) { /* '>' case is fatal error above */
+
+ /* The \t sets the whole range */
Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\t%04"UVxf"\n",
prevnatvalue, natvalue);
}
@@ -8925,7 +8928,8 @@ parseit:
ANYOF_BITMAP(ret)[value] ^= 0xFF;
stored = 256 - stored;
- /* The inversion means that everything above 255 is matched */
+ /* The inversion means that everything above 255 is matched; and at the
+ * same time we clear the invert flag */
ANYOF_FLAGS(ret) = ANYOF_UTF8|ANYOF_UNICODE_ALL;
}
diff --git a/regcomp.h b/regcomp.h
index c15d681960..6adb47a183 100644
--- a/regcomp.h
+++ b/regcomp.h
@@ -312,7 +312,12 @@ struct regnode_charclass_class {
/* Flags for node->flags of several of the node types */
#define USE_UNI 0x01
-/* Flags for node->flags of ANYOF */
+/* Flags for node->flags of ANYOF. These are in short supply, so some games
+ * are done to share them, as described below. For flags that are applicable
+ * to the synthetic start class (stc) only, with some work, they could be put
+ * in the next-node field, or in an unused bit of the classflags field. Once
+ * the planned change to compile all the above-latin1 code points is done, then
+ * the UNICODE_ALL bit can be freed up */
#define ANYOF_LOCALE 0x01
@@ -327,7 +332,8 @@ struct regnode_charclass_class {
#define ANYOF_INVERT 0x04
-/* CLASS is never set unless LOCALE is too: has runtime \d, \w, [:posix:], ... */
+/* CLASS is never set unless LOCALE is too: has runtime \d, \w, [:posix:], ...
+ * The non-locale ones are resolved at compile-time */
#define ANYOF_CLASS 0x08
#define ANYOF_LARGE ANYOF_CLASS /* Same; name retained for back compat */
diff --git a/regexec.c b/regexec.c
index b7a8330290..7b69bbc617 100644
--- a/regexec.c
+++ b/regexec.c
@@ -6295,6 +6295,10 @@ Perl_regclass_swash(pTHX_ const regexp *prog, register const regnode* node, bool
bytes in p were matched. If there was no match, the value is undefined,
possibly changed from the input.
+ Note that this can be a synthetic start class, a combination of various
+ nodes, so things you think might be mutually exclusive, such as locale,
aren't. It can match both locale and non-locale.
+
*/
STATIC bool