summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Karl Williamson <khw@cpan.org> 2020-02-17 11:24:19 -0700
committer Karl Williamson <khw@cpan.org> 2020-02-19 18:18:40 -0700
commit 86451f01babb7e6115c520111d32b1128dcc8f57 (patch)
tree 3eb6740b2a8b4d9e8234f60a4e1c8d7e8e092d11
parent f1f5d51d61d3b95058950a4835a9cadfd8f2ad28 (diff)
download perl-86451f01babb7e6115c520111d32b1128dcc8f57.tar.gz
regcomp.sym: Add new regnode type for (?[])
This new regnode is used to handle interpolated already-compiled regex sets inside outer regex sets. If it isn't present, it will mean that what appears to be a nested, interpolated set really isn't. I created a new regnode structure to hold a pointer. This has to be temporary as pointers can be invalidated. I thought of just having a regnode without a pointer as a marker, and using a parallel array to store the data, rather than creating a whole new regnode structure for just pointers, but parallel data structures can get out of sync, so this seemed best. This commit just sets up the regnode; a future commit will actually use it.
-rw-r--r-- pod/perldebguts.pod 3
-rw-r--r-- regcomp.h 19
-rw-r--r-- regcomp.sym 2
-rw-r--r-- regnodes.h 13
4 files changed, 33 insertions, 4 deletions
diff --git a/pod/perldebguts.pod b/pod/perldebguts.pod
index 5a205e0ca7..cc908393cf 100644
--- a/pod/perldebguts.pod
+++ b/pod/perldebguts.pod
@@ -858,6 +858,9 @@ will be lost.
# mean "not seen anything to optimize yet".
PSEUDO off Pseudo opcode for internal use.
+ REGEX_SET depth p Regex set, temporary node used in pre-
+ optimization compilation
+
=for regcomp.pl end
=for unprinted-credits
diff --git a/regcomp.h b/regcomp.h
index 0104482d4b..67a4323ca5 100644
--- a/regcomp.h
+++ b/regcomp.h
@@ -181,6 +181,16 @@ struct regnode_1 {
U32 arg1;
};
+/* Node whose argument is 'void *', a pointer to void. This must be used very
+ * carefully, and only in situations where the pointer cannot become invalid
+ * because of, say, re-mallocs */
+struct regnode_p {
+ U8 flags;
+ U8 type;
+ U16 next_off;
+ void * arg1; /* the pointer argument; accessed through ARGp() / ARGp_SET() */
+};
+
/* Similar to a regnode_1 but with an extra signed argument */
struct regnode_2L {
U8 flags;
@@ -296,11 +306,13 @@ struct regnode_ssc {
#undef ARG2
#define ARG(p) ARG_VALUE(ARG_LOC(p))
+#define ARGp(p) ARG_VALUE(ARGp_LOC(p))
#define ARG1(p) ARG_VALUE(ARG1_LOC(p))
#define ARG2(p) ARG_VALUE(ARG2_LOC(p))
#define ARG2L(p) ARG_VALUE(ARG2L_LOC(p))
#define ARG_SET(p, val) ARG__SET(ARG_LOC(p), (val))
+#define ARGp_SET(p, val) ARG__SET(ARGp_LOC(p), (val))
#define ARG1_SET(p, val) ARG__SET(ARG1_LOC(p), (val))
#define ARG2_SET(p, val) ARG__SET(ARG2_LOC(p), (val))
#define ARG2L_SET(p, val) ARG__SET(ARG2L_LOC(p), (val))
@@ -388,6 +400,7 @@ struct regnode_ssc {
#define NODE_ALIGN(node)
#define ARG_LOC(p) (((struct regnode_1 *)p)->arg1)
+#define ARGp_LOC(p) (((struct regnode_p *)p)->arg1)
#define ARG1_LOC(p) (((struct regnode_2 *)p)->arg1)
#define ARG2_LOC(p) (((struct regnode_2 *)p)->arg2)
#define ARG2L_LOC(p) (((struct regnode_2L *)p)->arg2)
@@ -416,6 +429,12 @@ struct regnode_ssc {
* that have a longer argument */ \
(offset) += regarglen[op]; \
} STMT_END
+#define FILL_ADVANCE_NODE_ARGp(offset, op, arg) /* for nodes taking a pointer argument */ \
+ STMT_START { \
+ ARGp_SET(REGNODE_p(offset), arg); /* store 'arg' in the node's regnode_p arg1 slot */ \
+ FILL_ADVANCE_NODE(offset, op); \
+ (offset) += regarglen[op]; /* then step 'offset' by this op's regarglen[] entry */ \
+ } STMT_END
#define FILL_ADVANCE_NODE_2L_ARG(offset, op, arg1, arg2) \
STMT_START { \
ARG_SET(REGNODE_p(offset), arg1); \
diff --git a/regcomp.sym b/regcomp.sym
index 4b8670fa2c..7c0bf7a484 100644
--- a/regcomp.sym
+++ b/regcomp.sym
@@ -297,6 +297,8 @@ OPTIMIZED NOTHING, off ; Placeholder for dump.
#* mean "not seen anything to optimize yet".
PSEUDO PSEUDO, off ; Pseudo opcode for internal use.
+REGEX_SET REGEX_SET, depth p S ; Regex set, temporary node used in pre-optimization compilation
+
-------------------------------------------------------------------------------
# Format for second section:
# REGOP \t typelist [ \t typelist]
diff --git a/regnodes.h b/regnodes.h
index cce9d7f1fc..46e090f39c 100644
--- a/regnodes.h
+++ b/regnodes.h
@@ -6,8 +6,8 @@
/* Regops and State definitions */
-#define REGNODE_MAX 108
-#define REGMATCH_STATE_MAX 148
+#define REGNODE_MAX 109
+#define REGMATCH_STATE_MAX 149
#define END 0 /* 0000 End of program. */
#define SUCCEED 1 /* 0x01 Return from a subroutine, basically. */
@@ -120,6 +120,7 @@
#define LNBREAK 106 /* 0x6a generic newline pattern */
#define OPTIMIZED 107 /* 0x6b Placeholder for dump. */
#define PSEUDO 108 /* 0x6c Pseudo opcode for internal use. */
+#define REGEX_SET 109 /* 0x6d Regex set, temporary node used in pre-optimization compilation */
/* ------------ States ------------- */
#define TRIE_next (REGNODE_MAX + 1) /* state for TRIE */
#define TRIE_next_fail (REGNODE_MAX + 2) /* state for TRIE */
@@ -277,6 +278,7 @@ EXTCONST U8 PL_regkind[] = {
LNBREAK, /* LNBREAK */
NOTHING, /* OPTIMIZED */
PSEUDO, /* PSEUDO */
+ REGEX_SET, /* REGEX_SET */
/* ------------ States ------------- */
TRIE, /* TRIE_next */
TRIE, /* TRIE_next_fail */
@@ -435,6 +437,7 @@ static const U8 regarglen[] = {
0, /* LNBREAK */
0, /* OPTIMIZED */
0, /* PSEUDO */
+ EXTRA_SIZE(struct regnode_p), /* REGEX_SET */
};
/* reg_off_by_arg[] - Which argument holds the offset to the next node */
@@ -549,6 +552,7 @@ static const char reg_off_by_arg[] = {
0, /* LNBREAK */
0, /* OPTIMIZED */
0, /* PSEUDO */
+ 0, /* REGEX_SET */
};
#endif /* REG_COMP_C */
@@ -669,6 +673,7 @@ EXTCONST char * const PL_reg_name[] = {
"LNBREAK", /* 0x6a */
"OPTIMIZED", /* 0x6b */
"PSEUDO", /* 0x6c */
+ "REGEX_SET", /* 0x6d */
/* ------------ States ------------- */
"TRIE_next", /* REGNODE_MAX +0x01 */
"TRIE_next_fail", /* REGNODE_MAX +0x02 */
@@ -817,7 +822,7 @@ EXTCONST U8 PL_simple[] __attribute__deprecated__;
EXTCONST U8 PL_simple[] __attribute__deprecated__ = {
REG_ANY, SANY, ANYOF, ANYOFD, ANYOFL, ANYOFPOSIXL, ANYOFH, ANYOFHb,
ANYOFHr, ANYOFHs, ANYOFR, ANYOFRb, ANYOFM, NANYOFM, POSIXD, POSIXL,
- POSIXU, POSIXA, NPOSIXD, NPOSIXL, NPOSIXU, NPOSIXA,
+ POSIXU, POSIXA, NPOSIXD, NPOSIXL, NPOSIXU, NPOSIXA, REGEX_SET,
0
};
#endif /* DOINIT */
@@ -826,7 +831,7 @@ EXTCONST U8 PL_simple[] __attribute__deprecated__ = {
EXTCONST U8 PL_simple_bitmask[];
#else
EXTCONST U8 PL_simple_bitmask[] = {
- 0x00, 0x00, 0xFF, 0xFF, 0x3F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+ 0x00, 0x00, 0xFF, 0xFF, 0x3F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20
};
#endif /* DOINIT */