diff options
author | Karl Williamson <khw@cpan.org> | 2020-02-17 11:24:19 -0700 |
---|---|---|
committer | Karl Williamson <khw@cpan.org> | 2020-02-19 18:18:40 -0700 |
commit | 86451f01babb7e6115c520111d32b1128dcc8f57 (patch) | |
tree | 3eb6740b2a8b4d9e8234f60a4e1c8d7e8e092d11 | |
parent | f1f5d51d61d3b95058950a4835a9cadfd8f2ad28 (diff) | |
download | perl-86451f01babb7e6115c520111d32b1128dcc8f57.tar.gz |
regcomp.sym: Add new regnode type for (?[])
This new regnode is used to handle interpolated already-compiled regex
sets inside outer regex sets.
If the node isn't present, it means that what appears to be a nested,
interpolated set really isn't one.
I created a new regnode structure to hold a pointer. This has to be
temporary as pointers can be invalidated. I thought of just having a
regnode without a pointer as a marker, and using a parallel array to
store the data, rather than creating a whole new regnode structure for
just pointers, but parallel data structures can get out of sync, so this
seemed best.
This commit just sets up the regnode; a future commit will actually use
it.
-rw-r--r-- | pod/perldebguts.pod | 3 | ||||
-rw-r--r-- | regcomp.h | 19 | ||||
-rw-r--r-- | regcomp.sym | 2 | ||||
-rw-r--r-- | regnodes.h | 13 |
4 files changed, 33 insertions, 4 deletions
diff --git a/pod/perldebguts.pod b/pod/perldebguts.pod index 5a205e0ca7..cc908393cf 100644 --- a/pod/perldebguts.pod +++ b/pod/perldebguts.pod @@ -858,6 +858,9 @@ will be lost. # mean "not seen anything to optimize yet". PSEUDO off Pseudo opcode for internal use. + REGEX_SET depth p Regex set, temporary node used in pre- + optimization compilation + =for regcomp.pl end =for unprinted-credits @@ -181,6 +181,16 @@ struct regnode_1 { U32 arg1; }; +/* Node whose argument is 'void *', a pointer to void. This needs to be used + * very carefully in situations where pointers won't become invalid because of, + * say re-mallocs */ +struct regnode_p { + U8 flags; + U8 type; + U16 next_off; + void * arg1; +}; + /* Similar to a regnode_1 but with an extra signed argument */ struct regnode_2L { U8 flags; @@ -296,11 +306,13 @@ struct regnode_ssc { #undef ARG2 #define ARG(p) ARG_VALUE(ARG_LOC(p)) +#define ARGp(p) ARG_VALUE(ARGp_LOC(p)) #define ARG1(p) ARG_VALUE(ARG1_LOC(p)) #define ARG2(p) ARG_VALUE(ARG2_LOC(p)) #define ARG2L(p) ARG_VALUE(ARG2L_LOC(p)) #define ARG_SET(p, val) ARG__SET(ARG_LOC(p), (val)) +#define ARGp_SET(p, val) ARG__SET(ARGp_LOC(p), (val)) #define ARG1_SET(p, val) ARG__SET(ARG1_LOC(p), (val)) #define ARG2_SET(p, val) ARG__SET(ARG2_LOC(p), (val)) #define ARG2L_SET(p, val) ARG__SET(ARG2L_LOC(p), (val)) @@ -388,6 +400,7 @@ struct regnode_ssc { #define NODE_ALIGN(node) #define ARG_LOC(p) (((struct regnode_1 *)p)->arg1) +#define ARGp_LOC(p) (((struct regnode_p *)p)->arg1) #define ARG1_LOC(p) (((struct regnode_2 *)p)->arg1) #define ARG2_LOC(p) (((struct regnode_2 *)p)->arg2) #define ARG2L_LOC(p) (((struct regnode_2L *)p)->arg2) @@ -416,6 +429,12 @@ struct regnode_ssc { * that have a longer argument */ \ (offset) += regarglen[op]; \ } STMT_END +#define FILL_ADVANCE_NODE_ARGp(offset, op, arg) \ + STMT_START { \ + ARGp_SET(REGNODE_p(offset), arg); \ + FILL_ADVANCE_NODE(offset, op); \ + (offset) += regarglen[op]; \ + } STMT_END #define FILL_ADVANCE_NODE_2L_ARG(offset, op, 
arg1, arg2) \ STMT_START { \ ARG_SET(REGNODE_p(offset), arg1); \ diff --git a/regcomp.sym b/regcomp.sym index 4b8670fa2c..7c0bf7a484 100644 --- a/regcomp.sym +++ b/regcomp.sym @@ -297,6 +297,8 @@ OPTIMIZED NOTHING, off ; Placeholder for dump. #* mean "not seen anything to optimize yet". PSEUDO PSEUDO, off ; Pseudo opcode for internal use. +REGEX_SET REGEX_SET, depth p S ; Regex set, temporary node used in pre-optimization compilation + ------------------------------------------------------------------------------- # Format for second section: # REGOP \t typelist [ \t typelist] diff --git a/regnodes.h b/regnodes.h index cce9d7f1fc..46e090f39c 100644 --- a/regnodes.h +++ b/regnodes.h @@ -6,8 +6,8 @@ /* Regops and State definitions */ -#define REGNODE_MAX 108 -#define REGMATCH_STATE_MAX 148 +#define REGNODE_MAX 109 +#define REGMATCH_STATE_MAX 149 #define END 0 /* 0000 End of program. */ #define SUCCEED 1 /* 0x01 Return from a subroutine, basically. */ @@ -120,6 +120,7 @@ #define LNBREAK 106 /* 0x6a generic newline pattern */ #define OPTIMIZED 107 /* 0x6b Placeholder for dump. */ #define PSEUDO 108 /* 0x6c Pseudo opcode for internal use. 
*/ +#define REGEX_SET 109 /* 0x6d Regex set, temporary node used in pre-optimization compilation */ /* ------------ States ------------- */ #define TRIE_next (REGNODE_MAX + 1) /* state for TRIE */ #define TRIE_next_fail (REGNODE_MAX + 2) /* state for TRIE */ @@ -277,6 +278,7 @@ EXTCONST U8 PL_regkind[] = { LNBREAK, /* LNBREAK */ NOTHING, /* OPTIMIZED */ PSEUDO, /* PSEUDO */ + REGEX_SET, /* REGEX_SET */ /* ------------ States ------------- */ TRIE, /* TRIE_next */ TRIE, /* TRIE_next_fail */ @@ -435,6 +437,7 @@ static const U8 regarglen[] = { 0, /* LNBREAK */ 0, /* OPTIMIZED */ 0, /* PSEUDO */ + EXTRA_SIZE(struct regnode_p), /* REGEX_SET */ }; /* reg_off_by_arg[] - Which argument holds the offset to the next node */ @@ -549,6 +552,7 @@ static const char reg_off_by_arg[] = { 0, /* LNBREAK */ 0, /* OPTIMIZED */ 0, /* PSEUDO */ + 0, /* REGEX_SET */ }; #endif /* REG_COMP_C */ @@ -669,6 +673,7 @@ EXTCONST char * const PL_reg_name[] = { "LNBREAK", /* 0x6a */ "OPTIMIZED", /* 0x6b */ "PSEUDO", /* 0x6c */ + "REGEX_SET", /* 0x6d */ /* ------------ States ------------- */ "TRIE_next", /* REGNODE_MAX +0x01 */ "TRIE_next_fail", /* REGNODE_MAX +0x02 */ @@ -817,7 +822,7 @@ EXTCONST U8 PL_simple[] __attribute__deprecated__; EXTCONST U8 PL_simple[] __attribute__deprecated__ = { REG_ANY, SANY, ANYOF, ANYOFD, ANYOFL, ANYOFPOSIXL, ANYOFH, ANYOFHb, ANYOFHr, ANYOFHs, ANYOFR, ANYOFRb, ANYOFM, NANYOFM, POSIXD, POSIXL, - POSIXU, POSIXA, NPOSIXD, NPOSIXL, NPOSIXU, NPOSIXA, + POSIXU, POSIXA, NPOSIXD, NPOSIXL, NPOSIXU, NPOSIXA, REGEX_SET, 0 }; #endif /* DOINIT */ @@ -826,7 +831,7 @@ EXTCONST U8 PL_simple[] __attribute__deprecated__ = { EXTCONST U8 PL_simple_bitmask[]; #else EXTCONST U8 PL_simple_bitmask[] = { - 0x00, 0x00, 0xFF, 0xFF, 0x3F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 + 0x00, 0x00, 0xFF, 0xFF, 0x3F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20 }; #endif /* DOINIT */ |