summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--pod/perldebguts.pod15
-rw-r--r--regcomp.c13
-rw-r--r--regcomp.h35
-rw-r--r--regcomp.sym8
-rw-r--r--regnodes.h8
5 files changed, 33 insertions, 46 deletions
diff --git a/pod/perldebguts.pod b/pod/perldebguts.pod
index b042910a15..a587f820c1 100644
--- a/pod/perldebguts.pod
+++ b/pod/perldebguts.pod
@@ -601,12 +601,15 @@ will be lost.
# [Special] alternatives:
REG_ANY no Match any one character (except newline).
SANY no Match any one character.
- ANYOF sv 1 Match character in (or not in) this class,
- single char match only
- ANYOFD sv 1 Like ANYOF, but /d is in effect
- ANYOFL sv 1 Like ANYOF, but /l is in effect
- ANYOFPOSIXL sv 1 Like ANYOFL, but matches [[:posix:]]
- classes
+ ANYOF sv Match character in (or not in) this class,
+ charclass single char match only
+ ANYOFD sv Like ANYOF, but /d is in effect
+ charclass
+ ANYOFL sv Like ANYOF, but /l is in effect
+ charclass
+ ANYOFPOSIXL sv Like ANYOFL, but matches [[:posix:]]
+ charclass_ classes
+ posixl
ANYOFM byte 1 Like ANYOF, but matches an invariant byte
as determined by the mask and arg
diff --git a/regcomp.c b/regcomp.c
index b0c8db6c5a..8ea84e9d72 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -15433,7 +15433,6 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
RExC_parse++;
if (nest_depth--) break;
node = reganode(pRExC_state, ANYOF, 0);
- RExC_size += ANYOF_SKIP;
nextchar(pRExC_state);
Set_Node_Length(node,
RExC_parse - oregcomp_parse + 1); /* MJD */
@@ -17947,12 +17946,10 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
ret = reganode(pRExC_state, op, 0);
if (SIZE_ONLY) {
- RExC_size += (op == ANYOFPOSIXL) ? ANYOF_POSIXL_SKIP : ANYOF_SKIP + 1;
return ret;
}
/****** !SIZE_ONLY (Pass 2) AFTER HERE *********/
- RExC_emit += (op == ANYOFPOSIXL) ? ANYOF_POSIXL_SKIP : ANYOF_SKIP;
ANYOF_FLAGS(ret) = anyof_flags;
if (posixl) {
@@ -19205,7 +19202,8 @@ S_reganode(pTHX_ RExC_state_t *pRExC_state, U8 op, U32 arg)
PERL_ARGS_ASSERT_REGANODE;
- assert(regarglen[op] == 1);
+ /* ANYOF-kind nodes are special-cased: their argument length is not 1 */
+ assert(regarglen[op] == 1 || PL_regkind[op] == ANYOF);
if (PASS2) {
regnode *ptr = ret;
@@ -21494,13 +21492,6 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
else if ( op == PLUS || op == STAR) {
DUMPUNTIL(NEXTOPER(node), NEXTOPER(node) + 1);
}
- else if (PL_regkind[(U8)op] == ANYOF) {
- /* arglen 1 + class block */
- node += 1 + ((ANYOF_FLAGS(node) & ANYOF_MATCHES_POSIXL)
- ? ANYOF_POSIXL_SKIP
- : ANYOF_SKIP);
- node = NEXTOPER(node);
- }
else if (PL_regkind[(U8)op] == EXACT) {
/* Literal string, where present. */
node += NODE_SZ_STR(node) - 1;
diff --git a/regcomp.h b/regcomp.h
index 8bffb5d777..45d24c2599 100644
--- a/regcomp.h
+++ b/regcomp.h
@@ -367,7 +367,9 @@ struct regnode_ssc {
STMT_START { \
ARG_SET(ptr, arg); \
FILL_ADVANCE_NODE(ptr, op); \
- (ptr) += 1; \
+ /* This is used generically for other operations\
+ * that have a longer argument */ \
+ (ptr) += regarglen[op]; \
} STMT_END
#define FILL_ADVANCE_NODE_2L_ARG(ptr, op, arg1, arg2) \
STMT_START { \
@@ -466,22 +468,18 @@ struct regnode_ssc {
* handler function, as the macro REGINCLASS in regexec.c does now for other
* cases.
*
- * Another possibility is to instead (or additionally) rename the ANYOF_POSIXL
- * flag to be ANYOFL_LARGE, to mean that the ANYOF node has an extra 32 bits
- * beyond what a regular one does. That's what it effectively means now, with
- * the extra space all for the POSIX class flags. But those classes actually
- * only occupy 30 bits, so the ANYOFL_FOLD and
- * ANYOFL_SHARED_UTF8_LOCALE_fold_HAS_MATCHES_nonfold_REQD flags could be moved
- * to that extra space. The 30 bits in the extra word would indicate if a
- * posix class should be looked up or not. The downside of this is that ANYOFL
- * nodes with folding would always have to have the extra space allocated, even
- * if they didn't use the 30 posix bits. There isn't an SSC problem as all
- * SSCs are this large anyway.
+ * Another possibility is based on the fact that ANYOF_MATCHES_POSIXL is
+ * redundant with the node type ANYOFPOSIXL. That flag could be removed, but
+ * at the expense of extra code in regexec.c. The flag has been retained
+ * because it lets a single test determine whether we need to call
+ * reginclass() or can just use the bitmap.
*
- * One could completely remove ANYOFL_LARGE and make all ANYOFL nodes large.
- * REGINCLASS would have to be modified so that if the node type were this, it
- * would call reginclass(), as the flag bit that indicates to do this now would
- * be gone.
+ * If this is done, an extension would be to make all ANYOFL nodes contain the
+ * extra 32 bits that ANYOFPOSIXL ones do. The posix flags only occupy 30
+ * bits, so the ANYOFL_SHARED_UTF8_LOCALE_fold_HAS_MATCHES_nonfold_REQD flags
+ * and ANYOFL_FOLD could be moved to that extra space, but it would mean extra
+ * instructions, as there are currently places in the code that assume those
+ * two bits are zero.
*
* All told, 5 bits could be available for other uses if all of the above were
* done.
@@ -707,11 +705,6 @@ struct regnode_ssc {
#define ANYOF_BITMAP_CLEARALL(p) \
Zero (ANYOF_BITMAP(p), ANYOF_BITMAP_SIZE)
-#define ANYOF_SKIP (EXTRA_SIZE(regnode_charclass) \
- - EXTRA_SIZE(struct regnode_1))
-#define ANYOF_POSIXL_SKIP (EXTRA_SIZE(regnode_charclass_posixl) \
- - EXTRA_SIZE(struct regnode_1))
-
/*
* Utility definitions.
*/
diff --git a/regcomp.sym b/regcomp.sym
index 604163bcd8..f275c7b6c8 100644
--- a/regcomp.sym
+++ b/regcomp.sym
@@ -56,10 +56,10 @@ NBOUNDA NBOUND, no ; Match "" between any \w\w or \W\W, where \w
#* [Special] alternatives:
REG_ANY REG_ANY, no 0 S ; Match any one character (except newline).
SANY REG_ANY, no 0 S ; Match any one character.
-ANYOF ANYOF, sv 1 S ; Match character in (or not in) this class, single char match only
-ANYOFD ANYOF, sv 1 S ; Like ANYOF, but /d is in effect
-ANYOFL ANYOF, sv 1 S ; Like ANYOF, but /l is in effect
-ANYOFPOSIXL ANYOF, sv 1 S ; Like ANYOFL, but matches [[:posix:]] classes
+ANYOF ANYOF, sv charclass S ; Match character in (or not in) this class, single char match only
+ANYOFD ANYOF, sv charclass S ; Like ANYOF, but /d is in effect
+ANYOFL ANYOF, sv charclass S ; Like ANYOF, but /l is in effect
+ANYOFPOSIXL ANYOF, sv charclass_posixl S ; Like ANYOFL, but matches [[:posix:]] classes
ANYOFM ANYOFM byte 1 S ; Like ANYOF, but matches an invariant byte as determined by the mask and arg
#* POSIX Character Classes:
diff --git a/regnodes.h b/regnodes.h
index 590cba58d9..849d2ec494 100644
--- a/regnodes.h
+++ b/regnodes.h
@@ -324,10 +324,10 @@ static const U8 regarglen[] = {
0, /* NBOUNDA */
0, /* REG_ANY */
0, /* SANY */
- EXTRA_SIZE(struct regnode_1), /* ANYOF */
- EXTRA_SIZE(struct regnode_1), /* ANYOFD */
- EXTRA_SIZE(struct regnode_1), /* ANYOFL */
- EXTRA_SIZE(struct regnode_1), /* ANYOFPOSIXL */
+ EXTRA_SIZE(struct regnode_charclass), /* ANYOF */
+ EXTRA_SIZE(struct regnode_charclass), /* ANYOFD */
+ EXTRA_SIZE(struct regnode_charclass), /* ANYOFL */
+ EXTRA_SIZE(struct regnode_charclass_posixl), /* ANYOFPOSIXL */
EXTRA_SIZE(struct regnode_1), /* ANYOFM */
0, /* POSIXD */
0, /* POSIXL */