diff options
-rw-r--r-- | pod/perldebguts.pod | 15 | ||||
-rw-r--r-- | regcomp.c | 13 | ||||
-rw-r--r-- | regcomp.h | 35 | ||||
-rw-r--r-- | regcomp.sym | 8 | ||||
-rw-r--r-- | regnodes.h | 8 |
5 files changed, 33 insertions, 46 deletions
diff --git a/pod/perldebguts.pod b/pod/perldebguts.pod index b042910a15..a587f820c1 100644 --- a/pod/perldebguts.pod +++ b/pod/perldebguts.pod @@ -601,12 +601,15 @@ will be lost. # [Special] alternatives: REG_ANY no Match any one character (except newline). SANY no Match any one character. - ANYOF sv 1 Match character in (or not in) this class, - single char match only - ANYOFD sv 1 Like ANYOF, but /d is in effect - ANYOFL sv 1 Like ANYOF, but /l is in effect - ANYOFPOSIXL sv 1 Like ANYOFL, but matches [[:posix:]] - classes + ANYOF sv Match character in (or not in) this class, + charclass single char match only + ANYOFD sv Like ANYOF, but /d is in effect + charclass + ANYOFL sv Like ANYOF, but /l is in effect + charclass + ANYOFPOSIXL sv Like ANYOFL, but matches [[:posix:]] + charclass_ classes + posixl ANYOFM byte 1 Like ANYOF, but matches an invariant byte as determined by the mask and arg @@ -15433,7 +15433,6 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist, RExC_parse++; if (nest_depth--) break; node = reganode(pRExC_state, ANYOF, 0); - RExC_size += ANYOF_SKIP; nextchar(pRExC_state); Set_Node_Length(node, RExC_parse - oregcomp_parse + 1); /* MJD */ @@ -17947,12 +17946,10 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, ret = reganode(pRExC_state, op, 0); if (SIZE_ONLY) { - RExC_size += (op == ANYOFPOSIXL) ? ANYOF_POSIXL_SKIP : ANYOF_SKIP + 1; return ret; } /****** !SIZE_ONLY (Pass 2) AFTER HERE *********/ - RExC_emit += (op == ANYOFPOSIXL) ? 
ANYOF_POSIXL_SKIP : ANYOF_SKIP; ANYOF_FLAGS(ret) = anyof_flags; if (posixl) { @@ -19205,7 +19202,8 @@ S_reganode(pTHX_ RExC_state_t *pRExC_state, U8 op, U32 arg) PERL_ARGS_ASSERT_REGANODE; - assert(regarglen[op] == 1); + /* ANYOF are special cased to allow non-length 1 args */ + assert(regarglen[op] == 1 || PL_regkind[op] == ANYOF); if (PASS2) { regnode *ptr = ret; @@ -21494,13 +21492,6 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node, else if ( op == PLUS || op == STAR) { DUMPUNTIL(NEXTOPER(node), NEXTOPER(node) + 1); } - else if (PL_regkind[(U8)op] == ANYOF) { - /* arglen 1 + class block */ - node += 1 + ((ANYOF_FLAGS(node) & ANYOF_MATCHES_POSIXL) - ? ANYOF_POSIXL_SKIP - : ANYOF_SKIP); - node = NEXTOPER(node); - } else if (PL_regkind[(U8)op] == EXACT) { /* Literal string, where present. */ node += NODE_SZ_STR(node) - 1; @@ -367,7 +367,9 @@ struct regnode_ssc { STMT_START { \ ARG_SET(ptr, arg); \ FILL_ADVANCE_NODE(ptr, op); \ - (ptr) += 1; \ + /* This is used generically for other operations\ + * that have a longer argument */ \ + (ptr) += regarglen[op]; \ } STMT_END #define FILL_ADVANCE_NODE_2L_ARG(ptr, op, arg1, arg2) \ STMT_START { \ @@ -466,22 +468,18 @@ struct regnode_ssc { * handler function, as the macro REGINCLASS in regexec.c does now for other * cases. * - * Another possibility is to instead (or additionally) rename the ANYOF_POSIXL - * flag to be ANYOFL_LARGE, to mean that the ANYOF node has an extra 32 bits - * beyond what a regular one does. That's what it effectively means now, with - * the extra space all for the POSIX class flags. But those classes actually - * only occupy 30 bits, so the ANYOFL_FOLD and - * ANYOFL_SHARED_UTF8_LOCALE_fold_HAS_MATCHES_nonfold_REQD flags could be moved - * to that extra space. The 30 bits in the extra word would indicate if a - * posix class should be looked up or not. 
The downside of this is that ANYOFL - * nodes with folding would always have to have the extra space allocated, even - * if they didn't use the 30 posix bits. There isn't an SSC problem as all - * SSCs are this large anyway. + * Another possibility is based on the fact that ANYOF_MATCHES_POSIXL is + * redundant with the node type ANYOFPOSIXL. That flag could be removed, but + * at the expense of extra code in regexec.c. The flag has been retained + * because it allows us to see if we need to call reginclass, or just use the + * bitmap in one test. * - * One could completely remove ANYOFL_LARGE and make all ANYOFL nodes large. - * REGINCLASS would have to be modified so that if the node type were this, it - * would call reginclass(), as the flag bit that indicates to do this now would - * be gone. + * If this is done, an extension would be to make all ANYOFL nodes contain the + * extra 32 bits that ANYOFPOSIXL ones do. The posix flags only occupy 30 + * bits, so the ANYOFL_SHARED_UTF8_LOCALE_fold_HAS_MATCHES_nonfold_REQD flags + * and ANYOFL_FOLD could be moved to that extra space, but it would mean extra + * instructions, as there are currently places in the code that assume those + * two bits are zero. * * All told, 5 bits could be available for other uses if all of the above were * done. @@ -707,11 +705,6 @@ struct regnode_ssc { #define ANYOF_BITMAP_CLEARALL(p) \ Zero (ANYOF_BITMAP(p), ANYOF_BITMAP_SIZE) -#define ANYOF_SKIP (EXTRA_SIZE(regnode_charclass) \ - - EXTRA_SIZE(struct regnode_1)) -#define ANYOF_POSIXL_SKIP (EXTRA_SIZE(regnode_charclass_posixl) \ - - EXTRA_SIZE(struct regnode_1)) - /* * Utility definitions. */ diff --git a/regcomp.sym b/regcomp.sym index 604163bcd8..f275c7b6c8 100644 --- a/regcomp.sym +++ b/regcomp.sym @@ -56,10 +56,10 @@ NBOUNDA NBOUND, no ; Match "" betweeen any \w\w or \W\W, where \w #* [Special] alternatives: REG_ANY REG_ANY, no 0 S ; Match any one character (except newline). SANY REG_ANY, no 0 S ; Match any one character. 
-ANYOF ANYOF, sv 1 S ; Match character in (or not in) this class, single char match only -ANYOFD ANYOF, sv 1 S ; Like ANYOF, but /d is in effect -ANYOFL ANYOF, sv 1 S ; Like ANYOF, but /l is in effect -ANYOFPOSIXL ANYOF, sv 1 S ; Like ANYOFL, but matches [[:posix:]] classes +ANYOF ANYOF, sv charclass S ; Match character in (or not in) this class, single char match only +ANYOFD ANYOF, sv charclass S ; Like ANYOF, but /d is in effect +ANYOFL ANYOF, sv charclass S ; Like ANYOF, but /l is in effect +ANYOFPOSIXL ANYOF, sv charclass_posixl S ; Like ANYOFL, but matches [[:posix:]] classes ANYOFM ANYOFM byte 1 S ; Like ANYOF, but matches an invariant byte as determined by the mask and arg #* POSIX Character Classes: diff --git a/regnodes.h b/regnodes.h index 590cba58d9..849d2ec494 100644 --- a/regnodes.h +++ b/regnodes.h @@ -324,10 +324,10 @@ static const U8 regarglen[] = { 0, /* NBOUNDA */ 0, /* REG_ANY */ 0, /* SANY */ - EXTRA_SIZE(struct regnode_1), /* ANYOF */ - EXTRA_SIZE(struct regnode_1), /* ANYOFD */ - EXTRA_SIZE(struct regnode_1), /* ANYOFL */ - EXTRA_SIZE(struct regnode_1), /* ANYOFPOSIXL */ + EXTRA_SIZE(struct regnode_charclass), /* ANYOF */ + EXTRA_SIZE(struct regnode_charclass), /* ANYOFD */ + EXTRA_SIZE(struct regnode_charclass), /* ANYOFL */ + EXTRA_SIZE(struct regnode_charclass_posixl), /* ANYOFPOSIXL */ EXTRA_SIZE(struct regnode_1), /* ANYOFM */ 0, /* POSIXD */ 0, /* POSIXL */ |