diff options
-rw-r--r-- | regcomp.c | 14 | ||||
-rw-r--r-- | regcomp.h | 232 | ||||
-rw-r--r-- | regcomp_debug.c | 22 | ||||
-rw-r--r-- | regcomp_study.c | 40 | ||||
-rw-r--r-- | regcomp_trie.c | 2 | ||||
-rw-r--r-- | regexec.c | 70 | ||||
-rw-r--r-- | regexp.h | 19 |
7 files changed, 176 insertions, 223 deletions
@@ -1837,7 +1837,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, /* An OR of *one* alternative - should not happen now. */ (OP(first) == BRANCH && OP(first_next) != BRANCH) || /* for now we can't handle lookbehind IFMATCH*/ - (OP(first) == IFMATCH && !first->flags && (sawlookahead = 1)) || + (OP(first) == IFMATCH && !FLAGS(first) && (sawlookahead = 1)) || (OP(first) == PLUS) || (OP(first) == MINMOD) || /* An {n,m} with n>0 */ @@ -2220,7 +2220,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, */ if (REGNODE_TYPE(fop) == NOTHING && nop == END) RExC_rx->extflags |= RXf_NULL; - else if ((fop == MBOL || (fop == SBOL && !first->flags)) && nop == END) + else if ((fop == MBOL || (fop == SBOL && !FLAGS(first))) && nop == END) /* when fop is SBOL first->flags will be true only when it was * produced by parsing /\A/, and not when parsing /^/. This is * very important for the split code as there we want to @@ -2766,7 +2766,7 @@ S_handle_named_backref(pTHX_ RExC_state_t *pRExC_state, : REFFN), num, RExC_nestroot); if (RExC_nestroot && num >= (U32)RExC_nestroot) - REGNODE_p(ret)->flags = VOLATILE_REF; + FLAGS(REGNODE_p(ret)) = VOLATILE_REF; *flagp |= HASWIDTH; nextchar(pRExC_state); @@ -6045,7 +6045,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) : REFF), num, RExC_nestroot); if (RExC_nestroot && num >= RExC_nestroot) - REGNODE_p(ret)->flags = VOLATILE_REF; + FLAGS(REGNODE_p(ret)) = VOLATILE_REF; if (OP(REGNODE_p(ret)) == REFF) { RExC_seen_d_op = TRUE; } @@ -12024,7 +12024,7 @@ S_optimize_regclass(pTHX_ op = ANYOFHbbm; *ret = REGNODE_GUTS(pRExC_state, op, REGNODE_ARG_LEN(op)); FILL_NODE(*ret, op); - ((struct regnode_bbm *) REGNODE_p(*ret))->first_byte = low_utf8[0], + FIRST_BYTE((struct regnode_bbm *) REGNODE_p(*ret)) = low_utf8[0], /* The 64 bit (or 32 on EBCCDIC) map can be looked up * directly based on the continuation byte, without @@ -12050,7 +12050,7 @@ S_optimize_regclass(pTHX_ *ret = REGNODE_GUTS(pRExC_state, op, 
REGNODE_ARG_LEN(op) + STR_SZ(len)); FILL_NODE(*ret, op); - ((struct regnode_anyofhs *) REGNODE_p(*ret))->str_len + STR_LEN_U8((struct regnode_anyofhs *) REGNODE_p(*ret)) = len; Copy(low_utf8, /* Add the common bytes */ ((struct regnode_anyofhs *) REGNODE_p(*ret))->string, @@ -13044,7 +13044,7 @@ Perl_get_ANYOFHbbm_contents(pTHX_ const regnode * n) { &cp_list, /* The base cp is from the start byte plus a zero continuation */ - TWO_BYTE_UTF8_TO_NATIVE(((struct regnode_bbm *) n)->first_byte, + TWO_BYTE_UTF8_TO_NATIVE(FIRST_BYTE((struct regnode_bbm *) n), UTF_CONTINUATION_MARK | 0)); return cp_list; } @@ -178,16 +178,36 @@ typedef struct regexp_internal { * change things without care. If you look at regexp.h you will see it * contains this: * + * union regnode_head { + * struct { + * union { + * U8 flags; + * U8 str_len_u8; + * U8 first_byte; + * } u_8; + * U8 type; + * U16 next_off; + * } data; + * U32 data_u32; + * }; + * * struct regnode { - * U8 flags; - * U8 type; - * U16 next_off; + * union regnode_head head; * }; * - * This structure is the base unit of elements in the regexp program. When - * we increment our way through the program we increment by the size of this - * structure, and in all cases where regnode sizing is considered it is in - * units of this structure. + * Which really is a complicated and alignment friendly version of + * + * struct { + * U8 flags; + * U8 type; + * U16 next_off; + * }; + * + * This structure is the base unit of elements in the regexp program. + * When we increment our way through the program we increment by the + * size of this structure (32 bits), and in all cases where regnode + * sizing is considered it is in units of this structure. All regnodes + * have a union regnode_head as their first member. * * This implies that no regnode style structure should contain 64 bit * aligned members. 
Since the base regnode is 32 bits any member might @@ -210,52 +230,40 @@ typedef struct regexp_internal { * we already have support for in the data array. */ +union regnode_arg { + I32 i32; + U32 u32; + struct { + U16 u16a; + U16 u16b; + } hi_lo; +}; + + struct regnode_string { - U8 str_len_u8; - U8 type; - U16 next_off; + union regnode_head head; char string[1]; }; struct regnode_lstring { /* Constructed this way to keep the string aligned. */ - U8 flags; - U8 type; - U16 next_off; + union regnode_head head; U32 str_len_u32; /* Only 18 bits allowed before would overflow 'next_off' */ char string[1]; }; struct regnode_anyofhs { /* Constructed this way to keep the string aligned. */ - U8 str_len; - U8 type; - U16 next_off; - union { - U32 arg1u; - I32 arg1i; - struct { - U16 arg1a; - U16 arg1b; - } hi_lo; - } arg1; + union regnode_head head; + union regnode_arg arg1; char string[1]; }; -/* Argument bearing node - workhorse, arg1u is often for the data field - * Can store either a signed value via ARG1i() or unsigned 32 bit value +/* Argument bearing node - workhorse, ARG1u() is often used for the data field + * Can store either a signed 32 bit value via ARG1i() or unsigned 32 bit value * via ARG1u(), or two unsigned 16 bit values via ARG1a() or ARG1b() */ struct regnode_1 { - U8 flags; - U8 type; - U16 next_off; - union { - U32 arg1u; - I32 arg1i; - struct { - U16 arg1a; - U16 arg1b; - } hi_lo; - } arg1; + union regnode_head head; + union regnode_arg arg1; }; /* Node whose argument is 'SV *'. This needs to be used very carefully in @@ -274,9 +282,7 @@ struct regnode_1 { * then use inline functions to copy the data in or out. 
* */ struct regnode_p { - U8 flags; - U8 type; - U16 next_off; + union regnode_head head; char arg1_sv_ptr_bytes[sizeof(SV *)]; }; @@ -285,25 +291,9 @@ struct regnode_p { * Extra field can be accessed as (U32)ARG2u() (I32)ARG2i() or (U16)ARG2a() * and (U16)ARG2b() */ struct regnode_2 { - U8 flags; - U8 type; - U16 next_off; - union { - U32 arg1u; - I32 arg1i; - struct { - U16 arg1a; - U16 arg1b; - } hi_lo; - } arg1; - union { - U32 arg2u; - I32 arg2i; - struct { - U16 arg2a; - U16 arg2b; - } hi_lo; - } arg2; + union regnode_head head; + union regnode_arg arg1; + union regnode_arg arg2; }; /* "Three Node" - similar to a regnode_2 but with space for an additional @@ -315,33 +305,10 @@ struct regnode_2 { * ARG3a() and ARG3b() which are used to store information about the number of * parens before and inside the quantified expression. */ struct regnode_3 { - U8 flags; - U8 type; - U16 next_off; - union { - I32 arg1i; - U32 arg1u; - struct { - U16 arg1a; - U16 arg1b; - } hi_lo; - } arg1; - union { - I32 arg2i; - U32 arg2u; - struct { - U16 arg2a; - U16 arg2b; - } hi_lo; - } arg2; - union { - struct { - U16 arg3a; - U16 arg3b; - } hi_lo; - I32 arg3i; - U32 arg3u; - } arg3; + union regnode_head head; + union regnode_arg arg1; + union regnode_arg arg2; + union regnode_arg arg3; }; #define REGNODE_BBM_BITMAP_LEN \ @@ -352,9 +319,7 @@ struct regnode_3 { * The array is a bitmap capable of representing any possible continuation * byte. */ struct regnode_bbm { - U8 first_byte; - U8 type; - U16 next_off; + union regnode_head head; U8 bitmap[REGNODE_BBM_BITMAP_LEN]; }; @@ -370,36 +335,18 @@ struct regnode_bbm { * the code that inserts and deletes regnodes. The basic single-argument * regnode has a U32, which is what reganode() allocates as a unit. Therefore * no field can require stricter alignment than U32. 
*/ - + /* also used by trie */ struct regnode_charclass { - U8 flags; - U8 type; - U16 next_off; - union { - I32 arg1i; - U32 arg1u; - struct { - U16 arg1a; - U16 arg1b; - } hi_lo; - } arg1; + union regnode_head head; + union regnode_arg arg1; char bitmap[ANYOF_BITMAP_SIZE]; /* only compile-time */ }; /* has runtime (locale) \d, \w, ..., [:posix:] classes */ struct regnode_charclass_posixl { - U8 flags; /* ANYOF_MATCHES_POSIXL bit must go here */ - U8 type; - U16 next_off; - union { - I32 arg1i; - U32 arg1u; - struct { - U16 arg1a; - U16 arg1b; - } hi_lo; - } arg1; + union regnode_head head; + union regnode_arg arg1; char bitmap[ANYOF_BITMAP_SIZE]; /* both compile-time ... */ U32 classflags; /* and run-time */ }; @@ -418,17 +365,8 @@ struct regnode_charclass_posixl { * never a next node. */ struct regnode_ssc { - U8 flags; /* ANYOF_MATCHES_POSIXL bit must go here */ - U8 type; - U16 next_off; - union { - I32 arg1i; - U32 arg1u; - struct { - U16 arg1a; - U16 arg1b; - } hi_lo; - } arg1; + union regnode_head head; + union regnode_arg arg1; char bitmap[ANYOF_BITMAP_SIZE]; /* both compile-time ... */ U32 classflags; /* ... and run-time */ @@ -524,11 +462,6 @@ struct regnode_ssc { #define ARGp_SET(p, val) ARGp_SET_inline((p),(val)) -#undef NEXT_OFF -#undef NODE_ALIGN - -#define NEXT_OFF(p) ((p)->next_off) -#define NODE_ALIGN(node) /* the following define was set to 0xde in 075abff3 * as part of some linting logic. I have set it to 0 * as otherwise in every place where we /might/ set flags @@ -538,26 +471,33 @@ struct regnode_ssc { * is changed from 0 then at the very least make sure * that SBOL for /^/ sets the flags to 0 explicitly. 
* -- Yves */ -#define NODE_ALIGN_FILL(node) ((node)->flags = 0) +#define NODE_ALIGN(node) #define SIZE_ALIGN NODE_ALIGN #undef OP #undef OPERAND #undef STRING +#undef NEXT_OFF +#undef NODE_ALIGN -#define OP(p) ((p)->type) -#define FLAGS(p) ((p)->flags) /* Caution: Doesn't apply to all \ +#define NEXT_OFF(p) ((p)->head.data.next_off) +#define OP(p) ((p)->head.data.type) +#define STR_LEN_U8(p) ((p)->head.data.u_8.str_len_u8) +#define FIRST_BYTE(p) ((p)->head.data.u_8.first_byte) +#define FLAGS(p) ((p)->head.data.u_8.flags) /* Caution: Doesn't apply to all \ regnode types. For some, it's the \ character set of the regnode */ #define STR_LENs(p) (__ASSERT_(OP(p) != LEXACT && OP(p) != LEXACT_REQ8) \ - ((struct regnode_string *)p)->str_len_u8) + STR_LEN_U8((struct regnode_string *)p)) #define STRINGs(p) (__ASSERT_(OP(p) != LEXACT && OP(p) != LEXACT_REQ8) \ ((struct regnode_string *)p)->string) #define OPERANDs(p) STRINGs(p) #define PARNO(p) ARG1u(p) /* APPLIES for OPEN and CLOSE only */ +#define NODE_ALIGN_FILL(node) (FLAGS(node) = 0) + /* Long strings. Currently limited to length 18 bits, which handles a 262000 * byte string. The limiting factor is the 16 bit 'next_off' field, which * points to the next regnode, so the furthest away it can be is 2**16. 
On @@ -591,7 +531,7 @@ struct regnode_ssc { if (OP(p) == LEXACT || OP(p) == LEXACT_REQ8) \ ((struct regnode_lstring *)(p))->str_len_u32 = (v); \ else \ - ((struct regnode_string *)(p))->str_len_u8 = (v); \ + STR_LEN_U8((struct regnode_string *)(p)) = (v); \ } STMT_END #define ANYOFR_BASE_BITS 20 @@ -603,18 +543,18 @@ struct regnode_ssc { #define NODE_ALIGN(node) #define ARGp_BYTES_LOC(p) (((struct regnode_p *)p)->arg1_sv_ptr_bytes) -#define ARG1u_LOC(p) (((struct regnode_1 *)p)->arg1.arg1u) -#define ARG1i_LOC(p) (((struct regnode_1 *)p)->arg1.arg1i) -#define ARG1a_LOC(p) (((struct regnode_1 *)p)->arg1.hi_lo.arg1a) -#define ARG1b_LOC(p) (((struct regnode_1 *)p)->arg1.hi_lo.arg1b) -#define ARG2u_LOC(p) (((struct regnode_2 *)p)->arg2.arg2u) -#define ARG2i_LOC(p) (((struct regnode_2 *)p)->arg2.arg2i) -#define ARG2a_LOC(p) (((struct regnode_2 *)p)->arg2.hi_lo.arg2a) -#define ARG2b_LOC(p) (((struct regnode_2 *)p)->arg2.hi_lo.arg2b) -#define ARG3u_LOC(p) (((struct regnode_3 *)p)->arg3.arg3u) -#define ARG3i_LOC(p) (((struct regnode_3 *)p)->arg3.arg3i) -#define ARG3a_LOC(p) (((struct regnode_3 *)p)->arg3.hi_lo.arg3a) -#define ARG3b_LOC(p) (((struct regnode_3 *)p)->arg3.hi_lo.arg3b) +#define ARG1u_LOC(p) (((struct regnode_1 *)p)->arg1.u32) +#define ARG1i_LOC(p) (((struct regnode_1 *)p)->arg1.i32) +#define ARG1a_LOC(p) (((struct regnode_1 *)p)->arg1.hi_lo.u16a) +#define ARG1b_LOC(p) (((struct regnode_1 *)p)->arg1.hi_lo.u16b) +#define ARG2u_LOC(p) (((struct regnode_2 *)p)->arg2.u32) +#define ARG2i_LOC(p) (((struct regnode_2 *)p)->arg2.i32) +#define ARG2a_LOC(p) (((struct regnode_2 *)p)->arg2.hi_lo.u16a) +#define ARG2b_LOC(p) (((struct regnode_2 *)p)->arg2.hi_lo.u16b) +#define ARG3u_LOC(p) (((struct regnode_3 *)p)->arg3.u32) +#define ARG3i_LOC(p) (((struct regnode_3 *)p)->arg3.i32) +#define ARG3a_LOC(p) (((struct regnode_3 *)p)->arg3.hi_lo.u16a) +#define ARG3b_LOC(p) (((struct regnode_3 *)p)->arg3.hi_lo.u16b) /* These should no longer be used directly in most cases. 
Please use * the REGNODE_AFTER() macros instead. */ @@ -1065,7 +1005,7 @@ ARGp_SET_inline(struct regnode *node, SV *ptr) { #define BITMAP_BIT(c) (1U << ((c) & 7)) #define BITMAP_TEST(p, c) (BITMAP_BYTE(p, c) & BITMAP_BIT((U8)(c))) -#define ANYOF_FLAGS(p) ((p)->flags) +#define ANYOF_FLAGS(p) (FLAGS(p)) #define ANYOF_BIT(c) BITMAP_BIT(c) diff --git a/regcomp_debug.c b/regcomp_debug.c index 6ab276155c..93db7a89cf 100644 --- a/regcomp_debug.c +++ b/regcomp_debug.c @@ -438,7 +438,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_ const reg_trie_data * const trie = (reg_trie_data*)progi->data->data[!IS_TRIE_AC(op) ? n : ac->trie]; - Perl_sv_catpvf(aTHX_ sv, "-%s", REGNODE_NAME(o->flags)); + Perl_sv_catpvf(aTHX_ sv, "-%s", REGNODE_NAME(FLAGS(o))); DEBUG_TRIE_COMPILE_r({ if (trie->jump) sv_catpvs(sv, "(JUMP)"); @@ -475,7 +475,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_ if (ARG3u(o)) /* check both ARG3a and ARG3b at the same time */ Perl_sv_catpvf(aTHX_ sv, "<%d:%d>", ARG3a(o),ARG3b(o)); /* paren before, paren after */ if (op == CURLYM || op == CURLYN || op == CURLYX) - Perl_sv_catpvf(aTHX_ sv, "[%d]", o->flags); /* Parenth number */ + Perl_sv_catpvf(aTHX_ sv, "[%d]", FLAGS(o)); /* Parenth number */ Perl_sv_catpvf(aTHX_ sv, "{%u,", (unsigned) lo); if (hi == REG_INFTY) sv_catpvs(sv, "INFTY"); @@ -483,8 +483,8 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_ Perl_sv_catpvf(aTHX_ sv, "%u", (unsigned) hi); sv_catpvs(sv, "}"); } - else if (k == WHILEM && o->flags) /* Ordinal/of */ - Perl_sv_catpvf(aTHX_ sv, "[%d/%d]", o->flags & 0xf, o->flags>>4); + else if (k == WHILEM && FLAGS(o)) /* Ordinal/of */ + Perl_sv_catpvf(aTHX_ sv, "[%d/%d]", FLAGS(o) & 0xf, FLAGS(o)>>4); else if (k == REF || k == OPEN || k == CLOSE || k == GROUPP || op == ACCEPT) { @@ -586,7 +586,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_ } else if (k == LOGICAL) 
/* 2: embedded, otherwise 1 */ - Perl_sv_catpvf(aTHX_ sv, "[%d]", o->flags); + Perl_sv_catpvf(aTHX_ sv, "[%d]", FLAGS(o)); else if (k == ANYOF || k == ANYOFH || k == ANYOFR) { U8 flags; char * bitmap; @@ -876,21 +876,21 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_ sv_catpv(sv, bounds[FLAGS(o)]); } else if (k == BRANCHJ && (op == UNLESSM || op == IFMATCH)) { - Perl_sv_catpvf(aTHX_ sv, "[%d", -(o->flags)); - if (o->next_off) { - Perl_sv_catpvf(aTHX_ sv, "..-%d", o->flags - o->next_off); + Perl_sv_catpvf(aTHX_ sv, "[%d", -(FLAGS(o))); + if (NEXT_OFF(o)) { + Perl_sv_catpvf(aTHX_ sv, "..-%d", FLAGS(o) - NEXT_OFF(o)); } Perl_sv_catpvf(aTHX_ sv, "]"); } else if (op == SBOL) - Perl_sv_catpvf(aTHX_ sv, " /%s/", o->flags ? "\\A" : "^"); + Perl_sv_catpvf(aTHX_ sv, " /%s/", FLAGS(o) ? "\\A" : "^"); else if (op == EVAL) { - if (o->flags & EVAL_OPTIMISTIC_FLAG) + if (FLAGS(o) & EVAL_OPTIMISTIC_FLAG) Perl_sv_catpvf(aTHX_ sv, " optimistic"); } /* add on the verb argument if there is one */ - if ( ( k == VERB || op == ACCEPT || op == OPFAIL ) && o->flags) { + if ( ( k == VERB || op == ACCEPT || op == OPFAIL ) && FLAGS(o)) { if ( ARG1u(o) ) Perl_sv_catpvf(aTHX_ sv, ":%" SVf, SVfARG((MUTABLE_SV(progi->data->data[ ARG1u( o ) ])))); diff --git a/regcomp_study.c b/regcomp_study.c index 81d55719df..db7ab3a409 100644 --- a/regcomp_study.c +++ b/regcomp_study.c @@ -2557,7 +2557,7 @@ Perl_study_chunk(pTHX_ goto optimize_curly_tail; case CURLY: if (stopparen>0 && (OP(scan)==CURLYN || OP(scan)==CURLYM) - && (scan->flags == stopparen)) + && (FLAGS(scan) == stopparen)) { mincount = 1; maxcount = 1; @@ -2568,7 +2568,7 @@ Perl_study_chunk(pTHX_ next = regnext(scan); if (OP(scan) == CURLYX) { I32 lp = (data ? *(data->last_closep) : 0); - scan->flags = ((lp <= (I32)U8_MAX) ? (U8)lp : U8_MAX); + FLAGS(scan) = ((lp <= (I32)U8_MAX) ? 
(U8)lp : U8_MAX); } scan = REGNODE_AFTER(scan); next_is_eval = (OP(scan) == EVAL); @@ -2729,7 +2729,7 @@ Perl_study_chunk(pTHX_ RExC_close_parens[PARNO(nxt1)] = REGNODE_OFFSET(nxt) + 2; } /* Now we know that nxt2 is the only contents: */ - oscan->flags = (U8)PARNO(nxt); + FLAGS(oscan) = (U8)PARNO(nxt); OP(oscan) = CURLYN; OP(nxt1) = NOTHING; /* was OPEN. */ @@ -2778,7 +2778,7 @@ Perl_study_chunk(pTHX_ /* note that we have changed the type of oscan to CURLYM here */ regnode *nxt1 = REGNODE_AFTER_type(oscan, tregnode_CURLYM); /* OPEN*/ - oscan->flags = (U8)PARNO(nxt); + FLAGS(oscan) = (U8)PARNO(nxt); if (RExC_open_parens) { /*open->CURLYM*/ RExC_open_parens[PARNO(nxt1)] = REGNODE_OFFSET(oscan); @@ -2817,7 +2817,7 @@ Perl_study_chunk(pTHX_ depth+1, mutate_ok); } else - oscan->flags = 0; + FLAGS(oscan) = 0; } else if ((OP(oscan) == CURLYX) && (flags & SCF_WHILEM_VISITED_POS) @@ -2833,11 +2833,11 @@ Perl_study_chunk(pTHX_ if (OP(REGNODE_BEFORE(nxt)) == NOTHING) /* LONGJMP */ nxt += ARG1u(nxt); nxt = REGNODE_BEFORE(nxt); - if (nxt->flags & 0xf) { + if (FLAGS(nxt) & 0xf) { /* we've already set whilem count on this node */ } else if (++data->whilem_c < 16) { assert(data->whilem_c <= RExC_whilem_seen); - nxt->flags = (U8)(data->whilem_c + FLAGS(nxt) = (U8)(data->whilem_c | (RExC_whilem_seen << 4)); /* On WHILEM */ } } @@ -3223,7 +3223,7 @@ Perl_study_chunk(pTHX_ } else if ( REGNODE_TYPE(OP(scan)) == BRANCHJ /* Lookbehind, or need to calculate parens/evals/stclass: */ - && (scan->flags || data || (flags & SCF_DO_STCLASS)) + && (FLAGS(scan) || data || (flags & SCF_DO_STCLASS)) && (OP(scan) == IFMATCH || OP(scan) == UNLESSM)) { if ( !PERL_ENABLE_POSITIVE_ASSERTION_STUDY @@ -3260,7 +3260,7 @@ Perl_study_chunk(pTHX_ cur_last_close_op= *(data_fake.last_close_opp); data_fake.pos_delta = delta; - if ( flags & SCF_DO_STCLASS && !scan->flags + if ( flags & SCF_DO_STCLASS && !FLAGS(scan) && OP(scan) == IFMATCH ) { /* Lookahead */ ssc_init(pRExC_state, &intrnl); data_fake.start_class = 
&intrnl; @@ -3277,7 +3277,7 @@ Perl_study_chunk(pTHX_ recursed_depth, NULL, f, depth+1, mutate_ok); - if (scan->flags) { + if (FLAGS(scan)) { if ( deltanext < 0 || deltanext > (I32) U8_MAX || minnext > (I32)U8_MAX @@ -3293,7 +3293,7 @@ Perl_study_chunk(pTHX_ * matches to avoid breakage for those not using this * extension) */ if (deltanext) { - scan->next_off = deltanext; + NEXT_OFF(scan) = deltanext; if ( /* See a CLOSE op inside this lookbehind? */ cur_last_close_op != *(data_fake.last_close_opp) @@ -3308,7 +3308,7 @@ Perl_study_chunk(pTHX_ is_positive ? "positive" : "negative"); } } - scan->flags = (U8)minnext + deltanext; + FLAGS(scan) = (U8)minnext + deltanext; } if (data) { if (data_fake.flags & (SF_HAS_PAR|SF_IN_PAR)) @@ -3365,7 +3365,7 @@ Perl_study_chunk(pTHX_ StructCopy(data, &data_fake, scan_data_t); if ((flags & SCF_DO_SUBSTR) && data->last_found) { f |= SCF_DO_SUBSTR; - if (scan->flags) + if (FLAGS(scan)) scan_commit(pRExC_state, &data_fake, minlenp, is_inf); data_fake.last_found=newSVsv(data->last_found); } @@ -3380,7 +3380,7 @@ Perl_study_chunk(pTHX_ data_fake.pos_delta = delta; if (is_inf) data_fake.flags |= SF_IS_INF; - if ( flags & SCF_DO_STCLASS && !scan->flags + if ( flags & SCF_DO_STCLASS && !FLAGS(scan) && OP(scan) == IFMATCH ) { /* Lookahead */ ssc_init(pRExC_state, &intrnl); data_fake.start_class = &intrnl; @@ -3396,7 +3396,7 @@ Perl_study_chunk(pTHX_ &deltanext, last, &data_fake, stopparen, recursed_depth, NULL, f, depth+1, mutate_ok); - if (scan->flags) { + if (FLAGS(scan)) { assert(0); /* This code has never been tested since this is normally not compiled */ if ( deltanext < 0 @@ -3409,9 +3409,9 @@ Perl_study_chunk(pTHX_ } if (deltanext) { - scan->next_off = deltanext; + NEXT_OFF(scan) = deltanext; } - scan->flags = (U8)*minnextp + deltanext; + FLAGS(scan) = (U8)*minnextp + deltanext; } *minnextp += min; @@ -3441,7 +3441,7 @@ Perl_study_chunk(pTHX_ data_fake.substrs[i].max_offset; data->substrs[i].minlenp = data_fake.substrs[i].minlenp; - 
data->substrs[i].lookbehind += scan->flags; + data->substrs[i].lookbehind += FLAGS(scan); } } } @@ -3469,7 +3469,7 @@ Perl_study_chunk(pTHX_ } } else if (OP(scan) == EVAL) { - if (data && !(scan->flags & EVAL_OPTIMISTIC_FLAG) ) + if (data && !(FLAGS(scan) & EVAL_OPTIMISTIC_FLAG) ) data->flags |= SF_HAS_EVAL; } else if ( REGNODE_TYPE(OP(scan)) == ENDLIKE ) { @@ -3496,7 +3496,7 @@ Perl_study_chunk(pTHX_ flags &= ~SCF_DO_SUBSTR; } } - else if (OP(scan) == LOGICAL && scan->flags == 2) /* Embedded follows */ + else if (OP(scan) == LOGICAL && FLAGS(scan) == 2) /* Embedded follows */ { if (flags & SCF_DO_SUBSTR) { scan_commit(pRExC_state, data, minlenp, is_inf); diff --git a/regcomp_trie.c b/regcomp_trie.c index 1096a25fcd..31b54ca936 100644 --- a/regcomp_trie.c +++ b/regcomp_trie.c @@ -1517,7 +1517,7 @@ Perl_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, OP( convert ) = TRIE; /* store the type in the flags */ - convert->flags = nodetype; + FLAGS(convert) = nodetype; DEBUG_r({ optimize = convert + NODE_STEP_REGNODE @@ -197,7 +197,7 @@ static const char non_utf8_target_but_utf8_required[] /* Search for mandatory following text node; for lookahead, the text must - follow but for lookbehind (rn->flags != 0) we skip to the next step. + follow but for lookbehind (FLAGS(rn) != 0) we skip to the next step. */ #define FIND_NEXT_IMPT(rn) STMT_START { \ while (JUMPABLE(rn)) { \ @@ -207,7 +207,7 @@ static const char non_utf8_target_but_utf8_required[] else if (type == PLUS) \ rn = REGNODE_AFTER_type(rn,tregnode_PLUS); \ else if (type == IFMATCH) \ - rn = (rn->flags == 0) ? REGNODE_AFTER_type(rn,tregnode_IFMATCH) : rn + ARG1u(rn); \ + rn = (FLAGS(rn) == 0) ? 
REGNODE_AFTER_type(rn,tregnode_IFMATCH) : rn + ARG1u(rn); \ else rn += NEXT_OFF(rn); \ } \ } STMT_END @@ -1781,15 +1781,15 @@ Perl_re_intuit_start(pTHX_ const enum { trie_plain, trie_utf8, trie_utf8_fold, trie_latin_utf8_fold, \ trie_utf8_exactfa_fold, trie_latin_utf8_exactfa_fold, \ trie_utf8l, trie_flu8, trie_flu8_latin } \ - trie_type = ((scan->flags == EXACT) \ + trie_type = ((FLAGS(scan) == EXACT) \ ? (utf8_target ? trie_utf8 : trie_plain) \ - : (scan->flags == EXACTL) \ + : (FLAGS(scan) == EXACTL) \ ? (utf8_target ? trie_utf8l : trie_plain) \ - : (scan->flags == EXACTFAA) \ + : (FLAGS(scan) == EXACTFAA) \ ? (utf8_target \ ? trie_utf8_exactfa_fold \ : trie_latin_utf8_exactfa_fold) \ - : (scan->flags == EXACTFLU8 \ + : (FLAGS(scan) == EXACTFLU8 \ ? (utf8_target \ ? trie_flu8 \ : trie_flu8_latin) \ @@ -6704,12 +6704,12 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog) HV * widecharmap = MUTABLE_HV(rexi->data->data[ ARG1u( scan ) + 1 ]); U32 state = trie->startstate; - if (scan->flags == EXACTL || scan->flags == EXACTFLU8) { + if (FLAGS(scan) == EXACTL || FLAGS(scan) == EXACTFLU8) { CHECK_AND_WARN_PROBLEMATIC_LOCALE_; if (utf8_target && ! 
NEXTCHR_IS_EOS && UTF8_IS_ABOVE_LATIN1(nextbyte) - && scan->flags == EXACTL) + && FLAGS(scan) == EXACTL) { /* We only output for EXACTL, as we let the folder * output this message for EXACTFLU8 to avoid @@ -8081,7 +8081,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog) locinput += ln; } ref_yes: - if (scan->flags) { /* == VOLATILE_REF but only other value is 0 */ + if (FLAGS(scan)) { /* == VOLATILE_REF but only other value is 0 */ ST.cp = regcppush(rex, ARG2u(scan) - 1, maxopenparen); REGCP_SET(ST.lastcp); PUSH_STATE_GOTO(REF_next, next, locinput, loceol, @@ -8428,7 +8428,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog) /* if we got here, it should be an engine which * supports compiling code blocks and stuff */ assert(rex->engine && rex->engine->op_comp); - assert(!(scan->flags & ~RXf_PMf_COMPILETIME)); + assert(!(FLAGS(scan) & ~RXf_PMf_COMPILETIME)); re_sv = rex->engine->op_comp(aTHX_ &ret, 1, NULL, rex->engine, NULL, NULL, /* copy /msixn etc to inner pattern */ @@ -8632,7 +8632,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog) case ACCEPT: /* (*ACCEPT) */ is_accepted = true; - if (scan->flags) + if (FLAGS(scan)) sv_yes_mark = MUTABLE_SV(rexi->data->data[ ARG1u( scan ) ]); utmp = ARG2u(scan); @@ -8699,7 +8699,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog) break; case LOGICAL: /* modifier for EVAL and IFMATCH */ - logical = scan->flags & EVAL_FLAGS_MASK; /* reserve a bit for optimistic eval */ + logical = FLAGS(scan) & EVAL_FLAGS_MASK; /* reserve a bit for optimistic eval */ break; /******************************************************************* @@ -8790,7 +8790,7 @@ NULL case CURLYX: /* start of /A*B/ (for complex A) */ { /* No need to save/restore up to this paren */ - I32 parenfloor = scan->flags; + I32 parenfloor = FLAGS(scan); assert(next); /* keep Coverity happy */ if (OP(REGNODE_BEFORE(next)) == NOTHING) /* LONGJMP */ @@ -8905,20 +8905,20 @@ 
NULL * op (string-length x #WHILEMs) times do we allocate the * cache. * - * The top 4 bits of scan->flags byte say how many different + * The top 4 bits of FLAGS(scan) byte say how many different * relevant CURLLYX/WHILEM op pairs there are, while the * bottom 4-bits is the identifying index number of this * WHILEM. */ - if (scan->flags) { + if (FLAGS(scan)) { if (!reginfo->poscache_maxiter) { /* start the countdown: Postpone detection until we * know the match is not *that* much linear. */ reginfo->poscache_maxiter = (reginfo->strend - reginfo->strbeg + 1) - * (scan->flags>>4); + * (FLAGS(scan)>>4); /* possible overflow for long strings and many CURLYX's */ if (reginfo->poscache_maxiter < 0) reginfo->poscache_maxiter = I32_MAX; @@ -8951,9 +8951,9 @@ NULL SSize_t offset, mask; reginfo->poscache_iter = -1; /* stop eventual underflow */ - offset = (scan->flags & 0xf) - 1 + offset = (FLAGS(scan) & 0xf) - 1 + (locinput - reginfo->strbeg) - * (scan->flags>>4); + * (FLAGS(scan)>>4); mask = 1 << (offset % 8); offset /= 8; if (reginfo->info_aux->poscache[offset] & mask) { @@ -9089,7 +9089,7 @@ NULL NOT_REACHED; /* NOTREACHED */ case CUTGROUP: /* /(*THEN)/ */ - sv_yes_mark = st->u.mark.mark_name = scan->flags + sv_yes_mark = st->u.mark.mark_name = FLAGS(scan) ? 
MUTABLE_SV(rexi->data->data[ ARG1u( scan ) ]) : NULL; PUSH_STATE_GOTO(CUTGROUP_next, next, locinput, loceol, @@ -9156,8 +9156,8 @@ NULL ST.lastcloseparen = RXp_LASTCLOSEPAREN(rex); /* if paren positive, emulate an OPEN/CLOSE around A */ - if (ST.me->flags) { - U32 paren = ST.me->flags; + if (FLAGS(ST.me)) { + U32 paren = FLAGS(ST.me); lastopen = paren; if (paren > maxopenparen) maxopenparen = paren; @@ -9202,15 +9202,15 @@ NULL depth, (IV) ST.count, (IV)ST.alen) ); - if (ST.me->flags) { + if (FLAGS(ST.me)) { /* emulate CLOSE: mark current A as captured */ - U32 paren = (U32)ST.me->flags; + U32 paren = (U32)FLAGS(ST.me); CLOSE_CAPTURE(rex, paren, HOPc(locinput, -ST.alen) - reginfo->strbeg, locinput - reginfo->strbeg); } - if (EVAL_CLOSE_PAREN_IS_TRUE(cur_eval,(U32)ST.me->flags)) + if (EVAL_CLOSE_PAREN_IS_TRUE(cur_eval,(U32)FLAGS(ST.me))) goto fake_end; @@ -9226,7 +9226,7 @@ NULL if (ST.minmod || ST.count < ARG1i(ST.me) /* min*/ - || EVAL_CLOSE_PAREN_IS_TRUE(cur_eval,(U32)ST.me->flags)) + || EVAL_CLOSE_PAREN_IS_TRUE(cur_eval,(U32)FLAGS(ST.me))) sayNO; curlym_do_B: /* execute the B in /A{m,n}B/ */ @@ -9275,9 +9275,9 @@ NULL } curlym_close_B: - if (ST.me->flags) { + if (FLAGS(ST.me)) { /* emulate CLOSE: mark current A as captured */ - U32 paren = (U32)ST.me->flags; + U32 paren = (U32)FLAGS(ST.me); if (ST.count || is_accepted) { CLOSE_CAPTURE(rex, paren, HOPc(locinput, -ST.alen) - reginfo->strbeg, @@ -9286,7 +9286,7 @@ NULL else RXp_OFFSp(rex)[paren].end = -1; - if (EVAL_CLOSE_PAREN_IS_TRUE(cur_eval,(U32)ST.me->flags)) + if (EVAL_CLOSE_PAREN_IS_TRUE(cur_eval,(U32)FLAGS(ST.me))) { if (ST.count || is_accepted) goto fake_end; @@ -9349,7 +9349,7 @@ NULL goto repeat; case CURLYN: /* /(A){m,n}B/ where A is width 1 char */ - ST.paren = scan->flags; /* Which paren to set */ + ST.paren = FLAGS(scan); /* Which paren to set */ ST.lastparen = RXp_LASTPAREN(rex); ST.lastcloseparen = RXp_LASTCLOSEPAREN(rex); if (ST.paren > maxopenparen) @@ -9738,10 +9738,10 @@ NULL ST.wanted = 1; 
ifmatch_trivial_fail_test: ST.prev_match_end= match_end; - ST.count = scan->next_off + 1; /* next_off repurposed to be + ST.count = NEXT_OFF(scan) + 1; /* next_off repurposed to be lookbehind count, requires non-zero flags */ - if (! scan->flags) { /* 'flags' zero means lookahed */ + if (! FLAGS(scan)) { /* 'flags' zero means lookahead */ /* Lookahead starts here and ends at the normal place */ ST.start = locinput; @@ -9749,7 +9749,7 @@ NULL match_end = NULL; } else { - PERL_UINT_FAST8_T back_count = scan->flags; + PERL_UINT_FAST8_T back_count = FLAGS(scan); char * s; match_end = locinput; @@ -9847,7 +9847,7 @@ NULL /* FALLTHROUGH */ case PRUNE: /* (*PRUNE) */ - if (scan->flags) + if (FLAGS(scan)) sv_yes_mark = sv_commit = MUTABLE_SV(rexi->data->data[ ARG1u( scan ) ]); PUSH_STATE_GOTO(COMMIT_next, next, locinput, loceol, script_run_begin); @@ -9860,7 +9860,7 @@ NULL NOT_REACHED; /* NOTREACHED */ case OPFAIL: /* (*FAIL) */ - if (scan->flags) + if (FLAGS(scan)) sv_commit = MUTABLE_SV(rexi->data->data[ ARG1u( scan ) ]); if (logical) { /* deal with (?(?!)X|Y) properly, @@ -9910,7 +9910,7 @@ NULL NOT_REACHED; /* NOTREACHED */ case SKIP: /* (*SKIP) */ - if (!scan->flags) { + if (!FLAGS(scan)) { /* (*SKIP) : if we fail we cut here*/ ST.mark_name = NULL; ST.mark_loc = locinput; @@ -29,10 +29,23 @@ struct regnode_meta { U8 off_by_arg; }; +/* this ensures that on alignment sensitive platforms + * this struct is aligned on 32 bit boundaries */ +union regnode_head { + struct { + union { + U8 flags; + U8 str_len_u8; + U8 first_byte; + } u_8; + U8 type; + U16 next_off; + } data; + U32 data_u32; +}; + struct regnode { - U8 flags; - U8 type; - U16 next_off; + union regnode_head head; }; typedef struct regnode regnode; |