diff options
author | Yves Orton <demerphq@gmail.com> | 2023-03-21 22:02:48 +0100 |
---|---|---|
committer | Yves Orton <demerphq@gmail.com> | 2023-03-29 20:54:49 +0800 |
commit | 44eb4cdc274114db740861e4a116ffce3371d70f (patch) | |
tree | ce493442d641765399dacc07ac2e81830796f0fa | |
parent | b292ecb4e4450921a8424ad87000f49bd9c858de (diff) | |
download | perl-44eb4cdc274114db740861e4a116ffce3371d70f.tar.gz |
regcomp.h - use a common union for head and args across all regnodes.
This helps with HP-UX builds, where we need to ensure that everything
is aligned the same way (on 32-bit boundaries). It also strongly
encourages all code to use the accessor macros rather than accessing
the members directly.
By using a union for the variant fields we make it more obvious
that some regops use the same field in different ways. This patch
also converts all the arg unions into a single standardized union
with standardized member names.
-rw-r--r-- | regcomp.c | 14 | ||||
-rw-r--r-- | regcomp.h | 232 | ||||
-rw-r--r-- | regcomp_debug.c | 22 | ||||
-rw-r--r-- | regcomp_study.c | 40 | ||||
-rw-r--r-- | regcomp_trie.c | 2 | ||||
-rw-r--r-- | regexec.c | 70 | ||||
-rw-r--r-- | regexp.h | 19 |
7 files changed, 176 insertions, 223 deletions
@@ -1837,7 +1837,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, /* An OR of *one* alternative - should not happen now. */ (OP(first) == BRANCH && OP(first_next) != BRANCH) || /* for now we can't handle lookbehind IFMATCH*/ - (OP(first) == IFMATCH && !first->flags && (sawlookahead = 1)) || + (OP(first) == IFMATCH && !FLAGS(first) && (sawlookahead = 1)) || (OP(first) == PLUS) || (OP(first) == MINMOD) || /* An {n,m} with n>0 */ @@ -2220,7 +2220,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, */ if (REGNODE_TYPE(fop) == NOTHING && nop == END) RExC_rx->extflags |= RXf_NULL; - else if ((fop == MBOL || (fop == SBOL && !first->flags)) && nop == END) + else if ((fop == MBOL || (fop == SBOL && !FLAGS(first))) && nop == END) /* when fop is SBOL first->flags will be true only when it was * produced by parsing /\A/, and not when parsing /^/. This is * very important for the split code as there we want to @@ -2766,7 +2766,7 @@ S_handle_named_backref(pTHX_ RExC_state_t *pRExC_state, : REFFN), num, RExC_nestroot); if (RExC_nestroot && num >= (U32)RExC_nestroot) - REGNODE_p(ret)->flags = VOLATILE_REF; + FLAGS(REGNODE_p(ret)) = VOLATILE_REF; *flagp |= HASWIDTH; nextchar(pRExC_state); @@ -6045,7 +6045,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) : REFF), num, RExC_nestroot); if (RExC_nestroot && num >= RExC_nestroot) - REGNODE_p(ret)->flags = VOLATILE_REF; + FLAGS(REGNODE_p(ret)) = VOLATILE_REF; if (OP(REGNODE_p(ret)) == REFF) { RExC_seen_d_op = TRUE; } @@ -12024,7 +12024,7 @@ S_optimize_regclass(pTHX_ op = ANYOFHbbm; *ret = REGNODE_GUTS(pRExC_state, op, REGNODE_ARG_LEN(op)); FILL_NODE(*ret, op); - ((struct regnode_bbm *) REGNODE_p(*ret))->first_byte = low_utf8[0], + FIRST_BYTE((struct regnode_bbm *) REGNODE_p(*ret)) = low_utf8[0], /* The 64 bit (or 32 on EBCCDIC) map can be looked up * directly based on the continuation byte, without @@ -12050,7 +12050,7 @@ S_optimize_regclass(pTHX_ *ret = REGNODE_GUTS(pRExC_state, op, 
REGNODE_ARG_LEN(op) + STR_SZ(len)); FILL_NODE(*ret, op); - ((struct regnode_anyofhs *) REGNODE_p(*ret))->str_len + STR_LEN_U8((struct regnode_anyofhs *) REGNODE_p(*ret)) = len; Copy(low_utf8, /* Add the common bytes */ ((struct regnode_anyofhs *) REGNODE_p(*ret))->string, @@ -13044,7 +13044,7 @@ Perl_get_ANYOFHbbm_contents(pTHX_ const regnode * n) { &cp_list, /* The base cp is from the start byte plus a zero continuation */ - TWO_BYTE_UTF8_TO_NATIVE(((struct regnode_bbm *) n)->first_byte, + TWO_BYTE_UTF8_TO_NATIVE(FIRST_BYTE((struct regnode_bbm *) n), UTF_CONTINUATION_MARK | 0)); return cp_list; } @@ -178,16 +178,36 @@ typedef struct regexp_internal { * change things without care. If you look at regexp.h you will see it * contains this: * + * union regnode_head { + * struct { + * union { + * U8 flags; + * U8 str_len_u8; + * U8 first_byte; + * } u_8; + * U8 type; + * U16 next_off; + * } data; + * U32 data_u32; + * }; + * * struct regnode { - * U8 flags; - * U8 type; - * U16 next_off; + * union regnode_head head; * }; * - * This structure is the base unit of elements in the regexp program. When - * we increment our way through the program we increment by the size of this - * structure, and in all cases where regnode sizing is considered it is in - * units of this structure. + * Which really is a complicated and alignment friendly version of + * + * struct { + * U8 flags; + * U8 type; + * U16 next_off; + * }; + * + * This structure is the base unit of elements in the regexp program. + * When we increment our way through the program we increment by the + * size of this structure (32 bits), and in all cases where regnode + * sizing is considered it is in units of this structure. All regnodes + * have a union regnode_head as their first parameter. * * This implies that no regnode style structure should contain 64 bit * aligned members. 
Since the base regnode is 32 bits any member might @@ -210,52 +230,40 @@ typedef struct regexp_internal { * we already have support for in the data array. */ +union regnode_arg { + I32 i32; + U32 u32; + struct { + U16 u16a; + U16 u16b; + } hi_lo; +}; + + struct regnode_string { - U8 str_len_u8; - U8 type; - U16 next_off; + union regnode_head head; char string[1]; }; struct regnode_lstring { /* Constructed this way to keep the string aligned. */ - U8 flags; - U8 type; - U16 next_off; + union regnode_head head; U32 str_len_u32; /* Only 18 bits allowed before would overflow 'next_off' */ char string[1]; }; struct regnode_anyofhs { /* Constructed this way to keep the string aligned. */ - U8 str_len; - U8 type; - U16 next_off; - union { - U32 arg1u; - I32 arg1i; - struct { - U16 arg1a; - U16 arg1b; - } hi_lo; - } arg1; + union regnode_head head; + union regnode_arg arg1; char string[1]; }; -/* Argument bearing node - workhorse, arg1u is often for the data field - * Can store either a signed value via ARG1i() or unsigned 32 bit value +/* Argument bearing node - workhorse, ARG1u() is often used for the data field + * Can store either a signed 32 bit value via ARG1i() or unsigned 32 bit value * via ARG1u(), or two unsigned 16 bit values via ARG1a() or ARG1b() */ struct regnode_1 { - U8 flags; - U8 type; - U16 next_off; - union { - U32 arg1u; - I32 arg1i; - struct { - U16 arg1a; - U16 arg1b; - } hi_lo; - } arg1; + union regnode_head head; + union regnode_arg arg1; }; /* Node whose argument is 'SV *'. This needs to be used very carefully in @@ -274,9 +282,7 @@ struct regnode_1 { * then use inline functions to copy the data in or out. 
* */ struct regnode_p { - U8 flags; - U8 type; - U16 next_off; + union regnode_head head; char arg1_sv_ptr_bytes[sizeof(SV *)]; }; @@ -285,25 +291,9 @@ struct regnode_p { * Extra field can be accessed as (U32)ARG2u() (I32)ARG2i() or (U16)ARG2a() * and (U16)ARG2b() */ struct regnode_2 { - U8 flags; - U8 type; - U16 next_off; - union { - U32 arg1u; - I32 arg1i; - struct { - U16 arg1a; - U16 arg1b; - } hi_lo; - } arg1; - union { - U32 arg2u; - I32 arg2i; - struct { - U16 arg2a; - U16 arg2b; - } hi_lo; - } arg2; + union regnode_head head; + union regnode_arg arg1; + union regnode_arg arg2; }; /* "Three Node" - similar to a regnode_2 but with space for an additional @@ -315,33 +305,10 @@ struct regnode_2 { * ARG3a() and ARG3b() which are used to store information about the number of * parens before and inside the quantified expression. */ struct regnode_3 { - U8 flags; - U8 type; - U16 next_off; - union { - I32 arg1i; - U32 arg1u; - struct { - U16 arg1a; - U16 arg1b; - } hi_lo; - } arg1; - union { - I32 arg2i; - U32 arg2u; - struct { - U16 arg2a; - U16 arg2b; - } hi_lo; - } arg2; - union { - struct { - U16 arg3a; - U16 arg3b; - } hi_lo; - I32 arg3i; - U32 arg3u; - } arg3; + union regnode_head head; + union regnode_arg arg1; + union regnode_arg arg2; + union regnode_arg arg3; }; #define REGNODE_BBM_BITMAP_LEN \ @@ -352,9 +319,7 @@ struct regnode_3 { * The array is a bitmap capable of representing any possible continuation * byte. */ struct regnode_bbm { - U8 first_byte; - U8 type; - U16 next_off; + union regnode_head head; U8 bitmap[REGNODE_BBM_BITMAP_LEN]; }; @@ -370,36 +335,18 @@ struct regnode_bbm { * the code that inserts and deletes regnodes. The basic single-argument * regnode has a U32, which is what reganode() allocates as a unit. Therefore * no field can require stricter alignment than U32. 
*/ - + /* also used by trie */ struct regnode_charclass { - U8 flags; - U8 type; - U16 next_off; - union { - I32 arg1i; - U32 arg1u; - struct { - U16 arg1a; - U16 arg1b; - } hi_lo; - } arg1; + union regnode_head head; + union regnode_arg arg1; char bitmap[ANYOF_BITMAP_SIZE]; /* only compile-time */ }; /* has runtime (locale) \d, \w, ..., [:posix:] classes */ struct regnode_charclass_posixl { - U8 flags; /* ANYOF_MATCHES_POSIXL bit must go here */ - U8 type; - U16 next_off; - union { - I32 arg1i; - U32 arg1u; - struct { - U16 arg1a; - U16 arg1b; - } hi_lo; - } arg1; + union regnode_head head; + union regnode_arg arg1; char bitmap[ANYOF_BITMAP_SIZE]; /* both compile-time ... */ U32 classflags; /* and run-time */ }; @@ -418,17 +365,8 @@ struct regnode_charclass_posixl { * never a next node. */ struct regnode_ssc { - U8 flags; /* ANYOF_MATCHES_POSIXL bit must go here */ - U8 type; - U16 next_off; - union { - I32 arg1i; - U32 arg1u; - struct { - U16 arg1a; - U16 arg1b; - } hi_lo; - } arg1; + union regnode_head head; + union regnode_arg arg1; char bitmap[ANYOF_BITMAP_SIZE]; /* both compile-time ... */ U32 classflags; /* ... and run-time */ @@ -524,11 +462,6 @@ struct regnode_ssc { #define ARGp_SET(p, val) ARGp_SET_inline((p),(val)) -#undef NEXT_OFF -#undef NODE_ALIGN - -#define NEXT_OFF(p) ((p)->next_off) -#define NODE_ALIGN(node) /* the following define was set to 0xde in 075abff3 * as part of some linting logic. I have set it to 0 * as otherwise in every place where we /might/ set flags @@ -538,26 +471,33 @@ struct regnode_ssc { * is changed from 0 then at the very least make sure * that SBOL for /^/ sets the flags to 0 explicitly. 
* -- Yves */ -#define NODE_ALIGN_FILL(node) ((node)->flags = 0) +#define NODE_ALIGN(node) #define SIZE_ALIGN NODE_ALIGN #undef OP #undef OPERAND #undef STRING +#undef NEXT_OFF +#undef NODE_ALIGN -#define OP(p) ((p)->type) -#define FLAGS(p) ((p)->flags) /* Caution: Doesn't apply to all \ +#define NEXT_OFF(p) ((p)->head.data.next_off) +#define OP(p) ((p)->head.data.type) +#define STR_LEN_U8(p) ((p)->head.data.u_8.str_len_u8) +#define FIRST_BYTE(p) ((p)->head.data.u_8.first_byte) +#define FLAGS(p) ((p)->head.data.u_8.flags) /* Caution: Doesn't apply to all \ regnode types. For some, it's the \ character set of the regnode */ #define STR_LENs(p) (__ASSERT_(OP(p) != LEXACT && OP(p) != LEXACT_REQ8) \ - ((struct regnode_string *)p)->str_len_u8) + STR_LEN_U8((struct regnode_string *)p)) #define STRINGs(p) (__ASSERT_(OP(p) != LEXACT && OP(p) != LEXACT_REQ8) \ ((struct regnode_string *)p)->string) #define OPERANDs(p) STRINGs(p) #define PARNO(p) ARG1u(p) /* APPLIES for OPEN and CLOSE only */ +#define NODE_ALIGN_FILL(node) (FLAGS(node) = 0) + /* Long strings. Currently limited to length 18 bits, which handles a 262000 * byte string. The limiting factor is the 16 bit 'next_off' field, which * points to the next regnode, so the furthest away it can be is 2**16. 
On @@ -591,7 +531,7 @@ struct regnode_ssc { if (OP(p) == LEXACT || OP(p) == LEXACT_REQ8) \ ((struct regnode_lstring *)(p))->str_len_u32 = (v); \ else \ - ((struct regnode_string *)(p))->str_len_u8 = (v); \ + STR_LEN_U8((struct regnode_string *)(p)) = (v); \ } STMT_END #define ANYOFR_BASE_BITS 20 @@ -603,18 +543,18 @@ struct regnode_ssc { #define NODE_ALIGN(node) #define ARGp_BYTES_LOC(p) (((struct regnode_p *)p)->arg1_sv_ptr_bytes) -#define ARG1u_LOC(p) (((struct regnode_1 *)p)->arg1.arg1u) -#define ARG1i_LOC(p) (((struct regnode_1 *)p)->arg1.arg1i) -#define ARG1a_LOC(p) (((struct regnode_1 *)p)->arg1.hi_lo.arg1a) -#define ARG1b_LOC(p) (((struct regnode_1 *)p)->arg1.hi_lo.arg1b) -#define ARG2u_LOC(p) (((struct regnode_2 *)p)->arg2.arg2u) -#define ARG2i_LOC(p) (((struct regnode_2 *)p)->arg2.arg2i) -#define ARG2a_LOC(p) (((struct regnode_2 *)p)->arg2.hi_lo.arg2a) -#define ARG2b_LOC(p) (((struct regnode_2 *)p)->arg2.hi_lo.arg2b) -#define ARG3u_LOC(p) (((struct regnode_3 *)p)->arg3.arg3u) -#define ARG3i_LOC(p) (((struct regnode_3 *)p)->arg3.arg3i) -#define ARG3a_LOC(p) (((struct regnode_3 *)p)->arg3.hi_lo.arg3a) -#define ARG3b_LOC(p) (((struct regnode_3 *)p)->arg3.hi_lo.arg3b) +#define ARG1u_LOC(p) (((struct regnode_1 *)p)->arg1.u32) +#define ARG1i_LOC(p) (((struct regnode_1 *)p)->arg1.i32) +#define ARG1a_LOC(p) (((struct regnode_1 *)p)->arg1.hi_lo.u16a) +#define ARG1b_LOC(p) (((struct regnode_1 *)p)->arg1.hi_lo.u16b) +#define ARG2u_LOC(p) (((struct regnode_2 *)p)->arg2.u32) +#define ARG2i_LOC(p) (((struct regnode_2 *)p)->arg2.i32) +#define ARG2a_LOC(p) (((struct regnode_2 *)p)->arg2.hi_lo.u16a) +#define ARG2b_LOC(p) (((struct regnode_2 *)p)->arg2.hi_lo.u16b) +#define ARG3u_LOC(p) (((struct regnode_3 *)p)->arg3.u32) +#define ARG3i_LOC(p) (((struct regnode_3 *)p)->arg3.i32) +#define ARG3a_LOC(p) (((struct regnode_3 *)p)->arg3.hi_lo.u16a) +#define ARG3b_LOC(p) (((struct regnode_3 *)p)->arg3.hi_lo.u16b) /* These should no longer be used directly in most cases. 
Please use * the REGNODE_AFTER() macros instead. */ @@ -1065,7 +1005,7 @@ ARGp_SET_inline(struct regnode *node, SV *ptr) { #define BITMAP_BIT(c) (1U << ((c) & 7)) #define BITMAP_TEST(p, c) (BITMAP_BYTE(p, c) & BITMAP_BIT((U8)(c))) -#define ANYOF_FLAGS(p) ((p)->flags) +#define ANYOF_FLAGS(p) (FLAGS(p)) #define ANYOF_BIT(c) BITMAP_BIT(c) diff --git a/regcomp_debug.c b/regcomp_debug.c index 6ab276155c..93db7a89cf 100644 --- a/regcomp_debug.c +++ b/regcomp_debug.c @@ -438,7 +438,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_ const reg_trie_data * const trie = (reg_trie_data*)progi->data->data[!IS_TRIE_AC(op) ? n : ac->trie]; - Perl_sv_catpvf(aTHX_ sv, "-%s", REGNODE_NAME(o->flags)); + Perl_sv_catpvf(aTHX_ sv, "-%s", REGNODE_NAME(FLAGS(o))); DEBUG_TRIE_COMPILE_r({ if (trie->jump) sv_catpvs(sv, "(JUMP)"); @@ -475,7 +475,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_ if (ARG3u(o)) /* check both ARG3a and ARG3b at the same time */ Perl_sv_catpvf(aTHX_ sv, "<%d:%d>", ARG3a(o),ARG3b(o)); /* paren before, paren after */ if (op == CURLYM || op == CURLYN || op == CURLYX) - Perl_sv_catpvf(aTHX_ sv, "[%d]", o->flags); /* Parenth number */ + Perl_sv_catpvf(aTHX_ sv, "[%d]", FLAGS(o)); /* Parenth number */ Perl_sv_catpvf(aTHX_ sv, "{%u,", (unsigned) lo); if (hi == REG_INFTY) sv_catpvs(sv, "INFTY"); @@ -483,8 +483,8 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_ Perl_sv_catpvf(aTHX_ sv, "%u", (unsigned) hi); sv_catpvs(sv, "}"); } - else if (k == WHILEM && o->flags) /* Ordinal/of */ - Perl_sv_catpvf(aTHX_ sv, "[%d/%d]", o->flags & 0xf, o->flags>>4); + else if (k == WHILEM && FLAGS(o)) /* Ordinal/of */ + Perl_sv_catpvf(aTHX_ sv, "[%d/%d]", FLAGS(o) & 0xf, FLAGS(o)>>4); else if (k == REF || k == OPEN || k == CLOSE || k == GROUPP || op == ACCEPT) { @@ -586,7 +586,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_ } else if (k == LOGICAL) 
/* 2: embedded, otherwise 1 */ - Perl_sv_catpvf(aTHX_ sv, "[%d]", o->flags); + Perl_sv_catpvf(aTHX_ sv, "[%d]", FLAGS(o)); else if (k == ANYOF || k == ANYOFH || k == ANYOFR) { U8 flags; char * bitmap; @@ -876,21 +876,21 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_ sv_catpv(sv, bounds[FLAGS(o)]); } else if (k == BRANCHJ && (op == UNLESSM || op == IFMATCH)) { - Perl_sv_catpvf(aTHX_ sv, "[%d", -(o->flags)); - if (o->next_off) { - Perl_sv_catpvf(aTHX_ sv, "..-%d", o->flags - o->next_off); + Perl_sv_catpvf(aTHX_ sv, "[%d", -(FLAGS(o))); + if (NEXT_OFF(o)) { + Perl_sv_catpvf(aTHX_ sv, "..-%d", FLAGS(o) - NEXT_OFF(o)); } Perl_sv_catpvf(aTHX_ sv, "]"); } else if (op == SBOL) - Perl_sv_catpvf(aTHX_ sv, " /%s/", o->flags ? "\\A" : "^"); + Perl_sv_catpvf(aTHX_ sv, " /%s/", FLAGS(o) ? "\\A" : "^"); else if (op == EVAL) { - if (o->flags & EVAL_OPTIMISTIC_FLAG) + if (FLAGS(o) & EVAL_OPTIMISTIC_FLAG) Perl_sv_catpvf(aTHX_ sv, " optimistic"); } /* add on the verb argument if there is one */ - if ( ( k == VERB || op == ACCEPT || op == OPFAIL ) && o->flags) { + if ( ( k == VERB || op == ACCEPT || op == OPFAIL ) && FLAGS(o)) { if ( ARG1u(o) ) Perl_sv_catpvf(aTHX_ sv, ":%" SVf, SVfARG((MUTABLE_SV(progi->data->data[ ARG1u( o ) ])))); diff --git a/regcomp_study.c b/regcomp_study.c index 81d55719df..db7ab3a409 100644 --- a/regcomp_study.c +++ b/regcomp_study.c @@ -2557,7 +2557,7 @@ Perl_study_chunk(pTHX_ goto optimize_curly_tail; case CURLY: if (stopparen>0 && (OP(scan)==CURLYN || OP(scan)==CURLYM) - && (scan->flags == stopparen)) + && (FLAGS(scan) == stopparen)) { mincount = 1; maxcount = 1; @@ -2568,7 +2568,7 @@ Perl_study_chunk(pTHX_ next = regnext(scan); if (OP(scan) == CURLYX) { I32 lp = (data ? *(data->last_closep) : 0); - scan->flags = ((lp <= (I32)U8_MAX) ? (U8)lp : U8_MAX); + FLAGS(scan) = ((lp <= (I32)U8_MAX) ? 
(U8)lp : U8_MAX); } scan = REGNODE_AFTER(scan); next_is_eval = (OP(scan) == EVAL); @@ -2729,7 +2729,7 @@ Perl_study_chunk(pTHX_ RExC_close_parens[PARNO(nxt1)] = REGNODE_OFFSET(nxt) + 2; } /* Now we know that nxt2 is the only contents: */ - oscan->flags = (U8)PARNO(nxt); + FLAGS(oscan) = (U8)PARNO(nxt); OP(oscan) = CURLYN; OP(nxt1) = NOTHING; /* was OPEN. */ @@ -2778,7 +2778,7 @@ Perl_study_chunk(pTHX_ /* note that we have changed the type of oscan to CURLYM here */ regnode *nxt1 = REGNODE_AFTER_type(oscan, tregnode_CURLYM); /* OPEN*/ - oscan->flags = (U8)PARNO(nxt); + FLAGS(oscan) = (U8)PARNO(nxt); if (RExC_open_parens) { /*open->CURLYM*/ RExC_open_parens[PARNO(nxt1)] = REGNODE_OFFSET(oscan); @@ -2817,7 +2817,7 @@ Perl_study_chunk(pTHX_ depth+1, mutate_ok); } else - oscan->flags = 0; + FLAGS(oscan) = 0; } else if ((OP(oscan) == CURLYX) && (flags & SCF_WHILEM_VISITED_POS) @@ -2833,11 +2833,11 @@ Perl_study_chunk(pTHX_ if (OP(REGNODE_BEFORE(nxt)) == NOTHING) /* LONGJMP */ nxt += ARG1u(nxt); nxt = REGNODE_BEFORE(nxt); - if (nxt->flags & 0xf) { + if (FLAGS(nxt) & 0xf) { /* we've already set whilem count on this node */ } else if (++data->whilem_c < 16) { assert(data->whilem_c <= RExC_whilem_seen); - nxt->flags = (U8)(data->whilem_c + FLAGS(nxt) = (U8)(data->whilem_c | (RExC_whilem_seen << 4)); /* On WHILEM */ } } @@ -3223,7 +3223,7 @@ Perl_study_chunk(pTHX_ } else if ( REGNODE_TYPE(OP(scan)) == BRANCHJ /* Lookbehind, or need to calculate parens/evals/stclass: */ - && (scan->flags || data || (flags & SCF_DO_STCLASS)) + && (FLAGS(scan) || data || (flags & SCF_DO_STCLASS)) && (OP(scan) == IFMATCH || OP(scan) == UNLESSM)) { if ( !PERL_ENABLE_POSITIVE_ASSERTION_STUDY @@ -3260,7 +3260,7 @@ Perl_study_chunk(pTHX_ cur_last_close_op= *(data_fake.last_close_opp); data_fake.pos_delta = delta; - if ( flags & SCF_DO_STCLASS && !scan->flags + if ( flags & SCF_DO_STCLASS && !FLAGS(scan) && OP(scan) == IFMATCH ) { /* Lookahead */ ssc_init(pRExC_state, &intrnl); data_fake.start_class = 
&intrnl; @@ -3277,7 +3277,7 @@ Perl_study_chunk(pTHX_ recursed_depth, NULL, f, depth+1, mutate_ok); - if (scan->flags) { + if (FLAGS(scan)) { if ( deltanext < 0 || deltanext > (I32) U8_MAX || minnext > (I32)U8_MAX @@ -3293,7 +3293,7 @@ Perl_study_chunk(pTHX_ * matches to avoid breakage for those not using this * extension) */ if (deltanext) { - scan->next_off = deltanext; + NEXT_OFF(scan) = deltanext; if ( /* See a CLOSE op inside this lookbehind? */ cur_last_close_op != *(data_fake.last_close_opp) @@ -3308,7 +3308,7 @@ Perl_study_chunk(pTHX_ is_positive ? "positive" : "negative"); } } - scan->flags = (U8)minnext + deltanext; + FLAGS(scan) = (U8)minnext + deltanext; } if (data) { if (data_fake.flags & (SF_HAS_PAR|SF_IN_PAR)) @@ -3365,7 +3365,7 @@ Perl_study_chunk(pTHX_ StructCopy(data, &data_fake, scan_data_t); if ((flags & SCF_DO_SUBSTR) && data->last_found) { f |= SCF_DO_SUBSTR; - if (scan->flags) + if (FLAGS(scan)) scan_commit(pRExC_state, &data_fake, minlenp, is_inf); data_fake.last_found=newSVsv(data->last_found); } @@ -3380,7 +3380,7 @@ Perl_study_chunk(pTHX_ data_fake.pos_delta = delta; if (is_inf) data_fake.flags |= SF_IS_INF; - if ( flags & SCF_DO_STCLASS && !scan->flags + if ( flags & SCF_DO_STCLASS && !FLAGS(scan) && OP(scan) == IFMATCH ) { /* Lookahead */ ssc_init(pRExC_state, &intrnl); data_fake.start_class = &intrnl; @@ -3396,7 +3396,7 @@ Perl_study_chunk(pTHX_ &deltanext, last, &data_fake, stopparen, recursed_depth, NULL, f, depth+1, mutate_ok); - if (scan->flags) { + if (FLAGS(scan)) { assert(0); /* This code has never been tested since this is normally not compiled */ if ( deltanext < 0 @@ -3409,9 +3409,9 @@ Perl_study_chunk(pTHX_ } if (deltanext) { - scan->next_off = deltanext; + NEXT_OFF(scan) = deltanext; } - scan->flags = (U8)*minnextp + deltanext; + FLAGS(scan) = (U8)*minnextp + deltanext; } *minnextp += min; @@ -3441,7 +3441,7 @@ Perl_study_chunk(pTHX_ data_fake.substrs[i].max_offset; data->substrs[i].minlenp = data_fake.substrs[i].minlenp; - 
data->substrs[i].lookbehind += scan->flags; + data->substrs[i].lookbehind += FLAGS(scan); } } } @@ -3469,7 +3469,7 @@ Perl_study_chunk(pTHX_ } } else if (OP(scan) == EVAL) { - if (data && !(scan->flags & EVAL_OPTIMISTIC_FLAG) ) + if (data && !(FLAGS(scan) & EVAL_OPTIMISTIC_FLAG) ) data->flags |= SF_HAS_EVAL; } else if ( REGNODE_TYPE(OP(scan)) == ENDLIKE ) { @@ -3496,7 +3496,7 @@ Perl_study_chunk(pTHX_ flags &= ~SCF_DO_SUBSTR; } } - else if (OP(scan) == LOGICAL && scan->flags == 2) /* Embedded follows */ + else if (OP(scan) == LOGICAL && FLAGS(scan) == 2) /* Embedded follows */ { if (flags & SCF_DO_SUBSTR) { scan_commit(pRExC_state, data, minlenp, is_inf); diff --git a/regcomp_trie.c b/regcomp_trie.c index 1096a25fcd..31b54ca936 100644 --- a/regcomp_trie.c +++ b/regcomp_trie.c @@ -1517,7 +1517,7 @@ Perl_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, OP( convert ) = TRIE; /* store the type in the flags */ - convert->flags = nodetype; + FLAGS(convert) = nodetype; DEBUG_r({ optimize = convert + NODE_STEP_REGNODE @@ -197,7 +197,7 @@ static const char non_utf8_target_but_utf8_required[] /* Search for mandatory following text node; for lookahead, the text must - follow but for lookbehind (rn->flags != 0) we skip to the next step. + follow but for lookbehind (FLAGS(rn) != 0) we skip to the next step. */ #define FIND_NEXT_IMPT(rn) STMT_START { \ while (JUMPABLE(rn)) { \ @@ -207,7 +207,7 @@ static const char non_utf8_target_but_utf8_required[] else if (type == PLUS) \ rn = REGNODE_AFTER_type(rn,tregnode_PLUS); \ else if (type == IFMATCH) \ - rn = (rn->flags == 0) ? REGNODE_AFTER_type(rn,tregnode_IFMATCH) : rn + ARG1u(rn); \ + rn = (FLAGS(rn) == 0) ? 
REGNODE_AFTER_type(rn,tregnode_IFMATCH) : rn + ARG1u(rn); \ else rn += NEXT_OFF(rn); \ } \ } STMT_END @@ -1781,15 +1781,15 @@ Perl_re_intuit_start(pTHX_ const enum { trie_plain, trie_utf8, trie_utf8_fold, trie_latin_utf8_fold, \ trie_utf8_exactfa_fold, trie_latin_utf8_exactfa_fold, \ trie_utf8l, trie_flu8, trie_flu8_latin } \ - trie_type = ((scan->flags == EXACT) \ + trie_type = ((FLAGS(scan) == EXACT) \ ? (utf8_target ? trie_utf8 : trie_plain) \ - : (scan->flags == EXACTL) \ + : (FLAGS(scan) == EXACTL) \ ? (utf8_target ? trie_utf8l : trie_plain) \ - : (scan->flags == EXACTFAA) \ + : (FLAGS(scan) == EXACTFAA) \ ? (utf8_target \ ? trie_utf8_exactfa_fold \ : trie_latin_utf8_exactfa_fold) \ - : (scan->flags == EXACTFLU8 \ + : (FLAGS(scan) == EXACTFLU8 \ ? (utf8_target \ ? trie_flu8 \ : trie_flu8_latin) \ @@ -6704,12 +6704,12 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog) HV * widecharmap = MUTABLE_HV(rexi->data->data[ ARG1u( scan ) + 1 ]); U32 state = trie->startstate; - if (scan->flags == EXACTL || scan->flags == EXACTFLU8) { + if (FLAGS(scan) == EXACTL || FLAGS(scan) == EXACTFLU8) { CHECK_AND_WARN_PROBLEMATIC_LOCALE_; if (utf8_target && ! 
NEXTCHR_IS_EOS && UTF8_IS_ABOVE_LATIN1(nextbyte) - && scan->flags == EXACTL) + && FLAGS(scan) == EXACTL) { /* We only output for EXACTL, as we let the folder * output this message for EXACTFLU8 to avoid @@ -8081,7 +8081,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog) locinput += ln; } ref_yes: - if (scan->flags) { /* == VOLATILE_REF but only other value is 0 */ + if (FLAGS(scan)) { /* == VOLATILE_REF but only other value is 0 */ ST.cp = regcppush(rex, ARG2u(scan) - 1, maxopenparen); REGCP_SET(ST.lastcp); PUSH_STATE_GOTO(REF_next, next, locinput, loceol, @@ -8428,7 +8428,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog) /* if we got here, it should be an engine which * supports compiling code blocks and stuff */ assert(rex->engine && rex->engine->op_comp); - assert(!(scan->flags & ~RXf_PMf_COMPILETIME)); + assert(!(FLAGS(scan) & ~RXf_PMf_COMPILETIME)); re_sv = rex->engine->op_comp(aTHX_ &ret, 1, NULL, rex->engine, NULL, NULL, /* copy /msixn etc to inner pattern */ @@ -8632,7 +8632,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog) case ACCEPT: /* (*ACCEPT) */ is_accepted = true; - if (scan->flags) + if (FLAGS(scan)) sv_yes_mark = MUTABLE_SV(rexi->data->data[ ARG1u( scan ) ]); utmp = ARG2u(scan); @@ -8699,7 +8699,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog) break; case LOGICAL: /* modifier for EVAL and IFMATCH */ - logical = scan->flags & EVAL_FLAGS_MASK; /* reserve a bit for optimistic eval */ + logical = FLAGS(scan) & EVAL_FLAGS_MASK; /* reserve a bit for optimistic eval */ break; /******************************************************************* @@ -8790,7 +8790,7 @@ NULL case CURLYX: /* start of /A*B/ (for complex A) */ { /* No need to save/restore up to this paren */ - I32 parenfloor = scan->flags; + I32 parenfloor = FLAGS(scan); assert(next); /* keep Coverity happy */ if (OP(REGNODE_BEFORE(next)) == NOTHING) /* LONGJMP */ @@ -8905,20 +8905,20 @@ 
NULL * op (string-length x #WHILEMs) times do we allocate the * cache. * - * The top 4 bits of scan->flags byte say how many different + * The top 4 bits of FLAGS(scan) byte say how many different * relevant CURLLYX/WHILEM op pairs there are, while the * bottom 4-bits is the identifying index number of this * WHILEM. */ - if (scan->flags) { + if (FLAGS(scan)) { if (!reginfo->poscache_maxiter) { /* start the countdown: Postpone detection until we * know the match is not *that* much linear. */ reginfo->poscache_maxiter = (reginfo->strend - reginfo->strbeg + 1) - * (scan->flags>>4); + * (FLAGS(scan)>>4); /* possible overflow for long strings and many CURLYX's */ if (reginfo->poscache_maxiter < 0) reginfo->poscache_maxiter = I32_MAX; @@ -8951,9 +8951,9 @@ NULL SSize_t offset, mask; reginfo->poscache_iter = -1; /* stop eventual underflow */ - offset = (scan->flags & 0xf) - 1 + offset = (FLAGS(scan) & 0xf) - 1 + (locinput - reginfo->strbeg) - * (scan->flags>>4); + * (FLAGS(scan)>>4); mask = 1 << (offset % 8); offset /= 8; if (reginfo->info_aux->poscache[offset] & mask) { @@ -9089,7 +9089,7 @@ NULL NOT_REACHED; /* NOTREACHED */ case CUTGROUP: /* /(*THEN)/ */ - sv_yes_mark = st->u.mark.mark_name = scan->flags + sv_yes_mark = st->u.mark.mark_name = FLAGS(scan) ? 
MUTABLE_SV(rexi->data->data[ ARG1u( scan ) ]) : NULL; PUSH_STATE_GOTO(CUTGROUP_next, next, locinput, loceol, @@ -9156,8 +9156,8 @@ NULL ST.lastcloseparen = RXp_LASTCLOSEPAREN(rex); /* if paren positive, emulate an OPEN/CLOSE around A */ - if (ST.me->flags) { - U32 paren = ST.me->flags; + if (FLAGS(ST.me)) { + U32 paren = FLAGS(ST.me); lastopen = paren; if (paren > maxopenparen) maxopenparen = paren; @@ -9202,15 +9202,15 @@ NULL depth, (IV) ST.count, (IV)ST.alen) ); - if (ST.me->flags) { + if (FLAGS(ST.me)) { /* emulate CLOSE: mark current A as captured */ - U32 paren = (U32)ST.me->flags; + U32 paren = (U32)FLAGS(ST.me); CLOSE_CAPTURE(rex, paren, HOPc(locinput, -ST.alen) - reginfo->strbeg, locinput - reginfo->strbeg); } - if (EVAL_CLOSE_PAREN_IS_TRUE(cur_eval,(U32)ST.me->flags)) + if (EVAL_CLOSE_PAREN_IS_TRUE(cur_eval,(U32)FLAGS(ST.me))) goto fake_end; @@ -9226,7 +9226,7 @@ NULL if (ST.minmod || ST.count < ARG1i(ST.me) /* min*/ - || EVAL_CLOSE_PAREN_IS_TRUE(cur_eval,(U32)ST.me->flags)) + || EVAL_CLOSE_PAREN_IS_TRUE(cur_eval,(U32)FLAGS(ST.me))) sayNO; curlym_do_B: /* execute the B in /A{m,n}B/ */ @@ -9275,9 +9275,9 @@ NULL } curlym_close_B: - if (ST.me->flags) { + if (FLAGS(ST.me)) { /* emulate CLOSE: mark current A as captured */ - U32 paren = (U32)ST.me->flags; + U32 paren = (U32)FLAGS(ST.me); if (ST.count || is_accepted) { CLOSE_CAPTURE(rex, paren, HOPc(locinput, -ST.alen) - reginfo->strbeg, @@ -9286,7 +9286,7 @@ NULL else RXp_OFFSp(rex)[paren].end = -1; - if (EVAL_CLOSE_PAREN_IS_TRUE(cur_eval,(U32)ST.me->flags)) + if (EVAL_CLOSE_PAREN_IS_TRUE(cur_eval,(U32)FLAGS(ST.me))) { if (ST.count || is_accepted) goto fake_end; @@ -9349,7 +9349,7 @@ NULL goto repeat; case CURLYN: /* /(A){m,n}B/ where A is width 1 char */ - ST.paren = scan->flags; /* Which paren to set */ + ST.paren = FLAGS(scan); /* Which paren to set */ ST.lastparen = RXp_LASTPAREN(rex); ST.lastcloseparen = RXp_LASTCLOSEPAREN(rex); if (ST.paren > maxopenparen) @@ -9738,10 +9738,10 @@ NULL ST.wanted = 1; 
ifmatch_trivial_fail_test: ST.prev_match_end= match_end; - ST.count = scan->next_off + 1; /* next_off repurposed to be + ST.count = NEXT_OFF(scan) + 1; /* next_off repurposed to be lookbehind count, requires non-zero flags */ - if (! scan->flags) { /* 'flags' zero means lookahed */ + if (! FLAGS(scan)) { /* 'flags' zero means lookahed */ /* Lookahead starts here and ends at the normal place */ ST.start = locinput; @@ -9749,7 +9749,7 @@ NULL match_end = NULL; } else { - PERL_UINT_FAST8_T back_count = scan->flags; + PERL_UINT_FAST8_T back_count = FLAGS(scan); char * s; match_end = locinput; @@ -9847,7 +9847,7 @@ NULL /* FALLTHROUGH */ case PRUNE: /* (*PRUNE) */ - if (scan->flags) + if (FLAGS(scan)) sv_yes_mark = sv_commit = MUTABLE_SV(rexi->data->data[ ARG1u( scan ) ]); PUSH_STATE_GOTO(COMMIT_next, next, locinput, loceol, script_run_begin); @@ -9860,7 +9860,7 @@ NULL NOT_REACHED; /* NOTREACHED */ case OPFAIL: /* (*FAIL) */ - if (scan->flags) + if (FLAGS(scan)) sv_commit = MUTABLE_SV(rexi->data->data[ ARG1u( scan ) ]); if (logical) { /* deal with (?(?!)X|Y) properly, @@ -9910,7 +9910,7 @@ NULL NOT_REACHED; /* NOTREACHED */ case SKIP: /* (*SKIP) */ - if (!scan->flags) { + if (!FLAGS(scan)) { /* (*SKIP) : if we fail we cut here*/ ST.mark_name = NULL; ST.mark_loc = locinput; @@ -29,10 +29,23 @@ struct regnode_meta { U8 off_by_arg; }; +/* this ensures that on alignment sensitive platforms + * this struct is aligned on 32 bit boundaries */ +union regnode_head { + struct { + union { + U8 flags; + U8 str_len_u8; + U8 first_byte; + } u_8; + U8 type; + U16 next_off; + } data; + U32 data_u32; +}; + struct regnode { - U8 flags; - U8 type; - U16 next_off; + union regnode_head head; }; typedef struct regnode regnode; |