summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Yves Orton <demerphq@gmail.com> 2023-03-21 22:02:48 +0100
committer Yves Orton <demerphq@gmail.com> 2023-03-29 20:54:49 +0800
commit 44eb4cdc274114db740861e4a116ffce3371d70f (patch)
tree ce493442d641765399dacc07ac2e81830796f0fa
parent b292ecb4e4450921a8424ad87000f49bd9c858de (diff)
download perl-44eb4cdc274114db740861e4a116ffce3371d70f.tar.gz
regcomp.h - use a common union for head and args across all regnodes.
This helps with HP-UX builds, where we need to ensure everything is aligned the same way (on 32-bit boundaries). It also strongly encourages all code to use the accessor macros rather than access the members directly. By using a union for the variadic fields we make it more obvious that some regops use the same field in different ways. This patch also converts all the arg unions into a single standardized union with standardized member names.
-rw-r--r-- regcomp.c 14
-rw-r--r-- regcomp.h 232
-rw-r--r-- regcomp_debug.c 22
-rw-r--r-- regcomp_study.c 40
-rw-r--r-- regcomp_trie.c 2
-rw-r--r-- regexec.c 70
-rw-r--r-- regexp.h 19
7 files changed, 176 insertions, 223 deletions
diff --git a/regcomp.c b/regcomp.c
index 33b247bcd5..5d30cbbbb4 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -1837,7 +1837,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
/* An OR of *one* alternative - should not happen now. */
(OP(first) == BRANCH && OP(first_next) != BRANCH) ||
/* for now we can't handle lookbehind IFMATCH*/
- (OP(first) == IFMATCH && !first->flags && (sawlookahead = 1)) ||
+ (OP(first) == IFMATCH && !FLAGS(first) && (sawlookahead = 1)) ||
(OP(first) == PLUS) ||
(OP(first) == MINMOD) ||
/* An {n,m} with n>0 */
@@ -2220,7 +2220,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
*/
if (REGNODE_TYPE(fop) == NOTHING && nop == END)
RExC_rx->extflags |= RXf_NULL;
- else if ((fop == MBOL || (fop == SBOL && !first->flags)) && nop == END)
+ else if ((fop == MBOL || (fop == SBOL && !FLAGS(first))) && nop == END)
/* when fop is SBOL FLAGS(first) will be true only when it was
* produced by parsing /\A/, and not when parsing /^/. This is
* very important for the split code as there we want to
@@ -2766,7 +2766,7 @@ S_handle_named_backref(pTHX_ RExC_state_t *pRExC_state,
: REFFN),
num, RExC_nestroot);
if (RExC_nestroot && num >= (U32)RExC_nestroot)
- REGNODE_p(ret)->flags = VOLATILE_REF;
+ FLAGS(REGNODE_p(ret)) = VOLATILE_REF;
*flagp |= HASWIDTH;
nextchar(pRExC_state);
@@ -6045,7 +6045,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
: REFF),
num, RExC_nestroot);
if (RExC_nestroot && num >= RExC_nestroot)
- REGNODE_p(ret)->flags = VOLATILE_REF;
+ FLAGS(REGNODE_p(ret)) = VOLATILE_REF;
if (OP(REGNODE_p(ret)) == REFF) {
RExC_seen_d_op = TRUE;
}
@@ -12024,7 +12024,7 @@ S_optimize_regclass(pTHX_
op = ANYOFHbbm;
*ret = REGNODE_GUTS(pRExC_state, op, REGNODE_ARG_LEN(op));
FILL_NODE(*ret, op);
- ((struct regnode_bbm *) REGNODE_p(*ret))->first_byte = low_utf8[0],
+ FIRST_BYTE((struct regnode_bbm *) REGNODE_p(*ret)) = low_utf8[0],
/* The 64 bit (or 32 on EBCDIC) map can be looked up
* directly based on the continuation byte, without
@@ -12050,7 +12050,7 @@ S_optimize_regclass(pTHX_
*ret = REGNODE_GUTS(pRExC_state, op,
REGNODE_ARG_LEN(op) + STR_SZ(len));
FILL_NODE(*ret, op);
- ((struct regnode_anyofhs *) REGNODE_p(*ret))->str_len
+ STR_LEN_U8((struct regnode_anyofhs *) REGNODE_p(*ret))
= len;
Copy(low_utf8, /* Add the common bytes */
((struct regnode_anyofhs *) REGNODE_p(*ret))->string,
@@ -13044,7 +13044,7 @@ Perl_get_ANYOFHbbm_contents(pTHX_ const regnode * n) {
&cp_list,
/* The base cp is from the start byte plus a zero continuation */
- TWO_BYTE_UTF8_TO_NATIVE(((struct regnode_bbm *) n)->first_byte,
+ TWO_BYTE_UTF8_TO_NATIVE(FIRST_BYTE((struct regnode_bbm *) n),
UTF_CONTINUATION_MARK | 0));
return cp_list;
}
diff --git a/regcomp.h b/regcomp.h
index 068de74cd6..b40a2f070c 100644
--- a/regcomp.h
+++ b/regcomp.h
@@ -178,16 +178,36 @@ typedef struct regexp_internal {
* change things without care. If you look at regexp.h you will see it
* contains this:
*
+ * union regnode_head {
+ * struct {
+ * union {
+ * U8 flags;
+ * U8 str_len_u8;
+ * U8 first_byte;
+ * } u_8;
+ * U8 type;
+ * U16 next_off;
+ * } data;
+ * U32 data_u32;
+ * };
+ *
* struct regnode {
- * U8 flags;
- * U8 type;
- * U16 next_off;
+ * union regnode_head head;
* };
*
- * This structure is the base unit of elements in the regexp program. When
- * we increment our way through the program we increment by the size of this
- * structure, and in all cases where regnode sizing is considered it is in
- * units of this structure.
+ * Which really is a complicated and alignment friendly version of
+ *
+ * struct {
+ * U8 flags;
+ * U8 type;
+ * U16 next_off;
+ * };
+ *
+ * This structure is the base unit of elements in the regexp program.
+ * When we increment our way through the program we increment by the
+ * size of this structure (32 bits), and in all cases where regnode
+ * sizing is considered it is in units of this structure. All regnodes
+ * have a union regnode_head as their first member.
*
* This implies that no regnode style structure should contain 64 bit
* aligned members. Since the base regnode is 32 bits any member might
@@ -210,52 +230,40 @@ typedef struct regexp_internal {
* we already have support for in the data array.
*/
+union regnode_arg {
+ I32 i32;
+ U32 u32;
+ struct {
+ U16 u16a;
+ U16 u16b;
+ } hi_lo;
+};
+
+
struct regnode_string {
- U8 str_len_u8;
- U8 type;
- U16 next_off;
+ union regnode_head head;
char string[1];
};
struct regnode_lstring { /* Constructed this way to keep the string aligned. */
- U8 flags;
- U8 type;
- U16 next_off;
+ union regnode_head head;
U32 str_len_u32; /* Only 18 bits allowed before would overflow 'next_off' */
char string[1];
};
struct regnode_anyofhs { /* Constructed this way to keep the string aligned. */
- U8 str_len;
- U8 type;
- U16 next_off;
- union {
- U32 arg1u;
- I32 arg1i;
- struct {
- U16 arg1a;
- U16 arg1b;
- } hi_lo;
- } arg1;
+ union regnode_head head;
+ union regnode_arg arg1;
char string[1];
};
-/* Argument bearing node - workhorse, arg1u is often for the data field
- * Can store either a signed value via ARG1i() or unsigned 32 bit value
+/* Argument bearing node - workhorse, ARG1u() is often used for the data field
+ * Can store either a signed 32 bit value via ARG1i() or unsigned 32 bit value
* via ARG1u(), or two unsigned 16 bit values via ARG1a() or ARG1b()
*/
struct regnode_1 {
- U8 flags;
- U8 type;
- U16 next_off;
- union {
- U32 arg1u;
- I32 arg1i;
- struct {
- U16 arg1a;
- U16 arg1b;
- } hi_lo;
- } arg1;
+ union regnode_head head;
+ union regnode_arg arg1;
};
/* Node whose argument is 'SV *'. This needs to be used very carefully in
@@ -274,9 +282,7 @@ struct regnode_1 {
* then use inline functions to copy the data in or out.
* */
struct regnode_p {
- U8 flags;
- U8 type;
- U16 next_off;
+ union regnode_head head;
char arg1_sv_ptr_bytes[sizeof(SV *)];
};
@@ -285,25 +291,9 @@ struct regnode_p {
* Extra field can be accessed as (U32)ARG2u() (I32)ARG2i() or (U16)ARG2a()
* and (U16)ARG2b() */
struct regnode_2 {
- U8 flags;
- U8 type;
- U16 next_off;
- union {
- U32 arg1u;
- I32 arg1i;
- struct {
- U16 arg1a;
- U16 arg1b;
- } hi_lo;
- } arg1;
- union {
- U32 arg2u;
- I32 arg2i;
- struct {
- U16 arg2a;
- U16 arg2b;
- } hi_lo;
- } arg2;
+ union regnode_head head;
+ union regnode_arg arg1;
+ union regnode_arg arg2;
};
/* "Three Node" - similar to a regnode_2 but with space for an additional
@@ -315,33 +305,10 @@ struct regnode_2 {
* ARG3a() and ARG3b() which are used to store information about the number of
* parens before and inside the quantified expression. */
struct regnode_3 {
- U8 flags;
- U8 type;
- U16 next_off;
- union {
- I32 arg1i;
- U32 arg1u;
- struct {
- U16 arg1a;
- U16 arg1b;
- } hi_lo;
- } arg1;
- union {
- I32 arg2i;
- U32 arg2u;
- struct {
- U16 arg2a;
- U16 arg2b;
- } hi_lo;
- } arg2;
- union {
- struct {
- U16 arg3a;
- U16 arg3b;
- } hi_lo;
- I32 arg3i;
- U32 arg3u;
- } arg3;
+ union regnode_head head;
+ union regnode_arg arg1;
+ union regnode_arg arg2;
+ union regnode_arg arg3;
};
#define REGNODE_BBM_BITMAP_LEN \
@@ -352,9 +319,7 @@ struct regnode_3 {
* The array is a bitmap capable of representing any possible continuation
* byte. */
struct regnode_bbm {
- U8 first_byte;
- U8 type;
- U16 next_off;
+ union regnode_head head;
U8 bitmap[REGNODE_BBM_BITMAP_LEN];
};
@@ -370,36 +335,18 @@ struct regnode_bbm {
* the code that inserts and deletes regnodes. The basic single-argument
* regnode has a U32, which is what reganode() allocates as a unit. Therefore
* no field can require stricter alignment than U32. */
-
+
/* also used by trie */
struct regnode_charclass {
- U8 flags;
- U8 type;
- U16 next_off;
- union {
- I32 arg1i;
- U32 arg1u;
- struct {
- U16 arg1a;
- U16 arg1b;
- } hi_lo;
- } arg1;
+ union regnode_head head;
+ union regnode_arg arg1;
char bitmap[ANYOF_BITMAP_SIZE]; /* only compile-time */
};
/* has runtime (locale) \d, \w, ..., [:posix:] classes */
struct regnode_charclass_posixl {
- U8 flags; /* ANYOF_MATCHES_POSIXL bit must go here */
- U8 type;
- U16 next_off;
- union {
- I32 arg1i;
- U32 arg1u;
- struct {
- U16 arg1a;
- U16 arg1b;
- } hi_lo;
- } arg1;
+ union regnode_head head;
+ union regnode_arg arg1;
char bitmap[ANYOF_BITMAP_SIZE]; /* both compile-time ... */
U32 classflags; /* and run-time */
};
@@ -418,17 +365,8 @@ struct regnode_charclass_posixl {
* never a next node.
*/
struct regnode_ssc {
- U8 flags; /* ANYOF_MATCHES_POSIXL bit must go here */
- U8 type;
- U16 next_off;
- union {
- I32 arg1i;
- U32 arg1u;
- struct {
- U16 arg1a;
- U16 arg1b;
- } hi_lo;
- } arg1;
+ union regnode_head head;
+ union regnode_arg arg1;
char bitmap[ANYOF_BITMAP_SIZE]; /* both compile-time ... */
U32 classflags; /* ... and run-time */
@@ -524,11 +462,6 @@ struct regnode_ssc {
#define ARGp_SET(p, val) ARGp_SET_inline((p),(val))
-#undef NEXT_OFF
-#undef NODE_ALIGN
-
-#define NEXT_OFF(p) ((p)->next_off)
-#define NODE_ALIGN(node)
/* the following define was set to 0xde in 075abff3
* as part of some linting logic. I have set it to 0
* as otherwise in every place where we /might/ set flags
@@ -538,26 +471,33 @@ struct regnode_ssc {
* is changed from 0 then at the very least make sure
* that SBOL for /^/ sets the flags to 0 explicitly.
* -- Yves */
-#define NODE_ALIGN_FILL(node) ((node)->flags = 0)
+#define NODE_ALIGN(node)
#define SIZE_ALIGN NODE_ALIGN
#undef OP
#undef OPERAND
#undef STRING
+#undef NEXT_OFF
+#undef NODE_ALIGN
-#define OP(p) ((p)->type)
-#define FLAGS(p) ((p)->flags) /* Caution: Doesn't apply to all \
+#define NEXT_OFF(p) ((p)->head.data.next_off)
+#define OP(p) ((p)->head.data.type)
+#define STR_LEN_U8(p) ((p)->head.data.u_8.str_len_u8)
+#define FIRST_BYTE(p) ((p)->head.data.u_8.first_byte)
+#define FLAGS(p) ((p)->head.data.u_8.flags) /* Caution: Doesn't apply to all \
regnode types. For some, it's the \
character set of the regnode */
#define STR_LENs(p) (__ASSERT_(OP(p) != LEXACT && OP(p) != LEXACT_REQ8) \
- ((struct regnode_string *)p)->str_len_u8)
+ STR_LEN_U8((struct regnode_string *)p))
#define STRINGs(p) (__ASSERT_(OP(p) != LEXACT && OP(p) != LEXACT_REQ8) \
((struct regnode_string *)p)->string)
#define OPERANDs(p) STRINGs(p)
#define PARNO(p) ARG1u(p) /* APPLIES for OPEN and CLOSE only */
+#define NODE_ALIGN_FILL(node) (FLAGS(node) = 0)
+
/* Long strings. Currently limited to length 18 bits, which handles a 262000
* byte string. The limiting factor is the 16 bit 'next_off' field, which
* points to the next regnode, so the furthest away it can be is 2**16. On
@@ -591,7 +531,7 @@ struct regnode_ssc {
if (OP(p) == LEXACT || OP(p) == LEXACT_REQ8) \
((struct regnode_lstring *)(p))->str_len_u32 = (v); \
else \
- ((struct regnode_string *)(p))->str_len_u8 = (v); \
+ STR_LEN_U8((struct regnode_string *)(p)) = (v); \
} STMT_END
#define ANYOFR_BASE_BITS 20
@@ -603,18 +543,18 @@ struct regnode_ssc {
#define NODE_ALIGN(node)
#define ARGp_BYTES_LOC(p) (((struct regnode_p *)p)->arg1_sv_ptr_bytes)
-#define ARG1u_LOC(p) (((struct regnode_1 *)p)->arg1.arg1u)
-#define ARG1i_LOC(p) (((struct regnode_1 *)p)->arg1.arg1i)
-#define ARG1a_LOC(p) (((struct regnode_1 *)p)->arg1.hi_lo.arg1a)
-#define ARG1b_LOC(p) (((struct regnode_1 *)p)->arg1.hi_lo.arg1b)
-#define ARG2u_LOC(p) (((struct regnode_2 *)p)->arg2.arg2u)
-#define ARG2i_LOC(p) (((struct regnode_2 *)p)->arg2.arg2i)
-#define ARG2a_LOC(p) (((struct regnode_2 *)p)->arg2.hi_lo.arg2a)
-#define ARG2b_LOC(p) (((struct regnode_2 *)p)->arg2.hi_lo.arg2b)
-#define ARG3u_LOC(p) (((struct regnode_3 *)p)->arg3.arg3u)
-#define ARG3i_LOC(p) (((struct regnode_3 *)p)->arg3.arg3i)
-#define ARG3a_LOC(p) (((struct regnode_3 *)p)->arg3.hi_lo.arg3a)
-#define ARG3b_LOC(p) (((struct regnode_3 *)p)->arg3.hi_lo.arg3b)
+#define ARG1u_LOC(p) (((struct regnode_1 *)p)->arg1.u32)
+#define ARG1i_LOC(p) (((struct regnode_1 *)p)->arg1.i32)
+#define ARG1a_LOC(p) (((struct regnode_1 *)p)->arg1.hi_lo.u16a)
+#define ARG1b_LOC(p) (((struct regnode_1 *)p)->arg1.hi_lo.u16b)
+#define ARG2u_LOC(p) (((struct regnode_2 *)p)->arg2.u32)
+#define ARG2i_LOC(p) (((struct regnode_2 *)p)->arg2.i32)
+#define ARG2a_LOC(p) (((struct regnode_2 *)p)->arg2.hi_lo.u16a)
+#define ARG2b_LOC(p) (((struct regnode_2 *)p)->arg2.hi_lo.u16b)
+#define ARG3u_LOC(p) (((struct regnode_3 *)p)->arg3.u32)
+#define ARG3i_LOC(p) (((struct regnode_3 *)p)->arg3.i32)
+#define ARG3a_LOC(p) (((struct regnode_3 *)p)->arg3.hi_lo.u16a)
+#define ARG3b_LOC(p) (((struct regnode_3 *)p)->arg3.hi_lo.u16b)
/* These should no longer be used directly in most cases. Please use
* the REGNODE_AFTER() macros instead. */
@@ -1065,7 +1005,7 @@ ARGp_SET_inline(struct regnode *node, SV *ptr) {
#define BITMAP_BIT(c) (1U << ((c) & 7))
#define BITMAP_TEST(p, c) (BITMAP_BYTE(p, c) & BITMAP_BIT((U8)(c)))
-#define ANYOF_FLAGS(p) ((p)->flags)
+#define ANYOF_FLAGS(p) (FLAGS(p))
#define ANYOF_BIT(c) BITMAP_BIT(c)
diff --git a/regcomp_debug.c b/regcomp_debug.c
index 6ab276155c..93db7a89cf 100644
--- a/regcomp_debug.c
+++ b/regcomp_debug.c
@@ -438,7 +438,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
const reg_trie_data * const trie
= (reg_trie_data*)progi->data->data[!IS_TRIE_AC(op) ? n : ac->trie];
- Perl_sv_catpvf(aTHX_ sv, "-%s", REGNODE_NAME(o->flags));
+ Perl_sv_catpvf(aTHX_ sv, "-%s", REGNODE_NAME(FLAGS(o)));
DEBUG_TRIE_COMPILE_r({
if (trie->jump)
sv_catpvs(sv, "(JUMP)");
@@ -475,7 +475,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
if (ARG3u(o)) /* check both ARG3a and ARG3b at the same time */
Perl_sv_catpvf(aTHX_ sv, "<%d:%d>", ARG3a(o),ARG3b(o)); /* paren before, paren after */
if (op == CURLYM || op == CURLYN || op == CURLYX)
- Perl_sv_catpvf(aTHX_ sv, "[%d]", o->flags); /* Parenth number */
+ Perl_sv_catpvf(aTHX_ sv, "[%d]", FLAGS(o)); /* Parenth number */
Perl_sv_catpvf(aTHX_ sv, "{%u,", (unsigned) lo);
if (hi == REG_INFTY)
sv_catpvs(sv, "INFTY");
@@ -483,8 +483,8 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
Perl_sv_catpvf(aTHX_ sv, "%u", (unsigned) hi);
sv_catpvs(sv, "}");
}
- else if (k == WHILEM && o->flags) /* Ordinal/of */
- Perl_sv_catpvf(aTHX_ sv, "[%d/%d]", o->flags & 0xf, o->flags>>4);
+ else if (k == WHILEM && FLAGS(o)) /* Ordinal/of */
+ Perl_sv_catpvf(aTHX_ sv, "[%d/%d]", FLAGS(o) & 0xf, FLAGS(o)>>4);
else if (k == REF || k == OPEN || k == CLOSE
|| k == GROUPP || op == ACCEPT)
{
@@ -586,7 +586,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
}
else if (k == LOGICAL)
/* 2: embedded, otherwise 1 */
- Perl_sv_catpvf(aTHX_ sv, "[%d]", o->flags);
+ Perl_sv_catpvf(aTHX_ sv, "[%d]", FLAGS(o));
else if (k == ANYOF || k == ANYOFH || k == ANYOFR) {
U8 flags;
char * bitmap;
@@ -876,21 +876,21 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
sv_catpv(sv, bounds[FLAGS(o)]);
}
else if (k == BRANCHJ && (op == UNLESSM || op == IFMATCH)) {
- Perl_sv_catpvf(aTHX_ sv, "[%d", -(o->flags));
- if (o->next_off) {
- Perl_sv_catpvf(aTHX_ sv, "..-%d", o->flags - o->next_off);
+ Perl_sv_catpvf(aTHX_ sv, "[%d", -(FLAGS(o)));
+ if (NEXT_OFF(o)) {
+ Perl_sv_catpvf(aTHX_ sv, "..-%d", FLAGS(o) - NEXT_OFF(o));
}
Perl_sv_catpvf(aTHX_ sv, "]");
}
else if (op == SBOL)
- Perl_sv_catpvf(aTHX_ sv, " /%s/", o->flags ? "\\A" : "^");
+ Perl_sv_catpvf(aTHX_ sv, " /%s/", FLAGS(o) ? "\\A" : "^");
else if (op == EVAL) {
- if (o->flags & EVAL_OPTIMISTIC_FLAG)
+ if (FLAGS(o) & EVAL_OPTIMISTIC_FLAG)
Perl_sv_catpvf(aTHX_ sv, " optimistic");
}
/* add on the verb argument if there is one */
- if ( ( k == VERB || op == ACCEPT || op == OPFAIL ) && o->flags) {
+ if ( ( k == VERB || op == ACCEPT || op == OPFAIL ) && FLAGS(o)) {
if ( ARG1u(o) )
Perl_sv_catpvf(aTHX_ sv, ":%" SVf,
SVfARG((MUTABLE_SV(progi->data->data[ ARG1u( o ) ]))));
diff --git a/regcomp_study.c b/regcomp_study.c
index 81d55719df..db7ab3a409 100644
--- a/regcomp_study.c
+++ b/regcomp_study.c
@@ -2557,7 +2557,7 @@ Perl_study_chunk(pTHX_
goto optimize_curly_tail;
case CURLY:
if (stopparen>0 && (OP(scan)==CURLYN || OP(scan)==CURLYM)
- && (scan->flags == stopparen))
+ && (FLAGS(scan) == stopparen))
{
mincount = 1;
maxcount = 1;
@@ -2568,7 +2568,7 @@ Perl_study_chunk(pTHX_
next = regnext(scan);
if (OP(scan) == CURLYX) {
I32 lp = (data ? *(data->last_closep) : 0);
- scan->flags = ((lp <= (I32)U8_MAX) ? (U8)lp : U8_MAX);
+ FLAGS(scan) = ((lp <= (I32)U8_MAX) ? (U8)lp : U8_MAX);
}
scan = REGNODE_AFTER(scan);
next_is_eval = (OP(scan) == EVAL);
@@ -2729,7 +2729,7 @@ Perl_study_chunk(pTHX_
RExC_close_parens[PARNO(nxt1)] = REGNODE_OFFSET(nxt) + 2;
}
/* Now we know that nxt2 is the only contents: */
- oscan->flags = (U8)PARNO(nxt);
+ FLAGS(oscan) = (U8)PARNO(nxt);
OP(oscan) = CURLYN;
OP(nxt1) = NOTHING; /* was OPEN. */
@@ -2778,7 +2778,7 @@ Perl_study_chunk(pTHX_
/* note that we have changed the type of oscan to CURLYM here */
regnode *nxt1 = REGNODE_AFTER_type(oscan, tregnode_CURLYM); /* OPEN*/
- oscan->flags = (U8)PARNO(nxt);
+ FLAGS(oscan) = (U8)PARNO(nxt);
if (RExC_open_parens) {
/*open->CURLYM*/
RExC_open_parens[PARNO(nxt1)] = REGNODE_OFFSET(oscan);
@@ -2817,7 +2817,7 @@ Perl_study_chunk(pTHX_
depth+1, mutate_ok);
}
else
- oscan->flags = 0;
+ FLAGS(oscan) = 0;
}
else if ((OP(oscan) == CURLYX)
&& (flags & SCF_WHILEM_VISITED_POS)
@@ -2833,11 +2833,11 @@ Perl_study_chunk(pTHX_
if (OP(REGNODE_BEFORE(nxt)) == NOTHING) /* LONGJMP */
nxt += ARG1u(nxt);
nxt = REGNODE_BEFORE(nxt);
- if (nxt->flags & 0xf) {
+ if (FLAGS(nxt) & 0xf) {
/* we've already set whilem count on this node */
} else if (++data->whilem_c < 16) {
assert(data->whilem_c <= RExC_whilem_seen);
- nxt->flags = (U8)(data->whilem_c
+ FLAGS(nxt) = (U8)(data->whilem_c
| (RExC_whilem_seen << 4)); /* On WHILEM */
}
}
@@ -3223,7 +3223,7 @@ Perl_study_chunk(pTHX_
}
else if ( REGNODE_TYPE(OP(scan)) == BRANCHJ
/* Lookbehind, or need to calculate parens/evals/stclass: */
- && (scan->flags || data || (flags & SCF_DO_STCLASS))
+ && (FLAGS(scan) || data || (flags & SCF_DO_STCLASS))
&& (OP(scan) == IFMATCH || OP(scan) == UNLESSM))
{
if ( !PERL_ENABLE_POSITIVE_ASSERTION_STUDY
@@ -3260,7 +3260,7 @@ Perl_study_chunk(pTHX_
cur_last_close_op= *(data_fake.last_close_opp);
data_fake.pos_delta = delta;
- if ( flags & SCF_DO_STCLASS && !scan->flags
+ if ( flags & SCF_DO_STCLASS && !FLAGS(scan)
&& OP(scan) == IFMATCH ) { /* Lookahead */
ssc_init(pRExC_state, &intrnl);
data_fake.start_class = &intrnl;
@@ -3277,7 +3277,7 @@ Perl_study_chunk(pTHX_
recursed_depth, NULL, f, depth+1,
mutate_ok);
- if (scan->flags) {
+ if (FLAGS(scan)) {
if ( deltanext < 0
|| deltanext > (I32) U8_MAX
|| minnext > (I32)U8_MAX
@@ -3293,7 +3293,7 @@ Perl_study_chunk(pTHX_
* matches to avoid breakage for those not using this
* extension) */
if (deltanext) {
- scan->next_off = deltanext;
+ NEXT_OFF(scan) = deltanext;
if (
/* See a CLOSE op inside this lookbehind? */
cur_last_close_op != *(data_fake.last_close_opp)
@@ -3308,7 +3308,7 @@ Perl_study_chunk(pTHX_
is_positive ? "positive" : "negative");
}
}
- scan->flags = (U8)minnext + deltanext;
+ FLAGS(scan) = (U8)minnext + deltanext;
}
if (data) {
if (data_fake.flags & (SF_HAS_PAR|SF_IN_PAR))
@@ -3365,7 +3365,7 @@ Perl_study_chunk(pTHX_
StructCopy(data, &data_fake, scan_data_t);
if ((flags & SCF_DO_SUBSTR) && data->last_found) {
f |= SCF_DO_SUBSTR;
- if (scan->flags)
+ if (FLAGS(scan))
scan_commit(pRExC_state, &data_fake, minlenp, is_inf);
data_fake.last_found=newSVsv(data->last_found);
}
@@ -3380,7 +3380,7 @@ Perl_study_chunk(pTHX_
data_fake.pos_delta = delta;
if (is_inf)
data_fake.flags |= SF_IS_INF;
- if ( flags & SCF_DO_STCLASS && !scan->flags
+ if ( flags & SCF_DO_STCLASS && !FLAGS(scan)
&& OP(scan) == IFMATCH ) { /* Lookahead */
ssc_init(pRExC_state, &intrnl);
data_fake.start_class = &intrnl;
@@ -3396,7 +3396,7 @@ Perl_study_chunk(pTHX_
&deltanext, last, &data_fake,
stopparen, recursed_depth, NULL,
f, depth+1, mutate_ok);
- if (scan->flags) {
+ if (FLAGS(scan)) {
assert(0); /* This code has never been tested since this
is normally not compiled */
if ( deltanext < 0
@@ -3409,9 +3409,9 @@ Perl_study_chunk(pTHX_
}
if (deltanext) {
- scan->next_off = deltanext;
+ NEXT_OFF(scan) = deltanext;
}
- scan->flags = (U8)*minnextp + deltanext;
+ FLAGS(scan) = (U8)*minnextp + deltanext;
}
*minnextp += min;
@@ -3441,7 +3441,7 @@ Perl_study_chunk(pTHX_
data_fake.substrs[i].max_offset;
data->substrs[i].minlenp =
data_fake.substrs[i].minlenp;
- data->substrs[i].lookbehind += scan->flags;
+ data->substrs[i].lookbehind += FLAGS(scan);
}
}
}
@@ -3469,7 +3469,7 @@ Perl_study_chunk(pTHX_
}
}
else if (OP(scan) == EVAL) {
- if (data && !(scan->flags & EVAL_OPTIMISTIC_FLAG) )
+ if (data && !(FLAGS(scan) & EVAL_OPTIMISTIC_FLAG) )
data->flags |= SF_HAS_EVAL;
}
else if ( REGNODE_TYPE(OP(scan)) == ENDLIKE ) {
@@ -3496,7 +3496,7 @@ Perl_study_chunk(pTHX_
flags &= ~SCF_DO_SUBSTR;
}
}
- else if (OP(scan) == LOGICAL && scan->flags == 2) /* Embedded follows */
+ else if (OP(scan) == LOGICAL && FLAGS(scan) == 2) /* Embedded follows */
{
if (flags & SCF_DO_SUBSTR) {
scan_commit(pRExC_state, data, minlenp, is_inf);
diff --git a/regcomp_trie.c b/regcomp_trie.c
index 1096a25fcd..31b54ca936 100644
--- a/regcomp_trie.c
+++ b/regcomp_trie.c
@@ -1517,7 +1517,7 @@ Perl_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
OP( convert ) = TRIE;
/* store the type in the flags */
- convert->flags = nodetype;
+ FLAGS(convert) = nodetype;
DEBUG_r({
optimize = convert
+ NODE_STEP_REGNODE
diff --git a/regexec.c b/regexec.c
index 4bf61aa145..84ca8fd2dd 100644
--- a/regexec.c
+++ b/regexec.c
@@ -197,7 +197,7 @@ static const char non_utf8_target_but_utf8_required[]
/*
Search for mandatory following text node; for lookahead, the text must
- follow but for lookbehind (rn->flags != 0) we skip to the next step.
+ follow but for lookbehind (FLAGS(rn) != 0) we skip to the next step.
*/
#define FIND_NEXT_IMPT(rn) STMT_START { \
while (JUMPABLE(rn)) { \
@@ -207,7 +207,7 @@ static const char non_utf8_target_but_utf8_required[]
else if (type == PLUS) \
rn = REGNODE_AFTER_type(rn,tregnode_PLUS); \
else if (type == IFMATCH) \
- rn = (rn->flags == 0) ? REGNODE_AFTER_type(rn,tregnode_IFMATCH) : rn + ARG1u(rn); \
+ rn = (FLAGS(rn) == 0) ? REGNODE_AFTER_type(rn,tregnode_IFMATCH) : rn + ARG1u(rn); \
else rn += NEXT_OFF(rn); \
} \
} STMT_END
@@ -1781,15 +1781,15 @@ Perl_re_intuit_start(pTHX_
const enum { trie_plain, trie_utf8, trie_utf8_fold, trie_latin_utf8_fold, \
trie_utf8_exactfa_fold, trie_latin_utf8_exactfa_fold, \
trie_utf8l, trie_flu8, trie_flu8_latin } \
- trie_type = ((scan->flags == EXACT) \
+ trie_type = ((FLAGS(scan) == EXACT) \
? (utf8_target ? trie_utf8 : trie_plain) \
- : (scan->flags == EXACTL) \
+ : (FLAGS(scan) == EXACTL) \
? (utf8_target ? trie_utf8l : trie_plain) \
- : (scan->flags == EXACTFAA) \
+ : (FLAGS(scan) == EXACTFAA) \
? (utf8_target \
? trie_utf8_exactfa_fold \
: trie_latin_utf8_exactfa_fold) \
- : (scan->flags == EXACTFLU8 \
+ : (FLAGS(scan) == EXACTFLU8 \
? (utf8_target \
? trie_flu8 \
: trie_flu8_latin) \
@@ -6704,12 +6704,12 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
HV * widecharmap = MUTABLE_HV(rexi->data->data[ ARG1u( scan ) + 1 ]);
U32 state = trie->startstate;
- if (scan->flags == EXACTL || scan->flags == EXACTFLU8) {
+ if (FLAGS(scan) == EXACTL || FLAGS(scan) == EXACTFLU8) {
CHECK_AND_WARN_PROBLEMATIC_LOCALE_;
if (utf8_target
&& ! NEXTCHR_IS_EOS
&& UTF8_IS_ABOVE_LATIN1(nextbyte)
- && scan->flags == EXACTL)
+ && FLAGS(scan) == EXACTL)
{
/* We only output for EXACTL, as we let the folder
* output this message for EXACTFLU8 to avoid
@@ -8081,7 +8081,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
locinput += ln;
}
ref_yes:
- if (scan->flags) { /* == VOLATILE_REF but only other value is 0 */
+ if (FLAGS(scan)) { /* == VOLATILE_REF but only other value is 0 */
ST.cp = regcppush(rex, ARG2u(scan) - 1, maxopenparen);
REGCP_SET(ST.lastcp);
PUSH_STATE_GOTO(REF_next, next, locinput, loceol,
@@ -8428,7 +8428,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
/* if we got here, it should be an engine which
* supports compiling code blocks and stuff */
assert(rex->engine && rex->engine->op_comp);
- assert(!(scan->flags & ~RXf_PMf_COMPILETIME));
+ assert(!(FLAGS(scan) & ~RXf_PMf_COMPILETIME));
re_sv = rex->engine->op_comp(aTHX_ &ret, 1, NULL,
rex->engine, NULL, NULL,
/* copy /msixn etc to inner pattern */
@@ -8632,7 +8632,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
case ACCEPT: /* (*ACCEPT) */
is_accepted = true;
- if (scan->flags)
+ if (FLAGS(scan))
sv_yes_mark = MUTABLE_SV(rexi->data->data[ ARG1u( scan ) ]);
utmp = ARG2u(scan);
@@ -8699,7 +8699,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
break;
case LOGICAL: /* modifier for EVAL and IFMATCH */
- logical = scan->flags & EVAL_FLAGS_MASK; /* reserve a bit for optimistic eval */
+ logical = FLAGS(scan) & EVAL_FLAGS_MASK; /* reserve a bit for optimistic eval */
break;
/*******************************************************************
@@ -8790,7 +8790,7 @@ NULL
case CURLYX: /* start of /A*B/ (for complex A) */
{
/* No need to save/restore up to this paren */
- I32 parenfloor = scan->flags;
+ I32 parenfloor = FLAGS(scan);
assert(next); /* keep Coverity happy */
if (OP(REGNODE_BEFORE(next)) == NOTHING) /* LONGJMP */
@@ -8905,20 +8905,20 @@ NULL
* op (string-length x #WHILEMs) times do we allocate the
* cache.
*
- * The top 4 bits of scan->flags byte say how many different
+ * The top 4 bits of FLAGS(scan) byte say how many different
* relevant CURLYX/WHILEM op pairs there are, while the
* bottom 4-bits is the identifying index number of this
* WHILEM.
*/
- if (scan->flags) {
+ if (FLAGS(scan)) {
if (!reginfo->poscache_maxiter) {
/* start the countdown: Postpone detection until we
* know the match is not *that* much linear. */
reginfo->poscache_maxiter
= (reginfo->strend - reginfo->strbeg + 1)
- * (scan->flags>>4);
+ * (FLAGS(scan)>>4);
/* possible overflow for long strings and many CURLYX's */
if (reginfo->poscache_maxiter < 0)
reginfo->poscache_maxiter = I32_MAX;
@@ -8951,9 +8951,9 @@ NULL
SSize_t offset, mask;
reginfo->poscache_iter = -1; /* stop eventual underflow */
- offset = (scan->flags & 0xf) - 1
+ offset = (FLAGS(scan) & 0xf) - 1
+ (locinput - reginfo->strbeg)
- * (scan->flags>>4);
+ * (FLAGS(scan)>>4);
mask = 1 << (offset % 8);
offset /= 8;
if (reginfo->info_aux->poscache[offset] & mask) {
@@ -9089,7 +9089,7 @@ NULL
NOT_REACHED; /* NOTREACHED */
case CUTGROUP: /* /(*THEN)/ */
- sv_yes_mark = st->u.mark.mark_name = scan->flags
+ sv_yes_mark = st->u.mark.mark_name = FLAGS(scan)
? MUTABLE_SV(rexi->data->data[ ARG1u( scan ) ])
: NULL;
PUSH_STATE_GOTO(CUTGROUP_next, next, locinput, loceol,
@@ -9156,8 +9156,8 @@ NULL
ST.lastcloseparen = RXp_LASTCLOSEPAREN(rex);
/* if paren positive, emulate an OPEN/CLOSE around A */
- if (ST.me->flags) {
- U32 paren = ST.me->flags;
+ if (FLAGS(ST.me)) {
+ U32 paren = FLAGS(ST.me);
lastopen = paren;
if (paren > maxopenparen)
maxopenparen = paren;
@@ -9202,15 +9202,15 @@ NULL
depth, (IV) ST.count, (IV)ST.alen)
);
- if (ST.me->flags) {
+ if (FLAGS(ST.me)) {
/* emulate CLOSE: mark current A as captured */
- U32 paren = (U32)ST.me->flags;
+ U32 paren = (U32)FLAGS(ST.me);
CLOSE_CAPTURE(rex, paren,
HOPc(locinput, -ST.alen) - reginfo->strbeg,
locinput - reginfo->strbeg);
}
- if (EVAL_CLOSE_PAREN_IS_TRUE(cur_eval,(U32)ST.me->flags))
+ if (EVAL_CLOSE_PAREN_IS_TRUE(cur_eval,(U32)FLAGS(ST.me)))
goto fake_end;
@@ -9226,7 +9226,7 @@ NULL
if (ST.minmod || ST.count < ARG1i(ST.me) /* min*/
- || EVAL_CLOSE_PAREN_IS_TRUE(cur_eval,(U32)ST.me->flags))
+ || EVAL_CLOSE_PAREN_IS_TRUE(cur_eval,(U32)FLAGS(ST.me)))
sayNO;
curlym_do_B: /* execute the B in /A{m,n}B/ */
@@ -9275,9 +9275,9 @@ NULL
}
curlym_close_B:
- if (ST.me->flags) {
+ if (FLAGS(ST.me)) {
/* emulate CLOSE: mark current A as captured */
- U32 paren = (U32)ST.me->flags;
+ U32 paren = (U32)FLAGS(ST.me);
if (ST.count || is_accepted) {
CLOSE_CAPTURE(rex, paren,
HOPc(locinput, -ST.alen) - reginfo->strbeg,
@@ -9286,7 +9286,7 @@ NULL
else
RXp_OFFSp(rex)[paren].end = -1;
- if (EVAL_CLOSE_PAREN_IS_TRUE(cur_eval,(U32)ST.me->flags))
+ if (EVAL_CLOSE_PAREN_IS_TRUE(cur_eval,(U32)FLAGS(ST.me)))
{
if (ST.count || is_accepted)
goto fake_end;
@@ -9349,7 +9349,7 @@ NULL
goto repeat;
case CURLYN: /* /(A){m,n}B/ where A is width 1 char */
- ST.paren = scan->flags; /* Which paren to set */
+ ST.paren = FLAGS(scan); /* Which paren to set */
ST.lastparen = RXp_LASTPAREN(rex);
ST.lastcloseparen = RXp_LASTCLOSEPAREN(rex);
if (ST.paren > maxopenparen)
@@ -9738,10 +9738,10 @@ NULL
ST.wanted = 1;
ifmatch_trivial_fail_test:
ST.prev_match_end= match_end;
- ST.count = scan->next_off + 1; /* next_off repurposed to be
+ ST.count = NEXT_OFF(scan) + 1; /* next_off repurposed to be
lookbehind count, requires
non-zero flags */
- if (! scan->flags) { /* 'flags' zero means lookahed */
+ if (! FLAGS(scan)) { /* 'flags' zero means lookahead */
/* Lookahead starts here and ends at the normal place */
ST.start = locinput;
@@ -9749,7 +9749,7 @@ NULL
match_end = NULL;
}
else {
- PERL_UINT_FAST8_T back_count = scan->flags;
+ PERL_UINT_FAST8_T back_count = FLAGS(scan);
char * s;
match_end = locinput;
@@ -9847,7 +9847,7 @@ NULL
/* FALLTHROUGH */
case PRUNE: /* (*PRUNE) */
- if (scan->flags)
+ if (FLAGS(scan))
sv_yes_mark = sv_commit = MUTABLE_SV(rexi->data->data[ ARG1u( scan ) ]);
PUSH_STATE_GOTO(COMMIT_next, next, locinput, loceol,
script_run_begin);
@@ -9860,7 +9860,7 @@ NULL
NOT_REACHED; /* NOTREACHED */
case OPFAIL: /* (*FAIL) */
- if (scan->flags)
+ if (FLAGS(scan))
sv_commit = MUTABLE_SV(rexi->data->data[ ARG1u( scan ) ]);
if (logical) {
/* deal with (?(?!)X|Y) properly,
@@ -9910,7 +9910,7 @@ NULL
NOT_REACHED; /* NOTREACHED */
case SKIP: /* (*SKIP) */
- if (!scan->flags) {
+ if (!FLAGS(scan)) {
/* (*SKIP) : if we fail we cut here*/
ST.mark_name = NULL;
ST.mark_loc = locinput;
diff --git a/regexp.h b/regexp.h
index d3269ad18c..243cf246c6 100644
--- a/regexp.h
+++ b/regexp.h
@@ -29,10 +29,23 @@ struct regnode_meta {
U8 off_by_arg;
};
+/* this ensures that on alignment sensitive platforms
+ * this struct is aligned on 32 bit boundaries */
+union regnode_head {
+ struct {
+ union {
+ U8 flags;
+ U8 str_len_u8;
+ U8 first_byte;
+ } u_8;
+ U8 type;
+ U16 next_off;
+ } data;
+ U32 data_u32;
+};
+
struct regnode {
- U8 flags;
- U8 type;
- U16 next_off;
+ union regnode_head head;
};
typedef struct regnode regnode;