summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--regcomp.c14
-rw-r--r--regcomp.h232
-rw-r--r--regcomp_debug.c22
-rw-r--r--regcomp_study.c40
-rw-r--r--regcomp_trie.c2
-rw-r--r--regexec.c70
-rw-r--r--regexp.h19
7 files changed, 176 insertions, 223 deletions
diff --git a/regcomp.c b/regcomp.c
index 33b247bcd5..5d30cbbbb4 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -1837,7 +1837,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
/* An OR of *one* alternative - should not happen now. */
(OP(first) == BRANCH && OP(first_next) != BRANCH) ||
/* for now we can't handle lookbehind IFMATCH*/
- (OP(first) == IFMATCH && !first->flags && (sawlookahead = 1)) ||
+ (OP(first) == IFMATCH && !FLAGS(first) && (sawlookahead = 1)) ||
(OP(first) == PLUS) ||
(OP(first) == MINMOD) ||
/* An {n,m} with n>0 */
@@ -2220,7 +2220,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
*/
if (REGNODE_TYPE(fop) == NOTHING && nop == END)
RExC_rx->extflags |= RXf_NULL;
- else if ((fop == MBOL || (fop == SBOL && !first->flags)) && nop == END)
+ else if ((fop == MBOL || (fop == SBOL && !FLAGS(first))) && nop == END)
/* when fop is SBOL first->flags will be true only when it was
* produced by parsing /\A/, and not when parsing /^/. This is
* very important for the split code as there we want to
@@ -2766,7 +2766,7 @@ S_handle_named_backref(pTHX_ RExC_state_t *pRExC_state,
: REFFN),
num, RExC_nestroot);
if (RExC_nestroot && num >= (U32)RExC_nestroot)
- REGNODE_p(ret)->flags = VOLATILE_REF;
+ FLAGS(REGNODE_p(ret)) = VOLATILE_REF;
*flagp |= HASWIDTH;
nextchar(pRExC_state);
@@ -6045,7 +6045,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
: REFF),
num, RExC_nestroot);
if (RExC_nestroot && num >= RExC_nestroot)
- REGNODE_p(ret)->flags = VOLATILE_REF;
+ FLAGS(REGNODE_p(ret)) = VOLATILE_REF;
if (OP(REGNODE_p(ret)) == REFF) {
RExC_seen_d_op = TRUE;
}
@@ -12024,7 +12024,7 @@ S_optimize_regclass(pTHX_
op = ANYOFHbbm;
*ret = REGNODE_GUTS(pRExC_state, op, REGNODE_ARG_LEN(op));
FILL_NODE(*ret, op);
- ((struct regnode_bbm *) REGNODE_p(*ret))->first_byte = low_utf8[0],
+ FIRST_BYTE((struct regnode_bbm *) REGNODE_p(*ret)) = low_utf8[0],
/* The 64 bit (or 32 on EBCDIC) map can be looked up
* directly based on the continuation byte, without
@@ -12050,7 +12050,7 @@ S_optimize_regclass(pTHX_
*ret = REGNODE_GUTS(pRExC_state, op,
REGNODE_ARG_LEN(op) + STR_SZ(len));
FILL_NODE(*ret, op);
- ((struct regnode_anyofhs *) REGNODE_p(*ret))->str_len
+ STR_LEN_U8((struct regnode_anyofhs *) REGNODE_p(*ret))
= len;
Copy(low_utf8, /* Add the common bytes */
((struct regnode_anyofhs *) REGNODE_p(*ret))->string,
@@ -13044,7 +13044,7 @@ Perl_get_ANYOFHbbm_contents(pTHX_ const regnode * n) {
&cp_list,
/* The base cp is from the start byte plus a zero continuation */
- TWO_BYTE_UTF8_TO_NATIVE(((struct regnode_bbm *) n)->first_byte,
+ TWO_BYTE_UTF8_TO_NATIVE(FIRST_BYTE((struct regnode_bbm *) n),
UTF_CONTINUATION_MARK | 0));
return cp_list;
}
diff --git a/regcomp.h b/regcomp.h
index 068de74cd6..b40a2f070c 100644
--- a/regcomp.h
+++ b/regcomp.h
@@ -178,16 +178,36 @@ typedef struct regexp_internal {
* change things without care. If you look at regexp.h you will see it
* contains this:
*
+ * union regnode_head {
+ * struct {
+ * union {
+ * U8 flags;
+ * U8 str_len_u8;
+ * U8 first_byte;
+ * } u_8;
+ * U8 type;
+ * U16 next_off;
+ * } data;
+ * U32 data_u32;
+ * };
+ *
* struct regnode {
- * U8 flags;
- * U8 type;
- * U16 next_off;
+ * union regnode_head head;
* };
*
- * This structure is the base unit of elements in the regexp program. When
- * we increment our way through the program we increment by the size of this
- * structure, and in all cases where regnode sizing is considered it is in
- * units of this structure.
+ * This is really just a more elaborate, alignment-friendly version of
+ *
+ * struct {
+ * U8 flags;
+ * U8 type;
+ * U16 next_off;
+ * };
+ *
+ * This structure is the base unit of elements in the regexp program.
+ * When we increment our way through the program we increment by the
+ * size of this structure (32 bits), and in all cases where regnode
+ * sizing is considered it is in units of this structure. All regnodes
+ * have a union regnode_head as their first member.
*
* This implies that no regnode style structure should contain 64 bit
* aligned members. Since the base regnode is 32 bits any member might
@@ -210,52 +230,40 @@ typedef struct regexp_internal {
* we already have support for in the data array.
*/
+union regnode_arg {
+ I32 i32;
+ U32 u32;
+ struct {
+ U16 u16a;
+ U16 u16b;
+ } hi_lo;
+};
+
+
struct regnode_string {
- U8 str_len_u8;
- U8 type;
- U16 next_off;
+ union regnode_head head;
char string[1];
};
struct regnode_lstring { /* Constructed this way to keep the string aligned. */
- U8 flags;
- U8 type;
- U16 next_off;
+ union regnode_head head;
U32 str_len_u32; /* Only 18 bits allowed before would overflow 'next_off' */
char string[1];
};
struct regnode_anyofhs { /* Constructed this way to keep the string aligned. */
- U8 str_len;
- U8 type;
- U16 next_off;
- union {
- U32 arg1u;
- I32 arg1i;
- struct {
- U16 arg1a;
- U16 arg1b;
- } hi_lo;
- } arg1;
+ union regnode_head head;
+ union regnode_arg arg1;
char string[1];
};
-/* Argument bearing node - workhorse, arg1u is often for the data field
- * Can store either a signed value via ARG1i() or unsigned 32 bit value
+/* Argument bearing node - workhorse, ARG1u() is often used for the data field
+ * Can store either a signed 32 bit value via ARG1i() or unsigned 32 bit value
* via ARG1u(), or two unsigned 16 bit values via ARG1a() or ARG1b()
*/
struct regnode_1 {
- U8 flags;
- U8 type;
- U16 next_off;
- union {
- U32 arg1u;
- I32 arg1i;
- struct {
- U16 arg1a;
- U16 arg1b;
- } hi_lo;
- } arg1;
+ union regnode_head head;
+ union regnode_arg arg1;
};
/* Node whose argument is 'SV *'. This needs to be used very carefully in
@@ -274,9 +282,7 @@ struct regnode_1 {
* then use inline functions to copy the data in or out.
* */
struct regnode_p {
- U8 flags;
- U8 type;
- U16 next_off;
+ union regnode_head head;
char arg1_sv_ptr_bytes[sizeof(SV *)];
};
@@ -285,25 +291,9 @@ struct regnode_p {
* Extra field can be accessed as (U32)ARG2u() (I32)ARG2i() or (U16)ARG2a()
* and (U16)ARG2b() */
struct regnode_2 {
- U8 flags;
- U8 type;
- U16 next_off;
- union {
- U32 arg1u;
- I32 arg1i;
- struct {
- U16 arg1a;
- U16 arg1b;
- } hi_lo;
- } arg1;
- union {
- U32 arg2u;
- I32 arg2i;
- struct {
- U16 arg2a;
- U16 arg2b;
- } hi_lo;
- } arg2;
+ union regnode_head head;
+ union regnode_arg arg1;
+ union regnode_arg arg2;
};
/* "Three Node" - similar to a regnode_2 but with space for an additional
@@ -315,33 +305,10 @@ struct regnode_2 {
* ARG3a() and ARG3b() which are used to store information about the number of
* parens before and inside the quantified expression. */
struct regnode_3 {
- U8 flags;
- U8 type;
- U16 next_off;
- union {
- I32 arg1i;
- U32 arg1u;
- struct {
- U16 arg1a;
- U16 arg1b;
- } hi_lo;
- } arg1;
- union {
- I32 arg2i;
- U32 arg2u;
- struct {
- U16 arg2a;
- U16 arg2b;
- } hi_lo;
- } arg2;
- union {
- struct {
- U16 arg3a;
- U16 arg3b;
- } hi_lo;
- I32 arg3i;
- U32 arg3u;
- } arg3;
+ union regnode_head head;
+ union regnode_arg arg1;
+ union regnode_arg arg2;
+ union regnode_arg arg3;
};
#define REGNODE_BBM_BITMAP_LEN \
@@ -352,9 +319,7 @@ struct regnode_3 {
* The array is a bitmap capable of representing any possible continuation
* byte. */
struct regnode_bbm {
- U8 first_byte;
- U8 type;
- U16 next_off;
+ union regnode_head head;
U8 bitmap[REGNODE_BBM_BITMAP_LEN];
};
@@ -370,36 +335,18 @@ struct regnode_bbm {
* the code that inserts and deletes regnodes. The basic single-argument
* regnode has a U32, which is what reganode() allocates as a unit. Therefore
* no field can require stricter alignment than U32. */
-
+
/* also used by trie */
struct regnode_charclass {
- U8 flags;
- U8 type;
- U16 next_off;
- union {
- I32 arg1i;
- U32 arg1u;
- struct {
- U16 arg1a;
- U16 arg1b;
- } hi_lo;
- } arg1;
+ union regnode_head head;
+ union regnode_arg arg1;
char bitmap[ANYOF_BITMAP_SIZE]; /* only compile-time */
};
/* has runtime (locale) \d, \w, ..., [:posix:] classes */
struct regnode_charclass_posixl {
- U8 flags; /* ANYOF_MATCHES_POSIXL bit must go here */
- U8 type;
- U16 next_off;
- union {
- I32 arg1i;
- U32 arg1u;
- struct {
- U16 arg1a;
- U16 arg1b;
- } hi_lo;
- } arg1;
+ union regnode_head head;
+ union regnode_arg arg1;
char bitmap[ANYOF_BITMAP_SIZE]; /* both compile-time ... */
U32 classflags; /* and run-time */
};
@@ -418,17 +365,8 @@ struct regnode_charclass_posixl {
* never a next node.
*/
struct regnode_ssc {
- U8 flags; /* ANYOF_MATCHES_POSIXL bit must go here */
- U8 type;
- U16 next_off;
- union {
- I32 arg1i;
- U32 arg1u;
- struct {
- U16 arg1a;
- U16 arg1b;
- } hi_lo;
- } arg1;
+ union regnode_head head;
+ union regnode_arg arg1;
char bitmap[ANYOF_BITMAP_SIZE]; /* both compile-time ... */
U32 classflags; /* ... and run-time */
@@ -524,11 +462,6 @@ struct regnode_ssc {
#define ARGp_SET(p, val) ARGp_SET_inline((p),(val))
-#undef NEXT_OFF
-#undef NODE_ALIGN
-
-#define NEXT_OFF(p) ((p)->next_off)
-#define NODE_ALIGN(node)
/* the following define was set to 0xde in 075abff3
* as part of some linting logic. I have set it to 0
* as otherwise in every place where we /might/ set flags
@@ -538,26 +471,33 @@ struct regnode_ssc {
* is changed from 0 then at the very least make sure
* that SBOL for /^/ sets the flags to 0 explicitly.
* -- Yves */
-#define NODE_ALIGN_FILL(node) ((node)->flags = 0)
+#define NODE_ALIGN(node)
#define SIZE_ALIGN NODE_ALIGN
#undef OP
#undef OPERAND
#undef STRING
+#undef NEXT_OFF
+#undef NODE_ALIGN
-#define OP(p) ((p)->type)
-#define FLAGS(p) ((p)->flags) /* Caution: Doesn't apply to all \
+#define NEXT_OFF(p) ((p)->head.data.next_off)
+#define OP(p) ((p)->head.data.type)
+#define STR_LEN_U8(p) ((p)->head.data.u_8.str_len_u8)
+#define FIRST_BYTE(p) ((p)->head.data.u_8.first_byte)
+#define FLAGS(p) ((p)->head.data.u_8.flags) /* Caution: Doesn't apply to all \
regnode types. For some, it's the \
character set of the regnode */
#define STR_LENs(p) (__ASSERT_(OP(p) != LEXACT && OP(p) != LEXACT_REQ8) \
- ((struct regnode_string *)p)->str_len_u8)
+ STR_LEN_U8((struct regnode_string *)p))
#define STRINGs(p) (__ASSERT_(OP(p) != LEXACT && OP(p) != LEXACT_REQ8) \
((struct regnode_string *)p)->string)
#define OPERANDs(p) STRINGs(p)
#define PARNO(p) ARG1u(p) /* APPLIES for OPEN and CLOSE only */
+#define NODE_ALIGN_FILL(node) (FLAGS(node) = 0)
+
/* Long strings. Currently limited to length 18 bits, which handles a 262000
* byte string. The limiting factor is the 16 bit 'next_off' field, which
* points to the next regnode, so the furthest away it can be is 2**16. On
@@ -591,7 +531,7 @@ struct regnode_ssc {
if (OP(p) == LEXACT || OP(p) == LEXACT_REQ8) \
((struct regnode_lstring *)(p))->str_len_u32 = (v); \
else \
- ((struct regnode_string *)(p))->str_len_u8 = (v); \
+ STR_LEN_U8((struct regnode_string *)(p)) = (v); \
} STMT_END
#define ANYOFR_BASE_BITS 20
@@ -603,18 +543,18 @@ struct regnode_ssc {
#define NODE_ALIGN(node)
#define ARGp_BYTES_LOC(p) (((struct regnode_p *)p)->arg1_sv_ptr_bytes)
-#define ARG1u_LOC(p) (((struct regnode_1 *)p)->arg1.arg1u)
-#define ARG1i_LOC(p) (((struct regnode_1 *)p)->arg1.arg1i)
-#define ARG1a_LOC(p) (((struct regnode_1 *)p)->arg1.hi_lo.arg1a)
-#define ARG1b_LOC(p) (((struct regnode_1 *)p)->arg1.hi_lo.arg1b)
-#define ARG2u_LOC(p) (((struct regnode_2 *)p)->arg2.arg2u)
-#define ARG2i_LOC(p) (((struct regnode_2 *)p)->arg2.arg2i)
-#define ARG2a_LOC(p) (((struct regnode_2 *)p)->arg2.hi_lo.arg2a)
-#define ARG2b_LOC(p) (((struct regnode_2 *)p)->arg2.hi_lo.arg2b)
-#define ARG3u_LOC(p) (((struct regnode_3 *)p)->arg3.arg3u)
-#define ARG3i_LOC(p) (((struct regnode_3 *)p)->arg3.arg3i)
-#define ARG3a_LOC(p) (((struct regnode_3 *)p)->arg3.hi_lo.arg3a)
-#define ARG3b_LOC(p) (((struct regnode_3 *)p)->arg3.hi_lo.arg3b)
+#define ARG1u_LOC(p) (((struct regnode_1 *)p)->arg1.u32)
+#define ARG1i_LOC(p) (((struct regnode_1 *)p)->arg1.i32)
+#define ARG1a_LOC(p) (((struct regnode_1 *)p)->arg1.hi_lo.u16a)
+#define ARG1b_LOC(p) (((struct regnode_1 *)p)->arg1.hi_lo.u16b)
+#define ARG2u_LOC(p) (((struct regnode_2 *)p)->arg2.u32)
+#define ARG2i_LOC(p) (((struct regnode_2 *)p)->arg2.i32)
+#define ARG2a_LOC(p) (((struct regnode_2 *)p)->arg2.hi_lo.u16a)
+#define ARG2b_LOC(p) (((struct regnode_2 *)p)->arg2.hi_lo.u16b)
+#define ARG3u_LOC(p) (((struct regnode_3 *)p)->arg3.u32)
+#define ARG3i_LOC(p) (((struct regnode_3 *)p)->arg3.i32)
+#define ARG3a_LOC(p) (((struct regnode_3 *)p)->arg3.hi_lo.u16a)
+#define ARG3b_LOC(p) (((struct regnode_3 *)p)->arg3.hi_lo.u16b)
/* These should no longer be used directly in most cases. Please use
* the REGNODE_AFTER() macros instead. */
@@ -1065,7 +1005,7 @@ ARGp_SET_inline(struct regnode *node, SV *ptr) {
#define BITMAP_BIT(c) (1U << ((c) & 7))
#define BITMAP_TEST(p, c) (BITMAP_BYTE(p, c) & BITMAP_BIT((U8)(c)))
-#define ANYOF_FLAGS(p) ((p)->flags)
+#define ANYOF_FLAGS(p) (FLAGS(p))
#define ANYOF_BIT(c) BITMAP_BIT(c)
diff --git a/regcomp_debug.c b/regcomp_debug.c
index 6ab276155c..93db7a89cf 100644
--- a/regcomp_debug.c
+++ b/regcomp_debug.c
@@ -438,7 +438,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
const reg_trie_data * const trie
= (reg_trie_data*)progi->data->data[!IS_TRIE_AC(op) ? n : ac->trie];
- Perl_sv_catpvf(aTHX_ sv, "-%s", REGNODE_NAME(o->flags));
+ Perl_sv_catpvf(aTHX_ sv, "-%s", REGNODE_NAME(FLAGS(o)));
DEBUG_TRIE_COMPILE_r({
if (trie->jump)
sv_catpvs(sv, "(JUMP)");
@@ -475,7 +475,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
if (ARG3u(o)) /* check both ARG3a and ARG3b at the same time */
Perl_sv_catpvf(aTHX_ sv, "<%d:%d>", ARG3a(o),ARG3b(o)); /* paren before, paren after */
if (op == CURLYM || op == CURLYN || op == CURLYX)
- Perl_sv_catpvf(aTHX_ sv, "[%d]", o->flags); /* Parenth number */
+ Perl_sv_catpvf(aTHX_ sv, "[%d]", FLAGS(o)); /* Parenth number */
Perl_sv_catpvf(aTHX_ sv, "{%u,", (unsigned) lo);
if (hi == REG_INFTY)
sv_catpvs(sv, "INFTY");
@@ -483,8 +483,8 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
Perl_sv_catpvf(aTHX_ sv, "%u", (unsigned) hi);
sv_catpvs(sv, "}");
}
- else if (k == WHILEM && o->flags) /* Ordinal/of */
- Perl_sv_catpvf(aTHX_ sv, "[%d/%d]", o->flags & 0xf, o->flags>>4);
+ else if (k == WHILEM && FLAGS(o)) /* Ordinal/of */
+ Perl_sv_catpvf(aTHX_ sv, "[%d/%d]", FLAGS(o) & 0xf, FLAGS(o)>>4);
else if (k == REF || k == OPEN || k == CLOSE
|| k == GROUPP || op == ACCEPT)
{
@@ -586,7 +586,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
}
else if (k == LOGICAL)
/* 2: embedded, otherwise 1 */
- Perl_sv_catpvf(aTHX_ sv, "[%d]", o->flags);
+ Perl_sv_catpvf(aTHX_ sv, "[%d]", FLAGS(o));
else if (k == ANYOF || k == ANYOFH || k == ANYOFR) {
U8 flags;
char * bitmap;
@@ -876,21 +876,21 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
sv_catpv(sv, bounds[FLAGS(o)]);
}
else if (k == BRANCHJ && (op == UNLESSM || op == IFMATCH)) {
- Perl_sv_catpvf(aTHX_ sv, "[%d", -(o->flags));
- if (o->next_off) {
- Perl_sv_catpvf(aTHX_ sv, "..-%d", o->flags - o->next_off);
+ Perl_sv_catpvf(aTHX_ sv, "[%d", -(FLAGS(o)));
+ if (NEXT_OFF(o)) {
+ Perl_sv_catpvf(aTHX_ sv, "..-%d", FLAGS(o) - NEXT_OFF(o));
}
Perl_sv_catpvf(aTHX_ sv, "]");
}
else if (op == SBOL)
- Perl_sv_catpvf(aTHX_ sv, " /%s/", o->flags ? "\\A" : "^");
+ Perl_sv_catpvf(aTHX_ sv, " /%s/", FLAGS(o) ? "\\A" : "^");
else if (op == EVAL) {
- if (o->flags & EVAL_OPTIMISTIC_FLAG)
+ if (FLAGS(o) & EVAL_OPTIMISTIC_FLAG)
Perl_sv_catpvf(aTHX_ sv, " optimistic");
}
/* add on the verb argument if there is one */
- if ( ( k == VERB || op == ACCEPT || op == OPFAIL ) && o->flags) {
+ if ( ( k == VERB || op == ACCEPT || op == OPFAIL ) && FLAGS(o)) {
if ( ARG1u(o) )
Perl_sv_catpvf(aTHX_ sv, ":%" SVf,
SVfARG((MUTABLE_SV(progi->data->data[ ARG1u( o ) ]))));
diff --git a/regcomp_study.c b/regcomp_study.c
index 81d55719df..db7ab3a409 100644
--- a/regcomp_study.c
+++ b/regcomp_study.c
@@ -2557,7 +2557,7 @@ Perl_study_chunk(pTHX_
goto optimize_curly_tail;
case CURLY:
if (stopparen>0 && (OP(scan)==CURLYN || OP(scan)==CURLYM)
- && (scan->flags == stopparen))
+ && (FLAGS(scan) == stopparen))
{
mincount = 1;
maxcount = 1;
@@ -2568,7 +2568,7 @@ Perl_study_chunk(pTHX_
next = regnext(scan);
if (OP(scan) == CURLYX) {
I32 lp = (data ? *(data->last_closep) : 0);
- scan->flags = ((lp <= (I32)U8_MAX) ? (U8)lp : U8_MAX);
+ FLAGS(scan) = ((lp <= (I32)U8_MAX) ? (U8)lp : U8_MAX);
}
scan = REGNODE_AFTER(scan);
next_is_eval = (OP(scan) == EVAL);
@@ -2729,7 +2729,7 @@ Perl_study_chunk(pTHX_
RExC_close_parens[PARNO(nxt1)] = REGNODE_OFFSET(nxt) + 2;
}
/* Now we know that nxt2 is the only contents: */
- oscan->flags = (U8)PARNO(nxt);
+ FLAGS(oscan) = (U8)PARNO(nxt);
OP(oscan) = CURLYN;
OP(nxt1) = NOTHING; /* was OPEN. */
@@ -2778,7 +2778,7 @@ Perl_study_chunk(pTHX_
/* note that we have changed the type of oscan to CURLYM here */
regnode *nxt1 = REGNODE_AFTER_type(oscan, tregnode_CURLYM); /* OPEN*/
- oscan->flags = (U8)PARNO(nxt);
+ FLAGS(oscan) = (U8)PARNO(nxt);
if (RExC_open_parens) {
/*open->CURLYM*/
RExC_open_parens[PARNO(nxt1)] = REGNODE_OFFSET(oscan);
@@ -2817,7 +2817,7 @@ Perl_study_chunk(pTHX_
depth+1, mutate_ok);
}
else
- oscan->flags = 0;
+ FLAGS(oscan) = 0;
}
else if ((OP(oscan) == CURLYX)
&& (flags & SCF_WHILEM_VISITED_POS)
@@ -2833,11 +2833,11 @@ Perl_study_chunk(pTHX_
if (OP(REGNODE_BEFORE(nxt)) == NOTHING) /* LONGJMP */
nxt += ARG1u(nxt);
nxt = REGNODE_BEFORE(nxt);
- if (nxt->flags & 0xf) {
+ if (FLAGS(nxt) & 0xf) {
/* we've already set whilem count on this node */
} else if (++data->whilem_c < 16) {
assert(data->whilem_c <= RExC_whilem_seen);
- nxt->flags = (U8)(data->whilem_c
+ FLAGS(nxt) = (U8)(data->whilem_c
| (RExC_whilem_seen << 4)); /* On WHILEM */
}
}
@@ -3223,7 +3223,7 @@ Perl_study_chunk(pTHX_
}
else if ( REGNODE_TYPE(OP(scan)) == BRANCHJ
/* Lookbehind, or need to calculate parens/evals/stclass: */
- && (scan->flags || data || (flags & SCF_DO_STCLASS))
+ && (FLAGS(scan) || data || (flags & SCF_DO_STCLASS))
&& (OP(scan) == IFMATCH || OP(scan) == UNLESSM))
{
if ( !PERL_ENABLE_POSITIVE_ASSERTION_STUDY
@@ -3260,7 +3260,7 @@ Perl_study_chunk(pTHX_
cur_last_close_op= *(data_fake.last_close_opp);
data_fake.pos_delta = delta;
- if ( flags & SCF_DO_STCLASS && !scan->flags
+ if ( flags & SCF_DO_STCLASS && !FLAGS(scan)
&& OP(scan) == IFMATCH ) { /* Lookahead */
ssc_init(pRExC_state, &intrnl);
data_fake.start_class = &intrnl;
@@ -3277,7 +3277,7 @@ Perl_study_chunk(pTHX_
recursed_depth, NULL, f, depth+1,
mutate_ok);
- if (scan->flags) {
+ if (FLAGS(scan)) {
if ( deltanext < 0
|| deltanext > (I32) U8_MAX
|| minnext > (I32)U8_MAX
@@ -3293,7 +3293,7 @@ Perl_study_chunk(pTHX_
* matches to avoid breakage for those not using this
* extension) */
if (deltanext) {
- scan->next_off = deltanext;
+ NEXT_OFF(scan) = deltanext;
if (
/* See a CLOSE op inside this lookbehind? */
cur_last_close_op != *(data_fake.last_close_opp)
@@ -3308,7 +3308,7 @@ Perl_study_chunk(pTHX_
is_positive ? "positive" : "negative");
}
}
- scan->flags = (U8)minnext + deltanext;
+ FLAGS(scan) = (U8)minnext + deltanext;
}
if (data) {
if (data_fake.flags & (SF_HAS_PAR|SF_IN_PAR))
@@ -3365,7 +3365,7 @@ Perl_study_chunk(pTHX_
StructCopy(data, &data_fake, scan_data_t);
if ((flags & SCF_DO_SUBSTR) && data->last_found) {
f |= SCF_DO_SUBSTR;
- if (scan->flags)
+ if (FLAGS(scan))
scan_commit(pRExC_state, &data_fake, minlenp, is_inf);
data_fake.last_found=newSVsv(data->last_found);
}
@@ -3380,7 +3380,7 @@ Perl_study_chunk(pTHX_
data_fake.pos_delta = delta;
if (is_inf)
data_fake.flags |= SF_IS_INF;
- if ( flags & SCF_DO_STCLASS && !scan->flags
+ if ( flags & SCF_DO_STCLASS && !FLAGS(scan)
&& OP(scan) == IFMATCH ) { /* Lookahead */
ssc_init(pRExC_state, &intrnl);
data_fake.start_class = &intrnl;
@@ -3396,7 +3396,7 @@ Perl_study_chunk(pTHX_
&deltanext, last, &data_fake,
stopparen, recursed_depth, NULL,
f, depth+1, mutate_ok);
- if (scan->flags) {
+ if (FLAGS(scan)) {
assert(0); /* This code has never been tested since this
is normally not compiled */
if ( deltanext < 0
@@ -3409,9 +3409,9 @@ Perl_study_chunk(pTHX_
}
if (deltanext) {
- scan->next_off = deltanext;
+ NEXT_OFF(scan) = deltanext;
}
- scan->flags = (U8)*minnextp + deltanext;
+ FLAGS(scan) = (U8)*minnextp + deltanext;
}
*minnextp += min;
@@ -3441,7 +3441,7 @@ Perl_study_chunk(pTHX_
data_fake.substrs[i].max_offset;
data->substrs[i].minlenp =
data_fake.substrs[i].minlenp;
- data->substrs[i].lookbehind += scan->flags;
+ data->substrs[i].lookbehind += FLAGS(scan);
}
}
}
@@ -3469,7 +3469,7 @@ Perl_study_chunk(pTHX_
}
}
else if (OP(scan) == EVAL) {
- if (data && !(scan->flags & EVAL_OPTIMISTIC_FLAG) )
+ if (data && !(FLAGS(scan) & EVAL_OPTIMISTIC_FLAG) )
data->flags |= SF_HAS_EVAL;
}
else if ( REGNODE_TYPE(OP(scan)) == ENDLIKE ) {
@@ -3496,7 +3496,7 @@ Perl_study_chunk(pTHX_
flags &= ~SCF_DO_SUBSTR;
}
}
- else if (OP(scan) == LOGICAL && scan->flags == 2) /* Embedded follows */
+ else if (OP(scan) == LOGICAL && FLAGS(scan) == 2) /* Embedded follows */
{
if (flags & SCF_DO_SUBSTR) {
scan_commit(pRExC_state, data, minlenp, is_inf);
diff --git a/regcomp_trie.c b/regcomp_trie.c
index 1096a25fcd..31b54ca936 100644
--- a/regcomp_trie.c
+++ b/regcomp_trie.c
@@ -1517,7 +1517,7 @@ Perl_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
OP( convert ) = TRIE;
/* store the type in the flags */
- convert->flags = nodetype;
+ FLAGS(convert) = nodetype;
DEBUG_r({
optimize = convert
+ NODE_STEP_REGNODE
diff --git a/regexec.c b/regexec.c
index 4bf61aa145..84ca8fd2dd 100644
--- a/regexec.c
+++ b/regexec.c
@@ -197,7 +197,7 @@ static const char non_utf8_target_but_utf8_required[]
/*
Search for mandatory following text node; for lookahead, the text must
- follow but for lookbehind (rn->flags != 0) we skip to the next step.
+ follow but for lookbehind (FLAGS(rn) != 0) we skip to the next step.
*/
#define FIND_NEXT_IMPT(rn) STMT_START { \
while (JUMPABLE(rn)) { \
@@ -207,7 +207,7 @@ static const char non_utf8_target_but_utf8_required[]
else if (type == PLUS) \
rn = REGNODE_AFTER_type(rn,tregnode_PLUS); \
else if (type == IFMATCH) \
- rn = (rn->flags == 0) ? REGNODE_AFTER_type(rn,tregnode_IFMATCH) : rn + ARG1u(rn); \
+ rn = (FLAGS(rn) == 0) ? REGNODE_AFTER_type(rn,tregnode_IFMATCH) : rn + ARG1u(rn); \
else rn += NEXT_OFF(rn); \
} \
} STMT_END
@@ -1781,15 +1781,15 @@ Perl_re_intuit_start(pTHX_
const enum { trie_plain, trie_utf8, trie_utf8_fold, trie_latin_utf8_fold, \
trie_utf8_exactfa_fold, trie_latin_utf8_exactfa_fold, \
trie_utf8l, trie_flu8, trie_flu8_latin } \
- trie_type = ((scan->flags == EXACT) \
+ trie_type = ((FLAGS(scan) == EXACT) \
? (utf8_target ? trie_utf8 : trie_plain) \
- : (scan->flags == EXACTL) \
+ : (FLAGS(scan) == EXACTL) \
? (utf8_target ? trie_utf8l : trie_plain) \
- : (scan->flags == EXACTFAA) \
+ : (FLAGS(scan) == EXACTFAA) \
? (utf8_target \
? trie_utf8_exactfa_fold \
: trie_latin_utf8_exactfa_fold) \
- : (scan->flags == EXACTFLU8 \
+ : (FLAGS(scan) == EXACTFLU8 \
? (utf8_target \
? trie_flu8 \
: trie_flu8_latin) \
@@ -6704,12 +6704,12 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
HV * widecharmap = MUTABLE_HV(rexi->data->data[ ARG1u( scan ) + 1 ]);
U32 state = trie->startstate;
- if (scan->flags == EXACTL || scan->flags == EXACTFLU8) {
+ if (FLAGS(scan) == EXACTL || FLAGS(scan) == EXACTFLU8) {
CHECK_AND_WARN_PROBLEMATIC_LOCALE_;
if (utf8_target
&& ! NEXTCHR_IS_EOS
&& UTF8_IS_ABOVE_LATIN1(nextbyte)
- && scan->flags == EXACTL)
+ && FLAGS(scan) == EXACTL)
{
/* We only output for EXACTL, as we let the folder
* output this message for EXACTFLU8 to avoid
@@ -8081,7 +8081,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
locinput += ln;
}
ref_yes:
- if (scan->flags) { /* == VOLATILE_REF but only other value is 0 */
+ if (FLAGS(scan)) { /* == VOLATILE_REF but only other value is 0 */
ST.cp = regcppush(rex, ARG2u(scan) - 1, maxopenparen);
REGCP_SET(ST.lastcp);
PUSH_STATE_GOTO(REF_next, next, locinput, loceol,
@@ -8428,7 +8428,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
/* if we got here, it should be an engine which
* supports compiling code blocks and stuff */
assert(rex->engine && rex->engine->op_comp);
- assert(!(scan->flags & ~RXf_PMf_COMPILETIME));
+ assert(!(FLAGS(scan) & ~RXf_PMf_COMPILETIME));
re_sv = rex->engine->op_comp(aTHX_ &ret, 1, NULL,
rex->engine, NULL, NULL,
/* copy /msixn etc to inner pattern */
@@ -8632,7 +8632,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
case ACCEPT: /* (*ACCEPT) */
is_accepted = true;
- if (scan->flags)
+ if (FLAGS(scan))
sv_yes_mark = MUTABLE_SV(rexi->data->data[ ARG1u( scan ) ]);
utmp = ARG2u(scan);
@@ -8699,7 +8699,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
break;
case LOGICAL: /* modifier for EVAL and IFMATCH */
- logical = scan->flags & EVAL_FLAGS_MASK; /* reserve a bit for optimistic eval */
+ logical = FLAGS(scan) & EVAL_FLAGS_MASK; /* reserve a bit for optimistic eval */
break;
/*******************************************************************
@@ -8790,7 +8790,7 @@ NULL
case CURLYX: /* start of /A*B/ (for complex A) */
{
/* No need to save/restore up to this paren */
- I32 parenfloor = scan->flags;
+ I32 parenfloor = FLAGS(scan);
assert(next); /* keep Coverity happy */
if (OP(REGNODE_BEFORE(next)) == NOTHING) /* LONGJMP */
@@ -8905,20 +8905,20 @@ NULL
* op (string-length x #WHILEMs) times do we allocate the
* cache.
*
- * The top 4 bits of scan->flags byte say how many different
+ * The top 4 bits of FLAGS(scan) byte say how many different
* relevant CURLYX/WHILEM op pairs there are, while the
* bottom 4-bits is the identifying index number of this
* WHILEM.
*/
- if (scan->flags) {
+ if (FLAGS(scan)) {
if (!reginfo->poscache_maxiter) {
/* start the countdown: Postpone detection until we
* know the match is not *that* much linear. */
reginfo->poscache_maxiter
= (reginfo->strend - reginfo->strbeg + 1)
- * (scan->flags>>4);
+ * (FLAGS(scan)>>4);
/* possible overflow for long strings and many CURLYX's */
if (reginfo->poscache_maxiter < 0)
reginfo->poscache_maxiter = I32_MAX;
@@ -8951,9 +8951,9 @@ NULL
SSize_t offset, mask;
reginfo->poscache_iter = -1; /* stop eventual underflow */
- offset = (scan->flags & 0xf) - 1
+ offset = (FLAGS(scan) & 0xf) - 1
+ (locinput - reginfo->strbeg)
- * (scan->flags>>4);
+ * (FLAGS(scan)>>4);
mask = 1 << (offset % 8);
offset /= 8;
if (reginfo->info_aux->poscache[offset] & mask) {
@@ -9089,7 +9089,7 @@ NULL
NOT_REACHED; /* NOTREACHED */
case CUTGROUP: /* /(*THEN)/ */
- sv_yes_mark = st->u.mark.mark_name = scan->flags
+ sv_yes_mark = st->u.mark.mark_name = FLAGS(scan)
? MUTABLE_SV(rexi->data->data[ ARG1u( scan ) ])
: NULL;
PUSH_STATE_GOTO(CUTGROUP_next, next, locinput, loceol,
@@ -9156,8 +9156,8 @@ NULL
ST.lastcloseparen = RXp_LASTCLOSEPAREN(rex);
/* if paren positive, emulate an OPEN/CLOSE around A */
- if (ST.me->flags) {
- U32 paren = ST.me->flags;
+ if (FLAGS(ST.me)) {
+ U32 paren = FLAGS(ST.me);
lastopen = paren;
if (paren > maxopenparen)
maxopenparen = paren;
@@ -9202,15 +9202,15 @@ NULL
depth, (IV) ST.count, (IV)ST.alen)
);
- if (ST.me->flags) {
+ if (FLAGS(ST.me)) {
/* emulate CLOSE: mark current A as captured */
- U32 paren = (U32)ST.me->flags;
+ U32 paren = (U32)FLAGS(ST.me);
CLOSE_CAPTURE(rex, paren,
HOPc(locinput, -ST.alen) - reginfo->strbeg,
locinput - reginfo->strbeg);
}
- if (EVAL_CLOSE_PAREN_IS_TRUE(cur_eval,(U32)ST.me->flags))
+ if (EVAL_CLOSE_PAREN_IS_TRUE(cur_eval,(U32)FLAGS(ST.me)))
goto fake_end;
@@ -9226,7 +9226,7 @@ NULL
if (ST.minmod || ST.count < ARG1i(ST.me) /* min*/
- || EVAL_CLOSE_PAREN_IS_TRUE(cur_eval,(U32)ST.me->flags))
+ || EVAL_CLOSE_PAREN_IS_TRUE(cur_eval,(U32)FLAGS(ST.me)))
sayNO;
curlym_do_B: /* execute the B in /A{m,n}B/ */
@@ -9275,9 +9275,9 @@ NULL
}
curlym_close_B:
- if (ST.me->flags) {
+ if (FLAGS(ST.me)) {
/* emulate CLOSE: mark current A as captured */
- U32 paren = (U32)ST.me->flags;
+ U32 paren = (U32)FLAGS(ST.me);
if (ST.count || is_accepted) {
CLOSE_CAPTURE(rex, paren,
HOPc(locinput, -ST.alen) - reginfo->strbeg,
@@ -9286,7 +9286,7 @@ NULL
else
RXp_OFFSp(rex)[paren].end = -1;
- if (EVAL_CLOSE_PAREN_IS_TRUE(cur_eval,(U32)ST.me->flags))
+ if (EVAL_CLOSE_PAREN_IS_TRUE(cur_eval,(U32)FLAGS(ST.me)))
{
if (ST.count || is_accepted)
goto fake_end;
@@ -9349,7 +9349,7 @@ NULL
goto repeat;
case CURLYN: /* /(A){m,n}B/ where A is width 1 char */
- ST.paren = scan->flags; /* Which paren to set */
+ ST.paren = FLAGS(scan); /* Which paren to set */
ST.lastparen = RXp_LASTPAREN(rex);
ST.lastcloseparen = RXp_LASTCLOSEPAREN(rex);
if (ST.paren > maxopenparen)
@@ -9738,10 +9738,10 @@ NULL
ST.wanted = 1;
ifmatch_trivial_fail_test:
ST.prev_match_end= match_end;
- ST.count = scan->next_off + 1; /* next_off repurposed to be
+ ST.count = NEXT_OFF(scan) + 1; /* next_off repurposed to be
lookbehind count, requires
non-zero flags */
- if (! scan->flags) { /* 'flags' zero means lookahed */
+ if (! FLAGS(scan)) { /* 'flags' zero means lookahead */
/* Lookahead starts here and ends at the normal place */
ST.start = locinput;
@@ -9749,7 +9749,7 @@ NULL
match_end = NULL;
}
else {
- PERL_UINT_FAST8_T back_count = scan->flags;
+ PERL_UINT_FAST8_T back_count = FLAGS(scan);
char * s;
match_end = locinput;
@@ -9847,7 +9847,7 @@ NULL
/* FALLTHROUGH */
case PRUNE: /* (*PRUNE) */
- if (scan->flags)
+ if (FLAGS(scan))
sv_yes_mark = sv_commit = MUTABLE_SV(rexi->data->data[ ARG1u( scan ) ]);
PUSH_STATE_GOTO(COMMIT_next, next, locinput, loceol,
script_run_begin);
@@ -9860,7 +9860,7 @@ NULL
NOT_REACHED; /* NOTREACHED */
case OPFAIL: /* (*FAIL) */
- if (scan->flags)
+ if (FLAGS(scan))
sv_commit = MUTABLE_SV(rexi->data->data[ ARG1u( scan ) ]);
if (logical) {
/* deal with (?(?!)X|Y) properly,
@@ -9910,7 +9910,7 @@ NULL
NOT_REACHED; /* NOTREACHED */
case SKIP: /* (*SKIP) */
- if (!scan->flags) {
+ if (!FLAGS(scan)) {
/* (*SKIP) : if we fail we cut here*/
ST.mark_name = NULL;
ST.mark_loc = locinput;
diff --git a/regexp.h b/regexp.h
index d3269ad18c..243cf246c6 100644
--- a/regexp.h
+++ b/regexp.h
@@ -29,10 +29,23 @@ struct regnode_meta {
U8 off_by_arg;
};
+/* this ensures that on alignment sensitive platforms
+ * this struct is aligned on 32 bit boundaries */
+union regnode_head {
+ struct {
+ union {
+ U8 flags;
+ U8 str_len_u8;
+ U8 first_byte;
+ } u_8;
+ U8 type;
+ U16 next_off;
+ } data;
+ U32 data_u32;
+};
+
struct regnode {
- U8 flags;
- U8 type;
- U16 next_off;
+ union regnode_head head;
};
typedef struct regnode regnode;