summaryrefslogtreecommitdiff
path: root/regcomp_study.c
diff options
context:
space:
mode:
authorYves Orton <demerphq@gmail.com>2023-01-15 13:00:46 +0100
committerYves Orton <demerphq@gmail.com>2023-03-13 21:26:08 +0800
commit17e3e02ad120eabda2bdb6c297a70d53294437ef (patch)
tree6fc99228c2a34c7ee5ec892de4c1f1a980e2f240 /regcomp_study.c
parent59db194299c94c6707095797c3df0e2f67ff82b2 (diff)
downloadperl-17e3e02ad120eabda2bdb6c297a70d53294437ef.tar.gz
regex engine - simplify regnode structures and make them consistent
This eliminates the regnode_2L data structure, and merges it with the older regnode_2 data structure. At the same time it makes each "arg" property of the various regnode types that have one be consistently structured as an anonymous union like this: union { U32 arg1u; I32 arg1i; struct { U16 arg1a; U16 arg1b; }; }; We then expose four macros for accessing each slot: ARG1u(), ARG1i(), ARG1a() and ARG1b(). Code then explicitly designates which one it wants. The old logic used ARG() to access a U32 arg1, and ARG1() to access an I32 arg1, which was confusing to say the least. The regnode_2L structure had a U32 arg1 and an I32 arg2, and the regnode_2 data structure had two I32 args. With the new set of macros we use the regnode_2 for both, and use the appropriate macros to show whether we want signed or unsigned values. This also renames the regnode_4 to regnode_3. The 3 stands for "three 32-bit args". However, as each slot can also store two U16s, a regnode_3 can hold up to 6 U16s, or 3 I32s, or a combination. For instance the CURLY style nodes use regnode_3 to store 4 values: ARG1i() for min count, ARG2i() for max count, and ARG3a() and ARG3b() for parens before and inside the quantifier. It also changes the functions reganode() to reg1node() and changes reg2Lanode() to reg2node(). The 2L thing was just confusing.
Diffstat (limited to 'regcomp_study.c')
-rw-r--r--regcomp_study.c32
1 files changed, 16 insertions, 16 deletions
diff --git a/regcomp_study.c b/regcomp_study.c
index 3ab3458162..81d55719df 100644
--- a/regcomp_study.c
+++ b/regcomp_study.c
@@ -49,7 +49,7 @@ S_rck_elide_nothing(pTHX_ regnode *node)
? I32_MAX
/* I32 may be smaller than U16 on CRAYs! */
: (I32_MAX < U16_MAX ? I32_MAX : U16_MAX));
- int off = (REGNODE_OFF_BY_ARG(OP(node)) ? ARG(node) : NEXT_OFF(node));
+ int off = (REGNODE_OFF_BY_ARG(OP(node)) ? ARG1u(node) : NEXT_OFF(node));
int noff;
regnode *n = node;
@@ -58,14 +58,14 @@ S_rck_elide_nothing(pTHX_ regnode *node)
(n = regnext(n))
&& (
(REGNODE_TYPE(OP(n)) == NOTHING && (noff = NEXT_OFF(n)))
- || ((OP(n) == LONGJMP) && (noff = ARG(n)))
+ || ((OP(n) == LONGJMP) && (noff = ARG1u(n)))
)
&& off + noff < max
) {
off += noff;
}
if (REGNODE_OFF_BY_ARG(OP(node)))
- ARG(node) = off;
+ ARG1u(node) = off;
else
NEXT_OFF(node) = off;
}
@@ -368,7 +368,7 @@ Perl_ssc_init(pTHX_ const RExC_state_t *pRExC_state, regnode_ssc *ssc)
Zero(ssc, 1, regnode_ssc);
set_ANYOF_SYNTHETIC(ssc);
- ARG_SET(ssc, ANYOF_MATCHES_ALL_OUTSIDE_BITMAP_VALUE);
+ ARG1u_SET(ssc, ANYOF_MATCHES_ALL_OUTSIDE_BITMAP_VALUE);
ssc_anything(ssc);
/* If any portion of the regex is to operate under locale rules that aren't
@@ -445,7 +445,7 @@ S_get_ANYOF_cp_list_for_ssc(pTHX_ const RExC_state_t *pRExC_state,
invlist = _add_range_to_invlist(invlist, NUM_ANYOF_CODE_POINTS, UV_MAX);
}
else if (ANYOF_HAS_AUX(node)) {
- const U32 n = ARG(node);
+ const U32 n = ARG1u(node);
SV * const rv = MUTABLE_SV(RExC_rxi->data->data[n]);
AV * const av = MUTABLE_AV(SvRV(rv));
SV **const ary = AvARRAY(av);
@@ -1328,7 +1328,7 @@ Perl_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
if (flags && !NEXT_OFF(n)) {
DEBUG_PEEP("atch", val, depth, 0);
if (REGNODE_OFF_BY_ARG(OP(n))) {
- ARG_SET(n, val - n);
+ ARG1u_SET(n, val - n);
}
else {
NEXT_OFF(n) = val - n;
@@ -2213,8 +2213,8 @@ Perl_study_chunk(pTHX_
* the rest of this block. Specifically setting
* RExC_recurse[] must happen at least once during
* study_chunk(). */
- paren = ARG(scan);
- RExC_recurse[ARG2L(scan)] = scan;
+ paren = ARG1u(scan);
+ RExC_recurse[ARG2i(scan)] = scan;
start = REGNODE_p(RExC_open_parens[paren]);
end = REGNODE_p(RExC_close_parens[paren]);
@@ -2409,7 +2409,7 @@ Perl_study_chunk(pTHX_
U8 mask = ~ ('A' ^ 'a'); /* These differ in just one bit */
OP(scan) = ANYOFM;
- ARG_SET(scan, *s & mask);
+ ARG1u_SET(scan, *s & mask);
FLAGS(scan) = mask;
/* We're not EXACTFish any more, so restudy.
* Search for "restudy" in this file to find
@@ -2536,7 +2536,7 @@ Perl_study_chunk(pTHX_
* case, and the lowest code point of the
* pair (which the '&' forces) */
OP(next) = ANYOFM;
- ARG_SET(next, *STRING(next) & mask);
+ ARG1u_SET(next, *STRING(next) & mask);
FLAGS(next) = mask;
}
@@ -2562,8 +2562,8 @@ Perl_study_chunk(pTHX_
mincount = 1;
maxcount = 1;
} else {
- mincount = ARG1(scan);
- maxcount = ARG2(scan);
+ mincount = ARG1i(scan);
+ maxcount = ARG2i(scan);
}
next = regnext(scan);
if (OP(scan) == CURLYX) {
@@ -2801,7 +2801,7 @@ Perl_study_chunk(pTHX_
regnode *nnxt = regnext(nxt1);
if (nnxt == nxt) {
if (REGNODE_OFF_BY_ARG(OP(nxt1)))
- ARG_SET(nxt1, nxt2 - nxt1);
+ ARG1u_SET(nxt1, nxt2 - nxt1);
else if (nxt2 - nxt1 < U16_MAX)
NEXT_OFF(nxt1) = nxt2 - nxt1;
else
@@ -2831,7 +2831,7 @@ Perl_study_chunk(pTHX_
regnode *nxt = oscan + NEXT_OFF(oscan);
if (OP(REGNODE_BEFORE(nxt)) == NOTHING) /* LONGJMP */
- nxt += ARG(nxt);
+ nxt += ARG1u(nxt);
nxt = REGNODE_BEFORE(nxt);
if (nxt->flags & 0xf) {
/* we've already set whilem count on this node */
@@ -3528,7 +3528,7 @@ Perl_study_chunk(pTHX_
check there too. */
regnode *trie_node= scan;
regnode *tail= regnext(scan);
- reg_trie_data *trie = (reg_trie_data*)RExC_rxi->data->data[ ARG(scan) ];
+ reg_trie_data *trie = (reg_trie_data*)RExC_rxi->data->data[ ARG1u(scan) ];
SSize_t max1 = 0, min1 = OPTIMIZE_INFTY;
regnode_ssc accum;
@@ -3657,7 +3657,7 @@ Perl_study_chunk(pTHX_
}
#else
else if (REGNODE_TYPE(OP(scan)) == TRIE) {
- reg_trie_data *trie = (reg_trie_data*)RExC_rxi->data->data[ ARG(scan) ];
+ reg_trie_data *trie = (reg_trie_data*)RExC_rxi->data->data[ ARG1u(scan) ];
U8*bang=NULL;
min += trie->minlen;