summaryrefslogtreecommitdiff
path: root/regcomp_internal.h
diff options
context:
space:
mode:
Diffstat (limited to 'regcomp_internal.h')
-rw-r--r--regcomp_internal.h48
1 files changed, 48 insertions, 0 deletions
diff --git a/regcomp_internal.h b/regcomp_internal.h
index f1b81625a0..a895452511 100644
--- a/regcomp_internal.h
+++ b/regcomp_internal.h
@@ -66,15 +66,57 @@ struct RExC_state_t {
* independent warning is raised for any given spot */
Size_t latest_warn_offset;
+ /* Branch reset /(?|...|...)/ gives us two concepts of capture buffer id.
+ * "Logical Parno" is the user visible view with branch reset taken into
+ * account. "Parno" (or physical parno) is the actual capture buffers in
+ * the pattern *NOT* taking into account branch reset. We also maintain
+ * a map of "next" pointers which allow us to skip to the next physical
+ * capture buffer with the same logical id, with 0 representing "none".
+ *
+ * As we compile we keep track of the two different counts using the
+ * 'logical_npar' and 'npar' members, and we keep track of the upper bound
+ * of both in 'total_par' and 'logical_total_par', we also populate
+ * the 'logical_to_parno' map, which gives us the first physical parno
+ * for a given logical parno, and the `parno_to_logical` array which gives
+ * us the logical id for each physical parno. When compilation is
+ * completed we construct the 'parno_to_logical_next' array from the
+ * 'parno_to_logical' array. (We do not bother constructing it during
+ * compilation as we do not need it, and we can construct it in O(N) time
+ * once we are done, but would need more complicated logic during the
+ * compile, because we want the next pointers to go from smallest to
+ * largest, eg, left to right.)
+ *
+ * Logical: $1 $2 $3 $4 $2 $3 $2 $5
+ * Physical: 1 2 3 4 5 6 7 8
+ * Next: 0 5 6 0 7 0 0 0
+ * Pattern /(a) (?| (b) (c) (d) | (e) (f) | (g) ) (h)/
+ *
+ * As much as possible the internals use and store the physical id of
+ * of capture buffers. We decode the physical to the logical only when
+ * we need to, for instance when someone use $2.
+ *
+ * Note that when branch reset is not used logical and physical are the
+ * same and the next data would be all zero. So when branch reset is not
+ * used we do not need to populate this data into the final regexp.
+ *
+ */
+ I32 *logical_to_parno; /* logical_parno to parno */
+ I32 *parno_to_logical; /* parno to logical_parno */
+ I32 *parno_to_logical_next; /* parno to next (greater value)
+ parno with the same
+ logical_parno as parno.*/
+
I32 npar; /* Capture buffer count so far in the
parse, (OPEN) plus one. ("par" 0 is
the whole pattern)*/
+ I32 logical_npar; /* Logical version of npar */
I32 total_par; /* During initial parse, is either 0,
or -1; the latter indicating a
reparse is needed. After that pass,
it is what 'npar' became after the
pass. Hence, it being > 0 indicates
we are in a reparse situation */
+ I32 logical_total_par; /* Logical version to total par */
I32 nestroot; /* root parens we are in - used by
accept */
I32 seen_zerolen;
@@ -157,6 +199,11 @@ struct RExC_state_t {
#define RExC_seen (pRExC_state->seen)
#define RExC_size (pRExC_state->size)
#define RExC_maxlen (pRExC_state->maxlen)
+#define RExC_logical_npar (pRExC_state->logical_npar)
+#define RExC_logical_total_parens (pRExC_state->logical_total_par)
+#define RExC_logical_to_parno (pRExC_state->logical_to_parno)
+#define RExC_parno_to_logical (pRExC_state->parno_to_logical)
+#define RExC_parno_to_logical_next (pRExC_state->parno_to_logical_next)
#define RExC_npar (pRExC_state->npar)
#define RExC_total_parens (pRExC_state->total_par)
#define RExC_parens_buf_size (pRExC_state->parens_buf_size)
@@ -1194,4 +1241,5 @@ static const scan_data_t zero_scan_data = {
CLEAR_OPTSTART; \
node = dumpuntil(r,start,(b),(e),last,sv,indent+1,depth+1);
+
#endif /* REGCOMP_INTERNAL_H */