summary | refs | log | tree | commit | diff
path: root/pod
diff options
context:
space:
mode:
author Yves Orton <demerphq@gmail.com> 2022-02-16 03:53:38 +0100
committer Hugo van der Sanden <hv@crypt.org> 2022-02-18 15:08:24 +0000
commit bddb8c791b4cd7f67db06213e35b0f2351c0fea8 (patch)
tree 26d019df9d21c9af7cbf1148fd632ea0b5f9cbaa /pod
parent d7ea8430bc1600e5e1aa5f980afeddedec662618 (diff)
download perl-bddb8c791b4cd7f67db06213e35b0f2351c0fea8.tar.gz
perlreguts.pod: synchronize regexp_internal docs with code
Various changes have been made to struct regexp_internal over time which have not been documented. This updates the docs to match the code as it is now, in preparation for changing the docs in subsequent commits.
Diffstat (limited to 'pod')
-rw-r--r-- pod/perlreguts.pod 64
1 file changed, 49 insertions, 15 deletions
diff --git a/pod/perlreguts.pod b/pod/perlreguts.pod
index b0a8d8f922..e58aa42535 100644
--- a/pod/perlreguts.pod
+++ b/pod/perlreguts.pod
@@ -827,21 +827,19 @@ The following structure is used as the C<pprivate> struct by perl's
regex engine. Since it is specific to perl it is only of curiosity
value to other engine implementations.
- typedef struct regexp_internal {
- U32 *offsets; /* offset annotations 20001228 MJD
- * data about mapping the program to
- * the string*/
- regnode *regstclass; /* Optional startclass as identified or
- * constructed by the optimiser */
- struct reg_data *data; /* Additional miscellaneous data used
- * by the program. Used to make it
- * easier to clone and free arbitrary
- * data that the regops need. Often the
- * ARG field of a regop is an index
- * into this structure */
- regnode program[1]; /* Unwarranted chumminess with
- * compiler. */
- } regexp_internal;
+ typedef struct regexp_internal {
+ union {
+ U32 *offsets;
+ U32 proglen;
+ } u;
+ regnode *regstclass;
+ struct reg_data *data;
+ struct reg_code_blocks *code_blocks;
+ int name_list_idx;
+ regnode program[1];
+ } regexp_internal;
+
+Description of the attributes is as follows:
=over 5
@@ -851,6 +849,10 @@ Offsets holds a mapping of offset in the C<program>
to offset in the C<precomp> string. This is only used by ActiveState's
visual regex debugger.
+=item C<proglen>
+
+Stores the length of the compiled program in units of regops.
+
=item C<regstclass>
Special regop that is used by C<re_intuit_start()> to check if a pattern
@@ -878,6 +880,38 @@ what array. During compilation regops that need special structures stored
will add an element to each array using the add_data() routine and then store
the index in the regop.
+In modern perls the 0th element of this structure is reserved and is NEVER
+used to store anything of use. This is to allow things that need to index
+into this array to represent "no value".
+
+=item C<code_blocks>
+
+This optional structure is used to manage C<(?{})> constructs in the
+pattern. It is made up of the following structures.
+
+ /* record the position of a (?{...}) within a pattern */
+ struct reg_code_block {
+ STRLEN start;
+ STRLEN end;
+ OP *block;
+ REGEXP *src_regex;
+ };
+
+ /* array of reg_code_block's plus header info */
+ struct reg_code_blocks {
+ int refcnt; /* we may be pointed to from a regex
+ and from the savestack */
+ int count; /* how many code blocks */
+ struct reg_code_block *cb; /* array of reg_code_block's */
+ };
+
+=item C<name_list_idx>
+
+This is the index into the data array where an AV is stored that contains
+the names of any named capture buffers in the pattern, should there be
+any. This is only used in the debugging version of the regex engine. It
+will be 0 if there is no such data.
+
=item C<program>
Compiled program. Inlined into the structure so the entire struct can be