summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYves Orton <demerphq@gmail.com>2022-02-11 06:30:45 +0100
committerHugo van der Sanden <hv@crypt.org>2022-02-18 15:08:24 +0000
commitc45b45416a61f2f56dbe348f33fb1bb07a1d5444 (patch)
tree3af45bebe39f1185787116521a5e2f1afcd813c6
parentbddb8c791b4cd7f67db06213e35b0f2351c0fea8 (diff)
downloadperl-c45b45416a61f2f56dbe348f33fb1bb07a1d5444.tar.gz
regcomp.c,re.pm: Remove "offsets" debugging code
This code was added by Mark Jason Dominus to aid a regex debugger he wrote for ActiveState. The basic premise is that every opcode in a regex can be attributed back to a contiguous sequence of characters that make up the pattern. This assumption has not been true ever since the "jump" TRIE optimizations were added to the engine. I spoke to MJD many years ago about whether it was ok to remove this from the regex engine and he said he had no objections. An example of a pattern that cannot be handled correctly by this logic is /(?: a x+ | b y+ | c z+ )/x where the (?:a ... | b ... | c ...) parts will now be handled by the TRIE logic and not by the BRANCH/EXACT opcodes that would have handled them in the past. The offset debug output cannot handle this type of transformation, and produces nonsense output that mentions opcodes that have been optimized away from the final program. The regex compiler is complicated enough without having to maintain this logic. There are essentially no tests for it, and the few tests that do cover it do so as a byproduct of testing other things. Despite the offsets logic only being used for debugging, supporting it does have a cost to non-debug logic, as various internal routines include parameters related to it that are otherwise unused. Note this output is only usable or visible by enabling special flags in re.pm; there is no formal API to access it short of parsing the output of the debug mode of the regex engine, which has changed multiple times over the past years.
-rw-r--r--embed.fnc12
-rw-r--r--embed.h5
-rw-r--r--ext/re/re.pm31
-rw-r--r--ext/re/t/regop.pl2
-rw-r--r--ext/re/t/regop.t15
-rw-r--r--pod/perlreguts.pod19
-rw-r--r--proto.h11
-rw-r--r--regcomp.c307
-rw-r--r--regcomp.h32
9 files changed, 71 insertions, 363 deletions
diff --git a/embed.fnc b/embed.fnc
index 45c6fd2b26..c54294cf7e 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -2029,9 +2029,12 @@ ERS |SV* |make_exactf_invlist |NN RExC_state_t *pRExC_state \
ES |regnode_offset|reg |NN RExC_state_t *pRExC_state \
|I32 paren|NN I32 *flagp|U32 depth
ES |regnode_offset|regnode_guts|NN RExC_state_t *pRExC_state \
- |const U8 op \
- |const STRLEN extra_len \
- |NN const char* const name
+ |const STRLEN extra_len
+#ifdef DEBUGGING
+ES |regnode_offset|regnode_guts_debug|NN RExC_state_t *pRExC_state \
+ |const U8 op \
+ |const STRLEN extra_len
+#endif
ES |void |change_engine_size|NN RExC_state_t *pRExC_state|const Ptrdiff_t size
ES |regnode_offset|reganode|NN RExC_state_t *pRExC_state|U8 op \
|U32 arg
@@ -2113,8 +2116,7 @@ ES |regnode_offset|handle_named_backref|NN RExC_state_t *pRExC_state \
ESTR |unsigned int|regex_set_precedence|const U8 my_operator
ES |regnode_offset|handle_regex_sets|NN RExC_state_t *pRExC_state \
|NULLOK SV ** return_invlist \
- |NN I32 *flagp|U32 depth \
- |NN char * const oregcomp_parse
+ |NN I32 *flagp|U32 depth
ES |void |set_regex_pv |NN RExC_state_t *pRExC_state|NN REGEXP *Rx
# if defined(DEBUGGING) && defined(ENABLE_REGEX_SETS_DEBUGGING)
ES |void |dump_regex_sets_structures \
diff --git a/embed.h b/embed.h
index c08806128c..691228bbe6 100644
--- a/embed.h
+++ b/embed.h
@@ -1030,6 +1030,7 @@
#endif
#define regdump_extflags(a,b) S_regdump_extflags(aTHX_ a,b)
#define regdump_intflags(a,b) S_regdump_intflags(aTHX_ a,b)
+#define regnode_guts_debug(a,b,c) S_regnode_guts_debug(aTHX_ a,b,c)
#define regtail_study(a,b,c,d) S_regtail_study(aTHX_ a,b,c,d)
# endif
# if defined(PERL_IN_REGEXEC_C)
@@ -1077,7 +1078,7 @@
#define handle_named_backref(a,b,c,d) S_handle_named_backref(aTHX_ a,b,c,d)
#define handle_names_wildcard(a,b,c,d) S_handle_names_wildcard(aTHX_ a,b,c,d)
#define handle_possible_posix(a,b,c,d,e) S_handle_possible_posix(aTHX_ a,b,c,d,e)
-#define handle_regex_sets(a,b,c,d,e) S_handle_regex_sets(aTHX_ a,b,c,d,e)
+#define handle_regex_sets(a,b,c,d) S_handle_regex_sets(aTHX_ a,b,c,d)
#define handle_user_defined_property(a,b,c,d,e,f,g,h,i,j) S_handle_user_defined_property(aTHX_ a,b,c,d,e,f,g,h,i,j)
#define invlist_contents(a,b) S_invlist_contents(aTHX_ a,b)
#define invlist_is_iterating S_invlist_is_iterating
@@ -1104,7 +1105,7 @@
#define regclass(a,b,c,d,e,f,g,h,i) S_regclass(aTHX_ a,b,c,d,e,f,g,h,i)
#define regex_set_precedence S_regex_set_precedence
#define reginsert(a,b,c,d) S_reginsert(aTHX_ a,b,c,d)
-#define regnode_guts(a,b,c,d) S_regnode_guts(aTHX_ a,b,c,d)
+#define regnode_guts(a,b) S_regnode_guts(aTHX_ a,b)
#define regpiece(a,b,c) S_regpiece(aTHX_ a,b,c)
#define regpnode(a,b,c) S_regpnode(aTHX_ a,b,c)
#define regtail(a,b,c,d) S_regtail(aTHX_ a,b,c,d)
diff --git a/ext/re/re.pm b/ext/re/re.pm
index d1db4625c0..791d680d57 100644
--- a/ext/re/re.pm
+++ b/ext/re/re.pm
@@ -4,7 +4,7 @@ package re;
use strict;
use warnings;
-our $VERSION = "0.41";
+our $VERSION = "0.42";
our @ISA = qw(Exporter);
our @EXPORT_OK = qw{
is_regexp regexp_pattern
@@ -71,8 +71,6 @@ my %flags = (
EXTRA => 0x3FF0000,
TRIEM => 0x0010000,
- OFFSETS => 0x0020000,
- OFFSETSDBG => 0x0040000,
STATE => 0x0080000,
OPTIMISEM => 0x0100000,
STACK => 0x0280000,
@@ -81,9 +79,7 @@ my %flags = (
DUMP_PRE_OPTIMIZE => 0x1000000,
WILDCARD => 0x2000000,
);
-$flags{ALL} = -1 & ~($flags{OFFSETS}
- |$flags{OFFSETSDBG}
- |$flags{BUFFERS}
+$flags{ALL} = -1 & ~($flags{BUFFERS}
|$flags{DUMP_PRE_OPTIMIZE}
|$flags{WILDCARD}
);
@@ -626,26 +622,6 @@ Enable debugging of the \G modifier.
Enable enhanced optimisation debugging and start-point optimisations.
Probably not useful except when debugging the regexp engine itself.
-=item OFFSETS
-
-Dump offset information. This can be used to see how regops correlate
-to the pattern. Output format is
-
- NODENUM:POSITION[LENGTH]
-
-Where 1 is the position of the first char in the string. Note that position
-can be 0, or larger than the actual length of the pattern, likewise length
-can be zero.
-
-=item OFFSETSDBG
-
-Enable debugging of offsets information. This emits copious
-amounts of trace information and doesn't mesh well with other
-debug options.
-
-Almost definitely only useful to people hacking
-on the offsets part of the debug engine.
-
=item DUMP_PRE_OPTIMIZE
Enable the dumping of the compiled pattern before the optimization phase.
@@ -687,8 +663,7 @@ These are useful shortcuts to save on the typing.
=item ALL
-Enable all options at once except OFFSETS, OFFSETSDBG, BUFFERS, WILDCARD, and
-DUMP_PRE_OPTIMIZE.
+Enable all options at once except BUFFERS, WILDCARD, and DUMP_PRE_OPTIMIZE.
(To get every single option without exception, use both ALL and EXTRA, or
starting in 5.30 on a C<-DDEBUGGING>-enabled perl interpreter, use
the B<-Drv> command-line switches.)
diff --git a/ext/re/t/regop.pl b/ext/re/t/regop.pl
index 86976ee0da..c725b73a9e 100644
--- a/ext/re/t/regop.pl
+++ b/ext/re/t/regop.pl
@@ -1,4 +1,4 @@
-use re Debug=>qw(DUMP EXECUTE OFFSETS TRIEC TEST);
+use re Debug=>qw(DUMP EXECUTE TRIEC TEST);
my @tests=(
XY => 'X(A|[B]Q||C|D)Y' ,
foobar => '[f][o][o][b][a][r]',
diff --git a/ext/re/t/regop.t b/ext/re/t/regop.t
index cf35d71fb0..20e9586c33 100644
--- a/ext/re/t/regop.t
+++ b/ext/re/t/regop.t
@@ -140,7 +140,6 @@ Freeing REx: "[f][o][o][b][a][r]"
minlen 3
---
# Compiling REx "(?:ABCP|ABCG|ABCE|ABCB|ABCA|ABCD)"
-# Got 164 bytes for offset annotations.
# TRIE(NATIVE): W:6 C:24 Uq:7 Min:4 Max:4
# Char : Match Base Ofs A B C P G E D
# State|---------------------------------------------------
@@ -166,8 +165,6 @@ minlen 3
# <D>
# 20: END (0)
# anchored "ABC" at 0 (checking anchored) minlen 4
-# Offsets: [20]
-# 1:4[3] 3:4[15] 19:32[0] 20:34[0]
# Guessing start of match in sv for REx "(?:ABCP|ABCG|ABCE|ABCB|ABCA|ABCD)" against "ABCD"
# Found anchored substr "ABC" at offset 0...
# Guessed: match at offset 0
@@ -210,8 +207,6 @@ anchored "ABC" at 0
# 47: EOL(48)
# 48: END(0)
#floating ""$ at 3..4 (checking floating) stclass "EXACTF <.>" minlen 3
-#Offsets: [48]
-# 1:1[1] 3:2[1] 5:2[81] 45:83[1] 47:84[1] 48:85[0]
#Guessing start of match, REx "(\.COM|\.EXE|\.BAT|\.CMD|\.VBS|\.VBE|\.JS|\.JSE|\.WSF|\.WSH|..." against "D:dev/perl/ver/28321_/perl.exe"...
#Found floating substr ""$ at offset 30...
#Starting position does not contradict /^/m...
@@ -233,7 +228,6 @@ anchored "ABC" at 0
#Freeing REx: "(\\.COM|\\.EXE|\\.BAT|\\.CMD|\\.VBS|\\.VBE|\\.JS|\\.JSE|\\."......
%MATCHED%
floating ""$ at 3..4 (checking floating)
-#1:1[1] 3:2[1] 5:2[64] 45:83[1] 47:84[1] 48:85[0]
#stclass EXACTF <.> minlen 3
#Found floating substr ""$ at offset 30...
#Does not contradict STCLASS...
@@ -241,22 +235,16 @@ floating ""$ at 3..4 (checking floating)
#Matching stclass EXACTF <.> against ".exe"
---
#Compiling REx "[q]"
-#size 3 nodes Got 7 bytes for offset annotations.
#first at 1
#Final program:
# 1: EXACT <q>(3)
# 3: END(0)
#anchored "q" at 0 (checking anchored isall) minlen 1
-#Offsets: [3]
-# 1:1[3] 3:4[0]
#Guessing start of match, REx "[q]" against "q"...
#Found anchored substr "q" at offset 0...
#Guessed: match at offset 0
#%MATCHED%
#Freeing REx: "[q]"
-Got 7 bytes for offset annotations.
-Offsets: [3]
-1:1[3] 3:4[0]
%MATCHED%
Freeing REx: "[q]"
---
@@ -281,7 +269,6 @@ Freeing REx: "[q]"
Freeing REx: "^(\S{1,9}):\s*(\d+)$"
---
#Compiling REx "(?(DEFINE)(?<foo>foo))(?(DEFINE)(?<bar>(?&foo)bar))(?(DEFINE"...
-#Got 532 bytes for offset annotations.
study_chunk_recursed_count: 5
#Final program:
# 1: DEFINEP (3)
@@ -317,8 +304,6 @@ study_chunk_recursed_count: 5
# 61: TAIL (62)
# 62: END (0)
minlen 0
-#Offsets: [66]
-# 1:3[0] 3:10[0] 5:17[1] 7:18[3] 9:21[1] 11:21[0] 13:22[0] 14:25[0] 16:32[0] 18:39[1] 20:41[3] 23:47[3] 25:50[1] 27:50[0] 29:51[0] 30:54[0] 32:61[0] 34:68[1] 36:70[3] 39:76[3] 41:79[1] 43:79[0] 45:80[0] 46:83[0] 48:90[0] 50:97[1] 52:99[3] 55:105[3] 57:108[1] 59:108[0] 61:109[0] 62:110[0]
#Matching REx "(?(DEFINE)(?<foo>foo))(?(DEFINE)(?<bar>(?&foo)bar))(?(DEFINE"... against ""
# 0 <> <> | 1:DEFINEP(3)
# 0 <> <> | 3:IFTHEN(14)
diff --git a/pod/perlreguts.pod b/pod/perlreguts.pod
index e58aa42535..2aae739d9b 100644
--- a/pod/perlreguts.pod
+++ b/pod/perlreguts.pod
@@ -828,13 +828,10 @@ regex engine. Since it is specific to perl it is only of curiosity
value to other engine implementations.
typedef struct regexp_internal {
- union {
- U32 *offsets;
- U32 proglen;
- } u;
regnode *regstclass;
struct reg_data *data;
struct reg_code_blocks *code_blocks;
+ U32 proglen;
int name_list_idx;
regnode program[1];
} regexp_internal;
@@ -843,16 +840,6 @@ Description of the attributes is as follows:
=over 5
-=item C<offsets>
-
-Offsets holds a mapping of offset in the C<program>
-to offset in the C<precomp> string. This is only used by ActiveState's
-visual regex debugger.
-
-=item C<proglen>
-
-Stores the length of the compiled program in units of regops.
-
=item C<regstclass>
Special regop that is used by C<re_intuit_start()> to check if a pattern
@@ -905,6 +892,10 @@ pattern. It is made up of the following structures.
struct reg_code_block *cb; /* array of reg_code_block's */
};
+=item C<proglen>
+
+Stores the length of the compiled program in units of regops.
+
=item C<name_list_idx>
This is the index into the data array where an AV is stored that contains
diff --git a/proto.h b/proto.h
index 2df7f0a30f..6f7619d3b4 100644
--- a/proto.h
+++ b/proto.h
@@ -4720,6 +4720,9 @@ STATIC void S_regdump_extflags(pTHX_ const char *lead, const U32 flags);
#define PERL_ARGS_ASSERT_REGDUMP_EXTFLAGS
STATIC void S_regdump_intflags(pTHX_ const char *lead, const U32 flags);
#define PERL_ARGS_ASSERT_REGDUMP_INTFLAGS
+STATIC regnode_offset S_regnode_guts_debug(pTHX_ RExC_state_t *pRExC_state, const U8 op, const STRLEN extra_len);
+#define PERL_ARGS_ASSERT_REGNODE_GUTS_DEBUG \
+ assert(pRExC_state)
STATIC bool S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode_offset p, const regnode_offset val, U32 depth)
__attribute__warn_unused_result__;
#define PERL_ARGS_ASSERT_REGTAIL_STUDY \
@@ -5886,9 +5889,9 @@ STATIC bool S_handle_names_wildcard(pTHX_ const char * wname, const STRLEN wname
STATIC int S_handle_possible_posix(pTHX_ RExC_state_t *pRExC_state, const char* const s, char ** updated_parse_ptr, AV** posix_warnings, const bool check_only);
#define PERL_ARGS_ASSERT_HANDLE_POSSIBLE_POSIX \
assert(pRExC_state); assert(s)
-STATIC regnode_offset S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV ** return_invlist, I32 *flagp, U32 depth, char * const oregcomp_parse);
+STATIC regnode_offset S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV ** return_invlist, I32 *flagp, U32 depth);
#define PERL_ARGS_ASSERT_HANDLE_REGEX_SETS \
- assert(pRExC_state); assert(flagp); assert(oregcomp_parse)
+ assert(pRExC_state); assert(flagp)
STATIC SV * S_handle_user_defined_property(pTHX_ const char * name, const STRLEN name_len, const bool is_utf8, const bool to_fold, const bool runtime, const bool deferrable, SV* contents, bool *user_defined_ptr, SV * msg, const STRLEN level);
#define PERL_ARGS_ASSERT_HANDLE_USER_DEFINED_PROPERTY \
assert(name); assert(contents); assert(user_defined_ptr); assert(msg)
@@ -5990,9 +5993,9 @@ STATIC unsigned int S_regex_set_precedence(const U8 my_operator)
STATIC void S_reginsert(pTHX_ RExC_state_t *pRExC_state, const U8 op, const regnode_offset operand, const U32 depth);
#define PERL_ARGS_ASSERT_REGINSERT \
assert(pRExC_state)
-STATIC regnode_offset S_regnode_guts(pTHX_ RExC_state_t *pRExC_state, const U8 op, const STRLEN extra_len, const char* const name);
+STATIC regnode_offset S_regnode_guts(pTHX_ RExC_state_t *pRExC_state, const STRLEN extra_len);
#define PERL_ARGS_ASSERT_REGNODE_GUTS \
- assert(pRExC_state); assert(name)
+ assert(pRExC_state)
STATIC regnode_offset S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth);
#define PERL_ARGS_ASSERT_REGPIECE \
assert(pRExC_state); assert(flagp)
diff --git a/regcomp.c b/regcomp.c
index cec3194efb..d735e39587 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -294,10 +294,6 @@ struct RExC_state_t {
#define RExC_seen_d_op (pRExC_state->seen_d_op) /* Seen something that differs
under /d from /u ? */
-#ifdef RE_TRACK_PATTERN_OFFSETS
-# define RExC_offsets (RExC_rxi->u.offsets) /* I am not like the
- others */
-#endif
#define RExC_emit (pRExC_state->emit)
#define RExC_emit_start (pRExC_state->emit_start)
#define RExC_sawback (pRExC_state->sawback)
@@ -1058,70 +1054,8 @@ static const scan_data_t zero_scan_data = {
#define REGNODE_p(offset) (RExC_emit_start + (offset))
#define REGNODE_OFFSET(node) ((node) - RExC_emit_start)
-/* Macros for recording node offsets. 20001227 mjd@plover.com
- * Nodes are numbered 1, 2, 3, 4. Node #n's position is recorded in
- * element 2*n-1 of the array. Element #2n holds the byte length node #n.
- * Element 0 holds the number n.
- * Position is 1 indexed.
- */
-#ifndef RE_TRACK_PATTERN_OFFSETS
-#define Set_Node_Offset_To_R(offset,byte)
-#define Set_Node_Offset(node,byte)
-#define Set_Cur_Node_Offset
-#define Set_Node_Length_To_R(node,len)
-#define Set_Node_Length(node,len)
-#define Set_Node_Cur_Length(node,start)
-#define Node_Offset(n)
-#define Node_Length(n)
-#define Set_Node_Offset_Length(node,offset,len)
-#define ProgLen(ri) ri->u.proglen
-#define SetProgLen(ri,x) ri->u.proglen = x
-#define Track_Code(code)
-#else
-#define ProgLen(ri) ri->u.offsets[0]
-#define SetProgLen(ri,x) ri->u.offsets[0] = x
-#define Set_Node_Offset_To_R(offset,byte) STMT_START { \
- MJD_OFFSET_DEBUG(("** (%d) offset of node %d is %d.\n", \
- __LINE__, (int)(offset), (int)(byte))); \
- if((offset) < 0) { \
- Perl_croak(aTHX_ "value of node is %d in Offset macro", \
- (int)(offset)); \
- } else { \
- RExC_offsets[2*(offset)-1] = (byte); \
- } \
-} STMT_END
-
-#define Set_Node_Offset(node,byte) \
- Set_Node_Offset_To_R(REGNODE_OFFSET(node), (byte)-RExC_start)
-#define Set_Cur_Node_Offset Set_Node_Offset(RExC_emit, RExC_parse)
-
-#define Set_Node_Length_To_R(node,len) STMT_START { \
- MJD_OFFSET_DEBUG(("** (%d) size of node %d is %d.\n", \
- __LINE__, (int)(node), (int)(len))); \
- if((node) < 0) { \
- Perl_croak(aTHX_ "value of node is %d in Length macro", \
- (int)(node)); \
- } else { \
- RExC_offsets[2*(node)] = (len); \
- } \
-} STMT_END
-
-#define Set_Node_Length(node,len) \
- Set_Node_Length_To_R(REGNODE_OFFSET(node), len)
-#define Set_Node_Cur_Length(node, start) \
- Set_Node_Length(node, RExC_parse - start)
-
-/* Get offsets and lengths */
-#define Node_Offset(n) (RExC_offsets[2*(REGNODE_OFFSET(n))-1])
-#define Node_Length(n) (RExC_offsets[2*(REGNODE_OFFSET(n))])
-
-#define Set_Node_Offset_Length(node,offset,len) STMT_START { \
- Set_Node_Offset_To_R(REGNODE_OFFSET(node), (offset)); \
- Set_Node_Length_To_R(REGNODE_OFFSET(node), (len)); \
-} STMT_END
-
-#define Track_Code(code) STMT_START { code } STMT_END
-#endif
+#define ProgLen(ri) ri->proglen
+#define SetProgLen(ri,x) ri->proglen = x
#if PERL_ENABLE_EXPERIMENTAL_REGEX_OPTIMISATIONS
#define EXPERIMENTAL_INPLACESCAN
@@ -3516,11 +3450,6 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
#ifdef DEBUGGING
regnode *optimize = NULL;
-#ifdef RE_TRACK_PATTERN_OFFSETS
-
- U32 mjd_offset = 0;
- U32 mjd_nodelen = 0;
-#endif /* RE_TRACK_PATTERN_OFFSETS */
#endif /* DEBUGGING */
/*
This means we convert either the first branch or the first Exact,
@@ -3534,28 +3463,8 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
if ( first != startbranch || OP( last ) == BRANCH ) {
/* branch sub-chain */
NEXT_OFF( first ) = (U16)(last - first);
-#ifdef RE_TRACK_PATTERN_OFFSETS
- DEBUG_r({
- mjd_offset= Node_Offset((convert));
- mjd_nodelen= Node_Length((convert));
- });
-#endif
/* whole branch chain */
}
-#ifdef RE_TRACK_PATTERN_OFFSETS
- else {
- DEBUG_r({
- const regnode *nop = NEXTOPER( convert );
- mjd_offset= Node_Offset((nop));
- mjd_nodelen= Node_Length((nop));
- });
- }
- DEBUG_OPTIMISE_r(
- Perl_re_indentf( aTHX_ "MJD offset:%" UVuf " MJD length:%" UVuf "\n",
- depth+1,
- (UV)mjd_offset, (UV)mjd_nodelen)
- );
-#endif
/* But first we check to see if there is a common prefix we can
split out as an EXACT and put in front of the TRIE node. */
trie->startstate= 1;
@@ -3673,15 +3582,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
DEBUG_r_TEST
#endif
) {
- regnode *fix = convert;
U32 word = trie->wordcount;
-#ifdef RE_TRACK_PATTERN_OFFSETS
- mjd_nodelen++;
-#endif
- Set_Node_Offset_Length(convert, mjd_offset, state - 1);
- while( ++fix < n ) {
- Set_Node_Offset_Length(fix, 0, 0);
- }
while (word--) {
SV ** const tmp = av_fetch( trie_words, word, 0 );
if (tmp) {
@@ -3741,22 +3642,14 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
}
/* needed for dumping*/
DEBUG_r(if (optimize) {
- regnode *opt = convert;
-
- while ( ++opt < optimize) {
- Set_Node_Offset_Length(opt, 0, 0);
- }
/*
Try to clean up some of the debris left after the
optimisation.
*/
while( optimize < jumper ) {
- Track_Code( mjd_nodelen += Node_Length((optimize)); );
OP( optimize ) = OPTIMIZED;
- Set_Node_Offset_Length(optimize, 0, 0);
optimize++;
}
- Set_Node_Offset_Length(convert, mjd_offset, mjd_nodelen);
});
} /* end node insert */
@@ -8012,28 +7905,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
RExC_lastparse=NULL;
});
-#ifdef RE_TRACK_PATTERN_OFFSETS
- DEBUG_OFFSETS_r(Perl_re_printf( aTHX_
- "%s %" UVuf " bytes for offset annotations.\n",
- RExC_offsets ? "Got" : "Couldn't get",
- (UV)((RExC_offsets[0] * 2 + 1))));
- DEBUG_OFFSETS_r(if (RExC_offsets) {
- const STRLEN len = RExC_offsets[0];
- STRLEN i;
- DECLARE_AND_GET_RE_DEBUG_FLAGS;
- Perl_re_printf( aTHX_
- "Offsets: [%" UVuf "]\n\t", (UV)RExC_offsets[0]);
- for (i = 1; i <= len; i++) {
- if (RExC_offsets[i*2-1] || RExC_offsets[i*2])
- Perl_re_printf( aTHX_ "%" UVuf ":%" UVuf "[%" UVuf "] ",
- (UV)i, (UV)RExC_offsets[i*2-1], (UV)RExC_offsets[i*2]);
- }
- Perl_re_printf( aTHX_ "\n");
- });
-
-#else
SetProgLen(RExC_rxi,RExC_size);
-#endif
DEBUG_DUMP_PRE_OPTIMIZE_r({
SV * const sv = sv_newmortal();
@@ -11163,9 +11035,6 @@ S_handle_named_backref(pTHX_ RExC_state_t *pRExC_state,
num);
*flagp |= HASWIDTH;
- Set_Node_Offset(REGNODE_p(ret), parse_start+1);
- Set_Node_Cur_Length(REGNODE_p(ret), parse_start);
-
nextchar(pRExC_state);
return ret;
}
@@ -11913,10 +11782,6 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
(IV)ARG2L(REGNODE_p(ret))));
RExC_seen |= REG_RECURSE_SEEN;
- Set_Node_Length(REGNODE_p(ret),
- 1 + regarglen[OP(REGNODE_p(ret))]); /* MJD */
- Set_Node_Offset(REGNODE_p(ret), parse_start); /* MJD */
-
*flagp |= POSTPONED;
assert(*RExC_parse == ')');
nextchar(pRExC_state);
@@ -11990,12 +11855,9 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
if (! REGTAIL(pRExC_state, ret, eval)) {
REQUIRE_BRANCHJ(flagp, 0);
}
- /* deal with the length of this later - MJD */
return ret;
}
ret = reg2Lanode(pRExC_state, EVAL, n, 0);
- Set_Node_Length(REGNODE_p(ret), RExC_parse - parse_start + 1);
- Set_Node_Offset(REGNODE_p(ret), parse_start);
return ret;
}
case '(': /* (?(?{...})...) and (?(?=...)...) */
@@ -12218,8 +12080,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
vFAIL("Unknown switch condition (?(...))");
}
case '[': /* (?[ ... ]) */
- return handle_regex_sets(pRExC_state, NULL, flagp, depth+1,
- oregcomp_parse);
+ return handle_regex_sets(pRExC_state, NULL, flagp, depth+1);
case 0: /* A NUL */
RExC_parse--; /* for vFAIL to print correctly */
vFAIL("Sequence (? incomplete");
@@ -12308,8 +12169,6 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
RExC_open_parens[parno]= ret;
}
- Set_Node_Length(REGNODE_p(ret), 1); /* MJD */
- Set_Node_Offset(REGNODE_p(ret), RExC_parse); /* MJD */
is_open = 1;
} else {
/* with RXf_PMf_NOCAPTURE treat (...) as (?:...) */
@@ -12322,7 +12181,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
parse_rest:
/* Pick up the branches, linking them together. */
- parse_start = RExC_parse; /* MJD */
+ parse_start = RExC_parse;
br = regbranch(pRExC_state, &flags, 1, depth+1);
/* branch_len = (paren != 0); */
@@ -12335,10 +12194,8 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
if (RExC_use_BRANCHJ) {
reginsert(pRExC_state, BRANCHJ, br, depth+1);
}
- else { /* MJD */
+ else {
reginsert(pRExC_state, BRANCH, br, depth+1);
- Set_Node_Length(REGNODE_p(br), paren != 0);
- Set_Node_Offset_To_R(br, parse_start-RExC_start);
}
have_branch = 1;
}
@@ -12404,8 +12261,6 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
if (RExC_nestroot == parno)
RExC_nestroot = 0;
}
- Set_Node_Offset(REGNODE_p(ender), RExC_parse+1); /* MJD */
- Set_Node_Length(REGNODE_p(ender), 1); /* MJD */
break;
case 's':
ender = reg_node(pRExC_state, SRCLOSE);
@@ -12534,8 +12389,6 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
}
reginsert(pRExC_state, node, ret, depth+1);
- Set_Node_Cur_Length(REGNODE_p(ret), parse_start);
- Set_Node_Offset(REGNODE_p(ret), parse_start + 1);
FLAGS(REGNODE_p(ret)) = flag;
if (! REGTAIL_STUDY(pRExC_state, ret, reg_node(pRExC_state, TAIL)))
{
@@ -12608,7 +12461,6 @@ S_regbranch(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, I32 first, U32 depth)
ret = reganode(pRExC_state, BRANCHJ, 0);
else {
ret = reg_node(pRExC_state, BRANCH);
- Set_Node_Length(REGNODE_p(ret), 1);
}
}
@@ -12843,9 +12695,6 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
const char * const origparse = RExC_parse;
I32 min;
I32 max = REG_INFTY;
-#ifdef RE_TRACK_PATTERN_OFFSETS
- char *parse_start;
-#endif
/* Save the original in case we change the emitted regop to a FAIL. */
const regnode_offset orig_emit = RExC_emit;
@@ -12862,10 +12711,6 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
FAIL2("panic: regatom returned failure, flags=%#" UVxf, (UV) flags);
}
-#ifdef RE_TRACK_PATTERN_OFFSETS
- parse_start = RExC_parse;
-#endif
-
op = *RExC_parse;
switch (op) {
const char * regcurly_return[5];
@@ -13007,8 +12852,6 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
MARK_NAUGHTY_EXP(2, 2);
reginsert(pRExC_state, CURLY, ret, depth+1);
- Set_Node_Offset(REGNODE_p(ret), parse_start+1); /* MJD */
- Set_Node_Cur_Length(REGNODE_p(ret), parse_start);
}
else { /* not SIMPLE */
const regnode_offset w = reg_node(pRExC_state, WHILEM);
@@ -13023,10 +12866,6 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
NEXT_OFF(REGNODE_p(ret)) = 3; /* Go over LONGJMP. */
}
reginsert(pRExC_state, CURLYX, ret, depth+1);
- /* MJD hk */
- Set_Node_Offset(REGNODE_p(ret), parse_start+1);
- Set_Node_Length(REGNODE_p(ret),
- op == '{' ? (RExC_parse - parse_start) : 1);
if (RExC_use_BRANCHJ)
NEXT_OFF(REGNODE_p(ret)) = 3; /* Go over NOTHING to
@@ -13230,7 +13069,6 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
*node_p = reg_node(pRExC_state, REG_ANY);
*flagp |= HASWIDTH|SIMPLE;
MARK_NAUGHTY(1);
- Set_Node_Length(REGNODE_p(*(node_p)), 1); /* MJD */
return TRUE;
}
@@ -13588,6 +13426,14 @@ S_backref_value(char *p, char *e)
return I32_MAX;
}
+#ifdef DEBUGGING
+#define REGNODE_GUTS(state,op,extra_size) \
+ regnode_guts_debug(state,op,extra_size)
+#else
+#define REGNODE_GUTS(state,op,extra_size) \
+ regnode_guts(state,extra_size)
+#endif
+
/*
- regatom - the lowest level
@@ -13686,7 +13532,6 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
ret = reg_node(pRExC_state, MBOL);
else
ret = reg_node(pRExC_state, SBOL);
- Set_Node_Length(REGNODE_p(ret), 1); /* MJD */
break;
case '$':
nextchar(pRExC_state);
@@ -13696,7 +13541,6 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
ret = reg_node(pRExC_state, MEOL);
else
ret = reg_node(pRExC_state, SEOL);
- Set_Node_Length(REGNODE_p(ret), 1); /* MJD */
break;
case '.':
nextchar(pRExC_state);
@@ -13706,7 +13550,6 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
ret = reg_node(pRExC_state, REG_ANY);
*flagp |= HASWIDTH|SIMPLE;
MARK_NAUGHTY(1);
- Set_Node_Length(REGNODE_p(ret), 1); /* MJD */
break;
case '[':
{
@@ -13728,7 +13571,6 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
vFAIL("Unmatched [");
}
nextchar(pRExC_state);
- Set_Node_Length(REGNODE_p(ret), RExC_parse - oregcomp_parse + 1); /* MJD */
break;
}
case '(':
@@ -14019,8 +13861,6 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
RExC_parse += 2;
vFAIL("Unescaped left brace in regex is illegal here");
}
- Set_Node_Offset(REGNODE_p(ret), parse_start);
- Set_Node_Length(REGNODE_p(ret), RExC_parse - parse_start + 1); /* MJD */
nextchar(pRExC_state);
break;
case 'N':
@@ -14238,9 +14078,6 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
}
*flagp |= HASWIDTH;
- /* override incorrect value set in reganode MJD */
- Set_Node_Offset(REGNODE_p(ret), parse_start);
- Set_Node_Cur_Length(REGNODE_p(ret), parse_start-1);
skip_to_be_ignored_text(pRExC_state, &RExC_parse,
FALSE /* Don't force to /x */ );
}
@@ -14353,8 +14190,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
/* Allocate an EXACT node. The node_type may change below to
* another EXACTish node, but since the size of the node doesn't
* change, it works */
- ret = regnode_guts(pRExC_state, node_type, current_string_nodes,
- "exact");
+ ret = REGNODE_GUTS(pRExC_state, node_type, current_string_nodes);
FILL_NODE(ret, node_type);
RExC_emit++;
@@ -15599,7 +15435,6 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
*flagp |= HASWIDTH | maybe_SIMPLE;
}
- Set_Node_Length(REGNODE_p(ret), p - parse_start - 1);
RExC_parse = p;
{
@@ -16551,8 +16386,7 @@ S_regex_set_precedence(const U8 my_operator) {
STATIC regnode_offset
S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
- I32 *flagp, U32 depth,
- char * const oregcomp_parse)
+ I32 *flagp, U32 depth)
{
/* Handle the (?[...]) construct to do set operations */
@@ -16581,7 +16415,6 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
DECLARE_AND_GET_RE_DEBUG_FLAGS;
PERL_ARGS_ASSERT_HANDLE_REGEX_SETS;
- PERL_UNUSED_ARG(oregcomp_parse); /* Only for Set_Node_Length */
DEBUG_PARSE("xcls");
@@ -17236,7 +17069,6 @@ redo_curchar:
}
nextchar(pRExC_state);
- Set_Node_Length(REGNODE_p(node), RExC_parse - oregcomp_parse + 1); /* MJD */
return node;
regclass_failed:
@@ -19290,8 +19122,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
/* If optimized to something else and emitted, clean up and return */
if (ret >= 0) {
- Set_Node_Offset_Length(REGNODE_p(ret), orig_parse - RExC_start,
- RExC_parse - orig_parse);;
SvREFCNT_dec(cp_list);;
SvREFCNT_dec(only_utf8_locale_list);
SvREFCNT_dec(upper_latin1_only_utf8_matches);
@@ -19318,7 +19148,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
}
}
- ret = regnode_guts(pRExC_state, op, regarglen[op], "anyof");
+ ret = REGNODE_GUTS(pRExC_state, op, regarglen[op]);
FILL_NODE(ret, op); /* We set the argument later */
RExC_emit += 1 + regarglen[op];
ANYOF_FLAGS(REGNODE_p(ret)) = anyof_flags;
@@ -19855,7 +19685,7 @@ S_optimize_regclass(pTHX_
len = (UTF) ? UVCHR_SKIP(value) : 1;
- *ret = regnode_guts(pRExC_state, op, len, "exact");
+ *ret = REGNODE_GUTS(pRExC_state, op, len);
FILL_NODE(*ret, op);
RExC_emit += 1 + STR_SZ(len);
setSTR_LEN(REGNODE_p(*ret), len);
@@ -20202,9 +20032,8 @@ S_optimize_regclass(pTHX_
}
else {
op = ANYOFHs;
- *ret = regnode_guts(pRExC_state, op,
- regarglen[op] + STR_SZ(len),
- "anyofhs");
+ *ret = REGNODE_GUTS(pRExC_state, op,
+ regarglen[op] + STR_SZ(len));
FILL_NODE(*ret, op);
((struct regnode_anyofhs *) REGNODE_p(*ret))->str_len
= len;
@@ -20712,54 +20541,39 @@ S_change_engine_size(pTHX_ RExC_state_t *pRExC_state, const Ptrdiff_t size)
if (size > 0) {
Zero(REGNODE_p(RExC_emit), size, regnode);
}
-
-#ifdef RE_TRACK_PATTERN_OFFSETS
- Renew(RExC_offsets, 2*RExC_size+1, U32);
- if (size > 0) {
- Zero(RExC_offsets + 2*(RExC_size - size) + 1, 2 * size, U32);
- }
- RExC_offsets[0] = RExC_size;
-#endif
}
STATIC regnode_offset
-S_regnode_guts(pTHX_ RExC_state_t *pRExC_state, const U8 op, const STRLEN extra_size, const char* const name)
+S_regnode_guts(pTHX_ RExC_state_t *pRExC_state, const STRLEN extra_size)
{
- /* Allocate a regnode for 'op', with 'extra_size' extra (smallest) regnode
- * equivalents space. It aligns and increments RExC_size
+ /* Allocate a regnode that is (1 + extra_size) times as big as the
+ * smallest regnode worth of space, and also aligns and increments
+ * RExC_size appropriately.
*
* It returns the regnode's offset into the regex engine program */
const regnode_offset ret = RExC_emit;
- DECLARE_AND_GET_RE_DEBUG_FLAGS;
-
PERL_ARGS_ASSERT_REGNODE_GUTS;
SIZE_ALIGN(RExC_size);
change_engine_size(pRExC_state, (Ptrdiff_t) 1 + extra_size);
NODE_ALIGN_FILL(REGNODE_p(ret));
-#ifndef RE_TRACK_PATTERN_OFFSETS
- PERL_UNUSED_ARG(name);
- PERL_UNUSED_ARG(op);
-#else
+ return(ret);
+}
+
+#ifdef DEBUGGING
+
+STATIC regnode_offset
+S_regnode_guts_debug(pTHX_ RExC_state_t *pRExC_state, const U8 op, const STRLEN extra_size) {
+ PERL_ARGS_ASSERT_REGNODE_GUTS_DEBUG;
assert(extra_size >= regarglen[op] || PL_regkind[op] == ANYOF);
+ return S_regnode_guts(aTHX_ pRExC_state, extra_size);
+}
- if (RExC_offsets) { /* MJD */
- MJD_OFFSET_DEBUG(
- ("%s:%d: (op %s) %s %" UVuf " (len %" UVuf ") (max %" UVuf ").\n",
- name, __LINE__,
- PL_reg_name[op],
- (UV)(RExC_emit) > RExC_offsets[0]
- ? "Overwriting end of array!\n" : "OK",
- (UV)(RExC_emit),
- (UV)(RExC_parse - RExC_start),
- (UV)RExC_offsets[0]));
- Set_Node_Offset(REGNODE_p(RExC_emit), RExC_parse + (op == END));
- }
#endif
- return(ret);
-}
+
+
/*
- reg_node - emit a node
@@ -20767,7 +20581,7 @@ S_regnode_guts(pTHX_ RExC_state_t *pRExC_state, const U8 op, const STRLEN extra_
STATIC regnode_offset /* Location. */
S_reg_node(pTHX_ RExC_state_t *pRExC_state, U8 op)
{
- const regnode_offset ret = regnode_guts(pRExC_state, op, regarglen[op], "reg_node");
+ const regnode_offset ret = REGNODE_GUTS(pRExC_state, op, regarglen[op]);
regnode_offset ptr = ret;
PERL_ARGS_ASSERT_REG_NODE;
@@ -20785,7 +20599,7 @@ S_reg_node(pTHX_ RExC_state_t *pRExC_state, U8 op)
STATIC regnode_offset /* Location. */
S_reganode(pTHX_ RExC_state_t *pRExC_state, U8 op, U32 arg)
{
- const regnode_offset ret = regnode_guts(pRExC_state, op, regarglen[op], "reganode");
+ const regnode_offset ret = REGNODE_GUTS(pRExC_state, op, regarglen[op]);
regnode_offset ptr = ret;
PERL_ARGS_ASSERT_REGANODE;
@@ -20804,7 +20618,7 @@ S_reganode(pTHX_ RExC_state_t *pRExC_state, U8 op, U32 arg)
STATIC regnode_offset /* Location. */
S_regpnode(pTHX_ RExC_state_t *pRExC_state, U8 op, SV * arg)
{
- const regnode_offset ret = regnode_guts(pRExC_state, op, regarglen[op], "regpnode");
+ const regnode_offset ret = REGNODE_GUTS(pRExC_state, op, regarglen[op]);
regnode_offset ptr = ret;
PERL_ARGS_ASSERT_REGPNODE;
@@ -20819,7 +20633,7 @@ S_reg2Lanode(pTHX_ RExC_state_t *pRExC_state, const U8 op, const U32 arg1, const
{
/* emit a node with U32 and I32 arguments */
- const regnode_offset ret = regnode_guts(pRExC_state, op, regarglen[op], "reg2Lanode");
+ const regnode_offset ret = REGNODE_GUTS(pRExC_state, op, regarglen[op]);
regnode_offset ptr = ret;
PERL_ARGS_ASSERT_REG2LANODE;
@@ -20901,41 +20715,9 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, const U8 op,
while (src > REGNODE_p(operand)) {
StructCopy(--src, --dst, regnode);
-#ifdef RE_TRACK_PATTERN_OFFSETS
- if (RExC_offsets) { /* MJD 20010112 */
- MJD_OFFSET_DEBUG(
- ("%s(%d): (op %s) %s copy %" UVuf " -> %" UVuf " (max %" UVuf ").\n",
- "reginsert",
- __LINE__,
- PL_reg_name[op],
- (UV)(REGNODE_OFFSET(dst)) > RExC_offsets[0]
- ? "Overwriting end of array!\n" : "OK",
- (UV)REGNODE_OFFSET(src),
- (UV)REGNODE_OFFSET(dst),
- (UV)RExC_offsets[0]));
- Set_Node_Offset_To_R(REGNODE_OFFSET(dst), Node_Offset(src));
- Set_Node_Length_To_R(REGNODE_OFFSET(dst), Node_Length(src));
- }
-#endif
}
place = REGNODE_p(operand); /* Op node, where operand used to be. */
-#ifdef RE_TRACK_PATTERN_OFFSETS
- if (RExC_offsets) { /* MJD */
- MJD_OFFSET_DEBUG(
- ("%s(%d): (op %s) %s %" UVuf " <- %" UVuf " (max %" UVuf ").\n",
- "reginsert",
- __LINE__,
- PL_reg_name[op],
- (UV)REGNODE_OFFSET(place) > RExC_offsets[0]
- ? "Overwriting end of array!\n" : "OK",
- (UV)REGNODE_OFFSET(place),
- (UV)(RExC_parse - RExC_start),
- (UV)RExC_offsets[0]));
- Set_Node_Offset(place, RExC_parse);
- Set_Node_Length(place, 1);
- }
-#endif
src = NEXTOPER(place);
FLAGS(place) = 0;
FILL_NODE(operand, op);
@@ -22065,10 +21847,6 @@ Perl_regfree_internal(pTHX_ REGEXP * const rx)
}
});
-#ifdef RE_TRACK_PATTERN_OFFSETS
- if (ri->u.offsets)
- Safefree(ri->u.offsets); /* 20010421 MJD */
-#endif
if (ri->code_blocks)
S_free_codeblocks(aTHX_ ri->code_blocks);
@@ -22393,14 +22171,7 @@ Perl_regdupe_internal(pTHX_ REGEXP * const rx, CLONE_PARAMS *param)
reti->name_list_idx = ri->name_list_idx;
-#ifdef RE_TRACK_PATTERN_OFFSETS
- if (ri->u.offsets) {
- Newx(reti->u.offsets, 2*len+1, U32);
- Copy(ri->u.offsets, reti->u.offsets, 2*len+1, U32);
- }
-#else
SetProgLen(reti, len);
-#endif
return (void*)reti;
}
diff --git a/regcomp.h b/regcomp.h
index 552cd6ed65..6b8dc27642 100644
--- a/regcomp.h
+++ b/regcomp.h
@@ -26,11 +26,6 @@
/* Not for production use: */
#define PERL_ENABLE_EXPERIMENTAL_REGEX_OPTIMISATIONS 0
-/* Activate offsets code - set to if 1 to enable */
-#ifdef DEBUGGING
-#define RE_TRACK_PATTERN_OFFSETS
-#endif
-
/*
* Structure for regexp "program". This is essentially a linear encoding
* of a nondeterministic finite-state machine (aka syntax charts or
@@ -65,24 +60,18 @@
/* This is the stuff that used to live in regexp.h that was truly
private to the engine itself. It now lives here. */
- typedef struct regexp_internal {
- union {
- U32 *offsets; /* offset annotations 20001228 MJD
- data about mapping the program to the
- string -
- offsets[0] is proglen when this is used
- */
- U32 proglen;
- } u;
-
+typedef struct regexp_internal {
regnode *regstclass; /* Optional startclass as identified or constructed
by the optimiser */
struct reg_data *data; /* Additional miscellaneous data used by the program.
Used to make it easier to clone and free arbitrary
data that the regops need. Often the ARG field of
- a regop is an index into this structure */
+ a regop is an index into this structure. NOTE the
+ 0th element of this structure is NEVER used and is
+ strictly reserved for internal purposes. */
struct reg_code_blocks *code_blocks;/* positions of literal (?{}) */
- int name_list_idx; /* Optional data index of an array of paren names */
+ U32 proglen; /* size of the compiled program in regnodes */
+ int name_list_idx; /* Optional data index of an array of paren names */
regnode program[1]; /* Unwarranted chumminess with compiler. */
} regexp_internal;
@@ -995,7 +984,6 @@ further group, as currently only the low three bytes are used.
PEEP
TRIE
PROGRAM
- OFFSETS
Execute Options:
@@ -1006,7 +994,6 @@ further group, as currently only the low three bytes are used.
Extra Options
TRIE
- OFFSETS
If you modify any of these make sure you make corresponding changes to
re.pm, especially to the documentation.
@@ -1032,8 +1019,6 @@ re.pm, especially to the documentation.
/* Extra */
#define RE_DEBUG_EXTRA_MASK 0x3FF0000
#define RE_DEBUG_EXTRA_TRIE 0x0010000
-#define RE_DEBUG_EXTRA_OFFSETS 0x0020000
-#define RE_DEBUG_EXTRA_OFFDEBUG 0x0040000
#define RE_DEBUG_EXTRA_STATE 0x0080000
#define RE_DEBUG_EXTRA_OPTIMISE 0x0100000
#define RE_DEBUG_EXTRA_BUFFERS 0x0400000
@@ -1072,8 +1057,6 @@ re.pm, especially to the documentation.
/* Extra */
#define DEBUG_EXTRA_r(x) DEBUG_r( \
if (DEBUG_v_TEST || RE_DEBUG_FLAG(RE_DEBUG_EXTRA_MASK)) x )
-#define DEBUG_OFFSETS_r(x) DEBUG_r( \
- if (DEBUG_v_TEST || RE_DEBUG_FLAG(RE_DEBUG_EXTRA_OFFSETS)) x )
#define DEBUG_STATE_r(x) DEBUG_r( \
if (DEBUG_v_TEST || RE_DEBUG_FLAG(RE_DEBUG_EXTRA_STATE)) x )
#define DEBUG_STACK_r(x) DEBUG_r( \
@@ -1084,9 +1067,6 @@ re.pm, especially to the documentation.
#define DEBUG_OPTIMISE_MORE_r(x) DEBUG_r( \
if (DEBUG_v_TEST || ((RE_DEBUG_EXTRA_OPTIMISE|RE_DEBUG_COMPILE_OPTIMISE) == \
RE_DEBUG_FLAG(RE_DEBUG_EXTRA_OPTIMISE|RE_DEBUG_COMPILE_OPTIMISE))) x )
-#define MJD_OFFSET_DEBUG(x) DEBUG_r( \
- if (DEBUG_v_TEST || RE_DEBUG_FLAG(RE_DEBUG_EXTRA_OFFDEBUG)) \
- Perl_warn_nocontext x )
#define DEBUG_TRIE_COMPILE_MORE_r(x) DEBUG_TRIE_COMPILE_r( \
if (DEBUG_v_TEST || RE_DEBUG_FLAG(RE_DEBUG_EXTRA_TRIE)) x )
#define DEBUG_TRIE_EXECUTE_MORE_r(x) DEBUG_TRIE_EXECUTE_r( \