summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--embed.fnc9
-rw-r--r--embed.h6
-rw-r--r--proto.h15
-rw-r--r--regcomp.c84
-rw-r--r--regcomp.h17
-rw-r--r--regexec.c12
6 files changed, 81 insertions, 62 deletions
diff --git a/embed.fnc b/embed.fnc
index efcd38f15e..284bf44b08 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -1352,11 +1352,14 @@ Es |const regnode*|dumpuntil|NN const regexp *r|NN const regnode *start \
|NN SV* sv|I32 indent|U32 depth
Es |void |put_byte |NN SV* sv|int c
Es |void |dump_trie |NN const struct _reg_trie_data *trie\
- |NULLOK HV* widecharmap|U32 depth
+ |NULLOK HV* widecharmap|NN AV *revcharmap\
+ |U32 depth
Es |void |dump_trie_interim_list|NN const struct _reg_trie_data *trie\
- |NULLOK HV* widecharmap|U32 next_alloc|U32 depth
+ |NULLOK HV* widecharmap|NN AV *revcharmap\
+ |U32 next_alloc|U32 depth
Es |void |dump_trie_interim_table|NN const struct _reg_trie_data *trie\
- |NULLOK HV* widecharmap|U32 next_alloc|U32 depth
+ |NULLOK HV* widecharmap|NN AV *revcharmap\
+ |U32 next_alloc|U32 depth
Es |U8 |regtail_study |NN struct RExC_state_t *state|NN regnode *p|NN const regnode *val|U32 depth
# endif
#endif
diff --git a/embed.h b/embed.h
index 70b3ecadf1..d294609487 100644
--- a/embed.h
+++ b/embed.h
@@ -3550,9 +3550,9 @@
#if defined(PERL_CORE) || defined(PERL_EXT)
#define dumpuntil(a,b,c,d,e,f,g,h) S_dumpuntil(aTHX_ a,b,c,d,e,f,g,h)
#define put_byte(a,b) S_put_byte(aTHX_ a,b)
-#define dump_trie(a,b,c) S_dump_trie(aTHX_ a,b,c)
-#define dump_trie_interim_list(a,b,c,d) S_dump_trie_interim_list(aTHX_ a,b,c,d)
-#define dump_trie_interim_table(a,b,c,d) S_dump_trie_interim_table(aTHX_ a,b,c,d)
+#define dump_trie(a,b,c,d) S_dump_trie(aTHX_ a,b,c,d)
+#define dump_trie_interim_list(a,b,c,d,e) S_dump_trie_interim_list(aTHX_ a,b,c,d,e)
+#define dump_trie_interim_table(a,b,c,d,e) S_dump_trie_interim_table(aTHX_ a,b,c,d,e)
#define regtail_study(a,b,c,d) S_regtail_study(aTHX_ a,b,c,d)
#endif
# endif
diff --git a/proto.h b/proto.h
index 1db1bd17f7..14fec7e74f 100644
--- a/proto.h
+++ b/proto.h
@@ -3677,14 +3677,17 @@ STATIC const regnode* S_dumpuntil(pTHX_ const regexp *r, const regnode *start, c
STATIC void S_put_byte(pTHX_ SV* sv, int c)
__attribute__nonnull__(pTHX_1);
-STATIC void S_dump_trie(pTHX_ const struct _reg_trie_data *trie, HV* widecharmap, U32 depth)
- __attribute__nonnull__(pTHX_1);
+STATIC void S_dump_trie(pTHX_ const struct _reg_trie_data *trie, HV* widecharmap, AV *revcharmap, U32 depth)
+ __attribute__nonnull__(pTHX_1)
+ __attribute__nonnull__(pTHX_3);
-STATIC void S_dump_trie_interim_list(pTHX_ const struct _reg_trie_data *trie, HV* widecharmap, U32 next_alloc, U32 depth)
- __attribute__nonnull__(pTHX_1);
+STATIC void S_dump_trie_interim_list(pTHX_ const struct _reg_trie_data *trie, HV* widecharmap, AV *revcharmap, U32 next_alloc, U32 depth)
+ __attribute__nonnull__(pTHX_1)
+ __attribute__nonnull__(pTHX_3);
-STATIC void S_dump_trie_interim_table(pTHX_ const struct _reg_trie_data *trie, HV* widecharmap, U32 next_alloc, U32 depth)
- __attribute__nonnull__(pTHX_1);
+STATIC void S_dump_trie_interim_table(pTHX_ const struct _reg_trie_data *trie, HV* widecharmap, AV *revcharmap, U32 next_alloc, U32 depth)
+ __attribute__nonnull__(pTHX_1)
+ __attribute__nonnull__(pTHX_3);
STATIC U8 S_regtail_study(pTHX_ struct RExC_state_t *state, regnode *p, const regnode *val, U32 depth)
__attribute__nonnull__(pTHX_1)
diff --git a/regcomp.c b/regcomp.c
index 3d1211a8a4..77ee7b4bad 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -798,9 +798,9 @@ S_cl_or(const RExC_state_t *pRExC_state, struct regnode_charclass_class *cl, con
#ifdef DEBUGGING
/*
- dump_trie(trie,widecharmap)
- dump_trie_interim_list(trie,widecharmap,next_alloc)
- dump_trie_interim_table(trie,widecharmap,next_alloc)
+ dump_trie(trie,widecharmap,revcharmap)
+ dump_trie_interim_list(trie,widecharmap,revcharmap,next_alloc)
+ dump_trie_interim_table(trie,widecharmap,revcharmap,next_alloc)
These routines dump out a trie in a somewhat readable format.
The _interim_ variants are used for debugging the interim
@@ -818,7 +818,8 @@ S_cl_or(const RExC_state_t *pRExC_state, struct regnode_charclass_class *cl, con
*/
STATIC void
-S_dump_trie(pTHX_ const struct _reg_trie_data *trie, HV *widecharmap, U32 depth)
+S_dump_trie(pTHX_ const struct _reg_trie_data *trie, HV *widecharmap,
+ AV *revcharmap, U32 depth)
{
U32 state;
SV *sv=sv_newmortal();
@@ -831,7 +832,7 @@ S_dump_trie(pTHX_ const struct _reg_trie_data *trie, HV *widecharmap, U32 depth)
"Match","Base","Ofs" );
for( state = 0 ; state < trie->uniquecharcount ; state++ ) {
- SV ** const tmp = av_fetch( trie->revcharmap, state, 0);
+ SV ** const tmp = av_fetch( revcharmap, state, 0);
if ( tmp ) {
PerlIO_printf( Perl_debug_log, "%*s",
colwidth,
@@ -900,7 +901,8 @@ S_dump_trie(pTHX_ const struct _reg_trie_data *trie, HV *widecharmap, U32 depth)
*/
STATIC void
S_dump_trie_interim_list(pTHX_ const struct _reg_trie_data *trie,
- HV *widecharmap, U32 next_alloc, U32 depth)
+ HV *widecharmap, AV *revcharmap, U32 next_alloc,
+ U32 depth)
{
U32 state;
SV *sv=sv_newmortal();
@@ -924,7 +926,7 @@ S_dump_trie_interim_list(pTHX_ const struct _reg_trie_data *trie,
);
}
for( charid = 1 ; charid <= TRIE_LIST_USED( state ) ; charid++ ) {
- SV ** const tmp = av_fetch( trie->revcharmap, TRIE_LIST_ITEM(state,charid).forid, 0);
+ SV ** const tmp = av_fetch( revcharmap, TRIE_LIST_ITEM(state,charid).forid, 0);
if ( tmp ) {
PerlIO_printf( Perl_debug_log, "%*s:%3X=%4"UVXf" | ",
colwidth,
@@ -953,7 +955,8 @@ S_dump_trie_interim_list(pTHX_ const struct _reg_trie_data *trie,
*/
STATIC void
S_dump_trie_interim_table(pTHX_ const struct _reg_trie_data *trie,
- HV *widecharmap, U32 next_alloc, U32 depth)
+ HV *widecharmap, AV *revcharmap, U32 next_alloc,
+ U32 depth)
{
U32 state;
U16 charid;
@@ -969,7 +972,7 @@ S_dump_trie_interim_table(pTHX_ const struct _reg_trie_data *trie,
PerlIO_printf( Perl_debug_log, "%*sChar : ",(int)depth * 2 + 2,"" );
for( charid = 0 ; charid < trie->uniquecharcount ; charid++ ) {
- SV ** const tmp = av_fetch( trie->revcharmap, charid, 0);
+ SV ** const tmp = av_fetch( revcharmap, charid, 0);
if ( tmp ) {
PerlIO_printf( Perl_debug_log, "%*s",
colwidth,
@@ -1136,7 +1139,7 @@ is the recommended Unicode-aware way of saying
SV *tmp = newSVpvs(""); \
if (UTF) SvUTF8_on(tmp); \
Perl_sv_catpvf( aTHX_ tmp, "%c", (int)uvc ); \
- av_push( TRIE_REVCHARMAP(trie), tmp ); \
+ av_push( revcharmap, tmp ); \
} STMT_END
#define TRIE_READ_CHAR STMT_START { \
@@ -1196,7 +1199,7 @@ is the recommended Unicode-aware way of saying
else \
tmp = newSVpvn( "", 0 ); \
if ( UTF ) SvUTF8_on( tmp ); \
- av_push( trie->words, tmp ); \
+ av_push( trie_words, tmp ); \
}); \
\
curword++; \
@@ -1249,6 +1252,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
/* first pass, loop through and scan words */
reg_trie_data *trie;
HV *widecharmap = NULL;
+ AV *revcharmap = newAV();
regnode *cur;
const U32 uniflags = UTF8_ALLOW_DEFAULT;
STRLEN len = 0;
@@ -1267,15 +1271,17 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
)
);
- const U32 data_slot = add_data( pRExC_state, 2, "tu" );
- SV *re_trie_maxbuff;
-#ifndef DEBUGGING
- /* these are only used during construction but are useful during
- * debugging so we store them in the struct when debugging.
+#ifdef DEBUGGING
+ const U32 data_slot = add_data( pRExC_state, 4, "tuuu" );
+ AV *trie_words = NULL;
+ /* along with revcharmap, this only used during construction but both are
+ * useful during debugging so we store them in the struct when debugging.
*/
+#else
+ const U32 data_slot = add_data( pRExC_state, 2, "tu" );
STRLEN trie_charcount=0;
- AV *trie_revcharmap;
#endif
+ SV *re_trie_maxbuff;
GET_RE_DEBUG_FLAGS_DECL;
#ifndef DEBUGGING
PERL_UNUSED_ARG(depth);
@@ -1290,9 +1296,8 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
if (!(UTF && folder))
trie->bitmap = PerlMemShared_calloc( ANYOF_BITMAP_SIZE, 1 );
DEBUG_r({
- trie->words = newAV();
+ trie_words = newAV();
});
- TRIE_REVCHARMAP(trie) = newAV();
re_trie_maxbuff = get_sv(RE_TRIE_MAXBUF_NAME, 1);
if (!SvIOK(re_trie_maxbuff)) {
@@ -1513,8 +1518,9 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
* sizeof(reg_trie_state) );
/* and now dump it out before we compress it */
- DEBUG_TRIE_COMPILE_MORE_r(
- dump_trie_interim_list(trie,widecharmap,next_alloc,depth+1)
+ DEBUG_TRIE_COMPILE_MORE_r(dump_trie_interim_list(trie, widecharmap,
+ revcharmap, next_alloc,
+ depth+1)
);
trie->trans
@@ -1687,9 +1693,9 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
} /* end second pass */
/* and now dump it out before we compress it */
- DEBUG_TRIE_COMPILE_MORE_r(
- dump_trie_interim_table(trie,widecharmap,next_alloc,depth+1)
- );
+ DEBUG_TRIE_COMPILE_MORE_r(dump_trie_interim_table(trie, widecharmap,
+ revcharmap,
+ next_alloc, depth+1));
{
/*
@@ -1818,9 +1824,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
* sizeof(reg_trie_trans) );
/* and now dump out the compressed format */
- DEBUG_TRIE_COMPILE_r(
- dump_trie(trie,widecharmap,depth+1)
- );
+ DEBUG_TRIE_COMPILE_r(dump_trie(trie, widecharmap, revcharmap, depth+1));
{ /* Modify the program and insert the new TRIE node*/
U8 nodetype =(U8)(flags & 0xFF);
@@ -1882,7 +1886,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
trie->trans[ base + ofs - trie->uniquecharcount ].check == state )
{
if ( ++count > 1 ) {
- SV **tmp = av_fetch( TRIE_REVCHARMAP(trie), ofs, 0);
+ SV **tmp = av_fetch( revcharmap, ofs, 0);
const U8 *ch = (U8*)SvPV_nolen_const( *tmp );
if ( state == 1 ) break;
if ( count == 2 ) {
@@ -1893,7 +1897,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
(int)depth * 2 + 2, "",
(UV)state));
if (idx >= 0) {
- SV ** const tmp = av_fetch( TRIE_REVCHARMAP(trie), idx, 0);
+ SV ** const tmp = av_fetch( revcharmap, idx, 0);
const U8 * const ch = (U8*)SvPV_nolen_const( *tmp );
TRIE_BITMAP_SET(trie,*ch);
@@ -1913,7 +1917,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
}
}
if ( count == 1 ) {
- SV **tmp = av_fetch( TRIE_REVCHARMAP(trie), idx, 0);
+ SV **tmp = av_fetch( revcharmap, idx, 0);
char *ch = SvPV_nolen( *tmp );
DEBUG_OPTIMISE_r({
SV *sv=sv_newmortal();
@@ -1961,7 +1965,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
Set_Node_Offset_Length(fix, 0, 0);
}
while (word--) {
- SV ** const tmp = av_fetch( trie->words, word, 0 );
+ SV ** const tmp = av_fetch( trie_words, word, 0 );
if (tmp) {
if ( STR_LEN(convert) <= SvCUR(*tmp) )
sv_chop(*tmp, SvPV_nolen(*tmp) + STR_LEN(convert));
@@ -2030,8 +2034,11 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
});
} /* end node insert */
RExC_rxi->data->data[ data_slot + 1 ] = (void*)widecharmap;
-#ifndef DEBUGGING
- SvREFCNT_dec(TRIE_REVCHARMAP(trie));
+#ifdef DEBUGGING
+ RExC_rxi->data->data[ data_slot + TRIE_WORDS_OFFSET ] = (void*)trie_words;
+ RExC_rxi->data->data[ data_slot + 3 ] = (void*)revcharmap;
+#else
+ SvREFCNT_dec(revcharmap);
#endif
return trie->jump
? MADE_JUMP_TRIE
@@ -8587,12 +8594,6 @@ Perl_pregfree(pTHX_ struct regexp *r)
PerlMemShared_free(trie->jump);
if (trie->nextword)
PerlMemShared_free(trie->nextword);
-#ifdef DEBUGGING
- if (trie->words)
- SvREFCNT_dec((SV*)trie->words);
- if (trie->revcharmap)
- SvREFCNT_dec((SV*)trie->revcharmap);
-#endif
/* do this last!!!! */
PerlMemShared_free(ri->data->data[n]);
}
@@ -9093,11 +9094,14 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
NULL;
const reg_trie_data * const trie =
(reg_trie_data*)ri->data->data[op<AHOCORASICK ? n : ac->trie];
+#ifdef DEBUGGING
+ AV *const trie_words = (AV *) ri->data->data[n + TRIE_WORDS_OFFSET];
+#endif
const regnode *nextbranch= NULL;
I32 word_idx;
sv_setpvn(sv, "", 0);
for (word_idx= 0; word_idx < (I32)trie->wordcount; word_idx++) {
- SV ** const elem_ptr = av_fetch(trie->words,word_idx,0);
+ SV ** const elem_ptr = av_fetch(trie_words,word_idx,0);
PerlIO_printf(Perl_debug_log, "%*s%s ",
(int)(2*(indent+3)), "",
diff --git a/regcomp.h b/regcomp.h
index 255509687c..5710857e97 100644
--- a/regcomp.h
+++ b/regcomp.h
@@ -430,6 +430,7 @@ END_EXTERN_C
* in the character class
* t - trie struct
* u - trie struct's widecharmap (a HV, so can't share, must dup)
+ * also used for revcharmap and words under DEBUGGING
* T - aho-trie struct
* S - sv for named capture lookup
* 20010712 mjd@plover.com
@@ -537,10 +538,20 @@ struct _reg_trie_data {
U32 wordcount; /* Build only */
#ifdef DEBUGGING
STRLEN charcount; /* Build only */
- AV *words; /* Array of words contained in trie, for dumping */
- AV *revcharmap; /* Map of each charid back to its character representation */
#endif
};
+/* There is one (3 under DEBUGGING) pointers that logically belong in this
+ structure, but are held outside as they need duplication on thread cloning,
+ whereas the rest of the structure can be read only:
+ HV *widecharmap; code points > 255 to charid
+#ifdef DEBUGGING
+ AV *words; Array of words contained in trie, for dumping
+ AV *revcharmap; Map of each charid back to its character representation
+#endif
+*/
+
+#define TRIE_WORDS_OFFSET 2
+
typedef struct _reg_trie_data reg_trie_data;
/* refcount is first in both this and _reg_trie_data to allow a space
@@ -577,10 +588,8 @@ typedef struct _reg_ac_data reg_ac_data;
#ifdef DEBUGGING
#define TRIE_CHARCOUNT(trie) ((trie)->charcount)
-#define TRIE_REVCHARMAP(trie) ((trie)->revcharmap)
#else
#define TRIE_CHARCOUNT(trie) (trie_charcount)
-#define TRIE_REVCHARMAP(trie) (trie_revcharmap)
#endif
#define RE_TRIE_MAXBUF_INIT 65536
diff --git a/regexec.c b/regexec.c
index 4e3785bda6..b54a4cb880 100644
--- a/regexec.c
+++ b/regexec.c
@@ -2947,9 +2947,9 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
if ( ST.accepted == 1 ) {
/* only one choice left - just continue */
DEBUG_EXECUTE_r({
- reg_trie_data * const trie
- = (reg_trie_data*)rexi->data->data[ ARG(ST.me) ];
- SV ** const tmp = av_fetch( trie->words,
+ AV *const trie_words
+ = (AV *) rexi->data->data[ARG(ST.me)+TRIE_WORDS_OFFSET];
+ SV ** const tmp = av_fetch( trie_words,
ST.accept_buff[ 0 ].wordnum-1, 0 );
SV *sv= tmp ? sv_newmortal() : NULL;
@@ -3028,9 +3028,9 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
}
DEBUG_EXECUTE_r({
- reg_trie_data * const trie
- = (reg_trie_data*)rexi->data->data[ ARG(ST.me) ];
- SV ** const tmp = av_fetch( trie->words,
+ AV *const trie_words
+ = (AV *) rexi->data->data[ARG(ST.me)+TRIE_WORDS_OFFSET];
+ SV ** const tmp = av_fetch( trie_words,
ST.accept_buff[ best ].wordnum - 1, 0 );
regnode *nextop=(!ST.jump || !ST.jump[ST.accept_buff[best].wordnum]) ?
ST.B :