summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--embed.fnc9
-rw-r--r--embed.h6
-rw-r--r--proto.h6
-rw-r--r--regcomp.c53
-rw-r--r--regcomp.h2
-rw-r--r--regexec.c20
6 files changed, 52 insertions, 44 deletions
diff --git a/embed.fnc b/embed.fnc
index ec2041eab5..efcd38f15e 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -1351,9 +1351,12 @@ Es |const regnode*|dumpuntil|NN const regexp *r|NN const regnode *start \
|NULLOK const regnode *plast \
|NN SV* sv|I32 indent|U32 depth
Es |void |put_byte |NN SV* sv|int c
-Es |void |dump_trie |NN const struct _reg_trie_data *trie|U32 depth
-Es |void |dump_trie_interim_list|NN const struct _reg_trie_data *trie|U32 next_alloc|U32 depth
-Es |void |dump_trie_interim_table|NN const struct _reg_trie_data *trie|U32 next_alloc|U32 depth
+Es |void |dump_trie |NN const struct _reg_trie_data *trie\
+ |NULLOK HV* widecharmap|U32 depth
+Es |void |dump_trie_interim_list|NN const struct _reg_trie_data *trie\
+ |NULLOK HV* widecharmap|U32 next_alloc|U32 depth
+Es |void |dump_trie_interim_table|NN const struct _reg_trie_data *trie\
+ |NULLOK HV* widecharmap|U32 next_alloc|U32 depth
Es |U8 |regtail_study |NN struct RExC_state_t *state|NN regnode *p|NN const regnode *val|U32 depth
# endif
#endif
diff --git a/embed.h b/embed.h
index c4bf329a09..70b3ecadf1 100644
--- a/embed.h
+++ b/embed.h
@@ -3550,9 +3550,9 @@
#if defined(PERL_CORE) || defined(PERL_EXT)
#define dumpuntil(a,b,c,d,e,f,g,h) S_dumpuntil(aTHX_ a,b,c,d,e,f,g,h)
#define put_byte(a,b) S_put_byte(aTHX_ a,b)
-#define dump_trie(a,b) S_dump_trie(aTHX_ a,b)
-#define dump_trie_interim_list(a,b,c) S_dump_trie_interim_list(aTHX_ a,b,c)
-#define dump_trie_interim_table(a,b,c) S_dump_trie_interim_table(aTHX_ a,b,c)
+#define dump_trie(a,b,c) S_dump_trie(aTHX_ a,b,c)
+#define dump_trie_interim_list(a,b,c,d) S_dump_trie_interim_list(aTHX_ a,b,c,d)
+#define dump_trie_interim_table(a,b,c,d) S_dump_trie_interim_table(aTHX_ a,b,c,d)
#define regtail_study(a,b,c,d) S_regtail_study(aTHX_ a,b,c,d)
#endif
# endif
diff --git a/proto.h b/proto.h
index 52d4b746cf..1db1bd17f7 100644
--- a/proto.h
+++ b/proto.h
@@ -3677,13 +3677,13 @@ STATIC const regnode* S_dumpuntil(pTHX_ const regexp *r, const regnode *start, c
STATIC void S_put_byte(pTHX_ SV* sv, int c)
__attribute__nonnull__(pTHX_1);
-STATIC void S_dump_trie(pTHX_ const struct _reg_trie_data *trie, U32 depth)
+STATIC void S_dump_trie(pTHX_ const struct _reg_trie_data *trie, HV* widecharmap, U32 depth)
__attribute__nonnull__(pTHX_1);
-STATIC void S_dump_trie_interim_list(pTHX_ const struct _reg_trie_data *trie, U32 next_alloc, U32 depth)
+STATIC void S_dump_trie_interim_list(pTHX_ const struct _reg_trie_data *trie, HV* widecharmap, U32 next_alloc, U32 depth)
__attribute__nonnull__(pTHX_1);
-STATIC void S_dump_trie_interim_table(pTHX_ const struct _reg_trie_data *trie, U32 next_alloc, U32 depth)
+STATIC void S_dump_trie_interim_table(pTHX_ const struct _reg_trie_data *trie, HV* widecharmap, U32 next_alloc, U32 depth)
__attribute__nonnull__(pTHX_1);
STATIC U8 S_regtail_study(pTHX_ struct RExC_state_t *state, regnode *p, const regnode *val, U32 depth)
diff --git a/regcomp.c b/regcomp.c
index c5c91498be..3d1211a8a4 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -798,9 +798,9 @@ S_cl_or(const RExC_state_t *pRExC_state, struct regnode_charclass_class *cl, con
#ifdef DEBUGGING
/*
- dump_trie(trie)
- dump_trie_interim_list(trie,next_alloc)
- dump_trie_interim_table(trie,next_alloc)
+ dump_trie(trie,widecharmap)
+ dump_trie_interim_list(trie,widecharmap,next_alloc)
+ dump_trie_interim_table(trie,widecharmap,next_alloc)
These routines dump out a trie in a somewhat readable format.
The _interim_ variants are used for debugging the interim
@@ -813,17 +813,16 @@ S_cl_or(const RExC_state_t *pRExC_state, struct regnode_charclass_class *cl, con
*/
/*
- dump_trie(trie)
Dumps the final compressed table form of the trie to Perl_debug_log.
Used for debugging make_trie().
*/
STATIC void
-S_dump_trie(pTHX_ const struct _reg_trie_data *trie,U32 depth)
+S_dump_trie(pTHX_ const struct _reg_trie_data *trie, HV *widecharmap, U32 depth)
{
U32 state;
SV *sv=sv_newmortal();
- int colwidth= trie->widecharmap ? 6 : 4;
+ int colwidth= widecharmap ? 6 : 4;
GET_RE_DEBUG_FLAGS_DECL;
@@ -894,18 +893,18 @@ S_dump_trie(pTHX_ const struct _reg_trie_data *trie,U32 depth)
}
}
/*
- dump_trie_interim_list(trie,next_alloc)
Dumps a fully constructed but uncompressed trie in list form.
List tries normally only are used for construction when the number of
possible chars (trie->uniquecharcount) is very high.
Used for debugging make_trie().
*/
STATIC void
-S_dump_trie_interim_list(pTHX_ const struct _reg_trie_data *trie, U32 next_alloc,U32 depth)
+S_dump_trie_interim_list(pTHX_ const struct _reg_trie_data *trie,
+ HV *widecharmap, U32 next_alloc, U32 depth)
{
U32 state;
SV *sv=sv_newmortal();
- int colwidth= trie->widecharmap ? 6 : 4;
+ int colwidth= widecharmap ? 6 : 4;
GET_RE_DEBUG_FLAGS_DECL;
/* print out the table precompression. */
PerlIO_printf( Perl_debug_log, "%*sState :Word | Transition Data\n%*s%s",
@@ -947,19 +946,19 @@ S_dump_trie_interim_list(pTHX_ const struct _reg_trie_data *trie, U32 next_alloc
}
/*
- dump_trie_interim_table(trie,next_alloc)
Dumps a fully constructed but uncompressed trie in table form.
This is the normal DFA style state transition table, with a few
twists to facilitate compression later.
Used for debugging make_trie().
*/
STATIC void
-S_dump_trie_interim_table(pTHX_ const struct _reg_trie_data *trie, U32 next_alloc, U32 depth)
+S_dump_trie_interim_table(pTHX_ const struct _reg_trie_data *trie,
+ HV *widecharmap, U32 next_alloc, U32 depth)
{
U32 state;
U16 charid;
SV *sv=sv_newmortal();
- int colwidth= trie->widecharmap ? 6 : 4;
+ int colwidth= widecharmap ? 6 : 4;
GET_RE_DEBUG_FLAGS_DECL;
/*
@@ -1249,6 +1248,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
dVAR;
/* first pass, loop through and scan words */
reg_trie_data *trie;
+ HV *widecharmap = NULL;
regnode *cur;
const U32 uniflags = UTF8_ALLOW_DEFAULT;
STRLEN len = 0;
@@ -1267,7 +1267,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
)
);
- const U32 data_slot = add_data( pRExC_state, 1, "t" );
+ const U32 data_slot = add_data( pRExC_state, 2, "tu" );
SV *re_trie_maxbuff;
#ifndef DEBUGGING
/* these are only used during construction but are useful during
@@ -1370,10 +1370,10 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
}
} else {
SV** svpp;
- if ( !trie->widecharmap )
- trie->widecharmap = newHV();
+ if ( !widecharmap )
+ widecharmap = newHV();
- svpp = hv_fetch( trie->widecharmap, (char*)&uvc, sizeof( UV ), 1 );
+ svpp = hv_fetch( widecharmap, (char*)&uvc, sizeof( UV ), 1 );
if ( !svpp )
Perl_croak( aTHX_ "error creating/fetching widecharmap entry for 0x%"UVXf, uvc );
@@ -1397,7 +1397,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
DEBUG_TRIE_COMPILE_r(
PerlIO_printf( Perl_debug_log, "%*sTRIE(%s): W:%d C:%d Uq:%d Min:%d Max:%d\n",
(int)depth * 2 + 2,"",
- ( trie->widecharmap ? "UTF8" : "NATIVE" ), (int)word_count,
+ ( widecharmap ? "UTF8" : "NATIVE" ), (int)word_count,
(int)TRIE_CHARCOUNT(trie), trie->uniquecharcount,
(int)trie->minlen, (int)trie->maxlen )
);
@@ -1469,7 +1469,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
if ( uvc < 256 ) {
charid = trie->charmap[ uvc ];
} else {
- SV** const svpp = hv_fetch( trie->widecharmap, (char*)&uvc, sizeof( UV ), 0);
+ SV** const svpp = hv_fetch( widecharmap, (char*)&uvc, sizeof( UV ), 0);
if ( !svpp ) {
charid = 0;
} else {
@@ -1514,7 +1514,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
/* and now dump it out before we compress it */
DEBUG_TRIE_COMPILE_MORE_r(
- dump_trie_interim_list(trie,next_alloc,depth+1)
+ dump_trie_interim_list(trie,widecharmap,next_alloc,depth+1)
);
trie->trans
@@ -1664,7 +1664,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
if ( uvc < 256 ) {
charid = trie->charmap[ uvc ];
} else {
- SV* const * const svpp = hv_fetch( trie->widecharmap, (char*)&uvc, sizeof( UV ), 0);
+ SV* const * const svpp = hv_fetch( widecharmap, (char*)&uvc, sizeof( UV ), 0);
charid = svpp ? (U16)SvIV(*svpp) : 0;
}
if ( charid ) {
@@ -1688,7 +1688,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
/* and now dump it out before we compress it */
DEBUG_TRIE_COMPILE_MORE_r(
- dump_trie_interim_table(trie,next_alloc,depth+1)
+ dump_trie_interim_table(trie,widecharmap,next_alloc,depth+1)
);
{
@@ -1819,7 +1819,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
/* and now dump out the compressed format */
DEBUG_TRIE_COMPILE_r(
- dump_trie(trie,depth+1)
+ dump_trie(trie,widecharmap,depth+1)
);
{ /* Modify the program and insert the new TRIE node*/
@@ -1865,7 +1865,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
/* But first we check to see if there is a common prefix we can
split out as an EXACT and put in front of the TRIE node. */
trie->startstate= 1;
- if ( trie->bitmap && !trie->widecharmap && !trie->jump ) {
+ if ( trie->bitmap && !widecharmap && !trie->jump ) {
U32 state;
for ( state = 1 ; state < trie->statecount-1 ; state++ ) {
U32 ofs = 0;
@@ -2029,6 +2029,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
Set_Node_Offset_Length(convert,mjd_offset,mjd_nodelen);
});
} /* end node insert */
+ RExC_rxi->data->data[ data_slot + 1 ] = (void*)widecharmap;
#ifndef DEBUGGING
SvREFCNT_dec(TRIE_REVCHARMAP(trie));
#endif
@@ -8521,6 +8522,7 @@ Perl_pregfree(pTHX_ struct regexp *r)
switch (ri->data->what[n]) {
case 's':
case 'S':
+ case 'u':
SvREFCNT_dec((SV*)ri->data->data[n]);
break;
case 'f':
@@ -8575,8 +8577,6 @@ Perl_pregfree(pTHX_ struct regexp *r)
OP_REFCNT_UNLOCK;
if ( !refcount ) {
PerlMemShared_free(trie->charmap);
- if (trie->widecharmap)
- SvREFCNT_dec((SV*)trie->widecharmap);
PerlMemShared_free(trie->states);
PerlMemShared_free(trie->trans);
if (trie->bitmap)
@@ -8691,11 +8691,12 @@ Perl_regdupe(pTHX_ const regexp *r, CLONE_PARAMS *param)
for (i = 0; i < count; i++) {
d->what[i] = ri->data->what[i];
switch (d->what[i]) {
- /* legal options are one of: sSfpontT
+ /* legal options are one of: sSfpontTu
see also regcomp.h and pregfree() */
case 's':
case 'S':
case 'p': /* actually an AV, but the dup function is identical. */
+ case 'u': /* actually an HV, but the dup function is identical. */
d->data[i] = sv_dup_inc((SV *)ri->data->data[i], param);
break;
case 'f':
diff --git a/regcomp.h b/regcomp.h
index 4ae26594f0..255509687c 100644
--- a/regcomp.h
+++ b/regcomp.h
@@ -429,6 +429,7 @@ END_EXTERN_C
* strings resulting from casefolding the single-character entries
* in the character class
* t - trie struct
+ * u - trie struct's widecharmap (a HV, so can't share, must dup)
* T - aho-trie struct
* S - sv for named capture lookup
* 20010712 mjd@plover.com
@@ -520,7 +521,6 @@ struct _reg_trie_data {
U16 uniquecharcount; /* unique chars in trie (width of trans table) */
U32 lasttrans; /* last valid transition element */
U16 *charmap; /* byte to charid lookup array */
- HV *widecharmap; /* code points > 255 to charid */
reg_trie_state *states; /* state data */
reg_trie_trans *trans; /* array of transition elements */
char *bitmap; /* stclass bitmap */
diff --git a/regexec.c b/regexec.c
index b184db1fb7..4e3785bda6 100644
--- a/regexec.c
+++ b/regexec.c
@@ -976,8 +976,8 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
-#define REXEC_TRIE_READ_CHAR(trie_type, trie, uc, uscan, len, uvc, charid, \
-foldlen, foldbuf, uniflags) STMT_START { \
+#define REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, uscan, len, \
+uvc, charid, foldlen, foldbuf, uniflags) STMT_START { \
switch (trie_type) { \
case trie_utf8_fold: \
if ( foldlen>0 ) { \
@@ -1005,8 +1005,8 @@ foldlen, foldbuf, uniflags) STMT_START { \
} \
else { \
charid = 0; \
- if (trie->widecharmap) { \
- SV** const svpp = hv_fetch(trie->widecharmap, \
+ if (widecharmap) { \
+ SV** const svpp = hv_fetch(widecharmap, \
(char*)&uvc, sizeof(UV), 0); \
if (svpp) \
charid = (U16)SvIV(*svpp); \
@@ -1421,6 +1421,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
= (reg_ac_data*)progi->data->data[ ARG( c ) ];
reg_trie_data *trie
= (reg_trie_data*)progi->data->data[ aho->trie ];
+ HV *widecharmap = (HV*) progi->data->data[ aho->trie + 1 ];
const char *last_start = strend - trie->minlen;
#ifdef DEBUGGING
@@ -1523,8 +1524,9 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
}
points[pointpos++ % maxlen]= uc;
- REXEC_TRIE_READ_CHAR(trie_type, trie, uc, uscan, len,
- uvc, charid, foldlen, foldbuf, uniflags);
+ REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
+ uscan, len, uvc, charid, foldlen,
+ foldbuf, uniflags);
DEBUG_TRIE_EXECUTE_r({
dump_exec_pos( (char *)uc, c, strend, real_start,
s, do_utf8 );
@@ -2800,6 +2802,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
/* what trie are we using right now */
reg_trie_data * const trie
= (reg_trie_data*)rexi->data->data[ ARG( scan ) ];
+ HV * widecharmap = (HV *)rexi->data->data[ ARG( scan ) + 1 ];
U32 state = trie->startstate;
if (trie->bitmap && trie_type != trie_utf8_fold &&
@@ -2895,8 +2898,9 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
});
if ( base ) {
- REXEC_TRIE_READ_CHAR(trie_type, trie, uc, uscan, len,
- uvc, charid, foldlen, foldbuf, uniflags);
+ REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
+ uscan, len, uvc, charid, foldlen,
+ foldbuf, uniflags);
if (charid &&
(base + charid > trie->uniquecharcount )