diff options
-rw-r--r-- | embed.fnc | 9 | ||||
-rw-r--r-- | embed.h | 6 | ||||
-rw-r--r-- | proto.h | 6 | ||||
-rw-r--r-- | regcomp.c | 53 | ||||
-rw-r--r-- | regcomp.h | 2 | ||||
-rw-r--r-- | regexec.c | 20 |
6 files changed, 52 insertions, 44 deletions
@@ -1351,9 +1351,12 @@ Es |const regnode*|dumpuntil|NN const regexp *r|NN const regnode *start \ |NULLOK const regnode *plast \ |NN SV* sv|I32 indent|U32 depth Es |void |put_byte |NN SV* sv|int c -Es |void |dump_trie |NN const struct _reg_trie_data *trie|U32 depth -Es |void |dump_trie_interim_list|NN const struct _reg_trie_data *trie|U32 next_alloc|U32 depth -Es |void |dump_trie_interim_table|NN const struct _reg_trie_data *trie|U32 next_alloc|U32 depth +Es |void |dump_trie |NN const struct _reg_trie_data *trie\ + |NULLOK HV* widecharmap|U32 depth +Es |void |dump_trie_interim_list|NN const struct _reg_trie_data *trie\ + |NULLOK HV* widecharmap|U32 next_alloc|U32 depth +Es |void |dump_trie_interim_table|NN const struct _reg_trie_data *trie\ + |NULLOK HV* widecharmap|U32 next_alloc|U32 depth Es |U8 |regtail_study |NN struct RExC_state_t *state|NN regnode *p|NN const regnode *val|U32 depth # endif #endif @@ -3550,9 +3550,9 @@ #if defined(PERL_CORE) || defined(PERL_EXT) #define dumpuntil(a,b,c,d,e,f,g,h) S_dumpuntil(aTHX_ a,b,c,d,e,f,g,h) #define put_byte(a,b) S_put_byte(aTHX_ a,b) -#define dump_trie(a,b) S_dump_trie(aTHX_ a,b) -#define dump_trie_interim_list(a,b,c) S_dump_trie_interim_list(aTHX_ a,b,c) -#define dump_trie_interim_table(a,b,c) S_dump_trie_interim_table(aTHX_ a,b,c) +#define dump_trie(a,b,c) S_dump_trie(aTHX_ a,b,c) +#define dump_trie_interim_list(a,b,c,d) S_dump_trie_interim_list(aTHX_ a,b,c,d) +#define dump_trie_interim_table(a,b,c,d) S_dump_trie_interim_table(aTHX_ a,b,c,d) #define regtail_study(a,b,c,d) S_regtail_study(aTHX_ a,b,c,d) #endif # endif @@ -3677,13 +3677,13 @@ STATIC const regnode* S_dumpuntil(pTHX_ const regexp *r, const regnode *start, c STATIC void S_put_byte(pTHX_ SV* sv, int c) __attribute__nonnull__(pTHX_1); -STATIC void S_dump_trie(pTHX_ const struct _reg_trie_data *trie, U32 depth) +STATIC void S_dump_trie(pTHX_ const struct _reg_trie_data *trie, HV* widecharmap, U32 depth) __attribute__nonnull__(pTHX_1); -STATIC void S_dump_trie_interim_list(pTHX_ const struct _reg_trie_data *trie, U32 next_alloc, U32 depth) +STATIC void S_dump_trie_interim_list(pTHX_ const struct _reg_trie_data *trie, HV* widecharmap, U32 next_alloc, U32 depth) __attribute__nonnull__(pTHX_1); -STATIC void S_dump_trie_interim_table(pTHX_ const struct _reg_trie_data *trie, U32 next_alloc, U32 depth) +STATIC void S_dump_trie_interim_table(pTHX_ const struct _reg_trie_data *trie, HV* widecharmap, U32 next_alloc, U32 depth) __attribute__nonnull__(pTHX_1); STATIC U8 S_regtail_study(pTHX_ struct RExC_state_t *state, regnode *p, const regnode *val, U32 depth) @@ -798,9 +798,9 @@ S_cl_or(const RExC_state_t *pRExC_state, struct regnode_charclass_class *cl, con #ifdef DEBUGGING /* - dump_trie(trie) - dump_trie_interim_list(trie,next_alloc) - dump_trie_interim_table(trie,next_alloc) + dump_trie(trie,widecharmap) + dump_trie_interim_list(trie,widecharmap,next_alloc) + dump_trie_interim_table(trie,widecharmap,next_alloc) These routines dump out a trie in a somewhat readable format. The _interim_ variants are used for debugging the interim @@ -813,17 +813,16 @@ S_cl_or(const RExC_state_t *pRExC_state, struct regnode_charclass_class *cl, con */ /* - dump_trie(trie) Dumps the final compressed table form of the trie to Perl_debug_log. Used for debugging make_trie(). */ STATIC void -S_dump_trie(pTHX_ const struct _reg_trie_data *trie,U32 depth) +S_dump_trie(pTHX_ const struct _reg_trie_data *trie, HV *widecharmap, U32 depth) { U32 state; SV *sv=sv_newmortal(); - int colwidth= trie->widecharmap ? 6 : 4; + int colwidth= widecharmap ? 6 : 4; GET_RE_DEBUG_FLAGS_DECL; @@ -894,18 +893,18 @@ S_dump_trie(pTHX_ const struct _reg_trie_data *trie,U32 depth) } } /* - dump_trie_interim_list(trie,next_alloc) Dumps a fully constructed but uncompressed trie in list form. List tries normally only are used for construction when the number of possible chars (trie->uniquecharcount) is very high. Used for debugging make_trie(). */ STATIC void -S_dump_trie_interim_list(pTHX_ const struct _reg_trie_data *trie, U32 next_alloc,U32 depth) +S_dump_trie_interim_list(pTHX_ const struct _reg_trie_data *trie, + HV *widecharmap, U32 next_alloc, U32 depth) { U32 state; SV *sv=sv_newmortal(); - int colwidth= trie->widecharmap ? 6 : 4; + int colwidth= widecharmap ? 6 : 4; GET_RE_DEBUG_FLAGS_DECL; /* print out the table precompression. */ PerlIO_printf( Perl_debug_log, "%*sState :Word | Transition Data\n%*s%s", @@ -947,19 +946,19 @@ S_dump_trie_interim_list(pTHX_ const struct _reg_trie_data *trie, U32 next_alloc } /* - dump_trie_interim_table(trie,next_alloc) Dumps a fully constructed but uncompressed trie in table form. This is the normal DFA style state transition table, with a few twists to facilitate compression later. Used for debugging make_trie(). */ STATIC void -S_dump_trie_interim_table(pTHX_ const struct _reg_trie_data *trie, U32 next_alloc, U32 depth) +S_dump_trie_interim_table(pTHX_ const struct _reg_trie_data *trie, + HV *widecharmap, U32 next_alloc, U32 depth) { U32 state; U16 charid; SV *sv=sv_newmortal(); - int colwidth= trie->widecharmap ? 6 : 4; + int colwidth= widecharmap ? 6 : 4; GET_RE_DEBUG_FLAGS_DECL; /* @@ -1249,6 +1248,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs dVAR; /* first pass, loop through and scan words */ reg_trie_data *trie; + HV *widecharmap = NULL; regnode *cur; const U32 uniflags = UTF8_ALLOW_DEFAULT; STRLEN len = 0; @@ -1267,7 +1267,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs ) ); - const U32 data_slot = add_data( pRExC_state, 1, "t" ); + const U32 data_slot = add_data( pRExC_state, 2, "tu" ); SV *re_trie_maxbuff; #ifndef DEBUGGING /* these are only used during construction but are useful during @@ -1370,10 +1370,10 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs } } else { SV** svpp; - if ( !trie->widecharmap ) - trie->widecharmap = newHV(); + if ( !widecharmap ) + widecharmap = newHV(); - svpp = hv_fetch( trie->widecharmap, (char*)&uvc, sizeof( UV ), 1 ); + svpp = hv_fetch( widecharmap, (char*)&uvc, sizeof( UV ), 1 ); if ( !svpp ) Perl_croak( aTHX_ "error creating/fetching widecharmap entry for 0x%"UVXf, uvc ); @@ -1397,7 +1397,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs DEBUG_TRIE_COMPILE_r( PerlIO_printf( Perl_debug_log, "%*sTRIE(%s): W:%d C:%d Uq:%d Min:%d Max:%d\n", (int)depth * 2 + 2,"", - ( trie->widecharmap ? "UTF8" : "NATIVE" ), (int)word_count, + ( widecharmap ? "UTF8" : "NATIVE" ), (int)word_count, (int)TRIE_CHARCOUNT(trie), trie->uniquecharcount, (int)trie->minlen, (int)trie->maxlen ) ); @@ -1469,7 +1469,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs if ( uvc < 256 ) { charid = trie->charmap[ uvc ]; } else { - SV** const svpp = hv_fetch( trie->widecharmap, (char*)&uvc, sizeof( UV ), 0); + SV** const svpp = hv_fetch( widecharmap, (char*)&uvc, sizeof( UV ), 0); if ( !svpp ) { charid = 0; } else { @@ -1514,7 +1514,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs /* and now dump it out before we compress it */ DEBUG_TRIE_COMPILE_MORE_r( - dump_trie_interim_list(trie,next_alloc,depth+1) + dump_trie_interim_list(trie,widecharmap,next_alloc,depth+1) ); trie->trans @@ -1664,7 +1664,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs if ( uvc < 256 ) { charid = trie->charmap[ uvc ]; } else { - SV* const * const svpp = hv_fetch( trie->widecharmap, (char*)&uvc, sizeof( UV ), 0); + SV* const * const svpp = hv_fetch( widecharmap, (char*)&uvc, sizeof( UV ), 0); charid = svpp ? (U16)SvIV(*svpp) : 0; } if ( charid ) { @@ -1688,7 +1688,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs /* and now dump it out before we compress it */ DEBUG_TRIE_COMPILE_MORE_r( - dump_trie_interim_table(trie,next_alloc,depth+1) + dump_trie_interim_table(trie,widecharmap,next_alloc,depth+1) ); { @@ -1819,7 +1819,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs /* and now dump out the compressed format */ DEBUG_TRIE_COMPILE_r( - dump_trie(trie,depth+1) + dump_trie(trie,widecharmap,depth+1) ); { /* Modify the program and insert the new TRIE node*/ @@ -1865,7 +1865,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs /* But first we check to see if there is a common prefix we can split out as an EXACT and put in front of the TRIE node. */ trie->startstate= 1; - if ( trie->bitmap && !trie->widecharmap && !trie->jump ) { + if ( trie->bitmap && !widecharmap && !trie->jump ) { U32 state; for ( state = 1 ; state < trie->statecount-1 ; state++ ) { U32 ofs = 0; @@ -2029,6 +2029,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs Set_Node_Offset_Length(convert,mjd_offset,mjd_nodelen); }); } /* end node insert */ + RExC_rxi->data->data[ data_slot + 1 ] = (void*)widecharmap; #ifndef DEBUGGING SvREFCNT_dec(TRIE_REVCHARMAP(trie)); #endif @@ -8521,6 +8522,7 @@ Perl_pregfree(pTHX_ struct regexp *r) switch (ri->data->what[n]) { case 's': case 'S': + case 'u': SvREFCNT_dec((SV*)ri->data->data[n]); break; case 'f': @@ -8575,8 +8577,6 @@ Perl_pregfree(pTHX_ struct regexp *r) OP_REFCNT_UNLOCK; if ( !refcount ) { PerlMemShared_free(trie->charmap); - if (trie->widecharmap) - SvREFCNT_dec((SV*)trie->widecharmap); PerlMemShared_free(trie->states); PerlMemShared_free(trie->trans); if (trie->bitmap) @@ -8691,11 +8691,12 @@ Perl_regdupe(pTHX_ const regexp *r, CLONE_PARAMS *param) for (i = 0; i < count; i++) { d->what[i] = ri->data->what[i]; switch (d->what[i]) { - /* legal options are one of: sSfpontT + /* legal options are one of: sSfpontTu see also regcomp.h and pregfree() */ case 's': case 'S': case 'p': /* actually an AV, but the dup function is identical. */ + case 'u': /* actually an HV, but the dup function is identical. */ d->data[i] = sv_dup_inc((SV *)ri->data->data[i], param); break; case 'f': @@ -429,6 +429,7 @@ END_EXTERN_C * strings resulting from casefolding the single-character entries * in the character class * t - trie struct + * u - trie struct's widecharmap (a HV, so can't share, must dup) * T - aho-trie struct * S - sv for named capture lookup * 20010712 mjd@plover.com @@ -520,7 +521,6 @@ struct _reg_trie_data { U16 uniquecharcount; /* unique chars in trie (width of trans table) */ U32 lasttrans; /* last valid transition element */ U16 *charmap; /* byte to charid lookup array */ - HV *widecharmap; /* code points > 255 to charid */ reg_trie_state *states; /* state data */ reg_trie_trans *trans; /* array of transition elements */ char *bitmap; /* stclass bitmap */ @@ -976,8 +976,8 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, -#define REXEC_TRIE_READ_CHAR(trie_type, trie, uc, uscan, len, uvc, charid, \ -foldlen, foldbuf, uniflags) STMT_START { \ +#define REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, uscan, len, \ +uvc, charid, foldlen, foldbuf, uniflags) STMT_START { \ switch (trie_type) { \ case trie_utf8_fold: \ if ( foldlen>0 ) { \ @@ -1005,8 +1005,8 @@ foldlen, foldbuf, uniflags) STMT_START { \ } \ else { \ charid = 0; \ - if (trie->widecharmap) { \ - SV** const svpp = hv_fetch(trie->widecharmap, \ + if (widecharmap) { \ + SV** const svpp = hv_fetch(widecharmap, \ (char*)&uvc, sizeof(UV), 0); \ if (svpp) \ charid = (U16)SvIV(*svpp); \ @@ -1421,6 +1421,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, = (reg_ac_data*)progi->data->data[ ARG( c ) ]; reg_trie_data *trie = (reg_trie_data*)progi->data->data[ aho->trie ]; + HV *widecharmap = (HV*) progi->data->data[ aho->trie + 1 ]; const char *last_start = strend - trie->minlen; #ifdef DEBUGGING @@ -1523,8 +1524,9 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, } points[pointpos++ % maxlen]= uc; - REXEC_TRIE_READ_CHAR(trie_type, trie, uc, uscan, len, - uvc, charid, foldlen, foldbuf, uniflags); + REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, + uscan, len, uvc, charid, foldlen, + foldbuf, uniflags); DEBUG_TRIE_EXECUTE_r({ dump_exec_pos( (char *)uc, c, strend, real_start, s, do_utf8 ); @@ -2800,6 +2802,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) /* what trie are we using right now */ reg_trie_data * const trie = (reg_trie_data*)rexi->data->data[ ARG( scan ) ]; + HV * widecharmap = (HV *)rexi->data->data[ ARG( scan ) + 1 ]; U32 state = trie->startstate; if (trie->bitmap && trie_type != trie_utf8_fold && @@ -2895,8 +2898,9 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) }); if ( base ) { - REXEC_TRIE_READ_CHAR(trie_type, trie, uc, uscan, len, - uvc, charid, foldlen, foldbuf, uniflags); + REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, + uscan, len, uvc, charid, foldlen, + foldbuf, uniflags); if (charid && (base + charid > trie->uniquecharcount ) |