diff options
author | Yves Orton <demerphq@gmail.com> | 2012-03-20 02:01:16 +0100 |
---|---|---|
committer | Yves Orton <demerphq@gmail.com> | 2012-03-20 11:21:19 +0100 |
commit | ff66d8782f6f847a8f6240394736a9cd98a7549b (patch) | |
tree | 7bb67db269e64997b2d482c2a2ca695dccd8933a | |
parent | 729aaeb502af62aff1b72f90b3d33e7218b94e0c (diff) | |
download | perl-yves/trie_absorb.tar.gz |
make TRIE nodes "absorb" NOTHING->EXACT sequencesyves/trie_absorbsmoke-me/trie_absorb
-rw-r--r-- | regcomp.c | 90 |
1 files changed, 59 insertions, 31 deletions
@@ -1552,7 +1552,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs if (!SvIOK(re_trie_maxbuff)) { sv_setiv(re_trie_maxbuff, RE_TRIE_MAXBUF_INIT); } - DEBUG_OPTIMISE_r({ + DEBUG_TRIE_COMPILE_r({ PerlIO_printf( Perl_debug_log, "%*smake_trie start==%d, first==%d, last==%d, tail==%d depth=%d\n", (int)depth * 2 + 2, "", @@ -1594,9 +1594,9 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs */ for ( cur = first ; cur < last ; cur = regnext( cur ) ) { - regnode * const noper = NEXTOPER( cur ); + regnode *noper = NEXTOPER( cur ); const U8 *uc = (U8*)STRING( noper ); - const U8 * const e = uc + STR_LEN( noper ); + const U8 *e = uc + STR_LEN( noper ); STRLEN foldlen = 0; U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ]; STRLEN skiplen = 0; @@ -1606,6 +1606,15 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs bool set_bit = trie->bitmap ? 1 : 0; /*store the first char in the bitmap?*/ if (OP(noper) == NOTHING) { + regnode *noper_next= regnext(noper); + if (OP(noper_next) == flags) { + noper = noper_next; + uc= (U8*)STRING(noper); + e= uc + STR_LEN(noper); + } + } + + if (OP(noper) == NOTHING) { trie->minlen= 0; continue; } @@ -1747,9 +1756,9 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs for ( cur = first ; cur < last ; cur = regnext( cur ) ) { - regnode * const noper = NEXTOPER( cur ); + regnode *noper = NEXTOPER( cur ); U8 *uc = (U8*)STRING( noper ); - const U8 * const e = uc + STR_LEN( noper ); + const U8 *e = uc + STR_LEN( noper ); U32 state = 1; /* required init */ U16 charid = 0; /* sanity init */ U8 *scan = (U8*)NULL; /* sanity init */ @@ -1758,6 +1767,15 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ]; STRLEN skiplen = 0; + if (OP(noper) == NOTHING) { + regnode *noper_next= regnext(noper); + if (OP(noper_next) == flags) { + noper = noper_next; + uc= (U8*)STRING(noper); + e= uc + STR_LEN(noper); + } + } + if (OP(noper) != NOTHING) { for ( ; uc < e ; uc += len ) { @@ -1945,9 +1963,9 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs for ( cur = first ; cur < last ; cur = regnext( cur ) ) { - regnode * const noper = NEXTOPER( cur ); + regnode *noper = NEXTOPER( cur ); const U8 *uc = (U8*)STRING( noper ); - const U8 * const e = uc + STR_LEN( noper ); + const U8 *e = uc + STR_LEN( noper ); U32 state = 1; /* required init */ @@ -1960,6 +1978,14 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs STRLEN skiplen = 0; U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ]; + if (OP(noper) == NOTHING) { + regnode *noper_next= regnext(noper); + if (OP(noper_next) == flags) { + noper = noper_next; + uc= (U8*)STRING(noper); + e= uc + STR_LEN(noper); + } + } if ( OP(noper) != NOTHING ) { for ( ; uc < e ; uc += len ) { @@ -3234,7 +3260,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, } - DEBUG_OPTIMISE_r({ + DEBUG_TRIE_COMPILE_r({ regprop(RExC_rx, mysv, tail ); PerlIO_printf( Perl_debug_log, "%*s%s%s\n", (int)depth * 2 + 2, "", @@ -3300,9 +3326,11 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, U8 noper_trietype = TRIE_TYPE( noper_type ); #if defined(DEBUGGING) || defined(NOJUMPTRIE) regnode * const noper_next = regnext( noper ); + U8 noper_next_type = noper_next ? OP(noper_next) : 0; + U8 noper_next_trietype = noper_next ? TRIE_TYPE( noper_next_type ) :0; #endif - DEBUG_OPTIMISE_r({ + DEBUG_TRIE_COMPILE_r({ regprop(RExC_rx, mysv, cur); PerlIO_printf( Perl_debug_log, "%*s- %s (%d)", (int)depth * 2 + 2,"", SvPV_nolen_const( mysv ), REG_NODE_NUM(cur) ); @@ -3316,8 +3344,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, PerlIO_printf( Perl_debug_log,"\t=> %s\t", SvPV_nolen_const(mysv)); } - PerlIO_printf( Perl_debug_log, "(First==%d,Last==%d,Cur==%d)\n", - REG_NODE_NUM(first), REG_NODE_NUM(last), REG_NODE_NUM(cur) ); + PerlIO_printf( Perl_debug_log, "(First==%d,Last==%d,Cur==%d,tt==%d,nt==%d,nnt==%d)\n", + REG_NODE_NUM(first), REG_NODE_NUM(last), REG_NODE_NUM(cur), trietype, noper_trietype, noper_next_trietype ); }); /* Is noper a trieable nodetype that can be merged with the @@ -3325,22 +3353,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, if ( noper_trietype && ( - /* XXX: Currently we cannot allow a NOTHING node to be the first element - * of a TRIEABLE sequence, Otherwise we will overwrite the regop following - * the NOTHING with the TRIE regop later on. This is because a NOTHING node - * is only one regnode wide, and a TRIE is two regnodes. An example of a - * problematic pattern is: "x" =~ /\A(?>(?:(?:)A|B|C?x))\z/ - * At a later point of time we can somewhat workaround this by handling - * NOTHING -> EXACT sequences as generated by /(?:)A|(?:)B/ type patterns, - * as we can effectively ignore the NOTHING regop in that case. - * This clause, which allows NOTHING to start a sequence is left commented - * out as a reference. - * - Yves - - ( noper_trietype == NOTHING) - || ( trietype == NOTHING ) - */ - ( noper_trietype == NOTHING && trietype ) + ( noper_trietype == NOTHING) + || ( trietype == NOTHING ) || ( trietype == noper_trietype ) ) #ifdef NOJUMPTRIE @@ -3352,15 +3366,29 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, * Either we are the first node in a new trieable sequence, * in which case we do some bookkeeping, otherwise we update * the end pointer. */ - count++; if ( !first ) { - first = cur; - trietype = noper_trietype; + if ( noper_trietype == NOTHING ) { +#if !defined(DEBUGGING) && !defined(NOJUMPTRIE) + regnode * const noper_next = regnext( noper ); + U8 noper_next_type = noper_next ? OP(noper_next) : 0; + U8 noper_next_trietype = noper_next ? TRIE_TYPE( noper_next_type ) :0; +#endif + + if ( noper_next_trietype ) { + first = cur; + trietype = noper_next_trietype; + } + } else { + first = cur; + trietype = noper_trietype; + } } else { if ( trietype == NOTHING ) trietype = noper_trietype; last = cur; } + if (first) + count++; } /* end handle mergable triable node */ else { /* handle unmergable node - @@ -3396,7 +3424,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, } } /* end handle unmergable node */ } /* loop over branches */ - DEBUG_OPTIMISE_r({ + DEBUG_TRIE_COMPILE_r({ regprop(RExC_rx, mysv, cur); PerlIO_printf( Perl_debug_log, "%*s- %s (%d) <SCAN FINISHED>\n", (int)depth * 2 + 2, |