6 files changed, 302 insertions, 222 deletions
diff --git a/ext/re/t/regop.t b/ext/re/t/regop.t
index 46e6ec04f5..c24c32f95c 100644
--- a/ext/re/t/regop.t
+++ b/ext/re/t/regop.t
@@ -41,7 +41,9 @@ foreach my $testout ( @tests ) {
         s/\s+$//;
         ok( $testout=~/\Q$_\E/, "$_: /$pattern/" )
             or do {
-                !$diaged++ and diag("$_: /$pattern/\n$testout");
+                !$diaged++ and diag("PATTERN: /$pattern/\n\n"
+		    . "EXPECTED:\n$_\n\n"
+		    . "WITHIN GOT:\n$testout");
             };
     }
 }
@@ -152,16 +154,17 @@ minlen 3
 #       #   8| W   4 @   0 
 #       #   9| W   5 @   0 
 #       #   A| W   6 @   0 
+#     word_info N:(prev,char)= 1:(0,1) 2:(0,1) 3:(0,1) 4:(0,1) 5:(0,1) 6:(0,1)
 # Final program:
-#    1: EXACT <ABC>(3)
-#    3: TRIEC-EXACT<S:4/10 W:6 L:1/1 C:24/7>[A-EGP](20)
+#    1: EXACT <ABC> (3)
+#    3: TRIEC-EXACT<S:4/10 W:6 L:1/1 C:24/7>[A-EGP] (20)
 #       <P> 
 #       <G> 
 #       <E> 
 #       <B> 
 #       <A> 
 #       <D> 
-#   20: END(0)
+#   20: END (0)
 # anchored "ABC" at 0 (checking anchored) minlen 4 
 # Offsets: [20]
 # 	1:4[3] 3:4[15] 19:32[0] 20:34[0] 
@@ -172,10 +175,10 @@ minlen 3
 #    0 <> <ABCD>               |  1:EXACT <ABC>(3)
 #    3 <ABC> <D>               |  3:TRIEC-EXACT<S:4/10 W:6 L:1/1 C:24/7>[A-EGP](20)
 #    3 <ABC> <D>               |    State:    4 Accepted:    0 Charid:  7 CP:  44 After State:    a
-#    4 <ABCD> <>               |    State:    a Accepted:    1 Charid:  6 CP:   0 After State:    0
+#    4 <ABCD> <>               |    State:    a Accepted:    1 Charid:  7 CP:   0 After State:    0
 #                                   got 1 possible matches
-#                                   only one match left: #6 <D>
-#    4 <ABCD> <>               | 20:END(0)
+#                                   TRIE matched word #6, continuing
+#    4 <ABCD> <>               | 20:  END(0)
 # Match successful!
 # %MATCHED%
 # Freeing REx: "(?:ABCP|ABCG|ABCE|ABCB|ABCA|ABCD)"
@@ -183,7 +186,6 @@ minlen 3
 EXACT <ABC>
 TRIEC-EXACT
 [A-EGP]
-only one match left: #6 <D>
 S:4/10
 W:6
 L:1/1
diff --git a/regcomp.c b/regcomp.c
index 1a815c67fd..f665f0b5d5 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -878,6 +878,7 @@ S_dump_trie(pTHX_ const struct _reg_trie_data *trie, HV *widecharmap,
     U32 state;
     SV *sv=sv_newmortal();
     int colwidth= widecharmap ? 6 : 4;
+    U16 word;
     GET_RE_DEBUG_FLAGS_DECL;
 
     PERL_ARGS_ASSERT_DUMP_TRIE;
@@ -947,6 +948,13 @@ S_dump_trie(pTHX_ const struct _reg_trie_data *trie, HV *widecharmap,
         }
         PerlIO_printf( Perl_debug_log, "\n" );
     }
+    PerlIO_printf(Perl_debug_log, "%*sword_info N:(prev,len)=", (int)depth*2, "");
+    for (word=1; word <= trie->wordcount; word++) {
+	PerlIO_printf(Perl_debug_log, " %d:(%d,%d)",
+	    (int)word, (int)(trie->wordinfo[word].prev),
+	    (int)(trie->wordinfo[word].len));
+    }
+    PerlIO_printf(Perl_debug_log, "\n" );
 }    
 /*
   Dumps a fully constructed but uncompressed trie in list form.
@@ -1077,6 +1085,7 @@ S_dump_trie_interim_table(pTHX_ const struct _reg_trie_data *trie,
 
 #endif
 
+
 /* make_trie(startbranch,first,last,tail,word_count,flags,depth)
   startbranch: the first branch in the whole branch sequence
   first      : start branch of sequence of branch-exact nodes.
@@ -1257,8 +1266,6 @@ is the recommended Unicode-aware way of saying
     U16 dupe= trie->states[ state ].wordnum;                    \
     regnode * const noper_next = regnext( noper );              \
                                                                 \
-    if (trie->wordlen)                                          \
-        trie->wordlen[ curword ] = wordlen;                     \
     DEBUG_r({                                                   \
         /* store the word for dumping */                        \
         SV* tmp;                                                \
@@ -1270,6 +1277,9 @@ is the recommended Unicode-aware way of saying
     });                                                         \
                                                                 \
     curword++;                                                  \
+    trie->wordinfo[curword].prev   = 0;                         \
+    trie->wordinfo[curword].len    = wordlen;                   \
+    trie->wordinfo[curword].accept = state;                     \
                                                                 \
     if ( noper_next < tail ) {                                  \
         if (!trie->jump)                                        \
@@ -1282,16 +1292,11 @@ is the recommended Unicode-aware way of saying
     }                                                           \
                                                                 \
     if ( dupe ) {                                               \
-        /* So it's a dupe. This means we need to maintain a   */\
-        /* linked-list from the first to the next.            */\
-        /* we only allocate the nextword buffer when there    */\
-        /* a dupe, so first time we have to do the allocation */\
-        if (!trie->nextword)                                    \
-            trie->nextword = (U16 *)					\
-		PerlMemShared_calloc( word_count + 1, sizeof(U16));	\
-        while ( trie->nextword[dupe] )                          \
-            dupe= trie->nextword[dupe];                         \
-        trie->nextword[dupe]= curword;                          \
+        /* It's a dupe. Pre-insert into the wordinfo[].prev   */\
+        /* chain, so that when the bits of chain are later    */\
+        /* linked together, the dups appear in the chain      */\
+	trie->wordinfo[curword].prev = trie->wordinfo[dupe].prev; \
+	trie->wordinfo[dupe].prev = curword;                    \
     } else {                                                    \
         /* we haven't inserted this word yet.                */ \
         trie->states[ state ].wordnum = curword;                \
@@ -1329,6 +1334,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
     regnode *jumper = NULL;
     regnode *nextbranch = NULL;
     regnode *convert = NULL;
+    U32 *prev_states; /* temp array mapping each state to previous one */
     /* we just use folder as a flag in utf8 */
     const U8 * const folder = ( flags == EXACTF
                        ? PL_fold
@@ -1364,6 +1370,9 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
     trie->charmap = (U16 *) PerlMemShared_calloc( 256, sizeof(U16) );
     if (!(UTF && folder))
 	trie->bitmap = (char *) PerlMemShared_calloc( ANYOF_BITMAP_SIZE, 1 );
+    trie->wordinfo = (reg_trie_wordinfo *) PerlMemShared_calloc(
+                       trie->wordcount+1, sizeof(reg_trie_wordinfo));
+
     DEBUG_r({
         trie_words = newAV();
     });
@@ -1496,7 +1505,6 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
 		(int)TRIE_CHARCOUNT(trie), trie->uniquecharcount,
 		(int)trie->minlen, (int)trie->maxlen )
     );
-    trie->wordlen = (U32 *) PerlMemShared_calloc( word_count, sizeof(U32) );
 
     /*
         We now know what we are dealing with in terms of unique chars and
@@ -1520,6 +1528,9 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
     */
 
 
+    Newx(prev_states, TRIE_CHARCOUNT(trie) + 2, U32);
+    prev_states[1] = 0;
+
     if ( (IV)( ( TRIE_CHARCOUNT(trie) + 1 ) * trie->uniquecharcount + 1) > SvIV(re_trie_maxbuff) ) {
         /*
             Second Pass -- Array Of Lists Representation
@@ -1590,6 +1601,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
                         }
                         if ( ! newstate ) {
                             newstate = next_alloc++;
+			    prev_states[newstate] = state;
                             TRIE_LIST_PUSH( state, charid, newstate );
                             transcount++;
                         }
@@ -1773,6 +1785,8 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
                         if ( !trie->trans[ state + charid ].next ) {
                             trie->trans[ state + charid ].next = next_alloc;
                             trie->trans[ state ].check++;
+			    prev_states[TRIE_NODENUM(next_alloc)]
+				    = TRIE_NODENUM(state);
                             next_alloc += trie->uniquecharcount;
                         }
                         state = trie->trans[ state + charid ].next;
@@ -1920,9 +1934,6 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
 	PerlMemShared_realloc( trie->trans, trie->lasttrans
 			       * sizeof(reg_trie_trans) );
 
-    /* and now dump out the compressed format */
-    DEBUG_TRIE_COMPILE_r(dump_trie(trie, widecharmap, revcharmap, depth+1));
-
     {   /* Modify the program and insert the new TRIE node*/ 
         U8 nodetype =(U8)(flags & 0xFF);
         char *str=NULL;
@@ -2052,6 +2063,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
 		    break;
 		}
 	    }
+	    trie->prefixlen = (state-1);
             if (str) {
                 regnode *n = convert+NODE_SZ_STR(convert);
                 NEXT_OFF(convert) = NODE_SZ_STR(convert);
@@ -2147,6 +2159,42 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
             Set_Node_Offset_Length(convert,mjd_offset,mjd_nodelen);
         });
     } /* end node insert */
+
+    /*  Finish populating the prev field of the wordinfo array.  Walk back
+     *  from each accept state until we find another accept state, and if
+     *  so, point the first word's .prev field at the second word. If the
+     *  second already has a .prev field set, stop now. This will be the
+     *  case either if we've already processed that word's accept state,
+     *  or if that state had multiple words, and the overspill words
+     *  were already linked up earlier.
+     */
+    {
+	U16 word;
+	U32 state;
+	U16 prev;
+
+	for (word=1; word <= trie->wordcount; word++) {
+	    prev = 0;
+	    if (trie->wordinfo[word].prev)
+		continue;
+	    state = trie->wordinfo[word].accept;
+	    while (state) {
+		state = prev_states[state];
+		if (!state)
+		    break;
+		prev = trie->states[state].wordnum;
+		if (prev)
+		    break;
+	    }
+	    trie->wordinfo[word].prev = prev;
+	}
+	Safefree(prev_states);
+    }
+
+
+    /* and now dump out the compressed format */
+    DEBUG_TRIE_COMPILE_r(dump_trie(trie, widecharmap, revcharmap, depth+1));
+
     RExC_rxi->data->data[ data_slot + 1 ] = (void*)widecharmap;
 #ifdef DEBUGGING
     RExC_rxi->data->data[ data_slot + TRIE_WORDS_OFFSET ] = (void*)trie_words;
@@ -9571,12 +9619,9 @@ Perl_regfree_internal(pTHX_ REGEXP * const rx)
                         PerlMemShared_free(trie->trans);
                         if (trie->bitmap)
                             PerlMemShared_free(trie->bitmap);
-                        if (trie->wordlen)
-                            PerlMemShared_free(trie->wordlen);
                         if (trie->jump)
                             PerlMemShared_free(trie->jump);
-                        if (trie->nextword)
-                            PerlMemShared_free(trie->nextword);
+			PerlMemShared_free(trie->wordinfo);
                         /* do this last!!!! */
                         PerlMemShared_free(ri->data->data[n]);
 		    }
diff --git a/regcomp.h b/regcomp.h
index 20b4401ed2..a20d6e11bd 100644
--- a/regcomp.h
+++ b/regcomp.h
@@ -586,6 +586,15 @@ struct _reg_trie_state {
   } trans;
 };
 
+/* info per word; indexed by wordnum */
+typedef struct {
+    U16  prev;	/* previous word in acceptance chain; eg in
+		 * /zzz|abc|ab/ after matching the chars abc, the
+		 * accepted word is #2, and the previous accepted
+		 * word is #3 */
+    U32 len;	/* how many chars long is this word? */
+    U32 accept;	/* accept state for this word */
+} reg_trie_wordinfo;
 
 
 typedef struct _reg_trie_state    reg_trie_state;
@@ -603,15 +612,14 @@ struct _reg_trie_data {
     reg_trie_state  *states;         /* state data */
     reg_trie_trans  *trans;          /* array of transition elements */
     char            *bitmap;         /* stclass bitmap */
-    U32             *wordlen;        /* array of lengths of words */
     U16 	    *jump;           /* optional 1 indexed array of offsets before tail 
                                         for the node following a given word. */
-    U16	            *nextword;       /* optional 1 indexed array to support linked list
-                                        of duplicate wordnums */
+    reg_trie_wordinfo *wordinfo;     /* array of info per word */
     U16             uniquecharcount; /* unique chars in trie (width of trans table) */
     U32             startstate;      /* initial state - used for common prefix optimisation */
     STRLEN          minlen;          /* minimum length of words in trie - build/opt only? */
     STRLEN          maxlen;          /* maximum length of words in trie - build/opt only? */
+    U32             prefixlen;       /* #chars in common prefix */
     U32             statecount;      /* Build only - number of states in the states array 
                                         (including the unused zero state) */
     U32             wordcount;       /* Build only */
diff --git a/regexec.c b/regexec.c
index 7222efe8c7..4aa68efde0 100644
--- a/regexec.c
+++ b/regexec.c
@@ -1736,7 +1736,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
                         }
                                             
                         if ( word ) {
-                            U8 *lpos= points[ (pointpos - trie->wordlen[word-1] ) % maxlen ];
+                            U8 *lpos= points[ (pointpos - trie->wordinfo[word].len) % maxlen ];
                             if (!leftmost || lpos < leftmost) {
                                 DEBUG_r(accepted_word=word);
                                 leftmost= lpos;
@@ -1810,7 +1810,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
                         }
                     }
                     if ( aho->states[ state ].wordnum ) {
-                        U8 *lpos = points[ (pointpos - trie->wordlen[aho->states[ state ].wordnum-1]) % maxlen ];
+                        U8 *lpos = points[ (pointpos - trie->wordinfo[aho->states[ state ].wordnum].len) % maxlen ];
                         if (!leftmost || lpos < leftmost) {
                             DEBUG_r(accepted_word=aho->states[ state ].wordnum);
                             leftmost = lpos;
@@ -2505,9 +2505,6 @@ S_regtry(pTHX_ regmatch_info *reginfo, char **startpos)
 #define REPORT_CODE_OFF 32
 
 
-/* Make sure there is a test for this +1 options in re_tests */
-#define TRIE_INITAL_ACCEPT_BUFFLEN 4;
-
 #define CHRTEST_UNINIT -1001 /* c1/c2 haven't been calculated yet */
 #define CHRTEST_VOID   -1000 /* the c1/c2 "next char" test should be skipped */
 
@@ -3069,6 +3066,50 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
             }
             /* FALL THROUGH */
 	case TRIE:
+	    /* the basic plan of execution of the trie is:
+	     * At the beginning, run through all the states, and
+	     * find the longest-matching word. Also remember the position
+	     * of the shortest matching word. For example, this pattern:
+	     *    1  2 3 4    5
+	     *    ab|a|x|abcd|abc
+	     * when matched against the string "abcde", will generate
+	     * accept states for all words except 3, with the longest
+	     * matching word being 4, and the shortest being 2 (with
+	     * the position being after char 1 of the string).
+	     *
+	     * Then for each matching word, in word order (i.e. 1,2,4,5),
+	     * we run the remainder of the pattern; on each try setting
+	     * the current position to the character following the word,
+	     * returning to try the next word on failure.
+	     *
+	     * We avoid having to build a list of words at runtime by
+	     * using a compile-time structure, wordinfo[].prev, which
+	     * gives, for each word, the previous accepting word (if any).
+	     * In the case above it would contain the mappings 1->2, 2->0,
+	     * 3->0, 4->5, 5->1.  We can use this table to generate, from
+	     * the longest word (4 above), a list of all words, by
+	     * following the list of prev pointers; this gives us the
+	     * unordered list 4,5,1,2. Then given the current word we have
+	     * just tried, we can go through the list and find the
+	     * next-biggest word to try (so if we just failed on word 2,
+	     * the next in the list is 4).
+	     *
+	     * Since at runtime we don't record the matching position in
+	     * the string for each word, we have to work that out for
+	     * each word we're about to process. The wordinfo table holds
+	     * the character length of each word; given that we recorded
+	     * at the start: the position of the shortest word and its
+	     * length in chars, we just need to move the pointer the
+	     * difference between the two char lengths. Depending on
+	     * Unicode status and folding, that's cheap or expensive.
+	     *
+	     * This algorithm is optimised for the case where there are only
+	     * a small number of accept states, i.e. 0, 1, or maybe 2.
+	     * With lots of accept states, and having to try all of them,
+	     * it becomes quadratic on number of accept states to find all
+	     * the next words.
+	     */
+
 	    {
                 /* what type of TRIE am I? (utf8 makes this contextual) */
                 DECL_TRIE_TYPE(scan);
@@ -3105,76 +3146,62 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
 		STRLEN len = 0;
 		STRLEN foldlen = 0;
 		U8 *uscan = (U8*)NULL;
-		STRLEN bufflen=0;
-		SV *sv_accept_buff = NULL;
 		U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
+		U32 charcount = 0; /* how many input chars we have matched */
+		U32 accepted = 0; /* have we seen any accepting states? */
 
-	    	ST.accepted = 0; /* how many accepting states we have seen */
 		ST.B = next;
 		ST.jump = trie->jump;
 		ST.me = scan;
-	        /*
-        	   traverse the TRIE keeping track of all accepting states
-        	   we transition through until we get to a failing node.
-        	*/
+		ST.firstpos = NULL;
+		ST.longfold = FALSE; /* char longer if folded => it's harder */
+		ST.nextword = 0;
+
+		/* fully traverse the TRIE; note the position of the
+		   shortest accept state and the wordnum of the longest
+		   accept state */
 
 		while ( state && uc <= (U8*)PL_regeol ) {
                     U32 base = trie->states[ state ].trans.base;
                     UV uvc = 0;
                     U16 charid;
-                    /* We use charid to hold the wordnum as we don't use it
-                       for charid until after we have done the wordnum logic. 
-                       We define an alias just so that the wordnum logic reads
-                       more naturally. */
-
-#define got_wordnum charid
-                    got_wordnum = trie->states[ state ].wordnum;
-
-		    if ( got_wordnum ) {
-			if ( ! ST.accepted ) {
-			    ENTER;
-			    SAVETMPS; /* XXX is this necessary? dmq */
-			    bufflen = TRIE_INITAL_ACCEPT_BUFFLEN;
-			    sv_accept_buff=newSV(bufflen *
-					    sizeof(reg_trie_accepted) - 1);
-			    SvCUR_set(sv_accept_buff, 0);
-			    SvPOK_on(sv_accept_buff);
-			    sv_2mortal(sv_accept_buff);
-			    SAVETMPS;
-			    ST.accept_buff =
-				(reg_trie_accepted*)SvPV_nolen(sv_accept_buff );
-			}
-			do {
-			    if (ST.accepted >= bufflen) {
-				bufflen *= 2;
-				ST.accept_buff =(reg_trie_accepted*)
-				    SvGROW(sv_accept_buff,
-				       	bufflen * sizeof(reg_trie_accepted));
+		    U16 wordnum;
+                    wordnum = trie->states[ state ].wordnum;
+
+		    if (wordnum) { /* it's an accept state */
+			if (!accepted) {
+			    accepted = 1;
+			    /* record first match position */
+			    if (ST.longfold) {
+				ST.firstpos = (U8*)locinput;
+				ST.firstchars = 0;
 			    }
-			    SvCUR_set(sv_accept_buff,SvCUR(sv_accept_buff)
-				+ sizeof(reg_trie_accepted));
-
-
-			    ST.accept_buff[ST.accepted].wordnum = got_wordnum;
-			    ST.accept_buff[ST.accepted].endpos = uc;
-			    ++ST.accepted;
-		        } while (trie->nextword && (got_wordnum= trie->nextword[got_wordnum]));
+			    else {
+				ST.firstpos = uc;
+				ST.firstchars = charcount;
+			    }
+			}
+			if (!ST.nextword || wordnum < ST.nextword)
+			    ST.nextword = wordnum;
+			ST.topword = wordnum;
 		    }
-#undef got_wordnum 
 
 		    DEBUG_TRIE_EXECUTE_r({
 		                DUMP_EXEC_POS( (char *)uc, scan, do_utf8 );
 			        PerlIO_printf( Perl_debug_log,
-			            "%*s  %sState: %4"UVxf" Accepted: %4"UVxf" ",
+			            "%*s  %sState: %4"UVxf" Accepted: %c ",
 			            2+depth * 2, "", PL_colors[4],
-			            (UV)state, (UV)ST.accepted );
+			            (UV)state, (accepted ? 'Y' : 'N'));
 		    });
 
+		    /* read a char and goto next state */
 		    if ( base ) {
 			REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
 					     uscan, len, uvc, charid, foldlen,
 					     foldbuf, uniflags);
-
+			charcount++;
+			if (foldlen>0)
+			    ST.longfold = TRUE;
 			if (charid &&
 			     (base + charid > trie->uniquecharcount )
 			     && (base + charid - 1 - trie->uniquecharcount
@@ -3200,77 +3227,38 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
 		            charid, uvc, (UV)state, PL_colors[5] );
 		    );
 		}
-		if (!ST.accepted )
+		if (!accepted)
 		   sayNO;
 
+		/* calculate total number of accept states */
+		{
+		    U16 w = ST.topword;
+		    accepted = 0;
+		    while (w) {
+			w = trie->wordinfo[w].prev;
+			accepted++;
+		    }
+		    ST.accepted = accepted;
+		}
+
 		DEBUG_EXECUTE_r(
 		    PerlIO_printf( Perl_debug_log,
 			"%*s  %sgot %"IVdf" possible matches%s\n",
 			REPORT_CODE_OFF + depth * 2, "",
 			PL_colors[4], (IV)ST.accepted, PL_colors[5] );
 		);
+		goto trie_first_try; /* jump into the fail handler */
 	    }}
-            goto trie_first_try; /* jump into the fail handler */
 	    /* NOTREACHED */
-	case TRIE_next_fail: /* we failed - try next alterative */
+
+	case TRIE_next_fail: /* we failed - try next alternative */
             if ( ST.jump) {
                 REGCP_UNWIND(ST.cp);
 	        for (n = *PL_reglastparen; n > ST.lastparen; n--)
 		    PL_regoffs[n].end = -1;
 	        *PL_reglastparen = n;
 	    }
-          trie_first_try:
-            if (do_cutgroup) {
-                do_cutgroup = 0;
-                no_final = 0;
-            }
-
-            if ( ST.jump) {
-                ST.lastparen = *PL_reglastparen;
-	        REGCP_SET(ST.cp);
-            }	        
-	    if ( ST.accepted == 1 ) {
-		/* only one choice left - just continue */
-		DEBUG_EXECUTE_r({
-		    AV *const trie_words
-			= MUTABLE_AV(rexi->data->data[ARG(ST.me)+TRIE_WORDS_OFFSET]);
-		    SV ** const tmp = av_fetch( trie_words, 
-		        ST.accept_buff[ 0 ].wordnum-1, 0 );
-		    SV *sv= tmp ? sv_newmortal() : NULL;
-		    
-		    PerlIO_printf( Perl_debug_log,
-			"%*s  %sonly one match left: #%d <%s>%s\n",
-			REPORT_CODE_OFF+depth*2, "", PL_colors[4],
-			ST.accept_buff[ 0 ].wordnum,
-			tmp ? pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), 0, 
-	                        PL_colors[0], PL_colors[1],
-	                        (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0)
-                            ) 
-			: "not compiled under -Dr",
-			PL_colors[5] );
-		});
-		PL_reginput = (char *)ST.accept_buff[ 0 ].endpos;
-		/* in this case we free tmps/leave before we call regmatch
-		   as we wont be using accept_buff again. */
-		
-		locinput = PL_reginput;
-		nextchr = UCHARAT(locinput);
-    		if ( !ST.jump || !ST.jump[ST.accept_buff[0].wordnum]) 
-    		    scan = ST.B;
-    		else
-    		    scan = ST.me + ST.jump[ST.accept_buff[0].wordnum];
-		if (!has_cutgroup) {
-		    FREETMPS;
-		    LEAVE;
-                } else {
-                    ST.accepted--;
-                    PUSH_YES_STATE_GOTO(TRIE_next, scan);
-                }
-		
-		continue; /* execute rest of RE */
-	    }
-	    
-	    if ( !ST.accepted-- ) {
+	    if (!--ST.accepted) {
 	        DEBUG_EXECUTE_r({
 		    PerlIO_printf( Perl_debug_log,
 			"%*s  %sTRIE failed...%s\n",
@@ -3278,86 +3266,129 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
 			PL_colors[4],
 			PL_colors[5] );
 		});
-		FREETMPS;
-		LEAVE;
 		sayNO_SILENT;
-		/*NOTREACHED*/
-	    } 
+	    }
+	    {
+		/* Find next-highest word to process.  Note that this code
+		 * is O(N^2) per trie run (O(N) per branch), so keep tight */
+		register U32 min = 0;
+		register U32 word;
+		register U16 const nextword = ST.nextword;
+		register reg_trie_wordinfo * const wordinfo
+		    = ((reg_trie_data*)rexi->data->data[ARG(ST.me)])->wordinfo;
+		for (word=ST.topword; word; word=wordinfo[word].prev) {
+		    if (word > nextword && (!min || word < min))
+			min = word;
+		}
+		ST.nextword = min;
+	    }
 
-	    /*
-	       There are at least two accepting states left.  Presumably
-	       the number of accepting states is going to be low,
-	       typically two. So we simply scan through to find the one
-	       with lowest wordnum.  Once we find it, we swap the last
-	       state into its place and decrement the size. We then try to
-	       match the rest of the pattern at the point where the word
-	       ends. If we succeed, control just continues along the
-	       regex; if we fail we return here to try the next accepting
-	       state
-	     */
+          trie_first_try:
+            if (do_cutgroup) {
+                do_cutgroup = 0;
+                no_final = 0;
+            }
 
-	    {
-		U32 best = 0;
-		U32 cur;
-		for( cur = 1 ; cur <= ST.accepted ; cur++ ) {
-		    DEBUG_TRIE_EXECUTE_r(
-			PerlIO_printf( Perl_debug_log,
-			    "%*s  %sgot %"IVdf" (%d) as best, looking at %"IVdf" (%d)%s\n",
-			    REPORT_CODE_OFF + depth * 2, "", PL_colors[4],
-			    (IV)best, ST.accept_buff[ best ].wordnum, (IV)cur,
-			    ST.accept_buff[ cur ].wordnum, PL_colors[5] );
-		    );
+            if ( ST.jump) {
+                ST.lastparen = *PL_reglastparen;
+	        REGCP_SET(ST.cp);
+            }
 
-		    if (ST.accept_buff[cur].wordnum <
-			    ST.accept_buff[best].wordnum)
-			best = cur;
+	    /* find the string position just after the end of the current word */
+	    {
+		U32 chars; /* how many chars to skip */
+		U8 *uc = ST.firstpos;
+		reg_trie_data * const trie
+		    = (reg_trie_data*)rexi->data->data[ARG(ST.me)];
+
+		assert((trie->wordinfo[ST.nextword].len - trie->prefixlen)
+			    >=  ST.firstchars);
+		chars = (trie->wordinfo[ST.nextword].len - trie->prefixlen)
+			    - ST.firstchars;
+
+		if (ST.longfold) {
+		    /* the hard option - fold each char in turn and find
+		     * its folded length (which may be different) */
+		    U8 foldbuf[UTF8_MAXBYTES_CASE + 1];
+		    STRLEN foldlen;
+		    STRLEN len;
+		    UV uvc; /* whole code point: a U8 would truncate >0xFF */
+		    U8 *uscan;
+
+		    while (chars) {
+			if (do_utf8) {
+			    uvc = utf8n_to_uvuni((U8*)uc, UTF8_MAXLEN, &len,
+						    uniflags);
+			    uc += len;
+			}
+			else {
+			    uvc = *uc;
+			    uc++;
+			}
+			uvc = to_uni_fold(uvc, foldbuf, &foldlen);
+			uscan = foldbuf;
+			while (foldlen) {
+			    if (!--chars)
+				break;
+			    uvc = utf8n_to_uvuni(uscan, UTF8_MAXLEN, &len,
+					    uniflags);
+			    uscan += len;
+			    foldlen -= len;
+			}
+		    }
 		}
+		else {
+		    if (do_utf8) 
+			while (chars--)
+			    uc += UTF8SKIP(uc);
+		    else
+			uc += chars;
+		}
+		PL_reginput = (char *)uc;
+	    }
 
-		DEBUG_EXECUTE_r({
-		    AV *const trie_words
-			= MUTABLE_AV(rexi->data->data[ARG(ST.me)+TRIE_WORDS_OFFSET]);
-		    SV ** const tmp = av_fetch( trie_words, 
-		        ST.accept_buff[ best ].wordnum - 1, 0 );
-		    regnode *nextop=(!ST.jump || !ST.jump[ST.accept_buff[best].wordnum]) ? 
-		                    ST.B : 
-		                    ST.me + ST.jump[ST.accept_buff[best].wordnum];    
-		    SV *sv= tmp ? sv_newmortal() : NULL;
-		    
-		    PerlIO_printf( Perl_debug_log, 
-		        "%*s  %strying alternation #%d <%s> at node #%d %s\n",
-			REPORT_CODE_OFF+depth*2, "", PL_colors[4],
-			ST.accept_buff[best].wordnum,
-			tmp ? pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), 0, 
-	                        PL_colors[0], PL_colors[1],
-	                        (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0)
-                            ) : "not compiled under -Dr", 
-			    REG_NODE_NUM(nextop),
-			PL_colors[5] );
-		});
+	    scan = (ST.jump && ST.jump[ST.nextword]) 
+			? ST.me + ST.jump[ST.nextword]
+			: ST.B;
 
-		if ( best<ST.accepted ) {
-		    reg_trie_accepted tmp = ST.accept_buff[ best ];
-		    ST.accept_buff[ best ] = ST.accept_buff[ ST.accepted ];
-		    ST.accept_buff[ ST.accepted ] = tmp;
-		    best = ST.accepted;
-		}
-		PL_reginput = (char *)ST.accept_buff[ best ].endpos;
-		if ( !ST.jump || !ST.jump[ST.accept_buff[best].wordnum]) {
-		    scan = ST.B;
-		} else {
-		    scan = ST.me + ST.jump[ST.accept_buff[best].wordnum];
-                }
-                PUSH_YES_STATE_GOTO(TRIE_next, scan);    
-                /* NOTREACHED */
+	    DEBUG_EXECUTE_r({
+		PerlIO_printf( Perl_debug_log,
+		    "%*s  %sTRIE matched word #%d, continuing%s\n",
+		    REPORT_CODE_OFF+depth*2, "", 
+		    PL_colors[4],
+		    ST.nextword,
+		    PL_colors[5]
+		    );
+	    });
+
+	    if (ST.accepted > 1 || has_cutgroup) {
+		PUSH_STATE_GOTO(TRIE_next, scan);
+		/* NOTREACHED */
 	    }
+	    /* only one choice left - just continue */
+	    DEBUG_EXECUTE_r({
+		AV *const trie_words
+		    = MUTABLE_AV(rexi->data->data[ARG(ST.me)+TRIE_WORDS_OFFSET]);
+		SV ** const tmp = av_fetch( trie_words,
+		    ST.nextword-1, 0 );
+		SV *sv= tmp ? sv_newmortal() : NULL;
+
+		PerlIO_printf( Perl_debug_log,
+		    "%*s  %sonly one match left, short-circuiting: #%d <%s>%s\n",
+		    REPORT_CODE_OFF+depth*2, "", PL_colors[4],
+		    ST.nextword,
+		    tmp ? pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), 0,
+			    PL_colors[0], PL_colors[1],
+			    (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0)
+			) 
+		    : "not compiled under -Dr",
+		    PL_colors[5] );
+	    });
+
+	    locinput = PL_reginput;
+	    nextchr = UCHARAT(locinput);
+	    continue; /* execute rest of RE */
 	    /* NOTREACHED */
-        case TRIE_next:
-	    /* we dont want to throw this away, see bug 57042*/
-	    if (oreplsv != GvSV(PL_replgv))
-		sv_setsv(oreplsv, GvSV(PL_replgv));
-            FREETMPS;
-	    LEAVE;
-	    sayYES;
 #undef  ST
 
 	case EXACT: {
diff --git a/regexp.h b/regexp.h
index 90e3406a96..a9dd2e1276 100644
--- a/regexp.h
+++ b/regexp.h
@@ -490,13 +490,6 @@ and check for NULL.
 
 #define FBMrf_MULTILINE	1
 
-/* an accepting state/position*/
-struct _reg_trie_accepted {
-    U8   *endpos;
-    U16  wordnum;
-};
-typedef struct _reg_trie_accepted reg_trie_accepted;
-
 /* some basic information about the current match that is created by
  * Perl_regexec_flags and then passed to regtry(), regmatch() etc */
 
@@ -557,11 +550,15 @@ typedef struct regmatch_state {
 	    U32 lastparen;
 	    CHECKPOINT cp;
 
-	    reg_trie_accepted *accept_buff; /* accepting states we have seen */
-	    U32		accepted; /* how many accepting states we have seen */
+	    U32		accepted; /* how many accepting states left */
 	    U16         *jump;  /* positive offsets from me */
 	    regnode	*B;	/* node following the trie */
 	    regnode	*me;	/* Which node am I - needed for jump tries*/
+	    U8		*firstpos;/* pos in string of first trie match */
+	    U32		firstchars;/* len in chars of firstpos from start */
+	    U16		nextword;/* next word to try */
+	    U16		topword; /* longest accepted word */
+	    bool	longfold;/* saw a fold with a 1->n char mapping */
 	} trie;
 
         /* special types - these members are used to store state for special
diff --git a/t/op/svleak.t b/t/op/svleak.t
index 7b1f8f08c7..07c2efcb71 100644
--- a/t/op/svleak.t
+++ b/t/op/svleak.t
@@ -70,7 +70,4 @@ sub STORE	{ $_[0]->[$_[1]] = $_[2] }
 
 # [perl #74484]  repeated tries leaked SVs on the tmps stack
 
-{
-    local $TODO = 'not fixed yet';
-    leak_expr(5, 0, q{"YYYYYa" =~ /.+?(a(.+?)|b)/ }, "trie leak");
-}
+leak_expr(5, 0, q{"YYYYYa" =~ /.+?(a(.+?)|b)/ }, "trie leak");