summaryrefslogtreecommitdiff
path: root/regcomp.c
diff options
context:
space:
mode:
authorYves Orton <demerphq@gmail.com>2016-10-27 22:35:21 +0200
committerYves Orton <demerphq@gmail.com>2016-10-27 22:35:21 +0200
commit5ee57374e52ac64a2ab7681c2e0fa832e1c3a601 (patch)
tree96ae4ecfce5f12a15d17417f169208e1181c6992 /regcomp.c
parentd3d91c7ce972aa4a787621573eaa9f43187c1447 (diff)
downloadperl-5ee57374e52ac64a2ab7681c2e0fa832e1c3a601.tar.gz
regcomp.c: document the trie common prefix logic
I wrote this code some time ago. It is somewhat of a state machine with some interesting implicit assumptions which took me a while to remember. While I do it seems reasonable to document them so the next guy (maybe/probably me) doesn't have to think so hard.
Diffstat (limited to 'regcomp.c')
-rw-r--r--regcomp.c15
1 files changed, 15 insertions, 0 deletions
diff --git a/regcomp.c b/regcomp.c
index 9adaeb325e..e9c7972e0c 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -3248,6 +3248,10 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
split out as an EXACT and put in front of the TRIE node. */
trie->startstate= 1;
if ( trie->bitmap && !widecharmap && !trie->jump ) {
+ /* we want to find the first state that has more than
+ * one transition, if that state is not the first state
+ * then we have a common prefix which we can remove.
+ */
U32 state;
for ( state = 1 ; state < trie->statecount-1 ; state++ ) {
U32 ofs = 0;
@@ -3256,6 +3260,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
U32 count = 0;
const U32 base = trie->states[ state ].trans.base;
+ /* does this state terminate an alternation? */
if ( trie->states[state].wordnum )
count = 1;
@@ -3265,13 +3270,20 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
trie->trans[ base + ofs - trie->uniquecharcount ].check == state )
{
if ( ++count > 1 ) {
+ /* we have more than one transition */
SV **tmp;
U8 *ch;
+ /* if this is the first state there is no common prefix
+ * to extract, so we can exit */
if ( state == 1 ) break;
tmp = av_fetch( revcharmap, ofs, 0);
ch = (U8*)SvPV_nolen_const( *tmp );
+ /* if we are on count 2 then we need to initialize the
+ * bitmap, and store the previous char if there was one
+ * in it*/
if ( count == 2 ) {
+ /* clear the bitmap */
Zero(trie->bitmap, ANYOF_BITMAP_SIZE, char);
DEBUG_OPTIMISE_r(
Perl_re_indentf( aTHX_ "New Start State=%"UVuf" Class: [",
@@ -3295,6 +3307,9 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
}
}
if ( count == 1 ) {
+ /* This state has only one transition, its transition is part
+ * of a common prefix - we need to concatenate the char it
+ * represents to what we have so far. */
SV **tmp = av_fetch( revcharmap, first_ofs, 0);
STRLEN len;
char *ch = SvPV( *tmp, len );