diff options
author | Yves Orton <demerphq@gmail.com> | 2006-09-02 18:40:12 +0200 |
---|---|---|
committer | Rafael Garcia-Suarez <rgarciasuarez@gmail.com> | 2006-09-05 10:21:57 +0000 |
commit | 786e8c118e1218e4c348fecf469934e080881633 (patch) | |
tree | 0c59c96c6848740abfe47c2fb0fd29a10035b4a5 /regcomp.h | |
parent | 7145db399bea60e9f2e625350c9081d1b1f3b08e (diff) | |
download | perl-786e8c118e1218e4c348fecf469934e080881633.tar.gz |
Re: [PATCH] Trie jumping
Message-ID: <9b18b3110609020740y2eb9004cpab313c3353a437ca@mail.gmail.com>
p4raw-id: //depot/perl@28785
Diffstat (limited to 'regcomp.h')
-rw-r--r-- | regcomp.h | 54 |
1 files changed, 30 insertions, 24 deletions
@@ -103,6 +103,7 @@ struct regnode_2 { #define ANYOF_BITMAP_SIZE 32 /* 256 b/(8 b/B) */ #define ANYOF_CLASSBITMAP_SIZE 4 /* up to 32 (8*4) named classes */ +/* also used by trie */ struct regnode_charclass { U8 flags; U8 type; @@ -152,6 +153,7 @@ struct regnode_charclass_class { /* has [[:blah:]] classes */ #define ARG(p) ARG_VALUE(ARG_LOC(p)) #define ARG1(p) ARG_VALUE(ARG1_LOC(p)) #define ARG2(p) ARG_VALUE(ARG2_LOC(p)) + #define ARG_SET(p, val) ARG__SET(ARG_LOC(p), (val)) #define ARG1_SET(p, val) ARG__SET(ARG1_LOC(p), (val)) #define ARG2_SET(p, val) ARG__SET(ARG2_LOC(p), (val)) @@ -187,6 +189,8 @@ struct regnode_charclass_class { /* has [[:blah:]] classes */ #define ARG_LOC(p) (((struct regnode_1 *)p)->arg1) #define ARG1_LOC(p) (((struct regnode_2 *)p)->arg1) #define ARG2_LOC(p) (((struct regnode_2 *)p)->arg2) + + #define NODE_STEP_REGNODE 1 /* sizeof(regnode)/sizeof(regnode) */ #define EXTRA_STEP_2ARGS EXTRA_SIZE(struct regnode_2) @@ -288,7 +292,7 @@ struct regnode_charclass_class { /* has [[:blah:]] classes */ #define ANYOF_BITMAP_ZERO(ret) Zero(((struct regnode_charclass*)(ret))->bitmap, ANYOF_BITMAP_SIZE, char) #define ANYOF_BITMAP(p) (((struct regnode_charclass*)(p))->bitmap) -#define ANYOF_BITMAP_BYTE(p, c) (ANYOF_BITMAP(p)[((c) >> 3) & 31]) +#define ANYOF_BITMAP_BYTE(p, c) (ANYOF_BITMAP(p)[(((U8)(c)) >> 3) & 31]) #define ANYOF_BITMAP_SET(p, c) (ANYOF_BITMAP_BYTE(p, c) |= ANYOF_BIT(c)) #define ANYOF_BITMAP_CLEAR(p,c) (ANYOF_BITMAP_BYTE(p, c) &= ~ANYOF_BIT(c)) #define ANYOF_BITMAP_TEST(p, c) (ANYOF_BITMAP_BYTE(p, c) & ANYOF_BIT(c)) @@ -305,6 +309,7 @@ struct regnode_charclass_class { /* has [[:blah:]] classes */ #define ANYOF_CLASS_SKIP ((ANYOF_CLASS_SIZE - 1)/sizeof(regnode)) #define ANYOF_CLASS_ADD_SKIP (ANYOF_CLASS_SKIP - ANYOF_SKIP) + /* * Utility definitions. */ @@ -454,24 +459,28 @@ typedef struct _reg_trie_trans reg_trie_trans; /* anything in here that needs to be freed later should be dealt with in pregfree */ struct _reg_trie_data { - U16 uniquecharcount; - U32 lasttrans; - U16 *charmap; - HV *widecharmap; - reg_trie_state *states; - reg_trie_trans *trans; - char *bitmap; - U32 refcount; - U32 startstate; - STRLEN minlen; - STRLEN maxlen; - U32 *wordlen; - U32 laststate; /* Build only */ + U16 uniquecharcount; /* unique chars in trie (width of trans table) */ + U32 lasttrans; /* last valid transition element */ + U16 *charmap; /* byte to charid lookup array */ + HV *widecharmap; /* code points > 255 to charid */ + reg_trie_state *states; /* state data */ + reg_trie_trans *trans; /* array of transition elements */ + char *bitmap; /* stclass bitmap */ + U32 refcount; /* number of times this trie is referenced */ + U32 startstate; /* initial state - used for common prefix optimisation */ + STRLEN minlen; /* minimum length of words in trie - build/opt only? */ + STRLEN maxlen; /* maximum length of words in trie - build/opt only? */ + U32 *wordlen; /* array of lengths of words */ + U16 *jump; /* optional 1 indexed array of offsets before tail + for the node following a given word. */ + U16 *nextword; /* optional 1 indexed array to support linked list + of duplicate wordnums */ + U32 laststate; /* Build only */ + U32 wordcount; /* Build only */ #ifdef DEBUGGING - U16 wordcount; /* Build only */ - STRLEN charcount; /* Build only */ - AV *words; - AV *revcharmap; + STRLEN charcount; /* Build only */ + AV *words; /* Array of words contained in trie, for dumping */ + AV *revcharmap; /* Map of each charid back to its character representation */ #endif }; typedef struct _reg_trie_data reg_trie_data; @@ -489,11 +498,13 @@ typedef struct _reg_ac_data reg_ac_data; three different sets... */ #define TRIE_BITMAP(p) (((reg_trie_data *)(p))->bitmap) -#define TRIE_BITMAP_BYTE(p, c) (TRIE_BITMAP(p)[(((U8)c) >> 3) & 31]) +#define TRIE_BITMAP_BYTE(p, c) (TRIE_BITMAP(p)[(((U8)(c)) >> 3) & 31]) #define TRIE_BITMAP_SET(p, c) (TRIE_BITMAP_BYTE(p, c) |= ANYOF_BIT((U8)c)) #define TRIE_BITMAP_CLEAR(p,c) (TRIE_BITMAP_BYTE(p, c) &= ~ANYOF_BIT((U8)c)) #define TRIE_BITMAP_TEST(p, c) (TRIE_BITMAP_BYTE(p, c) & ANYOF_BIT((U8)c)) +#define BITMAP_BYTE(p, c) (((U8*)p)[(((U8)(c)) >> 3) & 31]) +#define BITMAP_TEST(p, c) (BITMAP_BYTE(p, c) & ANYOF_BIT((U8)c)) /* these defines assume uniquecharcount is the correct variable, and state may be evaluated twice */ #define TRIE_NODENUM(state) (((state)-1)/(trie->uniquecharcount)+1) @@ -501,15 +512,10 @@ typedef struct _reg_ac_data reg_ac_data; #define TRIE_NODEIDX(state) ((state) ? (((state)-1)*(trie->uniquecharcount)+1) : (state)) #ifdef DEBUGGING -#define TRIE_WORDCOUNT(trie) ((trie)->wordcount) #define TRIE_CHARCOUNT(trie) ((trie)->charcount) -#define TRIE_LASTSTATE(trie) ((trie)->laststate) #define TRIE_REVCHARMAP(trie) ((trie)->revcharmap) #else -#define TRIE_WORDCOUNT(trie) (trie_wordcount) #define TRIE_CHARCOUNT(trie) (trie_charcount) -/*#define TRIE_LASTSTATE(trie) (trie_laststate)*/ -#define TRIE_LASTSTATE(trie) ((trie)->laststate) #define TRIE_REVCHARMAP(trie) (trie_revcharmap) #endif |