diff options
author | Karl Williamson <public@khwilliamson.com> | 2010-12-15 18:34:59 -0700 |
---|---|---|
committer | Karl Williamson <public@khwilliamson.com> | 2010-12-20 16:05:35 -0700 |
commit | 390656605358790e356331349a2f922ae36ae5df (patch) | |
tree | 1bb444ae420b11af97ee5cbc3560144db454e3ac | |
parent | f53580fec42f3b12264ee27b756dec257c0bb77a (diff) | |
download | perl-390656605358790e356331349a2f922ae36ae5df.tar.gz |
Change name of regex intrnl macro to new meaning
ANYOF_FOLD is now set under only two conditions. In all other cases the
bitmap for characters 0-255 is fully calculated with the folds, and the
flag is not set. One condition is under locale, where the folds aren't
known at compile time; the other is for things accessible through a
swash.
Renaming the macro to reflect its new meaning makes certain optimizations
more obvious.
-rw-r--r-- | regcomp.c | 34 | ||||
-rw-r--r-- | regcomp.h | 15 | ||||
-rw-r--r-- | regexec.c | 6 | ||||
-rw-r--r-- | utf8.h | 2 |
4 files changed, 33 insertions, 24 deletions
@@ -710,7 +710,7 @@ S_cl_anything(const RExC_state_t *pRExC_state, struct regnode_charclass_class *c cl->flags = ANYOF_EOS|ANYOF_UNICODE_ALL; if (LOC) cl->flags |= ANYOF_LOCALE; - cl->flags |= ANYOF_FOLD; + cl->flags |= ANYOF_LOC_NONBITMAP_FOLD; } /* Can match anything (initialization) */ @@ -767,8 +767,8 @@ S_cl_and(struct regnode_charclass_class *cl, if (!(ANYOF_CLASS_TEST_ANY_SET(and_with)) && !(ANYOF_CLASS_TEST_ANY_SET(cl)) && (and_with->flags & ANYOF_LOCALE) == (cl->flags & ANYOF_LOCALE) - && !(and_with->flags & ANYOF_FOLD) - && !(cl->flags & ANYOF_FOLD)) { + && !(and_with->flags & ANYOF_LOC_NONBITMAP_FOLD) + && !(cl->flags & ANYOF_LOC_NONBITMAP_FOLD)) { int i; if (and_with->flags & ANYOF_INVERT) @@ -781,8 +781,8 @@ S_cl_and(struct regnode_charclass_class *cl, if (!(and_with->flags & ANYOF_EOS)) cl->flags &= ~ANYOF_EOS; - if (!(and_with->flags & ANYOF_FOLD)) - cl->flags &= ~ANYOF_FOLD; + if (!(and_with->flags & ANYOF_LOC_NONBITMAP_FOLD)) + cl->flags &= ~ANYOF_LOC_NONBITMAP_FOLD; if (cl->flags & ANYOF_UNICODE_ALL && and_with->flags & ANYOF_NONBITMAP && !(and_with->flags & ANYOF_INVERT)) { @@ -818,8 +818,8 @@ S_cl_or(const RExC_state_t *pRExC_state, struct regnode_charclass_class *cl, con * (OK1(i) | OK1(i')) | (!OK1(i) & !OK1(i')) */ if ( (or_with->flags & ANYOF_LOCALE) == (cl->flags & ANYOF_LOCALE) - && !(or_with->flags & ANYOF_FOLD) - && !(cl->flags & ANYOF_FOLD) ) { + && !(or_with->flags & ANYOF_LOC_NONBITMAP_FOLD) + && !(cl->flags & ANYOF_LOC_NONBITMAP_FOLD) ) { int i; for (i = 0; i < ANYOF_BITMAP_SIZE; i++) @@ -831,8 +831,8 @@ S_cl_or(const RExC_state_t *pRExC_state, struct regnode_charclass_class *cl, con } else { /* (B1 | CL1) | (B2 | CL2) = (B1 | B2) | (CL1 | CL2)) */ if ( (or_with->flags & ANYOF_LOCALE) == (cl->flags & ANYOF_LOCALE) - && (!(or_with->flags & ANYOF_FOLD) - || (cl->flags & ANYOF_FOLD)) ) { + && (!(or_with->flags & ANYOF_LOC_NONBITMAP_FOLD) + || (cl->flags & ANYOF_LOC_NONBITMAP_FOLD)) ) { int i; /* OR char bitmap and class bitmap 
separately */ @@ -851,8 +851,8 @@ S_cl_or(const RExC_state_t *pRExC_state, struct regnode_charclass_class *cl, con if (or_with->flags & ANYOF_EOS) cl->flags |= ANYOF_EOS; - if (or_with->flags & ANYOF_FOLD) - cl->flags |= ANYOF_FOLD; + if (or_with->flags & ANYOF_LOC_NONBITMAP_FOLD) + cl->flags |= ANYOF_LOC_NONBITMAP_FOLD; /* If both nodes match something outside the bitmap, but what they match * outside is not the same pointer, and hence not easily compared, give up @@ -3085,7 +3085,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, if (uc >= 0x100 || (!(data->start_class->flags & (ANYOF_CLASS | ANYOF_LOCALE)) && !ANYOF_BITMAP_TEST(data->start_class, uc) - && (!(data->start_class->flags & ANYOF_FOLD) + && (!(data->start_class->flags & ANYOF_LOC_NONBITMAP_FOLD) || !ANYOF_BITMAP_TEST(data->start_class, PL_fold_latin1[uc]))) ) compat = 0; @@ -3140,7 +3140,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, if (compat) { ANYOF_BITMAP_SET(data->start_class, uc); data->start_class->flags &= ~ANYOF_EOS; - data->start_class->flags |= ANYOF_FOLD; + data->start_class->flags |= ANYOF_LOC_NONBITMAP_FOLD; if (OP(scan) == EXACTFL) { data->start_class->flags |= ANYOF_LOCALE; } @@ -3155,7 +3155,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, } } else if (flags & SCF_DO_STCLASS_OR) { - if (data->start_class->flags & ANYOF_FOLD) { + if (data->start_class->flags & ANYOF_LOC_NONBITMAP_FOLD) { /* false positive possible if the class is case-folded. Assume that the locale settings are the same... */ if (uc < 0x100) { @@ -8913,7 +8913,7 @@ parseit: * which we have to wait to see what folding is in effect at runtime, and * for things not in the bitmap */ if (FOLD && (LOC || ANYOF_FLAGS(ret) & ANYOF_NONBITMAP)) { - ANYOF_FLAGS(ret) |= ANYOF_FOLD; + ANYOF_FLAGS(ret) |= ANYOF_LOC_NONBITMAP_FOLD; } /* Optimize inverted simple patterns (e.g. [^a-z]). 
Note that this doesn't @@ -8974,7 +8974,7 @@ parseit: /* A locale node with one point can be folded; all the other cases * with folding will have two points, since we calculate them above */ - if (ANYOF_FLAGS(ret) & ANYOF_FOLD) { + if (ANYOF_FLAGS(ret) & ANYOF_LOC_NONBITMAP_FOLD) { op = EXACTFL; } else { @@ -9745,7 +9745,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o) if (flags & ANYOF_LOCALE) sv_catpvs(sv, "{loc}"); - if (flags & ANYOF_FOLD) + if (flags & ANYOF_LOC_NONBITMAP_FOLD) sv_catpvs(sv, "{i}"); Perl_sv_catpvf(aTHX_ sv, "[%s", PL_colors[0]); if (flags & ANYOF_INVERT) @@ -314,9 +314,18 @@ struct regnode_charclass_class { /* Flags for node->flags of ANYOF */ -#define ANYOF_LOCALE 0x01 -#define ANYOF_FOLD 0x02 -#define ANYOF_INVERT 0x04 +#define ANYOF_LOCALE 0x01 + +/* The fold is calculated and stored in the bitmap where possible at compile + * time. However there are two cases where it isn't possible. These share + * this bit: 1) under locale, where the actual folding varies depending on + * what the locale is at the time of execution; and 2) where the folding is + * specified in a swash, not the bitmap, such as characters which aren't + * specified in the bitmap, or properties that aren't looked at at compile time + */ +#define ANYOF_LOC_NONBITMAP_FOLD 0x02 + +#define ANYOF_INVERT 0x04 /* CLASS is never set unless LOCALE is too: has runtime \d, \w, [:posix:], ... 
*/ #define ANYOF_CLASS 0x08 @@ -6339,7 +6339,7 @@ S_reginclass(pTHX_ const regexp * const prog, register const regnode * const n, if (c < 256) { if (ANYOF_BITMAP_TEST(n, c)) match = TRUE; - else if (flags & ANYOF_FOLD) { + else if (flags & ANYOF_LOC_NONBITMAP_FOLD) { U8 f; if (flags & ANYOF_LOCALE) { @@ -6397,7 +6397,7 @@ S_reginclass(pTHX_ const regexp * const prog, register const regnode * const n, if (!match) { if (utf8_target && (flags & ANYOF_UNICODE_ALL)) { if (c >= 256 - || ((flags & ANYOF_FOLD) /* Latin1 1 that has a non-Latin1 fold + || ((flags & ANYOF_LOC_NONBITMAP_FOLD) /* Latin1 1 that has a non-Latin1 fold should match */ && _HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(c))) { @@ -6420,7 +6420,7 @@ S_reginclass(pTHX_ const regexp * const prog, register const regnode * const n, } if (swash_fetch(sw, utf8_p, 1)) match = TRUE; - else if (flags & ANYOF_FOLD) { + else if (flags & ANYOF_LOC_NONBITMAP_FOLD) { if (!match && lenp && av) { I32 i; for (i = 0; i <= av_len(av); i++) { @@ -291,7 +291,7 @@ Perl's extended UTF-8 means we can have start bytes up to FF. #define ANYOF_FOLD_SHARP_S(node, input, end) \ (ANYOF_BITMAP_TEST(node, LATIN_SMALL_LETTER_SHARP_S) && \ (ANYOF_FLAGS(node) & ANYOF_NONBITMAP) && \ - (ANYOF_FLAGS(node) & ANYOF_FOLD) && \ + (ANYOF_FLAGS(node) & ANYOF_LOC_NONBITMAP_FOLD) && \ ((end) > (input) + 1) && \ toLOWER((input)[0]) == 's' && \ toLOWER((input)[1]) == 's') |