summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKarl Williamson <public@khwilliamson.com>2010-12-15 18:34:59 -0700
committerKarl Williamson <public@khwilliamson.com>2010-12-20 16:05:35 -0700
commit390656605358790e356331349a2f922ae36ae5df (patch)
tree1bb444ae420b11af97ee5cbc3560144db454e3ac
parentf53580fec42f3b12264ee27b756dec257c0bb77a (diff)
downloadperl-390656605358790e356331349a2f922ae36ae5df.tar.gz
Change name of regex internal macro to reflect its new meaning
ANYOF_FOLD is now set under fewer conditions than before. In most cases the bitmap for characters 0-255 is fully calculated at compile time with the folds included, and the flag is not set. The flag is still set in two cases: under locale, where the folds aren't known at compile time; and for things accessible only through a swash. Renaming the macro to reflect its new meaning makes certain optimizations more obvious.
-rw-r--r--regcomp.c34
-rw-r--r--regcomp.h15
-rw-r--r--regexec.c6
-rw-r--r--utf8.h2
4 files changed, 33 insertions, 24 deletions
diff --git a/regcomp.c b/regcomp.c
index 122c560205..d52bf13ea4 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -710,7 +710,7 @@ S_cl_anything(const RExC_state_t *pRExC_state, struct regnode_charclass_class *c
cl->flags = ANYOF_EOS|ANYOF_UNICODE_ALL;
if (LOC)
cl->flags |= ANYOF_LOCALE;
- cl->flags |= ANYOF_FOLD;
+ cl->flags |= ANYOF_LOC_NONBITMAP_FOLD;
}
/* Can match anything (initialization) */
@@ -767,8 +767,8 @@ S_cl_and(struct regnode_charclass_class *cl,
if (!(ANYOF_CLASS_TEST_ANY_SET(and_with))
&& !(ANYOF_CLASS_TEST_ANY_SET(cl))
&& (and_with->flags & ANYOF_LOCALE) == (cl->flags & ANYOF_LOCALE)
- && !(and_with->flags & ANYOF_FOLD)
- && !(cl->flags & ANYOF_FOLD)) {
+ && !(and_with->flags & ANYOF_LOC_NONBITMAP_FOLD)
+ && !(cl->flags & ANYOF_LOC_NONBITMAP_FOLD)) {
int i;
if (and_with->flags & ANYOF_INVERT)
@@ -781,8 +781,8 @@ S_cl_and(struct regnode_charclass_class *cl,
if (!(and_with->flags & ANYOF_EOS))
cl->flags &= ~ANYOF_EOS;
- if (!(and_with->flags & ANYOF_FOLD))
- cl->flags &= ~ANYOF_FOLD;
+ if (!(and_with->flags & ANYOF_LOC_NONBITMAP_FOLD))
+ cl->flags &= ~ANYOF_LOC_NONBITMAP_FOLD;
if (cl->flags & ANYOF_UNICODE_ALL && and_with->flags & ANYOF_NONBITMAP &&
!(and_with->flags & ANYOF_INVERT)) {
@@ -818,8 +818,8 @@ S_cl_or(const RExC_state_t *pRExC_state, struct regnode_charclass_class *cl, con
* (OK1(i) | OK1(i')) | (!OK1(i) & !OK1(i'))
*/
if ( (or_with->flags & ANYOF_LOCALE) == (cl->flags & ANYOF_LOCALE)
- && !(or_with->flags & ANYOF_FOLD)
- && !(cl->flags & ANYOF_FOLD) ) {
+ && !(or_with->flags & ANYOF_LOC_NONBITMAP_FOLD)
+ && !(cl->flags & ANYOF_LOC_NONBITMAP_FOLD) ) {
int i;
for (i = 0; i < ANYOF_BITMAP_SIZE; i++)
@@ -831,8 +831,8 @@ S_cl_or(const RExC_state_t *pRExC_state, struct regnode_charclass_class *cl, con
} else {
/* (B1 | CL1) | (B2 | CL2) = (B1 | B2) | (CL1 | CL2)) */
if ( (or_with->flags & ANYOF_LOCALE) == (cl->flags & ANYOF_LOCALE)
- && (!(or_with->flags & ANYOF_FOLD)
- || (cl->flags & ANYOF_FOLD)) ) {
+ && (!(or_with->flags & ANYOF_LOC_NONBITMAP_FOLD)
+ || (cl->flags & ANYOF_LOC_NONBITMAP_FOLD)) ) {
int i;
/* OR char bitmap and class bitmap separately */
@@ -851,8 +851,8 @@ S_cl_or(const RExC_state_t *pRExC_state, struct regnode_charclass_class *cl, con
if (or_with->flags & ANYOF_EOS)
cl->flags |= ANYOF_EOS;
- if (or_with->flags & ANYOF_FOLD)
- cl->flags |= ANYOF_FOLD;
+ if (or_with->flags & ANYOF_LOC_NONBITMAP_FOLD)
+ cl->flags |= ANYOF_LOC_NONBITMAP_FOLD;
/* If both nodes match something outside the bitmap, but what they match
* outside is not the same pointer, and hence not easily compared, give up
@@ -3085,7 +3085,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
if (uc >= 0x100 ||
(!(data->start_class->flags & (ANYOF_CLASS | ANYOF_LOCALE))
&& !ANYOF_BITMAP_TEST(data->start_class, uc)
- && (!(data->start_class->flags & ANYOF_FOLD)
+ && (!(data->start_class->flags & ANYOF_LOC_NONBITMAP_FOLD)
|| !ANYOF_BITMAP_TEST(data->start_class, PL_fold_latin1[uc])))
)
compat = 0;
@@ -3140,7 +3140,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
if (compat) {
ANYOF_BITMAP_SET(data->start_class, uc);
data->start_class->flags &= ~ANYOF_EOS;
- data->start_class->flags |= ANYOF_FOLD;
+ data->start_class->flags |= ANYOF_LOC_NONBITMAP_FOLD;
if (OP(scan) == EXACTFL) {
data->start_class->flags |= ANYOF_LOCALE;
}
@@ -3155,7 +3155,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
}
}
else if (flags & SCF_DO_STCLASS_OR) {
- if (data->start_class->flags & ANYOF_FOLD) {
+ if (data->start_class->flags & ANYOF_LOC_NONBITMAP_FOLD) {
/* false positive possible if the class is case-folded.
Assume that the locale settings are the same... */
if (uc < 0x100) {
@@ -8913,7 +8913,7 @@ parseit:
* which we have to wait to see what folding is in effect at runtime, and
* for things not in the bitmap */
if (FOLD && (LOC || ANYOF_FLAGS(ret) & ANYOF_NONBITMAP)) {
- ANYOF_FLAGS(ret) |= ANYOF_FOLD;
+ ANYOF_FLAGS(ret) |= ANYOF_LOC_NONBITMAP_FOLD;
}
/* Optimize inverted simple patterns (e.g. [^a-z]). Note that this doesn't
@@ -8974,7 +8974,7 @@ parseit:
/* A locale node with one point can be folded; all the other cases
* with folding will have two points, since we calculate them above
*/
- if (ANYOF_FLAGS(ret) & ANYOF_FOLD) {
+ if (ANYOF_FLAGS(ret) & ANYOF_LOC_NONBITMAP_FOLD) {
op = EXACTFL;
}
else {
@@ -9745,7 +9745,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o)
if (flags & ANYOF_LOCALE)
sv_catpvs(sv, "{loc}");
- if (flags & ANYOF_FOLD)
+ if (flags & ANYOF_LOC_NONBITMAP_FOLD)
sv_catpvs(sv, "{i}");
Perl_sv_catpvf(aTHX_ sv, "[%s", PL_colors[0]);
if (flags & ANYOF_INVERT)
diff --git a/regcomp.h b/regcomp.h
index 00fd94517f..3e87aa93ec 100644
--- a/regcomp.h
+++ b/regcomp.h
@@ -314,9 +314,18 @@ struct regnode_charclass_class {
/* Flags for node->flags of ANYOF */
-#define ANYOF_LOCALE 0x01
-#define ANYOF_FOLD 0x02
-#define ANYOF_INVERT 0x04
+#define ANYOF_LOCALE 0x01
+
+/* The fold is calculated and stored in the bitmap where possible at compile
+ * time. However there are two cases where it isn't possible. These share
+ * this bit: 1) under locale, where the actual folding varies depending on
+ * what the locale is at the time of execution; and 2) where the folding is
+ * specified in a swash, not the bitmap, such as characters which aren't
+ * specified in the bitmap, or properties that aren't looked at at compile time
+ */
+#define ANYOF_LOC_NONBITMAP_FOLD 0x02
+
+#define ANYOF_INVERT 0x04
/* CLASS is never set unless LOCALE is too: has runtime \d, \w, [:posix:], ... */
#define ANYOF_CLASS 0x08
diff --git a/regexec.c b/regexec.c
index 7778992a63..a4b4fba23f 100644
--- a/regexec.c
+++ b/regexec.c
@@ -6339,7 +6339,7 @@ S_reginclass(pTHX_ const regexp * const prog, register const regnode * const n,
if (c < 256) {
if (ANYOF_BITMAP_TEST(n, c))
match = TRUE;
- else if (flags & ANYOF_FOLD) {
+ else if (flags & ANYOF_LOC_NONBITMAP_FOLD) {
U8 f;
if (flags & ANYOF_LOCALE) {
@@ -6397,7 +6397,7 @@ S_reginclass(pTHX_ const regexp * const prog, register const regnode * const n,
if (!match) {
if (utf8_target && (flags & ANYOF_UNICODE_ALL)) {
if (c >= 256
- || ((flags & ANYOF_FOLD) /* Latin1 1 that has a non-Latin1 fold
+ || ((flags & ANYOF_LOC_NONBITMAP_FOLD) /* Latin1 1 that has a non-Latin1 fold
should match */
&& _HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(c)))
{
@@ -6420,7 +6420,7 @@ S_reginclass(pTHX_ const regexp * const prog, register const regnode * const n,
}
if (swash_fetch(sw, utf8_p, 1))
match = TRUE;
- else if (flags & ANYOF_FOLD) {
+ else if (flags & ANYOF_LOC_NONBITMAP_FOLD) {
if (!match && lenp && av) {
I32 i;
for (i = 0; i <= av_len(av); i++) {
diff --git a/utf8.h b/utf8.h
index 405b8b4cb3..a162114ca2 100644
--- a/utf8.h
+++ b/utf8.h
@@ -291,7 +291,7 @@ Perl's extended UTF-8 means we can have start bytes up to FF.
#define ANYOF_FOLD_SHARP_S(node, input, end) \
(ANYOF_BITMAP_TEST(node, LATIN_SMALL_LETTER_SHARP_S) && \
(ANYOF_FLAGS(node) & ANYOF_NONBITMAP) && \
- (ANYOF_FLAGS(node) & ANYOF_FOLD) && \
+ (ANYOF_FLAGS(node) & ANYOF_LOC_NONBITMAP_FOLD) && \
((end) > (input) + 1) && \
toLOWER((input)[0]) == 's' && \
toLOWER((input)[1]) == 's')