summary | refs | log | tree | commit | diff
path: root/regcomp.c
diff options
context:
space:
mode:
author Karl Williamson <public@khwilliamson.com> 2010-11-11 20:07:09 -0700
committer Father Chrysostomos <sprout@cpan.org> 2010-11-22 13:32:50 -0800
commit cefafd73018b048fa66d2b22250431112141955a (patch)
tree 3fceda48445d91fea7e34f4e6a5a74a5a2588a30 /regcomp.c
parent c355c09a82f4c7345a6bafa5322643acdb584e80 (diff)
download perl-cefafd73018b048fa66d2b22250431112141955a.tar.gz
regex: free up bit in ANYOF node
This patch causes all locale ANYOF nodes to have a class bitmap (4 bytes), even if they don't have a class (such as \w, \d, [:posix:]). This frees up a bit in the flags field that was used to signal if the node had the bitmap. I intend to use it instead to signal that loading a swash, which is slow, can be bypassed. Thus this is a time/space tradeoff, applicable not just to locale nodes: adding a word to the locale nodes saves time for all nodes. I added the ANYOF_CLASS_TEST_ANY_SET() macro to determine quickly if there are actually any classes in the node. Minimal code was changed, so this can be easily reversed if another bit frees up. Another possibility is to share with the ANYOF_EOS bit instead, as this is used just in the optimizer's start class, and only in regcomp.c. But this requires more careful coding. Another possibility is to add a byte (hence likely at least 4, because of alignment issues) to store extra flags. And still another possibility is to add just the byte for the start class, which would not need to affect other ANYOF nodes, since the EOS bit is not used outside regcomp.c. But various routines in regcomp.c assume that the start class and other ANYOF nodes are interchangeable, so this option would require more code changes.
Diffstat (limited to 'regcomp.c')
-rw-r--r-- regcomp.c 5
1 file changed, 3 insertions, 2 deletions
diff --git a/regcomp.c b/regcomp.c
index 787517b942..26d480f16b 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -3572,7 +3572,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
goto do_default;
if (flags & SCF_DO_STCLASS_OR) { /* Everything but \n */
value = (ANYOF_BITMAP_TEST(data->start_class,'\n')
- || (data->start_class->flags & ANYOF_CLASS));
+ || ((data->start_class->flags & ANYOF_CLASS)
+ && ANYOF_CLASS_TEST_ANY_SET(data->start_class)));
cl_anything(pRExC_state, data->start_class);
}
if (flags & SCF_DO_STCLASS_AND || !value)
@@ -9519,7 +9520,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o)
EMIT_ANYOF_TEST_SEPARATOR(do_sep,sv,flags);
/* output any special charclass tests (used mostly under use locale) */
- if (o->flags & ANYOF_CLASS)
+ if (o->flags & ANYOF_CLASS && ANYOF_CLASS_TEST_ANY_SET(o))
for (i = 0; i < (int)(sizeof(anyofs)/sizeof(char*)); i++)
if (ANYOF_CLASS_TEST(o,i)) {
sv_catpv(sv, anyofs[i]);