summaryrefslogtreecommitdiff
path: root/regexec.c
diff options
context:
space:
mode:
authorKarl Williamson <khw@cpan.org>2021-06-05 20:43:17 -0600
committerKarl Williamson <khw@cpan.org>2021-07-30 05:41:28 -0600
commitb5288edf08d3f057f4cb70d49137c0a10da649a4 (patch)
treec303147ff5ab1593fb3b3374483aa43c75533491 /regexec.c
parentad494d15d17ba5d23ba2b9adc3d990c5f566f6f1 (diff)
downloadperl-b5288edf08d3f057f4cb70d49137c0a10da649a4.tar.gz
regexec.c: Use lsbit_pos32() to avoid iterations
Before this commit, the code looped through a bitmap looking for a set bit. Now that we have a fast way to find where a set bit is, use it, and avoid the fruitless iterations.
Diffstat (limited to 'regexec.c')
-rw-r--r--regexec.c39
1 files changed, 13 insertions, 26 deletions
diff --git a/regexec.c b/regexec.c
index b06b6b0ea3..512da8b77c 100644
--- a/regexec.c
+++ b/regexec.c
@@ -10561,26 +10561,11 @@ S_reginclass(pTHX_ regexp * const prog, const regnode * const n, const U8* const
else if ( ANYOF_POSIXL_TEST_ANY_SET(n)
&& c <= U8_MAX /* param to isFOO_lc() */
) {
-
/* The data structure is arranged so bits 0, 2, 4, ... are set
* if the class includes the Posix character class given by
* bit/2; and 1, 3, 5, ... are set if the class includes the
- * complemented Posix class given by int(bit/2). So we loop
- * through the bits, each time changing whether we complement
- * the result or not. Suppose for the sake of illustration
- * that bits 0-3 mean respectively, \w, \W, \s, \S. If bit 0
- * is set, it means there is a match for this ANYOF node if the
- * character is in the class given by the expression (0 / 2 = 0
- * = \w). If it is in that class, isFOO_lc() will return 1,
- * and since 'to_complement' is 0, the result will stay TRUE,
- * and we exit the loop. Suppose instead that bit 0 is 0, but
- * bit 1 is 1. That means there is a match if the character
- * matches \W. We won't bother to call isFOO_lc() on bit 0,
- * but will on bit 1. On the second iteration 'to_complement'
- * will be 1, so the exclusive or will reverse things, so we
- * are testing for \W. On the third iteration, 'to_complement'
- * will be 0, and we would be testing for \s; the fourth
- * iteration would test for \S, etc.
+ * complemented Posix class given by int(bit/2), so the
+ * remainder modulo 2 tells us if to complement or not.
*
* Note that this code assumes that all the classes are closed
* under folding. For example, if a character matches \w, then
@@ -10592,19 +10577,21 @@ S_reginclass(pTHX_ regexp * const prog, const regnode * const n, const U8* const
* loop could be used below to iterate over both the source
* character, and its fold (if different) */
- int count = 0;
- int to_complement = 0;
+ U32 posixl_bits = ANYOF_POSIXL_BITMAP(n);
- while (count < ANYOF_MAX) {
- if (ANYOF_POSIXL_TEST(n, count)
- && to_complement ^ cBOOL(isFOO_lc(count/2, (U8) c)))
- {
+ do {
+ /* Find the next set bit indicating a class to try matching
+ * against */
+ U8 bit_pos = lsbit_pos32(posixl_bits);
+
+ if (bit_pos % 2 ^ cBOOL(isFOO_lc(bit_pos/2, (U8) c))) {
match = TRUE;
break;
}
- count++;
- to_complement ^= 1;
- }
+
+ /* Remove this class from consideration; repeat */
+ POSIXL_CLEAR(posixl_bits, bit_pos);
+ } while(posixl_bits != 0);
}
}
}