summaryrefslogtreecommitdiff
path: root/regexec.c
diff options
context:
space:
mode:
author: Karl Williamson <public@khwilliamson.com> 2010-10-31 13:17:34 -0600
committer: Father Chrysostomos <sprout@cpan.org> 2010-10-31 16:15:32 -0700
commit: 7cdde5444c9ad8cccf237ec340ddb54f58ce3cf0 (patch)
tree: 0995926659c26d2ff1bd4e8ea72602d47f5c5a45 /regexec.c
parent: ea6756a64e0a36eb91bcea39467a2253c31192b8 (diff)
download: perl-7cdde5444c9ad8cccf237ec340ddb54f58ce3cf0.tar.gz
reginclass: Reorder fastest first
This patch simply moves the block of code that does the bitmap tests in front of the block of code that deals with potential things not in the bitmap. The reason to do this is that it is faster to find things in the bitmap than to have to create a utf8 swash. The patch also adds some comments. Because the bitmap block now comes first, it no longer has to test whether there has already been a match, whereas the swash block (now second) does; the if statements for those two blocks are adjusted accordingly. The proof that this doesn't break anything stems from the fact that the routine never stops early. If there wasn't a match in the first block of code, it would execute the second block. Thus swapping the order doesn't affect the outcome. The only side effect of the block that used to come first is reading in the swash. That side effect won't happen when the block no longer gets executed because the bitmap block has already matched. An error could therefore be introduced if there were coding errors elsewhere that didn't initialize the swash before using it. But that doesn't appear to be the case, as all tests pass.
Diffstat (limited to 'regexec.c')
-rw-r--r-- regexec.c 106
1 files changed, 55 insertions, 51 deletions
diff --git a/regexec.c b/regexec.c
index 0fc505752d..3ca84516b7 100644
--- a/regexec.c
+++ b/regexec.c
@@ -6236,57 +6236,8 @@ S_reginclass(pTHX_ const regexp * const prog, register const regnode * const n,
maxlen = c_len;
}
- if (utf8_target || (flags & ANYOF_UNICODE)) {
- if (utf8_target && !ANYOF_RUNTIME(n)) {
- if (c < 256 && ANYOF_BITMAP_TEST(n, c))
- match = TRUE;
- }
- if (!match && utf8_target && (flags & ANYOF_UNICODE_ALL) && c >= 256)
- match = TRUE;
- if (!match) {
- AV *av;
- SV * const sw = regclass_swash(prog, n, TRUE, 0, (SV**)&av);
-
- if (sw) {
- U8 * utf8_p;
- if (utf8_target) {
- utf8_p = (U8 *) p;
- } else {
- STRLEN len = 1;
- utf8_p = bytes_to_utf8(p, &len);
- }
- if (swash_fetch(sw, utf8_p, 1))
- match = TRUE;
- else if (flags & ANYOF_FOLD) {
- if (!match && lenp && av) {
- I32 i;
- for (i = 0; i <= av_len(av); i++) {
- SV* const sv = *av_fetch(av, i, FALSE);
- STRLEN len;
- const char * const s = SvPV_const(sv, len);
- if (len <= maxlen && memEQ(s, (char*)utf8_p, len)) {
- *lenp = len;
- match = TRUE;
- break;
- }
- }
- }
- if (!match) {
- U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
-
- STRLEN tmplen;
- to_utf8_fold(utf8_p, tmpbuf, &tmplen);
- if (swash_fetch(sw, tmpbuf, 1))
- match = TRUE;
- }
- }
-
- /* If we allocated a string above, free it */
- if (! utf8_target) Safefree(utf8_p);
- }
- }
- }
- if (!match && c < 256) {
+ /* If this character is potentially in the bitmap, check it */
+ if (c < 256) {
if (ANYOF_BITMAP_TEST(n, c))
match = TRUE;
else if (flags & ANYOF_FOLD) {
@@ -6342,6 +6293,59 @@ S_reginclass(pTHX_ const regexp * const prog, register const regnode * const n,
}
}
+ /* If the bitmap didn't (or couldn't) match, and something outside the
+ * bitmap could match, try that */
+ if (! match && utf8_target || (flags & ANYOF_UNICODE)) {
+ if (utf8_target && !ANYOF_RUNTIME(n)) {
+ if (c < 256 && ANYOF_BITMAP_TEST(n, c))
+ match = TRUE;
+ }
+ if (!match && utf8_target && (flags & ANYOF_UNICODE_ALL) && c >= 256)
+ match = TRUE;
+ if (!match) {
+ AV *av;
+ SV * const sw = regclass_swash(prog, n, TRUE, 0, (SV**)&av);
+
+ if (sw) {
+ U8 * utf8_p;
+ if (utf8_target) {
+ utf8_p = (U8 *) p;
+ } else {
+ STRLEN len = 1;
+ utf8_p = bytes_to_utf8(p, &len);
+ }
+ if (swash_fetch(sw, utf8_p, 1))
+ match = TRUE;
+ else if (flags & ANYOF_FOLD) {
+ if (!match && lenp && av) {
+ I32 i;
+ for (i = 0; i <= av_len(av); i++) {
+ SV* const sv = *av_fetch(av, i, FALSE);
+ STRLEN len;
+ const char * const s = SvPV_const(sv, len);
+ if (len <= maxlen && memEQ(s, (char*)utf8_p, len)) {
+ *lenp = len;
+ match = TRUE;
+ break;
+ }
+ }
+ }
+ if (!match) {
+ U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
+
+ STRLEN tmplen;
+ to_utf8_fold(utf8_p, tmpbuf, &tmplen);
+ if (swash_fetch(sw, tmpbuf, 1))
+ match = TRUE;
+ }
+ }
+
+ /* If we allocated a string above, free it */
+ if (! utf8_target) Safefree(utf8_p);
+ }
+ }
+ }
+
return (flags & ANYOF_INVERT) ? !match : match;
}