diff options
author | Karl Williamson <public@khwilliamson.com> | 2010-10-31 13:17:34 -0600 |
---|---|---|
committer | Father Chrysostomos <sprout@cpan.org> | 2010-10-31 16:15:32 -0700 |
commit | 7cdde5444c9ad8cccf237ec340ddb54f58ce3cf0 (patch) | |
tree | 0995926659c26d2ff1bd4e8ea72602d47f5c5a45 /regexec.c | |
parent | ea6756a64e0a36eb91bcea39467a2253c31192b8 (diff) | |
download | perl-7cdde5444c9ad8cccf237ec340ddb54f58ce3cf0.tar.gz |
reginclass: Reorder fastest first
This patch simply moves the block of code that does the bitmap tests in
front of the block of code that handles characters that may match
outside the bitmap. The reason to do this is that it is faster to find
things in the bitmap than to have to create a utf8 swash.
The patch also adds some comments. Because the bitmap block now runs
first, it no longer has to test whether there has already been a match,
whereas the other block, which now runs second, does; the if statements
for those two blocks are adjusted accordingly.
The proof that this doesn't break anything stems from the fact that the
routine never stops early. In the original order, if there wasn't a
match in the first block of code, it would execute the second block.
Thus swapping the order doesn't affect the outcome. The side effect of
the block that was originally first is reading in the swash. That side
effect won't happen when the block is no longer executed because the
bitmap block has already matched. An error could thus be introduced if
there were coding errors elsewhere that used the swash without
initializing it first, but that doesn't appear to be the case, as all
tests pass.
Diffstat (limited to 'regexec.c')
-rw-r--r-- | regexec.c | 106 |
1 files changed, 55 insertions, 51 deletions
@@ -6236,57 +6236,8 @@ S_reginclass(pTHX_ const regexp * const prog, register const regnode * const n, maxlen = c_len; } - if (utf8_target || (flags & ANYOF_UNICODE)) { - if (utf8_target && !ANYOF_RUNTIME(n)) { - if (c < 256 && ANYOF_BITMAP_TEST(n, c)) - match = TRUE; - } - if (!match && utf8_target && (flags & ANYOF_UNICODE_ALL) && c >= 256) - match = TRUE; - if (!match) { - AV *av; - SV * const sw = regclass_swash(prog, n, TRUE, 0, (SV**)&av); - - if (sw) { - U8 * utf8_p; - if (utf8_target) { - utf8_p = (U8 *) p; - } else { - STRLEN len = 1; - utf8_p = bytes_to_utf8(p, &len); - } - if (swash_fetch(sw, utf8_p, 1)) - match = TRUE; - else if (flags & ANYOF_FOLD) { - if (!match && lenp && av) { - I32 i; - for (i = 0; i <= av_len(av); i++) { - SV* const sv = *av_fetch(av, i, FALSE); - STRLEN len; - const char * const s = SvPV_const(sv, len); - if (len <= maxlen && memEQ(s, (char*)utf8_p, len)) { - *lenp = len; - match = TRUE; - break; - } - } - } - if (!match) { - U8 tmpbuf[UTF8_MAXBYTES_CASE+1]; - - STRLEN tmplen; - to_utf8_fold(utf8_p, tmpbuf, &tmplen); - if (swash_fetch(sw, tmpbuf, 1)) - match = TRUE; - } - } - - /* If we allocated a string above, free it */ - if (! utf8_target) Safefree(utf8_p); - } - } - } - if (!match && c < 256) { + /* If this character is potentially in the bitmap, check it */ + if (c < 256) { if (ANYOF_BITMAP_TEST(n, c)) match = TRUE; else if (flags & ANYOF_FOLD) { @@ -6342,6 +6293,59 @@ S_reginclass(pTHX_ const regexp * const prog, register const regnode * const n, } } + /* If the bitmap didn't (or couldn't) match, and something outside the + * bitmap could match, try that */ + if (! 
match && utf8_target || (flags & ANYOF_UNICODE)) { + if (utf8_target && !ANYOF_RUNTIME(n)) { + if (c < 256 && ANYOF_BITMAP_TEST(n, c)) + match = TRUE; + } + if (!match && utf8_target && (flags & ANYOF_UNICODE_ALL) && c >= 256) + match = TRUE; + if (!match) { + AV *av; + SV * const sw = regclass_swash(prog, n, TRUE, 0, (SV**)&av); + + if (sw) { + U8 * utf8_p; + if (utf8_target) { + utf8_p = (U8 *) p; + } else { + STRLEN len = 1; + utf8_p = bytes_to_utf8(p, &len); + } + if (swash_fetch(sw, utf8_p, 1)) + match = TRUE; + else if (flags & ANYOF_FOLD) { + if (!match && lenp && av) { + I32 i; + for (i = 0; i <= av_len(av); i++) { + SV* const sv = *av_fetch(av, i, FALSE); + STRLEN len; + const char * const s = SvPV_const(sv, len); + if (len <= maxlen && memEQ(s, (char*)utf8_p, len)) { + *lenp = len; + match = TRUE; + break; + } + } + } + if (!match) { + U8 tmpbuf[UTF8_MAXBYTES_CASE+1]; + + STRLEN tmplen; + to_utf8_fold(utf8_p, tmpbuf, &tmplen); + if (swash_fetch(sw, tmpbuf, 1)) + match = TRUE; + } + } + + /* If we allocated a string above, free it */ + if (! utf8_target) Safefree(utf8_p); + } + } + } + return (flags & ANYOF_INVERT) ? !match : match; } |