diff options
author | Hugo van der Sanden <hv@crypt.org> | 2003-01-21 00:44:20 +0000 |
---|---|---|
committer | hv <hv@crypt.org> | 2003-01-21 00:44:20 +0000 |
commit | 388cc4de5f48b02cc9fe9b962f02cf603af02178 (patch) | |
tree | 7a2fe4cc9c53651d71b5c5c269c407429ad6c6f7 /regexec.c | |
parent | 40b8d195ef25e04da53075384ae3c1fd5b4b5876 (diff) | |
download | perl-388cc4de5f48b02cc9fe9b962f02cf603af02178.tar.gz |
integrate #18349 from maint-5.8:
At least partially address [perl #10000] by speeding
up both the ASCII case (by about 2-3%) and the UTF-8 case
(by about 45%). The major trick is to avoid hitting the
costly S_reginclass(). (Even before this patch the speedup
since 5.8.0 was about 40-50%.) After this the UTF-8 case is
still about 30-60% slower than the ASCII case. (Note that
I'm unable to reproduce the 10-fold speed difference of the
original bug report; I can see a factor of 2 or 3, but no more.)
p4raw-id: //depot/perl@18529
p4raw-integrated: from //depot/maint-5.8/perl@18528 'merge in'
regexec.c (@18347..)
Diffstat (limited to 'regexec.c')
-rw-r--r-- | regexec.c | 75 |
1 file changed, 54 insertions, 21 deletions
```diff
@@ -959,25 +959,40 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
     /* We know what class it must start with. */
     switch (OP(c)) {
     case ANYOF:
-        while (s < strend) {
-            STRLEN skip = do_utf8 ? UTF8SKIP(s) : 1;
-
-            if (do_utf8 ?
-                reginclass(c, (U8*)s, 0, do_utf8) :
-                REGINCLASS(c, (U8*)s) ||
-                (ANYOF_FOLD_SHARP_S(c, s, strend) &&
-                 /* The assignment of 2 is intentional:
-                  * for the sharp s, the skip is 2. */
-                 (skip = SHARP_S_SKIP)
-                )) {
-                if (tmp && (norun || regtry(prog, s)))
-                    goto got_it;
-                else
-                    tmp = doevery;
-            }
-            else
-                tmp = 1;
-            s += skip;
+        if (do_utf8) {
+            while (s < strend) {
+                if ((ANYOF_FLAGS(c) & ANYOF_UNICODE) ||
+                    !UTF8_IS_INVARIANT((U8)s[0]) ?
+                    reginclass(c, (U8*)s, 0, do_utf8) :
+                    REGINCLASS(c, (U8*)s)) {
+                    if (tmp && (norun || regtry(prog, s)))
+                        goto got_it;
+                    else
+                        tmp = doevery;
+                }
+                else
+                    tmp = 1;
+                s += UTF8SKIP(s);
+            }
+        }
+        else {
+            while (s < strend) {
+                STRLEN skip = 1;
+
+                if (REGINCLASS(c, (U8*)s) ||
+                    (ANYOF_FOLD_SHARP_S(c, s, strend) &&
+                     /* The assignment of 2 is intentional:
+                      * for the folded sharp s, the skip is 2. */
+                     (skip = SHARP_S_SKIP))) {
+                    if (tmp && (norun || regtry(prog, s)))
+                        goto got_it;
+                    else
+                        tmp = doevery;
+                }
+                else
+                    tmp = 1;
+                s += skip;
+            }
         }
         break;
     case CANY:
@@ -4053,8 +4068,26 @@ S_regrepeat(pTHX_ regnode *p, I32 max)
     case ANYOF:
         if (do_utf8) {
             loceol = PL_regeol;
-            while (hardcount < max && scan < loceol &&
-                   reginclass(p, (U8*)scan, 0, do_utf8)) {
+            while (hardcount < max && scan < loceol) {
+                bool cont = FALSE;
+                if (ANYOF_FLAGS(p) & ANYOF_UNICODE) {
+                    if (reginclass(p, (U8*)scan, 0, do_utf8))
+                        cont = TRUE;
+                }
+                else {
+                    U8 c = (U8)scan[0];
+
+                    if (UTF8_IS_INVARIANT(c)) {
+                        if (ANYOF_BITMAP_TEST(p, c))
+                            cont = TRUE;
+                    }
+                    else {
+                        if (reginclass(p, (U8*)scan, 0, do_utf8))
+                            cont = TRUE;
+                    }
+                }
+                if (!cont)
+                    break;
                 scan += UTF8SKIP(scan);
                 hardcount++;
             }
```