summaryrefslogtreecommitdiff
path: root/regexec.c
diff options
context:
space:
mode:
author: Hugo van der Sanden <hv@crypt.org> 2003-01-21 00:44:20 +0000
committer: hv <hv@crypt.org> 2003-01-21 00:44:20 +0000
commit: 388cc4de5f48b02cc9fe9b962f02cf603af02178 (patch)
tree: 7a2fe4cc9c53651d71b5c5c269c407429ad6c6f7 /regexec.c
parent: 40b8d195ef25e04da53075384ae3c1fd5b4b5876 (diff)
download: perl-388cc4de5f48b02cc9fe9b962f02cf603af02178.tar.gz
integrate #18349 from maint-5.8:
At least partially address [perl #10000] by speeding up both the ASCII case (by about 2-3%) and the UTF-8 case (by about 45%). The major trick is to avoid hitting the costly S_reginclass(). (Even before this patch the speedup since 5.8.0 was about 40-50%.) After this the UTF-8 case is still about 30-60% slower than the ASCII case. (Note that I'm unable to reproduce the 10-fold speed difference of the original bug report; I can see a factor of 2 or 3, but no more.)

p4raw-id: //depot/perl@18529
p4raw-integrated: from //depot/maint-5.8/perl@18528 'merge in' regexec.c (@18347..)
Diffstat (limited to 'regexec.c')
-rw-r--r-- regexec.c 75
1 files changed, 54 insertions, 21 deletions
diff --git a/regexec.c b/regexec.c
index 4cf80692f2..f91af17d17 100644
--- a/regexec.c
+++ b/regexec.c
@@ -959,25 +959,40 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
/* We know what class it must start with. */
switch (OP(c)) {
case ANYOF:
- while (s < strend) {
- STRLEN skip = do_utf8 ? UTF8SKIP(s) : 1;
-
- if (do_utf8 ?
- reginclass(c, (U8*)s, 0, do_utf8) :
- REGINCLASS(c, (U8*)s) ||
- (ANYOF_FOLD_SHARP_S(c, s, strend) &&
- /* The assignment of 2 is intentional:
- * for the sharp s, the skip is 2. */
- (skip = SHARP_S_SKIP)
- )) {
- if (tmp && (norun || regtry(prog, s)))
- goto got_it;
- else
- tmp = doevery;
- }
- else
- tmp = 1;
- s += skip;
+ if (do_utf8) {
+ while (s < strend) {
+ if ((ANYOF_FLAGS(c) & ANYOF_UNICODE) ||
+ !UTF8_IS_INVARIANT((U8)s[0]) ?
+ reginclass(c, (U8*)s, 0, do_utf8) :
+ REGINCLASS(c, (U8*)s)) {
+ if (tmp && (norun || regtry(prog, s)))
+ goto got_it;
+ else
+ tmp = doevery;
+ }
+ else
+ tmp = 1;
+ s += UTF8SKIP(s);
+ }
+ }
+ else {
+ while (s < strend) {
+ STRLEN skip = 1;
+
+ if (REGINCLASS(c, (U8*)s) ||
+ (ANYOF_FOLD_SHARP_S(c, s, strend) &&
+ /* The assignment of 2 is intentional:
+ * for the folded sharp s, the skip is 2. */
+ (skip = SHARP_S_SKIP))) {
+ if (tmp && (norun || regtry(prog, s)))
+ goto got_it;
+ else
+ tmp = doevery;
+ }
+ else
+ tmp = 1;
+ s += skip;
+ }
}
break;
case CANY:
@@ -4053,8 +4068,26 @@ S_regrepeat(pTHX_ regnode *p, I32 max)
case ANYOF:
if (do_utf8) {
loceol = PL_regeol;
- while (hardcount < max && scan < loceol &&
- reginclass(p, (U8*)scan, 0, do_utf8)) {
+ while (hardcount < max && scan < loceol) {
+ bool cont = FALSE;
+ if (ANYOF_FLAGS(p) & ANYOF_UNICODE) {
+ if (reginclass(p, (U8*)scan, 0, do_utf8))
+ cont = TRUE;
+ }
+ else {
+ U8 c = (U8)scan[0];
+
+ if (UTF8_IS_INVARIANT(c)) {
+ if (ANYOF_BITMAP_TEST(p, c))
+ cont = TRUE;
+ }
+ else {
+ if (reginclass(p, (U8*)scan, 0, do_utf8))
+ cont = TRUE;
+ }
+ }
+ if (!cont)
+ break;
scan += UTF8SKIP(scan);
hardcount++;
}