summary | refs | log | tree | commit | diff
path: root/regexec.c
diff options
context:
space:
mode:
author: David Mitchell <davem@iabyn.com> 2016-12-16 13:07:58 +0000
committer: David Mitchell <davem@iabyn.com> 2016-12-16 14:17:00 +0000
commit: 1451f692e6e77e59f92339a6e76b0adb7cf0d828 (patch)
tree: 6278a343f64985d8fbb508783a59c8f0a77e38d5 /regexec.c
parent: fb910f2b12bd712c5e59f73232256f47c5e3ea2c (diff)
download: perl-1451f692e6e77e59f92339a6e76b0adb7cf0d828.tar.gz
regexes: make scanning for ANYOF faster
Given a character class of arbitrary chars (like [acgt], say, rather than a predefined one like [\d]), speed up the code in:

1) S_find_byclass(), which scans for the first char in the string that's in that class (e.g. /[acgt]...../),

2) S_regrepeat(), which scans past all chars that are in that class (e.g. /....[acgt]+..../),

by hoisting an unchanging test outside the main while loop. So this:

    while (s < end) {
        if (ANYOF_FLAGS(node))
            match = reginclass(*s, ...);
        else
            match = ANYOF_BITMAP_TEST(*s, ...);
        ...
    }

becomes this:

    if (ANYOF_FLAGS(node)) {
        while (s < end) {
            match = reginclass(*s, ...);
            ...
        }
    }
    else {
        while (s < end) {
            match = ANYOF_BITMAP_TEST(*s, ...);
            ...
        }
    }

The average of the 3 tests added to t/perf/benchmarks by this commit shows this change (raw numbers, lower is better):

               before    after
             -------- --------
    Ir         3294.0   2763.0
    Dr          900.7    802.3
    Dw          356.0    390.0
    COND        569.0    436.7
    IND          11.0     11.0
    COND_m        1.2      2.0
    IND_m         7.3      7.3
Diffstat (limited to 'regexec.c')
-rw-r--r-- regexec.c 15
1 files changed, 12 insertions, 3 deletions
diff --git a/regexec.c b/regexec.c
index 013ccc54a8..f6f293d56e 100644
--- a/regexec.c
+++ b/regexec.c
@@ -1881,8 +1881,11 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
REXEC_FBC_UTF8_CLASS_SCAN(
reginclass(prog, c, (U8*)s, (U8*) strend, utf8_target));
}
+ else if (ANYOF_FLAGS(c)) {
+ REXEC_FBC_CLASS_SCAN(reginclass(prog,c, (U8*)s, (U8*)s+1, 0));
+ }
else {
- REXEC_FBC_CLASS_SCAN(REGINCLASS(prog, c, (U8*)s, 0));
+ REXEC_FBC_CLASS_SCAN(ANYOF_BITMAP_TEST(c, *((U8*)s)));
}
break;
@@ -8892,8 +8895,14 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
scan += UTF8SKIP(scan);
hardcount++;
}
- } else {
- while (scan < loceol && REGINCLASS(prog, p, (U8*)scan, 0))
+ }
+ else if (ANYOF_FLAGS(p)) {
+ while (scan < loceol
+ && reginclass(prog, p, (U8*)scan, (U8*)scan+1, 0))
+ scan++;
+ }
+ else {
+ while (scan < loceol && ANYOF_BITMAP_TEST(p, *((U8*)scan)))
scan++;
}
break;