summaryrefslogtreecommitdiff
path: root/t
diff options
context:
space:
mode:
authorDavid Mitchell <davem@iabyn.com>2016-12-16 13:07:58 +0000
committerDavid Mitchell <davem@iabyn.com>2016-12-16 14:17:00 +0000
commit1451f692e6e77e59f92339a6e76b0adb7cf0d828 (patch)
tree6278a343f64985d8fbb508783a59c8f0a77e38d5 /t
parentfb910f2b12bd712c5e59f73232256f47c5e3ea2c (diff)
downloadperl-1451f692e6e77e59f92339a6e76b0adb7cf0d828.tar.gz
regexes: make scanning for ANYOF faster
Given a character class of random chars (like [acgt], say, rather than predefined ones like [\d]), speed up the code in:

1) S_find_byclass(), which scans for the first char in the string that's in that class (e.g. /[acgt]...../),
2) S_regrepeat(), which scans past all chars that are in that class (e.g. /....[acgt]+..../)

by hoisting an unchanging test outside the main while loop. So this:

    while (s < end) {
        if (ANYOF_FLAGS(node))
            match = reginclass(*s, ...);
        else
            match = ANYOF_BITMAP_TEST(*s, ...);
        ...
    }

becomes this:

    if (ANYOF_FLAGS(node)) {
        while (s < end) {
            match = reginclass(*s, ...);
            ...
        }
    }
    else {
        while (s < end) {
            match = ANYOF_BITMAP_TEST(*s, ...);
            ...
        }
    }

The average of the 3 tests added to t/perf/benchmarks by this commit shows this change (raw numbers, lower is better):

              before    after
            -------- --------
    Ir        3294.0   2763.0
    Dr         900.7    802.3
    Dw         356.0    390.0
    COND       569.0    436.7
    IND         11.0     11.0
    COND_m       1.2      2.0
    IND_m        7.3      7.3
Diffstat (limited to 't')
-rw-r--r--t/perf/benchmarks17
1 files changed, 17 insertions, 0 deletions
diff --git a/t/perf/benchmarks b/t/perf/benchmarks
index 4a57175e1d..ac698500e3 100644
--- a/t/perf/benchmarks
+++ b/t/perf/benchmarks
@@ -1348,4 +1348,21 @@
setup => 'my $i = 0;',
code => 'while (++$i % 4) {}',
},
+
+
+ 'regex::anyof_plus::anchored' => {
+ desc => '/^[acgt]+/',
+ setup => '$_ = "a" x 100;',
+ code => '/^[acgt]+/',
+ },
+ 'regex::anyof_plus::floating' => {
+ desc => '/[acgt]+/ where match starts at position 0 for 100 chars',
+ setup => '$_ = "a" x 100;',
+ code => '/[acgt]+/',
+ },
+ 'regex::anyof_plus::floating_away' => {
+ desc => '/[acgt]+/ where match starts at position 100 for 100 chars',
+ setup => '$_ = ("0" x 100) . ("a" x 100);',
+ code => '/[acgt]+/',
+ },
];