summaryrefslogtreecommitdiff
path: root/regexec.c
diff options
context:
space:
mode:
authorKarl Williamson <khw@cpan.org>2018-02-01 20:49:03 -0700
committerKarl Williamson <khw@cpan.org>2018-02-01 21:37:19 -0700
commitc05cc3b625744ca35b9d2edaf1c108c901eaad86 (patch)
treef2b626ac51140b50d3c8439af4be639c09ff71e9 /regexec.c
parent8b37b57be5561e87de1003b536c9dfbfd6902d10 (diff)
downloadperl-c05cc3b625744ca35b9d2edaf1c108c901eaad86.tar.gz
Speed up finding non-UTF8 EXACTFish initial matches
find_byclass() is used to scan through a target string looking for something that matches a particular class. This commit speeds that up for patterns of the /foo/i type, where neither the target string nor the pattern are UTF-8. More precisely, it speeds up only finding the first byte of 'foo' in the string. The actual matching speed remains the same, once that initial character that is a potential match is found. But finding that first character is sped up immensely by this commit. It does this by using memchr() when the character is caseless. For example in the pattern /:abcde/i, the colon is first, and is caseless. On my system memchr is extremely fast, so the numbers below for this case may not be as good on other systems. And when the first character is cased, such as in /abcde/i, it uses the techniques added in 2813d4adc971fbaa124b5322d4bccaa73e9df8e2 for the ANYOFM regnode. In both ASCII and EBCDIC machines, the case folds of the cased letters are almost all of the form that these techniques work on. There are no tests in our current test suite that don't have this form. However, /il (locale) matching may very well violate this, and will use the per-byte scheme that has been in effect until this commit. The numbers below are for finding the first letter after a long string that doesn't include that character. Doing this isolates the speed up attributable to this commit from ovehead. The only downsides of this commit are that on some systems memchr() may introduce function call overhead that won't pay off if the next occurrence of the character is close by; and in the other case, a single extra conditional is required to determine if there is at least a word's worth of data to look at, plus some masking, shifting, and arithmetic instructions associated with that conditional. A static function is called, so there may or may not be function call overhead, depending on the compiler optimizer. Key: Ir Instruction read Dr Data read Dw Data write COND conditional branches IND indirect branches The numbers represent raw counts per loop iteration. caseless first letter ('b' x 10000) . ':' =~ /:a/i blead fast Ratio % -------- ------- ------- Ir 72109.0 4819.0 1496.3 Dr 20608.0 1237.5 1665.3 Dw 10409.0 409.5 2541.9 COND 20376.0 702.0 2902.6 IND 15.0 16.0 93.8 cased first letter ('b' x 10000) . 'a' =~ /A/i blead fast Ratio % -------- ------- ------- Ir 103074.0 25704.6 401.0 Dr 20896.5 2164.9 965.2 Dw 10587.5 601.9 1759.0 COND 30516.0 3036.2 1005.1 IND 22.0 22.0 100.0
Diffstat (limited to 'regexec.c')
-rw-r--r--regexec.c62
1 files changed, 49 insertions, 13 deletions
diff --git a/regexec.c b/regexec.c
index d078c29b67..08bf7134cc 100644
--- a/regexec.c
+++ b/regexec.c
@@ -1898,17 +1898,6 @@ STMT_START {
dump_exec_pos(li,s,(reginfo->strend),(reginfo->strbeg), \
startpos, doutf8, depth)
-#define REXEC_FBC_EXACTISH_SCAN(COND) \
-STMT_START { \
- while (s <= e) { \
- if ( (COND) \
- && (ln == 1 || folder(s+1, pat_string+1, ln-1))\
- && (reginfo->intuit || regtry(reginfo, &s)) )\
- goto got_it; \
- s++; \
- } \
-} STMT_END
-
#define REXEC_FBC_SCAN(UTF8, CODE) \
STMT_START { \
while (s < strend) { \
@@ -2348,10 +2337,57 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
c1 = *pat_string;
c2 = fold_array[c1];
if (c1 == c2) { /* If char and fold are the same */
- REXEC_FBC_EXACTISH_SCAN(*(U8*)s == c1);
+ while (s <= e) {
+ s = (char *) memchr(s, c1, e + 1 - s);
+ if (s == NULL) {
+ break;
+ }
+
+ /* Check that the rest of the node matches */
+ if ( (ln == 1 || folder(s + 1, pat_string + 1, ln - 1))
+ && (reginfo->intuit || regtry(reginfo, &s)) )
+ {
+ goto got_it;
+ }
+ s++;
+ }
}
else {
- REXEC_FBC_EXACTISH_SCAN(*(U8*)s == c1 || *(U8*)s == c2);
+ U8 bits_differing = c1 ^ c2;
+
+ /* If the folds differ in one bit position only, we can mask to
+ * match either of them, and can use this faster find method. Both
+ * ASCII and EBCDIC tend to have their case folds differ in only
+ * one position, so this is very likely */
+ if (LIKELY(PL_bitcount[bits_differing] == 1)) {
+ bits_differing = ~ bits_differing;
+ while (s <= e) {
+ s = find_next_masked(s, e + 1,
+ (c1 & bits_differing), bits_differing);
+ if (s > e) {
+ break;
+ }
+
+ if ( (ln == 1 || folder(s + 1, pat_string + 1, ln - 1))
+ && (reginfo->intuit || regtry(reginfo, &s)) )
+ {
+ goto got_it;
+ }
+ s++;
+ }
+ }
+ else { /* Otherwise, stuck with looking byte-at-a-time. This
+ should actually happen only in EXACTFL nodes */
+ while (s <= e) {
+ if ( (*(U8*)s == c1 || *(U8*)s == c2)
+ && (ln == 1 || folder(s + 1, pat_string + 1, ln - 1))
+ && (reginfo->intuit || regtry(reginfo, &s)) )
+ {
+ goto got_it;
+ }
+ s++;
+ }
+ }
}
break;