summaryrefslogtreecommitdiff
path: root/proto.h
diff options
context:
space:
mode:
author Karl Williamson <khw@cpan.org> 2017-12-29 13:35:00 -0700
committer Karl Williamson <khw@cpan.org> 2017-12-29 22:45:14 -0700
commit aff4cafe362e55c7722ba12952e287a7d1770cb9 (patch)
tree e2b958277f68493fce42e719a40185cca03910a8 /proto.h
parent 39d242201ac8d2ef276274c2fd8a03ab861a0916 (diff)
download perl-aff4cafe362e55c7722ba12952e287a7d1770cb9.tar.gz
Use new regnodes for /[[:ascii:]]/
Prior to this commit, the ASCII Posix class was treated as any other Posix class under /a. It turns out that by making separate nodes for it (and its complement), the performance gains can be astronomical on ASCII platforms. This is mainly due to the fact that scanning can be done word-at-a-time, but also because various conditionals can be skipped. Below are some measurements from Porting/bench.pl on a 64-bit Linux g++ -O2 system. The numbers are for very long strings, as otherwise, the delta due solely to this change is masked by the overhead around pattern matching in general. These numbers are for finding an ASCII character at the end of a very long string of non-ASCII ones. All 1-byte (non-utf8) characters, improvement ratio: Ir 1990.8, Dr 2995.4, Dw 17296.7, COND 786.0. All 2-byte characters, improvement ratio: Ir 242.2, Dr 232.9, Dw 17237.8, COND 100.0. The numbers for three and more bytes per character are essentially the same as for two bytes. As an example, the Dw for strings consisting entirely of 2-byte characters (before the terminal ASCII one) now takes 584 instructions, whereas previously it took 100,582. The gain would be less for a 32-bit system. bench.pl returns other measurements which I omitted above, because they either have unchanged performance or involve a trivial number of total instructions.
Diffstat (limited to 'proto.h')
-rw-r--r--proto.h10
1 files changed, 10 insertions, 0 deletions
diff --git a/proto.h b/proto.h
index 9c501978bc..3e4032b0e4 100644
--- a/proto.h
+++ b/proto.h
@@ -5553,6 +5553,16 @@ STATIC char* S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, cons
#define PERL_ARGS_ASSERT_FIND_BYCLASS \
assert(prog); assert(c); assert(s); assert(strend)
+STATIC char * S_find_next_ascii(char* s, const char * send, const bool is_utf8)
+ __attribute__warn_unused_result__;
+#define PERL_ARGS_ASSERT_FIND_NEXT_ASCII \
+ assert(s); assert(send)
+
+STATIC char * S_find_next_non_ascii(char* s, const char * send, const bool is_utf8)
+ __attribute__warn_unused_result__;
+#define PERL_ARGS_ASSERT_FIND_NEXT_NON_ASCII \
+ assert(s); assert(send)
+
STATIC bool S_isFOO_utf8_lc(pTHX_ const U8 classnum, const U8* character)
__attribute__warn_unused_result__;
#define PERL_ARGS_ASSERT_ISFOO_UTF8_LC \