diff options
Diffstat (limited to 'Objects/stringlib/fastsearch.h')
-rw-r--r-- | Objects/stringlib/fastsearch.h | 70 |
1 files changed, 67 insertions, 3 deletions
diff --git a/Objects/stringlib/fastsearch.h b/Objects/stringlib/fastsearch.h index e231c587e4..55ac77dd70 100644 --- a/Objects/stringlib/fastsearch.h +++ b/Objects/stringlib/fastsearch.h @@ -1,6 +1,5 @@ /* stringlib: fastsearch implementation */ -#ifndef STRINGLIB_FASTSEARCH_H #define STRINGLIB_FASTSEARCH_H /* fast search/count implementation, based on a mix between boyer- @@ -33,8 +32,56 @@ #define STRINGLIB_BLOOM(mask, ch) \ ((mask & (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1))))) + +Py_LOCAL_INLINE(Py_ssize_t) +STRINGLIB(fastsearch_memchr_1char)(const STRINGLIB_CHAR* s, Py_ssize_t n, + STRINGLIB_CHAR ch, unsigned char needle, + Py_ssize_t maxcount, int mode) +{ + if (mode == FAST_SEARCH) { + const STRINGLIB_CHAR *ptr = s; + const STRINGLIB_CHAR *e = s + n; + while (ptr < e) { + void *candidate = memchr((const void *) ptr, needle, (e - ptr) * sizeof(STRINGLIB_CHAR)); + if (candidate == NULL) + return -1; + ptr = (const STRINGLIB_CHAR *) _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR)); + if (sizeof(STRINGLIB_CHAR) == 1 || *ptr == ch) + return (ptr - s); + /* False positive */ + ptr++; + } + return -1; + } +#ifdef HAVE_MEMRCHR + /* memrchr() is a GNU extension, available since glibc 2.1.91. + it doesn't seem as optimized as memchr(), but is still quite + faster than our hand-written loop in FASTSEARCH below */ + else if (mode == FAST_RSEARCH) { + while (n > 0) { + const STRINGLIB_CHAR *found; + void *candidate = memrchr((const void *) s, needle, n * sizeof(STRINGLIB_CHAR)); + if (candidate == NULL) + return -1; + found = (const STRINGLIB_CHAR *) _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR)); + n = found - s; + if (sizeof(STRINGLIB_CHAR) == 1 || *found == ch) + return n; + /* False positive */ + } + return -1; + } +#endif + else { + assert(0); /* Should never get here */ + return 0; + } + +#undef DO_MEMCHR +} + Py_LOCAL_INLINE(Py_ssize_t) -fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n, +FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n, const STRINGLIB_CHAR* p, Py_ssize_t m, Py_ssize_t maxcount, int mode) { @@ -52,6 +99,24 @@ fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n, if (m <= 0) return -1; /* use special case for 1-character strings */ + if (n > 10 && (mode == FAST_SEARCH +#ifdef HAVE_MEMRCHR + || mode == FAST_RSEARCH +#endif + )) { + /* use memchr if we can choose a needle without two many likely + false positives */ + unsigned char needle; + needle = p[0] & 0xff; +#if STRINGLIB_SIZEOF_CHAR > 1 + /* If looking for a multiple of 256, we'd have too + many false positives looking for the '\0' byte in UCS2 + and UCS4 representations. */ + if (needle != 0) +#endif + return STRINGLIB(fastsearch_memchr_1char) + (s, n, p[0], needle, maxcount, mode); + } if (mode == FAST_COUNT) { for (i = 0; i < n; i++) if (s[i] == p[0]) { @@ -157,4 +222,3 @@ fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n, return count; } -#endif |