diff options
author | Pádraig Brady <P@draigBrady.com> | 2011-01-21 13:40:28 +0100 |
---|---|---|
committer | Bruno Haible <bruno@clisp.org> | 2011-01-21 13:40:28 +0100 |
commit | 1b1e945287b0e30a238102974f7a51f24def2a79 (patch) | |
tree | 8c185c4aecdd39fbe2d4425f752d2a03d640e8c1 /lib/str-kmp.h | |
parent | 76187a084bf2379a1577bf306b6742a447271a3c (diff) | |
download | gnulib-1b1e945287b0e30a238102974f7a51f24def2a79.tar.gz |
Prepare for faster uN_strstr functions.
* lib/str-kmp.h: Support definable UNITs.
(knuth_morris_pratt): Renamed from knuth_morris_pratt_unibyte. Add
needle_len argument.
* lib/mbsstr.c (mbsstr): Adjust for the changed str-kmp.h.
* lib/mbscasestr.c (mbscasestr): Likewise.
Diffstat (limited to 'lib/str-kmp.h')
-rw-r--r-- | lib/str-kmp.h | 32 |
1 files changed, 18 insertions, 14 deletions
diff --git a/lib/str-kmp.h b/lib/str-kmp.h index 95a73f571c..d7e0a74541 100644 --- a/lib/str-kmp.h +++ b/lib/str-kmp.h @@ -1,4 +1,4 @@ -/* Substring search in a NUL terminated string of 'char' elements, +/* Substring search in a NUL terminated string of UNIT elements, using the Knuth-Morris-Pratt algorithm. Copyright (C) 2005-2011 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2005. @@ -18,21 +18,26 @@ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ /* Before including this file, you need to define: + UNIT The element type of the needle and haystack. CANON_ELEMENT(c) A macro that canonicalizes an element right after - it has been fetched from one of the two strings. - The argument is an 'unsigned char'; the result - must be an 'unsigned char' as well. */ + it has been fetched from needle or haystack. + The argument is of type UNIT; the result must be + of type UNIT as well. */ /* Knuth-Morris-Pratt algorithm. See http://en.wikipedia.org/wiki/Knuth-Morris-Pratt_algorithm + HAYSTACK is the NUL terminated string in which to search. + NEEDLE is the string to search for in HAYSTACK, consisting of NEEDLE_LEN + units. Return a boolean indicating success: Return true and set *RESULTP if the search was completed. Return false if it was aborted because not enough memory was available. */ static bool -knuth_morris_pratt_unibyte (const char *haystack, const char *needle, - const char **resultp) +knuth_morris_pratt (const UNIT *haystack, + const UNIT *needle, size_t needle_len, + const UNIT **resultp) { - size_t m = strlen (needle); + size_t m = needle_len; /* Allocate the table. */ size_t *table = (size_t *) nmalloca (m, sizeof (size_t)); @@ -66,14 +71,14 @@ knuth_morris_pratt_unibyte (const char *haystack, const char *needle, The inequality needle[x..i-1] != needle[0..i-1-x] is known to hold for x < table[i-1], by induction. Furthermore, if j>0: needle[i-1-j..i-2] = needle[0..j-1]. 
*/ - unsigned char b = CANON_ELEMENT ((unsigned char) needle[i - 1]); + UNIT b = CANON_ELEMENT (needle[i - 1]); for (;;) { /* Invariants: The inequality needle[x..i-1] != needle[0..i-1-x] is known to hold for x < i-1-j. Furthermore, if j>0: needle[i-1-j..i-2] = needle[0..j-1]. */ - if (b == CANON_ELEMENT ((unsigned char) needle[j])) + if (b == CANON_ELEMENT (needle[j])) { /* Set table[i] := i-1-j. */ table[i] = i - ++j; @@ -108,17 +113,16 @@ knuth_morris_pratt_unibyte (const char *haystack, const char *needle, /* Search, using the table to accelerate the processing. */ { size_t j; - const char *rhaystack; - const char *phaystack; + const UNIT *rhaystack; + const UNIT *phaystack; *resultp = NULL; j = 0; rhaystack = haystack; phaystack = haystack; /* Invariant: phaystack = rhaystack + j. */ - while (*phaystack != '\0') - if (CANON_ELEMENT ((unsigned char) needle[j]) - == CANON_ELEMENT ((unsigned char) *phaystack)) + while (*phaystack != 0) + if (CANON_ELEMENT (needle[j]) == CANON_ELEMENT (*phaystack)) { j++; phaystack++; |