summary | refs | log | tree | commit | diff
path: root/lib/str-kmp.h
diff options
context:
space:
mode:
author: Pádraig Brady <P@draigBrady.com> 2011-01-21 13:40:28 +0100
committer: Bruno Haible <bruno@clisp.org> 2011-01-21 13:40:28 +0100
commit: 1b1e945287b0e30a238102974f7a51f24def2a79 (patch)
tree: 8c185c4aecdd39fbe2d4425f752d2a03d640e8c1 /lib/str-kmp.h
parent: 76187a084bf2379a1577bf306b6742a447271a3c (diff)
download: gnulib-1b1e945287b0e30a238102974f7a51f24def2a79.tar.gz
Prepare for faster uN_strstr functions.
* lib/str-kmp.h: Support definable UNITs. (knuth_morris_pratt): Renamed from knuth_morris_pratt_unibyte. Add needle_len argument. * lib/mbsstr.c (mbsstr): Adjust for the changed str-kmp.h. * lib/mbscasestr.c (mbscasestr): Likewise.
Diffstat (limited to 'lib/str-kmp.h')
-rw-r--r-- lib/str-kmp.h 32
1 files changed, 18 insertions, 14 deletions
diff --git a/lib/str-kmp.h b/lib/str-kmp.h
index 95a73f571c..d7e0a74541 100644
--- a/lib/str-kmp.h
+++ b/lib/str-kmp.h
@@ -1,4 +1,4 @@
-/* Substring search in a NUL terminated string of 'char' elements,
+/* Substring search in a NUL terminated string of UNIT elements,
using the Knuth-Morris-Pratt algorithm.
Copyright (C) 2005-2011 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2005.
@@ -18,21 +18,26 @@
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
/* Before including this file, you need to define:
+ UNIT The element type of the needle and haystack.
CANON_ELEMENT(c) A macro that canonicalizes an element right after
- it has been fetched from one of the two strings.
- The argument is an 'unsigned char'; the result
- must be an 'unsigned char' as well. */
+ it has been fetched from needle or haystack.
+ The argument is of type UNIT; the result must be
+ of type UNIT as well. */
/* Knuth-Morris-Pratt algorithm.
See http://en.wikipedia.org/wiki/Knuth-Morris-Pratt_algorithm
+ HAYSTACK is the NUL terminated string in which to search.
+ NEEDLE is the string to search for in HAYSTACK, consisting of NEEDLE_LEN
+ units.
Return a boolean indicating success:
Return true and set *RESULTP if the search was completed.
Return false if it was aborted because not enough memory was available. */
static bool
-knuth_morris_pratt_unibyte (const char *haystack, const char *needle,
- const char **resultp)
+knuth_morris_pratt (const UNIT *haystack,
+ const UNIT *needle, size_t needle_len,
+ const UNIT **resultp)
{
- size_t m = strlen (needle);
+ size_t m = needle_len;
/* Allocate the table. */
size_t *table = (size_t *) nmalloca (m, sizeof (size_t));
@@ -66,14 +71,14 @@ knuth_morris_pratt_unibyte (const char *haystack, const char *needle,
The inequality needle[x..i-1] != needle[0..i-1-x] is known to hold
for x < table[i-1], by induction.
Furthermore, if j>0: needle[i-1-j..i-2] = needle[0..j-1]. */
- unsigned char b = CANON_ELEMENT ((unsigned char) needle[i - 1]);
+ UNIT b = CANON_ELEMENT (needle[i - 1]);
for (;;)
{
/* Invariants: The inequality needle[x..i-1] != needle[0..i-1-x]
is known to hold for x < i-1-j.
Furthermore, if j>0: needle[i-1-j..i-2] = needle[0..j-1]. */
- if (b == CANON_ELEMENT ((unsigned char) needle[j]))
+ if (b == CANON_ELEMENT (needle[j]))
{
/* Set table[i] := i-1-j. */
table[i] = i - ++j;
@@ -108,17 +113,16 @@ knuth_morris_pratt_unibyte (const char *haystack, const char *needle,
/* Search, using the table to accelerate the processing. */
{
size_t j;
- const char *rhaystack;
- const char *phaystack;
+ const UNIT *rhaystack;
+ const UNIT *phaystack;
*resultp = NULL;
j = 0;
rhaystack = haystack;
phaystack = haystack;
/* Invariant: phaystack = rhaystack + j. */
- while (*phaystack != '\0')
- if (CANON_ELEMENT ((unsigned char) needle[j])
- == CANON_ELEMENT ((unsigned char) *phaystack))
+ while (*phaystack != 0)
+ if (CANON_ELEMENT (needle[j]) == CANON_ELEMENT (*phaystack))
{
j++;
phaystack++;