summary | refs | log | tree | commit | diff
path: root/lib/c-strstr.c
diff options
context:
space:
mode:
author: Bruno Haible <bruno@clisp.org> 2008-01-11 03:57:18 +0100
committer: Bruno Haible <bruno@clisp.org> 2008-01-11 03:57:18 +0100
commit: 321e8651592976988151877a5ad4a4d307ca1d94 (patch)
tree: 4967585a015e7160f2f7a4be069522f89cdbc710 /lib/c-strstr.c
parent: 4a3dfbf40eba25a6d355db2ae5ac721f862a0557 (diff)
download: gnulib-321e8651592976988151877a5ad4a4d307ca1d94.tar.gz
Make c-strstr rely on strstr.
Diffstat (limited to 'lib/c-strstr.c')
-rw-r--r-- lib/c-strstr.c 105
1 files changed, 4 insertions, 101 deletions
diff --git a/lib/c-strstr.c b/lib/c-strstr.c
index 47226c3d47..d903ec3ebf 100644
--- a/lib/c-strstr.c
+++ b/lib/c-strstr.c
@@ -1,5 +1,5 @@
/* c-strstr.c -- substring search in C locale
- Copyright (C) 2005-2007 Free Software Foundation, Inc.
+ Copyright (C) 2005-2008 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2005, 2007.
This program is free software: you can redistribute it and/or modify
@@ -20,110 +20,13 @@
/* Specification. */
#include "c-strstr.h"
-#include <stdbool.h>
-#include <stdlib.h>
#include <string.h>
-#include "malloca.h"
-
-/* Knuth-Morris-Pratt algorithm. */
-#define CANON_ELEMENT(c) c
-#include "str-kmp.h"
-
/* Find the first occurrence of NEEDLE in HAYSTACK. */
char *
c_strstr (const char *haystack, const char *needle)
{
- /* Be careful not to look at the entire extent of haystack or needle
- until needed. This is useful because of these two cases:
- - haystack may be very long, and a match of needle found early,
- - needle may be very long, and not even a short initial segment of
- needle may be found in haystack. */
- if (*needle != '\0')
- {
- /* Minimizing the worst-case complexity:
- Let n = strlen(haystack), m = strlen(needle).
- The naïve algorithm is O(n*m) worst-case.
- The Knuth-Morris-Pratt algorithm is O(n) worst-case but it needs a
- memory allocation.
- To achieve linear complexity and yet amortize the cost of the memory
- allocation, we activate the Knuth-Morris-Pratt algorithm only once
- the naïve algorithm has already run for some time; more precisely,
- when
- - the outer loop count is >= 10,
- - the average number of comparisons per outer loop is >= 5,
- - the total number of comparisons is >= m.
- But we try it only once. If the memory allocation attempt failed,
- we don't retry it. */
- bool try_kmp = true;
- size_t outer_loop_count = 0;
- size_t comparison_count = 0;
- size_t last_ccount = 0; /* last comparison count */
- const char *needle_last_ccount = needle; /* = needle + last_ccount */
-
- /* Speed up the following searches of needle by caching its first
- character. */
- unsigned char b = (unsigned char) *needle;
-
- needle++;
- for (;; haystack++)
- {
- if (*haystack == '\0')
- /* No match. */
- return NULL;
-
- /* See whether it's advisable to use an asymptotically faster
- algorithm. */
- if (try_kmp
- && outer_loop_count >= 10
- && comparison_count >= 5 * outer_loop_count)
- {
- /* See if needle + comparison_count now reaches the end of
- needle. */
- if (needle_last_ccount != NULL)
- {
- needle_last_ccount +=
- strnlen (needle_last_ccount, comparison_count - last_ccount);
- if (*needle_last_ccount == '\0')
- needle_last_ccount = NULL;
- last_ccount = comparison_count;
- }
- if (needle_last_ccount == NULL)
- {
- /* Try the Knuth-Morris-Pratt algorithm. */
- const char *result;
- bool success =
- knuth_morris_pratt_unibyte (haystack, needle - 1, &result);
- if (success)
- return (char *) result;
- try_kmp = false;
- }
- }
-
- outer_loop_count++;
- comparison_count++;
- if ((unsigned char) *haystack == b)
- /* The first character matches. */
- {
- const char *rhaystack = haystack + 1;
- const char *rneedle = needle;
-
- for (;; rhaystack++, rneedle++)
- {
- if (*rneedle == '\0')
- /* Found a match. */
- return (char *) haystack;
- if (*rhaystack == '\0')
- /* No match. */
- return NULL;
- comparison_count++;
- if ((unsigned char) *rhaystack != (unsigned char) *rneedle)
- /* Nothing in this round. */
- break;
- }
- }
- }
- }
- else
- return (char *) haystack;
+ /* POSIX says that strstr() interprets the strings as byte sequences, not
+ as character sequences in the current locale. */
+ return strstr (haystack, needle);
}