diff options
author | Bruno Haible <bruno@clisp.org> | 2008-01-11 03:57:18 +0100 |
---|---|---|
committer | Bruno Haible <bruno@clisp.org> | 2008-01-11 03:57:18 +0100 |
commit | 321e8651592976988151877a5ad4a4d307ca1d94 (patch) | |
tree | 4967585a015e7160f2f7a4be069522f89cdbc710 /lib/c-strstr.c | |
parent | 4a3dfbf40eba25a6d355db2ae5ac721f862a0557 (diff) | |
download | gnulib-321e8651592976988151877a5ad4a4d307ca1d94.tar.gz |
Make c-strstr rely on strstr.
Diffstat (limited to 'lib/c-strstr.c')
-rw-r--r-- | lib/c-strstr.c | 105 |
1 files changed, 4 insertions, 101 deletions
diff --git a/lib/c-strstr.c b/lib/c-strstr.c index 47226c3d47..d903ec3ebf 100644 --- a/lib/c-strstr.c +++ b/lib/c-strstr.c @@ -1,5 +1,5 @@ /* c-strstr.c -- substring search in C locale - Copyright (C) 2005-2007 Free Software Foundation, Inc. + Copyright (C) 2005-2008 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2005, 2007. This program is free software: you can redistribute it and/or modify @@ -20,110 +20,13 @@ /* Specification. */ #include "c-strstr.h" -#include <stdbool.h> -#include <stdlib.h> #include <string.h> -#include "malloca.h" - -/* Knuth-Morris-Pratt algorithm. */ -#define CANON_ELEMENT(c) c -#include "str-kmp.h" - /* Find the first occurrence of NEEDLE in HAYSTACK. */ char * c_strstr (const char *haystack, const char *needle) { - /* Be careful not to look at the entire extent of haystack or needle - until needed. This is useful because of these two cases: - - haystack may be very long, and a match of needle found early, - - needle may be very long, and not even a short initial segment of - needle may be found in haystack. */ - if (*needle != '\0') - { - /* Minimizing the worst-case complexity: - Let n = strlen(haystack), m = strlen(needle). - The naïve algorithm is O(n*m) worst-case. - The Knuth-Morris-Pratt algorithm is O(n) worst-case but it needs a - memory allocation. - To achieve linear complexity and yet amortize the cost of the memory - allocation, we activate the Knuth-Morris-Pratt algorithm only once - the naïve algorithm has already run for some time; more precisely, - when - - the outer loop count is >= 10, - - the average number of comparisons per outer loop is >= 5, - - the total number of comparisons is >= m. - But we try it only once. If the memory allocation attempt failed, - we don't retry it. */ - bool try_kmp = true; - size_t outer_loop_count = 0; - size_t comparison_count = 0; - size_t last_ccount = 0; /* last comparison count */ - const char *needle_last_ccount = needle; /* = needle + last_ccount */ - - /* Speed up the following searches of needle by caching its first - character. */ - unsigned char b = (unsigned char) *needle; - - needle++; - for (;; haystack++) - { - if (*haystack == '\0') - /* No match. */ - return NULL; - - /* See whether it's advisable to use an asymptotically faster - algorithm. */ - if (try_kmp - && outer_loop_count >= 10 - && comparison_count >= 5 * outer_loop_count) - { - /* See if needle + comparison_count now reaches the end of - needle. */ - if (needle_last_ccount != NULL) - { - needle_last_ccount += - strnlen (needle_last_ccount, comparison_count - last_ccount); - if (*needle_last_ccount == '\0') - needle_last_ccount = NULL; - last_ccount = comparison_count; - } - if (needle_last_ccount == NULL) - { - /* Try the Knuth-Morris-Pratt algorithm. */ - const char *result; - bool success = - knuth_morris_pratt_unibyte (haystack, needle - 1, &result); - if (success) - return (char *) result; - try_kmp = false; - } - } - - outer_loop_count++; - comparison_count++; - if ((unsigned char) *haystack == b) - /* The first character matches. */ - { - const char *rhaystack = haystack + 1; - const char *rneedle = needle; - - for (;; rhaystack++, rneedle++) - { - if (*rneedle == '\0') - /* Found a match. */ - return (char *) haystack; - if (*rhaystack == '\0') - /* No match. */ - return NULL; - comparison_count++; - if ((unsigned char) *rhaystack != (unsigned char) *rneedle) - /* Nothing in this round. */ - break; - } - } - } - } - else - return (char *) haystack; + /* POSIX says that strstr() interprets the strings as byte sequences, not + as character sequences in the current locale. */ + return strstr (haystack, needle); } |