diff options
author | Simon Josefsson <simon@josefsson.org> | 2008-01-19 12:17:35 +0100 |
---|---|---|
committer | Simon Josefsson <simon@josefsson.org> | 2008-01-19 12:17:35 +0100 |
commit | 436934ddefdcedcc2c86cbb72e585498953889de (patch) | |
tree | 6d03193dfcf729a4e0da2f8d1bf4f3805e29ca01 /lgl | |
parent | 55ea8291ea5c9c0c55a2125525085ec5aec8c1ac (diff) | |
download | gnutls-436934ddefdcedcc2c86cbb72e585498953889de.tar.gz |
Update gnulib files.
Diffstat (limited to 'lgl')
-rw-r--r-- | lgl/Makefile.am | 8 | ||||
-rw-r--r-- | lgl/gc-libgcrypt.c | 5 | ||||
-rw-r--r-- | lgl/m4/gnulib-comp.m4 | 1 | ||||
-rw-r--r-- | lgl/m4/string_h.m4 | 10 | ||||
-rw-r--r-- | lgl/memmem.c | 355 | ||||
-rw-r--r-- | lgl/stdio.in.h | 4 | ||||
-rw-r--r-- | lgl/str-two-way.h | 427 | ||||
-rw-r--r-- | lgl/string.in.h | 48 | ||||
-rw-r--r-- | lgl/vasnprintf.h | 4 |
9 files changed, 492 insertions, 370 deletions
diff --git a/lgl/Makefile.am b/lgl/Makefile.am index 51974b107b..a545762b05 100644 --- a/lgl/Makefile.am +++ b/lgl/Makefile.am @@ -251,7 +251,7 @@ EXTRA_liblgnu_la_SOURCES += memcmp.c ## begin gnulib module memmem-simple -EXTRA_DIST += memmem.c +EXTRA_DIST += memmem.c str-two-way.h EXTRA_liblgnu_la_SOURCES += memmem.c @@ -500,9 +500,11 @@ string.h: string.in.h -e 's|@''GNULIB_STRNLEN''@|$(GNULIB_STRNLEN)|g' \ -e 's|@''GNULIB_STRPBRK''@|$(GNULIB_STRPBRK)|g' \ -e 's|@''GNULIB_STRSEP''@|$(GNULIB_STRSEP)|g' \ + -e 's|@''GNULIB_STRSTR''@|$(GNULIB_STRSTR)|g' \ -e 's|@''GNULIB_STRCASESTR''@|$(GNULIB_STRCASESTR)|g' \ -e 's|@''GNULIB_STRTOK_R''@|$(GNULIB_STRTOK_R)|g' \ -e 's|@''GNULIB_STRERROR''@|$(GNULIB_STRERROR)|g' \ + -e 's|@''GNULIB_STRSIGNAL''@|$(GNULIB_STRSIGNAL)|g' \ -e 's|@''HAVE_DECL_MEMMEM''@|$(HAVE_DECL_MEMMEM)|g' \ -e 's|@''HAVE_MEMPCPY''@|$(HAVE_MEMPCPY)|g' \ -e 's|@''HAVE_DECL_MEMRCHR''@|$(HAVE_DECL_MEMRCHR)|g' \ @@ -518,8 +520,12 @@ string.h: string.in.h -e 's|@''HAVE_STRCASESTR''@|$(HAVE_STRCASESTR)|g' \ -e 's|@''HAVE_DECL_STRTOK_R''@|$(HAVE_DECL_STRTOK_R)|g' \ -e 's|@''HAVE_DECL_STRERROR''@|$(HAVE_DECL_STRERROR)|g' \ + -e 's|@''HAVE_DECL_STRSIGNAL''@|$(HAVE_DECL_STRSIGNAL)|g' \ -e 's|@''REPLACE_MEMMEM''@|$(REPLACE_MEMMEM)|g' \ + -e 's|@''REPLACE_STRCASESTR''@|$(REPLACE_STRCASESTR)|g' \ + -e 's|@''REPLACE_STRSTR''@|$(REPLACE_STRSTR)|g' \ -e 's|@''REPLACE_STRERROR''@|$(REPLACE_STRERROR)|g' \ + -e 's|@''REPLACE_STRSIGNAL''@|$(REPLACE_STRSIGNAL)|g' \ -e '/definition of GL_LINK_WARNING/r $(LINK_WARNING_H)' \ < $(srcdir)/string.in.h; \ } > $@-t diff --git a/lgl/gc-libgcrypt.c b/lgl/gc-libgcrypt.c index 824a7750dc..73c81254ea 100644 --- a/lgl/gc-libgcrypt.c +++ b/lgl/gc-libgcrypt.c @@ -1,5 +1,5 @@ /* gc-libgcrypt.c --- Crypto wrappers around Libgcrypt for GC. - * Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007 Simon Josefsson + * Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 Simon Josefsson * * This file is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published @@ -46,6 +46,9 @@ gc_init (void) err = gcry_control (GCRYCTL_ANY_INITIALIZATION_P); if (err == GPG_ERR_NO_ERROR) { + if (gcry_control (GCRYCTL_DISABLE_SECMEM, NULL, 0)) + return GC_INIT_ERROR; + if (gcry_check_version (GCRYPT_VERSION) == NULL) return GC_INIT_ERROR; diff --git a/lgl/m4/gnulib-comp.m4 b/lgl/m4/gnulib-comp.m4 index 5b51020aa1..7acee94f64 100644 --- a/lgl/m4/gnulib-comp.m4 +++ b/lgl/m4/gnulib-comp.m4 @@ -279,6 +279,7 @@ AC_DEFUN([lgl_FILE_LIST], [ lib/stdint.in.h lib/stdio.in.h lib/stdlib.in.h + lib/str-two-way.h lib/string.in.h lib/strverscmp.c lib/strverscmp.h diff --git a/lgl/m4/string_h.m4 b/lgl/m4/string_h.m4 index 99a0dabbcb..766d7e9849 100644 --- a/lgl/m4/string_h.m4 +++ b/lgl/m4/string_h.m4 @@ -1,11 +1,11 @@ # Configure a GNU-like replacement for <string.h>. -# Copyright (C) 2007 Free Software Foundation, Inc. +# Copyright (C) 2007, 2008 Free Software Foundation, Inc. # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. -# serial 2 +# serial 4 # Written by Paul Eggert. @@ -43,6 +43,7 @@ AC_DEFUN([gl_HEADER_STRING_H_DEFAULTS], GNULIB_STRNLEN=0; AC_SUBST([GNULIB_STRNLEN]) GNULIB_STRPBRK=0; AC_SUBST([GNULIB_STRPBRK]) GNULIB_STRSEP=0; AC_SUBST([GNULIB_STRSEP]) + GNULIB_STRSTR=0; AC_SUBST([GNULIB_STRSTR]) GNULIB_STRCASESTR=0; AC_SUBST([GNULIB_STRCASESTR]) GNULIB_STRTOK_R=0; AC_SUBST([GNULIB_STRTOK_R]) GNULIB_MBSLEN=0; AC_SUBST([GNULIB_MBSLEN]) @@ -60,6 +61,7 @@ AC_DEFUN([gl_HEADER_STRING_H_DEFAULTS], GNULIB_MBSSEP=0; AC_SUBST([GNULIB_MBSSEP]) GNULIB_MBSTOK_R=0; AC_SUBST([GNULIB_MBSTOK_R]) GNULIB_STRERROR=0; AC_SUBST([GNULIB_STRERROR]) + GNULIB_STRSIGNAL=0; AC_SUBST([GNULIB_STRSIGNAL]) dnl Assume proper GNU behavior unless another module says otherwise. HAVE_DECL_MEMMEM=1; AC_SUBST([HAVE_DECL_MEMMEM]) HAVE_MEMPCPY=1; AC_SUBST([HAVE_MEMPCPY]) @@ -76,6 +78,10 @@ AC_DEFUN([gl_HEADER_STRING_H_DEFAULTS], HAVE_STRCASESTR=1; AC_SUBST([HAVE_STRCASESTR]) HAVE_DECL_STRTOK_R=1; AC_SUBST([HAVE_DECL_STRTOK_R]) HAVE_DECL_STRERROR=1; AC_SUBST([HAVE_DECL_STRERROR]) + HAVE_DECL_STRSIGNAL=1; AC_SUBST([HAVE_DECL_STRSIGNAL]) REPLACE_STRERROR=0; AC_SUBST([REPLACE_STRERROR]) + REPLACE_STRSIGNAL=0; AC_SUBST([REPLACE_STRSIGNAL]) REPLACE_MEMMEM=0; AC_SUBST([REPLACE_MEMMEM]) + REPLACE_STRCASESTR=0; AC_SUBST([REPLACE_STRCASESTR]) + REPLACE_STRSTR=0; AC_SUBST([REPLACE_STRSTR]) ]) diff --git a/lgl/memmem.c b/lgl/memmem.c index c5dbd613fa..e4816ed1a4 100644 --- a/lgl/memmem.c +++ b/lgl/memmem.c @@ -25,361 +25,13 @@ /* Specification of memmem. */ #include <string.h> -#include <limits.h> -#include <stddef.h> -#include <stdint.h> - #ifndef _LIBC # define __builtin_expect(expr, val) (expr) #endif -/* We use the Two-Way string matching algorithm, which guarantees - linear complexity with constant space. Additionally, for long - needles, we also use a bad character shift table similar to the - Boyer-Moore algorithm to achieve sub-linear performance. - - See http://www-igm.univ-mlv.fr/~lecroq/string/node26.html#SECTION00260 - and http://en.wikipedia.org/wiki/Boyer-Moore_string_search_algorithm -*/ - -/* Point at which computing a bad-byte shift table is likely to be - worthwhile. Small needles should not compute a table, since it - adds (1 << CHAR_BIT) + NEEDLE_LEN computations of preparation for a - speedup no greater than a factor of NEEDLE_LEN. The larger the - needle, the better the potential performance gain. On the other - hand, on non-POSIX systems with CHAR_BIT larger than eight, the - memory required for the table is prohibitive. */ -#if CHAR_BIT < 10 -# define LONG_NEEDLE_THRESHOLD 32U -#else -# define LONG_NEEDLE_THRESHOLD SIZE_MAX -#endif - -#define MAX(a, b) ((a < b) ? (b) : (a)) - -/* Perform a critical factorization of NEEDLE, of length NEEDLE_LEN. - Return the index of the first byte in the right half, and set - *PERIOD to the global period of the right half. - - The global period of a string is the smallest index (possibly its - length) at which all remaining bytes in the string are repetitions - of the prefix (the last repetition may be a subset of the prefix). - - When NEEDLE is factored into two halves, a local period is the - length of the smallest word that shares a suffix with the left half - and shares a prefix with the right half. All factorizations of a - non-empty NEEDLE have a local period of at least 1 and no greater - than NEEDLE_LEN. - - A critical factorization has the property that the local period - equals the global period. All strings have at least one critical - factorization with the left half smaller than the global period. - - Given an ordered alphabet, a critical factorization can be computed - in linear time, with 2 * NEEDLE_LEN comparisons, by computing the - larger of two ordered maximal suffixes. The ordered maximal - suffixes are determined by lexicographic comparison of - periodicity. */ -static size_t -critical_factorization (const unsigned char *needle, size_t needle_len, - size_t *period) -{ - /* Index of last byte of left half, or SIZE_MAX. */ - size_t max_suffix, max_suffix_rev; - size_t j; /* Index into NEEDLE for current candidate suffix. */ - size_t k; /* Offset into current period. */ - size_t p; /* Intermediate period. */ - unsigned char a, b; /* Current comparison bytes. */ - - /* Invariants: - 0 <= j < NEEDLE_LEN - 1 - -1 <= max_suffix{,_rev} < j (treating SIZE_MAX as if it were signed) - min(max_suffix, max_suffix_rev) < global period of NEEDLE - 1 <= p <= global period of NEEDLE - p == global period of the substring NEEDLE[max_suffix{,_rev}+1...j] - 1 <= k <= p - */ - - /* Perform lexicographic search. */ - max_suffix = SIZE_MAX; - j = 0; - k = p = 1; - while (j + k < needle_len) - { - a = needle[j + k]; - b = needle[max_suffix + k]; - if (a < b) - { - /* Suffix is smaller, period is entire prefix so far. */ - j += k; - k = 1; - p = j - max_suffix; - } - else if (a == b) - { - /* Advance through repetition of the current period. */ - if (k != p) - ++k; - else - { - j += p; - k = 1; - } - } - else /* b < a */ - { - /* Suffix is larger, start over from current location. */ - max_suffix = j++; - k = p = 1; - } - } - *period = p; - - /* Perform reverse lexicographic search. */ - max_suffix_rev = SIZE_MAX; - j = 0; - k = p = 1; - while (j + k < needle_len) - { - a = needle[j + k]; - b = needle[max_suffix_rev + k]; - if (b < a) - { - /* Suffix is smaller, period is entire prefix so far. */ - j += k; - k = 1; - p = j - max_suffix_rev; - } - else if (a == b) - { - /* Advance through repetition of the current period. */ - if (k != p) - ++k; - else - { - j += p; - k = 1; - } - } - else /* a < b */ - { - /* Suffix is larger, start over from current location. */ - max_suffix_rev = j++; - k = p = 1; - } - } - - /* Choose the longer suffix. Return the first byte of the right - half, rather than the last byte of the left half. */ - if (max_suffix_rev + 1 < max_suffix + 1) - return max_suffix + 1; - *period = p; - return max_suffix_rev + 1; -} - -/* Return the first location of NEEDLE within HAYSTACK, or NULL. This - method requires 0 < NEEDLE_LEN <= HAYSTACK_LEN, and is optimized - for NEEDLE_LEN < LONG_NEEDLE_THRESHOLD. Performance is linear, - with 2 * NEEDLE_LEN comparisons in preparation, and at most 2 * - HAYSTACK_LEN - NEEDLE_LEN comparisons in searching. */ -static void * -two_way_short_needle (const unsigned char *haystack, size_t haystack_len, - const unsigned char *needle, size_t needle_len) -{ - size_t i; /* Index into current byte of NEEDLE. */ - size_t j; /* Index into current window of HAYSTACK. */ - size_t period; /* The period of the right half of needle. */ - size_t suffix; /* The index of the right half of needle. */ - - /* Factor the needle into two halves, such that the left half is - smaller than the global period, and the right half is - periodic (with a period as large as NEEDLE_LEN - suffix). */ - suffix = critical_factorization (needle, needle_len, &period); - - /* Perform the search. Each iteration compares the right half - first. */ - if (memcmp (needle, needle + period, suffix) == 0) - { - /* Entire needle is periodic; a mismatch can only advance by the - period, so use memory to avoid rescanning known occurrences - of the period. */ - size_t memory = 0; - j = 0; - while (j <= haystack_len - needle_len) - { - /* Scan for matches in right half. */ - i = MAX (suffix, memory); - while (i < needle_len && needle[i] == haystack[i + j]) - ++i; - if (needle_len <= i) - { - /* Scan for matches in left half. */ - i = suffix - 1; - while (memory < i + 1 && needle[i] == haystack[i + j]) - --i; - if (i + 1 < memory + 1) - return (void *) (haystack + j); - /* No match, so remember how many repetitions of period - on the right half were scanned. */ - j += period; - memory = needle_len - period; - } - else - { - j += i - suffix + 1; - memory = 0; - } - } - } - else - { - /* The two halves of needle are distinct; no extra memory is - required, and any mismatch results in a maximal shift. */ - period = MAX (suffix, needle_len - suffix) + 1; - j = 0; - while (j <= haystack_len - needle_len) - { - /* Scan for matches in right half. */ - i = suffix; - while (i < needle_len && needle[i] == haystack[i + j]) - ++i; - if (needle_len <= i) - { - /* Scan for matches in left half. */ - i = suffix - 1; - while (i != SIZE_MAX && needle[i] == haystack[i + j]) - --i; - if (i == SIZE_MAX) - return (void *) (haystack + j); - j += period; - } - else - j += i - suffix + 1; - } - } - return NULL; -} - -/* Return the first location of NEEDLE within HAYSTACK, or NULL. This - method requires 0 < NEEDLE_LEN <= HAYSTACK_LEN, and is optimized - for LONG_NEEDLE_THRESHOLD <= NEEDLE_LEN. Performance is linear, - with 3 * NEEDLE_LEN + (1U << CHAR_BIT) operations in preparation, - and at most 2 * HAYSTACK_LEN - NEEDLE_LEN comparisons in searching. - The extra initialization cost allows for potential sublinear - performance O(HAYSTACK_LEN / NEEDLE_LEN). */ -static void * -two_way_long_needle (const unsigned char *haystack, size_t haystack_len, - const unsigned char *needle, size_t needle_len) -{ - size_t i; /* Index into current byte of NEEDLE. */ - size_t j; /* Index into current window of HAYSTACK. */ - size_t period; /* The period of the right half of needle. */ - size_t suffix; /* The index of the right half of needle. */ - size_t shift_table[1U << CHAR_BIT]; /* See below. */ - - /* Factor the needle into two halves, such that the left half is - smaller than the global period, and the right half is - periodic (with a period as large as NEEDLE_LEN - suffix). */ - suffix = critical_factorization (needle, needle_len, &period); - - /* Populate shift_table. For each possible byte value c, - shift_table[c] is the distance from the last occurrence of c to - the end of NEEDLE, or NEEDLE_LEN if c is absent from the NEEDLE. - shift_table[NEEDLE[NEEDLE_LEN - 1]] contains the only 0. */ - for (i = 0; i < 1U << CHAR_BIT; i++) - shift_table[i] = needle_len; - for (i = 0; i < needle_len; i++) - shift_table[needle[i]] = needle_len - i - 1; - - /* Perform the search. Each iteration compares the right half - first. */ - if (memcmp (needle, needle + period, suffix) == 0) - { - /* Entire needle is periodic; a mismatch can only advance by the - period, so use memory to avoid rescanning known occurrences - of the period. */ - size_t memory = 0; - j = 0; - while (j <= haystack_len - needle_len) - { - /* Check the last byte first; if it does not match, then - shift to the next possible match location. */ - size_t shift = shift_table[haystack[j + needle_len - 1]]; - if (0 < shift) - { - if (memory && shift < period) - { - /* Since needle is periodic, but the last period has - a byte out of place, there can be no match until - after the mismatch. */ - shift = needle_len - period; - memory = 0; - } - j += shift; - continue; - } - /* Scan for matches in right half. The last byte has - already been matched, by virtue of the shift table. */ - i = MAX (suffix, memory); - while (i < needle_len - 1 && needle[i] == haystack[i + j]) - ++i; - if (needle_len - 1 <= i) - { - /* Scan for matches in left half. */ - i = suffix - 1; - while (memory < i + 1 && needle[i] == haystack[i + j]) - --i; - if (i + 1 < memory + 1) - return (void *) (haystack + j); - /* No match, so remember how many repetitions of period - on the right half were scanned. */ - j += period; - memory = needle_len - period; - } - else - { - j += i - suffix + 1; - memory = 0; - } - } - } - else - { - /* The two halves of needle are distinct; no extra memory is - required, and any mismatch results in a maximal shift. */ - period = MAX (suffix, needle_len - suffix) + 1; - j = 0; - while (j <= haystack_len - needle_len) - { - /* Check the last byte first; if it does not match, then - shift to the next possible match location. */ - size_t shift = shift_table[haystack[j + needle_len - 1]]; - if (0 < shift) - { - j += shift; - continue; - } - /* Scan for matches in right half. The last byte has - already been matched, by virtue of the shift table. */ - i = suffix; - while (i < needle_len - 1 && needle[i] == haystack[i + j]) - ++i; - if (needle_len - 1 <= i) - { - /* Scan for matches in left half. */ - i = suffix - 1; - while (i != SIZE_MAX && needle[i] == haystack[i + j]) - --i; - if (i == SIZE_MAX) - return (void *) (haystack + j); - j += period; - } - else - j += i - suffix + 1; - } - } - return NULL; -} +#define RETURN_TYPE void * +#define AVAILABLE(h, h_l, j, n_l) ((j) <= (h_l) - (n_l)) +#include "str-two-way.h" /* Return the first occurrence of NEEDLE in HAYSTACK. Return HAYSTACK if NEEDLE_LEN is 0, otherwise NULL if NEEDLE is not found in @@ -422,4 +74,3 @@ memmem (const void *haystack_start, size_t haystack_len, } #undef LONG_NEEDLE_THRESHOLD -#undef MAX diff --git a/lgl/stdio.in.h b/lgl/stdio.in.h index b17642438d..d5962aa4fc 100644 --- a/lgl/stdio.in.h +++ b/lgl/stdio.in.h @@ -1,6 +1,6 @@ /* A GNU-like <stdio.h>. - Copyright (C) 2004, 2007 Free Software Foundation, Inc. + Copyright (C) 2004, 2007-2008 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by @@ -45,7 +45,7 @@ #ifndef __attribute__ /* This feature is available in gcc versions 2.5 and later. */ -# if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 5) || __STRICT_ANSI__ +# if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 5) # define __attribute__(Spec) /* empty */ # endif /* The __-protected variants of `format' and `printf' attributes diff --git a/lgl/str-two-way.h b/lgl/str-two-way.h new file mode 100644 index 0000000000..846824506c --- /dev/null +++ b/lgl/str-two-way.h @@ -0,0 +1,427 @@ +/* Byte-wise substring search, using the Two-Way algorithm. + Copyright (C) 2008 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Written by Eric Blake <ebb9@byu.net>, 2008. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License along + with this program; if not, write to the Free Software Foundation, + Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ + +/* Before including this file, you need to include <config.h> and + <string.h>, and define: + RESULT_TYPE A macro that expands to the return type. + AVAILABLE(h, h_l, j, n_l) + A macro that returns nonzero if there are + at least N_L bytes left starting at H[J]. + H is 'unsigned char *', H_L, J, and N_L + are 'size_t'; H_L is an lvalue. For + NUL-terminated searches, H_L can be + modified each iteration to avoid having + to compute the end of H up front. + + For case-insensitivity, you may optionally define: + CMP_FUNC(p1, p2, l) A macro that returns 0 iff the first L + characters of P1 and P2 are equal. + CANON_ELEMENT(c) A macro that canonicalizes an element right after + it has been fetched from one of the two strings. + The argument is an 'unsigned char'; the result + must be an 'unsigned char' as well. + + This file undefines the macros documented above, and defines + LONG_NEEDLE_THRESHOLD. +*/ + +#include <limits.h> +#include <stdint.h> + +/* We use the Two-Way string matching algorithm, which guarantees + linear complexity with constant space. Additionally, for long + needles, we also use a bad character shift table similar to the + Boyer-Moore algorithm to achieve improved (potentially sub-linear) + performance. + + See http://www-igm.univ-mlv.fr/~lecroq/string/node26.html#SECTION00260 + and http://en.wikipedia.org/wiki/Boyer-Moore_string_search_algorithm +*/ + +/* Point at which computing a bad-byte shift table is likely to be + worthwhile. Small needles should not compute a table, since it + adds (1 << CHAR_BIT) + NEEDLE_LEN computations of preparation for a + speedup no greater than a factor of NEEDLE_LEN. The larger the + needle, the better the potential performance gain. On the other + hand, on non-POSIX systems with CHAR_BIT larger than eight, the + memory required for the table is prohibitive. */ +#if CHAR_BIT < 10 +# define LONG_NEEDLE_THRESHOLD 32U +#else +# define LONG_NEEDLE_THRESHOLD SIZE_MAX +#endif + +#define MAX(a, b) ((a < b) ? (b) : (a)) + +#ifndef CANON_ELEMENT +# define CANON_ELEMENT(c) c +#endif +#ifndef CMP_FUNC +# define CMP_FUNC memcmp +#endif + +/* Perform a critical factorization of NEEDLE, of length NEEDLE_LEN. + Return the index of the first byte in the right half, and set + *PERIOD to the global period of the right half. + + The global period of a string is the smallest index (possibly its + length) at which all remaining bytes in the string are repetitions + of the prefix (the last repetition may be a subset of the prefix). + + When NEEDLE is factored into two halves, a local period is the + length of the smallest word that shares a suffix with the left half + and shares a prefix with the right half. All factorizations of a + non-empty NEEDLE have a local period of at least 1 and no greater + than NEEDLE_LEN. + + A critical factorization has the property that the local period + equals the global period. All strings have at least one critical + factorization with the left half smaller than the global period. + + Given an ordered alphabet, a critical factorization can be computed + in linear time, with 2 * NEEDLE_LEN comparisons, by computing the + larger of two ordered maximal suffixes. The ordered maximal + suffixes are determined by lexicographic comparison of + periodicity. */ +static size_t +critical_factorization (const unsigned char *needle, size_t needle_len, + size_t *period) +{ + /* Index of last byte of left half, or SIZE_MAX. */ + size_t max_suffix, max_suffix_rev; + size_t j; /* Index into NEEDLE for current candidate suffix. */ + size_t k; /* Offset into current period. */ + size_t p; /* Intermediate period. */ + unsigned char a, b; /* Current comparison bytes. */ + + /* Invariants: + 0 <= j < NEEDLE_LEN - 1 + -1 <= max_suffix{,_rev} < j (treating SIZE_MAX as if it were signed) + min(max_suffix, max_suffix_rev) < global period of NEEDLE + 1 <= p <= global period of NEEDLE + p == global period of the substring NEEDLE[max_suffix{,_rev}+1...j] + 1 <= k <= p + */ + + /* Perform lexicographic search. */ + max_suffix = SIZE_MAX; + j = 0; + k = p = 1; + while (j + k < needle_len) + { + a = CANON_ELEMENT (needle[j + k]); + b = CANON_ELEMENT (needle[max_suffix + k]); + if (a < b) + { + /* Suffix is smaller, period is entire prefix so far. */ + j += k; + k = 1; + p = j - max_suffix; + } + else if (a == b) + { + /* Advance through repetition of the current period. */ + if (k != p) + ++k; + else + { + j += p; + k = 1; + } + } + else /* b < a */ + { + /* Suffix is larger, start over from current location. */ + max_suffix = j++; + k = p = 1; + } + } + *period = p; + + /* Perform reverse lexicographic search. */ + max_suffix_rev = SIZE_MAX; + j = 0; + k = p = 1; + while (j + k < needle_len) + { + a = CANON_ELEMENT (needle[j + k]); + b = CANON_ELEMENT (needle[max_suffix_rev + k]); + if (b < a) + { + /* Suffix is smaller, period is entire prefix so far. */ + j += k; + k = 1; + p = j - max_suffix_rev; + } + else if (a == b) + { + /* Advance through repetition of the current period. */ + if (k != p) + ++k; + else + { + j += p; + k = 1; + } + } + else /* a < b */ + { + /* Suffix is larger, start over from current location. */ + max_suffix_rev = j++; + k = p = 1; + } + } + + /* Choose the longer suffix. Return the first byte of the right + half, rather than the last byte of the left half. */ + if (max_suffix_rev + 1 < max_suffix + 1) + return max_suffix + 1; + *period = p; + return max_suffix_rev + 1; +} + +/* Return the first location of non-empty NEEDLE within HAYSTACK, or + NULL. HAYSTACK_LEN is the minimum known length of HAYSTACK. This + method is optimized for NEEDLE_LEN < LONG_NEEDLE_THRESHOLD. + Performance is guaranteed to be linear, with an initialization cost + of 2 * NEEDLE_LEN comparisons. + + If AVAILABLE does not modify HAYSTACK_LEN (as in memmem), then at + most 2 * HAYSTACK_LEN - NEEDLE_LEN comparisons occur in searching. + If AVAILABLE modifies HAYSTACK_LEN (as in strstr), then at most 3 * + HAYSTACK_LEN - NEEDLE_LEN comparisons occur in searching. */ +static RETURN_TYPE +two_way_short_needle (const unsigned char *haystack, size_t haystack_len, + const unsigned char *needle, size_t needle_len) +{ + size_t i; /* Index into current byte of NEEDLE. */ + size_t j; /* Index into current window of HAYSTACK. */ + size_t period; /* The period of the right half of needle. */ + size_t suffix; /* The index of the right half of needle. */ + + /* Factor the needle into two halves, such that the left half is + smaller than the global period, and the right half is + periodic (with a period as large as NEEDLE_LEN - suffix). */ + suffix = critical_factorization (needle, needle_len, &period); + + /* Perform the search. Each iteration compares the right half + first. */ + if (CMP_FUNC (needle, needle + period, suffix) == 0) + { + /* Entire needle is periodic; a mismatch can only advance by the + period, so use memory to avoid rescanning known occurrences + of the period. */ + size_t memory = 0; + j = 0; + while (AVAILABLE (haystack, haystack_len, j, needle_len)) + { + /* Scan for matches in right half. */ + i = MAX (suffix, memory); + while (i < needle_len && (CANON_ELEMENT (needle[i]) + == CANON_ELEMENT (haystack[i + j]))) + ++i; + if (needle_len <= i) + { + /* Scan for matches in left half. */ + i = suffix - 1; + while (memory < i + 1 && (CANON_ELEMENT (needle[i]) + == CANON_ELEMENT (haystack[i + j]))) + --i; + if (i + 1 < memory + 1) + return (RETURN_TYPE) (haystack + j); + /* No match, so remember how many repetitions of period + on the right half were scanned. */ + j += period; + memory = needle_len - period; + } + else + { + j += i - suffix + 1; + memory = 0; + } + } + } + else + { + /* The two halves of needle are distinct; no extra memory is + required, and any mismatch results in a maximal shift. */ + period = MAX (suffix, needle_len - suffix) + 1; + j = 0; + while (AVAILABLE (haystack, haystack_len, j, needle_len)) + { + /* Scan for matches in right half. */ + i = suffix; + while (i < needle_len && (CANON_ELEMENT (needle[i]) + == CANON_ELEMENT (haystack[i + j]))) + ++i; + if (needle_len <= i) + { + /* Scan for matches in left half. */ + i = suffix - 1; + while (i != SIZE_MAX && (CANON_ELEMENT (needle[i]) + == CANON_ELEMENT (haystack[i + j]))) + --i; + if (i == SIZE_MAX) + return (RETURN_TYPE) (haystack + j); + j += period; + } + else + j += i - suffix + 1; + } + } + return NULL; +} + +/* Return the first location of non-empty NEEDLE within HAYSTACK, or + NULL. HAYSTACK_LEN is the minimum known length of HAYSTACK. This + method is optimized for LONG_NEEDLE_THRESHOLD <= NEEDLE_LEN. + Performance is guaranteed to be linear, with an initialization cost + of 3 * NEEDLE_LEN + (1 << CHAR_BIT) operations. + + If AVAILABLE does not modify HAYSTACK_LEN (as in memmem), then at + most 2 * HAYSTACK_LEN - NEEDLE_LEN comparisons occur in searching, + and sublinear performance O(HAYSTACK_LEN / NEEDLE_LEN) is possible. + If AVAILABLE modifies HAYSTACK_LEN (as in strstr), then at most 3 * + HAYSTACK_LEN - NEEDLE_LEN comparisons occur in searching, and + sublinear performance is not possible. */ +static RETURN_TYPE +two_way_long_needle (const unsigned char *haystack, size_t haystack_len, + const unsigned char *needle, size_t needle_len) +{ + size_t i; /* Index into current byte of NEEDLE. */ + size_t j; /* Index into current window of HAYSTACK. */ + size_t period; /* The period of the right half of needle. */ + size_t suffix; /* The index of the right half of needle. */ + size_t shift_table[1U << CHAR_BIT]; /* See below. */ + + /* Factor the needle into two halves, such that the left half is + smaller than the global period, and the right half is + periodic (with a period as large as NEEDLE_LEN - suffix). */ + suffix = critical_factorization (needle, needle_len, &period); + + /* Populate shift_table. For each possible byte value c, + shift_table[c] is the distance from the last occurrence of c to + the end of NEEDLE, or NEEDLE_LEN if c is absent from the NEEDLE. + shift_table[NEEDLE[NEEDLE_LEN - 1]] contains the only 0. */ + for (i = 0; i < 1U << CHAR_BIT; i++) + shift_table[i] = needle_len; + for (i = 0; i < needle_len; i++) + shift_table[CANON_ELEMENT (needle[i])] = needle_len - i - 1; + + /* Perform the search. Each iteration compares the right half + first. */ + if (CMP_FUNC (needle, needle + period, suffix) == 0) + { + /* Entire needle is periodic; a mismatch can only advance by the + period, so use memory to avoid rescanning known occurrences + of the period. */ + size_t memory = 0; + size_t shift; + j = 0; + while (AVAILABLE (haystack, haystack_len, j, needle_len)) + { + /* Check the last byte first; if it does not match, then + shift to the next possible match location. */ + shift = shift_table[CANON_ELEMENT (haystack[j + needle_len - 1])]; + if (0 < shift) + { + if (memory && shift < period) + { + /* Since needle is periodic, but the last period has + a byte out of place, there can be no match until + after the mismatch. */ + shift = needle_len - period; + memory = 0; + } + j += shift; + continue; + } + /* Scan for matches in right half. The last byte has + already been matched, by virtue of the shift table. */ + i = MAX (suffix, memory); + while (i < needle_len - 1 && (CANON_ELEMENT (needle[i]) + == CANON_ELEMENT (haystack[i + j]))) + ++i; + if (needle_len - 1 <= i) + { + /* Scan for matches in left half. */ + i = suffix - 1; + while (memory < i + 1 && (CANON_ELEMENT (needle[i]) + == CANON_ELEMENT (haystack[i + j]))) + --i; + if (i + 1 < memory + 1) + return (RETURN_TYPE) (haystack + j); + /* No match, so remember how many repetitions of period + on the right half were scanned. */ + j += period; + memory = needle_len - period; + } + else + { + j += i - suffix + 1; + memory = 0; + } + } + } + else + { + /* The two halves of needle are distinct; no extra memory is + required, and any mismatch results in a maximal shift. */ + size_t shift; + period = MAX (suffix, needle_len - suffix) + 1; + j = 0; + while (AVAILABLE (haystack, haystack_len, j, needle_len)) + { + /* Check the last byte first; if it does not match, then + shift to the next possible match location. */ + shift = shift_table[CANON_ELEMENT (haystack[j + needle_len - 1])]; + if (0 < shift) + { + j += shift; + continue; + } + /* Scan for matches in right half. The last byte has + already been matched, by virtue of the shift table. */ + i = suffix; + while (i < needle_len - 1 && (CANON_ELEMENT (needle[i]) + == CANON_ELEMENT (haystack[i + j]))) + ++i; + if (needle_len - 1 <= i) + { + /* Scan for matches in left half. */ + i = suffix - 1; + while (i != SIZE_MAX && (CANON_ELEMENT (needle[i]) + == CANON_ELEMENT (haystack[i + j]))) + --i; + if (i == SIZE_MAX) + return (RETURN_TYPE) (haystack + j); + j += period; + } + else + j += i - suffix + 1; + } + } + return NULL; +} + +#undef AVAILABLE +#undef CANON_ELEMENT +#undef CMP_FUNC +#undef MAX +#undef RETURN_TYPE diff --git a/lgl/string.in.h b/lgl/string.in.h index eb6a20f876..b10a491438 100644 --- a/lgl/string.in.h +++ b/lgl/string.in.h @@ -27,7 +27,7 @@ #ifndef __attribute__ /* This feature is available in gcc versions 2.5 and later. */ -# if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 5) || __STRICT_ANSI__ +# if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 5) # define __attribute__(Spec) /* empty */ # endif /* The attribute __pure__ was added in gcc 2.96. */ @@ -58,8 +58,9 @@ extern void *memmem (void const *__haystack, size_t __haystack_len, #elif defined GNULIB_POSIXCHECK # undef memmem # define memmem(a,al,b,bl) \ - (GL_LINK_WARNING ("memmem is unportable - " \ - "use gnulib module memmem for portability"), \ + (GL_LINK_WARNING ("memmem is unportable and often quadratic - " \ + "use gnulib module memmem-simple for portability, " \ + "and module memmem for speed" ), \ memmem (a, al, b, bl)) #endif @@ -289,26 +290,38 @@ extern char *strsep (char **restrict __stringp, char const *restrict __delim); strsep (s, d)) #endif -#if defined GNULIB_POSIXCHECK +#if @GNULIB_STRSTR@ +# if @REPLACE_STRSTR@ +# define strstr rpl_strstr +char *strstr (const char *haystack, const char *needle) + __attribute__ ((__pure__)); +# endif +#elif defined GNULIB_POSIXCHECK /* strstr() does not work with multibyte strings if the locale encoding is different from UTF-8: POSIX says that it operates on "strings", and "string" in POSIX is defined as a sequence of bytes, not of characters. */ # undef strstr # define strstr(a,b) \ - (GL_LINK_WARNING ("strstr cannot work correctly on character strings " \ - "in most multibyte locales - " \ - "use mbsstr if you care about internationalization"), \ + (GL_LINK_WARNING ("strstr is quadratic on many systems, and cannot " \ + "work correctly on character strings in most " \ + "multibyte locales - " \ + "use mbsstr if you care about internationalization, " \ + "or use strstr if you care about speed"), \ strstr (a, b)) #endif /* Find the first occurrence of NEEDLE in HAYSTACK, using case-insensitive comparison. */ -#if ! @HAVE_STRCASESTR@ +#if @GNULIB_STRCASESTR@ +# if @REPLACE_STRCASESTR@ +# define strcasestr rpl_strcasestr +# endif +# if ! @HAVE_STRCASESTR@ || @REPLACE_STRCASESTR@ extern char *strcasestr (const char *haystack, const char *needle) __attribute__ ((__pure__)); -#endif -#if defined GNULIB_POSIXCHECK +# endif +#elif defined GNULIB_POSIXCHECK /* strcasestr() does not work with multibyte strings: It is a glibc extension, and glibc implements it only for unibyte locales. */ @@ -532,6 +545,21 @@ extern char *strerror (int); strerror (e)) #endif +#if @GNULIB_STRSIGNAL@ +# if @REPLACE_STRSIGNAL@ +# define strsignal rpl_strsignal +# endif +# if ! @HAVE_DECL_STRSIGNAL@ || @REPLACE_STRSIGNAL@ +extern char *strsignal (int __sig); +# endif +#elif defined GNULIB_POSIXCHECK +# undef strsignal +# define strsignal(a) \ + (GL_LINK_WARNING ("strsignal is unportable - " \ + "use gnulib module strsignal for portability"), \ + strsignal (a)) +#endif + #ifdef __cplusplus } diff --git a/lgl/vasnprintf.h b/lgl/vasnprintf.h index 4524ce77ff..7f5770aaf4 100644 --- a/lgl/vasnprintf.h +++ b/lgl/vasnprintf.h @@ -1,5 +1,5 @@ /* vsprintf with automatic memory allocation. - Copyright (C) 2002-2004, 2007 Free Software Foundation, Inc. + Copyright (C) 2002-2004, 2007-2008 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by @@ -26,7 +26,7 @@ #ifndef __attribute__ /* This feature is available in gcc versions 2.5 and later. */ -# if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 5) || __STRICT_ANSI__ +# if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 5) # define __attribute__(Spec) /* empty */ # endif /* The __-protected variants of `format' and `printf' attributes |