diff options
author | Simon Josefsson <simon@josefsson.org> | 2008-01-09 11:03:39 +0100 |
---|---|---|
committer | Simon Josefsson <simon@josefsson.org> | 2008-01-09 11:03:39 +0100 |
commit | 0fa84f1e5706a101c5e39cc3f12777bc0f8f5e3d (patch) | |
tree | a46b50357da64fb1e6ed1976fcd0a0d5ce0db1ff /lgl | |
parent | e5072435776f78842ffa197c39b4e25f7ce6dc99 (diff) | |
download | gnutls-0fa84f1e5706a101c5e39cc3f12777bc0f8f5e3d.tar.gz |
Update gnulib files.
Diffstat (limited to 'lgl')
-rw-r--r-- | lgl/Makefile.am | 14 | ||||
-rw-r--r-- | lgl/dummy.c | 42 | ||||
-rw-r--r-- | lgl/m4/eealloc.m4 | 32 | ||||
-rw-r--r-- | lgl/m4/gnulib-comp.m4 | 7 | ||||
-rw-r--r-- | lgl/m4/malloca.m4 | 14 | ||||
-rw-r--r-- | lgl/malloca.c | 137 | ||||
-rw-r--r-- | lgl/malloca.h | 134 | ||||
-rw-r--r-- | lgl/malloca.valgrind | 7 | ||||
-rw-r--r-- | lgl/memmem.c | 555 | ||||
-rw-r--r-- | lgl/printf-parse.c | 40 |
10 files changed, 450 insertions, 532 deletions
diff --git a/lgl/Makefile.am b/lgl/Makefile.am index 66004e71bc..e4146a26de 100644 --- a/lgl/Makefile.am +++ b/lgl/Makefile.am @@ -230,14 +230,6 @@ EXTRA_DIST += $(top_srcdir)/build-aux/link-warning.h ## end gnulib module link-warning -## begin gnulib module malloca - -liblgnu_la_SOURCES += malloca.c - -EXTRA_DIST += malloca.h malloca.valgrind - -## end gnulib module malloca - ## begin gnulib module memchr @@ -726,6 +718,12 @@ liblgnu_la_SOURCES += xsize.h ## end gnulib module xsize +## begin gnulib module dummy + +liblgnu_la_SOURCES += dummy.c + +## end gnulib module dummy + mostlyclean-local: mostlyclean-generic @for dir in '' $(MOSTLYCLEANDIRS); do \ diff --git a/lgl/dummy.c b/lgl/dummy.c new file mode 100644 index 0000000000..5834053b05 --- /dev/null +++ b/lgl/dummy.c @@ -0,0 +1,42 @@ +/* A dummy file, to prevent empty libraries from breaking builds. + Copyright (C) 2004, 2007 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +/* Some systems, reportedly OpenBSD and Mac OS X, refuse to create + libraries without any object files. You might get an error like: + + > ar cru .libs/libgl.a + > ar: no archive members specified + + Compiling this file, and adding its object file to the library, will + prevent the library from being empty. */ + +/* Some systems, such as Solaris with cc 5.0, refuse to work with libraries + that don't export any symbol. You might get an error like: + + > cc ... libgnu.a + > ild: (bad file) garbled symbol table in archive ../gllib/libgnu.a + + Compiling this file, and adding its object file to the library, will + prevent the library from exporting no symbols. */ + +#ifdef __sun +/* This declaration ensures that the library will export at least 1 symbol. */ +int gl_dummy_symbol; +#else +/* This declaration is solely to ensure that after preprocessing + this file is never empty. */ +typedef int dummy; +#endif diff --git a/lgl/m4/eealloc.m4 b/lgl/m4/eealloc.m4 deleted file mode 100644 index adcfd06c95..0000000000 --- a/lgl/m4/eealloc.m4 +++ /dev/null @@ -1,32 +0,0 @@ -# eealloc.m4 serial 1 -dnl Copyright (C) 2003 Free Software Foundation, Inc. -dnl This file is free software; the Free Software Foundation -dnl gives unlimited permission to copy and/or distribute it, -dnl with or without modifications, as long as this notice is preserved. - -AC_DEFUN([gl_EEALLOC], -[ - AC_REQUIRE([gl_EEMALLOC]) - AC_REQUIRE([gl_EEREALLOC]) - AC_REQUIRE([AC_C_INLINE]) -]) - -AC_DEFUN([gl_EEMALLOC], -[ - _AC_FUNC_MALLOC_IF( - [gl_cv_func_malloc_0_nonnull=1], - [gl_cv_func_malloc_0_nonnull=0]) - AC_DEFINE_UNQUOTED([MALLOC_0_IS_NONNULL], $gl_cv_func_malloc_0_nonnull, - [If malloc(0) is != NULL, define this to 1. Otherwise define this - to 0.]) -]) - -AC_DEFUN([gl_EEREALLOC], -[ - _AC_FUNC_REALLOC_IF( - [gl_cv_func_realloc_0_nonnull=1], - [gl_cv_func_realloc_0_nonnull=0]) - AC_DEFINE_UNQUOTED([REALLOC_0_IS_NONNULL], $gl_cv_func_realloc_0_nonnull, - [If realloc(NULL,0) is != NULL, define this to 1. Otherwise define this - to 0.]) -]) diff --git a/lgl/m4/gnulib-comp.m4 b/lgl/m4/gnulib-comp.m4 index da6970ce89..273922a4e2 100644 --- a/lgl/m4/gnulib-comp.m4 +++ b/lgl/m4/gnulib-comp.m4 @@ -76,7 +76,6 @@ AC_DEFUN([lgl_INIT], AM_GNU_GETTEXT_VERSION([0.17]) AC_SUBST([LIBINTL]) AC_SUBST([LTLIBINTL]) - gl_MALLOCA gl_FUNC_MEMCHR gl_FUNC_MEMCMP gl_FUNC_MEMMEM @@ -237,6 +236,7 @@ AC_DEFUN([lgl_FILE_LIST], [ lib/asprintf.c lib/des.c lib/des.h + lib/dummy.c lib/float+.h lib/float.in.h lib/gc-gnulib.c @@ -247,9 +247,6 @@ AC_DEFUN([lgl_FILE_LIST], [ lib/hmac-md5.c lib/hmac-sha1.c lib/hmac.h - lib/malloca.c - lib/malloca.h - lib/malloca.valgrind lib/md2.c lib/md2.h lib/md4.c @@ -301,7 +298,6 @@ AC_DEFUN([lgl_FILE_LIST], [ m4/arctwo.m4 m4/codeset.m4 m4/des.m4 - m4/eealloc.m4 m4/eoverflow.m4 m4/extensions.m4 m4/float_h.m4 @@ -342,7 +338,6 @@ AC_DEFUN([lgl_FILE_LIST], [ m4/lock.m4 m4/longlong.m4 m4/malloc.m4 - m4/malloca.m4 m4/md2.m4 m4/md4.m4 m4/md5.m4 diff --git a/lgl/m4/malloca.m4 b/lgl/m4/malloca.m4 deleted file mode 100644 index 2841ae83a7..0000000000 --- a/lgl/m4/malloca.m4 +++ /dev/null @@ -1,14 +0,0 @@ -# malloca.m4 serial 1 -dnl Copyright (C) 2003-2004, 2006-2007 Free Software Foundation, Inc. -dnl This file is free software; the Free Software Foundation -dnl gives unlimited permission to copy and/or distribute it, -dnl with or without modifications, as long as this notice is preserved. - -AC_DEFUN([gl_MALLOCA], -[ - dnl Use the autoconf tests for alloca(), but not the AC_SUBSTed variables - dnl @ALLOCA@ and @LTALLOCA@. - dnl gl_FUNC_ALLOCA dnl Already brought in by the module dependencies. - AC_REQUIRE([gl_EEMALLOC]) - AC_REQUIRE([AC_TYPE_LONG_LONG_INT]) -]) diff --git a/lgl/malloca.c b/lgl/malloca.c deleted file mode 100644 index 778fdf1baf..0000000000 --- a/lgl/malloca.c +++ /dev/null @@ -1,137 +0,0 @@ -/* Safe automatic memory allocation. - Copyright (C) 2003, 2006-2007 Free Software Foundation, Inc. - Written by Bruno Haible <bruno@clisp.org>, 2003. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU Lesser General Public License as published by - the Free Software Foundation; either version 2.1, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -#include <config.h> - -/* Specification. */ -#include "malloca.h" - -/* The speed critical point in this file is freea() applied to an alloca() - result: it must be fast, to match the speed of alloca(). The speed of - mmalloca() and freea() in the other case are not critical, because they - are only invoked for big memory sizes. */ - -#if HAVE_ALLOCA - -/* Store the mmalloca() results in a hash table. This is needed to reliably - distinguish a mmalloca() result and an alloca() result. - - Although it is possible that the same pointer is returned by alloca() and - by mmalloca() at different times in the same application, it does not lead - to a bug in freea(), because: - - Before a pointer returned by alloca() can point into malloc()ed memory, - the function must return, and once this has happened the programmer must - not call freea() on it anyway. - - Before a pointer returned by mmalloca() can point into the stack, it - must be freed. The only function that can free it is freea(), and - when freea() frees it, it also removes it from the hash table. */ - -#define MAGIC_NUMBER 0x1415fb4a -#define MAGIC_SIZE sizeof (int) -/* This is how the header info would look like without any alignment - considerations. */ -struct preliminary_header { void *next; char room[MAGIC_SIZE]; }; -/* But the header's size must be a multiple of sa_alignment_max. */ -#define HEADER_SIZE \ - (((sizeof (struct preliminary_header) + sa_alignment_max - 1) / sa_alignment_max) * sa_alignment_max) -struct header { void *next; char room[HEADER_SIZE - sizeof (struct preliminary_header) + MAGIC_SIZE]; }; -/* Verify that HEADER_SIZE == sizeof (struct header). */ -typedef int verify1[2 * (HEADER_SIZE == sizeof (struct header)) - 1]; -/* We make the hash table quite big, so that during lookups the probability - of empty hash buckets is quite high. There is no need to make the hash - table resizable, because when the hash table gets filled so much that the - lookup becomes slow, it means that the application has memory leaks. */ -#define HASH_TABLE_SIZE 257 -static void * mmalloca_results[HASH_TABLE_SIZE]; - -#endif - -void * -mmalloca (size_t n) -{ -#if HAVE_ALLOCA - /* Allocate one more word, that serves as an indicator for malloc()ed - memory, so that freea() of an alloca() result is fast. */ - size_t nplus = n + HEADER_SIZE; - - if (nplus >= n) - { - char *p = (char *) malloc (nplus); - - if (p != NULL) - { - size_t slot; - - p += HEADER_SIZE; - - /* Put a magic number into the indicator word. */ - ((int *) p)[-1] = MAGIC_NUMBER; - - /* Enter p into the hash table. */ - slot = (unsigned long) p % HASH_TABLE_SIZE; - ((struct header *) (p - HEADER_SIZE))->next = mmalloca_results[slot]; - mmalloca_results[slot] = p; - - return p; - } - } - /* Out of memory. */ - return NULL; -#else -# if !MALLOC_0_IS_NONNULL - if (n == 0) - n = 1; -# endif - return malloc (n); -#endif -} - -#if HAVE_ALLOCA -void -freea (void *p) -{ - /* mmalloca() may have returned NULL. */ - if (p != NULL) - { - /* Attempt to quickly distinguish the mmalloca() result - which has - a magic indicator word - and the alloca() result - which has an - uninitialized indicator word. It is for this test that sa_increment - additional bytes are allocated in the alloca() case. */ - if (((int *) p)[-1] == MAGIC_NUMBER) - { - /* Looks like a mmalloca() result. To see whether it really is one, - perform a lookup in the hash table. */ - size_t slot = (unsigned long) p % HASH_TABLE_SIZE; - void **chain = &mmalloca_results[slot]; - for (; *chain != NULL;) - { - if (*chain == p) - { - /* Found it. Remove it from the hash table and free it. */ - char *p_begin = (char *) p - HEADER_SIZE; - *chain = ((struct header *) p_begin)->next; - free (p_begin); - return; - } - chain = &((struct header *) ((char *) *chain - HEADER_SIZE))->next; - } - } - /* At this point, we know it was not a mmalloca() result. */ - } -} -#endif diff --git a/lgl/malloca.h b/lgl/malloca.h deleted file mode 100644 index 15448a34e3..0000000000 --- a/lgl/malloca.h +++ /dev/null @@ -1,134 +0,0 @@ -/* Safe automatic memory allocation. - Copyright (C) 2003-2007 Free Software Foundation, Inc. - Written by Bruno Haible <bruno@clisp.org>, 2003. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU Lesser General Public License as published by - the Free Software Foundation; either version 2.1, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - -#ifndef _MALLOCA_H -#define _MALLOCA_H - -#include <alloca.h> -#include <stddef.h> -#include <stdlib.h> - - -#ifdef __cplusplus -extern "C" { -#endif - - -/* safe_alloca(N) is equivalent to alloca(N) when it is safe to call - alloca(N); otherwise it returns NULL. It either returns N bytes of - memory allocated on the stack, that lasts until the function returns, - or NULL. - Use of safe_alloca should be avoided: - - inside arguments of function calls - undefined behaviour, - - in inline functions - the allocation may actually last until the - calling function returns. -*/ -#if HAVE_ALLOCA -/* The OS usually guarantees only one guard page at the bottom of the stack, - and a page size can be as small as 4096 bytes. So we cannot safely - allocate anything larger than 4096 bytes. Also care for the possibility - of a few compiler-allocated temporary stack slots. - This must be a macro, not an inline function. */ -# define safe_alloca(N) ((N) < 4032 ? alloca (N) : NULL) -#else -# define safe_alloca(N) ((void) (N), NULL) -#endif - -/* malloca(N) is a safe variant of alloca(N). It allocates N bytes of - memory allocated on the stack, that must be freed using freea() before - the function returns. Upon failure, it returns NULL. */ -#if HAVE_ALLOCA -# define malloca(N) \ - ((N) < 4032 - sa_increment \ - ? (void *) ((char *) alloca ((N) + sa_increment) + sa_increment) \ - : mmalloca (N)) -#else -# define malloca(N) \ - mmalloca (N) -#endif -extern void * mmalloca (size_t n); - -/* Free a block of memory allocated through malloca(). */ -#if HAVE_ALLOCA -extern void freea (void *p); -#else -# define freea free -#endif - -/* nmalloca(N,S) is an overflow-safe variant of malloca (N * S). - It allocates an array of N objects, each with S bytes of memory, - on the stack. S must be positive and N must be nonnegative. - The array must be freed using freea() before the function returns. */ -#if 1 -/* Cf. the definition of xalloc_oversized. */ -# define nmalloca(n, s) \ - ((n) > (size_t) (sizeof (ptrdiff_t) <= sizeof (size_t) ? -1 : -2) / (s) \ - ? NULL \ - : malloca ((n) * (s))) -#else -extern void * nmalloca (size_t n, size_t s); -#endif - - -#ifdef __cplusplus -} -#endif - - -/* ------------------- Auxiliary, non-public definitions ------------------- */ - -/* Determine the alignment of a type at compile time. */ -#if defined __GNUC__ -# define sa_alignof __alignof__ -#elif defined __cplusplus - template <class type> struct sa_alignof_helper { char __slot1; type __slot2; }; -# define sa_alignof(type) offsetof (sa_alignof_helper<type>, __slot2) -#elif defined __hpux - /* Work around a HP-UX 10.20 cc bug with enums constants defined as offsetof - values. */ -# define sa_alignof(type) (sizeof (type) <= 4 ? 4 : 8) -#elif defined _AIX - /* Work around an AIX 3.2.5 xlc bug with enums constants defined as offsetof - values. */ -# define sa_alignof(type) (sizeof (type) <= 4 ? 4 : 8) -#else -# define sa_alignof(type) offsetof (struct { char __slot1; type __slot2; }, __slot2) -#endif - -enum -{ -/* The desired alignment of memory allocations is the maximum alignment - among all elementary types. */ - sa_alignment_long = sa_alignof (long), - sa_alignment_double = sa_alignof (double), -#if HAVE_LONG_LONG_INT - sa_alignment_longlong = sa_alignof (long long), -#endif - sa_alignment_longdouble = sa_alignof (long double), - sa_alignment_max = ((sa_alignment_long - 1) | (sa_alignment_double - 1) -#if HAVE_LONG_LONG_INT - | (sa_alignment_longlong - 1) -#endif - | (sa_alignment_longdouble - 1) - ) + 1, -/* The increment that guarantees room for a magic word must be >= sizeof (int) - and a multiple of sa_alignment_max. */ - sa_increment = ((sizeof (int) + sa_alignment_max - 1) / sa_alignment_max) * sa_alignment_max -}; - -#endif /* _MALLOCA_H */ diff --git a/lgl/malloca.valgrind b/lgl/malloca.valgrind deleted file mode 100644 index 52f0a50f57..0000000000 --- a/lgl/malloca.valgrind +++ /dev/null @@ -1,7 +0,0 @@ -# Suppress a valgrind message about use of uninitialized memory in freea(). -# This use is OK because it provides only a speedup. -{ - freea - Memcheck:Cond - fun:freea -} diff --git a/lgl/memmem.c b/lgl/memmem.c index 1196b3e200..c5dbd613fa 100644 --- a/lgl/memmem.c +++ b/lgl/memmem.c @@ -1,4 +1,5 @@ -/* Copyright (C) 1991,92,93,94,96,97,98,2000,2004,2007,2008 Free Software Foundation, Inc. +/* Copyright (C) 1991,92,93,94,96,97,98,2000,2004,2007,2008 Free Software + Foundation, Inc. This file is part of the GNU C Library. This program is free software; you can redistribute it and/or modify @@ -15,140 +16,369 @@ with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ +/* This particular implementation was written by Eric Blake, 2008. */ + #ifndef _LIBC # include <config.h> #endif -#include <stddef.h> +/* Specification of memmem. */ #include <string.h> -#include <stdbool.h> -#include "malloca.h" +#include <limits.h> +#include <stddef.h> +#include <stdint.h> #ifndef _LIBC # define __builtin_expect(expr, val) (expr) #endif -/* Knuth-Morris-Pratt algorithm. - See http://en.wikipedia.org/wiki/Knuth-Morris-Pratt_algorithm - Return a boolean indicating success. */ +/* We use the Two-Way string matching algorithm, which guarantees + linear complexity with constant space. Additionally, for long + needles, we also use a bad character shift table similar to the + Boyer-Moore algorithm to achieve sub-linear performance. + + See http://www-igm.univ-mlv.fr/~lecroq/string/node26.html#SECTION00260 + and http://en.wikipedia.org/wiki/Boyer-Moore_string_search_algorithm +*/ + +/* Point at which computing a bad-byte shift table is likely to be + worthwhile. Small needles should not compute a table, since it + adds (1 << CHAR_BIT) + NEEDLE_LEN computations of preparation for a + speedup no greater than a factor of NEEDLE_LEN. The larger the + needle, the better the potential performance gain. On the other + hand, on non-POSIX systems with CHAR_BIT larger than eight, the + memory required for the table is prohibitive. */ +#if CHAR_BIT < 10 +# define LONG_NEEDLE_THRESHOLD 32U +#else +# define LONG_NEEDLE_THRESHOLD SIZE_MAX +#endif + +#define MAX(a, b) ((a < b) ? (b) : (a)) + +/* Perform a critical factorization of NEEDLE, of length NEEDLE_LEN. + Return the index of the first byte in the right half, and set + *PERIOD to the global period of the right half. -static bool -knuth_morris_pratt (const unsigned char *haystack, - const unsigned char *last_haystack, - const unsigned char *needle, size_t m, - const unsigned char **resultp) + The global period of a string is the smallest index (possibly its + length) at which all remaining bytes in the string are repetitions + of the prefix (the last repetition may be a subset of the prefix). + + When NEEDLE is factored into two halves, a local period is the + length of the smallest word that shares a suffix with the left half + and shares a prefix with the right half. All factorizations of a + non-empty NEEDLE have a local period of at least 1 and no greater + than NEEDLE_LEN. + + A critical factorization has the property that the local period + equals the global period. All strings have at least one critical + factorization with the left half smaller than the global period. + + Given an ordered alphabet, a critical factorization can be computed + in linear time, with 2 * NEEDLE_LEN comparisons, by computing the + larger of two ordered maximal suffixes. The ordered maximal + suffixes are determined by lexicographic comparison of + periodicity. */ +static size_t +critical_factorization (const unsigned char *needle, size_t needle_len, + size_t *period) { - /* Allocate the table. */ - size_t *table = (size_t *) nmalloca (m, sizeof (size_t)); - if (table == NULL) - return false; - /* Fill the table. - For 0 < i < m: - 0 < table[i] <= i is defined such that - forall 0 < x < table[i]: needle[x..i-1] != needle[0..i-1-x], - and table[i] is as large as possible with this property. - This implies: - 1) For 0 < i < m: - If table[i] < i, - needle[table[i]..i-1] = needle[0..i-1-table[i]]. - 2) For 0 < i < m: - rhaystack[0..i-1] == needle[0..i-1] - and exists h, i <= h < m: rhaystack[h] != needle[h] - implies - forall 0 <= x < table[i]: rhaystack[x..x+m-1] != needle[0..m-1]. - table[0] remains uninitialized. */ - { - size_t i, j; - - /* i = 1: Nothing to verify for x = 0. */ - table[1] = 1; - j = 0; - - for (i = 2; i < m; i++) - { - /* Here: j = i-1 - table[i-1]. - The inequality needle[x..i-1] != needle[0..i-1-x] is known to hold - for x < table[i-1], by induction. - Furthermore, if j>0: needle[i-1-j..i-2] = needle[0..j-1]. */ - unsigned char b = needle[i - 1]; - - for (;;) - { - /* Invariants: The inequality needle[x..i-1] != needle[0..i-1-x] - is known to hold for x < i-1-j. - Furthermore, if j>0: needle[i-1-j..i-2] = needle[0..j-1]. */ - if (b == needle[j]) - { - /* Set table[i] := i-1-j. */ - table[i] = i - ++j; - break; - } - /* The inequality needle[x..i-1] != needle[0..i-1-x] also holds - for x = i-1-j, because - needle[i-1] != needle[j] = needle[i-1-x]. */ - if (j == 0) - { - /* The inequality holds for all possible x. */ - table[i] = i; - break; - } - /* The inequality needle[x..i-1] != needle[0..i-1-x] also holds - for i-1-j < x < i-1-j+table[j], because for these x: - needle[x..i-2] - = needle[x-(i-1-j)..j-1] - != needle[0..j-1-(x-(i-1-j))] (by definition of table[j]) - = needle[0..i-2-x], - hence needle[x..i-1] != needle[0..i-1-x]. - Furthermore - needle[i-1-j+table[j]..i-2] - = needle[table[j]..j-1] - = needle[0..j-1-table[j]] (by definition of table[j]). */ - j = j - table[j]; - } - /* Here: j = i - table[i]. */ - } - } - - /* Search, using the table to accelerate the processing. */ - { - size_t j; - const unsigned char *rhaystack; - const unsigned char *phaystack; - - *resultp = NULL; - j = 0; - rhaystack = haystack; - phaystack = haystack; - /* Invariant: phaystack = rhaystack + j. */ - while (phaystack != last_haystack) - if (needle[j] == *phaystack) + /* Index of last byte of left half, or SIZE_MAX. */ + size_t max_suffix, max_suffix_rev; + size_t j; /* Index into NEEDLE for current candidate suffix. */ + size_t k; /* Offset into current period. */ + size_t p; /* Intermediate period. */ + unsigned char a, b; /* Current comparison bytes. */ + + /* Invariants: + 0 <= j < NEEDLE_LEN - 1 + -1 <= max_suffix{,_rev} < j (treating SIZE_MAX as if it were signed) + min(max_suffix, max_suffix_rev) < global period of NEEDLE + 1 <= p <= global period of NEEDLE + p == global period of the substring NEEDLE[max_suffix{,_rev}+1...j] + 1 <= k <= p + */ + + /* Perform lexicographic search. */ + max_suffix = SIZE_MAX; + j = 0; + k = p = 1; + while (j + k < needle_len) + { + a = needle[j + k]; + b = needle[max_suffix + k]; + if (a < b) + { + /* Suffix is smaller, period is entire prefix so far. */ + j += k; + k = 1; + p = j - max_suffix; + } + else if (a == b) { - j++; - phaystack++; - if (j == m) + /* Advance through repetition of the current period. */ + if (k != p) + ++k; + else { - /* The entire needle has been found. */ - *resultp = rhaystack; - break; + j += p; + k = 1; } } - else if (j > 0) + else /* b < a */ + { + /* Suffix is larger, start over from current location. */ + max_suffix = j++; + k = p = 1; + } + } + *period = p; + + /* Perform reverse lexicographic search. */ + max_suffix_rev = SIZE_MAX; + j = 0; + k = p = 1; + while (j + k < needle_len) + { + a = needle[j + k]; + b = needle[max_suffix_rev + k]; + if (b < a) + { + /* Suffix is smaller, period is entire prefix so far. */ + j += k; + k = 1; + p = j - max_suffix_rev; + } + else if (a == b) + { + /* Advance through repetition of the current period. */ + if (k != p) + ++k; + else + { + j += p; + k = 1; + } + } + else /* a < b */ + { + /* Suffix is larger, start over from current location. */ + max_suffix_rev = j++; + k = p = 1; + } + } + + /* Choose the longer suffix. Return the first byte of the right + half, rather than the last byte of the left half. */ + if (max_suffix_rev + 1 < max_suffix + 1) + return max_suffix + 1; + *period = p; + return max_suffix_rev + 1; +} + +/* Return the first location of NEEDLE within HAYSTACK, or NULL. This + method requires 0 < NEEDLE_LEN <= HAYSTACK_LEN, and is optimized + for NEEDLE_LEN < LONG_NEEDLE_THRESHOLD. Performance is linear, + with 2 * NEEDLE_LEN comparisons in preparation, and at most 2 * + HAYSTACK_LEN - NEEDLE_LEN comparisons in searching. */ +static void * +two_way_short_needle (const unsigned char *haystack, size_t haystack_len, + const unsigned char *needle, size_t needle_len) +{ + size_t i; /* Index into current byte of NEEDLE. */ + size_t j; /* Index into current window of HAYSTACK. */ + size_t period; /* The period of the right half of needle. */ + size_t suffix; /* The index of the right half of needle. */ + + /* Factor the needle into two halves, such that the left half is + smaller than the global period, and the right half is + periodic (with a period as large as NEEDLE_LEN - suffix). */ + suffix = critical_factorization (needle, needle_len, &period); + + /* Perform the search. Each iteration compares the right half + first. */ + if (memcmp (needle, needle + period, suffix) == 0) + { + /* Entire needle is periodic; a mismatch can only advance by the + period, so use memory to avoid rescanning known occurrences + of the period. */ + size_t memory = 0; + j = 0; + while (j <= haystack_len - needle_len) { - /* Found a match of needle[0..j-1], mismatch at needle[j]. */ - rhaystack += table[j]; - j -= table[j]; + /* Scan for matches in right half. */ + i = MAX (suffix, memory); + while (i < needle_len && needle[i] == haystack[i + j]) + ++i; + if (needle_len <= i) + { + /* Scan for matches in left half. */ + i = suffix - 1; + while (memory < i + 1 && needle[i] == haystack[i + j]) + --i; + if (i + 1 < memory + 1) + return (void *) (haystack + j); + /* No match, so remember how many repetitions of period + on the right half were scanned. */ + j += period; + memory = needle_len - period; + } + else + { + j += i - suffix + 1; + memory = 0; + } } - else + } + else + { + /* The two halves of needle are distinct; no extra memory is + required, and any mismatch results in a maximal shift. */ + period = MAX (suffix, needle_len - suffix) + 1; + j = 0; + while (j <= haystack_len - needle_len) { - /* Found a mismatch at needle[0] already. */ - rhaystack++; - phaystack++; + /* Scan for matches in right half. */ + i = suffix; + while (i < needle_len && needle[i] == haystack[i + j]) + ++i; + if (needle_len <= i) + { + /* Scan for matches in left half. */ + i = suffix - 1; + while (i != SIZE_MAX && needle[i] == haystack[i + j]) + --i; + if (i == SIZE_MAX) + return (void *) (haystack + j); + j += period; + } + else + j += i - suffix + 1; } - } + } + return NULL; +} + +/* Return the first location of NEEDLE within HAYSTACK, or NULL. This + method requires 0 < NEEDLE_LEN <= HAYSTACK_LEN, and is optimized + for LONG_NEEDLE_THRESHOLD <= NEEDLE_LEN. Performance is linear, + with 3 * NEEDLE_LEN + (1U << CHAR_BIT) operations in preparation, + and at most 2 * HAYSTACK_LEN - NEEDLE_LEN comparisons in searching. + The extra initialization cost allows for potential sublinear + performance O(HAYSTACK_LEN / NEEDLE_LEN). */ +static void * +two_way_long_needle (const unsigned char *haystack, size_t haystack_len, + const unsigned char *needle, size_t needle_len) +{ + size_t i; /* Index into current byte of NEEDLE. */ + size_t j; /* Index into current window of HAYSTACK. */ + size_t period; /* The period of the right half of needle. */ + size_t suffix; /* The index of the right half of needle. */ + size_t shift_table[1U << CHAR_BIT]; /* See below. */ + + /* Factor the needle into two halves, such that the left half is + smaller than the global period, and the right half is + periodic (with a period as large as NEEDLE_LEN - suffix). */ + suffix = critical_factorization (needle, needle_len, &period); - freea (table); - return true; + /* Populate shift_table. For each possible byte value c, + shift_table[c] is the distance from the last occurrence of c to + the end of NEEDLE, or NEEDLE_LEN if c is absent from the NEEDLE. + shift_table[NEEDLE[NEEDLE_LEN - 1]] contains the only 0. */ + for (i = 0; i < 1U << CHAR_BIT; i++) + shift_table[i] = needle_len; + for (i = 0; i < needle_len; i++) + shift_table[needle[i]] = needle_len - i - 1; + + /* Perform the search. Each iteration compares the right half + first. */ + if (memcmp (needle, needle + period, suffix) == 0) + { + /* Entire needle is periodic; a mismatch can only advance by the + period, so use memory to avoid rescanning known occurrences + of the period. */ + size_t memory = 0; + j = 0; + while (j <= haystack_len - needle_len) + { + /* Check the last byte first; if it does not match, then + shift to the next possible match location. */ + size_t shift = shift_table[haystack[j + needle_len - 1]]; + if (0 < shift) + { + if (memory && shift < period) + { + /* Since needle is periodic, but the last period has + a byte out of place, there can be no match until + after the mismatch. */ + shift = needle_len - period; + memory = 0; + } + j += shift; + continue; + } + /* Scan for matches in right half. The last byte has + already been matched, by virtue of the shift table. */ + i = MAX (suffix, memory); + while (i < needle_len - 1 && needle[i] == haystack[i + j]) + ++i; + if (needle_len - 1 <= i) + { + /* Scan for matches in left half. */ + i = suffix - 1; + while (memory < i + 1 && needle[i] == haystack[i + j]) + --i; + if (i + 1 < memory + 1) + return (void *) (haystack + j); + /* No match, so remember how many repetitions of period + on the right half were scanned. */ + j += period; + memory = needle_len - period; + } + else + { + j += i - suffix + 1; + memory = 0; + } + } + } + else + { + /* The two halves of needle are distinct; no extra memory is + required, and any mismatch results in a maximal shift. */ + period = MAX (suffix, needle_len - suffix) + 1; + j = 0; + while (j <= haystack_len - needle_len) + { + /* Check the last byte first; if it does not match, then + shift to the next possible match location. */ + size_t shift = shift_table[haystack[j + needle_len - 1]]; + if (0 < shift) + { + j += shift; + continue; + } + /* Scan for matches in right half. The last byte has + already been matched, by virtue of the shift table. */ + i = suffix; + while (i < needle_len - 1 && needle[i] == haystack[i + j]) + ++i; + if (needle_len - 1 <= i) + { + /* Scan for matches in left half. */ + i = suffix - 1; + while (i != SIZE_MAX && needle[i] == haystack[i + j]) + --i; + if (i == SIZE_MAX) + return (void *) (haystack + j); + j += period; + } + else + j += i - suffix + 1; + } + } + return NULL; } /* Return the first occurrence of NEEDLE in HAYSTACK. Return HAYSTACK @@ -162,8 +392,6 @@ memmem (const void *haystack_start, size_t haystack_len, not an array of 'char' values. See ISO C 99 section 6.2.6.1. */ const unsigned char *haystack = (const unsigned char *) haystack_start; const unsigned char *needle = (const unsigned char *) needle_start; - const unsigned char *last_haystack = haystack + haystack_len; - const unsigned char *last_needle = needle + needle_len; if (needle_len == 0) /* The first occurrence of the empty string is deemed to occur at @@ -175,82 +403,23 @@ memmem (const void *haystack_start, size_t haystack_len, if (__builtin_expect (haystack_len < needle_len, 0)) return NULL; - /* Use optimizations in memchr when possible. */ - if (__builtin_expect (needle_len == 1, 0)) - return memchr (haystack, *needle, haystack_len); - - /* Minimizing the worst-case complexity: - Let n = haystack_len, m = needle_len. - The naïve algorithm is O(n*m) worst-case. - The Knuth-Morris-Pratt algorithm is O(n) worst-case but it needs a - memory allocation. - To achieve linear complexity and yet amortize the cost of the - memory allocation, we activate the Knuth-Morris-Pratt algorithm - only once the naïve algorithm has already run for some time; more - precisely, when - - the outer loop count is >= 10, - - the average number of comparisons per outer loop is >= 5, - - the total number of comparisons is >= m. - But we try it only once. If the memory allocation attempt failed, - we don't retry it. */ - { - bool try_kmp = true; - size_t outer_loop_count = 0; - size_t comparison_count = 0; - - /* Speed up the following searches of needle by caching its first - byte. */ - unsigned char b = *needle++; - - for (;; haystack++) - { - if (haystack == last_haystack) - /* No match. */ - return NULL; - - /* See whether it's advisable to use an asymptotically faster - algorithm. */ - if (try_kmp - && outer_loop_count >= 10 - && comparison_count >= 5 * outer_loop_count) - { - /* See if needle + comparison_count now reaches the end of - needle. */ - if (comparison_count >= needle_len) - { - /* Try the Knuth-Morris-Pratt algorithm. */ - const unsigned char *result; - if (knuth_morris_pratt (haystack, last_haystack, - needle - 1, needle_len, &result)) - return (void *) result; - try_kmp = false; - } - } - - outer_loop_count++; - comparison_count++; - if (*haystack == b) - /* The first byte matches. */ - { - const unsigned char *rhaystack = haystack + 1; - const unsigned char *rneedle = needle; - - for (;; rhaystack++, rneedle++) - { - if (rneedle == last_needle) - /* Found a match. */ - return (void *) haystack; - if (rhaystack == last_haystack) - /* No match. */ - return NULL; - comparison_count++; - if (*rhaystack != *rneedle) - /* Nothing in this round. */ - break; - } - } - } - } - - return NULL; + /* Use optimizations in memchr when possible, to reduce the search + size of haystack using a linear algorithm with a smaller + coefficient. However, avoid memchr for long needles, since we + can often achieve sublinear performance. */ + if (needle_len < LONG_NEEDLE_THRESHOLD) + { + haystack = memchr (haystack, *needle, haystack_len); + if (!haystack || __builtin_expect (needle_len == 1, 0)) + return (void *) haystack; + haystack_len -= haystack - (const unsigned char *) haystack_start; + if (haystack_len < needle_len) + return NULL; + return two_way_short_needle (haystack, haystack_len, needle, needle_len); + } + else + return two_way_long_needle (haystack, haystack_len, needle, needle_len); } + +#undef LONG_NEEDLE_THRESHOLD +#undef MAX diff --git a/lgl/printf-parse.c b/lgl/printf-parse.c index 94c6c61328..66b45a73f2 100644 --- a/lgl/printf-parse.c +++ b/lgl/printf-parse.c @@ -1,5 +1,5 @@ /* Formatted output to strings. - Copyright (C) 1999-2000, 2002-2003, 2006-2007 Free Software Foundation, Inc. + Copyright (C) 1999-2000, 2002-2003, 2006-2008 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by @@ -392,6 +392,44 @@ PRINTF_PARSE (const CHAR_T *format, DIRECTIVES *d, arguments *a) } cp++; } +#if defined __APPLE__ && defined __MACH__ + /* On MacOS X 10.3, PRIdMAX is defined as "qd". + We cannot change it to "lld" because PRIdMAX must also + be understood by the system's printf routines. */ + else if (*cp == 'q') + { + if (64 / 8 > sizeof (long)) + { + /* int64_t = long long */ + flags += 16; + } + else + { + /* int64_t = long */ + flags += 8; + } + cp++; + } +#endif +#if (defined _WIN32 || defined __WIN32__) && ! defined __CYGWIN__ + /* On native Win32, PRIdMAX is defined as "I64d". + We cannot change it to "lld" because PRIdMAX must also + be understood by the system's printf routines. */ + else if (*cp == 'I' && cp[1] == '6' && cp[2] == '4') + { + if (64 / 8 > sizeof (long)) + { + /* __int64 = long long */ + flags += 16; + } + else + { + /* __int64 = long */ + flags += 8; + } + cp += 3; + } +#endif else break; } |