summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJunio C Hamano <gitster@pobox.com>2011-09-02 10:00:38 -0700
committerJunio C Hamano <gitster@pobox.com>2011-09-02 10:00:38 -0700
commit8a8895baaf9d200c907555fd30b0b6190b708a5a (patch)
tree9ca717cf5314effc5dc09528bd4ff9c1c0bfe3bf
parent96b7c4deb8152cbc6a90c6d26ad6869b45a51e9b (diff)
parentd190a0875ff0f33d60a4d7265f2098b35d162f68 (diff)
downloadgit-8a8895baaf9d200c907555fd30b0b6190b708a5a.tar.gz
Merge branch 'fk/use-kwset-pickaxe-grep-f'
* fk/use-kwset-pickaxe-grep-f: obstack: Fix portability issues Use kwset in grep Use kwset in pickaxe Adapt the kwset code to Git Add string search routines from GNU grep Add obstack.[ch] from EGLIBC 2.10
-rw-r--r--Makefile4
-rw-r--r--compat/obstack.c414
-rw-r--r--compat/obstack.h506
-rw-r--r--diffcore-pickaxe.c34
-rw-r--r--grep.c66
-rw-r--r--grep.h2
-rw-r--r--kwset.c771
-rw-r--r--kwset.h63
-rwxr-xr-xt/t7008-grep-binary.sh4
9 files changed, 1830 insertions, 34 deletions
diff --git a/Makefile b/Makefile
index 6bf7d6c30f..8d6d4515d2 100644
--- a/Makefile
+++ b/Makefile
@@ -515,6 +515,7 @@ LIB_H += commit.h
LIB_H += compat/bswap.h
LIB_H += compat/cygwin.h
LIB_H += compat/mingw.h
+LIB_H += compat/obstack.h
LIB_H += compat/win32/pthread.h
LIB_H += compat/win32/syslog.h
LIB_H += compat/win32/sys/poll.h
@@ -533,6 +534,7 @@ LIB_H += graph.h
LIB_H += grep.h
LIB_H += hash.h
LIB_H += help.h
+LIB_H += kwset.h
LIB_H += levenshtein.h
LIB_H += list-objects.h
LIB_H += ll-merge.h
@@ -594,6 +596,7 @@ LIB_OBJS += cache-tree.o
LIB_OBJS += color.o
LIB_OBJS += combine-diff.o
LIB_OBJS += commit.o
+LIB_OBJS += compat/obstack.o
LIB_OBJS += config.o
LIB_OBJS += connect.o
LIB_OBJS += convert.o
@@ -623,6 +626,7 @@ LIB_OBJS += hash.o
LIB_OBJS += help.o
LIB_OBJS += hex.o
LIB_OBJS += ident.o
+LIB_OBJS += kwset.o
LIB_OBJS += levenshtein.o
LIB_OBJS += list-objects.o
LIB_OBJS += ll-merge.o
diff --git a/compat/obstack.c b/compat/obstack.c
new file mode 100644
index 0000000000..a89ab5b8e8
--- /dev/null
+++ b/compat/obstack.c
@@ -0,0 +1,414 @@
+/* obstack.c - subroutines used implicitly by object stack macros
+ Copyright (C) 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1996, 1997, 1998,
+ 1999, 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ Boston, MA 02110-1301, USA. */
+
+#include "git-compat-util.h"
+#include <gettext.h>
+#include "obstack.h"
+
+/* NOTE BEFORE MODIFYING THIS FILE: This version number must be
+ incremented whenever callers compiled using an old obstack.h can no
+ longer properly call the functions in this obstack.c. */
+#define OBSTACK_INTERFACE_VERSION 1
+
+/* Comment out all this code if we are using the GNU C Library, and are not
+ actually compiling the library itself, and the installed library
+ supports the same library interface we do. This code is part of the GNU
+ C Library, but also included in many other GNU distributions. Compiling
+ and linking in this code is a waste when using the GNU C library
+ (especially if it is a shared library). Rather than having every GNU
+ program understand `configure --with-gnu-libc' and omit the object
+ files, it is simpler to just do this in the source for each such file. */
+
+#include <stdio.h> /* Random thing to get __GNU_LIBRARY__. */
+#if !defined _LIBC && defined __GNU_LIBRARY__ && __GNU_LIBRARY__ > 1
+# include <gnu-versions.h>
+# if _GNU_OBSTACK_INTERFACE_VERSION == OBSTACK_INTERFACE_VERSION
+# define ELIDE_CODE
+# endif
+#endif
+
+#include <stddef.h>
+
+#ifndef ELIDE_CODE
+
+
+# if HAVE_INTTYPES_H
+# include <inttypes.h>
+# endif
+# if HAVE_STDINT_H || defined _LIBC
+# include <stdint.h>
+# endif
+
+/* Determine default alignment. */
+union fooround
+{
+ uintmax_t i;
+ long double d;
+ void *p;
+};
+struct fooalign
+{
+ char c;
+ union fooround u;
+};
+/* If malloc were really smart, it would round addresses to DEFAULT_ALIGNMENT.
+ But in fact it might be less smart and round addresses to as much as
+ DEFAULT_ROUNDING. So we prepare for it to do that. */
+enum
+ {
+ DEFAULT_ALIGNMENT = offsetof (struct fooalign, u),
+ DEFAULT_ROUNDING = sizeof (union fooround)
+ };
+
+/* When we copy a long block of data, this is the unit to do it with.
+ On some machines, copying successive ints does not work;
+ in such a case, redefine COPYING_UNIT to `long' (if that works)
+ or `char' as a last resort. */
+# ifndef COPYING_UNIT
+# define COPYING_UNIT int
+# endif
+
+
+/* The functions allocating more room by calling `obstack_chunk_alloc'
+ jump to the handler pointed to by `obstack_alloc_failed_handler'.
+ This can be set to a user defined function which should either
+ abort gracefully or use longjump - but shouldn't return. This
+ variable by default points to the internal function
+ `print_and_abort'. */
+static void print_and_abort (void);
+void (*obstack_alloc_failed_handler) (void) = print_and_abort;
+
+# ifdef _LIBC
+# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
+/* A looong time ago (before 1994, anyway; we're not sure) this global variable
+ was used by non-GNU-C macros to avoid multiple evaluation. The GNU C
+ library still exports it because somebody might use it. */
+struct obstack *_obstack_compat;
+compat_symbol (libc, _obstack_compat, _obstack, GLIBC_2_0);
+# endif
+# endif
+
+/* Define a macro that either calls functions with the traditional malloc/free
+ calling interface, or calls functions with the mmalloc/mfree interface
+ (that adds an extra first argument), based on the state of use_extra_arg.
+ For free, do not use ?:, since some compilers, like the MIPS compilers,
+ do not allow (expr) ? void : void. */
+
+# define CALL_CHUNKFUN(h, size) \
+ (((h) -> use_extra_arg) \
+ ? (*(h)->chunkfun) ((h)->extra_arg, (size)) \
+ : (*(struct _obstack_chunk *(*) (long)) (h)->chunkfun) ((size)))
+
+# define CALL_FREEFUN(h, old_chunk) \
+ do { \
+ if ((h) -> use_extra_arg) \
+ (*(h)->freefun) ((h)->extra_arg, (old_chunk)); \
+ else \
+ (*(void (*) (void *)) (h)->freefun) ((old_chunk)); \
+ } while (0)
+
+
+/* Initialize an obstack H for use. Specify chunk size SIZE (0 means default).
+ Objects start on multiples of ALIGNMENT (0 means use default).
+ CHUNKFUN is the function to use to allocate chunks,
+ and FREEFUN the function to free them.
+
+ Return nonzero if successful, calls obstack_alloc_failed_handler if
+ allocation fails. */
+
+int
+_obstack_begin (struct obstack *h,
+ int size, int alignment,
+ void *(*chunkfun) (long),
+ void (*freefun) (void *))
+{
+ register struct _obstack_chunk *chunk; /* points to new chunk */
+
+ if (alignment == 0)
+ alignment = DEFAULT_ALIGNMENT;
+ if (size == 0)
+ /* Default size is what GNU malloc can fit in a 4096-byte block. */
+ {
+ /* 12 is sizeof (mhead) and 4 is EXTRA from GNU malloc.
+ Use the values for range checking, because if range checking is off,
+ the extra bytes won't be missed terribly, but if range checking is on
+ and we used a larger request, a whole extra 4096 bytes would be
+ allocated.
+
+ These number are irrelevant to the new GNU malloc. I suspect it is
+ less sensitive to the size of the request. */
+ int extra = ((((12 + DEFAULT_ROUNDING - 1) & ~(DEFAULT_ROUNDING - 1))
+ + 4 + DEFAULT_ROUNDING - 1)
+ & ~(DEFAULT_ROUNDING - 1));
+ size = 4096 - extra;
+ }
+
+ h->chunkfun = (struct _obstack_chunk * (*)(void *, long)) chunkfun;
+ h->freefun = (void (*) (void *, struct _obstack_chunk *)) freefun;
+ h->chunk_size = size;
+ h->alignment_mask = alignment - 1;
+ h->use_extra_arg = 0;
+
+ chunk = h->chunk = CALL_CHUNKFUN (h, h -> chunk_size);
+ if (!chunk)
+ (*obstack_alloc_failed_handler) ();
+ h->next_free = h->object_base = __PTR_ALIGN ((char *) chunk, chunk->contents,
+ alignment - 1);
+ h->chunk_limit = chunk->limit
+ = (char *) chunk + h->chunk_size;
+ chunk->prev = 0;
+ /* The initial chunk now contains no empty object. */
+ h->maybe_empty_object = 0;
+ h->alloc_failed = 0;
+ return 1;
+}
+
+int
+_obstack_begin_1 (struct obstack *h, int size, int alignment,
+ void *(*chunkfun) (void *, long),
+ void (*freefun) (void *, void *),
+ void *arg)
+{
+ register struct _obstack_chunk *chunk; /* points to new chunk */
+
+ if (alignment == 0)
+ alignment = DEFAULT_ALIGNMENT;
+ if (size == 0)
+ /* Default size is what GNU malloc can fit in a 4096-byte block. */
+ {
+ /* 12 is sizeof (mhead) and 4 is EXTRA from GNU malloc.
+ Use the values for range checking, because if range checking is off,
+ the extra bytes won't be missed terribly, but if range checking is on
+ and we used a larger request, a whole extra 4096 bytes would be
+ allocated.
+
+ These number are irrelevant to the new GNU malloc. I suspect it is
+ less sensitive to the size of the request. */
+ int extra = ((((12 + DEFAULT_ROUNDING - 1) & ~(DEFAULT_ROUNDING - 1))
+ + 4 + DEFAULT_ROUNDING - 1)
+ & ~(DEFAULT_ROUNDING - 1));
+ size = 4096 - extra;
+ }
+
+ h->chunkfun = (struct _obstack_chunk * (*)(void *,long)) chunkfun;
+ h->freefun = (void (*) (void *, struct _obstack_chunk *)) freefun;
+ h->chunk_size = size;
+ h->alignment_mask = alignment - 1;
+ h->extra_arg = arg;
+ h->use_extra_arg = 1;
+
+ chunk = h->chunk = CALL_CHUNKFUN (h, h -> chunk_size);
+ if (!chunk)
+ (*obstack_alloc_failed_handler) ();
+ h->next_free = h->object_base = __PTR_ALIGN ((char *) chunk, chunk->contents,
+ alignment - 1);
+ h->chunk_limit = chunk->limit
+ = (char *) chunk + h->chunk_size;
+ chunk->prev = 0;
+ /* The initial chunk now contains no empty object. */
+ h->maybe_empty_object = 0;
+ h->alloc_failed = 0;
+ return 1;
+}
+
+/* Allocate a new current chunk for the obstack *H
+ on the assumption that LENGTH bytes need to be added
+ to the current object, or a new object of length LENGTH allocated.
+ Copies any partial object from the end of the old chunk
+ to the beginning of the new one. */
+
+void
+_obstack_newchunk (struct obstack *h, int length)
+{
+ register struct _obstack_chunk *old_chunk = h->chunk;
+ register struct _obstack_chunk *new_chunk;
+ register long new_size;
+ register long obj_size = h->next_free - h->object_base;
+ register long i;
+ long already;
+ char *object_base;
+
+ /* Compute size for new chunk. */
+ new_size = (obj_size + length) + (obj_size >> 3) + h->alignment_mask + 100;
+ if (new_size < h->chunk_size)
+ new_size = h->chunk_size;
+
+ /* Allocate and initialize the new chunk. */
+ new_chunk = CALL_CHUNKFUN (h, new_size);
+ if (!new_chunk)
+ (*obstack_alloc_failed_handler) ();
+ h->chunk = new_chunk;
+ new_chunk->prev = old_chunk;
+ new_chunk->limit = h->chunk_limit = (char *) new_chunk + new_size;
+
+ /* Compute an aligned object_base in the new chunk */
+ object_base =
+ __PTR_ALIGN ((char *) new_chunk, new_chunk->contents, h->alignment_mask);
+
+ /* Move the existing object to the new chunk.
+ Word at a time is fast and is safe if the object
+ is sufficiently aligned. */
+ if (h->alignment_mask + 1 >= DEFAULT_ALIGNMENT)
+ {
+ for (i = obj_size / sizeof (COPYING_UNIT) - 1;
+ i >= 0; i--)
+ ((COPYING_UNIT *)object_base)[i]
+ = ((COPYING_UNIT *)h->object_base)[i];
+ /* We used to copy the odd few remaining bytes as one extra COPYING_UNIT,
+ but that can cross a page boundary on a machine
+ which does not do strict alignment for COPYING_UNITS. */
+ already = obj_size / sizeof (COPYING_UNIT) * sizeof (COPYING_UNIT);
+ }
+ else
+ already = 0;
+ /* Copy remaining bytes one by one. */
+ for (i = already; i < obj_size; i++)
+ object_base[i] = h->object_base[i];
+
+ /* If the object just copied was the only data in OLD_CHUNK,
+ free that chunk and remove it from the chain.
+ But not if that chunk might contain an empty object. */
+ if (! h->maybe_empty_object
+ && (h->object_base
+ == __PTR_ALIGN ((char *) old_chunk, old_chunk->contents,
+ h->alignment_mask)))
+ {
+ new_chunk->prev = old_chunk->prev;
+ CALL_FREEFUN (h, old_chunk);
+ }
+
+ h->object_base = object_base;
+ h->next_free = h->object_base + obj_size;
+ /* The new chunk certainly contains no empty object yet. */
+ h->maybe_empty_object = 0;
+}
+# ifdef _LIBC
+libc_hidden_def (_obstack_newchunk)
+# endif
+
+/* Return nonzero if object OBJ has been allocated from obstack H.
+ This is here for debugging.
+ If you use it in a program, you are probably losing. */
+
+/* Suppress -Wmissing-prototypes warning. We don't want to declare this in
+ obstack.h because it is just for debugging. */
+int _obstack_allocated_p (struct obstack *h, void *obj);
+
+int
+_obstack_allocated_p (struct obstack *h, void *obj)
+{
+ register struct _obstack_chunk *lp; /* below addr of any objects in this chunk */
+ register struct _obstack_chunk *plp; /* point to previous chunk if any */
+
+ lp = (h)->chunk;
+ /* We use >= rather than > since the object cannot be exactly at
+ the beginning of the chunk but might be an empty object exactly
+ at the end of an adjacent chunk. */
+ while (lp != 0 && ((void *) lp >= obj || (void *) (lp)->limit < obj))
+ {
+ plp = lp->prev;
+ lp = plp;
+ }
+ return lp != 0;
+}
+
+/* Free objects in obstack H, including OBJ and everything allocate
+ more recently than OBJ. If OBJ is zero, free everything in H. */
+
+# undef obstack_free
+
+void
+obstack_free (struct obstack *h, void *obj)
+{
+ register struct _obstack_chunk *lp; /* below addr of any objects in this chunk */
+ register struct _obstack_chunk *plp; /* point to previous chunk if any */
+
+ lp = h->chunk;
+ /* We use >= because there cannot be an object at the beginning of a chunk.
+ But there can be an empty object at that address
+ at the end of another chunk. */
+ while (lp != 0 && ((void *) lp >= obj || (void *) (lp)->limit < obj))
+ {
+ plp = lp->prev;
+ CALL_FREEFUN (h, lp);
+ lp = plp;
+ /* If we switch chunks, we can't tell whether the new current
+ chunk contains an empty object, so assume that it may. */
+ h->maybe_empty_object = 1;
+ }
+ if (lp)
+ {
+ h->object_base = h->next_free = (char *) (obj);
+ h->chunk_limit = lp->limit;
+ h->chunk = lp;
+ }
+ else if (obj != 0)
+ /* obj is not in any of the chunks! */
+ abort ();
+}
+
+# ifdef _LIBC
+/* Older versions of libc used a function _obstack_free intended to be
+ called by non-GCC compilers. */
+strong_alias (obstack_free, _obstack_free)
+# endif
+
+int
+_obstack_memory_used (struct obstack *h)
+{
+ register struct _obstack_chunk* lp;
+ register int nbytes = 0;
+
+ for (lp = h->chunk; lp != 0; lp = lp->prev)
+ {
+ nbytes += lp->limit - (char *) lp;
+ }
+ return nbytes;
+}
+
+# ifdef _LIBC
+# include <libio/iolibio.h>
+# endif
+
+# ifndef __attribute__
+/* This feature is available in gcc versions 2.5 and later. */
+# if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 5)
+# define __attribute__(Spec) /* empty */
+# endif
+# endif
+
+static void
+__attribute__ ((noreturn))
+print_and_abort (void)
+{
+ /* Don't change any of these strings. Yes, it would be possible to add
+ the newline to the string and use fputs or so. But this must not
+ happen because the "memory exhausted" message appears in other places
+ like this and the translation should be reused instead of creating
+ a very similar string which requires a separate translation. */
+# ifdef _LIBC
+ (void) __fxprintf (NULL, "%s\n", _("memory exhausted"));
+# else
+ fprintf (stderr, "%s\n", _("memory exhausted"));
+# endif
+ exit (1);
+}
+
+#endif /* !ELIDE_CODE */
diff --git a/compat/obstack.h b/compat/obstack.h
new file mode 100644
index 0000000000..d178bd6716
--- /dev/null
+++ b/compat/obstack.h
@@ -0,0 +1,506 @@
+/* obstack.h - object stack macros
+ Copyright (C) 1988-1994,1996-1999,2003,2004,2005,2009
+ Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ Boston, MA 02110-1301, USA. */
+
+/* Summary:
+
+All the apparent functions defined here are macros. The idea
+is that you would use these pre-tested macros to solve a
+very specific set of problems, and they would run fast.
+Caution: no side-effects in arguments please!! They may be
+evaluated MANY times!!
+
+These macros operate a stack of objects. Each object starts life
+small, and may grow to maturity. (Consider building a word syllable
+by syllable.) An object can move while it is growing. Once it has
+been "finished" it never changes address again. So the "top of the
+stack" is typically an immature growing object, while the rest of the
+stack is of mature, fixed size and fixed address objects.
+
+These routines grab large chunks of memory, using a function you
+supply, called `obstack_chunk_alloc'. On occasion, they free chunks,
+by calling `obstack_chunk_free'. You must define them and declare
+them before using any obstack macros.
+
+Each independent stack is represented by a `struct obstack'.
+Each of the obstack macros expects a pointer to such a structure
+as the first argument.
+
+One motivation for this package is the problem of growing char strings
+in symbol tables. Unless you are "fascist pig with a read-only mind"
+--Gosper's immortal quote from HAKMEM item 154, out of context--you
+would not like to put any arbitrary upper limit on the length of your
+symbols.
+
+In practice this often means you will build many short symbols and a
+few long symbols. At the time you are reading a symbol you don't know
+how long it is. One traditional method is to read a symbol into a
+buffer, realloc()ating the buffer every time you try to read a symbol
+that is longer than the buffer. This is beaut, but you still will
+want to copy the symbol from the buffer to a more permanent
+symbol-table entry say about half the time.
+
+With obstacks, you can work differently. Use one obstack for all symbol
+names. As you read a symbol, grow the name in the obstack gradually.
+When the name is complete, finalize it. Then, if the symbol exists already,
+free the newly read name.
+
+The way we do this is to take a large chunk, allocating memory from
+low addresses. When you want to build a symbol in the chunk you just
+add chars above the current "high water mark" in the chunk. When you
+have finished adding chars, because you got to the end of the symbol,
+you know how long the chars are, and you can create a new object.
+Mostly the chars will not burst over the highest address of the chunk,
+because you would typically expect a chunk to be (say) 100 times as
+long as an average object.
+
+In case that isn't clear, when we have enough chars to make up
+the object, THEY ARE ALREADY CONTIGUOUS IN THE CHUNK (guaranteed)
+so we just point to it where it lies. No moving of chars is
+needed and this is the second win: potentially long strings need
+never be explicitly shuffled. Once an object is formed, it does not
+change its address during its lifetime.
+
+When the chars burst over a chunk boundary, we allocate a larger
+chunk, and then copy the partly formed object from the end of the old
+chunk to the beginning of the new larger chunk. We then carry on
+accreting characters to the end of the object as we normally would.
+
+A special macro is provided to add a single char at a time to a
+growing object. This allows the use of register variables, which
+break the ordinary 'growth' macro.
+
+Summary:
+ We allocate large chunks.
+ We carve out one object at a time from the current chunk.
+ Once carved, an object never moves.
+ We are free to append data of any size to the currently
+ growing object.
+ Exactly one object is growing in an obstack at any one time.
+ You can run one obstack per control block.
+ You may have as many control blocks as you dare.
+ Because of the way we do it, you can `unwind' an obstack
+ back to a previous state. (You may remove objects much
+ as you would with a stack.)
+*/
+
+
+/* Don't do the contents of this file more than once. */
+
+#ifndef _OBSTACK_H
+#define _OBSTACK_H 1
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* We need the type of a pointer subtraction. If __PTRDIFF_TYPE__ is
+ defined, as with GNU C, use that; that way we don't pollute the
+ namespace with <stddef.h>'s symbols. Otherwise, include <stddef.h>
+ and use ptrdiff_t. */
+
+#ifdef __PTRDIFF_TYPE__
+# define PTR_INT_TYPE __PTRDIFF_TYPE__
+#else
+# include <stddef.h>
+# define PTR_INT_TYPE ptrdiff_t
+#endif
+
+/* If B is the base of an object addressed by P, return the result of
+ aligning P to the next multiple of A + 1. B and P must be of type
+ char *. A + 1 must be a power of 2. */
+
+#define __BPTR_ALIGN(B, P, A) ((B) + (((P) - (B) + (A)) & ~(A)))
+
+/* Similiar to _BPTR_ALIGN (B, P, A), except optimize the common case
+ where pointers can be converted to integers, aligned as integers,
+ and converted back again. If PTR_INT_TYPE is narrower than a
+ pointer (e.g., the AS/400), play it safe and compute the alignment
+ relative to B. Otherwise, use the faster strategy of computing the
+ alignment relative to 0. */
+
+#define __PTR_ALIGN(B, P, A) \
+ __BPTR_ALIGN (sizeof (PTR_INT_TYPE) < sizeof (void *) ? (B) : (char *) 0, \
+ P, A)
+
+#include <string.h>
+
+struct _obstack_chunk /* Lives at front of each chunk. */
+{
+ char *limit; /* 1 past end of this chunk */
+ struct _obstack_chunk *prev; /* address of prior chunk or NULL */
+ char contents[4]; /* objects begin here */
+};
+
+struct obstack /* control current object in current chunk */
+{
+ long chunk_size; /* preferred size to allocate chunks in */
+ struct _obstack_chunk *chunk; /* address of current struct obstack_chunk */
+ char *object_base; /* address of object we are building */
+ char *next_free; /* where to add next char to current object */
+ char *chunk_limit; /* address of char after current chunk */
+ union
+ {
+ PTR_INT_TYPE tempint;
+ void *tempptr;
+ } temp; /* Temporary for some macros. */
+ int alignment_mask; /* Mask of alignment for each object. */
+ /* These prototypes vary based on `use_extra_arg', and we use
+ casts to the prototypeless function type in all assignments,
+ but having prototypes here quiets -Wstrict-prototypes. */
+ struct _obstack_chunk *(*chunkfun) (void *, long);
+ void (*freefun) (void *, struct _obstack_chunk *);
+ void *extra_arg; /* first arg for chunk alloc/dealloc funcs */
+ unsigned use_extra_arg:1; /* chunk alloc/dealloc funcs take extra arg */
+ unsigned maybe_empty_object:1;/* There is a possibility that the current
+ chunk contains a zero-length object. This
+ prevents freeing the chunk if we allocate
+ a bigger chunk to replace it. */
+ unsigned alloc_failed:1; /* No longer used, as we now call the failed
+ handler on error, but retained for binary
+ compatibility. */
+};
+
+/* Declare the external functions we use; they are in obstack.c. */
+
+extern void _obstack_newchunk (struct obstack *, int);
+extern int _obstack_begin (struct obstack *, int, int,
+ void *(*) (long), void (*) (void *));
+extern int _obstack_begin_1 (struct obstack *, int, int,
+ void *(*) (void *, long),
+ void (*) (void *, void *), void *);
+extern int _obstack_memory_used (struct obstack *);
+
+void obstack_free (struct obstack *, void *);
+
+
+/* Error handler called when `obstack_chunk_alloc' failed to allocate
+ more memory. This can be set to a user defined function which
+ should either abort gracefully or use longjump - but shouldn't
+ return. The default action is to print a message and abort. */
+extern void (*obstack_alloc_failed_handler) (void);
+
+/* Pointer to beginning of object being allocated or to be allocated next.
+ Note that this might not be the final address of the object
+ because a new chunk might be needed to hold the final size. */
+
+#define obstack_base(h) ((void *) (h)->object_base)
+
+/* Size for allocating ordinary chunks. */
+
+#define obstack_chunk_size(h) ((h)->chunk_size)
+
+/* Pointer to next byte not yet allocated in current chunk. */
+
+#define obstack_next_free(h) ((h)->next_free)
+
+/* Mask specifying low bits that should be clear in address of an object. */
+
+#define obstack_alignment_mask(h) ((h)->alignment_mask)
+
+/* To prevent prototype warnings provide complete argument list. */
+#define obstack_init(h) \
+ _obstack_begin ((h), 0, 0, \
+ (void *(*) (long)) obstack_chunk_alloc, \
+ (void (*) (void *)) obstack_chunk_free)
+
+#define obstack_begin(h, size) \
+ _obstack_begin ((h), (size), 0, \
+ (void *(*) (long)) obstack_chunk_alloc, \
+ (void (*) (void *)) obstack_chunk_free)
+
+#define obstack_specify_allocation(h, size, alignment, chunkfun, freefun) \
+ _obstack_begin ((h), (size), (alignment), \
+ (void *(*) (long)) (chunkfun), \
+ (void (*) (void *)) (freefun))
+
+#define obstack_specify_allocation_with_arg(h, size, alignment, chunkfun, freefun, arg) \
+ _obstack_begin_1 ((h), (size), (alignment), \
+ (void *(*) (void *, long)) (chunkfun), \
+ (void (*) (void *, void *)) (freefun), (arg))
+
+#define obstack_chunkfun(h, newchunkfun) \
+ ((h) -> chunkfun = (struct _obstack_chunk *(*)(void *, long)) (newchunkfun))
+
+#define obstack_freefun(h, newfreefun) \
+ ((h) -> freefun = (void (*)(void *, struct _obstack_chunk *)) (newfreefun))
+
+#define obstack_1grow_fast(h,achar) (*((h)->next_free)++ = (achar))
+
+#define obstack_blank_fast(h,n) ((h)->next_free += (n))
+
+#define obstack_memory_used(h) _obstack_memory_used (h)
+
+#if defined __GNUC__ && defined __STDC__ && __STDC__
+/* NextStep 2.0 cc is really gcc 1.93 but it defines __GNUC__ = 2 and
+ does not implement __extension__. But that compiler doesn't define
+ __GNUC_MINOR__. */
+# if __GNUC__ < 2 || (__NeXT__ && !__GNUC_MINOR__)
+# define __extension__
+# endif
+
+/* For GNU C, if not -traditional,
+ we can define these macros to compute all args only once
+ without using a global variable.
+ Also, we can avoid using the `temp' slot, to make faster code. */
+
+# define obstack_object_size(OBSTACK) \
+ __extension__ \
+ ({ struct obstack const *__o = (OBSTACK); \
+ (unsigned) (__o->next_free - __o->object_base); })
+
+# define obstack_room(OBSTACK) \
+ __extension__ \
+ ({ struct obstack const *__o = (OBSTACK); \
+ (unsigned) (__o->chunk_limit - __o->next_free); })
+
+# define obstack_make_room(OBSTACK,length) \
+__extension__ \
+({ struct obstack *__o = (OBSTACK); \
+ int __len = (length); \
+ if (__o->chunk_limit - __o->next_free < __len) \
+ _obstack_newchunk (__o, __len); \
+ (void) 0; })
+
+# define obstack_empty_p(OBSTACK) \
+ __extension__ \
+ ({ struct obstack const *__o = (OBSTACK); \
+ (__o->chunk->prev == 0 \
+ && __o->next_free == __PTR_ALIGN ((char *) __o->chunk, \
+ __o->chunk->contents, \
+ __o->alignment_mask)); })
+
+# define obstack_grow(OBSTACK,where,length) \
+__extension__ \
+({ struct obstack *__o = (OBSTACK); \
+ int __len = (length); \
+ if (__o->next_free + __len > __o->chunk_limit) \
+ _obstack_newchunk (__o, __len); \
+ memcpy (__o->next_free, where, __len); \
+ __o->next_free += __len; \
+ (void) 0; })
+
+# define obstack_grow0(OBSTACK,where,length) \
+__extension__ \
+({ struct obstack *__o = (OBSTACK); \
+ int __len = (length); \
+ if (__o->next_free + __len + 1 > __o->chunk_limit) \
+ _obstack_newchunk (__o, __len + 1); \
+ memcpy (__o->next_free, where, __len); \
+ __o->next_free += __len; \
+ *(__o->next_free)++ = 0; \
+ (void) 0; })
+
+# define obstack_1grow(OBSTACK,datum) \
+__extension__ \
+({ struct obstack *__o = (OBSTACK); \
+ if (__o->next_free + 1 > __o->chunk_limit) \
+ _obstack_newchunk (__o, 1); \
+ obstack_1grow_fast (__o, datum); \
+ (void) 0; })
+
+/* These assume that the obstack alignment is good enough for pointers
+ or ints, and that the data added so far to the current object
+ shares that much alignment. */
+
+# define obstack_ptr_grow(OBSTACK,datum) \
+__extension__ \
+({ struct obstack *__o = (OBSTACK); \
+ if (__o->next_free + sizeof (void *) > __o->chunk_limit) \
+ _obstack_newchunk (__o, sizeof (void *)); \
+ obstack_ptr_grow_fast (__o, datum); }) \
+
+# define obstack_int_grow(OBSTACK,datum) \
+__extension__ \
+({ struct obstack *__o = (OBSTACK); \
+ if (__o->next_free + sizeof (int) > __o->chunk_limit) \
+ _obstack_newchunk (__o, sizeof (int)); \
+ obstack_int_grow_fast (__o, datum); })
+
+# define obstack_ptr_grow_fast(OBSTACK,aptr) \
+__extension__ \
+({ struct obstack *__o1 = (OBSTACK); \
+ *(const void **) __o1->next_free = (aptr); \
+ __o1->next_free += sizeof (const void *); \
+ (void) 0; })
+
+# define obstack_int_grow_fast(OBSTACK,aint) \
+__extension__ \
+({ struct obstack *__o1 = (OBSTACK); \
+ *(int *) __o1->next_free = (aint); \
+ __o1->next_free += sizeof (int); \
+ (void) 0; })
+
+# define obstack_blank(OBSTACK,length) \
+__extension__ \
+({ struct obstack *__o = (OBSTACK); \
+ int __len = (length); \
+ if (__o->chunk_limit - __o->next_free < __len) \
+ _obstack_newchunk (__o, __len); \
+ obstack_blank_fast (__o, __len); \
+ (void) 0; })
+
+# define obstack_alloc(OBSTACK,length) \
+__extension__ \
+({ struct obstack *__h = (OBSTACK); \
+ obstack_blank (__h, (length)); \
+ obstack_finish (__h); })
+
+# define obstack_copy(OBSTACK,where,length) \
+__extension__ \
+({ struct obstack *__h = (OBSTACK); \
+ obstack_grow (__h, (where), (length)); \
+ obstack_finish (__h); })
+
+# define obstack_copy0(OBSTACK,where,length) \
+__extension__ \
+({ struct obstack *__h = (OBSTACK); \
+ obstack_grow0 (__h, (where), (length)); \
+ obstack_finish (__h); })
+
+/* The local variable is named __o1 to avoid a name conflict
+ when obstack_blank is called. */
+# define obstack_finish(OBSTACK) \
+__extension__ \
+({ struct obstack *__o1 = (OBSTACK); \
+ void *__value = (void *) __o1->object_base; \
+ if (__o1->next_free == __value) \
+ __o1->maybe_empty_object = 1; \
+ __o1->next_free \
+ = __PTR_ALIGN (__o1->object_base, __o1->next_free, \
+ __o1->alignment_mask); \
+ if (__o1->next_free - (char *)__o1->chunk \
+ > __o1->chunk_limit - (char *)__o1->chunk) \
+ __o1->next_free = __o1->chunk_limit; \
+ __o1->object_base = __o1->next_free; \
+ __value; })
+
+# define obstack_free(OBSTACK, OBJ) \
+__extension__ \
+({ struct obstack *__o = (OBSTACK); \
+ void *__obj = (OBJ); \
+ if (__obj > (void *)__o->chunk && __obj < (void *)__o->chunk_limit) \
+ __o->next_free = __o->object_base = (char *)__obj; \
+ else (obstack_free) (__o, __obj); })
+
+#else /* not __GNUC__ or not __STDC__ */
+
+# define obstack_object_size(h) \
+ (unsigned) ((h)->next_free - (h)->object_base)
+
+# define obstack_room(h) \
+ (unsigned) ((h)->chunk_limit - (h)->next_free)
+
+# define obstack_empty_p(h) \
+ ((h)->chunk->prev == 0 \
+ && (h)->next_free == __PTR_ALIGN ((char *) (h)->chunk, \
+ (h)->chunk->contents, \
+ (h)->alignment_mask))
+
+/* Note that the call to _obstack_newchunk is enclosed in (..., 0)
+ so that we can avoid having void expressions
+ in the arms of the conditional expression.
+ Casting the third operand to void was tried before,
+ but some compilers won't accept it. */
+
+# define obstack_make_room(h,length) \
+( (h)->temp.tempint = (length), \
+ (((h)->next_free + (h)->temp.tempint > (h)->chunk_limit) \
+ ? (_obstack_newchunk ((h), (h)->temp.tempint), 0) : 0))
+
+# define obstack_grow(h,where,length) \
+( (h)->temp.tempint = (length), \
+ (((h)->next_free + (h)->temp.tempint > (h)->chunk_limit) \
+ ? (_obstack_newchunk ((h), (h)->temp.tempint), 0) : 0), \
+ memcpy ((h)->next_free, where, (h)->temp.tempint), \
+ (h)->next_free += (h)->temp.tempint)
+
+# define obstack_grow0(h,where,length) \
+( (h)->temp.tempint = (length), \
+ (((h)->next_free + (h)->temp.tempint + 1 > (h)->chunk_limit) \
+ ? (_obstack_newchunk ((h), (h)->temp.tempint + 1), 0) : 0), \
+ memcpy ((h)->next_free, where, (h)->temp.tempint), \
+ (h)->next_free += (h)->temp.tempint, \
+ *((h)->next_free)++ = 0)
+
+# define obstack_1grow(h,datum) \
+( (((h)->next_free + 1 > (h)->chunk_limit) \
+ ? (_obstack_newchunk ((h), 1), 0) : 0), \
+ obstack_1grow_fast (h, datum))
+
+# define obstack_ptr_grow(h,datum) \
+( (((h)->next_free + sizeof (char *) > (h)->chunk_limit) \
+ ? (_obstack_newchunk ((h), sizeof (char *)), 0) : 0), \
+ obstack_ptr_grow_fast (h, datum))
+
+# define obstack_int_grow(h,datum) \
+( (((h)->next_free + sizeof (int) > (h)->chunk_limit) \
+ ? (_obstack_newchunk ((h), sizeof (int)), 0) : 0), \
+ obstack_int_grow_fast (h, datum))
+
+# define obstack_ptr_grow_fast(h,aptr) \
+ (((const void **) ((h)->next_free += sizeof (void *)))[-1] = (aptr))
+
+# define obstack_int_grow_fast(h,aint) \
+ (((int *) ((h)->next_free += sizeof (int)))[-1] = (aint))
+
+# define obstack_blank(h,length) \
+( (h)->temp.tempint = (length), \
+ (((h)->chunk_limit - (h)->next_free < (h)->temp.tempint) \
+ ? (_obstack_newchunk ((h), (h)->temp.tempint), 0) : 0), \
+ obstack_blank_fast (h, (h)->temp.tempint))
+
+# define obstack_alloc(h,length) \
+ (obstack_blank ((h), (length)), obstack_finish ((h)))
+
+# define obstack_copy(h,where,length) \
+ (obstack_grow ((h), (where), (length)), obstack_finish ((h)))
+
+# define obstack_copy0(h,where,length) \
+ (obstack_grow0 ((h), (where), (length)), obstack_finish ((h)))
+
+# define obstack_finish(h) \
+( ((h)->next_free == (h)->object_base \
+ ? (((h)->maybe_empty_object = 1), 0) \
+ : 0), \
+ (h)->temp.tempptr = (h)->object_base, \
+ (h)->next_free \
+ = __PTR_ALIGN ((h)->object_base, (h)->next_free, \
+ (h)->alignment_mask), \
+ (((h)->next_free - (char *) (h)->chunk \
+ > (h)->chunk_limit - (char *) (h)->chunk) \
+ ? ((h)->next_free = (h)->chunk_limit) : 0), \
+ (h)->object_base = (h)->next_free, \
+ (h)->temp.tempptr)
+
+# define obstack_free(h,obj) \
+( (h)->temp.tempint = (char *) (obj) - (char *) (h)->chunk, \
+ ((((h)->temp.tempint > 0 \
+ && (h)->temp.tempint < (h)->chunk_limit - (char *) (h)->chunk)) \
+ ? (int) ((h)->next_free = (h)->object_base \
+ = (h)->temp.tempint + (char *) (h)->chunk) \
+ : (((obstack_free) ((h), (h)->temp.tempint + (char *) (h)->chunk), 0), 0)))
+
+#endif /* not __GNUC__ or not __STDC__ */
+
+#ifdef __cplusplus
+} /* C++ */
+#endif
+
+#endif /* obstack.h */
diff --git a/diffcore-pickaxe.c b/diffcore-pickaxe.c
index ea03b9107e..c3760cfefd 100644
--- a/diffcore-pickaxe.c
+++ b/diffcore-pickaxe.c
@@ -6,6 +6,7 @@
#include "diff.h"
#include "diffcore.h"
#include "xdiff-interface.h"
+#include "kwset.h"
struct diffgrep_cb {
regex_t *regexp;
@@ -146,7 +147,7 @@ static void diffcore_pickaxe_grep(struct diff_options *o)
static unsigned int contains(struct diff_filespec *one,
const char *needle, unsigned long len,
- regex_t *regexp)
+ regex_t *regexp, kwset_t kws)
{
unsigned int cnt;
unsigned long sz;
@@ -175,9 +176,12 @@ static unsigned int contains(struct diff_filespec *one,
} else { /* Classic exact string match */
while (sz) {
- const char *found = memmem(data, sz, needle, len);
- if (!found)
+ size_t offset = kwsexec(kws, data, sz, NULL);
+ const char *found;
+ if (offset == -1)
break;
+ else
+ found = data + offset;
sz -= found - data + len;
data = found + len;
cnt++;
@@ -195,6 +199,7 @@ static void diffcore_pickaxe_count(struct diff_options *o)
unsigned long len = strlen(needle);
int i, has_changes;
regex_t regex, *regexp = NULL;
+ kwset_t kws = NULL;
struct diff_queue_struct outq;
DIFF_QUEUE_CLEAR(&outq);
@@ -209,6 +214,10 @@ static void diffcore_pickaxe_count(struct diff_options *o)
die("invalid pickaxe regex: %s", errbuf);
}
regexp = &regex;
+ } else {
+ kws = kwsalloc(NULL);
+ kwsincr(kws, needle, len);
+ kwsprep(kws);
}
if (opts & DIFF_PICKAXE_ALL) {
@@ -219,16 +228,16 @@ static void diffcore_pickaxe_count(struct diff_options *o)
if (!DIFF_FILE_VALID(p->two))
continue; /* ignore unmerged */
/* created */
- if (contains(p->two, needle, len, regexp))
+ if (contains(p->two, needle, len, regexp, kws))
has_changes++;
}
else if (!DIFF_FILE_VALID(p->two)) {
- if (contains(p->one, needle, len, regexp))
+ if (contains(p->one, needle, len, regexp, kws))
has_changes++;
}
else if (!diff_unmodified_pair(p) &&
- contains(p->one, needle, len, regexp) !=
- contains(p->two, needle, len, regexp))
+ contains(p->one, needle, len, regexp, kws) !=
+ contains(p->two, needle, len, regexp, kws))
has_changes++;
}
if (has_changes)
@@ -251,16 +260,17 @@ static void diffcore_pickaxe_count(struct diff_options *o)
if (!DIFF_FILE_VALID(p->two))
; /* ignore unmerged */
/* created */
- else if (contains(p->two, needle, len, regexp))
+ else if (contains(p->two, needle, len, regexp,
+ kws))
has_changes = 1;
}
else if (!DIFF_FILE_VALID(p->two)) {
- if (contains(p->one, needle, len, regexp))
+ if (contains(p->one, needle, len, regexp, kws))
has_changes = 1;
}
else if (!diff_unmodified_pair(p) &&
- contains(p->one, needle, len, regexp) !=
- contains(p->two, needle, len, regexp))
+ contains(p->one, needle, len, regexp, kws) !=
+ contains(p->two, needle, len, regexp, kws))
has_changes = 1;
if (has_changes)
@@ -271,6 +281,8 @@ static void diffcore_pickaxe_count(struct diff_options *o)
if (opts & DIFF_PICKAXE_REGEX)
regfree(&regex);
+ else
+ kwsfree(kws);
free(q->queue);
*q = outq;
diff --git a/grep.c b/grep.c
index 2dd2a25ad7..b29d09c7f6 100644
--- a/grep.c
+++ b/grep.c
@@ -137,16 +137,50 @@ static void free_pcre_regexp(struct grep_pat *p)
}
#endif /* !USE_LIBPCRE */
+static int is_fixed(const char *s, size_t len)
+{
+ size_t i;
+
+ /* regcomp cannot accept patterns with NULs so we
+ * consider any pattern containing a NUL fixed.
+ */
+ if (memchr(s, 0, len))
+ return 1;
+
+ for (i = 0; i < len; i++) {
+ if (is_regex_special(s[i]))
+ return 0;
+ }
+
+ return 1;
+}
+
static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
{
int err;
p->word_regexp = opt->word_regexp;
p->ignore_case = opt->ignore_case;
- p->fixed = opt->fixed;
- if (p->fixed)
+ if (opt->fixed || is_fixed(p->pattern, p->patternlen))
+ p->fixed = 1;
+ else
+ p->fixed = 0;
+
+ if (p->fixed) {
+ if (opt->regflags & REG_ICASE || p->ignore_case) {
+ static char trans[256];
+ int i;
+ for (i = 0; i < 256; i++)
+ trans[i] = tolower(i);
+ p->kws = kwsalloc(trans);
+ } else {
+ p->kws = kwsalloc(NULL);
+ }
+ kwsincr(p->kws, p->pattern, p->patternlen);
+ kwsprep(p->kws);
return;
+ }
if (opt->pcre) {
compile_pcre_regexp(p, opt);
@@ -395,7 +429,9 @@ void free_grep_patterns(struct grep_opt *opt)
case GREP_PATTERN: /* atom */
case GREP_PATTERN_HEAD:
case GREP_PATTERN_BODY:
- if (p->pcre_regexp)
+ if (p->kws)
+ kwsfree(p->kws);
+ else if (p->pcre_regexp)
free_pcre_regexp(p);
else
regfree(&p->regexp);
@@ -455,26 +491,14 @@ static void show_name(struct grep_opt *opt, const char *name)
static int fixmatch(struct grep_pat *p, char *line, char *eol,
regmatch_t *match)
{
- char *hit;
-
- if (p->ignore_case) {
- char *s = line;
- do {
- hit = strcasestr(s, p->pattern);
- if (hit)
- break;
- s += strlen(s) + 1;
- } while (s < eol);
- } else
- hit = memmem(line, eol - line, p->pattern, p->patternlen);
-
- if (!hit) {
+ struct kwsmatch kwsm;
+ size_t offset = kwsexec(p->kws, line, eol - line, &kwsm);
+ if (offset == -1) {
match->rm_so = match->rm_eo = -1;
return REG_NOMATCH;
- }
- else {
- match->rm_so = hit - line;
- match->rm_eo = match->rm_so + p->patternlen;
+ } else {
+ match->rm_so = offset;
+ match->rm_eo = match->rm_so + kwsm.size[0];
return 0;
}
}
diff --git a/grep.h b/grep.h
index ae50c45a4d..a65280026d 100644
--- a/grep.h
+++ b/grep.h
@@ -7,6 +7,7 @@
typedef int pcre;
typedef int pcre_extra;
#endif
+#include "kwset.h"
enum grep_pat_token {
GREP_PATTERN,
@@ -41,6 +42,7 @@ struct grep_pat {
regex_t regexp;
pcre *pcre_regexp;
pcre_extra *pcre_extra_info;
+ kwset_t kws;
unsigned fixed:1;
unsigned ignore_case:1;
unsigned word_regexp:1;
diff --git a/kwset.c b/kwset.c
new file mode 100644
index 0000000000..956ae72950
--- /dev/null
+++ b/kwset.c
@@ -0,0 +1,771 @@
+/*
+ * This file has been copied from commit e7ac713d^ in the GNU grep git
+ * repository. A few small changes have been made to adapt the code to
+ * Git.
+ */
+
+/* kwset.c - search for any of a set of keywords.
+ Copyright 1989, 1998, 2000, 2005 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
+ 02110-1301, USA. */
+
+/* Written August 1989 by Mike Haertel.
+ The author may be reached (Email) at the address mike@ai.mit.edu,
+ or (US mail) as Mike Haertel c/o Free Software Foundation. */
+
+/* The algorithm implemented by these routines bears a startling resemblence
+ to one discovered by Beate Commentz-Walter, although it is not identical.
+ See "A String Matching Algorithm Fast on the Average," Technical Report,
+ IBM-Germany, Scientific Center Heidelberg, Tiergartenstrasse 15, D-6900
+ Heidelberg, Germany. See also Aho, A.V., and M. Corasick, "Efficient
+ String Matching: An Aid to Bibliographic Search," CACM June 1975,
+ Vol. 18, No. 6, which describes the failure function used below. */
+
+#include "cache.h"
+
+#include "kwset.h"
+#include "compat/obstack.h"
+
+#define NCHAR (UCHAR_MAX + 1)
+#define obstack_chunk_alloc xmalloc
+#define obstack_chunk_free free
+
+#define U(c) ((unsigned char) (c))
+
+/* Balanced tree of edges and labels leaving a given trie node. */
+struct tree
+{
+ struct tree *llink; /* Left link; MUST be first field. */
+ struct tree *rlink; /* Right link (to larger labels). */
+ struct trie *trie; /* Trie node pointed to by this edge. */
+ unsigned char label; /* Label on this edge. */
+ char balance; /* Difference in depths of subtrees. */
+};
+
+/* Node of a trie representing a set of reversed keywords. */
+struct trie
+{
+ unsigned int accepting; /* Word index of accepted word, or zero. */
+ struct tree *links; /* Tree of edges leaving this node. */
+ struct trie *parent; /* Parent of this node. */
+ struct trie *next; /* List of all trie nodes in level order. */
+ struct trie *fail; /* Aho-Corasick failure function. */
+ int depth; /* Depth of this node from the root. */
+ int shift; /* Shift function for search failures. */
+ int maxshift; /* Max shift of self and descendents. */
+};
+
+/* Structure returned opaquely to the caller, containing everything. */
+struct kwset
+{
+ struct obstack obstack; /* Obstack for node allocation. */
+ int words; /* Number of words in the trie. */
+ struct trie *trie; /* The trie itself. */
+ int mind; /* Minimum depth of an accepting node. */
+ int maxd; /* Maximum depth of any node. */
+ unsigned char delta[NCHAR]; /* Delta table for rapid search. */
+ struct trie *next[NCHAR]; /* Table of children of the root. */
+ char *target; /* Target string if there's only one. */
+ int mind2; /* Used in Boyer-Moore search for one string. */
+ char const *trans; /* Character translation table. */
+};
+
+/* Allocate and initialize a keyword set object, returning an opaque
+ pointer to it. Return NULL if memory is not available. */
+kwset_t
+kwsalloc (char const *trans)
+{
+ struct kwset *kwset;
+
+ kwset = (struct kwset *) xmalloc(sizeof (struct kwset));
+
+ obstack_init(&kwset->obstack);
+ kwset->words = 0;
+ kwset->trie
+ = (struct trie *) obstack_alloc(&kwset->obstack, sizeof (struct trie));
+ if (!kwset->trie)
+ {
+ kwsfree((kwset_t) kwset);
+ return NULL;
+ }
+ kwset->trie->accepting = 0;
+ kwset->trie->links = NULL;
+ kwset->trie->parent = NULL;
+ kwset->trie->next = NULL;
+ kwset->trie->fail = NULL;
+ kwset->trie->depth = 0;
+ kwset->trie->shift = 0;
+ kwset->mind = INT_MAX;
+ kwset->maxd = -1;
+ kwset->target = NULL;
+ kwset->trans = trans;
+
+ return (kwset_t) kwset;
+}
+
+/* This upper bound is valid for CHAR_BIT >= 4 and
+ exact for CHAR_BIT in { 4..11, 13, 15, 17, 19 }. */
+#define DEPTH_SIZE (CHAR_BIT + CHAR_BIT/2)
+
+/* Add the given string to the contents of the keyword set. Return NULL
+ for success, an error message otherwise. */
+const char *
+kwsincr (kwset_t kws, char const *text, size_t len)
+{
+ struct kwset *kwset;
+ register struct trie *trie;
+ register unsigned char label;
+ register struct tree *link;
+ register int depth;
+ struct tree *links[DEPTH_SIZE];
+ enum { L, R } dirs[DEPTH_SIZE];
+ struct tree *t, *r, *l, *rl, *lr;
+
+ kwset = (struct kwset *) kws;
+ trie = kwset->trie;
+ text += len;
+
+ /* Descend the trie (built of reversed keywords) character-by-character,
+ installing new nodes when necessary. */
+ while (len--)
+ {
+ label = kwset->trans ? kwset->trans[U(*--text)] : *--text;
+
+ /* Descend the tree of outgoing links for this trie node,
+ looking for the current character and keeping track
+ of the path followed. */
+ link = trie->links;
+ links[0] = (struct tree *) &trie->links;
+ dirs[0] = L;
+ depth = 1;
+
+ while (link && label != link->label)
+ {
+ links[depth] = link;
+ if (label < link->label)
+ dirs[depth++] = L, link = link->llink;
+ else
+ dirs[depth++] = R, link = link->rlink;
+ }
+
+ /* The current character doesn't have an outgoing link at
+ this trie node, so build a new trie node and install
+ a link in the current trie node's tree. */
+ if (!link)
+ {
+ link = (struct tree *) obstack_alloc(&kwset->obstack,
+ sizeof (struct tree));
+ if (!link)
+ return "memory exhausted";
+ link->llink = NULL;
+ link->rlink = NULL;
+ link->trie = (struct trie *) obstack_alloc(&kwset->obstack,
+ sizeof (struct trie));
+ if (!link->trie)
+ {
+ obstack_free(&kwset->obstack, link);
+ return "memory exhausted";
+ }
+ link->trie->accepting = 0;
+ link->trie->links = NULL;
+ link->trie->parent = trie;
+ link->trie->next = NULL;
+ link->trie->fail = NULL;
+ link->trie->depth = trie->depth + 1;
+ link->trie->shift = 0;
+ link->label = label;
+ link->balance = 0;
+
+ /* Install the new tree node in its parent. */
+ if (dirs[--depth] == L)
+ links[depth]->llink = link;
+ else
+ links[depth]->rlink = link;
+
+ /* Back up the tree fixing the balance flags. */
+ while (depth && !links[depth]->balance)
+ {
+ if (dirs[depth] == L)
+ --links[depth]->balance;
+ else
+ ++links[depth]->balance;
+ --depth;
+ }
+
+ /* Rebalance the tree by pointer rotations if necessary. */
+ if (depth && ((dirs[depth] == L && --links[depth]->balance)
+ || (dirs[depth] == R && ++links[depth]->balance)))
+ {
+ switch (links[depth]->balance)
+ {
+ case (char) -2:
+ switch (dirs[depth + 1])
+ {
+ case L:
+ r = links[depth], t = r->llink, rl = t->rlink;
+ t->rlink = r, r->llink = rl;
+ t->balance = r->balance = 0;
+ break;
+ case R:
+ r = links[depth], l = r->llink, t = l->rlink;
+ rl = t->rlink, lr = t->llink;
+ t->llink = l, l->rlink = lr, t->rlink = r, r->llink = rl;
+ l->balance = t->balance != 1 ? 0 : -1;
+ r->balance = t->balance != (char) -1 ? 0 : 1;
+ t->balance = 0;
+ break;
+ default:
+ abort ();
+ }
+ break;
+ case 2:
+ switch (dirs[depth + 1])
+ {
+ case R:
+ l = links[depth], t = l->rlink, lr = t->llink;
+ t->llink = l, l->rlink = lr;
+ t->balance = l->balance = 0;
+ break;
+ case L:
+ l = links[depth], r = l->rlink, t = r->llink;
+ lr = t->llink, rl = t->rlink;
+ t->llink = l, l->rlink = lr, t->rlink = r, r->llink = rl;
+ l->balance = t->balance != 1 ? 0 : -1;
+ r->balance = t->balance != (char) -1 ? 0 : 1;
+ t->balance = 0;
+ break;
+ default:
+ abort ();
+ }
+ break;
+ default:
+ abort ();
+ }
+
+ if (dirs[depth - 1] == L)
+ links[depth - 1]->llink = t;
+ else
+ links[depth - 1]->rlink = t;
+ }
+ }
+
+ trie = link->trie;
+ }
+
+ /* Mark the node we finally reached as accepting, encoding the
+ index number of this word in the keyword set so far. */
+ if (!trie->accepting)
+ trie->accepting = 1 + 2 * kwset->words;
+ ++kwset->words;
+
+ /* Keep track of the longest and shortest string of the keyword set. */
+ if (trie->depth < kwset->mind)
+ kwset->mind = trie->depth;
+ if (trie->depth > kwset->maxd)
+ kwset->maxd = trie->depth;
+
+ return NULL;
+}
+
+/* Enqueue the trie nodes referenced from the given tree in the
+ given queue. */
+static void
+enqueue (struct tree *tree, struct trie **last)
+{
+ if (!tree)
+ return;
+ enqueue(tree->llink, last);
+ enqueue(tree->rlink, last);
+ (*last) = (*last)->next = tree->trie;
+}
+
+/* Compute the Aho-Corasick failure function for the trie nodes referenced
+ from the given tree, given the failure function for their parent as
+ well as a last resort failure node. */
+static void
+treefails (register struct tree const *tree, struct trie const *fail,
+ struct trie *recourse)
+{
+ register struct tree *link;
+
+ if (!tree)
+ return;
+
+ treefails(tree->llink, fail, recourse);
+ treefails(tree->rlink, fail, recourse);
+
+ /* Find, in the chain of fails going back to the root, the first
+ node that has a descendent on the current label. */
+ while (fail)
+ {
+ link = fail->links;
+ while (link && tree->label != link->label)
+ if (tree->label < link->label)
+ link = link->llink;
+ else
+ link = link->rlink;
+ if (link)
+ {
+ tree->trie->fail = link->trie;
+ return;
+ }
+ fail = fail->fail;
+ }
+
+ tree->trie->fail = recourse;
+}
+
+/* Set delta entries for the links of the given tree such that
+ the preexisting delta value is larger than the current depth. */
+static void
+treedelta (register struct tree const *tree,
+ register unsigned int depth,
+ unsigned char delta[])
+{
+ if (!tree)
+ return;
+ treedelta(tree->llink, depth, delta);
+ treedelta(tree->rlink, depth, delta);
+ if (depth < delta[tree->label])
+ delta[tree->label] = depth;
+}
+
+/* Return true if A has every label in B. */
+static int
+hasevery (register struct tree const *a, register struct tree const *b)
+{
+ if (!b)
+ return 1;
+ if (!hasevery(a, b->llink))
+ return 0;
+ if (!hasevery(a, b->rlink))
+ return 0;
+ while (a && b->label != a->label)
+ if (b->label < a->label)
+ a = a->llink;
+ else
+ a = a->rlink;
+ return !!a;
+}
+
+/* Compute a vector, indexed by character code, of the trie nodes
+ referenced from the given tree. */
+static void
+treenext (struct tree const *tree, struct trie *next[])
+{
+ if (!tree)
+ return;
+ treenext(tree->llink, next);
+ treenext(tree->rlink, next);
+ next[tree->label] = tree->trie;
+}
+
+/* Compute the shift for each trie node, as well as the delta
+ table and next cache for the given keyword set. */
+const char *
+kwsprep (kwset_t kws)
+{
+ register struct kwset *kwset;
+ register int i;
+ register struct trie *curr;
+ register char const *trans;
+ unsigned char delta[NCHAR];
+
+ kwset = (struct kwset *) kws;
+
+ /* Initial values for the delta table; will be changed later. The
+ delta entry for a given character is the smallest depth of any
+ node at which an outgoing edge is labeled by that character. */
+ memset(delta, kwset->mind < UCHAR_MAX ? kwset->mind : UCHAR_MAX, NCHAR);
+
+ /* Check if we can use the simple boyer-moore algorithm, instead
+ of the hairy commentz-walter algorithm. */
+ if (kwset->words == 1 && kwset->trans == NULL)
+ {
+ char c;
+
+ /* Looking for just one string. Extract it from the trie. */
+ kwset->target = obstack_alloc(&kwset->obstack, kwset->mind);
+ if (!kwset->target)
+ return "memory exhausted";
+ for (i = kwset->mind - 1, curr = kwset->trie; i >= 0; --i)
+ {
+ kwset->target[i] = curr->links->label;
+ curr = curr->links->trie;
+ }
+ /* Build the Boyer Moore delta. Boy that's easy compared to CW. */
+ for (i = 0; i < kwset->mind; ++i)
+ delta[U(kwset->target[i])] = kwset->mind - (i + 1);
+ /* Find the minimal delta2 shift that we might make after
+ a backwards match has failed. */
+ c = kwset->target[kwset->mind - 1];
+ for (i = kwset->mind - 2; i >= 0; --i)
+ if (kwset->target[i] == c)
+ break;
+ kwset->mind2 = kwset->mind - (i + 1);
+ }
+ else
+ {
+ register struct trie *fail;
+ struct trie *last, *next[NCHAR];
+
+ /* Traverse the nodes of the trie in level order, simultaneously
+ computing the delta table, failure function, and shift function. */
+ for (curr = last = kwset->trie; curr; curr = curr->next)
+ {
+ /* Enqueue the immediate descendents in the level order queue. */
+ enqueue(curr->links, &last);
+
+ curr->shift = kwset->mind;
+ curr->maxshift = kwset->mind;
+
+ /* Update the delta table for the descendents of this node. */
+ treedelta(curr->links, curr->depth, delta);
+
+ /* Compute the failure function for the decendents of this node. */
+ treefails(curr->links, curr->fail, kwset->trie);
+
+ /* Update the shifts at each node in the current node's chain
+ of fails back to the root. */
+ for (fail = curr->fail; fail; fail = fail->fail)
+ {
+ /* If the current node has some outgoing edge that the fail
+ doesn't, then the shift at the fail should be no larger
+ than the difference of their depths. */
+ if (!hasevery(fail->links, curr->links))
+ if (curr->depth - fail->depth < fail->shift)
+ fail->shift = curr->depth - fail->depth;
+
+ /* If the current node is accepting then the shift at the
+ fail and its descendents should be no larger than the
+ difference of their depths. */
+ if (curr->accepting && fail->maxshift > curr->depth - fail->depth)
+ fail->maxshift = curr->depth - fail->depth;
+ }
+ }
+
+ /* Traverse the trie in level order again, fixing up all nodes whose
+ shift exceeds their inherited maxshift. */
+ for (curr = kwset->trie->next; curr; curr = curr->next)
+ {
+ if (curr->maxshift > curr->parent->maxshift)
+ curr->maxshift = curr->parent->maxshift;
+ if (curr->shift > curr->maxshift)
+ curr->shift = curr->maxshift;
+ }
+
+ /* Create a vector, indexed by character code, of the outgoing links
+ from the root node. */
+ for (i = 0; i < NCHAR; ++i)
+ next[i] = NULL;
+ treenext(kwset->trie->links, next);
+
+ if ((trans = kwset->trans) != NULL)
+ for (i = 0; i < NCHAR; ++i)
+ kwset->next[i] = next[U(trans[i])];
+ else
+ memcpy(kwset->next, next, NCHAR * sizeof(struct trie *));
+ }
+
+ /* Fix things up for any translation table. */
+ if ((trans = kwset->trans) != NULL)
+ for (i = 0; i < NCHAR; ++i)
+ kwset->delta[i] = delta[U(trans[i])];
+ else
+ memcpy(kwset->delta, delta, NCHAR);
+
+ return NULL;
+}
+
+/* Fast boyer-moore search. */
+static size_t
+bmexec (kwset_t kws, char const *text, size_t size)
+{
+ struct kwset const *kwset;
+ register unsigned char const *d1;
+ register char const *ep, *sp, *tp;
+ register int d, gc, i, len, md2;
+
+ kwset = (struct kwset const *) kws;
+ len = kwset->mind;
+
+ if (len == 0)
+ return 0;
+ if (len > size)
+ return -1;
+ if (len == 1)
+ {
+ tp = memchr (text, kwset->target[0], size);
+ return tp ? tp - text : -1;
+ }
+
+ d1 = kwset->delta;
+ sp = kwset->target + len;
+ gc = U(sp[-2]);
+ md2 = kwset->mind2;
+ tp = text + len;
+
+ /* Significance of 12: 1 (initial offset) + 10 (skip loop) + 1 (md2). */
+ if (size > 12 * len)
+ /* 11 is not a bug, the initial offset happens only once. */
+ for (ep = text + size - 11 * len;;)
+ {
+ while (tp <= ep)
+ {
+ d = d1[U(tp[-1])], tp += d;
+ d = d1[U(tp[-1])], tp += d;
+ if (d == 0)
+ goto found;
+ d = d1[U(tp[-1])], tp += d;
+ d = d1[U(tp[-1])], tp += d;
+ d = d1[U(tp[-1])], tp += d;
+ if (d == 0)
+ goto found;
+ d = d1[U(tp[-1])], tp += d;
+ d = d1[U(tp[-1])], tp += d;
+ d = d1[U(tp[-1])], tp += d;
+ if (d == 0)
+ goto found;
+ d = d1[U(tp[-1])], tp += d;
+ d = d1[U(tp[-1])], tp += d;
+ }
+ break;
+ found:
+ if (U(tp[-2]) == gc)
+ {
+ for (i = 3; i <= len && U(tp[-i]) == U(sp[-i]); ++i)
+ ;
+ if (i > len)
+ return tp - len - text;
+ }
+ tp += md2;
+ }
+
+ /* Now we have only a few characters left to search. We
+ carefully avoid ever producing an out-of-bounds pointer. */
+ ep = text + size;
+ d = d1[U(tp[-1])];
+ while (d <= ep - tp)
+ {
+ d = d1[U((tp += d)[-1])];
+ if (d != 0)
+ continue;
+ if (U(tp[-2]) == gc)
+ {
+ for (i = 3; i <= len && U(tp[-i]) == U(sp[-i]); ++i)
+ ;
+ if (i > len)
+ return tp - len - text;
+ }
+ d = md2;
+ }
+
+ return -1;
+}
+
+/* Hairy multiple string search. */
+static size_t
+cwexec (kwset_t kws, char const *text, size_t len, struct kwsmatch *kwsmatch)
+{
+ struct kwset const *kwset;
+ struct trie * const *next;
+ struct trie const *trie;
+ struct trie const *accept;
+ char const *beg, *lim, *mch, *lmch;
+ register unsigned char c;
+ register unsigned char const *delta;
+ register int d;
+ register char const *end, *qlim;
+ register struct tree const *tree;
+ register char const *trans;
+
+ accept = NULL;
+
+ /* Initialize register copies and look for easy ways out. */
+ kwset = (struct kwset *) kws;
+ if (len < kwset->mind)
+ return -1;
+ next = kwset->next;
+ delta = kwset->delta;
+ trans = kwset->trans;
+ lim = text + len;
+ end = text;
+ if ((d = kwset->mind) != 0)
+ mch = NULL;
+ else
+ {
+ mch = text, accept = kwset->trie;
+ goto match;
+ }
+
+ if (len >= 4 * kwset->mind)
+ qlim = lim - 4 * kwset->mind;
+ else
+ qlim = NULL;
+
+ while (lim - end >= d)
+ {
+ if (qlim && end <= qlim)
+ {
+ end += d - 1;
+ while ((d = delta[c = *end]) && end < qlim)
+ {
+ end += d;
+ end += delta[U(*end)];
+ end += delta[U(*end)];
+ }
+ ++end;
+ }
+ else
+ d = delta[c = (end += d)[-1]];
+ if (d)
+ continue;
+ beg = end - 1;
+ trie = next[c];
+ if (trie->accepting)
+ {
+ mch = beg;
+ accept = trie;
+ }
+ d = trie->shift;
+ while (beg > text)
+ {
+ c = trans ? trans[U(*--beg)] : *--beg;
+ tree = trie->links;
+ while (tree && c != tree->label)
+ if (c < tree->label)
+ tree = tree->llink;
+ else
+ tree = tree->rlink;
+ if (tree)
+ {
+ trie = tree->trie;
+ if (trie->accepting)
+ {
+ mch = beg;
+ accept = trie;
+ }
+ }
+ else
+ break;
+ d = trie->shift;
+ }
+ if (mch)
+ goto match;
+ }
+ return -1;
+
+ match:
+ /* Given a known match, find the longest possible match anchored
+ at or before its starting point. This is nearly a verbatim
+ copy of the preceding main search loops. */
+ if (lim - mch > kwset->maxd)
+ lim = mch + kwset->maxd;
+ lmch = 0;
+ d = 1;
+ while (lim - end >= d)
+ {
+ if ((d = delta[c = (end += d)[-1]]) != 0)
+ continue;
+ beg = end - 1;
+ if (!(trie = next[c]))
+ {
+ d = 1;
+ continue;
+ }
+ if (trie->accepting && beg <= mch)
+ {
+ lmch = beg;
+ accept = trie;
+ }
+ d = trie->shift;
+ while (beg > text)
+ {
+ c = trans ? trans[U(*--beg)] : *--beg;
+ tree = trie->links;
+ while (tree && c != tree->label)
+ if (c < tree->label)
+ tree = tree->llink;
+ else
+ tree = tree->rlink;
+ if (tree)
+ {
+ trie = tree->trie;
+ if (trie->accepting && beg <= mch)
+ {
+ lmch = beg;
+ accept = trie;
+ }
+ }
+ else
+ break;
+ d = trie->shift;
+ }
+ if (lmch)
+ {
+ mch = lmch;
+ goto match;
+ }
+ if (!d)
+ d = 1;
+ }
+
+ if (kwsmatch)
+ {
+ kwsmatch->index = accept->accepting / 2;
+ kwsmatch->offset[0] = mch - text;
+ kwsmatch->size[0] = accept->depth;
+ }
+ return mch - text;
+}
+
+/* Search through the given text for a match of any member of the
+ given keyword set. Return a pointer to the first character of
+ the matching substring, or NULL if no match is found. If FOUNDLEN
+ is non-NULL store in the referenced location the length of the
+ matching substring. Similarly, if FOUNDIDX is non-NULL, store
+ in the referenced location the index number of the particular
+ keyword matched. */
+size_t
+kwsexec (kwset_t kws, char const *text, size_t size,
+ struct kwsmatch *kwsmatch)
+{
+ struct kwset const *kwset = (struct kwset *) kws;
+ if (kwset->words == 1 && kwset->trans == NULL)
+ {
+ size_t ret = bmexec (kws, text, size);
+ if (kwsmatch != NULL && ret != (size_t) -1)
+ {
+ kwsmatch->index = 0;
+ kwsmatch->offset[0] = ret;
+ kwsmatch->size[0] = kwset->mind;
+ }
+ return ret;
+ }
+ else
+ return cwexec(kws, text, size, kwsmatch);
+}
+
+/* Free the components of the given keyword set. */
+void
+kwsfree (kwset_t kws)
+{
+ struct kwset *kwset;
+
+ kwset = (struct kwset *) kws;
+ obstack_free(&kwset->obstack, NULL);
+ free(kws);
+}
diff --git a/kwset.h b/kwset.h
new file mode 100644
index 0000000000..a21b2eadfd
--- /dev/null
+++ b/kwset.h
@@ -0,0 +1,63 @@
+/* This file has been copied from commit e7ac713d^ in the GNU grep git
+ * repository. A few small changes have been made to adapt the code to
+ * Git.
+ */
+
+/* kwset.h - header declaring the keyword set library.
+ Copyright (C) 1989, 1998, 2005 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
+ 02110-1301, USA. */
+
+/* Written August 1989 by Mike Haertel.
+ The author may be reached (Email) at the address mike@ai.mit.edu,
+ or (US mail) as Mike Haertel c/o Free Software Foundation. */
+
+struct kwsmatch
+{
+ int index; /* Index number of matching keyword. */
+ size_t offset[1]; /* Offset of each submatch. */
+ size_t size[1]; /* Length of each submatch. */
+};
+
+struct kwset_t;
+typedef struct kwset_t* kwset_t;
+
+/* Return an opaque pointer to a newly allocated keyword set, or NULL
+ if enough memory cannot be obtained. The argument if non-NULL
+ specifies a table of character translations to be applied to all
+ pattern and search text. */
+extern kwset_t kwsalloc(char const *);
+
+/* Incrementally extend the keyword set to include the given string.
+ Return NULL for success, or an error message. Remember an index
+ number for each keyword included in the set. */
+extern const char *kwsincr(kwset_t, char const *, size_t);
+
+/* When the keyword set has been completely built, prepare it for
+ use. Return NULL for success, or an error message. */
+extern const char *kwsprep(kwset_t);
+
+/* Search through the given buffer for a member of the keyword set.
+ Return a pointer to the leftmost longest match found, or NULL if
+ no match is found. If foundlen is non-NULL, store the length of
+ the matching substring in the integer it points to. Similarly,
+ if foundindex is non-NULL, store the index of the particular
+ keyword found therein. */
+extern size_t kwsexec(kwset_t, char const *, size_t, struct kwsmatch *);
+
+/* Deallocate the given keyword set and all its associated storage. */
+extern void kwsfree(kwset_t);
+
diff --git a/t/t7008-grep-binary.sh b/t/t7008-grep-binary.sh
index e058d184d1..917a264eea 100755
--- a/t/t7008-grep-binary.sh
+++ b/t/t7008-grep-binary.sh
@@ -84,7 +84,7 @@ test_expect_success 'git grep -Fi Y<NUL>f a' "
git grep -f f -Fi a
"
-test_expect_failure 'git grep -Fi Y<NUL>x a' "
+test_expect_success 'git grep -Fi Y<NUL>x a' "
printf 'YQx' | q_to_nul >f &&
test_must_fail git grep -f f -Fi a
"
@@ -94,7 +94,7 @@ test_expect_success 'git grep y<NUL>f a' "
git grep -f f a
"
-test_expect_failure 'git grep y<NUL>x a' "
+test_expect_success 'git grep y<NUL>x a' "
printf 'yQx' | q_to_nul >f &&
test_must_fail git grep -f f a
"