summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
author Lorry Tar Creator <lorry-tar-importer@lorry> 2016-04-22 04:38:07 +0000
committer Lorry Tar Creator <lorry-tar-importer@lorry> 2016-04-22 04:38:07 +0000
commit 28ef1abc10cfbc2c3d2747c008eb2300858d0426 (patch)
tree 41208fb8f393e6cb6cc8f939623ad47a0db17876 /src
download grep-tarball-master.tar.gz
Diffstat (limited to 'src')
-rw-r--r-- src/Makefile.am 63
-rw-r--r-- src/Makefile.in 1602
-rw-r--r-- src/dfa.c 4168
-rw-r--r-- src/dfa.h 119
-rw-r--r-- src/dfasearch.c 451
-rw-r--r-- src/dosbuf.c 222
-rw-r--r-- src/egrep.sh 2
-rw-r--r-- src/grep.c 2720
-rw-r--r-- src/grep.h 34
-rw-r--r-- src/kwsearch.c 165
-rw-r--r-- src/kwset.c 868
-rw-r--r-- src/kwset.h 60
-rw-r--r-- src/pcresearch.c 389
-rw-r--r-- src/search.h 82
-rw-r--r-- src/searchutils.c 127
-rw-r--r-- src/system.h 110
16 files changed, 11182 insertions, 0 deletions
diff --git a/src/Makefile.am b/src/Makefile.am
new file mode 100644
index 0000000..941384e
--- /dev/null
+++ b/src/Makefile.am
@@ -0,0 +1,63 @@
+## Process this file with automake to create Makefile.in
+# Copyright 1997-1998, 2005-2016 Free Software Foundation, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+LN = ln
+
+AM_CFLAGS = $(WARN_CFLAGS) $(WERROR_CFLAGS) $(PCRE_CFLAGS)
+
+# Tell the linker to omit references to unused shared libraries.
+AM_LDFLAGS = $(IGNORE_UNUSED_LIBRARIES_CFLAGS)
+
+bin_PROGRAMS = grep
+bin_SCRIPTS = egrep fgrep
+grep_SOURCES = grep.c searchutils.c \
+ dfa.c dfasearch.c \
+ kwset.c kwsearch.c \
+ pcresearch.c
+noinst_HEADERS = grep.h dfa.h kwset.h search.h system.h
+
+# Sometimes, the expansion of $(LIBINTL) includes -lc which may
+# include modules defining variables like 'optind', so libgreputils.a
+# must precede $(LIBINTL) in order to ensure we use GNU getopt.
+# But libgreputils.a must also follow $(LIBINTL), since libintl uses
+# replacement functions defined in libgreputils.a.
+LDADD = \
+ ../lib/libgreputils.a $(LIBINTL) ../lib/libgreputils.a $(LIBICONV) \
+ $(LIBTHREAD)
+
+grep_LDADD = $(LDADD) $(PCRE_LIBS)
+localedir = $(datadir)/locale
+AM_CPPFLAGS = -I$(top_builddir)/lib -I$(top_srcdir)/lib
+
+EXTRA_DIST = dosbuf.c egrep.sh
+
+egrep fgrep: egrep.sh Makefile
+ $(AM_V_GEN)grep=`echo grep | sed -e '$(transform)'` && \
+ case $@ in egrep) option=-E;; fgrep) option=-F;; esac && \
+ shell_does_substrings='set x/y && d=$${1%/*} && test "$$d" = x' && \
+ if $(SHELL) -c "$$shell_does_substrings" 2>/dev/null; then \
+ edit_substring='s,X,X,'; \
+ else \
+ edit_substring='s,\$${0%/\*},`expr "X$$0" : '\''X\\(.*\\)/'\''`,g'; \
+ fi && \
+ sed -e 's|[@]SHELL@|$(SHELL)|g' \
+ -e "$$edit_substring" \
+ -e "s|[@]grep@|$$grep|g" \
+ -e "s|[@]option@|$$option|g" <$(srcdir)/egrep.sh >$@-t
+ $(AM_V_at)chmod +x $@-t
+ $(AM_V_at)mv $@-t $@
+
+CLEANFILES = egrep fgrep *-t
diff --git a/src/Makefile.in b/src/Makefile.in
new file mode 100644
index 0000000..02cb0b6
--- /dev/null
+++ b/src/Makefile.in
@@ -0,0 +1,1602 @@
+# Makefile.in generated by automake 1.99a from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994-2015 Free Software Foundation, Inc.
+
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+# Copyright 1997-1998, 2005-2016 Free Software Foundation, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+
+
+VPATH = @srcdir@
+am__is_gnu_make = { \
+ if test -z '$(MAKELEVEL)'; then \
+ false; \
+ elif test -n '$(MAKE_HOST)'; then \
+ true; \
+ elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
+ true; \
+ else \
+ false; \
+ fi; \
+}
+am__make_running_with_option = \
+ case $${target_option-} in \
+ ?) ;; \
+ *) echo "am__make_running_with_option: internal error: invalid" \
+ "target option '$${target_option-}' specified" >&2; \
+ exit 1;; \
+ esac; \
+ has_opt=no; \
+ sane_makeflags=$$MAKEFLAGS; \
+ if $(am__is_gnu_make); then \
+ sane_makeflags=$$MFLAGS; \
+ else \
+ case $$MAKEFLAGS in \
+ *\\[\ \ ]*) \
+ bs=\\; \
+ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
+ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \
+ esac; \
+ fi; \
+ skip_next=no; \
+ strip_trailopt () \
+ { \
+ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
+ }; \
+ for flg in $$sane_makeflags; do \
+ test $$skip_next = yes && { skip_next=no; continue; }; \
+ case $$flg in \
+ *=*|--*) continue;; \
+ -*I) strip_trailopt 'I'; skip_next=yes;; \
+ -*I?*) strip_trailopt 'I';; \
+ -*O) strip_trailopt 'O'; skip_next=yes;; \
+ -*O?*) strip_trailopt 'O';; \
+ -*l) strip_trailopt 'l'; skip_next=yes;; \
+ -*l?*) strip_trailopt 'l';; \
+ -[dEDm]) skip_next=yes;; \
+ -[JT]) skip_next=yes;; \
+ esac; \
+ case $$flg in \
+ *$$target_option*) has_opt=yes; break;; \
+ esac; \
+ done; \
+ test $$has_opt = yes
+am__make_dryrun = (target_option=n; $(am__make_running_with_option))
+am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+bin_PROGRAMS = grep$(EXEEXT)
+subdir = src
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/m4/00gnulib.m4 \
+ $(top_srcdir)/m4/absolute-header.m4 $(top_srcdir)/m4/alloca.m4 \
+ $(top_srcdir)/m4/btowc.m4 $(top_srcdir)/m4/chdir-long.m4 \
+ $(top_srcdir)/m4/close-stream.m4 $(top_srcdir)/m4/close.m4 \
+ $(top_srcdir)/m4/closedir.m4 $(top_srcdir)/m4/closeout.m4 \
+ $(top_srcdir)/m4/codeset.m4 $(top_srcdir)/m4/configmake.m4 \
+ $(top_srcdir)/m4/ctype.m4 $(top_srcdir)/m4/cycle-check.m4 \
+ $(top_srcdir)/m4/d-ino.m4 $(top_srcdir)/m4/d-type.m4 \
+ $(top_srcdir)/m4/dirent-safer.m4 $(top_srcdir)/m4/dirent_h.m4 \
+ $(top_srcdir)/m4/dirfd.m4 $(top_srcdir)/m4/dirname.m4 \
+ $(top_srcdir)/m4/double-slash-root.m4 $(top_srcdir)/m4/dup.m4 \
+ $(top_srcdir)/m4/dup2.m4 $(top_srcdir)/m4/eealloc.m4 \
+ $(top_srcdir)/m4/environ.m4 $(top_srcdir)/m4/errno_h.m4 \
+ $(top_srcdir)/m4/error.m4 $(top_srcdir)/m4/exponentd.m4 \
+ $(top_srcdir)/m4/extensions.m4 \
+ $(top_srcdir)/m4/extern-inline.m4 $(top_srcdir)/m4/fchdir.m4 \
+ $(top_srcdir)/m4/fcntl-o.m4 $(top_srcdir)/m4/fcntl-safer.m4 \
+ $(top_srcdir)/m4/fcntl.m4 $(top_srcdir)/m4/fcntl_h.m4 \
+ $(top_srcdir)/m4/fdopen.m4 $(top_srcdir)/m4/fdopendir.m4 \
+ $(top_srcdir)/m4/filenamecat.m4 $(top_srcdir)/m4/flexmember.m4 \
+ $(top_srcdir)/m4/float_h.m4 $(top_srcdir)/m4/fnmatch.m4 \
+ $(top_srcdir)/m4/fpending.m4 $(top_srcdir)/m4/fpieee.m4 \
+ $(top_srcdir)/m4/fstat.m4 $(top_srcdir)/m4/fstatat.m4 \
+ $(top_srcdir)/m4/fts.m4 $(top_srcdir)/m4/getcwd.m4 \
+ $(top_srcdir)/m4/getdtablesize.m4 $(top_srcdir)/m4/getopt.m4 \
+ $(top_srcdir)/m4/getpagesize.m4 $(top_srcdir)/m4/gettext.m4 \
+ $(top_srcdir)/m4/gettimeofday.m4 $(top_srcdir)/m4/glibc21.m4 \
+ $(top_srcdir)/m4/gnulib-common.m4 \
+ $(top_srcdir)/m4/gnulib-comp.m4 \
+ $(top_srcdir)/m4/hard-locale.m4 $(top_srcdir)/m4/i-ring.m4 \
+ $(top_srcdir)/m4/iconv.m4 $(top_srcdir)/m4/iconv_h.m4 \
+ $(top_srcdir)/m4/iconv_open.m4 \
+ $(top_srcdir)/m4/include_next.m4 $(top_srcdir)/m4/inline.m4 \
+ $(top_srcdir)/m4/intlmacosx.m4 $(top_srcdir)/m4/intmax_t.m4 \
+ $(top_srcdir)/m4/inttostr.m4 $(top_srcdir)/m4/inttypes-pri.m4 \
+ $(top_srcdir)/m4/inttypes.m4 $(top_srcdir)/m4/inttypes_h.m4 \
+ $(top_srcdir)/m4/isatty.m4 $(top_srcdir)/m4/isblank.m4 \
+ $(top_srcdir)/m4/iswblank.m4 $(top_srcdir)/m4/iswctype.m4 \
+ $(top_srcdir)/m4/langinfo_h.m4 $(top_srcdir)/m4/largefile.m4 \
+ $(top_srcdir)/m4/lcmessage.m4 $(top_srcdir)/m4/lib-ld.m4 \
+ $(top_srcdir)/m4/lib-link.m4 $(top_srcdir)/m4/lib-prefix.m4 \
+ $(top_srcdir)/m4/libunistring-base.m4 \
+ $(top_srcdir)/m4/localcharset.m4 $(top_srcdir)/m4/locale-fr.m4 \
+ $(top_srcdir)/m4/locale-ja.m4 $(top_srcdir)/m4/locale-tr.m4 \
+ $(top_srcdir)/m4/locale-zh.m4 $(top_srcdir)/m4/locale_h.m4 \
+ $(top_srcdir)/m4/localeconv.m4 $(top_srcdir)/m4/localename.m4 \
+ $(top_srcdir)/m4/lock.m4 $(top_srcdir)/m4/longlong.m4 \
+ $(top_srcdir)/m4/lseek.m4 $(top_srcdir)/m4/lstat.m4 \
+ $(top_srcdir)/m4/malloc.m4 $(top_srcdir)/m4/malloca.m4 \
+ $(top_srcdir)/m4/manywarnings.m4 $(top_srcdir)/m4/mbchar.m4 \
+ $(top_srcdir)/m4/mbiter.m4 $(top_srcdir)/m4/mbrlen.m4 \
+ $(top_srcdir)/m4/mbrtowc.m4 $(top_srcdir)/m4/mbsinit.m4 \
+ $(top_srcdir)/m4/mbslen.m4 $(top_srcdir)/m4/mbsrtowcs.m4 \
+ $(top_srcdir)/m4/mbstate_t.m4 $(top_srcdir)/m4/mbtowc.m4 \
+ $(top_srcdir)/m4/memchr.m4 $(top_srcdir)/m4/mempcpy.m4 \
+ $(top_srcdir)/m4/memrchr.m4 $(top_srcdir)/m4/minmax.m4 \
+ $(top_srcdir)/m4/mmap-anon.m4 $(top_srcdir)/m4/mode_t.m4 \
+ $(top_srcdir)/m4/msvc-inval.m4 \
+ $(top_srcdir)/m4/msvc-nothrow.m4 $(top_srcdir)/m4/multiarch.m4 \
+ $(top_srcdir)/m4/nl_langinfo.m4 $(top_srcdir)/m4/nls.m4 \
+ $(top_srcdir)/m4/nocrash.m4 $(top_srcdir)/m4/obstack.m4 \
+ $(top_srcdir)/m4/off_t.m4 $(top_srcdir)/m4/onceonly.m4 \
+ $(top_srcdir)/m4/open.m4 $(top_srcdir)/m4/openat.m4 \
+ $(top_srcdir)/m4/opendir.m4 $(top_srcdir)/m4/pathmax.m4 \
+ $(top_srcdir)/m4/pcre.m4 $(top_srcdir)/m4/perl.m4 \
+ $(top_srcdir)/m4/pipe.m4 $(top_srcdir)/m4/pkg.m4 \
+ $(top_srcdir)/m4/po.m4 $(top_srcdir)/m4/printf.m4 \
+ $(top_srcdir)/m4/progtest.m4 $(top_srcdir)/m4/putenv.m4 \
+ $(top_srcdir)/m4/quote.m4 $(top_srcdir)/m4/quotearg.m4 \
+ $(top_srcdir)/m4/read.m4 $(top_srcdir)/m4/readdir.m4 \
+ $(top_srcdir)/m4/realloc.m4 $(top_srcdir)/m4/regex.m4 \
+ $(top_srcdir)/m4/safe-read.m4 $(top_srcdir)/m4/save-cwd.m4 \
+ $(top_srcdir)/m4/setenv.m4 $(top_srcdir)/m4/setlocale.m4 \
+ $(top_srcdir)/m4/size_max.m4 $(top_srcdir)/m4/snprintf.m4 \
+ $(top_srcdir)/m4/ssize_t.m4 $(top_srcdir)/m4/stat.m4 \
+ $(top_srcdir)/m4/stdalign.m4 $(top_srcdir)/m4/stdarg.m4 \
+ $(top_srcdir)/m4/stdbool.m4 $(top_srcdir)/m4/stddef_h.m4 \
+ $(top_srcdir)/m4/stdint.m4 $(top_srcdir)/m4/stdint_h.m4 \
+ $(top_srcdir)/m4/stdio_h.m4 $(top_srcdir)/m4/stdlib_h.m4 \
+ $(top_srcdir)/m4/stpcpy.m4 $(top_srcdir)/m4/strdup.m4 \
+ $(top_srcdir)/m4/strerror.m4 $(top_srcdir)/m4/string_h.m4 \
+ $(top_srcdir)/m4/strnlen.m4 $(top_srcdir)/m4/strstr.m4 \
+ $(top_srcdir)/m4/strtoimax.m4 $(top_srcdir)/m4/strtoll.m4 \
+ $(top_srcdir)/m4/strtoull.m4 $(top_srcdir)/m4/strtoumax.m4 \
+ $(top_srcdir)/m4/symlink.m4 $(top_srcdir)/m4/sys_socket_h.m4 \
+ $(top_srcdir)/m4/sys_stat_h.m4 $(top_srcdir)/m4/sys_time_h.m4 \
+ $(top_srcdir)/m4/sys_types_h.m4 $(top_srcdir)/m4/threadlib.m4 \
+ $(top_srcdir)/m4/time_h.m4 $(top_srcdir)/m4/unistd-safer.m4 \
+ $(top_srcdir)/m4/unistd_h.m4 $(top_srcdir)/m4/unlocked-io.m4 \
+ $(top_srcdir)/m4/vasnprintf.m4 $(top_srcdir)/m4/version-etc.m4 \
+ $(top_srcdir)/m4/warn-on-use.m4 $(top_srcdir)/m4/warnings.m4 \
+ $(top_srcdir)/m4/wchar_h.m4 $(top_srcdir)/m4/wchar_t.m4 \
+ $(top_srcdir)/m4/wcrtomb.m4 $(top_srcdir)/m4/wctob.m4 \
+ $(top_srcdir)/m4/wctomb.m4 $(top_srcdir)/m4/wctype_h.m4 \
+ $(top_srcdir)/m4/wcwidth.m4 $(top_srcdir)/m4/wint_t.m4 \
+ $(top_srcdir)/m4/xalloc.m4 $(top_srcdir)/m4/xsize.m4 \
+ $(top_srcdir)/m4/xstrtol.m4 $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+ $(ACLOCAL_M4)
+DIST_COMMON = $(srcdir)/Makefile.am $(noinst_HEADERS) \
+ $(am__DIST_COMMON)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/config.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+am__installdirs = "$(DESTDIR)$(bindir)" "$(DESTDIR)$(bindir)"
+PROGRAMS = $(bin_PROGRAMS)
+am_grep_OBJECTS = grep.$(OBJEXT) searchutils.$(OBJEXT) dfa.$(OBJEXT) \
+ dfasearch.$(OBJEXT) kwset.$(OBJEXT) kwsearch.$(OBJEXT) \
+ pcresearch.$(OBJEXT)
+grep_OBJECTS = $(am_grep_OBJECTS)
+am__DEPENDENCIES_1 =
+am__DEPENDENCIES_2 = ../lib/libgreputils.a $(am__DEPENDENCIES_1) \
+ ../lib/libgreputils.a $(am__DEPENDENCIES_1) \
+ $(am__DEPENDENCIES_1)
+grep_DEPENDENCIES = $(am__DEPENDENCIES_2) $(am__DEPENDENCIES_1)
+am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
+am__vpath_adj = case $$p in \
+ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
+ *) f=$$p;; \
+ esac;
+am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
+am__install_max = 40
+am__nobase_strip_setup = \
+ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
+am__nobase_strip = \
+ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
+am__nobase_list = $(am__nobase_strip_setup); \
+ for p in $$list; do echo "$$p $$p"; done | \
+ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
+ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
+ if (++n[$$2] == $(am__install_max)) \
+ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
+ END { for (dir in files) print dir, files[dir] }'
+am__base_list = \
+ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
+ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
+SCRIPTS = $(bin_SCRIPTS)
+AM_V_P = $(am__v_P_@AM_V@)
+am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
+am__v_P_0 = false
+am__v_P_1 = :
+AM_V_GEN = $(am__v_GEN_@AM_V@)
+am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
+am__v_GEN_0 = @echo " GEN " $@;
+am__v_GEN_1 =
+AM_V_at = $(am__v_at_@AM_V@)
+am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
+am__v_at_0 = @
+am__v_at_1 =
+DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
+depcomp = $(SHELL) $(top_srcdir)/build-aux/depcomp
+am__maybe_remake_depfiles = depfiles
+am__depfiles_remade = ./$(DEPDIR)/dfa.Po ./$(DEPDIR)/dfasearch.Po \
+ ./$(DEPDIR)/grep.Po ./$(DEPDIR)/kwsearch.Po \
+ ./$(DEPDIR)/kwset.Po ./$(DEPDIR)/pcresearch.Po \
+ ./$(DEPDIR)/searchutils.Po
+am__mv = mv -f
+am__set_depbase = depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.[^.]*$$||'`
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+AM_V_CC = $(am__v_CC_@AM_V@)
+am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@)
+am__v_CC_0 = @echo " CC " $@;
+am__v_CC_1 =
+CCLD = $(CC)
+LINK = $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@
+AM_V_CCLD = $(am__v_CCLD_@AM_V@)
+am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@)
+am__v_CCLD_0 = @echo " CCLD " $@;
+am__v_CCLD_1 =
+SOURCES = $(grep_SOURCES)
+DIST_SOURCES = $(grep_SOURCES)
+am__can_run_installinfo = \
+ case $$AM_UPDATE_INFO_DIR in \
+ n|no|NO) false;; \
+ *) (install-info --version) >/dev/null 2>&1;; \
+ esac
+HEADERS = $(noinst_HEADERS)
+am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
+# Read a list of newline-separated strings from the standard input,
+# and print each of them once, without duplicates. Input order is
+# *not* preserved.
+am__uniquify_input = $(AWK) '\
+ BEGIN { nonempty = 0; } \
+ { items[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in items) print i; }; } \
+'
+# Make sure the list of sources is unique. This is necessary because,
+# e.g., the same source file might be shared among _SOURCES variables
+# for different programs/libraries.
+am__define_uniq_tagged_files = \
+ list='$(am__tagged_files)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | $(am__uniquify_input)`
+ETAGS = etags
+CTAGS = ctags
+am__DIST_COMMON = $(srcdir)/Makefile.in \
+ $(top_srcdir)/build-aux/depcomp
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+pkglibexecdir = @pkglibexecdir@
+ACLOCAL = @ACLOCAL@
+ALLOCA = @ALLOCA@
+ALLOCA_H = @ALLOCA_H@
+AMTAR = @AMTAR@
+AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
+APPLE_UNIVERSAL_BUILD = @APPLE_UNIVERSAL_BUILD@
+AR = @AR@
+ARFLAGS = @ARFLAGS@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+BITSIZEOF_PTRDIFF_T = @BITSIZEOF_PTRDIFF_T@
+BITSIZEOF_SIG_ATOMIC_T = @BITSIZEOF_SIG_ATOMIC_T@
+BITSIZEOF_SIZE_T = @BITSIZEOF_SIZE_T@
+BITSIZEOF_WCHAR_T = @BITSIZEOF_WCHAR_T@
+BITSIZEOF_WINT_T = @BITSIZEOF_WINT_T@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+COLORIZE_SOURCE = @COLORIZE_SOURCE@
+CONFIG_INCLUDE = @CONFIG_INCLUDE@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EMULTIHOP_HIDDEN = @EMULTIHOP_HIDDEN@
+EMULTIHOP_VALUE = @EMULTIHOP_VALUE@
+ENOLINK_HIDDEN = @ENOLINK_HIDDEN@
+ENOLINK_VALUE = @ENOLINK_VALUE@
+EOVERFLOW_HIDDEN = @EOVERFLOW_HIDDEN@
+EOVERFLOW_VALUE = @EOVERFLOW_VALUE@
+ERRNO_H = @ERRNO_H@
+EXEEXT = @EXEEXT@
+FLOAT_H = @FLOAT_H@
+FNMATCH_H = @FNMATCH_H@
+GETOPT_H = @GETOPT_H@
+GETTEXT_MACRO_VERSION = @GETTEXT_MACRO_VERSION@
+GLIBC21 = @GLIBC21@
+GMSGFMT = @GMSGFMT@
+GMSGFMT_015 = @GMSGFMT_015@
+GNULIB_ALPHASORT = @GNULIB_ALPHASORT@
+GNULIB_ATOLL = @GNULIB_ATOLL@
+GNULIB_BTOWC = @GNULIB_BTOWC@
+GNULIB_CALLOC_POSIX = @GNULIB_CALLOC_POSIX@
+GNULIB_CANONICALIZE_FILE_NAME = @GNULIB_CANONICALIZE_FILE_NAME@
+GNULIB_CHDIR = @GNULIB_CHDIR@
+GNULIB_CHOWN = @GNULIB_CHOWN@
+GNULIB_CLOSE = @GNULIB_CLOSE@
+GNULIB_CLOSEDIR = @GNULIB_CLOSEDIR@
+GNULIB_DIRFD = @GNULIB_DIRFD@
+GNULIB_DPRINTF = @GNULIB_DPRINTF@
+GNULIB_DUP = @GNULIB_DUP@
+GNULIB_DUP2 = @GNULIB_DUP2@
+GNULIB_DUP3 = @GNULIB_DUP3@
+GNULIB_DUPLOCALE = @GNULIB_DUPLOCALE@
+GNULIB_ENVIRON = @GNULIB_ENVIRON@
+GNULIB_EUIDACCESS = @GNULIB_EUIDACCESS@
+GNULIB_FACCESSAT = @GNULIB_FACCESSAT@
+GNULIB_FCHDIR = @GNULIB_FCHDIR@
+GNULIB_FCHMODAT = @GNULIB_FCHMODAT@
+GNULIB_FCHOWNAT = @GNULIB_FCHOWNAT@
+GNULIB_FCLOSE = @GNULIB_FCLOSE@
+GNULIB_FCNTL = @GNULIB_FCNTL@
+GNULIB_FDATASYNC = @GNULIB_FDATASYNC@
+GNULIB_FDOPEN = @GNULIB_FDOPEN@
+GNULIB_FDOPENDIR = @GNULIB_FDOPENDIR@
+GNULIB_FFLUSH = @GNULIB_FFLUSH@
+GNULIB_FFSL = @GNULIB_FFSL@
+GNULIB_FFSLL = @GNULIB_FFSLL@
+GNULIB_FGETC = @GNULIB_FGETC@
+GNULIB_FGETS = @GNULIB_FGETS@
+GNULIB_FOPEN = @GNULIB_FOPEN@
+GNULIB_FPRINTF = @GNULIB_FPRINTF@
+GNULIB_FPRINTF_POSIX = @GNULIB_FPRINTF_POSIX@
+GNULIB_FPURGE = @GNULIB_FPURGE@
+GNULIB_FPUTC = @GNULIB_FPUTC@
+GNULIB_FPUTS = @GNULIB_FPUTS@
+GNULIB_FREAD = @GNULIB_FREAD@
+GNULIB_FREOPEN = @GNULIB_FREOPEN@
+GNULIB_FSCANF = @GNULIB_FSCANF@
+GNULIB_FSEEK = @GNULIB_FSEEK@
+GNULIB_FSEEKO = @GNULIB_FSEEKO@
+GNULIB_FSTAT = @GNULIB_FSTAT@
+GNULIB_FSTATAT = @GNULIB_FSTATAT@
+GNULIB_FSYNC = @GNULIB_FSYNC@
+GNULIB_FTELL = @GNULIB_FTELL@
+GNULIB_FTELLO = @GNULIB_FTELLO@
+GNULIB_FTRUNCATE = @GNULIB_FTRUNCATE@
+GNULIB_FUTIMENS = @GNULIB_FUTIMENS@
+GNULIB_FWRITE = @GNULIB_FWRITE@
+GNULIB_GETC = @GNULIB_GETC@
+GNULIB_GETCHAR = @GNULIB_GETCHAR@
+GNULIB_GETCWD = @GNULIB_GETCWD@
+GNULIB_GETDELIM = @GNULIB_GETDELIM@
+GNULIB_GETDOMAINNAME = @GNULIB_GETDOMAINNAME@
+GNULIB_GETDTABLESIZE = @GNULIB_GETDTABLESIZE@
+GNULIB_GETGROUPS = @GNULIB_GETGROUPS@
+GNULIB_GETHOSTNAME = @GNULIB_GETHOSTNAME@
+GNULIB_GETLINE = @GNULIB_GETLINE@
+GNULIB_GETLOADAVG = @GNULIB_GETLOADAVG@
+GNULIB_GETLOGIN = @GNULIB_GETLOGIN@
+GNULIB_GETLOGIN_R = @GNULIB_GETLOGIN_R@
+GNULIB_GETPAGESIZE = @GNULIB_GETPAGESIZE@
+GNULIB_GETSUBOPT = @GNULIB_GETSUBOPT@
+GNULIB_GETTIMEOFDAY = @GNULIB_GETTIMEOFDAY@
+GNULIB_GETUSERSHELL = @GNULIB_GETUSERSHELL@
+GNULIB_GL_UNISTD_H_GETOPT = @GNULIB_GL_UNISTD_H_GETOPT@
+GNULIB_GRANTPT = @GNULIB_GRANTPT@
+GNULIB_GROUP_MEMBER = @GNULIB_GROUP_MEMBER@
+GNULIB_ICONV = @GNULIB_ICONV@
+GNULIB_IMAXABS = @GNULIB_IMAXABS@
+GNULIB_IMAXDIV = @GNULIB_IMAXDIV@
+GNULIB_ISATTY = @GNULIB_ISATTY@
+GNULIB_ISBLANK = @GNULIB_ISBLANK@
+GNULIB_ISWBLANK = @GNULIB_ISWBLANK@
+GNULIB_ISWCTYPE = @GNULIB_ISWCTYPE@
+GNULIB_LCHMOD = @GNULIB_LCHMOD@
+GNULIB_LCHOWN = @GNULIB_LCHOWN@
+GNULIB_LINK = @GNULIB_LINK@
+GNULIB_LINKAT = @GNULIB_LINKAT@
+GNULIB_LOCALECONV = @GNULIB_LOCALECONV@
+GNULIB_LSEEK = @GNULIB_LSEEK@
+GNULIB_LSTAT = @GNULIB_LSTAT@
+GNULIB_MALLOC_POSIX = @GNULIB_MALLOC_POSIX@
+GNULIB_MBRLEN = @GNULIB_MBRLEN@
+GNULIB_MBRTOWC = @GNULIB_MBRTOWC@
+GNULIB_MBSCASECMP = @GNULIB_MBSCASECMP@
+GNULIB_MBSCASESTR = @GNULIB_MBSCASESTR@
+GNULIB_MBSCHR = @GNULIB_MBSCHR@
+GNULIB_MBSCSPN = @GNULIB_MBSCSPN@
+GNULIB_MBSINIT = @GNULIB_MBSINIT@
+GNULIB_MBSLEN = @GNULIB_MBSLEN@
+GNULIB_MBSNCASECMP = @GNULIB_MBSNCASECMP@
+GNULIB_MBSNLEN = @GNULIB_MBSNLEN@
+GNULIB_MBSNRTOWCS = @GNULIB_MBSNRTOWCS@
+GNULIB_MBSPBRK = @GNULIB_MBSPBRK@
+GNULIB_MBSPCASECMP = @GNULIB_MBSPCASECMP@
+GNULIB_MBSRCHR = @GNULIB_MBSRCHR@
+GNULIB_MBSRTOWCS = @GNULIB_MBSRTOWCS@
+GNULIB_MBSSEP = @GNULIB_MBSSEP@
+GNULIB_MBSSPN = @GNULIB_MBSSPN@
+GNULIB_MBSSTR = @GNULIB_MBSSTR@
+GNULIB_MBSTOK_R = @GNULIB_MBSTOK_R@
+GNULIB_MBTOWC = @GNULIB_MBTOWC@
+GNULIB_MEMCHR = @GNULIB_MEMCHR@
+GNULIB_MEMMEM = @GNULIB_MEMMEM@
+GNULIB_MEMPCPY = @GNULIB_MEMPCPY@
+GNULIB_MEMRCHR = @GNULIB_MEMRCHR@
+GNULIB_MKDIRAT = @GNULIB_MKDIRAT@
+GNULIB_MKDTEMP = @GNULIB_MKDTEMP@
+GNULIB_MKFIFO = @GNULIB_MKFIFO@
+GNULIB_MKFIFOAT = @GNULIB_MKFIFOAT@
+GNULIB_MKNOD = @GNULIB_MKNOD@
+GNULIB_MKNODAT = @GNULIB_MKNODAT@
+GNULIB_MKOSTEMP = @GNULIB_MKOSTEMP@
+GNULIB_MKOSTEMPS = @GNULIB_MKOSTEMPS@
+GNULIB_MKSTEMP = @GNULIB_MKSTEMP@
+GNULIB_MKSTEMPS = @GNULIB_MKSTEMPS@
+GNULIB_MKTIME = @GNULIB_MKTIME@
+GNULIB_NANOSLEEP = @GNULIB_NANOSLEEP@
+GNULIB_NL_LANGINFO = @GNULIB_NL_LANGINFO@
+GNULIB_NONBLOCKING = @GNULIB_NONBLOCKING@
+GNULIB_OBSTACK_PRINTF = @GNULIB_OBSTACK_PRINTF@
+GNULIB_OBSTACK_PRINTF_POSIX = @GNULIB_OBSTACK_PRINTF_POSIX@
+GNULIB_OPEN = @GNULIB_OPEN@
+GNULIB_OPENAT = @GNULIB_OPENAT@
+GNULIB_OPENDIR = @GNULIB_OPENDIR@
+GNULIB_PCLOSE = @GNULIB_PCLOSE@
+GNULIB_PERROR = @GNULIB_PERROR@
+GNULIB_PIPE = @GNULIB_PIPE@
+GNULIB_PIPE2 = @GNULIB_PIPE2@
+GNULIB_POPEN = @GNULIB_POPEN@
+GNULIB_POSIX_OPENPT = @GNULIB_POSIX_OPENPT@
+GNULIB_PREAD = @GNULIB_PREAD@
+GNULIB_PRINTF = @GNULIB_PRINTF@
+GNULIB_PRINTF_POSIX = @GNULIB_PRINTF_POSIX@
+GNULIB_PTSNAME = @GNULIB_PTSNAME@
+GNULIB_PTSNAME_R = @GNULIB_PTSNAME_R@
+GNULIB_PUTC = @GNULIB_PUTC@
+GNULIB_PUTCHAR = @GNULIB_PUTCHAR@
+GNULIB_PUTENV = @GNULIB_PUTENV@
+GNULIB_PUTS = @GNULIB_PUTS@
+GNULIB_PWRITE = @GNULIB_PWRITE@
+GNULIB_QSORT_R = @GNULIB_QSORT_R@
+GNULIB_RANDOM = @GNULIB_RANDOM@
+GNULIB_RANDOM_R = @GNULIB_RANDOM_R@
+GNULIB_RAWMEMCHR = @GNULIB_RAWMEMCHR@
+GNULIB_READ = @GNULIB_READ@
+GNULIB_READDIR = @GNULIB_READDIR@
+GNULIB_READLINK = @GNULIB_READLINK@
+GNULIB_READLINKAT = @GNULIB_READLINKAT@
+GNULIB_REALLOC_POSIX = @GNULIB_REALLOC_POSIX@
+GNULIB_REALPATH = @GNULIB_REALPATH@
+GNULIB_REMOVE = @GNULIB_REMOVE@
+GNULIB_RENAME = @GNULIB_RENAME@
+GNULIB_RENAMEAT = @GNULIB_RENAMEAT@
+GNULIB_REWINDDIR = @GNULIB_REWINDDIR@
+GNULIB_RMDIR = @GNULIB_RMDIR@
+GNULIB_RPMATCH = @GNULIB_RPMATCH@
+GNULIB_SCANDIR = @GNULIB_SCANDIR@
+GNULIB_SCANF = @GNULIB_SCANF@
+GNULIB_SECURE_GETENV = @GNULIB_SECURE_GETENV@
+GNULIB_SETENV = @GNULIB_SETENV@
+GNULIB_SETHOSTNAME = @GNULIB_SETHOSTNAME@
+GNULIB_SETLOCALE = @GNULIB_SETLOCALE@
+GNULIB_SLEEP = @GNULIB_SLEEP@
+GNULIB_SNPRINTF = @GNULIB_SNPRINTF@
+GNULIB_SPRINTF_POSIX = @GNULIB_SPRINTF_POSIX@
+GNULIB_STAT = @GNULIB_STAT@
+GNULIB_STDIO_H_NONBLOCKING = @GNULIB_STDIO_H_NONBLOCKING@
+GNULIB_STDIO_H_SIGPIPE = @GNULIB_STDIO_H_SIGPIPE@
+GNULIB_STPCPY = @GNULIB_STPCPY@
+GNULIB_STPNCPY = @GNULIB_STPNCPY@
+GNULIB_STRCASESTR = @GNULIB_STRCASESTR@
+GNULIB_STRCHRNUL = @GNULIB_STRCHRNUL@
+GNULIB_STRDUP = @GNULIB_STRDUP@
+GNULIB_STRERROR = @GNULIB_STRERROR@
+GNULIB_STRERROR_R = @GNULIB_STRERROR_R@
+GNULIB_STRNCAT = @GNULIB_STRNCAT@
+GNULIB_STRNDUP = @GNULIB_STRNDUP@
+GNULIB_STRNLEN = @GNULIB_STRNLEN@
+GNULIB_STRPBRK = @GNULIB_STRPBRK@
+GNULIB_STRPTIME = @GNULIB_STRPTIME@
+GNULIB_STRSEP = @GNULIB_STRSEP@
+GNULIB_STRSIGNAL = @GNULIB_STRSIGNAL@
+GNULIB_STRSTR = @GNULIB_STRSTR@
+GNULIB_STRTOD = @GNULIB_STRTOD@
+GNULIB_STRTOIMAX = @GNULIB_STRTOIMAX@
+GNULIB_STRTOK_R = @GNULIB_STRTOK_R@
+GNULIB_STRTOLL = @GNULIB_STRTOLL@
+GNULIB_STRTOULL = @GNULIB_STRTOULL@
+GNULIB_STRTOUMAX = @GNULIB_STRTOUMAX@
+GNULIB_STRVERSCMP = @GNULIB_STRVERSCMP@
+GNULIB_SYMLINK = @GNULIB_SYMLINK@
+GNULIB_SYMLINKAT = @GNULIB_SYMLINKAT@
+GNULIB_SYSTEM_POSIX = @GNULIB_SYSTEM_POSIX@
+GNULIB_TIMEGM = @GNULIB_TIMEGM@
+GNULIB_TIME_R = @GNULIB_TIME_R@
+GNULIB_TIME_RZ = @GNULIB_TIME_RZ@
+GNULIB_TMPFILE = @GNULIB_TMPFILE@
+GNULIB_TOWCTRANS = @GNULIB_TOWCTRANS@
+GNULIB_TTYNAME_R = @GNULIB_TTYNAME_R@
+GNULIB_UNISTD_H_NONBLOCKING = @GNULIB_UNISTD_H_NONBLOCKING@
+GNULIB_UNISTD_H_SIGPIPE = @GNULIB_UNISTD_H_SIGPIPE@
+GNULIB_UNLINK = @GNULIB_UNLINK@
+GNULIB_UNLINKAT = @GNULIB_UNLINKAT@
+GNULIB_UNLOCKPT = @GNULIB_UNLOCKPT@
+GNULIB_UNSETENV = @GNULIB_UNSETENV@
+GNULIB_USLEEP = @GNULIB_USLEEP@
+GNULIB_UTIMENSAT = @GNULIB_UTIMENSAT@
+GNULIB_VASPRINTF = @GNULIB_VASPRINTF@
+GNULIB_VDPRINTF = @GNULIB_VDPRINTF@
+GNULIB_VFPRINTF = @GNULIB_VFPRINTF@
+GNULIB_VFPRINTF_POSIX = @GNULIB_VFPRINTF_POSIX@
+GNULIB_VFSCANF = @GNULIB_VFSCANF@
+GNULIB_VPRINTF = @GNULIB_VPRINTF@
+GNULIB_VPRINTF_POSIX = @GNULIB_VPRINTF_POSIX@
+GNULIB_VSCANF = @GNULIB_VSCANF@
+GNULIB_VSNPRINTF = @GNULIB_VSNPRINTF@
+GNULIB_VSPRINTF_POSIX = @GNULIB_VSPRINTF_POSIX@
+GNULIB_WARN_CFLAGS = @GNULIB_WARN_CFLAGS@
+GNULIB_WCPCPY = @GNULIB_WCPCPY@
+GNULIB_WCPNCPY = @GNULIB_WCPNCPY@
+GNULIB_WCRTOMB = @GNULIB_WCRTOMB@
+GNULIB_WCSCASECMP = @GNULIB_WCSCASECMP@
+GNULIB_WCSCAT = @GNULIB_WCSCAT@
+GNULIB_WCSCHR = @GNULIB_WCSCHR@
+GNULIB_WCSCMP = @GNULIB_WCSCMP@
+GNULIB_WCSCOLL = @GNULIB_WCSCOLL@
+GNULIB_WCSCPY = @GNULIB_WCSCPY@
+GNULIB_WCSCSPN = @GNULIB_WCSCSPN@
+GNULIB_WCSDUP = @GNULIB_WCSDUP@
+GNULIB_WCSLEN = @GNULIB_WCSLEN@
+GNULIB_WCSNCASECMP = @GNULIB_WCSNCASECMP@
+GNULIB_WCSNCAT = @GNULIB_WCSNCAT@
+GNULIB_WCSNCMP = @GNULIB_WCSNCMP@
+GNULIB_WCSNCPY = @GNULIB_WCSNCPY@
+GNULIB_WCSNLEN = @GNULIB_WCSNLEN@
+GNULIB_WCSNRTOMBS = @GNULIB_WCSNRTOMBS@
+GNULIB_WCSPBRK = @GNULIB_WCSPBRK@
+GNULIB_WCSRCHR = @GNULIB_WCSRCHR@
+GNULIB_WCSRTOMBS = @GNULIB_WCSRTOMBS@
+GNULIB_WCSSPN = @GNULIB_WCSSPN@
+GNULIB_WCSSTR = @GNULIB_WCSSTR@
+GNULIB_WCSTOK = @GNULIB_WCSTOK@
+GNULIB_WCSWIDTH = @GNULIB_WCSWIDTH@
+GNULIB_WCSXFRM = @GNULIB_WCSXFRM@
+GNULIB_WCTOB = @GNULIB_WCTOB@
+GNULIB_WCTOMB = @GNULIB_WCTOMB@
+GNULIB_WCTRANS = @GNULIB_WCTRANS@
+GNULIB_WCTYPE = @GNULIB_WCTYPE@
+GNULIB_WCWIDTH = @GNULIB_WCWIDTH@
+GNULIB_WMEMCHR = @GNULIB_WMEMCHR@
+GNULIB_WMEMCMP = @GNULIB_WMEMCMP@
+GNULIB_WMEMCPY = @GNULIB_WMEMCPY@
+GNULIB_WMEMMOVE = @GNULIB_WMEMMOVE@
+GNULIB_WMEMSET = @GNULIB_WMEMSET@
+GNULIB_WRITE = @GNULIB_WRITE@
+GNULIB__EXIT = @GNULIB__EXIT@
+GREP = @GREP@
+HAVE_ALPHASORT = @HAVE_ALPHASORT@
+HAVE_ATOLL = @HAVE_ATOLL@
+HAVE_BTOWC = @HAVE_BTOWC@
+HAVE_CANONICALIZE_FILE_NAME = @HAVE_CANONICALIZE_FILE_NAME@
+HAVE_CHOWN = @HAVE_CHOWN@
+HAVE_CLOSEDIR = @HAVE_CLOSEDIR@
+HAVE_DECL_DIRFD = @HAVE_DECL_DIRFD@
+HAVE_DECL_ENVIRON = @HAVE_DECL_ENVIRON@
+HAVE_DECL_FCHDIR = @HAVE_DECL_FCHDIR@
+HAVE_DECL_FDATASYNC = @HAVE_DECL_FDATASYNC@
+HAVE_DECL_FDOPENDIR = @HAVE_DECL_FDOPENDIR@
+HAVE_DECL_FPURGE = @HAVE_DECL_FPURGE@
+HAVE_DECL_FSEEKO = @HAVE_DECL_FSEEKO@
+HAVE_DECL_FTELLO = @HAVE_DECL_FTELLO@
+HAVE_DECL_GETDELIM = @HAVE_DECL_GETDELIM@
+HAVE_DECL_GETDOMAINNAME = @HAVE_DECL_GETDOMAINNAME@
+HAVE_DECL_GETLINE = @HAVE_DECL_GETLINE@
+HAVE_DECL_GETLOADAVG = @HAVE_DECL_GETLOADAVG@
+HAVE_DECL_GETLOGIN_R = @HAVE_DECL_GETLOGIN_R@
+HAVE_DECL_GETPAGESIZE = @HAVE_DECL_GETPAGESIZE@
+HAVE_DECL_GETUSERSHELL = @HAVE_DECL_GETUSERSHELL@
+HAVE_DECL_IMAXABS = @HAVE_DECL_IMAXABS@
+HAVE_DECL_IMAXDIV = @HAVE_DECL_IMAXDIV@
+HAVE_DECL_LOCALTIME_R = @HAVE_DECL_LOCALTIME_R@
+HAVE_DECL_MEMMEM = @HAVE_DECL_MEMMEM@
+HAVE_DECL_MEMRCHR = @HAVE_DECL_MEMRCHR@
+HAVE_DECL_OBSTACK_PRINTF = @HAVE_DECL_OBSTACK_PRINTF@
+HAVE_DECL_SETENV = @HAVE_DECL_SETENV@
+HAVE_DECL_SETHOSTNAME = @HAVE_DECL_SETHOSTNAME@
+HAVE_DECL_SNPRINTF = @HAVE_DECL_SNPRINTF@
+HAVE_DECL_STRDUP = @HAVE_DECL_STRDUP@
+HAVE_DECL_STRERROR_R = @HAVE_DECL_STRERROR_R@
+HAVE_DECL_STRNDUP = @HAVE_DECL_STRNDUP@
+HAVE_DECL_STRNLEN = @HAVE_DECL_STRNLEN@
+HAVE_DECL_STRSIGNAL = @HAVE_DECL_STRSIGNAL@
+HAVE_DECL_STRTOIMAX = @HAVE_DECL_STRTOIMAX@
+HAVE_DECL_STRTOK_R = @HAVE_DECL_STRTOK_R@
+HAVE_DECL_STRTOUMAX = @HAVE_DECL_STRTOUMAX@
+HAVE_DECL_TTYNAME_R = @HAVE_DECL_TTYNAME_R@
+HAVE_DECL_UNSETENV = @HAVE_DECL_UNSETENV@
+HAVE_DECL_VSNPRINTF = @HAVE_DECL_VSNPRINTF@
+HAVE_DECL_WCTOB = @HAVE_DECL_WCTOB@
+HAVE_DECL_WCWIDTH = @HAVE_DECL_WCWIDTH@
+HAVE_DIRENT_H = @HAVE_DIRENT_H@
+HAVE_DPRINTF = @HAVE_DPRINTF@
+HAVE_DUP2 = @HAVE_DUP2@
+HAVE_DUP3 = @HAVE_DUP3@
+HAVE_DUPLOCALE = @HAVE_DUPLOCALE@
+HAVE_EUIDACCESS = @HAVE_EUIDACCESS@
+HAVE_FACCESSAT = @HAVE_FACCESSAT@
+HAVE_FCHDIR = @HAVE_FCHDIR@
+HAVE_FCHMODAT = @HAVE_FCHMODAT@
+HAVE_FCHOWNAT = @HAVE_FCHOWNAT@
+HAVE_FCNTL = @HAVE_FCNTL@
+HAVE_FDATASYNC = @HAVE_FDATASYNC@
+HAVE_FDOPENDIR = @HAVE_FDOPENDIR@
+HAVE_FEATURES_H = @HAVE_FEATURES_H@
+HAVE_FFSL = @HAVE_FFSL@
+HAVE_FFSLL = @HAVE_FFSLL@
+HAVE_FSEEKO = @HAVE_FSEEKO@
+HAVE_FSTATAT = @HAVE_FSTATAT@
+HAVE_FSYNC = @HAVE_FSYNC@
+HAVE_FTELLO = @HAVE_FTELLO@
+HAVE_FTRUNCATE = @HAVE_FTRUNCATE@
+HAVE_FUTIMENS = @HAVE_FUTIMENS@
+HAVE_GETDTABLESIZE = @HAVE_GETDTABLESIZE@
+HAVE_GETGROUPS = @HAVE_GETGROUPS@
+HAVE_GETHOSTNAME = @HAVE_GETHOSTNAME@
+HAVE_GETLOGIN = @HAVE_GETLOGIN@
+HAVE_GETOPT_H = @HAVE_GETOPT_H@
+HAVE_GETPAGESIZE = @HAVE_GETPAGESIZE@
+HAVE_GETSUBOPT = @HAVE_GETSUBOPT@
+HAVE_GETTIMEOFDAY = @HAVE_GETTIMEOFDAY@
+HAVE_GRANTPT = @HAVE_GRANTPT@
+HAVE_GROUP_MEMBER = @HAVE_GROUP_MEMBER@
+HAVE_INTTYPES_H = @HAVE_INTTYPES_H@
+HAVE_ISBLANK = @HAVE_ISBLANK@
+HAVE_ISWBLANK = @HAVE_ISWBLANK@
+HAVE_ISWCNTRL = @HAVE_ISWCNTRL@
+HAVE_LANGINFO_CODESET = @HAVE_LANGINFO_CODESET@
+HAVE_LANGINFO_ERA = @HAVE_LANGINFO_ERA@
+HAVE_LANGINFO_H = @HAVE_LANGINFO_H@
+HAVE_LANGINFO_T_FMT_AMPM = @HAVE_LANGINFO_T_FMT_AMPM@
+HAVE_LANGINFO_YESEXPR = @HAVE_LANGINFO_YESEXPR@
+HAVE_LCHMOD = @HAVE_LCHMOD@
+HAVE_LCHOWN = @HAVE_LCHOWN@
+HAVE_LINK = @HAVE_LINK@
+HAVE_LINKAT = @HAVE_LINKAT@
+HAVE_LONG_LONG_INT = @HAVE_LONG_LONG_INT@
+HAVE_LSTAT = @HAVE_LSTAT@
+HAVE_MAX_ALIGN_T = @HAVE_MAX_ALIGN_T@
+HAVE_MBRLEN = @HAVE_MBRLEN@
+HAVE_MBRTOWC = @HAVE_MBRTOWC@
+HAVE_MBSINIT = @HAVE_MBSINIT@
+HAVE_MBSLEN = @HAVE_MBSLEN@
+HAVE_MBSNRTOWCS = @HAVE_MBSNRTOWCS@
+HAVE_MBSRTOWCS = @HAVE_MBSRTOWCS@
+HAVE_MEMCHR = @HAVE_MEMCHR@
+HAVE_MEMPCPY = @HAVE_MEMPCPY@
+HAVE_MKDIRAT = @HAVE_MKDIRAT@
+HAVE_MKDTEMP = @HAVE_MKDTEMP@
+HAVE_MKFIFO = @HAVE_MKFIFO@
+HAVE_MKFIFOAT = @HAVE_MKFIFOAT@
+HAVE_MKNOD = @HAVE_MKNOD@
+HAVE_MKNODAT = @HAVE_MKNODAT@
+HAVE_MKOSTEMP = @HAVE_MKOSTEMP@
+HAVE_MKOSTEMPS = @HAVE_MKOSTEMPS@
+HAVE_MKSTEMP = @HAVE_MKSTEMP@
+HAVE_MKSTEMPS = @HAVE_MKSTEMPS@
+HAVE_MSVC_INVALID_PARAMETER_HANDLER = @HAVE_MSVC_INVALID_PARAMETER_HANDLER@
+HAVE_NANOSLEEP = @HAVE_NANOSLEEP@
+HAVE_NL_LANGINFO = @HAVE_NL_LANGINFO@
+HAVE_OPENAT = @HAVE_OPENAT@
+HAVE_OPENDIR = @HAVE_OPENDIR@
+HAVE_OS_H = @HAVE_OS_H@
+HAVE_PCLOSE = @HAVE_PCLOSE@
+HAVE_PIPE = @HAVE_PIPE@
+HAVE_PIPE2 = @HAVE_PIPE2@
+HAVE_POPEN = @HAVE_POPEN@
+HAVE_POSIX_OPENPT = @HAVE_POSIX_OPENPT@
+HAVE_PREAD = @HAVE_PREAD@
+HAVE_PTSNAME = @HAVE_PTSNAME@
+HAVE_PTSNAME_R = @HAVE_PTSNAME_R@
+HAVE_PWRITE = @HAVE_PWRITE@
+HAVE_RANDOM = @HAVE_RANDOM@
+HAVE_RANDOM_H = @HAVE_RANDOM_H@
+HAVE_RANDOM_R = @HAVE_RANDOM_R@
+HAVE_RAWMEMCHR = @HAVE_RAWMEMCHR@
+HAVE_READDIR = @HAVE_READDIR@
+HAVE_READLINK = @HAVE_READLINK@
+HAVE_READLINKAT = @HAVE_READLINKAT@
+HAVE_REALPATH = @HAVE_REALPATH@
+HAVE_RENAMEAT = @HAVE_RENAMEAT@
+HAVE_REWINDDIR = @HAVE_REWINDDIR@
+HAVE_RPMATCH = @HAVE_RPMATCH@
+HAVE_SCANDIR = @HAVE_SCANDIR@
+HAVE_SECURE_GETENV = @HAVE_SECURE_GETENV@
+HAVE_SETENV = @HAVE_SETENV@
+HAVE_SETHOSTNAME = @HAVE_SETHOSTNAME@
+HAVE_SIGNED_SIG_ATOMIC_T = @HAVE_SIGNED_SIG_ATOMIC_T@
+HAVE_SIGNED_WCHAR_T = @HAVE_SIGNED_WCHAR_T@
+HAVE_SIGNED_WINT_T = @HAVE_SIGNED_WINT_T@
+HAVE_SLEEP = @HAVE_SLEEP@
+HAVE_STDINT_H = @HAVE_STDINT_H@
+HAVE_STPCPY = @HAVE_STPCPY@
+HAVE_STPNCPY = @HAVE_STPNCPY@
+HAVE_STRCASESTR = @HAVE_STRCASESTR@
+HAVE_STRCHRNUL = @HAVE_STRCHRNUL@
+HAVE_STRPBRK = @HAVE_STRPBRK@
+HAVE_STRPTIME = @HAVE_STRPTIME@
+HAVE_STRSEP = @HAVE_STRSEP@
+HAVE_STRTOD = @HAVE_STRTOD@
+HAVE_STRTOLL = @HAVE_STRTOLL@
+HAVE_STRTOULL = @HAVE_STRTOULL@
+HAVE_STRUCT_RANDOM_DATA = @HAVE_STRUCT_RANDOM_DATA@
+HAVE_STRUCT_TIMEVAL = @HAVE_STRUCT_TIMEVAL@
+HAVE_STRVERSCMP = @HAVE_STRVERSCMP@
+HAVE_SYMLINK = @HAVE_SYMLINK@
+HAVE_SYMLINKAT = @HAVE_SYMLINKAT@
+HAVE_SYS_BITYPES_H = @HAVE_SYS_BITYPES_H@
+HAVE_SYS_INTTYPES_H = @HAVE_SYS_INTTYPES_H@
+HAVE_SYS_LOADAVG_H = @HAVE_SYS_LOADAVG_H@
+HAVE_SYS_PARAM_H = @HAVE_SYS_PARAM_H@
+HAVE_SYS_TIME_H = @HAVE_SYS_TIME_H@
+HAVE_SYS_TYPES_H = @HAVE_SYS_TYPES_H@
+HAVE_TIMEGM = @HAVE_TIMEGM@
+HAVE_TIMEZONE_T = @HAVE_TIMEZONE_T@
+HAVE_UNISTD_H = @HAVE_UNISTD_H@
+HAVE_UNLINKAT = @HAVE_UNLINKAT@
+HAVE_UNLOCKPT = @HAVE_UNLOCKPT@
+HAVE_UNSIGNED_LONG_LONG_INT = @HAVE_UNSIGNED_LONG_LONG_INT@
+HAVE_USLEEP = @HAVE_USLEEP@
+HAVE_UTIMENSAT = @HAVE_UTIMENSAT@
+HAVE_VASPRINTF = @HAVE_VASPRINTF@
+HAVE_VDPRINTF = @HAVE_VDPRINTF@
+HAVE_WCHAR_H = @HAVE_WCHAR_H@
+HAVE_WCHAR_T = @HAVE_WCHAR_T@
+HAVE_WCPCPY = @HAVE_WCPCPY@
+HAVE_WCPNCPY = @HAVE_WCPNCPY@
+HAVE_WCRTOMB = @HAVE_WCRTOMB@
+HAVE_WCSCASECMP = @HAVE_WCSCASECMP@
+HAVE_WCSCAT = @HAVE_WCSCAT@
+HAVE_WCSCHR = @HAVE_WCSCHR@
+HAVE_WCSCMP = @HAVE_WCSCMP@
+HAVE_WCSCOLL = @HAVE_WCSCOLL@
+HAVE_WCSCPY = @HAVE_WCSCPY@
+HAVE_WCSCSPN = @HAVE_WCSCSPN@
+HAVE_WCSDUP = @HAVE_WCSDUP@
+HAVE_WCSLEN = @HAVE_WCSLEN@
+HAVE_WCSNCASECMP = @HAVE_WCSNCASECMP@
+HAVE_WCSNCAT = @HAVE_WCSNCAT@
+HAVE_WCSNCMP = @HAVE_WCSNCMP@
+HAVE_WCSNCPY = @HAVE_WCSNCPY@
+HAVE_WCSNLEN = @HAVE_WCSNLEN@
+HAVE_WCSNRTOMBS = @HAVE_WCSNRTOMBS@
+HAVE_WCSPBRK = @HAVE_WCSPBRK@
+HAVE_WCSRCHR = @HAVE_WCSRCHR@
+HAVE_WCSRTOMBS = @HAVE_WCSRTOMBS@
+HAVE_WCSSPN = @HAVE_WCSSPN@
+HAVE_WCSSTR = @HAVE_WCSSTR@
+HAVE_WCSTOK = @HAVE_WCSTOK@
+HAVE_WCSWIDTH = @HAVE_WCSWIDTH@
+HAVE_WCSXFRM = @HAVE_WCSXFRM@
+HAVE_WCTRANS_T = @HAVE_WCTRANS_T@
+HAVE_WCTYPE_H = @HAVE_WCTYPE_H@
+HAVE_WCTYPE_T = @HAVE_WCTYPE_T@
+HAVE_WINSOCK2_H = @HAVE_WINSOCK2_H@
+HAVE_WINT_T = @HAVE_WINT_T@
+HAVE_WMEMCHR = @HAVE_WMEMCHR@
+HAVE_WMEMCMP = @HAVE_WMEMCMP@
+HAVE_WMEMCPY = @HAVE_WMEMCPY@
+HAVE_WMEMMOVE = @HAVE_WMEMMOVE@
+HAVE_WMEMSET = @HAVE_WMEMSET@
+HAVE_XLOCALE_H = @HAVE_XLOCALE_H@
+HAVE__BOOL = @HAVE__BOOL@
+HAVE__EXIT = @HAVE__EXIT@
+ICONV_CONST = @ICONV_CONST@
+ICONV_H = @ICONV_H@
+INCLUDE_NEXT = @INCLUDE_NEXT@
+INCLUDE_NEXT_AS_FIRST_DIRECTIVE = @INCLUDE_NEXT_AS_FIRST_DIRECTIVE@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+INT32_MAX_LT_INTMAX_MAX = @INT32_MAX_LT_INTMAX_MAX@
+INT64_MAX_EQ_LONG_MAX = @INT64_MAX_EQ_LONG_MAX@
+INTLLIBS = @INTLLIBS@
+INTL_MACOSX_LIBS = @INTL_MACOSX_LIBS@
+LDFLAGS = @LDFLAGS@
+LIBGREPUTILS_LIBDEPS = @LIBGREPUTILS_LIBDEPS@
+LIBGREPUTILS_LTLIBDEPS = @LIBGREPUTILS_LTLIBDEPS@
+LIBICONV = @LIBICONV@
+LIBINTL = @LIBINTL@
+LIBMULTITHREAD = @LIBMULTITHREAD@
+LIBOBJS = @LIBOBJS@
+LIBPTH = @LIBPTH@
+LIBPTH_PREFIX = @LIBPTH_PREFIX@
+LIBS = @LIBS@
+LIBTESTS_LIBDEPS = @LIBTESTS_LIBDEPS@
+LIBTHREAD = @LIBTHREAD@
+LIBUNISTRING_UNISTR_H = @LIBUNISTRING_UNISTR_H@
+LIBUNISTRING_UNITYPES_H = @LIBUNISTRING_UNITYPES_H@
+LIBUNISTRING_UNIWIDTH_H = @LIBUNISTRING_UNIWIDTH_H@
+LOCALCHARSET_TESTS_ENVIRONMENT = @LOCALCHARSET_TESTS_ENVIRONMENT@
+LOCALE_FR = @LOCALE_FR@
+LOCALE_FR_UTF8 = @LOCALE_FR_UTF8@
+LOCALE_JA = @LOCALE_JA@
+LOCALE_TR_UTF8 = @LOCALE_TR_UTF8@
+LOCALE_ZH_CN = @LOCALE_ZH_CN@
+LTLIBICONV = @LTLIBICONV@
+LTLIBINTL = @LTLIBINTL@
+LTLIBMULTITHREAD = @LTLIBMULTITHREAD@
+LTLIBOBJS = @LTLIBOBJS@
+LTLIBPTH = @LTLIBPTH@
+LTLIBTHREAD = @LTLIBTHREAD@
+MAKEINFO = @MAKEINFO@
+MKDIR_P = @MKDIR_P@
+MSGFMT = @MSGFMT@
+MSGFMT_015 = @MSGFMT_015@
+MSGMERGE = @MSGMERGE@
+NEXT_AS_FIRST_DIRECTIVE_CTYPE_H = @NEXT_AS_FIRST_DIRECTIVE_CTYPE_H@
+NEXT_AS_FIRST_DIRECTIVE_DIRENT_H = @NEXT_AS_FIRST_DIRECTIVE_DIRENT_H@
+NEXT_AS_FIRST_DIRECTIVE_ERRNO_H = @NEXT_AS_FIRST_DIRECTIVE_ERRNO_H@
+NEXT_AS_FIRST_DIRECTIVE_FCNTL_H = @NEXT_AS_FIRST_DIRECTIVE_FCNTL_H@
+NEXT_AS_FIRST_DIRECTIVE_FLOAT_H = @NEXT_AS_FIRST_DIRECTIVE_FLOAT_H@
+NEXT_AS_FIRST_DIRECTIVE_GETOPT_H = @NEXT_AS_FIRST_DIRECTIVE_GETOPT_H@
+NEXT_AS_FIRST_DIRECTIVE_ICONV_H = @NEXT_AS_FIRST_DIRECTIVE_ICONV_H@
+NEXT_AS_FIRST_DIRECTIVE_INTTYPES_H = @NEXT_AS_FIRST_DIRECTIVE_INTTYPES_H@
+NEXT_AS_FIRST_DIRECTIVE_LANGINFO_H = @NEXT_AS_FIRST_DIRECTIVE_LANGINFO_H@
+NEXT_AS_FIRST_DIRECTIVE_LOCALE_H = @NEXT_AS_FIRST_DIRECTIVE_LOCALE_H@
+NEXT_AS_FIRST_DIRECTIVE_STDARG_H = @NEXT_AS_FIRST_DIRECTIVE_STDARG_H@
+NEXT_AS_FIRST_DIRECTIVE_STDDEF_H = @NEXT_AS_FIRST_DIRECTIVE_STDDEF_H@
+NEXT_AS_FIRST_DIRECTIVE_STDINT_H = @NEXT_AS_FIRST_DIRECTIVE_STDINT_H@
+NEXT_AS_FIRST_DIRECTIVE_STDIO_H = @NEXT_AS_FIRST_DIRECTIVE_STDIO_H@
+NEXT_AS_FIRST_DIRECTIVE_STDLIB_H = @NEXT_AS_FIRST_DIRECTIVE_STDLIB_H@
+NEXT_AS_FIRST_DIRECTIVE_STRING_H = @NEXT_AS_FIRST_DIRECTIVE_STRING_H@
+NEXT_AS_FIRST_DIRECTIVE_SYS_STAT_H = @NEXT_AS_FIRST_DIRECTIVE_SYS_STAT_H@
+NEXT_AS_FIRST_DIRECTIVE_SYS_TIME_H = @NEXT_AS_FIRST_DIRECTIVE_SYS_TIME_H@
+NEXT_AS_FIRST_DIRECTIVE_SYS_TYPES_H = @NEXT_AS_FIRST_DIRECTIVE_SYS_TYPES_H@
+NEXT_AS_FIRST_DIRECTIVE_TIME_H = @NEXT_AS_FIRST_DIRECTIVE_TIME_H@
+NEXT_AS_FIRST_DIRECTIVE_UNISTD_H = @NEXT_AS_FIRST_DIRECTIVE_UNISTD_H@
+NEXT_AS_FIRST_DIRECTIVE_WCHAR_H = @NEXT_AS_FIRST_DIRECTIVE_WCHAR_H@
+NEXT_AS_FIRST_DIRECTIVE_WCTYPE_H = @NEXT_AS_FIRST_DIRECTIVE_WCTYPE_H@
+NEXT_CTYPE_H = @NEXT_CTYPE_H@
+NEXT_DIRENT_H = @NEXT_DIRENT_H@
+NEXT_ERRNO_H = @NEXT_ERRNO_H@
+NEXT_FCNTL_H = @NEXT_FCNTL_H@
+NEXT_FLOAT_H = @NEXT_FLOAT_H@
+NEXT_GETOPT_H = @NEXT_GETOPT_H@
+NEXT_ICONV_H = @NEXT_ICONV_H@
+NEXT_INTTYPES_H = @NEXT_INTTYPES_H@
+NEXT_LANGINFO_H = @NEXT_LANGINFO_H@
+NEXT_LOCALE_H = @NEXT_LOCALE_H@
+NEXT_STDARG_H = @NEXT_STDARG_H@
+NEXT_STDDEF_H = @NEXT_STDDEF_H@
+NEXT_STDINT_H = @NEXT_STDINT_H@
+NEXT_STDIO_H = @NEXT_STDIO_H@
+NEXT_STDLIB_H = @NEXT_STDLIB_H@
+NEXT_STRING_H = @NEXT_STRING_H@
+NEXT_SYS_STAT_H = @NEXT_SYS_STAT_H@
+NEXT_SYS_TIME_H = @NEXT_SYS_TIME_H@
+NEXT_SYS_TYPES_H = @NEXT_SYS_TYPES_H@
+NEXT_TIME_H = @NEXT_TIME_H@
+NEXT_UNISTD_H = @NEXT_UNISTD_H@
+NEXT_WCHAR_H = @NEXT_WCHAR_H@
+NEXT_WCTYPE_H = @NEXT_WCTYPE_H@
+OBJEXT = @OBJEXT@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+PCRE_CFLAGS = @PCRE_CFLAGS@
+PCRE_LIBS = @PCRE_LIBS@
+PERL = @PERL@
+PKG_CONFIG = @PKG_CONFIG@
+PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@
+PKG_CONFIG_PATH = @PKG_CONFIG_PATH@
+POSUB = @POSUB@
+PRAGMA_COLUMNS = @PRAGMA_COLUMNS@
+PRAGMA_SYSTEM_HEADER = @PRAGMA_SYSTEM_HEADER@
+PRIPTR_PREFIX = @PRIPTR_PREFIX@
+PRI_MACROS_BROKEN = @PRI_MACROS_BROKEN@
+PTHREAD_H_DEFINES_STRUCT_TIMESPEC = @PTHREAD_H_DEFINES_STRUCT_TIMESPEC@
+PTRDIFF_T_SUFFIX = @PTRDIFF_T_SUFFIX@
+RANLIB = @RANLIB@
+REPLACE_BTOWC = @REPLACE_BTOWC@
+REPLACE_CALLOC = @REPLACE_CALLOC@
+REPLACE_CANONICALIZE_FILE_NAME = @REPLACE_CANONICALIZE_FILE_NAME@
+REPLACE_CHOWN = @REPLACE_CHOWN@
+REPLACE_CLOSE = @REPLACE_CLOSE@
+REPLACE_CLOSEDIR = @REPLACE_CLOSEDIR@
+REPLACE_DIRFD = @REPLACE_DIRFD@
+REPLACE_DPRINTF = @REPLACE_DPRINTF@
+REPLACE_DUP = @REPLACE_DUP@
+REPLACE_DUP2 = @REPLACE_DUP2@
+REPLACE_DUPLOCALE = @REPLACE_DUPLOCALE@
+REPLACE_FCHOWNAT = @REPLACE_FCHOWNAT@
+REPLACE_FCLOSE = @REPLACE_FCLOSE@
+REPLACE_FCNTL = @REPLACE_FCNTL@
+REPLACE_FDOPEN = @REPLACE_FDOPEN@
+REPLACE_FDOPENDIR = @REPLACE_FDOPENDIR@
+REPLACE_FFLUSH = @REPLACE_FFLUSH@
+REPLACE_FOPEN = @REPLACE_FOPEN@
+REPLACE_FPRINTF = @REPLACE_FPRINTF@
+REPLACE_FPURGE = @REPLACE_FPURGE@
+REPLACE_FREOPEN = @REPLACE_FREOPEN@
+REPLACE_FSEEK = @REPLACE_FSEEK@
+REPLACE_FSEEKO = @REPLACE_FSEEKO@
+REPLACE_FSTAT = @REPLACE_FSTAT@
+REPLACE_FSTATAT = @REPLACE_FSTATAT@
+REPLACE_FTELL = @REPLACE_FTELL@
+REPLACE_FTELLO = @REPLACE_FTELLO@
+REPLACE_FTRUNCATE = @REPLACE_FTRUNCATE@
+REPLACE_FUTIMENS = @REPLACE_FUTIMENS@
+REPLACE_GETCWD = @REPLACE_GETCWD@
+REPLACE_GETDELIM = @REPLACE_GETDELIM@
+REPLACE_GETDOMAINNAME = @REPLACE_GETDOMAINNAME@
+REPLACE_GETDTABLESIZE = @REPLACE_GETDTABLESIZE@
+REPLACE_GETGROUPS = @REPLACE_GETGROUPS@
+REPLACE_GETLINE = @REPLACE_GETLINE@
+REPLACE_GETLOGIN_R = @REPLACE_GETLOGIN_R@
+REPLACE_GETPAGESIZE = @REPLACE_GETPAGESIZE@
+REPLACE_GETTIMEOFDAY = @REPLACE_GETTIMEOFDAY@
+REPLACE_GMTIME = @REPLACE_GMTIME@
+REPLACE_ICONV = @REPLACE_ICONV@
+REPLACE_ICONV_OPEN = @REPLACE_ICONV_OPEN@
+REPLACE_ICONV_UTF = @REPLACE_ICONV_UTF@
+REPLACE_ISATTY = @REPLACE_ISATTY@
+REPLACE_ISWBLANK = @REPLACE_ISWBLANK@
+REPLACE_ISWCNTRL = @REPLACE_ISWCNTRL@
+REPLACE_ITOLD = @REPLACE_ITOLD@
+REPLACE_LCHOWN = @REPLACE_LCHOWN@
+REPLACE_LINK = @REPLACE_LINK@
+REPLACE_LINKAT = @REPLACE_LINKAT@
+REPLACE_LOCALECONV = @REPLACE_LOCALECONV@
+REPLACE_LOCALTIME = @REPLACE_LOCALTIME@
+REPLACE_LOCALTIME_R = @REPLACE_LOCALTIME_R@
+REPLACE_LSEEK = @REPLACE_LSEEK@
+REPLACE_LSTAT = @REPLACE_LSTAT@
+REPLACE_MALLOC = @REPLACE_MALLOC@
+REPLACE_MBRLEN = @REPLACE_MBRLEN@
+REPLACE_MBRTOWC = @REPLACE_MBRTOWC@
+REPLACE_MBSINIT = @REPLACE_MBSINIT@
+REPLACE_MBSNRTOWCS = @REPLACE_MBSNRTOWCS@
+REPLACE_MBSRTOWCS = @REPLACE_MBSRTOWCS@
+REPLACE_MBSTATE_T = @REPLACE_MBSTATE_T@
+REPLACE_MBTOWC = @REPLACE_MBTOWC@
+REPLACE_MEMCHR = @REPLACE_MEMCHR@
+REPLACE_MEMMEM = @REPLACE_MEMMEM@
+REPLACE_MKDIR = @REPLACE_MKDIR@
+REPLACE_MKFIFO = @REPLACE_MKFIFO@
+REPLACE_MKNOD = @REPLACE_MKNOD@
+REPLACE_MKSTEMP = @REPLACE_MKSTEMP@
+REPLACE_MKTIME = @REPLACE_MKTIME@
+REPLACE_NANOSLEEP = @REPLACE_NANOSLEEP@
+REPLACE_NL_LANGINFO = @REPLACE_NL_LANGINFO@
+REPLACE_NULL = @REPLACE_NULL@
+REPLACE_OBSTACK_PRINTF = @REPLACE_OBSTACK_PRINTF@
+REPLACE_OPEN = @REPLACE_OPEN@
+REPLACE_OPENAT = @REPLACE_OPENAT@
+REPLACE_OPENDIR = @REPLACE_OPENDIR@
+REPLACE_PERROR = @REPLACE_PERROR@
+REPLACE_POPEN = @REPLACE_POPEN@
+REPLACE_PREAD = @REPLACE_PREAD@
+REPLACE_PRINTF = @REPLACE_PRINTF@
+REPLACE_PTSNAME = @REPLACE_PTSNAME@
+REPLACE_PTSNAME_R = @REPLACE_PTSNAME_R@
+REPLACE_PUTENV = @REPLACE_PUTENV@
+REPLACE_PWRITE = @REPLACE_PWRITE@
+REPLACE_QSORT_R = @REPLACE_QSORT_R@
+REPLACE_RANDOM_R = @REPLACE_RANDOM_R@
+REPLACE_READ = @REPLACE_READ@
+REPLACE_READLINK = @REPLACE_READLINK@
+REPLACE_READLINKAT = @REPLACE_READLINKAT@
+REPLACE_REALLOC = @REPLACE_REALLOC@
+REPLACE_REALPATH = @REPLACE_REALPATH@
+REPLACE_REMOVE = @REPLACE_REMOVE@
+REPLACE_RENAME = @REPLACE_RENAME@
+REPLACE_RENAMEAT = @REPLACE_RENAMEAT@
+REPLACE_RMDIR = @REPLACE_RMDIR@
+REPLACE_SETENV = @REPLACE_SETENV@
+REPLACE_SETLOCALE = @REPLACE_SETLOCALE@
+REPLACE_SLEEP = @REPLACE_SLEEP@
+REPLACE_SNPRINTF = @REPLACE_SNPRINTF@
+REPLACE_SPRINTF = @REPLACE_SPRINTF@
+REPLACE_STAT = @REPLACE_STAT@
+REPLACE_STDIO_READ_FUNCS = @REPLACE_STDIO_READ_FUNCS@
+REPLACE_STDIO_WRITE_FUNCS = @REPLACE_STDIO_WRITE_FUNCS@
+REPLACE_STPNCPY = @REPLACE_STPNCPY@
+REPLACE_STRCASESTR = @REPLACE_STRCASESTR@
+REPLACE_STRCHRNUL = @REPLACE_STRCHRNUL@
+REPLACE_STRDUP = @REPLACE_STRDUP@
+REPLACE_STRERROR = @REPLACE_STRERROR@
+REPLACE_STRERROR_R = @REPLACE_STRERROR_R@
+REPLACE_STRNCAT = @REPLACE_STRNCAT@
+REPLACE_STRNDUP = @REPLACE_STRNDUP@
+REPLACE_STRNLEN = @REPLACE_STRNLEN@
+REPLACE_STRSIGNAL = @REPLACE_STRSIGNAL@
+REPLACE_STRSTR = @REPLACE_STRSTR@
+REPLACE_STRTOD = @REPLACE_STRTOD@
+REPLACE_STRTOIMAX = @REPLACE_STRTOIMAX@
+REPLACE_STRTOK_R = @REPLACE_STRTOK_R@
+REPLACE_STRTOUMAX = @REPLACE_STRTOUMAX@
+REPLACE_STRUCT_LCONV = @REPLACE_STRUCT_LCONV@
+REPLACE_STRUCT_TIMEVAL = @REPLACE_STRUCT_TIMEVAL@
+REPLACE_SYMLINK = @REPLACE_SYMLINK@
+REPLACE_SYMLINKAT = @REPLACE_SYMLINKAT@
+REPLACE_TIMEGM = @REPLACE_TIMEGM@
+REPLACE_TMPFILE = @REPLACE_TMPFILE@
+REPLACE_TOWLOWER = @REPLACE_TOWLOWER@
+REPLACE_TTYNAME_R = @REPLACE_TTYNAME_R@
+REPLACE_UNLINK = @REPLACE_UNLINK@
+REPLACE_UNLINKAT = @REPLACE_UNLINKAT@
+REPLACE_UNSETENV = @REPLACE_UNSETENV@
+REPLACE_USLEEP = @REPLACE_USLEEP@
+REPLACE_UTIMENSAT = @REPLACE_UTIMENSAT@
+REPLACE_VASPRINTF = @REPLACE_VASPRINTF@
+REPLACE_VDPRINTF = @REPLACE_VDPRINTF@
+REPLACE_VFPRINTF = @REPLACE_VFPRINTF@
+REPLACE_VPRINTF = @REPLACE_VPRINTF@
+REPLACE_VSNPRINTF = @REPLACE_VSNPRINTF@
+REPLACE_VSPRINTF = @REPLACE_VSPRINTF@
+REPLACE_WCRTOMB = @REPLACE_WCRTOMB@
+REPLACE_WCSNRTOMBS = @REPLACE_WCSNRTOMBS@
+REPLACE_WCSRTOMBS = @REPLACE_WCSRTOMBS@
+REPLACE_WCSWIDTH = @REPLACE_WCSWIDTH@
+REPLACE_WCTOB = @REPLACE_WCTOB@
+REPLACE_WCTOMB = @REPLACE_WCTOMB@
+REPLACE_WCWIDTH = @REPLACE_WCWIDTH@
+REPLACE_WRITE = @REPLACE_WRITE@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+SIG_ATOMIC_T_SUFFIX = @SIG_ATOMIC_T_SUFFIX@
+SIZE_T_SUFFIX = @SIZE_T_SUFFIX@
+STDALIGN_H = @STDALIGN_H@
+STDARG_H = @STDARG_H@
+STDBOOL_H = @STDBOOL_H@
+STDDEF_H = @STDDEF_H@
+STDINT_H = @STDINT_H@
+STRIP = @STRIP@
+SYS_TIME_H_DEFINES_STRUCT_TIMESPEC = @SYS_TIME_H_DEFINES_STRUCT_TIMESPEC@
+TIME_H_DEFINES_STRUCT_TIMESPEC = @TIME_H_DEFINES_STRUCT_TIMESPEC@
+UINT32_MAX_LT_UINTMAX_MAX = @UINT32_MAX_LT_UINTMAX_MAX@
+UINT64_MAX_EQ_ULONG_MAX = @UINT64_MAX_EQ_ULONG_MAX@
+UNDEFINE_STRTOK_R = @UNDEFINE_STRTOK_R@
+UNISTD_H_DEFINES_STRUCT_TIMESPEC = @UNISTD_H_DEFINES_STRUCT_TIMESPEC@
+UNISTD_H_HAVE_WINSOCK2_H = @UNISTD_H_HAVE_WINSOCK2_H@
+UNISTD_H_HAVE_WINSOCK2_H_AND_USE_SOCKETS = @UNISTD_H_HAVE_WINSOCK2_H_AND_USE_SOCKETS@
+USE_NLS = @USE_NLS@
+VERSION = @VERSION@
+WARN_CFLAGS = @WARN_CFLAGS@
+WCHAR_T_SUFFIX = @WCHAR_T_SUFFIX@
+WERROR_CFLAGS = @WERROR_CFLAGS@
+WINDOWS_64_BIT_OFF_T = @WINDOWS_64_BIT_OFF_T@
+WINDOWS_64_BIT_ST_SIZE = @WINDOWS_64_BIT_ST_SIZE@
+WINT_T_SUFFIX = @WINT_T_SUFFIX@
+XGETTEXT = @XGETTEXT@
+XGETTEXT_015 = @XGETTEXT_015@
+XGETTEXT_EXTRA_OPTIONS = @XGETTEXT_EXTRA_OPTIONS@
+abs_aux_dir = @abs_aux_dir@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+am__include = @am__include@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+gl_LIBOBJS = @gl_LIBOBJS@
+gl_LTLIBOBJS = @gl_LTLIBOBJS@
+gltests_LIBOBJS = @gltests_LIBOBJS@
+gltests_LTLIBOBJS = @gltests_LTLIBOBJS@
+gltests_WITNESS = @gltests_WITNESS@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+lispdir = @lispdir@
+localedir = $(datadir)/locale
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+runstatedir = @runstatedir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+LN = ln
+AM_CFLAGS = $(WARN_CFLAGS) $(WERROR_CFLAGS) $(PCRE_CFLAGS)
+
+# Tell the linker to omit references to unused shared libraries.
+AM_LDFLAGS = $(IGNORE_UNUSED_LIBRARIES_CFLAGS)
+bin_SCRIPTS = egrep fgrep
+grep_SOURCES = grep.c searchutils.c \
+ dfa.c dfasearch.c \
+ kwset.c kwsearch.c \
+ pcresearch.c
+
+noinst_HEADERS = grep.h dfa.h kwset.h search.h system.h
+
+# Sometimes, the expansion of $(LIBINTL) includes -lc which may
+# include modules defining variables like 'optind', so libgreputils.a
+# must precede $(LIBINTL) in order to ensure we use GNU getopt.
+# But libgreputils.a must also follow $(LIBINTL), since libintl uses
+# replacement functions defined in libgreputils.a.
+LDADD = \
+ ../lib/libgreputils.a $(LIBINTL) ../lib/libgreputils.a $(LIBICONV) \
+ $(LIBTHREAD)
+
+grep_LDADD = $(LDADD) $(PCRE_LIBS)
+AM_CPPFLAGS = -I$(top_builddir)/lib -I$(top_srcdir)/lib
+EXTRA_DIST = dosbuf.c egrep.sh
+CLEANFILES = egrep fgrep *-t
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .c .o .obj
+$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
+ @for dep in $?; do \
+ case '$(am__configure_deps)' in \
+ *$$dep*) \
+ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+ && { if test -f $@; then exit 0; else break; fi; }; \
+ exit 1;; \
+ esac; \
+ done; \
+ echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu src/Makefile'; \
+ $(am__cd) $(top_srcdir) && \
+ $(AUTOMAKE) --gnu src/Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+ @case '$?' in \
+ *config.status*) \
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+ *) \
+ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \
+ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \
+ esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: $(am__configure_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): $(am__aclocal_m4_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+install-binPROGRAMS: $(bin_PROGRAMS)
+ @$(NORMAL_INSTALL)
+ @list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \
+ if test -n "$$list"; then \
+ echo " $(MKDIR_P) '$(DESTDIR)$(bindir)'"; \
+ $(MKDIR_P) "$(DESTDIR)$(bindir)" || exit 1; \
+ fi; \
+ for p in $$list; do echo "$$p $$p"; done | \
+ sed 's/$(EXEEXT)$$//' | \
+ while read p p1; do if test -f $$p \
+ ; then echo "$$p"; echo "$$p"; else :; fi; \
+ done | \
+ sed -e 'p;s,.*/,,;n;h' \
+ -e 's|.*|.|' \
+ -e 'p;x;s,.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/' | \
+ sed 'N;N;N;s,\n, ,g' | \
+ $(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1 } \
+ { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \
+ if ($$2 == $$4) files[d] = files[d] " " $$1; \
+ else { print "f", $$3 "/" $$4, $$1; } } \
+ END { for (d in files) print "f", d, files[d] }' | \
+ while read type dir files; do \
+ if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \
+ test -z "$$files" || { \
+ echo " $(INSTALL_PROGRAM_ENV) $(INSTALL_PROGRAM) $$files '$(DESTDIR)$(bindir)$$dir'"; \
+ $(INSTALL_PROGRAM_ENV) $(INSTALL_PROGRAM) $$files "$(DESTDIR)$(bindir)$$dir" || exit $$?; \
+ } \
+ ; done
+
+uninstall-binPROGRAMS:
+ @$(NORMAL_UNINSTALL)
+ @list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \
+ files=`for p in $$list; do echo "$$p"; done | \
+ sed -e 'h;s,^.*/,,;s/$(EXEEXT)$$//;$(transform)' \
+ -e 's/$$/$(EXEEXT)/' \
+ `; \
+ test -n "$$list" || exit 0; \
+ echo " ( cd '$(DESTDIR)$(bindir)' && rm -f" $$files ")"; \
+ cd "$(DESTDIR)$(bindir)" && rm -f $$files
+
+clean-binPROGRAMS:
+ -test -z "$(bin_PROGRAMS)" || rm -f $(bin_PROGRAMS)
+
+grep$(EXEEXT): $(grep_OBJECTS) $(grep_DEPENDENCIES) $(EXTRA_grep_DEPENDENCIES)
+ @rm -f grep$(EXEEXT)
+ $(AM_V_CCLD)$(LINK) $(grep_OBJECTS) $(grep_LDADD) $(LIBS)
+install-binSCRIPTS: $(bin_SCRIPTS)
+ @$(NORMAL_INSTALL)
+ @list='$(bin_SCRIPTS)'; test -n "$(bindir)" || list=; \
+ if test -n "$$list"; then \
+ echo " $(MKDIR_P) '$(DESTDIR)$(bindir)'"; \
+ $(MKDIR_P) "$(DESTDIR)$(bindir)" || exit 1; \
+ fi; \
+ for p in $$list; do \
+ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
+ if test -f "$$d$$p"; then echo "$$d$$p"; echo "$$p"; else :; fi; \
+ done | \
+ sed -e 'p;s,.*/,,;n' \
+ -e 'h;s|.*|.|' \
+ -e 'p;x;s,.*/,,;$(transform)' | sed 'N;N;N;s,\n, ,g' | \
+ $(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1; } \
+ { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \
+ if ($$2 == $$4) { files[d] = files[d] " " $$1; \
+ if (++n[d] == $(am__install_max)) { \
+ print "f", d, files[d]; n[d] = 0; files[d] = "" } } \
+ else { print "f", d "/" $$4, $$1 } } \
+ END { for (d in files) print "f", d, files[d] }' | \
+ while read type dir files; do \
+ if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \
+ test -z "$$files" || { \
+ echo " $(INSTALL_SCRIPT) $$files '$(DESTDIR)$(bindir)$$dir'"; \
+ $(INSTALL_SCRIPT) $$files "$(DESTDIR)$(bindir)$$dir" || exit $$?; \
+ } \
+ ; done
+
+uninstall-binSCRIPTS:
+ @$(NORMAL_UNINSTALL)
+ @list='$(bin_SCRIPTS)'; test -n "$(bindir)" || exit 0; \
+ files=`for p in $$list; do echo "$$p"; done | \
+ sed -e 's,.*/,,;$(transform)'`; \
+ dir='$(DESTDIR)$(bindir)'; $(am__uninstall_files_from_dir)
+
+mostlyclean-compile:
+ -rm -f *.$(OBJEXT)
+
+distclean-compile:
+ -rm -f *.tab.c
+
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dfa.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dfasearch.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/grep.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/kwsearch.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/kwset.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pcresearch.Po@am__quote@ # am--include-marker
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/searchutils.Po@am__quote@ # am--include-marker
+
+$(am__depfiles_remade):
+ @$(MKDIR_P) $(@D)
+ @echo '# dummy' >$@-t && $(am__mv) $@-t $@
+
+am--depfiles: $(am__depfiles_remade)
+
+.c.o:
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(am__set_depbase) && \
+@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< && \
+@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $<
+
+.c.obj:
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(am__set_depbase) && \
+@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $$($(CYGPATH_W) $<) && \
+@am__fastdepCC_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $$($(CYGPATH_W) $<)
+
+ID: $(am__tagged_files)
+ $(am__define_uniq_tagged_files); mkid -fID $$unique
+tags: tags-am
+TAGS: tags
+
+tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+ set x; \
+ here=`pwd`; \
+ $(am__define_uniq_tagged_files); \
+ shift; \
+ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+ test -n "$$unique" || unique=$$empty_fix; \
+ if test $$# -gt 0; then \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ "$$@" $$unique; \
+ else \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ $$unique; \
+ fi; \
+ fi
+ctags: ctags-am
+
+CTAGS: ctags
+ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+ $(am__define_uniq_tagged_files); \
+ test -z "$(CTAGS_ARGS)$$unique" \
+ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+ $$unique
+
+GTAGS:
+ here=`$(am__cd) $(top_builddir) && pwd` \
+ && $(am__cd) $(top_srcdir) \
+ && gtags -i $(GTAGS_ARGS) "$$here"
+cscopelist: cscopelist-am
+
+cscopelist-am: $(am__tagged_files)
+ list='$(am__tagged_files)'; \
+ case "$(srcdir)" in \
+ [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
+ *) sdir=$(subdir)/$(srcdir) ;; \
+ esac; \
+ for i in $$list; do \
+ if test -f "$$i"; then \
+ echo "$(subdir)/$$i"; \
+ else \
+ echo "$$sdir/$$i"; \
+ fi; \
+ done >> $(top_builddir)/cscope.files
+
+distclean-tags:
+ -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES)
+ @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ list='$(DISTFILES)'; \
+ dist_files=`for file in $$list; do echo $$file; done | \
+ sed -e "s|^$$srcdirstrip/||;t" \
+ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+ case $$dist_files in \
+ */*) $(MKDIR_P) `echo "$$dist_files" | \
+ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+ sort -u` ;; \
+ esac; \
+ for file in $$dist_files; do \
+ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+ if test -d $$d/$$file; then \
+ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+ if test -d "$(distdir)/$$file"; then \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+ else \
+ test -f "$(distdir)/$$file" \
+ || cp -p $$d/$$file "$(distdir)/$$file" \
+ || exit 1; \
+ fi; \
+ done
+check-am: all-am
+check: check-am
+all-am: Makefile $(PROGRAMS) $(SCRIPTS) $(HEADERS)
+installdirs:
+ for dir in "$(DESTDIR)$(bindir)" "$(DESTDIR)$(bindir)"; do \
+ test -z "$$dir" || $(MKDIR_P) "$$dir"; \
+ done
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
+mostlyclean-generic:
+
+clean-generic:
+ -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES)
+
+distclean-generic:
+ -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+ -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+ @echo "This command is intended for maintainers to use"
+ @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-binPROGRAMS clean-generic mostlyclean-am
+
+distclean: distclean-am
+ -rm -f ./$(DEPDIR)/dfa.Po
+ -rm -f ./$(DEPDIR)/dfasearch.Po
+ -rm -f ./$(DEPDIR)/grep.Po
+ -rm -f ./$(DEPDIR)/kwsearch.Po
+ -rm -f ./$(DEPDIR)/kwset.Po
+ -rm -f ./$(DEPDIR)/pcresearch.Po
+ -rm -f ./$(DEPDIR)/searchutils.Po
+ -rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+ distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am: install-binPROGRAMS install-binSCRIPTS
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+ -rm -f ./$(DEPDIR)/dfa.Po
+ -rm -f ./$(DEPDIR)/dfasearch.Po
+ -rm -f ./$(DEPDIR)/grep.Po
+ -rm -f ./$(DEPDIR)/kwsearch.Po
+ -rm -f ./$(DEPDIR)/kwset.Po
+ -rm -f ./$(DEPDIR)/pcresearch.Po
+ -rm -f ./$(DEPDIR)/searchutils.Po
+ -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am: uninstall-binPROGRAMS uninstall-binSCRIPTS
+
+.MAKE: install-am install-strip
+
+.PHONY: CTAGS GTAGS TAGS all all-am am--depfiles check check-am clean \
+ clean-binPROGRAMS clean-generic cscopelist-am ctags ctags-am \
+ distclean distclean-compile distclean-generic distclean-tags \
+ distdir dvi dvi-am html html-am info info-am install \
+ install-am install-binPROGRAMS install-binSCRIPTS install-data \
+ install-data-am install-dvi install-dvi-am install-exec \
+ install-exec-am install-html install-html-am install-info \
+ install-info-am install-man install-pdf install-pdf-am \
+ install-ps install-ps-am install-strip installcheck \
+ installcheck-am installdirs maintainer-clean \
+ maintainer-clean-generic mostlyclean mostlyclean-compile \
+ mostlyclean-generic pdf pdf-am ps ps-am tags tags-am uninstall \
+ uninstall-am uninstall-binPROGRAMS uninstall-binSCRIPTS
+
+.PRECIOUS: Makefile
+
+
+egrep fgrep: egrep.sh Makefile
+ $(AM_V_GEN)grep=`echo grep | sed -e '$(transform)'` && \
+ case $@ in egrep) option=-E;; fgrep) option=-F;; esac && \
+ shell_does_substrings='set x/y && d=$${1%/*} && test "$$d" = x' && \
+ if $(SHELL) -c "$$shell_does_substrings" 2>/dev/null; then \
+ edit_substring='s,X,X,'; \
+ else \
+ edit_substring='s,\$${0%/\*},`expr "X$$0" : '\''X\\(.*\\)/'\''`,g'; \
+ fi && \
+ sed -e 's|[@]SHELL@|$(SHELL)|g' \
+ -e "$$edit_substring" \
+ -e "s|[@]grep@|$$grep|g" \
+ -e "s|[@]option@|$$option|g" <$(srcdir)/egrep.sh >$@-t
+ $(AM_V_at)chmod +x $@-t
+ $(AM_V_at)mv $@-t $@
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/src/dfa.c b/src/dfa.c
new file mode 100644
index 0000000..98ee4ac
--- /dev/null
+++ b/src/dfa.c
@@ -0,0 +1,4168 @@
+/* dfa.c - deterministic extended regexp routines for GNU
+ Copyright (C) 1988, 1998, 2000, 2002, 2004-2005, 2007-2016 Free Software
+ Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc.,
+ 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA */
+
+/* Written June, 1988 by Mike Haertel
+ Modified July, 1988 by Arthur David Olson to assist BMG speedups */
+
+#include <config.h>
+
+#include "dfa.h"
+
+#include <assert.h>
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <string.h>
+#include <locale.h>
+
+#define STREQ(a, b) (strcmp (a, b) == 0)
+
+/* ISASCIIDIGIT differs from isdigit, as follows:
+ - Its arg may be any int or unsigned int; it need not be an unsigned char.
+ - It's guaranteed to evaluate its argument exactly once.
+ - It's typically faster.
+ Posix 1003.2-1992 section 2.5.2.1 page 50 lines 1556-1558 says that
+ only '0' through '9' are digits. Prefer ISASCIIDIGIT to isdigit unless
+ it's important to use the locale's definition of "digit" even when the
+ host does not conform to Posix. */
+#define ISASCIIDIGIT(c) ((unsigned) (c) - '0' <= 9)
+
+#include "gettext.h"
+#define _(str) gettext (str)
+
+#include <wchar.h>
+#include <wctype.h>
+
+/* HPUX defines these as macros in sys/param.h. */
+#ifdef setbit
+# undef setbit
+#endif
+#ifdef clrbit
+# undef clrbit
+#endif
+
+/* First integer value that is greater than any character code. */
+enum { NOTCHAR = 1 << CHAR_BIT };
+
+/* This represents part of a character class. It must be unsigned and
+ at least CHARCLASS_WORD_BITS wide. Any excess bits are zero. */
+typedef unsigned int charclass_word;
+
+/* The number of bits used in a charclass word. utf8_classes assumes
+ this is exactly 32. */
+enum { CHARCLASS_WORD_BITS = 32 };
+
+/* The maximum useful value of a charclass_word; all used bits are 1. */
+#define CHARCLASS_WORD_MASK \
+ (((charclass_word) 1 << (CHARCLASS_WORD_BITS - 1) << 1) - 1)
+
+/* Number of words required to hold a bit for every character. */
+enum
+{
+ CHARCLASS_WORDS = (NOTCHAR + CHARCLASS_WORD_BITS - 1) / CHARCLASS_WORD_BITS
+};
+
+/* Sets of unsigned characters are stored as bit vectors in arrays of ints. */
+typedef charclass_word charclass[CHARCLASS_WORDS];
+
+/* Return CH converted to unsigned char.  Doing the conversion through
+   a function with a 'char' parameter, instead of writing a cast, lets
+   the compiler diagnose some wrongly-typed arguments that a cast would
+   silently accept.  */
+static unsigned char
+to_uchar (char ch)
+{
+  unsigned char converted = ch;
+  return converted;
+}
+
+/* Contexts tell us whether a character is a newline or a word constituent.
+ Word-constituent characters are those that satisfy iswalnum, plus '_'.
+ Each character has a single CTX_* value; bitmasks of CTX_* values denote
+ a particular character class.
+
+ A state also stores a context value, which is a bitmask of CTX_* values.
+ A state's context represents a set of characters that the state's
+ predecessors must match. For example, a state whose context does not
+ include CTX_LETTER will never have transitions where the previous
+ character is a word constituent. A state whose context is CTX_ANY
+ might have transitions from any character. */
+
+#define CTX_NONE 1
+#define CTX_LETTER 2
+#define CTX_NEWLINE 4
+#define CTX_ANY 7
+
+/* Sometimes characters can only be matched depending on the surrounding
+ context. Such context decisions depend on what the previous character
+ was, and the value of the current (lookahead) character. Context
+ dependent constraints are encoded as 12-bit integers. Each bit that
+ is set indicates that the constraint succeeds in the corresponding
+ context.
+
+ bit 8-11 - valid contexts when next character is CTX_NEWLINE
+ bit 4-7 - valid contexts when next character is CTX_LETTER
+ bit 0-3 - valid contexts when next character is CTX_NONE
+
+ The macro SUCCEEDS_IN_CONTEXT determines whether a given constraint
+ succeeds in a particular context. Prev is a bitmask of possible
+ context values for the previous character, curr is the (single-bit)
+ context value for the lookahead character. */
+#define NEWLINE_CONSTRAINT(constraint) (((constraint) >> 8) & 0xf)
+#define LETTER_CONSTRAINT(constraint) (((constraint) >> 4) & 0xf)
+#define OTHER_CONSTRAINT(constraint) ((constraint) & 0xf)
+
+#define SUCCEEDS_IN_CONTEXT(constraint, prev, curr) \
+ ((((curr) & CTX_NONE ? OTHER_CONSTRAINT (constraint) : 0) \
+ | ((curr) & CTX_LETTER ? LETTER_CONSTRAINT (constraint) : 0) \
+ | ((curr) & CTX_NEWLINE ? NEWLINE_CONSTRAINT (constraint) : 0)) & (prev))
+
+/* The following macros describe what a constraint depends on. */
+#define PREV_NEWLINE_CONSTRAINT(constraint) (((constraint) >> 2) & 0x111)
+#define PREV_LETTER_CONSTRAINT(constraint) (((constraint) >> 1) & 0x111)
+#define PREV_OTHER_CONSTRAINT(constraint) ((constraint) & 0x111)
+
+#define PREV_NEWLINE_DEPENDENT(constraint) \
+ (PREV_NEWLINE_CONSTRAINT (constraint) != PREV_OTHER_CONSTRAINT (constraint))
+#define PREV_LETTER_DEPENDENT(constraint) \
+ (PREV_LETTER_CONSTRAINT (constraint) != PREV_OTHER_CONSTRAINT (constraint))
+
+/* Tokens that match the empty string subject to some constraint actually
+ work by applying that constraint to determine what may follow them,
+ taking into account what has gone before. The following values are
+ the constraints corresponding to the special tokens previously defined. */
+#define NO_CONSTRAINT 0x777
+#define BEGLINE_CONSTRAINT 0x444
+#define ENDLINE_CONSTRAINT 0x700
+#define BEGWORD_CONSTRAINT 0x050
+#define ENDWORD_CONSTRAINT 0x202
+#define LIMWORD_CONSTRAINT 0x252
+#define NOTLIMWORD_CONSTRAINT 0x525
+
+/* The regexp is parsed into an array of tokens in postfix form. Some tokens
+ are operators and others are terminal symbols. Most (but not all) of these
+ codes are returned by the lexical analyzer. */
+
+typedef ptrdiff_t token;
+
+/* Predefined token values. */
+enum
+{
+ END = -1, /* END is a terminal symbol that matches the
+ end of input; any value of END or less in
+ the parse tree is such a symbol. Accepting
+ states of the DFA are those that would have
+ a transition on END. */
+
+ /* Ordinary character values are terminal symbols that match themselves. */
+
+ EMPTY = NOTCHAR, /* EMPTY is a terminal symbol that matches
+ the empty string. */
+
+ BACKREF, /* BACKREF is generated by \<digit>
+ or by any other construct that
+ is not completely handled. If the scanner
+ detects a transition on backref, it returns
+ a kind of "semi-success" indicating that
+ the match will have to be verified with
+ a backtracking matcher. */
+
+ BEGLINE, /* BEGLINE is a terminal symbol that matches
+ the empty string at the beginning of a
+ line. */
+
+ ENDLINE, /* ENDLINE is a terminal symbol that matches
+ the empty string at the end of a line. */
+
+ BEGWORD, /* BEGWORD is a terminal symbol that matches
+ the empty string at the beginning of a
+ word. */
+
+ ENDWORD, /* ENDWORD is a terminal symbol that matches
+ the empty string at the end of a word. */
+
+ LIMWORD, /* LIMWORD is a terminal symbol that matches
+ the empty string at the beginning or the
+ end of a word. */
+
+ NOTLIMWORD, /* NOTLIMWORD is a terminal symbol that
+ matches the empty string not at
+ the beginning or end of a word. */
+
+ QMARK, /* QMARK is an operator of one argument that
+ matches zero or one occurrences of its
+ argument. */
+
+ STAR, /* STAR is an operator of one argument that
+ matches the Kleene closure (zero or more
+ occurrences) of its argument. */
+
+ PLUS, /* PLUS is an operator of one argument that
+ matches the positive closure (one or more
+ occurrences) of its argument. */
+
+ REPMN, /* REPMN is a lexical token corresponding
+ to the {m,n} construct. REPMN never
+ appears in the compiled token vector. */
+
+ CAT, /* CAT is an operator of two arguments that
+ matches the concatenation of its
+ arguments. CAT is never returned by the
+ lexical analyzer. */
+
+ OR, /* OR is an operator of two arguments that
+ matches either of its arguments. */
+
+ LPAREN, /* LPAREN never appears in the parse tree,
+ it is only a lexeme. */
+
+ RPAREN, /* RPAREN never appears in the parse tree. */
+
+ ANYCHAR, /* ANYCHAR is a terminal symbol that matches
+ a valid multibyte (or single byte) character.
+ It is used only if MB_CUR_MAX > 1. */
+
+ MBCSET, /* MBCSET is similar to CSET, but for
+ multibyte characters. */
+
+ WCHAR, /* Only returned by lex. wctok contains
+ the wide character representation. */
+
+ CSET /* CSET (and any value greater) is a
+ terminal symbol that matches any of a
+ class of characters. */
+};
+
+
+/* States of the recognizer correspond to sets of positions in the parse
+ tree, together with the constraints under which they may be matched.
+ So a position is encoded as an index into the parse tree together with
+ a constraint. */
+typedef struct
+{
+ size_t index; /* Index into the parse array. */
+ unsigned int constraint; /* Constraint for matching this position. */
+} position;
+
+/* Sets of positions are stored as arrays. */
+typedef struct
+{
+ position *elems; /* Elements of this position set. */
+ size_t nelem; /* Number of elements in this set. */
+ size_t alloc; /* Number of elements allocated in ELEMS. */
+} position_set;
+
+/* Sets of leaves are also stored as arrays. */
+typedef struct
+{
+ size_t *elems; /* Elements of this leaf set. */
+ size_t nelem; /* Number of elements in this set. */
+} leaf_set;
+
+/* A state of the dfa consists of a set of positions, some flags,
+ and the token value of the lowest-numbered position of the state that
+ contains an END token. */
+typedef struct
+{
+ size_t hash; /* Hash of the positions of this state. */
+ position_set elems; /* Positions this state could match. */
+ unsigned char context; /* Context from previous state. */
+ unsigned short constraint; /* Constraint for this state to accept. */
+ token first_end; /* Token value of the first END in elems. */
+ position_set mbps; /* Positions which can match multibyte
+ characters, e.g., period.
+ Used only if MB_CUR_MAX > 1. */
+} dfa_state;
+
+/* States are indexed by state_num values. These are normally
+ nonnegative but -1 is used as a special value. */
+typedef ptrdiff_t state_num;
+
+/* A bracket operator.
+ e.g., [a-c], [[:alpha:]], etc. */
+struct mb_char_classes
+{
+ ptrdiff_t cset;
+ bool invert;
+ wchar_t *chars; /* Normal characters. */
+ size_t nchars;
+};
+
+/* A compiled regular expression. */
+struct dfa
+{
+ /* Fields filled by the scanner. */
+ charclass *charclasses; /* Array of character sets for CSET tokens. */
+ size_t cindex; /* Index for adding new charclasses. */
+ size_t calloc; /* Number of charclasses allocated. */
+
+ /* Fields filled by the parser. */
+ token *tokens; /* Postfix parse array. */
+ size_t tindex; /* Index for adding new tokens. */
+ size_t talloc; /* Number of tokens currently allocated. */
+ size_t depth; /* Depth required of an evaluation stack
+ used for depth-first traversal of the
+ parse tree. */
+ size_t nleaves; /* Number of leaves on the parse tree. */
+ size_t nregexps; /* Count of parallel regexps being built
+ with dfaparse. */
+ bool fast; /* The DFA is fast. */
+ bool multibyte; /* MB_CUR_MAX > 1. */
+ token utf8_anychar_classes[5]; /* To lower ANYCHAR in UTF-8 locales. */
+ mbstate_t mbs; /* Multibyte conversion state. */
+
+ /* dfaexec implementation. */
+ char *(*dfaexec) (struct dfa *, char const *, char *, int, size_t *, int *);
+
+ /* The following are valid only if MB_CUR_MAX > 1. */
+
+ /* The value of multibyte_prop[i] is defined by following rule.
+ if tokens[i] < NOTCHAR
+ bit 0 : tokens[i] is the first byte of a character, including
+ single-byte characters.
+ bit 1 : tokens[i] is the last byte of a character, including
+ single-byte characters.
+
+ if tokens[i] = MBCSET
+ ("the index of mbcsets corresponding to this operator" << 2) + 3
+
+ e.g.
+ tokens
+ = 'single_byte_a', 'multi_byte_A', 'single_byte_b'
+ = 'sb_a', 'mb_A(1st byte)', 'mb_A(2nd byte)', 'mb_A(3rd byte)', 'sb_b'
+ multibyte_prop
+ = 3 , 1 , 0 , 2 , 3
+ */
+ int *multibyte_prop;
+
+ /* Array of the bracket expression in the DFA. */
+ struct mb_char_classes *mbcsets;
+ size_t nmbcsets;
+ size_t mbcsets_alloc;
+
+ /* Fields filled by the superset. */
+ struct dfa *superset; /* Hint of the dfa. */
+
+ /* Fields filled by the state builder. */
+ dfa_state *states; /* States of the dfa. */
+ state_num sindex; /* Index for adding new states. */
+ size_t salloc; /* Number of states currently allocated. */
+
+ /* Fields filled by the parse tree->NFA conversion. */
+ position_set *follows; /* Array of follow sets, indexed by position
+ index. The follow of a position is the set
+ of positions containing characters that
+ could conceivably follow a character
+ matching the given position in a string
+ matching the regexp. Allocated to the
+ maximum possible position index. */
+ bool searchflag; /* We are supposed to build a searching
+ as opposed to an exact matcher. A searching
+ matcher finds the first and shortest string
+ matching a regexp anywhere in the buffer,
+ whereas an exact matcher finds the longest
+ string matching, but anchored to the
+ beginning of the buffer. */
+
+ /* Fields filled by dfaexec. */
+ state_num tralloc; /* Number of transition tables that have
+ slots so far, not counting trans[-1]. */
+ int trcount; /* Number of transition tables that have
+ actually been built. */
+ int min_trcount; /* Minimum number of transition tables.
+ Always keep the number, even after freeing
+ the transition tables. It is also the
+ number of initial states. */
+ state_num **trans; /* Transition tables for states that can
+ never accept. If the transitions for a
+ state have not yet been computed, or the
+ state could possibly accept, its entry in
+ this table is NULL. This points to one
+ past the start of the allocated array,
+ and trans[-1] is always NULL. */
+ state_num **fails; /* Transition tables after failing to accept
+ on a state that potentially could do so. */
+ int *success; /* Table of acceptance conditions used in
+ dfaexec and computed in build_state. */
+ state_num *newlines; /* Transitions on newlines. The entry for a
+ newline in any transition table is always
+ -1 so we can count lines without wasting
+ too many cycles. The transition for a
+ newline is stored separately and handled
+ as a special case. Newline is also used
+ as a sentinel at the end of the buffer. */
+ state_num initstate_letter; /* Initial state for letter context. */
+ state_num initstate_others; /* Initial state for other contexts. */
+ position_set mb_follows; /* Follow set added by ANYCHAR and/or MBCSET
+ on demand. */
+ int *mb_match_lens; /* Array of length reduced by ANYCHAR and/or
+ MBCSET. Null if mb_follows.elems has not
+ been allocated. */
+};
+
+/* Some macros for user access to dfa internals. */
+
+/* S could possibly be an accepting state of R. */
+#define ACCEPTING(s, r) ((r).states[s].constraint)
+
+/* STATE accepts in the specified context. */
+#define ACCEPTS_IN_CONTEXT(prev, curr, state, dfa) \
+ SUCCEEDS_IN_CONTEXT ((dfa).states[state].constraint, prev, curr)
+
+static void regexp (void);
+
+/* A table indexed by byte values that contains the corresponding wide
+ character (if any) for that byte. WEOF means the byte is not a
+ valid single-byte character. */
+static wint_t mbrtowc_cache[NOTCHAR];
+
+/* Convert the leading bytes of the multibyte buffer S, which is N
+   bytes long, to a wide character, store it into *PWC, and return the
+   number of bytes consumed.
+
+   The first byte is looked up in the file-scope mbrtowc_cache table;
+   when the cached value is not WEOF it is the result, with a length
+   of 1, and D is not touched.  Otherwise mbrtowc is called with D->mbs
+   as the conversion state.  If mbrtowc returns 0, (size_t) -1 or
+   (size_t) -2, D->mbs is reset to the initial state and the result is
+   WEOF with a length of 1, so an encoding error consumes exactly one
+   byte.
+
+   This differs from mbrtowc (PWC, S, N, &D->mbs) as follows:
+
+   * PWC points to wint_t, not to wchar_t.
+   * The last arg is a dfa *D rather than a bare conversion state.
+   * N must be at least 1.
+   * S[N - 1] must be a sentinel byte.
+   * Shift encodings are not supported.
+   * The return value is always in the range 1..N.
+   * D->mbs is always valid afterwards.
+   * *PWC is always set to something.  */
+static size_t
+mbs_to_wchar (wint_t *pwc, char const *s, size_t n, struct dfa *d)
+{
+  wchar_t wch;
+  size_t nbytes;
+  wint_t cached = mbrtowc_cache[(unsigned char) s[0]];
+
+  /* Fast path: the first byte is a complete character by itself.  */
+  if (cached != WEOF)
+    {
+      *pwc = cached;
+      return 1;
+    }
+
+  nbytes = mbrtowc (&wch, s, n, &d->mbs);
+  if (nbytes == 0 || (size_t) -2 <= nbytes)
+    {
+      /* Null, invalid or incomplete: restart from the initial shift
+         state and report a one-byte encoding error.  */
+      memset (&d->mbs, 0, sizeof d->mbs);
+      *pwc = WEOF;
+      return 1;
+    }
+
+  *pwc = wch;
+  return nbytes;
+}
+
+#ifdef DEBUG
+
+/* Write a human-readable rendering of token T to stderr, for
+   debugging.  Negative tokens print as "END", values below NOTCHAR
+   print as a hexadecimal byte, the tokens listed in the table below
+   print as their names, and every other value prints as "CSET".  */
+static void
+prtok (token t)
+{
+  static const struct
+  {
+    token tok;
+    char const *name;
+  } named[] =
+    {
+      { EMPTY, "EMPTY" },
+      { BACKREF, "BACKREF" },
+      { BEGLINE, "BEGLINE" },
+      { ENDLINE, "ENDLINE" },
+      { BEGWORD, "BEGWORD" },
+      { ENDWORD, "ENDWORD" },
+      { LIMWORD, "LIMWORD" },
+      { NOTLIMWORD, "NOTLIMWORD" },
+      { QMARK, "QMARK" },
+      { STAR, "STAR" },
+      { PLUS, "PLUS" },
+      { CAT, "CAT" },
+      { OR, "OR" },
+      { LPAREN, "LPAREN" },
+      { RPAREN, "RPAREN" },
+      { ANYCHAR, "ANYCHAR" },
+      { MBCSET, "MBCSET" },
+    };
+  char const *s = "CSET";
+  size_t i;
+
+  if (t < 0)
+    {
+      fprintf (stderr, "END");
+      return;
+    }
+
+  if (t < NOTCHAR)
+    {
+      unsigned int ch = t;
+      fprintf (stderr, "0x%02x", ch);
+      return;
+    }
+
+  /* Anything not found in the table keeps the "CSET" fallback.  */
+  for (i = 0; i < sizeof named / sizeof *named; i++)
+    if (named[i].tok == t)
+      {
+        s = named[i].name;
+        break;
+      }
+  fprintf (stderr, "%s", s);
+}
+#endif /* DEBUG */
+
+/* Stuff pertaining to charclasses. */
+
+/* Return true if bit B is set in the character class C.  */
+static bool
+tstbit (unsigned int b, charclass const c)
+{
+  charclass_word word = c[b / CHARCLASS_WORD_BITS];
+  unsigned int shift = b % CHARCLASS_WORD_BITS;
+  return (word >> shift) & 1;
+}
+
+/* Set bit B in the character class C.  */
+static void
+setbit (unsigned int b, charclass c)
+{
+  charclass_word mask = (charclass_word) 1 << b % CHARCLASS_WORD_BITS;
+  c[b / CHARCLASS_WORD_BITS] |= mask;
+}
+
+/* Clear bit B in the character class C.  */
+static void
+clrbit (unsigned int b, charclass c)
+{
+  charclass_word mask = (charclass_word) 1 << b % CHARCLASS_WORD_BITS;
+  c[b / CHARCLASS_WORD_BITS] &= ~mask;
+}
+
+/* Copy every word of the character class SRC into DST.  */
+static void
+copyset (charclass const src, charclass dst)
+{
+  int i;
+
+  for (i = 0; i < CHARCLASS_WORDS; i++)
+    dst[i] = src[i];
+}
+
+/* Make S the empty character class by clearing all of its words.  */
+static void
+zeroset (charclass s)
+{
+  int i;
+
+  for (i = 0; i < CHARCLASS_WORDS; i++)
+    s[i] = 0;
+}
+
+/* Complement the character class S in place.  Each word is masked
+   with CHARCLASS_WORD_MASK so that bits beyond CHARCLASS_WORD_BITS
+   stay zero.  */
+static void
+notset (charclass s)
+{
+  charclass_word *word;
+
+  for (word = s; word < s + CHARCLASS_WORDS; word++)
+    *word = ~*word & CHARCLASS_WORD_MASK;
+}
+
+/* Return true if the character classes S1 and S2 have identical
+   contents.  */
+static bool
+equal (charclass const s1, charclass const s2)
+{
+  int i;
+
+  for (i = 0; i < CHARCLASS_WORDS; i++)
+    if (s1[i] != s2[i])
+      return false;
+  return true;
+}
+
+/* Make sure the array addressed by PTR has room for at least NITEMS +
+   (PTR || !NITEMS) items, and return its (possibly new) address.  PTR
+   may be null; the returned value never is.
+
+   *NALLOC is the number of items the array currently holds and
+   ITEMSIZE is the size of one item.  Nothing happens while NITEMS is
+   below *NALLOC.  Otherwise *NALLOC is set to NITEMS and the array is
+   handed to x2nrealloc, which updates *NALLOC to the new capacity;
+   growing through x2nrealloc avoids O(N**2) behavior on arrays that
+   grow linearly.  */
+static void *
+maybe_realloc (void *ptr, size_t nitems, size_t *nalloc, size_t itemsize)
+{
+  if (*nalloc <= nitems)
+    {
+      *nalloc = nitems;
+      ptr = x2nrealloc (ptr, nalloc, itemsize);
+    }
+  return ptr;
+}
+
+/* Return the index in D->charclasses of a character class equal to S.
+   If no existing entry matches, grow the array if necessary, append a
+   copy of S, and return the index of the new entry.  */
+static size_t
+dfa_charclass_index (struct dfa *d, charclass const s)
+{
+  size_t slot = 0;
+
+  while (slot < d->cindex)
+    {
+      if (equal (s, d->charclasses[slot]))
+        return slot;
+      slot++;
+    }
+
+  /* Not found: SLOT now equals D->cindex, the first unused index.  */
+  d->charclasses = maybe_realloc (d->charclasses, d->cindex, &d->calloc,
+                                  sizeof *d->charclasses);
+  copyset (s, d->charclasses[slot]);
+  d->cindex = slot + 1;
+  return slot;
+}
+
+/* A pointer to the current dfa is kept here during parsing. */
+static struct dfa *dfa;
+
+/* Like dfa_charclass_index, but operate on the DFA that is currently
+   being parsed (the file-scope variable 'dfa').  */
+static size_t
+charclass_index (charclass const s)
+{
+  size_t index = dfa_charclass_index (dfa, s);
+  return index;
+}
+
+/* Syntax bits controlling the behavior of the lexical analyzer. */
+static reg_syntax_t syntax_bits, syntax_bits_set;
+
+/* Flag for case-folding letters into sets. */
+static bool case_fold;
+
+/* End-of-line byte in data. */
+static unsigned char eolbyte;
+
+/* Cache of char-context values. */
+static int sbit[NOTCHAR];
+
+/* Set of characters considered letters. */
+static charclass letters;
+
+/* Set of characters that are newline. */
+static charclass newline;
+
+/* Return true if the byte C is, by itself, a word constituent: it must
+   have a non-WEOF entry in mbrtowc_cache and be either '_' or
+   alphanumeric according to isalnum.  */
+static bool
+unibyte_word_constituent (unsigned char c)
+{
+  if (mbrtowc_cache[c] == WEOF)
+    return false;
+  return c == '_' || isalnum (c);
+}
+
+/* Return the CTX_* context value of the byte C: CTX_NEWLINE if it is
+   the end-of-line byte, CTX_LETTER if it is a unibyte word
+   constituent, and CTX_NONE otherwise.  */
+static int
+char_context (unsigned char c)
+{
+  return (c == eolbyte ? CTX_NEWLINE
+          : unibyte_word_constituent (c) ? CTX_LETTER
+          : CTX_NONE);
+}
+
+/* Return the CTX_* context value of the wide character WC:
+   CTX_NEWLINE if it equals the end-of-line byte or is zero,
+   CTX_LETTER if it is L'_' or satisfies iswalnum, and CTX_NONE
+   otherwise.  */
+static int
+wchar_context (wint_t wc)
+{
+  return (wc == (wchar_t) eolbyte || wc == 0 ? CTX_NEWLINE
+          : wc == L'_' || iswalnum (wc) ? CTX_LETTER
+          : CTX_NONE);
+}
+
+/* Entry point to set syntax options.
+
+   BITS holds the regex syntax flags consulted by the lexical analyzer,
+   a nonzero FOLD requests case folding, and EOL is the end-of-line
+   byte in the data.
+
+   Besides recording those settings, rebuild the per-byte tables that
+   depend on them: mbrtowc_cache, sbit, and the LETTERS and NEWLINE
+   character classes.  LETTERS and NEWLINE are cleared first, so that
+   a later call with a different EOL or locale does not inherit bits
+   set by an earlier call and the two sets stay consistent with
+   sbit.  */
+void
+dfasyntax (reg_syntax_t bits, int fold, unsigned char eol)
+{
+  int i;
+  syntax_bits_set = 1;
+  syntax_bits = bits;
+  case_fold = fold != 0;
+  eolbyte = eol;
+
+  /* setbit only ever adds members, so start from empty sets.  */
+  zeroset (letters);
+  zeroset (newline);
+
+  for (i = CHAR_MIN; i <= CHAR_MAX; ++i)
+    {
+      char c = i;
+      unsigned char uc = i;
+      mbstate_t s = { 0 };
+      wchar_t wc;
+      mbrtowc_cache[uc] = mbrtowc (&wc, &c, 1, &s) <= 1 ? wc : WEOF;
+
+      /* Now that mbrtowc_cache[uc] is set, use it to calculate sbit.  */
+      sbit[uc] = char_context (uc);
+      switch (sbit[uc])
+        {
+        case CTX_LETTER:
+          setbit (uc, letters);
+          break;
+        case CTX_NEWLINE:
+          setbit (uc, newline);
+          break;
+        default:
+          /* CTX_NONE bytes belong to neither set.  */
+          break;
+        }
+    }
+}
+
+/* If the wide character WC has a single-byte representation (as
+   reported by wctob), set that byte's bit in the charclass C and
+   return true.  Otherwise leave C alone and return false.  The latter
+   can happen even for MB_CUR_MAX == 1, e.g., when folding case in
+   weird Turkish locales where dotless i/dotted I are not included in
+   the chosen character set.  */
+static bool
+setbit_wc (wint_t wc, charclass c)
+{
+  int byte = wctob (wc);
+  bool representable = byte != EOF;
+
+  if (representable)
+    setbit (byte, c);
+  return representable;
+}
+
+/* In the charclass C, set the bit of every byte value whose toupper
+   image equals that of B; this covers B's case variants.
+   MB_CUR_MAX must be 1.  */
+static void
+setbit_case_fold_c (int b, charclass c)
+{
+  int target = toupper (b);
+  int candidate = 0;
+
+  while (candidate < NOTCHAR)
+    {
+      if (toupper (candidate) == target)
+        setbit (candidate, c);
+      candidate++;
+    }
+}
+
+
+
+/* Return nonzero if the current locale's encoding is UTF-8, which
+   allows some optimizations that cannot otherwise be assumed for a
+   multibyte encoding.  The test converts the two-byte sequence
+   "\xc4\x80" and checks that it decodes to U+0100.  The answer is
+   computed on the first call and cached in a static variable.  */
+int
+using_utf8 (void)
+{
+  static int cached = -1;
+  wchar_t wc;
+  mbstate_t mbs = { 0 };
+
+  if (0 <= cached)
+    return cached;
+
+  cached = mbrtowc (&wc, "\xc4\x80", 2, &mbs) == 2 && wc == 0x100;
+  return cached;
+}
+
+/* Return true if the current locale is known to be a unibyte locale
+   without multicharacter collating sequences and where range
+   comparisons simply use the native encoding.  Such locales can be
+   processed more efficiently.
+
+   The answer is false if the native character set is not compatible
+   with the C locale or if the DFA being parsed is multibyte.
+   Otherwise it is true exactly when setlocale reports no locale name,
+   "C", or "POSIX"; that part is computed once and cached.  */
+
+static bool
+using_simple_locale (void)
+{
+  /* Whether the native character set is compatible with the C locale.
+     The test is not perfect, but it is good enough in practice: only
+     ASCII and EBCDIC are in common use, and it accepts ASCII and
+     rejects EBCDIC.  */
+  enum { native_c_charset =
+    ('\b' == 8 && '\t' == 9 && '\n' == 10 && '\v' == 11 && '\f' == 12
+     && '\r' == 13 && ' ' == 32 && '!' == 33 && '"' == 34 && '#' == 35
+     && '%' == 37 && '&' == 38 && '\'' == 39 && '(' == 40 && ')' == 41
+     && '*' == 42 && '+' == 43 && ',' == 44 && '-' == 45 && '.' == 46
+     && '/' == 47 && '0' == 48 && '9' == 57 && ':' == 58 && ';' == 59
+     && '<' == 60 && '=' == 61 && '>' == 62 && '?' == 63 && 'A' == 65
+     && 'Z' == 90 && '[' == 91 && '\\' == 92 && ']' == 93 && '^' == 94
+     && '_' == 95 && 'a' == 97 && 'z' == 122 && '{' == 123 && '|' == 124
+     && '}' == 125 && '~' == 126)
+  };
+  static int unibyte_c = -1;
+
+  if (! native_c_charset || dfa->multibyte)
+    return false;
+
+  if (unibyte_c < 0)
+    {
+      char const *locale = setlocale (LC_ALL, NULL);
+      if (!locale)
+        unibyte_c = 1;
+      else
+        unibyte_c = STREQ (locale, "C") || STREQ (locale, "POSIX");
+    }
+  return unibyte_c;
+}
+
+/* Lexical analyzer. All the dross that deals with the obnoxious
+ GNU Regex syntax bits is located here. The poor, suffering
+ reader is referred to the GNU Regex documentation for the
+ meaning of the @#%!@#%^!@ syntax bits. */
+
+static char const *lexptr; /* Pointer to next input character. */
+static size_t lexleft; /* Number of characters remaining. */
+static token lasttok; /* Previous token returned; initially END. */
+static bool laststart; /* We're separated from beginning or (,
+ | only by zero-width characters. */
+static size_t parens; /* Count of outstanding left parens. */
+static int minrep, maxrep; /* Repeat counts for {m,n}. */
+
+static int cur_mb_len = 1; /* Length of the multibyte representation of
+ wctok. */
+
+static wint_t wctok; /* Wide character representation of the current
+ multibyte character, or WEOF if there was
+ an encoding error. Used only if
+ MB_CUR_MAX > 1. */
+
+
+/* Fetch the next lexical input character. Set C (of type int) to the
+ next input byte, except set C to EOF if the input is a multibyte
+ character of length greater than 1. Set WC (of type wint_t) to the
+ value of the input if it is a valid multibyte character (possibly
+ of length 1); otherwise set WC to WEOF. If there is no more input,
+ report EOFERR if EOFERR is not null, and return lasttok = END
+ otherwise.
+
+ Side effects: cur_mb_len is set to the number of bytes consumed, and
+ lexptr and lexleft are advanced past those bytes. Because the
+ expansion contains a 'return' statement, the macro can be used only
+ inside a function whose return type can hold a token value. */
+# define FETCH_WC(c, wc, eoferr) \
+ do { \
+ if (! lexleft) \
+ { \
+ if ((eoferr) != 0) \
+ dfaerror (eoferr); \
+ else \
+ return lasttok = END; \
+ } \
+ else \
+ { \
+ wint_t _wc; \
+ size_t nbytes = mbs_to_wchar (&_wc, lexptr, lexleft, dfa); \
+ cur_mb_len = nbytes; \
+ (wc) = _wc; \
+ (c) = nbytes == 1 ? to_uchar (*lexptr) : EOF; \
+ lexptr += nbytes; \
+ lexleft -= nbytes; \
+ } \
+ } while (0)
+
+#ifndef MIN
+# define MIN(a,b) ((a) < (b) ? (a) : (b))
+#endif
+
+/* The set of wchar_t values C such that there's a useful locale
+ somewhere where C != towupper (C) && C != towlower (towupper (C)).
+ For example, 0x00B5 (U+00B5 MICRO SIGN) is in this table, because
+ towupper (0x00B5) == 0x039C (U+039C GREEK CAPITAL LETTER MU), and
+ towlower (0x039C) == 0x03BC (U+03BC GREEK SMALL LETTER MU). */
+static short const lonesome_lower[] =
+ {
+ 0x00B5, 0x0131, 0x017F, 0x01C5, 0x01C8, 0x01CB, 0x01F2, 0x0345,
+ 0x03C2, 0x03D0, 0x03D1, 0x03D5, 0x03D6, 0x03F0, 0x03F1,
+
+ /* U+03F2 GREEK LUNATE SIGMA SYMBOL lacks a specific uppercase
+ counterpart in locales predating Unicode 4.0.0 (April 2003). */
+ 0x03F2,
+
+ 0x03F5, 0x1E9B, 0x1FBE,
+ };
+
+/* Maximum number of characters that can be the case-folded
+ counterparts of a single character, not counting the character
+ itself. This is 1 for towupper, 1 for towlower, and 1 for each
+ entry in LONESOME_LOWER. */
+enum
+{ CASE_FOLDED_BUFSIZE = 2 + sizeof lonesome_lower / sizeof *lonesome_lower };
+
+/* Store into FOLDED the characters, other than C itself, that are
+   equal to C after case-folding, and return how many were stored.
+   The candidates, in storage order, are towupper (C), the lowercase
+   of that uppercase form (when it maps back to the same uppercase),
+   and every LONESOME_LOWER entry whose uppercase form is the same.  */
+static unsigned int
+case_folded_counterparts (wchar_t c, wchar_t folded[CASE_FOLDED_BUFSIZE])
+{
+  unsigned int count = 0;
+  size_t k;
+  wint_t upper = towupper (c);
+  wint_t lower = towlower (upper);
+
+  if (upper != c)
+    folded[count++] = upper;
+  if (lower != upper && lower != c && towupper (lower) == upper)
+    folded[count++] = lower;
+
+  for (k = 0; k < sizeof lonesome_lower / sizeof *lonesome_lower; k++)
+    {
+      wint_t candidate = lonesome_lower[k];
+
+      /* Skip characters that were already considered above.  */
+      if (candidate == lower || candidate == upper || candidate == c)
+        continue;
+      if (towupper (candidate) == upper)
+        folded[count++] = candidate;
+    }
+  return count;
+}
+
+typedef int predicate (int);
+
+/* The following list maps the names of the Posix named character classes
+ to predicate functions that determine whether a given character is in
+ the class. The leading [ has already been eaten by the lexical
+ analyzer. */
+struct dfa_ctype
+{
+ const char *name;
+ predicate *func;
+ bool single_byte_only;
+};
+
+static const struct dfa_ctype prednames[] = {
+ {"alpha", isalpha, false},
+ {"upper", isupper, false},
+ {"lower", islower, false},
+ {"digit", isdigit, true},
+ {"xdigit", isxdigit, false},
+ {"space", isspace, false},
+ {"punct", ispunct, false},
+ {"alnum", isalnum, false},
+ {"print", isprint, false},
+ {"graph", isgraph, false},
+ {"cntrl", iscntrl, false},
+ {"blank", isblank, false},
+ {NULL, NULL, false}
+};
+
+/* Return the PREDNAMES entry whose name is STR, or NULL if STR does
+   not name a known character class.  */
+static const struct dfa_ctype *_GL_ATTRIBUTE_PURE
+find_pred (const char *str)
+{
+  const struct dfa_ctype *entry;
+
+  /* The table ends with an entry whose name is NULL.  */
+  for (entry = prednames; entry->name != NULL; entry++)
+    if (STREQ (str, entry->name))
+      return entry;
+  return NULL;
+}
+
+/* Multibyte character handling sub-routine for lex.
+ Parse a bracket expression and build a struct mb_char_classes. */
+static token
+parse_bracket_exp (void)
+{
+ bool invert;
+ int c, c1, c2;
+ charclass ccl;
+
+ /* This is a bracket expression that dfaexec is known to
+ process correctly. */
+ bool known_bracket_exp = true;
+
+ /* Used to warn about [:space:].
+ Bit 0 = first character is a colon.
+ Bit 1 = last character is a colon.
+ Bit 2 = includes any other character but a colon.
+ Bit 3 = includes ranges, char/equiv classes or collation elements. */
+ int colon_warning_state;
+
+ wint_t wc;
+ wint_t wc2;
+ wint_t wc1 = 0;
+
+ /* Work area to build a mb_char_classes. */
+ struct mb_char_classes *work_mbc;
+ size_t chars_al;
+
+ chars_al = 0;
+ if (dfa->multibyte)
+ {
+ dfa->mbcsets = maybe_realloc (dfa->mbcsets, dfa->nmbcsets,
+ &dfa->mbcsets_alloc,
+ sizeof *dfa->mbcsets);
+
+ /* dfa->multibyte_prop[] hold the index of dfa->mbcsets.
+ We will update dfa->multibyte_prop[] in addtok, because we can't
+ decide the index in dfa->tokens[]. */
+
+ /* Initialize work area. */
+ work_mbc = &(dfa->mbcsets[dfa->nmbcsets++]);
+ memset (work_mbc, 0, sizeof *work_mbc);
+ }
+ else
+ work_mbc = NULL;
+
+ memset (ccl, 0, sizeof ccl);
+ FETCH_WC (c, wc, _("unbalanced ["));
+ if (c == '^')
+ {
+ FETCH_WC (c, wc, _("unbalanced ["));
+ invert = true;
+ known_bracket_exp = using_simple_locale ();
+ }
+ else
+ invert = false;
+
+ colon_warning_state = (c == ':');
+ do
+ {
+ c1 = NOTCHAR; /* Mark c1 as not initialized. */
+ colon_warning_state &= ~2;
+
+ /* Note that if we're looking at some other [:...:] construct,
+ we just treat it as a bunch of ordinary characters. We can do
+ this because we assume regex has checked for syntax errors before
+ dfa is ever called. */
+ if (c == '[')
+ {
+ FETCH_WC (c1, wc1, _("unbalanced ["));
+
+ if ((c1 == ':' && (syntax_bits & RE_CHAR_CLASSES))
+ || c1 == '.' || c1 == '=')
+ {
+ enum { MAX_BRACKET_STRING_LEN = 32 };
+ char str[MAX_BRACKET_STRING_LEN + 1];
+ size_t len = 0;
+ for (;;)
+ {
+ FETCH_WC (c, wc, _("unbalanced ["));
+ if ((c == c1 && *lexptr == ']') || lexleft == 0)
+ break;
+ if (len < MAX_BRACKET_STRING_LEN)
+ str[len++] = c;
+ else
+ /* This is in any case an invalid class name. */
+ str[0] = '\0';
+ }
+ str[len] = '\0';
+
+ /* Fetch bracket. */
+ FETCH_WC (c, wc, _("unbalanced ["));
+ if (c1 == ':')
+ /* Build character class. POSIX allows character
+ classes to match multicharacter collating elements,
+ but the regex code does not support that, so do not
+ worry about that possibility. */
+ {
+ char const *class
+ = (case_fold && (STREQ (str, "upper")
+ || STREQ (str, "lower")) ? "alpha" : str);
+ const struct dfa_ctype *pred = find_pred (class);
+ if (!pred)
+ dfaerror (_("invalid character class"));
+
+ if (dfa->multibyte && !pred->single_byte_only)
+ known_bracket_exp = false;
+ else
+ for (c2 = 0; c2 < NOTCHAR; ++c2)
+ if (pred->func (c2))
+ setbit (c2, ccl);
+ }
+ else
+ known_bracket_exp = false;
+
+ colon_warning_state |= 8;
+
+ /* Fetch new lookahead character. */
+ FETCH_WC (c1, wc1, _("unbalanced ["));
+ continue;
+ }
+
+ /* We treat '[' as a normal character here. c/c1/wc/wc1
+ are already set up. */
+ }
+
+ if (c == '\\' && (syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS))
+ FETCH_WC (c, wc, _("unbalanced ["));
+
+ if (c1 == NOTCHAR)
+ FETCH_WC (c1, wc1, _("unbalanced ["));
+
+ if (c1 == '-')
+ /* build range characters. */
+ {
+ FETCH_WC (c2, wc2, _("unbalanced ["));
+
+ /* A bracket expression like [a-[.aa.]] matches an unknown set.
+ Treat it like [-a[.aa.]] while parsing it, and
+ remember that the set is unknown. */
+ if (c2 == '[' && *lexptr == '.')
+ {
+ known_bracket_exp = false;
+ c2 = ']';
+ }
+
+ if (c2 == ']')
+ {
+ /* In the case [x-], the - is an ordinary hyphen,
+ which is left in c1, the lookahead character. */
+ lexptr -= cur_mb_len;
+ lexleft += cur_mb_len;
+ }
+ else
+ {
+ if (c2 == '\\' && (syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS))
+ FETCH_WC (c2, wc2, _("unbalanced ["));
+
+ colon_warning_state |= 8;
+ FETCH_WC (c1, wc1, _("unbalanced ["));
+
+ /* Treat [x-y] as a range if x != y. */
+ if (wc != wc2 || wc == WEOF)
+ {
+ if (dfa->multibyte)
+ known_bracket_exp = false;
+ else if (using_simple_locale ())
+ {
+ int ci;
+ for (ci = c; ci <= c2; ci++)
+ setbit (ci, ccl);
+ if (case_fold)
+ {
+ int uc = toupper (c);
+ int uc2 = toupper (c2);
+ for (ci = 0; ci < NOTCHAR; ci++)
+ {
+ int uci = toupper (ci);
+ if (uc <= uci && uci <= uc2)
+ setbit (ci, ccl);
+ }
+ }
+ }
+ else
+ known_bracket_exp = false;
+
+ continue;
+ }
+ }
+ }
+
+ colon_warning_state |= (c == ':') ? 2 : 4;
+
+ if (!dfa->multibyte)
+ {
+ if (case_fold)
+ setbit_case_fold_c (c, ccl);
+ else
+ setbit (c, ccl);
+ continue;
+ }
+
+ if (wc == WEOF)
+ known_bracket_exp = false;
+ else
+ {
+ wchar_t folded[CASE_FOLDED_BUFSIZE + 1];
+ unsigned int i;
+ unsigned int n = (case_fold
+ ? case_folded_counterparts (wc, folded + 1) + 1
+ : 1);
+ folded[0] = wc;
+ for (i = 0; i < n; i++)
+ if (!setbit_wc (folded[i], ccl))
+ {
+ work_mbc->chars
+ = maybe_realloc (work_mbc->chars, work_mbc->nchars,
+ &chars_al, sizeof *work_mbc->chars);
+ work_mbc->chars[work_mbc->nchars++] = folded[i];
+ }
+ }
+ }
+ while ((wc = wc1, (c = c1) != ']'));
+
+ if (colon_warning_state == 7)
+ dfawarn (_("character class syntax is [[:space:]], not [:space:]"));
+
+ if (! known_bracket_exp)
+ return BACKREF;
+
+ if (dfa->multibyte)
+ {
+ static charclass zeroclass;
+ work_mbc->invert = invert;
+ work_mbc->cset = equal (ccl, zeroclass) ? -1 : charclass_index (ccl);
+ return MBCSET;
+ }
+
+ if (invert)
+ {
+ assert (!dfa->multibyte);
+ notset (ccl);
+ if (syntax_bits & RE_HAT_LISTS_NOT_NEWLINE)
+ clrbit ('\n', ccl);
+ }
+
+ return CSET + charclass_index (ccl);
+}
+/* PUSH_LEX_STATE and POP_LEX_STATE temporarily point the lexer at the
+ NUL-terminated string S. PUSH_LEX_STATE opens a block, saves lexptr
+ and lexleft in block-local variables, then sets lexptr to S and
+ lexleft to strlen (S). POP_LEX_STATE restores the saved values and
+ closes the block. Because one do ... while (0) statement is split
+ across the two macros, they must be used as a pair inside the same
+ function, and each use must be followed by a semicolon. */
+#define PUSH_LEX_STATE(s) \
+ do \
+ { \
+ char const *lexptr_saved = lexptr; \
+ size_t lexleft_saved = lexleft; \
+ lexptr = (s); \
+ lexleft = strlen (lexptr)
+
+#define POP_LEX_STATE() \
+ lexptr = lexptr_saved; \
+ lexleft = lexleft_saved; \
+ } \
+ while (0)
+/* Return the next token of the pattern being parsed, recording it in
+ lasttok as well. The main loop runs at most twice: once for an
+ optional backslash and once for the character it applies to.
+ Whether a character acts as an operator depends on syntax_bits, on
+ whether it was backslash-escaped and, for some operators, on
+ laststart; characters that turn out not to be operators jump to
+ normal_char. Bracket expressions, and \s \S \w \W when the dfa is
+ multibyte, are handed to parse_bracket_exp.
+ NOTE(review): end of input is presumably handled inside FETCH_WC,
+ which is called here with a NULL error message -- confirm against the
+ macro definition. */
+static token
+lex (void)
+{
+ int c, c2;
+ bool backslash = false;
+ charclass ccl;
+ int i;
+
+ /* Basic plan: We fetch a character. If it's a backslash,
+ we set the backslash flag and go through the loop again.
+ On the plus side, this avoids having a duplicate of the
+ main switch inside the backslash case. On the minus side,
+ it means that just about every case begins with
+ "if (backslash) ...". */
+ for (i = 0; i < 2; ++i) /* Pass 1 may only see a backslash. */
+ {
+ FETCH_WC (c, wctok, NULL);
+
+ switch (c)
+ {
+ case '\\':
+ if (backslash)
+ goto normal_char;
+ if (lexleft == 0)
+ dfaerror (_("unfinished \\ escape"));
+ backslash = true;
+ break;
+
+ case '^':
+ if (backslash)
+ goto normal_char;
+ if (syntax_bits & RE_CONTEXT_INDEP_ANCHORS
+ || lasttok == END || lasttok == LPAREN || lasttok == OR)
+ return lasttok = BEGLINE;
+ goto normal_char;
+
+ case '$':
+ if (backslash)
+ goto normal_char;
+ if (syntax_bits & RE_CONTEXT_INDEP_ANCHORS
+ || lexleft == 0
+ || (syntax_bits & RE_NO_BK_PARENS
+ ? lexleft > 0 && *lexptr == ')'
+ : lexleft > 1 && lexptr[0] == '\\' && lexptr[1] == ')')
+ || (syntax_bits & RE_NO_BK_VBAR
+ ? lexleft > 0 && *lexptr == '|'
+ : lexleft > 1 && lexptr[0] == '\\' && lexptr[1] == '|')
+ || ((syntax_bits & RE_NEWLINE_ALT)
+ && lexleft > 0 && *lexptr == '\n'))
+ return lasttok = ENDLINE;
+ goto normal_char;
+
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ if (backslash && !(syntax_bits & RE_NO_BK_REFS))
+ {
+ laststart = false;
+ return lasttok = BACKREF;
+ }
+ goto normal_char;
+
+ case '`':
+ if (backslash && !(syntax_bits & RE_NO_GNU_OPS))
+ return lasttok = BEGLINE; /* FIXME: should be beginning of string */
+ goto normal_char;
+
+ case '\'':
+ if (backslash && !(syntax_bits & RE_NO_GNU_OPS))
+ return lasttok = ENDLINE; /* FIXME: should be end of string */
+ goto normal_char;
+
+ case '<':
+ if (backslash && !(syntax_bits & RE_NO_GNU_OPS))
+ return lasttok = BEGWORD;
+ goto normal_char;
+
+ case '>':
+ if (backslash && !(syntax_bits & RE_NO_GNU_OPS))
+ return lasttok = ENDWORD;
+ goto normal_char;
+
+ case 'b':
+ if (backslash && !(syntax_bits & RE_NO_GNU_OPS))
+ return lasttok = LIMWORD;
+ goto normal_char;
+
+ case 'B':
+ if (backslash && !(syntax_bits & RE_NO_GNU_OPS))
+ return lasttok = NOTLIMWORD;
+ goto normal_char;
+
+ case '?':
+ if (syntax_bits & RE_LIMITED_OPS)
+ goto normal_char;
+ if (backslash != ((syntax_bits & RE_BK_PLUS_QM) != 0)) /* The operator is spelled \? exactly when RE_BK_PLUS_QM is set. */
+ goto normal_char;
+ if (!(syntax_bits & RE_CONTEXT_INDEP_OPS) && laststart)
+ goto normal_char;
+ return lasttok = QMARK;
+
+ case '*':
+ if (backslash)
+ goto normal_char;
+ if (!(syntax_bits & RE_CONTEXT_INDEP_OPS) && laststart)
+ goto normal_char;
+ return lasttok = STAR;
+
+ case '+':
+ if (syntax_bits & RE_LIMITED_OPS)
+ goto normal_char;
+ if (backslash != ((syntax_bits & RE_BK_PLUS_QM) != 0))
+ goto normal_char;
+ if (!(syntax_bits & RE_CONTEXT_INDEP_OPS) && laststart)
+ goto normal_char;
+ return lasttok = PLUS;
+
+ case '{':
+ if (!(syntax_bits & RE_INTERVALS))
+ goto normal_char;
+ if (backslash != ((syntax_bits & RE_NO_BK_BRACES) == 0))
+ goto normal_char;
+ if (!(syntax_bits & RE_CONTEXT_INDEP_OPS) && laststart)
+ goto normal_char;
+
+ /* Cases:
+ {M} - exact count
+ {M,} - minimum count, maximum is infinity
+ {,N} - 0 through N
+ {,} - 0 to infinity (same as '*')
+ {M,N} - M through N
+ The interval is scanned with the local pointer P; lexptr and
+ lexleft are updated only once the interval has been accepted,
+ so a rejected interval can still be lexed as ordinary
+ characters. */
+ {
+ char const *p = lexptr;
+ char const *lim = p + lexleft;
+ minrep = maxrep = -1; /* -1 marks a bound that was not given. */
+ for (; p != lim && ISASCIIDIGIT (*p); p++)
+ {
+ if (minrep < 0)
+ minrep = *p - '0';
+ else
+ minrep = MIN (RE_DUP_MAX + 1, minrep * 10 + *p - '0');
+ }
+ if (p != lim)
+ {
+ if (*p != ',')
+ maxrep = minrep;
+ else
+ {
+ if (minrep < 0)
+ minrep = 0;
+ while (++p != lim && ISASCIIDIGIT (*p))
+ {
+ if (maxrep < 0)
+ maxrep = *p - '0';
+ else
+ maxrep = MIN (RE_DUP_MAX + 1, maxrep * 10 + *p - '0');
+ }
+ }
+ }
+ if (! ((! backslash || (p != lim && *p++ == '\\'))
+ && p != lim && *p++ == '}'
+ && 0 <= minrep && (maxrep < 0 || minrep <= maxrep)))
+ {
+ if (syntax_bits & RE_INVALID_INTERVAL_ORD)
+ goto normal_char;
+ dfaerror (_("invalid content of \\{\\}"));
+ }
+ if (RE_DUP_MAX < maxrep)
+ dfaerror (_("regular expression too big"));
+ lexptr = p;
+ lexleft = lim - p;
+ }
+ laststart = false;
+ return lasttok = REPMN;
+
+ case '|':
+ if (syntax_bits & RE_LIMITED_OPS)
+ goto normal_char;
+ if (backslash != ((syntax_bits & RE_NO_BK_VBAR) == 0))
+ goto normal_char;
+ laststart = true;
+ return lasttok = OR;
+
+ case '\n':
+ if (syntax_bits & RE_LIMITED_OPS
+ || backslash || !(syntax_bits & RE_NEWLINE_ALT))
+ goto normal_char;
+ laststart = true;
+ return lasttok = OR;
+
+ case '(':
+ if (backslash != ((syntax_bits & RE_NO_BK_PARENS) == 0))
+ goto normal_char;
+ ++parens;
+ laststart = true;
+ return lasttok = LPAREN;
+
+ case ')':
+ if (backslash != ((syntax_bits & RE_NO_BK_PARENS) == 0))
+ goto normal_char;
+ if (parens == 0 && syntax_bits & RE_UNMATCHED_RIGHT_PAREN_ORD)
+ goto normal_char;
+ --parens;
+ laststart = false;
+ return lasttok = RPAREN;
+
+ case '.':
+ if (backslash)
+ goto normal_char;
+ if (dfa->multibyte)
+ {
+ /* In multibyte environment period must match with a single
+ character not a byte. So we use ANYCHAR. */
+ laststart = false;
+ return lasttok = ANYCHAR;
+ }
+ zeroset (ccl);
+ notset (ccl);
+ if (!(syntax_bits & RE_DOT_NEWLINE))
+ clrbit ('\n', ccl);
+ if (syntax_bits & RE_DOT_NOT_NULL)
+ clrbit ('\0', ccl);
+ laststart = false;
+ return lasttok = CSET + charclass_index (ccl);
+
+ case 's':
+ case 'S':
+ if (!backslash || (syntax_bits & RE_NO_GNU_OPS))
+ goto normal_char;
+ if (!dfa->multibyte)
+ {
+ zeroset (ccl);
+ for (c2 = 0; c2 < NOTCHAR; ++c2)
+ if (isspace (c2))
+ setbit (c2, ccl);
+ if (c == 'S')
+ notset (ccl);
+ laststart = false;
+ return lasttok = CSET + charclass_index (ccl);
+ }
+
+ /* FIXME: see if optimizing this, as is done with ANYCHAR and
+ add_utf8_anychar, makes sense. */
+
+ /* \s and \S are documented to be equivalent to [[:space:]] and
+ [^[:space:]] respectively, so tell the lexer to process those
+ strings, each minus its "already processed" '['. */
+ PUSH_LEX_STATE (c == 's' ? "[:space:]]" : "^[:space:]]");
+
+ lasttok = parse_bracket_exp ();
+
+ POP_LEX_STATE ();
+
+ laststart = false;
+ return lasttok;
+
+ case 'w':
+ case 'W':
+ if (!backslash || (syntax_bits & RE_NO_GNU_OPS))
+ goto normal_char;
+
+ if (!dfa->multibyte)
+ {
+ zeroset (ccl);
+ for (c2 = 0; c2 < NOTCHAR; ++c2)
+ if (unibyte_word_constituent (c2))
+ setbit (c2, ccl);
+ if (c == 'W')
+ notset (ccl);
+ laststart = false;
+ return lasttok = CSET + charclass_index (ccl);
+ }
+
+ /* FIXME: see if optimizing this, as is done with ANYCHAR and
+ add_utf8_anychar, makes sense. */
+
+ /* \w and \W are documented to be equivalent to [_[:alnum:]] and
+ [^_[:alnum:]] respectively, so tell the lexer to process those
+ strings, each minus its "already processed" '['. */
+ PUSH_LEX_STATE (c == 'w' ? "_[:alnum:]]" : "^_[:alnum:]]");
+
+ lasttok = parse_bracket_exp ();
+
+ POP_LEX_STATE ();
+
+ laststart = false;
+ return lasttok;
+
+ case '[':
+ if (backslash)
+ goto normal_char;
+ laststart = false;
+ return lasttok = parse_bracket_exp ();
+
+ default:
+ normal_char:
+ laststart = false;
+ /* For multibyte character sets, folding is done in atom. Always
+ return WCHAR. */
+ if (dfa->multibyte)
+ return lasttok = WCHAR;
+
+ if (case_fold && isalpha (c))
+ {
+ zeroset (ccl);
+ setbit_case_fold_c (c, ccl);
+ return lasttok = CSET + charclass_index (ccl);
+ }
+
+ return lasttok = c;
+ }
+ }
+
+ /* The above loop should consume at most a backslash
+ and some other character. */
+ abort ();
+ return END; /* keeps pedantic compilers happy. */
+}
+
+/* Recursive descent parser for regular expressions. */
+
+static token tok; /* Lookahead token: the latest result of
+ lex that the parser has not consumed. */
+static size_t depth; /* Current depth of a hypothetical stack
+ holding deferred productions. This is
+ used to determine the depth that will be
+ required of the real stack later on in
+ dfaanalyze. addtok_mb adjusts it for
+ each token added and records the maximum
+ reached in dfa->depth. */
+
+/* Append token T to the token array of the dfa being built, growing the
+   array (and, for a multibyte dfa, the parallel multibyte_prop array)
+   when it is full.  MBPROP is stored alongside T only when the dfa is
+   multibyte.  The hypothetical parse-stack depth is then updated:
+   binary operators pop one entry, unary operators leave it alone, and
+   every operand pushes one.  All operands except EMPTY also count as
+   leaves, and a BACKREF clears dfa->fast.  */
+static void
+addtok_mb (token t, int mbprop)
+{
+  if (dfa->tindex == dfa->talloc)
+    {
+      dfa->tokens = x2nrealloc (dfa->tokens, &dfa->talloc,
+                                sizeof *dfa->tokens);
+      if (dfa->multibyte)
+        dfa->multibyte_prop = xnrealloc (dfa->multibyte_prop, dfa->talloc,
+                                         sizeof *dfa->multibyte_prop);
+    }
+
+  if (dfa->multibyte)
+    dfa->multibyte_prop[dfa->tindex] = mbprop;
+  dfa->tokens[dfa->tindex] = t;
+  dfa->tindex++;
+
+  if (t == CAT || t == OR)
+    {
+      /* Two operands are replaced by one result.  */
+      depth--;
+    }
+  else if (t != QMARK && t != STAR && t != PLUS)
+    {
+      /* An operand: EMPTY, BACKREF or any other leaf.  */
+      if (t == BACKREF)
+        dfa->fast = false;
+      if (t != EMPTY)
+        dfa->nleaves++;
+      depth++;
+    }
+
+  if (dfa->depth < depth)
+    dfa->depth = depth;
+}
+
+static void addtok_wc (wint_t wc);
+
+/* Add token T to the parse tree; the depth bookkeeping is done by
+   addtok_mb.  An MBCSET in a multibyte dfa is not stored as such.
+   Instead the wide characters collected in the most recent
+   mb_char_classes entry are emitted one by one and joined with OR, and
+   then, unless that entry's cset is -1, its single-byte class is added
+   as a CSET token and joined with OR as well.  */
+static void
+addtok (token t)
+{
+  if (!dfa->multibyte || t != MBCSET)
+    {
+      addtok_mb (t, 3);
+      return;
+    }
+
+  {
+    struct mb_char_classes *mbc = &dfa->mbcsets[dfa->nmbcsets - 1];
+    bool have_operand = false;
+    size_t k;
+
+    /* Expand the wide characters into an alternation for better
+       performance.  This does not require UTF-8.  */
+    for (k = 0; k < mbc->nchars; k++)
+      {
+        addtok_wc (mbc->chars[k]);
+        if (have_operand)
+          addtok (OR);
+        have_operand = true;
+      }
+    mbc->nchars = 0;
+
+    /* With the characters already emitted, the set may now be empty
+       (cset == -1), in which case nothing more is added.  */
+    if (mbc->cset != -1)
+      {
+        addtok (CSET + mbc->cset);
+        if (have_operand)
+          addtok (OR);
+      }
+  }
+}
+
+/* Add the multibyte encoding of WC to the parse tree as one atom: each
+   byte becomes a token, and every byte after the first is followed by
+   CAT, so the whole character forms a single subexpression.
+
+   For instance "<mb1><mb2>", each three bytes long, yields
+     <mb1(1st-byte)><mb1(2nd-byte)><CAT><mb1(3rd-byte)><CAT>
+     <mb2(1st-byte)><mb2(2nd-byte)><CAT><mb2(3rd-byte)><CAT><CAT>
+   where the final CAT is added by the caller.
+
+   The multibyte property passed to addtok_mb is 3 for the only byte of
+   a one-byte character, 1 for the first byte of a longer one, 2 for
+   its last byte and 0 for bytes in between.  cur_mb_len is set to the
+   number of bytes emitted.  */
+static void
+addtok_wc (wint_t wc)
+{
+  unsigned char bytes[MB_LEN_MAX];
+  mbstate_t state = { 0 };
+  size_t nbytes = wcrtomb ((char *) bytes, wc, &state);
+  int k;
+
+  if (nbytes == (size_t) -1)
+    {
+      /* Stop-gap for an unencodable WC: emit a single zero byte, since
+         skipping the addtok_mb call altogether can corrupt the heap.  */
+      bytes[0] = 0;
+      cur_mb_len = 1;
+    }
+  else
+    cur_mb_len = nbytes;
+
+  if (cur_mb_len == 1)
+    addtok_mb (bytes[0], 3);
+  else
+    addtok_mb (bytes[0], 1);
+
+  k = 1;
+  while (k < cur_mb_len)
+    {
+      int prop = (k == cur_mb_len - 1) ? 2 : 0;
+      addtok_mb (bytes[k], prop);
+      addtok (CAT);
+      k++;
+    }
+}
+
+/* Add to the parse tree a subexpression built from byte classes that
+   matches one UTF-8 encoded character.  The five classes are registered
+   with charclass_index the first time through and cached in
+   dfa->utf8_anychar_classes.  The one-byte class honours RE_DOT_NEWLINE
+   and RE_DOT_NOT_NULL.  */
+static void
+add_utf8_anychar (void)
+{
+  enum { NCLASSES = 5 };
+  static charclass const utf8_classes[NCLASSES] = {
+    /* A: 80-bf, continuation (non-leading) bytes.  */
+    {0, 0, 0, 0, CHARCLASS_WORD_MASK, CHARCLASS_WORD_MASK, 0, 0},
+
+    /* B: 00-7f, a complete 1-byte sequence.  */
+    {CHARCLASS_WORD_MASK, CHARCLASS_WORD_MASK, CHARCLASS_WORD_MASK,
+     CHARCLASS_WORD_MASK, 0, 0, 0, 0},
+
+    /* C: c2-df, lead byte of a 2-byte sequence.  */
+    {0, 0, 0, 0, 0, 0, ~3 & CHARCLASS_WORD_MASK, 0},
+
+    /* D: e0-ef, lead byte of a 3-byte sequence.  */
+    {0, 0, 0, 0, 0, 0, 0, 0xffff},
+
+    /* E: f0-f7, lead byte of a 4-byte sequence.  */
+    {0, 0, 0, 0, 0, 0, 0, 0xff0000}
+  };
+  unsigned int k;
+
+  /* Register the five classes on first use.  */
+  if (dfa->utf8_anychar_classes[0] == 0)
+    {
+      k = 0;
+      while (k < NCLASSES)
+        {
+          charclass set;
+          copyset (utf8_classes[k], set);
+          if (k == 1)
+            {
+              if (!(syntax_bits & RE_DOT_NEWLINE))
+                clrbit ('\n', set);
+              if (syntax_bits & RE_DOT_NOT_NULL)
+                clrbit ('\0', set);
+            }
+          dfa->utf8_anychar_classes[k] = CSET + charclass_index (set);
+          k++;
+        }
+    }
+
+  /* A valid UTF-8 character is
+
+       ([0x00-0x7f]
+        |[0xc2-0xdf][0x80-0xbf]
+        |[0xe0-0xef][0x80-0xbf][0x80-0xbf]
+        |[0xf0-0xf7][0x80-0xbf][0x80-0xbf][0x80-0xbf])
+
+     or, with the class names used in the table, "B|CA|DAA|EAAA".
+     Factoring out the trailing continuation class A gives
+     "B|(C|(D|EA)A)A", which in the reverse Polish notation of the
+     token buffer is "B C D E A CAT OR A CAT OR A CAT OR".  */
+  for (k = 1; k < NCLASSES; k++)
+    addtok (dfa->utf8_anychar_classes[k]);
+  for (k = NCLASSES - 1; k > 1; k--)
+    {
+      addtok (dfa->utf8_anychar_classes[0]);
+      addtok (CAT);
+      addtok (OR);
+    }
+}
+
+/* The parser accepts the following grammar and emits the parse tree in
+   postfix form into the dfa's token array.
+
+   regexp:
+     regexp OR branch
+     branch
+
+   branch:
+     branch closure
+     closure
+
+   closure:
+     closure QMARK
+     closure STAR
+     closure PLUS
+     closure REPMN
+     atom
+
+   atom:
+     <normal character>
+     <multibyte character>
+     ANYCHAR
+     MBCSET
+     CSET
+     BACKREF
+     BEGLINE
+     ENDLINE
+     BEGWORD
+     ENDWORD
+     LIMWORD
+     NOTLIMWORD
+     LPAREN regexp RPAREN
+     <empty>  */
+
+/* Parse one atom starting at the lookahead token.  Every alternative
+   except <empty> consumes its token(s) and leaves the next one in tok;
+   <empty> adds an EMPTY token and consumes nothing.  */
+static void
+atom (void)
+{
+  if (tok == WCHAR)
+    {
+      if (wctok != WEOF)
+        {
+          addtok_wc (wctok);
+
+          if (case_fold)
+            {
+              /* OR in each case-folded counterpart of the character.  */
+              wchar_t alt[CASE_FOLDED_BUFSIZE];
+              unsigned int nalt = case_folded_counterparts (wctok, alt);
+              unsigned int k = 0;
+              while (k < nalt)
+                {
+                  addtok_wc (alt[k]);
+                  addtok (OR);
+                  k++;
+                }
+            }
+        }
+      else
+        addtok (BACKREF);
+
+      tok = lex ();
+      return;
+    }
+
+  if (tok == ANYCHAR && using_utf8 ())
+    {
+      /* In UTF-8, expand the period into a series of CSETs that
+         describe a valid UTF-8 character, which avoids the slow
+         multibyte path.  The same would probably be profitable and
+         correct for any encoding, but the expansion has to be written
+         by hand as in add_utf8_anychar; UTF-8 comes first because it
+         is the most used and its structure makes correctness more
+         obvious.  */
+      add_utf8_anychar ();
+      tok = lex ();
+      return;
+    }
+
+  if ((0 <= tok && tok < NOTCHAR) || CSET <= tok || tok == BACKREF
+      || tok == BEGLINE || tok == ENDLINE || tok == BEGWORD
+      || tok == ANYCHAR || tok == MBCSET
+      || tok == ENDWORD || tok == LIMWORD || tok == NOTLIMWORD)
+    {
+      /* A token that is a leaf all by itself.  */
+      addtok (tok);
+      tok = lex ();
+      return;
+    }
+
+  if (tok == LPAREN)
+    {
+      tok = lex ();
+      regexp ();
+      if (tok != RPAREN)
+        dfaerror (_("unbalanced ("));
+      tok = lex ();
+      return;
+    }
+
+  addtok (EMPTY);
+}
+
+/* Return how many tokens make up the postfix subexpression whose last
+   token sits just before position TINDEX in dfa->tokens.  A unary
+   operator adds one to the size of its operand, a binary operator adds
+   one to the combined size of its two operands, and anything else is a
+   one-token leaf.  */
+static size_t _GL_ATTRIBUTE_PURE
+nsubtoks (size_t tindex)
+{
+  token last = dfa->tokens[tindex - 1];
+
+  if (last == QMARK || last == STAR || last == PLUS)
+    return 1 + nsubtoks (tindex - 1);
+
+  if (last == CAT || last == OR)
+    {
+      /* The right operand ends just before the operator; the left
+         operand ends where the right one begins.  */
+      size_t right = nsubtoks (tindex - 1);
+      return 1 + right + nsubtoks (tindex - 1 - right);
+    }
+
+  return 1;
+}
+
+/* Append a copy of the NTOKENS tokens starting at TINDEX to the end of
+   the token array.  In a multibyte dfa each copy keeps the multibyte
+   property of its source token; otherwise the property 3 is passed.  */
+static void
+copytoks (size_t tindex, size_t ntokens)
+{
+  size_t k;
+
+  for (k = 0; k < ntokens; k++)
+    {
+      size_t src = tindex + k;
+      if (dfa->multibyte)
+        addtok_mb (dfa->tokens[src], dfa->multibyte_prop[src]);
+      else
+        addtok_mb (dfa->tokens[src], 3);
+    }
+}
+
+/* Parse a closure: an atom followed by any number of QMARK, STAR, PLUS
+   or REPMN operators.  An interval is expanded using the bounds that
+   lex left in minrep and maxrep (maxrep < 0 meaning no upper bound):
+   the operand is copied as many times as needed, the optional copies
+   being marked with QMARK.  An interval whose bounds are both zero
+   removes its operand from the token array and parses a fresh closure
+   in its place.  */
+static void
+closure (void)
+{
+  atom ();
+
+  for (;;)
+    {
+      if (tok == REPMN)
+        {
+          if (minrep == 0 && maxrep == 0)
+            {
+              /* Zero repetitions: drop the operand's tokens.  */
+              dfa->tindex -= nsubtoks (dfa->tindex);
+              tok = lex ();
+              closure ();
+            }
+          else
+            {
+              size_t ntokens = nsubtoks (dfa->tindex);
+              size_t tindex = dfa->tindex - ntokens;
+              int rep = 1;
+
+              if (maxrep < 0)
+                addtok (PLUS);
+              if (minrep == 0)
+                addtok (QMARK);
+
+              /* Mandatory copies beyond the first.  */
+              while (rep < minrep)
+                {
+                  copytoks (tindex, ntokens);
+                  addtok (CAT);
+                  rep++;
+                }
+
+              /* Optional copies up to the maximum.  */
+              while (rep < maxrep)
+                {
+                  copytoks (tindex, ntokens);
+                  addtok (QMARK);
+                  addtok (CAT);
+                  rep++;
+                }
+
+              tok = lex ();
+            }
+        }
+      else if (tok == QMARK || tok == STAR || tok == PLUS)
+        {
+          addtok (tok);
+          tok = lex ();
+        }
+      else
+        break;
+    }
+}
+
+/* Parse a branch: one or more closures, each after the first joined to
+   what precedes it with CAT.  The branch ends at RPAREN, at OR, or at
+   any negative token.  */
+static void
+branch (void)
+{
+  closure ();
+
+  for (;;)
+    {
+      if (tok == RPAREN || tok == OR || tok < 0)
+        break;
+      closure ();
+      addtok (CAT);
+    }
+}
+
+/* Parse a regexp: one or more branches separated by OR tokens.  Each
+   branch after the first is followed in the postfix output by an OR
+   token that joins it to what came before.  */
+static void
+regexp (void)
+{
+  branch ();
+
+  for (;;)
+    {
+      if (tok != OR)
+        break;
+      tok = lex ();
+      branch ();
+      addtok (OR);
+    }
+}
+
+/* Main entry point for the parser.  Parse the LEN bytes starting at S,
+   which may include NUL characters, and append the resulting tokens to
+   the struct dfa that D points to.  The new regexp is terminated by the
+   token END - D->nregexps followed by CAT; if D already held a regexp,
+   an OR joins the new one to it.  D->nregexps is incremented.  Errors
+   are reported through dfaerror.  */
+void
+dfaparse (char const *s, size_t len, struct dfa *d)
+{
+  /* Reset the lexer and parser state for this pattern.  */
+  dfa = d;
+  lexptr = s;
+  lexleft = len;
+  lasttok = END;
+  laststart = true;
+  parens = 0;
+
+  if (dfa->multibyte)
+    {
+      cur_mb_len = 0;
+      memset (&d->mbs, 0, sizeof d->mbs);
+    }
+
+  if (!syntax_bits_set)
+    dfaerror (_("no syntax specified"));
+
+  tok = lex ();
+  depth = d->depth;
+
+  regexp ();
+
+  /* Anything left over can only be a stray closing parenthesis.  */
+  if (tok != END)
+    dfaerror (_("unbalanced )"));
+
+  addtok (END - d->nregexps);
+  addtok (CAT);
+  if (d->nregexps)
+    addtok (OR);
+
+  d->nregexps++;
+}
+
+/* Some primitives for operating on sets of positions.  */
+
+/* Make DST hold the same elements as SRC.  If DST's array is too small
+   it is freed and a new one is allocated; its old contents are about
+   to be overwritten, so they are not preserved.  */
+static void
+copy (position_set const *src, position_set * dst)
+{
+  size_t const count = src->nelem;
+
+  if (count > dst->alloc)
+    {
+      free (dst->elems);
+      dst->alloc = count;
+      dst->elems = x2nrealloc (NULL, &dst->alloc, sizeof *dst->elems);
+    }
+
+  dst->nelem = count;
+  memcpy (dst->elems, src->elems, count * sizeof *dst->elems);
+}
+
+/* Initialize S as an empty set with a newly allocated array that has
+   room for SIZE positions.  Any array S referred to before the call is
+   not freed.  */
+static void
+alloc_position_set (position_set * s, size_t size)
+{
+  s->nelem = 0;
+  s->alloc = size;
+  s->elems = xnmalloc (size, sizeof *s->elems);
+}
+
+/* Insert position P into set S, which is kept sorted by decreasing
+   index.  If S already has an entry with P.index, P's constraint is
+   merged (logically ORed) into that entry instead.  The array is
+   passed through maybe_realloc before a new entry is stored.  */
+static void
+insert (position p, position_set * s)
+{
+  size_t const n = s->nelem;
+  size_t first = 0;
+  size_t last = n;
+
+  /* Binary search for the first slot whose index is <= P.index.  */
+  while (first < last)
+    {
+      size_t probe = (first + last) / 2;
+      if (p.index < s->elems[probe].index)
+        first = probe + 1;
+      else
+        last = probe;
+    }
+
+  if (first < n && s->elems[first].index == p.index)
+    s->elems[first].constraint |= p.constraint;
+  else
+    {
+      size_t k = n;
+
+      s->elems = maybe_realloc (s->elems, n, &s->alloc, sizeof *s->elems);
+
+      /* Shift the tail up by one slot to open a gap at FIRST.  */
+      while (k > first)
+        {
+          s->elems[k] = s->elems[k - 1];
+          k--;
+        }
+      s->elems[first] = p;
+      s->nelem++;
+    }
+}
+
+/* Store in M the union of the position sets S1 and S2, exactly as if
+   the positions of both had been inserted into an initially empty set:
+   the result is sorted by decreasing index, and a position present in
+   both sets appears once with the two constraints ORed together.  If
+   M's array cannot hold S1->nelem + S2->nelem entries it is freed and
+   replaced first.  */
+static void
+merge (position_set const *s1, position_set const *s2, position_set * m)
+{
+  size_t i1 = 0;
+  size_t i2 = 0;
+  size_t const needed = s1->nelem + s2->nelem;
+
+  if (m->alloc < needed)
+    {
+      free (m->elems);
+      m->elems = maybe_realloc (NULL, needed, &m->alloc, sizeof *m->elems);
+    }
+
+  m->nelem = 0;
+
+  /* Standard two-way merge while both inputs have elements left.  */
+  while (i1 < s1->nelem && i2 < s2->nelem)
+    {
+      position *slot = &m->elems[m->nelem++];
+
+      if (s1->elems[i1].index > s2->elems[i2].index)
+        *slot = s1->elems[i1++];
+      else if (s1->elems[i1].index < s2->elems[i2].index)
+        *slot = s2->elems[i2++];
+      else
+        {
+          *slot = s1->elems[i1++];
+          slot->constraint |= s2->elems[i2++].constraint;
+        }
+    }
+
+  /* At most one of the inputs still has a tail to copy.  */
+  for (; i1 < s1->nelem; i1++)
+    m->elems[m->nelem++] = s1->elems[i1];
+  for (; i2 < s2->nelem; i2++)
+    m->elems[m->nelem++] = s2->elems[i2];
+}
+
+/* Remove from S the first element whose index equals P.index, closing
+   the gap by moving the following elements down one slot.  S is left
+   unchanged if it has no such element.  P's constraint is ignored.  */
+static void
+delete (position p, position_set * s)
+{
+  size_t k = 0;
+
+  while (k < s->nelem && s->elems[k].index != p.index)
+    k++;
+
+  if (k == s->nelem)
+    return;
+
+  s->nelem--;
+  for (; k < s->nelem; k++)
+    s->elems[k] = s->elems[k + 1];
+}
+
+/* Find the index of the state corresponding to the given position set with
+ the given preceding context, or create a new state if there is no such
+ state. Context tells whether we got here on a newline or letter.
+ An existing state matches only if its hash, element count and context
+ are equal and every element agrees in both index and constraint. A new
+ state gets its own copy of S and is appended at index D->sindex, which
+ is then incremented. */
+static state_num
+state_index (struct dfa *d, position_set const *s, int context)
+{
+ size_t hash = 0;
+ int constraint;
+ state_num i, j;
+
+ for (i = 0; i < s->nelem; ++i)
+ hash ^= s->elems[i].index + s->elems[i].constraint;
+
+ /* Try to find a state that exactly matches the proposed one. */
+ for (i = 0; i < d->sindex; ++i)
+ {
+ if (hash != d->states[i].hash || s->nelem != d->states[i].elems.nelem
+ || context != d->states[i].context)
+ continue;
+ for (j = 0; j < s->nelem; ++j)
+ if (s->elems[j].constraint
+ != d->states[i].elems.elems[j].constraint
+ || s->elems[j].index != d->states[i].elems.elems[j].index)
+ break;
+ if (j == s->nelem)
+ return i;
+ }
+
+#ifdef DEBUG
+ fprintf (stderr, "new state %zd\n nextpos:", i);
+ for (j = 0; j < s->nelem; ++j)
+ {
+ fprintf (stderr, " %zu:", s->elems[j].index);
+ prtok (d->tokens[s->elems[j].index]);
+ }
+ fprintf (stderr, "\n context:");
+ if (context ^ CTX_ANY)
+ {
+ if (context & CTX_NONE)
+ fprintf (stderr, " CTX_NONE");
+ if (context & CTX_LETTER)
+ fprintf (stderr, " CTX_LETTER");
+ if (context & CTX_NEWLINE)
+ fprintf (stderr, " CTX_NEWLINE");
+ }
+ else
+ fprintf (stderr, " CTX_ANY");
+ fprintf (stderr, "\n");
+#endif
+
+ /* We'll have to create a new state. The search loop above ran to
+ completion, so I now equals D->sindex, the first unused slot. */
+ d->states = maybe_realloc (d->states, d->sindex, &d->salloc,
+ sizeof *d->states);
+ d->states[i].hash = hash;
+ alloc_position_set (&d->states[i].elems, s->nelem);
+ copy (s, &d->states[i].elems);
+ d->states[i].context = context;
+ d->states[i].constraint = 0;
+ d->states[i].first_end = 0;
+ d->states[i].mbps.nelem = 0;
+ d->states[i].mbps.elems = NULL;
+
+ for (j = 0; j < s->nelem; ++j)
+ if (d->tokens[s->elems[j].index] < 0) /* NOTE(review): negative tokens are presumably the END - N markers appended by dfaparse -- confirm against the token enum. */
+ {
+ constraint = s->elems[j].constraint;
+ if (SUCCEEDS_IN_CONTEXT (constraint, context, CTX_ANY))
+ d->states[i].constraint |= constraint;
+ if (!d->states[i].first_end) /* Only the first such token found is recorded. */
+ d->states[i].first_end = d->tokens[s->elems[j].index];
+ }
+ else if (d->tokens[s->elems[j].index] == BACKREF)
+ d->states[i].constraint = NO_CONSTRAINT;
+
+ ++d->sindex;
+
+ return i;
+}
+
+/* Find the epsilon closure of a set of positions. If any position of the set
+ contains a symbol that matches the empty string in some context, replace
+ that position with the elements of its follow labeled with an appropriate
+ constraint. Repeat exhaustively until no funny positions are left.
+ S->elems must be large enough to hold the result.
+ VISITED must have room for D->tindex entries. It is cleared here the
+ first time a position needs expanding and then records which tokens
+ have already been expanded, so that each is expanded at most once per
+ call. */
+static void
+epsclosure (position_set *s, struct dfa const *d, char *visited)
+{
+ size_t i, j;
+ position p, old;
+ bool initialized = false;
+
+ for (i = 0; i < s->nelem; ++i)
+ if (d->tokens[s->elems[i].index] >= NOTCHAR
+ && d->tokens[s->elems[i].index] != BACKREF
+ && d->tokens[s->elems[i].index] != ANYCHAR
+ && d->tokens[s->elems[i].index] != MBCSET
+ && d->tokens[s->elems[i].index] < CSET)
+ {
+ if (!initialized)
+ {
+ memset (visited, 0, d->tindex * sizeof (*visited));
+ initialized = true;
+ }
+ old = s->elems[i];
+ p.constraint = old.constraint;
+ delete (s->elems[i], s);
+ if (visited[old.index])
+ {
+ --i; /* Slot I now holds the next element; the loop's ++i revisits it. I is unsigned, so wrapping below zero here is well defined. */
+ continue;
+ }
+ visited[old.index] = 1;
+ switch (d->tokens[old.index])
+ {
+ case BEGLINE:
+ p.constraint &= BEGLINE_CONSTRAINT;
+ break;
+ case ENDLINE:
+ p.constraint &= ENDLINE_CONSTRAINT;
+ break;
+ case BEGWORD:
+ p.constraint &= BEGWORD_CONSTRAINT;
+ break;
+ case ENDWORD:
+ p.constraint &= ENDWORD_CONSTRAINT;
+ break;
+ case LIMWORD:
+ p.constraint &= LIMWORD_CONSTRAINT;
+ break;
+ case NOTLIMWORD:
+ p.constraint &= NOTLIMWORD_CONSTRAINT;
+ break;
+ default:
+ break;
+ }
+ for (j = 0; j < d->follows[old.index].nelem; ++j)
+ {
+ p.index = d->follows[old.index].elems[j].index;
+ insert (p, s);
+ }
+ /* Force rescan to start at the beginning. The loop's ++i wraps
+ this back to zero. */
+ i = -1;
+ }
+}
+
+/* Return the set of contexts (an OR of CTX_NEWLINE, CTX_LETTER and
+   CTX_NONE) for which at least one character of C qualifies:
+   CTX_NEWLINE if C contains eolbyte, CTX_LETTER if C shares a member
+   with the letters class, and CTX_NONE if C has a member outside both
+   the letters and newline classes.  */
+
+static int
+charclass_context (charclass c)
+{
+  int context = tstbit (eolbyte, c) ? CTX_NEWLINE : 0;
+  unsigned int w;
+
+  for (w = 0; w < CHARCLASS_WORDS; w++)
+    {
+      if ((c[w] & letters[w]) != 0)
+        context |= CTX_LETTER;
+      if ((c[w] & ~(letters[w] | newline[w])) != 0)
+        context |= CTX_NONE;
+    }
+
+  return context;
+}
+
+/* Return the contexts on which the position set S depends.  Each
+   context in the returned set (call it SC) may have a follow set that
+   differs both from those of the other contexts in SC and from that of
+   the complement set (SC ^ CTX_ANY), whereas all contexts in the
+   complement set share one follow set.  CTX_NEWLINE is included when
+   some element's constraint is PREV_NEWLINE_DEPENDENT, and CTX_LETTER
+   when some element's constraint is PREV_LETTER_DEPENDENT.  */
+
+static int _GL_ATTRIBUTE_PURE
+state_separate_contexts (position_set const *s)
+{
+  int wanted = 0;
+  size_t k;
+
+  for (k = s->nelem; k-- > 0;)
+    {
+      if (PREV_NEWLINE_DEPENDENT (s->elems[k].constraint))
+        wanted |= CTX_NEWLINE;
+      if (PREV_LETTER_DEPENDENT (s->elems[k].constraint))
+        wanted |= CTX_LETTER;
+    }
+
+  return wanted;
+}
+
+
+/* Perform bottom-up analysis on the parse tree, computing various functions.
+ Note that at this point, we're pretending constructs like \< are real
+ characters rather than constraints on what can follow them.
+
+ Nullable: A node is nullable if it is at the root of a regexp that can
+ match the empty string.
+ * EMPTY leaves are nullable.
+ * No other leaf is nullable.
+ * A QMARK or STAR node is nullable.
+ * A PLUS node is nullable if its argument is nullable.
+ * A CAT node is nullable if both its arguments are nullable.
+ * An OR node is nullable if either argument is nullable.
+
+ Firstpos: The firstpos of a node is the set of positions (nonempty leaves)
+ that could correspond to the first character of a string matching the
+ regexp rooted at the given node.
+ * EMPTY leaves have empty firstpos.
+ * The firstpos of a nonempty leaf is that leaf itself.
+ * The firstpos of a QMARK, STAR, or PLUS node is the firstpos of its
+ argument.
+ * The firstpos of a CAT node is the firstpos of the left argument, union
+ the firstpos of the right if the left argument is nullable.
+ * The firstpos of an OR node is the union of firstpos of each argument.
+
+ Lastpos: The lastpos of a node is the set of positions that could
+ correspond to the last character of a string matching the regexp at
+ the given node.
+ * EMPTY leaves have empty lastpos.
+ * The lastpos of a nonempty leaf is that leaf itself.
+ * The lastpos of a QMARK, STAR, or PLUS node is the lastpos of its
+ argument.
+ * The lastpos of a CAT node is the lastpos of its right argument, union
+ the lastpos of the left if the right argument is nullable.
+ * The lastpos of an OR node is the union of the lastpos of each argument.
+
+ Follow: The follow of a position is the set of positions that could
+ correspond to the character following a character matching the node in
+ a string matching the regexp. At this point we consider special symbols
+ that match the empty string in some context to be just normal characters.
+ Later, if we find that a special symbol is in a follow set, we will
+ replace it with the elements of its follow, labeled with an appropriate
+ constraint.
+ * Every node in the firstpos of the argument of a STAR or PLUS node is in
+ the follow of every node in the lastpos.
+ * Every node in the firstpos of the second argument of a CAT node is in
+ the follow of every node in the lastpos of the first argument.
+
+ Because of the postfix representation of the parse tree, the depth-first
+ analysis is conveniently done by a linear scan with the aid of a stack.
+ Sets are stored as arrays of the elements, obeying a stack-like allocation
+ scheme; the number of elements in each set deeper in the stack can be
+ used to determine the address of a particular set's array. */
+void
+dfaanalyze (struct dfa *d, int searchflag)
+{
+ /* Array allocated to hold position sets. */
+ position *posalloc = xnmalloc (d->nleaves, 2 * sizeof *posalloc);
+ /* Firstpos and lastpos elements. */
+ position *firstpos = posalloc + d->nleaves;
+ position *lastpos = firstpos + d->nleaves;
+
+ /* Stack for element counts and nullable flags. */
+ struct
+ {
+ /* Whether the entry is nullable. */
+ bool nullable;
+
+ /* Counts of firstpos and lastpos sets. */
+ size_t nfirstpos;
+ size_t nlastpos;
+ } *stkalloc = xnmalloc (d->depth, sizeof *stkalloc), *stk = stkalloc;
+
+ position_set tmp; /* Temporary set for merging sets. */
+ position_set merged; /* Result of merging sets. */
+ int separate_contexts; /* Context wanted by some position. */
+ size_t i, j;
+ position *pos;
+ char *visited = xnmalloc (d->tindex, sizeof *visited);
+
+#ifdef DEBUG
+ fprintf (stderr, "dfaanalyze:\n");
+ for (i = 0; i < d->tindex; ++i)
+ {
+ fprintf (stderr, " %zu:", i);
+ prtok (d->tokens[i]);
+ }
+ putc ('\n', stderr);
+#endif
+
+ d->searchflag = searchflag != 0;
+ alloc_position_set (&merged, d->nleaves);
+ d->follows = xcalloc (d->tindex, sizeof *d->follows);
+
+ for (i = 0; i < d->tindex; ++i)
+ {
+ switch (d->tokens[i])
+ {
+ case EMPTY:
+ /* The empty set is nullable. */
+ stk->nullable = true;
+
+ /* The firstpos and lastpos of the empty leaf are both empty. */
+ stk->nfirstpos = stk->nlastpos = 0;
+ stk++;
+ break;
+
+ case STAR:
+ case PLUS:
+ /* Every element in the firstpos of the argument is in the follow
+ of every element in the lastpos. */
+ tmp.nelem = stk[-1].nfirstpos;
+ tmp.elems = firstpos;
+ pos = lastpos;
+ for (j = 0; j < stk[-1].nlastpos; ++j)
+ {
+ merge (&tmp, &d->follows[pos[j].index], &merged);
+ copy (&merged, &d->follows[pos[j].index]);
+ }
+ /* fallthrough */
+
+ case QMARK:
+ /* A QMARK or STAR node is automatically nullable. */
+ if (d->tokens[i] != PLUS)
+ stk[-1].nullable = true;
+ break;
+
+ case CAT:
+ /* Every element in the firstpos of the second argument is in the
+ follow of every element in the lastpos of the first argument. */
+ tmp.nelem = stk[-1].nfirstpos;
+ tmp.elems = firstpos;
+ pos = lastpos + stk[-1].nlastpos;
+ for (j = 0; j < stk[-2].nlastpos; ++j)
+ {
+ merge (&tmp, &d->follows[pos[j].index], &merged);
+ copy (&merged, &d->follows[pos[j].index]);
+ }
+
+ /* The firstpos of a CAT node is the firstpos of the first argument,
+ union that of the second argument if the first is nullable. */
+ if (stk[-2].nullable)
+ stk[-2].nfirstpos += stk[-1].nfirstpos;
+ else
+ firstpos += stk[-1].nfirstpos;
+
+ /* The lastpos of a CAT node is the lastpos of the second argument,
+ union that of the first argument if the second is nullable. */
+ if (stk[-1].nullable)
+ stk[-2].nlastpos += stk[-1].nlastpos;
+ else
+ {
+ pos = lastpos + stk[-2].nlastpos;
+ for (j = stk[-1].nlastpos; j-- > 0;)
+ pos[j] = lastpos[j];
+ lastpos += stk[-2].nlastpos;
+ stk[-2].nlastpos = stk[-1].nlastpos;
+ }
+
+ /* A CAT node is nullable if both arguments are nullable. */
+ stk[-2].nullable &= stk[-1].nullable;
+ stk--;
+ break;
+
+ case OR:
+ /* The firstpos is the union of the firstpos of each argument. */
+ stk[-2].nfirstpos += stk[-1].nfirstpos;
+
+ /* The lastpos is the union of the lastpos of each argument. */
+ stk[-2].nlastpos += stk[-1].nlastpos;
+
+ /* An OR node is nullable if either argument is nullable. */
+ stk[-2].nullable |= stk[-1].nullable;
+ stk--;
+ break;
+
+ default:
+ /* Anything else is a nonempty position. (Note that special
+ constructs like \< are treated as nonempty strings here;
+ an "epsilon closure" effectively makes them nullable later.
+ Backreferences have to get a real position so we can detect
+ transitions on them later. But they are nullable. */
+ stk->nullable = d->tokens[i] == BACKREF;
+
+ /* This position is in its own firstpos and lastpos. */
+ stk->nfirstpos = stk->nlastpos = 1;
+ stk++;
+
+ --firstpos, --lastpos;
+ firstpos->index = lastpos->index = i;
+ firstpos->constraint = lastpos->constraint = NO_CONSTRAINT;
+
+ /* Allocate the follow set for this position. */
+ alloc_position_set (&d->follows[i], 1);
+ break;
+ }
+#ifdef DEBUG
+ /* ... balance the above nonsyntactic #ifdef goo... */
+ fprintf (stderr, "node %zu:", i);
+ prtok (d->tokens[i]);
+ putc ('\n', stderr);
+ fprintf (stderr,
+ stk[-1].nullable ? " nullable: yes\n" : " nullable: no\n");
+ fprintf (stderr, " firstpos:");
+ for (j = stk[-1].nfirstpos; j-- > 0;)
+ {
+ fprintf (stderr, " %zu:", firstpos[j].index);
+ prtok (d->tokens[firstpos[j].index]);
+ }
+ fprintf (stderr, "\n lastpos:");
+ for (j = stk[-1].nlastpos; j-- > 0;)
+ {
+ fprintf (stderr, " %zu:", lastpos[j].index);
+ prtok (d->tokens[lastpos[j].index]);
+ }
+ putc ('\n', stderr);
+#endif
+ }
+
+ /* For each follow set that is the follow set of a real position, replace
+ it with its epsilon closure. */
+ for (i = 0; i < d->tindex; ++i)
+ if (d->tokens[i] < NOTCHAR || d->tokens[i] == BACKREF
+ || d->tokens[i] == ANYCHAR || d->tokens[i] == MBCSET
+ || d->tokens[i] >= CSET)
+ {
+#ifdef DEBUG
+ fprintf (stderr, "follows(%zu:", i);
+ prtok (d->tokens[i]);
+ fprintf (stderr, "):");
+ for (j = d->follows[i].nelem; j-- > 0;)
+ {
+ fprintf (stderr, " %zu:", d->follows[i].elems[j].index);
+ prtok (d->tokens[d->follows[i].elems[j].index]);
+ }
+ putc ('\n', stderr);
+#endif
+ copy (&d->follows[i], &merged);
+ epsclosure (&merged, d, visited);
+ copy (&merged, &d->follows[i]);
+ }
+
+ /* Get the epsilon closure of the firstpos of the regexp. The result will
+ be the set of positions of state 0. */
+ merged.nelem = 0;
+ for (i = 0; i < stk[-1].nfirstpos; ++i)
+ insert (firstpos[i], &merged);
+ epsclosure (&merged, d, visited);
+
+ /* Build the initial state. */
+ separate_contexts = state_separate_contexts (&merged);
+ if (separate_contexts & CTX_NEWLINE)
+ state_index (d, &merged, CTX_NEWLINE);
+ d->initstate_others = d->min_trcount
+ = state_index (d, &merged, separate_contexts ^ CTX_ANY);
+ if (separate_contexts & CTX_LETTER)
+ d->initstate_letter = d->min_trcount
+ = state_index (d, &merged, CTX_LETTER);
+ else
+ d->initstate_letter = d->initstate_others;
+ d->min_trcount++;
+
+ free (posalloc);
+ free (stkalloc);
+ free (merged.elems);
+ free (visited);
+}
+
+
+/* Find, for each character, the transition out of state s of d, and store
+ it in the appropriate slot of trans.
+
+ S is an index into D->states. TRANS must have room for NOTCHAR entries,
+ and every one of them is written: when D is a searching matcher the
+ default entry is a state holding the positions of state 0, otherwise
+ the default entry is -1.
+
+ As a side effect, positions of S whose token is ANYCHAR or MBCSET are
+ inserted into D->states[S].mbps instead of being placed in a group here.
+
+ We divide the positions of s into groups (positions can appear in more
+ than one group). Each group is labeled with a set of characters that
+ every position in the group matches (taking into account, if necessary,
+ preceding context information of s). For each group, find the union
+ of its elements' follows. This set is the set of positions of the
+ new state. For each character in the group's label, set the transition
+ on this character to be to a state corresponding to the set's positions,
+ and its associated backward context information, if necessary.
+
+ If we are building a searching matcher, we include the positions of state
+ 0 in every state.
+
+ The collection of groups is constructed by building an equivalence-class
+ partition of the positions of s.
+
+ For each position, find the set of characters C that it matches. Eliminate
+ any characters from C that fail on grounds of backward context.
+
+ Search through the groups, looking for a group whose label L has nonempty
+ intersection with C. If L - C is nonempty, create a new group labeled
+ L - C and having the same positions as the current group, and set L to
+ the intersection of L and C. Insert the position in this group, set
+ C = C - L, and resume scanning.
+
+ If after comparing with every group there are characters remaining in C,
+ create a new group labeled with the characters of C and insert this
+ position in that group. */
+void
+dfastate (state_num s, struct dfa *d, state_num trans[])
+{
+ leaf_set grps[NOTCHAR]; /* As many as will ever be needed. */
+ charclass labels[NOTCHAR]; /* Labels corresponding to the groups. */
+ size_t ngrps = 0; /* Number of groups actually used. */
+ position pos; /* Current position being considered. */
+ charclass matches; /* Set of matching characters. */
+ charclass_word matchesf; /* Nonzero if matches is nonempty. */
+ charclass intersect; /* Intersection with some label set. */
+ charclass_word intersectf; /* Nonzero if intersect is nonempty. */
+ charclass leftovers; /* Stuff in the label that didn't match. */
+ charclass_word leftoversf; /* Nonzero if leftovers is nonempty. */
+ position_set follows; /* Union of the follows of some group. */
+ position_set tmp; /* Temporary space for merging sets. */
+ int possible_contexts; /* Contexts that this group can match. */
+ int separate_contexts; /* Context that new state wants to know. */
+ state_num state; /* New state. */
+ state_num state_newline; /* New state on a newline transition. */
+ state_num state_letter; /* New state on a letter transition. */
+ bool next_isnt_1st_byte = false; /* We can't add state0. */
+ size_t i, j, k;
+
+#ifdef DEBUG
+ fprintf (stderr, "build state %td\n", s);
+#endif
+
+ zeroset (matches);
+
+ for (i = 0; i < d->states[s].elems.nelem; ++i)
+ {
+ pos = d->states[s].elems.elems[i];
+ if (d->tokens[pos.index] >= 0 && d->tokens[pos.index] < NOTCHAR)
+ setbit (d->tokens[pos.index], matches);
+ else if (d->tokens[pos.index] >= CSET)
+ copyset (d->charclasses[d->tokens[pos.index] - CSET], matches);
+ else
+ {
+ if (d->tokens[pos.index] == MBCSET
+ || d->tokens[pos.index] == ANYCHAR)
+ {
+ /* ANYCHAR and MBCSET must match with a single character, so we
+ must put it to d->states[s].mbps, which contains the positions
+ which can match with a single character not a byte. */
+ if (d->states[s].mbps.nelem == 0)
+ alloc_position_set (&d->states[s].mbps, 1);
+ insert (pos, &(d->states[s].mbps));
+ }
+ continue;
+ }
+
+ /* Some characters may need to be eliminated from matches because
+ they fail in the current context. */
+ if (pos.constraint != NO_CONSTRAINT)
+ {
+ if (!SUCCEEDS_IN_CONTEXT (pos.constraint,
+ d->states[s].context, CTX_NEWLINE))
+ for (j = 0; j < CHARCLASS_WORDS; ++j)
+ matches[j] &= ~newline[j];
+ if (!SUCCEEDS_IN_CONTEXT (pos.constraint,
+ d->states[s].context, CTX_LETTER))
+ for (j = 0; j < CHARCLASS_WORDS; ++j)
+ matches[j] &= ~letters[j];
+ if (!SUCCEEDS_IN_CONTEXT (pos.constraint,
+ d->states[s].context, CTX_NONE))
+ for (j = 0; j < CHARCLASS_WORDS; ++j)
+ matches[j] &= letters[j] | newline[j];
+
+ /* If there are no characters left, there's no point in going on. */
+ for (j = 0; j < CHARCLASS_WORDS && !matches[j]; ++j)
+ continue;
+ if (j == CHARCLASS_WORDS)
+ continue;
+ }
+
+#ifdef DEBUG
+ fprintf (stderr, " nextpos %zu:", pos.index);
+ prtok (d->tokens[pos.index]);
+ fprintf (stderr, " of");
+ for (j = 0; j < NOTCHAR; j++)
+ if (tstbit (j, matches))
+ fprintf (stderr, " 0x%02zx", j);
+ fprintf (stderr, "\n");
+#endif
+
+ for (j = 0; j < ngrps; ++j)
+ {
+ /* If matches contains a single character only, and the current
+ group's label doesn't contain that character, go on to the
+ next group. */
+ if (d->tokens[pos.index] >= 0 && d->tokens[pos.index] < NOTCHAR
+ && !tstbit (d->tokens[pos.index], labels[j]))
+ continue;
+
+ /* Check if this group's label has a nonempty intersection with
+ matches. */
+ intersectf = 0;
+ for (k = 0; k < CHARCLASS_WORDS; ++k)
+ intersectf |= intersect[k] = matches[k] & labels[j][k];
+ if (!intersectf)
+ continue;
+
+ /* It does; now find the set differences both ways. */
+ leftoversf = matchesf = 0;
+ for (k = 0; k < CHARCLASS_WORDS; ++k)
+ {
+ /* Even an optimizing compiler can't know this for sure. */
+ charclass_word match = matches[k], label = labels[j][k];
+
+ leftoversf |= leftovers[k] = ~match & label;
+ matchesf |= matches[k] = match & ~label;
+ }
+
+ /* If there were leftovers, create a new group labeled with them. */
+ if (leftoversf)
+ {
+ copyset (leftovers, labels[ngrps]);
+ copyset (intersect, labels[j]);
+ grps[ngrps].elems = xnmalloc (d->nleaves,
+ sizeof *grps[ngrps].elems);
+ memcpy (grps[ngrps].elems, grps[j].elems,
+ sizeof (grps[j].elems[0]) * grps[j].nelem);
+ grps[ngrps].nelem = grps[j].nelem;
+ ++ngrps;
+ }
+
+ /* Put the position in the current group. The constraint is
+ irrelevant here. */
+ grps[j].elems[grps[j].nelem++] = pos.index;
+
+ /* If every character matching the current position has been
+ accounted for, we're done. */
+ if (!matchesf)
+ break;
+ }
+
+ /* If we've passed the last group, and there are still characters
+ unaccounted for, then we'll have to create a new group. */
+ if (j == ngrps)
+ {
+ copyset (matches, labels[ngrps]);
+ zeroset (matches);
+ grps[ngrps].elems = xnmalloc (d->nleaves, sizeof *grps[ngrps].elems);
+ grps[ngrps].nelem = 1;
+ grps[ngrps].elems[0] = pos.index;
+ ++ngrps;
+ }
+ }
+
+ alloc_position_set (&follows, d->nleaves);
+ alloc_position_set (&tmp, d->nleaves);
+
+ /* If we are a searching matcher, the default transition is to a state
+ containing the positions of state 0, otherwise the default transition
+ is to fail miserably. */
+ if (d->searchflag)
+ {
+ /* Find the state(s) corresponding to the positions of state 0. */
+ copy (&d->states[0].elems, &follows);
+ separate_contexts = state_separate_contexts (&follows);
+ state = state_index (d, &follows, separate_contexts ^ CTX_ANY);
+ if (separate_contexts & CTX_NEWLINE)
+ state_newline = state_index (d, &follows, CTX_NEWLINE);
+ else
+ state_newline = state;
+ if (separate_contexts & CTX_LETTER)
+ state_letter = state_index (d, &follows, CTX_LETTER);
+ else
+ state_letter = state;
+
+ for (i = 0; i < NOTCHAR; ++i)
+ trans[i] = unibyte_word_constituent (i) ? state_letter : state;
+ trans[eolbyte] = state_newline;
+ }
+ else
+ for (i = 0; i < NOTCHAR; ++i)
+ trans[i] = -1;
+
+ for (i = 0; i < ngrps; ++i)
+ {
+ follows.nelem = 0;
+
+ /* Find the union of the follows of the positions of the group.
+ This is a hideously inefficient loop. Fix it someday. */
+ for (j = 0; j < grps[i].nelem; ++j)
+ for (k = 0; k < d->follows[grps[i].elems[j]].nelem; ++k)
+ insert (d->follows[grps[i].elems[j]].elems[k], &follows);
+
+ if (d->multibyte)
+ {
+ /* If a token in follows.elems is not the 1st byte of a multibyte
+ character, then the states of follows must accept bytes
+ which are not the 1st byte of a multibyte character.
+ So, if a state of follows encounters a byte, it must not be
+ the 1st byte of a multibyte character nor a single byte character.
+ We cancel adding state[0].follows to the next state, because
+ state[0] must accept a 1st byte.
+
+ For example, we assume <sb a> is a certain single byte
+ character, <mb A> is a certain multibyte character, and the
+ codepoint of <sb a> equals the 2nd byte of the codepoint of
+ <mb A>.
+ When state[0] accepts <sb a>, state[i] transits to state[i+1]
+ by accepting the 1st byte of <mb A>, and state[i+1]
+ accepts the 2nd byte of <mb A>; if state[i+1] encounters the
+ codepoint of <sb a>, it must not be <sb a> but the 2nd byte of
+ <mb A>, so we cannot add state[0]. */
+
+ next_isnt_1st_byte = false;
+ for (j = 0; j < follows.nelem; ++j)
+ {
+ if (!(d->multibyte_prop[follows.elems[j].index] & 1))
+ {
+ next_isnt_1st_byte = true;
+ break;
+ }
+ }
+ }
+
+ /* If we are building a searching matcher, throw in the positions
+ of state 0 as well. */
+ if (d->searchflag && (!d->multibyte || !next_isnt_1st_byte))
+ {
+ merge (&d->states[0].elems, &follows, &tmp);
+ copy (&tmp, &follows);
+ }
+
+ /* Find out if the new state will want any context information. */
+ possible_contexts = charclass_context (labels[i]);
+ separate_contexts = state_separate_contexts (&follows);
+
+ /* Find the state(s) corresponding to the union of the follows. */
+ if ((separate_contexts & possible_contexts) != possible_contexts)
+ state = state_index (d, &follows, separate_contexts ^ CTX_ANY);
+ else
+ state = -1;
+ if (separate_contexts & possible_contexts & CTX_NEWLINE)
+ state_newline = state_index (d, &follows, CTX_NEWLINE);
+ else
+ state_newline = state;
+ if (separate_contexts & possible_contexts & CTX_LETTER)
+ state_letter = state_index (d, &follows, CTX_LETTER);
+ else
+ state_letter = state;
+
+#ifdef DEBUG
+ fprintf (stderr, "group %zu\n nextpos:", i);
+ for (j = 0; j < grps[i].nelem; ++j)
+ {
+ fprintf (stderr, " %zu:", grps[i].elems[j]);
+ prtok (d->tokens[grps[i].elems[j]]);
+ }
+ fprintf (stderr, "\n follows:");
+ for (j = 0; j < follows.nelem; ++j)
+ {
+ fprintf (stderr, " %zu:", follows.elems[j].index);
+ prtok (d->tokens[follows.elems[j].index]);
+ }
+ fprintf (stderr, "\n states:");
+ if (possible_contexts & CTX_NEWLINE)
+ fprintf (stderr, " CTX_NEWLINE:%td", state_newline);
+ if (possible_contexts & CTX_LETTER)
+ fprintf (stderr, " CTX_LETTER:%td", state_letter);
+ if (possible_contexts & CTX_NONE)
+ fprintf (stderr, " CTX_NONE:%td", state);
+ fprintf (stderr, "\n");
+#endif
+
+ /* Set the transitions for each character in the current label. */
+ for (j = 0; j < CHARCLASS_WORDS; ++j)
+ for (k = 0; k < CHARCLASS_WORD_BITS; ++k)
+ if (labels[i][j] >> k & 1)
+ {
+ int c = j * CHARCLASS_WORD_BITS + k;
+
+ if (c == eolbyte)
+ trans[c] = state_newline;
+ else if (unibyte_word_constituent (c))
+ trans[c] = state_letter;
+ else if (c < NOTCHAR)
+ trans[c] = state;
+ }
+ }
+
+#ifdef DEBUG
+ fprintf (stderr, "trans table %td", s);
+ for (i = 0; i < NOTCHAR; ++i)
+ {
+ if (!(i & 0xf))
+ fprintf (stderr, "\n");
+ fprintf (stderr, " %2td", trans[i]);
+ }
+ fprintf (stderr, "\n");
+#endif
+
+ for (i = 0; i < ngrps; ++i)
+ free (grps[i].elems);
+ free (follows.elems);
+ free (tmp.elems);
+}
+
+/* Grow D's per-state arrays (trans, fails, success, newlines) when needed,
+   so that NEW_STATE is a valid index into each of them.  Newly added
+   trans and fails slots are set to NULL.  */
+static void
+realloc_trans_if_necessary (struct dfa *d, state_num new_state)
+{
+  state_num next_fresh = d->tralloc;
+  state_num **base;
+  size_t nslots, nstates;
+
+  /* NEW_STATE already has a slot: nothing to do.  */
+  if (new_state < next_fresh)
+    return;
+
+  /* The underlying allocation starts one element before D->trans, so
+     that D->trans[-1] exists; that extra element is kept NULL.  */
+  base = d->trans ? d->trans - 1 : NULL;
+  nslots = new_state + 1;
+  base = x2nrealloc (base, &nslots, sizeof *base);
+  base[0] = NULL;
+  d->trans = base + 1;
+
+  /* One of the NSLOTS elements is the extra leading one.  */
+  nstates = nslots - 1;
+  d->tralloc = nstates;
+
+  d->fails = xnrealloc (d->fails, nstates, sizeof *d->fails);
+  d->success = xnrealloc (d->success, nstates, sizeof *d->success);
+  d->newlines = xnrealloc (d->newlines, nstates, sizeof *d->newlines);
+
+  /* No table has been built yet for any of the new states.  */
+  while (next_fresh < nstates)
+    {
+      d->trans[next_fresh] = NULL;
+      d->fails[next_fresh] = NULL;
+      next_fresh++;
+    }
+}
+
+/* Some routines for manipulating a compiled dfa's transition tables.
+ Each state may or may not have a transition table; if it does, and it
+ is a non-accepting state, then d->trans[state] points to its table.
+ If it is an accepting state then d->fails[state] points to its table.
+ If it has no table at all, then d->trans[state] is NULL.
+ TODO: Improve this comment, get rid of the unnecessary redundancy.
+
+ build_state builds the table for state S of D. It records in
+ D->success[S] the contexts in which S accepts, fills a freshly allocated
+ NOTCHAR-entry table via dfastate, grows D's per-state arrays to cover the
+ largest state that table mentions, moves the newline transition into
+ D->newlines[S] (leaving -1 in the table's eolbyte slot), and finally
+ stores the table in D->fails[S] if S is accepting, else in D->trans[S].
+
+ D->success[S] is written before any reallocation takes place, so S is
+ assumed to be below D->tralloc already. */
+
+static void
+build_state (state_num s, struct dfa *d)
+{
+ state_num *trans; /* The new transition table. */
+ state_num i, maxstate;
+
+ /* Set an upper limit on the number of transition tables that will ever
+ exist at once. 1024 is arbitrary. The idea is that the frequently
+ used transition tables will be quickly rebuilt, whereas the ones that
+ were only needed once or twice will be cleared away. However, do not
+ clear the initial D->min_trcount states, since they are always used. */
+ if (d->trcount >= 1024)
+ {
+ for (i = d->min_trcount; i < d->tralloc; ++i)
+ {
+ free (d->trans[i]);
+ free (d->fails[i]);
+ d->trans[i] = d->fails[i] = NULL;
+ }
+ d->trcount = d->min_trcount;
+ }
+
+ ++d->trcount;
+
+ /* Set up the success bits for this state. */
+ d->success[s] = 0;
+ if (ACCEPTS_IN_CONTEXT (d->states[s].context, CTX_NEWLINE, s, *d))
+ d->success[s] |= CTX_NEWLINE;
+ if (ACCEPTS_IN_CONTEXT (d->states[s].context, CTX_LETTER, s, *d))
+ d->success[s] |= CTX_LETTER;
+ if (ACCEPTS_IN_CONTEXT (d->states[s].context, CTX_NONE, s, *d))
+ d->success[s] |= CTX_NONE;
+
+ trans = xmalloc (NOTCHAR * sizeof *trans);
+ dfastate (s, d, trans);
+
+ /* Now go through the new transition table, and make sure that the trans
+ and fail arrays are allocated large enough to hold a pointer for the
+ largest state mentioned in the table. */
+ maxstate = -1;
+ for (i = 0; i < NOTCHAR; ++i)
+ if (maxstate < trans[i])
+ maxstate = trans[i];
+ realloc_trans_if_necessary (d, maxstate);
+
+ /* Keep the newline transition in a special place so we can use it as
+ a sentinel. */
+ d->newlines[s] = trans[eolbyte];
+ trans[eolbyte] = -1;
+
+ if (ACCEPTING (s, *d))
+ d->fails[s] = trans;
+ else
+ d->trans[s] = trans;
+}
+
+/* Multibyte character handling sub-routines for dfaexec. */
+
+/* Return values of transit_state_singlebyte, and
+ transit_state_consume_1char. */
+typedef enum
+{
+ TRANSIT_STATE_IN_PROGRESS, /* State transition has not finished. */
+ TRANSIT_STATE_DONE, /* State transition has finished. */
+ TRANSIT_STATE_END_BUFFER /* Reached the end of the buffer.
+ NOTE(review): neither of the two functions
+ named above assigns this value as written;
+ confirm whether it is still needed. */
+} status_transit_state;
+
+/* Consume the single byte *P and move from state S to the state stored in
+   '*next_state'.  This is almost the same as the state transition routine
+   in dfaexec_main, except that exactly one transition is made.  Transition
+   tables that do not exist yet are built on demand.  A negative state is
+   replaced by state 0, both before looking up a table and after a lookup
+   in D->trans.  The loop below can only be left once a transition has been
+   made, so the result is always TRANSIT_STATE_DONE.  */
+static status_transit_state
+transit_state_singlebyte (struct dfa *d, state_num s, unsigned char const *p,
+                          state_num * next_state)
+{
+  state_num cur = s;
+
+  for (;;)
+    {
+      state_num *row = d->trans[cur];
+
+      if (row != NULL)
+        {
+          /* Non-accepting state whose table exists.  */
+          cur = row[*p];
+          if (cur < 0)
+            cur = 0;
+          break;
+        }
+
+      if (cur < 0)
+        {
+          /* No real state: restart from state 0.  */
+          cur = 0;
+          continue;
+        }
+
+      row = d->fails[cur];
+      if (row != NULL)
+        {
+          /* Accepting state whose table exists.  */
+          cur = row[*p];
+          break;
+        }
+
+      /* No table at all yet: build it, then look again.  */
+      build_state (cur, d);
+    }
+
+  *next_state = cur;
+  return TRANSIT_STATE_DONE;
+}
+
+/* Match a "." against the wide character WC in the context of state S.
+   POS is the position of the ".".  Return MBCLEN, the length of WC in
+   bytes, if it matches, and 0 otherwise.  */
+static int
+match_anychar (struct dfa *d, state_num s, position pos,
+               wint_t wc, size_t mbclen)
+{
+  bool refused;
+  int context;
+
+  /* Check syntax bits: newline, NUL and WEOF may be excluded.  */
+  if (wc == (wchar_t) '\n')
+    refused = !(syntax_bits & RE_DOT_NEWLINE);
+  else if (wc == (wchar_t) '\0')
+    refused = (syntax_bits & RE_DOT_NOT_NULL) != 0;
+  else
+    refused = wc == WEOF;
+
+  if (refused)
+    return 0;
+
+  /* The position's constraint must also hold for this character.  */
+  context = wchar_context (wc);
+  if (SUCCEEDS_IN_CONTEXT (pos.constraint, d->states[s].context, context))
+    return mbclen;
+
+  return 0;
+}
+
+/* Check whether each of the positions in 'd->states[s].mbps' can match the
+   character WC, whose encoding is MBCLEN bytes long.  Return an array
+   parallel to 'd->states[s].mbps.elems': element I holds the number of
+   bytes that position I matches, or 0 if it does not match.
+
+   The returned array is D->mb_match_lens.  It belongs to D, it is
+   overwritten by the next call, and the caller must NOT free it.
+   P is not used.  */
+static int *
+check_matching_with_multibyte_ops (struct dfa *d, state_num s,
+                                   char const *p, wint_t wc, size_t mbclen)
+{
+  size_t i;
+  int *rarray;
+
+  rarray = d->mb_match_lens;
+  for (i = 0; i < d->states[s].mbps.nelem; ++i)
+    {
+      position pos = d->states[s].mbps.elems[i];
+      switch (d->tokens[pos.index])
+        {
+        case ANYCHAR:
+          rarray[i] = match_anychar (d, s, pos, wc, mbclen);
+          break;
+        default:
+          /* Cannot happen.  Still, record "no match" rather than leave
+             an indeterminate or stale length for the callers to read.  */
+          rarray[i] = 0;
+          break;
+        }
+    }
+  return rarray;
+}
+
+/* Consume a single character and enumerate all of the positions which can
+   be the next position from the state 's'.
+
+   WC and MBCLEN are inputs: the character at *PP and its length in bytes.
+   'match_lens' is an input too.  It can be NULL, in which case it is
+   computed here when 's' has multibyte positions; it can also be the
+   output of check_matching_with_multibyte_ops, passed in as an
+   optimization.
+
+   The outputs are *PP, which is advanced by MBCLEN bytes, and
+   D->mb_follows, which receives the set this function enumerates.
+   The return value is the status of the last single-byte transition, or
+   TRANSIT_STATE_DONE when MBCLEN is 0.  */
+static status_transit_state
+transit_state_consume_1char (struct dfa *d, state_num s,
+                             unsigned char const **pp,
+                             wint_t wc, size_t mbclen,
+                             int *match_lens)
+{
+  size_t i, j;
+  size_t k;                     /* Same type as MBCLEN, which bounds it.  */
+  state_num s1, s2;
+  status_transit_state rs = TRANSIT_STATE_DONE;
+
+  if (! match_lens && d->states[s].mbps.nelem != 0)
+    match_lens = check_matching_with_multibyte_ops (d, s, (char const *) *pp,
+                                                    wc, mbclen);
+
+  /* Calculate the state which can be reached from the state 's' by
+     consuming 'mbclen' single bytes from the buffer.  */
+  s1 = s;
+  for (k = 0; k < mbclen; k++)
+    {
+      s2 = s1;
+      rs = transit_state_singlebyte (d, s2, (*pp)++, &s1);
+    }
+  copy (&d->states[s1].elems, &d->mb_follows);
+
+  /* Add all of the positions which can be reached from 's' by consuming
+     a single character.  MATCH_LENS is only read when 's' has multibyte
+     positions, in which case it is non-NULL by now.  */
+  for (i = 0; i < d->states[s].mbps.nelem; i++)
+    {
+      if (match_lens[i] == mbclen)
+        for (j = 0; j < d->follows[d->states[s].mbps.elems[i].index].nelem;
+             j++)
+          insert (d->follows[d->states[s].mbps.elems[i].index].elems[j],
+                  &d->mb_follows);
+    }
+
+  /* FIXME: this return value is always ignored.  */
+  return rs;
+}
+
+/* Transit state from s, then return new state and update the pointer of the
+ buffer. This function is for some operator which can match with a multi-
+ byte character or a collating element (which may be multi characters).
+
+ *PP is the current input position and is advanced past what was
+ consumed; END is passed to mbs_to_wchar as the limit of the input. */
+static state_num
+transit_state (struct dfa *d, state_num s, unsigned char const **pp,
+ unsigned char const *end)
+{
+ state_num s1;
+ int mbclen; /* The length of current input multibyte character. */
+ int maxlen = 0;
+ size_t i, j;
+ int *match_lens = NULL;
+ size_t nelem = d->states[s].mbps.nelem; /* Just a alias. */
+ unsigned char const *p1 = *pp;
+ wint_t wc;
+
+ if (nelem > 0)
+ /* This state has (a) multibyte operator(s).
+ We check whether each of them can match or not. */
+ {
+ /* Note: the array returned here is D->mb_match_lens, which belongs
+ to D; it must not be freed here. */
+ mbclen = mbs_to_wchar (&wc, (char const *) *pp, end - *pp, d);
+ match_lens = check_matching_with_multibyte_ops (d, s, (char const *) *pp,
+ wc, mbclen);
+
+ for (i = 0; i < nelem; i++)
+ /* Search the operator which match the longest string,
+ in this state. */
+ {
+ if (match_lens[i] > maxlen)
+ maxlen = match_lens[i];
+ }
+ }
+
+ if (nelem == 0 || maxlen == 0)
+ /* This state has no multibyte operator which can match.
+ We need to check only one single byte character. */
+ {
+ status_transit_state rs;
+ rs = transit_state_singlebyte (d, s, *pp, &s1);
+
+ /* We must update the pointer if state transition succeeded. */
+ if (rs == TRANSIT_STATE_DONE)
+ ++*pp;
+
+ return s1;
+ }
+
+ /* This state has some operators which can match a multibyte character. */
+ d->mb_follows.nelem = 0;
+
+ /* 'maxlen' may be longer than the length of a character, because it may
+ not be a character but a (multi character) collating element.
+ We enumerate all of the positions which 's' can reach by consuming
+ 'maxlen' bytes.
+
+ NOTE(review): in the code as written, match lengths come only from
+ match_anychar, which returns 0 or MBCLEN, so MAXLEN equals MBCLEN here
+ and the while loop below appears never to run. That loop also indexes
+ D->states[s1].mbps with a count taken from state S. Confirm before
+ relying on either. */
+ transit_state_consume_1char (d, s, pp, wc, mbclen, match_lens);
+
+ s1 = state_index (d, &d->mb_follows, wchar_context (wc));
+ realloc_trans_if_necessary (d, s1);
+
+ while (*pp - p1 < maxlen)
+ {
+ mbclen = mbs_to_wchar (&wc, (char const *) *pp, end - *pp, d);
+ transit_state_consume_1char (d, s1, pp, wc, mbclen, NULL);
+
+ for (i = 0; i < nelem; i++)
+ {
+ if (match_lens[i] == *pp - p1)
+ for (j = 0;
+ j < d->follows[d->states[s1].mbps.elems[i].index].nelem; j++)
+ insert (d->follows[d->states[s1].mbps.elems[i].index].elems[j],
+ &d->mb_follows);
+ }
+
+ s1 = state_index (d, &d->mb_follows, wchar_context (wc));
+ realloc_trans_if_necessary (d, s1);
+ }
+ return s1;
+}
+
+/* The initial state may encounter a byte which is neither a single byte
+   character nor the first byte of a multibyte character, and it would be
+   incorrect for the initial state to accept such a byte.  For example, in
+   Shift JIS the regular expression "\\" accepts the codepoint 0x5c, but
+   should not accept the second byte of the codepoint 0x815c.  So the
+   initial state must skip the bytes that do not start a character.
+
+   Starting from MBP, step over whole characters with mbs_to_wchar (using
+   the dfa D) until the position reaches or exceeds P, and return that
+   position.  If WCP is non-NULL, set *WCP to the last wide character
+   stepped over, or to WEOF if none was.
+   Both P and MBP must be no larger than END.  */
+static unsigned char const *
+skip_remains_mb (struct dfa *d, unsigned char const *p,
+                 unsigned char const *mbp, char const *end, wint_t *wcp)
+{
+  wint_t last = WEOF;
+  unsigned char const *cur;
+
+  for (cur = mbp; cur < p; )
+    cur += mbs_to_wchar (&last, (char const *) cur,
+                         end - (char const *) cur, d);
+
+  if (wcp)
+    *wcp = last;
+  return cur;
+}
+
+/* Search through a buffer looking for a match to the given struct dfa.
+ Find the first occurrence of a string matching the regexp in the
+ buffer, and the shortest possible version thereof. Return a pointer to
+ the first character after the match, or NULL if none is found. BEGIN
+ points to the beginning of the buffer, and END points to the first byte
+ after its end. Note however that we store a sentinel byte (usually
+ newline) in *END, so the actual buffer must be one byte longer; the
+ byte originally at *END is restored before returning.
+ When ALLOW_NL is nonzero, newlines may appear in the matching string.
+ If COUNT is non-NULL, increment *COUNT once for each newline processed.
+ This function itself takes no BACKREF argument. Of the dfaexec_*
+ wrappers below, only dfaexec_noop stores through BACKREF, to tell the
+ caller that the regexp uses a DFA-unfriendly construct; the caller may
+ use this to decide whether to fall back on a matcher like regex.
+ If MULTIBYTE,
+ the input consists of multibyte characters and/or encoding-error bytes.
+ Otherwise, the input consists of single-byte characters.
+ Here is the list of features that make this DFA matcher punt:
+ - [M-N]-range-in-MB-locale: regex is up to 25% faster on [a-z]
+ - back-reference: (.)\1
+ - word-delimiter-in-MB-locale: \<, \>, \b
+ */
+static inline char *
+dfaexec_main (struct dfa *d, char const *begin, char *end, int allow_nl,
+ size_t *count, bool multibyte)
+{
+ state_num s, s1; /* Current state. */
+ unsigned char const *p, *mbp; /* Current input character. */
+ state_num **trans, *t; /* Copy of d->trans so it can be optimized
+ into a register. */
+ unsigned char eol = eolbyte; /* Likewise for eolbyte. */
+ unsigned char saved_end;
+ size_t nlcount = 0;
+
+ if (!d->tralloc)
+ {
+ realloc_trans_if_necessary (d, 1);
+ build_state (0, d);
+ }
+
+ s = s1 = 0;
+ p = mbp = (unsigned char const *) begin;
+ trans = d->trans;
+ saved_end = *(unsigned char *) end;
+ *end = eol;
+
+ if (multibyte)
+ {
+ memset (&d->mbs, 0, sizeof d->mbs);
+ if (! d->mb_match_lens)
+ {
+ d->mb_match_lens = xnmalloc (d->nleaves, sizeof *d->mb_match_lens);
+ alloc_position_set (&d->mb_follows, d->nleaves);
+ }
+ }
+
+ for (;;)
+ {
+ if (multibyte)
+ {
+ while ((t = trans[s]) != NULL)
+ {
+ s1 = s;
+
+ if (s < d->min_trcount)
+ {
+ if (d->min_trcount == 1)
+ {
+ if (d->states[s].mbps.nelem == 0)
+ {
+ do
+ {
+ while (t[*p] == 0)
+ p++;
+ p = mbp = skip_remains_mb (d, p, mbp, end, NULL);
+ }
+ while (t[*p] == 0);
+ }
+ else
+ p = mbp = skip_remains_mb (d, p, mbp, end, NULL);
+ }
+ else
+ {
+ wint_t wc;
+ mbp = skip_remains_mb (d, p, mbp, end, &wc);
+
+ /* If d->min_trcount is greater than 1, maybe
+ transit to another initial state after skip. */
+ if (p < mbp)
+ {
+ int context = wchar_context (wc);
+ if (context == CTX_LETTER)
+ s = d->initstate_letter;
+ else
+ /* It's CTX_NONE. CTX_NEWLINE cannot happen,
+ as we assume that a newline is always a
+ single byte character. */
+ s = d->initstate_others;
+ p = mbp;
+ s1 = s;
+ }
+ }
+ }
+
+ if (d->states[s].mbps.nelem == 0)
+ {
+ s = t[*p++];
+ continue;
+ }
+
+ /* The following code is used twice.
+ Use a macro to avoid the risk that they diverge. */
+#define State_transition() \
+ do { \
+ /* Can match with a multibyte character (and multi-character \
+ collating element). Transition table might be updated. */ \
+ s = transit_state (d, s, &p, (unsigned char *) end); \
+ \
+ /* If previous character is newline after a transition \
+ for ANYCHAR or MBCSET in non-UTF8 multibyte locales, \
+ check whether current position is beyond the end of \
+ the input buffer. Also, transit to initial state if \
+ !ALLOW_NL, even if RE_DOT_NEWLINE is set. */ \
+ if (p[-1] == eol) \
+ { \
+ if ((char *) p > end) \
+ { \
+ p = NULL; \
+ goto done; \
+ } \
+ \
+ nlcount++; \
+ \
+ if (!allow_nl) \
+ s = 0; \
+ } \
+ \
+ mbp = p; \
+ trans = d->trans; \
+ } while (0)
+
+ State_transition();
+ }
+ }
+ else
+ {
+ if (s == 0 && (t = trans[s]) != NULL)
+ {
+ while (t[*p] == 0)
+ p++;
+ s1 = 0;
+ s = t[*p++];
+ }
+
+ while ((t = trans[s]) != NULL)
+ {
+ s1 = t[*p++];
+ if ((t = trans[s1]) == NULL)
+ {
+ state_num tmp = s;
+ s = s1;
+ s1 = tmp; /* swap */
+ break;
+ }
+ s = t[*p++];
+ }
+ }
+
+ if (s < 0)
+ {
+ if ((char *) p > end || p[-1] != eol || d->newlines[s1] < 0)
+ {
+ p = NULL;
+ goto done;
+ }
+
+ /* The previous character was a newline, count it, and skip
+ checking of multibyte character boundary until here. */
+ nlcount++;
+ mbp = p;
+
+ s = allow_nl ? d->newlines[s1] : 0;
+ }
+
+ if (d->fails[s])
+ {
+ if (d->success[s] & sbit[*p])
+ goto done;
+
+ s1 = s;
+ if (multibyte)
+ State_transition();
+ else
+ s = d->fails[s][*p++];
+ }
+ else
+ {
+ if (!d->trans[s])
+ build_state (s, d);
+ trans = d->trans;
+ }
+ }
+
+ done:
+ if (count)
+ *count += nlcount;
+ *end = saved_end;
+ return (char *) p;
+}
+
+/* Specialized versions of dfaexec_main for multibyte and single-byte
+   cases.  This is for performance.  */
+
+/* Multibyte flavor.  BACKREF is accepted only so that the signature fits
+   the D->dfaexec slot; it is not used.  */
+static char *
+dfaexec_mb (struct dfa *d, char const *begin, char *end,
+            int allow_nl, size_t *count, int *backref)
+{
+  bool const multibyte = true;
+
+  return dfaexec_main (d, begin, end, allow_nl, count, multibyte);
+}
+
+/* Single-byte flavor of dfaexec_main.  BACKREF is accepted only so that
+   the signature fits the D->dfaexec slot; it is not used.  */
+static char *
+dfaexec_sb (struct dfa *d, char const *begin, char *end,
+            int allow_nl, size_t *count, int *backref)
+{
+  bool const multibyte = false;
+
+  return dfaexec_main (d, begin, end, allow_nl, count, multibyte);
+}
+
+/* Do no matching at all: set *BACKREF to 1 (when BACKREF is non-NULL) and
+   return BEGIN.  Use this wrapper for any regexp that uses a construct
+   not supported by this code.  D, END, ALLOW_NL and COUNT are ignored.
+
+   BACKREF is documented as optional for the dfaexec family, so it must
+   not be dereferenced unconditionally.  */
+static char *
+dfaexec_noop (struct dfa *d, char const *begin, char *end,
+              int allow_nl, size_t *count, int *backref)
+{
+  if (backref != NULL)
+    *backref = 1;
+  return (char *) begin;
+}
+
+/* Run the matcher that was selected for D and stored in D->dfaexec,
+   forwarding every argument unchanged.  For the dfaexec_mb and dfaexec_sb
+   implementations this amounts to
+   dfaexec_main (D, BEGIN, END, ALLOW_NL, COUNT, multibyte-or-not),
+   but faster, because each is specialized for one case.  */
+
+char *
+dfaexec (struct dfa *d, char const *begin, char *end,
+         int allow_nl, size_t *count, int *backref)
+{
+  return (*d->dfaexec) (d, begin, end, allow_nl, count, backref);
+}
+
+/* Return the superset DFA recorded in D (D->superset); this is NULL when
+   none is recorded.  */
+struct dfa *
+dfasuperset (struct dfa const *d)
+{
+  struct dfa *sup = d->superset;
+
+  return sup;
+}
+
+/* Return the value of D's 'fast' flag.  */
+bool
+dfaisfast (struct dfa const *d)
+{
+  bool fast = d->fast;
+
+  return fast;
+}
+
+/* Free the multibyte-specific data hanging off D: the multibyte_prop
+   array, every mbcsets entry's 'chars' array and the mbcsets array
+   itself, the mb_follows elements, and the mb_match_lens array.
+
+   Every freed pointer is cleared, and the mbcsets count is reset, so that
+   a repeated call is harmless and no dangling pointer is left in D.  */
+static void
+free_mbdata (struct dfa *d)
+{
+  size_t i;
+
+  free (d->multibyte_prop);
+  d->multibyte_prop = NULL;
+
+  for (i = 0; i < d->nmbcsets; ++i)
+    {
+      struct mb_char_classes *p = &(d->mbcsets[i]);
+      free (p->chars);
+    }
+  free (d->mbcsets);
+  d->mbcsets = NULL;
+  d->nmbcsets = 0;
+
+  free (d->mb_follows.elems);
+  d->mb_follows.elems = NULL;
+  d->mb_follows.nelem = 0;
+
+  /* dfaexec_main tests this pointer to decide whether the multibyte
+     scratch data still has to be allocated.  */
+  free (d->mb_match_lens);
+  d->mb_match_lens = NULL;
+}
+
+/* Initialize the components of a dfa that the other routines don't
+   initialize for themselves.  All of *D is zeroed first; then the
+   multibyte flag is taken from MB_CUR_MAX, and the matcher and the
+   'fast' flag are chosen to agree with it.  */
+void
+dfainit (struct dfa *d)
+{
+  bool const mb = MB_CUR_MAX > 1;
+
+  memset (d, 0, sizeof *d);
+  d->multibyte = mb;
+  if (mb)
+    {
+      d->dfaexec = dfaexec_mb;
+      d->fast = false;
+    }
+  else
+    {
+      d->dfaexec = dfaexec_sb;
+      d->fast = true;
+    }
+}
+
+/* Return true if every construct in D is supported by this DFA matcher.
+   BACKREF and MBCSET tokens are never supported; the word-boundary tokens
+   (BEGWORD, ENDWORD, LIMWORD, NOTLIMWORD) are supported only when D is
+   not multibyte.  */
+static bool _GL_ATTRIBUTE_PURE
+dfa_supported (struct dfa const *d)
+{
+  for (size_t i = 0; i < d->tindex; i++)
+    {
+      bool word_op = (d->tokens[i] == BEGWORD || d->tokens[i] == ENDWORD
+                      || d->tokens[i] == LIMWORD
+                      || d->tokens[i] == NOTLIMWORD);
+
+      if (word_op)
+        {
+          if (d->multibyte)
+            return false;
+        }
+      else if (d->tokens[i] == BACKREF || d->tokens[i] == MBCSET)
+        return false;
+    }
+
+  return true;
+}
+
+/* In a UTF-8 locale, switch D over to the single-byte matcher when its
+   tokens permit.  Nothing is changed outside UTF-8 or when D contains
+   an MBCSET.  An ANYCHAR token is not expected here and aborts the
+   program.  Unless D contains a BACKREF, an existing superset DFA is
+   discarded.  Finally D's multibyte data is freed and D is marked
+   single-byte.  */
+static void
+dfaoptimize (struct dfa *d)
+{
+  if (!using_utf8 ())
+    return;
+
+  bool saw_backref = false;
+  for (size_t k = 0; k < d->tindex; k++)
+    {
+      token tok = d->tokens[k];
+
+      if (tok == ANYCHAR)
+        abort ();               /* Lowered.  */
+      if (tok == MBCSET)
+        return;                 /* Requires multi-byte algorithm.  */
+      if (tok == BACKREF)
+        saw_backref = true;
+    }
+
+  if (d->superset && !saw_backref)
+    {
+      /* The superset DFA is not likely to be much faster, so remove it.  */
+      struct dfa *sup = d->superset;
+      dfafree (sup);
+      free (sup);
+      d->superset = NULL;
+    }
+
+  free_mbdata (d);
+  d->dfaexec = dfaexec_sb;
+  d->multibyte = false;
+}
+
+/* Build a single-byte "superset" of D and, if it looks useful, attach it
+ as D->superset. The superset starts as a copy of D whose multibyte
+ and state/transition fields are cleared and which gets its own
+ charclasses and tokens arrays. Its token list is D's, except that each
+ ANYCHAR, MBCSET or BACKREF becomes a CSET followed by STAR (dropping a
+ directly following QMARK, STAR or PLUS) and, when D is multibyte, each
+ word constraint becomes EMPTY. */
+static void
+dfassbuild (struct dfa *d)
+{
+ size_t i, j;
+ charclass ccl;
+ /* Set once some token has been replaced by CSET STAR. */
+ bool have_achar = false;
+ /* Set once a token below NOTCHAR, or a CSET token, has been copied. */
+ bool have_nchar = false;
+ struct dfa *sup = dfaalloc ();
+
+ /* Start from a shallow copy, then detach everything that must not be
+ shared with D. */
+ *sup = *d;
+ sup->multibyte = false;
+ sup->dfaexec = dfaexec_sb;
+ sup->multibyte_prop = NULL;
+ sup->mbcsets = NULL;
+ sup->superset = NULL;
+ sup->states = NULL;
+ sup->sindex = 0;
+ sup->follows = NULL;
+ sup->tralloc = 0;
+ sup->trans = NULL;
+ sup->fails = NULL;
+ sup->success = NULL;
+ sup->newlines = NULL;
+
+ sup->charclasses = xnmalloc (sup->calloc, sizeof *sup->charclasses);
+ if (d->cindex)
+ {
+ memcpy (sup->charclasses, d->charclasses,
+ d->cindex * sizeof *sup->charclasses);
+ }
+
+ /* Each source token yields at most two superset tokens. */
+ sup->tokens = xnmalloc (d->tindex, 2 * sizeof *sup->tokens);
+ sup->talloc = d->tindex * 2;
+
+ for (i = j = 0; i < d->tindex; i++)
+ {
+ switch (d->tokens[i])
+ {
+ case ANYCHAR:
+ case MBCSET:
+ case BACKREF:
+ /* Clear CCL and then complement it -- presumably giving the set of
+ all bytes (TODO confirm against zeroset/notset). */
+ zeroset (ccl);
+ notset (ccl);
+ sup->tokens[j++] = CSET + dfa_charclass_index (sup, ccl);
+ sup->tokens[j++] = STAR;
+ /* A repetition operator applied to the replaced token is
+ skipped, since STAR has already been emitted. */
+ if (d->tokens[i + 1] == QMARK || d->tokens[i + 1] == STAR
+ || d->tokens[i + 1] == PLUS)
+ i++;
+ have_achar = true;
+ break;
+ case BEGWORD:
+ case ENDWORD:
+ case LIMWORD:
+ case NOTLIMWORD:
+ if (d->multibyte)
+ {
+ /* These constraints aren't supported in a multibyte locale.
+ Ignore them in the superset DFA. */
+ sup->tokens[j++] = EMPTY;
+ break;
+ }
+ /* fallthrough: in a single-byte locale the constraint is copied. */
+ default:
+ sup->tokens[j++] = d->tokens[i];
+ if ((0 <= d->tokens[i] && d->tokens[i] < NOTCHAR)
+ || d->tokens[i] >= CSET)
+ have_nchar = true;
+ break;
+ }
+ }
+ sup->tindex = j;
+
+ /* Keep the superset only if it contains something concrete to look for
+ and differs from D (a replacement was made, or D is multibyte). */
+ if (have_nchar && (have_achar || d->multibyte))
+ d->superset = sup;
+ else
+ {
+ dfafree (sup);
+ free (sup);
+ }
+}
+
+/* Compile the LEN-byte pattern S into D.  D is initialized, S is parsed,
+   and a superset DFA is attempted.  If the DFA matcher supports every
+   construct in D, D is optimized and analyzed (SEARCHFLAG is passed to
+   dfaanalyze); otherwise dfaexec_noop becomes D's matcher.  A superset
+   that survived is analyzed as well and marks D as fast.  */
+void
+dfacomp (char const *s, size_t len, struct dfa *d, int searchflag)
+{
+  dfainit (d);
+  dfaparse (s, len, d);
+  dfassbuild (d);
+
+  if (!dfa_supported (d))
+    d->dfaexec = dfaexec_noop;
+  else
+    {
+      dfaoptimize (d);
+      dfaanalyze (d, searchflag);
+    }
+
+  /* dfaoptimize may have discarded the superset, so look only now.  */
+  struct dfa *sup = d->superset;
+  if (sup != NULL)
+    {
+      d->fast = true;
+      dfaanalyze (sup, searchflag);
+    }
+}
+
+/* Free the storage held by the components of a dfa, including any
+   superset DFA attached to it.  D itself is not freed; the caller that
+   obtained it from dfaalloc passes it to free afterwards.  */
+void
+dfafree (struct dfa *d)
+{
+  size_t i;
+
+  free (d->charclasses);
+  free (d->tokens);
+
+  if (d->multibyte)
+    free_mbdata (d);
+
+  for (i = 0; i < d->sindex; ++i)
+    {
+      free (d->states[i].elems.elems);
+      free (d->states[i].mbps.elems);
+    }
+  free (d->states);
+
+  if (d->follows)
+    {
+      for (i = 0; i < d->tindex; ++i)
+        free (d->follows[i].elems);
+      free (d->follows);
+    }
+
+  if (d->trans)
+    {
+      for (i = 0; i < d->tralloc; ++i)
+        {
+          free (d->trans[i]);
+          free (d->fails[i]);
+        }
+
+      /* The pointer handed to free is D->trans - 1, so the allocation
+         evidently begins one element before D->trans.  */
+      free (d->trans - 1);
+      free (d->fails);
+      free (d->newlines);
+      free (d->success);
+    }
+
+  if (d->superset)
+    {
+      /* dfassbuild obtained the superset from dfaalloc and D is its only
+         owner, so release the structure as well as its contents; calling
+         dfafree alone would leak the structure.  */
+      dfafree (d->superset);
+      free (d->superset);
+      d->superset = NULL;
+    }
+}
+
+/* Having found the postfix representation of the regular expression,
+ try to find a long sequence of characters that must appear in any line
+ containing the r.e.
+ Finding a "longest" sequence is beyond the scope here;
+ we take an easy way out and hope for the best.
+ (Take "(ab|a)b"--please.)
+
+ We do a bottom-up calculation of sequences of characters that must appear
+ in matches of r.e.'s represented by trees rooted at the nodes of the postfix
+ representation:
+ sequences that must appear at the left of the match ("left")
+ sequences that must appear at the right of the match ("right")
+ lists of sequences that must appear somewhere in the match ("in")
+ sequences that must constitute the match ("is")
+
+ When we get to the root of the tree, we use one of the longest of its
+ calculated "in" sequences as our answer.
+
+ The sequences calculated for the various types of node (in pseudo ANSI c)
+ are shown below. "p" is the operand of unary operators (and the left-hand
+ operand of binary operators); "q" is the right-hand operand of binary
+ operators.
+
+ "ZERO" means "a zero-length sequence" below.
+
+ Type left right is in
+ ---- ---- ----- -- --
+ char c # c # c # c # c
+
+ ANYCHAR ZERO ZERO ZERO ZERO
+
+ MBCSET ZERO ZERO ZERO ZERO
+
+ CSET ZERO ZERO ZERO ZERO
+
+ STAR ZERO ZERO ZERO ZERO
+
+ QMARK ZERO ZERO ZERO ZERO
+
+ PLUS p->left p->right ZERO p->in
+
+ CAT (p->is==ZERO)? (q->is==ZERO)? (p->is!=ZERO && p->in plus
+ p->left : q->right : q->is!=ZERO) ? q->in plus
+ p->is##q->left p->right##q->is p->is##q->is : p->right##q->left
+ ZERO
+
+ OR longest common longest common (do p->is and substrings common
+ leading trailing to q->is have same p->in and
+ (sub)sequence (sub)sequence q->in length and content) ?
+ of p->left of p->right
+ and q->left and q->right p->is : NULL
+
+ If there's anything else we recognize in the tree, all four sequences get set
+ to zero-length sequences. If there's something we don't recognize in the
+ tree, we just return a zero-length sequence.
+
+ Break ties in favor of infrequent letters (choosing 'zzz' in preference to
+ 'aaa')?
+
+ And ... is it here or someplace that we might ponder "optimizations" such as
+ egrep 'psi|epsilon' -> egrep 'psi'
+ egrep 'pepsi|epsilon' -> egrep 'epsi'
+ (Yes, we now find "epsi" as a "string
+ that must occur", but we might also
+ simplify the *entire* r.e. being sought)
+ grep '[c]' -> grep 'c'
+ grep '(ab|a)b' -> grep 'ab'
+ grep 'ab*' -> grep 'a'
+ grep 'a*b' -> grep 'b'
+
+ There are several issues:
+
+ Is optimization easy (enough)?
+
+ Does optimization actually accomplish anything,
+ or is the automaton you get from "psi|epsilon" (for example)
+ the same as the one you get from "psi" (for example)?
+
+ Are optimizable r.e.'s likely to be used in real-life situations
+ (something like 'ab*' is probably unlikely; something like
+ 'psi|epsilon' is likelier)? */
+
+/* Append the string NEW to the heap-allocated string OLD and return the
+   result.  If NEW is empty, OLD is returned as is; otherwise OLD is
+   resized with xrealloc, so the caller must use the returned pointer
+   from then on.  */
+static char *
+icatalloc (char *old, char const *new)
+{
+  size_t addlen = strlen (new);
+
+  if (addlen != 0)
+    {
+      size_t baselen = strlen (old);
+      old = xrealloc (old, baselen + addlen + 1);
+      /* Copy NEW's terminating NUL as well.  */
+      memcpy (old + baselen, new, addlen + 1);
+    }
+  return old;
+}
+
+/* Free every string of the null-terminated list CPP.  The array itself
+   is neither freed nor modified, so its entries dangle afterwards.  */
+static void
+freelist (char **cpp)
+{
+  for (size_t k = 0; cpp[k] != NULL; k++)
+    free (cpp[k]);
+}
+
+/* Add a copy of the first LEN bytes of NEW to the null-terminated string
+ list CPP and return the (possibly reallocated) list. Nothing is added
+ if some entry already contains the new string as a substring; entries
+ that are themselves substrings of the new string are freed and
+ removed. */
+static char **
+enlist (char **cpp, char *new, size_t len)
+{
+ size_t i, j;
+ /* From here on NEW is a private, NUL-terminated copy. */
+ new = memcpy (xmalloc (len + 1), new, len);
+ new[len] = '\0';
+ /* Is there already something in the list that's new (or longer)? */
+ for (i = 0; cpp[i] != NULL; ++i)
+ if (strstr (cpp[i], new) != NULL)
+ {
+ free (new);
+ return cpp;
+ }
+ /* The loop above ran to completion, so I is now the list length. */
+ /* Eliminate any obsoleted strings. */
+ j = 0;
+ while (cpp[j] != NULL)
+ if (strstr (new, cpp[j]) == NULL)
+ ++j;
+ else
+ {
+ /* Fill the hole with the last entry and shrink the list. When the
+ freed entry was itself the last one, slot J is left dangling,
+ but J == I then and the slot is overwritten below. */
+ free (cpp[j]);
+ if (--i == j)
+ break;
+ cpp[j] = cpp[i];
+ cpp[i] = NULL;
+ }
+ /* Add the new string. */
+ cpp = xnrealloc (cpp, i + 2, sizeof *cpp);
+ cpp[i] = new;
+ cpp[i + 1] = NULL;
+ return cpp;
+}
+
+/* Return a newly allocated, null-terminated list of the distinct common
+   substrings of LEFT and RIGHT.  For each starting position in LEFT,
+   the longest run that also occurs somewhere in RIGHT is found and
+   handed to enlist, which keeps the list free of redundant entries.  */
+static char **
+comsubs (char *left, char const *right)
+{
+  char **found = xzalloc (sizeof *found);
+
+  for (char *start = left; *start != '\0'; start++)
+    {
+      size_t best = 0;
+
+      /* Try every occurrence of *START in RIGHT.  */
+      for (char *hit = strchr (right, *start); hit != NULL;
+           hit = strchr (hit + 1, *start))
+        {
+          size_t run = 1;
+          while (start[run] != '\0' && start[run] == hit[run])
+            run++;
+          if (best < run)
+            best = run;
+        }
+
+      if (best != 0)
+        found = enlist (found, start, best);
+    }
+  return found;
+}
+
+/* Feed every string of the null-terminated list NEW to enlist on OLD and
+   return the resulting list.  NEW itself is left untouched.  */
+static char **
+addlists (char **old, char **new)
+{
+  size_t k = 0;
+
+  while (new[k] != NULL)
+    {
+      old = enlist (old, new[k], strlen (new[k]));
+      k++;
+    }
+  return old;
+}
+
+/* Return a newly allocated list holding the substrings common to some
+   string of LEFT and some string of RIGHT.  Both arguments are
+   null-terminated lists and are left unchanged.  */
+static char **
+inboth (char **left, char **right)
+{
+  char **common = xzalloc (sizeof *common);
+
+  for (char **lp = left; *lp != NULL; lp++)
+    for (char **rp = right; *rp != NULL; rp++)
+      {
+        /* Merge this pair's common substrings, then drop the
+           temporary list together with its strings.  */
+        char **shared = comsubs (*lp, *rp);
+        common = addlists (common, shared);
+        freelist (shared);
+        free (shared);
+      }
+  return common;
+}
+
+/* One operand on the stack that dfamust keeps while it walks the postfix
+ token list. */
+typedef struct must must;
+
+struct must
+{
+ /* Null-terminated list of sequences that must appear somewhere in the
+ match ("in"). */
+ char **in;
+ /* Sequence that must appear at the left of the match. */
+ char *left;
+ /* Sequence that must appear at the right of the match. */
+ char *right;
+ /* Sequence that must constitute the match; empty if there is none. */
+ char *is;
+ /* Set by dfamust for a BEGLINE operand and combined by its OR and CAT
+ cases. */
+ bool begline;
+ /* Likewise for ENDLINE. */
+ bool endline;
+ /* Next operand down the stack, or null at the bottom. */
+ must *prev;
+};
+
+/* Push a fresh operand on top of the stack MP and return it.  Its "in"
+   list is empty, its left/right/is strings are zero-filled buffers of
+   SIZE bytes each, and both line-anchor flags are clear.  */
+static must *
+allocmust (must *mp, size_t size)
+{
+  must *top = xmalloc (sizeof *top);
+
+  top->prev = mp;
+  top->begline = top->endline = false;
+  top->in = xzalloc (sizeof *top->in);
+  top->left = xzalloc (size);
+  top->right = xzalloc (size);
+  top->is = xzalloc (size);
+  return top;
+}
+
+/* Make MP describe "nothing is known": free the strings of its "in"
+   list and empty the list, truncate left/right/is to empty strings,
+   and clear both line-anchor flags.  The buffers are kept.  */
+static void
+resetmust (must *mp)
+{
+  freelist (mp->in);
+  mp->in[0] = NULL;
+
+  mp->is[0] = '\0';
+  mp->right[0] = '\0';
+  mp->left[0] = '\0';
+
+  mp->begline = mp->endline = false;
+}
+
+/* Destroy the operand MP: its three strings, the strings of its "in"
+   list, the list array, and finally MP itself.  MP->prev is not
+   followed.  */
+static void
+freemust (must *mp)
+{
+  free (mp->is);
+  free (mp->right);
+  free (mp->left);
+
+  freelist (mp->in);
+  free (mp->in);
+
+  free (mp);
+}
+
+/* Walk D's postfix token list and compute a string that must appear in
+ anything the regexp matches, using the left/right/is/in calculation
+ described in the long comment that precedes icatalloc. Operands are
+ kept on a stack of 'must' nodes linked through PREV. At END, the
+ longest "in" string of the remaining operand is chosen. Return a
+ newly allocated struct dfamust for it (release with dfamustfree), or a
+ null pointer if that string is empty. */
+struct dfamust *
+dfamust (struct dfa const *d)
+{
+ must *mp = NULL;
+ char const *result = "";
+ size_t i;
+ bool exact = false;
+ bool begline = false;
+ bool endline = false;
+ /* Whether a BEGLINE / ENDLINE token was seen anywhere in D. */
+ bool need_begline = false;
+ bool need_endline = false;
+ bool case_fold_unibyte = case_fold && MB_CUR_MAX == 1;
+
+ for (size_t ri = 0; ri < d->tindex; ++ri)
+ {
+ token t = d->tokens[ri];
+ switch (t)
+ {
+ case BEGLINE:
+ mp = allocmust (mp, 2);
+ mp->begline = true;
+ need_begline = true;
+ break;
+ case ENDLINE:
+ mp = allocmust (mp, 2);
+ mp->endline = true;
+ need_endline = true;
+ break;
+ case LPAREN:
+ case RPAREN:
+ assert (!"neither LPAREN nor RPAREN may appear here");
+
+ /* Operands about which nothing is known: push an empty node. */
+ case EMPTY:
+ case BEGWORD:
+ case ENDWORD:
+ case LIMWORD:
+ case NOTLIMWORD:
+ case BACKREF:
+ case ANYCHAR:
+ case MBCSET:
+ mp = allocmust (mp, 2);
+ break;
+
+ /* The operand may be absent altogether, so nothing is required. */
+ case STAR:
+ case QMARK:
+ resetmust (mp);
+ break;
+
+ case OR:
+ {
+ char **new;
+ must *rmp = mp;
+ must *lmp = mp = mp->prev;
+ size_t j, ln, rn, n;
+
+ /* Guaranteed to be. Unlikely, but ... */
+ if (STREQ (lmp->is, rmp->is))
+ {
+ lmp->begline &= rmp->begline;
+ lmp->endline &= rmp->endline;
+ }
+ else
+ {
+ lmp->is[0] = '\0';
+ lmp->begline = false;
+ lmp->endline = false;
+ }
+ /* Left side--easy */
+ /* Keep the longest common prefix of the two "left" strings. */
+ i = 0;
+ while (lmp->left[i] != '\0' && lmp->left[i] == rmp->left[i])
+ ++i;
+ lmp->left[i] = '\0';
+ /* Right side */
+ /* Keep the longest common suffix of the two "right" strings,
+ moved to the front of LMP->right. */
+ ln = strlen (lmp->right);
+ rn = strlen (rmp->right);
+ n = ln;
+ if (n > rn)
+ n = rn;
+ for (i = 0; i < n; ++i)
+ if (lmp->right[ln - i - 1] != rmp->right[rn - i - 1])
+ break;
+ for (j = 0; j < i; ++j)
+ lmp->right[j] = lmp->right[(ln - i) + j];
+ lmp->right[j] = '\0';
+ new = inboth (lmp->in, rmp->in);
+ freelist (lmp->in);
+ free (lmp->in);
+ lmp->in = new;
+ freemust (rmp);
+ }
+ break;
+
+ case PLUS:
+ mp->is[0] = '\0';
+ break;
+
+ case END:
+ assert (!mp->prev);
+ for (i = 0; mp->in[i] != NULL; ++i)
+ if (strlen (mp->in[i]) > strlen (result))
+ result = mp->in[i];
+ /* The result is exact when it is the entire match and every kind
+ of line anchor seen in D is accounted for by the operand. */
+ if (STREQ (result, mp->is))
+ {
+ if ((!need_begline || mp->begline) && (!need_endline
+ || mp->endline))
+ exact = true;
+ begline = mp->begline;
+ endline = mp->endline;
+ }
+ goto done;
+
+ case CAT:
+ {
+ must *rmp = mp;
+ must *lmp = mp = mp->prev;
+
+ /* In. Everything in left, plus everything in
+ right, plus concatenation of
+ left's right and right's left. */
+ lmp->in = addlists (lmp->in, rmp->in);
+ if (lmp->right[0] != '\0' && rmp->left[0] != '\0')
+ {
+ size_t lrlen = strlen (lmp->right);
+ size_t rllen = strlen (rmp->left);
+ /* TP is not NUL-terminated; enlist is given its length. */
+ char *tp = xmalloc (lrlen + rllen);
+ memcpy (tp, lmp->right, lrlen);
+ memcpy (tp + lrlen, rmp->left, rllen);
+ lmp->in = enlist (lmp->in, tp, lrlen + rllen);
+ free (tp);
+ }
+ /* Left-hand */
+ if (lmp->is[0] != '\0')
+ lmp->left = icatalloc (lmp->left, rmp->left);
+ /* Right-hand */
+ if (rmp->is[0] == '\0')
+ lmp->right[0] = '\0';
+ lmp->right = icatalloc (lmp->right, rmp->right);
+ /* Guaranteed to be */
+ if ((lmp->is[0] != '\0' || lmp->begline)
+ && (rmp->is[0] != '\0' || rmp->endline))
+ {
+ lmp->is = icatalloc (lmp->is, rmp->is);
+ lmp->endline = rmp->endline;
+ }
+ else
+ {
+ lmp->is[0] = '\0';
+ lmp->begline = false;
+ lmp->endline = false;
+ }
+ freemust (rmp);
+ }
+ break;
+
+ case '\0':
+ /* Not on *my* shift. */
+ goto done;
+
+ default:
+ if (CSET <= t)
+ {
+ /* If T is a singleton, or if case-folding in a unibyte
+ locale and T's members all case-fold to the same char,
+ convert T to one of its members. Otherwise, do
+ nothing further with T. */
+ charclass *ccl = &d->charclasses[t - CSET];
+ int j;
+ for (j = 0; j < NOTCHAR; j++)
+ if (tstbit (j, *ccl))
+ break;
+ if (! (j < NOTCHAR))
+ {
+ mp = allocmust (mp, 2);
+ break;
+ }
+ t = j;
+ while (++j < NOTCHAR)
+ if (tstbit (j, *ccl)
+ && ! (case_fold_unibyte
+ && toupper (j) == toupper (t)))
+ break;
+ if (j < NOTCHAR)
+ {
+ mp = allocmust (mp, 2);
+ break;
+ }
+ }
+
+ /* T is now a single character. Look ahead over further
+ "character CAT" token pairs so that the whole run of literal
+ characters ends up in one node; RJ stops at the first token
+ position that does not continue the run. */
+ size_t rj = ri + 2;
+ if (d->tokens[ri + 1] == CAT)
+ {
+ for (; rj < d->tindex - 1; rj += 2)
+ {
+ if ((rj != ri && (d->tokens[rj] <= 0
+ || NOTCHAR <= d->tokens[rj]))
+ || d->tokens[rj + 1] != CAT)
+ break;
+ }
+ }
+ /* One byte per character of the run, plus the terminating NUL. */
+ mp = allocmust (mp, ((rj - ri) >> 1) + 1);
+ mp->is[0] = mp->left[0] = mp->right[0]
+ = case_fold_unibyte ? toupper (t) : t;
+
+ /* Consume the rest of the run, advancing RI past the tokens that
+ are folded into this node. */
+ for (i = 1; ri + 2 < rj; i++)
+ {
+ ri += 2;
+ t = d->tokens[ri];
+ mp->is[i] = mp->left[i] = mp->right[i]
+ = case_fold_unibyte ? toupper (t) : t;
+ }
+ mp->is[i] = mp->left[i] = mp->right[i] = '\0';
+ mp->in = enlist (mp->in, mp->is, i);
+ break;
+ }
+ }
+ done:;
+
+ /* RESULT still points into the operand stack, so copy it before the
+ stack is torn down below. */
+ struct dfamust *dm = NULL;
+ if (*result)
+ {
+ dm = xmalloc (sizeof *dm);
+ dm->exact = exact;
+ dm->begline = begline;
+ dm->endline = endline;
+ dm->must = xstrdup (result);
+ }
+
+ while (mp)
+ {
+ must *prev = mp->prev;
+ freemust (mp);
+ mp = prev;
+ }
+
+ return dm;
+}
+
+/* Release DM, as returned by dfamust, together with its MUST string.
+   DM must not be a null pointer.  */
+void
+dfamustfree (struct dfamust *dm)
+{
+  char *text = dm->must;
+
+  free (text);
+  free (dm);
+}
+
+/* Allocate storage for one struct dfa with xmalloc.  The contents are
+   uninitialized; dfainit (called by dfacomp) fills them in.  */
+struct dfa *
+dfaalloc (void)
+{
+  struct dfa *d = xmalloc (sizeof *d);
+  return d;
+}
+
+/* vim:set shiftwidth=2: */
diff --git a/src/dfa.h b/src/dfa.h
new file mode 100644
index 0000000..fb9ac9a
--- /dev/null
+++ b/src/dfa.h
@@ -0,0 +1,119 @@
+/* dfa.h - declarations for GNU deterministic regexp compiler
+ Copyright (C) 1988, 1998, 2007, 2009-2016 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc.,
+ 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA */
+
+/* Written June, 1988 by Mike Haertel */
+
+#include <regex.h>
+#include <stdbool.h>
+#include <stddef.h>
+
+#include "xalloc.h" /* for _GL_ATTRIBUTE_MALLOC */
+
+/* A string that is known to appear in any text matching the regexp of a
+ DFA, as computed by dfamust(). */
+struct dfamust
+{
+ /* True if MUST constitutes the entire match rather than just a
+ required part of it. */
+ bool exact;
+ /* Line-anchor flags that dfamust() reports together with MUST; the
+ caller in dfasearch.c adds a line terminator on the corresponding
+ side of an exact string. */
+ bool begline;
+ bool endline;
+ /* The required string, NUL-terminated and heap-allocated; freed by
+ dfamustfree(). */
+ char *must;
+};
+
+/* The dfa structure. It is completely opaque. */
+struct dfa;
+
+/* Entry points. */
+
+/* Allocate a struct dfa. The struct dfa is completely opaque.
+ The returned pointer should be passed directly to free() after
+ calling dfafree() on it. */
+extern struct dfa *dfaalloc (void) _GL_ATTRIBUTE_MALLOC;
+
+/* Build and return the struct dfamust from the given struct dfa. */
+extern struct dfamust *dfamust (struct dfa const *);
+
+/* Free the storage held by the components of a struct dfamust. */
+extern void dfamustfree (struct dfamust *);
+
+/* dfasyntax() takes three arguments; the first sets the syntax bits described
+ earlier in this file, the second sets the case-folding flag, and the
+ third specifies the line terminator. */
+extern void dfasyntax (reg_syntax_t, int, unsigned char);
+
+/* Compile the given string of the given length into the given struct dfa.
+ Final argument is a flag specifying whether to build a searching or an
+ exact matcher. */
+extern void dfacomp (char const *, size_t, struct dfa *, int);
+
+/* Search through a buffer looking for a match to the given struct dfa.
+ Find the first occurrence of a string matching the regexp in the
+ buffer, and the shortest possible version thereof. Return a pointer to
+ the first character after the match, or NULL if none is found. BEGIN
+ points to the beginning of the buffer, and END points to the first byte
+ after its end. Note however that we store a sentinel byte (usually
+ newline) in *END, so the actual buffer must be one byte longer.
+ When NEWLINE is nonzero, newlines may appear in the matching string.
+ If COUNT is non-NULL, increment *COUNT once for each newline processed.
+ Finally, if BACKREF is non-NULL set *BACKREF to indicate whether we
+ encountered a back-reference (1) or not (0). The caller may use this
+ to decide whether to fall back on a backtracking matcher. */
+extern char *dfaexec (struct dfa *d, char const *begin, char *end,
+ int newline, size_t *count, int *backref);
+
+/* Return a superset for D. The superset matches everything that D
+ matches, along with some other strings (though the latter should be
+ rare, for efficiency reasons). Return a null pointer if no useful
+ superset is available. */
+extern struct dfa *dfasuperset (struct dfa const *d) _GL_ATTRIBUTE_PURE;
+
+/* The DFA is likely to be fast. */
+extern bool dfaisfast (struct dfa const *) _GL_ATTRIBUTE_PURE;
+
+/* Free the storage held by the components of a struct dfa. */
+extern void dfafree (struct dfa *);
+
+/* Entry points for people who know what they're doing. */
+
+/* Initialize the components of a struct dfa. */
+extern void dfainit (struct dfa *);
+
+/* Incrementally parse a string of given length into a struct dfa. */
+extern void dfaparse (char const *, size_t, struct dfa *);
+
+/* Analyze a parsed regexp; second argument tells whether to build a searching
+ or an exact matcher. */
+extern void dfaanalyze (struct dfa *, int);
+
+/* Compute, for each possible character, the transitions out of a given
+ state, storing them in an array of integers. */
+extern void dfastate (ptrdiff_t, struct dfa *, ptrdiff_t []);
+
+/* Error handling. */
+
+/* dfawarn() is called by the regexp routines whenever a regex is compiled
+ that likely doesn't do what the user wanted. It takes a single
+ argument, a NUL-terminated string describing the situation. The user
+ must supply a dfawarn. */
+extern void dfawarn (const char *);
+
+/* dfaerror() is called by the regexp routines whenever an error occurs. It
+ takes a single argument, a NUL-terminated string describing the error.
+ The user must supply a dfaerror. */
+extern _Noreturn void dfaerror (const char *);
+
+extern int using_utf8 (void);
diff --git a/src/dfasearch.c b/src/dfasearch.c
new file mode 100644
index 0000000..d348d44
--- /dev/null
+++ b/src/dfasearch.c
@@ -0,0 +1,451 @@
+/* dfasearch.c - searching subroutines using dfa and regex for grep.
+ Copyright 1992, 1998, 2000, 2007, 2009-2016 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
+ 02110-1301, USA. */
+
+/* Written August 1992 by Mike Haertel. */
+
+#include <config.h>
+#include "intprops.h"
+#include "search.h"
+
+/* Return true if -w treats WC as part of a word, i.e. if WC is an
+   underscore or an alphanumeric wide character.  */
+static bool
+wordchar (wint_t wc)
+{
+  if (wc == L'_')
+    return true;
+  return iswalnum (wc) != 0;
+}
+
+/* KWset compiled pattern. For Ecompile and Gcompile, we compile
+ a list of strings, at least one of which is known to occur in
+ any string matching the regexp. */
+static kwset_t kwset;
+
+/* DFA compiled regexp. */
+static struct dfa *dfa;
+
+/* The Regex compiled patterns. */
+static struct patterns
+{
+ /* Regex compiled regexp. */
+ struct re_pattern_buffer regexbuf;
+ struct re_registers regs; /* This is here on account of a BRAIN-DEAD
+ Q@#%!# library interface in regex.c. */
+} patterns0;
+/* patterns0 is never written in this part of the file; GEAcompile copies
+ it into each new array element as that element's initial value. */
+
+/* Array of PCOUNT compiled patterns, one per newline-separated pattern;
+ grown by GEAcompile. */
+static struct patterns *patterns;
+static size_t pcount;
+
+/* Number of compiled fixed strings known to exactly match the regexp.
+ If kwsexec returns < kwset_exact_matches, then we don't need to
+ call the regexp matcher at all. */
+static size_t kwset_exact_matches;
+
+/* Set by kwsmusts when an exact kwset string was given a leading
+ eolbyte; EGexecute then starts its kwset search one byte before the
+ current position. */
+static bool begline;
+
+/* Error callback required by dfa.h: report MESG through error() with
+ exit status EXIT_TROUBLE. MESG is passed as an argument to a "%s"
+ format, never as the format itself. */
+void
+dfaerror (char const *mesg)
+{
+ error (EXIT_TROUBLE, 0, "%s", mesg);
+
+ /* notreached */
+ /* Tell static analyzers that this function does not return. */
+ abort ();
+}
+
+/* Warning callback required by dfa.h.  The only condition that
+   currently triggers it (a regexp like '[:lower:]') is really an
+   error, so MESG is passed to dfaerror unless POSIXLY_CORRECT is set
+   in the environment, in which case the warning is ignored.  The
+   environment is consulted on the first call only.  */
+void
+dfawarn (char const *mesg)
+{
+  static enum { DW_NONE = 0, DW_POSIX, DW_GNU } mode;
+
+  if (mode == DW_NONE)
+    {
+      if (getenv ("POSIXLY_CORRECT"))
+        mode = DW_POSIX;
+      else
+        mode = DW_GNU;
+    }
+
+  if (mode != DW_GNU)
+    return;
+  dfaerror (mesg);
+}
+
+/* Ask the DFA for a fixed string that every match must contain.  If
+   there is one, build a kwset matcher for it so that impossible
+   matches can be filtered out quickly.  An exact string (one whose
+   presence implies a match) is counted in kwset_exact_matches and is
+   given an eolbyte on each side where the DFA reports a line anchor.  */
+static void
+kwsmusts (void)
+{
+  struct dfamust *dm = dfamust (dfa);
+  if (dm == NULL)
+    return;
+
+  kwsinit (&kwset);
+  size_t must_len = strlen (dm->must);
+
+  if (!dm->exact)
+    {
+      /* Filtering with this substring should help reduce the search
+         space, but the regexp matcher is still needed.  */
+      kwsincr (kwset, dm->must, must_len);
+    }
+  else
+    {
+      /* Prepare a substring whose presence implies a match.  The kwset
+         matcher will return the index of the matching string that it
+         chooses.  */
+      ++kwset_exact_matches;
+      size_t key_len = must_len + dm->begline + dm->endline;
+      char *key = xmalloc (key_len);
+      char *text = key;
+
+      if (dm->begline)
+        *text++ = eolbyte;
+      begline |= dm->begline;
+      memcpy (text, dm->must, must_len);
+      if (dm->endline)
+        text[must_len] = eolbyte;
+
+      kwsincr (kwset, key, key_len);
+      free (key);
+    }
+
+  kwsprep (kwset);
+  dfamustfree (dm);
+}
+
+/* Compile the SIZE-byte PATTERN, a newline-separated list of regexps,
+ under the syntax SYNTAX_BITS (RE_ICASE is added when match_icase is
+ set). Each regexp is compiled on its own with re_compile_pattern into
+ a new element of PATTERNS; a compilation error ends the program with
+ status EXIT_TROUBLE. The pattern as a whole -- wrapped in line or word
+ context when match_lines or match_words is set -- is then compiled
+ into the DFA, and kwsmusts sets up the keyword filter. */
+void
+GEAcompile (char const *pattern, size_t size, reg_syntax_t syntax_bits)
+{
+ /* Bytes of PATTERN not yet handed to regex. */
+ size_t total = size;
+ /* The wrapped copy of PATTERN built for -w/-x, or null. */
+ char *motif;
+
+ if (match_icase)
+ syntax_bits |= RE_ICASE;
+ re_set_syntax (syntax_bits);
+ dfasyntax (syntax_bits, match_icase, eolbyte);
+
+ /* For GNU regex, pass the patterns separately to detect errors like
+ "[\nallo\n]\n", where the patterns are "[", "allo" and "]", and
+ this should be a syntax error. The same for backref, where the
+ backref should be local to each pattern. */
+ char const *p = pattern;
+ do
+ {
+ size_t len;
+ char const *sep = memchr (p, '\n', total);
+ if (sep)
+ {
+ len = sep - p;
+ sep++;
+ total -= (len + 1);
+ }
+ else
+ {
+ len = total;
+ total = 0;
+ }
+
+ patterns = xnrealloc (patterns, pcount + 1, sizeof *patterns);
+ patterns[pcount] = patterns0;
+
+ char const *err = re_compile_pattern (p, len,
+ &(patterns[pcount].regexbuf));
+ if (err)
+ error (EXIT_TROUBLE, 0, "%s", err);
+ pcount++;
+ /* SEP is null after the last regexp, which ends the loop. */
+ p = sep;
+ }
+ while (p);
+
+ /* In the match_words and match_lines cases, we use a different pattern
+ for the DFA matcher that will quickly throw out cases that won't work.
+ Then if DFA succeeds we do some hairy stuff using the regex matcher
+ to decide whether the match should really count. */
+ if (match_words || match_lines)
+ {
+ static char const line_beg_no_bk[] = "^(";
+ static char const line_end_no_bk[] = ")$";
+ static char const word_beg_no_bk[] = "(^|[^[:alnum:]_])(";
+ static char const word_end_no_bk[] = ")([^[:alnum:]_]|$)";
+ static char const line_beg_bk[] = "^\\(";
+ static char const line_end_bk[] = "\\)$";
+ static char const word_beg_bk[] = "\\(^\\|[^[:alnum:]_]\\)\\(";
+ static char const word_end_bk[] = "\\)\\([^[:alnum:]_]\\|$\\)";
+ /* Nonzero if grouping parentheses are written with backslashes. */
+ int bk = !(syntax_bits & RE_NO_BK_PARENS);
+ /* Sized for the longest prefix and suffix (the backslashed word
+ forms) plus the pattern and one terminating NUL. */
+ char *n = xmalloc (sizeof word_beg_bk - 1 + size + sizeof word_end_bk);
+
+ strcpy (n, match_lines ? (bk ? line_beg_bk : line_beg_no_bk)
+ : (bk ? word_beg_bk : word_beg_no_bk));
+ total = strlen(n);
+ memcpy (n + total, pattern, size);
+ total += size;
+ strcpy (n + total, match_lines ? (bk ? line_end_bk : line_end_no_bk)
+ : (bk ? word_end_bk : word_end_no_bk));
+ total += strlen (n + total);
+ pattern = motif = n;
+ size = total;
+ }
+ else
+ motif = NULL;
+
+ dfa = dfaalloc ();
+ dfacomp (pattern, size, dfa, 1);
+ kwsmusts ();
+
+ free(motif);
+}
+
+/* Search the SIZE bytes at BUF for a match of the compiled patterns. On
+ success store the length of the match in *MATCH_SIZE and return its
+ offset from BUF; otherwise return (size_t) -1.
+
+ When START_PTR is null, candidate lines are located with the kwset
+ filter, the superset DFA and the DFA, in that order where available;
+ the regex matcher is run on a candidate only if the DFA reported a
+ possible back-reference. A successful result then covers the whole
+ line, terminator included.
+
+ When START_PTR is non-null, the filters are skipped and regex searches
+ from START_PTR for the leftmost, then longest, match. */
+size_t
+EGexecute (char *buf, size_t size, size_t *match_size,
+ char const *start_ptr)
+{
+ char const *buflim, *beg, *end, *ptr, *match, *best_match, *mb_start;
+ char eol = eolbyte;
+ regoff_t start;
+ size_t len, best_len;
+ struct kwsmatch kwsm;
+ size_t i;
+ struct dfa *superset = dfasuperset (dfa);
+ bool dfafast = dfaisfast (dfa);
+
+ mb_start = buf;
+ buflim = buf + size;
+
+ /* Each iteration examines the region [BEG, END); a 'continue' resumes
+ the search at the previous END. */
+ for (beg = end = buf; end < buflim; beg = end)
+ {
+ end = buflim;
+
+ if (!start_ptr)
+ {
+ char const *next_beg, *dfa_beg = beg;
+ size_t count = 0;
+ bool exact_kwset_match = false;
+ int backref = 0;
+
+ /* Try matching with KWset, if it's defined. */
+ if (kwset)
+ {
+ char const *prev_beg;
+
+ /* Find a possible match using the KWset matcher. */
+ /* BEGLINE is 0 or 1: when an exact string carries a leading
+ eolbyte, the search starts one byte earlier. */
+ size_t offset = kwsexec (kwset, beg - begline,
+ buflim - beg + begline, &kwsm);
+ if (offset == (size_t) -1)
+ goto failure;
+ match = beg + offset;
+ prev_beg = beg;
+
+ /* Narrow down to the line containing the possible match. */
+ beg = memrchr (buf, eol, match - buf);
+ beg = beg ? beg + 1 : buf;
+ dfa_beg = beg;
+
+ /* Determine the end pointer to give the DFA next. Typically
+ this is after the first newline after MATCH; but if the KWset
+ match is not exact, the DFA is fast, and the offset from
+ PREV_BEG is less than 64 or (MATCH - PREV_BEG), this is the
+ greater of the latter two values; this temporarily prefers
+ the DFA to KWset. */
+ exact_kwset_match = kwsm.index < kwset_exact_matches;
+ end = ((exact_kwset_match || !dfafast
+ || MAX (16, match - beg) < (match - prev_beg) >> 2)
+ ? match
+ : MAX (16, match - beg) < (buflim - prev_beg) >> 2
+ ? prev_beg + 4 * MAX (16, match - beg)
+ : buflim);
+ end = memchr (end, eol, buflim - end);
+ end = end ? end + 1 : buflim;
+
+ if (exact_kwset_match)
+ {
+ if (MB_CUR_MAX == 1 || using_utf8 ())
+ goto success;
+ if (mb_start < beg)
+ mb_start = beg;
+ if (mb_goback (&mb_start, match, buflim) == 0)
+ goto success;
+ /* The matched line starts in the middle of a multibyte
+ character. Perform the DFA search starting from the
+ beginning of the next character. */
+ dfa_beg = mb_start;
+ }
+ }
+
+ /* Try matching with the superset of DFA, if it's defined. */
+ if (superset && !exact_kwset_match)
+ {
+ /* Keep using the superset while it reports multiline
+ potential matches; this is more likely to be fast
+ than falling back to KWset would be. */
+ while ((next_beg = dfaexec (superset, dfa_beg, (char *) end, 1,
+ &count, NULL))
+ && next_beg != end
+ && count != 0)
+ {
+ /* Try to match in just one line. */
+ /* COUNT is nonzero here, i.e. dfaexec processed at least one
+ newline, so memrchr is expected to find one. */
+ count = 0;
+ beg = memrchr (buf, eol, next_beg - buf);
+ beg++;
+ dfa_beg = beg;
+ }
+ if (next_beg == NULL || next_beg == end)
+ continue;
+
+ /* Narrow down to the line we've found. */
+ end = memchr (next_beg, eol, buflim - next_beg);
+ end = end ? end + 1 : buflim;
+ }
+
+ /* Try matching with DFA. */
+ next_beg = dfaexec (dfa, dfa_beg, (char *) end, 0, &count, &backref);
+
+ /* If there's no match, or if we've matched the sentinel,
+ we're done. */
+ if (next_beg == NULL || next_beg == end)
+ continue;
+
+ /* Narrow down to the line we've found. */
+ if (count != 0)
+ {
+ beg = memrchr (buf, eol, next_beg - buf);
+ beg++;
+ }
+ end = memchr (next_beg, eol, buflim - next_beg);
+ end = end ? end + 1 : buflim;
+
+ /* Successful, no backreferences encountered! */
+ if (!backref)
+ goto success;
+ ptr = beg;
+ }
+ else
+ {
+ /* We are looking for the leftmost (then longest) exact match.
+ We will go through the outer loop only once. */
+ ptr = start_ptr;
+ }
+
+ /* If the "line" is longer than the maximum regexp offset,
+ die as if we've run out of memory. */
+ if (TYPE_MAXIMUM (regoff_t) < end - beg - 1)
+ xalloc_die ();
+
+ /* Run the possible match through Regex. */
+ /* BEST_MATCH == END means that no pattern has matched yet. */
+ best_match = end;
+ best_len = 0;
+ for (i = 0; i < pcount; i++)
+ {
+ patterns[i].regexbuf.not_eol = 0;
+ patterns[i].regexbuf.newline_anchor = eolbyte == '\n';
+ /* The lengths passed to regex leave out the final byte of the
+ region (END - 1). */
+ start = re_search (&(patterns[i].regexbuf),
+ beg, end - beg - 1,
+ ptr - beg, end - ptr - 1,
+ &(patterns[i].regs));
+ if (start < -1)
+ xalloc_die ();
+ else if (0 <= start)
+ {
+ len = patterns[i].regs.end[0] - start;
+ match = beg + start;
+ if (match > best_match)
+ continue;
+ if (start_ptr && !match_words)
+ goto assess_pattern_match;
+ if ((!match_lines && !match_words)
+ || (match_lines && len == end - ptr - 1))
+ {
+ match = ptr;
+ len = end - ptr;
+ goto assess_pattern_match;
+ }
+ /* If -w and not -x, check whether the match aligns with
+ word boundaries. Do this iteratively because:
+ (a) the line may contain more than one occurrence of the
+ pattern, and
+ (b) Several alternatives in the pattern might be valid at a
+ given point, and we may need to consider a shorter one to
+ find a word boundary. */
+ if (!match_lines && match_words)
+ while (match <= best_match)
+ {
+ regoff_t shorter_len = 0;
+ if (!wordchar (mb_prev_wc (beg, match, end - 1))
+ && !wordchar (mb_next_wc (match + len, end - 1)))
+ goto assess_pattern_match;
+ if (len > 0)
+ {
+ /* Try a shorter length anchored at the same place. */
+ --len;
+ patterns[i].regexbuf.not_eol = 1;
+ shorter_len = re_match (&(patterns[i].regexbuf),
+ beg, match + len - ptr,
+ match - beg,
+ &(patterns[i].regs));
+ if (shorter_len < -1)
+ xalloc_die ();
+ }
+ if (0 < shorter_len)
+ len = shorter_len;
+ else
+ {
+ /* Try looking further on. */
+ if (match == end - 1)
+ break;
+ match++;
+ patterns[i].regexbuf.not_eol = 0;
+ start = re_search (&(patterns[i].regexbuf),
+ beg, end - beg - 1,
+ match - beg, end - match - 1,
+ &(patterns[i].regs));
+ if (start < 0)
+ {
+ if (start < -1)
+ xalloc_die ();
+ break;
+ }
+ len = patterns[i].regs.end[0] - start;
+ match = beg + start;
+ }
+ } /* while (match <= best_match) */
+ continue;
+ assess_pattern_match:
+ if (!start_ptr)
+ {
+ /* Good enough for a non-exact match.
+ No need to look at further patterns, if any. */
+ goto success;
+ }
+ if (match < best_match || (match == best_match && len > best_len))
+ {
+ /* Best exact match: leftmost, then longest. */
+ best_match = match;
+ best_len = len;
+ }
+ } /* if re_search >= 0 */
+ } /* for Regex patterns. */
+ if (best_match < end)
+ {
+ /* We have found an exact match. We were just
+ waiting for the best one (leftmost then longest). */
+ beg = best_match;
+ len = best_len;
+ goto success_in_len;
+ }
+ } /* for (beg = end ..) */
+
+ failure:
+ /* Converted to the size_t return type, this is (size_t) -1. */
+ return -1;
+
+ success:
+ /* Report the whole region [BEG, END). */
+ len = end - beg;
+ success_in_len:;
+ size_t off = beg - buf;
+ *match_size = len;
+ return off;
+}
diff --git a/src/dosbuf.c b/src/dosbuf.c
new file mode 100644
index 0000000..839cc3a
--- /dev/null
+++ b/src/dosbuf.c
@@ -0,0 +1,222 @@
+/* dosbuf.c
+ Copyright (C) 1992, 1997-2002, 2004-2016 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
+ 02110-1301, USA. */
+
+/* Messy DOS-specific code for correctly treating binary, Unix text
+ and DOS text files.
+
+ This has several aspects:
+
+ * Guessing the file type (unless the user tells us);
+ * Stripping CR characters from DOS text files (otherwise regex
+ functions won't work correctly);
+ * Reporting correct byte count with -b for any kind of file.
+
+*/
+
+#include <config.h>
+
+typedef enum { /* How the contents of an input file are classified. */
+ UNKNOWN, DOS_BINARY, DOS_TEXT, UNIX_TEXT
+} File_type;
+
+struct dos_map {
+ off_t pos; /* position in buffer passed to matcher */
+ off_t add; /* how much to add when reporting char position */
+};
+
+static int dos_report_unix_offset = 0; /* Set to 1 by dos_unix_byte_offsets.
+ When nonzero, no position table is built and dossified_pos
+ returns its argument unchanged. */
+
+static File_type dos_file_type = UNKNOWN; /* Type of the current file; reloaded
+ from dos_use_file_type at the start of each file. */
+static File_type dos_use_file_type = UNKNOWN; /* Type assumed for every file;
+ UNKNOWN means guess from the contents. */
+static off_t dos_stripped_crs = 0; /* CRs counted so far for the position table. */
+static struct dos_map *dos_pos_map; /* Position table, grown with xrealloc. */
+static int dos_pos_map_size = 0; /* Entries allocated in dos_pos_map. */
+static int dos_pos_map_used = 0; /* Index of the last entry filled in. */
+static int inp_map_idx = 0, out_map_idx = 1; /* Table cursors used by
+ undossify_input and dossified_pos, respectively. */
+
+/* Make DOS_BINARY the file type assumed for every input file.
+   This has no effect where O_BINARY is zero.  */
+static void
+dos_binary (void)
+{
+  if (! O_BINARY)
+    return;
+
+  dos_use_file_type = DOS_BINARY;
+}
+
+/* Request that byte offsets be reported as Unix offsets, i.e. without
+   compensating for stripped CR bytes.  This has no effect where
+   O_BINARY is zero.  */
+static void
+dos_unix_byte_offsets (void)
+{
+  if (! O_BINARY)
+    return;
+
+  dos_report_unix_offset = 1;
+}
+
+/* Classify the BUFLEN bytes at BUF.  A NUL byte anywhere makes the
+   data DOS_BINARY.  Otherwise the result is DOS_TEXT if some CR is
+   immediately followed by LF inside the buffer, else UNIX_TEXT.  */
+static File_type
+guess_type (char *buf, size_t buflen)
+{
+  int saw_crlf = 0;
+
+  for (size_t i = 0; i < buflen; i++)
+    {
+      char c = buf[i];
+
+      /* A NUL settles the question at once.  */
+      if (c == '\0')
+        return DOS_BINARY;
+
+      /* Only look at the following byte when it is still inside the
+         buffer.  */
+      if (c == '\r' && i + 1 < buflen && buf[i + 1] == '\n')
+        saw_crlf = 1;
+    }
+
+  if (saw_crlf)
+    return DOS_TEXT;
+  return UNIX_TEXT;
+}
+
+/* Convert external DOS file representation to internal.
+ Return the count of bytes left in the buffer.
+ Build table to map character positions when reporting byte counts.
+
+ BUF holds BUFLEN bytes. If O_BINARY is zero, or the file is not
+ being treated as DOS text, BUF is left untouched and BUFLEN is
+ returned. Otherwise every CR byte is removed from BUF in place and
+ the number of bytes kept is returned. The position table is built
+ only when out_byte is set and Unix offsets were not requested. */
+static size_t
+undossify_input (char *buf, size_t buflen)
+{
+ if (! O_BINARY)
+ return buflen;
+
+ size_t bytes_left = 0; /* Bytes kept after CR removal. */
+
+ if (totalcc == 0)
+ {
+ /* New file: forget everything we knew about character
+ position mapping table and file type. */
+ inp_map_idx = 0;
+ out_map_idx = 1;
+ dos_pos_map_used = 0;
+ dos_stripped_crs = 0;
+ dos_file_type = dos_use_file_type;
+ }
+
+ /* Guess if this file is binary, unless we already know that. */
+ if (dos_file_type == UNKNOWN)
+ dos_file_type = guess_type(buf, buflen);
+
+ /* If this file is to be treated as DOS Text, strip the CR characters
+ and maybe build the table for character position mapping on output. */
+ if (dos_file_type == DOS_TEXT)
+ {
+ char *destp = buf; /* Write cursor; BUF itself is the read cursor. */
+
+ while (buflen--)
+ {
+ if (*buf != '\r')
+ {
+ *destp++ = *buf++;
+ bytes_left++;
+ }
+ else
+ {
+ buf++; /* Drop this CR. */
+ if (out_byte && !dos_report_unix_offset)
+ {
+ dos_stripped_crs++;
+ while (buflen && *buf == '\r') /* Fold a run of CRs into one table entry. */
+ {
+ dos_stripped_crs++;
+ buflen--;
+ buf++;
+ }
+ if (inp_map_idx >= dos_pos_map_size - 1) /* Grow the table before it fills. */
+ {
+ dos_pos_map_size = inp_map_idx ? inp_map_idx * 2 : 1000;
+ dos_pos_map = xrealloc(dos_pos_map,
+ dos_pos_map_size *
+ sizeof(struct dos_map));
+ }
+
+ if (!inp_map_idx)
+ {
+ /* Add sentinel entry. */
+ dos_pos_map[inp_map_idx].pos = 0;
+ dos_pos_map[inp_map_idx++].add = 0;
+
+ /* Initialize first real entry. */
+ dos_pos_map[inp_map_idx].add = 0;
+ }
+
+ /* Put the new entry. If the stripped CR characters
+ precede a Newline (the usual case), pretend that
+ they were found *after* the Newline. This makes
+ displayed byte offsets more reasonable in some
+ cases, and fits better the intuitive notion that
+ the line ends *before* the CR, not *after* it.
+ NOTE(review): *buf is read here even when buflen has
+ reached zero; presumably the caller's buffer has slop
+ after the data -- confirm. */
+ inp_map_idx++;
+ dos_pos_map[inp_map_idx-1].pos =
+ (*buf == '\n' ? destp + 1 : destp ) - bufbeg + totalcc;
+ dos_pos_map[inp_map_idx].add = dos_stripped_crs;
+ dos_pos_map_used = inp_map_idx;
+
+ /* The following will be updated on the next pass. */
+ dos_pos_map[inp_map_idx].pos = destp - bufbeg + totalcc + 1;
+ }
+ }
+ }
+
+ return bytes_left;
+ }
+
+ return buflen;
+}
+
+/* Convert internal byte count into external.
+ BYTENO is returned unchanged when O_BINARY is zero, when the current
+ file is not DOS text, or when Unix byte offsets were requested.
+ Otherwise the 'add' field of the position-table entry selected for
+ BYTENO is added to it. As a side effect, out_map_idx is left at
+ the selected entry so that the next lookup can start from there.
+ NOTE(review): dos_pos_map is indexed without a null check; this
+ presumably relies on undossify_input having built the table for
+ any DOS text file -- confirm against the callers. */
+static off_t
+dossified_pos (off_t byteno)
+{
+ if (! O_BINARY)
+ return byteno;
+
+ off_t pos_lo;
+ off_t pos_hi;
+
+ if (dos_file_type != DOS_TEXT || dos_report_unix_offset)
+ return byteno;
+
+ /* Optimization: usually the file will be scanned sequentially.
+ So in most cases, this byte position will be found in the
+ table near the previous one, as recorded in 'out_map_idx'. */
+ pos_lo = dos_pos_map[out_map_idx-1].pos;
+ pos_hi = dos_pos_map[out_map_idx].pos;
+
+ /* If the initial guess failed, search up or down, as
+ appropriate, beginning with the previous place. */
+ if (byteno >= pos_hi)
+ {
+ out_map_idx++;
+ while (out_map_idx < dos_pos_map_used
+ && byteno >= dos_pos_map[out_map_idx].pos)
+ out_map_idx++;
+ }
+
+ else if (byteno < pos_lo)
+ {
+ out_map_idx--;
+ while (out_map_idx > 1 && byteno < dos_pos_map[out_map_idx-1].pos)
+ out_map_idx--;
+ }
+
+ return byteno + dos_pos_map[out_map_idx].add;
+}
diff --git a/src/egrep.sh b/src/egrep.sh
new file mode 100644
index 0000000..6d6c15a
--- /dev/null
+++ b/src/egrep.sh
@@ -0,0 +1,2 @@
+#!@SHELL@
+exec @grep@ @option@ "$@"
diff --git a/src/grep.c b/src/grep.c
new file mode 100644
index 0000000..8baca5a
--- /dev/null
+++ b/src/grep.c
@@ -0,0 +1,2720 @@
+/* grep.c - main driver file for grep.
+ Copyright (C) 1992, 1997-2002, 2004-2016 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
+ 02110-1301, USA. */
+
+/* Written July 1992 by Mike Haertel. */
+
+#include <config.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <wchar.h>
+#include <wctype.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include "system.h"
+
+#include "argmatch.h"
+#include "c-ctype.h"
+#include "closeout.h"
+#include "colorize.h"
+#include "error.h"
+#include "exclude.h"
+#include "exitfail.h"
+#include "fcntl-safer.h"
+#include "fts_.h"
+#include "getopt.h"
+#include "grep.h"
+#include "intprops.h"
+#include "progname.h"
+#include "propername.h"
+#include "quote.h"
+#include "safe-read.h"
+#include "search.h"
+#include "version-etc.h"
+#include "xalloc.h"
+#include "xstrtol.h"
+
+#define SEP_CHAR_SELECTED ':'
+#define SEP_CHAR_REJECTED '-'
+#define SEP_STR_GROUP "--"
+
+#define AUTHORS \
+ proper_name ("Mike Haertel"), \
+ _("others, see <http://git.sv.gnu.org/cgit/grep.git/tree/AUTHORS>")
+
+/* When stdout is connected to a regular file, save its stat
+ information here, so that we can automatically skip it, thus
+ avoiding a potential (racy) infinite loop. */
+static struct stat out_stat;
+
+/* if non-zero, display usage information and exit */
+static int show_help;
+
+/* Print the version on standard output and exit. */
+static bool show_version;
+
+/* Suppress diagnostics for nonexistent or unreadable files. */
+static bool suppress_errors;
+
+/* If nonzero, use color markers. */
+static int color_option;
+
+/* Show only the part of a line matching the expression. */
+static bool only_matching;
+
+/* If true, make sure first content char in a line is on a tab stop. */
+static bool align_tabs;
+
+#if HAVE_ASAN
+/* Record the starting address and length of the sole poisoned region,
+ so that we can unpoison it later, just before each following read.
+ asan_poison (ADDR, SIZE) records the region and poisons it;
+ clear_asan_poison unpoisons the recorded region, if there is one. */
+static void const *poison_buf;
+static size_t poison_len;
+
+static void
+clear_asan_poison (void)
+{
+ if (poison_buf)
+ __asan_unpoison_memory_region (poison_buf, poison_len);
+}
+
+static void
+asan_poison (void const *addr, size_t size)
+{
+ poison_buf = addr;
+ poison_len = size;
+
+ __asan_poison_memory_region (poison_buf, poison_len);
+}
+#else /* ! HAVE_ASAN: both helpers are empty stubs. */
+static void clear_asan_poison (void) { }
+static void asan_poison (void const volatile *addr, size_t size) { }
+#endif /* HAVE_ASAN */
+
+/* The group separator used when context is requested. */
+static const char *group_separator = SEP_STR_GROUP;
+
+/* The context and logic for choosing default --color screen attributes
+ (foreground and background colors, etc.) are the following.
+ -- There are eight basic colors available, each with its own
+ nominal luminosity to the human eye and foreground/background
+ codes (black [0 %, 30/40], blue [11 %, 34/44], red [30 %, 31/41],
+ magenta [41 %, 35/45], green [59 %, 32/42], cyan [70 %, 36/46],
+ yellow [89 %, 33/43], and white [100 %, 37/47]).
+ -- Sometimes, white as a background is actually implemented using
+ a shade of light gray, so that a foreground white can be visible
+ on top of it (but most often not).
+ -- Sometimes, black as a foreground is actually implemented using
+ a shade of dark gray, so that it can be visible on top of a
+ background black (but most often not).
+ -- Sometimes, more colors are available, as extensions.
+ -- Other attributes can be selected/deselected (bold [1/22],
+ underline [4/24], standout/inverse [7/27], blink [5/25], and
+ invisible/hidden [8/28]). They are sometimes implemented by
+ using colors instead of what their names imply; e.g., bold is
+ often achieved by using brighter colors. In practice, only bold
+ is really available to us, underline sometimes being mapped by
+ the terminal to some strange color choice, and standout best
+ being left for use by downstream programs such as less(1).
+ -- We cannot assume that any of the extensions or special features
+ are available for the purpose of choosing defaults for everyone.
+ -- The most prevalent default terminal backgrounds are pure black
+ and pure white, and are not necessarily the same shades of
+ those as if they were selected explicitly with SGR sequences.
+ Some terminals use dark or light pictures as default background,
+ but those are covered over by an explicit selection of background
+ color with an SGR sequence; their users will appreciate their
+ background pictures not being covered like this, if possible.
+ -- Some uses of color attributes are to make some output items
+ more understated (e.g., context lines); this cannot be achieved
+ by changing the background color.
+ -- For these reasons, the grep color defaults should strive not
+ to change the background color from its default, unless it's
+ for a short item that should be highlighted, not understated.
+ -- The grep foreground color defaults (without an explicitly set
+ background) should provide enough contrast to be readable on any
+ terminal with either a black (dark) or white (light) background.
+ This only leaves red, magenta, green, and cyan (and their bold
+ counterparts) and possibly bold blue. */
+/* The color strings used for matched text.
+ The user can override them using the deprecated
+ environment variable GREP_COLOR or the new GREP_COLORS. */
+static const char *selected_match_color = "01;31"; /* bold red */
+static const char *context_match_color = "01;31"; /* bold red */
+
+/* Other colors. Defaults look damn good. */
+static const char *filename_color = "35"; /* magenta */
+static const char *line_num_color = "32"; /* green */
+static const char *byte_num_color = "32"; /* green */
+static const char *sep_color = "36"; /* cyan */
+static const char *selected_line_color = ""; /* default color pair */
+static const char *context_line_color = ""; /* default color pair */
+
+/* Select Graphic Rendition (SGR, "\33[...m") strings. */
+/* Also Erase in Line (EL) to Right ("\33[K") by default. */
+/* Why have EL to Right after SGR?
+ -- The behavior of line-wrapping when at the bottom of the
+ terminal screen and at the end of the current line is often
+ such that a new line is introduced, entirely cleared with
+ the current background color which may be different from the
+ default one (see the boolean back_color_erase terminfo(5)
+ capability), thus scrolling the display by one line.
+ The end of this new line will stay in this background color
+ even after reverting to the default background color with
+ "\33[m", unless it is explicitly cleared again with "\33[K"
+ (which is the behavior the user would instinctively expect
+ from the whole thing). There may be some unavoidable
+ background-color flicker at the end of this new line because
+ of this (when timing with the monitor's redraw is just right).
+ -- The behavior of HT (tab, "\t") is usually the same as that of
+ Cursor Forward Tabulation (CHT) with a default parameter
+ of 1 ("\33[I"), i.e., it performs pure movement to the next
+ tab stop, without any clearing of either content or screen
+ attributes (including background color); try
+ printf 'asdfqwerzxcv\rASDF\tZXCV\n'
+ in a bash(1) shell to demonstrate this. This is not what the
+ user would instinctively expect of HT (but is ok for CHT).
+ The instinctive behavior would include clearing the terminal
+ cells that are skipped over by HT with blank cells in the
+ current screen attributes, including background color;
+ the boolean dest_tabs_magic_smso terminfo(5) capability
+ indicates this saner behavior for HT, but only some rare
+ terminals have it (although it also indicates a special
+ glitch with standout mode in the Teleray terminal for which
+ it was initially introduced). The remedy is to add "\33[K"
+ after each SGR sequence, be it START (to fix the behavior
+ of any HT after that before another SGR) or END (to fix the
+ behavior of an HT in default background color that would
+ follow a line-wrapping at the bottom of the screen in another
+ background color, and to complement doing it after START).
+ Piping grep's output through a pager such as less(1) avoids
+ any HT problems since the pager performs tab expansion.
+
+ Generic disadvantages of this remedy are:
+ -- Some very rare terminals might support SGR but not EL (nobody
+ will use "grep --color" on a terminal that does not support
+ SGR in the first place).
+ -- Having these extra control sequences might somewhat complicate
+ the task of any program trying to parse "grep --color"
+ output in order to extract structuring information from it.
+ A specific disadvantage to doing it after SGR START is:
+ -- Even more possible background color flicker (when timing
+ with the monitor's redraw is just right), even when not at the
+ bottom of the screen.
+ There are no additional disadvantages specific to doing it after
+ SGR END.
+
+ It would be impractical for GNU grep to become a full-fledged
+ terminal program linked against ncurses or the like, so it will
+ not detect terminfo(5) capabilities. */
+static const char *sgr_start = "\33[%sm\33[K";
+static const char *sgr_end = "\33[m\33[K";
+
+/* SGR utility functions. */
+
+/* Start colorizing with the SGR parameter string S, unless S is
+   empty.  */
+static void
+pr_sgr_start (char const *s)
+{
+  if (s[0] == '\0')
+    return;
+
+  print_start_colorize (sgr_start, s);
+}
+/* Stop colorizing, unless the SGR parameter string S is empty.  */
+static void
+pr_sgr_end (char const *s)
+{
+  if (s[0] == '\0')
+    return;
+
+  print_end_colorize (sgr_end);
+}
+/* Like pr_sgr_start, but do nothing when color_option is zero.  */
+static void
+pr_sgr_start_if (char const *s)
+{
+  if (! color_option)
+    return;
+
+  pr_sgr_start (s);
+}
+/* Like pr_sgr_end, but do nothing when color_option is zero.  */
+static void
+pr_sgr_end_if (char const *s)
+{
+  if (! color_option)
+    return;
+
+  pr_sgr_end (s);
+}
+
+struct color_cap
+ {
+ const char *name; /* Capability name, e.g. "mt". */
+ const char **var; /* Color string variable tied to NAME, or NULL. */
+ void (*fct) (void); /* Extra action for NAME, or NULL. */
+ };
+
+static void
+color_cap_mt_fct (void)
+{
+ /* Our caller just set selected_match_color. Copy it to
+ context_match_color as well, so that both match colors
+ end up with the same value. */
+ context_match_color = selected_match_color;
+}
+
+static void
+color_cap_rv_fct (void)
+{
+ /* By this point, it was 1 (or already -1). Making it negative
+ records that this capability was given while keeping color
+ enabled. */
+ color_option = -1; /* That's still != 0. */
+}
+
+static void
+color_cap_ne_fct (void)
+{
+ sgr_start = "\33[%sm"; /* The default strings minus the trailing EL ("\33[K"). */
+ sgr_end = "\33[m";
+}
+
+/* For GREP_COLORS. */
+static const struct color_cap color_dict[] =
+ {
+ { "mt", &selected_match_color, color_cap_mt_fct }, /* both ms/mc */
+ { "ms", &selected_match_color, NULL }, /* selected matched text */
+ { "mc", &context_match_color, NULL }, /* context matched text */
+ { "fn", &filename_color, NULL }, /* filename */
+ { "ln", &line_num_color, NULL }, /* line number */
+ { "bn", &byte_num_color, NULL }, /* byte (sic) offset */
+ { "se", &sep_color, NULL }, /* separator */
+ { "sl", &selected_line_color, NULL }, /* selected lines */
+ { "cx", &context_line_color, NULL }, /* context lines */
+ { "rv", NULL, color_cap_rv_fct }, /* -v reverses sl/cx */
+ { "ne", NULL, color_cap_ne_fct }, /* no EL on SGR_* */
+ { NULL, NULL, NULL }
+ };
+
+/* Saved errno value from failed output functions on stdout. */
+static int stdout_errno;
+
+/* Write the character C to standard output; on failure, save errno
+   in stdout_errno.  */
+static void
+putchar_errno (int c)
+{
+  int rc = putchar (c);
+
+  if (rc < 0)
+    stdout_errno = errno;
+}
+
+/* Write the string S to standard output; on failure, save errno in
+   stdout_errno.  */
+static void
+fputs_errno (char const *s)
+{
+  int rc = fputs (s, stdout);
+
+  if (rc < 0)
+    stdout_errno = errno;
+}
+
+/* Like printf, but on failure save errno in stdout_errno instead of
+   returning a status.  */
+static void _GL_ATTRIBUTE_FORMAT_PRINTF (1, 2)
+printf_errno (char const *format, ...)
+{
+  va_list args;
+  int rc;
+
+  va_start (args, format);
+  rc = vfprintf (stdout, format, args);
+  /* Capture errno before anything else has a chance to disturb it.  */
+  if (rc < 0)
+    stdout_errno = errno;
+  va_end (args);
+}
+
+/* Write NMEMB objects of SIZE bytes each from PTR to standard output;
+   if fewer than NMEMB are written, save errno in stdout_errno.  */
+static void
+fwrite_errno (void const *ptr, size_t size, size_t nmemb)
+{
+  size_t written = fwrite (ptr, size, nmemb, stdout);
+
+  if (written != nmemb)
+    stdout_errno = errno;
+}
+
+/* Flush standard output; on failure, save errno in stdout_errno.  */
+static void
+fflush_errno (void)
+{
+  if (fflush (stdout))
+    stdout_errno = errno;
+}
+
+static struct exclude *excluded_patterns[2];
+static struct exclude *excluded_directory_patterns[2];
+/* Short options. */
+static char const short_options[] =
+"0123456789A:B:C:D:EFGHIPTUVX:abcd:e:f:hiLlm:noqRrsuvwxyZz";
+
+/* Non-boolean long options that have no corresponding short equivalents. */
+enum
+{
+ BINARY_FILES_OPTION = CHAR_MAX + 1,
+ COLOR_OPTION,
+ EXCLUDE_DIRECTORY_OPTION,
+ EXCLUDE_OPTION,
+ EXCLUDE_FROM_OPTION,
+ GROUP_SEPARATOR_OPTION,
+ INCLUDE_OPTION,
+ LINE_BUFFERED_OPTION,
+ LABEL_OPTION
+};
+
+/* Long options equivalences. */
+static struct option const long_options[] =
+{
+ {"basic-regexp", no_argument, NULL, 'G'},
+ {"extended-regexp", no_argument, NULL, 'E'},
+ {"fixed-regexp", no_argument, NULL, 'F'},
+ {"fixed-strings", no_argument, NULL, 'F'},
+ {"perl-regexp", no_argument, NULL, 'P'},
+ {"after-context", required_argument, NULL, 'A'},
+ {"before-context", required_argument, NULL, 'B'},
+ {"binary-files", required_argument, NULL, BINARY_FILES_OPTION},
+ {"byte-offset", no_argument, NULL, 'b'},
+ {"context", required_argument, NULL, 'C'},
+ {"color", optional_argument, NULL, COLOR_OPTION},
+ {"colour", optional_argument, NULL, COLOR_OPTION},
+ {"count", no_argument, NULL, 'c'},
+ {"devices", required_argument, NULL, 'D'},
+ {"directories", required_argument, NULL, 'd'},
+ {"exclude", required_argument, NULL, EXCLUDE_OPTION},
+ {"exclude-from", required_argument, NULL, EXCLUDE_FROM_OPTION},
+ {"exclude-dir", required_argument, NULL, EXCLUDE_DIRECTORY_OPTION},
+ {"file", required_argument, NULL, 'f'},
+ {"files-with-matches", no_argument, NULL, 'l'},
+ {"files-without-match", no_argument, NULL, 'L'},
+ {"group-separator", required_argument, NULL, GROUP_SEPARATOR_OPTION},
+ {"help", no_argument, &show_help, 1},
+ {"include", required_argument, NULL, INCLUDE_OPTION},
+ {"ignore-case", no_argument, NULL, 'i'},
+ {"initial-tab", no_argument, NULL, 'T'},
+ {"label", required_argument, NULL, LABEL_OPTION},
+ {"line-buffered", no_argument, NULL, LINE_BUFFERED_OPTION},
+ {"line-number", no_argument, NULL, 'n'},
+ {"line-regexp", no_argument, NULL, 'x'},
+ {"max-count", required_argument, NULL, 'm'},
+
+ {"no-filename", no_argument, NULL, 'h'},
+ {"no-group-separator", no_argument, NULL, GROUP_SEPARATOR_OPTION},
+ {"no-messages", no_argument, NULL, 's'},
+ {"null", no_argument, NULL, 'Z'},
+ {"null-data", no_argument, NULL, 'z'},
+ {"only-matching", no_argument, NULL, 'o'},
+ {"quiet", no_argument, NULL, 'q'},
+ {"recursive", no_argument, NULL, 'r'},
+ {"dereference-recursive", no_argument, NULL, 'R'},
+ {"regexp", required_argument, NULL, 'e'},
+ {"invert-match", no_argument, NULL, 'v'},
+ {"silent", no_argument, NULL, 'q'},
+ {"text", no_argument, NULL, 'a'},
+ {"binary", no_argument, NULL, 'U'},
+ {"unix-byte-offsets", no_argument, NULL, 'u'},
+ {"version", no_argument, NULL, 'V'},
+ {"with-filename", no_argument, NULL, 'H'},
+ {"word-regexp", no_argument, NULL, 'w'},
+ {0, 0, 0, 0}
+};
+
+/* Define flags declared in grep.h. */
+bool match_icase;
+bool match_words;
+bool match_lines;
+char eolbyte;
+
+static char const *matcher;
+
+/* For error messages. */
+/* The input file name, or (if standard input) "-" or a --label argument. */
+static char const *filename;
+/* Omit leading "./" from file names in diagnostics. */
+static bool omit_dot_slash;
+static bool errseen;
+
+/* True if output from the current input file has been suppressed
+ because an output line had an encoding error. */
+static bool encoding_error_output;
+
+enum directories_type
+ {
+ READ_DIRECTORIES = 2,
+ RECURSE_DIRECTORIES,
+ SKIP_DIRECTORIES
+ };
+
+/* How to handle directories. */
+static char const *const directories_args[] =
+{
+ "read", "recurse", "skip", NULL
+};
+static enum directories_type const directories_types[] =
+{
+ READ_DIRECTORIES, RECURSE_DIRECTORIES, SKIP_DIRECTORIES
+};
+ARGMATCH_VERIFY (directories_args, directories_types);
+
+static enum directories_type directories = READ_DIRECTORIES;
+
+enum { basic_fts_options = FTS_CWDFD | FTS_NOSTAT | FTS_TIGHT_CYCLE_CHECK };
+static int fts_options = basic_fts_options | FTS_COMFOLLOW | FTS_PHYSICAL;
+
+/* How to handle devices. */
+static enum
+ {
+ READ_COMMAND_LINE_DEVICES,
+ READ_DEVICES,
+ SKIP_DEVICES
+ } devices = READ_COMMAND_LINE_DEVICES;
+
+static bool grepfile (int, char const *, bool, bool);
+static bool grepdesc (int, bool);
+
+static void dos_binary (void);
+static void dos_unix_byte_offsets (void);
+static size_t undossify_input (char *, size_t);
+
+/* Return true if the file mode M denotes a character device, a block
+   device, a socket or a FIFO.  */
+static bool
+is_device_mode (mode_t m)
+{
+  if (S_ISCHR (m) || S_ISBLK (m))
+    return true;
+
+  return S_ISSOCK (m) || S_ISFIFO (m);
+}
+
+/* Return true if devices should be skipped.  COMMAND_LINE says
+   whether the file was named on the command line; it matters only
+   when DEVICES is READ_COMMAND_LINE_DEVICES.  */
+static bool
+skip_devices (bool command_line)
+{
+  if (devices == SKIP_DEVICES)
+    return true;
+
+  if (devices == READ_COMMAND_LINE_DEVICES)
+    return ! command_line;
+
+  return false;
+}
+
+/* Return true if ST->st_size is defined, i.e. if ST describes a
+   regular file, a shared memory object or a typed memory object.
+   Assume the file is not a symbolic link.  */
+static bool
+usable_st_size (struct stat const *st)
+{
+  if (S_ISREG (st->st_mode))
+    return true;
+
+  return S_TYPEISSHM (st) || S_TYPEISTMO (st);
+}
+
+/* Lame substitutes for SEEK_DATA and SEEK_HOLE on platforms lacking them.
+ Do not rely on these finding data or holes if they equal SEEK_SET. */
+#ifndef SEEK_DATA
+enum { SEEK_DATA = SEEK_SET };
+#endif
+#ifndef SEEK_HOLE
+enum { SEEK_HOLE = SEEK_SET };
+#endif
+
+/* Functions we'll use to search. */
+typedef void (*compile_fp_t) (char const *, size_t);
+typedef size_t (*execute_fp_t) (char *, size_t, size_t *, char const *);
+static compile_fp_t compile;
+static execute_fp_t execute;
+
+/* Report MESG with the error number ERRNUM via error, unless
+   diagnostics are being suppressed.  Either way, remember that an
+   error was seen.  */
+static void
+suppressible_error (char const *mesg, int errnum)
+{
+  bool const report = ! suppress_errors;
+
+  if (report)
+    error (0, errnum, "%s", mesg);
+
+  errseen = true;
+}
+
+/* Close standard output, but only if no write error has been recorded
+   in stdout_errno; closing after a failed write might elicit a
+   duplicate diagnostic.  */
+static void
+clean_up_stdout (void)
+{
+  if (stdout_errno)
+    return;
+
+  close_stdout ();
+}
+
+/* A cast to TYPE of VAL. Use this when TYPE is a pointer type, VAL
+ is properly aligned for TYPE, and 'gcc -Wcast-align' cannot infer
+ the alignment and would otherwise complain about the cast. */
+#if 4 < __GNUC__ + (6 <= __GNUC_MINOR__)
+# define CAST_ALIGNED(type, val) \
+ ({ __typeof__ (val) val_ = val; \
+ _Pragma ("GCC diagnostic push") \
+ _Pragma ("GCC diagnostic ignored \"-Wcast-align\"") \
+ (type) val_; \
+ _Pragma ("GCC diagnostic pop") \
+ })
+#else
+# define CAST_ALIGNED(type, val) ((type) (val))
+#endif
+
+/* An unsigned type suitable for fast matching. */
+typedef uintmax_t uword;
+
+/* A mask to test for unibyte characters, with the pattern repeated to
+ fill a uword. For a multibyte character encoding where
+ all bytes are unibyte characters, this is 0. For UTF-8, this is
+ 0x808080.... For encodings where unibyte characters have no discerned
+ pattern, this is all 1s. The unsigned char C is a unibyte
+ character if C & UNIBYTE_MASK is zero. If the uword W is the
+ concatenation of bytes, the bytes are all unibyte characters
+ if W & UNIBYTE_MASK is zero. */
+static uword unibyte_mask;
+
+/* Compute unibyte_mask from mbclen_cache.  */
+static void
+initialize_unibyte_mask (void)
+{
+  /* Walk the byte values in increasing order.  Whenever a byte that
+     is not a unibyte character slips past the mask built so far, OR
+     its most significant 1 bit into the mask.  Any 1 bit of the byte
+     would do, but in practice high-order bits work better.  */
+  unsigned char byte_mask = 0;
+  int high_bit = 1;
+
+  for (int i = 1; i <= UCHAR_MAX; i++)
+    {
+      if (mbclen_cache[i] == 1 || (byte_mask & i))
+        continue;
+
+      /* HIGH_BIT only ever grows, since I only ever grows.  */
+      while (high_bit * 2 <= i)
+        high_bit *= 2;
+      byte_mask |= high_bit;
+    }
+
+  /* BYTE_MASK now flags every byte that is not a unibyte character,
+     although it may cry wolf and may not be optimal.  Replicate it
+     into every byte of a uword.  */
+  uword all_ones = -1;
+  unibyte_mask = all_ones / UCHAR_MAX * byte_mask;
+}
+
+/* Skip the easy bytes in a buffer that is guaranteed to have a sentinel
+ that is not easy, and return a pointer to the first non-easy byte.
+ The easy bytes all have UNIBYTE_MASK off.
+ BUF itself is never written. Without the sentinel the loops below
+ have no other stopping condition. */
+static char const * _GL_ATTRIBUTE_PURE
+skip_easy_bytes (char const *buf)
+{
+ /* Search a byte at a time until the pointer is aligned, then a
+ uword at a time until a match is found, then a byte at a time to
+ identify the exact byte. The uword search may go slightly past
+ the buffer end, but that's benign. */
+ char const *p;
+ uword const *s;
+ for (p = buf; (uintptr_t) p % sizeof (uword) != 0; p++) /* Unaligned prefix. */
+ if (to_uchar (*p) & unibyte_mask)
+ return p;
+ for (s = CAST_ALIGNED (uword const *, p); ! (*s & unibyte_mask); s++) /* Whole words. */
+ continue;
+ for (p = (char const *) s; ! (to_uchar (*p) & unibyte_mask); p++) /* Pinpoint the byte. */
+ continue;
+ return p;
+}
+
+/* Return true if BUF, of size SIZE, has an encoding error.
+ BUF must be followed by at least sizeof (uword) bytes,
+ the first of which may be modified.
+ When unibyte_mask is zero the answer is always false and BUF is
+ not touched. */
+bool
+buf_has_encoding_errors (char *buf, size_t size)
+{
+ if (! unibyte_mask)
+ return false;
+
+ mbstate_t mbs = { 0 };
+ size_t clen;
+
+ buf[size] = -1; /* Sentinel: all bits set, so skip_easy_bytes stops here at the latest. */
+ for (char const *p = buf; (p = skip_easy_bytes (p)) < buf + size; p += clen)
+ {
+ clen = mbrlen (p, buf + size - p, &mbs);
+ if ((size_t) -2 <= clen) /* mbrlen returned (size_t) -1 or (size_t) -2. */
+ return true;
+ }
+
+ return false;
+}
+
+
+/* Return true if the SIZE bytes at BUF include a null byte.
+   BUF must be followed by at least one more byte, which this
+   function overwrites with a terminating null.  */
+static bool
+buf_has_nulls (char *buf, size_t size)
+{
+  /* With a terminator planted at BUF[SIZE], strlen cannot run past
+     it, so a shorter length means a null inside the data.  */
+  buf[size] = '\0';
+
+  size_t first_nul = strlen (buf);
+  return first_nul < size;
+}
+
+/* Return true if a file is known to contain null bytes.
+ SIZE bytes have already been read from the file
+ with descriptor FD and status ST.
+ The answer is true only when ST has a usable size larger than SIZE
+ and lseek with SEEK_HOLE reports a hole before the end of the file.
+ In every other case, including lseek failure and platforms where
+ SEEK_HOLE equals SEEK_SET, the answer is false. */
+static bool
+file_must_have_nulls (size_t size, int fd, struct stat const *st)
+{
+ if (usable_st_size (st))
+ {
+ if (st->st_size <= size) /* Nothing beyond what was already read. */
+ return false;
+
+ /* If the file has holes, it must contain a null byte somewhere. */
+ if (SEEK_HOLE != SEEK_SET)
+ {
+ off_t cur = size;
+ if (O_BINARY || fd == STDIN_FILENO) /* Do not assume the offset is SIZE; ask. */
+ {
+ cur = lseek (fd, 0, SEEK_CUR);
+ if (cur < 0)
+ return false;
+ }
+
+ /* Look for a hole after the current location. */
+ off_t hole_start = lseek (fd, cur, SEEK_HOLE);
+ if (0 <= hole_start)
+ {
+ if (lseek (fd, cur, SEEK_SET) < 0) /* Put the file offset back at CUR. */
+ suppressible_error (filename, errno);
+ if (hole_start < st->st_size)
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+/* Parse STR as a decimal context length and store it in *OUT.
+   An overflowing value is accepted as parsed, since whatever
+   xstrtoimax stores is practically infinite for grep's purposes.
+   Anything else that fails to parse, and any negative value, is
+   reported as an error with exit status EXIT_TROUBLE.  */
+static void
+context_length_arg (char const *str, intmax_t *out)
+{
+  int status = xstrtoimax (str, 0, 10, out, "");
+  bool parsed = status == LONGINT_OK || status == LONGINT_OVERFLOW;
+
+  if (parsed && 0 <= *out)
+    return;
+
+  error (EXIT_TROUBLE, 0, "%s: %s", str,
+         _("invalid context length argument"));
+}
+
+/* Return the add_exclude options suitable for excluding a file name.
+   Wildcards are always enabled; patterns are anchored as well unless
+   COMMAND_LINE says the name came from the command line.  */
+static int
+exclude_options (bool command_line)
+{
+  int opts = EXCLUDE_WILDCARDS;
+
+  if (! command_line)
+    opts |= EXCLUDE_ANCHORED;
+
+  return opts;
+}
+
+/* Return true if the file with NAME should be skipped.
+   COMMAND_LINE says whether NAME is a command-line argument, and
+   IS_DIR whether it is a directory.  */
+static bool
+skipped_file (char const *name, bool command_line, bool is_dir)
+{
+  struct exclude **pats = excluded_patterns;
+
+  if (is_dir)
+    {
+      if (directories == SKIP_DIRECTORIES)
+        return true;
+      if (command_line && omit_dot_slash)
+        return false;
+      pats = excluded_directory_patterns;
+    }
+
+  /* Each table has one slot per value of COMMAND_LINE.  */
+  struct exclude *list = pats[command_line];
+  return list && excluded_file_name (list, name);
+}
+
+/* Hairy buffering mechanism for grep. The intent is to keep
+ all reads aligned on a page boundary and multiples of the
+ page size, unless a read yields a partial page. */
+
+static char *buffer; /* Base of buffer. */
+static size_t bufalloc; /* Allocated buffer size, counting slop. */
+#define INITIAL_BUFSIZE 32768 /* Initial buffer size, not counting slop. */
+static int bufdesc; /* File descriptor. */
+static char *bufbeg; /* Beginning of user-visible stuff. */
+static char *buflim; /* Limit of user-visible stuff. */
+static size_t pagesize; /* alignment of memory pages */
+static off_t bufoffset; /* Read offset; defined on regular files. */
+static off_t after_last_match; /* Pointer after last matching line that
+ would have been output if we were
+ outputting characters. */
+static bool skip_nuls; /* Skip '\0' in data. */
+static bool skip_empty_lines; /* Skip empty lines in data. */
+static bool seek_data_failed; /* lseek with SEEK_DATA failed. */
+static uintmax_t totalnl; /* Total newline count before lastnl. */
+
+/* Return VAL aligned to the next multiple of ALIGNMENT. VAL can be
+ an integer or a pointer. Both args must be free of side effects. */
+#define ALIGN_TO(val, alignment) \
+ ((size_t) (val) % (alignment) == 0 \
+ ? (val) \
+ : (val) + ((alignment) - (size_t) (val) % (alignment)))
+
+/* Return A + B, where both count input bytes or lines.  Report an
+   error with exit status EXIT_TROUBLE if the sum does not fit in
+   uintmax_t.  */
+static uintmax_t
+add_count (uintmax_t a, uintmax_t b)
+{
+  /* The sum wraps around exactly when B exceeds the room left
+     above A.  */
+  if (UINTMAX_MAX - a < b)
+    error (EXIT_TROUBLE, 0, _("input is too large to count"));
+
+  return a + b;
+}
+
+/* Return true if every one of the SIZE bytes at BUF is zero.
+   An empty buffer counts as all zeros.  */
+static bool
+all_zeros (char const *buf, size_t size)
+{
+  size_t i = 0;
+
+  while (i < size && buf[i] == 0)
+    i++;
+
+  return i == size;
+}
+
+/* Reset the buffer for a new file, returning false if we should skip it.
+ Initialize on the first time through.
+ FD is the descriptor to read from and ST its status. False is
+ returned only when FD is standard input on a regular file and its
+ current offset cannot be obtained with lseek. */
+static bool
+reset (int fd, struct stat const *st)
+{
+ if (! pagesize) /* First call: the buffer has not been allocated yet. */
+ {
+ pagesize = getpagesize ();
+ if (pagesize == 0 || 2 * pagesize + 1 <= pagesize) /* Zero, or 2 * pagesize + 1 wraps around. */
+ abort ();
+ bufalloc = (ALIGN_TO (INITIAL_BUFSIZE, pagesize)
+ + pagesize + sizeof (uword));
+ buffer = xmalloc (bufalloc);
+ }
+
+ bufbeg = buflim = ALIGN_TO (buffer + 1, pagesize); /* Leaves at least one byte before bufbeg. */
+ bufbeg[-1] = eolbyte; /* Line terminator stored just before the (empty) contents. */
+ bufdesc = fd;
+
+ if (S_ISREG (st->st_mode))
+ {
+ if (fd != STDIN_FILENO)
+ bufoffset = 0;
+ else
+ {
+ bufoffset = lseek (fd, 0, SEEK_CUR); /* Standard input may not be at offset 0. */
+ if (bufoffset < 0)
+ {
+ suppressible_error (_("lseek failed"), errno);
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+/* Read new stuff into the buffer, saving the specified
+ amount of old stuff. When we're done, 'bufbeg' points
+ to the beginning of the buffer contents, and 'buflim'
+ points just after the end. Return false if there's an error.
+
+ SAVE is the number of bytes just before 'buflim' to preserve; they
+ end up immediately before the newly read data. ST is the status
+ of the file being read; it is consulted only to bound buffer
+ growth and to interpret a SEEK_DATA failure. If less than a page
+ of room remains after 'buflim', the saved bytes are first moved
+ toward the start of the buffer, which is replaced by a larger
+ allocation when necessary. When 'skip_nuls' is set, a read that
+ consists entirely of zero bytes is discarded, its length is added
+ to 'totalnl', and reading continues. */
+static bool
+fillbuf (size_t save, struct stat const *st)
+{
+ size_t fillsize;
+ bool cc = true; /* Return value; cleared if safe_read fails. */
+ char *readbuf;
+ size_t readsize;
+
+ /* Offset from start of buffer to start of old stuff
+ that we want to save. */
+ size_t saved_offset = buflim - save - buffer;
+
+ if (pagesize <= buffer + bufalloc - sizeof (uword) - buflim)
+ {
+ readbuf = buflim; /* Enough room: append in place. */
+ bufbeg = buflim - save;
+ }
+ else
+ {
+ size_t minsize = save + pagesize;
+ size_t newsize;
+ size_t newalloc;
+ char *newbuf;
+
+ /* Grow newsize until it is at least as great as minsize. */
+ for (newsize = bufalloc - pagesize - sizeof (uword);
+ newsize < minsize;
+ newsize *= 2)
+ if ((SIZE_MAX - pagesize - sizeof (uword)) / 2 < newsize)
+ xalloc_die ();
+
+ /* Try not to allocate more memory than the file size indicates,
+ as that might cause unnecessary memory exhaustion if the file
+ is large. However, do not use the original file size as a
+ heuristic if we've already read past the file end, as most
+ likely the file is growing. */
+ if (usable_st_size (st))
+ {
+ off_t to_be_read = st->st_size - bufoffset;
+ off_t maxsize_off = save + to_be_read;
+ if (0 <= to_be_read && to_be_read <= maxsize_off
+ && maxsize_off == (size_t) maxsize_off
+ && minsize <= (size_t) maxsize_off
+ && (size_t) maxsize_off < newsize)
+ newsize = maxsize_off;
+ }
+
+ /* Add enough room so that the buffer is aligned and has room
+ for byte sentinels fore and aft, and so that a uword can
+ be read aft. */
+ newalloc = newsize + pagesize + sizeof (uword);
+
+ newbuf = bufalloc < newalloc ? xmalloc (bufalloc = newalloc) : buffer; /* Allocate only to grow. */
+ readbuf = ALIGN_TO (newbuf + 1 + save, pagesize);
+ bufbeg = readbuf - save;
+ memmove (bufbeg, buffer + saved_offset, save);
+ bufbeg[-1] = eolbyte; /* Sentinel before the data. */
+ if (newbuf != buffer)
+ {
+ free (buffer);
+ buffer = newbuf;
+ }
+ }
+
+ clear_asan_poison ();
+
+ readsize = buffer + bufalloc - sizeof (uword) - readbuf;
+ readsize -= readsize % pagesize; /* Read whole pages only. */
+
+ while (true)
+ {
+ fillsize = safe_read (bufdesc, readbuf, readsize);
+ if (fillsize == SAFE_READ_ERROR)
+ {
+ fillsize = 0;
+ cc = false;
+ }
+ bufoffset += fillsize;
+
+ if (fillsize == 0 || !skip_nuls || !all_zeros (readbuf, fillsize))
+ break;
+ totalnl = add_count (totalnl, fillsize); /* One line per skipped byte. */
+
+ if (SEEK_DATA != SEEK_SET && !seek_data_failed)
+ {
+ /* Solaris SEEK_DATA fails with errno == ENXIO in a hole at EOF. */
+ off_t data_start = lseek (bufdesc, bufoffset, SEEK_DATA);
+ if (data_start < 0 && errno == ENXIO
+ && usable_st_size (st) && bufoffset < st->st_size)
+ data_start = lseek (bufdesc, 0, SEEK_END);
+
+ if (data_start < 0)
+ seek_data_failed = true;
+ else
+ {
+ totalnl = add_count (totalnl, data_start - bufoffset);
+ bufoffset = data_start;
+ }
+ }
+ }
+
+ fillsize = undossify_input (readbuf, fillsize);
+ buflim = readbuf + fillsize;
+
+ /* Initialize the following word, because skip_easy_bytes and some
+ matchers read (but do not use) those bytes. This avoids false
+ positive reports of these bytes being used uninitialized. */
+ memset (buflim, 0, sizeof (uword));
+
+ /* Mark the part of the buffer not filled by the read or set by
+ the above memset call as ASAN-poisoned. */
+ asan_poison (buflim + sizeof (uword),
+ bufalloc - (buflim - buffer) - sizeof (uword));
+
+ return cc;
+}
+
+/* Flags controlling the style of output. The values of
+ 'binary_files' select how the function 'grep' treats input in
+ which it detects null bytes. */
+static enum
+{
+ BINARY_BINARY_FILES, /* Report a match as "Binary file ... matches". */
+ TEXT_BINARY_FILES, /* Do no binary detection; treat input as text. */
+ WITHOUT_MATCH_BINARY_FILES /* Treat such a file as having no match. */
+} binary_files; /* How to handle binary files. */
+
+static int filename_mask; /* If zero, output nulls after filenames. */
+static bool out_quiet; /* Suppress all normal output. */
+static bool out_invert; /* Print nonmatching stuff. */
+static int out_file; /* Print filenames if nonzero; grepdirent sets
+ and clears the bit with value 2 while
+ recursing. */
+static bool out_line; /* Print line numbers. */
+static bool out_byte; /* Print byte offsets. */
+static intmax_t out_before; /* Lines of leading context. */
+static intmax_t out_after; /* Lines of trailing context. */
+static bool count_matches; /* Count matching lines. */
+static int list_files; /* List matching files: grepdesc prints the
+ name when this is 1 and lines were selected,
+ or when this is -1 and none were. */
+static bool no_filenames; /* Suppress file names. */
+static intmax_t max_count; /* Stop after outputting this many
+ lines from an input file. */
+static bool line_buffered; /* Use line buffering. */
+static char *label = NULL; /* Fake filename for stdin */
+
+
+/* Internal variables to keep track of byte count, context, etc. */
+static uintmax_t totalcc; /* Total character count before bufbeg. */
+static char const *lastnl; /* Pointer after last newline counted. */
+static char *lastout; /* Pointer after last character output;
+ NULL if no character has been output
+ or if it's conceptually before bufbeg. */
+static intmax_t outleft; /* Maximum number of lines to be output. */
+static intmax_t pending; /* Pending lines of output.
+ Always kept 0 if out_quiet is true. */
+static bool done_on_match; /* Stop scanning file on first match. */
+static bool exit_on_match; /* Exit on first match. */
+
+#include "dosbuf.c"
+
+/* Count the end-of-line bytes in the range from 'lastnl' up to LIM,
+   add that count to 'totalnl', and advance 'lastnl' to LIM.  */
+static void
+nlscan (char const *lim)
+{
+  size_t count = 0;
+  char const *cursor = lastnl;
+
+  while (cursor < lim)
+    {
+      char const *hit = memchr (cursor, eolbyte, lim - cursor);
+      if (hit == NULL)
+        break;
+      count++;
+      /* Resume just past the end-of-line byte that was found.  */
+      cursor = hit + 1;
+    }
+
+  totalnl = add_count (totalnl, count);
+  lastnl = lim;
+}
+
+/* Print the current filename, i.e. the file-scope 'filename', via
+ fputs_errno, bracketed by pr_sgr_start_if and pr_sgr_end_if for
+ 'filename_color'. */
+static void
+print_filename (void)
+{
+ pr_sgr_start_if (filename_color);
+ fputs_errno (filename);
+ pr_sgr_end_if (filename_color);
+}
+
+/* Print a character separator: the single byte SEP via putchar_errno,
+ bracketed by pr_sgr_start_if and pr_sgr_end_if for 'sep_color'. */
+static void
+print_sep (char sep)
+{
+ pr_sgr_start_if (sep_color);
+ putchar_errno (sep);
+ pr_sgr_end_if (sep_color);
+}
+
+/* Print POS, which is a line number or a byte offset, in decimal,
+   bracketed by the COLOR start/end sequences.  When 'align_tabs' is
+   set, pad on the left with spaces to at least MIN_WIDTH
+   characters.  */
+static void
+print_offset (uintmax_t pos, int min_width, const char *color)
+{
+  /* Format by hand instead of relying on printf, since uintmax_t may
+     be longer than long, and long long is not portable.  The digits
+     are produced from the end of the buffer toward its start.  */
+  char digits[sizeof pos * CHAR_BIT];
+  char *const digits_end = digits + sizeof digits;
+  char *start = digits_end;
+
+  for (;;)
+    {
+      start--;
+      *start = '0' + pos % 10;
+      min_width--;
+      pos /= 10;
+      if (pos == 0)
+        break;
+    }
+
+  /* Do this to maximize the probability of alignment across lines.  */
+  if (align_tabs)
+    for (; 0 < min_width; min_width--)
+      *--start = ' ';
+
+  pr_sgr_start_if (color);
+  fwrite_errno (start, 1, digits_end - start);
+  pr_sgr_end_if (color);
+}
+
+/* Print the head of an output line: whichever of file name, line
+   number and byte offset are enabled, each followed by SEP (or, for
+   the file name when 'filename_mask' is zero, by a zero byte).
+
+   The output data starts at BEG and contains LEN bytes; it is
+   followed by at least sizeof (uword) bytes, the first of which may
+   be temporarily modified.  The data comes from what is perhaps a
+   larger input line that goes until LIM, where LIM[-1] is an
+   end-of-line byte.
+
+   Return true unless the line was suppressed due to an encoding
+   error, in which case nothing is printed and 'encoding_error_output',
+   'done_on_match' and 'out_quiet' are all set.  */
+
+static bool
+print_line_head (char *beg, size_t len, char const *lim, char sep)
+{
+  if (binary_files != TEXT_BINARY_FILES)
+    {
+      /* Put BEG[LEN] back after the check, which may have changed it.  */
+      char saved = beg[len];
+      bool bad_encoding = buf_has_encoding_errors (beg, len);
+      beg[len] = saved;
+
+      if (bad_encoding)
+        {
+          encoding_error_output = done_on_match = out_quiet = true;
+          return false;
+        }
+    }
+
+  /* True when a field has been printed that still needs SEP after it.  */
+  bool need_sep = false;
+
+  if (out_file)
+    {
+      print_filename ();
+      if (filename_mask)
+        need_sep = true;
+      else
+        putchar_errno (0);
+    }
+
+  if (out_line)
+    {
+      if (lastnl < lim)
+        {
+          /* Count the lines before BEG, then this line itself.  */
+          nlscan (beg);
+          totalnl = add_count (totalnl, 1);
+          lastnl = lim;
+        }
+      if (need_sep)
+        print_sep (sep);
+      print_offset (totalnl, 4, line_num_color);
+      need_sep = true;
+    }
+
+  if (out_byte)
+    {
+      uintmax_t pos = add_count (totalcc, beg - bufbeg);
+      pos = dossified_pos (pos);
+      if (need_sep)
+        print_sep (sep);
+      print_offset (pos, 6, byte_num_color);
+      need_sep = true;
+    }
+
+  if (!need_sep)
+    return true;
+
+  /* This assumes sep is one column wide.
+     Try doing this any other way with Unicode
+     (and its combining and wide characters)
+     filenames and you're wasting your efforts.  */
+  if (align_tabs)
+    fputs_errno ("\t\b");
+
+  print_sep (sep);
+  return true;
+}
+
+static char *
+print_line_middle (char *beg, char *lim,
+ const char *line_color, const char *match_color)
+{ /* Walk the matches that 'execute' finds in the line [BEG, LIM).
+ With 'only_matching', print each non-empty match as its own
+ output line (line head, the match in MATCH_COLOR, an end-of-line
+ byte). Otherwise start LINE_COLOR, print the text that precedes
+ each non-empty match, then print the match in MATCH_COLOR.
+ Return a pointer to the first byte of the line that has not been
+ printed (LIM with 'only_matching'), or NULL if print_line_head
+ suppressed the output. */
+ size_t match_size;
+ size_t match_offset;
+ char *cur;
+ char *mid = NULL; /* Start of unprinted text skipped by empty matches. */
+ char *b;
+
+ for (cur = beg;
+ (cur < lim
+ && ((match_offset = execute (beg, lim - beg, &match_size, cur))
+ != (size_t) -1));
+ cur = b + match_size)
+ {
+ b = beg + match_offset;
+
+ /* Avoid matching the empty line at the end of the buffer. */
+ if (b == lim)
+ break;
+
+ /* Avoid hanging on grep --color "" foo */
+ if (match_size == 0)
+ {
+ /* Make minimal progress; there may be further non-empty matches. */
+ /* XXX - Could really advance by one whole multi-octet character. */
+ match_size = 1;
+ if (!mid)
+ mid = cur;
+ }
+ else
+ {
+ /* This function is called on a matching line only,
+ but is it selected or rejected/context? */
+ if (only_matching)
+ {
+ char sep = out_invert ? SEP_CHAR_REJECTED : SEP_CHAR_SELECTED;
+ if (! print_line_head (b, match_size, lim, sep))
+ return NULL;
+ }
+ else
+ {
+ pr_sgr_start (line_color);
+ if (mid)
+ {
+ cur = mid; /* Include text passed over by empty matches. */
+ mid = NULL;
+ }
+ fwrite_errno (cur, 1, b - cur);
+ }
+
+ pr_sgr_start_if (match_color);
+ fwrite_errno (b, 1, match_size);
+ pr_sgr_end_if (match_color);
+ if (only_matching)
+ putchar_errno (eolbyte);
+ }
+ }
+
+ if (only_matching)
+ cur = lim;
+ else if (mid)
+ cur = mid;
+
+ return cur;
+}
+
+/* Print the tail of a line: the bytes from BEG up to, but not
+   including, the line terminator that ends at LIM.  The terminator
+   is an optional '\r' followed by an optional 'eolbyte'.  The bytes
+   are bracketed by pr_sgr_start and pr_sgr_end for LINE_COLOR.
+   Print nothing if that span is empty.  Return a pointer just past
+   the last byte printed (BEG itself if nothing was printed), so the
+   caller can output the terminator outside the colored region.  */
+static char *
+print_line_tail (char *beg, const char *lim, const char *line_color)
+{
+  /* Step END back over the terminator one byte at a time.  This uses
+     only in-range pointer arithmetic; indexing LIM with a negated
+     unsigned size would instead rely on wraparound of a huge
+     index.  */
+  char const *end = lim;
+  if (beg < end && end[-1] == eolbyte)
+    end--;
+  if (beg < end && end[-1] == '\r')
+    end--;
+
+  /* Comparing the pointers also keeps a LIM that precedes BEG from
+     being turned into an enormous unsigned length.  */
+  if (beg < end)
+    {
+      size_t tail_size = end - beg;
+      pr_sgr_start (line_color);
+      fwrite_errno (beg, 1, tail_size);
+      beg += tail_size;
+      pr_sgr_end (line_color);
+    }
+
+  return beg;
+}
+
+static void
+prline (char *beg, char *lim, char sep)
+{ /* Print the input line [BEG, LIM), where LIM is just past its
+ end-of-line byte. SEP is SEP_CHAR_SELECTED for a selected line
+ and is otherwise the separator for a context line. Unless
+ 'only_matching' is set, the line head is printed first; the body
+ is then printed with coloring or match extraction as configured.
+ On a pending stdout error, exit via 'error' with EXIT_TROUBLE.
+ Set 'lastout' to LIM, except on the early returns taken when the
+ line head or the match printer suppresses the line. */
+ bool matching;
+ const char *line_color;
+ const char *match_color;
+
+ if (!only_matching)
+ if (! print_line_head (beg, lim - beg - 1, lim, sep))
+ return;
+
+ matching = (sep == SEP_CHAR_SELECTED) ^ out_invert; /* Line contains a match. */
+
+ if (color_option)
+ {
+ line_color = (((sep == SEP_CHAR_SELECTED)
+ ^ (out_invert && (color_option < 0)))
+ ? selected_line_color : context_line_color);
+ match_color = (sep == SEP_CHAR_SELECTED
+ ? selected_match_color : context_match_color);
+ }
+ else
+ line_color = match_color = NULL; /* Shouldn't be used. */
+
+ if ((only_matching && matching)
+ || (color_option && (*line_color || *match_color)))
+ {
+ /* We already know that non-matching lines have no match (to colorize). */
+ if (matching && (only_matching || *match_color))
+ {
+ beg = print_line_middle (beg, lim, line_color, match_color);
+ if (! beg)
+ return;
+ }
+
+ if (!only_matching && *line_color)
+ {
+ /* This code is exercised at least when grep is invoked like this:
+ echo k| GREP_COLORS='sl=01;32' src/grep k --color=always */
+ beg = print_line_tail (beg, lim, line_color);
+ }
+ }
+
+ if (!only_matching && lim > beg)
+ fwrite_errno (beg, 1, lim - beg); /* Whatever has not been printed yet. */
+
+ if (line_buffered)
+ fflush_errno ();
+
+ if (stdout_errno)
+ error (EXIT_TROUBLE, stdout_errno, _("write error"));
+
+ lastout = lim;
+}
+
+/* Print the lines of trailing context that are still pending and
+   that lie before LIM, starting just after the last output (or at
+   'bufbeg' if nothing has been output).  When 'outleft' is 0,
+   trailing context ends at the next line that would itself be
+   selected, and 'pending' is then reset to 0.  */
+static void
+prpending (char const *lim)
+{
+  if (lastout == NULL)
+    lastout = bufbeg;
+
+  while (0 < pending && lastout < lim)
+    {
+      char *line_end = memchr (lastout, eolbyte, lim - lastout);
+      char *next_line = line_end + 1;
+      pending--;
+
+      /* While more output is allowed, context is printed without
+         running the matcher; otherwise only lines that would not be
+         selected are printed.  */
+      bool print_it = outleft != 0;
+      if (!print_it)
+        {
+          size_t match_size;
+          bool no_match = (execute (lastout, next_line - lastout,
+                                    &match_size, NULL)
+                           == (size_t) -1);
+          print_it = no_match == !out_invert;
+        }
+
+      if (print_it)
+        prline (lastout, next_line, SEP_CHAR_REJECTED);
+      else
+        pending = 0;
+    }
+}
+
+/* Output the lines between BEG and LIM. Deal with context.
+
+ Unless 'out_quiet' is set, first print any pending trailing
+ context that precedes BEG, then up to 'out_before' lines of
+ leading context, preceded by the group separator when this output
+ is not adjacent to the previous output. With 'out_invert', BEG
+ to LIM may hold several lines and at most 'outleft' of them are
+ output; otherwise it is treated as a single line. Afterwards
+ update 'after_last_match', re-arm 'pending' from 'out_after', and
+ subtract the number of lines handled from 'outleft'. */
+static void
+prtext (char *beg, char *lim)
+{
+ static bool used; /* Avoid printing SEP_STR_GROUP before any output. */
+ char eol = eolbyte;
+
+ if (!out_quiet && pending > 0)
+ prpending (beg);
+
+ char *p = beg;
+
+ if (!out_quiet)
+ {
+ /* Deal with leading context. */
+ char const *bp = lastout ? lastout : bufbeg;
+ intmax_t i;
+ for (i = 0; i < out_before; ++i)
+ if (p > bp)
+ do
+ --p;
+ while (p[-1] != eol); /* Back up to the start of the previous line. */
+
+ /* Print the group separator unless the output is adjacent to
+ the previous output in the file. */
+ if ((0 <= out_before || 0 <= out_after) && used
+ && p != lastout && group_separator)
+ {
+ pr_sgr_start_if (sep_color);
+ fputs_errno (group_separator);
+ pr_sgr_end_if (sep_color);
+ putchar_errno ('\n');
+ }
+
+ while (p < beg)
+ {
+ char *nl = memchr (p, eol, beg - p);
+ nl++;
+ prline (p, nl, SEP_CHAR_REJECTED);
+ p = nl;
+ }
+ }
+
+ intmax_t n; /* Number of lines handled below. */
+ if (out_invert)
+ {
+ /* One or more lines are output. */
+ for (n = 0; p < lim && n < outleft; n++)
+ {
+ char *nl = memchr (p, eol, lim - p);
+ nl++;
+ if (!out_quiet)
+ prline (p, nl, SEP_CHAR_SELECTED);
+ p = nl;
+ }
+ }
+ else
+ {
+ /* Just one line is output. */
+ if (!out_quiet)
+ prline (beg, lim, SEP_CHAR_SELECTED);
+ n = 1;
+ p = lim;
+ }
+
+ after_last_match = bufoffset - (buflim - p); /* File offset corresponding to P. */
+ pending = out_quiet ? 0 : MAX (0, out_after);
+ used = true;
+ outleft -= n;
+}
+
+/* Overwrite every NUL byte in the range from P up to LIM with EOL.
+   This avoids running out of memory when binary input contains a
+   long sequence of zeros, which would otherwise be considered to be
+   part of a long line.  *LIM must be writable and should already
+   hold EOL; it is used briefly as a NUL sentinel and is left holding
+   EOL.  Do nothing if EOL is itself NUL.  */
+static void
+zap_nuls (char *p, char *lim, char eol)
+{
+  if (eol == '\0')
+    return;
+
+  for (;;)
+    {
+      /* Plant a sentinel so that strlen stops at LIM at the latest.  */
+      *lim = '\0';
+      size_t text_len = strlen (p);
+      *lim = eol;
+
+      p += text_len;
+      if (p == lim)
+        return;
+
+      /* P is at a NUL inside the buffer: overwrite the whole run.
+         *LIM is nonzero again, so the run ends at LIM at the latest.  */
+      do
+        {
+          *p = eol;
+          p++;
+        }
+      while (*p == '\0');
+    }
+}
+
+/* Scan the specified portion of the buffer, matching lines (or
+ between matching lines if OUT_INVERT is true). Return a count of
+ lines printed.
+
+ BEG and LIM delimit the text to scan. Each selected span is passed
+ to prtext, which decreases 'outleft'; the value returned here is
+ the resulting drop in 'outleft'. Scanning stops early once
+ 'outleft' reaches zero or when 'done_on_match' is set, and at that
+ point the process exits successfully if 'exit_on_match' is set.
+ NUL bytes are not modified here; the caller 'grep' applies
+ zap_nuls to newly read data before scanning it. */
+static intmax_t
+grepbuf (char *beg, char const *lim)
+{
+ intmax_t outleft0 = outleft;
+ char *endp;
+
+ for (char *p = beg; p < lim; p = endp)
+ {
+ size_t match_size;
+ size_t match_offset = execute (p, lim - p, &match_size, NULL);
+ if (match_offset == (size_t) -1)
+ {
+ if (!out_invert)
+ break;
+ match_offset = lim - p; /* Everything up to LIM is nonmatching. */
+ match_size = 0;
+ }
+ char *b = p + match_offset;
+ endp = b + match_size;
+ /* Avoid matching the empty line at the end of the buffer. */
+ if (!out_invert && b == lim)
+ break;
+ if (!out_invert || p < b)
+ {
+ char *prbeg = out_invert ? p : b;
+ char *prend = out_invert ? b : endp;
+ prtext (prbeg, prend);
+ if (!outleft || done_on_match)
+ {
+ if (exit_on_match)
+ exit (EXIT_SUCCESS);
+ break;
+ }
+ }
+ }
+
+ return outleft0 - outleft;
+}
+
+/* Search a given file. Normally, return a count of lines printed;
+ but if the file is a directory and we search it recursively, then
+ return -2 if there was a match, and -1 otherwise.
+
+ FD is the open descriptor to read and ST is its status. The
+ per-file counters and 'outleft' are reset on entry, and
+ 'done_on_match' and 'out_quiet' are restored to their entry values
+ at the 'finish_grep' label.
+ NOTE(review): every return statement in this body yields 0 or
+ NLINES; no path here produces the negative directory codes
+ described above, although the caller grepdesc still tests for
+ them -- confirm whether that sentence is stale. */
+static intmax_t
+grep (int fd, struct stat const *st)
+{
+ intmax_t nlines, i;
+ size_t residue, save;
+ char oldc;
+ char *beg;
+ char *lim;
+ char eol = eolbyte;
+ char nul_zapper = '\0'; /* Becomes EOL once nulls have been deduced. */
+ bool done_on_match_0 = done_on_match;
+ bool out_quiet_0 = out_quiet;
+
+ /* The value of NLINES when nulls were first deduced in the input;
+ this is not necessarily the same as the number of matching lines
+ before the first null. -1 if no input nulls have been deduced. */
+ intmax_t nlines_first_null = -1;
+
+ if (! reset (fd, st))
+ return 0;
+
+ totalcc = 0;
+ lastout = 0;
+ totalnl = 0;
+ outleft = max_count;
+ after_last_match = 0;
+ pending = 0;
+ skip_nuls = skip_empty_lines && !eol;
+ encoding_error_output = false;
+ seek_data_failed = false;
+
+ nlines = 0;
+ residue = 0;
+ save = 0;
+
+ if (! fillbuf (save, st))
+ {
+ suppressible_error (filename, errno);
+ return 0;
+ }
+
+ for (bool firsttime = true; ; firsttime = false)
+ {
+ if (nlines_first_null < 0 && eol && binary_files != TEXT_BINARY_FILES
+ && (buf_has_nulls (bufbeg, buflim - bufbeg)
+ || (firsttime && file_must_have_nulls (buflim - bufbeg, fd, st))))
+ {
+ if (binary_files == WITHOUT_MATCH_BINARY_FILES)
+ return 0;
+ if (!count_matches)
+ done_on_match = out_quiet = true;
+ nlines_first_null = nlines;
+ nul_zapper = eol;
+ skip_nuls = skip_empty_lines;
+ }
+
+ lastnl = bufbeg;
+ if (lastout)
+ lastout = bufbeg;
+
+ beg = bufbeg + save;
+
+ /* no more data to scan (eof) except for maybe a residue -> break */
+ if (beg == buflim)
+ break;
+
+ zap_nuls (beg, buflim, nul_zapper);
+
+ /* Determine new residue (the length of an incomplete line at the end of
+ the buffer, 0 means there is no incomplete last line). */
+ oldc = beg[-1];
+ beg[-1] = eol;
+ /* FIXME: use rawmemrchr if/when it exists, since we have ensured
+ that this use of memrchr is guaranteed never to return NULL. */
+ lim = memrchr (beg - 1, eol, buflim - beg + 1);
+ ++lim;
+ beg[-1] = oldc;
+ if (lim == beg)
+ lim = beg - residue;
+ beg -= residue;
+ residue = buflim - lim;
+
+ if (beg < lim)
+ {
+ if (outleft)
+ nlines += grepbuf (beg, lim);
+ if (pending)
+ prpending (lim);
+ if ((!outleft && !pending)
+ || (done_on_match && MAX (0, nlines_first_null) < nlines))
+ goto finish_grep;
+ }
+
+ /* The last OUT_BEFORE lines at the end of the buffer will be needed as
+ leading context if there is a matching line at the begin of the
+ next data. Make beg point to their begin. */
+ i = 0;
+ beg = lim;
+ while (i < out_before && beg > bufbeg && beg != lastout)
+ {
+ ++i;
+ do
+ --beg;
+ while (beg[-1] != eol);
+ }
+
+ /* Detect whether leading context is adjacent to previous output. */
+ if (beg != lastout)
+ lastout = 0;
+
+ /* Handle some details and read more data to scan. */
+ save = residue + lim - beg;
+ if (out_byte)
+ totalcc = add_count (totalcc, buflim - bufbeg - save);
+ if (out_line)
+ nlscan (beg);
+ if (! fillbuf (save, st))
+ {
+ suppressible_error (filename, errno);
+ goto finish_grep;
+ }
+ }
+ if (residue) /* The input ended without an end-of-line byte. */
+ {
+ *buflim++ = eol;
+ if (outleft)
+ nlines += grepbuf (bufbeg + save - residue, buflim);
+ if (pending)
+ prpending (buflim);
+ }
+
+ finish_grep:
+ done_on_match = done_on_match_0;
+ out_quiet = out_quiet_0;
+ if (!out_quiet && (encoding_error_output
+ || (0 <= nlines_first_null && nlines_first_null < nlines)))
+ {
+ printf_errno (_("Binary file %s matches\n"), filename);
+ if (line_buffered)
+ fflush_errno ();
+ }
+ return nlines;
+}
+
+static bool
+grepdirent (FTS *fts, FTSENT *ent, bool command_line)
+{ /* Handle ENT, one entry returned by fts_read on the traversal FTS.
+ COMMAND_LINE says whether the traversal was started for a
+ command-line argument; it is honored only for root-level entries.
+ Entries that are skipped or that fail are answered with true;
+ entries that are to be searched are passed to grepfile, whose
+ result is returned. */
+ bool follow;
+ int dirdesc;
+ command_line &= ent->fts_level == FTS_ROOTLEVEL;
+
+ if (ent->fts_info == FTS_DP)
+ {
+ if (directories == RECURSE_DIRECTORIES && command_line)
+ out_file &= ~ (2 * !no_filenames); /* Undo the bit set at FTS_D. */
+ return true;
+ }
+
+ if (!command_line
+ && skipped_file (ent->fts_name, false,
+ (ent->fts_info == FTS_D || ent->fts_info == FTS_DC
+ || ent->fts_info == FTS_DNR)))
+ {
+ fts_set (fts, ent, FTS_SKIP);
+ return true;
+ }
+
+ filename = ent->fts_path;
+ if (omit_dot_slash && filename[1])
+ filename += 2;
+ follow = (fts->fts_options & FTS_LOGICAL
+ || (fts->fts_options & FTS_COMFOLLOW && command_line));
+
+ switch (ent->fts_info)
+ {
+ case FTS_D:
+ if (directories == RECURSE_DIRECTORIES)
+ {
+ out_file |= 2 * !no_filenames;
+ return true;
+ }
+ fts_set (fts, ent, FTS_SKIP);
+ break;
+
+ case FTS_DC:
+ if (!suppress_errors)
+ error (0, 0, _("warning: %s: %s"), filename,
+ _("recursive directory loop"));
+ return true;
+
+ case FTS_DNR:
+ case FTS_ERR:
+ case FTS_NS:
+ suppressible_error (filename, ent->fts_errno);
+ return true;
+
+ case FTS_DEFAULT:
+ case FTS_NSOK:
+ if (skip_devices (command_line))
+ {
+ struct stat *st = ent->fts_statp;
+ struct stat st1;
+ if (! st->st_mode)
+ {
+ /* The file type is not already known. Get the file status
+ before opening, since opening might have side effects
+ on a device. */
+ int flag = follow ? 0 : AT_SYMLINK_NOFOLLOW;
+ if (fstatat (fts->fts_cwd_fd, ent->fts_accpath, &st1, flag) != 0)
+ {
+ suppressible_error (filename, errno);
+ return true;
+ }
+ st = &st1;
+ }
+ if (is_device_mode (st->st_mode))
+ return true;
+ }
+ break;
+
+ case FTS_F:
+ case FTS_SLNONE:
+ break;
+
+ case FTS_SL:
+ case FTS_W:
+ return true; /* Not searched. */
+
+ default:
+ abort ();
+ }
+
+ dirdesc = ((fts->fts_options & (FTS_NOCHDIR | FTS_CWDFD)) == FTS_CWDFD
+ ? fts->fts_cwd_fd
+ : AT_FDCWD);
+ return grepfile (dirdesc, ent->fts_accpath, follow, command_line);
+}
+
+/* Return true if ERR is an errno value that
+   'open ("symlink", ... O_NOFOLLOW ...)' yields when the name is a
+   symbolic link.  POSIX specifies ELOOP, but it's EMLINK on FreeBSD
+   and EFTYPE on NetBSD.  */
+static bool
+open_symlink_nofollow_error (int err)
+{
+  bool refused_symlink = err == ELOOP || err == EMLINK;
+#ifdef EFTYPE
+  refused_symlink = refused_symlink || err == EFTYPE;
+#endif
+  return refused_symlink;
+}
+
+/* Open NAME relative to directory descriptor DIRDESC and search it
+   with grepdesc, returning grepdesc's result.  Symbolic links are
+   followed only if FOLLOW.  COMMAND_LINE says whether the file was
+   named on the command line.  If the open fails, return true; the
+   failure is passed to suppressible_error unless it merely reflects
+   a symbolic link that was deliberately not followed.  */
+static bool
+grepfile (int dirdesc, char const *name, bool follow, bool command_line)
+{
+  int oflag = O_RDONLY | O_NOCTTY;
+  if (!follow)
+    oflag |= O_NOFOLLOW;
+  /* Open without blocking when devices are to be skipped.  */
+  if (skip_devices (command_line))
+    oflag |= O_NONBLOCK;
+
+  int desc = openat_safer (dirdesc, name, oflag);
+  if (0 <= desc)
+    return grepdesc (desc, command_line);
+
+  if (follow || ! open_symlink_nofollow_error (errno))
+    suppressible_error (filename, errno);
+  return true;
+}
+
+static bool
+grepdesc (int desc, bool command_line)
+{ /* Search the file open on descriptor DESC, whose name is in the
+ file-scope 'filename'. COMMAND_LINE says whether it was named on
+ the command line. Devices and directories are skipped or
+ recursed into as configured, and a file that is also the output
+ is refused. Otherwise run 'grep' on DESC, print the count and/or
+ the file name as requested, and reposition standard input when
+ DESC is STDIN_FILENO. DESC is closed before returning unless it
+ is STDIN_FILENO. Return false if lines were selected, true
+ otherwise (including when the file is skipped or fstat fails). */
+ intmax_t count;
+ bool status = true;
+ struct stat st;
+
+ /* Get the file status, possibly for the second time. This catches
+ a race condition if the directory entry changes after the
+ directory entry is read and before the file is opened. For
+ example, normally DESC is a directory only at the top level, but
+ there is an exception if some other process substitutes a
+ directory for a non-directory while 'grep' is running. */
+ if (fstat (desc, &st) != 0)
+ {
+ suppressible_error (filename, errno);
+ goto closeout;
+ }
+
+ if (desc != STDIN_FILENO && skip_devices (command_line)
+ && is_device_mode (st.st_mode))
+ goto closeout;
+
+ if (desc != STDIN_FILENO && command_line
+ && skipped_file (filename, true, S_ISDIR (st.st_mode) != 0))
+ goto closeout;
+
+ if (desc != STDIN_FILENO
+ && directories == RECURSE_DIRECTORIES && S_ISDIR (st.st_mode))
+ {
+ /* Traverse the directory starting with its full name, because
+ unfortunately fts provides no way to traverse the directory
+ starting from its file descriptor. */
+
+ FTS *fts;
+ FTSENT *ent;
+ int opts = fts_options & ~(command_line ? 0 : FTS_COMFOLLOW);
+ char *fts_arg[2];
+
+ /* Close DESC now, to conserve file descriptors if the race
+ condition occurs many times in a deep recursion. */
+ if (close (desc) != 0)
+ suppressible_error (filename, errno);
+
+ fts_arg[0] = (char *) filename;
+ fts_arg[1] = NULL;
+ fts = fts_open (fts_arg, opts, NULL);
+
+ if (!fts)
+ xalloc_die ();
+ while ((ent = fts_read (fts)))
+ status &= grepdirent (fts, ent, command_line);
+ if (errno)
+ suppressible_error (filename, errno);
+ if (fts_close (fts) != 0)
+ suppressible_error (filename, errno);
+ return status;
+ }
+ if (desc != STDIN_FILENO
+ && ((directories == SKIP_DIRECTORIES && S_ISDIR (st.st_mode))
+ || ((devices == SKIP_DEVICES
+ || (devices == READ_COMMAND_LINE_DEVICES && !command_line))
+ && is_device_mode (st.st_mode))))
+ goto closeout;
+
+ /* If there is a regular file on stdout and the current file refers
+ to the same i-node, we have to report the problem and skip it.
+ Otherwise when matching lines from some other input reach the
+ disk before we open this file, we can end up reading and matching
+ those lines and appending them to the file from which we're reading.
+ Then we'd have what appears to be an infinite loop that'd terminate
+ only upon filling the output file system or reaching a quota.
+ However, there is no risk of an infinite loop if grep is generating
+ no output, i.e., with --silent, --quiet, -q.
+ Similarly, with any of these:
+ --max-count=N (-m) (for N >= 2)
+ --files-with-matches (-l)
+ --files-without-match (-L)
+ there is no risk of trouble.
+ For --max-count=1, grep stops after printing the first match,
+ so there is no risk of malfunction. But even --max-count=2, with
+ input==output, while there is no risk of infloop, there is a race
+ condition that could result in "alternate" output. */
+ if (!out_quiet && list_files == 0 && 1 < max_count
+ && S_ISREG (out_stat.st_mode) && out_stat.st_ino
+ && SAME_INODE (st, out_stat))
+ {
+ if (! suppress_errors)
+ error (0, 0, _("input file %s is also the output"), quote (filename));
+ errseen = true;
+ goto closeout;
+ }
+
+#if defined SET_BINARY
+ /* Set input to binary mode. Pipes are simulated with files
+ on DOS, so this includes the case of "foo | grep bar". */
+ if (!isatty (desc))
+ SET_BINARY (desc);
+#endif
+
+ count = grep (desc, &st);
+ if (count < 0)
+ status = count + 2; /* Maps -2 to false and -1 to true. */
+ else
+ {
+ if (count_matches)
+ {
+ if (out_file)
+ {
+ print_filename ();
+ if (filename_mask)
+ print_sep (SEP_CHAR_SELECTED);
+ else
+ putchar_errno (0);
+ }
+ printf_errno ("%" PRIdMAX "\n", count);
+ if (line_buffered)
+ fflush_errno ();
+ }
+
+ status = !count; /* True means that no line was selected. */
+ if (list_files == 1 - 2 * status)
+ {
+ print_filename ();
+ putchar_errno ('\n' & filename_mask);
+ if (line_buffered)
+ fflush_errno ();
+ }
+
+ if (desc == STDIN_FILENO)
+ {
+ off_t required_offset = outleft ? bufoffset : after_last_match;
+ if (required_offset != bufoffset
+ && lseek (desc, required_offset, SEEK_SET) < 0
+ && S_ISREG (st.st_mode))
+ suppressible_error (filename, errno);
+ }
+ }
+
+ closeout:
+ if (desc != STDIN_FILENO && close (desc) != 0)
+ suppressible_error (filename, errno);
+ return status;
+}
+
+/* Search the file named by the command-line argument ARG and return
+   the result of the search.  An ARG of "-" stands for standard
+   input, which is reported under 'label' if that is set and under
+   "(standard input)" otherwise.  Any other ARG is opened relative to
+   the working directory, following symbolic links.  */
+static bool
+grep_command_line_arg (char const *arg)
+{
+  if (! STREQ (arg, "-"))
+    {
+      filename = arg;
+      return grepfile (AT_FDCWD, arg, true, true);
+    }
+
+  filename = label ? label : _("(standard input)");
+  return grepdesc (STDIN_FILENO, true);
+}
+
+_Noreturn void usage (int); /* Print usage information appropriate for
+ exit status STATUS, then exit with that
+ status; this function does not return. */
+void
+usage (int status)
+{
+ if (status != 0) /* Failure: a two-line hint on stderr. */
+ {
+ fprintf (stderr, _("Usage: %s [OPTION]... PATTERN [FILE]...\n"),
+ program_name);
+ fprintf (stderr, _("Try '%s --help' for more information.\n"),
+ program_name);
+ }
+ else /* Success: the full help text on stdout. */
+ {
+ printf (_("Usage: %s [OPTION]... PATTERN [FILE]...\n"), program_name);
+ printf (_("Search for PATTERN in each FILE or standard input.\n"));
+ printf (_("PATTERN is, by default, a basic regular expression (BRE).\n"));
+ printf (_("\
+Example: %s -i 'hello world' menu.h main.c\n\
+\n\
+Regexp selection and interpretation:\n"), program_name);
+ printf (_("\
+ -E, --extended-regexp PATTERN is an extended regular expression (ERE)\n\
+ -F, --fixed-strings PATTERN is a set of newline-separated strings\n\
+ -G, --basic-regexp PATTERN is a basic regular expression (BRE)\n\
+ -P, --perl-regexp PATTERN is a Perl regular expression\n"));
+ /* -X is deliberately undocumented. */
+ printf (_("\
+ -e, --regexp=PATTERN use PATTERN for matching\n\
+ -f, --file=FILE obtain PATTERN from FILE\n\
+ -i, --ignore-case ignore case distinctions\n\
+ -w, --word-regexp force PATTERN to match only whole words\n\
+ -x, --line-regexp force PATTERN to match only whole lines\n\
+ -z, --null-data a data line ends in 0 byte, not newline\n"));
+ printf (_("\
+\n\
+Miscellaneous:\n\
+ -s, --no-messages suppress error messages\n\
+ -v, --invert-match select non-matching lines\n\
+ -V, --version display version information and exit\n\
+ --help display this help text and exit\n"));
+ printf (_("\
+\n\
+Output control:\n\
+ -m, --max-count=NUM stop after NUM matches\n\
+ -b, --byte-offset print the byte offset with output lines\n\
+ -n, --line-number print line number with output lines\n\
+ --line-buffered flush output on every line\n\
+ -H, --with-filename print the file name for each match\n\
+ -h, --no-filename suppress the file name prefix on output\n\
+ --label=LABEL use LABEL as the standard input file name prefix\n\
+"));
+ printf (_("\
+ -o, --only-matching show only the part of a line matching PATTERN\n\
+ -q, --quiet, --silent suppress all normal output\n\
+ --binary-files=TYPE assume that binary files are TYPE;\n\
+ TYPE is 'binary', 'text', or 'without-match'\n\
+ -a, --text equivalent to --binary-files=text\n\
+"));
+ printf (_("\
+ -I equivalent to --binary-files=without-match\n\
+ -d, --directories=ACTION how to handle directories;\n\
+ ACTION is 'read', 'recurse', or 'skip'\n\
+ -D, --devices=ACTION how to handle devices, FIFOs and sockets;\n\
+ ACTION is 'read' or 'skip'\n\
+ -r, --recursive like --directories=recurse\n\
+ -R, --dereference-recursive likewise, but follow all symlinks\n\
+"));
+ printf (_("\
+ --include=FILE_PATTERN search only files that match FILE_PATTERN\n\
+ --exclude=FILE_PATTERN skip files and directories matching\
+ FILE_PATTERN\n\
+ --exclude-from=FILE skip files matching any file pattern from FILE\n\
+ --exclude-dir=PATTERN directories that match PATTERN will be skipped.\n\
+"));
+ printf (_("\
+ -L, --files-without-match print only names of FILEs containing no match\n\
+ -l, --files-with-matches print only names of FILEs containing matches\n\
+ -c, --count print only a count of matching lines per FILE\n\
+ -T, --initial-tab make tabs line up (if needed)\n\
+ -Z, --null print 0 byte after FILE name\n"));
+ printf (_("\
+\n\
+Context control:\n\
+ -B, --before-context=NUM print NUM lines of leading context\n\
+ -A, --after-context=NUM print NUM lines of trailing context\n\
+ -C, --context=NUM print NUM lines of output context\n\
+"));
+ printf (_("\
+ -NUM same as --context=NUM\n\
+ --color[=WHEN],\n\
+ --colour[=WHEN] use markers to highlight the matching strings;\n\
+ WHEN is 'always', 'never', or 'auto'\n\
+ -U, --binary do not strip CR characters at EOL (MSDOS/Windows)\n\
+ -u, --unix-byte-offsets report offsets as if CRs were not there\n\
+ (MSDOS/Windows)\n\
+\n"));
+ printf (_("\
+'egrep' means 'grep -E'. 'fgrep' means 'grep -F'.\n\
+Direct invocation as either 'egrep' or 'fgrep' is deprecated.\n"));
+ printf (_("\
+When FILE is -, read standard input. With no FILE, read . if a command-line\n\
+-r is given, - otherwise. If fewer than two FILEs are given, assume -h.\n\
+Exit status is 0 if any line is selected, 1 otherwise;\n\
+if any error occurs and -q is not given, the exit status is 2.\n"));
+ emit_bug_reporting_address ();
+ }
+ exit (status);
+}
+
+/* Pattern compilers and matchers. */
+
+static void
+Gcompile (char const *pattern, size_t size)
+{ /* Compile PATTERN, of SIZE bytes, by calling GEAcompile with the
+ RE_SYNTAX_GREP syntax; this is the compiler of the "grep" matcher. */
+ GEAcompile (pattern, size, RE_SYNTAX_GREP);
+}
+
+static void
+Ecompile (char const *pattern, size_t size)
+{ /* Compile PATTERN, of SIZE bytes, by calling GEAcompile with the
+ RE_SYNTAX_EGREP syntax; this is the compiler of the "egrep" matcher. */
+ GEAcompile (pattern, size, RE_SYNTAX_EGREP);
+}
+
+static void
+Acompile (char const *pattern, size_t size)
+{ /* Compile PATTERN, of SIZE bytes, by calling GEAcompile with the
+ RE_SYNTAX_AWK syntax; this is the compiler of the "awk" matcher. */
+ GEAcompile (pattern, size, RE_SYNTAX_AWK);
+}
+
+static void
+GAcompile (char const *pattern, size_t size)
+{ /* Compile PATTERN, of SIZE bytes, by calling GEAcompile with the
+ RE_SYNTAX_GNU_AWK syntax; this is the compiler of the "gawk" matcher. */
+ GEAcompile (pattern, size, RE_SYNTAX_GNU_AWK);
+}
+
+static void
+PAcompile (char const *pattern, size_t size)
+{ /* Compile PATTERN, of SIZE bytes, by calling GEAcompile with the
+ RE_SYNTAX_POSIX_AWK syntax; this is the compiler of the "posixawk"
+ matcher. */
+ GEAcompile (pattern, size, RE_SYNTAX_POSIX_AWK);
+}
+
+struct matcher
+{
+ char const name[16]; /* Matcher name that setmatcher compares against. */
+ compile_fp_t compile; /* Pattern compiler; NULL marks the table's end. */
+ execute_fp_t execute; /* Search function paired with 'compile'. */
+};
+static struct matcher const matchers[] = { /* Searched in order by setmatcher. */
+ { "grep", Gcompile, EGexecute },
+ { "egrep", Ecompile, EGexecute },
+ { "fgrep", Fcompile, Fexecute },
+ { "awk", Acompile, EGexecute },
+ { "gawk", GAcompile, EGexecute },
+ { "posixawk", PAcompile, EGexecute },
+ { "perl", Pcompile, Pexecute },
+ { "", NULL, NULL }, /* Terminator: its 'compile' is NULL. */
+};
+
+/* Select the matcher named M: look it up in 'matchers' and install
+   its name, compile function and execute function in the file-scope
+   'matcher', 'compile' and 'execute'.  Report an error with status
+   EXIT_TROUBLE if a different matcher has already been selected, or
+   if M names no entry of the table.  */
+static void
+setmatcher (char const *m)
+{
+  if (matcher && !STREQ (matcher, m))
+    error (EXIT_TROUBLE, 0, _("conflicting matchers specified"));
+
+  /* The table ends with an entry whose compile function is null.  */
+  struct matcher const *entry = matchers;
+  while (entry->compile && !STREQ (m, entry->name))
+    entry++;
+
+  if (entry->compile)
+    {
+      matcher = entry->name;
+      compile = entry->compile;
+      execute = entry->execute;
+      return;
+    }
+
+  error (EXIT_TROUBLE, 0, _("invalid matcher %s"), m);
+}
+
+/* Split OPTIONS into its white-space-separated options.  Store a
+   NUL-terminated copy of each option, one after another, in BUF,
+   and, if ARGV is not NULL, set ARGV[0], ARGV[1], etc. to point to
+   those copies.  Return the number N of options found.  ARGV[N] is
+   not set to NULL.  A backslash makes the following character
+   literal, so it can be used to escape whitespace and backslashes;
+   a backslash at the very end of OPTIONS is copied as is.  */
+static size_t
+prepend_args (char const *options, char *buf, char **argv)
+{
+  char const *src = options;
+  char *dst = buf;
+  size_t count = 0;
+
+  while (true)
+    {
+      /* Skip the white space in front of the next option.  */
+      for (; c_isspace (to_uchar (*src)); src++)
+        continue;
+      if (*src == '\0')
+        break;
+
+      if (argv != NULL)
+        argv[count] = dst;
+      count++;
+
+      /* Copy one option, resolving backslash escapes on the way.  */
+      do
+        {
+          char c = *src++;
+          if (c == '\\' && *src != '\0')
+            c = *src++;
+          *dst++ = c;
+        }
+      while (*src != '\0' && ! c_isspace (to_uchar (*src)));
+
+      *dst++ = '\0';
+    }
+
+  return count;
+}
+
+/* Insert the whitespace-separated words of OPTIONS right after the
+   program name of the argument vector *PARGV, whose count is *PARGC.
+   When OPTIONS is nonnull and nonempty, *PARGV is replaced by a newly
+   allocated, null-terminated vector and *PARGC is updated; the word
+   copies live in a second allocation that the new vector points into,
+   so neither allocation is freed here.  Return the number of words
+   inserted, which is 0 when OPTIONS is null or empty. */
+static int
+prepend_default_options (char const *options, int *pargc, char ***pargv)
+{
+  if (options == NULL || *options == '\0')
+    return 0;
+
+  /* The first pass only counts the words, so the vector can be sized. */
+  char *storage = xmalloc (strlen (options) + 1);
+  size_t prepended = prepend_args (options, storage, NULL);
+  int old_argc = *pargc;
+  char *const *old_argv = *pargv;
+  char **slot;
+  enum { MAX_ARGS = MIN (INT_MAX, SIZE_MAX / sizeof *slot - 1) };
+
+  if (MAX_ARGS - old_argc < prepended)
+    xalloc_die ();
+
+  slot = xmalloc ((prepended + old_argc + 1) * sizeof *slot);
+  *pargc = prepended + old_argc;
+  *pargv = slot;
+
+  /* Program name first, then the new words, then the old arguments. */
+  *slot++ = *old_argv++;
+  slot += prepend_args (options, storage, slot);
+  for (;;)
+    {
+      char *arg = *old_argv++;
+      *slot++ = arg;
+      if (arg == NULL)
+        break;
+    }
+
+  return prepended;
+}
+
+/* Get the next non-digit option from ARGC and ARGV.
+   Return -1 if there are no more options.
+   Process any digit options that were encountered on the way,
+   and store the resulting integer into *DEFAULT_CONTEXT.
+
+   Digits returned one at a time by getopt_long are collected in a local
+   buffer and handed to context_length_arg as a single string.  The
+   static PREV_DIGIT_OPTIND remembers, across calls, the value optind
+   had before the most recent digit was read. */
+static int
+get_nondigit_option (int argc, char *const *argv, intmax_t *default_context)
+{
+ static int prev_digit_optind = -1;
+ int this_digit_optind;
+ bool was_digit;
+ /* Room for the digits of an intmax_t plus the "..." overflow marker
+ and its terminating NUL. */
+ char buf[INT_BUFSIZE_BOUND (intmax_t) + 4];
+ char *p = buf;
+ int opt;
+
+ was_digit = false;
+ this_digit_optind = optind;
+ while (true)
+ {
+ opt = getopt_long (argc, (char **) argv, short_options,
+ long_options, NULL);
+ /* Anything that is not a digit option ends the collection and is
+ returned to the caller. */
+ if ( ! ('0' <= opt && opt <= '9'))
+ break;
+
+ if (prev_digit_optind != this_digit_optind || !was_digit)
+ {
+ /* Reset to start another context length argument. */
+ p = buf;
+ }
+ else
+ {
+ /* Suppress trivial leading zeros, to avoid incorrect
+ diagnostic on strings like 00000000000. */
+ p -= buf[0] == '0';
+ }
+
+ /* Only the last 4 bytes of BUF (reserved for "..." and NUL) remain. */
+ if (p == buf + sizeof buf - 4)
+ {
+ /* Too many digits. Append "..." to make context_length_arg
+ complain about "X...", where X contains the digits seen
+ so far. */
+ strcpy (p, "...");
+ p += 3;
+ break;
+ }
+ *p++ = opt;
+
+ was_digit = true;
+ prev_digit_optind = this_digit_optind;
+ this_digit_optind = optind;
+ }
+ /* P differs from BUF only if at least one digit was stored. */
+ if (p != buf)
+ {
+ *p = '\0';
+ context_length_arg (buf, default_context);
+ }
+
+ return opt;
+}
+
+/* Parse GREP_COLORS. The default would look like:
+ GREP_COLORS='ms=01;31:mc=01;31:sl=:cx=:fn=35:ln=32:bn=32:se=36'
+ with boolean capabilities (ne and rv) unset (i.e., omitted).
+ No character escaping is needed or supported.
+
+ The value is a ':'-separated list of NAME or NAME=VAL items. Each
+ NAME is looked up in color_dict; a match has VAL stored through its
+ VAR pointer (when both exist) and its FCT hook called (when set).
+ Parsing stops silently at the first malformed item; items handled
+ before that point stay in effect. */
+static void
+parse_grep_colors (void)
+{
+ const char *p;
+ char *q;
+ char *name;
+ char *val;
+
+ p = getenv ("GREP_COLORS"); /* Plural! */
+ if (p == NULL || *p == '\0')
+ return;
+
+ /* Work off a writable copy. The copy is never freed: the values
+ stored through cap->var below point into it. */
+ q = xstrdup (p);
+
+ name = q;
+ val = NULL;
+ /* From now on, be well-formed or you're gone. */
+ for (;;)
+ if (*q == ':' || *q == '\0')
+ {
+ /* Remember the delimiter, since it is overwritten just below. */
+ char c = *q;
+ struct color_cap const *cap;
+
+ *q++ = '\0'; /* Terminate name or val. */
+ /* Empty name without val (empty cap)
+ * won't match and will be ignored. */
+ for (cap = color_dict; cap->name; cap++)
+ if (STREQ (cap->name, name))
+ break;
+ /* If name unknown, go on for forward compatibility. */
+ /* NOTE(review): for an unknown name CAP is the terminating entry of
+ color_dict, so its var and fct members are what get tested here;
+ presumably both are null there -- confirm against color_dict. */
+ if (cap->var && val)
+ *(cap->var) = val;
+ if (cap->fct)
+ cap->fct ();
+ if (c == '\0')
+ return;
+ name = q;
+ val = NULL;
+ }
+ else if (*q == '=')
+ {
+ /* Reject an empty name and a second '=' within one item. */
+ if (q == name || val)
+ return;
+ *q++ = '\0'; /* Terminate name. */
+ val = q; /* Can be the empty string. */
+ }
+ else if (val == NULL)
+ q++; /* Accumulate name. */
+ else if (*q == ';' || (*q >= '0' && *q <= '9'))
+ q++; /* Accumulate val. Protect the terminal from being sent crap. */
+ else
+ return;
+}
+
+/* Report whether the PATLEN bytes starting at PAT include an encoding
+   error, i.e. whether mb_clen fails on any character while scanning
+   PAT from its first byte. */
+static bool
+contains_encoding_error (char const *pat, size_t patlen)
+{
+  mbstate_t state = { 0 };
+  size_t pos = 0;
+
+  while (pos < patlen)
+    {
+      size_t clen = mb_clen (pat + pos, patlen - pos, &state);
+
+      /* The two largest size_t values, (size_t) -2 and (size_t) -1,
+         are the results treated as an encoding error. */
+      if (clen >= (size_t) -2)
+        return true;
+      pos += clen;
+    }
+
+  return false;
+}
+
+/* Convert the fgrep pattern KEYS (LEN bytes) into an equivalent grep
+   pattern.  A single-byte character, or a byte that mb_clen rejects
+   with (size_t) -1, is preceded by a backslash when it is one of
+   $ * . [ \ ^ ; longer characters are copied unchanged.  The result is
+   stored in newly allocated memory whose address is put in *NEW_KEYS
+   and whose length is put in *NEW_LEN.  The result can be at most twice
+   as long as the input, which is what the allocation provides for. */
+static void
+fgrep_to_grep_pattern (size_t len, char const *keys,
+                       size_t *new_len, char **new_keys)
+{
+  char *out = xnmalloc (len + 1, 2);
+  mbstate_t mb_state = { 0 };
+
+  *new_keys = out;
+
+  while (len != 0)
+    {
+      size_t n = mb_clen (keys, len, &mb_state);
+
+      if (n == (size_t) -1 || n <= 1)
+        {
+          /* One byte is consumed.  After an invalid sequence, restart
+             decoding from the initial shift state. */
+          if (n == (size_t) -1)
+            memset (&mb_state, 0, sizeof mb_state);
+          /* A result of 0 is copied without consulting the list of
+             special characters. */
+          if (n != 0 && strchr ("$*.[\\^", *keys) != NULL)
+            *out++ = '\\';
+          *out++ = *keys;
+          n = 1;
+        }
+      else
+        {
+          /* (size_t) -2: the rest of KEYS is copied as it stands. */
+          if (n == (size_t) -2)
+            n = len;
+          out = mempcpy (out, keys, n);
+        }
+
+      keys += n;
+      len -= n;
+    }
+
+  *new_len = out - *new_keys;
+}
+
+/* Program entry point. Parse the options (including those prepended
+ from GREP_OPTIONS), collect the pattern from -e, -f or the first
+ non-option argument, compile it with the selected matcher, and then
+ call grep_command_line_arg on each remaining operand (or on "." or
+ "-" when there is none).
+ Return EXIT_TROUBLE if errseen is set; otherwise return the AND of the
+ grep_command_line_arg results, so the status is 0 as soon as one call
+ returned false. Also returns EXIT_SUCCESS right after printing the
+ version, and EXIT_FAILURE when the max count is 0. */
+int
+main (int argc, char **argv)
+{
+ char *keys;
+ size_t keycc, oldcc, keyalloc;
+ bool with_filenames;
+ size_t cc;
+ int opt, prepended;
+ int prev_optind, last_recursive;
+ int fread_errno;
+ intmax_t default_context;
+ FILE *fp;
+ exit_failure = EXIT_TROUBLE;
+ initialize_main (&argc, &argv);
+ set_program_name (argv[0]);
+ program_name = argv[0];
+
+ keys = NULL;
+ keycc = 0;
+ with_filenames = false;
+ eolbyte = '\n';
+ filename_mask = ~0;
+
+ max_count = INTMAX_MAX;
+
+ /* The value -1 means to use DEFAULT_CONTEXT. */
+ out_after = out_before = -1;
+ /* Default before/after context: changed by -C/-NUM options */
+ default_context = -1;
+ /* Changed by -o option */
+ only_matching = false;
+
+ /* Internationalization. */
+#if defined HAVE_SETLOCALE
+ setlocale (LC_ALL, "");
+#endif
+#if defined ENABLE_NLS
+ bindtextdomain (PACKAGE, LOCALEDIR);
+ textdomain (PACKAGE);
+#endif
+
+ exit_failure = EXIT_TROUBLE;
+ atexit (clean_up_stdout);
+
+ last_recursive = 0;
+
+ /* PREPENDED counts the words inserted from GREP_OPTIONS; it is compared
+ with LAST_RECURSIVE further down. */
+ prepended = prepend_default_options (getenv ("GREP_OPTIONS"), &argc, &argv);
+ if (prepended)
+ error (0, 0, _("warning: GREP_OPTIONS is deprecated;"
+ " please use an alias or script"));
+
+ /* Start with the first matcher in the table; setmatcher may replace it. */
+ compile = matchers[0].compile;
+ execute = matchers[0].execute;
+
+ /* PREV_OPTIND records optind as it was before each option is fetched. */
+ while (prev_optind = optind,
+ (opt = get_nondigit_option (argc, argv, &default_context)) != -1)
+ switch (opt)
+ {
+ case 'A':
+ context_length_arg (optarg, &out_after);
+ break;
+
+ case 'B':
+ context_length_arg (optarg, &out_before);
+ break;
+
+ case 'C':
+ /* Set output match context, but let any explicit leading or
+ trailing amount specified with -A or -B stand. */
+ context_length_arg (optarg, &default_context);
+ break;
+
+ case 'D':
+ if (STREQ (optarg, "read"))
+ devices = READ_DEVICES;
+ else if (STREQ (optarg, "skip"))
+ devices = SKIP_DEVICES;
+ else
+ error (EXIT_TROUBLE, 0, _("unknown devices method"));
+ break;
+
+ case 'E':
+ setmatcher ("egrep");
+ break;
+
+ case 'F':
+ setmatcher ("fgrep");
+ break;
+
+ case 'P':
+ setmatcher ("perl");
+ break;
+
+ case 'G':
+ setmatcher ("grep");
+ break;
+
+ case 'X': /* undocumented on purpose */
+ setmatcher (optarg);
+ break;
+
+ case 'H':
+ with_filenames = true;
+ no_filenames = false;
+ break;
+
+ case 'I':
+ binary_files = WITHOUT_MATCH_BINARY_FILES;
+ break;
+
+ case 'T':
+ align_tabs = true;
+ break;
+
+ case 'U':
+ dos_binary ();
+ break;
+
+ case 'u':
+ dos_unix_byte_offsets ();
+ break;
+
+ case 'V':
+ show_version = true;
+ break;
+
+ case 'a':
+ binary_files = TEXT_BINARY_FILES;
+ break;
+
+ case 'b':
+ out_byte = true;
+ break;
+
+ case 'c':
+ count_matches = true;
+ break;
+
+ case 'd':
+ directories = XARGMATCH ("--directories", optarg,
+ directories_args, directories_types);
+ if (directories == RECURSE_DIRECTORIES)
+ last_recursive = prev_optind;
+ break;
+
+ case 'e':
+ /* Append the pattern to KEYS, with '\n' in place of its NUL. */
+ cc = strlen (optarg);
+ keys = xrealloc (keys, keycc + cc + 1);
+ strcpy (&keys[keycc], optarg);
+ keycc += cc;
+ keys[keycc++] = '\n';
+ break;
+
+ case 'f':
+ fp = STREQ (optarg, "-") ? stdin : fopen (optarg, O_TEXT ? "rt" : "r");
+ if (!fp)
+ error (EXIT_TROUBLE, errno, "%s", optarg);
+ /* Pick the smallest power of two that exceeds KEYCC + 1. */
+ for (keyalloc = 1; keyalloc <= keycc + 1; keyalloc *= 2)
+ ;
+ keys = xrealloc (keys, keyalloc);
+ oldcc = keycc;
+ /* Each read leaves the last byte of KEYS free, for the newline that
+ may be appended below. */
+ while ((cc = fread (keys + keycc, 1, keyalloc - 1 - keycc, fp)) != 0)
+ {
+ keycc += cc;
+ if (keycc == keyalloc - 1)
+ keys = x2nrealloc (keys, &keyalloc, sizeof *keys);
+ }
+ /* Save errno before ferror and fclose get a chance to change it. */
+ fread_errno = errno;
+ if (ferror (fp))
+ error (EXIT_TROUBLE, fread_errno, "%s", optarg);
+ if (fp != stdin)
+ fclose (fp);
+ /* Append final newline if file ended in non-newline. */
+ if (oldcc != keycc && keys[keycc - 1] != '\n')
+ keys[keycc++] = '\n';
+ break;
+
+ case 'h':
+ with_filenames = false;
+ no_filenames = true;
+ break;
+
+ case 'i':
+ case 'y': /* For old-timers . . . */
+ match_icase = true;
+ break;
+
+ case 'L':
+ /* Like -l, except list files that don't contain matches.
+ Inspired by the same option in Hume's gre. */
+ list_files = -1;
+ break;
+
+ case 'l':
+ list_files = 1;
+ break;
+
+ case 'm':
+ /* LONGINT_OVERFLOW is accepted along with LONGINT_OK. */
+ switch (xstrtoimax (optarg, 0, 10, &max_count, ""))
+ {
+ case LONGINT_OK:
+ case LONGINT_OVERFLOW:
+ break;
+
+ default:
+ error (EXIT_TROUBLE, 0, _("invalid max count"));
+ }
+ break;
+
+ case 'n':
+ out_line = true;
+ break;
+
+ case 'o':
+ only_matching = true;
+ break;
+
+ case 'q':
+ exit_on_match = true;
+ exit_failure = 0;
+ break;
+
+ case 'R':
+ fts_options = basic_fts_options | FTS_LOGICAL;
+ /* Fall through. */
+ case 'r':
+ directories = RECURSE_DIRECTORIES;
+ last_recursive = prev_optind;
+ break;
+
+ case 's':
+ suppress_errors = true;
+ break;
+
+ case 'v':
+ out_invert = true;
+ break;
+
+ case 'w':
+ match_words = true;
+ break;
+
+ case 'x':
+ match_lines = true;
+ break;
+
+ case 'Z':
+ filename_mask = 0;
+ break;
+
+ case 'z':
+ eolbyte = '\0';
+ break;
+
+ case BINARY_FILES_OPTION:
+ if (STREQ (optarg, "binary"))
+ binary_files = BINARY_BINARY_FILES;
+ else if (STREQ (optarg, "text"))
+ binary_files = TEXT_BINARY_FILES;
+ else if (STREQ (optarg, "without-match"))
+ binary_files = WITHOUT_MATCH_BINARY_FILES;
+ else
+ error (EXIT_TROUBLE, 0, _("unknown binary-files type"));
+ break;
+
+ case COLOR_OPTION:
+ /* color_option: 0 = never, 1 = always, 2 = decide after parsing. */
+ if (optarg)
+ {
+ if (!strcasecmp (optarg, "always") || !strcasecmp (optarg, "yes")
+ || !strcasecmp (optarg, "force"))
+ color_option = 1;
+ else if (!strcasecmp (optarg, "never") || !strcasecmp (optarg, "no")
+ || !strcasecmp (optarg, "none"))
+ color_option = 0;
+ else if (!strcasecmp (optarg, "auto") || !strcasecmp (optarg, "tty")
+ || !strcasecmp (optarg, "if-tty"))
+ color_option = 2;
+ else
+ show_help = 1;
+ }
+ else
+ color_option = 2;
+ break;
+
+ case EXCLUDE_OPTION:
+ case INCLUDE_OPTION:
+ /* Both slots of the pattern array receive the pattern; they differ
+ only in the flags that exclude_options (cmd) supplies. */
+ for (int cmd = 0; cmd < 2; cmd++)
+ {
+ if (!excluded_patterns[cmd])
+ excluded_patterns[cmd] = new_exclude ();
+ add_exclude (excluded_patterns[cmd], optarg,
+ ((opt == INCLUDE_OPTION ? EXCLUDE_INCLUDE : 0)
+ | exclude_options (cmd)));
+ }
+ break;
+ case EXCLUDE_FROM_OPTION:
+ for (int cmd = 0; cmd < 2; cmd++)
+ {
+ if (!excluded_patterns[cmd])
+ excluded_patterns[cmd] = new_exclude ();
+ if (add_exclude_file (add_exclude, excluded_patterns[cmd],
+ optarg, exclude_options (cmd), '\n')
+ != 0)
+ error (EXIT_TROUBLE, errno, "%s", optarg);
+ }
+ break;
+
+ case EXCLUDE_DIRECTORY_OPTION:
+ strip_trailing_slashes (optarg);
+ for (int cmd = 0; cmd < 2; cmd++)
+ {
+ if (!excluded_directory_patterns[cmd])
+ excluded_directory_patterns[cmd] = new_exclude ();
+ add_exclude (excluded_directory_patterns[cmd], optarg,
+ exclude_options (cmd));
+ }
+ break;
+
+ case GROUP_SEPARATOR_OPTION:
+ group_separator = optarg;
+ break;
+
+ case LINE_BUFFERED_OPTION:
+ line_buffered = true;
+ break;
+
+ case LABEL_OPTION:
+ label = optarg;
+ break;
+
+ case 0:
+ /* long options */
+ break;
+
+ default:
+ usage (EXIT_TROUBLE);
+ break;
+
+ }
+
+ /* Resolve the deferred color setting now that parsing is done. */
+ if (color_option == 2)
+ color_option = isatty (STDOUT_FILENO) && should_colorize ();
+ init_colorize ();
+
+ /* POSIX says that -q overrides -l, which in turn overrides the
+ other output options. */
+ if (exit_on_match)
+ list_files = 0;
+ if (exit_on_match | list_files)
+ {
+ count_matches = false;
+ done_on_match = true;
+ }
+ out_quiet = count_matches | done_on_match;
+
+ /* A negative value means that -A or -B did not set it. */
+ if (out_after < 0)
+ out_after = default_context;
+ if (out_before < 0)
+ out_before = default_context;
+
+ if (color_option)
+ {
+ /* Legacy. */
+ char *userval = getenv ("GREP_COLOR");
+ if (userval != NULL && *userval != '\0')
+ selected_match_color = context_match_color = userval;
+
+ /* New GREP_COLORS has priority. */
+ parse_grep_colors ();
+ }
+
+ if (show_version)
+ {
+ version_etc (stdout, program_name, PACKAGE_NAME, VERSION, AUTHORS,
+ (char *) NULL);
+ return EXIT_SUCCESS;
+ }
+
+ if (show_help)
+ usage (EXIT_SUCCESS);
+
+ /* Record stdout's status in out_stat only when it is a regular file. */
+ struct stat tmp_stat;
+ if (fstat (STDOUT_FILENO, &tmp_stat) == 0 && S_ISREG (tmp_stat.st_mode))
+ out_stat = tmp_stat;
+
+ if (keys)
+ {
+ if (keycc == 0)
+ {
+ /* No keys were specified (e.g. -f /dev/null). Match nothing. */
+ out_invert ^= true;
+ match_lines = match_words = false;
+ }
+ else
+ /* Strip trailing newline. */
+ --keycc;
+ }
+ else if (optind < argc)
+ {
+ /* A copy must be made in case of an xrealloc() or free() later. */
+ keycc = strlen (argv[optind]);
+ keys = xmemdup (argv[optind++], keycc + 1);
+ }
+ else
+ usage (EXIT_TROUBLE);
+
+ build_mbclen_cache ();
+ initialize_unibyte_mask ();
+
+ /* In a unibyte locale, switch from fgrep to grep if
+ the pattern matches words (where grep is typically faster).
+ In a multibyte locale, switch from fgrep to grep if either
+ (1) case is ignored (where grep is typically faster), or
+ (2) the pattern has an encoding error (where fgrep might not work). */
+ if (compile == Fcompile
+ && (MB_CUR_MAX <= 1
+ ? match_words
+ : match_icase || contains_encoding_error (keys, keycc)))
+ {
+ size_t new_keycc;
+ char *new_keys;
+ fgrep_to_grep_pattern (keycc, keys, &new_keycc, &new_keys);
+ free (keys);
+ keys = new_keys;
+ keycc = new_keycc;
+ matcher = "grep";
+ compile = Gcompile;
+ execute = EGexecute;
+ }
+
+ compile (keys, keycc);
+ free (keys);
+ /* Run the matcher on a line holding nothing but the end-of-line byte,
+ and compare whether that failed to match with OUT_INVERT. */
+ /* We need one byte prior and one after. */
+ char eolbytes[3] = { 0, eolbyte, 0 };
+ size_t match_size;
+ skip_empty_lines = ((execute (eolbytes + 1, 1, &match_size, NULL) == 0)
+ == out_invert);
+
+ /* Show file names when -H was given, or when there are at least two
+ file operands and -h was not given. */
+ if ((argc - optind > 1 && !no_filenames) || with_filenames)
+ out_file = 1;
+
+#ifdef SET_BINARY
+ /* Output is set to binary mode because we shouldn't convert
+ NL to CR-LF pairs, especially when grepping binary files. */
+ if (!isatty (STDOUT_FILENO))
+ SET_BINARY (STDOUT_FILENO);
+#endif
+
+ if (max_count == 0)
+ return EXIT_FAILURE;
+
+ if (fts_options & FTS_LOGICAL && devices == READ_COMMAND_LINE_DEVICES)
+ devices = READ_DEVICES;
+
+ /* Choose what to search: the operands if any; else "." when a
+ recursive option was seen at an optind greater than PREPENDED;
+ else standard input, spelled "-". */
+ char *const *files;
+ if (optind < argc)
+ {
+ files = argv + optind;
+ }
+ else if (directories == RECURSE_DIRECTORIES && prepended < last_recursive)
+ {
+ static char *const cwd_only[] = { (char *) ".", NULL };
+ files = cwd_only;
+ omit_dot_slash = true;
+ }
+ else
+ {
+ static char *const stdin_only[] = { (char *) "-", NULL };
+ files = stdin_only;
+ }
+
+ /* STATUS stays true only if every call returns true. */
+ bool status = true;
+ do
+ status &= grep_command_line_arg (*files++);
+ while (*files != NULL);
+
+ /* We register via atexit() to test stdout. */
+ return errseen ? EXIT_TROUBLE : status;
+}
diff --git a/src/grep.h b/src/grep.h
new file mode 100644
index 0000000..75b7ef7
--- /dev/null
+++ b/src/grep.h
@@ -0,0 +1,34 @@
+/* grep.h - interface to grep driver for searching subroutines.
+ Copyright (C) 1992, 1998, 2001, 2007, 2009-2016 Free Software Foundation,
+ Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
+ 02110-1301, USA. */
+
+#ifndef GREP_GREP_H
+#define GREP_GREP_H 1
+
+#include <stdbool.h>
+
+/* The following flags are exported from grep for the matchers
+ to look at. */
+extern bool match_icase; /* -i */
+extern bool match_words; /* -w */
+extern bool match_lines; /* -x */
+extern char eolbyte; /* -z */
+
+extern bool buf_has_encoding_errors (char *, size_t);
+
+#endif
diff --git a/src/kwsearch.c b/src/kwsearch.c
new file mode 100644
index 0000000..e9966d4
--- /dev/null
+++ b/src/kwsearch.c
@@ -0,0 +1,165 @@
+/* kwsearch.c - searching subroutines using kwset for grep.
+ Copyright 1992, 1998, 2000, 2007, 2009-2016 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
+ 02110-1301, USA. */
+
+/* Written August 1992 by Mike Haertel. */
+
+#include <config.h>
+#include "search.h"
+
+/* Tell whether -w treats WC as part of a word, which holds for the
+   underscore and for every character that iswalnum accepts. */
+static bool
+wordchar (wint_t wc)
+{
+  if (wc == L'_')
+    return true;
+  return iswalnum (wc) != 0;
+}
+
+/* KWset compiled pattern. Fcompile builds it from the newline-separated
+ keys of the pattern (kwsinit, kwsincr, kwsprep) and Fexecute searches
+ with it (kwsexec). */
+static kwset_t kwset;
+
+/* Compile PATTERN (SIZE bytes) for fixed-string search.  PATTERN is a
+   list of keys separated by newlines; each key, possibly empty, is
+   added to the file-level KWSET, which is then prepared for searching.
+   When match_lines is set, each key is added with an eolbyte before and
+   after it. */
+void
+Fcompile (char const *pattern, size_t size)
+{
+  size_t remaining = size;
+  char const *key = pattern;
+
+  kwsinit (&kwset);
+
+  do
+    {
+      /* The key runs up to the next newline, or to the end of PATTERN. */
+      char const *newline = memchr (key, '\n', remaining);
+      size_t keylen = newline ? (size_t) (newline - key) : remaining;
+      char const *next_key = newline ? newline + 1 : NULL;
+
+      remaining = newline ? remaining - (keylen + 1) : 0;
+
+      if (!match_lines)
+        kwsincr (kwset, key, keylen);
+      else
+        {
+          /* Add the key wrapped in end-of-line bytes, using a temporary
+             copy that is released once the key has been added. */
+          char *wrapped = xmalloc (keylen + 2);
+          wrapped[0] = eolbyte;
+          memcpy (wrapped + 1, key, keylen);
+          wrapped[keylen + 1] = eolbyte;
+          kwsincr (kwset, wrapped, keylen + 2);
+          free (wrapped);
+        }
+
+      key = next_key;
+    }
+  while (key != NULL);
+
+  kwsprep (kwset);
+}
+
+/* Search the SIZE bytes at BUF for a key compiled by Fcompile.
+ If START_PTR is null, scanning starts at BUF and a successful result
+ describes the whole line containing the match; otherwise scanning
+ starts at START_PTR and the result describes the match itself.
+ On success, return the offset from BUF of the reported text and store
+ its length in *MATCH_SIZE. Return (size_t) -1 if nothing matches. */
+size_t
+Fexecute (char *buf, size_t size, size_t *match_size,
+ char const *start_ptr)
+{
+ char const *beg, *try, *end, *mb_start;
+ size_t len;
+ char eol = eolbyte;
+ struct kwsmatch kwsmatch;
+ size_t ret_val;
+
+ for (mb_start = beg = start_ptr ? start_ptr : buf; beg <= buf + size; beg++)
+ {
+ /* With -x the keys were stored with an eolbyte on each side (see
+ Fcompile), so the search starts one byte before BEG. */
+ size_t offset = kwsexec (kwset, beg - match_lines,
+ buf + size - beg + match_lines, &kwsmatch);
+ if (offset == (size_t) -1)
+ goto failure;
+ /* Do not count the two eolbytes that -x added to the key. */
+ len = kwsmatch.size[0] - 2 * match_lines;
+ if (!match_lines && MB_CUR_MAX > 1 && !using_utf8 ()
+ && mb_goback (&mb_start, beg + offset, buf + size) != 0)
+ {
+ /* We have matched a single byte that is not at the beginning of a
+ multibyte character. mb_goback has advanced MB_START past that
+ multibyte character. Now, we want to position BEG so that the
+ next kwsexec search starts there. Thus, to compensate for the
+ for-loop's BEG++, above, subtract one here. This code is
+ unusually hard to reach, and exceptionally, let's show how to
+ trigger it here:
+
+ printf '\203AA\n'|LC_ALL=ja_JP.SHIFT_JIS src/grep -F A
+
+ That assumes the named locale is installed.
+ Note that your system's shift-JIS locale may have a different
+ name, possibly including "sjis". */
+ beg = mb_start - 1;
+ continue;
+ }
+ beg += offset;
+ /* A caller that passed START_PTR wants the exact match; without -w
+ there is nothing more to check. */
+ if (start_ptr && !match_words)
+ goto success_in_beg_and_len;
+ if (match_lines)
+ {
+ /* When reporting a whole line, make LEN one byte longer. */
+ len += start_ptr == NULL;
+ goto success_in_beg_and_len;
+ }
+ if (match_words)
+ for (try = beg; ; )
+ {
+ /* BOL is the start of the line that contains BEG. */
+ char const *bol = memrchr (buf, eol, beg - buf);
+ bol = bol ? bol + 1 : buf;
+ /* Give up on this position if a word character precedes it. */
+ if (wordchar (mb_prev_wc (bol, try, buf + size)))
+ break;
+ if (wordchar (mb_next_wc (try + len, buf + size)))
+ {
+ /* A word character follows the match: search again within
+ one byte less than the current match length. */
+ if (!len)
+ break;
+ offset = kwsexec (kwset, beg, --len, &kwsmatch);
+ if (offset == (size_t) -1)
+ break;
+ try = beg + offset;
+ len = kwsmatch.size[0];
+ }
+ else if (!start_ptr)
+ goto success;
+ else
+ goto success_in_beg_and_len;
+ } /* for (try) */
+ else
+ goto success;
+ } /* for (beg in buf) */
+
+ failure:
+ return -1;
+
+ success:
+ /* Widen [BEG, BEG + LEN) to the whole line: up to and including the
+ next EOL (or the end of BUF), and back to just after the previous
+ EOL (or the start of BUF). */
+ end = memchr (beg + len, eol, (buf + size) - (beg + len));
+ end = end ? end + 1 : buf + size;
+ beg = memrchr (buf, eol, beg - buf);
+ beg = beg ? beg + 1 : buf;
+ len = end - beg;
+ success_in_beg_and_len:;
+ size_t off = beg - buf;
+
+ *match_size = len;
+ ret_val = off;
+ return ret_val;
+}
diff --git a/src/kwset.c b/src/kwset.c
new file mode 100644
index 0000000..03520b6
--- /dev/null
+++ b/src/kwset.c
@@ -0,0 +1,868 @@
+/* kwset.c - search for any of a set of keywords.
+ Copyright (C) 1989, 1998, 2000, 2005, 2007, 2009-2016 Free Software
+ Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
+ 02110-1301, USA. */
+
+/* Written August 1989 by Mike Haertel.
+ The author may be reached (Email) at the address mike@ai.mit.edu,
+ or (US mail) as Mike Haertel c/o Free Software Foundation. */
+
+/* The algorithm implemented by these routines bears a startling resemblance
+ to one discovered by Beate Commentz-Walter, although it is not identical.
+ See: Commentz-Walter B. A string matching algorithm fast on the average.
+ Lecture Notes in Computer Science 71 (1979), 118-32
+ <http://dx.doi.org/10.1007/3-540-09510-1_10>.
+ See also: Aho AV, Corasick MJ. Efficient string matching: an aid to
+ bibliographic search. CACM 18, 6 (1975), 333-40
+ <http://dx.doi.org/10.1145/360825.360855>, which describes the
+ failure function used below. */
+
+#include <config.h>
+
+#include "kwset.h"
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <sys/types.h>
+#include "system.h"
+#include "memchr2.h"
+#include "obstack.h"
+#include "xalloc.h"
+
+#define link kwset_link
+
+#ifdef GREP
+# include "xalloc.h"
+# undef malloc
+# define malloc xmalloc
+#endif
+
+#define NCHAR (UCHAR_MAX + 1)
+#define obstack_chunk_alloc malloc
+#define obstack_chunk_free free
+
+#define U(c) (to_uchar (c))
+
+/* Balanced tree of edges and labels leaving a given trie node.
+ kwsincr inserts into it and rebalances it by pointer rotations. */
+struct tree
+{
+ /* kwsincr casts the address of a trie's LINKS member to a struct tree *
+ and stores through its llink, hence the placement requirement. */
+ struct tree *llink; /* Left link; MUST be first field. */
+ struct tree *rlink; /* Right link (to larger labels). */
+ struct trie *trie; /* Trie node pointed to by this edge. */
+ unsigned char label; /* Label on this edge. */
+ /* Adjusted by kwsincr on insertion; a value of -2 or 2 there
+ triggers a rotation. */
+ char balance; /* Difference in depths of subtrees. */
+};
+
+/* Node of a trie representing a set of reversed keywords. */
+struct trie
+{
+ /* kwsincr stores 1 + 2 * I here, where I is the number of words
+ added before the first word that ends at this node. */
+ size_t accepting; /* Word index of accepted word, or zero. */
+ struct tree *links; /* Tree of edges leaving this node. */
+ struct trie *parent; /* Parent of this node. */
+ struct trie *next; /* List of all trie nodes in level order. */
+ struct trie *fail; /* Aho-Corasick failure function. */
+ int depth; /* Depth of this node from the root. */
+ int shift; /* Shift function for search failures. */
+ /* Not set when the node is created; kwsprep assigns it. */
+ int maxshift; /* Max shift of self and descendants. */
+};
+
+/* Structure returned opaquely to the caller, containing everything.
+ kwsalloc creates it, kwsincr adds words to it, and kwsprep fills in
+ the search tables. */
+struct kwset
+{
+ struct obstack obstack; /* Obstack for node allocation. */
+ ptrdiff_t words; /* Number of words in the trie. */
+ struct trie *trie; /* The trie itself. */
+ /* kwsalloc starts MIND at INT_MAX and MAXD at -1; kwsincr updates
+ both after each word. */
+ int mind; /* Minimum depth of an accepting node. */
+ int maxd; /* Maximum depth of any node. */
+ unsigned char delta[NCHAR]; /* Delta table for rapid search. */
+ struct trie *next[NCHAR]; /* Table of children of the root. */
+ char *target; /* Target string if there's only one. */
+ int *shift; /* Used in Boyer-Moore search for one string. */
+ char const *trans; /* Character translation table. */
+
+ /* If there's only one string, this is the string's last byte,
+ translated via TRANS if TRANS is nonnull. */
+ char gc1;
+
+ /* Likewise for the string's penultimate byte, if it has two or more
+ bytes. */
+ char gc2;
+
+ /* If there's only one string, this helps to match the string's last byte.
+ If GC1HELP is negative, only GC1 matches the string's last byte;
+ otherwise at least two bytes match, and B matches if TRANS[B] == GC1.
+ If GC1HELP is in the range 0..(NCHAR - 1), there are exactly two
+ such matches, and GC1HELP is the other match after conversion to
+ unsigned char. If GC1HELP is at least NCHAR, there are three or
+ more such matches; e.g., Greek has three sigma characters that
+ all match when case-folding. */
+ int gc1help;
+};
+
+/* Return C as translated by the table TRANS, which is indexed by the
+   unsigned char value of C.  A null TRANS means no translation, in
+   which case C is returned unchanged. */
+static inline char
+tr (char const *trans, char c)
+{
+  if (trans == NULL)
+    return c;
+  return trans[U(c)];
+}
+
+/* Create an empty keyword set and return an opaque pointer to it.
+   TRANS, which may be null, is stored as the set's character
+   translation table.  The set starts with a single root trie node
+   allocated on the set's own obstack. */
+kwset_t
+kwsalloc (char const *trans)
+{
+  struct kwset *set = xmalloc (sizeof *set);
+
+  obstack_init (&set->obstack);
+
+  /* The root is at depth 0 and has no edges, parent or failure link. */
+  struct trie *root = obstack_alloc (&set->obstack, sizeof *root);
+  root->accepting = 0;
+  root->links = NULL;
+  root->parent = NULL;
+  root->next = NULL;
+  root->fail = NULL;
+  root->depth = 0;
+  root->shift = 0;
+
+  set->words = 0;
+  set->trie = root;
+  /* No word has been added yet, so use values that the first word's
+     depth will replace. */
+  set->mind = INT_MAX;
+  set->maxd = -1;
+  set->target = NULL;
+  set->trans = trans;
+
+  return set;
+}
+
+/* This upper bound is valid for CHAR_BIT >= 4 and
+ exact for CHAR_BIT in { 4..11, 13, 15, 17, 19 }. */
+#define DEPTH_SIZE (CHAR_BIT + CHAR_BIT/2)
+
+/* Add the given string to the contents of the keyword set.
+ TEXT need not be NUL-terminated; exactly LEN bytes are used. The
+ bytes are inserted last byte first, each translated through the set's
+ TRANS table when that is nonnull. New nodes come from the set's
+ obstack. */
+void
+kwsincr (kwset_t kwset, char const *text, size_t len)
+{
+ struct trie *trie = kwset->trie;
+ char const *trans = kwset->trans;
+
+ /* Start just past the last byte; the loop below walks backwards. */
+ text += len;
+
+ /* Descend the trie (built of reversed keywords) character-by-character,
+ installing new nodes when necessary. */
+ while (len--)
+ {
+ unsigned char uc = *--text;
+ unsigned char label = trans ? trans[uc] : uc;
+
+ /* Descend the tree of outgoing links for this trie node,
+ looking for the current character and keeping track
+ of the path followed. */
+ struct tree *link = trie->links;
+ struct tree *links[DEPTH_SIZE];
+ enum { L, R } dirs[DEPTH_SIZE];
+ /* Slot 0 stands for the trie node itself: its LINKS member is
+ addressed as if it were the llink of a tree node, so that the
+ root of the tree can be replaced like any other child. */
+ links[0] = (struct tree *) &trie->links;
+ dirs[0] = L;
+ int depth = 1;
+
+ while (link && label != link->label)
+ {
+ links[depth] = link;
+ if (label < link->label)
+ dirs[depth++] = L, link = link->llink;
+ else
+ dirs[depth++] = R, link = link->rlink;
+ }
+
+ /* The current character doesn't have an outgoing link at
+ this trie node, so build a new trie node and install
+ a link in the current trie node's tree. */
+ if (!link)
+ {
+ link = obstack_alloc (&kwset->obstack, sizeof *link);
+ link->llink = NULL;
+ link->rlink = NULL;
+ link->trie = obstack_alloc (&kwset->obstack, sizeof *link->trie);
+ link->trie->accepting = 0;
+ link->trie->links = NULL;
+ link->trie->parent = trie;
+ link->trie->next = NULL;
+ link->trie->fail = NULL;
+ link->trie->depth = trie->depth + 1;
+ link->trie->shift = 0;
+ link->label = label;
+ link->balance = 0;
+
+ /* Install the new tree node in its parent. */
+ if (dirs[--depth] == L)
+ links[depth]->llink = link;
+ else
+ links[depth]->rlink = link;
+
+ /* Back up the tree fixing the balance flags. */
+ while (depth && !links[depth]->balance)
+ {
+ if (dirs[depth] == L)
+ --links[depth]->balance;
+ else
+ ++links[depth]->balance;
+ --depth;
+ }
+
+ /* Rebalance the tree by pointer rotations if necessary. */
+ /* The decrement or increment inside the condition updates the
+ balance of the first already-unbalanced ancestor; a rotation is
+ needed only if the result is nonzero. */
+ if (depth && ((dirs[depth] == L && --links[depth]->balance)
+ || (dirs[depth] == R && ++links[depth]->balance)))
+ {
+ struct tree *t, *r, *l, *rl, *lr;
+
+ /* In every case T ends up as the new root of the rotated
+ subtree; it is hooked into the parent after the switch. */
+ switch (links[depth]->balance)
+ {
+ case (char) -2:
+ switch (dirs[depth + 1])
+ {
+ case L:
+ r = links[depth], t = r->llink, rl = t->rlink;
+ t->rlink = r, r->llink = rl;
+ t->balance = r->balance = 0;
+ break;
+ case R:
+ r = links[depth], l = r->llink, t = l->rlink;
+ rl = t->rlink, lr = t->llink;
+ t->llink = l, l->rlink = lr, t->rlink = r, r->llink = rl;
+ l->balance = t->balance != 1 ? 0 : -1;
+ r->balance = t->balance != (char) -1 ? 0 : 1;
+ t->balance = 0;
+ break;
+ default:
+ abort ();
+ }
+ break;
+ case 2:
+ switch (dirs[depth + 1])
+ {
+ case R:
+ l = links[depth], t = l->rlink, lr = t->llink;
+ t->llink = l, l->rlink = lr;
+ t->balance = l->balance = 0;
+ break;
+ case L:
+ l = links[depth], r = l->rlink, t = r->llink;
+ lr = t->llink, rl = t->rlink;
+ t->llink = l, l->rlink = lr, t->rlink = r, r->llink = rl;
+ l->balance = t->balance != 1 ? 0 : -1;
+ r->balance = t->balance != (char) -1 ? 0 : 1;
+ t->balance = 0;
+ break;
+ default:
+ abort ();
+ }
+ break;
+ default:
+ abort ();
+ }
+
+ if (dirs[depth - 1] == L)
+ links[depth - 1]->llink = t;
+ else
+ links[depth - 1]->rlink = t;
+ }
+ }
+
+ trie = link->trie;
+ }
+
+ /* Mark the node we finally reached as accepting, encoding the
+ index number of this word in the keyword set so far. */
+ /* A node that is already accepting keeps its first index, but the
+ word count still grows. */
+ if (!trie->accepting)
+ trie->accepting = 1 + 2 * kwset->words;
+ ++kwset->words;
+
+ /* Keep track of the longest and shortest string of the keyword set. */
+ if (trie->depth < kwset->mind)
+ kwset->mind = trie->depth;
+ if (trie->depth > kwset->maxd)
+ kwset->maxd = trie->depth;
+}
+
+/* Append the trie node of every edge in TREE to the list that is
+   chained through the NEXT members and whose final element is *LAST,
+   updating *LAST as nodes are appended.  Both subtrees are handled
+   before the node of TREE itself. */
+static void
+enqueue (struct tree *tree, struct trie **last)
+{
+  if (tree != NULL)
+    {
+      enqueue (tree->llink, last);
+      enqueue (tree->rlink, last);
+
+      struct trie *node = tree->trie;
+      (*last)->next = node;
+      *last = node;
+    }
+}
+
+/* Set the Aho-Corasick failure link of every trie node referenced from
+   TREE.  FAIL is the failure link of the node that owns TREE, and
+   RECOURSE is the node to fall back on when no node in the chain of
+   failure links that starts at FAIL has an edge with the same label. */
+static void
+treefails (struct tree const *tree, struct trie const *fail,
+           struct trie *recourse)
+{
+  if (tree == NULL)
+    return;
+
+  treefails (tree->llink, fail, recourse);
+  treefails (tree->rlink, fail, recourse);
+
+  /* Follow the failure chain towards the root and stop at the first
+     node that has an outgoing edge labeled like this one. */
+  for (struct trie const *candidate = fail; candidate != NULL;
+       candidate = candidate->fail)
+    {
+      struct tree *edge = candidate->links;
+
+      while (edge != NULL && edge->label != tree->label)
+        edge = tree->label < edge->label ? edge->llink : edge->rlink;
+
+      if (edge != NULL)
+        {
+          tree->trie->fail = edge->trie;
+          return;
+        }
+    }
+
+  tree->trie->fail = recourse;
+}
+
+/* For every edge label in TREE, lower the DELTA entry of that label to
+   DEPTH when the entry is currently greater than DEPTH. */
+static void
+treedelta (struct tree const *tree,
+           unsigned int depth,
+           unsigned char delta[])
+{
+  if (tree == NULL)
+    return;
+
+  treedelta (tree->llink, depth, delta);
+  treedelta (tree->rlink, depth, delta);
+
+  unsigned char *entry = &delta[tree->label];
+  if (depth < *entry)
+    *entry = depth;
+}
+
+/* Return 1 if every label that occurs in the tree B also occurs in the
+   tree A, and 0 otherwise.  An empty B yields 1. */
+static int _GL_ATTRIBUTE_PURE
+hasevery (struct tree const *a, struct tree const *b)
+{
+  if (b == NULL)
+    return 1;
+
+  if (!hasevery (a, b->llink) || !hasevery (a, b->rlink))
+    return 0;
+
+  /* Both subtrees of B are covered; look up B's own label in A. */
+  struct tree const *node = a;
+  while (node != NULL && node->label != b->label)
+    node = b->label < node->label ? node->llink : node->rlink;
+
+  return node != NULL;
+}
+
+/* For every edge in TREE, store the trie node that the edge leads to in
+   NEXT, at the index given by the edge's label.  Entries of NEXT whose
+   index is not a label of TREE are left untouched. */
+static void
+treenext (struct tree const *tree, struct trie *next[])
+{
+  if (tree != NULL)
+    {
+      treenext (tree->llink, next);
+      treenext (tree->rlink, next);
+      next[tree->label] = tree->trie;
+    }
+}
+
+/* Compute the shift for each trie node, as well as the delta
+ table and next cache for the given keyword set. As a side effect
+ the trie nodes are threaded into a level-order queue through their
+ NEXT members. If KWSET->trans is nonnull, the delta and next
+ tables are built in local scratch buffers and then stored into
+ KWSET indexed through the translation table. If the set holds
+ exactly one word, also extract that word into KWSET->target and
+ its shifts into KWSET->shift, and precompute KWSET->gc1,
+ KWSET->gc1help and KWSET->gc2 for the Boyer-Moore search. */
+void
+kwsprep (kwset_t kwset)
+{
+ char const *trans = kwset->trans;
+ int i;
+ unsigned char deltabuf[NCHAR];
+ unsigned char *delta = trans ? deltabuf : kwset->delta;
+
+ /* Initial values for the delta table; will be changed later. The
+ delta entry for a given character is the smallest depth of any
+ node at which an outgoing edge is labeled by that character.
+ NOTE(review): the size comes from DELTABUF even when DELTA points
+ at KWSET->delta; presumably both have NCHAR entries -- confirm. */
+ memset (delta, MIN (kwset->mind, UCHAR_MAX), sizeof deltabuf);
+
+ /* Traverse the nodes of the trie in level order, simultaneously
+ computing the delta table, failure function, and shift function.
+ CURR walks the queue from the root while LAST tracks its tail. */
+ struct trie *curr, *last;
+ for (curr = last = kwset->trie; curr; curr = curr->next)
+ {
+ /* Enqueue the immediate descendants in the level order queue. */
+ enqueue (curr->links, &last);
+
+ curr->shift = kwset->mind; /* Starting values; only lowered below. */
+ curr->maxshift = kwset->mind;
+
+ /* Update the delta table for the descendants of this node. */
+ treedelta (curr->links, curr->depth, delta);
+
+ /* Compute the failure function for the descendants of this node. */
+ treefails (curr->links, curr->fail, kwset->trie);
+
+ /* Update the shifts at each node in the current node's chain
+ of fails back to the root. */
+ struct trie *fail;
+ for (fail = curr->fail; fail; fail = fail->fail)
+ {
+ /* If the current node has some outgoing edge that the fail
+ doesn't, then the shift at the fail should be no larger
+ than the difference of their depths. */
+ if (!hasevery (fail->links, curr->links))
+ if (curr->depth - fail->depth < fail->shift)
+ fail->shift = curr->depth - fail->depth;
+
+ /* If the current node is accepting then the shift at the
+ fail and its descendants should be no larger than the
+ difference of their depths. */
+ if (curr->accepting && fail->maxshift > curr->depth - fail->depth)
+ fail->maxshift = curr->depth - fail->depth;
+ }
+ }
+
+ /* Traverse the trie in level order again, fixing up all nodes whose
+ shift exceeds their inherited maxshift. The walk starts after the
+ root, and a parent is always visited before its children, so each
+ node sees its parent's final maxshift. */
+ for (curr = kwset->trie->next; curr; curr = curr->next)
+ {
+ if (curr->maxshift > curr->parent->maxshift)
+ curr->maxshift = curr->parent->maxshift;
+ if (curr->shift > curr->maxshift)
+ curr->shift = curr->maxshift;
+ }
+
+ /* Create a vector, indexed by character code, of the outgoing links
+ from the root node. With a translation table the vector is built
+ in NEXTBUF and then copied into KWSET->next through TRANS. */
+ struct trie *nextbuf[NCHAR];
+ struct trie **next = trans ? nextbuf : kwset->next;
+ memset (next, 0, sizeof nextbuf);
+ treenext (kwset->trie->links, next);
+ if (trans)
+ for (i = 0; i < NCHAR; ++i)
+ kwset->next[i] = next[U(trans[i])];
+
+ /* Check if we can use the simple boyer-moore algorithm, instead
+ of the hairy commentz-walter algorithm. */
+ if (kwset->words == 1)
+ {
+ /* Looking for just one string. Extract it from the trie. The
+ queue is followed from the root and TARGET is filled from its
+ last index down, so the label leaving the root becomes the
+ last byte of TARGET. */
+ kwset->target = obstack_alloc (&kwset->obstack, kwset->mind);
+ for (i = kwset->mind - 1, curr = kwset->trie; i >= 0; --i)
+ {
+ kwset->target[i] = curr->links->label;
+ curr = curr->next;
+ }
+ /* Looking for the delta2 shift that we might make after a
+ backwards match has failed. Extract it from the trie. Entry I
+ is the shift of the (I + 1)th node after the root in the queue. */
+ if (kwset->mind > 1)
+ {
+ kwset->shift
+ = obstack_alloc (&kwset->obstack,
+ sizeof *kwset->shift * (kwset->mind - 1));
+ for (i = 0, curr = kwset->trie->next; i < kwset->mind - 1; ++i)
+ {
+ kwset->shift[i] = curr->shift;
+ curr = curr->next;
+ }
+ }
+
+ char gc1 = tr (trans, kwset->target[kwset->mind - 1]);
+
+ /* Set GC1HELP according to whether exactly one, exactly two, or
+ three-or-more characters match GC1: -1 for one, NCHAR for
+ three or more. For exactly two, the XOR of GC1 with the two
+ matching indexes is stored; if GC1 is itself one of those
+ indexes, that leaves the other one.
+ NOTE(review): EQUIV1 is used without a null check; presumably
+ GC1, being an output of tr, always occurs in TRANS -- confirm. */
+ int gc1help = -1;
+ if (trans)
+ {
+ char const *equiv1 = memchr (trans, gc1, NCHAR);
+ char const *equiv2 = memchr (equiv1 + 1, gc1,
+ trans + NCHAR - (equiv1 + 1));
+ if (equiv2)
+ gc1help = (memchr (equiv2 + 1, gc1, trans + NCHAR - (equiv2 + 1))
+ ? NCHAR
+ : U(gc1) ^ (equiv1 - trans) ^ (equiv2 - trans));
+ }
+
+ kwset->gc1 = gc1;
+ kwset->gc1help = gc1help;
+ if (kwset->mind > 1)
+ kwset->gc2 = tr (trans, kwset->target[kwset->mind - 2]);
+ }
+
+ /* Fix things up for any translation table: the entry for byte I is
+ the scratch entry for the byte that I translates to. */
+ if (trans)
+ for (i = 0; i < NCHAR; ++i)
+ kwset->delta[i] = delta[U(trans[i])];
+}
+
+/* Delta2 portion of a Boyer-Moore search. *TPP is the string text
+ pointer; it is updated in place. EP is the end of the string text,
+ and SP the end of the pattern. LEN is the pattern length; it must
+ be at least 2. TRANS, if nonnull, is the input translation table.
+ GC1 and GC2 are the last and second-from last bytes of the pattern,
+ transliterated by TRANS; the caller precomputes them for
+ efficiency. If D1 is nonnull, it is a delta1 table for shifting *TPP
+ when failing. KWSET->shift says how much to shift.
+ Return true if the pattern is found, with *TPP set to the first
+ byte of the match; otherwise return false with *TPP set to the
+ position the search had reached when it gave up. */
+static inline bool
+bm_delta2_search (char const **tpp, char const *ep, char const *sp, int len,
+ char const *trans, char gc1, char gc2,
+ unsigned char const *d1, kwset_t kwset)
+{
+ char const *tp = *tpp;
+ int d = len, skip = 0;
+
+ while (true)
+ {
+ int i = 2; /* Offset 1 (tp[-1] against GC1) is presumably already
+ verified by the caller, and is verified below before
+ each further pass. */
+ if (tr (trans, tp[-2]) == gc2)
+ {
+ while (++i <= d) /* Compare offsets 3..D back from TP. */
+ if (tr (trans, tp[-i]) != tr (trans, sp[-i]))
+ break;
+ if (i > d)
+ {
+ /* The first D offsets matched; step over the next SKIP
+ offsets and compare the rest up to LEN. */
+ for (i = d + skip + 1; i <= len; ++i)
+ if (tr (trans, tp[-i]) != tr (trans, sp[-i]))
+ break;
+ if (i > len)
+ {
+ *tpp = tp - len;
+ return true;
+ }
+ }
+ }
+
+ tp += d = kwset->shift[i - 2]; /* Shift chosen by the offset I at
+ which comparison stopped. */
+ if (tp > ep)
+ break;
+ if (tr (trans, tp[-1]) != gc1)
+ {
+ if (d1)
+ tp += d1[U(tp[-1])];
+ break;
+ }
+ skip = i - 1; /* NOTE(review): presumably the I - 1 offsets just
+ matched are known to line up again after the
+ shift, so the next pass need not recheck them
+ -- confirm. */
+ }
+
+ *tpp = tp;
+ return false;
+}
+
+/* Return the address of the first byte in the buffer S (of size N)
+ that matches the last byte specified by KWSET, a singleton.
+ Return NULL if there is no such byte. When KWSET->gc1help is
+ negative a plain memchr for KWSET->gc1 is used. Otherwise the
+ first NTRANS bytes are checked one at a time through KWSET->trans
+ and any remainder is searched with memchr2 for KWSET->gc1 or
+ KWSET->gc1help. */
+static char const *
+memchr_kwset (char const *s, size_t n, kwset_t kwset)
+{
+ if (kwset->gc1help < 0)
+ return memchr (s, kwset->gc1, n);
+ int small_heuristic = 2;
+ int small = (- (uintptr_t) s % sizeof (long) /* Bytes up to the next
+ multiple of sizeof (long) ... */
+ + small_heuristic * sizeof (long)); /* ... plus two more longs. */
+ size_t ntrans = kwset->gc1help < NCHAR && small < n ? small : n; /* The
+ whole buffer is scanned bytewise when gc1help is NCHAR or the
+ buffer is no longer than SMALL. */
+ char const *slim = s + ntrans;
+ for (; s < slim; s++)
+ if (kwset->trans[U(*s)] == kwset->gc1)
+ return s;
+ n -= ntrans;
+ return n == 0 ? NULL : memchr2 (s, kwset->gc1, kwset->gc1help, n);
+}
+
+/* Fast Boyer-Moore search (inlinable version). Search TEXT (of SIZE
+ bytes) for the single word of KWSET, whose length is KWSET->mind.
+ Return the offset of the match found, 0 if the word is empty, or
+ (size_t) -1 if there is no match (including when the word is
+ longer than TEXT). */
+static inline size_t _GL_ATTRIBUTE_PURE
+bmexec_trans (kwset_t kwset, char const *text, size_t size)
+{
+ unsigned char const *d1;
+ char const *ep, *sp, *tp;
+ int d;
+ int len = kwset->mind;
+ char const *trans = kwset->trans;
+
+ if (len == 0)
+ return 0;
+ if (len > size)
+ return -1;
+ if (len == 1)
+ {
+ tp = memchr_kwset (text, size, kwset); /* One-byte word: a byte search
+ is the whole job. */
+ return tp ? tp - text : -1;
+ }
+
+ d1 = kwset->delta;
+ sp = kwset->target + len; /* One past the end of the pattern. */
+ tp = text + len; /* One past the end of the first candidate. */
+ char gc1 = kwset->gc1;
+ char gc2 = kwset->gc2;
+
+ /* Significance of 12: 1 (initial offset) + 10 (skip loop) + 1 (md2). */
+ if (size > 12 * len)
+ /* 11 is not a bug, the initial offset happens only once. */
+ for (ep = text + size - 11 * len; tp <= ep; )
+ {
+ char const *tp0 = tp;
+ d = d1[U(tp[-1])], tp += d;
+ d = d1[U(tp[-1])], tp += d;
+ if (d != 0) /* A zero delta leaves TP on a candidate end byte. */
+ {
+ d = d1[U(tp[-1])], tp += d;
+ d = d1[U(tp[-1])], tp += d;
+ d = d1[U(tp[-1])], tp += d;
+ if (d != 0)
+ {
+ d = d1[U(tp[-1])], tp += d;
+ d = d1[U(tp[-1])], tp += d;
+ d = d1[U(tp[-1])], tp += d;
+ if (d != 0)
+ {
+ d = d1[U(tp[-1])], tp += d;
+ d = d1[U(tp[-1])], tp += d;
+
+ /* As a heuristic, prefer memchr to seeking by
+ delta1 when the latter doesn't advance much. */
+ int advance_heuristic = 16 * sizeof (long);
+ if (advance_heuristic <= tp - tp0)
+ continue;
+ tp--; /* Search from the byte at tp[-1] onwards. */
+ tp = memchr_kwset (tp, text + size - tp, kwset);
+ if (! tp)
+ return -1;
+ tp++; /* Back to pointing one past the candidate end byte. */
+ if (ep <= tp)
+ break;
+ }
+ }
+ }
+ if (bm_delta2_search (&tp, ep, sp, len, trans, gc1, gc2, d1, kwset))
+ return tp - text;
+ }
+
+ /* Now we have only a few characters left to search. We
+ carefully avoid ever producing an out-of-bounds pointer. */
+ ep = text + size;
+ d = d1[U(tp[-1])];
+ while (d <= ep - tp)
+ {
+ d = d1[U((tp += d)[-1])];
+ if (d != 0)
+ continue;
+ if (bm_delta2_search (&tp, ep, sp, len, trans, gc1, gc2, NULL, kwset))
+ return tp - text;
+ }
+
+ return -1;
+}
+
+/* Fast Boyer-Moore search. Thin wrapper that returns whatever
+ bmexec_trans returns for the same arguments. */
+static size_t
+bmexec (kwset_t kwset, char const *text, size_t size)
+{
+ /* Help the compiler inline bmexec_trans in two ways, depending on
+ whether kwset->trans is null. The two arms are textually
+ identical on purpose; do not "simplify" them into one call. */
+ return (kwset->trans
+ ? bmexec_trans (kwset, text, size)
+ : bmexec_trans (kwset, text, size));
+}
+
+/* Hairy multiple string search. Search TEXT (of LEN bytes) for any
+ word of KWSET. Return the offset of the match, or (size_t) -1 if
+ none is found. On a match, fill in *KWSMATCH: index from the
+ accepting trie node (its ACCEPTING value divided by 2), offset[0]
+ with the returned offset, and size[0] with that node's depth.
+ The first loop finds some match; the code after the MATCH label
+ then looks for a match starting at or before it. */
+static size_t _GL_ARG_NONNULL ((4))
+cwexec (kwset_t kwset, char const *text, size_t len, struct kwsmatch *kwsmatch)
+{
+ struct trie * const *next;
+ struct trie const *trie;
+ struct trie const *accept;
+ char const *beg, *lim, *mch, *lmch;
+ unsigned char c;
+ unsigned char const *delta;
+ int d;
+ char const *end, *qlim;
+ struct tree const *tree;
+ char const *trans;
+
+#ifdef lint
+ accept = NULL;
+#endif
+
+ /* Initialize register copies and look for easy ways out. */
+ if (len < kwset->mind)
+ return -1;
+ next = kwset->next;
+ delta = kwset->delta;
+ trans = kwset->trans;
+ lim = text + len;
+ end = text;
+ if ((d = kwset->mind) != 0)
+ mch = NULL;
+ else
+ {
+ mch = text, accept = kwset->trie; /* KWSET->mind is zero: treat the
+ start of TEXT as a match at the
+ root node. */
+ goto match;
+ }
+
+ if (len >= 4 * kwset->mind)
+ qlim = lim - 4 * kwset->mind; /* Limit for the unrolled skip loop. */
+ else
+ qlim = NULL;
+
+ while (lim - end >= d)
+ {
+ if (qlim && end <= qlim)
+ {
+ end += d - 1;
+ while ((d = delta[c = *end]) && end < qlim)
+ {
+ end += d;
+ end += delta[U(*end)];
+ end += delta[U(*end)];
+ }
+ ++end;
+ }
+ else
+ d = delta[c = (end += d)[-1]];
+ if (d)
+ continue;
+ beg = end - 1; /* Zero delta: walk the trie backwards from here. */
+ trie = next[c];
+ if (trie->accepting)
+ {
+ mch = beg;
+ accept = trie;
+ }
+ d = trie->shift;
+ while (beg > text)
+ {
+ unsigned char uc = *--beg;
+ c = trans ? trans[uc] : uc;
+ tree = trie->links;
+ while (tree && c != tree->label) /* Ordered-tree lookup of C. */
+ if (c < tree->label)
+ tree = tree->llink;
+ else
+ tree = tree->rlink;
+ if (tree)
+ {
+ trie = tree->trie;
+ if (trie->accepting)
+ {
+ mch = beg;
+ accept = trie;
+ }
+ }
+ else
+ break;
+ d = trie->shift;
+ }
+ if (mch)
+ goto match;
+ }
+ return -1;
+
+ match:
+ /* Given a known match, find the longest possible match anchored
+ at or before its starting point. This is nearly a verbatim
+ copy of the preceding main search loops. Differences visible
+ here: LIM is clamped to MCH + KWSET->maxd, NEXT[C] may be null,
+ only accepting nodes with BEG <= MCH are recorded, and a zero
+ shift is bumped to 1. */
+ if (lim - mch > kwset->maxd)
+ lim = mch + kwset->maxd;
+ lmch = 0;
+ d = 1;
+ while (lim - end >= d)
+ {
+ if ((d = delta[c = (end += d)[-1]]) != 0)
+ continue;
+ beg = end - 1;
+ if (!(trie = next[c]))
+ {
+ d = 1;
+ continue;
+ }
+ if (trie->accepting && beg <= mch)
+ {
+ lmch = beg;
+ accept = trie;
+ }
+ d = trie->shift;
+ while (beg > text)
+ {
+ unsigned char uc = *--beg;
+ c = trans ? trans[uc] : uc;
+ tree = trie->links;
+ while (tree && c != tree->label)
+ if (c < tree->label)
+ tree = tree->llink;
+ else
+ tree = tree->rlink;
+ if (tree)
+ {
+ trie = tree->trie;
+ if (trie->accepting && beg <= mch)
+ {
+ lmch = beg;
+ accept = trie;
+ }
+ }
+ else
+ break;
+ d = trie->shift;
+ }
+ if (lmch)
+ {
+ mch = lmch; /* Adopt the new match and redo the refinement. */
+ goto match;
+ }
+ if (!d)
+ d = 1;
+ }
+
+ kwsmatch->index = accept->accepting / 2;
+ kwsmatch->offset[0] = mch - text;
+ kwsmatch->size[0] = accept->depth;
+
+ return mch - text;
+}
+
+/* Look in TEXT (SIZE bytes long) for any keyword of KWSET.  Return
+   the offset into TEXT of the first byte of the match, or
+   (size_t) -1 when nothing matches.  On a match, *KWSMATCH receives
+   the index of the matched keyword, the start offset (equal to the
+   return value), and the match length.  A set with exactly one word
+   is searched with Boyer-Moore; any other set goes to cwexec.  */
+size_t
+kwsexec (kwset_t kwset, char const *text, size_t size,
+         struct kwsmatch *kwsmatch)
+{
+  if (kwset->words != 1)
+    return cwexec (kwset, text, size, kwsmatch);
+
+  size_t off = bmexec (kwset, text, size);
+  if (off == (size_t) -1)
+    return off;
+
+  /* The only keyword has index 0 and length KWSET->mind.  */
+  kwsmatch->index = 0;
+  kwsmatch->offset[0] = off;
+  kwsmatch->size[0] = kwset->mind;
+  return off;
+}
+
+/* Free the components of the given keyword set: first everything
+ held by KWSET's obstack, then the kwset structure itself. KWSET
+ must not be used after this call. */
+void
+kwsfree (kwset_t kwset)
+{
+ obstack_free (&kwset->obstack, NULL);
+ free (kwset);
+}
diff --git a/src/kwset.h b/src/kwset.h
new file mode 100644
index 0000000..95693e3
--- /dev/null
+++ b/src/kwset.h
@@ -0,0 +1,60 @@
+/* kwset.h - header declaring the keyword set library.
+ Copyright (C) 1989, 1998, 2005, 2007, 2009-2016 Free Software Foundation,
+ Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
+ 02110-1301, USA. */
+
+/* Written August 1989 by Mike Haertel.
+ The author may be reached (Email) at the address mike@ai.mit.edu,
+ or (US mail) as Mike Haertel c/o Free Software Foundation. */
+
+#include <stddef.h>
+
+struct kwsmatch
+{
+ size_t index; /* Index number of matching keyword. */
+ size_t offset[1]; /* Offset of each submatch; element 0 is the offset
+ of the match within the searched text. */
+ size_t size[1]; /* Length of each submatch; element 0 is the length
+ of the match in bytes. */
+};
+
+#include "arg-nonnull.h"
+
+struct kwset;
+typedef struct kwset *kwset_t;
+
+/* Return an opaque pointer to a newly allocated keyword set. A nonnull arg
+ specifies a table of character translations to be applied to all
+ pattern and search text. */
+extern kwset_t kwsalloc (char const *);
+
+/* Incrementally extend the keyword set to include the given string.
+ Remember an index number for each keyword included in the set. */
+extern void kwsincr (kwset_t, char const *, size_t);
+
+/* When the keyword set has been completely built, prepare it for use. */
+extern void kwsprep (kwset_t);
+
+/* Search through the given buffer, of the given size, for a member
+ of the keyword set. Return the offset into the buffer of the
+ first byte of the match, or (size_t) -1 if no match is found.
+ Upon a match, store in the struct kwsmatch (which must be nonnull)
+ the index of the keyword found, the offset of the match (same as
+ the return value), and the length of the match. */
+extern size_t kwsexec (kwset_t, char const *, size_t, struct kwsmatch *)
+ _GL_ARG_NONNULL ((4));
+
+/* Deallocate the given keyword set and all its associated storage. */
+extern void kwsfree (kwset_t);
diff --git a/src/pcresearch.c b/src/pcresearch.c
new file mode 100644
index 0000000..f6e72b0
--- /dev/null
+++ b/src/pcresearch.c
@@ -0,0 +1,389 @@
+/* pcresearch.c - searching subroutines using PCRE for grep.
+ Copyright 2000, 2007, 2009-2016 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
+ 02110-1301, USA. */
+
+/* Written August 1992 by Mike Haertel. */
+
+#include <config.h>
+#include "search.h"
+
+#if HAVE_LIBPCRE
+# include <pcre.h>
+
+/* Number of ints in each SUB vector handed to pcre_exec.
+ This must be at least 2; everything after that is for performance
+ in pcre_exec. */
+enum { NSUB = 300 };
+
+/* Compiled internal form of a Perl regular expression. Set by
+ Pcompile from pcre_compile. */
+static pcre *cre;
+
+/* Additional information about the pattern. Set by Pcompile from
+ pcre_study. */
+static pcre_extra *extra;
+
+# ifndef PCRE_STUDY_JIT_COMPILE
+# define PCRE_STUDY_JIT_COMPILE 0 /* Header lacks the macro; the JIT code
+ below is then compiled out. */
+# endif
+
+# if PCRE_STUDY_JIT_COMPILE
+/* Maximum size of the JIT stack. Zero until Pcompile finds that the
+ pattern is JIT-compiled; jit_exec doubles it each time pcre_exec
+ reports that the JIT stack limit was hit. */
+static int jit_stack_size;
+# endif
+
+/* Match the already-compiled PCRE pattern against the data in SUBJECT,
+ of size SEARCH_BYTES and starting with offset SEARCH_OFFSET, with
+ options OPTIONS, and storing resulting matches into SUB. Return
+ the value of pcre_exec: positive when there is a match (whose
+ location is then in SUB), or a (negative) error number. When JIT
+ support is compiled in and pcre_exec fails because the JIT stack
+ is too small, allocate a stack twice as large and try again. */
+static int
+jit_exec (char const *subject, int search_bytes, int search_offset,
+ int options, int *sub)
+{
+ while (true)
+ {
+ int e = pcre_exec (cre, extra, subject, search_bytes, search_offset,
+ options, sub, NSUB);
+
+# if PCRE_STUDY_JIT_COMPILE
+ if (e == PCRE_ERROR_JIT_STACKLIMIT
+ && 0 < jit_stack_size && jit_stack_size <= INT_MAX / 2) /* Doubling
+ must not overflow int; a zero size means JIT is not in use. */
+ {
+ int old_size = jit_stack_size;
+ int new_size = jit_stack_size = old_size * 2;
+ static pcre_jit_stack *jit_stack; /* Kept across calls so the
+ previous stack can be freed. */
+ if (jit_stack)
+ pcre_jit_stack_free (jit_stack);
+ jit_stack = pcre_jit_stack_alloc (old_size, new_size);
+ if (!jit_stack)
+ error (EXIT_TROUBLE, 0,
+ _("failed to allocate memory for the PCRE JIT stack"));
+ pcre_assign_jit_stack (extra, NULL, jit_stack);
+ continue;
+ }
+# endif
+
+ return e;
+ }
+}
+
+#endif
+
+#if HAVE_LIBPCRE
+/* Table, indexed by ! (flag & PCRE_NOTBOL), of whether the empty
+ string matches when that flag is used. Each entry is the
+ pcre_exec result for an empty subject, computed once at the end
+ of Pcompile. */
+static int empty_match[2];
+
+static bool multibyte_locale; /* Set by Pcompile when MB_CUR_MAX exceeds 1;
+ Pcompile accepts only UTF-8 in that case. */
+#endif
+
+/* Compile PATTERN, which is SIZE bytes long and may contain NUL
+ bytes, as a Perl-compatible regular expression for later use by
+ Pexecute. The compiled form is stored in CRE and EXTRA, and
+ EMPTY_MATCH is filled in. With match_words or match_lines the
+ pattern is wrapped in the corresponding prefix and suffix. Any
+ failure is reported through error with EXIT_TROUBLE. Without
+ libpcre this function only reports that -P is unsupported. */
+void
+Pcompile (char const *pattern, size_t size)
+{
+#if !HAVE_LIBPCRE
+ error (EXIT_TROUBLE, 0, "%s",
+ _("support for the -P option is not compiled into "
+ "this --disable-perl-regexp binary"));
+#else
+ int e;
+ char const *ep;
+ static char const wprefix[] = "(?<!\\w)(?:";
+ static char const wsuffix[] = ")(?!\\w)";
+ static char const xprefix[] = "^(?:";
+ static char const xsuffix[] = ")$";
+ int fix_len_max = MAX (sizeof wprefix - 1 + sizeof wsuffix - 1,
+ sizeof xprefix - 1 + sizeof xsuffix - 1);
+ /* Room for every pattern byte to grow to four bytes (a NUL becomes
+ "\000"), plus the longest prefix/suffix pair rounded up. */
+ char *re = xnmalloc (4, size + (fix_len_max + 4 - 1) / 4);
+ int flags = (PCRE_MULTILINE
+ | (match_icase ? PCRE_CASELESS : 0));
+ char const *patlim = pattern + size;
+ char *n = re;
+ char const *p;
+ char const *pnul;
+
+ if (1 < MB_CUR_MAX)
+ {
+ if (! using_utf8 ())
+ error (EXIT_TROUBLE, 0,
+ _("-P supports only unibyte and UTF-8 locales"));
+ multibyte_locale = true;
+ flags |= PCRE_UTF8;
+ }
+
+ /* FIXME: Remove these restrictions. */
+ if (memchr (pattern, '\n', size))
+ error (EXIT_TROUBLE, 0, _("the -P option only supports a single pattern"));
+ if (! eolbyte)
+ {
+ bool escaped = false;
+ bool after_unescaped_left_bracket = false;
+ /* Scan exactly the SIZE bytes of the pattern. The pattern is
+ length-delimited and may contain NUL bytes, so stopping at the
+ first NUL could miss an anchor or run past PATLIM. */
+ for (p = pattern; p < patlim; p++)
+ if (escaped)
+ escaped = after_unescaped_left_bracket = false;
+ else
+ {
+ if (*p == '$' || (*p == '^' && !after_unescaped_left_bracket))
+ error (EXIT_TROUBLE, 0,
+ _("unescaped ^ or $ not supported with -Pz"));
+ escaped = *p == '\\';
+ after_unescaped_left_bracket = *p == '[';
+ }
+ }
+
+ /* If both match_words and match_lines are set, the line prefix
+ overwrites the word prefix, so only the line form is used. */
+ *n = '\0';
+ if (match_words)
+ strcpy (n, wprefix);
+ if (match_lines)
+ strcpy (n, xprefix);
+ n += strlen (n);
+
+ /* The PCRE interface doesn't allow NUL bytes in the pattern, so
+ replace each NUL byte in the pattern with the four characters
+ "\000", removing a preceding backslash if there are an odd
+ number of backslashes before the NUL. */
+ for (p = pattern; (pnul = memchr (p, '\0', patlim - p)); p = pnul + 1)
+ {
+ memcpy (n, p, pnul - p);
+ n += pnul - p;
+ for (p = pnul; pattern < p && p[-1] == '\\'; p--)
+ continue;
+ n -= (pnul - p) & 1;
+ strcpy (n, "\\000");
+ n += 4;
+ }
+
+ /* Copy whatever follows the last NUL (or the whole pattern). */
+ memcpy (n, p, patlim - p);
+ n += patlim - p;
+ *n = '\0';
+ if (match_words)
+ strcpy (n, wsuffix);
+ if (match_lines)
+ strcpy (n, xsuffix);
+
+ cre = pcre_compile (re, flags, &ep, &e, pcre_maketables ());
+ if (!cre)
+ error (EXIT_TROUBLE, 0, "%s", ep);
+
+ extra = pcre_study (cre, PCRE_STUDY_JIT_COMPILE, &ep);
+ if (ep)
+ error (EXIT_TROUBLE, 0, "%s", ep);
+
+# if PCRE_STUDY_JIT_COMPILE
+ if (pcre_fullinfo (cre, extra, PCRE_INFO_JIT, &e))
+ error (EXIT_TROUBLE, 0, _("internal error (should never happen)"));
+
+ /* The PCRE documentation says that a 32 KiB stack is the default. */
+ if (e)
+ jit_stack_size = 32 << 10;
+# endif
+
+ free (re);
+
+ /* Precompute whether the empty string matches, with and without
+ PCRE_NOTBOL, so Pexecute need not call pcre_exec on empty lines. */
+ int sub[NSUB];
+ empty_match[false] = pcre_exec (cre, extra, "", 0, 0,
+ PCRE_NOTBOL, sub, NSUB);
+ empty_match[true] = pcre_exec (cre, extra, "", 0, 0, 0, sub, NSUB);
+#endif /* HAVE_LIBPCRE */
+}
+
+size_t
+Pexecute (char *buf, size_t size, size_t *match_size,
+ char const *start_ptr)
+{ /* Search BUF, of SIZE bytes, with the pattern compiled by Pcompile,
+ starting at START_PTR if it is nonnull and at BUF otherwise.
+ Return (size_t) -1 if nothing matches. Otherwise return an
+ offset into BUF and store a length in *MATCH_SIZE: the match
+ itself when START_PTR is nonnull, else the enclosing line or
+ lines including the terminating eolbyte. PCRE failures other
+ than "no match" are reported through error with EXIT_TROUBLE. */
+#if !HAVE_LIBPCRE
+ /* We can't get here, because Pcompile would have been called earlier. */
+ error (EXIT_TROUBLE, 0, _("internal error"));
+ return -1;
+#else
+ int sub[NSUB];
+ char const *p = start_ptr ? start_ptr : buf;
+ bool bol = p[-1] == eolbyte; /* NOTE(review): reads the byte before P;
+ presumably the caller guarantees one
+ exists before BUF -- confirm. */
+ char const *line_start = buf;
+ int e = PCRE_ERROR_NOMATCH;
+ char const *line_end;
+
+ /* The search address to pass to pcre_exec. This is the start of
+ the buffer, or just past the most-recently discovered encoding
+ error. */
+ char const *subject = buf;
+
+ /* If the input is unibyte or is free of encoding errors a multiline search is
+ typically more efficient. Otherwise, a single-line search is
+ typically faster, so that pcre_exec doesn't waste time validating
+ the entire input buffer. */
+ bool multiline = true;
+ if (multibyte_locale)
+ {
+ multiline = ! buf_has_encoding_errors (buf, size - 1);
+ buf[size - 1] = eolbyte; /* NOTE(review): presumably restores a byte
+ that buf_has_encoding_errors overwrote
+ -- confirm. */
+ }
+
+ for (; p < buf + size; p = line_start = line_end + 1)
+ {
+ bool too_big;
+
+ if (multiline)
+ {
+ /* Take everything up to the last eolbyte within PCRE's size
+ limit as one subject. */
+ size_t pcre_size_max = MIN (INT_MAX, SIZE_MAX - 1);
+ size_t scan_size = MIN (pcre_size_max + 1, buf + size - p);
+ line_end = memrchr (p, eolbyte, scan_size);
+ too_big = ! line_end;
+ }
+ else
+ {
+ line_end = memchr (p, eolbyte, buf + size - p);
+ too_big = INT_MAX < line_end - p;
+ }
+
+ if (too_big)
+ error (EXIT_TROUBLE, 0, _("exceeded PCRE's line length limit"));
+
+ for (;;)
+ {
+ /* Skip past bytes that are easily determined to be encoding
+ errors, treating them as data that cannot match. This is
+ faster than having pcre_exec check them. */
+ while (mbclen_cache[to_uchar (*p)] == (size_t) -1)
+ {
+ p++;
+ subject = p;
+ bol = false;
+ }
+
+ int search_offset = p - subject;
+
+ /* Check for an empty match; this is faster than letting
+ pcre_exec do it. */
+ if (p == line_end)
+ {
+ sub[0] = sub[1] = search_offset;
+ e = empty_match[bol];
+ break;
+ }
+
+ int options = 0;
+ if (!bol)
+ options |= PCRE_NOTBOL;
+ if (multiline)
+ options |= PCRE_NO_UTF8_CHECK; /* Already known to be free of
+ encoding errors. */
+
+ e = jit_exec (subject, line_end - subject, search_offset,
+ options, sub);
+ if (e != PCRE_ERROR_BADUTF8)
+ {
+ if (0 < e && multiline && sub[1] - sub[0] != 0)
+ {
+ char const *nl = memchr (subject + sub[0], eolbyte,
+ sub[1] - sub[0]);
+ if (nl)
+ {
+ /* This match crosses a line boundary; reject it.
+ Retry from the match start with the subject cut
+ at the eolbyte found inside the match. */
+ p = subject + sub[0];
+ line_end = nl;
+ continue;
+ }
+ }
+ break;
+ }
+ int valid_bytes = sub[0]; /* With PCRE_ERROR_BADUTF8, sub[0] is taken
+ as the offset of the invalid byte (see
+ the pcreapi documentation). */
+
+ if (search_offset <= valid_bytes)
+ {
+ /* Try to match the string before the encoding error. */
+ if (valid_bytes == 0)
+ {
+ /* Handle the empty-match case specially, for speed.
+ This optimization is valid if VALID_BYTES is zero,
+ which means SEARCH_OFFSET is also zero. */
+ sub[1] = 0;
+ e = empty_match[bol];
+ }
+ else
+ e = jit_exec (subject, valid_bytes, search_offset,
+ options | PCRE_NO_UTF8_CHECK | PCRE_NOTEOL, sub);
+
+ if (e != PCRE_ERROR_NOMATCH)
+ break;
+
+ /* Treat the encoding error as data that cannot match. */
+ p = subject + valid_bytes + 1;
+ bol = false;
+ }
+
+ subject += valid_bytes + 1;
+ }
+
+ if (e != PCRE_ERROR_NOMATCH)
+ break;
+ bol = true; /* The next subject starts right after an eolbyte. */
+ }
+
+ if (e <= 0)
+ {
+ switch (e)
+ {
+ case PCRE_ERROR_NOMATCH:
+ break;
+
+ case PCRE_ERROR_NOMEMORY:
+ error (EXIT_TROUBLE, 0, _("memory exhausted"));
+
+# if PCRE_STUDY_JIT_COMPILE
+ case PCRE_ERROR_JIT_STACKLIMIT:
+ error (EXIT_TROUBLE, 0, _("exhausted PCRE JIT stack"));
+# endif
+
+ case PCRE_ERROR_MATCHLIMIT:
+ error (EXIT_TROUBLE, 0, _("exceeded PCRE's backtracking limit"));
+
+ default:
+ /* For now, we lump all remaining PCRE failures into this basket.
+ If anyone cares to provide sample grep usage that can trigger
+ particular PCRE errors, we can add to the list (above) of more
+ detailed diagnostics. */
+ error (EXIT_TROUBLE, 0, _("internal PCRE error: %d"), e);
+ }
+
+ return -1;
+ }
+ else
+ {
+ char const *matchbeg = subject + sub[0];
+ char const *matchend = subject + sub[1];
+ char const *beg;
+ char const *end;
+ if (start_ptr)
+ {
+ beg = matchbeg; /* Caller wants the exact match. */
+ end = matchend;
+ }
+ else if (multiline)
+ {
+ /* Widen the match to whole lines: from just after the last
+ eolbyte before the match to just after the first eolbyte
+ at or after its end. */
+ char const *prev_nl = memrchr (line_start - 1, eolbyte,
+ matchbeg - (line_start - 1));
+ char const *next_nl = memchr (matchend, eolbyte,
+ line_end + 1 - matchend);
+ beg = prev_nl + 1;
+ end = next_nl + 1;
+ }
+ else
+ {
+ beg = line_start; /* Single-line search: report the whole line. */
+ end = line_end + 1;
+ }
+ *match_size = end - beg;
+ return beg - buf;
+ }
+#endif
+}
diff --git a/src/search.h b/src/search.h
new file mode 100644
index 0000000..7dc1940
--- /dev/null
+++ b/src/search.h
@@ -0,0 +1,82 @@
+/* search.h - declarations for searching subroutines using dfa, kwset and regex for grep.
+ Copyright 1992, 1998, 2000, 2007, 2009-2016 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
+ 02110-1301, USA. */
+
+#ifndef GREP_SEARCH_H
+#define GREP_SEARCH_H 1
+
+#include <config.h>
+
+#include <sys/types.h>
+#include <stdint.h>
+#include <wchar.h>
+#include <wctype.h>
+#include <regex.h>
+
+#include "system.h"
+#include "error.h"
+#include "grep.h"
+#include "dfa.h"
+#include "kwset.h"
+#include "xalloc.h"
+
+_GL_INLINE_HEADER_BEGIN
+#ifndef SEARCH_INLINE
+# define SEARCH_INLINE _GL_INLINE
+#endif
+
+/* This must be a signed type. Each value is the difference in the size
+ of a character (in bytes) induced by converting to lower case.
+ The vast majority of values are 0, but a few are 1 or -1, so
+ technically, two bits may be sufficient. */
+typedef signed char mb_len_map_t;
+
+/* searchutils.c */
+extern void kwsinit (kwset_t *); /* Allocate *KWSET; exits on failure. */
+
+extern void build_mbclen_cache (void); /* Fill mbclen_cache. */
+extern size_t mbclen_cache[]; /* Cached mbrlen result for each single byte. */
+extern ptrdiff_t mb_goback (char const **, char const *, char const *);
+extern wint_t mb_prev_wc (char const *, char const *, char const *);
+extern wint_t mb_next_wc (char const *, char const *);
+
+/* dfasearch.c */
+extern void GEAcompile (char const *, size_t, reg_syntax_t);
+extern size_t EGexecute (char *, size_t, size_t *, char const *);
+
+/* kwsearch.c */
+extern void Fcompile (char const *, size_t);
+extern size_t Fexecute (char *, size_t, size_t *, char const *);
+
+/* pcresearch.c */
+extern void Pcompile (char const *, size_t); /* Compile a -P pattern. */
+extern size_t Pexecute (char *, size_t, size_t *, char const *); /* Search;
+ return an offset into the buffer, or (size_t) -1 if no match. */
+
+/* Return the length in bytes of the character that begins at S,
+   where S has N bytes available (N must be positive) and MBS is the
+   conversion state.  The answer comes from the per-byte cache when
+   the first byte alone decides it (the cache holds 1 where mbrlen
+   would yield 0); only when the cache says the byte starts an
+   incomplete sequence is mbrlen itself consulted.  */
+SEARCH_INLINE size_t
+mb_clen (char const *s, size_t n, mbstate_t *mbs)
+{
+  size_t cached = mbclen_cache[to_uchar (*s)];
+
+  if (cached != (size_t) -2)
+    return cached;
+  return mbrlen (s, n, mbs);
+}
+_GL_INLINE_HEADER_END
+
+#endif /* GREP_SEARCH_H */
diff --git a/src/searchutils.c b/src/searchutils.c
new file mode 100644
index 0000000..1f21a0e
--- /dev/null
+++ b/src/searchutils.c
@@ -0,0 +1,127 @@
+/* searchutils.c - helper subroutines for grep's matchers.
+ Copyright 1992, 1998, 2000, 2007, 2009-2016 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
+ 02110-1301, USA. */
+
+#include <config.h>
+
+#define SEARCH_INLINE _GL_EXTERN_INLINE
+#define SYSTEM_INLINE _GL_EXTERN_INLINE
+#include "search.h"
+
+#define NCHAR (UCHAR_MAX + 1) /* Number of distinct byte values. */
+
+size_t mbclen_cache[NCHAR]; /* Indexed by byte value; filled in by
+ build_mbclen_cache with the mbrlen result for
+ that single byte, except that 1 is stored
+ where mbrlen gives 0. */
+
+/* Allocate a new keyword set and store it in *KWSET.  When matching
+   case-insensitively (match_icase) in a unibyte locale, the set is
+   given a translation table that maps every byte through toupper;
+   otherwise it gets no translation table.  Exit through xalloc_die
+   if kwsalloc returns a null pointer.  */
+void
+kwsinit (kwset_t *kwset)
+{
+  static char trans[NCHAR];
+  char const *table = NULL;
+
+  if (match_icase && MB_CUR_MAX == 1)
+    {
+      int c;
+      for (c = 0; c < NCHAR; c++)
+        trans[c] = toupper (c);
+      table = trans;
+    }
+
+  *kwset = kwsalloc (table);
+  if (*kwset == NULL)
+    xalloc_die ();
+}
+
+/* Fill mbclen_cache: for every possible char value, record what
+   mbrlen reports for that byte taken alone from an initial
+   conversion state, storing 1 in place of a zero result.  */
+void
+build_mbclen_cache (void)
+{
+  int code = CHAR_MIN;
+
+  while (code <= CHAR_MAX)
+    {
+      mbstate_t state = { 0 };
+      char byte = code;
+      unsigned char slot = code;
+      size_t n = mbrlen (&byte, 1, &state);
+
+      if (n == 0)
+        n = 1;
+      mbclen_cache[slot] = n;
+      code++;
+    }
+}
+
+/* Scan forward from *MB_START, one character at a time, until
+   reaching or passing CUR, and return how many bytes CUR lies past
+   the start of the character that contains it.  A byte that is an
+   encoding error, or that starts a truncated character, counts as a
+   one-byte character.  END is one past the last byte of the buffer.
+
+   Return 0 if CUR is on a character boundary, and then *MB_START is
+   set to CUR.  Return a positive count if CUR is inside a character,
+   and then *MB_START is set to the first boundary after CUR.  If
+   *MB_START is already beyond CUR, return the negative value
+   CUR - *MB_START and leave *MB_START unchanged.  */
+ptrdiff_t
+mb_goback (char const **mb_start, char const *cur, char const *end)
+{
+  char const *scan = *mb_start;
+  char const *prev = scan;
+  mbstate_t state;
+
+  memset (&state, 0, sizeof state);
+
+  for (; scan < cur; )
+    {
+      size_t nbytes = mb_clen (scan, end - scan, &state);
+
+      if (nbytes == (size_t) -1 || nbytes == (size_t) -2)
+        {
+          /* Invalid or truncated: consume one byte and start over
+             from the initial conversion state.  */
+          nbytes = 1;
+          memset (&state, 0, sizeof state);
+        }
+
+      prev = scan;
+      scan += nbytes;
+    }
+
+  *mb_start = scan;
+  if (scan == cur)
+    return 0;
+  return cur - prev;
+}
+
+/* Return the wide character encoded immediately before CUR in the
+   buffer that starts at BUF and ends at END.  Return WEOF if CUR is
+   at the start of the buffer or no wide character is encoded there.  */
+wint_t
+mb_prev_wc (char const *buf, char const *cur, char const *end)
+{
+  if (buf == cur)
+    return WEOF;
+
+  /* Step back one byte, then back up to the start of the character
+     containing that byte.  */
+  char const *start = buf;
+  char const *prev = cur - 1;
+  ptrdiff_t back = mb_goback (&start, prev, end);
+
+  return mb_next_wc (prev - back, end);
+}
+
+/* Decode and return the wide character that starts at CUR, where END
+   is one past the last byte of the buffer.  Return WEOF if CUR is at
+   END, or if the bytes there are an invalid or incomplete sequence.  */
+wint_t
+mb_next_wc (char const *cur, char const *end)
+{
+  if (end - cur == 0)
+    return WEOF;
+
+  wchar_t wc;
+  mbstate_t mbs = { 0 };
+  size_t n = mbrtowc (&wc, cur, end - cur, &mbs);
+
+  /* (size_t) -1 and (size_t) -2 signal invalid and incomplete input.  */
+  if ((size_t) -2 <= n)
+    return WEOF;
+  return wc;
+}
diff --git a/src/system.h b/src/system.h
new file mode 100644
index 0000000..6f4918d
--- /dev/null
+++ b/src/system.h
@@ -0,0 +1,110 @@
+/* Portability cruft. Include after config.h and sys/types.h.
+ Copyright 1996, 1998-2000, 2007, 2009-2016 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
+ 02110-1301, USA. */
+
+#ifndef GREP_SYSTEM_H
+#define GREP_SYSTEM_H 1
+
+#include <fcntl.h>
+#include <unistd.h>
+#include <errno.h>
+
+#include "binary-io.h"
+#include "configmake.h"
+#include "dirname.h"
+#include "ignore-value.h"
+#include "minmax.h"
+#include "same-inode.h"
+
+#include <stdlib.h>
+#include <stddef.h>
+#include <limits.h>
+#include <string.h>
+#include <ctype.h>
+
+enum { EXIT_TROUBLE = 2 }; /* NOTE(review): presumably the exit status on error; confirm at call sites */
+
+#include <gettext.h>
+#define N_(String) gettext_noop(String) /* shorthand for gettext_noop */
+#define _(String) gettext(String) /* shorthand for gettext */
+
+#include <locale.h>
+
+#ifndef initialize_main /* keep any definition supplied earlier */
+# define initialize_main(argcp, argvp) /* otherwise it expands to nothing */
+#endif
+
+#include "unlocked-io.h"
+
+_GL_INLINE_HEADER_BEGIN
+#ifndef SYSTEM_INLINE /* an includer may define this first to override the default */
+# define SYSTEM_INLINE _GL_INLINE
+#endif
+
+#define STREQ(a, b) (strcmp (a, b) == 0) /* true iff strcmp finds A and B equal */
+
+/* Convert a possibly-signed character to an unsigned character. This is
+ a bit safer than casting to unsigned char, since it catches some type
+ errors that the cast doesn't. */
+SYSTEM_INLINE unsigned char
+to_uchar (char ch)
+{
+ return ch; /* the unsigned char return type performs the conversion; no cast needed */
+}
+
+_GL_INLINE_HEADER_END
+
+#ifndef __has_feature /* fallback for compilers that do not provide __has_feature */
+# define __has_feature(F) false
+#endif
+
+#if defined __SANITIZE_ADDRESS__ || __has_feature (address_sanitizer) /* either test detects ASan */
+# define HAVE_ASAN 1
+#else
+# define HAVE_ASAN 0
+#endif
+
+#if HAVE_ASAN /* declare the two region (un)poisoning functions */
+
+/* Mark memory region [addr, addr+size) as unaddressable.
+ This memory must be previously allocated by the user program. Accessing
+ addresses in this region from instrumented code is forbidden until
+ this region is unpoisoned. This function is not guaranteed to poison
+ the whole region - it may poison only a subregion of [addr, addr+size)
+ due to ASan alignment restrictions.
+ Method is NOT thread-safe in the sense that no two threads can
+ (un)poison memory in the same memory region simultaneously. */
+void __asan_poison_memory_region (void const volatile *addr, size_t size);
+
+/* Mark memory region [addr, addr+size) as addressable.
+ This memory must be previously allocated by the user program. Accessing
+ addresses in this region is allowed until this region is poisoned again.
+ This function may unpoison a superregion of [addr, addr+size) due to
+ ASan alignment restrictions.
+ Method is NOT thread-safe in the sense that no two threads can
+ (un)poison memory in the same memory region simultaneously. */
+void __asan_unpoison_memory_region (void const volatile *addr, size_t size);
+
+#else /* HAVE_ASAN is 0: define same-named static stubs with empty bodies */
+
+static _GL_UNUSED void
+__asan_poison_memory_region (void const volatile *addr, size_t size) { }
+static _GL_UNUSED void
+__asan_unpoison_memory_region (void const volatile *addr, size_t size) { }
+#endif /* HAVE_ASAN */
+
+#endif