summary | refs | log | tree | commit | diff
path: root/src/searchutils.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/searchutils.c')
-rw-r--r-- src/searchutils.c 127
1 files changed, 127 insertions, 0 deletions
diff --git a/src/searchutils.c b/src/searchutils.c
new file mode 100644
index 0000000..1f21a0e
--- /dev/null
+++ b/src/searchutils.c
@@ -0,0 +1,127 @@
+/* searchutils.c - helper subroutines for grep's matchers.
+ Copyright 1992, 1998, 2000, 2007, 2009-2016 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
+ 02110-1301, USA. */
+
+#include <config.h>
+
+#define SEARCH_INLINE _GL_EXTERN_INLINE
+#define SYSTEM_INLINE _GL_EXTERN_INLINE
+#include "search.h"
+
+/* Number of distinct values an unsigned char can hold.  */
+#define NCHAR (UCHAR_MAX + 1)
+
+/* Table with one size_t entry per unsigned char value.  It is filled
+   in by build_mbclen_cache.  */
+size_t mbclen_cache[NCHAR];
+
+/* Allocate a keyword set with kwsalloc and store it in *KWSET.
+   If match_icase is set and the locale's characters are all one byte
+   long (MB_CUR_MAX == 1), hand kwsalloc a table that maps every byte
+   value to its toupper result; otherwise hand it a null table.
+   Call xalloc_die if kwsalloc yields a null set.  */
+void
+kwsinit (kwset_t *kwset)
+{
+  static char trans[NCHAR];
+  char *table = NULL;
+
+  if (match_icase && MB_CUR_MAX == 1)
+    {
+      int byte;
+
+      for (byte = 0; byte < NCHAR; byte++)
+        trans[byte] = toupper (byte);
+      table = trans;
+    }
+
+  *kwset = kwsalloc (table);
+  if (!*kwset)
+    xalloc_die ();
+}
+
+/* Fill mbclen_cache.  For every value a char can take, record what
+   mbrlen reports for that single byte when starting from a zeroed
+   conversion state, except that a result of 0 is stored as 1.  The
+   entry is indexed by the byte converted to unsigned char.  */
+void
+build_mbclen_cache (void)
+{
+  int value;
+
+  for (value = CHAR_MIN; value <= CHAR_MAX; value++)
+    {
+      char byte = value;
+      mbstate_t state = { 0 };
+      size_t n = mbrlen (&byte, 1, &state);
+
+      if (n == 0)
+        n = 1;
+      mbclen_cache[(unsigned char) value] = n;
+    }
+}
+
+/* Scan forward from *MB_START, one character at a time, until the
+   scan reaches or passes CUR, and report how many bytes CUR lies past
+   the start of the last character scanned.  END is one past the last
+   byte of the buffer.  Wherever mb_clen reports an invalid or
+   truncated sequence, that byte is treated as a one-byte character
+   and the conversion state is reset.
+
+   Return 0 if the scan lands exactly on CUR (this includes the case
+   *MB_START == CUR); *MB_START is then set to CUR.  Return a positive
+   byte count if CUR falls inside a character; *MB_START is then set
+   to the position just past that character.  If *MB_START is greater
+   than CUR nothing is scanned: return the negative value
+   CUR - *MB_START and leave *MB_START unchanged.  */
+ptrdiff_t
+mb_goback (char const **mb_start, char const *cur, char const *end)
+{
+  char const *scan = *mb_start;
+  char const *char_start = scan;
+  mbstate_t state;
+
+  memset (&state, 0, sizeof state);
+
+  for (; scan < cur; scan = char_start + 0)
+    {
+      size_t len = mb_clen (scan, end - scan, &state);
+
+      if (len == (size_t) -1 || len == (size_t) -2)
+        {
+          /* Invalid or truncated: step over one byte and start the
+             next conversion from a clean state.  */
+          len = 1;
+          memset (&state, 0, sizeof state);
+        }
+      char_start = scan;
+      scan += len;
+      if (cur <= scan)
+        break;
+      char_start = scan;
+    }
+
+  *mb_start = scan;
+  if (scan == cur)
+    return 0;
+  return cur - char_start;
+}
+
+/* Return the wide character encoded just before CUR in the buffer
+   that starts at BUF and ends at END.  Return WEOF if CUR == BUF.
+   Otherwise use mb_goback, scanning from BUF, to back up from the
+   byte preceding CUR to the start of its character, and return what
+   mb_next_wc decodes there (which may itself be WEOF).  */
+wint_t
+mb_prev_wc (char const *buf, char const *cur, char const *end)
+{
+  if (cur != buf)
+    {
+      char const *scan_from = buf;
+      char const *last_byte = cur - 1;
+      ptrdiff_t back = mb_goback (&scan_from, last_byte, end);
+
+      return mb_next_wc (last_byte - back, end);
+    }
+
+  return WEOF;
+}
+
+/* Decode and return the wide character that starts at CUR, looking
+   no further than END (one past the last byte of the buffer).
+   Return WEOF if CUR == END, or if mbrtowc reports an invalid or
+   incomplete sequence.  Decoding starts from a zeroed conversion
+   state.  */
+wint_t
+mb_next_wc (char const *cur, char const *end)
+{
+  wchar_t wc;
+  mbstate_t state = { 0 };
+
+  if (cur == end)
+    return WEOF;
+
+  /* (size_t) -1 and (size_t) -2 are mbrtowc's failure results.  */
+  if ((size_t) -2 <= mbrtowc (&wc, cur, end - cur, &state))
+    return WEOF;
+
+  return wc;
+}