/* searchutils.c - helper subroutines for grep's matchers.
Copyright 1992, 1998, 2000, 2007, 2009-2016 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
02110-1301, USA. */
#include <config.h>
#define SEARCH_INLINE _GL_EXTERN_INLINE
#define SYSTEM_INLINE _GL_EXTERN_INLINE
#include "search.h"
/* Number of distinct values an unsigned char can hold; used below as
   the size of tables that are indexed by a byte value.  */
#define NCHAR (UCHAR_MAX + 1)
/* Per-byte table filled in by build_mbclen_cache: the entry for byte B
   holds the mbrlen result for the one-byte input B, with a result of 0
   replaced by 1.  */
size_t mbclen_cache[NCHAR];
/* Allocate a keyword set with kwsalloc and store it in *KWSET.

   If match_icase is set and MB_CUR_MAX is 1, the set is allocated with
   a translation table that maps each byte value to its toupper result;
   otherwise it is allocated with a null table.  If the allocated set
   tests false, call xalloc_die.  */
void
kwsinit (kwset_t *kwset)
{
  /* Static storage: presumably kwsalloc keeps this pointer rather than
     copying the table -- confirm against kwset.c.  */
  static char upcase[NCHAR];
  int fold_case = match_icase && MB_CUR_MAX == 1;

  if (fold_case)
    {
      int byte = 0;

      while (byte < NCHAR)
        {
          upcase[byte] = toupper (byte);
          byte++;
        }
    }

  *kwset = kwsalloc (fold_case ? upcase : NULL);

  if (*kwset)
    return;
  xalloc_die ();
}
/* Populate mbclen_cache.  For every value a char can take, ask mbrlen
   how long a character starting with that single byte is, beginning
   from a zeroed conversion state, and record the answer in the slot
   indexed by the byte's unsigned char value.  A result of 0 is stored
   as 1; every other result (including the mbrlen error codes) is
   stored unchanged.  */
void
build_mbclen_cache (void)
{
  int code;

  for (code = CHAR_MIN; code <= CHAR_MAX; code++)
    {
      char byte = code;
      mbstate_t state = { 0 };
      size_t n = mbrlen (&byte, 1, &state);

      if (n == 0)
        n = 1;
      mbclen_cache[(unsigned char) code] = n;
    }
}
/* Scan forward from *MB_START, one character at a time, until the scan
   position reaches or passes CUR, and report how far CUR lies beyond
   the start of the character that contains it.  END is one past the
   last byte of the buffer and bounds each length query.

   Character lengths come from mb_clen.  A result of (size_t) -1 or
   (size_t) -2 (invalid or truncated sequence) is treated as a
   one-byte character, and the conversion state is reset.

   Return value and effect on *MB_START:
   - 0 if a character boundary falls exactly on CUR; *MB_START becomes
     CUR.
   - A positive count of bytes from the start of the character
     straddling CUR up to CUR; *MB_START becomes the position just past
     that character.
   - The negative value CUR - *MB_START if *MB_START is already beyond
     CUR; *MB_START keeps its value.  */
ptrdiff_t
mb_goback (char const **mb_start, char const *cur, char const *end)
{
  char const *scan = *mb_start;  /* Start of the next unscanned character.  */
  char const *prev = scan;       /* Start of the last character scanned.  */
  mbstate_t state;

  memset (&state, 0, sizeof state);

  for (;;)
    {
      if (cur <= scan)
        break;

      size_t len = mb_clen (scan, end - scan, &state);

      if (len == (size_t) -1 || len == (size_t) -2)
        {
          /* Encoding error or incomplete character: step over a single
             byte and start afresh with a clean state.  */
          len = 1;
          memset (&state, 0, sizeof state);
        }

      prev = scan;
      scan += len;
    }

  *mb_start = scan;

  if (scan == cur)
    return 0;
  return cur - prev;
}
/* Return the wide character encoded immediately before CUR in the
   buffer that starts at BUF and ends at END.  If CUR is at the very
   start of the buffer, return WEOF.  Otherwise use mb_goback, scanning
   from BUF, to find the start of the character containing the byte
   just before CUR, and return whatever mb_next_wc decodes there (which
   is WEOF if nothing can be decoded).  */
wint_t
mb_prev_wc (char const *buf, char const *cur, char const *end)
{
  if (buf == cur)
    return WEOF;

  char const *scan_from = buf;
  char const *last_byte = cur - 1;
  ptrdiff_t back = mb_goback (&scan_from, last_byte, end);

  return mb_next_wc (last_byte - back, end);
}
/* Decode and return the wide character that starts at CUR, reading no
   further than END (one past the buffer's last byte).  Return WEOF if
   CUR equals END, or if mbrtowc reports an invalid or incomplete
   sequence there.  Decoding always starts from a zeroed conversion
   state.  */
wint_t
mb_next_wc (char const *cur, char const *end)
{
  if (end == cur)
    return WEOF;

  wchar_t wc;
  mbstate_t state = { 0 };
  size_t n = mbrtowc (&wc, cur, end - cur, &state);

  /* (size_t) -1 is an encoding error, (size_t) -2 an incomplete
     character; in both cases WC was not set.  */
  if (n == (size_t) -1 || n == (size_t) -2)
    return WEOF;
  return wc;
}
|