summaryrefslogtreecommitdiff
path: root/support/regcomp.c
diff options
context:
space:
mode:
authorArnold D. Robbins <arnold@skeeve.com>2021-12-10 12:19:30 +0200
committerArnold D. Robbins <arnold@skeeve.com>2021-12-10 12:19:30 +0200
commit6e872ca782a6fa480aedbef8205bba6b7ccaf307 (patch)
tree975d3ddb5614624f935a4837690b9fb9084c475f /support/regcomp.c
parentcfa3ac1c6a0fcbe794df853f25c757e4acd5fcc0 (diff)
downloadgawk-6e872ca782a6fa480aedbef8205bba6b7ccaf307.tar.gz
Update support files from GNULIB.
Diffstat (limited to 'support/regcomp.c')
-rw-r--r--support/regcomp.c813
1 files changed, 337 insertions, 476 deletions
diff --git a/support/regcomp.c b/support/regcomp.c
index 887e5b50..6a97fdee 100644
--- a/support/regcomp.c
+++ b/support/regcomp.c
@@ -27,14 +27,10 @@ static void re_compile_fastmap_iter (regex_t *bufp,
const re_dfastate_t *init_state,
char *fastmap);
static reg_errcode_t init_dfa (re_dfa_t *dfa, size_t pat_len);
-#ifdef RE_ENABLE_I18N
static void free_charset (re_charset_t *cset);
-#endif /* RE_ENABLE_I18N */
static void free_workarea_compile (regex_t *preg);
static reg_errcode_t create_initial_state (re_dfa_t *dfa);
-#ifdef RE_ENABLE_I18N
static void optimize_utf8 (re_dfa_t *dfa);
-#endif
static reg_errcode_t analyze (regex_t *preg);
static reg_errcode_t preorder (bin_tree_t *root,
reg_errcode_t (fn (void *, bin_tree_t *)),
@@ -89,7 +85,6 @@ static reg_errcode_t parse_bracket_element (bracket_elem_t *elem,
static reg_errcode_t parse_bracket_symbol (bracket_elem_t *elem,
re_string_t *regexp,
re_token_t *token);
-#ifdef RE_ENABLE_I18N
static reg_errcode_t build_equiv_class (bitset_t sbcset,
re_charset_t *mbcset,
Idx *equiv_class_alloc,
@@ -100,14 +95,6 @@ static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans,
Idx *char_class_alloc,
const char *class_name,
reg_syntax_t syntax);
-#else /* not RE_ENABLE_I18N */
-static reg_errcode_t build_equiv_class (bitset_t sbcset,
- const unsigned char *name);
-static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans,
- bitset_t sbcset,
- const char *class_name,
- reg_syntax_t syntax);
-#endif /* not RE_ENABLE_I18N */
static bin_tree_t *build_charclass_op (re_dfa_t *dfa,
RE_TRANSLATE_TYPE trans,
const char *class_name,
@@ -279,8 +266,7 @@ re_compile_fastmap (struct re_pattern_buffer *bufp)
}
weak_alias (__re_compile_fastmap, re_compile_fastmap)
-static inline void
-__attribute__ ((always_inline))
+static __always_inline void
re_set_fastmap (char *fastmap, bool icase, int ch)
{
fastmap[ch] = 1;
@@ -306,7 +292,6 @@ re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state,
if (type == CHARACTER)
{
re_set_fastmap (fastmap, icase, dfa->nodes[node].opr.c);
-#ifdef RE_ENABLE_I18N
if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
{
unsigned char buf[MB_LEN_MAX];
@@ -327,7 +312,6 @@ re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state,
!= (size_t) -1))
re_set_fastmap (fastmap, false, buf[0]);
}
-#endif
}
else if (type == SIMPLE_BRACKET)
{
@@ -341,13 +325,12 @@ re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state,
re_set_fastmap (fastmap, icase, ch);
}
}
-#ifdef RE_ENABLE_I18N
else if (type == COMPLEX_BRACKET)
{
re_charset_t *cset = dfa->nodes[node].opr.mbcset;
Idx i;
-# ifdef _LIBC
+#ifdef _LIBC
/* See if we have to try all bytes which start multiple collation
elements.
e.g. In da_DK, we want to catch 'a' since "aa" is a valid
@@ -363,7 +346,7 @@ re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state,
if (table[i] < 0)
re_set_fastmap (fastmap, icase, i);
}
-# endif /* _LIBC */
+#endif /* _LIBC */
/* See if we have to start the match at all multibyte characters,
i.e. where we would not find an invalid sequence. This only
@@ -371,9 +354,9 @@ re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state,
sets, the SIMPLE_BRACKET again suffices. */
if (dfa->mb_cur_max > 1
&& (cset->nchar_classes || cset->non_match || cset->nranges
-# ifdef _LIBC
+#ifdef _LIBC
|| cset->nequiv_classes
-# endif /* _LIBC */
+#endif /* _LIBC */
))
{
unsigned char c = 0;
@@ -406,12 +389,7 @@ re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state,
}
}
}
-#endif /* RE_ENABLE_I18N */
- else if (type == OP_PERIOD
-#ifdef RE_ENABLE_I18N
- || type == OP_UTF8_PERIOD
-#endif /* RE_ENABLE_I18N */
- || type == END_OF_RE)
+ else if (type == OP_PERIOD || type == OP_UTF8_PERIOD || type == END_OF_RE)
{
memset (fastmap, '\1', sizeof (char) * SBC_MAX);
if (type == END_OF_RE)
@@ -550,7 +528,6 @@ regerror (int errcode, const regex_t *__restrict preg, char *__restrict errbuf,
weak_alias (__regerror, regerror)
-#ifdef RE_ENABLE_I18N
/* This static array is used for the map to single-byte characters when
UTF-8 is used. Otherwise we would allocate memory just to initialize
it the same all the time. UTF-8 is the preferred encoding so this is
@@ -558,25 +535,24 @@ weak_alias (__regerror, regerror)
static const bitset_t utf8_sb_map =
{
/* Set the first 128 bits. */
-# if (defined __GNUC__ || __clang_major__ >= 4) && !defined __STRICT_ANSI__
+#if (defined __GNUC__ || __clang_major__ >= 4) && !defined __STRICT_ANSI__
[0 ... 0x80 / BITSET_WORD_BITS - 1] = BITSET_WORD_MAX
-# else
-# if 4 * BITSET_WORD_BITS < ASCII_CHARS
-# error "bitset_word_t is narrower than 32 bits"
-# elif 3 * BITSET_WORD_BITS < ASCII_CHARS
+#else
+# if 4 * BITSET_WORD_BITS < ASCII_CHARS
+# error "bitset_word_t is narrower than 32 bits"
+# elif 3 * BITSET_WORD_BITS < ASCII_CHARS
BITSET_WORD_MAX, BITSET_WORD_MAX, BITSET_WORD_MAX,
-# elif 2 * BITSET_WORD_BITS < ASCII_CHARS
+# elif 2 * BITSET_WORD_BITS < ASCII_CHARS
BITSET_WORD_MAX, BITSET_WORD_MAX,
-# elif 1 * BITSET_WORD_BITS < ASCII_CHARS
+# elif 1 * BITSET_WORD_BITS < ASCII_CHARS
BITSET_WORD_MAX,
-# endif
+# endif
(BITSET_WORD_MAX
>> (SBC_MAX % BITSET_WORD_BITS == 0
? 0
: BITSET_WORD_BITS - SBC_MAX % BITSET_WORD_BITS))
-# endif
-};
#endif
+};
static void
@@ -614,10 +590,8 @@ free_dfa_content (re_dfa_t *dfa)
re_free (entry->array);
}
re_free (dfa->state_table);
-#ifdef RE_ENABLE_I18N
if (dfa->sb_char != utf8_sb_map)
re_free (dfa->sb_char);
-#endif
re_free (dfa->subexp_map);
#ifdef DEBUG
re_free (dfa->re_str);
@@ -796,11 +770,9 @@ re_compile_internal (regex_t *preg, const char * pattern, size_t length,
if (__glibc_unlikely (err != REG_NOERROR))
goto re_compile_internal_free_return;
-#ifdef RE_ENABLE_I18N
/* If possible, do searching in single byte encoding to speed things up. */
if (dfa->is_utf8 && !(syntax & RE_ICASE) && preg->translate == NULL)
optimize_utf8 (dfa);
-#endif
/* Then create the initial state of the dfa. */
err = create_initial_state (dfa);
@@ -830,11 +802,7 @@ init_dfa (re_dfa_t *dfa, size_t pat_len)
#ifndef _LIBC
const char *codeset_name;
#endif
-#ifdef RE_ENABLE_I18N
size_t max_i18n_object_size = MAX (sizeof (wchar_t), sizeof (wctype_t));
-#else
- size_t max_i18n_object_size = 0;
-#endif
size_t max_object_size =
MAX (sizeof (struct re_state_table_entry),
MAX (sizeof (re_token_t),
@@ -886,7 +854,6 @@ init_dfa (re_dfa_t *dfa, size_t pat_len)
dfa->map_notascii = 0;
#endif
-#ifdef RE_ENABLE_I18N
if (dfa->mb_cur_max > 1)
{
if (dfa->is_utf8)
@@ -906,14 +873,13 @@ init_dfa (re_dfa_t *dfa, size_t pat_len)
wint_t wch = __btowc (ch);
if (wch != WEOF)
dfa->sb_char[i] |= (bitset_word_t) 1 << j;
-# ifndef _LIBC
+#ifndef _LIBC
if (isascii (ch) && wch != ch)
dfa->map_notascii = 1;
-# endif
+#endif
}
}
}
-#endif
if (__glibc_unlikely (dfa->nodes == NULL || dfa->state_table == NULL))
return REG_ESPACE;
@@ -933,8 +899,6 @@ init_word_char (re_dfa_t *dfa)
dfa->word_ops_used = 1;
if (__glibc_likely (dfa->map_notascii == 0))
{
- /* Avoid uint32_t and uint64_t as some non-GCC platforms lack
- them, an issue when this code is used in Gnulib. */
bitset_word_t bits0 = 0x00000000;
bitset_word_t bits1 = 0x03ff0000;
bitset_word_t bits2 = 0x87fffffe;
@@ -1074,7 +1038,6 @@ create_initial_state (re_dfa_t *dfa)
return REG_NOERROR;
}
-#ifdef RE_ENABLE_I18N
/* If it is possible to do searching in single byte encoding instead of UTF-8
to speed things up, set dfa->mb_cur_max to 1, clear is_utf8 and change
DFA nodes where needed. */
@@ -1154,7 +1117,6 @@ optimize_utf8 (re_dfa_t *dfa)
dfa->is_utf8 = 0;
dfa->has_mb_node = dfa->nbackref > 0 || has_period;
}
-#endif
/* Analyze the structure tree, and calculate "first", "next", "edest",
"eclosure", and "inveclosure". */
@@ -1792,7 +1754,6 @@ peek_token (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
token->opr.c = c;
token->word_char = 0;
-#ifdef RE_ENABLE_I18N
token->mb_partial = 0;
if (input->mb_cur_max > 1
&& !re_string_first_byte (input, re_string_cur_idx (input)))
@@ -1801,7 +1762,6 @@ peek_token (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
token->mb_partial = 1;
return 1;
}
-#endif
if (c == '\\')
{
unsigned char c2;
@@ -1814,7 +1774,6 @@ peek_token (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
c2 = re_string_peek_byte_case (input, 1);
token->opr.c = c2;
token->type = CHARACTER;
-#ifdef RE_ENABLE_I18N
if (input->mb_cur_max > 1)
{
wint_t wc = re_string_wchar_at (input,
@@ -1822,7 +1781,6 @@ peek_token (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
token->word_char = IS_WIDE_WORD_CHAR (wc) != 0;
}
else
-#endif
token->word_char = IS_WORD_CHAR (c2) != 0;
switch (c2)
@@ -1928,14 +1886,12 @@ peek_token (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
}
token->type = CHARACTER;
-#ifdef RE_ENABLE_I18N
if (input->mb_cur_max > 1)
{
wint_t wc = re_string_wchar_at (input, re_string_cur_idx (input));
token->word_char = IS_WIDE_WORD_CHAR (wc) != 0;
}
else
-#endif
token->word_char = IS_WORD_CHAR (token->opr.c);
switch (c)
@@ -2027,14 +1983,12 @@ peek_token_bracket (re_token_t *token, re_string_t *input, reg_syntax_t syntax)
c = re_string_peek_byte (input, 0);
token->opr.c = c;
-#ifdef RE_ENABLE_I18N
if (input->mb_cur_max > 1
&& !re_string_first_byte (input, re_string_cur_idx (input)))
{
token->type = CHARACTER;
return 1;
}
-#endif /* RE_ENABLE_I18N */
if (c == '\\' && (syntax & RE_BACKSLASH_ESCAPE_IN_LISTS)
&& re_string_cur_idx (input) + 1 < re_string_length (input))
@@ -2256,7 +2210,6 @@ parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token,
*err = REG_ESPACE;
return NULL;
}
-#ifdef RE_ENABLE_I18N
if (dfa->mb_cur_max > 1)
{
while (!re_string_eoi (regexp)
@@ -2273,7 +2226,6 @@ parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token,
}
}
}
-#endif
break;
case OP_OPEN_SUBEXP:
@@ -2666,40 +2618,30 @@ parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa,
#ifndef _LIBC
-# ifdef RE_ENABLE_I18N
/* Convert the byte B to the corresponding wide character. In a
unibyte locale, treat B as itself. In a multibyte locale, return
WEOF if B is an encoding error. */
static wint_t
-parse_byte (unsigned char b, re_charset_t *mbcset)
+parse_byte (unsigned char b, re_dfa_t const *dfa)
{
- return mbcset == NULL ? b : __btowc (b);
+ return dfa->mb_cur_max > 1 ? __btowc (b) : b;
}
-# endif
- /* Local function for parse_bracket_exp only used in case of NOT _LIBC.
- Build the range expression which starts from START_ELEM, and ends
- at END_ELEM. The result are written to MBCSET and SBCSET.
- RANGE_ALLOC is the allocated size of mbcset->range_starts, and
- mbcset->range_ends, is a pointer argument since we may
- update it. */
+/* Local function for parse_bracket_exp used in _LIBC environment.
+ Build the range expression which starts from START_ELEM, and ends
+ at END_ELEM. The result are written to MBCSET and SBCSET.
+ RANGE_ALLOC is the allocated size of mbcset->range_starts, and
+ mbcset->range_ends, is a pointer argument since we may
+ update it. */
static reg_errcode_t
-# ifdef RE_ENABLE_I18N
-build_range_exp (const reg_syntax_t syntax,
- bitset_t sbcset,
- re_charset_t *mbcset,
- Idx *range_alloc,
- const bracket_elem_t *start_elem,
- const bracket_elem_t *end_elem)
-# else /* not RE_ENABLE_I18N */
-build_range_exp (const reg_syntax_t syntax,
- bitset_t sbcset,
- const bracket_elem_t *start_elem,
- const bracket_elem_t *end_elem)
-# endif /* not RE_ENABLE_I18N */
+build_range_exp (bitset_t sbcset, re_charset_t *mbcset, Idx *range_alloc,
+ bracket_elem_t *start_elem, bracket_elem_t *end_elem,
+ re_dfa_t *dfa, reg_syntax_t syntax, uint_fast32_t nrules,
+ const unsigned char *collseqmb, const char *collseqwc,
+ int_fast32_t table_size, const void *symb_table,
+ const unsigned char *extra)
{
- unsigned int start_ch, end_ch;
/* Equivalence Classes and Character Classes can't be a range start/end. */
if (__glibc_unlikely (start_elem->type == EQUIV_CLASS
|| start_elem->type == CHAR_CLASS
@@ -2715,110 +2657,88 @@ build_range_exp (const reg_syntax_t syntax,
&& strlen ((char *) end_elem->opr.name) > 1)))
return REG_ECOLLATE;
-# ifdef RE_ENABLE_I18N
- {
- wchar_t wc;
- wint_t start_wc;
- wint_t end_wc;
-
+ unsigned int
start_ch = ((start_elem->type == SB_CHAR) ? start_elem->opr.ch
: ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0]
- : 0));
+ : 0)),
end_ch = ((end_elem->type == SB_CHAR) ? end_elem->opr.ch
: ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0]
: 0));
+ wint_t
start_wc = ((start_elem->type == SB_CHAR || start_elem->type == COLL_SYM)
- ? parse_byte (start_ch, mbcset) : start_elem->opr.wch);
+ ? parse_byte (start_ch, dfa) : start_elem->opr.wch),
end_wc = ((end_elem->type == SB_CHAR || end_elem->type == COLL_SYM)
- ? parse_byte (end_ch, mbcset) : end_elem->opr.wch);
- if (start_wc == WEOF || end_wc == WEOF)
- return REG_ECOLLATE;
- else if (__glibc_unlikely ((syntax & RE_NO_EMPTY_RANGES)
- && start_wc > end_wc))
- return REG_ERANGE;
-
- /* Got valid collation sequence values, add them as a new entry.
- However, for !_LIBC we have no collation elements: if the
- character set is single byte, the single byte character set
- that we build below suffices. parse_bracket_exp passes
- no MBCSET if dfa->mb_cur_max == 1. */
- if (mbcset)
- {
- /* Check the space of the arrays. */
- if (__glibc_unlikely (*range_alloc == mbcset->nranges))
- {
- /* There is not enough space, need realloc. */
- wchar_t *new_array_start, *new_array_end;
- Idx new_nranges;
-
- /* +1 in case of mbcset->nranges is 0. */
- new_nranges = 2 * mbcset->nranges + 1;
- /* Use realloc since mbcset->range_starts and mbcset->range_ends
- are NULL if *range_alloc == 0. */
- new_array_start = re_realloc (mbcset->range_starts, wchar_t,
- new_nranges);
- new_array_end = re_realloc (mbcset->range_ends, wchar_t,
- new_nranges);
+ ? parse_byte (end_ch, dfa) : end_elem->opr.wch);
- if (__glibc_unlikely (new_array_start == NULL
- || new_array_end == NULL))
- {
- re_free (new_array_start);
- re_free (new_array_end);
- return REG_ESPACE;
- }
+ if (start_wc == WEOF || end_wc == WEOF)
+ return REG_ECOLLATE;
+ else if (__glibc_unlikely ((syntax & RE_NO_EMPTY_RANGES)
+ && start_wc > end_wc))
+ return REG_ERANGE;
- mbcset->range_starts = new_array_start;
- mbcset->range_ends = new_array_end;
- *range_alloc = new_nranges;
- }
+ /* Got valid collation sequence values, add them as a new entry.
+ However, for !_LIBC we have no collation elements: if the
+ character set is single byte, the single byte character set
+ that we build below suffices. parse_bracket_exp passes
+ no MBCSET if dfa->mb_cur_max == 1. */
+ if (dfa->mb_cur_max > 1)
+ {
+ /* Check the space of the arrays. */
+ if (__glibc_unlikely (*range_alloc == mbcset->nranges))
+ {
+ /* There is not enough space, need realloc. */
+ wchar_t *new_array_start, *new_array_end;
+ Idx new_nranges;
- mbcset->range_starts[mbcset->nranges] = start_wc;
- mbcset->range_ends[mbcset->nranges++] = end_wc;
- }
+ /* +1 in case of mbcset->nranges is 0. */
+ new_nranges = 2 * mbcset->nranges + 1;
+ /* Use realloc since mbcset->range_starts and mbcset->range_ends
+ are NULL if *range_alloc == 0. */
+ new_array_start = re_realloc (mbcset->range_starts, wchar_t,
+ new_nranges);
+ new_array_end = re_realloc (mbcset->range_ends, wchar_t,
+ new_nranges);
+
+ if (__glibc_unlikely (new_array_start == NULL
+ || new_array_end == NULL))
+ {
+ re_free (new_array_start);
+ re_free (new_array_end);
+ return REG_ESPACE;
+ }
+
+ mbcset->range_starts = new_array_start;
+ mbcset->range_ends = new_array_end;
+ *range_alloc = new_nranges;
+ }
+
+ mbcset->range_starts[mbcset->nranges] = start_wc;
+ mbcset->range_ends[mbcset->nranges++] = end_wc;
+ }
+
+ /* Build the table for single byte characters. */
+ for (wchar_t wc = 0; wc < SBC_MAX; ++wc)
+ {
+ if (start_wc <= wc && wc <= end_wc)
+ bitset_set (sbcset, wc);
+ }
- /* Build the table for single byte characters. */
- for (wc = 0; wc < SBC_MAX; ++wc)
- {
- if (start_wc <= wc && wc <= end_wc)
- bitset_set (sbcset, wc);
- }
- }
-# else /* not RE_ENABLE_I18N */
- {
- unsigned int ch;
- start_ch = ((start_elem->type == SB_CHAR ) ? start_elem->opr.ch
- : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0]
- : 0));
- end_ch = ((end_elem->type == SB_CHAR ) ? end_elem->opr.ch
- : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0]
- : 0));
- if (start_ch > end_ch)
- return REG_ERANGE;
- /* Build the table for single byte characters. */
- for (ch = 0; ch < SBC_MAX; ++ch)
- if (start_ch <= ch && ch <= end_ch)
- bitset_set (sbcset, ch);
- }
-# endif /* not RE_ENABLE_I18N */
return REG_NOERROR;
}
#endif /* not _LIBC */
#ifndef _LIBC
-/* Helper function for parse_bracket_exp only used in case of NOT _LIBC..
+/* Helper function for parse_bracket_exp only used in case of NOT _LIBC.
Build the collating element which is represented by NAME.
The result are written to MBCSET and SBCSET.
COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
pointer argument since we may update it. */
static reg_errcode_t
-# ifdef RE_ENABLE_I18N
build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset,
- Idx *coll_sym_alloc, const unsigned char *name)
-# else /* not RE_ENABLE_I18N */
-build_collating_symbol (bitset_t sbcset, const unsigned char *name)
-# endif /* not RE_ENABLE_I18N */
+ Idx *coll_sym_alloc, const unsigned char *name,
+ uint_fast32_t nrules, int_fast32_t table_size,
+ const void *symb_table, const unsigned char *extra)
{
size_t name_len = strlen ((const char *) name);
if (__glibc_unlikely (name_len != 1))
@@ -2831,271 +2751,280 @@ build_collating_symbol (bitset_t sbcset, const unsigned char *name)
}
#endif /* not _LIBC */
-/* This function parse bracket expression like "[abc]", "[a-c]",
- "[[.a-a.]]" etc. */
-
-static bin_tree_t *
-parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
- reg_syntax_t syntax, reg_errcode_t *err)
-{
#ifdef _LIBC
- const unsigned char *collseqmb;
- const char *collseqwc;
- uint32_t nrules;
- int32_t table_size;
- const int32_t *symb_table;
- const unsigned char *extra;
-
- /* Local function for parse_bracket_exp used in _LIBC environment.
- Seek the collating symbol entry corresponding to NAME.
- Return the index of the symbol in the SYMB_TABLE,
- or -1 if not found. */
-
- auto inline int32_t
- __attribute__ ((always_inline))
- seek_collating_symbol_entry (const unsigned char *name, size_t name_len)
- {
- int32_t elem;
-
- for (elem = 0; elem < table_size; elem++)
- if (symb_table[2 * elem] != 0)
- {
- int32_t idx = symb_table[2 * elem + 1];
- /* Skip the name of collating element name. */
- idx += 1 + extra[idx];
- if (/* Compare the length of the name. */
- name_len == extra[idx]
- /* Compare the name. */
- && memcmp (name, &extra[idx + 1], name_len) == 0)
- /* Yep, this is the entry. */
- return elem;
- }
- return -1;
- }
+/* Local function for parse_bracket_exp used in _LIBC environment.
+ Seek the collating symbol entry corresponding to NAME.
+ Return the index of the symbol in the SYMB_TABLE,
+ or -1 if not found. */
+
+static __always_inline int32_t
+seek_collating_symbol_entry (const unsigned char *name, size_t name_len,
+ const int32_t *symb_table,
+ int_fast32_t table_size,
+ const unsigned char *extra)
+{
+ int_fast32_t elem;
- /* Local function for parse_bracket_exp used in _LIBC environment.
- Look up the collation sequence value of BR_ELEM.
- Return the value if succeeded, UINT_MAX otherwise. */
+ for (elem = 0; elem < table_size; elem++)
+ if (symb_table[2 * elem] != 0)
+ {
+ int32_t idx = symb_table[2 * elem + 1];
+ /* Skip the name of collating element name. */
+ idx += 1 + extra[idx];
+ if (/* Compare the length of the name. */
+ name_len == extra[idx]
+ /* Compare the name. */
+ && memcmp (name, &extra[idx + 1], name_len) == 0)
+ /* Yep, this is the entry. */
+ return elem;
+ }
+ return -1;
+}
- auto inline unsigned int
- __attribute__ ((always_inline))
- lookup_collation_sequence_value (bracket_elem_t *br_elem)
+/* Local function for parse_bracket_exp used in _LIBC environment.
+ Look up the collation sequence value of BR_ELEM.
+ Return the value if succeeded, UINT_MAX otherwise. */
+
+static __always_inline unsigned int
+lookup_collation_sequence_value (bracket_elem_t *br_elem, uint32_t nrules,
+ const unsigned char *collseqmb,
+ const char *collseqwc,
+ int_fast32_t table_size,
+ const int32_t *symb_table,
+ const unsigned char *extra)
+{
+ if (br_elem->type == SB_CHAR)
{
- if (br_elem->type == SB_CHAR)
- {
- /*
- if (MB_CUR_MAX == 1)
- */
- if (nrules == 0)
- return collseqmb[br_elem->opr.ch];
- else
- {
- wint_t wc = __btowc (br_elem->opr.ch);
- return __collseq_table_lookup (collseqwc, wc);
- }
- }
- else if (br_elem->type == MB_CHAR)
+ /* if (MB_CUR_MAX == 1) */
+ if (nrules == 0)
+ return collseqmb[br_elem->opr.ch];
+ else
{
- if (nrules != 0)
- return __collseq_table_lookup (collseqwc, br_elem->opr.wch);
+ wint_t wc = __btowc (br_elem->opr.ch);
+ return __collseq_table_lookup (collseqwc, wc);
}
- else if (br_elem->type == COLL_SYM)
+ }
+ else if (br_elem->type == MB_CHAR)
+ {
+ if (nrules != 0)
+ return __collseq_table_lookup (collseqwc, br_elem->opr.wch);
+ }
+ else if (br_elem->type == COLL_SYM)
+ {
+ size_t sym_name_len = strlen ((char *) br_elem->opr.name);
+ if (nrules != 0)
{
- size_t sym_name_len = strlen ((char *) br_elem->opr.name);
- if (nrules != 0)
+ int32_t elem, idx;
+ elem = seek_collating_symbol_entry (br_elem->opr.name,
+ sym_name_len,
+ symb_table, table_size,
+ extra);
+ if (elem != -1)
{
- int32_t elem, idx;
- elem = seek_collating_symbol_entry (br_elem->opr.name,
- sym_name_len);
- if (elem != -1)
- {
- /* We found the entry. */
- idx = symb_table[2 * elem + 1];
- /* Skip the name of collating element name. */
- idx += 1 + extra[idx];
- /* Skip the byte sequence of the collating element. */
- idx += 1 + extra[idx];
- /* Adjust for the alignment. */
- idx = (idx + 3) & ~3;
- /* Skip the multibyte collation sequence value. */
- idx += sizeof (unsigned int);
- /* Skip the wide char sequence of the collating element. */
- idx += sizeof (unsigned int) *
- (1 + *(unsigned int *) (extra + idx));
- /* Return the collation sequence value. */
- return *(unsigned int *) (extra + idx);
- }
- else if (sym_name_len == 1)
- {
- /* No valid character. Match it as a single byte
- character. */
- return collseqmb[br_elem->opr.name[0]];
- }
+ /* We found the entry. */
+ idx = symb_table[2 * elem + 1];
+ /* Skip the name of collating element name. */
+ idx += 1 + extra[idx];
+ /* Skip the byte sequence of the collating element. */
+ idx += 1 + extra[idx];
+ /* Adjust for the alignment. */
+ idx = (idx + 3) & ~3;
+ /* Skip the multibyte collation sequence value. */
+ idx += sizeof (unsigned int);
+ /* Skip the wide char sequence of the collating element. */
+ idx += sizeof (unsigned int) *
+ (1 + *(unsigned int *) (extra + idx));
+ /* Return the collation sequence value. */
+ return *(unsigned int *) (extra + idx);
}
else if (sym_name_len == 1)
- return collseqmb[br_elem->opr.name[0]];
+ {
+ /* No valid character. Match it as a single byte
+ character. */
+ return collseqmb[br_elem->opr.name[0]];
+ }
}
- return UINT_MAX;
+ else if (sym_name_len == 1)
+ return collseqmb[br_elem->opr.name[0]];
}
+ return UINT_MAX;
+}
- /* Local function for parse_bracket_exp used in _LIBC environment.
- Build the range expression which starts from START_ELEM, and ends
- at END_ELEM. The result are written to MBCSET and SBCSET.
- RANGE_ALLOC is the allocated size of mbcset->range_starts, and
- mbcset->range_ends, is a pointer argument since we may
- update it. */
+/* Local function for parse_bracket_exp used in _LIBC environment.
+ Build the range expression which starts from START_ELEM, and ends
+ at END_ELEM. The result are written to MBCSET and SBCSET.
+ RANGE_ALLOC is the allocated size of mbcset->range_starts, and
+ mbcset->range_ends, is a pointer argument since we may
+ update it. */
+
+static __always_inline reg_errcode_t
+build_range_exp (bitset_t sbcset, re_charset_t *mbcset, Idx *range_alloc,
+ bracket_elem_t *start_elem, bracket_elem_t *end_elem,
+ re_dfa_t *dfa, reg_syntax_t syntax, uint32_t nrules,
+ const unsigned char *collseqmb, const char *collseqwc,
+ int_fast32_t table_size, const int32_t *symb_table,
+ const unsigned char *extra)
+{
+ unsigned int ch;
+ uint32_t start_collseq;
+ uint32_t end_collseq;
- auto inline reg_errcode_t
- __attribute__ ((always_inline))
- build_range_exp (bitset_t sbcset, re_charset_t *mbcset, int *range_alloc,
- bracket_elem_t *start_elem, bracket_elem_t *end_elem)
- {
- unsigned int ch;
- uint32_t start_collseq;
- uint32_t end_collseq;
-
- /* Equivalence Classes and Character Classes can't be a range
- start/end. */
- if (__glibc_unlikely (start_elem->type == EQUIV_CLASS
- || start_elem->type == CHAR_CLASS
- || end_elem->type == EQUIV_CLASS
- || end_elem->type == CHAR_CLASS))
- return REG_ERANGE;
+ /* Equivalence Classes and Character Classes can't be a range
+ start/end. */
+ if (__glibc_unlikely (start_elem->type == EQUIV_CLASS
+ || start_elem->type == CHAR_CLASS
+ || end_elem->type == EQUIV_CLASS
+ || end_elem->type == CHAR_CLASS))
+ return REG_ERANGE;
- /* FIXME: Implement rational ranges here, too. */
- start_collseq = lookup_collation_sequence_value (start_elem);
- end_collseq = lookup_collation_sequence_value (end_elem);
- /* Check start/end collation sequence values. */
- if (__glibc_unlikely (start_collseq == UINT_MAX
- || end_collseq == UINT_MAX))
- return REG_ECOLLATE;
- if (__glibc_unlikely ((syntax & RE_NO_EMPTY_RANGES)
- && start_collseq > end_collseq))
- return REG_ERANGE;
+ /* FIXME: Implement rational ranges here, too. */
+ start_collseq = lookup_collation_sequence_value (start_elem, nrules, collseqmb, collseqwc,
+ table_size, symb_table, extra);
+ end_collseq = lookup_collation_sequence_value (end_elem, nrules, collseqmb, collseqwc,
+ table_size, symb_table, extra);
+ /* Check start/end collation sequence values. */
+ if (__glibc_unlikely (start_collseq == UINT_MAX
+ || end_collseq == UINT_MAX))
+ return REG_ECOLLATE;
+ if (__glibc_unlikely ((syntax & RE_NO_EMPTY_RANGES)
+ && start_collseq > end_collseq))
+ return REG_ERANGE;
- /* Got valid collation sequence values, add them as a new entry.
- However, if we have no collation elements, and the character set
- is single byte, the single byte character set that we
- build below suffices. */
- if (nrules > 0 || dfa->mb_cur_max > 1)
+ /* Got valid collation sequence values, add them as a new entry.
+ However, if we have no collation elements, and the character set
+ is single byte, the single byte character set that we
+ build below suffices. */
+ if (nrules > 0 || dfa->mb_cur_max > 1)
+ {
+ /* Check the space of the arrays. */
+ if (__glibc_unlikely (*range_alloc == mbcset->nranges))
{
- /* Check the space of the arrays. */
- if (__glibc_unlikely (*range_alloc == mbcset->nranges))
- {
- /* There is not enough space, need realloc. */
- uint32_t *new_array_start;
- uint32_t *new_array_end;
- Idx new_nranges;
-
- /* +1 in case of mbcset->nranges is 0. */
- new_nranges = 2 * mbcset->nranges + 1;
- new_array_start = re_realloc (mbcset->range_starts, uint32_t,
- new_nranges);
- new_array_end = re_realloc (mbcset->range_ends, uint32_t,
- new_nranges);
-
- if (__glibc_unlikely (new_array_start == NULL
- || new_array_end == NULL))
- return REG_ESPACE;
+ /* There is not enough space, need realloc. */
+ uint32_t *new_array_start;
+ uint32_t *new_array_end;
+ int new_nranges;
- mbcset->range_starts = new_array_start;
- mbcset->range_ends = new_array_end;
- *range_alloc = new_nranges;
- }
+ /* +1 in case of mbcset->nranges is 0. */
+ new_nranges = 2 * mbcset->nranges + 1;
+ new_array_start = re_realloc (mbcset->range_starts, uint32_t,
+ new_nranges);
+ new_array_end = re_realloc (mbcset->range_ends, uint32_t,
+ new_nranges);
- mbcset->range_starts[mbcset->nranges] = start_collseq;
- mbcset->range_ends[mbcset->nranges++] = end_collseq;
- }
+ if (__glibc_unlikely (new_array_start == NULL
+ || new_array_end == NULL))
+ return REG_ESPACE;
- /* Build the table for single byte characters. */
- for (ch = 0; ch < SBC_MAX; ch++)
- {
- uint32_t ch_collseq;
- /*
- if (MB_CUR_MAX == 1)
- */
- if (nrules == 0)
- ch_collseq = collseqmb[ch];
- else
- ch_collseq = __collseq_table_lookup (collseqwc, __btowc (ch));
- if (start_collseq <= ch_collseq && ch_collseq <= end_collseq)
- bitset_set (sbcset, ch);
+ mbcset->range_starts = new_array_start;
+ mbcset->range_ends = new_array_end;
+ *range_alloc = new_nranges;
}
- return REG_NOERROR;
+
+ mbcset->range_starts[mbcset->nranges] = start_collseq;
+ mbcset->range_ends[mbcset->nranges++] = end_collseq;
}
- /* Local function for parse_bracket_exp used in _LIBC environment.
- Build the collating element which is represented by NAME.
- The result are written to MBCSET and SBCSET.
- COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
- pointer argument since we may update it. */
+ /* Build the table for single byte characters. */
+ for (ch = 0; ch < SBC_MAX; ch++)
+ {
+ uint32_t ch_collseq;
+ /* if (MB_CUR_MAX == 1) */
+ if (nrules == 0)
+ ch_collseq = collseqmb[ch];
+ else
+ ch_collseq = __collseq_table_lookup (collseqwc, __btowc (ch));
+ if (start_collseq <= ch_collseq && ch_collseq <= end_collseq)
+ bitset_set (sbcset, ch);
+ }
+ return REG_NOERROR;
+}
- auto inline reg_errcode_t
- __attribute__ ((always_inline))
- build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset,
- Idx *coll_sym_alloc, const unsigned char *name)
+/* Local function for parse_bracket_exp used in _LIBC environment.
+ Build the collating element which is represented by NAME.
+ The result are written to MBCSET and SBCSET.
+ COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
+ pointer argument since we may update it. */
+
+static __always_inline reg_errcode_t
+build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset,
+ Idx *coll_sym_alloc, const unsigned char *name,
+ uint_fast32_t nrules, int_fast32_t table_size,
+ const int32_t *symb_table, const unsigned char *extra)
+{
+ int32_t elem, idx;
+ size_t name_len = strlen ((const char *) name);
+ if (nrules != 0)
{
- int32_t elem, idx;
- size_t name_len = strlen ((const char *) name);
- if (nrules != 0)
+ elem = seek_collating_symbol_entry (name, name_len, symb_table,
+ table_size, extra);
+ if (elem != -1)
{
- elem = seek_collating_symbol_entry (name, name_len);
- if (elem != -1)
- {
- /* We found the entry. */
- idx = symb_table[2 * elem + 1];
- /* Skip the name of collating element name. */
- idx += 1 + extra[idx];
- }
- else if (name_len == 1)
- {
- /* No valid character, treat it as a normal
- character. */
- bitset_set (sbcset, name[0]);
- return REG_NOERROR;
- }
- else
- return REG_ECOLLATE;
-
- /* Got valid collation sequence, add it as a new entry. */
- /* Check the space of the arrays. */
- if (__glibc_unlikely (*coll_sym_alloc == mbcset->ncoll_syms))
- {
- /* Not enough, realloc it. */
- /* +1 in case of mbcset->ncoll_syms is 0. */
- Idx new_coll_sym_alloc = 2 * mbcset->ncoll_syms + 1;
- /* Use realloc since mbcset->coll_syms is NULL
- if *alloc == 0. */
- int32_t *new_coll_syms = re_realloc (mbcset->coll_syms, int32_t,
- new_coll_sym_alloc);
- if (__glibc_unlikely (new_coll_syms == NULL))
- return REG_ESPACE;
- mbcset->coll_syms = new_coll_syms;
- *coll_sym_alloc = new_coll_sym_alloc;
- }
- mbcset->coll_syms[mbcset->ncoll_syms++] = idx;
+ /* We found the entry. */
+ idx = symb_table[2 * elem + 1];
+ /* Skip the name of collating element name. */
+ idx += 1 + extra[idx];
+ }
+ else if (name_len == 1)
+ {
+ /* No valid character, treat it as a normal
+ character. */
+ bitset_set (sbcset, name[0]);
return REG_NOERROR;
}
else
+ return REG_ECOLLATE;
+
+ /* Got valid collation sequence, add it as a new entry. */
+ /* Check the space of the arrays. */
+ if (__glibc_unlikely (*coll_sym_alloc == mbcset->ncoll_syms))
{
- if (__glibc_unlikely (name_len != 1))
- return REG_ECOLLATE;
- else
- {
- bitset_set (sbcset, name[0]);
- return REG_NOERROR;
- }
+ /* Not enough, realloc it. */
+ /* +1 in case of mbcset->ncoll_syms is 0. */
+ int new_coll_sym_alloc = 2 * mbcset->ncoll_syms + 1;
+ /* Use realloc since mbcset->coll_syms is NULL
+ if *alloc == 0. */
+ int32_t *new_coll_syms = re_realloc (mbcset->coll_syms, int32_t,
+ new_coll_sym_alloc);
+ if (__glibc_unlikely (new_coll_syms == NULL))
+ return REG_ESPACE;
+ mbcset->coll_syms = new_coll_syms;
+ *coll_sym_alloc = new_coll_sym_alloc;
}
+ mbcset->coll_syms[mbcset->ncoll_syms++] = idx;
+ return REG_NOERROR;
}
-#endif
+ else
+ {
+ if (__glibc_unlikely (name_len != 1))
+ return REG_ECOLLATE;
+ else
+ {
+ bitset_set (sbcset, name[0]);
+ return REG_NOERROR;
+ }
+ }
+}
+#endif /* _LIBC */
+
+/* This function parse bracket expression like "[abc]", "[a-c]",
+ "[[.a-a.]]" etc. */
+
+static bin_tree_t *
+parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
+ reg_syntax_t syntax, reg_errcode_t *err)
+{
+ const unsigned char *collseqmb = NULL;
+ const char *collseqwc = NULL;
+ uint_fast32_t nrules = 0;
+ int_fast32_t table_size = 0;
+ const void *symb_table = NULL;
+ const unsigned char *extra = NULL;
re_token_t br_token;
re_bitset_ptr_t sbcset;
-#ifdef RE_ENABLE_I18N
re_charset_t *mbcset;
Idx coll_sym_alloc = 0, range_alloc = 0, mbchar_alloc = 0;
Idx equiv_class_alloc = 0, char_class_alloc = 0;
-#endif /* not RE_ENABLE_I18N */
bool non_match = false;
bin_tree_t *work_tree;
int token_len;
@@ -3111,26 +3040,17 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
*/
collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
table_size = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_SYMB_HASH_SIZEMB);
- symb_table = (const int32_t *) _NL_CURRENT (LC_COLLATE,
- _NL_COLLATE_SYMB_TABLEMB);
+ symb_table = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_TABLEMB);
extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
_NL_COLLATE_SYMB_EXTRAMB);
}
#endif
sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
-#ifdef RE_ENABLE_I18N
mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);
-#endif /* RE_ENABLE_I18N */
-#ifdef RE_ENABLE_I18N
if (__glibc_unlikely (sbcset == NULL || mbcset == NULL))
-#else
- if (__glibc_unlikely (sbcset == NULL))
-#endif /* RE_ENABLE_I18N */
{
re_free (sbcset);
-#ifdef RE_ENABLE_I18N
re_free (mbcset);
-#endif
*err = REG_ESPACE;
return NULL;
}
@@ -3143,9 +3063,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
}
if (token->type == OP_NON_MATCH_LIST)
{
-#ifdef RE_ENABLE_I18N
mbcset->non_match = 1;
-#endif /* not RE_ENABLE_I18N */
non_match = true;
if (syntax & RE_HAT_LISTS_NOT_NEWLINE)
bitset_set (sbcset, '\n');
@@ -3228,18 +3146,10 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
token_len = peek_token_bracket (token, regexp, syntax);
-#ifdef _LIBC
*err = build_range_exp (sbcset, mbcset, &range_alloc,
- &start_elem, &end_elem);
-#else
-# ifdef RE_ENABLE_I18N
- *err = build_range_exp (syntax, sbcset,
- dfa->mb_cur_max > 1 ? mbcset : NULL,
- &range_alloc, &start_elem, &end_elem);
-# else
- *err = build_range_exp (syntax, sbcset, &start_elem, &end_elem);
-# endif
-#endif /* RE_ENABLE_I18N */
+ &start_elem, &end_elem,
+ dfa, syntax, nrules, collseqmb, collseqwc,
+ table_size, symb_table, extra);
if (__glibc_unlikely (*err != REG_NOERROR))
goto parse_bracket_exp_free_return;
}
@@ -3250,7 +3160,6 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
case SB_CHAR:
bitset_set (sbcset, start_elem.opr.ch);
break;
-#ifdef RE_ENABLE_I18N
case MB_CHAR:
/* Check whether the array has enough space. */
if (__glibc_unlikely (mbchar_alloc == mbcset->nmbchars))
@@ -3268,30 +3177,24 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
}
mbcset->mbchars[mbcset->nmbchars++] = start_elem.opr.wch;
break;
-#endif /* RE_ENABLE_I18N */
case EQUIV_CLASS:
*err = build_equiv_class (sbcset,
-#ifdef RE_ENABLE_I18N
mbcset, &equiv_class_alloc,
-#endif /* RE_ENABLE_I18N */
start_elem.opr.name);
if (__glibc_unlikely (*err != REG_NOERROR))
goto parse_bracket_exp_free_return;
break;
case COLL_SYM:
*err = build_collating_symbol (sbcset,
-#ifdef RE_ENABLE_I18N
mbcset, &coll_sym_alloc,
-#endif /* RE_ENABLE_I18N */
- start_elem.opr.name);
+ start_elem.opr.name,
+ nrules, table_size, symb_table, extra);
if (__glibc_unlikely (*err != REG_NOERROR))
goto parse_bracket_exp_free_return;
break;
case CHAR_CLASS:
*err = build_charclass (regexp->trans, sbcset,
-#ifdef RE_ENABLE_I18N
mbcset, &char_class_alloc,
-#endif /* RE_ENABLE_I18N */
(const char *) start_elem.opr.name,
syntax);
if (__glibc_unlikely (*err != REG_NOERROR))
@@ -3317,7 +3220,6 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
if (non_match)
bitset_not (sbcset);
-#ifdef RE_ENABLE_I18N
/* Ensure only single byte characters are set. */
if (dfa->mb_cur_max > 1)
bitset_mask (sbcset, dfa->sb_char);
@@ -3361,11 +3263,8 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
}
}
else
-#endif /* not RE_ENABLE_I18N */
{
-#ifdef RE_ENABLE_I18N
free_charset (mbcset);
-#endif
/* Build a tree for simple bracket. */
br_token.type = SIMPLE_BRACKET;
br_token.opr.sbcset = sbcset;
@@ -3379,9 +3278,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
*err = REG_ESPACE;
parse_bracket_exp_free_return:
re_free (sbcset);
-#ifdef RE_ENABLE_I18N
free_charset (mbcset);
-#endif /* RE_ENABLE_I18N */
return NULL;
}
@@ -3392,7 +3289,6 @@ parse_bracket_element (bracket_elem_t *elem, re_string_t *regexp,
re_token_t *token, int token_len, re_dfa_t *dfa,
reg_syntax_t syntax, bool accept_hyphen)
{
-#ifdef RE_ENABLE_I18N
int cur_char_size;
cur_char_size = re_string_char_size_at (regexp, re_string_cur_idx (regexp));
if (cur_char_size > 1)
@@ -3402,7 +3298,6 @@ parse_bracket_element (bracket_elem_t *elem, re_string_t *regexp,
re_string_skip_bytes (regexp, cur_char_size);
return REG_NOERROR;
}
-#endif /* RE_ENABLE_I18N */
re_string_skip_bytes (regexp, token_len); /* Skip a token. */
if (token->type == OP_OPEN_COLL_ELEM || token->type == OP_OPEN_CHAR_CLASS
|| token->type == OP_OPEN_EQUIV_CLASS)
@@ -3475,12 +3370,8 @@ parse_bracket_symbol (bracket_elem_t *elem, re_string_t *regexp,
is a pointer argument since we may update it. */
static reg_errcode_t
-#ifdef RE_ENABLE_I18N
build_equiv_class (bitset_t sbcset, re_charset_t *mbcset,
Idx *equiv_class_alloc, const unsigned char *name)
-#else /* not RE_ENABLE_I18N */
-build_equiv_class (bitset_t sbcset, const unsigned char *name)
-#endif /* not RE_ENABLE_I18N */
{
#ifdef _LIBC
uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
@@ -3560,14 +3451,9 @@ build_equiv_class (bitset_t sbcset, const unsigned char *name)
is a pointer argument since we may update it. */
static reg_errcode_t
-#ifdef RE_ENABLE_I18N
build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
re_charset_t *mbcset, Idx *char_class_alloc,
const char *class_name, reg_syntax_t syntax)
-#else /* not RE_ENABLE_I18N */
-build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
- const char *class_name, reg_syntax_t syntax)
-#endif /* not RE_ENABLE_I18N */
{
int i;
const char *name = class_name;
@@ -3578,7 +3464,6 @@ build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
&& (strcmp (name, "upper") == 0 || strcmp (name, "lower") == 0))
name = "alpha";
-#ifdef RE_ENABLE_I18N
/* Check the space of the arrays. */
if (__glibc_unlikely (*char_class_alloc == mbcset->nchar_classes))
{
@@ -3594,7 +3479,6 @@ build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
*char_class_alloc = new_char_class_alloc;
}
mbcset->char_classes[mbcset->nchar_classes++] = __wctype (name);
-#endif /* RE_ENABLE_I18N */
#define BUILD_CHARCLASS_LOOP(ctype_func) \
do { \
@@ -3649,10 +3533,8 @@ build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans,
reg_errcode_t *err)
{
re_bitset_ptr_t sbcset;
-#ifdef RE_ENABLE_I18N
re_charset_t *mbcset;
Idx alloc = 0;
-#endif /* not RE_ENABLE_I18N */
reg_errcode_t ret;
bin_tree_t *tree;
@@ -3662,7 +3544,6 @@ build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans,
*err = REG_ESPACE;
return NULL;
}
-#ifdef RE_ENABLE_I18N
mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);
if (__glibc_unlikely (mbcset == NULL))
{
@@ -3671,21 +3552,14 @@ build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans,
return NULL;
}
mbcset->non_match = non_match;
-#endif /* RE_ENABLE_I18N */
/* We don't care the syntax in this case. */
- ret = build_charclass (trans, sbcset,
-#ifdef RE_ENABLE_I18N
- mbcset, &alloc,
-#endif /* RE_ENABLE_I18N */
- class_name, 0);
+ ret = build_charclass (trans, sbcset, mbcset, &alloc, class_name, 0);
if (__glibc_unlikely (ret != REG_NOERROR))
{
re_free (sbcset);
-#ifdef RE_ENABLE_I18N
free_charset (mbcset);
-#endif /* RE_ENABLE_I18N */
*err = ret;
return NULL;
}
@@ -3697,11 +3571,9 @@ build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans,
if (non_match)
bitset_not (sbcset);
-#ifdef RE_ENABLE_I18N
/* Ensure only single byte characters are set. */
if (dfa->mb_cur_max > 1)
bitset_mask (sbcset, dfa->sb_char);
-#endif
/* Build a tree for simple bracket. */
re_token_t br_token = { .type = SIMPLE_BRACKET, .opr.sbcset = sbcset };
@@ -3709,7 +3581,6 @@ build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans,
if (__glibc_unlikely (tree == NULL))
goto build_word_op_espace;
-#ifdef RE_ENABLE_I18N
if (dfa->mb_cur_max > 1)
{
bin_tree_t *mbc_tree;
@@ -3730,15 +3601,10 @@ build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans,
free_charset (mbcset);
return tree;
}
-#else /* not RE_ENABLE_I18N */
- return tree;
-#endif /* not RE_ENABLE_I18N */
build_word_op_espace:
re_free (sbcset);
-#ifdef RE_ENABLE_I18N
free_charset (mbcset);
-#endif /* RE_ENABLE_I18N */
*err = REG_ESPACE;
return NULL;
}
@@ -3771,21 +3637,19 @@ fetch_number (re_string_t *input, re_token_t *token, reg_syntax_t syntax)
return num;
}
-#ifdef RE_ENABLE_I18N
static void
free_charset (re_charset_t *cset)
{
re_free (cset->mbchars);
-# ifdef _LIBC
+#ifdef _LIBC
re_free (cset->coll_syms);
re_free (cset->equiv_classes);
-# endif
+#endif
re_free (cset->range_starts);
re_free (cset->range_ends);
re_free (cset->char_classes);
re_free (cset);
}
-#endif /* RE_ENABLE_I18N */
/* Functions for binary tree operation. */
@@ -3851,13 +3715,10 @@ mark_opt_subexp (void *extra, bin_tree_t *node)
static void
free_token (re_token_t *node)
{
-#ifdef RE_ENABLE_I18N
if (node->type == COMPLEX_BRACKET && node->duplicated == 0)
free_charset (node->opr.mbcset);
- else
-#endif /* RE_ENABLE_I18N */
- if (node->type == SIMPLE_BRACKET && node->duplicated == 0)
- re_free (node->opr.sbcset);
+ else if (node->type == SIMPLE_BRACKET && node->duplicated == 0)
+ re_free (node->opr.sbcset);
}
/* Worker function for tree walking. Free the allocated memory inside NODE