diff options
-rw-r--r-- | doc/sed.1 | 8 | ||||
-rw-r--r-- | lib/regcomp.c | 479 | ||||
-rw-r--r-- | lib/regex_.h | 114 | ||||
-rw-r--r-- | lib/regex_internal.c | 233 | ||||
-rw-r--r-- | lib/regex_internal.h | 161 | ||||
-rw-r--r-- | lib/regexec.c | 617 | ||||
-rw-r--r-- | po/nl.po | 179 |
7 files changed, 771 insertions, 1020 deletions
@@ -1,7 +1,7 @@ -.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.29. -.TH SED "1" "September 2005" "sed version 4.1.4" "User Commands" +.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.28. +.TH SED "1" "December 2005" "sed version 4.1.4" "User Commands" .SH NAME -sed \- stream editor for filtering and transforming text +sed \- stream editor for filtering file content .SH SYNOPSIS .B sed [\fIOPTION\fR]... \fI{script-only-if-no-other-script} \fR[\fIinput-file\fR]... @@ -358,7 +358,7 @@ to the extent permitted by law. sed.info, any of various books on \*(sd, .na -the \*(sd FAQ (http://sed.sf.net/grabbag/tutorials/sedfaq.html), +the \*(sd FAQ (http://sed.sf.net/grabbag/tutorials/sedfaq.txt), http://sed.sf.net/grabbag/. .PP The full documentation for diff --git a/lib/regcomp.c b/lib/regcomp.c index c93f79e..d35ae5f 100644 --- a/lib/regcomp.c +++ b/lib/regcomp.c @@ -24,7 +24,6 @@ static void re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state, char *fastmap); static reg_errcode_t init_dfa (re_dfa_t *dfa, size_t pat_len); -static void init_word_char (re_dfa_t *dfa); #ifdef RE_ENABLE_I18N static void free_charset (re_charset_t *cset); #endif /* RE_ENABLE_I18N */ @@ -47,9 +46,6 @@ static bin_tree_t *lower_subexp (reg_errcode_t *err, regex_t *preg, static reg_errcode_t calc_first (void *extra, bin_tree_t *node); static reg_errcode_t calc_next (void *extra, bin_tree_t *node); static reg_errcode_t link_nfa_nodes (void *extra, bin_tree_t *node); -static reg_errcode_t duplicate_node_closure (re_dfa_t *dfa, int top_org_node, - int top_clone_node, int root_node, - unsigned int constraint); static int duplicate_node (re_dfa_t *dfa, int org_idx, unsigned int constraint); static int search_duplicated_node (const re_dfa_t *dfa, int org_node, unsigned int constraint); @@ -59,12 +55,8 @@ static reg_errcode_t calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, static reg_errcode_t calc_inveclosure (re_dfa_t *dfa); static int fetch_number (re_string_t *input, re_token_t *token, reg_syntax_t syntax); -static void fetch_token (re_token_t *result, re_string_t *input, - reg_syntax_t syntax); static int peek_token (re_token_t *token, re_string_t *input, - reg_syntax_t syntax); -static int peek_token_bracket (re_token_t *token, re_string_t *input, - reg_syntax_t syntax); + reg_syntax_t syntax) internal_function; static bin_tree_t *parse (re_string_t *regexp, regex_t *preg, reg_syntax_t syntax, reg_errcode_t *err); static bin_tree_t *parse_reg_exp (re_string_t *regexp, regex_t *preg, @@ -94,45 +86,27 @@ static reg_errcode_t parse_bracket_element (bracket_elem_t *elem, static reg_errcode_t parse_bracket_symbol (bracket_elem_t *elem, re_string_t *regexp, re_token_t *token); -#ifndef _LIBC -# ifdef RE_ENABLE_I18N -static reg_errcode_t build_range_exp (re_bitset_ptr_t sbcset, - re_charset_t *mbcset, int *range_alloc, - bracket_elem_t *start_elem, - bracket_elem_t *end_elem); -static reg_errcode_t build_collating_symbol (re_bitset_ptr_t sbcset, - re_charset_t *mbcset, - int *coll_sym_alloc, - const unsigned char *name); -# else /* not RE_ENABLE_I18N */ -static reg_errcode_t build_range_exp (re_bitset_ptr_t sbcset, - bracket_elem_t *start_elem, - bracket_elem_t *end_elem); -static reg_errcode_t build_collating_symbol (re_bitset_ptr_t sbcset, - const unsigned char *name); -# endif /* not RE_ENABLE_I18N */ -#endif /* not _LIBC */ #ifdef RE_ENABLE_I18N -static reg_errcode_t build_equiv_class (re_bitset_ptr_t sbcset, +static reg_errcode_t build_equiv_class (bitset_t sbcset, re_charset_t *mbcset, int *equiv_class_alloc, const unsigned char *name); -static reg_errcode_t build_charclass (unsigned RE_TRANSLATE_TYPE trans, - re_bitset_ptr_t sbcset, +static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans, + bitset_t sbcset, re_charset_t *mbcset, int *char_class_alloc, const unsigned char *class_name, reg_syntax_t syntax); #else /* not RE_ENABLE_I18N */ -static reg_errcode_t build_equiv_class (re_bitset_ptr_t sbcset, +static reg_errcode_t build_equiv_class (bitset_t sbcset, const unsigned char *name); -static reg_errcode_t build_charclass (unsigned RE_TRANSLATE_TYPE trans, - re_bitset_ptr_t sbcset, +static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans, + bitset_t sbcset, const unsigned char *class_name, reg_syntax_t syntax); #endif /* not RE_ENABLE_I18N */ static bin_tree_t *build_charclass_op (re_dfa_t *dfa, - unsigned RE_TRANSLATE_TYPE trans, + RE_TRANSLATE_TYPE trans, const unsigned char *class_name, const unsigned char *extra, int non_match, reg_errcode_t *err); @@ -325,10 +299,8 @@ re_set_fastmap (char *fastmap, int icase, int ch) Compile fastmap for the initial_state INIT_STATE. */ static void -re_compile_fastmap_iter (bufp, init_state, fastmap) - regex_t *bufp; - const re_dfastate_t *init_state; - char *fastmap; +re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state, + char *fastmap) { re_dfa_t *dfa = (re_dfa_t *) bufp->buffer; int node_cnt; @@ -354,7 +326,7 @@ re_compile_fastmap_iter (bufp, init_state, fastmap) && dfa->nodes[node].type == CHARACTER && dfa->nodes[node].mb_partial) *p++ = dfa->nodes[node].opr.c; - memset (&state, 0, sizeof (state)); + memset (&state, '\0', sizeof (state)); if (mbrtowc (&wc, (const char *) buf, p - buf, &state) == p - buf && (__wcrtomb ((char *) buf, towlower (wc), &state) @@ -365,11 +337,15 @@ re_compile_fastmap_iter (bufp, init_state, fastmap) } else if (type == SIMPLE_BRACKET) { - int i, j, ch; - for (i = 0, ch = 0; i < BITSET_UINTS; ++i) - for (j = 0; j < UINT_BITS; ++j, ++ch) - if (dfa->nodes[node].opr.sbcset[i] & (1u << j)) - re_set_fastmap (fastmap, icase, ch); + int i, ch; + for (i = 0, ch = 0; i < BITSET_WORDS; ++i) + { + int j; + bitset_word_t w = dfa->nodes[node].opr.sbcset[i]; + for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch) + if (w & ((bitset_word_t) 1 << j)) + re_set_fastmap (fastmap, icase, ch); + } } #ifdef RE_ENABLE_I18N else if (type == COMPLEX_BRACKET) @@ -388,13 +364,11 @@ re_compile_fastmap_iter (bufp, init_state, fastmap) is a valid collation element, and don't catch 'b' since 'b' is the only collation element which starts from 'b'. */ - int j, ch; const int32_t *table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); - for (i = 0, ch = 0; i < BITSET_UINTS; ++i) - for (j = 0; j < UINT_BITS; ++j, ++ch) - if (table[ch] < 0) - re_set_fastmap (fastmap, icase, ch); + for (i = 0; i < SBC_MAX; ++i) + if (table[i] < 0) + re_set_fastmap (fastmap, icase, i); } # else if (dfa->mb_cur_max > 1) @@ -581,14 +555,10 @@ weak_alias (__regerror, regerror) UTF-8 is used. Otherwise we would allocate memory just to initialize it the same all the time. UTF-8 is the preferred encoding so this is a worthwhile optimization. */ -static const bitset utf8_sb_map = +static const bitset_t utf8_sb_map = { /* Set the first 128 bits. */ -# if UINT_MAX == 0xffffffff - 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff -# else -# error "Add case for new unsigned int size" -# endif + [0 ... 0x80 / BITSET_WORD_BITS - 1] = BITSET_WORD_MAX }; #endif @@ -737,11 +707,8 @@ libc_freeres_fn (free_mem) SYNTAX indicate regular expression's syntax. */ static reg_errcode_t -re_compile_internal (preg, pattern, length, syntax) - regex_t *preg; - const char * pattern; - size_t length; - reg_syntax_t syntax; +re_compile_internal (regex_t *preg, const char * pattern, size_t length, + reg_syntax_t syntax) { reg_errcode_t err = REG_NOERROR; re_dfa_t *dfa; @@ -839,9 +806,7 @@ re_compile_internal (preg, pattern, length, syntax) as the initial length of some arrays. */ static reg_errcode_t -init_dfa (dfa, pat_len) - re_dfa_t *dfa; - size_t pat_len; +init_dfa (re_dfa_t *dfa, size_t pat_len) { unsigned int table_size; #ifndef _LIBC @@ -908,20 +873,17 @@ init_dfa (dfa, pat_len) { int i, j, ch; - dfa->sb_char = (re_bitset_ptr_t) calloc (sizeof (bitset), 1); + dfa->sb_char = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1); if (BE (dfa->sb_char == NULL, 0)) return REG_ESPACE; - /* Clear all bits by, then set those corresponding to single - byte chars. */ - bitset_empty (dfa->sb_char); - - for (i = 0, ch = 0; i < BITSET_UINTS; ++i) - for (j = 0; j < UINT_BITS; ++j, ++ch) + /* Set the bits corresponding to single byte chars. */ + for (i = 0, ch = 0; i < BITSET_WORDS; ++i) + for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch) { wint_t wch = __btowc (ch); if (wch != WEOF) - dfa->sb_char[i] |= 1u << j; + dfa->sb_char[i] |= (bitset_word_t) 1 << j; # ifndef _LIBC if (isascii (ch) && wch != ch) dfa->map_notascii = 1; @@ -941,22 +903,21 @@ init_dfa (dfa, pat_len) character used by some operators like "\<", "\>", etc. */ static void -init_word_char (dfa) - re_dfa_t *dfa; +internal_function +init_word_char (re_dfa_t *dfa) { int i, j, ch; dfa->word_ops_used = 1; - for (i = 0, ch = 0; i < BITSET_UINTS; ++i) - for (j = 0; j < UINT_BITS; ++j, ++ch) + for (i = 0, ch = 0; i < BITSET_WORDS; ++i) + for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch) if (isalnum (ch) || ch == '_') - dfa->word_char[i] |= 1u << j; + dfa->word_char[i] |= (bitset_word_t) 1 << j; } /* Free the work area which are only used while compiling. */ static void -free_workarea_compile (preg) - regex_t *preg; +free_workarea_compile (regex_t *preg) { re_dfa_t *dfa = (re_dfa_t *) preg->buffer; bin_tree_storage_t *storage, *next; @@ -975,8 +936,7 @@ free_workarea_compile (preg) /* Create initial states for all contexts. */ static reg_errcode_t -create_initial_state (dfa) - re_dfa_t *dfa; +create_initial_state (re_dfa_t *dfa) { int first, i; reg_errcode_t err; @@ -1058,8 +1018,7 @@ create_initial_state (dfa) DFA nodes where needed. */ static void -optimize_utf8 (dfa) - re_dfa_t *dfa; +optimize_utf8 (re_dfa_t *dfa) { int node, i, mb_chars = 0, has_period = 0; @@ -1096,8 +1055,9 @@ optimize_utf8 (dfa) case COMPLEX_BRACKET: return; case SIMPLE_BRACKET: - /* Just double check. */ - for (i = 0x80 / UINT_BITS; i < BITSET_UINTS; ++i) + /* Just double check. The non-ASCII range starts at 0x80. */ + assert (0x80 % BITSET_WORD_BITS == 0); + for (i = 0x80 / BITSET_WORD_BITS; i < BITSET_WORDS; ++i) if (dfa->nodes[node].opr.sbcset[i]) return; break; @@ -1126,8 +1086,7 @@ optimize_utf8 (dfa) "eclosure", and "inveclosure". */ static reg_errcode_t -analyze (preg) - regex_t *preg; +analyze (regex_t *preg) { re_dfa_t *dfa = (re_dfa_t *) preg->buffer; reg_errcode_t ret; @@ -1190,10 +1149,8 @@ analyze (preg) implement parse tree visits. Instead, we use parent pointers and some hairy code in these two functions. */ static reg_errcode_t -postorder (root, fn, extra) - bin_tree_t *root; - reg_errcode_t (fn (void *, bin_tree_t *)); - void *extra; +postorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)), + void *extra) { bin_tree_t *node, *prev; @@ -1224,10 +1181,8 @@ postorder (root, fn, extra) } static reg_errcode_t -preorder (root, fn, extra) - bin_tree_t *root; - reg_errcode_t (fn (void *, bin_tree_t *)); - void *extra; +preorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)), + void *extra) { bin_tree_t *node; @@ -1259,9 +1214,7 @@ preorder (root, fn, extra) re_search_internal to map the inner one's opr.idx to this one's. Adjust backreferences as well. Requires a preorder visit. */ static reg_errcode_t -optimize_subexps (extra, node) - void *extra; - bin_tree_t *node; +optimize_subexps (void *extra, bin_tree_t *node) { re_dfa_t *dfa = (re_dfa_t *) extra; @@ -1282,8 +1235,8 @@ optimize_subexps (extra, node) node->left->parent = node; dfa->subexp_map[other_idx] = dfa->subexp_map[node->token.opr.idx]; - if (other_idx < CHAR_BIT * sizeof dfa->used_bkref_map) - dfa->used_bkref_map &= ~(1u << other_idx); + if (other_idx < BITSET_WORD_BITS) + dfa->used_bkref_map &= ~((bitset_word_t) 1 << other_idx); } return REG_NOERROR; @@ -1292,9 +1245,7 @@ optimize_subexps (extra, node) /* Lowering pass: Turn each SUBEXP node into the appropriate concatenation of OP_OPEN_SUBEXP, the body of the SUBEXP (if any) and OP_CLOSE_SUBEXP. */ static reg_errcode_t -lower_subexps (extra, node) - void *extra; - bin_tree_t *node; +lower_subexps (void *extra, bin_tree_t *node) { regex_t *preg = (regex_t *) extra; reg_errcode_t err = REG_NOERROR; @@ -1316,10 +1267,7 @@ lower_subexps (extra, node) } static bin_tree_t * -lower_subexp (err, preg, node) - reg_errcode_t *err; - regex_t *preg; - bin_tree_t *node; +lower_subexp (reg_errcode_t *err, regex_t *preg, bin_tree_t *node) { re_dfa_t *dfa = (re_dfa_t *) preg->buffer; bin_tree_t *body = node->left; @@ -1331,8 +1279,9 @@ lower_subexp (err, preg, node) very common, so we do not lose much. An example that triggers this case is the sed "script" /\(\)/x. */ && node->left != NULL - && (node->token.opr.idx >= CHAR_BIT * sizeof dfa->used_bkref_map - || !(dfa->used_bkref_map & (1u << node->token.opr.idx)))) + && (node->token.opr.idx >= BITSET_WORD_BITS + || !(dfa->used_bkref_map + & ((bitset_word_t) 1 << node->token.opr.idx)))) return node->left; /* Convert the SUBEXP node to the concatenation of an @@ -1355,9 +1304,7 @@ lower_subexp (err, preg, node) /* Pass 1 in building the NFA: compute FIRST and create unlinked automaton nodes. Requires a postorder visit. */ static reg_errcode_t -calc_first (extra, node) - void *extra; - bin_tree_t *node; +calc_first (void *extra, bin_tree_t *node) { re_dfa_t *dfa = (re_dfa_t *) extra; if (node->token.type == CONCAT) @@ -1377,9 +1324,7 @@ calc_first (extra, node) /* Pass 2: compute NEXT on the tree. Preorder visit. */ static reg_errcode_t -calc_next (extra, node) - void *extra; - bin_tree_t *node; +calc_next (void *extra, bin_tree_t *node) { switch (node->token.type) { @@ -1402,9 +1347,7 @@ calc_next (extra, node) /* Pass 3: link all DFA nodes to their NEXT node (any order will do). */ static reg_errcode_t -link_nfa_nodes (extra, node) - void *extra; - bin_tree_t *node; +link_nfa_nodes (void *extra, bin_tree_t *node) { re_dfa_t *dfa = (re_dfa_t *) extra; int idx = node->node_idx; @@ -1464,11 +1407,9 @@ link_nfa_nodes (extra, node) to their own constraint. */ static reg_errcode_t -duplicate_node_closure (dfa, top_org_node, top_clone_node, root_node, - init_constraint) - re_dfa_t *dfa; - int top_org_node, top_clone_node, root_node; - unsigned int init_constraint; +internal_function +duplicate_node_closure (re_dfa_t *dfa, int top_org_node, int top_clone_node, + int root_node, unsigned int init_constraint) { int org_node, clone_node, ret; unsigned int constraint = init_constraint; @@ -1578,10 +1519,8 @@ duplicate_node_closure (dfa, top_org_node, top_clone_node, root_node, satisfies the constraint CONSTRAINT. */ static int -search_duplicated_node (dfa, org_node, constraint) - const re_dfa_t *dfa; - int org_node; - unsigned int constraint; +search_duplicated_node (const re_dfa_t *dfa, int org_node, + unsigned int constraint) { int idx; for (idx = dfa->nodes_len - 1; dfa->nodes[idx].duplicated && idx > 0; --idx) @@ -1598,10 +1537,7 @@ search_duplicated_node (dfa, org_node, constraint) available. */ static int -duplicate_node (dfa, org_idx, constraint) - re_dfa_t *dfa; - int org_idx; - unsigned int constraint; +duplicate_node (re_dfa_t *dfa, int org_idx, unsigned int constraint) { int dup_idx = re_dfa_add_node (dfa, dfa->nodes[org_idx]); if (BE (dup_idx != -1, 1)) @@ -1618,8 +1554,7 @@ duplicate_node (dfa, org_idx, constraint) } static reg_errcode_t -calc_inveclosure (dfa) - re_dfa_t *dfa; +calc_inveclosure (re_dfa_t *dfa) { int src, idx, ret; for (idx = 0; idx < dfa->nodes_len; ++idx) @@ -1642,8 +1577,7 @@ calc_inveclosure (dfa) /* Calculate "eclosure" for all the node in DFA. */ static reg_errcode_t -calc_eclosure (dfa) - re_dfa_t *dfa; +calc_eclosure (re_dfa_t *dfa) { int node_idx, incomplete; #ifdef DEBUG @@ -1687,10 +1621,7 @@ calc_eclosure (dfa) /* Calculate epsilon closure of NODE. */ static reg_errcode_t -calc_eclosure_iter (new_set, dfa, node, root) - re_node_set *new_set; - re_dfa_t *dfa; - int node, root; +calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, int node, int root) { reg_errcode_t err; unsigned int constraint; @@ -1770,10 +1701,8 @@ calc_eclosure_iter (new_set, dfa, node, root) We must not use this function inside bracket expressions. */ static void -fetch_token (result, input, syntax) - re_token_t *result; - re_string_t *input; - reg_syntax_t syntax; +internal_function +fetch_token (re_token_t *result, re_string_t *input, reg_syntax_t syntax) { re_string_skip_bytes (input, peek_token (result, input, syntax)); } @@ -1782,10 +1711,8 @@ fetch_token (result, input, syntax) We must not use this function inside bracket expressions. */ static int -peek_token (token, input, syntax) - re_token_t *token; - re_string_t *input; - reg_syntax_t syntax; +internal_function +peek_token (re_token_t *token, re_string_t *input, reg_syntax_t syntax) { unsigned char c; @@ -2023,10 +1950,8 @@ peek_token (token, input, syntax) We must not use this function out of bracket expressions. */ static int -peek_token_bracket (token, input, syntax) - re_token_t *token; - re_string_t *input; - reg_syntax_t syntax; +internal_function +peek_token_bracket (re_token_t *token, re_string_t *input, reg_syntax_t syntax) { unsigned char c; if (re_string_eoi (input)) @@ -2122,11 +2047,8 @@ peek_token_bracket (token, input, syntax) EOR means end of regular expression. */ static bin_tree_t * -parse (regexp, preg, syntax, err) - re_string_t *regexp; - regex_t *preg; - reg_syntax_t syntax; - reg_errcode_t *err; +parse (re_string_t *regexp, regex_t *preg, reg_syntax_t syntax, + reg_errcode_t *err) { re_dfa_t *dfa = (re_dfa_t *) preg->buffer; bin_tree_t *tree, *eor, *root; @@ -2159,13 +2081,8 @@ parse (regexp, preg, syntax, err) ALT means alternative, which represents the operator `|'. */ static bin_tree_t * -parse_reg_exp (regexp, preg, token, syntax, nest, err) - re_string_t *regexp; - regex_t *preg; - re_token_t *token; - reg_syntax_t syntax; - int nest; - reg_errcode_t *err; +parse_reg_exp (re_string_t *regexp, regex_t *preg, re_token_t *token, + reg_syntax_t syntax, int nest, reg_errcode_t *err) { re_dfa_t *dfa = (re_dfa_t *) preg->buffer; bin_tree_t *tree, *branch = NULL; @@ -2205,13 +2122,8 @@ parse_reg_exp (regexp, preg, token, syntax, nest, err) CAT means concatenation. */ static bin_tree_t * -parse_branch (regexp, preg, token, syntax, nest, err) - re_string_t *regexp; - regex_t *preg; - re_token_t *token; - reg_syntax_t syntax; - int nest; - reg_errcode_t *err; +parse_branch (re_string_t *regexp, regex_t *preg, re_token_t *token, + reg_syntax_t syntax, int nest, reg_errcode_t *err) { bin_tree_t *tree, *exp; re_dfa_t *dfa = (re_dfa_t *) preg->buffer; @@ -2250,13 +2162,8 @@ parse_branch (regexp, preg, token, syntax, nest, err) */ static bin_tree_t * -parse_expression (regexp, preg, token, syntax, nest, err) - re_string_t *regexp; - regex_t *preg; - re_token_t *token; - reg_syntax_t syntax; - int nest; - reg_errcode_t *err; +parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token, + reg_syntax_t syntax, int nest, reg_errcode_t *err) { re_dfa_t *dfa = (re_dfa_t *) preg->buffer; bin_tree_t *tree; @@ -2471,13 +2378,8 @@ parse_expression (regexp, preg, token, syntax, nest, err) */ static bin_tree_t * -parse_sub_exp (regexp, preg, token, syntax, nest, err) - re_string_t *regexp; - regex_t *preg; - re_token_t *token; - reg_syntax_t syntax; - int nest; - reg_errcode_t *err; +parse_sub_exp (re_string_t *regexp, regex_t *preg, re_token_t *token, + reg_syntax_t syntax, int nest, reg_errcode_t *err) { re_dfa_t *dfa = (re_dfa_t *) preg->buffer; bin_tree_t *tree; @@ -2514,13 +2416,8 @@ parse_sub_exp (regexp, preg, token, syntax, nest, err) /* This function parse repetition operators like "*", "+", "{1,3}" etc. */ static bin_tree_t * -parse_dup_op (elem, regexp, dfa, token, syntax, err) - bin_tree_t *elem; - re_string_t *regexp; - re_dfa_t *dfa; - re_token_t *token; - reg_syntax_t syntax; - reg_errcode_t *err; +parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa, + re_token_t *token, reg_syntax_t syntax, reg_errcode_t *err) { bin_tree_t *tree = NULL, *old_tree = NULL; int i, start, end, start_idx = re_string_cur_idx (regexp); @@ -2659,15 +2556,14 @@ parse_dup_op (elem, regexp, dfa, token, syntax, err) update it. */ static reg_errcode_t +internal_function # ifdef RE_ENABLE_I18N -build_range_exp (sbcset, mbcset, range_alloc, start_elem, end_elem) - re_charset_t *mbcset; - int *range_alloc; +build_range_exp (bitset_t sbcset, re_charset_t *mbcset, int *range_alloc, + bracket_elem_t *start_elem, bracket_elem_t *end_elem) # else /* not RE_ENABLE_I18N */ -build_range_exp (sbcset, start_elem, end_elem) +build_range_exp (bitset_t sbcset, bracket_elem_t *start_elem, + bracket_elem_t *end_elem) # endif /* not RE_ENABLE_I18N */ - re_bitset_ptr_t sbcset; - bracket_elem_t *start_elem, *end_elem; { unsigned int start_ch, end_ch; /* Equivalence Classes and Character Classes can't be a range start/end. */ @@ -2781,15 +2677,13 @@ build_range_exp (sbcset, start_elem, end_elem) pointer argument since we may update it. */ static reg_errcode_t +internal_function # ifdef RE_ENABLE_I18N -build_collating_symbol (sbcset, mbcset, coll_sym_alloc, name) - re_charset_t *mbcset; - int *coll_sym_alloc; +build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset, + int *coll_sym_alloc, const unsigned char *name) # else /* not RE_ENABLE_I18N */ -build_collating_symbol (sbcset, name) +build_collating_symbol (bitset_t sbcset, const unsigned char *name) # endif /* not RE_ENABLE_I18N */ - re_bitset_ptr_t sbcset; - const unsigned char *name; { size_t name_len = strlen ((const char *) name); if (BE (name_len != 1, 0)) @@ -2806,12 +2700,8 @@ build_collating_symbol (sbcset, name) "[[.a-a.]]" etc. */ static bin_tree_t * -parse_bracket_exp (regexp, dfa, token, syntax, err) - re_string_t *regexp; - re_dfa_t *dfa; - re_token_t *token; - reg_syntax_t syntax; - reg_errcode_t *err; +parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, + reg_syntax_t syntax, reg_errcode_t *err) { #ifdef _LIBC const unsigned char *collseqmb; @@ -2833,23 +2723,28 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) { int32_t hash = elem_hash ((const char *) name, name_len); int32_t elem = hash % table_size; - int32_t second = hash % (table_size - 2); - while (symb_table[2 * elem] != 0) - { - /* First compare the hashing value. */ - if (symb_table[2 * elem] == hash - /* Compare the length of the name. */ - && name_len == extra[symb_table[2 * elem + 1]] - /* Compare the name. */ - && memcmp (name, &extra[symb_table[2 * elem + 1] + 1], - name_len) == 0) + if (symb_table[2 * elem] != 0) + { + int32_t second = hash % (table_size - 2) + 1; + + do { - /* Yep, this is the entry. */ - break; - } + /* First compare the hashing value. */ + if (symb_table[2 * elem] == hash + /* Compare the length of the name. */ + && name_len == extra[symb_table[2 * elem + 1]] + /* Compare the name. */ + && memcmp (name, &extra[symb_table[2 * elem + 1] + 1], + name_len) == 0) + { + /* Yep, this is the entry. */ + break; + } - /* Next entry. */ - elem += second; + /* Next entry. */ + elem += second; + } + while (symb_table[2 * elem] != 0); } return elem; } @@ -2931,7 +2826,7 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) build_range_exp (sbcset, mbcset, range_alloc, start_elem, end_elem) re_charset_t *mbcset; int *range_alloc; - re_bitset_ptr_t sbcset; + bitset_t sbcset; bracket_elem_t *start_elem, *end_elem; { unsigned int ch; @@ -3014,7 +2909,7 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) build_collating_symbol (sbcset, mbcset, coll_sym_alloc, name) re_charset_t *mbcset; int *coll_sym_alloc; - re_bitset_ptr_t sbcset; + bitset_t sbcset; const unsigned char *name; { int32_t elem, idx; @@ -3091,7 +2986,7 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) /* if (MB_CUR_MAX > 1) */ - collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC); + collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC); table_size = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_SYMB_HASH_SIZEMB); symb_table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_TABLEMB); @@ -3099,7 +2994,7 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) _NL_COLLATE_SYMB_EXTRAMB); } #endif - sbcset = (re_bitset_ptr_t) calloc (sizeof (unsigned int), BITSET_UINTS); + sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1); #ifdef RE_ENABLE_I18N mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1); #endif /* RE_ENABLE_I18N */ @@ -3309,12 +3204,12 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token); if (BE (mbc_tree == NULL, 0)) goto parse_bracket_exp_espace; - for (sbc_idx = 0; sbc_idx < BITSET_UINTS; ++sbc_idx) + for (sbc_idx = 0; sbc_idx < BITSET_WORDS; ++sbc_idx) if (sbcset[sbc_idx]) break; /* If there are no bits set in sbcset, there is no point of having both SIMPLE_BRACKET and COMPLEX_BRACKET. */ - if (sbc_idx < BITSET_UINTS) + if (sbc_idx < BITSET_WORDS) { /* Build a tree for simple bracket. */ br_token.type = SIMPLE_BRACKET; @@ -3362,15 +3257,9 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) /* Parse an element in the bracket expression. */ static reg_errcode_t -parse_bracket_element (elem, regexp, token, token_len, dfa, syntax, - accept_hyphen) - bracket_elem_t *elem; - re_string_t *regexp; - re_token_t *token; - int token_len; - re_dfa_t *dfa; - reg_syntax_t syntax; - int accept_hyphen; +parse_bracket_element (bracket_elem_t *elem, re_string_t *regexp, + re_token_t *token, int token_len, re_dfa_t *dfa, + reg_syntax_t syntax, int accept_hyphen) { #ifdef RE_ENABLE_I18N int cur_char_size; @@ -3408,10 +3297,8 @@ parse_bracket_element (elem, regexp, token, token_len, dfa, syntax, [=<equivalent_class>=]. */ static reg_errcode_t -parse_bracket_symbol (elem, regexp, token) - bracket_elem_t *elem; - re_string_t *regexp; - re_token_t *token; +parse_bracket_symbol (bracket_elem_t *elem, re_string_t *regexp, + re_token_t *token) { unsigned char ch, delim = token->opr.c; int i = 0; @@ -3458,16 +3345,13 @@ parse_bracket_symbol (elem, regexp, token) static reg_errcode_t #ifdef RE_ENABLE_I18N -build_equiv_class (sbcset, mbcset, equiv_class_alloc, name) - re_charset_t *mbcset; - int *equiv_class_alloc; +build_equiv_class (bitset_t sbcset, re_charset_t *mbcset, + int *equiv_class_alloc, const unsigned char *name) #else /* not RE_ENABLE_I18N */ -build_equiv_class (sbcset, name) +build_equiv_class (bitset_t sbcset, const unsigned char *name) #endif /* not RE_ENABLE_I18N */ - re_bitset_ptr_t sbcset; - const unsigned char *name; { -#if defined _LIBC +#ifdef _LIBC uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); if (nrules != 0) { @@ -3553,16 +3437,13 @@ build_equiv_class (sbcset, name) static reg_errcode_t #ifdef RE_ENABLE_I18N -build_charclass (trans, sbcset, mbcset, char_class_alloc, class_name, syntax) - re_charset_t *mbcset; - int *char_class_alloc; +build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset, + re_charset_t *mbcset, int *char_class_alloc, + const unsigned char *class_name, reg_syntax_t syntax) #else /* not RE_ENABLE_I18N */ -build_charclass (trans, sbcset, class_name, syntax) +build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset, + const unsigned char *class_name, reg_syntax_t syntax) #endif /* not RE_ENABLE_I18N */ - unsigned RE_TRANSLATE_TYPE trans; - re_bitset_ptr_t sbcset; - const unsigned char *class_name; - reg_syntax_t syntax; { int i; const char *name = (const char *) class_name; @@ -3592,39 +3473,45 @@ build_charclass (trans, sbcset, class_name, syntax) #endif /* RE_ENABLE_I18N */ #define BUILD_CHARCLASS_LOOP(ctype_func) \ - for (i = 0; i < SBC_MAX; ++i) \ + do { \ + if (BE (trans != NULL, 0)) \ { \ - if (ctype_func (i)) \ - { \ - int ch = trans ? trans[i] : i; \ - bitset_set (sbcset, ch); \ - } \ - } + for (i = 0; i < SBC_MAX; ++i) \ + if (ctype_func (i)) \ + bitset_set (sbcset, trans[i]); \ + } \ + else \ + { \ + for (i = 0; i < SBC_MAX; ++i) \ + if (ctype_func (i)) \ + bitset_set (sbcset, i); \ + } \ + } while (0) if (strcmp (name, "alnum") == 0) - BUILD_CHARCLASS_LOOP (isalnum) + BUILD_CHARCLASS_LOOP (isalnum); else if (strcmp (name, "cntrl") == 0) - BUILD_CHARCLASS_LOOP (iscntrl) + BUILD_CHARCLASS_LOOP (iscntrl); else if (strcmp (name, "lower") == 0) - BUILD_CHARCLASS_LOOP (islower) + BUILD_CHARCLASS_LOOP (islower); else if (strcmp (name, "space") == 0) - BUILD_CHARCLASS_LOOP (isspace) + BUILD_CHARCLASS_LOOP (isspace); else if (strcmp (name, "alpha") == 0) - BUILD_CHARCLASS_LOOP (isalpha) + BUILD_CHARCLASS_LOOP (isalpha); else if (strcmp (name, "digit") == 0) - BUILD_CHARCLASS_LOOP (isdigit) + BUILD_CHARCLASS_LOOP (isdigit); else if (strcmp (name, "print") == 0) - BUILD_CHARCLASS_LOOP (isprint) + BUILD_CHARCLASS_LOOP (isprint); else if (strcmp (name, "upper") == 0) - BUILD_CHARCLASS_LOOP (isupper) + BUILD_CHARCLASS_LOOP (isupper); else if (strcmp (name, "blank") == 0) - BUILD_CHARCLASS_LOOP (isblank) + BUILD_CHARCLASS_LOOP (isblank); else if (strcmp (name, "graph") == 0) - BUILD_CHARCLASS_LOOP (isgraph) + BUILD_CHARCLASS_LOOP (isgraph); else if (strcmp (name, "punct") == 0) - BUILD_CHARCLASS_LOOP (ispunct) + BUILD_CHARCLASS_LOOP (ispunct); else if (strcmp (name, "xdigit") == 0) - BUILD_CHARCLASS_LOOP (isxdigit) + BUILD_CHARCLASS_LOOP (isxdigit); else return REG_ECTYPE; @@ -3632,13 +3519,10 @@ build_charclass (trans, sbcset, class_name, syntax) } static bin_tree_t * -build_charclass_op (dfa, trans, class_name, extra, non_match, err) - re_dfa_t *dfa; - unsigned RE_TRANSLATE_TYPE trans; - const unsigned char *class_name; - const unsigned char *extra; - int non_match; - reg_errcode_t *err; +build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans, + const unsigned char *class_name, + const unsigned char *extra, int non_match, + reg_errcode_t *err) { re_bitset_ptr_t sbcset; #ifdef RE_ENABLE_I18N @@ -3649,7 +3533,7 @@ build_charclass_op (dfa, trans, class_name, extra, non_match, err) re_token_t br_token; bin_tree_t *tree; - sbcset = (re_bitset_ptr_t) calloc (sizeof (unsigned int), BITSET_UINTS); + sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1); #ifdef RE_ENABLE_I18N mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1); #endif /* RE_ENABLE_I18N */ @@ -3752,10 +3636,7 @@ build_charclass_op (dfa, trans, class_name, extra, non_match, err) Return -2, If an error is occured. */ static int -fetch_number (input, token, syntax) - re_string_t *input; - re_token_t *token; - reg_syntax_t syntax; +fetch_number (re_string_t *input, re_token_t *token, reg_syntax_t syntax) { int num = -1; unsigned char c; @@ -3795,11 +3676,8 @@ free_charset (re_charset_t *cset) /* Create a tree node. */ static bin_tree_t * -create_tree (dfa, left, right, type) - re_dfa_t *dfa; - bin_tree_t *left; - bin_tree_t *right; - re_token_type_t type; +create_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right, + re_token_type_t type) { re_token_t t; t.type = type; @@ -3807,11 +3685,8 @@ create_tree (dfa, left, right, type) } static bin_tree_t * -create_token_tree (dfa, left, right, token) - re_dfa_t *dfa; - bin_tree_t *left; - bin_tree_t *right; - const re_token_t *token; +create_token_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right, + const re_token_t *token) { bin_tree_t *tree; if (BE (dfa->str_tree_storage_idx == BIN_TREE_STORAGE_SIZE, 0)) @@ -3847,9 +3722,7 @@ create_token_tree (dfa, left, right, token) To be called from preorder or postorder. */ static reg_errcode_t -mark_opt_subexp (extra, node) - void *extra; - bin_tree_t *node; +mark_opt_subexp (void *extra, bin_tree_t *node) { int idx = (int) (long) extra; if (node->token.type == SUBEXP && node->token.opr.idx == idx) @@ -3889,9 +3762,7 @@ free_tree (void *extra, bin_tree_t *node) it's easier to duplicate. */ static bin_tree_t * -duplicate_tree (root, dfa) - const bin_tree_t *root; - re_dfa_t *dfa; +duplicate_tree (const bin_tree_t *root, re_dfa_t *dfa) { const bin_tree_t *node; bin_tree_t *dup_root; diff --git a/lib/regex_.h b/lib/regex_.h index e5ec398..f6c145d 100644 --- a/lib/regex_.h +++ b/lib/regex_.h @@ -346,75 +346,71 @@ typedef enum private to the regex routines. */ #ifndef RE_TRANSLATE_TYPE -# define RE_TRANSLATE_TYPE char * +# define RE_TRANSLATE_TYPE unsigned char * #endif struct re_pattern_buffer { -/* [[[begin pattern_buffer]]] */ - /* Space that holds the compiled pattern. It is declared as - `unsigned char *' because its elements are - sometimes used as array indexes. */ + /* Space that holds the compiled pattern. It is declared as + `unsigned char *' because its elements are sometimes used as + array indexes. */ unsigned char *buffer; - /* Number of bytes to which `buffer' points. */ + /* Number of bytes to which `buffer' points. */ unsigned long int allocated; - /* Number of bytes actually used in `buffer'. */ + /* Number of bytes actually used in `buffer'. */ unsigned long int used; - /* Syntax setting with which the pattern was compiled. */ + /* Syntax setting with which the pattern was compiled. */ reg_syntax_t syntax; - /* Pointer to a fastmap, if any, otherwise zero. re_search uses - the fastmap, if there is one, to skip over impossible - starting points for matches. */ + /* Pointer to a fastmap, if any, otherwise zero. re_search uses the + fastmap, if there is one, to skip over impossible starting points + for matches. */ char *fastmap; - /* Either a translate table to apply to all characters before - comparing them, or zero for no translation. The translation - is applied to a pattern when it is compiled and to a string - when it is matched. */ + /* Either a translate table to apply to all characters before + comparing them, or zero for no translation. The translation is + applied to a pattern when it is compiled and to a string when it + is matched. */ RE_TRANSLATE_TYPE translate; - /* Number of subexpressions found by the compiler. */ + /* Number of subexpressions found by the compiler. */ size_t re_nsub; - /* Zero if this pattern cannot match the empty string, one else. - Well, in truth it's used only in `re_search_2', to see - whether or not we should use the fastmap, so we don't set - this absolutely perfectly; see `re_compile_fastmap' (the - `duplicate' case). */ + /* Zero if this pattern cannot match the empty string, one else. + Well, in truth it's used only in `re_search_2', to see whether or + not we should use the fastmap, so we don't set this absolutely + perfectly; see `re_compile_fastmap' (the `duplicate' case). */ unsigned can_be_null : 1; - /* If REGS_UNALLOCATED, allocate space in the `regs' structure - for `max (RE_NREGS, re_nsub + 1)' groups. - If REGS_REALLOCATE, reallocate space if necessary. - If REGS_FIXED, use what's there. */ + /* If REGS_UNALLOCATED, allocate space in the `regs' structure + for `max (RE_NREGS, re_nsub + 1)' groups. + If REGS_REALLOCATE, reallocate space if necessary. + If REGS_FIXED, use what's there. */ #define REGS_UNALLOCATED 0 #define REGS_REALLOCATE 1 #define REGS_FIXED 2 unsigned regs_allocated : 2; - /* Set to zero when `regex_compile' compiles a pattern; set to one - by `re_compile_fastmap' if it updates the fastmap. */ + /* Set to zero when `regex_compile' compiles a pattern; set to one + by `re_compile_fastmap' if it updates the fastmap. */ unsigned fastmap_accurate : 1; - /* If set, `re_match_2' does not return information about - subexpressions. */ + /* If set, `re_match_2' does not return information about + subexpressions. */ unsigned no_sub : 1; - /* If set, a beginning-of-line anchor doesn't match at the - beginning of the string. */ + /* If set, a beginning-of-line anchor doesn't match at the beginning + of the string. */ unsigned not_bol : 1; - /* Similarly for an end-of-line anchor. */ + /* Similarly for an end-of-line anchor. */ unsigned not_eol : 1; - /* If true, an anchor at a newline matches. */ + /* If true, an anchor at a newline matches. */ unsigned newline_anchor : 1; - -/* [[[end pattern_buffer]]] */ }; typedef struct re_pattern_buffer regex_t; @@ -454,19 +450,19 @@ typedef struct /* Sets the current default syntax to SYNTAX, and return the old syntax. You can also simply assign to the `re_syntax_options' variable. */ -extern reg_syntax_t re_set_syntax (reg_syntax_t syntax); +extern reg_syntax_t re_set_syntax (reg_syntax_t __syntax); /* Compile the regular expression PATTERN, with length LENGTH and syntax given by the global `re_syntax_options', into the buffer BUFFER. Return NULL if successful, and an error string if not. */ -extern const char *re_compile_pattern (const char *pattern, size_t length, - struct re_pattern_buffer *buffer); +extern const char *re_compile_pattern (const char *__pattern, size_t __length, + struct re_pattern_buffer *__buffer); /* Compile a fastmap for the compiled pattern in BUFFER; used to accelerate searches. Return 0 if successful and -2 if was an internal error. */ -extern int re_compile_fastmap (struct re_pattern_buffer *buffer); +extern int re_compile_fastmap (struct re_pattern_buffer *__buffer); /* Search in the string STRING (with length LENGTH) for the pattern @@ -474,29 +470,30 @@ extern int re_compile_fastmap (struct re_pattern_buffer *buffer); characters. Return the starting position of the match, -1 for no match, or -2 for an internal error. Also return register information in REGS (if REGS and BUFFER->no_sub are nonzero). */ -extern int re_search (struct re_pattern_buffer *buffer, const char *string, - int length, int start, int range, - struct re_registers *regs); +extern int re_search (struct re_pattern_buffer *__buffer, const char *__string, + int __length, int __start, int __range, + struct re_registers *__regs); /* Like `re_search', but search in the concatenation of STRING1 and STRING2. Also, stop searching at index START + STOP. */ -extern int re_search_2 (struct re_pattern_buffer *buffer, const char *string1, - int length1, const char *string2, int length2, - int start, int range, struct re_registers *regs, - int stop); +extern int re_search_2 (struct re_pattern_buffer *__buffer, + const char *__string1, int __length1, + const char *__string2, int __length2, int __start, + int __range, struct re_registers *__regs, int __stop); /* Like `re_search', but return how many characters in STRING the regexp in BUFFER matched, starting at position START. */ -extern int re_match (struct re_pattern_buffer *buffer, const char *string, - int length, int start, struct re_registers *regs); +extern int re_match (struct re_pattern_buffer *__buffer, const char *__string, + int __length, int __start, struct re_registers *__regs); /* Relates to `re_match' as `re_search_2' relates to `re_search'. */ -extern int re_match_2 (struct re_pattern_buffer *buffer, const char *string1, - int length1, const char *string2, int length2, - int start, struct re_registers *regs, int stop); +extern int re_match_2 (struct re_pattern_buffer *__buffer, + const char *__string1, int __length1, + const char *__string2, int __length2, int __start, + struct re_registers *__regs, int __stop); /* Set REGS to hold NUM_REGS registers, storing them in STARTS and @@ -511,9 +508,10 @@ extern int re_match_2 (struct re_pattern_buffer *buffer, const char *string1, Unless this function is called, the first search or match using PATTERN_BUFFER will allocate its own register data, without freeing the old data. */ -extern void re_set_registers (struct re_pattern_buffer *buffer, - struct re_registers *regs, unsigned num_regs, - regoff_t *starts, regoff_t *ends); +extern void re_set_registers (struct re_pattern_buffer *__buffer, + struct re_registers *__regs, + unsigned int __num_regs, + regoff_t *__starts, regoff_t *__ends); #if defined _REGEX_RE_COMP || defined _LIBC # ifndef _CRAY @@ -564,11 +562,3 @@ extern void regfree (regex_t *__preg); #endif /* C++ */ #endif /* regex.h */ - -/* -Local variables: -make-backup-files: t -version-control: t -trim-versions-without-asking: nil -End: -*/ diff --git a/lib/regex_internal.c b/lib/regex_internal.c index 240e887..a0b3fa7 100644 --- a/lib/regex_internal.c +++ b/lib/regex_internal.c @@ -22,13 +22,6 @@ static void re_string_construct_common (const char *str, int len, re_string_t *pstr, RE_TRANSLATE_TYPE trans, int icase, const re_dfa_t *dfa) internal_function; -#ifdef RE_ENABLE_I18N -static int re_string_skip_chars (re_string_t *pstr, int new_raw_idx, - wint_t *last_wc) internal_function; -#endif /* RE_ENABLE_I18N */ -static reg_errcode_t register_state (const re_dfa_t *dfa, - re_dfastate_t *newstate, - unsigned int hash) internal_function; static re_dfastate_t *create_ci_newstate (const re_dfa_t *dfa, const re_node_set *nodes, unsigned int hash) internal_function; @@ -36,8 +29,6 @@ static re_dfastate_t *create_cd_newstate (const re_dfa_t *dfa, const re_node_set *nodes, unsigned int context, unsigned int hash) internal_function; -static inline unsigned int calc_state_hash (const re_node_set *nodes, - unsigned int context) internal_function; /* Functions for string operation. */ @@ -45,12 +36,9 @@ static inline unsigned int calc_state_hash (const re_node_set *nodes, re_string_reconstruct before using the object. */ static reg_errcode_t -re_string_allocate (pstr, str, len, init_len, trans, icase, dfa) - re_string_t *pstr; - const char *str; - int len, init_len, icase; - RE_TRANSLATE_TYPE trans; - const re_dfa_t *dfa; +internal_function +re_string_allocate (re_string_t *pstr, const char *str, int len, int init_len, + RE_TRANSLATE_TYPE trans, int icase, const re_dfa_t *dfa) { reg_errcode_t ret; int init_buf_len; @@ -76,12 +64,9 @@ re_string_allocate (pstr, str, len, init_len, trans, icase, dfa) /* This function allocate the buffers, and initialize them. */ static reg_errcode_t -re_string_construct (pstr, str, len, trans, icase, dfa) - re_string_t *pstr; - const char *str; - int len, icase; - RE_TRANSLATE_TYPE trans; - const re_dfa_t *dfa; +internal_function +re_string_construct (re_string_t *pstr, const char *str, int len, + RE_TRANSLATE_TYPE trans, int icase, const re_dfa_t *dfa) { reg_errcode_t ret; memset (pstr, '\0', sizeof (re_string_t)); @@ -142,9 +127,8 @@ re_string_construct (pstr, str, len, trans, icase, dfa) /* Helper functions for re_string_allocate, and re_string_construct. */ static reg_errcode_t -re_string_realloc_buffers (pstr, new_buf_len) - re_string_t *pstr; - int new_buf_len; +internal_function +re_string_realloc_buffers (re_string_t *pstr, int new_buf_len) { #ifdef RE_ENABLE_I18N if (pstr->mb_cur_max > 1) @@ -176,18 +160,15 @@ re_string_realloc_buffers (pstr, new_buf_len) static void -re_string_construct_common (str, len, pstr, trans, icase, dfa) - const char *str; - int len; - re_string_t *pstr; - RE_TRANSLATE_TYPE trans; - int icase; - const re_dfa_t *dfa; +internal_function +re_string_construct_common (const char *str, int len, re_string_t *pstr, + RE_TRANSLATE_TYPE trans, int icase, + const re_dfa_t *dfa) { pstr->raw_mbs = (const unsigned char *) str; pstr->len = len; pstr->raw_len = len; - pstr->trans = (unsigned RE_TRANSLATE_TYPE) trans; + pstr->trans = trans; pstr->icase = icase ? 1 : 0; pstr->mbs_allocated = (trans != NULL || icase); pstr->mb_cur_max = dfa->mb_cur_max; @@ -211,8 +192,8 @@ re_string_construct_common (str, len, pstr, trans, icase, dfa) built and starts from PSTR->VALID_LEN. */ static void -build_wcs_buffer (pstr) - re_string_t *pstr; +internal_function +build_wcs_buffer (re_string_t *pstr) { #ifdef _LIBC unsigned char buf[MB_LEN_MAX]; @@ -279,8 +260,8 @@ build_wcs_buffer (pstr) but for REG_ICASE. */ static int -build_wcs_upper_buffer (pstr) - re_string_t *pstr; +internal_function +build_wcs_upper_buffer (re_string_t *pstr) { mbstate_t prev_st; int src_idx, byte_idx, end_idx, remain_len; @@ -495,10 +476,8 @@ build_wcs_upper_buffer (pstr) Return the index. */ static int -re_string_skip_chars (pstr, new_raw_idx, last_wc) - re_string_t *pstr; - int new_raw_idx; - wint_t *last_wc; +internal_function +re_string_skip_chars (re_string_t *pstr, int new_raw_idx, wint_t *last_wc) { mbstate_t prev_st; int rawbuf_idx; @@ -532,8 +511,8 @@ re_string_skip_chars (pstr, new_raw_idx, last_wc) This function is used in case of REG_ICASE. */ static void -build_upper_buffer (pstr) - re_string_t *pstr; +internal_function +build_upper_buffer (re_string_t *pstr) { int char_idx, end_idx; end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len; @@ -555,8 +534,8 @@ build_upper_buffer (pstr) /* Apply TRANS to the buffer in PSTR. */ static void -re_string_translate_buffer (pstr) - re_string_t *pstr; +internal_function +re_string_translate_buffer (re_string_t *pstr) { int buf_idx, end_idx; end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len; @@ -576,9 +555,8 @@ re_string_translate_buffer (pstr) convert to upper case in case of REG_ICASE, apply translation. */ static reg_errcode_t -re_string_reconstruct (pstr, idx, eflags) - re_string_t *pstr; - int idx, eflags; +internal_function +re_string_reconstruct (re_string_t *pstr, int idx, int eflags) { int offset = idx - pstr->raw_mbs_idx; if (BE (offset < 0, 0)) @@ -767,9 +745,8 @@ re_string_reconstruct (pstr, idx, eflags) } static unsigned char -re_string_peek_byte_case (pstr, idx) - const re_string_t *pstr; - int idx; +internal_function __attribute ((pure)) +re_string_peek_byte_case (const re_string_t *pstr, int idx) { int ch, off; @@ -804,8 +781,8 @@ re_string_peek_byte_case (pstr, idx) } static unsigned char -re_string_fetch_byte_case (pstr) - re_string_t *pstr; +internal_function __attribute ((pure)) +re_string_fetch_byte_case (re_string_t *pstr) { if (BE (!pstr->mbs_allocated, 1)) return re_string_fetch_byte (pstr); @@ -841,8 +818,8 @@ re_string_fetch_byte_case (pstr) } static void -re_string_destruct (pstr) - re_string_t *pstr; +internal_function +re_string_destruct (re_string_t *pstr) { #ifdef RE_ENABLE_I18N re_free (pstr->wcs); @@ -855,9 +832,8 @@ re_string_destruct (pstr) /* Return the context at IDX in INPUT. */ static unsigned int -re_string_context_at (input, idx, eflags) - const re_string_t *input; - int idx, eflags; +internal_function +re_string_context_at (const re_string_t *input, int idx, int eflags) { int c; if (BE (idx < 0, 0)) @@ -901,9 +877,8 @@ re_string_context_at (input, idx, eflags) /* Functions for set operation. */ static reg_errcode_t -re_node_set_alloc (set, size) - re_node_set *set; - int size; +internal_function +re_node_set_alloc (re_node_set *set, int size) { set->alloc = size; set->nelem = 0; @@ -914,9 +889,8 @@ re_node_set_alloc (set, size) } static reg_errcode_t -re_node_set_init_1 (set, elem) - re_node_set *set; - int elem; +internal_function +re_node_set_init_1 (re_node_set *set, int elem) { set->alloc = 1; set->nelem = 1; @@ -931,9 +905,8 @@ re_node_set_init_1 (set, elem) } static reg_errcode_t -re_node_set_init_2 (set, elem1, elem2) - re_node_set *set; - int elem1, elem2; +internal_function +re_node_set_init_2 (re_node_set *set, int elem1, int elem2) { set->alloc = 2; set->elems = re_malloc (int, 2); @@ -962,9 +935,8 @@ re_node_set_init_2 (set, elem1, elem2) } static reg_errcode_t -re_node_set_init_copy (dest, src) - re_node_set *dest; - const re_node_set *src; +internal_function +re_node_set_init_copy (re_node_set *dest, const re_node_set *src) { dest->nelem = src->nelem; if (src->nelem > 0) @@ -988,9 +960,9 @@ re_node_set_init_copy (dest, src) Note: We assume dest->elems is NULL, when dest->alloc is 0. */ static reg_errcode_t -re_node_set_add_intersect (dest, src1, src2) - re_node_set *dest; - const re_node_set *src1, *src2; +internal_function +re_node_set_add_intersect (re_node_set *dest, const re_node_set *src1, + const re_node_set *src2) { int i1, i2, is, id, delta, sbase; if (src1->nelem == 0 || src2->nelem == 0) @@ -1079,9 +1051,9 @@ re_node_set_add_intersect (dest, src1, src2) DEST. Return value indicate the error code or REG_NOERROR if succeeded. */ static reg_errcode_t -re_node_set_init_union (dest, src1, src2) - re_node_set *dest; - const re_node_set *src1, *src2; +internal_function +re_node_set_init_union (re_node_set *dest, const re_node_set *src1, + const re_node_set *src2) { int i1, i2, id; if (src1 != NULL && src1->nelem > 0 && src2 != NULL && src2->nelem > 0) @@ -1132,9 +1104,8 @@ re_node_set_init_union (dest, src1, src2) DEST. Return value indicate the error code or REG_NOERROR if succeeded. */ static reg_errcode_t -re_node_set_merge (dest, src) - re_node_set *dest; - const re_node_set *src; +internal_function +re_node_set_merge (re_node_set *dest, const re_node_set *src) { int is, id, sbase, delta; if (src == NULL || src->nelem == 0) @@ -1216,9 +1187,8 @@ re_node_set_merge (dest, src) return -1 if an error is occured, return 1 otherwise. */ static int -re_node_set_insert (set, elem) - re_node_set *set; - int elem; +internal_function +re_node_set_insert (re_node_set *set, int elem) { int idx; /* In case the set is empty. */ @@ -1274,9 +1244,8 @@ re_node_set_insert (set, elem) Return -1 if an error is occured, return 1 otherwise. */ static int -re_node_set_insert_last (set, elem) - re_node_set *set; - int elem; +internal_function +re_node_set_insert_last (re_node_set *set, int elem) { /* Realloc if we need. */ if (set->alloc == set->nelem) @@ -1298,8 +1267,8 @@ re_node_set_insert_last (set, elem) return 1 if SET1 and SET2 are equivalent, return 0 otherwise. */ static int -re_node_set_compare (set1, set2) - const re_node_set *set1, *set2; +internal_function __attribute ((pure)) +re_node_set_compare (const re_node_set *set1, const re_node_set *set2) { int i; if (set1 == NULL || set2 == NULL || set1->nelem != set2->nelem) @@ -1313,9 +1282,8 @@ re_node_set_compare (set1, set2) /* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise. */ static int -re_node_set_contains (set, elem) - const re_node_set *set; - int elem; +internal_function __attribute ((pure)) +re_node_set_contains (const re_node_set *set, int elem) { unsigned int idx, right, mid; if (set->nelem <= 0) @@ -1336,9 +1304,8 @@ re_node_set_contains (set, elem) } static void -re_node_set_remove_at (set, idx) - re_node_set *set; - int idx; +internal_function +re_node_set_remove_at (re_node_set *set, int idx) { if (idx < 0 || idx >= set->nelem) return; @@ -1352,9 +1319,8 @@ re_node_set_remove_at (set, idx) Or return -1, if an error will be occured. */ static int -re_dfa_add_node (dfa, token) - re_dfa_t *dfa; - re_token_t token; +internal_function +re_dfa_add_node (re_dfa_t *dfa, re_token_t token) { int type = token.type; if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0)) @@ -1398,9 +1364,8 @@ re_dfa_add_node (dfa, token) } static inline unsigned int -calc_state_hash (nodes, context) - const re_node_set *nodes; - unsigned int context; +internal_function +calc_state_hash (const re_node_set *nodes, unsigned int context) { unsigned int hash = nodes->nelem + context; int i; @@ -1418,11 +1383,10 @@ calc_state_hash (nodes, context) - We never return non-NULL value in case of any errors, it is for optimization. */ -static re_dfastate_t* -re_acquire_state (err, dfa, nodes) - reg_errcode_t *err; - const re_dfa_t *dfa; - const re_node_set *nodes; +static re_dfastate_t * +internal_function +re_acquire_state (reg_errcode_t *err, const re_dfa_t *dfa, + const re_node_set *nodes) { unsigned int hash; re_dfastate_t *new_state; @@ -1463,12 +1427,10 @@ re_acquire_state (err, dfa, nodes) - We never return non-NULL value in case of any errors, it is for optimization. */ -static re_dfastate_t* -re_acquire_state_context (err, dfa, nodes, context) - reg_errcode_t *err; - const re_dfa_t *dfa; - const re_node_set *nodes; - unsigned int context; +static re_dfastate_t * +internal_function +re_acquire_state_context (reg_errcode_t *err, const re_dfa_t *dfa, + const re_node_set *nodes, unsigned int context) { unsigned int hash; re_dfastate_t *new_state; @@ -1503,10 +1465,8 @@ re_acquire_state_context (err, dfa, nodes, context) indicates the error code if failed. */ static reg_errcode_t -register_state (dfa, newstate, hash) - const re_dfa_t *dfa; - re_dfastate_t *newstate; - unsigned int hash; +register_state (const re_dfa_t *dfa, re_dfastate_t *newstate, + unsigned int hash) { struct re_state_table_entry *spot; reg_errcode_t err; @@ -1538,14 +1498,29 @@ register_state (dfa, newstate, hash) return REG_NOERROR; } +static void +free_state (re_dfastate_t *state) +{ + re_node_set_free (&state->non_eps_nodes); + re_node_set_free (&state->inveclosure); + if (state->entrance_nodes != &state->nodes) + { + re_node_set_free (state->entrance_nodes); + re_free (state->entrance_nodes); + } + re_node_set_free (&state->nodes); + re_free (state->word_trtable); + re_free (state->trtable); + re_free (state); +} + /* Create the new state which is independ of contexts. Return the new state if succeeded, otherwise return NULL. */ static re_dfastate_t * -create_ci_newstate (dfa, nodes, hash) - const re_dfa_t *dfa; - const re_node_set *nodes; - unsigned int hash; +internal_function +create_ci_newstate (const re_dfa_t *dfa, const re_node_set *nodes, + unsigned int hash) { int i; reg_errcode_t err; @@ -1593,10 +1568,9 @@ create_ci_newstate (dfa, nodes, hash) Return the new state if succeeded, otherwise return NULL. */ static re_dfastate_t * -create_cd_newstate (dfa, nodes, context, hash) - const re_dfa_t *dfa; - const re_node_set *nodes; - unsigned int context, hash; +internal_function +create_cd_newstate (const re_dfa_t *dfa, const re_node_set *nodes, + unsigned int context, unsigned int hash) { int i, nctx_nodes = 0; reg_errcode_t err; @@ -1667,20 +1641,3 @@ create_cd_newstate (dfa, nodes, context, hash) } return newstate; } - -static void -free_state (state) - re_dfastate_t *state; -{ - re_node_set_free (&state->non_eps_nodes); - re_node_set_free (&state->inveclosure); - if (state->entrance_nodes != &state->nodes) - { - re_node_set_free (state->entrance_nodes); - re_free (state->entrance_nodes); - } - re_node_set_free (&state->nodes); - re_free (state->word_trtable); - re_free (state->trtable); - re_free (state); -} diff --git a/lib/regex_internal.h b/lib/regex_internal.h index 4d6a7a8..f22c13c 100644 --- a/lib/regex_internal.h +++ b/lib/regex_internal.h @@ -39,6 +39,9 @@ #if defined HAVE_WCTYPE_H || defined _LIBC # include <wctype.h> #endif /* HAVE_WCTYPE_H || _LIBC */ +#if defined HAVE_STDBOOL_H || defined _LIBC +# include <stdbool.h> +#endif /* HAVE_STDBOOL_H || _LIBC */ #if defined _LIBC # include <bits/libc-lock.h> #else @@ -117,29 +120,35 @@ # define __attribute(arg) #endif +#ifndef SIZE_MAX +#define SIZE_MAX ((size_t)-1) +#endif + extern const char __re_error_msgid[] attribute_hidden; extern const size_t __re_error_msgid_idx[] attribute_hidden; -/* Number of bits in an unsinged int. */ -#define UINT_BITS (sizeof (unsigned int) * CHAR_BIT) -/* Number of unsigned int in an bit_set. */ -#define BITSET_UINTS ((SBC_MAX + UINT_BITS - 1) / UINT_BITS) -typedef unsigned int bitset[BITSET_UINTS]; -typedef unsigned int *re_bitset_ptr_t; -typedef const unsigned int *re_const_bitset_ptr_t; - -#define bitset_set(set,i) (set[i / UINT_BITS] |= 1u << i % UINT_BITS) -#define bitset_clear(set,i) (set[i / UINT_BITS] &= ~(1u << i % UINT_BITS)) -#define bitset_contain(set,i) (set[i / UINT_BITS] & (1u << i % UINT_BITS)) -#define bitset_empty(set) memset (set, 0, sizeof (unsigned int) * BITSET_UINTS) -#define bitset_set_all(set) \ - memset (set, 255, sizeof (unsigned int) * BITSET_UINTS) -#define bitset_copy(dest,src) \ - memcpy (dest, src, sizeof (unsigned int) * BITSET_UINTS) -static inline void bitset_not (bitset set); -static inline void bitset_merge (bitset dest, const bitset src); -static inline void bitset_not_merge (bitset dest, const bitset src); -static inline void bitset_mask (bitset dest, const bitset src); +/* An integer used to represent a set of bits. It must be unsigned, + and must be at least as wide as unsigned int. */ +typedef unsigned long int bitset_word_t; +/* All bits set in a bitset_word_t. */ +#define BITSET_WORD_MAX ULONG_MAX +/* Number of bits in a bitset_word_t. */ +#define BITSET_WORD_BITS (sizeof (bitset_word_t) * CHAR_BIT) +/* Number of bitset_word_t in a bit_set. */ +#define BITSET_WORDS (SBC_MAX / BITSET_WORD_BITS) +typedef bitset_word_t bitset_t[BITSET_WORDS]; +typedef bitset_word_t *re_bitset_ptr_t; +typedef const bitset_word_t *re_const_bitset_ptr_t; + +#define bitset_set(set,i) \ + (set[i / BITSET_WORD_BITS] |= (bitset_word_t) 1 << i % BITSET_WORD_BITS) +#define bitset_clear(set,i) \ + (set[i / BITSET_WORD_BITS] &= ~((bitset_word_t) 1 << i % BITSET_WORD_BITS)) +#define bitset_contain(set,i) \ + (set[i / BITSET_WORD_BITS] & ((bitset_word_t) 1 << i % BITSET_WORD_BITS)) +#define bitset_empty(set) memset (set, '\0', sizeof (bitset_t)) +#define bitset_set_all(set) memset (set, '\xff', sizeof (bitset_t)) +#define bitset_copy(dest,src) memcpy (dest, src, sizeof (bitset_t)) #define PREV_WORD_CONSTRAINT 0x0001 #define PREV_NOTWORD_CONSTRAINT 0x0002 @@ -345,7 +354,7 @@ struct re_string_t the beginning of the input string. */ unsigned int tip_context; /* The translation passed as a part of an argument of re_compile_pattern. */ - unsigned RE_TRANSLATE_TYPE trans; + RE_TRANSLATE_TYPE trans; /* Copy of re_dfa_t's word_char. */ re_const_bitset_ptr_t word_char; /* 1 if REG_ICASE. */ @@ -372,45 +381,18 @@ typedef struct re_dfa_t re_dfa_t; # endif #endif -#ifndef RE_NO_INTERNAL_PROTOTYPES -static reg_errcode_t re_string_allocate (re_string_t *pstr, const char *str, - int len, int init_len, - RE_TRANSLATE_TYPE trans, int icase, - const re_dfa_t *dfa) - internal_function; -static reg_errcode_t re_string_construct (re_string_t *pstr, const char *str, - int len, RE_TRANSLATE_TYPE trans, - int icase, const re_dfa_t *dfa) - internal_function; -static reg_errcode_t re_string_reconstruct (re_string_t *pstr, int idx, - int eflags) internal_function; static reg_errcode_t re_string_realloc_buffers (re_string_t *pstr, int new_buf_len) internal_function; -# ifdef RE_ENABLE_I18N +#ifdef RE_ENABLE_I18N static void build_wcs_buffer (re_string_t *pstr) internal_function; static int build_wcs_upper_buffer (re_string_t *pstr) internal_function; -# endif /* RE_ENABLE_I18N */ +#endif /* RE_ENABLE_I18N */ static void build_upper_buffer (re_string_t *pstr) internal_function; static void re_string_translate_buffer (re_string_t *pstr) internal_function; -static void re_string_destruct (re_string_t *pstr) internal_function; -# ifdef RE_ENABLE_I18N -static int re_string_elem_size_at (const re_string_t *pstr, int idx) - internal_function __attribute ((pure)); -static inline int re_string_char_size_at (const re_string_t *pstr, int idx) - internal_function __attribute ((pure)); -static inline wint_t re_string_wchar_at (const re_string_t *pstr, int idx) - internal_function __attribute ((pure)); -# endif /* RE_ENABLE_I18N */ static unsigned int re_string_context_at (const re_string_t *input, int idx, int eflags) internal_function __attribute ((pure)); -static unsigned char re_string_peek_byte_case (const re_string_t *pstr, - int idx) - internal_function __attribute ((pure)); -static unsigned char re_string_fetch_byte_case (re_string_t *pstr) - internal_function __attribute ((pure)); -#endif #define re_string_peek_byte(pstr, offset) \ ((pstr)->mbs[(pstr)->cur_idx + offset]) #define re_string_fetch_byte(pstr) \ @@ -576,9 +558,9 @@ typedef struct /* The string object corresponding to the input string. */ re_string_t input; #if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L) - re_dfa_t *const dfa; + const re_dfa_t *const dfa; #else - re_dfa_t *dfa; + const re_dfa_t *dfa; #endif /* EFLAGS of the argument of regexec. */ int eflags; @@ -648,8 +630,8 @@ struct re_dfa_t int nbackref; /* The number of backreference in this dfa. */ /* Bitmap expressing which backreference is used. */ - unsigned int used_bkref_map; - unsigned int completed_bkref_map; + bitset_word_t used_bkref_map; + bitset_word_t completed_bkref_map; unsigned int has_plural_match : 1; /* If this dfa has "multibyte node", which is a backreference or @@ -660,7 +642,7 @@ struct re_dfa_t unsigned int map_notascii : 1; unsigned int word_ops_used : 1; int mb_cur_max; - bitset word_char; + bitset_t word_char; reg_syntax_t syntax; int *subexp_map; #ifdef DEBUG @@ -669,46 +651,11 @@ struct re_dfa_t __libc_lock_define (, lock) }; -#ifndef RE_NO_INTERNAL_PROTOTYPES -static reg_errcode_t re_node_set_alloc (re_node_set *set, int size) internal_function; -static reg_errcode_t re_node_set_init_1 (re_node_set *set, int elem) internal_function; -static reg_errcode_t re_node_set_init_2 (re_node_set *set, int elem1, - int elem2) internal_function; #define re_node_set_init_empty(set) memset (set, '\0', sizeof (re_node_set)) -static reg_errcode_t re_node_set_init_copy (re_node_set *dest, - const re_node_set *src) internal_function; -static reg_errcode_t re_node_set_add_intersect (re_node_set *dest, - const re_node_set *src1, - const re_node_set *src2) internal_function; -static reg_errcode_t re_node_set_init_union (re_node_set *dest, - const re_node_set *src1, - const re_node_set *src2) internal_function; -static reg_errcode_t re_node_set_merge (re_node_set *dest, - const re_node_set *src) internal_function; -static int re_node_set_insert (re_node_set *set, int elem) internal_function; -static int re_node_set_insert_last (re_node_set *set, - int elem) internal_function; -static int re_node_set_compare (const re_node_set *set1, - const re_node_set *set2) - internal_function __attribute ((pure)); -static int re_node_set_contains (const re_node_set *set, int elem) - internal_function __attribute ((pure)); -static void re_node_set_remove_at (re_node_set *set, int idx) internal_function; #define re_node_set_remove(set,id) \ (re_node_set_remove_at (set, re_node_set_contains (set, id) - 1)) #define re_node_set_empty(p) ((p)->nelem = 0) #define re_node_set_free(set) re_free ((set)->elems) -static int re_dfa_add_node (re_dfa_t *dfa, re_token_t token) internal_function; -static re_dfastate_t *re_acquire_state (reg_errcode_t *err, const - re_dfa_t *dfa, - const re_node_set *nodes) - internal_function; -static re_dfastate_t *re_acquire_state_context (reg_errcode_t *err, - const re_dfa_t *dfa, - const re_node_set *nodes, - unsigned int context) internal_function; -static void free_state (re_dfastate_t *state) internal_function; -#endif typedef enum @@ -734,41 +681,33 @@ typedef struct /* Inline functions for bitset operation. */ static inline void -bitset_not (bitset set) +bitset_not (bitset_t set) { int bitset_i; - for (bitset_i = 0; bitset_i < BITSET_UINTS; ++bitset_i) + for (bitset_i = 0; bitset_i < BITSET_WORDS; ++bitset_i) set[bitset_i] = ~set[bitset_i]; } static inline void -bitset_merge (bitset dest, const bitset src) +bitset_merge (bitset_t dest, const bitset_t src) { int bitset_i; - for (bitset_i = 0; bitset_i < BITSET_UINTS; ++bitset_i) + for (bitset_i = 0; bitset_i < BITSET_WORDS; ++bitset_i) dest[bitset_i] |= src[bitset_i]; } static inline void -bitset_not_merge (bitset dest, const bitset src) -{ - int i; - for (i = 0; i < BITSET_UINTS; ++i) - dest[i] |= ~src[i]; -} - -static inline void -bitset_mask (bitset dest, const bitset src) +bitset_mask (bitset_t dest, const bitset_t src) { int bitset_i; - for (bitset_i = 0; bitset_i < BITSET_UINTS; ++bitset_i) + for (bitset_i = 0; bitset_i < BITSET_WORDS; ++bitset_i) dest[bitset_i] &= src[bitset_i]; } -#if defined RE_ENABLE_I18N && !defined RE_NO_INTERNAL_PROTOTYPES +#ifdef RE_ENABLE_I18N /* Inline functions for re_string. */ static inline int -internal_function +internal_function __attribute ((pure)) re_string_char_size_at (const re_string_t *pstr, int idx) { int byte_idx; @@ -781,7 +720,7 @@ re_string_char_size_at (const re_string_t *pstr, int idx) } static inline wint_t -internal_function +internal_function __attribute ((pure)) re_string_wchar_at (const re_string_t *pstr, int idx) { if (pstr->mb_cur_max == 1) @@ -790,14 +729,14 @@ re_string_wchar_at (const re_string_t *pstr, int idx) } static int -internal_function +internal_function __attribute ((pure)) re_string_elem_size_at (const re_string_t *pstr, int idx) { -#ifdef _LIBC +# ifdef _LIBC const unsigned char *p, *extra; const int32_t *table, *indirect; int32_t tmp; -# include <locale/weight.h> +# include <locale/weight.h> uint_fast32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); if (nrules != 0) @@ -812,7 +751,7 @@ re_string_elem_size_at (const re_string_t *pstr, int idx) return p - pstr->mbs - idx; } else -#endif /* _LIBC */ +# endif /* _LIBC */ return 1; } #endif /* RE_ENABLE_I18N */ diff --git a/lib/regexec.c b/lib/regexec.c index f6747a2..bdfa355 100644 --- a/lib/regexec.c +++ b/lib/regexec.c @@ -52,82 +52,76 @@ static int re_search_stub (struct re_pattern_buffer *bufp, int ret_len) internal_function; static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, int nregs, int regs_allocated) internal_function; -static inline re_dfastate_t *acquire_init_state_context - (reg_errcode_t *err, const re_match_context_t *mctx, int idx) - __attribute ((always_inline)) internal_function; static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx) internal_function; static int check_matching (re_match_context_t *mctx, int fl_longest_match, - int *p_match_first) - internal_function; -static int check_halt_node_context (const re_dfa_t *dfa, int node, - unsigned int context) internal_function; + int *p_match_first) internal_function; static int check_halt_state_context (const re_match_context_t *mctx, const re_dfastate_t *state, int idx) internal_function; -static void update_regs (re_dfa_t *dfa, regmatch_t *pmatch, +static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch, regmatch_t *prev_idx_match, int cur_node, int cur_idx, int nmatch) internal_function; -static int proceed_next_node (const re_match_context_t *mctx, - int nregs, regmatch_t *regs, - int *pidx, int node, re_node_set *eps_via_nodes, - struct re_fail_stack_t *fs) internal_function; static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs, int str_idx, int dest_node, int nregs, regmatch_t *regs, - re_node_set *eps_via_nodes) internal_function; -static int pop_fail_stack (struct re_fail_stack_t *fs, int *pidx, int nregs, - regmatch_t *regs, re_node_set *eps_via_nodes) internal_function; + re_node_set *eps_via_nodes) + internal_function; static reg_errcode_t set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch, regmatch_t *pmatch, int fl_backtrack) internal_function; -static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs) internal_function; +static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs) + internal_function; #ifdef RE_ENABLE_I18N static int sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx, - int node_idx, int str_idx, int max_str_idx) internal_function; + int node_idx, int str_idx, int max_str_idx) + internal_function; #endif /* RE_ENABLE_I18N */ -static reg_errcode_t sift_states_backward (re_match_context_t *mctx, - re_sift_context_t *sctx) internal_function; -static reg_errcode_t build_sifted_states (re_match_context_t *mctx, +static reg_errcode_t sift_states_backward (const re_match_context_t *mctx, + re_sift_context_t *sctx) + internal_function; +static reg_errcode_t build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx, int str_idx, - re_node_set *cur_dest) internal_function; -static reg_errcode_t update_cur_sifted_state (re_match_context_t *mctx, + re_node_set *cur_dest) + internal_function; +static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx, re_sift_context_t *sctx, int str_idx, - re_node_set *dest_nodes) internal_function; -static reg_errcode_t add_epsilon_src_nodes (re_dfa_t *dfa, - re_node_set *dest_nodes, - const re_node_set *candidates) internal_function; -static reg_errcode_t sub_epsilon_src_nodes (re_dfa_t *dfa, int node, + re_node_set *dest_nodes) + internal_function; +static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes, - const re_node_set *and_nodes) internal_function; + const re_node_set *candidates) + internal_function; static int check_dst_limits (const re_match_context_t *mctx, re_node_set *limits, int dst_node, int dst_idx, int src_node, int src_idx) internal_function; static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries, int subexp_idx, - int from_node, int bkref_idx) internal_function; + int from_node, int bkref_idx) + internal_function; static int check_dst_limits_calc_pos (const re_match_context_t *mctx, int limit, int subexp_idx, int node, int str_idx, int bkref_idx) internal_function; -static reg_errcode_t check_subexp_limits (re_dfa_t *dfa, +static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes, const re_node_set *candidates, re_node_set *limits, struct re_backref_cache_entry *bkref_ents, int str_idx) internal_function; -static reg_errcode_t sift_states_bkref (re_match_context_t *mctx, +static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx, - int str_idx, const re_node_set *candidates) internal_function; -static reg_errcode_t clean_state_log_if_needed (re_match_context_t *mctx, - int next_state_log_idx) internal_function; -static reg_errcode_t merge_state_array (re_dfa_t *dfa, re_dfastate_t **dst, - re_dfastate_t **src, int num) internal_function; + int str_idx, const re_node_set *candidates) + internal_function; +static reg_errcode_t merge_state_array (const re_dfa_t *dfa, + re_dfastate_t **dst, + re_dfastate_t **src, int num) + internal_function; static re_dfastate_t *find_recover_state (reg_errcode_t *err, re_match_context_t *mctx) internal_function; static re_dfastate_t *transit_state (reg_errcode_t *err, @@ -135,27 +129,33 @@ static re_dfastate_t *transit_state (reg_errcode_t *err, re_dfastate_t *state) internal_function; static re_dfastate_t *merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx, - re_dfastate_t *next_state) internal_function; + re_dfastate_t *next_state) + internal_function; static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes, int str_idx) internal_function; #if 0 static re_dfastate_t *transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx, - re_dfastate_t *pstate) internal_function; + re_dfastate_t *pstate) + internal_function; #endif #ifdef RE_ENABLE_I18N static reg_errcode_t transit_state_mb (re_match_context_t *mctx, - re_dfastate_t *pstate) internal_function; + re_dfastate_t *pstate) + internal_function; #endif /* RE_ENABLE_I18N */ static reg_errcode_t transit_state_bkref (re_match_context_t *mctx, - const re_node_set *nodes) internal_function; + const re_node_set *nodes) + internal_function; static reg_errcode_t get_subexp (re_match_context_t *mctx, - int bkref_node, int bkref_str_idx) internal_function; + int bkref_node, int bkref_str_idx) + internal_function; static reg_errcode_t get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top, re_sub_match_last_t *sub_last, - int bkref_node, int bkref_str) internal_function; + int bkref_node, int bkref_str) + internal_function; static int find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes, int subexp_idx, int type) internal_function; static reg_errcode_t check_arrival (re_match_context_t *mctx, @@ -165,34 +165,41 @@ static reg_errcode_t check_arrival (re_match_context_t *mctx, static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx, int str_idx, re_node_set *cur_nodes, - re_node_set *next_nodes) internal_function; -static reg_errcode_t check_arrival_expand_ecl (re_dfa_t *dfa, + re_node_set *next_nodes) + internal_function; +static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes, - int ex_subexp, int type) internal_function; -static reg_errcode_t check_arrival_expand_ecl_sub (re_dfa_t *dfa, + int ex_subexp, int type) + internal_function; +static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes, int target, int ex_subexp, int type) internal_function; static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes, int cur_str, - int subexp_num, int type) internal_function; -static int build_trtable (re_dfa_t *dfa, + int subexp_num, int type) + internal_function; +static int build_trtable (const re_dfa_t *dfa, re_dfastate_t *state) internal_function; #ifdef RE_ENABLE_I18N static int check_node_accept_bytes (const re_dfa_t *dfa, int node_idx, - const re_string_t *input, int idx) internal_function; + const re_string_t *input, int idx) + internal_function; # ifdef _LIBC static unsigned int find_collation_sequence_value (const unsigned char *mbs, - size_t name_len) internal_function; + size_t name_len) + internal_function; # endif /* _LIBC */ #endif /* RE_ENABLE_I18N */ static int group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, re_node_set *states_node, - bitset *states_ch) internal_function; + bitset_t *states_ch) internal_function; static int check_node_accept (const re_match_context_t *mctx, - const re_token_t *node, int idx) internal_function; -static reg_errcode_t extend_buffers (re_match_context_t *mctx) internal_function; + const re_token_t *node, int idx) + internal_function; +static reg_errcode_t extend_buffers (re_match_context_t *mctx) + internal_function; /* Entry point for POSIX code. */ @@ -220,7 +227,7 @@ regexec (preg, string, nmatch, pmatch, eflags) { reg_errcode_t err; int start, length; - re_dfa_t *dfa = (re_dfa_t *)preg->buffer; + re_dfa_t *dfa = (re_dfa_t *) preg->buffer; if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND)) return REG_BADPAT; @@ -411,7 +418,7 @@ re_search_stub (bufp, string, length, start, range, stop, regs, ret_len) regmatch_t *pmatch; int nregs, rval; int eflags = 0; - re_dfa_t *dfa = (re_dfa_t *)bufp->buffer; + re_dfa_t *dfa = (re_dfa_t *) bufp->buffer; /* Check for out-of-range. */ if (BE (start < 0 || start > length, 0)) @@ -619,7 +626,7 @@ re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch, regmatch_t pmatch[]; { reg_errcode_t err; - re_dfa_t *dfa = (re_dfa_t *)preg->buffer; + const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer; int left_lim, right_lim, incr; int fl_longest_match, match_first, match_kind, match_last = -1; int extra_nmatch; @@ -631,7 +638,7 @@ re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch, #endif char *fastmap = (preg->fastmap != NULL && preg->fastmap_accurate && range && !preg->can_be_null) ? preg->fastmap : NULL; - unsigned RE_TRANSLATE_TYPE t = (unsigned RE_TRANSLATE_TYPE) preg->translate; + RE_TRANSLATE_TYPE t = preg->translate; #if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)) memset (&mctx, '\0', sizeof (re_match_context_t)); @@ -932,7 +939,7 @@ static reg_errcode_t prune_impossible_nodes (mctx) re_match_context_t *mctx; { - re_dfa_t *const dfa = mctx->dfa; + const re_dfa_t *const dfa = mctx->dfa; int halt_node, match_last; reg_errcode_t ret; re_dfastate_t **sifted_states; @@ -1015,12 +1022,11 @@ prune_impossible_nodes (mctx) since initial states may have constraints like "\<", "^", etc.. */ static inline re_dfastate_t * -acquire_init_state_context (err, mctx, idx) - reg_errcode_t *err; - const re_match_context_t *mctx; - int idx; +__attribute ((always_inline)) internal_function +acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx, + int idx) { - re_dfa_t *const dfa = mctx->dfa; + const re_dfa_t *const dfa = mctx->dfa; if (dfa->init_state->has_constraint) { unsigned int context; @@ -1058,12 +1064,11 @@ acquire_init_state_context (err, mctx, idx) index of the buffer. */ static int -check_matching (mctx, fl_longest_match, p_match_first) - re_match_context_t *mctx; - int fl_longest_match; - int *p_match_first; +internal_function +check_matching (re_match_context_t *mctx, int fl_longest_match, + int *p_match_first) { - re_dfa_t *const dfa = mctx->dfa; + const re_dfa_t *const dfa = mctx->dfa; reg_errcode_t err; int match = 0; int match_last = -1; @@ -1190,10 +1195,9 @@ check_matching (mctx, fl_longest_match, p_match_first) /* Check NODE match the current context. */ -static int check_halt_node_context (dfa, node, context) - const re_dfa_t *dfa; - int node; - unsigned int context; +static int +internal_function +check_halt_node_context (const re_dfa_t *dfa, int node, unsigned int context) { re_token_type_t type = dfa->nodes[node].type; unsigned int constraint = dfa->nodes[node].constraint; @@ -1211,10 +1215,9 @@ static int check_halt_node_context (dfa, node, context) match the context, return the node. */ static int -check_halt_state_context (mctx, state, idx) - const re_match_context_t *mctx; - const re_dfastate_t *state; - int idx; +internal_function +check_halt_state_context (const re_match_context_t *mctx, + const re_dfastate_t *state, int idx) { int i; unsigned int context; @@ -1234,14 +1237,12 @@ check_halt_state_context (mctx, state, idx) of errors. */ static int -proceed_next_node (mctx, nregs, regs, pidx, node, eps_via_nodes, fs) - const re_match_context_t *mctx; - regmatch_t *regs; - int nregs, *pidx, node; - re_node_set *eps_via_nodes; - struct re_fail_stack_t *fs; +internal_function +proceed_next_node (const re_match_context_t *mctx, int nregs, regmatch_t *regs, + int *pidx, int node, re_node_set *eps_via_nodes, + struct re_fail_stack_t *fs) { - re_dfa_t *const dfa = mctx->dfa; + const re_dfa_t *const dfa = mctx->dfa; int i, err; if (IS_EPSILON_NODE (dfa->nodes[node].type)) { @@ -1336,11 +1337,9 @@ proceed_next_node (mctx, nregs, regs, pidx, node, eps_via_nodes, fs) } static reg_errcode_t -push_fail_stack (fs, str_idx, dest_node, nregs, regs, eps_via_nodes) - struct re_fail_stack_t *fs; - int str_idx, dest_node, nregs; - regmatch_t *regs; - re_node_set *eps_via_nodes; +internal_function +push_fail_stack (struct re_fail_stack_t *fs, int str_idx, int dest_node, + int nregs, regmatch_t *regs, re_node_set *eps_via_nodes) { reg_errcode_t err; int num = fs->num++; @@ -1365,11 +1364,9 @@ push_fail_stack (fs, str_idx, dest_node, nregs, regs, eps_via_nodes) } static int -pop_fail_stack (fs, pidx, nregs, regs, eps_via_nodes) - struct re_fail_stack_t *fs; - int *pidx, nregs; - regmatch_t *regs; - re_node_set *eps_via_nodes; +internal_function +pop_fail_stack (struct re_fail_stack_t *fs, int *pidx, int nregs, + regmatch_t *regs, re_node_set *eps_via_nodes) { int num = --fs->num; assert (num >= 0); @@ -1387,14 +1384,11 @@ pop_fail_stack (fs, pidx, nregs, regs, eps_via_nodes) pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch. */ static reg_errcode_t -set_regs (preg, mctx, nmatch, pmatch, fl_backtrack) - const regex_t *preg; - const re_match_context_t *mctx; - size_t nmatch; - regmatch_t *pmatch; - int fl_backtrack; +internal_function +set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch, + regmatch_t *pmatch, int fl_backtrack) { - re_dfa_t *dfa = (re_dfa_t *) preg->buffer; + const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer; int idx, cur_node; re_node_set eps_via_nodes; struct re_fail_stack_t *fs; @@ -1497,8 +1491,8 @@ set_regs (preg, mctx, nmatch, pmatch, fl_backtrack) } static reg_errcode_t -free_fail_stack_return (fs) - struct re_fail_stack_t *fs; +internal_function +free_fail_stack_return (struct re_fail_stack_t *fs) { if (fs) { @@ -1514,10 +1508,9 @@ free_fail_stack_return (fs) } static void -update_regs (dfa, pmatch, prev_idx_match, cur_node, cur_idx, nmatch) - re_dfa_t *dfa; - regmatch_t *pmatch, *prev_idx_match; - int cur_node, cur_idx, nmatch; +internal_function +update_regs (const re_dfa_t *dfa, regmatch_t *pmatch, + regmatch_t *prev_idx_match, int cur_node, int cur_idx, int nmatch) { int type = dfa->nodes[cur_node].type; if (type == OP_OPEN_SUBEXP) @@ -1587,9 +1580,8 @@ update_regs (dfa, pmatch, prev_idx_match, cur_node, cur_idx, nmatch) ((state) != NULL && re_node_set_contains (&(state)->nodes, node)) static reg_errcode_t -sift_states_backward (mctx, sctx) - re_match_context_t *mctx; - re_sift_context_t *sctx; +internal_function +sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx) { reg_errcode_t err; int null_cnt = 0; @@ -1646,14 +1638,12 @@ sift_states_backward (mctx, sctx) } static reg_errcode_t -build_sifted_states (mctx, sctx, str_idx, cur_dest) - re_match_context_t *mctx; - re_sift_context_t *sctx; - int str_idx; - re_node_set *cur_dest; +internal_function +build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx, + int str_idx, re_node_set *cur_dest) { - re_dfa_t *const dfa = mctx->dfa; - re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes; + const re_dfa_t *const dfa = mctx->dfa; + const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes; int i; /* Then build the next sifted state. @@ -1710,9 +1700,8 @@ build_sifted_states (mctx, sctx, str_idx, cur_dest) /* Helper functions. */ static reg_errcode_t -clean_state_log_if_needed (mctx, next_state_log_idx) - re_match_context_t *mctx; - int next_state_log_idx; +internal_function +clean_state_log_if_needed (re_match_context_t *mctx, int next_state_log_idx) { int top = mctx->state_log_top; @@ -1736,11 +1725,9 @@ clean_state_log_if_needed (mctx, next_state_log_idx) } static reg_errcode_t -merge_state_array (dfa, dst, src, num) - re_dfa_t *dfa; - re_dfastate_t **dst; - re_dfastate_t **src; - int num; +internal_function +merge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst, + re_dfastate_t **src, int num) { int st_idx; reg_errcode_t err; @@ -1765,14 +1752,13 @@ merge_state_array (dfa, dst, src, num) } static reg_errcode_t -update_cur_sifted_state (mctx, sctx, str_idx, dest_nodes) - re_match_context_t *mctx; - re_sift_context_t *sctx; - int str_idx; - re_node_set *dest_nodes; +internal_function +update_cur_sifted_state (const re_match_context_t *mctx, + re_sift_context_t *sctx, int str_idx, + re_node_set *dest_nodes) { - re_dfa_t *const dfa = mctx->dfa; - reg_errcode_t err; + const re_dfa_t *const dfa = mctx->dfa; + reg_errcode_t err = REG_NOERROR; const re_node_set *candidates; candidates = ((mctx->state_log[str_idx] == NULL) ? NULL : &mctx->state_log[str_idx]->nodes); @@ -1814,10 +1800,9 @@ update_cur_sifted_state (mctx, sctx, str_idx, dest_nodes) } static reg_errcode_t -add_epsilon_src_nodes (dfa, dest_nodes, candidates) - re_dfa_t *dfa; - re_node_set *dest_nodes; - const re_node_set *candidates; +internal_function +add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes, + const re_node_set *candidates) { reg_errcode_t err = REG_NOERROR; int i; @@ -1840,11 +1825,9 @@ add_epsilon_src_nodes (dfa, dest_nodes, candidates) } static reg_errcode_t -sub_epsilon_src_nodes (dfa, node, dest_nodes, candidates) - re_dfa_t *dfa; - int node; - re_node_set *dest_nodes; - const re_node_set *candidates; +internal_function +sub_epsilon_src_nodes (const re_dfa_t *dfa, int node, re_node_set *dest_nodes, + const re_node_set *candidates) { int ecl_idx; reg_errcode_t err; @@ -1891,12 +1874,11 @@ sub_epsilon_src_nodes (dfa, node, dest_nodes, candidates) } static int -check_dst_limits (mctx, limits, dst_node, dst_idx, src_node, src_idx) - const re_match_context_t *mctx; - re_node_set *limits; - int dst_node, dst_idx, src_node, src_idx; +internal_function +check_dst_limits (const re_match_context_t *mctx, re_node_set *limits, + int dst_node, int dst_idx, int src_node, int src_idx) { - re_dfa_t *const dfa = mctx->dfa; + const re_dfa_t *const dfa = mctx->dfa; int lim_idx, src_pos, dst_pos; int dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx); @@ -1928,12 +1910,12 @@ check_dst_limits (mctx, limits, dst_node, dst_idx, src_node, src_idx) } static int -check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx, from_node, bkref_idx) - const re_match_context_t *mctx; - int boundaries, subexp_idx, from_node, bkref_idx; +internal_function +check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries, + int subexp_idx, int from_node, int bkref_idx) { - re_dfa_t *const dfa = mctx->dfa; - re_node_set *eclosures = dfa->eclosures + from_node; + const re_dfa_t *const dfa = mctx->dfa; + const re_node_set *eclosures = dfa->eclosures + from_node; int node_idx; /* Else, we are on the boundary: examine the nodes on the epsilon @@ -1954,9 +1936,9 @@ check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx, from_node, bkref_idx) if (ent->node != node) continue; - if (subexp_idx - < CHAR_BIT * sizeof ent->eps_reachable_subexps_map - && !(ent->eps_reachable_subexps_map & (1u << subexp_idx))) + if (subexp_idx < BITSET_WORD_BITS + && !(ent->eps_reachable_subexps_map + & ((bitset_word_t) 1 << subexp_idx))) continue; /* Recurse trying to reach the OP_OPEN_SUBEXP and @@ -1982,9 +1964,9 @@ check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx, from_node, bkref_idx) if (cpos == 0 && (boundaries & 2)) return 0; - if (subexp_idx - < CHAR_BIT * sizeof ent->eps_reachable_subexps_map) - ent->eps_reachable_subexps_map &= ~(1u << subexp_idx); + if (subexp_idx < BITSET_WORD_BITS) + ent->eps_reachable_subexps_map + &= ~((bitset_word_t) 1 << subexp_idx); } while (ent++->more); } @@ -2009,9 +1991,10 @@ check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx, from_node, bkref_idx) } static int -check_dst_limits_calc_pos (mctx, limit, subexp_idx, from_node, str_idx, bkref_idx) - const re_match_context_t *mctx; - int limit, subexp_idx, from_node, str_idx, bkref_idx; +internal_function +check_dst_limits_calc_pos (const re_match_context_t *mctx, int limit, + int subexp_idx, int from_node, int str_idx, + int bkref_idx) { struct re_backref_cache_entry *lim = mctx->bkref_ents + limit; int boundaries; @@ -2038,13 +2021,10 @@ check_dst_limits_calc_pos (mctx, limit, subexp_idx, from_node, str_idx, bkref_id which are against limitations from DEST_NODES. */ static reg_errcode_t -check_subexp_limits (dfa, dest_nodes, candidates, limits, bkref_ents, str_idx) - re_dfa_t *dfa; - re_node_set *dest_nodes; - const re_node_set *candidates; - re_node_set *limits; - struct re_backref_cache_entry *bkref_ents; - int str_idx; +internal_function +check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes, + const re_node_set *candidates, re_node_set *limits, + struct re_backref_cache_entry *bkref_ents, int str_idx) { reg_errcode_t err; int node_idx, lim_idx; @@ -2129,13 +2109,11 @@ check_subexp_limits (dfa, dest_nodes, candidates, limits, bkref_ents, str_idx) } static reg_errcode_t -sift_states_bkref (mctx, sctx, str_idx, candidates) - re_match_context_t *mctx; - re_sift_context_t *sctx; - int str_idx; - const re_node_set *candidates; +internal_function +sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx, + int str_idx, const re_node_set *candidates) { - re_dfa_t *const dfa = mctx->dfa; + const re_dfa_t *const dfa = mctx->dfa; reg_errcode_t err; int node_idx, node; re_sift_context_t local_sctx; @@ -2231,10 +2209,9 @@ sift_states_bkref (mctx, sctx, str_idx, candidates) #ifdef RE_ENABLE_I18N static int -sift_states_iter_mb (mctx, sctx, node_idx, str_idx, max_str_idx) - const re_match_context_t *mctx; - re_sift_context_t *sctx; - int node_idx, str_idx, max_str_idx; +internal_function +sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx, + int node_idx, int str_idx, int max_str_idx) { const re_dfa_t *const dfa = mctx->dfa; int naccepted; @@ -2262,10 +2239,9 @@ sift_states_iter_mb (mctx, sctx, node_idx, str_idx, max_str_idx) update the destination of STATE_LOG. */ static re_dfastate_t * -transit_state (err, mctx, state) - reg_errcode_t *err; - re_match_context_t *mctx; - re_dfastate_t *state; +internal_function +transit_state (reg_errcode_t *err, re_match_context_t *mctx, + re_dfastate_t *state) { re_dfastate_t **trtable; unsigned char ch; @@ -2321,10 +2297,9 @@ transit_state (err, mctx, state) /* Update the state_log if we need */ re_dfastate_t * -merge_state_with_log (err, mctx, next_state) - reg_errcode_t *err; - re_match_context_t *mctx; - re_dfastate_t *next_state; +internal_function +merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx, + re_dfastate_t *next_state) { const re_dfa_t *const dfa = mctx->dfa; int cur_idx = re_string_cur_idx (&mctx->input); @@ -2401,11 +2376,10 @@ merge_state_with_log (err, mctx, next_state) multi-byte match, then look in the log for a state from which to restart matching. */ re_dfastate_t * -find_recover_state (err, mctx) - reg_errcode_t *err; - re_match_context_t *mctx; +internal_function +find_recover_state (reg_errcode_t *err, re_match_context_t *mctx) { - re_dfastate_t *cur_state = NULL; + re_dfastate_t *cur_state; do { int max = mctx->state_log_top; @@ -2433,12 +2407,11 @@ find_recover_state (err, mctx) correspoding back references. */ static reg_errcode_t -check_subexp_matching_top (mctx, cur_nodes, str_idx) - re_match_context_t *mctx; - re_node_set *cur_nodes; - int str_idx; +internal_function +check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes, + int str_idx) { - re_dfa_t *const dfa = mctx->dfa; + const re_dfa_t *const dfa = mctx->dfa; int node_idx; reg_errcode_t err; @@ -2451,8 +2424,9 @@ check_subexp_matching_top (mctx, cur_nodes, str_idx) { int node = cur_nodes->elems[node_idx]; if (dfa->nodes[node].type == OP_OPEN_SUBEXP - && dfa->nodes[node].opr.idx < CHAR_BIT * sizeof dfa->used_bkref_map - && dfa->used_bkref_map & (1u << dfa->nodes[node].opr.idx)) + && dfa->nodes[node].opr.idx < BITSET_WORD_BITS + && (dfa->used_bkref_map + & ((bitset_word_t) 1 << dfa->nodes[node].opr.idx))) { err = match_ctx_add_subtop (mctx, node, str_idx); if (BE (err != REG_NOERROR, 0)) @@ -2467,10 +2441,8 @@ check_subexp_matching_top (mctx, cur_nodes, str_idx) accepting the current input byte. */ static re_dfastate_t * -transit_state_sb (err, mctx, state) - reg_errcode_t *err; - re_match_context_t *mctx; - re_dfastate_t *state; +transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx, + re_dfastate_t *state) { const re_dfa_t *const dfa = mctx->dfa; re_node_set next_nodes; @@ -2508,9 +2480,8 @@ transit_state_sb (err, mctx, state) #ifdef RE_ENABLE_I18N static reg_errcode_t -transit_state_mb (mctx, pstate) - re_match_context_t *mctx; - re_dfastate_t *pstate; +internal_function +transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate) { const re_dfa_t *const dfa = mctx->dfa; reg_errcode_t err; @@ -2579,11 +2550,10 @@ transit_state_mb (mctx, pstate) #endif /* RE_ENABLE_I18N */ static reg_errcode_t -transit_state_bkref (mctx, nodes) - re_match_context_t *mctx; - const re_node_set *nodes; +internal_function +transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes) { - re_dfa_t *const dfa = mctx->dfa; + const re_dfa_t *const dfa = mctx->dfa; reg_errcode_t err; int i; int cur_str_idx = re_string_cur_idx (&mctx->input); @@ -2694,11 +2664,10 @@ transit_state_bkref (mctx, nodes) delay these checking for prune_impossible_nodes(). */ static reg_errcode_t -get_subexp (mctx, bkref_node, bkref_str_idx) - re_match_context_t *mctx; - int bkref_node, bkref_str_idx; +internal_function +get_subexp (re_match_context_t *mctx, int bkref_node, int bkref_str_idx) { - re_dfa_t *const dfa = mctx->dfa; + const re_dfa_t *const dfa = mctx->dfa; int subexp_num, sub_top_idx; const char *buf = (const char *) re_string_get_buffer (&mctx->input); /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX. */ @@ -2844,11 +2813,9 @@ get_subexp (mctx, bkref_node, bkref_str_idx) and SUB_LAST. */ static reg_errcode_t -get_subexp_sub (mctx, sub_top, sub_last, bkref_node, bkref_str) - re_match_context_t *mctx; - const re_sub_match_top_t *sub_top; - re_sub_match_last_t *sub_last; - int bkref_node, bkref_str; +internal_function +get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top, + re_sub_match_last_t *sub_last, int bkref_node, int bkref_str) { reg_errcode_t err; int to_idx; @@ -2875,10 +2842,9 @@ get_subexp_sub (mctx, sub_top, sub_last, bkref_node, bkref_str) E.g. RE: (a){2} */ static int -find_subexp_node (dfa, nodes, subexp_idx, type) - const re_dfa_t *dfa; - const re_node_set *nodes; - int subexp_idx, type; +internal_function +find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes, + int subexp_idx, int type) { int cls_idx; for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx) @@ -2898,14 +2864,12 @@ find_subexp_node (dfa, nodes, subexp_idx, type) Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise. */ static reg_errcode_t -check_arrival (mctx, path, top_node, top_str, last_node, last_str, - type) - re_match_context_t *mctx; - state_array_t *path; - int top_node, top_str, last_node, last_str, type; +internal_function +check_arrival (re_match_context_t *mctx, state_array_t *path, int top_node, + int top_str, int last_node, int last_str, int type) { - re_dfa_t *const dfa = mctx->dfa; - reg_errcode_t err; + const re_dfa_t *const dfa = mctx->dfa; + reg_errcode_t err = REG_NOERROR; int subexp_num, backup_cur_idx, str_idx, null_cnt; re_dfastate_t *cur_state = NULL; re_node_set *cur_nodes, next_nodes; @@ -2920,7 +2884,7 @@ check_arrival (mctx, path, top_node, top_str, last_node, last_str, int old_alloc = path->alloc; path->alloc += last_str + mctx->max_mb_elem_len + 1; new_array = re_realloc (path->array, re_dfastate_t *, path->alloc); - if (new_array == NULL) + if (BE (new_array == NULL, 0)) { path->alloc = old_alloc; return REG_ESPACE; @@ -2930,7 +2894,7 @@ check_arrival (mctx, path, top_node, top_str, last_node, last_str, sizeof (re_dfastate_t *) * (path->alloc - old_alloc)); } - str_idx = path->next_idx == 0 ? top_str : path->next_idx; + str_idx = path->next_idx ?: top_str; /* Temporary modify MCTX. */ backup_state_log = mctx->state_log; @@ -2958,7 +2922,7 @@ check_arrival (mctx, path, top_node, top_str, last_node, last_str, if (cur_state && cur_state->has_backref) { err = re_node_set_init_copy (&next_nodes, &cur_state->nodes); - if (BE ( err != REG_NOERROR, 0)) + if (BE (err != REG_NOERROR, 0)) return err; } else @@ -2970,7 +2934,7 @@ check_arrival (mctx, path, top_node, top_str, last_node, last_str, { err = expand_bkref_cache (mctx, &next_nodes, str_idx, subexp_num, type); - if (BE ( err != REG_NOERROR, 0)) + if (BE (err != REG_NOERROR, 0)) { re_node_set_free (&next_nodes); return err; @@ -3001,7 +2965,8 @@ check_arrival (mctx, path, top_node, top_str, last_node, last_str, if (cur_state) { err = check_arrival_add_next_nodes (mctx, str_idx, - &cur_state->non_eps_nodes, &next_nodes); + &cur_state->non_eps_nodes, + &next_nodes); if (BE (err != REG_NOERROR, 0)) { re_node_set_free (&next_nodes); @@ -3019,7 +2984,7 @@ check_arrival (mctx, path, top_node, top_str, last_node, last_str, } err = expand_bkref_cache (mctx, &next_nodes, str_idx, subexp_num, type); - if (BE ( err != REG_NOERROR, 0)) + if (BE (err != REG_NOERROR, 0)) { re_node_set_free (&next_nodes); return err; @@ -3060,15 +3025,14 @@ check_arrival (mctx, path, top_node, top_str, last_node, last_str, Can't we unify them? */ static reg_errcode_t -check_arrival_add_next_nodes (mctx, str_idx, cur_nodes, next_nodes) - re_match_context_t *mctx; - int str_idx; - re_node_set *cur_nodes, *next_nodes; +internal_function +check_arrival_add_next_nodes (re_match_context_t *mctx, int str_idx, + re_node_set *cur_nodes, re_node_set *next_nodes) { const re_dfa_t *const dfa = mctx->dfa; int result; int cur_idx; - reg_errcode_t err; + reg_errcode_t err = REG_NOERROR; re_node_set union_set; re_node_set_init_empty (&union_set); for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx) @@ -3140,10 +3104,9 @@ check_arrival_add_next_nodes (mctx, str_idx, cur_nodes, next_nodes) */ static reg_errcode_t -check_arrival_expand_ecl (dfa, cur_nodes, ex_subexp, type) - re_dfa_t *dfa; - re_node_set *cur_nodes; - int ex_subexp, type; +internal_function +check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes, + int ex_subexp, int type) { reg_errcode_t err; int idx, outside_node; @@ -3160,7 +3123,7 @@ check_arrival_expand_ecl (dfa, cur_nodes, ex_subexp, type) for (idx = 0; idx < cur_nodes->nelem; ++idx) { int cur_node = cur_nodes->elems[idx]; - re_node_set *eclosure = dfa->eclosures + cur_node; + const re_node_set *eclosure = dfa->eclosures + cur_node; outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type); if (outside_node == -1) { @@ -3194,10 +3157,9 @@ check_arrival_expand_ecl (dfa, cur_nodes, ex_subexp, type) problematic append it to DST_NODES. */ static reg_errcode_t -check_arrival_expand_ecl_sub (dfa, dst_nodes, target, ex_subexp, type) - re_dfa_t *dfa; - int target, ex_subexp, type; - re_node_set *dst_nodes; +internal_function +check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes, + int target, int ex_subexp, int type) { int cur_node; for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);) @@ -3239,13 +3201,11 @@ check_arrival_expand_ecl_sub (dfa, dst_nodes, target, ex_subexp, type) in MCTX->BKREF_ENTS. */ static reg_errcode_t -expand_bkref_cache (mctx, cur_nodes, cur_str, subexp_num, - type) - re_match_context_t *mctx; - int cur_str, subexp_num, type; - re_node_set *cur_nodes; +internal_function +expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes, + int cur_str, int subexp_num, int type) { - re_dfa_t *const dfa = mctx->dfa; + const re_dfa_t *const dfa = mctx->dfa; reg_errcode_t err; int cache_idx_start = search_cur_bkref_entry (mctx, cur_str); struct re_backref_cache_entry *ent; @@ -3330,37 +3290,42 @@ expand_bkref_cache (mctx, cur_nodes, cur_str, subexp_num, Return 1 if succeeded, otherwise return NULL. */ static int -build_trtable (dfa, state) - re_dfa_t *dfa; - re_dfastate_t *state; +internal_function +build_trtable (const re_dfa_t *dfa, re_dfastate_t *state) { reg_errcode_t err; int i, j, ch, need_word_trtable = 0; - unsigned int elem, mask; - int dests_node_malloced = 0, dest_states_malloced = 0; + bitset_word_t elem, mask; + bool dests_node_malloced = false; + bool dest_states_malloced = false; int ndests; /* Number of the destination states from `state'. */ re_dfastate_t **trtable; re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl; re_node_set follows, *dests_node; - bitset *dests_ch; - bitset acceptable; + bitset_t *dests_ch; + bitset_t acceptable; + + struct dests_alloc + { + re_node_set dests_node[SBC_MAX]; + bitset_t dests_ch[SBC_MAX]; + } *dests_alloc; /* We build DFA states which corresponds to the destination nodes from `state'. `dests_node[i]' represents the nodes which i-th destination state contains, and `dests_ch[i]' represents the characters which i-th destination state accepts. */ - if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX)) - dests_node = (re_node_set *) - alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX); + if (__libc_use_alloca (sizeof (struct dests_alloc))) + dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc)); else { - dests_node = (re_node_set *) - malloc ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX); - if (BE (dests_node == NULL, 0)) + dests_alloc = re_malloc (struct dests_alloc, 1); + if (BE (dests_alloc == NULL, 0)) return 0; - dests_node_malloced = 1; + dests_node_malloced = true; } - dests_ch = (bitset *) (dests_node + SBC_MAX); + dests_node = dests_alloc->dests_node; + dests_ch = dests_alloc->dests_ch; /* Initialize transiton table. */ state->word_trtable = state->trtable = NULL; @@ -3371,7 +3336,7 @@ build_trtable (dfa, state) if (BE (ndests <= 0, 0)) { if (dests_node_malloced) - free (dests_node); + free (dests_alloc); /* Return 0 in case of an error, 1 otherwise. */ if (ndests == 0) { @@ -3386,7 +3351,7 @@ build_trtable (dfa, state) if (BE (err != REG_NOERROR, 0)) goto out_free; - if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX + if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX + ndests * 3 * sizeof (re_dfastate_t *))) dest_states = (re_dfastate_t **) alloca (ndests * 3 * sizeof (re_dfastate_t *)); @@ -3403,10 +3368,10 @@ out_free: for (i = 0; i < ndests; ++i) re_node_set_free (dests_node + i); if (dests_node_malloced) - free (dests_node); + free (dests_alloc); return 0; } - dest_states_malloced = 1; + dest_states_malloced = true; } dest_states_word = dest_states + ndests; dest_states_nl = dest_states_word + ndests; @@ -3468,8 +3433,8 @@ out_free: goto out_free; /* For all characters ch...: */ - for (i = 0; i < BITSET_UINTS; ++i) - for (ch = i * UINT_BITS, elem = acceptable[i], mask = 1; + for (i = 0; i < BITSET_WORDS; ++i) + for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1; elem; mask <<= 1, elem >>= 1, ++ch) if (BE (elem & 1, 0)) @@ -3499,8 +3464,8 @@ out_free: goto out_free; /* For all characters ch...: */ - for (i = 0; i < BITSET_UINTS; ++i) - for (ch = i * UINT_BITS, elem = acceptable[i], mask = 1; + for (i = 0; i < BITSET_WORDS; ++i) + for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1; elem; mask <<= 1, elem >>= 1, ++ch) if (BE (elem & 1, 0)) @@ -3541,7 +3506,7 @@ out_free: re_node_set_free (dests_node + i); if (dests_node_malloced) - free (dests_node); + free (dests_alloc); return 1; } @@ -3552,17 +3517,15 @@ out_free: to DEST_CH[i]. This function return the number of destinations. */ static int -group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch) - const re_dfa_t *dfa; - const re_dfastate_t *state; - re_node_set *dests_node; - bitset *dests_ch; +internal_function +group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, + re_node_set *dests_node, bitset_t *dests_ch) { reg_errcode_t err; int result; int i, j, k; int ndests; /* Number of the destinations from `state'. */ - bitset accepts; /* Characters a node can accept. */ + bitset_t accepts; /* Characters a node can accept. */ const re_node_set *cur_nodes = &state->nodes; bitset_empty (accepts); ndests = 0; @@ -3597,7 +3560,7 @@ group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch) #ifdef RE_ENABLE_I18N else if (type == OP_UTF8_PERIOD) { - memset (accepts, 255, sizeof (unsigned int) * BITSET_UINTS / 2); + memset (accepts, '\xff', sizeof (bitset_t) / 2); if (!(dfa->syntax & RE_DOT_NEWLINE)) bitset_clear (accepts, '\n'); if (dfa->syntax & RE_DOT_NOT_NULL) @@ -3613,7 +3576,7 @@ group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch) { if (constraint & NEXT_NEWLINE_CONSTRAINT) { - int accepts_newline = bitset_contain (accepts, NEWLINE_CHAR); + bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR); bitset_empty (accepts); if (accepts_newline) bitset_set (accepts, NEWLINE_CHAR); @@ -3628,7 +3591,7 @@ group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch) if (constraint & NEXT_WORD_CONSTRAINT) { - unsigned int any_set = 0; + bitset_word_t any_set = 0; if (type == CHARACTER && !node->word_char) { bitset_empty (accepts); @@ -3636,18 +3599,18 @@ group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch) } #ifdef RE_ENABLE_I18N if (dfa->mb_cur_max > 1) - for (j = 0; j < BITSET_UINTS; ++j) + for (j = 0; j < BITSET_WORDS; ++j) any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j])); else #endif - for (j = 0; j < BITSET_UINTS; ++j) + for (j = 0; j < BITSET_WORDS; ++j) any_set |= (accepts[j] &= dfa->word_char[j]); if (!any_set) continue; } if (constraint & NEXT_NOTWORD_CONSTRAINT) { - unsigned int any_set = 0; + bitset_word_t any_set = 0; if (type == CHARACTER && node->word_char) { bitset_empty (accepts); @@ -3655,11 +3618,11 @@ group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch) } #ifdef RE_ENABLE_I18N if (dfa->mb_cur_max > 1) - for (j = 0; j < BITSET_UINTS; ++j) + for (j = 0; j < BITSET_WORDS; ++j) any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j])); else #endif - for (j = 0; j < BITSET_UINTS; ++j) + for (j = 0; j < BITSET_WORDS; ++j) any_set |= (accepts[j] &= ~dfa->word_char[j]); if (!any_set) continue; @@ -3670,10 +3633,10 @@ group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch) state. Above, we make sure that accepts is not empty. */ for (j = 0; j < ndests; ++j) { - bitset intersec; /* Intersection sets, see below. */ - bitset remains; + bitset_t intersec; /* Intersection sets, see below. */ + bitset_t remains; /* Flags, see below. */ - int has_intersec, not_subset, not_consumed; + bitset_word_t has_intersec, not_subset, not_consumed; /* Optimization, skip if this state doesn't accept the character. */ if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c)) @@ -3681,7 +3644,7 @@ group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch) /* Enumerate the intersection set of this state and `accepts'. */ has_intersec = 0; - for (k = 0; k < BITSET_UINTS; ++k) + for (k = 0; k < BITSET_WORDS; ++k) has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k]; /* And skip if the intersection set is empty. */ if (!has_intersec) @@ -3689,7 +3652,7 @@ group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch) /* Then check if this state is a subset of `accepts'. */ not_subset = not_consumed = 0; - for (k = 0; k < BITSET_UINTS; ++k) + for (k = 0; k < BITSET_WORDS; ++k) { not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k]; not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k]; @@ -3744,10 +3707,9 @@ group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch) can only accept one byte. */ static int -check_node_accept_bytes (dfa, node_idx, input, str_idx) - const re_dfa_t *dfa; - int node_idx, str_idx; - const re_string_t *input; +internal_function +check_node_accept_bytes (const re_dfa_t *dfa, int node_idx, + const re_string_t *input, int str_idx) { const re_token_t *node = dfa->nodes + node_idx; int char_len, elem_len; @@ -3981,9 +3943,8 @@ check_node_accept_bytes (dfa, node_idx, input, str_idx) # ifdef _LIBC static unsigned int -find_collation_sequence_value (mbs, mbs_len) - const unsigned char *mbs; - size_t mbs_len; +internal_function +find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len) { uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); if (nrules == 0) @@ -4045,10 +4006,9 @@ find_collation_sequence_value (mbs, mbs_len) byte of the INPUT. */ static int -check_node_accept (mctx, node, idx) - const re_match_context_t *mctx; - const re_token_t *node; - int idx; +internal_function +check_node_accept (const re_match_context_t *mctx, const re_token_t *node, + int idx) { unsigned char ch; ch = re_string_byte_at (&mctx->input, idx); @@ -4096,8 +4056,8 @@ check_node_accept (mctx, node, idx) /* Extend the buffers, if the buffers have run out. */ static reg_errcode_t -extend_buffers (mctx) - re_match_context_t *mctx; +internal_function +extend_buffers (re_match_context_t *mctx) { reg_errcode_t ret; re_string_t *pstr = &mctx->input; @@ -4155,9 +4115,8 @@ extend_buffers (mctx) /* Initialize MCTX. */ static reg_errcode_t -match_ctx_init (mctx, eflags, n) - re_match_context_t *mctx; - int eflags, n; +internal_function +match_ctx_init (re_match_context_t *mctx, int eflags, int n) { mctx->eflags = eflags; mctx->match_last = -1; @@ -4184,8 +4143,8 @@ match_ctx_init (mctx, eflags, n) of the input, or changes the input string. */ static void -match_ctx_clean (mctx) - re_match_context_t *mctx; +internal_function +match_ctx_clean (re_match_context_t *mctx) { int st_idx; for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx) @@ -4214,8 +4173,8 @@ match_ctx_clean (mctx) /* Free all the memory associated with MCTX. */ static void -match_ctx_free (mctx) - re_match_context_t *mctx; +internal_function +match_ctx_free (re_match_context_t *mctx) { /* First, free all the memory associated with MCTX->SUB_TOPS. */ match_ctx_clean (mctx); @@ -4229,9 +4188,9 @@ match_ctx_free (mctx) */ static reg_errcode_t -match_ctx_add_entry (mctx, node, str_idx, from, to) - re_match_context_t *mctx; - int node, str_idx, from, to; +internal_function +match_ctx_add_entry (re_match_context_t *mctx, int node, int str_idx, int from, + int to) { if (mctx->nbkref_ents >= mctx->abkref_ents) { @@ -4278,9 +4237,8 @@ match_ctx_add_entry (mctx, node, str_idx, from, to) found. Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX. */ static int -search_cur_bkref_entry (mctx, str_idx) - const re_match_context_t *mctx; - int str_idx; +internal_function +search_cur_bkref_entry (const re_match_context_t *mctx, int str_idx) { int left, right, mid, last; last = right = mctx->nbkref_ents; @@ -4302,9 +4260,8 @@ search_cur_bkref_entry (mctx, str_idx) at STR_IDX. */ static reg_errcode_t -match_ctx_add_subtop (mctx, node, str_idx) - re_match_context_t *mctx; - int node, str_idx; +internal_function +match_ctx_add_subtop (re_match_context_t *mctx, int node, int str_idx) { #ifdef DEBUG assert (mctx->sub_tops != NULL); @@ -4333,9 +4290,8 @@ match_ctx_add_subtop (mctx, node, str_idx) at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP. */ static re_sub_match_last_t * -match_ctx_add_sublast (subtop, node, str_idx) - re_sub_match_top_t *subtop; - int node, str_idx; +internal_function +match_ctx_add_sublast (re_sub_match_top_t *subtop, int node, int str_idx) { re_sub_match_last_t *new_entry; if (BE (subtop->nlasts == subtop->alasts, 0)) @@ -4361,10 +4317,9 @@ match_ctx_add_sublast (subtop, node, str_idx) } static void -sift_ctx_init (sctx, sifted_sts, limited_sts, last_node, last_str_idx) - re_sift_context_t *sctx; - re_dfastate_t **sifted_sts, **limited_sts; - int last_node, last_str_idx; +internal_function +sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts, + re_dfastate_t **limited_sts, int last_node, int last_str_idx) { sctx->sifted_states = sifted_sts; sctx->limited_states = limited_sts; @@ -8,7 +8,7 @@ msgid "" msgstr "" "Project-Id-Version: sed 4.1.1\n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2004-07-01 18:32+0200\n" +"POT-Creation-Date: 2005-04-11 15:53+0200\n" "PO-Revision-Date: 2005-04-12 20:09+0200\n" "Last-Translator: Taco Witte <tcwitte@cs.uu.nl>\n" "Language-Team: Dutch <vertaling@vrijschrift.org>\n" @@ -133,38 +133,42 @@ msgstr "%s: -e expressie #%lu, teken %lu: %s\n" msgid "can't find label for jump to `%s'" msgstr "kan label voor sprong naar `%s' niet vinden" -#: sed/execute.c:649 +#: sed/execute.c:651 #, c-format msgid "%s: can't read %s: %s\n" msgstr "%s: kan %s niet lezen: %s\n" -#: sed/execute.c:672 +#: sed/execute.c:674 #, c-format msgid "couldn't edit %s: is a terminal" msgstr "kon %s niet bewerken: is een terminal" -#: sed/execute.c:676 +#: sed/execute.c:678 #, c-format msgid "couldn't edit %s: not a regular file" msgstr "kon %s niet bewerken: geen gewoon bestand" -#: sed/execute.c:683 lib/utils.c:196 +#: sed/execute.c:685 lib/utils.c:196 #, c-format msgid "couldn't open temporary file %s: %s" msgstr "kon tijdelijk bestand %s niet openen: %s" -#: sed/execute.c:1207 sed/execute.c:1388 +#: sed/execute.c:1208 sed/execute.c:1389 msgid "error in subprocess" msgstr "fout in deelproces" -#: sed/execute.c:1209 +#: sed/execute.c:1210 msgid "option `e' not supported" msgstr "optie `e' wordt niet ondersteund" -#: sed/execute.c:1390 +#: sed/execute.c:1391 msgid "`e' command not supported" msgstr "`e'-opdracht wordt niet ondersteund" +#: sed/execute.c:1716 +msgid "no input files" +msgstr "" + #: sed/regexp.c:39 msgid "no previous regular expression" msgstr "geen eerdere reguliere expressie" @@ -173,37 +177,41 @@ msgstr "geen eerdere reguliere expressie" msgid "cannot specify modifiers on empty regexp" msgstr "kan geen veranderaars opgeven bij een lege reguliere expressie" -#: sed/regexp.c:134 +#: sed/regexp.c:115 #, c-format msgid "invalid reference \\%d on `s' command's RHS" msgstr "ongeldige verwijzing \\%d op rechterhandzijde van `s'-opdracht" -#: sed/sed.c:96 +#: sed/sed.c:93 msgid "" " -R, --regexp-perl\n" " use Perl 5's regular expressions syntax in the script.\n" msgstr "" " -R, --regexp-perl\n" -" de syntaxis van Perl 5 voor reguliere expressie gebruiken in het script.\n" +" de syntaxis van Perl 5 voor reguliere expressie gebruiken " +"in het script.\n" -#: sed/sed.c:101 +#: sed/sed.c:98 #, c-format msgid "" "Usage: %s [OPTION]... {script-only-if-no-other-script} [input-file]...\n" "\n" msgstr "" -"Gebruik: %s [OPTIE]... {script-alleen-als-geen-ander-script} [invoerbestand]...\n" +"Gebruik: %s [OPTIE]... {script-alleen-als-geen-ander-script} " +"[invoerbestand]...\n" "\n" -#: sed/sed.c:105 +#: sed/sed.c:102 +#, fuzzy, c-format msgid "" -"-n, --quiet, --silent\n" +" -n, --quiet, --silent\n" " suppress automatic printing of pattern space\n" msgstr "" "-n, --quiet, --silent\n" " automatische weergave van patroonruimte onderdrukken\n" -#: sed/sed.c:107 +#: sed/sed.c:104 +#, c-format msgid "" " -e script, --expression=script\n" " add the script to the commands to be executed\n" @@ -211,23 +219,29 @@ msgstr "" " -e script, --expression=SCRIPT\n" " het SCRIPT toevoegen aan uit te voeren opdrachten\n" -#: sed/sed.c:109 +#: sed/sed.c:106 +#, c-format msgid "" " -f script-file, --file=script-file\n" -" add the contents of script-file to the commands to be executed\n" +" add the contents of script-file to the commands to be " +"executed\n" msgstr "" " -f script-file, --file=SCRIPTBESTAND\n" -" de inhoud van SCRIPTBESTAND toevoegen aan uit te voeren opdrachten\n" +" de inhoud van SCRIPTBESTAND toevoegen aan uit te voeren " +"opdrachten\n" -#: sed/sed.c:111 +#: sed/sed.c:108 +#, c-format msgid "" " -i[SUFFIX], --in-place[=SUFFIX]\n" " edit files in place (makes backup if extension supplied)\n" msgstr "" " -i[ACHTERVOEGSEL], --in-place[=ACHTERVOEGSEL]\n" -" bestanden ter plekke bewerken (maakt reservekopie als extensie gegeven)\n" +" bestanden ter plekke bewerken (maakt reservekopie als " +"extensie gegeven)\n" -#: sed/sed.c:113 +#: sed/sed.c:110 +#, c-format msgid "" " -l N, --line-length=N\n" " specify the desired line-wrap length for the `l' command\n" @@ -235,7 +249,8 @@ msgstr "" " -l AANTAL, --line-length=AANTAL\n" " gewenste regelafbreeklengte opgeven voor `l'-opdracht\n" -#: sed/sed.c:115 +#: sed/sed.c:112 +#, c-format msgid "" " --posix\n" " disable all GNU extensions.\n" @@ -243,7 +258,8 @@ msgstr "" " --posix\n" " alle GNU-uitbreidingen uit zetten.\n" -#: sed/sed.c:117 +#: sed/sed.c:114 +#, c-format msgid "" " -r, --regexp-extended\n" " use extended regular expressions in the script.\n" @@ -251,34 +267,42 @@ msgstr "" " -r, --regexp-extended\n" " uitgebreide reguliere expressies gebruiken in het script.\n" -#: sed/sed.c:120 +#: sed/sed.c:117 +#, c-format msgid "" " -s, --separate\n" -" consider files as separate rather than as a single continuous\n" +" consider files as separate rather than as a single " +"continuous\n" " long stream.\n" msgstr "" " -s, --separate\n" -" bestanden als losstaand beschouwen i.p.v. als enkele continue stroom.\n" +" bestanden als losstaand beschouwen i.p.v. als enkele " +"continue stroom.\n" -#: sed/sed.c:123 +#: sed/sed.c:120 +#, c-format msgid "" " -u, --unbuffered\n" -" load minimal amounts of data from the input files and flush\n" +" load minimal amounts of data from the input files and " +"flush\n" " the output buffers more often\n" msgstr "" " -u, --unbuffered\n" " minimale hoeveelheid gegevens laden uit invoerbestanden en\n" " uitvoerbuffers vaker leegmaken\n" -#: sed/sed.c:126 +#: sed/sed.c:123 +#, c-format msgid " --help display this help and exit\n" msgstr " --help deze hulp weergeven en afsluiten\n" -#: sed/sed.c:127 +#: sed/sed.c:124 +#, c-format msgid " --version output version information and exit\n" msgstr " --version versie-informatie weergeven en afsluiten\n" -#: sed/sed.c:128 +#: sed/sed.c:125 +#, c-format msgid "" "\n" "If no -e, --expression, -f, or --file option is given, then the first\n" @@ -294,7 +318,7 @@ msgstr "" "invoerbestanden opgegeven zijn, wordt standaardinvoer gelezen.\n" "\n" -#: sed/sed.c:134 +#: sed/sed.c:131 #, c-format msgid "" "E-mail bug reports to: %s .\n" @@ -304,12 +328,12 @@ msgstr "" "Zorg ervoor dat het woord ``%s'' ergens in het ``Onderwerp:''-veld staat.\n" "Rapporteer fouten in de vertalingen bij <vertaling@vrijschrift.org>.\n" -#: sed/sed.c:271 +#: sed/sed.c:268 #, c-format msgid "super-sed version %s\n" msgstr "super-sed versie %s\n" -#: sed/sed.c:272 +#: sed/sed.c:269 #, c-format msgid "" "based on GNU sed version %s\n" @@ -318,12 +342,12 @@ msgstr "" "gebaseerd op GNU sed versie %s\n" "\n" -#: sed/sed.c:274 +#: sed/sed.c:271 #, c-format msgid "GNU sed version %s\n" msgstr "GNU sed versie %s\n" -#: sed/sed.c:276 +#: sed/sed.c:273 #, c-format msgid "" "%s\n" @@ -363,75 +387,75 @@ msgstr "leesfout op %s: %s" msgid "cannot rename %s: %s" msgstr "kon %s niet hernoemen: %s" -#: lib/regcomp.c:150 +#: lib/regcomp.c:160 msgid "Success" msgstr "Gelukt" -#: lib/regcomp.c:153 +#: lib/regcomp.c:163 msgid "No match" msgstr "Geen overeenkomst" -#: lib/regcomp.c:156 +#: lib/regcomp.c:166 msgid "Invalid regular expression" msgstr "Ongeldige reguliere expressie" -#: lib/regcomp.c:159 +#: lib/regcomp.c:169 msgid "Invalid collation character" msgstr "Ongeldig sorteerteken" -#: lib/regcomp.c:162 +#: lib/regcomp.c:172 msgid "Invalid character class name" msgstr "Ongeldige tekenklassenaam" -#: lib/regcomp.c:165 +#: lib/regcomp.c:175 msgid "Trailing backslash" msgstr "Backslash aan het einde" -#: lib/regcomp.c:168 +#: lib/regcomp.c:178 msgid "Invalid back reference" msgstr "Ongeldige terugverwijzing" -#: lib/regcomp.c:171 +#: lib/regcomp.c:181 msgid "Unmatched [ or [^" msgstr "Ongepaarde [ of [^" -#: lib/regcomp.c:174 +#: lib/regcomp.c:184 msgid "Unmatched ( or \\(" msgstr "Ongepaarde ( of \\(" -#: lib/regcomp.c:177 +#: lib/regcomp.c:187 msgid "Unmatched \\{" msgstr "Ongepaarde \\{" -#: lib/regcomp.c:180 +#: lib/regcomp.c:190 msgid "Invalid content of \\{\\}" msgstr "Ongeldige inhoud van \\{\\}" -#: lib/regcomp.c:183 +#: lib/regcomp.c:193 msgid "Invalid range end" msgstr "Ongeldig bereikeinde" -#: lib/regcomp.c:186 +#: lib/regcomp.c:196 msgid "Memory exhausted" msgstr "Onvoldoende geheugen" -#: lib/regcomp.c:189 +#: lib/regcomp.c:199 msgid "Invalid preceding regular expression" msgstr "Ongeldige voorafgaande reguliere expressie" -#: lib/regcomp.c:192 +#: lib/regcomp.c:202 msgid "Premature end of regular expression" msgstr "Voortijdig einde van reguliere expressie" -#: lib/regcomp.c:195 +#: lib/regcomp.c:205 msgid "Regular expression too big" msgstr "Reguliere expressie is te groot" -#: lib/regcomp.c:198 +#: lib/regcomp.c:208 msgid "Unmatched ) or \\)" msgstr "Ongepaarde ) of \\)" -#: lib/regcomp.c:672 +#: lib/regcomp.c:690 msgid "No previous regular expression" msgstr "Geen eerdere reguliere expressie" @@ -452,18 +476,23 @@ msgstr "Geen eerdere reguliere expressie" #~ " -e script, --expression=script\n" #~ " add the script to the commands to be executed\n" #~ " -f script-file, --file=script-file\n" -#~ " add the contents of script-file to the commands to be executed\n" +#~ " add the contents of script-file to the commands to be " +#~ "executed\n" #~ " -i[suffix], --in-place[=suffix]\n" -#~ " edit files in place (makes backup if extension supplied)\n" +#~ " edit files in place (makes backup if extension " +#~ "supplied)\n" #~ " -l N, --line-length=N\n" -#~ " specify the desired line-wrap length for the `l' command\n" +#~ " specify the desired line-wrap length for the `l' " +#~ "command\n" #~ " -r, --regexp-extended\n" #~ " use extended regular expressions in the script.\n" #~ "%s -s, --separate\n" -#~ " consider files as separate rather than as a single continuous\n" +#~ " consider files as separate rather than as a single " +#~ "continuous\n" #~ " long stream.\n" #~ " -u, --unbuffered\n" -#~ " load minimal amounts of data from the input files and flush\n" +#~ " load minimal amounts of data from the input files and " +#~ "flush\n" #~ " the output buffers more often\n" #~ " --help display this help and exit\n" #~ " -V, --version output version information and exit\n" @@ -474,29 +503,39 @@ msgstr "Geen eerdere reguliere expressie" #~ "specified, then the standard input is read.\n" #~ "\n" #~ msgstr "" -#~ "Gebruik: %s [OPTIE]... {script-alleen-als-geen-ander-script} [invoerbestand]...\n" +#~ "Gebruik: %s [OPTIE]... {script-alleen-als-geen-ander-script} " +#~ "[invoerbestand]...\n" #~ "\n" #~ " -n, --quiet, --silent\n" #~ " automatische weergave van patroonruimte onderdrukken\n" #~ " -e script, --expression=script\n" #~ " het script toevoegen aan de uit te voeren opdrachten\n" #~ " -f scriptbestand, --file=scriptbestand\n" -#~ " de inhoud van scriptbestand toevoegen aan de uit te voeren opdrachten\n" +#~ " de inhoud van scriptbestand toevoegen aan de uit te " +#~ "voeren opdrachten\n" #~ " -i[achtervoegsel], --in-place[=achtervoegsel]\n" -#~ " bestanden ter plekke bewerken (maakt reservekopie als achtervoegsel is opgegeven)\n" +#~ " bestanden ter plekke bewerken (maakt reservekopie als " +#~ "achtervoegsel is opgegeven)\n" #~ " -l N, --line-length=N\n" -#~ " de gewenste afkaplengte voor regels opgeven voor de `l'-opdracht\n" +#~ " de gewenste afkaplengte voor regels opgeven voor de `l'-" +#~ "opdracht\n" #~ " -r, --regexp-extended\n" -#~ " uitgebreide reguliere uitdrukkingen gebruiken in het script.\n" +#~ " uitgebreide reguliere uitdrukkingen gebruiken in het " +#~ "script.\n" #~ "%s -s, --separate\n" -#~ " bestanden afzonderlijk beschouwen in plaats van als één continue, lange stroom.\n" +#~ " bestanden afzonderlijk beschouwen in plaats van als één " +#~ "continue, lange stroom.\n" #~ " -u, --unbuffered\n" -#~ " minimale hoeveelheden gegevens van invoerbestanden laden en vaker de uitvoerbuffers legen\n" +#~ " minimale hoeveelheden gegevens van invoerbestanden laden " +#~ "en vaker de uitvoerbuffers legen\n" #~ " --help deze hulp weergeven en afsluiten\n" #~ " -V, --version versie-informatie weergeven en afsluiten\n" #~ "\n" -#~ "Als geen -e, --expression, -f of --file optie gegeven is, wordt het eerste niet-optie\n" -#~ "argument gebruikt als sed script om te interpreteren. Alle overblijvende argumenten\n" -#~ "zijn namen van invoerbestanden; als geen invoerbestanden zijn opgegeven, wordt gelezen\n" +#~ "Als geen -e, --expression, -f of --file optie gegeven is, wordt het " +#~ "eerste niet-optie\n" +#~ "argument gebruikt als sed script om te interpreteren. Alle overblijvende " +#~ "argumenten\n" +#~ "zijn namen van invoerbestanden; als geen invoerbestanden zijn opgegeven, " +#~ "wordt gelezen\n" #~ "vanaf standaard invoer.\n" #~ "\n" |