From 367417c47be947ed7d0af7b430af5ff1626bf7c1 Mon Sep 17 00:00:00 2001 From: "Arnold D. Robbins" Date: Sun, 14 Jun 2015 19:06:21 +0300 Subject: Sync regex routines with GLIBC. --- ChangeLog | 4 ++++ regcomp.c | 64 ++++++++++++++++++++++++++++---------------------------- regex_internal.h | 5 ++++- regexec.c | 4 ++++ 4 files changed, 44 insertions(+), 33 deletions(-) diff --git a/ChangeLog b/ChangeLog index da14af06..0fadaf16 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,7 @@ +2015-06-14 Arnold D. Robbins + + * regcomp.c, regex_internal.h, regexec.c: Sync with GLIBC. + 2015-05-26 Paul Eggert * floatcomp.c (count_trailing_zeros): New function. diff --git a/regcomp.c b/regcomp.c index f813a1ef..fd3ab84a 100644 --- a/regcomp.c +++ b/regcomp.c @@ -235,7 +235,7 @@ btowc (int c) compiles PATTERN (of length LENGTH) and puts the result in BUFP. Returns 0 if the pattern was valid, otherwise an error string. - Assumes the `allocated' (and perhaps `buffer') and `translate' fields + Assumes the 'allocated' (and perhaps 'buffer') and 'translate' fields are set in BUFP on entry. */ const char * @@ -264,7 +264,7 @@ re_compile_pattern (pattern, length, bufp) weak_alias (__re_compile_pattern, re_compile_pattern) #endif -/* Set by `re_set_syntax' to the current regexp syntax to recognize. Can +/* Set by 're_set_syntax' to the current regexp syntax to recognize. Can also be assigned to arbitrarily: each pattern buffer stores its own syntax, so it can be changed between regex compilations. */ /* This has no initializer because initialized variables in Emacs @@ -315,8 +315,8 @@ weak_alias (__re_compile_fastmap, re_compile_fastmap) #endif static inline void -__attribute ((always_inline)) -re_set_fastmap (char *fastmap, int icase, int ch) +__attribute__ ((always_inline)) +re_set_fastmap (char *fastmap, bool icase, int ch) { fastmap[ch] = 1; if (icase) @@ -462,15 +462,15 @@ re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state, PREG is a regex_t *. 
We do not expect any fields to be initialized, since POSIX says we shouldn't. Thus, we set - `buffer' to the compiled pattern; - `used' to the length of the compiled pattern; - `syntax' to RE_SYNTAX_POSIX_EXTENDED if the + 'buffer' to the compiled pattern; + 'used' to the length of the compiled pattern; + 'syntax' to RE_SYNTAX_POSIX_EXTENDED if the REG_EXTENDED bit in CFLAGS is set; otherwise, to RE_SYNTAX_POSIX_BASIC; - `newline_anchor' to REG_NEWLINE being set in CFLAGS; - `fastmap' to an allocated space for the fastmap; - `fastmap_accurate' to zero; - `re_nsub' to the number of subexpressions in PATTERN. + 'newline_anchor' to REG_NEWLINE being set in CFLAGS; + 'fastmap' to an allocated space for the fastmap; + 'fastmap_accurate' to zero; + 're_nsub' to the number of subexpressions in PATTERN. PATTERN is the address of the pattern string. @@ -730,7 +730,7 @@ re_comp (s) + __re_error_msgid_idx[(int) REG_ESPACE]); } - /* Since `re_exec' always passes NULL for the `regs' argument, we + /* Since 're_exec' always passes NULL for the 'regs' argument, we don't need to initialize the pattern buffer fields which affect it. */ /* Match anchors at newlines. */ @@ -1570,7 +1570,7 @@ duplicate_node_closure (re_dfa_t *dfa, int top_org_node, int top_clone_node, destination. */ org_dest = dfa->edests[org_node].elems[0]; re_node_set_empty (dfa->edests + clone_node); - /* If the node is root_node itself, it means the epsilon clsoure + /* If the node is root_node itself, it means the epsilon closure has a loop. Then tie it to the destination of the root_node. */ if (org_node == root_node && clone_node != org_node) { @@ -1579,7 +1579,7 @@ duplicate_node_closure (re_dfa_t *dfa, int top_org_node, int top_clone_node, return REG_ESPACE; break; } - /* In case of the node has another constraint, add it. */ + /* In case the node has another constraint, append it. 
*/ constraint |= dfa->nodes[org_node].constraint; clone_dest = duplicate_node (dfa, org_dest, constraint); if (BE (clone_dest == -1, 0)) @@ -1722,7 +1722,7 @@ calc_eclosure (re_dfa_t *dfa) /* If we have already calculated, skip it. */ if (dfa->eclosures[node_idx].nelem != 0) continue; - /* Calculate epsilon closure of `node_idx'. */ + /* Calculate epsilon closure of 'node_idx'. */ err = calc_eclosure_iter (&eclosure_elem, dfa, node_idx, 1); if (BE (err != REG_NOERROR, 0)) return err; @@ -1789,11 +1789,11 @@ calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, int node, int root) } else eclosure_elem = dfa->eclosures[edest]; - /* Merge the epsilon closure of `edest'. */ + /* Merge the epsilon closure of 'edest'. */ err = re_node_set_merge (&eclosure, &eclosure_elem); if (BE (err != REG_NOERROR, 0)) return err; - /* If the epsilon closure of `edest' is incomplete, + /* If the epsilon closure of 'edest' is incomplete, the epsilon closure of this node is also incomplete. */ if (dfa->eclosures[edest].nelem == 0) { @@ -2155,7 +2155,7 @@ peek_token_bracket (re_token_t *token, re_string_t *input, reg_syntax_t syntax) /* Entry point of the parser. Parse the regular expression REGEXP and return the structure tree. - If an error is occured, ERR is set by error code, and return NULL. + If an error occurs, ERR is set by error code, and return NULL. This function build the following tree, from regular expression : CAT / \ @@ -2197,7 +2197,7 @@ parse (re_string_t *regexp, regex_t *preg, reg_syntax_t syntax, / \ - ALT means alternative, which represents the operator `|'. */ + ALT means alternative, which represents the operator '|'. */ static bin_tree_t * parse_reg_exp (re_string_t *regexp, regex_t *preg, re_token_t *token, @@ -2693,7 +2693,7 @@ parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa, Build the range expression which starts from START_ELEM, and ends at END_ELEM. The result are written to MBCSET and SBCSET. 
RANGE_ALLOC is the allocated size of mbcset->range_starts, and - mbcset->range_ends, is a pointer argument sinse we may + mbcset->range_ends, is a pointer argument since we may update it. */ static reg_errcode_t @@ -2867,13 +2867,13 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, const int32_t *symb_table; const unsigned char *extra; - /* Local function for parse_bracket_exp used in _LIBC environement. - Seek the collating symbol entry correspondings to NAME. + /* Local function for parse_bracket_exp used in _LIBC environment. + Seek the collating symbol entry corresponding to NAME. Return the index of the symbol in the SYMB_TABLE, or -1 if not found. */ auto inline int32_t - __attribute ((always_inline)) + __attribute__ ((always_inline)) seek_collating_symbol_entry (const unsigned char *name, size_t name_len) { int32_t elem; @@ -2899,7 +2899,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, Return the value if succeeded, UINT_MAX otherwise. */ auto inline unsigned int - __attribute ((always_inline)) + __attribute__ ((always_inline)) lookup_collation_sequence_value (bracket_elem_t *br_elem) { if (br_elem->type == SB_CHAR) @@ -2959,15 +2959,15 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, return UINT_MAX; } - /* Local function for parse_bracket_exp used in _LIBC environement. + /* Local function for parse_bracket_exp used in _LIBC environment. Build the range expression which starts from START_ELEM, and ends at END_ELEM. The result are written to MBCSET and SBCSET. RANGE_ALLOC is the allocated size of mbcset->range_starts, and - mbcset->range_ends, is a pointer argument sinse we may + mbcset->range_ends, is a pointer argument since we may update it. 
*/ auto inline reg_errcode_t - __attribute ((always_inline)) + __attribute__ ((always_inline)) build_range_exp (bitset_t sbcset, re_charset_t *mbcset, int *range_alloc, bracket_elem_t *start_elem, bracket_elem_t *end_elem) { @@ -3040,14 +3040,14 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, return REG_NOERROR; } - /* Local function for parse_bracket_exp used in _LIBC environement. + /* Local function for parse_bracket_exp used in _LIBC environment. Build the collating element which is represented by NAME. The result are written to MBCSET and SBCSET. COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a - pointer argument sinse we may update it. */ + pointer argument since we may update it. */ auto inline reg_errcode_t - __attribute ((always_inline)) + __attribute__ ((always_inline)) build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset, int *coll_sym_alloc, const unsigned char *name) { @@ -3486,7 +3486,7 @@ parse_bracket_symbol (bracket_elem_t *elem, re_string_t *regexp, Build the equivalence class which is represented by NAME. The result are written to MBCSET and SBCSET. EQUIV_CLASS_ALLOC is the allocated size of mbcset->equiv_classes, - is a pointer argument sinse we may update it. */ + is a pointer argument since we may update it. */ static reg_errcode_t #ifdef RE_ENABLE_I18N @@ -3580,7 +3580,7 @@ build_equiv_class (bitset_t sbcset, const unsigned char *name) Build the character class which is represented by NAME. The result are written to MBCSET and SBCSET. CHAR_CLASS_ALLOC is the allocated size of mbcset->char_classes, - is a pointer argument sinse we may update it. */ + is a pointer argument since we may update it. 
*/ static reg_errcode_t #ifdef RE_ENABLE_I18N diff --git a/regex_internal.h b/regex_internal.h index 327bd7e0..e0dd3517 100644 --- a/regex_internal.h +++ b/regex_internal.h @@ -803,6 +803,10 @@ re_string_wchar_at (const re_string_t *pstr, int idx) } # ifndef NOT_IN_libc +# ifdef _LIBC +# include <locale/weight.h> +# endif + static int internal_function __attribute__ ((pure, unused)) re_string_elem_size_at (const re_string_t *pstr, int idx) @@ -810,7 +814,6 @@ re_string_elem_size_at (const re_string_t *pstr, int idx) # ifdef _LIBC const unsigned char *p, *extra; const int32_t *table, *indirect; -# include <locale/weight.h> uint_fast32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); if (nrules != 0) diff --git a/regexec.c b/regexec.c index 46a4d0ae..31bb9e66 100644 --- a/regexec.c +++ b/regexec.c @@ -17,6 +17,10 @@ License along with the GNU C Library; if not, see <http://www.gnu.org/licenses/>. */ +#ifdef HAVE_STDINT_H +#include <stdint.h> +#endif /* HAVE_STDINT_H */ + static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags, int n) internal_function; static void match_ctx_clean (re_match_context_t *mctx) internal_function; -- cgit v1.2.1