From 56b168be5d3d56f8a729acfb5072e0eca429157e Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Fri, 2 Jan 2004 21:20:51 +0000 Subject: Update. 2004-01-02 Jakub Jelinek * posix/regex_internal.c (re_node_set_insert): Remove unused variables. * posix/regex_internal.h (re_dfa_t): Add syntax field. * posix/regcomp.c (parse): Initialize dfa->syntax. * posix/regexec.c (acquire_init_state_context, prune_impossible_nodes, check_matching, check_halt_state_context, proceed_next_node, sift_states_iter_mb, sift_states_backward, update_cur_sifted_state, sift_states_bkref, transit_state, transit_state_sb, transit_state_mb, transit_state_bkref, get_subexp, get_subexp_sub, check_arrival, expand_bkref_cache, build_trtable): Remove preg argument, add dfa argument instead and remove dfa = preg->buffer initialization in the body. Adjust all callers. (check_node_accept_bytes, group_nodes_into_DFAstates, check_node_accept): Likewise. Use dfa->syntax instead of preg->syntax. (check_arrival_add_next_nodes): Remove preg argument. * posix/regex_internal.h (re_match_context_t): Make input re_string_t instead of a pointer to it. * posix/regex_internal.c (re_string_construct_common): Don't clear pstr here... (re_string_construct): ... but only here. * posix/regexec.c (match_ctx_init): Remove input argument. Don't initialize fields to zero. (re_search_internal): Move input into mctx.input. (acquire_init_state_context, check_matching, check_halt_state_context, proceed_next_node, clean_state_log_if_needed, sift_states_bkref, sift_states_iter_mb, transit_state, transit_state_sb, transit_state_mb, transit_state_bkref, get_subexp, check_arrival, check_arrival_add_next_nodes, check_node_accept, extend_buffers): Change mctx->input into &mctx->input and mctx->input->field into mctx->input.field. 2004-01-02 Jakub Jelinek Paolo Bonzini * posix/regex_internal.h (re_const_bitset_ptr_t): New type. (re_string_t): Add newline_anchor, word_char and word_ops_used fields. (re_dfa_t): Change word_char type to bitset. Add word_ops_used field. (re_string_context_at, re_string_reconstruct): Remove last argument. * posix/regex_internal.c (re_string_allocate): Initialize pstr->word_char and pstr->word_ops_used. (re_string_context_at): Remove newline_anchor argument. Use input->newline_anchor instead, swap && conditions. Only use IS_WIDE_WORD_CHAR if input->word_ops_used != 0. Use input->word_char bitmap instead of IS_WORD_CHAR. (re_string_reconstruct): Likewise. Adjust re_string_context_at caller. * posix/regexec.c (acquire_init_state_context, check_halt_state_context, transit_state, transit_state_sb, transit_state_mb, transit_state_bkref, check_arrival, check_node_accept): Adjust re_string_context_at and re_string_reconstruct callers. (re_search_internal): Likewise. Set input.newline_anchor. (build_trtable): Use dfa->word_char bitmap instead of IS_WORD_CHAR. * posix/regcomp.c (init_word_char): Change return type to void. Set dfa->word_ops_used. (free_dfa_content): Don't free dfa->word_char. (parse_expression): Remove error handling for init_word_char. --- posix/regex_internal.c | 40 +++++++++++++++++++++++----------------- 1 file changed, 23 insertions(+), 17 deletions(-) (limited to 'posix/regex_internal.c') diff --git a/posix/regex_internal.c b/posix/regex_internal.c index f07d4a2e7f..2c6c407b02 100644 --- a/posix/regex_internal.c +++ b/posix/regex_internal.c @@ -67,6 +67,8 @@ re_string_allocate (pstr, str, len, init_len, trans, icase, dfa) if (BE (ret != REG_NOERROR, 0)) return ret; + pstr->word_char = dfa->word_char; + pstr->word_ops_used = dfa->word_ops_used; pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str; pstr->valid_len = (pstr->mbs_allocated || dfa->mb_cur_max > 1) ? 0 : len; pstr->valid_raw_len = pstr->valid_len; @@ -84,6 +86,7 @@ re_string_construct (pstr, str, len, trans, icase, dfa) const re_dfa_t *dfa; { reg_errcode_t ret; + memset (pstr, '\0', sizeof (re_string_t)); re_string_construct_common (str, len, pstr, trans, icase, dfa); if (len > 0) @@ -183,7 +186,6 @@ re_string_construct_common (str, len, pstr, trans, icase, dfa) int icase; const re_dfa_t *dfa; { - memset (pstr, '\0', sizeof (re_string_t)); pstr->raw_mbs = (const unsigned char *) str; pstr->len = len; pstr->raw_len = len; @@ -572,9 +574,9 @@ re_string_translate_buffer (pstr) convert to upper case in case of REG_ICASE, apply translation. */ static reg_errcode_t -re_string_reconstruct (pstr, idx, eflags, newline) +re_string_reconstruct (pstr, idx, eflags) re_string_t *pstr; - int idx, eflags, newline; + int idx, eflags; { int offset = idx - pstr->raw_mbs_idx; if (offset < 0) @@ -609,8 +611,7 @@ re_string_reconstruct (pstr, idx, eflags, newline) ) { /* Yes, move them to the front of the buffer. */ - pstr->tip_context = re_string_context_at (pstr, offset - 1, eflags, - newline); + pstr->tip_context = re_string_context_at (pstr, offset - 1, eflags); #ifdef RE_ENABLE_I18N if (pstr->mb_cur_max > 1) memmove (pstr->wcs, pstr->wcs + offset, @@ -695,8 +696,11 @@ re_string_reconstruct (pstr, idx, eflags, newline) memset (pstr->mbs, 255, pstr->valid_len); } pstr->valid_raw_len = pstr->valid_len; - pstr->tip_context = (IS_WIDE_WORD_CHAR (wc) ? CONTEXT_WORD - : ((newline && IS_WIDE_NEWLINE (wc)) + pstr->tip_context = ((BE (pstr->word_ops_used != 0, 0) + && IS_WIDE_WORD_CHAR (wc)) + ? CONTEXT_WORD + : ((IS_WIDE_NEWLINE (wc) + && pstr->newline_anchor) ? CONTEXT_NEWLINE : 0)); } else @@ -705,8 +709,9 @@ re_string_reconstruct (pstr, idx, eflags, newline) int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1]; if (pstr->trans) c = pstr->trans[c]; - pstr->tip_context = (IS_WORD_CHAR (c) ? CONTEXT_WORD - : ((newline && IS_NEWLINE (c)) + pstr->tip_context = (bitset_contain (pstr->word_char, c) + ? CONTEXT_WORD + : ((IS_NEWLINE (c) && pstr->newline_anchor) ? CONTEXT_NEWLINE : 0)); } } @@ -843,9 +848,9 @@ re_string_destruct (pstr) /* Return the context at IDX in INPUT. */ static unsigned int -re_string_context_at (input, idx, eflags, newline_anchor) +re_string_context_at (input, idx, eflags) const re_string_t *input; - int idx, eflags, newline_anchor; + int idx, eflags; { int c; if (idx < 0 || idx == input->len) @@ -874,17 +879,18 @@ re_string_context_at (input, idx, eflags, newline_anchor) return input->tip_context; } wc = input->wcs[wc_idx]; - if (IS_WIDE_WORD_CHAR (wc)) + if (BE (input->word_ops_used != 0, 0) && IS_WIDE_WORD_CHAR (wc)) return CONTEXT_WORD; - return (newline_anchor && IS_WIDE_NEWLINE (wc)) ? CONTEXT_NEWLINE : 0; + return (IS_WIDE_NEWLINE (wc) && input->newline_anchor + ? CONTEXT_NEWLINE : 0); } else #endif { c = re_string_byte_at (input, idx); - if (IS_WORD_CHAR (c)) + if (bitset_contain (input->word_char, c)) return CONTEXT_WORD; - return (newline_anchor && IS_NEWLINE (c)) ? CONTEXT_NEWLINE : 0; + return IS_NEWLINE (c) && input->newline_anchor ? CONTEXT_NEWLINE : 0; } } @@ -1156,7 +1162,7 @@ re_node_set_insert (set, elem) re_node_set *set; int elem; { - int idx, right, mid; + int idx; /* In case the set is empty. */ if (set->alloc == 0) { @@ -1206,7 +1212,7 @@ re_node_set_insert (set, elem) } /* Compare two node sets SET1 and SET2. - return 1 if SET1 and SET2 are equivalent, retrun 0 otherwise. */ + return 1 if SET1 and SET2 are equivalent, return 0 otherwise. */ static int re_node_set_compare (set1, set2) -- cgit v1.2.1