diff options
author | Paolo Bonzini <bonzini@gnu.org> | 2004-12-03 20:36:09 +0000 |
---|---|---|
committer | Paolo Bonzini <bonzini@gnu.org> | 2008-01-09 16:11:36 +0100 |
commit | 14f43c2486ea334fa0042290315d1847f4fa911e (patch) | |
tree | 0816e2ee8e3c87f52ee72c3826dba8d4cd30cf65 /lib | |
parent | 52e232ab96bd80a26559c865ca1e274a0167c13d (diff) | |
download | sed-14f43c2486ea334fa0042290315d1847f4fa911e.tar.gz |
Fix bugs in the accept_mb optimization.
2004-12-03 Paolo Bonzini <bonzini@gnu.org>
* regcomp.c (peek_token, parse_expression, build_charclass_op): Do
not set accept_mb in the token.
(parse_bracket_exp): Do not modify existing DFA nodes.
* regex_internal.c (re_node_set_insert_last): Remove unused variable.
(re_dfa_add_node): Set accept_mb in the DFA node.
git-archimport-id: bonzini@gnu.org--2004b/sed--stable--4.1--patch-27
Diffstat (limited to 'lib')
-rw-r--r-- | lib/regcomp.c | 66 | ||||
-rw-r--r-- | lib/regex_internal.c | 7 |
2 files changed, 37 insertions, 36 deletions
diff --git a/lib/regcomp.c b/lib/regcomp.c index fb8f210..1f2165f 100644 --- a/lib/regcomp.c +++ b/lib/regcomp.c @@ -1788,7 +1788,6 @@ peek_token (token, input, syntax) token->word_char = 0; #ifdef RE_ENABLE_I18N - token->accept_mb = 0; token->mb_partial = 0; if (input->mb_cur_max > 1 && !re_string_first_byte (input, re_string_cur_idx (input))) @@ -2384,9 +2383,6 @@ parse_expression (regexp, preg, token, syntax, nest, err) fetch_token (token, regexp, syntax); return tree; case OP_PERIOD: -#ifdef RE_ENABLE_I18N - token->accept_mb = dfa->mb_cur_max > 1; -#endif tree = re_dfa_add_tree_node (dfa, NULL, NULL, token); if (BE (tree == NULL, 0)) { @@ -3286,58 +3282,61 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) /* Ensure only single byte characters are set. */ if (dfa->mb_cur_max > 1) bitset_mask (sbcset, dfa->sb_char); -#endif /* RE_ENABLE_I18N */ - - /* Build a tree for simple bracket. */ - br_token.type = SIMPLE_BRACKET; - br_token.opr.sbcset = sbcset; - work_tree = re_dfa_add_tree_node (dfa, NULL, NULL, &br_token); - if (BE (work_tree == NULL, 0)) - goto parse_bracket_exp_espace; -#ifdef RE_ENABLE_I18N if (mbcset->nmbchars || mbcset->ncoll_syms || mbcset->nequiv_classes || mbcset->nranges || (dfa->mb_cur_max > 1 && (mbcset->nchar_classes || mbcset->non_match))) { - re_token_t alt_token; bin_tree_t *mbc_tree; int sbc_idx; /* Build a tree for complex bracket. */ dfa->has_mb_node = 1; + br_token.type = COMPLEX_BRACKET; + br_token.opr.mbcset = mbcset; + mbc_tree = re_dfa_add_tree_node (dfa, NULL, NULL, &br_token); + if (BE (mbc_tree == NULL, 0)) + goto parse_bracket_exp_espace; for (sbc_idx = 0; sbc_idx < BITSET_UINTS; ++sbc_idx) if (sbcset[sbc_idx]) break; /* If there are no bits set in sbcset, there is no point of having both SIMPLE_BRACKET and COMPLEX_BRACKET. */ - if (sbc_idx == BITSET_UINTS) + if (sbc_idx < BITSET_UINTS) + { + re_token_t alt_token; + /* Build a tree for simple bracket. 
*/ + br_token.type = SIMPLE_BRACKET; + br_token.opr.sbcset = sbcset; + work_tree = re_dfa_add_tree_node (dfa, NULL, NULL, &br_token); + if (BE (work_tree == NULL, 0)) + goto parse_bracket_exp_espace; + + /* Then join them by ALT node. */ + alt_token.type = OP_ALT; + dfa->has_plural_match = 1; + work_tree = re_dfa_add_tree_node (dfa, work_tree, mbc_tree, &alt_token); + if (BE (work_tree == NULL, 0)) + goto parse_bracket_exp_espace; + } + else { re_free (sbcset); - dfa->nodes[work_tree->node_idx].type = COMPLEX_BRACKET; - dfa->nodes[work_tree->node_idx].opr.mbcset = mbcset; - return work_tree; + work_tree = mbc_tree; } - br_token.type = COMPLEX_BRACKET; - br_token.accept_mb = 1; - br_token.opr.mbcset = mbcset; - mbc_tree = re_dfa_add_tree_node (dfa, NULL, NULL, &br_token); - if (BE (mbc_tree == NULL, 0)) - goto parse_bracket_exp_espace; - /* Then join them by ALT node. */ - alt_token.type = OP_ALT; - dfa->has_plural_match = 1; - work_tree = re_dfa_add_tree_node (dfa, work_tree, mbc_tree, &alt_token); - if (BE (mbc_tree != NULL, 1)) - return work_tree; } else { + /* Build a tree for simple bracket. */ + br_token.type = SIMPLE_BRACKET; + br_token.opr.sbcset = sbcset; + work_tree = re_dfa_add_tree_node (dfa, NULL, NULL, &br_token); + if (BE (work_tree == NULL, 0)) + goto parse_bracket_exp_espace; + free_charset (mbcset); - return work_tree; } -#else /* not RE_ENABLE_I18N */ - return work_tree; #endif /* not RE_ENABLE_I18N */ + return work_tree; parse_bracket_exp_espace: *err = REG_ESPACE; @@ -3709,7 +3708,6 @@ build_charclass_op (dfa, trans, class_name, extra, non_match, err) bin_tree_t *mbc_tree; /* Build a tree for complex bracket. 
*/ br_token.type = COMPLEX_BRACKET; - br_token.accept_mb = 1; br_token.opr.mbcset = mbcset; dfa->has_mb_node = 1; mbc_tree = re_dfa_add_tree_node (dfa, NULL, NULL, &br_token); diff --git a/lib/regex_internal.c b/lib/regex_internal.c index 6fd92f7..4064218 100644 --- a/lib/regex_internal.c +++ b/lib/regex_internal.c @@ -1259,8 +1259,6 @@ re_node_set_insert_last (set, elem) re_node_set *set; int elem; { - int idx; - /* Realloc if we need. */ if (set->alloc == set->nelem) { @@ -1340,6 +1338,7 @@ re_dfa_add_node (dfa, token, mode) re_token_t token; int mode; { + int type = token.type; if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0)) { int new_nodes_alloc = dfa->nodes_alloc * 2; @@ -1376,6 +1375,10 @@ re_dfa_add_node (dfa, token, mode) dfa->nodes[dfa->nodes_len].opt_subexp = 0; dfa->nodes[dfa->nodes_len].duplicated = 0; dfa->nodes[dfa->nodes_len].constraint = 0; +#ifdef RE_ENABLE_I18N + dfa->nodes[dfa->nodes_len].accept_mb = + (type == OP_PERIOD && dfa->mb_cur_max > 1) || type == COMPLEX_BRACKET; +#endif return dfa->nodes_len++; } |