summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorPaolo Bonzini <bonzini@gnu.org>2004-12-03 20:36:09 +0000
committerPaolo Bonzini <bonzini@gnu.org>2008-01-09 16:11:36 +0100
commit14f43c2486ea334fa0042290315d1847f4fa911e (patch)
tree0816e2ee8e3c87f52ee72c3826dba8d4cd30cf65 /lib
parent52e232ab96bd80a26559c865ca1e274a0167c13d (diff)
downloadsed-14f43c2486ea334fa0042290315d1847f4fa911e.tar.gz
Fix bugs in the accept_mb optimization.
2004-12-03 Paolo Bonzini <bonzini@gnu.org> * regcomp.c (peek_token, parse_expression, build_charclass_op): Do not set accept_mb in the token. (parse_bracket_exp): Do not modify existing DFA nodes. * regex-internal.c (re_node_set_insert_last): Remove unused variable. (re_dfa_add_node): Set accept_mb in the DFA node. git-archimport-id: bonzini@gnu.org--2004b/sed--stable--4.1--patch-27
Diffstat (limited to 'lib')
-rw-r--r--lib/regcomp.c66
-rw-r--r--lib/regex_internal.c7
2 files changed, 37 insertions, 36 deletions
diff --git a/lib/regcomp.c b/lib/regcomp.c
index fb8f210..1f2165f 100644
--- a/lib/regcomp.c
+++ b/lib/regcomp.c
@@ -1788,7 +1788,6 @@ peek_token (token, input, syntax)
token->word_char = 0;
#ifdef RE_ENABLE_I18N
- token->accept_mb = 0;
token->mb_partial = 0;
if (input->mb_cur_max > 1 &&
!re_string_first_byte (input, re_string_cur_idx (input)))
@@ -2384,9 +2383,6 @@ parse_expression (regexp, preg, token, syntax, nest, err)
fetch_token (token, regexp, syntax);
return tree;
case OP_PERIOD:
-#ifdef RE_ENABLE_I18N
- token->accept_mb = dfa->mb_cur_max > 1;
-#endif
tree = re_dfa_add_tree_node (dfa, NULL, NULL, token);
if (BE (tree == NULL, 0))
{
@@ -3286,58 +3282,61 @@ parse_bracket_exp (regexp, dfa, token, syntax, err)
/* Ensure only single byte characters are set. */
if (dfa->mb_cur_max > 1)
bitset_mask (sbcset, dfa->sb_char);
-#endif /* RE_ENABLE_I18N */
-
- /* Build a tree for simple bracket. */
- br_token.type = SIMPLE_BRACKET;
- br_token.opr.sbcset = sbcset;
- work_tree = re_dfa_add_tree_node (dfa, NULL, NULL, &br_token);
- if (BE (work_tree == NULL, 0))
- goto parse_bracket_exp_espace;
-#ifdef RE_ENABLE_I18N
if (mbcset->nmbchars || mbcset->ncoll_syms || mbcset->nequiv_classes
|| mbcset->nranges || (dfa->mb_cur_max > 1 && (mbcset->nchar_classes
|| mbcset->non_match)))
{
- re_token_t alt_token;
bin_tree_t *mbc_tree;
int sbc_idx;
/* Build a tree for complex bracket. */
dfa->has_mb_node = 1;
+ br_token.type = COMPLEX_BRACKET;
+ br_token.opr.mbcset = mbcset;
+ mbc_tree = re_dfa_add_tree_node (dfa, NULL, NULL, &br_token);
+ if (BE (mbc_tree == NULL, 0))
+ goto parse_bracket_exp_espace;
for (sbc_idx = 0; sbc_idx < BITSET_UINTS; ++sbc_idx)
if (sbcset[sbc_idx])
break;
/* If there are no bits set in sbcset, there is no point
of having both SIMPLE_BRACKET and COMPLEX_BRACKET. */
- if (sbc_idx == BITSET_UINTS)
+ if (sbc_idx < BITSET_UINTS)
+ {
+ re_token_t alt_token;
+ /* Build a tree for simple bracket. */
+ br_token.type = SIMPLE_BRACKET;
+ br_token.opr.sbcset = sbcset;
+ work_tree = re_dfa_add_tree_node (dfa, NULL, NULL, &br_token);
+ if (BE (work_tree == NULL, 0))
+ goto parse_bracket_exp_espace;
+
+ /* Then join them by ALT node. */
+ alt_token.type = OP_ALT;
+ dfa->has_plural_match = 1;
+ work_tree = re_dfa_add_tree_node (dfa, work_tree, mbc_tree, &alt_token);
+ if (BE (work_tree == NULL, 0))
+ goto parse_bracket_exp_espace;
+ }
+ else
{
re_free (sbcset);
- dfa->nodes[work_tree->node_idx].type = COMPLEX_BRACKET;
- dfa->nodes[work_tree->node_idx].opr.mbcset = mbcset;
- return work_tree;
+ work_tree = mbc_tree;
}
- br_token.type = COMPLEX_BRACKET;
- br_token.accept_mb = 1;
- br_token.opr.mbcset = mbcset;
- mbc_tree = re_dfa_add_tree_node (dfa, NULL, NULL, &br_token);
- if (BE (mbc_tree == NULL, 0))
- goto parse_bracket_exp_espace;
- /* Then join them by ALT node. */
- alt_token.type = OP_ALT;
- dfa->has_plural_match = 1;
- work_tree = re_dfa_add_tree_node (dfa, work_tree, mbc_tree, &alt_token);
- if (BE (mbc_tree != NULL, 1))
- return work_tree;
}
else
{
+ /* Build a tree for simple bracket. */
+ br_token.type = SIMPLE_BRACKET;
+ br_token.opr.sbcset = sbcset;
+ work_tree = re_dfa_add_tree_node (dfa, NULL, NULL, &br_token);
+ if (BE (work_tree == NULL, 0))
+ goto parse_bracket_exp_espace;
+
free_charset (mbcset);
- return work_tree;
}
-#else /* not RE_ENABLE_I18N */
- return work_tree;
#endif /* not RE_ENABLE_I18N */
+ return work_tree;
parse_bracket_exp_espace:
*err = REG_ESPACE;
@@ -3709,7 +3708,6 @@ build_charclass_op (dfa, trans, class_name, extra, non_match, err)
bin_tree_t *mbc_tree;
/* Build a tree for complex bracket. */
br_token.type = COMPLEX_BRACKET;
- br_token.accept_mb = 1;
br_token.opr.mbcset = mbcset;
dfa->has_mb_node = 1;
mbc_tree = re_dfa_add_tree_node (dfa, NULL, NULL, &br_token);
diff --git a/lib/regex_internal.c b/lib/regex_internal.c
index 6fd92f7..4064218 100644
--- a/lib/regex_internal.c
+++ b/lib/regex_internal.c
@@ -1259,8 +1259,6 @@ re_node_set_insert_last (set, elem)
re_node_set *set;
int elem;
{
- int idx;
-
/* Realloc if we need. */
if (set->alloc == set->nelem)
{
@@ -1340,6 +1338,7 @@ re_dfa_add_node (dfa, token, mode)
re_token_t token;
int mode;
{
+ int type = token.type;
if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0))
{
int new_nodes_alloc = dfa->nodes_alloc * 2;
@@ -1376,6 +1375,10 @@ re_dfa_add_node (dfa, token, mode)
dfa->nodes[dfa->nodes_len].opt_subexp = 0;
dfa->nodes[dfa->nodes_len].duplicated = 0;
dfa->nodes[dfa->nodes_len].constraint = 0;
+#ifdef RE_ENABLE_I18N
+ dfa->nodes[dfa->nodes_len].accept_mb =
+ (type == OP_PERIOD && dfa->mb_cur_max > 1) || type == COMPLEX_BRACKET;
+#endif
return dfa->nodes_len++;
}