summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorPaolo Bonzini <bonzini@gnu.org>2004-11-02 14:07:36 +0000
committerPaolo Bonzini <bonzini@gnu.org>2008-01-09 16:10:43 +0100
commit974f9f12dac89a6d67315075082049ac606ac54d (patch)
treeb2e1b7502d6893ddf8ebf11e92d821103719cd89 /lib
parent7246bc10216da6352a3db3969867e70d9b6e0e11 (diff)
downloadsed-974f9f12dac89a6d67315075082049ac606ac54d.tar.gz
speedup transit_state
2004-10-27 Paolo Bonzini <bonzini@gnu.org> * regex_internal.h (struct re_dfastate_t): Make word_trtable a pointer to the 512-item transition table. * regexec.c (build_trtable): Fill in either state->trtable or state->word_trtable. Return a boolean indicating success. (transit_state): Expect state->trtable to be a 256-item transition table. Reorganize code to have less tests in the common case, and to save an indentation level. git-archimport-id: bonzini@gnu.org--2004b/sed--stable--4.1--patch-5
Diffstat (limited to 'lib')
-rw-r--r--lib/regex_internal.c1
-rw-r--r--lib/regex_internal.h3
-rw-r--r--lib/regexec.c81
3 files changed, 43 insertions, 42 deletions
diff --git a/lib/regex_internal.c b/lib/regex_internal.c
index 0f0aba0..7eb9170 100644
--- a/lib/regex_internal.c
+++ b/lib/regex_internal.c
@@ -1647,6 +1647,7 @@ free_state (state)
re_free (state->entrance_nodes);
}
re_node_set_free (&state->nodes);
+ re_free (state->word_trtable);
re_free (state->trtable);
re_free (state);
}
diff --git a/lib/regex_internal.h b/lib/regex_internal.h
index d7d7d9a..4f765f5 100644
--- a/lib/regex_internal.h
+++ b/lib/regex_internal.h
@@ -477,7 +477,7 @@ struct re_dfastate_t
unsigned int hash;
re_node_set nodes;
re_node_set *entrance_nodes;
- struct re_dfastate_t **trtable;
+ struct re_dfastate_t **trtable, **word_trtable;
unsigned int context : 4;
unsigned int halt : 1;
/* If this state can accept `multi byte'.
@@ -487,7 +487,6 @@ struct re_dfastate_t
/* If this state has backreference node(s). */
unsigned int has_backref : 1;
unsigned int has_constraint : 1;
- unsigned int word_trtable : 1;
};
typedef struct re_dfastate_t re_dfastate_t;
diff --git a/lib/regexec.c b/lib/regexec.c
index 8bc6762..4b710f5 100644
--- a/lib/regexec.c
+++ b/lib/regexec.c
@@ -170,8 +170,8 @@ static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,
re_node_set *cur_nodes, int cur_str,
int last_str, int subexp_num,
int type) internal_function;
-static re_dfastate_t **build_trtable (re_dfa_t *dfa,
- re_dfastate_t *state) internal_function;
+static int build_trtable (re_dfa_t *dfa,
+ re_dfastate_t *state) internal_function;
#ifdef RE_ENABLE_I18N
static int check_node_accept_bytes (re_dfa_t *dfa, int node_idx,
const re_string_t *input, int idx) internal_function;
@@ -2198,7 +2198,6 @@ transit_state (err, mctx, state)
re_match_context_t *mctx;
re_dfastate_t *state;
{
- re_dfa_t *const dfa = mctx->dfa;
re_dfastate_t **trtable;
unsigned char ch;
@@ -2213,21 +2212,22 @@ transit_state (err, mctx, state)
#endif /* RE_ENABLE_I18N */
/* Then decide the next state with the single byte. */
- if (1)
+#if 0
+ if (0)
+ /* don't use transition table */
+ return transit_state_sb (err, mctx, state);
+#endif
+
+ /* Use transition table */
+ ch = re_string_fetch_byte (&mctx->input);
+ for (;;)
{
- /* Use transition table */
- ch = re_string_fetch_byte (&mctx->input);
trtable = state->trtable;
- if (trtable == NULL)
- {
- trtable = build_trtable (dfa, state);
- if (trtable == NULL)
- {
- *err = REG_ESPACE;
- return NULL;
- }
- }
- if (BE (state->word_trtable, 0))
+ if (BE (trtable != NULL, 1))
+ return trtable[ch];
+
+ trtable = state->word_trtable;
+ if (BE (trtable != NULL, 1))
{
unsigned int context;
context
@@ -2239,14 +2239,15 @@ transit_state (err, mctx, state)
else
return trtable[ch];
}
- else
- return trtable[ch];
+
+ if (!build_trtable (mctx->dfa, state))
+ {
+ *err = REG_ESPACE;
+ return NULL;
+ }
+
+ /* Retry, we now have a transition table. */
}
-#if 0
- else
- /* don't use transition table */
- return transit_state_sb (err, mctx, state);
-#endif
}
/* Update the state_log if we need */
@@ -3247,15 +3248,15 @@ expand_bkref_cache (mctx, cur_nodes, cur_str, last_str, subexp_num,
}
/* Build transition table for the state.
- Return the new table if succeeded, otherwise return NULL. */
+ Return 1 if succeeded, otherwise return NULL. */
-static re_dfastate_t **
+static int
build_trtable (dfa, state)
re_dfa_t *dfa;
re_dfastate_t *state;
{
reg_errcode_t err;
- int i, j, ch;
+ int i, j, ch, need_word_trtable = 0;
unsigned int elem, mask;
int dests_node_malloced = 0, dest_states_malloced = 0;
int ndests; /* Number of the destination states from `state'. */
@@ -3279,13 +3280,13 @@ build_trtable (dfa, state)
dests_node = (re_node_set *)
malloc ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX);
if (BE (dests_node == NULL, 0))
- return NULL;
+ return 0;
dests_node_malloced = 1;
}
dests_ch = (bitset *) (dests_node + SBC_MAX);
/* Initialize transiton table. */
- state->word_trtable = 0;
+ state->word_trtable = state->trtable = 0;
/* At first, group all nodes belonging to `state' into several
destinations. */
@@ -3294,14 +3295,14 @@ build_trtable (dfa, state)
{
if (dests_node_malloced)
free (dests_node);
- /* Return NULL in case of an error, trtable otherwise. */
+ /* Return 0 in case of an error, 1 otherwise. */
if (ndests == 0)
{
state->trtable = (re_dfastate_t **)
calloc (sizeof (re_dfastate_t *), SBC_MAX);;
- return state->trtable;
+ return 1;
}
- return NULL;
+ return 0;
}
err = re_node_set_alloc (&follows, ndests + 1);
@@ -3328,7 +3329,7 @@ out_free:
re_node_set_free (dests_node + i);
if (dests_node_malloced)
free (dests_node);
- return NULL;
+ return 0;
}
dest_states_malloced = 1;
}
@@ -3366,7 +3367,7 @@ out_free:
if (dest_states[i] != dest_states_word[i]
&& dfa->mb_cur_max > 1)
- state->word_trtable = 1;
+ need_word_trtable = 1;
dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
CONTEXT_NEWLINE);
@@ -3381,13 +3382,14 @@ out_free:
bitset_merge (acceptable, dests_ch[i]);
}
- if (!BE (state->word_trtable, 0))
+ if (!BE (need_word_trtable, 0))
{
/* We don't care about whether the following character is a word
character, or we are in a single-byte character set so we can
discern by looking at the character code: allocate a
256-entry transition table. */
- trtable = (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
+ trtable = state->trtable =
+ (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
if (BE (trtable == NULL, 0))
goto out_free;
@@ -3417,8 +3419,8 @@ out_free:
by looking at the character code: build two 256-entry
transition tables, one starting at trtable[0] and one
starting at trtable[SBC_MAX]. */
- trtable = (re_dfastate_t **) calloc (sizeof (re_dfastate_t *),
- 2 * SBC_MAX);
+ trtable = state->word_trtable =
+ (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), 2 * SBC_MAX);
if (BE (trtable == NULL, 0))
goto out_free;
@@ -3449,7 +3451,7 @@ out_free:
{
/* k-th destination accepts newline character. */
trtable[NEWLINE_CHAR] = dest_states_nl[j];
- if (state->word_trtable)
+ if (need_word_trtable)
trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
/* There must be only one destination which accepts
newline. See group_nodes_into_DFAstates. */
@@ -3467,8 +3469,7 @@ out_free:
if (dests_node_malloced)
free (dests_node);
- state->trtable = trtable;
- return trtable;
+ return 1;
}
/* Group all nodes belonging to STATE into several destinations.