summaryrefslogtreecommitdiff
path: root/lib/regexec.c
diff options
context:
space:
mode:
Diffstat (limited to 'lib/regexec.c')
-rw-r--r--lib/regexec.c179
1 files changed, 80 insertions, 99 deletions
diff --git a/lib/regexec.c b/lib/regexec.c
index 05a8e807e..afdc1737b 100644
--- a/lib/regexec.c
+++ b/lib/regexec.c
@@ -1,5 +1,5 @@
/* Extended regular expression matching and search library.
- Copyright (C) 2002-2014 Free Software Foundation, Inc.
+ Copyright (C) 2002-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
@@ -219,12 +219,8 @@ static reg_errcode_t extend_buffers (re_match_context_t *mctx, int min_len)
We return 0 if we find a match and REG_NOMATCH if not. */
int
-regexec (preg, string, nmatch, pmatch, eflags)
- const regex_t *_Restrict_ preg;
- const char *_Restrict_ string;
- size_t nmatch;
- regmatch_t pmatch[_Restrict_arr_];
- int eflags;
+regexec (const regex_t *_Restrict_ preg, const char *_Restrict_ string,
+ size_t nmatch, regmatch_t pmatch[], int eflags)
{
reg_errcode_t err;
Idx start, length;
@@ -305,11 +301,8 @@ compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);
match was found and -2 indicates an internal error. */
regoff_t
-re_match (bufp, string, length, start, regs)
- struct re_pattern_buffer *bufp;
- const char *string;
- Idx length, start;
- struct re_registers *regs;
+re_match (struct re_pattern_buffer *bufp, const char *string, Idx length,
+ Idx start, struct re_registers *regs)
{
return re_search_stub (bufp, string, length, start, 0, length, regs, true);
}
@@ -318,12 +311,8 @@ weak_alias (__re_match, re_match)
#endif
regoff_t
-re_search (bufp, string, length, start, range, regs)
- struct re_pattern_buffer *bufp;
- const char *string;
- Idx length, start;
- regoff_t range;
- struct re_registers *regs;
+re_search (struct re_pattern_buffer *bufp, const char *string, Idx length,
+ Idx start, regoff_t range, struct re_registers *regs)
{
return re_search_stub (bufp, string, length, start, range, length, regs,
false);
@@ -333,11 +322,9 @@ weak_alias (__re_search, re_search)
#endif
regoff_t
-re_match_2 (bufp, string1, length1, string2, length2, start, regs, stop)
- struct re_pattern_buffer *bufp;
- const char *string1, *string2;
- Idx length1, length2, start, stop;
- struct re_registers *regs;
+re_match_2 (struct re_pattern_buffer *bufp, const char *string1, Idx length1,
+ const char *string2, Idx length2, Idx start,
+ struct re_registers *regs, Idx stop)
{
return re_search_2_stub (bufp, string1, length1, string2, length2,
start, 0, regs, stop, true);
@@ -347,12 +334,9 @@ weak_alias (__re_match_2, re_match_2)
#endif
regoff_t
-re_search_2 (bufp, string1, length1, string2, length2, start, range, regs, stop)
- struct re_pattern_buffer *bufp;
- const char *string1, *string2;
- Idx length1, length2, start, stop;
- regoff_t range;
- struct re_registers *regs;
+re_search_2 (struct re_pattern_buffer *bufp, const char *string1, Idx length1,
+ const char *string2, Idx length2, Idx start, regoff_t range,
+ struct re_registers *regs, Idx stop)
{
return re_search_2_stub (bufp, string1, length1, string2, length2,
start, range, regs, stop, false);
@@ -362,10 +346,10 @@ weak_alias (__re_search_2, re_search_2)
#endif
static regoff_t
-re_search_2_stub (struct re_pattern_buffer *bufp,
- const char *string1, Idx length1,
- const char *string2, Idx length2,
- Idx start, regoff_t range, struct re_registers *regs,
+internal_function
+re_search_2_stub (struct re_pattern_buffer *bufp, const char *string1,
+ Idx length1, const char *string2, Idx length2, Idx start,
+ regoff_t range, struct re_registers *regs,
Idx stop, bool ret_len)
{
const char *str;
@@ -409,8 +393,8 @@ re_search_2_stub (struct re_pattern_buffer *bufp,
otherwise the position of the match is returned. */
static regoff_t
-re_search_stub (struct re_pattern_buffer *bufp,
- const char *string, Idx length,
+internal_function
+re_search_stub (struct re_pattern_buffer *bufp, const char *string, Idx length,
Idx start, regoff_t range, Idx stop, struct re_registers *regs,
bool ret_len)
{
@@ -499,6 +483,7 @@ re_search_stub (struct re_pattern_buffer *bufp,
}
static unsigned
+internal_function
re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, Idx nregs,
int regs_allocated)
{
@@ -577,11 +562,8 @@ re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, Idx nregs,
freeing the old data. */
void
-re_set_registers (bufp, regs, num_regs, starts, ends)
- struct re_pattern_buffer *bufp;
- struct re_registers *regs;
- __re_size_t num_regs;
- regoff_t *starts, *ends;
+re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs,
+ __re_size_t num_regs, regoff_t *starts, regoff_t *ends)
{
if (num_regs)
{
@@ -609,8 +591,7 @@ int
# ifdef _LIBC
weak_function
# endif
-re_exec (s)
- const char *s;
+re_exec (const char *s)
{
return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);
}
@@ -628,12 +609,10 @@ re_exec (s)
(0 <= LAST_START && LAST_START <= LENGTH) */
static reg_errcode_t
-__attribute_warn_unused_result__
-re_search_internal (const regex_t *preg,
- const char *string, Idx length,
- Idx start, Idx last_start, Idx stop,
- size_t nmatch, regmatch_t pmatch[],
- int eflags)
+__attribute_warn_unused_result__ internal_function
+re_search_internal (const regex_t *preg, const char *string, Idx length,
+ Idx start, Idx last_start, Idx stop, size_t nmatch,
+ regmatch_t pmatch[], int eflags)
{
reg_errcode_t err;
const re_dfa_t *dfa = preg->buffer;
@@ -642,7 +621,7 @@ re_search_internal (const regex_t *preg,
bool fl_longest_match;
int match_kind;
Idx match_first;
- Idx match_last = REG_MISSING;
+ Idx match_last = -1;
Idx extra_nmatch;
bool sb;
int ch;
@@ -851,9 +830,9 @@ re_search_internal (const regex_t *preg,
mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
match_last = check_matching (&mctx, fl_longest_match,
start <= last_start ? &match_first : NULL);
- if (match_last != REG_MISSING)
+ if (match_last != -1)
{
- if (BE (match_last == REG_ERROR, 0))
+ if (BE (match_last == -2, 0))
{
err = REG_ESPACE;
goto free_return;
@@ -875,7 +854,7 @@ re_search_internal (const regex_t *preg,
break;
if (BE (err != REG_NOMATCH, 0))
goto free_return;
- match_last = REG_MISSING;
+ match_last = -1;
}
else
break; /* We found a match. */
@@ -886,7 +865,7 @@ re_search_internal (const regex_t *preg,
}
#ifdef DEBUG
- assert (match_last != REG_MISSING);
+ assert (match_last != -1);
assert (err == REG_NOERROR);
#endif
@@ -964,7 +943,7 @@ re_search_internal (const regex_t *preg,
}
static reg_errcode_t
-__attribute_warn_unused_result__
+internal_function __attribute_warn_unused_result__
prune_impossible_nodes (re_match_context_t *mctx)
{
const re_dfa_t *const dfa = mctx->dfa;
@@ -1012,7 +991,7 @@ prune_impossible_nodes (re_match_context_t *mctx)
do
{
--match_last;
- if (! REG_VALID_INDEX (match_last))
+ if (match_last < 0)
{
ret = REG_NOMATCH;
goto free_return;
@@ -1093,8 +1072,8 @@ acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx,
}
/* Check whether the regular expression match input string INPUT or not,
- and return the index where the matching end. Return REG_MISSING if
- there is no match, and return REG_ERROR in case of an error.
+ and return the index where the matching end. Return -1 if
+ there is no match, and return -2 in case of an error.
FL_LONGEST_MATCH means we want the POSIX longest matching.
If P_MATCH_FIRST is not NULL, and the match fails, it is set to the
next place where we may want to try matching.
@@ -1109,7 +1088,7 @@ check_matching (re_match_context_t *mctx, bool fl_longest_match,
const re_dfa_t *const dfa = mctx->dfa;
reg_errcode_t err;
Idx match = 0;
- Idx match_last = REG_MISSING;
+ Idx match_last = -1;
Idx cur_str_idx = re_string_cur_idx (&mctx->input);
re_dfastate_t *cur_state;
bool at_init_state = p_match_first != NULL;
@@ -1121,7 +1100,7 @@ check_matching (re_match_context_t *mctx, bool fl_longest_match,
if (BE (cur_state == NULL, 0))
{
assert (err == REG_ESPACE);
- return REG_ERROR;
+ return -2;
}
if (mctx->state_log != NULL)
@@ -1176,7 +1155,7 @@ check_matching (re_match_context_t *mctx, bool fl_longest_match,
if (BE (err != REG_NOERROR, 0))
{
assert (err == REG_ESPACE);
- return REG_ERROR;
+ return -2;
}
}
@@ -1190,7 +1169,7 @@ check_matching (re_match_context_t *mctx, bool fl_longest_match,
state using the state log, if available and if we have not
already found a valid (even if not the longest) match. */
if (BE (err != REG_NOERROR, 0))
- return REG_ERROR;
+ return -2;
if (mctx->state_log == NULL
|| (match && !fl_longest_match)
@@ -1273,7 +1252,7 @@ check_halt_state_context (const re_match_context_t *mctx,
/* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
corresponding to the DFA).
Return the destination node, and update EPS_VIA_NODES;
- return REG_MISSING in case of errors. */
+ return -1 in case of errors. */
static Idx
internal_function
@@ -1291,15 +1270,15 @@ proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs,
Idx dest_node;
ok = re_node_set_insert (eps_via_nodes, node);
if (BE (! ok, 0))
- return REG_ERROR;
- /* Pick up a valid destination, or return REG_MISSING if none
+ return -2;
+ /* Pick up a valid destination, or return -1 if none
is found. */
- for (dest_node = REG_MISSING, i = 0; i < edests->nelem; ++i)
+ for (dest_node = -1, i = 0; i < edests->nelem; ++i)
{
Idx candidate = edests->elems[i];
if (!re_node_set_contains (cur_nodes, candidate))
continue;
- if (dest_node == REG_MISSING)
+ if (dest_node == -1)
dest_node = candidate;
else
@@ -1313,7 +1292,7 @@ proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs,
else if (fs != NULL
&& push_fail_stack (fs, *pidx, candidate, nregs, regs,
eps_via_nodes))
- return REG_ERROR;
+ return -2;
/* We know we are going to exit. */
break;
@@ -1338,13 +1317,13 @@ proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs,
if (fs != NULL)
{
if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1)
- return REG_MISSING;
+ return -1;
else if (naccepted)
{
char *buf = (char *) re_string_get_buffer (&mctx->input);
if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
naccepted) != 0)
- return REG_MISSING;
+ return -1;
}
}
@@ -1353,7 +1332,7 @@ proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs,
Idx dest_node;
ok = re_node_set_insert (eps_via_nodes, node);
if (BE (! ok, 0))
- return REG_ERROR;
+ return -2;
dest_node = dfa->edests[node].elems[0];
if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
dest_node))
@@ -1369,12 +1348,12 @@ proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs,
if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL
|| !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
dest_node)))
- return REG_MISSING;
+ return -1;
re_node_set_empty (eps_via_nodes);
return dest_node;
}
}
- return REG_MISSING;
+ return -1;
}
static reg_errcode_t
@@ -1410,7 +1389,7 @@ pop_fail_stack (struct re_fail_stack_t *fs, Idx *pidx, Idx nregs,
regmatch_t *regs, re_node_set *eps_via_nodes)
{
Idx num = --fs->num;
- assert (REG_VALID_INDEX (num));
+ assert (num >= 0);
*pidx = fs->stack[num].idx;
memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
re_node_set_free (eps_via_nodes);
@@ -1503,9 +1482,9 @@ set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node,
&eps_via_nodes, fs);
- if (BE (! REG_VALID_INDEX (cur_node), 0))
+ if (BE (cur_node < 0, 0))
{
- if (BE (cur_node == REG_ERROR, 0))
+ if (BE (cur_node == -2, 0))
{
re_node_set_free (&eps_via_nodes);
if (prev_idx_match_malloced)
@@ -1889,10 +1868,10 @@ sub_epsilon_src_nodes (const re_dfa_t *dfa, Idx node, re_node_set *dest_nodes,
{
Idx edst1 = dfa->edests[cur_node].elems[0];
Idx edst2 = ((dfa->edests[cur_node].nelem > 1)
- ? dfa->edests[cur_node].elems[1] : REG_MISSING);
+ ? dfa->edests[cur_node].elems[1] : -1);
if ((!re_node_set_contains (inv_eclosure, edst1)
&& re_node_set_contains (dest_nodes, edst1))
- || (REG_VALID_NONZERO_INDEX (edst2)
+ || (edst2 > 0
&& !re_node_set_contains (inv_eclosure, edst2)
&& re_node_set_contains (dest_nodes, edst2)))
{
@@ -1972,7 +1951,7 @@ check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries,
switch (dfa->nodes[node].type)
{
case OP_BACK_REF:
- if (bkref_idx != REG_MISSING)
+ if (bkref_idx != -1)
{
struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
do
@@ -2088,8 +2067,8 @@ check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes,
subexp_idx = dfa->nodes[ent->node].opr.idx;
if (ent->subexp_to == str_idx)
{
- Idx ops_node = REG_MISSING;
- Idx cls_node = REG_MISSING;
+ Idx ops_node = -1;
+ Idx cls_node = -1;
for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
{
Idx node = dest_nodes->elems[node_idx];
@@ -2104,7 +2083,7 @@ check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes,
/* Check the limitation of the open subexpression. */
/* Note that (ent->subexp_to = str_idx != ent->subexp_from). */
- if (REG_VALID_INDEX (ops_node))
+ if (ops_node >= 0)
{
err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,
candidates);
@@ -2113,7 +2092,7 @@ check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes,
}
/* Check the limitation of the close subexpression. */
- if (REG_VALID_INDEX (cls_node))
+ if (cls_node >= 0)
for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
{
Idx node = dest_nodes->elems[node_idx];
@@ -2166,7 +2145,7 @@ sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx,
re_sift_context_t local_sctx;
Idx first_idx = search_cur_bkref_entry (mctx, str_idx);
- if (first_idx == REG_MISSING)
+ if (first_idx == -1)
return REG_NOERROR;
local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized. */
@@ -2570,7 +2549,7 @@ transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate)
if (BE (err != REG_NOERROR, 0))
return err;
#ifdef DEBUG
- assert (dfa->nexts[cur_node_idx] != REG_MISSING);
+ assert (dfa->nexts[cur_node_idx] != -1);
#endif
new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx];
@@ -2636,7 +2615,7 @@ transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
/* And add the epsilon closures (which is 'new_dest_nodes') of
the backreference to appropriate state_log. */
#ifdef DEBUG
- assert (dfa->nexts[node_idx] != REG_MISSING);
+ assert (dfa->nexts[node_idx] != -1);
#endif
for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
{
@@ -2720,7 +2699,7 @@ get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx)
const char *buf = (const char *) re_string_get_buffer (&mctx->input);
/* Return if we have already checked BKREF_NODE at BKREF_STR_IDX. */
Idx cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
- if (cache_idx != REG_MISSING)
+ if (cache_idx != -1)
{
const struct re_backref_cache_entry *entry
= mctx->bkref_ents + cache_idx;
@@ -2825,7 +2804,7 @@ get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx)
nodes = &mctx->state_log[sl_str]->nodes;
cls_node = find_subexp_node (dfa, nodes, subexp_num,
OP_CLOSE_SUBEXP);
- if (cls_node == REG_MISSING)
+ if (cls_node == -1)
continue; /* No. */
if (sub_top->path == NULL)
{
@@ -2904,7 +2883,7 @@ find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
&& node->opr.idx == subexp_idx)
return cls_node;
}
- return REG_MISSING;
+ return -1;
}
/* Check whether the node TOP_NODE at TOP_STR can arrive to the node
@@ -3180,7 +3159,7 @@ check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes,
Idx cur_node = cur_nodes->elems[idx];
const re_node_set *eclosure = dfa->eclosures + cur_node;
outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type);
- if (outside_node == REG_MISSING)
+ if (outside_node == -1)
{
/* There are no problematic nodes, just merge them. */
err = re_node_set_merge (&new_nodes, eclosure);
@@ -3266,7 +3245,7 @@ expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes,
Idx cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
struct re_backref_cache_entry *ent;
- if (cache_idx_start == REG_MISSING)
+ if (cache_idx_start == -1)
return REG_NOERROR;
restart:
@@ -3391,7 +3370,7 @@ build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
/* At first, group all nodes belonging to 'state' into several
destinations. */
ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
- if (BE (! REG_VALID_NONZERO_INDEX (ndests), 0))
+ if (BE (ndests <= 0, 0))
{
if (dests_node_malloced)
free (dests_alloc);
@@ -3453,7 +3432,7 @@ out_free:
for (j = 0; j < dests_node[i].nelem; ++j)
{
next_node = dfa->nexts[dests_node[i].elems[j]];
- if (next_node != REG_MISSING)
+ if (next_node != -1)
{
err = re_node_set_merge (&follows, dfa->eclosures + next_node);
if (BE (err != REG_NOERROR, 0))
@@ -3764,7 +3743,7 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
error_return:
for (j = 0; j < ndests; ++j)
re_node_set_free (dests_node + j);
- return REG_MISSING;
+ return -1;
}
#ifdef RE_ENABLE_I18N
@@ -3776,6 +3755,10 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
one collating element like '.', '[a-z]', opposite to the other nodes
can only accept one byte. */
+# ifdef _LIBC
+# include <locale/weight.h>
+# endif
+
static int
internal_function
check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
@@ -3895,8 +3878,6 @@ check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
const int32_t *table, *indirect;
const unsigned char *weights, *extra;
const char *collseqwc;
- /* This #include defines a local function! */
-# include <locale/weight.h>
/* match with collating_symbol? */
if (cset->ncoll_syms)
@@ -3953,7 +3934,7 @@ check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
indirect = (const int32_t *)
_NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
- int32_t idx = findidx (&cp, elem_len);
+ int32_t idx = findidx (table, indirect, extra, &cp, elem_len);
if (idx > 0)
for (i = 0; i < cset->nequiv_classes; ++i)
{
@@ -4193,7 +4174,7 @@ internal_function __attribute_warn_unused_result__
match_ctx_init (re_match_context_t *mctx, int eflags, Idx n)
{
mctx->eflags = eflags;
- mctx->match_last = REG_MISSING;
+ mctx->match_last = -1;
if (n > 0)
{
/* Avoid overflow. */
@@ -4314,7 +4295,7 @@ match_ctx_add_entry (re_match_context_t *mctx, Idx node, Idx str_idx, Idx from,
return REG_NOERROR;
}
-/* Return the first entry with the same str_idx, or REG_MISSING if none is
+/* Return the first entry with the same str_idx, or -1 if none is
found. Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX. */
static Idx
@@ -4334,7 +4315,7 @@ search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx)
if (left < last && mctx->bkref_ents[left].str_idx == str_idx)
return left;
else
- return REG_MISSING;
+ return -1;
}
/* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches