summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorArnold D. Robbins <arnold@skeeve.com>2015-07-08 19:56:31 +0300
committerArnold D. Robbins <arnold@skeeve.com>2015-07-08 19:56:31 +0300
commit2eee1b8e780b00062b2a8eef649ebbebaba4682c (patch)
treefeacf9c1c625cd4d584192637836cbf6190e7476
parent247ac37e87ece08aab495f2c794796d91ccae031 (diff)
downloadgawk-2eee1b8e780b00062b2a8eef649ebbebaba4682c.tar.gz
Sync dfa with GNU grep.
-rw-r--r--ChangeLog4
-rw-r--r--dfa.c83
-rw-r--r--dfa.h12
3 files changed, 46 insertions, 53 deletions
diff --git a/ChangeLog b/ChangeLog
index 4c9949fd..41c55da1 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,7 @@
+2015-07-08 Arnold D. Robbins <arnold@skeeve.com>
+
+ * dfa.h, dfa.c: Sync with GNU grep.
+
2015-06-14 Arnold D. Robbins <arnold@skeeve.com>
* regcomp.c, regex_internal.h, regexec.c: Sync with GLIBC.
diff --git a/dfa.c b/dfa.c
index 96497865..95cd9276 100644
--- a/dfa.c
+++ b/dfa.c
@@ -462,9 +462,6 @@ struct dfa
as a sentinel at the end of the buffer. */
state_num initstate_letter; /* Initial state for letter context. */
state_num initstate_others; /* Initial state for other contexts. */
- struct dfamust *musts; /* List of strings, at least one of which
- is known to appear in any r.e. matching
- the dfa. */
position_set mb_follows; /* Follow set added by ANYCHAR and/or MBCSET
on demand. */
int *mb_match_lens; /* Array of length reduced by ANYCHAR and/or
@@ -481,7 +478,6 @@ struct dfa
#define ACCEPTS_IN_CONTEXT(prev, curr, state, dfa) \
SUCCEEDS_IN_CONTEXT ((dfa).states[state].constraint, prev, curr)
-static void dfamust (struct dfa *dfa);
static void regexp (void);
static void
@@ -953,11 +949,11 @@ enum
/* Find the characters equal to C after case-folding, other than C
itself, and store them into FOLDED. Return the number of characters
stored. */
-static int
+static unsigned int
case_folded_counterparts (wchar_t c, wchar_t folded[CASE_FOLDED_BUFSIZE])
{
- int i;
- int n = 0;
+ unsigned int i;
+ unsigned int n = 0;
wint_t uc = towupper (c);
wint_t lc = towlower (uc);
if (uc != c)
@@ -1244,9 +1240,10 @@ parse_bracket_exp (void)
else
{
wchar_t folded[CASE_FOLDED_BUFSIZE + 1];
- int i;
- int n = (case_fold ? case_folded_counterparts (wc, folded + 1) + 1
- : 1);
+ unsigned int i;
+ unsigned int n = (case_fold
+ ? case_folded_counterparts (wc, folded + 1) + 1
+ : 1);
folded[0] = wc;
for (i = 0; i < n; i++)
if (!setbit_wc (folded[i], ccl))
@@ -1279,7 +1276,7 @@ parse_bracket_exp (void)
assert (!dfa->multibyte);
notset (ccl);
if (syntax_bits & RE_HAT_LISTS_NOT_NEWLINE)
- clrbit (eolbyte, ccl);
+ clrbit ('\n', ccl);
}
return CSET + charclass_index (ccl);
@@ -1524,7 +1521,7 @@ lex (void)
zeroset (ccl);
notset (ccl);
if (!(syntax_bits & RE_DOT_NEWLINE))
- clrbit (eolbyte, ccl);
+ clrbit ('\n', ccl);
if (syntax_bits & RE_DOT_NOT_NULL)
clrbit ('\0', ccl);
laststart = false;
@@ -1796,7 +1793,7 @@ add_utf8_anychar (void)
if (i == 1)
{
if (!(syntax_bits & RE_DOT_NEWLINE))
- clrbit (eolbyte, c);
+ clrbit ('\n', c);
if (syntax_bits & RE_DOT_NOT_NULL)
clrbit ('\0', c);
}
@@ -1872,7 +1869,7 @@ atom (void)
if (case_fold)
{
wchar_t folded[CASE_FOLDED_BUFSIZE];
- int i, n = case_folded_counterparts (wctok, folded);
+ unsigned int i, n = case_folded_counterparts (wctok, folded);
for (i = 0; i < n; i++)
{
addtok_wc (folded[i]);
@@ -3028,7 +3025,7 @@ match_anychar (struct dfa *d, state_num s, position pos,
int context;
/* Check syntax bits. */
- if (wc == (wchar_t) eolbyte)
+ if (wc == (wchar_t) '\n')
{
if (!(syntax_bits & RE_DOT_NEWLINE))
return 0;
@@ -3684,7 +3681,6 @@ dfassbuild (struct dfa *d)
sup->fails = NULL;
sup->success = NULL;
sup->newlines = NULL;
- sup->musts = NULL;
sup->charclasses = xnmalloc (sup->calloc, sizeof *sup->charclasses);
if (d->cindex)
@@ -3751,7 +3747,6 @@ dfacomp (char const *s, size_t len, struct dfa *d, int searchflag)
dfainit (d);
dfambcache (d);
dfaparse (s, len, d);
- dfamust (d);
dfassbuild (d);
dfaoptimize (d);
dfaanalyze (d, searchflag);
@@ -3767,7 +3762,6 @@ void
dfafree (struct dfa *d)
{
size_t i;
- struct dfamust *dm, *ndm;
free (d->charclasses);
free (d->tokens);
@@ -3803,13 +3797,6 @@ dfafree (struct dfa *d)
free (d->success);
}
- for (dm = d->musts; dm; dm = ndm)
- {
- ndm = dm->next;
- free (dm->must);
- free (dm);
- }
-
if (d->superset)
dfafree (d->superset);
}
@@ -3830,9 +3817,7 @@ dfafree (struct dfa *d)
sequences that must constitute the match ("is")
When we get to the root of the tree, we use one of the longest of its
- calculated "in" sequences as our answer. The sequence we find is returned in
- d->must (where "d" is the single argument passed to "dfamust");
- the length of the sequence is returned in d->mustn.
+ calculated "in" sequences as our answer.
The sequences calculated for the various types of node (in pseudo ANSI c)
are shown below. "p" is the operand of unary operators (and the left-hand
@@ -4056,8 +4041,8 @@ freemust (must *mp)
free (mp);
}
-static void
-dfamust (struct dfa *d)
+struct dfamust *
+dfamust (struct dfa const *d)
{
must *mp = NULL;
char const *result = "";
@@ -4066,7 +4051,6 @@ dfamust (struct dfa *d)
bool exact = false;
bool begline = false;
bool endline = false;
- struct dfamust *dm;
for (ri = 0; ri < d->tindex; ++ri)
{
@@ -4227,30 +4211,28 @@ dfamust (struct dfa *d)
t = j;
while (++j < NOTCHAR)
if (tstbit (j, *ccl)
- && ! (case_fold && !d->multibyte
+ && ! (case_fold && MB_CUR_MAX == 1
&& toupper (j) == toupper (t)))
break;
if (j < NOTCHAR)
break;
}
mp->is[0] = mp->left[0] = mp->right[0]
- = case_fold && !d->multibyte ? toupper (t) : t;
+ = case_fold && MB_CUR_MAX == 1 ? toupper (t) : t;
mp->is[1] = mp->left[1] = mp->right[1] = '\0';
mp->in = enlist (mp->in, mp->is, 1);
break;
}
}
done:
- if (*result)
- {
- dm = xmalloc (sizeof *dm);
- dm->exact = exact;
- dm->begline = begline;
- dm->endline = endline;
- dm->must = xstrdup (result);
- dm->next = d->musts;
- d->musts = dm;
- }
+ if (!*result)
+ return NULL;
+
+ struct dfamust *dm = xmalloc (sizeof *dm);
+ dm->exact = exact;
+ dm->begline = begline;
+ dm->endline = endline;
+ dm->must = xstrdup (result);
while (mp)
{
@@ -4258,18 +4240,21 @@ done:
freemust (mp);
mp = prev;
}
+
+ return dm;
}
-struct dfa *
-dfaalloc (void)
+void
+dfamustfree (struct dfamust *dm)
{
- return xmalloc (sizeof (struct dfa));
+ free (dm->must);
+ free (dm);
}
-struct dfamust *_GL_ATTRIBUTE_PURE
-dfamusts (struct dfa const *d)
+struct dfa *
+dfaalloc (void)
{
- return d->musts;
+ return xmalloc (sizeof (struct dfa));
}
/* vim:set shiftwidth=2: */
diff --git a/dfa.h b/dfa.h
index 79027810..4060dfaf 100644
--- a/dfa.h
+++ b/dfa.h
@@ -26,6 +26,8 @@
#endif /* HAVE_STDBOOL_H */
#include <stddef.h>
+#define _GL_ATTRIBUTE_MALLOC
+
/* Element of a list of strings, at least one of which is known to
appear in any R.E. matching the DFA. */
struct dfamust
@@ -34,7 +36,6 @@ struct dfamust
bool begline;
bool endline;
char *must;
- struct dfamust *next;
};
/* The dfa structure. It is completely opaque. */
@@ -45,10 +46,13 @@ struct dfa;
/* Allocate a struct dfa. The struct dfa is completely opaque.
The returned pointer should be passed directly to free() after
calling dfafree() on it. */
-extern struct dfa *dfaalloc (void);
+extern struct dfa *dfaalloc (void) _GL_ATTRIBUTE_MALLOC;
+
+/* Build and return the struct dfamust from the given struct dfa. */
+extern struct dfamust *dfamust (struct dfa const *);
-/* Return the dfamusts associated with a dfa. */
-extern struct dfamust *dfamusts (struct dfa const *);
+/* Free the storage held by the components of a struct dfamust. */
+extern void dfamustfree (struct dfamust *);
/* dfasyntax() takes three arguments; the first sets the syntax bits described
earlier in this file, the second sets the case-folding flag, and the