summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Arnold D. Robbins <arnold@skeeve.com>, 2016-08-23 06:05:01 +0300
committer: Arnold D. Robbins <arnold@skeeve.com>, 2016-08-23 06:05:01 +0300
commit: 273c35356b59a2d3b989bd568f577aaf800dc4a5 (patch)
tree: 3bc9b2f5a3c5466f2d436c1029e92cf2f221fb4c
parent: 4a920a6b4214dfc118eeeffd28b3dff33127a9b5 (diff)
parent: 546f826812728eb8a7b9e51408929c7314b92d31 (diff)
download: gawk-273c35356b59a2d3b989bd568f577aaf800dc4a5.tar.gz
Merge branch 'gawk-4.1-stable'
-rw-r--r--  ChangeLog        14
-rw-r--r--  Makefile.am      15
-rw-r--r--  Makefile.in      15
-rw-r--r--  dfa.c            855
-rw-r--r--  dfa.h            13
-rw-r--r--  doc/ChangeLog    8
-rw-r--r--  doc/Makefile.am  5
-rw-r--r--  doc/Makefile.in  5
-rw-r--r--  doc/awkcard.in   6
-rw-r--r--  doc/gawk.info    546
-rw-r--r--  doc/gawk.texi    12
-rw-r--r--  doc/gawktexi.in  12
-rw-r--r--  node.c           2
-rw-r--r--  po/gawk.pot      26
-rw-r--r--  re.c             17
15 files changed, 820 insertions, 731 deletions
diff --git a/ChangeLog b/ChangeLog
index fb123cf8..e3c8527c 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,17 @@
+2016-08-23 Arnold D. Robbins <arnold@skeeve.com>
+
+ * dfa.h: Sync with grep. API changes.
+ * dfa.c: Sync with grep.
+ * re.c (make_regexp): Adjust for API changes, move call to dfasyntax
+ into stanza that compiles the regex.
+ (resetup): Call dfa_init.
+ * node.c (str2wstr): using_utf8 is now called dfa_using_utf8.
+
+ Unrelated:
+
+ * Makefile.am: Quote all uses of $(srcdir) and $(distdir).
+ (spell): New target.
+
2016-08-18 Arnold D. Robbins <arnold@skeeve.com>
* dfa.c: Sync with grep.
diff --git a/Makefile.am b/Makefile.am
index 0bc2143e..dce65018 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -185,16 +185,16 @@ check-local: gawk$(EXEEXT)
# A little extra clean up when making distributions.
# And additional set up for the pc directory.
dist-hook:
- cd $(distdir)/extension ; rm -f *.o *.so
- cd $(srcdir)/pc ; \
+ cd "$(distdir)"/extension ; rm -f *.o *.so
+ cd "$(srcdir)"/pc ; \
chmod u+w config.h ; \
sed -n -f configpk.sed < ../configure.ac > /tmp/tmp.sed ; \
sed -f config.sed < ../configh.in > /tmp/config.tmp ; \
sed -f /tmp/tmp.sed < /tmp/config.tmp > config.h ; \
$(RM) /tmp/tmp.sed /tmp/config.tmp
pwd
- chmod u+w $(distdir)/pc/config.h
- cp $(srcdir)/pc/config.h $(distdir)/pc/config.h
+ chmod u+w "$(distdir)"/pc/config.h
+ cp "$(srcdir)"/pc/config.h "$(distdir)"/pc/config.h
# Special rules for individual files
# Use of awk instead of $(AWK) is deliberate, in case gawk doesn't build
@@ -202,14 +202,14 @@ dist-hook:
awkgram.c: awkgram.y
$(YACC) $(AM_YFLAGS) $(YFLAGS) $<
- sed 's/parse error/syntax error/g' < y.tab.c | awk -f $(srcdir)/bisonfix.awk awkgram > $*.c && rm y.tab.c
+ sed 's/parse error/syntax error/g' < y.tab.c | awk -f "$(srcdir)"/bisonfix.awk awkgram > $*.c && rm y.tab.c
if test -f y.tab.h; then \
if cmp -s y.tab.h $*.h; then rm -f y.tab.h; else mv y.tab.h $*.h; fi; \
else :; fi
command.c: command.y
$(YACC) -p zz $<
- sed 's/parse error/syntax error/g' < y.tab.c | awk -f $(srcdir)/bisonfix.awk command > $*.c && rm y.tab.c
+ sed 's/parse error/syntax error/g' < y.tab.c | awk -f "$(srcdir)"/bisonfix.awk command > $*.c && rm y.tab.c
# This is for my development & testing.
efence: gawk
@@ -227,3 +227,6 @@ valgrind-noleak:
cd test; rm -f log.[0-9]*; \
make check VALGRIND="valgrind --leak-check=no --log-file=log.%p"; \
make valgrind-scan
+
+spell:
+ cd "$(srcdir)"/doc ; $(MAKE) spell
diff --git a/Makefile.in b/Makefile.in
index 5585046e..036361cb 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -1197,16 +1197,16 @@ check-local: gawk$(EXEEXT)
# A little extra clean up when making distributions.
# And additional set up for the pc directory.
dist-hook:
- cd $(distdir)/extension ; rm -f *.o *.so
- cd $(srcdir)/pc ; \
+ cd "$(distdir)"/extension ; rm -f *.o *.so
+ cd "$(srcdir)"/pc ; \
chmod u+w config.h ; \
sed -n -f configpk.sed < ../configure.ac > /tmp/tmp.sed ; \
sed -f config.sed < ../configh.in > /tmp/config.tmp ; \
sed -f /tmp/tmp.sed < /tmp/config.tmp > config.h ; \
$(RM) /tmp/tmp.sed /tmp/config.tmp
pwd
- chmod u+w $(distdir)/pc/config.h
- cp $(srcdir)/pc/config.h $(distdir)/pc/config.h
+ chmod u+w "$(distdir)"/pc/config.h
+ cp "$(srcdir)"/pc/config.h "$(distdir)"/pc/config.h
# Special rules for individual files
# Use of awk instead of $(AWK) is deliberate, in case gawk doesn't build
@@ -1214,14 +1214,14 @@ dist-hook:
awkgram.c: awkgram.y
$(YACC) $(AM_YFLAGS) $(YFLAGS) $<
- sed 's/parse error/syntax error/g' < y.tab.c | awk -f $(srcdir)/bisonfix.awk awkgram > $*.c && rm y.tab.c
+ sed 's/parse error/syntax error/g' < y.tab.c | awk -f "$(srcdir)"/bisonfix.awk awkgram > $*.c && rm y.tab.c
if test -f y.tab.h; then \
if cmp -s y.tab.h $*.h; then rm -f y.tab.h; else mv y.tab.h $*.h; fi; \
else :; fi
command.c: command.y
$(YACC) -p zz $<
- sed 's/parse error/syntax error/g' < y.tab.c | awk -f $(srcdir)/bisonfix.awk command > $*.c && rm y.tab.c
+ sed 's/parse error/syntax error/g' < y.tab.c | awk -f "$(srcdir)"/bisonfix.awk command > $*.c && rm y.tab.c
# This is for my development & testing.
efence: gawk
@@ -1240,6 +1240,9 @@ valgrind-noleak:
make check VALGRIND="valgrind --leak-check=no --log-file=log.%p"; \
make valgrind-scan
+spell:
+ cd "$(srcdir)"/doc ; $(MAKE) spell
+
# Tell versions [3.59,3.63) of GNU make to not export all variables.
# Otherwise a system limit (for SysV at least) may be exceeded.
.NOEXPORT:
diff --git a/dfa.c b/dfa.c
index 25dd868b..cb11043e 100644
--- a/dfa.c
+++ b/dfa.c
@@ -355,14 +355,85 @@ struct mb_char_classes
size_t nchars;
};
+struct regex_syntax
+{
+ /* Syntax bits controlling the behavior of the lexical analyzer. */
+ reg_syntax_t syntax_bits;
+ bool syntax_bits_set;
+
+ /* Flag for case-folding letters into sets. */
+ bool case_fold;
+
+ /* End-of-line byte in data. */
+ unsigned char eolbyte;
+
+ /* Cache of char-context values. */
+ int sbit[NOTCHAR];
+
+ /* If never_trail[B], the byte B cannot be a non-initial byte in a
+ multibyte character. */
+ bool never_trail[NOTCHAR];
+
+ /* Set of characters considered letters. */
+ charclass letters;
+
+ /* Set of characters that are newline. */
+ charclass newline;
+};
+
+/* Lexical analyzer. All the dross that deals with the obnoxious
+ GNU Regex syntax bits is located here. The poor, suffering
+ reader is referred to the GNU Regex documentation for the
+ meaning of the @#%!@#%^!@ syntax bits. */
+struct lexer_state
+{
+ char const *lexptr; /* Pointer to next input character. */
+ size_t lexleft; /* Number of characters remaining. */
+ token lasttok; /* Previous token returned; initially END. */
+ size_t parens; /* Count of outstanding left parens. */
+ int minrep, maxrep; /* Repeat counts for {m,n}. */
+
+ /* Wide character representation of the current multibyte character,
+ or WEOF if there was an encoding error. Used only if
+ MB_CUR_MAX > 1. */
+ wint_t wctok;
+
+ /* Length of the multibyte representation of wctok. */
+ int cur_mb_len;
+
+ /* We're separated from beginning or (, | only by zero-width characters. */
+ bool laststart;
+};
+
+/* Recursive descent parser for regular expressions. */
+
+struct parser_state
+{
+ token tok; /* Lookahead token. */
+ size_t depth; /* Current depth of a hypothetical stack
+ holding deferred productions. This is
+ used to determine the depth that will be
+ required of the real stack later on in
+ dfaanalyze. */
+};
+
/* A compiled regular expression. */
struct dfa
{
+ /* Syntax configuration */
+ struct regex_syntax syntax;
+
/* Fields filled by the scanner. */
charclass *charclasses; /* Array of character sets for CSET tokens. */
size_t cindex; /* Index for adding new charclasses. */
size_t calloc; /* Number of charclasses allocated. */
+ /* Scanner state */
+ struct lexer_state lexstate;
+
+ /* Parser state */
+ struct parser_state parsestate;
+
/* Fields filled by the parser. */
token *tokens; /* Postfix parse array. */
size_t tindex; /* Index for adding new tokens. */
@@ -478,7 +549,7 @@ struct dfa
#define ACCEPTS_IN_CONTEXT(prev, curr, state, dfa) \
SUCCEEDS_IN_CONTEXT ((dfa).states[state].constraint, prev, curr)
-static void regexp (void);
+static void regexp (struct dfa *dfa);
/* A table indexed by byte values that contains the corresponding wide
character (if any) for that byte. WEOF means the byte is not a
@@ -697,39 +768,6 @@ dfa_charclass_index (struct dfa *d, charclass const s)
return i;
}
-/* A pointer to the current dfa is kept here during parsing. */
-static struct dfa *dfa;
-
-/* Find the index of charclass S in the current DFA, or allocate a new one. */
-static size_t
-charclass_index (charclass const s)
-{
- return dfa_charclass_index (dfa, s);
-}
-
-/* Syntax bits controlling the behavior of the lexical analyzer. */
-static reg_syntax_t syntax_bits;
-static bool syntax_bits_set;
-
-/* Flag for case-folding letters into sets. */
-static bool case_fold;
-
-/* End-of-line byte in data. */
-static unsigned char eolbyte;
-
-/* Cache of char-context values. */
-static int sbit[NOTCHAR];
-
-/* If never_trail[B], the byte B cannot be a non-initial byte in a
- multibyte character. */
-static bool never_trail[NOTCHAR];
-
-/* Set of characters considered letters. */
-static charclass letters;
-
-/* Set of characters that are newline. */
-static charclass newline;
-
static bool
unibyte_word_constituent (unsigned char c)
{
@@ -737,25 +775,29 @@ unibyte_word_constituent (unsigned char c)
}
static int
-char_context (unsigned char c)
+char_context (struct dfa const *dfa, unsigned char c)
{
- if (c == eolbyte)
+ if (c == dfa->syntax.eolbyte)
return CTX_NEWLINE;
if (unibyte_word_constituent (c))
return CTX_LETTER;
return CTX_NONE;
}
-/* Entry point to set syntax options. */
-void
-dfasyntax (reg_syntax_t bits, bool fold, unsigned char eol)
+/* UTF-8 encoding allows some optimizations that we can't otherwise
+ assume in a multibyte encoding. */
+static bool using_utf8;
+
+bool
+dfa_using_utf8 (void)
{
- int i;
- syntax_bits_set = true;
- syntax_bits = bits;
- case_fold = fold;
- eolbyte = eol;
+ return using_utf8;
+}
+static void
+init_mbrtowc_cache (void)
+{
+ int i;
for (i = CHAR_MIN; i <= CHAR_MAX; ++i)
{
char c = i;
@@ -763,23 +805,39 @@ dfasyntax (reg_syntax_t bits, bool fold, unsigned char eol)
mbstate_t s = { 0 };
wchar_t wc;
mbrtowc_cache[uc] = mbrtowc (&wc, &c, 1, &s) <= 1 ? wc : WEOF;
+ }
+}
+
+/* Entry point to set syntax options. */
+void
+dfasyntax (struct dfa *dfa, reg_syntax_t bits, bool fold, unsigned char eol)
+{
+ int i;
+ dfa->syntax.syntax_bits_set = true;
+ dfa->syntax.syntax_bits = bits;
+ dfa->syntax.case_fold = fold;
+ dfa->syntax.eolbyte = eol;
- /* Now that mbrtowc_cache[uc] is set, use it to calculate sbit. */
- sbit[uc] = char_context (uc);
- switch (sbit[uc])
+ for (i = CHAR_MIN; i <= CHAR_MAX; ++i)
+ {
+ unsigned char uc = i;
+
+ /* Use mbrtowc_cache to calculate sbit. */
+ dfa->syntax.sbit[uc] = char_context (dfa, uc);
+ switch (dfa->syntax.sbit[uc])
{
case CTX_LETTER:
- setbit (uc, letters);
+ setbit (uc, dfa->syntax.letters);
break;
case CTX_NEWLINE:
- setbit (uc, newline);
+ setbit (uc, dfa->syntax.newline);
break;
}
/* POSIX requires that the five bytes in "\n\r./" (including the
terminating NUL) cannot occur inside a multibyte character. */
- never_trail[uc] = (using_utf8 () ? (uc & 0xc0) != 0x80
- : strchr ("\n\r./", uc) != NULL);
+ dfa->syntax.never_trail[uc] = (using_utf8 ? (uc & 0xc0) != 0x80
+ : strchr ("\n\r./", uc) != NULL);
}
}
@@ -811,21 +869,21 @@ setbit_case_fold_c (int b, charclass c)
setbit (i, c);
}
+static void check_utf8 (void)
+{
+ wchar_t wc;
+ mbstate_t mbs = { 0 };
+ using_utf8 = mbrtowc (&wc, "\xc4\x80", 2, &mbs) == 2 && wc == 0x100;
+}
+static bool unibyte_c;
-/* UTF-8 encoding allows some optimizations that we can't otherwise
- assume in a multibyte encoding. */
-bool
-using_utf8 (void)
+static void check_unibyte_c (void)
{
- static int utf8 = -1;
- if (utf8 < 0)
- {
- wchar_t wc;
- mbstate_t mbs = { 0 };
- utf8 = mbrtowc (&wc, "\xc4\x80", 2, &mbs) == 2 && wc == 0x100;
- }
- return utf8;
+ char const *locale = setlocale (LC_ALL, NULL);
+ unibyte_c = (!locale
+ || STREQ (locale, "C")
+ || STREQ (locale, "POSIX"));
}
/* The current locale is known to be a unibyte locale
@@ -834,7 +892,7 @@ using_utf8 (void)
processed more efficiently. */
static bool
-using_simple_locale (void)
+using_simple_locale (struct dfa const *dfa)
{
/* The native character set is known to be compatible with
the C locale. The following test isn't perfect, but it's good
@@ -852,44 +910,9 @@ using_simple_locale (void)
&& '}' == 125 && '~' == 126)
};
- if (! native_c_charset || dfa->multibyte)
- return false;
- else
- {
- static int unibyte_c = -1;
- if (unibyte_c < 0)
- {
- char const *locale = setlocale (LC_ALL, NULL);
- unibyte_c = (!locale
- || STREQ (locale, "C")
- || STREQ (locale, "POSIX"));
- }
- return unibyte_c;
- }
+ return (!native_c_charset || dfa->multibyte) ? false : unibyte_c;
}
-/* Lexical analyzer. All the dross that deals with the obnoxious
- GNU Regex syntax bits is located here. The poor, suffering
- reader is referred to the GNU Regex documentation for the
- meaning of the @#%!@#%^!@ syntax bits. */
-
-static char const *lexptr; /* Pointer to next input character. */
-static size_t lexleft; /* Number of characters remaining. */
-static token lasttok; /* Previous token returned; initially END. */
-static bool laststart; /* We're separated from beginning or (,
- | only by zero-width characters. */
-static size_t parens; /* Count of outstanding left parens. */
-static int minrep, maxrep; /* Repeat counts for {m,n}. */
-
-static int cur_mb_len = 1; /* Length of the multibyte representation of
- wctok. */
-
-static wint_t wctok; /* Wide character representation of the current
- multibyte character, or WEOF if there was
- an encoding error. Used only if
- MB_CUR_MAX > 1. */
-
-
/* Fetch the next lexical input character. Set C (of type int) to the
next input byte, except set C to EOF if the input is a multibyte
character of length greater than 1. Set WC (of type wint_t) to the
@@ -897,24 +920,25 @@ static wint_t wctok; /* Wide character representation of the current
of length 1); otherwise set WC to WEOF. If there is no more input,
report EOFERR if EOFERR is not null, and return lasttok = END
otherwise. */
-# define FETCH_WC(c, wc, eoferr) \
+# define FETCH_WC(dfa, c, wc, eoferr) \
do { \
- if (! lexleft) \
+ if (! dfa->lexstate.lexleft) \
{ \
if ((eoferr) != 0) \
dfaerror (eoferr); \
else \
- return lasttok = END; \
+ return dfa->lexstate.lasttok = END; \
} \
else \
{ \
wint_t _wc; \
- size_t nbytes = mbs_to_wchar (&_wc, lexptr, lexleft, dfa); \
- cur_mb_len = nbytes; \
+ size_t nbytes = mbs_to_wchar (&_wc, dfa->lexstate.lexptr, \
+ dfa->lexstate.lexleft, dfa); \
+ dfa->lexstate.cur_mb_len = nbytes; \
(wc) = _wc; \
- (c) = nbytes == 1 ? to_uchar (*lexptr) : EOF; \
- lexptr += nbytes; \
- lexleft -= nbytes; \
+ (c) = nbytes == 1 ? to_uchar (*dfa->lexstate.lexptr) : EOF; \
+ dfa->lexstate.lexptr += nbytes; \
+ dfa->lexstate.lexleft -= nbytes; \
} \
} while (false)
@@ -1011,7 +1035,7 @@ find_pred (const char *str)
/* Multibyte character handling sub-routine for lex.
Parse a bracket expression and build a struct mb_char_classes. */
static token
-parse_bracket_exp (void)
+parse_bracket_exp (struct dfa *dfa)
{
bool invert;
int c, c1, c2;
@@ -1055,12 +1079,12 @@ parse_bracket_exp (void)
work_mbc = NULL;
memset (ccl, 0, sizeof ccl);
- FETCH_WC (c, wc, _("unbalanced ["));
+ FETCH_WC (dfa, c, wc, _("unbalanced ["));
if (c == '^')
{
- FETCH_WC (c, wc, _("unbalanced ["));
+ FETCH_WC (dfa, c, wc, _("unbalanced ["));
invert = true;
- known_bracket_exp = using_simple_locale ();
+ known_bracket_exp = using_simple_locale (dfa);
}
else
invert = false;
@@ -1077,9 +1101,9 @@ parse_bracket_exp (void)
dfa is ever called. */
if (c == '[')
{
- FETCH_WC (c1, wc1, _("unbalanced ["));
+ FETCH_WC (dfa, c1, wc1, _("unbalanced ["));
- if ((c1 == ':' && (syntax_bits & RE_CHAR_CLASSES))
+ if ((c1 == ':' && (dfa->syntax.syntax_bits & RE_CHAR_CLASSES))
|| c1 == '.' || c1 == '=')
{
enum { MAX_BRACKET_STRING_LEN = 32 };
@@ -1087,8 +1111,9 @@ parse_bracket_exp (void)
size_t len = 0;
for (;;)
{
- FETCH_WC (c, wc, _("unbalanced ["));
- if ((c == c1 && *lexptr == ']') || lexleft == 0)
+ FETCH_WC (dfa, c, wc, _("unbalanced ["));
+ if ((c == c1 && *dfa->lexstate.lexptr == ']')
+ || dfa->lexstate.lexleft == 0)
break;
if (len < MAX_BRACKET_STRING_LEN)
str[len++] = c;
@@ -1099,7 +1124,7 @@ parse_bracket_exp (void)
str[len] = '\0';
/* Fetch bracket. */
- FETCH_WC (c, wc, _("unbalanced ["));
+ FETCH_WC (dfa, c, wc, _("unbalanced ["));
if (c1 == ':')
/* Build character class. POSIX allows character
classes to match multicharacter collating elements,
@@ -1107,8 +1132,9 @@ parse_bracket_exp (void)
worry about that possibility. */
{
char const *class
- = (case_fold && (STREQ (str, "upper")
- || STREQ (str, "lower")) ? "alpha" : str);
+ = (dfa->syntax.case_fold && (STREQ (str, "upper")
+ || STREQ (str, "lower")) ?
+ "alpha" : str);
const struct dfa_ctype *pred = find_pred (class);
if (!pred)
dfaerror (_("invalid character class"));
@@ -1126,7 +1152,7 @@ parse_bracket_exp (void)
colon_warning_state |= 8;
/* Fetch new lookahead character. */
- FETCH_WC (c1, wc1, _("unbalanced ["));
+ FETCH_WC (dfa, c1, wc1, _("unbalanced ["));
continue;
}
@@ -1134,21 +1160,21 @@ parse_bracket_exp (void)
are already set up. */
}
- if (c == '\\' && (syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS))
- FETCH_WC (c, wc, _("unbalanced ["));
+ if (c == '\\' && (dfa->syntax.syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS))
+ FETCH_WC (dfa, c, wc, _("unbalanced ["));
if (c1 == NOTCHAR)
- FETCH_WC (c1, wc1, _("unbalanced ["));
+ FETCH_WC (dfa, c1, wc1, _("unbalanced ["));
if (c1 == '-')
/* build range characters. */
{
- FETCH_WC (c2, wc2, _("unbalanced ["));
+ FETCH_WC (dfa, c2, wc2, _("unbalanced ["));
/* A bracket expression like [a-[.aa.]] matches an unknown set.
Treat it like [-a[.aa.]] while parsing it, and
remember that the set is unknown. */
- if (c2 == '[' && *lexptr == '.')
+ if (c2 == '[' && *dfa->lexstate.lexptr == '.')
{
known_bracket_exp = false;
c2 = ']';
@@ -1158,28 +1184,29 @@ parse_bracket_exp (void)
{
/* In the case [x-], the - is an ordinary hyphen,
which is left in c1, the lookahead character. */
- lexptr -= cur_mb_len;
- lexleft += cur_mb_len;
+ dfa->lexstate.lexptr -= dfa->lexstate.cur_mb_len;
+ dfa->lexstate.lexleft += dfa->lexstate.cur_mb_len;
}
else
{
- if (c2 == '\\' && (syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS))
- FETCH_WC (c2, wc2, _("unbalanced ["));
+ if (c2 == '\\' && (dfa->syntax.syntax_bits
+ & RE_BACKSLASH_ESCAPE_IN_LISTS))
+ FETCH_WC (dfa, c2, wc2, _("unbalanced ["));
colon_warning_state |= 8;
- FETCH_WC (c1, wc1, _("unbalanced ["));
+ FETCH_WC (dfa, c1, wc1, _("unbalanced ["));
/* Treat [x-y] as a range if x != y. */
if (wc != wc2 || wc == WEOF)
{
if (dfa->multibyte)
known_bracket_exp = false;
- else if (using_simple_locale ())
+ else if (using_simple_locale (dfa))
{
int ci;
for (ci = c; ci <= c2; ci++)
setbit (ci, ccl);
- if (case_fold)
+ if (dfa->syntax.case_fold)
{
int uc = toupper (c);
int uc2 = toupper (c2);
@@ -1203,7 +1230,7 @@ parse_bracket_exp (void)
if (!dfa->multibyte)
{
- if (case_fold)
+ if (dfa->syntax.case_fold)
setbit_case_fold_c (c, ccl);
else
setbit (c, ccl);
@@ -1216,7 +1243,7 @@ parse_bracket_exp (void)
{
wchar_t folded[CASE_FOLDED_BUFSIZE + 1];
unsigned int i;
- unsigned int n = (case_fold
+ unsigned int n = (dfa->syntax.case_fold
? case_folded_counterparts (wc, folded + 1) + 1
: 1);
folded[0] = wc;
@@ -1241,7 +1268,7 @@ parse_bracket_exp (void)
if (dfa->multibyte)
{
work_mbc->invert = invert;
- work_mbc->cset = emptyset (ccl) ? -1 : charclass_index (ccl);
+ work_mbc->cset = emptyset (ccl) ? -1 : dfa_charclass_index (dfa, ccl);
return MBCSET;
}
@@ -1249,29 +1276,29 @@ parse_bracket_exp (void)
{
assert (!dfa->multibyte);
notset (ccl);
- if (syntax_bits & RE_HAT_LISTS_NOT_NEWLINE)
+ if (dfa->syntax.syntax_bits & RE_HAT_LISTS_NOT_NEWLINE)
clrbit ('\n', ccl);
}
- return CSET + charclass_index (ccl);
+ return CSET + dfa_charclass_index (dfa, ccl);
}
#define PUSH_LEX_STATE(s) \
do \
{ \
- char const *lexptr_saved = lexptr; \
- size_t lexleft_saved = lexleft; \
- lexptr = (s); \
- lexleft = strlen (lexptr)
+ char const *lexptr_saved = dfa->lexstate.lexptr; \
+ size_t lexleft_saved = dfa->lexstate.lexleft; \
+ dfa->lexstate.lexptr = (s); \
+ dfa->lexstate.lexleft = strlen (dfa->lexstate.lexptr)
#define POP_LEX_STATE() \
- lexptr = lexptr_saved; \
- lexleft = lexleft_saved; \
+ dfa->lexstate.lexptr = lexptr_saved; \
+ dfa->lexstate.lexleft = lexleft_saved; \
} \
while (false)
static token
-lex (void)
+lex (struct dfa *dfa)
{
int c, c2;
bool backslash = false;
@@ -1286,14 +1313,14 @@ lex (void)
"if (backslash) ...". */
for (i = 0; i < 2; ++i)
{
- FETCH_WC (c, wctok, NULL);
+ FETCH_WC (dfa, c, dfa->lexstate.wctok, NULL);
switch (c)
{
case '\\':
if (backslash)
goto normal_char;
- if (lexleft == 0)
+ if (dfa->lexstate.lexleft == 0)
dfaerror (_("unfinished \\ escape"));
backslash = true;
break;
@@ -1301,25 +1328,29 @@ lex (void)
case '^':
if (backslash)
goto normal_char;
- if (syntax_bits & RE_CONTEXT_INDEP_ANCHORS
- || lasttok == END || lasttok == LPAREN || lasttok == OR)
- return lasttok = BEGLINE;
+ if (dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_ANCHORS
+ || dfa->lexstate.lasttok == END || dfa->lexstate.lasttok == LPAREN
+ || dfa->lexstate.lasttok == OR)
+ return dfa->lexstate.lasttok = BEGLINE;
goto normal_char;
case '$':
if (backslash)
goto normal_char;
- if (syntax_bits & RE_CONTEXT_INDEP_ANCHORS
- || lexleft == 0
- || (syntax_bits & RE_NO_BK_PARENS
- ? lexleft > 0 && *lexptr == ')'
- : lexleft > 1 && lexptr[0] == '\\' && lexptr[1] == ')')
- || (syntax_bits & RE_NO_BK_VBAR
- ? lexleft > 0 && *lexptr == '|'
- : lexleft > 1 && lexptr[0] == '\\' && lexptr[1] == '|')
- || ((syntax_bits & RE_NEWLINE_ALT)
- && lexleft > 0 && *lexptr == '\n'))
- return lasttok = ENDLINE;
+ if (dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_ANCHORS
+ || dfa->lexstate.lexleft == 0
+ || (dfa->syntax.syntax_bits & RE_NO_BK_PARENS
+ ? dfa->lexstate.lexleft > 0 && *dfa->lexstate.lexptr == ')'
+ : dfa->lexstate.lexleft > 1 && dfa->lexstate.lexptr[0] == '\\'
+ && dfa->lexstate.lexptr[1] == ')')
+ || (dfa->syntax.syntax_bits & RE_NO_BK_VBAR
+ ? dfa->lexstate.lexleft > 0 && *dfa->lexstate.lexptr == '|'
+ : dfa->lexstate.lexleft > 1 && dfa->lexstate.lexptr[0] == '\\'
+ && dfa->lexstate.lexptr[1] == '|')
+ || ((dfa->syntax.syntax_bits & RE_NEWLINE_ALT)
+ && dfa->lexstate.lexleft > 0
+ && *dfa->lexstate.lexptr == '\n'))
+ return dfa->lexstate.lasttok = ENDLINE;
goto normal_char;
case '1':
@@ -1331,74 +1362,84 @@ lex (void)
case '7':
case '8':
case '9':
- if (backslash && !(syntax_bits & RE_NO_BK_REFS))
+ if (backslash && !(dfa->syntax.syntax_bits & RE_NO_BK_REFS))
{
- laststart = false;
- return lasttok = BACKREF;
+ dfa->lexstate.laststart = false;
+ return dfa->lexstate.lasttok = BACKREF;
}
goto normal_char;
case '`':
- if (backslash && !(syntax_bits & RE_NO_GNU_OPS))
- return lasttok = BEGLINE; /* FIXME: should be beginning of string */
+ if (backslash && !(dfa->syntax.syntax_bits & RE_NO_GNU_OPS))
+ {
+ /* FIXME: should be beginning of string */
+ return dfa->lexstate.lasttok = BEGLINE;
+ }
goto normal_char;
case '\'':
- if (backslash && !(syntax_bits & RE_NO_GNU_OPS))
- return lasttok = ENDLINE; /* FIXME: should be end of string */
+ if (backslash && !(dfa->syntax.syntax_bits & RE_NO_GNU_OPS))
+ {
+ /* FIXME: should be end of string */
+ return dfa->lexstate.lasttok = ENDLINE;
+ }
goto normal_char;
case '<':
- if (backslash && !(syntax_bits & RE_NO_GNU_OPS))
- return lasttok = BEGWORD;
+ if (backslash && !(dfa->syntax.syntax_bits & RE_NO_GNU_OPS))
+ return dfa->lexstate.lasttok = BEGWORD;
goto normal_char;
case '>':
- if (backslash && !(syntax_bits & RE_NO_GNU_OPS))
- return lasttok = ENDWORD;
+ if (backslash && !(dfa->syntax.syntax_bits & RE_NO_GNU_OPS))
+ return dfa->lexstate.lasttok = ENDWORD;
goto normal_char;
case 'b':
- if (backslash && !(syntax_bits & RE_NO_GNU_OPS))
- return lasttok = LIMWORD;
+ if (backslash && !(dfa->syntax.syntax_bits & RE_NO_GNU_OPS))
+ return dfa->lexstate.lasttok = LIMWORD;
goto normal_char;
case 'B':
- if (backslash && !(syntax_bits & RE_NO_GNU_OPS))
- return lasttok = NOTLIMWORD;
+ if (backslash && !(dfa->syntax.syntax_bits & RE_NO_GNU_OPS))
+ return dfa->lexstate.lasttok = NOTLIMWORD;
goto normal_char;
case '?':
- if (syntax_bits & RE_LIMITED_OPS)
+ if (dfa->syntax.syntax_bits & RE_LIMITED_OPS)
goto normal_char;
- if (backslash != ((syntax_bits & RE_BK_PLUS_QM) != 0))
+ if (backslash != ((dfa->syntax.syntax_bits & RE_BK_PLUS_QM) != 0))
goto normal_char;
- if (!(syntax_bits & RE_CONTEXT_INDEP_OPS) && laststart)
+ if (!(dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_OPS)
+ && dfa->lexstate.laststart)
goto normal_char;
- return lasttok = QMARK;
+ return dfa->lexstate.lasttok = QMARK;
case '*':
if (backslash)
goto normal_char;
- if (!(syntax_bits & RE_CONTEXT_INDEP_OPS) && laststart)
+ if (!(dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_OPS)
+ && dfa->lexstate.laststart)
goto normal_char;
- return lasttok = STAR;
+ return dfa->lexstate.lasttok = STAR;
case '+':
- if (syntax_bits & RE_LIMITED_OPS)
+ if (dfa->syntax.syntax_bits & RE_LIMITED_OPS)
goto normal_char;
- if (backslash != ((syntax_bits & RE_BK_PLUS_QM) != 0))
+ if (backslash != ((dfa->syntax.syntax_bits & RE_BK_PLUS_QM) != 0))
goto normal_char;
- if (!(syntax_bits & RE_CONTEXT_INDEP_OPS) && laststart)
+ if (!(dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_OPS)
+ && dfa->lexstate.laststart)
goto normal_char;
- return lasttok = PLUS;
+ return dfa->lexstate.lasttok = PLUS;
case '{':
- if (!(syntax_bits & RE_INTERVALS))
+ if (!(dfa->syntax.syntax_bits & RE_INTERVALS))
goto normal_char;
- if (backslash != ((syntax_bits & RE_NO_BK_BRACES) == 0))
+ if (backslash != ((dfa->syntax.syntax_bits & RE_NO_BK_BRACES) == 0))
goto normal_char;
- if (!(syntax_bits & RE_CONTEXT_INDEP_OPS) && laststart)
+ if (!(dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_OPS)
+ && dfa->lexstate.laststart)
goto normal_char;
/* Cases:
@@ -1408,79 +1449,86 @@ lex (void)
{,} - 0 to infinity (same as '*')
{M,N} - M through N */
{
- char const *p = lexptr;
- char const *lim = p + lexleft;
- minrep = maxrep = -1;
+ char const *p = dfa->lexstate.lexptr;
+ char const *lim = p + dfa->lexstate.lexleft;
+ dfa->lexstate.minrep = dfa->lexstate.maxrep = -1;
for (; p != lim && ISASCIIDIGIT (*p); p++)
{
- if (minrep < 0)
- minrep = *p - '0';
+ if (dfa->lexstate.minrep < 0)
+ dfa->lexstate.minrep = *p - '0';
else
- minrep = MIN (RE_DUP_MAX + 1, minrep * 10 + *p - '0');
+ dfa->lexstate.minrep = MIN (RE_DUP_MAX + 1,
+ (dfa->lexstate.minrep
+ * 10 + *p - '0'));
}
if (p != lim)
{
if (*p != ',')
- maxrep = minrep;
+ dfa->lexstate.maxrep = dfa->lexstate.minrep;
else
{
- if (minrep < 0)
- minrep = 0;
+ if (dfa->lexstate.minrep < 0)
+ dfa->lexstate.minrep = 0;
while (++p != lim && ISASCIIDIGIT (*p))
{
- if (maxrep < 0)
- maxrep = *p - '0';
+ if (dfa->lexstate.maxrep < 0)
+ dfa->lexstate.maxrep = *p - '0';
else
- maxrep = MIN (RE_DUP_MAX + 1, maxrep * 10 + *p - '0');
+ dfa->lexstate.maxrep = MIN (RE_DUP_MAX + 1,
+ (dfa->lexstate.maxrep
+ * 10 + *p - '0'));
}
}
}
if (! ((! backslash || (p != lim && *p++ == '\\'))
&& p != lim && *p++ == '}'
- && 0 <= minrep && (maxrep < 0 || minrep <= maxrep)))
+ && 0 <= dfa->lexstate.minrep
+ && (dfa->lexstate.maxrep < 0
+ || dfa->lexstate.minrep <= dfa->lexstate.maxrep)))
{
- if (syntax_bits & RE_INVALID_INTERVAL_ORD)
+ if (dfa->syntax.syntax_bits & RE_INVALID_INTERVAL_ORD)
goto normal_char;
dfaerror (_("invalid content of \\{\\}"));
}
- if (RE_DUP_MAX < maxrep)
+ if (RE_DUP_MAX < dfa->lexstate.maxrep)
dfaerror (_("regular expression too big"));
- lexptr = p;
- lexleft = lim - p;
+ dfa->lexstate.lexptr = p;
+ dfa->lexstate.lexleft = lim - p;
}
- laststart = false;
- return lasttok = REPMN;
+ dfa->lexstate.laststart = false;
+ return dfa->lexstate.lasttok = REPMN;
case '|':
- if (syntax_bits & RE_LIMITED_OPS)
+ if (dfa->syntax.syntax_bits & RE_LIMITED_OPS)
goto normal_char;
- if (backslash != ((syntax_bits & RE_NO_BK_VBAR) == 0))
+ if (backslash != ((dfa->syntax.syntax_bits & RE_NO_BK_VBAR) == 0))
goto normal_char;
- laststart = true;
- return lasttok = OR;
+ dfa->lexstate.laststart = true;
+ return dfa->lexstate.lasttok = OR;
case '\n':
- if (syntax_bits & RE_LIMITED_OPS
- || backslash || !(syntax_bits & RE_NEWLINE_ALT))
+ if (dfa->syntax.syntax_bits & RE_LIMITED_OPS
+ || backslash || !(dfa->syntax.syntax_bits & RE_NEWLINE_ALT))
goto normal_char;
- laststart = true;
- return lasttok = OR;
+ dfa->lexstate.laststart = true;
+ return dfa->lexstate.lasttok = OR;
case '(':
- if (backslash != ((syntax_bits & RE_NO_BK_PARENS) == 0))
+ if (backslash != ((dfa->syntax.syntax_bits & RE_NO_BK_PARENS) == 0))
goto normal_char;
- ++parens;
- laststart = true;
- return lasttok = LPAREN;
+ ++dfa->lexstate.parens;
+ dfa->lexstate.laststart = true;
+ return dfa->lexstate.lasttok = LPAREN;
case ')':
- if (backslash != ((syntax_bits & RE_NO_BK_PARENS) == 0))
+ if (backslash != ((dfa->syntax.syntax_bits & RE_NO_BK_PARENS) == 0))
goto normal_char;
- if (parens == 0 && syntax_bits & RE_UNMATCHED_RIGHT_PAREN_ORD)
+ if (dfa->lexstate.parens == 0
+ && dfa->syntax.syntax_bits & RE_UNMATCHED_RIGHT_PAREN_ORD)
goto normal_char;
- --parens;
- laststart = false;
- return lasttok = RPAREN;
+ --dfa->lexstate.parens;
+ dfa->lexstate.laststart = false;
+ return dfa->lexstate.lasttok = RPAREN;
case '.':
if (backslash)
@@ -1489,21 +1537,21 @@ lex (void)
{
/* In multibyte environment period must match with a single
character not a byte. So we use ANYCHAR. */
- laststart = false;
- return lasttok = ANYCHAR;
+ dfa->lexstate.laststart = false;
+ return dfa->lexstate.lasttok = ANYCHAR;
}
zeroset (ccl);
notset (ccl);
- if (!(syntax_bits & RE_DOT_NEWLINE))
+ if (!(dfa->syntax.syntax_bits & RE_DOT_NEWLINE))
clrbit ('\n', ccl);
- if (syntax_bits & RE_DOT_NOT_NULL)
+ if (dfa->syntax.syntax_bits & RE_DOT_NOT_NULL)
clrbit ('\0', ccl);
- laststart = false;
- return lasttok = CSET + charclass_index (ccl);
+ dfa->lexstate.laststart = false;
+ return dfa->lexstate.lasttok = CSET + dfa_charclass_index (dfa, ccl);
case 's':
case 'S':
- if (!backslash || (syntax_bits & RE_NO_GNU_OPS))
+ if (!backslash || (dfa->syntax.syntax_bits & RE_NO_GNU_OPS))
goto normal_char;
if (!dfa->multibyte)
{
@@ -1513,8 +1561,9 @@ lex (void)
setbit (c2, ccl);
if (c == 'S')
notset (ccl);
- laststart = false;
- return lasttok = CSET + charclass_index (ccl);
+ dfa->lexstate.laststart = false;
+ return dfa->lexstate.lasttok = CSET + dfa_charclass_index (dfa,
+ ccl);
}
/* FIXME: see if optimizing this, as is done with ANYCHAR and
@@ -1525,16 +1574,16 @@ lex (void)
strings, each minus its "already processed" '['. */
PUSH_LEX_STATE (c == 's' ? "[:space:]]" : "^[:space:]]");
- lasttok = parse_bracket_exp ();
+ dfa->lexstate.lasttok = parse_bracket_exp (dfa);
POP_LEX_STATE ();
- laststart = false;
- return lasttok;
+ dfa->lexstate.laststart = false;
+ return dfa->lexstate.lasttok;
case 'w':
case 'W':
- if (!backslash || (syntax_bits & RE_NO_GNU_OPS))
+ if (!backslash || (dfa->syntax.syntax_bits & RE_NO_GNU_OPS))
goto normal_char;
if (!dfa->multibyte)
@@ -1545,8 +1594,9 @@ lex (void)
setbit (c2, ccl);
if (c == 'W')
notset (ccl);
- laststart = false;
- return lasttok = CSET + charclass_index (ccl);
+ dfa->lexstate.laststart = false;
+ return dfa->lexstate.lasttok = CSET + dfa_charclass_index (dfa,
+ ccl);
}
/* FIXME: see if optimizing this, as is done with ANYCHAR and
@@ -1557,35 +1607,36 @@ lex (void)
strings, each minus its "already processed" '['. */
PUSH_LEX_STATE (c == 'w' ? "_[:alnum:]]" : "^_[:alnum:]]");
- lasttok = parse_bracket_exp ();
+ dfa->lexstate.lasttok = parse_bracket_exp (dfa);
POP_LEX_STATE ();
- laststart = false;
- return lasttok;
+ dfa->lexstate.laststart = false;
+ return dfa->lexstate.lasttok;
case '[':
if (backslash)
goto normal_char;
- laststart = false;
- return lasttok = parse_bracket_exp ();
+ dfa->lexstate.laststart = false;
+ return dfa->lexstate.lasttok = parse_bracket_exp (dfa);
default:
normal_char:
- laststart = false;
+ dfa->lexstate.laststart = false;
/* For multibyte character sets, folding is done in atom. Always
return WCHAR. */
if (dfa->multibyte)
- return lasttok = WCHAR;
+ return dfa->lexstate.lasttok = WCHAR;
- if (case_fold && isalpha (c))
+ if (dfa->syntax.case_fold && isalpha (c))
{
zeroset (ccl);
setbit_case_fold_c (c, ccl);
- return lasttok = CSET + charclass_index (ccl);
+ return dfa->lexstate.lasttok = CSET + dfa_charclass_index (dfa,
+ ccl);
}
- return lasttok = c;
+ return dfa->lexstate.lasttok = c;
}
}
@@ -1595,17 +1646,8 @@ lex (void)
return END; /* keeps pedantic compilers happy. */
}
-/* Recursive descent parser for regular expressions. */
-
-static token tok; /* Lookahead token. */
-static size_t depth; /* Current depth of a hypothetical stack
- holding deferred productions. This is
- used to determine the depth that will be
- required of the real stack later on in
- dfaanalyze. */
-
static void
-addtok_mb (token t, int mbprop)
+addtok_mb (struct dfa *dfa, token t, int mbprop)
{
if (dfa->talloc == dfa->tindex)
{
@@ -1628,7 +1670,7 @@ addtok_mb (token t, int mbprop)
case CAT:
case OR:
- --depth;
+ --dfa->parsestate.depth;
break;
case BACKREF:
@@ -1638,19 +1680,19 @@ addtok_mb (token t, int mbprop)
++dfa->nleaves;
/* fallthrough */
case EMPTY:
- ++depth;
+ ++dfa->parsestate.depth;
break;
}
- if (depth > dfa->depth)
- dfa->depth = depth;
+ if (dfa->parsestate.depth > dfa->depth)
+ dfa->depth = dfa->parsestate.depth;
}
-static void addtok_wc (wint_t wc);
+static void addtok_wc (struct dfa *dfa, wint_t wc);
/* Add the given token to the parse tree, maintaining the depth count and
updating the maximum depth if necessary. */
static void
-addtok (token t)
+addtok (struct dfa *dfa, token t)
{
if (dfa->multibyte && t == MBCSET)
{
@@ -1662,9 +1704,9 @@ addtok (token t)
This does not require UTF-8. */
for (i = 0; i < work_mbc->nchars; i++)
{
- addtok_wc (work_mbc->chars[i]);
+ addtok_wc (dfa, work_mbc->chars[i]);
if (need_or)
- addtok (OR);
+ addtok (dfa, OR);
need_or = true;
}
work_mbc->nchars = 0;
@@ -1673,14 +1715,14 @@ addtok (token t)
that the mbcset is empty now. Do nothing in that case. */
if (work_mbc->cset != -1)
{
- addtok (CSET + work_mbc->cset);
+ addtok (dfa, CSET + work_mbc->cset);
if (need_or)
- addtok (OR);
+ addtok (dfa, OR);
}
}
else
{
- addtok_mb (t, 3);
+ addtok_mb (dfa, t, 3);
}
}
@@ -1691,7 +1733,7 @@ addtok (token t)
<mb1(1st-byte)><mb1(2nd-byte)><CAT><mb1(3rd-byte)><CAT>
<mb2(1st-byte)><mb2(2nd-byte)><CAT><mb2(3rd-byte)><CAT><CAT> */
static void
-addtok_wc (wint_t wc)
+addtok_wc (struct dfa *dfa, wint_t wc)
{
unsigned char buf[MB_LEN_MAX];
mbstate_t s = { 0 };
@@ -1699,25 +1741,25 @@ addtok_wc (wint_t wc)
size_t stored_bytes = wcrtomb ((char *) buf, wc, &s);
if (stored_bytes != (size_t) -1)
- cur_mb_len = stored_bytes;
+ dfa->lexstate.cur_mb_len = stored_bytes;
else
{
/* This is merely stop-gap. buf[0] is undefined, yet skipping
the addtok_mb call altogether can corrupt the heap. */
- cur_mb_len = 1;
+ dfa->lexstate.cur_mb_len = 1;
buf[0] = 0;
}
- addtok_mb (buf[0], cur_mb_len == 1 ? 3 : 1);
- for (i = 1; i < cur_mb_len; i++)
+ addtok_mb (dfa, buf[0], dfa->lexstate.cur_mb_len == 1 ? 3 : 1);
+ for (i = 1; i < dfa->lexstate.cur_mb_len; i++)
{
- addtok_mb (buf[i], i == cur_mb_len - 1 ? 2 : 0);
- addtok (CAT);
+ addtok_mb (dfa, buf[i], i == dfa->lexstate.cur_mb_len - 1 ? 2 : 0);
+ addtok (dfa, CAT);
}
}
static void
-add_utf8_anychar (void)
+add_utf8_anychar (struct dfa *dfa)
{
static charclass const utf8_classes[5] = {
/* 80-bf: non-leading bytes. */
@@ -1746,12 +1788,12 @@ add_utf8_anychar (void)
copyset (utf8_classes[i], c);
if (i == 1)
{
- if (!(syntax_bits & RE_DOT_NEWLINE))
+ if (!(dfa->syntax.syntax_bits & RE_DOT_NEWLINE))
clrbit ('\n', c);
- if (syntax_bits & RE_DOT_NOT_NULL)
+ if (dfa->syntax.syntax_bits & RE_DOT_NOT_NULL)
clrbit ('\0', c);
}
- dfa->utf8_anychar_classes[i] = CSET + charclass_index (c);
+ dfa->utf8_anychar_classes[i] = CSET + dfa_charclass_index (dfa, c);
}
/* A valid UTF-8 character is
@@ -1765,12 +1807,12 @@ add_utf8_anychar (void)
and you get "B|(C|(D|EA)A)A". And since the token buffer is in reverse
Polish notation, you get "B C D E A CAT OR A CAT OR A CAT OR". */
for (i = 1; i < n; i++)
- addtok (dfa->utf8_anychar_classes[i]);
+ addtok (dfa, dfa->utf8_anychar_classes[i]);
while (--i > 1)
{
- addtok (dfa->utf8_anychar_classes[0]);
- addtok (CAT);
- addtok (OR);
+ addtok (dfa, dfa->utf8_anychar_classes[0]);
+ addtok (dfa, CAT);
+ addtok (dfa, OR);
}
}
@@ -1810,31 +1852,32 @@ add_utf8_anychar (void)
The parser builds a parse tree in postfix form in an array of tokens. */
static void
-atom (void)
+atom (struct dfa *dfa)
{
- if (tok == WCHAR)
+ if (dfa->parsestate.tok == WCHAR)
{
- if (wctok == WEOF)
- addtok (BACKREF);
+ if (dfa->lexstate.wctok == WEOF)
+ addtok (dfa, BACKREF);
else
{
- addtok_wc (wctok);
+ addtok_wc (dfa, dfa->lexstate.wctok);
- if (case_fold)
+ if (dfa->syntax.case_fold)
{
wchar_t folded[CASE_FOLDED_BUFSIZE];
- unsigned int i, n = case_folded_counterparts (wctok, folded);
+ unsigned int i, n = case_folded_counterparts (dfa->lexstate.wctok,
+ folded);
for (i = 0; i < n; i++)
{
- addtok_wc (folded[i]);
- addtok (OR);
+ addtok_wc (dfa, folded[i]);
+ addtok (dfa, OR);
}
}
}
- tok = lex ();
+ dfa->parsestate.tok = lex (dfa);
}
- else if (tok == ANYCHAR && using_utf8 ())
+ else if (dfa->parsestate.tok == ANYCHAR && using_utf8)
{
/* For UTF-8 expand the period to a series of CSETs that define a valid
UTF-8 character. This avoids using the slow multibyte path. I'm
@@ -1843,32 +1886,35 @@ atom (void)
it is done above in add_utf8_anychar. So, let's start with
UTF-8: it is the most used, and the structure of the encoding
makes the correctness more obvious. */
- add_utf8_anychar ();
- tok = lex ();
+ add_utf8_anychar (dfa);
+ dfa->parsestate.tok = lex (dfa);
}
- else if ((tok >= 0 && tok < NOTCHAR) || tok >= CSET || tok == BACKREF
- || tok == BEGLINE || tok == ENDLINE || tok == BEGWORD
- || tok == ANYCHAR || tok == MBCSET
- || tok == ENDWORD || tok == LIMWORD || tok == NOTLIMWORD)
+ else if ((dfa->parsestate.tok >= 0 && dfa->parsestate.tok < NOTCHAR)
+ || dfa->parsestate.tok >= CSET || dfa->parsestate.tok == BACKREF
+ || dfa->parsestate.tok == BEGLINE || dfa->parsestate.tok == ENDLINE
+ || dfa->parsestate.tok == BEGWORD || dfa->parsestate.tok == ANYCHAR
+ || dfa->parsestate.tok == MBCSET || dfa->parsestate.tok == ENDWORD
+ || dfa->parsestate.tok == LIMWORD
+ || dfa->parsestate.tok == NOTLIMWORD)
{
- addtok (tok);
- tok = lex ();
+ addtok (dfa, dfa->parsestate.tok);
+ dfa->parsestate.tok = lex (dfa);
}
- else if (tok == LPAREN)
+ else if (dfa->parsestate.tok == LPAREN)
{
- tok = lex ();
- regexp ();
- if (tok != RPAREN)
+ dfa->parsestate.tok = lex (dfa);
+ regexp (dfa);
+ if (dfa->parsestate.tok != RPAREN)
dfaerror (_("unbalanced ("));
- tok = lex ();
+ dfa->parsestate.tok = lex (dfa);
}
else
- addtok (EMPTY);
+ addtok (dfa, EMPTY);
}
/* Return the number of tokens in the given subexpression. */
static size_t _GL_ATTRIBUTE_PURE
-nsubtoks (size_t tindex)
+nsubtoks (struct dfa const *dfa, size_t tindex)
{
size_t ntoks1;
@@ -1879,90 +1925,93 @@ nsubtoks (size_t tindex)
case QMARK:
case STAR:
case PLUS:
- return 1 + nsubtoks (tindex - 1);
+ return 1 + nsubtoks (dfa, tindex - 1);
case CAT:
case OR:
- ntoks1 = nsubtoks (tindex - 1);
- return 1 + ntoks1 + nsubtoks (tindex - 1 - ntoks1);
+ ntoks1 = nsubtoks (dfa, tindex - 1);
+ return 1 + ntoks1 + nsubtoks (dfa, tindex - 1 - ntoks1);
}
}
/* Copy the given subexpression to the top of the tree. */
static void
-copytoks (size_t tindex, size_t ntokens)
+copytoks (struct dfa *dfa, size_t tindex, size_t ntokens)
{
size_t i;
if (dfa->multibyte)
for (i = 0; i < ntokens; ++i)
- addtok_mb (dfa->tokens[tindex + i], dfa->multibyte_prop[tindex + i]);
+ addtok_mb (dfa, dfa->tokens[tindex + i], dfa->multibyte_prop[tindex + i]);
else
for (i = 0; i < ntokens; ++i)
- addtok_mb (dfa->tokens[tindex + i], 3);
+ addtok_mb (dfa, dfa->tokens[tindex + i], 3);
}
static void
-closure (void)
+closure (struct dfa *dfa)
{
int i;
size_t tindex, ntokens;
- atom ();
- while (tok == QMARK || tok == STAR || tok == PLUS || tok == REPMN)
- if (tok == REPMN && (minrep || maxrep))
+ atom (dfa);
+ while (dfa->parsestate.tok == QMARK || dfa->parsestate.tok == STAR
+ || dfa->parsestate.tok == PLUS || dfa->parsestate.tok == REPMN)
+ if (dfa->parsestate.tok == REPMN
+ && (dfa->lexstate.minrep || dfa->lexstate.maxrep))
{
- ntokens = nsubtoks (dfa->tindex);
+ ntokens = nsubtoks (dfa, dfa->tindex);
tindex = dfa->tindex - ntokens;
- if (maxrep < 0)
- addtok (PLUS);
- if (minrep == 0)
- addtok (QMARK);
- for (i = 1; i < minrep; ++i)
+ if (dfa->lexstate.maxrep < 0)
+ addtok (dfa, PLUS);
+ if (dfa->lexstate.minrep == 0)
+ addtok (dfa, QMARK);
+ for (i = 1; i < dfa->lexstate.minrep; ++i)
{
- copytoks (tindex, ntokens);
- addtok (CAT);
+ copytoks (dfa, tindex, ntokens);
+ addtok (dfa, CAT);
}
- for (; i < maxrep; ++i)
+ for (; i < dfa->lexstate.maxrep; ++i)
{
- copytoks (tindex, ntokens);
- addtok (QMARK);
- addtok (CAT);
+ copytoks (dfa, tindex, ntokens);
+ addtok (dfa, QMARK);
+ addtok (dfa, CAT);
}
- tok = lex ();
+ dfa->parsestate.tok = lex (dfa);
}
- else if (tok == REPMN)
+ else if (dfa->parsestate.tok == REPMN)
{
- dfa->tindex -= nsubtoks (dfa->tindex);
- tok = lex ();
- closure ();
+ dfa->tindex -= nsubtoks (dfa, dfa->tindex);
+ dfa->parsestate.tok = lex (dfa);
+ closure (dfa);
}
else
{
- addtok (tok);
- tok = lex ();
+ addtok (dfa, dfa->parsestate.tok);
+ dfa->parsestate.tok = lex (dfa);
}
}
static void
-branch (void)
+branch (struct dfa* dfa)
{
- closure ();
- while (tok != RPAREN && tok != OR && tok >= 0)
+ closure (dfa);
+ while (dfa->parsestate.tok != RPAREN && dfa->parsestate.tok != OR
+ && dfa->parsestate.tok >= 0)
{
- closure ();
- addtok (CAT);
+ closure (dfa);
+ addtok (dfa, CAT);
}
}
static void
-regexp (void)
+regexp (struct dfa *dfa)
{
- branch ();
- while (tok == OR)
+ branch (dfa);
+ while (dfa->parsestate.tok == OR)
{
- tok = lex ();
- branch ();
- addtok (OR);
+ dfa->parsestate.tok = lex (dfa);
+ branch (dfa);
+ addtok (dfa, OR);
}
}
@@ -1972,34 +2021,33 @@ regexp (void)
static void
dfaparse (char const *s, size_t len, struct dfa *d)
{
- dfa = d;
- lexptr = s;
- lexleft = len;
- lasttok = END;
- laststart = true;
- parens = 0;
- if (dfa->multibyte)
+ d->lexstate.lexptr = s;
+ d->lexstate.lexleft = len;
+ d->lexstate.lasttok = END;
+ d->lexstate.laststart = true;
+ d->lexstate.parens = 0;
+ if (d->multibyte)
{
- cur_mb_len = 0;
+ d->lexstate.cur_mb_len = 0;
memset (&d->mbs, 0, sizeof d->mbs);
}
- if (!syntax_bits_set)
+ if (!d->syntax.syntax_bits_set)
dfaerror (_("no syntax specified"));
- tok = lex ();
- depth = d->depth;
+ d->parsestate.tok = lex (d);
+ d->parsestate.depth = d->depth;
- regexp ();
+ regexp (d);
- if (tok != END)
+ if (d->parsestate.tok != END)
dfaerror (_("unbalanced )"));
- addtok (END - d->nregexps);
- addtok (CAT);
+ addtok (d, END - d->nregexps);
+ addtok (d, CAT);
if (d->nregexps)
- addtok (OR);
+ addtok (d, OR);
++d->nregexps;
}
@@ -2270,19 +2318,19 @@ epsclosure (position_set *s, struct dfa const *d, char *visited)
character included in C. */
static int
-charclass_context (charclass c)
+charclass_context (struct dfa const *dfa, charclass c)
{
int context = 0;
unsigned int j;
- if (tstbit (eolbyte, c))
+ if (tstbit (dfa->syntax.eolbyte, c))
context |= CTX_NEWLINE;
for (j = 0; j < CHARCLASS_WORDS; ++j)
{
- if (c[j] & letters[j])
+ if (c[j] & dfa->syntax.letters[j])
context |= CTX_LETTER;
- if (c[j] & ~(letters[j] | newline[j]))
+ if (c[j] & ~(dfa->syntax.letters[j] | dfa->syntax.newline[j]))
context |= CTX_NONE;
}
@@ -2677,15 +2725,15 @@ dfastate (state_num s, struct dfa *d, state_num trans[])
if (!SUCCEEDS_IN_CONTEXT (pos.constraint,
d->states[s].context, CTX_NEWLINE))
for (j = 0; j < CHARCLASS_WORDS; ++j)
- matches[j] &= ~newline[j];
+ matches[j] &= ~d->syntax.newline[j];
if (!SUCCEEDS_IN_CONTEXT (pos.constraint,
d->states[s].context, CTX_LETTER))
for (j = 0; j < CHARCLASS_WORDS; ++j)
- matches[j] &= ~letters[j];
+ matches[j] &= ~d->syntax.letters[j];
if (!SUCCEEDS_IN_CONTEXT (pos.constraint,
d->states[s].context, CTX_NONE))
for (j = 0; j < CHARCLASS_WORDS; ++j)
- matches[j] &= letters[j] | newline[j];
+ matches[j] &= d->syntax.letters[j] | d->syntax.newline[j];
/* If there are no characters left, there's no point in going on. */
for (j = 0; j < CHARCLASS_WORDS && !matches[j]; ++j)
@@ -2791,7 +2839,7 @@ dfastate (state_num s, struct dfa *d, state_num trans[])
for (i = 0; i < NOTCHAR; ++i)
trans[i] = unibyte_word_constituent (i) ? state_letter : state;
- trans[eolbyte] = state_newline;
+ trans[d->syntax.eolbyte] = state_newline;
}
else
for (i = 0; i < NOTCHAR; ++i)
@@ -2847,7 +2895,7 @@ dfastate (state_num s, struct dfa *d, state_num trans[])
}
/* Find out if the new state will want any context information. */
- possible_contexts = charclass_context (labels[i]);
+ possible_contexts = charclass_context (d, labels[i]);
separate_contexts = state_separate_contexts (&follows);
/* Find the state(s) corresponding to the union of the follows. */
@@ -2894,7 +2942,7 @@ dfastate (state_num s, struct dfa *d, state_num trans[])
{
int c = j * CHARCLASS_WORD_BITS + k;
- if (c == eolbyte)
+ if (c == d->syntax.eolbyte)
trans[c] = state_newline;
else if (unibyte_word_constituent (c))
trans[c] = state_letter;
@@ -3020,8 +3068,8 @@ build_state (state_num s, struct dfa *d)
/* Keep the newline transition in a special place so we can use it as
a sentinel. */
- d->newlines[s] = trans[eolbyte];
- trans[eolbyte] = -1;
+ d->newlines[s] = trans[d->syntax.eolbyte];
+ trans[d->syntax.eolbyte] = -1;
if (ACCEPTING (s, *d))
d->fails[s] = trans;
@@ -3040,7 +3088,7 @@ transit_state_singlebyte (struct dfa *d, state_num s, unsigned char const **pp)
{
state_num *t;
- if (**pp == eolbyte)
+ if (**pp == d->syntax.eolbyte)
{
/* S is always an initial state in transit_state, so the
transition table for the state must have been built already. */
@@ -3083,7 +3131,7 @@ transit_state (struct dfa *d, state_num s, unsigned char const **pp,
size_t i, j;
int mbclen = mbs_to_wchar (&wc, (char const *) *pp, end - *pp, d);
- int context = wc == eolbyte ? CTX_NEWLINE : CTX_NONE;
+ int context = wc == d->syntax.eolbyte ? CTX_NEWLINE : CTX_NONE;
bool context_newline = context == CTX_NEWLINE;
/* This state has some operators which can match a multibyte character. */
@@ -3201,7 +3249,7 @@ skip_remains_mb (struct dfa *d, unsigned char const *p,
unsigned char const *mbp, char const *end, wint_t *wcp)
{
wint_t wc = WEOF;
- if (never_trail[*p])
+ if (d->syntax.never_trail[*p])
return p;
while (mbp < p)
mbp += mbs_to_wchar (&wc, (char const *) mbp,
@@ -3239,7 +3287,7 @@ dfaexec_main (struct dfa *d, char const *begin, char *end, bool allow_nl,
unsigned char const *p, *mbp; /* Current input character. */
state_num **trans, *t; /* Copy of d->trans so it can be optimized
into a register. */
- unsigned char eol = eolbyte; /* Likewise for eolbyte. */
+ unsigned char eol = d->syntax.eolbyte; /* Likewise for eolbyte. */
unsigned char saved_end;
size_t nlcount = 0;
@@ -3306,8 +3354,8 @@ dfaexec_main (struct dfa *d, char const *begin, char *end, bool allow_nl,
}
if (d->states[s].mbps.nelem == 0 || (*p == eol && !allow_nl)
- || (*p == '\n' && !(syntax_bits & RE_DOT_NEWLINE))
- || (*p == '\0' && (syntax_bits & RE_DOT_NOT_NULL))
+ || (*p == '\n' && !(d->syntax.syntax_bits & RE_DOT_NEWLINE))
+ || (*p == '\0' && (d->syntax.syntax_bits & RE_DOT_NOT_NULL))
|| (char *) p >= end)
{
/* If an input character does not match ANYCHAR, do it
@@ -3370,14 +3418,14 @@ dfaexec_main (struct dfa *d, char const *begin, char *end, bool allow_nl,
}
else if (d->fails[s])
{
- if (d->success[s] & sbit[*p])
+ if (d->success[s] & d->syntax.sbit[*p])
goto done;
s1 = s;
if (!multibyte || d->states[s].mbps.nelem == 0
|| (*p == eol && !allow_nl)
- || (*p == '\n' && !(syntax_bits & RE_DOT_NEWLINE))
- || (*p == '\0' && (syntax_bits & RE_DOT_NOT_NULL))
+ || (*p == '\n' && !(d->syntax.syntax_bits & RE_DOT_NEWLINE))
+ || (*p == '\0' && (d->syntax.syntax_bits & RE_DOT_NOT_NULL))
|| (char *) p >= end)
{
/* If a input character does not match ANYCHAR, do it
@@ -3479,17 +3527,6 @@ free_mbdata (struct dfa *d)
}
}
-/* Initialize the components of a dfa that the other routines don't
- initialize for themselves. */
-static void
-dfainit (struct dfa *d)
-{
- memset (d, 0, sizeof *d);
- d->multibyte = MB_CUR_MAX > 1;
- d->dfaexec = d->multibyte ? dfaexec_mb : dfaexec_sb;
- d->fast = !d->multibyte;
-}
-
/* Return true if every construct in D is supported by this DFA matcher. */
static bool _GL_ATTRIBUTE_PURE
dfa_supported (struct dfa const *d)
@@ -3521,7 +3558,7 @@ dfaoptimize (struct dfa *d)
size_t i;
bool have_backref = false;
- if (!using_utf8 ())
+ if (!using_utf8)
return;
for (i = 0; i < d->tindex; ++i)
@@ -3640,7 +3677,6 @@ dfassbuild (struct dfa *d)
void
dfacomp (char const *s, size_t len, struct dfa *d, bool searchflag)
{
- dfainit (d);
dfaparse (s, len, d);
dfassbuild (d);
@@ -3957,7 +3993,7 @@ dfamust (struct dfa const *d)
size_t rj;
bool need_begline = false;
bool need_endline = false;
- bool case_fold_unibyte = case_fold && MB_CUR_MAX == 1;
+ bool case_fold_unibyte = d->syntax.case_fold && MB_CUR_MAX == 1;
struct dfamust *dm;
for (ri = 0; ri < d->tindex; ++ri)
@@ -4194,7 +4230,20 @@ dfamustfree (struct dfamust *dm)
struct dfa *
dfaalloc (void)
{
- return xmalloc (sizeof (struct dfa));
+ struct dfa *d = xcalloc (1, sizeof (struct dfa));
+ d->multibyte = MB_CUR_MAX > 1;
+ d->dfaexec = d->multibyte ? dfaexec_mb : dfaexec_sb;
+ d->fast = !d->multibyte;
+ d->lexstate.cur_mb_len = 1;
+ return d;
+}
+
+void
+dfa_init (void)
+{
+ check_utf8 ();
+ check_unibyte_c ();
+ init_mbrtowc_cache ();
}
/* vim:set shiftwidth=2: */
diff --git a/dfa.h b/dfa.h
index 242f4cfa..02f56f44 100644
--- a/dfa.h
+++ b/dfa.h
@@ -54,10 +54,10 @@ extern struct dfamust *dfamust (struct dfa const *);
/* Free the storage held by the components of a struct dfamust. */
extern void dfamustfree (struct dfamust *);
-/* dfasyntax() takes three arguments; the first sets the syntax bits described
- earlier in this file, the second sets the case-folding flag, and the
- third specifies the line terminator. */
-extern void dfasyntax (reg_syntax_t, bool, unsigned char);
+/* dfasyntax() takes four arguments; the first is the dfa to operate on, the
+ second sets the syntax bits described earlier in this file, the third sets
+ the case-folding flag, and the fourth specifies the line terminator. */
+extern void dfasyntax (struct dfa *, reg_syntax_t, bool, unsigned char);
/* Compile the given string of the given length into the given struct dfa.
Final argument is a flag specifying whether to build a searching or an
@@ -104,4 +104,7 @@ extern void dfawarn (const char *);
The user must supply a dfaerror. */
extern _Noreturn void dfaerror (const char *);
-extern bool using_utf8 (void);
+extern bool dfa_using_utf8 (void) _GL_ATTRIBUTE_PURE;
+
+/* This must be called before calling any of the above dfa*() functions. */
+extern void dfa_init (void);
diff --git a/doc/ChangeLog b/doc/ChangeLog
index 53d1d346..5bc066f4 100644
--- a/doc/ChangeLog
+++ b/doc/ChangeLog
@@ -1,3 +1,11 @@
+2016-08-23 Arnold D. Robbins <arnold@skeeve.com>
+
+ * Makefile.am (EXTRA_DIST): Add new file, wordlist.
+ (spell): New target.
+ * wordlist: New file.
+ * gawktexi.in: Fix typos, adjust update date.
+ * awkcard.in: Update copyright years.
+
2016-08-03 Arnold D. Robbins <arnold@skeeve.com>
Remove typed regexes until they can be done properly.
diff --git a/doc/Makefile.am b/doc/Makefile.am
index 5eefaadf..a2015629 100644
--- a/doc/Makefile.am
+++ b/doc/Makefile.am
@@ -47,6 +47,7 @@ EXTRA_DIST = ChangeLog ChangeLog.0 README.card ad.block setter.outline \
lflashlight-small.xpic lflashlight.eps lflashlight.pdf \
rflashlight-small.xpic rflashlight.eps rflashlight.pdf \
statist.jpg statist.eps statist.pdf \
+ wordlist \
bc_notes
# Get rid of generated files when cleaning
@@ -103,3 +104,7 @@ awkcard.nc: $(CARDFILES)
awkcard.pdf: awkcard.ps
ps2pdf awkcard.ps awkcard.pdf
+
+spell:
+ export LC_ALL=C ; spell "$(srcdir)"/gawktexi.in | \
+ sort -u | comm -23 - "$(srcdir)"/wordlist
diff --git a/doc/Makefile.in b/doc/Makefile.in
index 2fa1fac0..9c7bbc2e 100644
--- a/doc/Makefile.in
+++ b/doc/Makefile.in
@@ -373,6 +373,7 @@ EXTRA_DIST = ChangeLog ChangeLog.0 README.card ad.block setter.outline \
lflashlight-small.xpic lflashlight.eps lflashlight.pdf \
rflashlight-small.xpic rflashlight.eps rflashlight.pdf \
statist.jpg statist.eps statist.pdf \
+ wordlist \
bc_notes
@@ -909,6 +910,10 @@ awkcard.nc: $(CARDFILES)
awkcard.pdf: awkcard.ps
ps2pdf awkcard.ps awkcard.pdf
+spell:
+ export LC_ALL=C ; spell "$(srcdir)"/gawktexi.in | \
+ sort -u | comm -23 - "$(srcdir)"/wordlist
+
# Tell versions [3.59,3.63) of GNU make to not export all variables.
# Otherwise a system limit (for SysV at least) may be exceeded.
.NOEXPORT:
diff --git a/doc/awkcard.in b/doc/awkcard.in
index 0b377ee5..16e4b19d 100644
--- a/doc/awkcard.in
+++ b/doc/awkcard.in
@@ -1,7 +1,7 @@
.\" AWK Reference Card --- Arnold Robbins, arnold@skeeve.com
.\"
.\" Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-.\" 2003, 2004, 2005, 2007, 2009, 2010, 2011, 2012, 2013, 2014, 2015
+.\" 2003, 2004, 2005, 2007, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016
.\" Free Software Foundation, Inc.
.\"
.\" Permission is granted to make and distribute verbatim copies of
@@ -100,7 +100,7 @@ Brian Kernighan and Michael Brennan who reviewed it.
\*(CD
.SL
.nf
-\*(FRCopyright \(co 1996\(en2005, 2007, 2009\(en2014
+\*(FRCopyright \(co 1996\(en2005, 2007, 2009\(en2016
Free Software Foundation, Inc.
.nf
.BT
@@ -1980,7 +1980,7 @@ maintains it.\*(CX
.ES
.fi
\*(CDCopyright \(co 1996\(en2005,
-2007, 2009\(en2014 Free Software Foundation, Inc.
+2007, 2009\(en2016 Free Software Foundation, Inc.
.sp .5
Permission is granted to make and distribute verbatim copies of this
reference card provided the copyright notice and this permission notice
diff --git a/doc/gawk.info b/doc/gawk.info
index cbcf450c..a708ef66 100644
--- a/doc/gawk.info
+++ b/doc/gawk.info
@@ -13077,7 +13077,7 @@ would see the latter (undesirable) output.
terminal device. On modern systems, this means your keyboard and
screen.
- (2) In private correspondance, Dr. Kernighan has indicated to me that
+ (2) In private correspondence, Dr. Kernighan has indicated to me that
the way this was done was probably a mistake.

@@ -15539,7 +15539,7 @@ anyway, because 'gawk' goes to the next file as soon as an 'ENDFILE'
rule finishes!)
You need to be careful calling 'rewind()'. You can end up causing
-infinite recursion if you don't pay attenion. Here is an example use:
+infinite recursion if you don't pay attention. Here is an example use:
$ cat data
-| a
@@ -30781,7 +30781,7 @@ Reference Counts
assumed by a variable is used in more than one place, only one copy
of the value itself is kept, and the associated reference count is
increased when the same value is used by an additional variable,
- and decresed when the related variable is no longer in use. When
+ and decreased when the related variable is no longer in use. When
the reference count goes to zero, the memory space used to store
the value of the variable is freed.
@@ -33326,13 +33326,13 @@ Index
* elements of arrays: Reference to Elements.
(line 6)
* email address for bug reports, bug-gawk@gnu.org: Bugs. (line 30)
-* EMISTERED: TCP/IP Networking. (line 6)
* empty array elements: Reference to Elements.
(line 18)
* empty pattern: Empty. (line 6)
* empty strings: awk split records. (line 114)
* empty strings, See null strings: Regexp Field Splitting.
(line 43)
+* EMRED: TCP/IP Networking. (line 6)
* enable breakpoint: Breakpoint Control. (line 73)
* enable debugger command: Breakpoint Control. (line 73)
* end debugger command: Debugger Execution Control.
@@ -35554,274 +35554,274 @@ Node: Shell Quoting641557
Node: Data File Management642958
Node: Filetrans Function643590
Node: Rewind Function647686
-Node: File Checking649591
-Ref: File Checking-Footnote-1650925
-Node: Empty Files651126
-Node: Ignoring Assigns653105
-Node: Getopt Function654655
-Ref: Getopt Function-Footnote-1666124
-Node: Passwd Functions666324
-Ref: Passwd Functions-Footnote-1675163
-Node: Group Functions675251
-Ref: Group Functions-Footnote-1683148
-Node: Walking Arrays683355
-Node: Library Functions Summary686363
-Node: Library Exercises687769
-Node: Sample Programs688234
-Node: Running Examples689004
-Node: Clones689732
-Node: Cut Program690956
-Node: Egrep Program700885
-Ref: Egrep Program-Footnote-1708397
-Node: Id Program708507
-Node: Split Program712187
-Ref: Split Program-Footnote-1715646
-Node: Tee Program715775
-Node: Uniq Program718565
-Node: Wc Program725991
-Ref: Wc Program-Footnote-1730246
-Node: Miscellaneous Programs730340
-Node: Dupword Program731553
-Node: Alarm Program733583
-Node: Translate Program738438
-Ref: Translate Program-Footnote-1743003
-Node: Labels Program743273
-Ref: Labels Program-Footnote-1746624
-Node: Word Sorting746708
-Node: History Sorting750780
-Node: Extract Program752615
-Node: Simple Sed760144
-Node: Igawk Program763218
-Ref: Igawk Program-Footnote-1777549
-Ref: Igawk Program-Footnote-2777751
-Ref: Igawk Program-Footnote-3777873
-Node: Anagram Program777988
-Node: Signature Program781050
-Node: Programs Summary782297
-Node: Programs Exercises783511
-Ref: Programs Exercises-Footnote-1787640
-Node: Advanced Features787731
-Node: Nondecimal Data789721
-Node: Array Sorting791312
-Node: Controlling Array Traversal792012
-Ref: Controlling Array Traversal-Footnote-1800379
-Node: Array Sorting Functions800497
-Ref: Array Sorting Functions-Footnote-1805588
-Node: Two-way I/O805784
-Ref: Two-way I/O-Footnote-1812334
-Ref: Two-way I/O-Footnote-2812521
-Node: TCP/IP Networking812603
-Node: Profiling815721
-Ref: Profiling-Footnote-1824214
-Node: Advanced Features Summary824537
-Node: Internationalization826381
-Node: I18N and L10N827861
-Node: Explaining gettext828548
-Ref: Explaining gettext-Footnote-1834440
-Ref: Explaining gettext-Footnote-2834625
-Node: Programmer i18n834790
-Ref: Programmer i18n-Footnote-1839645
-Node: Translator i18n839694
-Node: String Extraction840488
-Ref: String Extraction-Footnote-1841620
-Node: Printf Ordering841706
-Ref: Printf Ordering-Footnote-1844492
-Node: I18N Portability844556
-Ref: I18N Portability-Footnote-1847012
-Node: I18N Example847075
-Ref: I18N Example-Footnote-1849881
-Node: Gawk I18N849954
-Node: I18N Summary850599
-Node: Debugger851940
-Node: Debugging852962
-Node: Debugging Concepts853403
-Node: Debugging Terms855212
-Node: Awk Debugging857787
-Node: Sample Debugging Session858693
-Node: Debugger Invocation859227
-Node: Finding The Bug860613
-Node: List of Debugger Commands867091
-Node: Breakpoint Control868424
-Node: Debugger Execution Control872118
-Node: Viewing And Changing Data875480
-Node: Execution Stack878854
-Node: Debugger Info880491
-Node: Miscellaneous Debugger Commands884562
-Node: Readline Support889650
-Node: Limitations890546
-Ref: Limitations-Footnote-1894777
-Node: Debugging Summary894828
-Node: Arbitrary Precision Arithmetic896107
-Node: Computer Arithmetic897523
-Ref: table-numeric-ranges901114
-Ref: Computer Arithmetic-Footnote-1901836
-Node: Math Definitions901893
-Ref: table-ieee-formats905207
-Ref: Math Definitions-Footnote-1905810
-Node: MPFR features905915
-Node: FP Math Caution907632
-Ref: FP Math Caution-Footnote-1908704
-Node: Inexactness of computations909073
-Node: Inexact representation910033
-Node: Comparing FP Values911393
-Node: Errors accumulate912475
-Node: Getting Accuracy913908
-Node: Try To Round916618
-Node: Setting precision917517
-Ref: table-predefined-precision-strings918214
-Node: Setting the rounding mode920044
-Ref: table-gawk-rounding-modes920418
-Ref: Setting the rounding mode-Footnote-1923826
-Node: Arbitrary Precision Integers924005
-Ref: Arbitrary Precision Integers-Footnote-1928922
-Node: POSIX Floating Point Problems929071
-Ref: POSIX Floating Point Problems-Footnote-1932953
-Node: Floating point summary932991
-Node: Dynamic Extensions935181
-Node: Extension Intro936734
-Node: Plugin License938000
-Node: Extension Mechanism Outline938797
-Ref: figure-load-extension939236
-Ref: figure-register-new-function940801
-Ref: figure-call-new-function941893
-Node: Extension API Description943955
-Node: Extension API Functions Introduction945487
-Node: General Data Types950346
-Ref: General Data Types-Footnote-1956301
-Node: Memory Allocation Functions956600
-Ref: Memory Allocation Functions-Footnote-1959445
-Node: Constructor Functions959544
-Node: Registration Functions961289
-Node: Extension Functions961974
-Node: Exit Callback Functions964597
-Node: Extension Version String965847
-Node: Input Parsers966510
-Node: Output Wrappers976392
-Node: Two-way processors980904
-Node: Printing Messages983169
-Ref: Printing Messages-Footnote-1984340
-Node: Updating ERRNO984493
-Node: Requesting Values985232
-Ref: table-value-types-returned985969
-Node: Accessing Parameters986852
-Node: Symbol Table Access988087
-Node: Symbol table by name988599
-Node: Symbol table by cookie990620
-Ref: Symbol table by cookie-Footnote-1994772
-Node: Cached values994836
-Ref: Cached values-Footnote-1998343
-Node: Array Manipulation998434
-Ref: Array Manipulation-Footnote-1999525
-Node: Array Data Types999562
-Ref: Array Data Types-Footnote-11002220
-Node: Array Functions1002312
-Node: Flattening Arrays1006170
-Node: Creating Arrays1013078
-Node: Redirection API1017847
-Node: Extension API Variables1020678
-Node: Extension Versioning1021311
-Ref: gawk-api-version1021748
-Node: Extension API Informational Variables1023504
-Node: Extension API Boilerplate1024568
-Node: Finding Extensions1028382
-Node: Extension Example1028941
-Node: Internal File Description1029739
-Node: Internal File Ops1033819
-Ref: Internal File Ops-Footnote-11045581
-Node: Using Internal File Ops1045721
-Ref: Using Internal File Ops-Footnote-11048104
-Node: Extension Samples1048378
-Node: Extension Sample File Functions1049907
-Node: Extension Sample Fnmatch1057556
-Node: Extension Sample Fork1059043
-Node: Extension Sample Inplace1060261
-Node: Extension Sample Ord1063471
-Node: Extension Sample Readdir1064307
-Ref: table-readdir-file-types1065196
-Node: Extension Sample Revout1066001
-Node: Extension Sample Rev2way1066590
-Node: Extension Sample Read write array1067330
-Node: Extension Sample Readfile1069272
-Node: Extension Sample Time1070367
-Node: Extension Sample API Tests1071715
-Node: gawkextlib1072207
-Node: Extension summary1074654
-Node: Extension Exercises1078356
-Node: Language History1079854
-Node: V7/SVR3.11081510
-Node: SVR41083662
-Node: POSIX1085096
-Node: BTL1086475
-Node: POSIX/GNU1087204
-Node: Feature History1093066
-Node: Common Extensions1107436
-Node: Ranges and Locales1108719
-Ref: Ranges and Locales-Footnote-11113335
-Ref: Ranges and Locales-Footnote-21113362
-Ref: Ranges and Locales-Footnote-31113597
-Node: Contributors1113818
-Node: History summary1119378
-Node: Installation1120758
-Node: Gawk Distribution1121702
-Node: Getting1122186
-Node: Extracting1123147
-Node: Distribution contents1124785
-Node: Unix Installation1130879
-Node: Quick Installation1131561
-Node: Shell Startup Files1133975
-Node: Additional Configuration Options1135053
-Node: Configuration Philosophy1136858
-Node: Non-Unix Installation1139227
-Node: PC Installation1139685
-Node: PC Binary Installation1141005
-Node: PC Compiling1142857
-Ref: PC Compiling-Footnote-11145651
-Node: PC Testing1145760
-Node: PC Using1146940
-Ref: PC Using-Footnote-11151093
-Node: Cygwin1151166
-Node: MSYS1151936
-Node: VMS Installation1152437
-Node: VMS Compilation1153228
-Ref: VMS Compilation-Footnote-11154457
-Node: VMS Dynamic Extensions1154515
-Node: VMS Installation Details1156200
-Node: VMS Running1158453
-Node: VMS GNV1162732
-Node: VMS Old Gawk1163467
-Node: Bugs1163938
-Node: Other Versions1168253
-Node: Installation summary1174837
-Node: Notes1175888
-Node: Compatibility Mode1176753
-Node: Additions1177535
-Node: Accessing The Source1178460
-Node: Adding Code1179895
-Node: New Ports1186114
-Node: Derived Files1190602
-Ref: Derived Files-Footnote-11196087
-Ref: Derived Files-Footnote-21196122
-Ref: Derived Files-Footnote-31196720
-Node: Future Extensions1196834
-Node: Implementation Limitations1197492
-Node: Extension Design1198675
-Node: Old Extension Problems1199829
-Ref: Old Extension Problems-Footnote-11201347
-Node: Extension New Mechanism Goals1201404
-Ref: Extension New Mechanism Goals-Footnote-11204768
-Node: Extension Other Design Decisions1204957
-Node: Extension Future Growth1207070
-Node: Old Extension Mechanism1207906
-Node: Notes summary1209669
-Node: Basic Concepts1210851
-Node: Basic High Level1211532
-Ref: figure-general-flow1211814
-Ref: figure-process-flow1212499
-Ref: Basic High Level-Footnote-11215800
-Node: Basic Data Typing1215985
-Node: Glossary1219313
-Node: Copying1251259
-Node: GNU Free Documentation License1288798
-Node: Index1313916
+Node: File Checking649592
+Ref: File Checking-Footnote-1650926
+Node: Empty Files651127
+Node: Ignoring Assigns653106
+Node: Getopt Function654656
+Ref: Getopt Function-Footnote-1666125
+Node: Passwd Functions666325
+Ref: Passwd Functions-Footnote-1675164
+Node: Group Functions675252
+Ref: Group Functions-Footnote-1683149
+Node: Walking Arrays683356
+Node: Library Functions Summary686364
+Node: Library Exercises687770
+Node: Sample Programs688235
+Node: Running Examples689005
+Node: Clones689733
+Node: Cut Program690957
+Node: Egrep Program700886
+Ref: Egrep Program-Footnote-1708398
+Node: Id Program708508
+Node: Split Program712188
+Ref: Split Program-Footnote-1715647
+Node: Tee Program715776
+Node: Uniq Program718566
+Node: Wc Program725992
+Ref: Wc Program-Footnote-1730247
+Node: Miscellaneous Programs730341
+Node: Dupword Program731554
+Node: Alarm Program733584
+Node: Translate Program738439
+Ref: Translate Program-Footnote-1743004
+Node: Labels Program743274
+Ref: Labels Program-Footnote-1746625
+Node: Word Sorting746709
+Node: History Sorting750781
+Node: Extract Program752616
+Node: Simple Sed760145
+Node: Igawk Program763219
+Ref: Igawk Program-Footnote-1777550
+Ref: Igawk Program-Footnote-2777752
+Ref: Igawk Program-Footnote-3777874
+Node: Anagram Program777989
+Node: Signature Program781051
+Node: Programs Summary782298
+Node: Programs Exercises783512
+Ref: Programs Exercises-Footnote-1787641
+Node: Advanced Features787732
+Node: Nondecimal Data789722
+Node: Array Sorting791313
+Node: Controlling Array Traversal792013
+Ref: Controlling Array Traversal-Footnote-1800380
+Node: Array Sorting Functions800498
+Ref: Array Sorting Functions-Footnote-1805589
+Node: Two-way I/O805785
+Ref: Two-way I/O-Footnote-1812335
+Ref: Two-way I/O-Footnote-2812522
+Node: TCP/IP Networking812604
+Node: Profiling815722
+Ref: Profiling-Footnote-1824215
+Node: Advanced Features Summary824538
+Node: Internationalization826382
+Node: I18N and L10N827862
+Node: Explaining gettext828549
+Ref: Explaining gettext-Footnote-1834441
+Ref: Explaining gettext-Footnote-2834626
+Node: Programmer i18n834791
+Ref: Programmer i18n-Footnote-1839646
+Node: Translator i18n839695
+Node: String Extraction840489
+Ref: String Extraction-Footnote-1841621
+Node: Printf Ordering841707
+Ref: Printf Ordering-Footnote-1844493
+Node: I18N Portability844557
+Ref: I18N Portability-Footnote-1847013
+Node: I18N Example847076
+Ref: I18N Example-Footnote-1849882
+Node: Gawk I18N849955
+Node: I18N Summary850600
+Node: Debugger851941
+Node: Debugging852963
+Node: Debugging Concepts853404
+Node: Debugging Terms855213
+Node: Awk Debugging857788
+Node: Sample Debugging Session858694
+Node: Debugger Invocation859228
+Node: Finding The Bug860614
+Node: List of Debugger Commands867092
+Node: Breakpoint Control868425
+Node: Debugger Execution Control872119
+Node: Viewing And Changing Data875481
+Node: Execution Stack878855
+Node: Debugger Info880492
+Node: Miscellaneous Debugger Commands884563
+Node: Readline Support889651
+Node: Limitations890547
+Ref: Limitations-Footnote-1894778
+Node: Debugging Summary894829
+Node: Arbitrary Precision Arithmetic896108
+Node: Computer Arithmetic897524
+Ref: table-numeric-ranges901115
+Ref: Computer Arithmetic-Footnote-1901837
+Node: Math Definitions901894
+Ref: table-ieee-formats905208
+Ref: Math Definitions-Footnote-1905811
+Node: MPFR features905916
+Node: FP Math Caution907633
+Ref: FP Math Caution-Footnote-1908705
+Node: Inexactness of computations909074
+Node: Inexact representation910034
+Node: Comparing FP Values911394
+Node: Errors accumulate912476
+Node: Getting Accuracy913909
+Node: Try To Round916619
+Node: Setting precision917518
+Ref: table-predefined-precision-strings918215
+Node: Setting the rounding mode920045
+Ref: table-gawk-rounding-modes920419
+Ref: Setting the rounding mode-Footnote-1923827
+Node: Arbitrary Precision Integers924006
+Ref: Arbitrary Precision Integers-Footnote-1928923
+Node: POSIX Floating Point Problems929072
+Ref: POSIX Floating Point Problems-Footnote-1932954
+Node: Floating point summary932992
+Node: Dynamic Extensions935182
+Node: Extension Intro936735
+Node: Plugin License938001
+Node: Extension Mechanism Outline938798
+Ref: figure-load-extension939237
+Ref: figure-register-new-function940802
+Ref: figure-call-new-function941894
+Node: Extension API Description943956
+Node: Extension API Functions Introduction945488
+Node: General Data Types950347
+Ref: General Data Types-Footnote-1956302
+Node: Memory Allocation Functions956601
+Ref: Memory Allocation Functions-Footnote-1959446
+Node: Constructor Functions959545
+Node: Registration Functions961290
+Node: Extension Functions961975
+Node: Exit Callback Functions964598
+Node: Extension Version String965848
+Node: Input Parsers966511
+Node: Output Wrappers976393
+Node: Two-way processors980905
+Node: Printing Messages983170
+Ref: Printing Messages-Footnote-1984341
+Node: Updating ERRNO984494
+Node: Requesting Values985233
+Ref: table-value-types-returned985970
+Node: Accessing Parameters986853
+Node: Symbol Table Access988088
+Node: Symbol table by name988600
+Node: Symbol table by cookie990621
+Ref: Symbol table by cookie-Footnote-1994773
+Node: Cached values994837
+Ref: Cached values-Footnote-1998344
+Node: Array Manipulation998435
+Ref: Array Manipulation-Footnote-1999526
+Node: Array Data Types999563
+Ref: Array Data Types-Footnote-11002221
+Node: Array Functions1002313
+Node: Flattening Arrays1006171
+Node: Creating Arrays1013079
+Node: Redirection API1017848
+Node: Extension API Variables1020679
+Node: Extension Versioning1021312
+Ref: gawk-api-version1021749
+Node: Extension API Informational Variables1023505
+Node: Extension API Boilerplate1024569
+Node: Finding Extensions1028383
+Node: Extension Example1028942
+Node: Internal File Description1029740
+Node: Internal File Ops1033820
+Ref: Internal File Ops-Footnote-11045582
+Node: Using Internal File Ops1045722
+Ref: Using Internal File Ops-Footnote-11048105
+Node: Extension Samples1048379
+Node: Extension Sample File Functions1049908
+Node: Extension Sample Fnmatch1057557
+Node: Extension Sample Fork1059044
+Node: Extension Sample Inplace1060262
+Node: Extension Sample Ord1063472
+Node: Extension Sample Readdir1064308
+Ref: table-readdir-file-types1065197
+Node: Extension Sample Revout1066002
+Node: Extension Sample Rev2way1066591
+Node: Extension Sample Read write array1067331
+Node: Extension Sample Readfile1069273
+Node: Extension Sample Time1070368
+Node: Extension Sample API Tests1071716
+Node: gawkextlib1072208
+Node: Extension summary1074655
+Node: Extension Exercises1078357
+Node: Language History1079855
+Node: V7/SVR3.11081511
+Node: SVR41083663
+Node: POSIX1085097
+Node: BTL1086476
+Node: POSIX/GNU1087205
+Node: Feature History1093067
+Node: Common Extensions1107437
+Node: Ranges and Locales1108720
+Ref: Ranges and Locales-Footnote-11113336
+Ref: Ranges and Locales-Footnote-21113363
+Ref: Ranges and Locales-Footnote-31113598
+Node: Contributors1113819
+Node: History summary1119379
+Node: Installation1120759
+Node: Gawk Distribution1121703
+Node: Getting1122187
+Node: Extracting1123148
+Node: Distribution contents1124786
+Node: Unix Installation1130880
+Node: Quick Installation1131562
+Node: Shell Startup Files1133976
+Node: Additional Configuration Options1135054
+Node: Configuration Philosophy1136859
+Node: Non-Unix Installation1139228
+Node: PC Installation1139686
+Node: PC Binary Installation1141006
+Node: PC Compiling1142858
+Ref: PC Compiling-Footnote-11145652
+Node: PC Testing1145761
+Node: PC Using1146941
+Ref: PC Using-Footnote-11151094
+Node: Cygwin1151167
+Node: MSYS1151937
+Node: VMS Installation1152438
+Node: VMS Compilation1153229
+Ref: VMS Compilation-Footnote-11154458
+Node: VMS Dynamic Extensions1154516
+Node: VMS Installation Details1156201
+Node: VMS Running1158454
+Node: VMS GNV1162733
+Node: VMS Old Gawk1163468
+Node: Bugs1163939
+Node: Other Versions1168254
+Node: Installation summary1174838
+Node: Notes1175889
+Node: Compatibility Mode1176754
+Node: Additions1177536
+Node: Accessing The Source1178461
+Node: Adding Code1179896
+Node: New Ports1186115
+Node: Derived Files1190603
+Ref: Derived Files-Footnote-11196088
+Ref: Derived Files-Footnote-21196123
+Ref: Derived Files-Footnote-31196721
+Node: Future Extensions1196835
+Node: Implementation Limitations1197493
+Node: Extension Design1198676
+Node: Old Extension Problems1199830
+Ref: Old Extension Problems-Footnote-11201348
+Node: Extension New Mechanism Goals1201405
+Ref: Extension New Mechanism Goals-Footnote-11204769
+Node: Extension Other Design Decisions1204958
+Node: Extension Future Growth1207071
+Node: Old Extension Mechanism1207907
+Node: Notes summary1209670
+Node: Basic Concepts1210852
+Node: Basic High Level1211533
+Ref: figure-general-flow1211815
+Ref: figure-process-flow1212500
+Ref: Basic High Level-Footnote-11215801
+Node: Basic Data Typing1215986
+Node: Glossary1219314
+Node: Copying1251261
+Node: GNU Free Documentation License1288800
+Node: Index1313918

End Tag Table
diff --git a/doc/gawk.texi b/doc/gawk.texi
index 5e40e86a..2dfd067b 100644
--- a/doc/gawk.texi
+++ b/doc/gawk.texi
@@ -56,7 +56,7 @@
@c applies to and all the info about who's publishing this edition
@c These apply across the board.
-@set UPDATE-MONTH June, 2016
+@set UPDATE-MONTH August, 2016
@set VERSION 4.1
@set PATCHLEVEL 4
@@ -18648,7 +18648,7 @@ signal (bit 7) and if so, the guilty signal number (bits 0--6).
Traditionally, @command{awk}'s @code{system()} function has simply
returned the exit status value divided by 256. In the normal case this
gives the exit status but in the case of death-by-signal it yields
-a fractional floating-point value.@footnote{In private correspondance,
+a fractional floating-point value.@footnote{In private correspondence,
Dr.@: Kernighan has indicated to me that the way this was done
was probably a mistake.} POSIX states that @command{awk}'s
@code{system()} should return the full 16-bit value.
@@ -22009,7 +22009,7 @@ Because of this, you should not call it from an @code{ENDFILE} rule.
file as soon as an @code{ENDFILE} rule finishes!)
You need to be careful calling @code{rewind()}. You can end up
-causing infinite recursion if you don't pay attenion. Here is an
+causing infinite recursion if you don't pay attention. Here is an
example use:
@example
@@ -27891,7 +27891,7 @@ programming and knowledge of the behavior of the coprocess are required.
@cindex files, @code{/inet4/@dots{}} (@command{gawk})
@cindex @code{/inet6/@dots{}} special files (@command{gawk})
@cindex files, @code{/inet6/@dots{}} (@command{gawk})
-@cindex @code{EMISTERED}
+@cindex @code{EMRED}
@ifnotdocbook
@quotation
@code{EMRED}:@*
@@ -27906,7 +27906,7 @@ programming and knowledge of the behavior of the coprocess are required.
@docbook
<blockquote>
<attribution>Mike O'Brien (aka Mr.&nbsp;Protocol)</attribution>
-<literallayout class="normal"><literal>EMISTERED</literal>:
+<literallayout class="normal"><literal>EMRED</literal>:
&nbsp;&nbsp;&nbsp;&nbsp;<emphasis>A host is a host from coast to coast,</emphasis>
&nbsp;&nbsp;&nbsp;&nbsp;<emphasis>and no-one can talk to host that's close,</emphasis>
&nbsp;&nbsp;&nbsp;&nbsp;<emphasis>unless the host that isn't close</emphasis>
@@ -41417,7 +41417,7 @@ An internal mechanism in @command{gawk} to minimize the amount of memory
needed to store the value of string variables. If the value assumed by
a variable is used in more than one place, only one copy of the value
itself is kept, and the associated reference count is increased when the
-same value is used by an additional variable, and decresed when the related
+same value is used by an additional variable, and decreased when the related
variable is no longer in use. When the reference count goes to zero,
the memory space used to store the value of the variable is freed.
diff --git a/doc/gawktexi.in b/doc/gawktexi.in
index c5b8999e..513bb80d 100644
--- a/doc/gawktexi.in
+++ b/doc/gawktexi.in
@@ -51,7 +51,7 @@
@c applies to and all the info about who's publishing this edition
@c These apply across the board.
-@set UPDATE-MONTH June, 2016
+@set UPDATE-MONTH August, 2016
@set VERSION 4.1
@set PATCHLEVEL 4
@@ -17831,7 +17831,7 @@ signal (bit 7) and if so, the guilty signal number (bits 0--6).
Traditionally, @command{awk}'s @code{system()} function has simply
returned the exit status value divided by 256. In the normal case this
gives the exit status but in the case of death-by-signal it yields
-a fractional floating-point value.@footnote{In private correspondance,
+a fractional floating-point value.@footnote{In private correspondence,
Dr.@: Kernighan has indicated to me that the way this was done
was probably a mistake.} POSIX states that @command{awk}'s
@code{system()} should return the full 16-bit value.
@@ -21091,7 +21091,7 @@ Because of this, you should not call it from an @code{ENDFILE} rule.
file as soon as an @code{ENDFILE} rule finishes!)
You need to be careful calling @code{rewind()}. You can end up
-causing infinite recursion if you don't pay attenion. Here is an
+causing infinite recursion if you don't pay attention. Here is an
example use:
@example
@@ -26973,7 +26973,7 @@ programming and knowledge of the behavior of the coprocess are required.
@cindex files, @code{/inet4/@dots{}} (@command{gawk})
@cindex @code{/inet6/@dots{}} special files (@command{gawk})
@cindex files, @code{/inet6/@dots{}} (@command{gawk})
-@cindex @code{EMISTERED}
+@cindex @code{EMRED}
@ifnotdocbook
@quotation
@code{EMRED}:@*
@@ -26988,7 +26988,7 @@ programming and knowledge of the behavior of the coprocess are required.
@docbook
<blockquote>
<attribution>Mike O'Brien (aka Mr.&nbsp;Protocol)</attribution>
-<literallayout class="normal"><literal>EMISTERED</literal>:
+<literallayout class="normal"><literal>EMRED</literal>:
&nbsp;&nbsp;&nbsp;&nbsp;<emphasis>A host is a host from coast to coast,</emphasis>
&nbsp;&nbsp;&nbsp;&nbsp;<emphasis>and no-one can talk to host that's close,</emphasis>
&nbsp;&nbsp;&nbsp;&nbsp;<emphasis>unless the host that isn't close</emphasis>
@@ -40499,7 +40499,7 @@ An internal mechanism in @command{gawk} to minimize the amount of memory
needed to store the value of string variables. If the value assumed by
a variable is used in more than one place, only one copy of the value
itself is kept, and the associated reference count is increased when the
-same value is used by an additional variable, and decresed when the related
+same value is used by an additional variable, and decreased when the related
variable is no longer in use. When the reference count goes to zero,
the memory space used to store the value of the variable is freed.
diff --git a/node.c b/node.c
index bb2fe437..c6c9af81 100644
--- a/node.c
+++ b/node.c
@@ -752,7 +752,7 @@ str2wstr(NODE *n, size_t **ptr)
* stopping early. This is particularly important
* for match() where we need to build the indices.
*/
- if (using_utf8()) {
+ if (dfa_using_utf8()) {
count = 1;
wc = 0xFFFD; /* unicode replacement character */
goto set_wc;
diff --git a/po/gawk.pot b/po/gawk.pot
index 609f90d3..e17f194e 100644
--- a/po/gawk.pot
+++ b/po/gawk.pot
@@ -6,9 +6,9 @@
#, fuzzy
msgid ""
msgstr ""
-"Project-Id-Version: gawk 4.1.3i\n"
+"Project-Id-Version: gawk 4.1.3j\n"
"Report-Msgid-Bugs-To: bug-gawk@gnu.org\n"
-"POT-Creation-Date: 2016-08-01 22:56+0300\n"
+"POT-Creation-Date: 2016-08-23 05:55+0300\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: LANGUAGE <LL@li.org>\n"
@@ -1798,40 +1798,40 @@ msgstr ""
msgid "No symbol `%s' in current context"
msgstr ""
-#: dfa.c:1029 dfa.c:1032 dfa.c:1051 dfa.c:1061 dfa.c:1073 dfa.c:1100 dfa.c:1109
-#: dfa.c:1112 dfa.c:1117 dfa.c:1138 dfa.c:1141
+#: dfa.c:1090 dfa.c:1093 dfa.c:1112 dfa.c:1122 dfa.c:1135 dfa.c:1163 dfa.c:1172
+#: dfa.c:1175 dfa.c:1180 dfa.c:1202 dfa.c:1205
msgid "unbalanced ["
msgstr ""
-#: dfa.c:1085
+#: dfa.c:1148
msgid "invalid character class"
msgstr ""
-#: dfa.c:1207
+#: dfa.c:1271
msgid "character class syntax is [[:space:]], not [:space:]"
msgstr ""
-#: dfa.c:1269
+#: dfa.c:1332
msgid "unfinished \\ escape"
msgstr ""
-#: dfa.c:1416
+#: dfa.c:1499
msgid "invalid content of \\{\\}"
msgstr ""
-#: dfa.c:1419
+#: dfa.c:1502
msgid "regular expression too big"
msgstr ""
-#: dfa.c:1835
+#: dfa.c:1916
msgid "unbalanced ("
msgstr ""
-#: dfa.c:1961
+#: dfa.c:2044
msgid "no syntax specified"
msgstr ""
-#: dfa.c:1969
+#: dfa.c:2052
msgid "unbalanced )"
msgstr ""
@@ -3327,7 +3327,7 @@ msgstr ""
msgid "redir2str: unknown redirection type %d"
msgstr ""
-#: re.c:607
+#: re.c:610
#, c-format
msgid "regexp component `%.*s' should probably be `[%.*s]'"
msgstr ""
diff --git a/re.c b/re.c
index 593ed166..c7899694 100644
--- a/re.c
+++ b/re.c
@@ -203,14 +203,11 @@ make_regexp(const char *s, size_t len, bool ignorecase, bool dfa, bool canfatal)
syn &= ~RE_ICASE;
}
- /* only call dfasyntax if we're using dfa; saves time */
- if (dfa && ! no_dfa) {
- dfa_syn = syn;
- /* FIXME: dfa doesn't pay attention RE_ICASE */
- if (ignorecase)
- dfa_syn |= RE_ICASE;
- dfasyntax(dfa_syn, ignorecase, '\n');
- }
+ dfa_syn = syn;
+ /* FIXME: dfa doesn't pay attention RE_ICASE */
+ if (ignorecase)
+ dfa_syn |= RE_ICASE;
+
re_set_syntax(syn);
if ((rerr = re_compile_pattern(buf, len, &(rp->pat))) != NULL) {
@@ -228,6 +225,7 @@ make_regexp(const char *s, size_t len, bool ignorecase, bool dfa, bool canfatal)
if (dfa && ! no_dfa) {
rp->dfa = true;
rp->dfareg = dfaalloc();
+ dfasyntax(rp->dfareg, dfa_syn, ignorecase, '\n');
dfacomp(buf, len, rp->dfareg, true);
} else
rp->dfa = false;
@@ -423,7 +421,8 @@ resetup()
syn |= RE_INTERVALS | RE_INVALID_INTERVAL_ORD | RE_NO_BK_BRACES;
(void) re_set_syntax(syn);
- dfasyntax(syn, false, '\n');
+
+ dfa_init();
}
/* avoid_dfa --- return true if we should not use the DFA matcher */