summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Arnold D. Robbins <arnold@skeeve.com> 2017-03-23 21:28:21 +0200
committer Arnold D. Robbins <arnold@skeeve.com> 2017-03-23 21:28:21 +0200
commit 10c17fe33a10746bd44fc4b62f764ca4c3f3c5a6 (patch)
tree 71df7b516b649aeeff993b04583eee0a5da3902a
parent 41915dfdb8cd59fcdf56c8f74b7d6e62448eadfa (diff)
download gawk-10c17fe33a10746bd44fc4b62f764ca4c3f3c5a6.tar.gz
Sync dfa.c with GNULIB.
-rw-r--r-- support/ChangeLog 4
-rw-r--r-- support/dfa.c 35
2 files changed, 16 insertions, 23 deletions
diff --git a/support/ChangeLog b/support/ChangeLog
index 027b6db7..1c6015f3 100644
--- a/support/ChangeLog
+++ b/support/ChangeLog
@@ -1,3 +1,7 @@
+2017-03-23 Arnold D. Robbins <arnold@skeeve.com>
+
+ * dfa.c: Sync with GNULIB.
+
2017-01-19 Arnold D. Robbins <arnold@skeeve.com>
* localeinfo.h: Sync with GNULIB.
diff --git a/support/dfa.c b/support/dfa.c
index ad5b3a41..2003ac55 100644
--- a/support/dfa.c
+++ b/support/dfa.c
@@ -581,8 +581,9 @@ struct dfa
bool, size_t *, bool *);
/* The locale is simple, like the C locale. These locales can be
- processed more efficiently, e.g., the relationship between lower-
- and upper-case letters is 1-1. */
+ processed more efficiently, as they are single-byte, their native
+ character set is in collating-sequence order, and they do not
+ have multi-character collating elements. */
bool simple_locale;
/* Other cached information derived from the locale. */
@@ -1059,7 +1060,6 @@ parse_bracket_exp (struct dfa *dfa)
if (invert)
{
c = bracket_fetch_wc (dfa);
- invert = true;
known_bracket_exp = dfa->simple_locale;
}
wint_t wc = dfa->lex.wctok;
@@ -1190,24 +1190,14 @@ parse_bracket_exp (struct dfa *dfa)
/* Treat [x-y] as a range if x != y. */
if (wc != wc2 || wc == WEOF)
{
- if (dfa->localeinfo.multibyte)
- known_bracket_exp = false;
- else if (dfa->simple_locale)
+ if (dfa->simple_locale
+ || (isasciidigit (c) & isasciidigit (c2)))
{
- int ci;
- for (ci = c; ci <= c2; ci++)
- setbit (ci, &ccl);
- if (dfa->syntax.case_fold)
- {
- int uc = toupper (c);
- int uc2 = toupper (c2);
- for (ci = 0; ci < NOTCHAR; ci++)
- {
- int uci = toupper (ci);
- if (uc <= uci && uci <= uc2)
- setbit (ci, &ccl);
- }
- }
+ for (int ci = c; ci <= c2; ci++)
+ if (dfa->syntax.case_fold && isalpha (ci))
+ setbit_case_fold_c (ci, &ccl);
+ else
+ setbit (ci, &ccl);
}
else
known_bracket_exp = false;
@@ -1221,7 +1211,7 @@ parse_bracket_exp (struct dfa *dfa)
if (!dfa->localeinfo.multibyte)
{
- if (dfa->syntax.case_fold)
+ if (dfa->syntax.case_fold && isalpha (c))
setbit_case_fold_c (c, &ccl);
else
setbit (c, &ccl);
@@ -1256,7 +1246,7 @@ parse_bracket_exp (struct dfa *dfa)
if (! known_bracket_exp)
return BACKREF;
- if (dfa->localeinfo.multibyte)
+ if (dfa->localeinfo.multibyte && (invert || dfa->lex.brack.nchars != 0))
{
dfa->lex.brack.invert = invert;
dfa->lex.brack.cset = emptyset (&ccl) ? -1 : charclass_index (dfa, &ccl);
@@ -1265,7 +1255,6 @@ parse_bracket_exp (struct dfa *dfa)
if (invert)
{
- assert (!dfa->localeinfo.multibyte);
notset (&ccl);
if (dfa->syntax.syntax_bits & RE_HAT_LISTS_NOT_NEWLINE)
clrbit ('\n', &ccl);