summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaul Eggert <eggert@cs.ucla.edu>2019-12-17 21:57:54 -0800
committerPaul Eggert <eggert@cs.ucla.edu>2019-12-17 22:03:37 -0800
commitc9a6e4bf919e1b28970e11b29aa720a7e6144834 (patch)
tree198ab266ef09b0102695eded977f9334b038bf25
parentfdd45db167c9e553f8c667fa1db0f96d8ae8eab4 (diff)
downloadgrep-c9a6e4bf919e1b28970e11b29aa720a7e6144834.tar.gz
grep: do not match invalid UTF-8
Update Gnulib to latest. Also: * src/dfasearch.c (EGexecute): Use ptrdiff_t, not size_t, to match new Gnulib API. * tests/Makefile.am (TESTS): Add dfa-invalid-utf8. * tests/dfa-invalid-utf8: New file.
-rw-r--r--NEWS5
m---------gnulib0
-rw-r--r--src/dfasearch.c2
-rw-r--r--tests/Makefile.am1
-rwxr-xr-xtests/dfa-invalid-utf829
5 files changed, 35 insertions, 2 deletions
diff --git a/NEWS b/NEWS
index b106e2f9..b6ff57c1 100644
--- a/NEWS
+++ b/NEWS
@@ -9,7 +9,10 @@ GNU grep NEWS -*- outline -*-
** Bug fixes
- grep -Fw can no longer false match in non-UTF8 multibyte locales
+ '.' no longer matches some invalid byte sequences in UTF-8 locales.
+ [bug introduced in grep 2.7]
+
+ grep -Fw can no longer false match in non-UTF-8 multibyte locales
For example, this command would erroneously print its input line:
echo ab | LC_CTYPE=ja_JP.eucjp grep -Fw b
[Bug#38223 introduced in grep 2.28]
diff --git a/gnulib b/gnulib
-Subproject b7bf9f4361c8d78ccfda7a30ff31f7a406ea972
+Subproject 1219c343014ede881069bab554408b40e5455d9
diff --git a/src/dfasearch.c b/src/dfasearch.c
index 6c95d8cb..153281df 100644
--- a/src/dfasearch.c
+++ b/src/dfasearch.c
@@ -234,7 +234,7 @@ EGexecute (void *vdc, char const *buf, size_t size, size_t *match_size,
if (!start_ptr)
{
char const *next_beg, *dfa_beg = beg;
- size_t count = 0;
+ ptrdiff_t count = 0;
bool exact_kwset_match = false;
bool backref = false;
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 82aebbf9..dee6f46e 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -86,6 +86,7 @@ TESTS = \
dfa-coverage \
dfa-heap-overrun \
dfa-infloop \
+ dfa-invalid-utf8 \
dfaexec-multibyte \
empty \
empty-line \
diff --git a/tests/dfa-invalid-utf8 b/tests/dfa-invalid-utf8
new file mode 100755
index 00000000..17480432
--- /dev/null
+++ b/tests/dfa-invalid-utf8
@@ -0,0 +1,29 @@
+#! /bin/sh
+# Test whether "grep '.'" matches invalid UTF-8 byte sequences.
+#
+# Copyright 2019 Free Software Foundation, Inc.
+#
+# Copying and distribution of this file, with or without modification,
+# are permitted in any medium without royalty provided the copyright
+# notice and this notice are preserved.
+
+. "${srcdir=.}/init.sh"; path_prepend_ ../src
+require_en_utf8_locale_
+require_compiled_in_MB_support
+
+fail=0
+
+printf 'a\360\202\202\254b\n' >in1 || framework_failure_
+LC_ALL=en_US.UTF-8 grep 'a.b' in1 > out1 2> err
+test $? -eq 1 || fail=1
+compare /dev/null out1 || fail=1
+compare /dev/null err1 || fail=1
+
+printf 'a\360\202\202\254ba\360\202\202\254b\n' >in2 ||
+ framework_failure_
+LC_ALL=en_US.UTF-8 grep -E '(a.b)\1' in2 > out2 2> err
+test $? -eq 1 || fail=1
+compare /dev/null out2 || fail=1
+compare /dev/null err2 || fail=1
+
+Exit $fail