diff options
author | Paul Eggert <eggert@cs.ucla.edu> | 2019-12-17 21:57:54 -0800 |
---|---|---|
committer | Paul Eggert <eggert@cs.ucla.edu> | 2019-12-17 22:03:37 -0800 |
commit | c9a6e4bf919e1b28970e11b29aa720a7e6144834 (patch) | |
tree | 198ab266ef09b0102695eded977f9334b038bf25 | |
parent | fdd45db167c9e553f8c667fa1db0f96d8ae8eab4 (diff) | |
download | grep-c9a6e4bf919e1b28970e11b29aa720a7e6144834.tar.gz |
grep: do not match invalid UTF-8
Update Gnulib to latest. Also:
* src/dfasearch.c (EGexecute): Use ptrdiff_t, not size_t,
to match new Gnulib API.
* tests/Makefile.am (TESTS): Add dfa-invalid-utf8.
* tests/dfa-invalid-utf8: New file.
-rw-r--r-- | NEWS | 5 | ||||
m--------- | gnulib | 0 | ||||
-rw-r--r-- | src/dfasearch.c | 2 | ||||
-rw-r--r-- | tests/Makefile.am | 1 | ||||
-rwxr-xr-x | tests/dfa-invalid-utf8 | 29 |
5 files changed, 35 insertions, 2 deletions
@@ -9,7 +9,10 @@ GNU grep NEWS -*- outline -*-
 
 ** Bug fixes
 
-  grep -Fw can no longer false match in non-UTF8 multibyte locales
+  '.' no longer matches some invalid byte sequences in UTF-8 locales.
+  [bug introduced in grep 2.7]
+
+  grep -Fw can no longer false match in non-UTF-8 multibyte locales
   For example, this command would erroneously print its input line:
     echo ab | LC_CTYPE=ja_JP.eucjp grep -Fw b
   [Bug#38223 introduced in grep 2.28]
diff --git a/gnulib b/gnulib
-Subproject b7bf9f4361c8d78ccfda7a30ff31f7a406ea972
+Subproject 1219c343014ede881069bab554408b40e5455d9
diff --git a/src/dfasearch.c b/src/dfasearch.c
index 6c95d8cb..153281df 100644
--- a/src/dfasearch.c
+++ b/src/dfasearch.c
@@ -234,7 +234,7 @@ EGexecute (void *vdc, char const *buf, size_t size, size_t *match_size,
           if (!start_ptr)
             {
               char const *next_beg, *dfa_beg = beg;
-              size_t count = 0;
+              ptrdiff_t count = 0;
               bool exact_kwset_match = false;
               bool backref = false;
 
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 82aebbf9..dee6f46e 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -86,6 +86,7 @@ TESTS = \
   dfa-coverage \
   dfa-heap-overrun \
   dfa-infloop \
+  dfa-invalid-utf8 \
   dfaexec-multibyte \
   empty \
   empty-line \
diff --git a/tests/dfa-invalid-utf8 b/tests/dfa-invalid-utf8
new file mode 100755
index 00000000..17480432
--- /dev/null
+++ b/tests/dfa-invalid-utf8
@@ -0,0 +1,29 @@
+#! /bin/sh
+# Test whether "grep '.'" matches invalid UTF-8 byte sequences.
+#
+# Copyright 2019 Free Software Foundation, Inc.
+#
+# Copying and distribution of this file, with or without modification,
+# are permitted in any medium without royalty provided the copyright
+# notice and this notice are preserved.
+
+. "${srcdir=.}/init.sh"; path_prepend_ ../src
+require_en_utf8_locale_
+require_compiled_in_MB_support
+
+fail=0
+
+printf 'a\360\202\202\254b\n' >in1 || framework_failure_
+LC_ALL=en_US.UTF-8 grep 'a.b' in1 > out1 2> err
+test $? -eq 1 || fail=1
+compare /dev/null out1 || fail=1
+compare /dev/null err1 || fail=1
+
+printf 'a\360\202\202\254ba\360\202\202\254b\n' >in2 ||
+  framework_failure_
+LC_ALL=en_US.UTF-8 grep -E '(a.b)\1' in2 > out2 2> err
+test $? -eq 1 || fail=1
+compare /dev/null out2 || fail=1
+compare /dev/null err2 || fail=1
+
+Exit $fail