summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaul Eggert <eggert@cs.ucla.edu>2022-05-13 23:46:21 -0700
committerPaul Eggert <eggert@cs.ucla.edu>2022-05-13 23:48:18 -0700
commit5447010fdbdf3f1a874689dd41a7c916bb262b2a (patch)
tree9f58605715e7de18e16fcc9bb2d6c99e52e34482
parentef6c7768b300678895348ba7c827fa919e3f1d5c (diff)
downloadgrep-5447010fdbdf3f1a874689dd41a7c916bb262b2a.tar.gz
grep: fix bug with . and some Hangul Syllables
* NEWS: Mention the fix, which comes from the recent Gnulib update. * tests/hangul-syllable: New file. * tests/Makefile.am (TESTS): Add it.
-rw-r--r--NEWS7
-rw-r--r--tests/Makefile.am1
-rwxr-xr-xtests/hangul-syllable88
3 files changed, 96 insertions, 0 deletions
diff --git a/NEWS b/NEWS
index 86c82ed8..fb0e4cf1 100644
--- a/NEWS
+++ b/NEWS
@@ -13,6 +13,13 @@ GNU grep NEWS -*- outline -*-
** Bug fixes
+ In locales using UTF-8 encoding, the regular expression '.' no
+ longer sometimes fails to match Unicode characters U+D400 through
+ U+D7FF (some Hangul Syllables, and Hangul Jamo Extended-B) and
+ Unicode characters U+108000 through U+10FFFF (half of Supplemental
+ Private Use Area plane B).
+ [bug introduced in grep 3.4]
+
The -s option no longer suppresses "binary file matches" messages.
[Bug#51860 introduced in grep 3.5]
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 708980df..d72637f7 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -110,6 +110,7 @@ TESTS = \
grep-dev-null \
grep-dev-null-out \
grep-dir \
+ hangul-syllable \
hash-collision-perf \
help-version \
high-bit-range \
diff --git a/tests/hangul-syllable b/tests/hangul-syllable
new file mode 100755
index 00000000..9f94d2eb
--- /dev/null
+++ b/tests/hangul-syllable
@@ -0,0 +1,88 @@
+#!/bin/sh
+# grep 3.4 through 3.7 mishandled matching '.' against the valid UTF-8
+# sequences (ED)(90-9F)(80-BF) corresponding to U+D400 through U+D7FF,
+# which are some Hangul Syllables and Hangul Jamo Extended-B. They
+# also mishandled (F4)(88-8F)(80-BF)(80-BF) which correspond to
+# U+108000 through U+10FFFF (Supplemental Private Use Area plane B).
+
+. "${srcdir=.}/init.sh"; path_prepend_ ../src
+
+require_en_utf8_locale_
+
+LC_ALL=en_US.UTF-8
+export LC_ALL
+
+check_char ()
+{
+ printf "$1\\n" >in || framewmork_failure_
+
+ grep $2 '^.$' in >out || fail=1
+ cmp in out || fail=1
+}
+
+fail=0
+
+# "." should match U+D45C HANGUL SYLLABLE PYO.
+check_char '\355\221\234'
+
+# Check boundary-condition characters
+# while we are at it.
+
+check_char '\0' -a
+check_char '\177'
+
+for i in 302 337; do
+ for j in 200 277; do
+ check_char "\\$i\\$j"
+ done
+done
+for i in 340; do
+ for j in 240 277; do
+ for k in 200 277; do
+ check_char "\\$i\\$j\\$k"
+ done
+ done
+done
+for i in 341 354 356 357; do
+ for j in 200 277; do
+ for k in 200 277; do
+ check_char "\\$i\\$j\\$k"
+ done
+ done
+done
+for i in 355; do
+ for j in 200 237; do
+ for k in 200 277; do
+ check_char "\\$i\\$j\\$k"
+ done
+ done
+done
+for i in 360; do
+ for j in 220 277; do
+ for k in 200 277; do
+ for l in 200 277; do
+ check_char "\\$i\\$j\\$k\\$l"
+ done
+ done
+ done
+done
+for i in 361 363; do
+ for j in 200 277; do
+ for k in 200 277; do
+ for l in 200 277; do
+ check_char "\\$i\\$j\\$k\\$l"
+ done
+ done
+ done
+done
+for i in 364; do
+ for j in 200 217; do
+ for k in 200 277; do
+ for l in 200 277; do
+ check_char "\\$i\\$j\\$k\\$l"
+ done
+ done
+ done
+done
+
+Exit $fail