summaryrefslogtreecommitdiff
path: root/tests/sjis-mb
diff options
context:
space:
mode:
Diffstat (limited to 'tests/sjis-mb')
-rwxr-xr-xtests/sjis-mb60
1 files changed, 60 insertions, 0 deletions
diff --git a/tests/sjis-mb b/tests/sjis-mb
new file mode 100755
index 0000000..39459fe
--- /dev/null
+++ b/tests/sjis-mb
@@ -0,0 +1,60 @@
+#!/bin/sh
+# similar to euc-mb and fgrep-infloop, but tests SJIS encoding.
+# in this encoding, an ASCII character is both a valid single-byte
+# character, and a suffix of a valid double-byte character
+
+. "${srcdir=.}/init.sh"; path_prepend_ ../src
+
+# Add "." to PATH for the use of get-mb-cur-max.
+path_prepend_ .
+
+require_compiled_in_MB_support
+require_timeout_
+
+# Sequences used in this test ("%" and "@" become 8-bit characters, while "A"
+# is the real ASCII character for "A"):
+# - "%" becomes an half-width katakana in SJIS, but it is an invalid sequence
+# in UTF-8.
+# - "@@" and "@A" are both valid sequences in SJIS. We try to fool grep into
+# matching "A" against "@A", or mistaking a valid "A" match for the second
+# byte of a multi-byte character.
+
+encode() { echo "$1" | tr @% '\203\301'; }
+
+for locale in ja_JP.SHIFT_JIS ja_JP.SJIS ja_JP.PCK ''; do
+ test "$(get-mb-cur-max $locale)" = 2 && break
+done
+test -n "$locale" || skip_ 'SJIS locale not found'
+
+k=0
+test_grep_reject() {
+ k=$(expr $k + 1)
+ encode "$2" | \
+ LC_ALL=$locale \
+ timeout 10s grep $1 $(encode "$3") > out$k 2>&1
+ test $? = 1 && compare /dev/null out$k
+}
+
+test_grep() {
+ k=$(expr $k + 1)
+ encode "$2" > exp$k
+ LC_ALL=$locale \
+ timeout 10s grep $1 $(encode "$3") exp$k > out$k 2>&1
+ test $? = 0 && compare exp$k out$k
+}
+
+failure_tests=@A
+successful_tests='%%AA @AA @A@@A'
+
+fail=0
+for i in $successful_tests; do
+ test_grep -F $i A || fail=1
+ test_grep -E $i A || fail=1
+done
+
+for i in $failure_tests; do
+ test_grep_reject -F $i A || fail=1
+ test_grep_reject -E $i A || fail=1
+done
+
+Exit $fail