#! /bin/sh
# Test whether \s matches SP and UTF-8 multi-byte white space characters.
#
# Copyright (C) 2013-2016 Free Software Foundation, Inc.
#
# Copying and distribution of this file, with or without modification,
# are permitted in any medium without royalty provided the copyright
# notice and this notice are preserved.

# Load the test framework.  The helpers used below (path_prepend_,
# require_en_utf8_locale_, hex_printf_, warn_, compare, Exit) are not
# defined in this file; they are expected to come from init.sh.
. "${srcdir=.}/init.sh"; path_prepend_ ../src

require_en_utf8_locale_

# Every grep invocation below must run in a UTF-8 locale.
LC_ALL=en_US.UTF-8
export LC_ALL

# It would have been nice to be able to use all UTF8 characters
# with the Unicode WSpace=Y character property,
# https://en.wikipedia.org/wiki/Whitespace_character, but that
# would currently cause distracting failures everywhere I've tried.

# FIXME: including any of the following in the list below would
# make this test fail on Fedora 19/glibc-2.17-18.fc19.
# Restore them to the list once it is fixed.
# (This variable is documentation only; nothing below reads it.)
these_fail_with_glibc='
U+00A0 NO-BREAK SPACE: c2 a0
U+2007 FIGURE SPACE: e2 80 87
U+200B ZERO WIDTH SPACE: e2 80 8b
U+202F NARROW NO-BREAK SPACE: e2 80 af
'

# Likewise documentation only: excluded code points that are not tested.
fail_with_other='
U+000A Line feed: 0a
U+0085 Next line: 85
'

# Turn each "U+XXXX NAME: b1 b2 ..." line into an escape string such as
# "\xb1\xb2": drop everything up to the colon, then replace each run of
# one or more spaces with "\x".  The regex must be "  *" (at least one
# space); a bare " *" also matches the empty string and would insert
# "\x" between every pair of characters.
utf8_space_characters=$(sed 's/.*://;s/  */\\x/g' <<\EOF
U+0009 Horizontal Tab: 09
U+000B Vertical Tab: 0b
U+000C Form feed: 0c
U+000D Carriage return: 0d
U+0020 SPACE: 20
U+1680 OGHAM SPACE MARK: e1 9a 80
U+2000 EN QUAD: e2 80 80
U+2001 EM QUAD: e2 80 81
U+2002 EN SPACE: e2 80 82
U+2003 EM SPACE: e2 80 83
U+2004 THREE-PER-EM SPACE: e2 80 84
U+2005 FOUR-PER-EM SPACE: e2 80 85
U+2006 SIX-PER-EM SPACE: e2 80 86
U+2008 PUNCTUATION SPACE: e2 80 88
U+2009 THIN SPACE: e2 80 89
U+200A HAIR SPACE: e2 80 8a
U+205F MEDIUM MATHEMATICAL SPACE: e2 81 9f
U+3000 IDEOGRAPHIC SPACE: e3 80 80
EOF
)

fail=0

# $utf8_space_characters is deliberately unquoted: word splitting yields
# one "\x.." escape string per white space character.
for i in $utf8_space_characters; do
  # The character alone on a line must match \s ...
  hex_printf_ "$i" | grep -q '^\s$' \
      || { warn_ "$i FAILED to match \\s"; fail=1; }
  # ... and must not match \S: grep has to exit with status 1 (no match),
  # not 0 (match) and not 2 (error).
  hex_printf_ "$i" | grep -q '\S'
  test $? = 1 \
      || { warn_ "$i vs. \\S FAILED"; fail=1; }
done

# This is a separate test, only nominally related to \s.
# It is solely to get coverage of a code path (exercising dfa.c's
# match_mb_charset function) that would have otherwise been untouched.
# However, as of the change-set adding this new test, match_mb_charset
# is unreachable via grep.
printf '\0' | grep -aE '^\s?$' > out 2>&1
test $? = 1 || fail=1
# grep must have printed nothing, neither a match nor a diagnostic.
compare /dev/null out || fail=1

Exit $fail