diff options
author | Bruno Haible <bruno@clisp.org> | 2023-03-30 13:25:20 +0200 |
---|---|---|
committer | Bruno Haible <bruno@clisp.org> | 2023-03-30 19:39:46 +0200 |
commit | 11fe27f76c63c14b6f88c815b656f3f72d37ea41 (patch) | |
tree | 8a4661c83efd2d544ff729373b33955d54054d47 | |
parent | 60745587a2e0073df7fe4cb1ec462f423c8f2bb2 (diff) | |
download | gnulib-11fe27f76c63c14b6f88c815b656f3f72d37ea41.tar.gz |
btowc: Fix behaviour in the C locale.
* lib/btowc.c: Include <string.h>.
(btowc): Use mbrtowc instead of mbtowc when possible.
* m4/btowc.m4 (gl_FUNC_BTOWC): Test for the mingw bug in the C locale.
Invoke gl_MBRTOWC_C_LOCALE. If mbrtowc is buggy in the C locale,
override also btowc.
(gl_PREREQ_BTOWC): Test whether mbrtowc exists.
* modules/btowc (Files): Add m4/mbrtowc.m4.
(Depends-on): Add mbrtowc.
* tests/test-btowc.c (main): Add a test of the C locale, based on
tests/test-mbrtowc.c.
* tests/test-btowc3.sh: New file, based on tests/test-mbrtowc5.sh.
* modules/btowc-tests (Files): Add it.
(Makefile.am): Test it.
* doc/posix-functions/btowc.texi: Mention the two C locale behaviour
bugs and that they are worked around.
-rw-r--r-- | ChangeLog | 19 | ||||
-rw-r--r-- | doc/posix-functions/btowc.texi | 12 | ||||
-rw-r--r-- | lib/btowc.c | 8 | ||||
-rw-r--r-- | m4/btowc.m4 | 57 | ||||
-rw-r--r-- | modules/btowc | 2 | ||||
-rw-r--r-- | modules/btowc-tests | 3 | ||||
-rw-r--r-- | tests/test-btowc.c | 19 | ||||
-rwxr-xr-x | tests/test-btowc3.sh | 9 |
8 files changed, 123 insertions, 6 deletions
@@ -1,5 +1,24 @@ 2023-03-30 Bruno Haible <bruno@clisp.org> + btowc: Fix behaviour in the C locale. + * lib/btowc.c: Include <string.h> + (btowc): Use mbrtowc instead of mbtowc when possible. + * m4/btowc.m4 (gl_FUNC_BTOWC): Test for the mingw bug in the C locale. + Invoke gl_MBRTOWC_C_LOCALE. If mbrtowc is buggy in the C locale, + override also btowc. + (gl_PREREQ_BTOWC): Test whether mbrtowc exists. + * modules/btowc (Files): Add m4/mbrtowc.m4. + (Depends-on): Add mbrtowc. + * tests/test-btowc.c (main): Add a test of the C locale, based on + tests/test-mbrtowc.c. + * tests/test-btowc3.sh: New file, based on tests/test-mbrtowc5.sh. + * modules/btowc-tests (Files): Add it. + (Makefile.am): Test it. + * doc/posix-functions/btowc.texi: Mention the two C locale behaviour + bugs and that they are worked around. + +2023-03-30 Bruno Haible <bruno@clisp.org> + mbrtowc tests: Add comment. * tests/test-mbrtowc.c: Add comment. * tests/test-mbrtowc5.sh: Use symmetric coding style. diff --git a/doc/posix-functions/btowc.texi b/doc/posix-functions/btowc.texi index fa1ea9b503..f4ca5d450a 100644 --- a/doc/posix-functions/btowc.texi +++ b/doc/posix-functions/btowc.texi @@ -17,6 +17,14 @@ Cygwin 1.7.2. @item This function does not return WEOF for an EOF argument on some platforms: IRIX 6.5. +@item +In the C or POSIX locales, this function is not consistent with +Gnulib's @code{mbrtowc} and can return @code{WEOF}: +glibc 2.35, MirOS BSD #10. +@item +In the C or POSIX locales, this function is not consistent with @code{mbrtowc} +on some platforms: +mingw. @end itemize Portability problems not fixed by Gnulib: @@ -27,8 +35,4 @@ therefore cannot accommodate all Unicode characters. However, the Gnulib function @code{btoc32}, provided by Gnulib module @code{btoc32}, operates on 32-bit wide characters and therefore does not have this limitation. 
-@item -In the C or POSIX locales, this function is not consistent with -Gnulib's @code{mbrtowc} and can return @code{WEOF}: -glibc 2.23, MirOS BSD #10. @end itemize diff --git a/lib/btowc.c b/lib/btowc.c index caadbd7608..4defbdda72 100644 --- a/lib/btowc.c +++ b/lib/btowc.c @@ -22,6 +22,7 @@ #include <stdio.h> #include <stdlib.h> +#include <string.h> wint_t btowc (int c) @@ -32,7 +33,14 @@ btowc (int c) wchar_t wc; buf[0] = c; +#if HAVE_MBRTOWC + mbstate_t state; + memset (&state, 0, sizeof (mbstate_t)); + size_t ret = mbrtowc (&wc, buf, 1, &state); + if (!(ret == (size_t)(-1) || ret == (size_t)(-2))) +#else if (mbtowc (&wc, buf, 1) >= 0) +#endif return wc; } return WEOF; diff --git a/m4/btowc.m4 b/m4/btowc.m4 index 77218a7d1c..1cd100a2d7 100644 --- a/m4/btowc.m4 +++ b/m4/btowc.m4 @@ -1,4 +1,4 @@ -# btowc.m4 serial 12 +# btowc.m4 serial 13 dnl Copyright (C) 2008-2023 Free Software Foundation, Inc. dnl This file is free software; the Free Software Foundation dnl gives unlimited permission to copy and/or distribute it, @@ -88,6 +88,49 @@ int main () fi ]) + dnl On mingw, in the C locale, btowc is inconsistent with mbrtowc: + dnl mbrtowc avoids calling MultiByteToWideChar when MB_CUR_MAX is 1 and + dnl ___lc_codepage_func() is 0, but btowc is lacking this special case. + AC_CHECK_FUNCS_ONCE([mbrtowc]) + AC_CACHE_CHECK([whether btowc is consistent with mbrtowc in the C locale], + [gl_cv_func_btowc_consistent], + [ + AC_RUN_IFELSE( + [AC_LANG_SOURCE([[ +#include <stdlib.h> +#include <string.h> +#include <wchar.h> +int main () +{ +#if HAVE_MBRTOWC + wint_t wc1 = btowc (0x80); + wchar_t wc2 = (wchar_t) 0xbadface; + char buf[1] = { 0x80 }; + mbstate_t state; + memset (&state, 0, sizeof (mbstate_t)); + if (mbrtowc (&wc2, buf, 1, &state) != 1 || wc1 != wc2) + return 1; +#endif + return 0; +}]])], + [gl_cv_func_btowc_consistent=yes], + [gl_cv_func_btowc_consistent=no], + [case "$host_os" in + # Guess no on mingw. 
+ mingw*) AC_EGREP_CPP([Problem], [ +#ifdef __MINGW32__ + Problem +#endif + ], + [gl_cv_func_btowc_consistent="guessing no"], + [gl_cv_func_btowc_consistent="guessing yes"]) + ;; + # Guess yes otherwise. + *) gl_cv_func_btowc_consistent="guessing yes" ;; + esac + ]) + ]) + case "$gl_cv_func_btowc_nul" in *yes) ;; *) REPLACE_BTOWC=1 ;; @@ -96,10 +139,22 @@ int main () *yes) ;; *) REPLACE_BTOWC=1 ;; esac + case "$gl_cv_func_btowc_consistent" in + *yes) ;; + *) REPLACE_BTOWC=1 ;; + esac + if test $REPLACE_BTOWC = 0; then + gl_MBRTOWC_C_LOCALE + case "$gl_cv_func_mbrtowc_C_locale_sans_EILSEQ" in + *yes) ;; + *) REPLACE_BTOWC=1 ;; + esac + fi fi ]) # Prerequisites of lib/btowc.c. AC_DEFUN([gl_PREREQ_BTOWC], [ : + AC_CHECK_FUNCS_ONCE([mbrtowc]) ]) diff --git a/modules/btowc b/modules/btowc index 80d786cfa6..4788b3ec13 100644 --- a/modules/btowc +++ b/modules/btowc @@ -4,11 +4,13 @@ btowc() function: convert unibyte character to wide character. Files: lib/btowc.c m4/btowc.m4 +m4/mbrtowc.m4 m4/locale-fr.m4 Depends-on: wchar mbtowc [test $HAVE_BTOWC = 0 || test $REPLACE_BTOWC = 1] +mbrtowc [test $HAVE_BTOWC = 0 || test $REPLACE_BTOWC = 1] configure.ac: gl_FUNC_BTOWC diff --git a/modules/btowc-tests b/modules/btowc-tests index 6bd3258520..59d33eb00b 100644 --- a/modules/btowc-tests +++ b/modules/btowc-tests @@ -1,6 +1,7 @@ Files: tests/test-btowc1.sh tests/test-btowc2.sh +tests/test-btowc3.sh tests/test-btowc.c tests/signature.h tests/macros.h @@ -15,7 +16,7 @@ gt_LOCALE_FR gt_LOCALE_FR_UTF8 Makefile.am: -TESTS += test-btowc1.sh test-btowc2.sh +TESTS += test-btowc1.sh test-btowc2.sh test-btowc3.sh TESTS_ENVIRONMENT += LOCALE_FR='@LOCALE_FR@' LOCALE_FR_UTF8='@LOCALE_FR_UTF8@' check_PROGRAMS += test-btowc test_btowc_LDADD = $(LDADD) $(SETLOCALE_LIB) diff --git a/tests/test-btowc.c b/tests/test-btowc.c index 849db470f4..e5918c36e9 100644 --- a/tests/test-btowc.c +++ b/tests/test-btowc.c @@ -57,6 +57,25 @@ main (int argc, char *argv[]) for (c = 0x80; c < 0x100; c++) ASSERT 
(btowc (c) == WEOF); return 0; + + case '3': + /* C or POSIX locale. */ + for (c = 0; c < 0x100; c++) + if (c != 0) + { + /* We are testing all nonnull bytes. */ + wint_t wc = btowc (c); + /* POSIX:2018 says: "In the POSIX locale, btowc() shall not return + WEOF if c has a value in the range 0 to 255 inclusive." */ + if (c < 0x80) + /* c is an ASCII character. */ + ASSERT (wc == c); + else + /* On most platforms, the bytes 0x80..0xFF map to U+0080..U+00FF. + But on musl libc, the bytes 0x80..0xFF map to U+DF80..U+DFFF. */ + ASSERT (wc == c || wc == 0xDF00 + c); + } + return 0; } return 1; diff --git a/tests/test-btowc3.sh b/tests/test-btowc3.sh new file mode 100755 index 0000000000..ee9e143c1c --- /dev/null +++ b/tests/test-btowc3.sh @@ -0,0 +1,9 @@ +#!/bin/sh + +# Test whether the POSIX locale has encoding errors. +LC_ALL=C \ +${CHECKER} ./test-btowc${EXEEXT} 3 || exit 1 +LC_ALL=POSIX \ +${CHECKER} ./test-btowc${EXEEXT} 3 || exit 1 + +exit 0 |