summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 19
-rw-r--r-- doc/posix-functions/btowc.texi 12
-rw-r--r-- lib/btowc.c 8
-rw-r--r-- m4/btowc.m4 57
-rw-r--r-- modules/btowc 2
-rw-r--r-- modules/btowc-tests 3
-rw-r--r-- tests/test-btowc.c 19
-rwxr-xr-x tests/test-btowc3.sh 9
8 files changed, 123 insertions, 6 deletions
diff --git a/ChangeLog b/ChangeLog
index a118bfc950..a4af96ed5e 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,24 @@
2023-03-30 Bruno Haible <bruno@clisp.org>
+ btowc: Fix behaviour in the C locale.
+ * lib/btowc.c: Include <string.h>.
+ (btowc): Use mbrtowc instead of mbtowc when possible.
+ * m4/btowc.m4 (gl_FUNC_BTOWC): Test for the mingw bug in the C locale.
+ Invoke gl_MBRTOWC_C_LOCALE. If mbrtowc is buggy in the C locale,
+ override also btowc.
+ (gl_PREREQ_BTOWC): Test whether mbrtowc exists.
+ * modules/btowc (Files): Add m4/mbrtowc.m4.
+ (Depends-on): Add mbrtowc.
+ * tests/test-btowc.c (main): Add a test of the C locale, based on
+ tests/test-mbrtowc.c.
+ * tests/test-btowc3.sh: New file, based on tests/test-mbrtowc5.sh.
+ * modules/btowc-tests (Files): Add it.
+ (Makefile.am): Test it.
+ * doc/posix-functions/btowc.texi: Mention the two C locale behaviour
+ bugs and that they are worked around.
+
+2023-03-30 Bruno Haible <bruno@clisp.org>
+
mbrtowc tests: Add comment.
* tests/test-mbrtowc.c: Add comment.
* tests/test-mbrtowc5.sh: Use symmetric coding style.
diff --git a/doc/posix-functions/btowc.texi b/doc/posix-functions/btowc.texi
index fa1ea9b503..f4ca5d450a 100644
--- a/doc/posix-functions/btowc.texi
+++ b/doc/posix-functions/btowc.texi
@@ -17,6 +17,14 @@ Cygwin 1.7.2.
@item
This function does not return WEOF for an EOF argument on some platforms:
IRIX 6.5.
+@item
+In the C or POSIX locales, this function is not consistent with
+Gnulib's @code{mbrtowc} and can return @code{WEOF} on some platforms:
+glibc 2.35, MirOS BSD #10.
+@item
+In the C or POSIX locales, this function is not consistent with @code{mbrtowc}
+on some platforms:
+mingw.
@end itemize
Portability problems not fixed by Gnulib:
@@ -27,8 +35,4 @@ therefore cannot accommodate all Unicode characters.
However, the Gnulib function @code{btoc32}, provided by Gnulib module
@code{btoc32}, operates on 32-bit wide characters and therefore does not have
this limitation.
-@item
-In the C or POSIX locales, this function is not consistent with
-Gnulib's @code{mbrtowc} and can return @code{WEOF}:
-glibc 2.23, MirOS BSD #10.
@end itemize
diff --git a/lib/btowc.c b/lib/btowc.c
index caadbd7608..4defbdda72 100644
--- a/lib/btowc.c
+++ b/lib/btowc.c
@@ -22,6 +22,7 @@
#include <stdio.h>
#include <stdlib.h>
+#include <string.h>
wint_t
btowc (int c)
@@ -32,7 +33,14 @@ btowc (int c)
wchar_t wc;
buf[0] = c;
+#if HAVE_MBRTOWC
+ mbstate_t state;
+ memset (&state, 0, sizeof (mbstate_t));
+ size_t ret = mbrtowc (&wc, buf, 1, &state);
+ if (!(ret == (size_t)(-1) || ret == (size_t)(-2)))
+#else
if (mbtowc (&wc, buf, 1) >= 0)
+#endif
return wc;
}
return WEOF;
diff --git a/m4/btowc.m4 b/m4/btowc.m4
index 77218a7d1c..1cd100a2d7 100644
--- a/m4/btowc.m4
+++ b/m4/btowc.m4
@@ -1,4 +1,4 @@
-# btowc.m4 serial 12
+# btowc.m4 serial 13
dnl Copyright (C) 2008-2023 Free Software Foundation, Inc.
dnl This file is free software; the Free Software Foundation
dnl gives unlimited permission to copy and/or distribute it,
@@ -88,6 +88,49 @@ int main ()
fi
])
+ dnl On mingw, in the C locale, btowc is inconsistent with mbrtowc:
+ dnl mbrtowc avoids calling MultiByteToWideChar when MB_CUR_MAX is 1 and
+ dnl ___lc_codepage_func() is 0, but btowc is lacking this special case.
+ AC_CHECK_FUNCS_ONCE([mbrtowc])
+ AC_CACHE_CHECK([whether btowc is consistent with mbrtowc in the C locale],
+ [gl_cv_func_btowc_consistent],
+ [
+ AC_RUN_IFELSE(
+ [AC_LANG_SOURCE([[
+#include <stdlib.h>
+#include <string.h>
+#include <wchar.h>
+int main ()
+{
+#if HAVE_MBRTOWC
+ wint_t wc1 = btowc (0x80);
+ wchar_t wc2 = (wchar_t) 0xbadface;
+ char buf[1] = { 0x80 };
+ mbstate_t state;
+ memset (&state, 0, sizeof (mbstate_t));
+ if (mbrtowc (&wc2, buf, 1, &state) != 1 || wc1 != wc2)
+ return 1;
+#endif
+ return 0;
+}]])],
+ [gl_cv_func_btowc_consistent=yes],
+ [gl_cv_func_btowc_consistent=no],
+ [case "$host_os" in
+ # Guess no on mingw.
+ mingw*) AC_EGREP_CPP([Problem], [
+#ifdef __MINGW32__
+ Problem
+#endif
+ ],
+ [gl_cv_func_btowc_consistent="guessing no"],
+ [gl_cv_func_btowc_consistent="guessing yes"])
+ ;;
+ # Guess yes otherwise.
+ *) gl_cv_func_btowc_consistent="guessing yes" ;;
+ esac
+ ])
+ ])
+
case "$gl_cv_func_btowc_nul" in
*yes) ;;
*) REPLACE_BTOWC=1 ;;
@@ -96,10 +139,22 @@ int main ()
*yes) ;;
*) REPLACE_BTOWC=1 ;;
esac
+ case "$gl_cv_func_btowc_consistent" in
+ *yes) ;;
+ *) REPLACE_BTOWC=1 ;;
+ esac
+ if test $REPLACE_BTOWC = 0; then
+ gl_MBRTOWC_C_LOCALE
+ case "$gl_cv_func_mbrtowc_C_locale_sans_EILSEQ" in
+ *yes) ;;
+ *) REPLACE_BTOWC=1 ;;
+ esac
+ fi
fi
])
# Prerequisites of lib/btowc.c.
AC_DEFUN([gl_PREREQ_BTOWC], [
:
+ AC_CHECK_FUNCS_ONCE([mbrtowc])
])
diff --git a/modules/btowc b/modules/btowc
index 80d786cfa6..4788b3ec13 100644
--- a/modules/btowc
+++ b/modules/btowc
@@ -4,11 +4,13 @@ btowc() function: convert unibyte character to wide character.
Files:
lib/btowc.c
m4/btowc.m4
+m4/mbrtowc.m4
m4/locale-fr.m4
Depends-on:
wchar
mbtowc [test $HAVE_BTOWC = 0 || test $REPLACE_BTOWC = 1]
+mbrtowc [test $HAVE_BTOWC = 0 || test $REPLACE_BTOWC = 1]
configure.ac:
gl_FUNC_BTOWC
diff --git a/modules/btowc-tests b/modules/btowc-tests
index 6bd3258520..59d33eb00b 100644
--- a/modules/btowc-tests
+++ b/modules/btowc-tests
@@ -1,6 +1,7 @@
Files:
tests/test-btowc1.sh
tests/test-btowc2.sh
+tests/test-btowc3.sh
tests/test-btowc.c
tests/signature.h
tests/macros.h
@@ -15,7 +16,7 @@ gt_LOCALE_FR
gt_LOCALE_FR_UTF8
Makefile.am:
-TESTS += test-btowc1.sh test-btowc2.sh
+TESTS += test-btowc1.sh test-btowc2.sh test-btowc3.sh
TESTS_ENVIRONMENT += LOCALE_FR='@LOCALE_FR@' LOCALE_FR_UTF8='@LOCALE_FR_UTF8@'
check_PROGRAMS += test-btowc
test_btowc_LDADD = $(LDADD) $(SETLOCALE_LIB)
diff --git a/tests/test-btowc.c b/tests/test-btowc.c
index 849db470f4..e5918c36e9 100644
--- a/tests/test-btowc.c
+++ b/tests/test-btowc.c
@@ -57,6 +57,25 @@ main (int argc, char *argv[])
for (c = 0x80; c < 0x100; c++)
ASSERT (btowc (c) == WEOF);
return 0;
+
+ case '3':
+ /* C or POSIX locale. */
+ for (c = 0; c < 0x100; c++)
+ if (c != 0)
+ {
+ /* We are testing all nonnull bytes. */
+ wint_t wc = btowc (c);
+ /* POSIX:2018 says: "In the POSIX locale, btowc() shall not return
+ WEOF if c has a value in the range 0 to 255 inclusive." */
+ if (c < 0x80)
+ /* c is an ASCII character. */
+ ASSERT (wc == c);
+ else
+ /* On most platforms, the bytes 0x80..0xFF map to U+0080..U+00FF.
+ But on musl libc, the bytes 0x80..0xFF map to U+DF80..U+DFFF. */
+ ASSERT (wc == c || wc == 0xDF00 + c);
+ }
+ return 0;
}
return 1;
diff --git a/tests/test-btowc3.sh b/tests/test-btowc3.sh
new file mode 100755
index 0000000000..ee9e143c1c
--- /dev/null
+++ b/tests/test-btowc3.sh
@@ -0,0 +1,9 @@
+#!/bin/sh
+
+# Test that btowc reports no encoding errors in the C and POSIX locales.
+LC_ALL=C \
+${CHECKER} ./test-btowc${EXEEXT} 3 || exit 1
+LC_ALL=POSIX \
+${CHECKER} ./test-btowc${EXEEXT} 3 || exit 1
+
+exit 0