summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ChangeLog9
-rw-r--r--doc/posix-functions/mbrtowc.texi4
-rw-r--r--lib/mbrtowc.c6
-rw-r--r--m4/mbrtowc.m4130
4 files changed, 148 insertions, 1 deletions
diff --git a/ChangeLog b/ChangeLog
index 09564ed3ef..6c71b81367 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,12 @@
+2020-01-02 Bruno Haible <bruno@clisp.org>
+
+ mbrtowc: Fix test failures on MSVC (regression by previous commit).
+ * m4/mbrtowc.m4 (gl_MBRTOWC_STORES_INCOMPLETE): New macro.
+ (gl_FUNC_MBRTOWC): Invoke it. Define MBRTOWC_STORES_INCOMPLETE_BUG.
+ * lib/mbrtowc.c (rpl_mbrtowc): Add workaround for
+ MBRTOWC_STORES_INCOMPLETE_BUG.
+ * doc/posix-functions/mbrtowc.texi: Mention the MSVC bug.
+
2020-01-02 Paul Eggert <eggert@cs.ucla.edu>
doc: mention glibc bug 24269
diff --git a/doc/posix-functions/mbrtowc.texi b/doc/posix-functions/mbrtowc.texi
index c7c34e0e23..3b7aed0c9c 100644
--- a/doc/posix-functions/mbrtowc.texi
+++ b/doc/posix-functions/mbrtowc.texi
@@ -28,6 +28,10 @@ This function does not put the state into non-initial state when parsing an
incomplete multibyte character on some platforms:
AIX 7.2.
@item
+This function stores a wide character when when parsing an incomplete multibyte
+character on some platforms:
+MSVC 14.
+@item
This function returns the total number of bytes that make up the multibyte
character, not the number of bytes that were needed to complete the multibyte
character, on some platforms:
diff --git a/lib/mbrtowc.c b/lib/mbrtowc.c
index 9cc9f5d026..1cdd1af204 100644
--- a/lib/mbrtowc.c
+++ b/lib/mbrtowc.c
@@ -524,7 +524,13 @@ rpl_mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps)
}
# endif
+# if MBRTOWC_STORES_INCOMPLETE_BUG
+ ret = mbrtowc (&wc, s, n, ps);
+ if (ret < (size_t) -2 && pwc != NULL)
+ *pwc = wc;
+# else
ret = mbrtowc (pwc, s, n, ps);
+# endif
# if MBRTOWC_NUL_RETVAL_BUG
if (ret < (size_t) -2 && !*pwc)
diff --git a/m4/mbrtowc.m4 b/m4/mbrtowc.m4
index af4e25d75a..5ad246fd4f 100644
--- a/m4/mbrtowc.m4
+++ b/m4/mbrtowc.m4
@@ -1,4 +1,4 @@
-# mbrtowc.m4 serial 35 -*- coding: utf-8 -*-
+# mbrtowc.m4 serial 36 -*- coding: utf-8 -*-
dnl Copyright (C) 2001-2002, 2004-2005, 2008-2020 Free Software Foundation,
dnl Inc.
dnl This file is free software; the Free Software Foundation
@@ -39,6 +39,7 @@ AC_DEFUN([gl_FUNC_MBRTOWC],
gl_MBRTOWC_NULL_ARG2
gl_MBRTOWC_RETVAL
gl_MBRTOWC_NUL_RETVAL
+ gl_MBRTOWC_STORES_INCOMPLETE
gl_MBRTOWC_EMPTY_INPUT
gl_MBRTOWC_C_LOCALE
case "$gl_cv_func_mbrtowc_null_arg1" in
@@ -69,6 +70,13 @@ AC_DEFUN([gl_FUNC_MBRTOWC],
REPLACE_MBRTOWC=1
;;
esac
+ case "$gl_cv_func_mbrtowc_stores_incomplete" in
+ *no) ;;
+ *) AC_DEFINE([MBRTOWC_STORES_INCOMPLETE_BUG], [1],
+ [Define if the mbrtowc function stores a wide character when reporting incomplete input.])
+ REPLACE_MBRTOWC=1
+ ;;
+ esac
case "$gl_cv_func_mbrtowc_empty_input" in
*yes) ;;
*) AC_DEFINE([MBRTOWC_EMPTY_INPUT_BUG], [1],
@@ -592,6 +600,126 @@ int main ()
])
])
+dnl Test whether mbrtowc stores a wide character when reporting incomplete
+dnl input.
+
+AC_DEFUN([gl_MBRTOWC_STORES_INCOMPLETE],
+[
+ AC_REQUIRE([AC_PROG_CC])
+ AC_REQUIRE([AC_CANONICAL_HOST]) dnl for cross-compiles
+ AC_CACHE_CHECK([whether mbrtowc stores incomplete characters],
+ [gl_cv_func_mbrtowc_stores_incomplete],
+ [
+ dnl Initial guess, used when cross-compiling or when no suitable locale
+ dnl is present.
+changequote(,)dnl
+ case "$host_os" in
+ # Guess yes on native Windows.
+ mingw*) gl_cv_func_mbrtowc_stores_incomplete="guessing yes" ;;
+ *) gl_cv_func_mbrtowc_stores_incomplete="guessing no" ;;
+ esac
+changequote([,])dnl
+ case "$host_os" in
+ mingw*)
+ AC_RUN_IFELSE(
+ [AC_LANG_SOURCE([[
+#include <locale.h>
+#include <string.h>
+/* Tru64 with Desktop Toolkit C has a bug: <stdio.h> must be included before
+ <wchar.h>.
+ BSD/OS 4.0.1 has a bug: <stddef.h>, <stdio.h> and <time.h> must be
+ included before <wchar.h>. */
+#include <stddef.h>
+#include <stdio.h>
+#include <time.h>
+#include <wchar.h>
+int main ()
+{
+ int result = 0;
+ if (setlocale (LC_ALL, "French_France.65001") != NULL)
+ {
+ wchar_t wc = (wchar_t) 0xBADFACE;
+ mbstate_t state;
+
+ memset (&state, '\0', sizeof (mbstate_t));
+ if (mbrtowc (&wc, "\303", 1, &state) == (size_t)(-2)
+ && wc != (wchar_t) 0xBADFACE)
+ result |= 1;
+ }
+ if (setlocale (LC_ALL, "Japanese_Japan.932") != NULL)
+ {
+ wchar_t wc = (wchar_t) 0xBADFACE;
+ mbstate_t state;
+
+ memset (&state, '\0', sizeof (mbstate_t));
+ if (mbrtowc (&wc, "\226", 1, &state) == (size_t)(-2)
+ && wc != (wchar_t) 0xBADFACE)
+ result |= 2;
+ }
+ if (setlocale (LC_ALL, "Chinese_Taiwan.950") != NULL)
+ {
+ wchar_t wc = (wchar_t) 0xBADFACE;
+ mbstate_t state;
+
+ memset (&state, '\0', sizeof (mbstate_t));
+ if (mbrtowc (&wc, "\245", 1, &state) == (size_t)(-2)
+ && wc != (wchar_t) 0xBADFACE)
+ result |= 4;
+ }
+ if (setlocale (LC_ALL, "Chinese_China.936") != NULL)
+ {
+ wchar_t wc = (wchar_t) 0xBADFACE;
+ mbstate_t state;
+
+ memset (&state, '\0', sizeof (mbstate_t));
+ if (mbrtowc (&wc, "\261", 1, &state) == (size_t)(-2)
+ && wc != (wchar_t) 0xBADFACE)
+ result |= 8;
+ }
+ return result;
+}]])],
+ [gl_cv_func_mbrtowc_stores_incomplete=no],
+ [gl_cv_func_mbrtowc_stores_incomplete=yes],
+ [:])
+ ;;
+ *)
+ AC_REQUIRE([gt_LOCALE_FR_UTF8])
+ if test $LOCALE_FR_UTF8 != none; then
+ AC_RUN_IFELSE(
+ [AC_LANG_SOURCE([[
+#include <locale.h>
+#include <string.h>
+/* Tru64 with Desktop Toolkit C has a bug: <stdio.h> must be included before
+ <wchar.h>.
+ BSD/OS 4.0.1 has a bug: <stddef.h>, <stdio.h> and <time.h> must be
+ included before <wchar.h>. */
+#include <stddef.h>
+#include <stdio.h>
+#include <time.h>
+#include <wchar.h>
+int main ()
+{
+ if (setlocale (LC_ALL, "$LOCALE_FR_UTF8") != NULL)
+ {
+ wchar_t wc = (wchar_t) 0xBADFACE;
+ mbstate_t state;
+
+ memset (&state, '\0', sizeof (mbstate_t));
+ if (mbrtowc (&wc, "\303", 1, &state) == (size_t)(-2)
+ && wc != (wchar_t) 0xBADFACE)
+ return 1;
+ }
+ return 0;
+}]])],
+ [gl_cv_func_mbrtowc_stores_incomplete=no],
+ [gl_cv_func_mbrtowc_stores_incomplete=yes],
+ [:])
+ fi
+ ;;
+ esac
+ ])
+])
+
dnl Test whether mbrtowc returns the correct value on empty input.
AC_DEFUN([gl_MBRTOWC_EMPTY_INPUT],