summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Bruno Haible <bruno@clisp.org> 2020-01-09 01:47:17 +0100
committer Bruno Haible <bruno@clisp.org> 2020-01-09 01:47:17 +0100
commit 9be236d67f3d78235c5cbe4381c5dd7b3cddb179 (patch)
tree 09bcc0427c1bd58585080336ebed1fe1bfde6898
parent 877b0c46019d34e722c94248edbfaf5bfbaa17ec (diff)
download gnulib-9be236d67f3d78235c5cbe4381c5dd7b3cddb179.tar.gz
mbrtoc32: Use the system's mbrtoc32 if it exists and basically works.
* m4/mbrtoc32.m4 (gl_MBRTOC32_SANITYCHECK): New macro.
(gl_FUNC_MBRTOC32): Require it. Set REPLACE_MBRTOC32 if mbrtoc32 exists
but is not working.
* lib/mbrtoc32.c: Include hard-locale.h, <locale.h>.
(mbrtoc32): If the char32_t encoding and the wchar_t encoding may
differ, use the system's mbrtoc32, adding workarounds.
* modules/mbrtoc32 (Depends-on): Add hard-locale.
* doc/posix-functions/mbrtoc32.texi: Mention the Solaris and native
Windows problem.
* lib/btoc32.c: Include <stdio.h>, <string.h>.
(btoc32): If the char32_t encoding and the wchar_t encoding may differ,
use mbrtoc32, not btowc.
* modules/btoc32 (Depends-on): Add mbrtoc32.
* lib/mbsrtoc32s.c (mbsrtoc32s): If the char32_t encoding and the
wchar_t encoding may differ, use mbrtoc32, not mbsrtowcs.
* modules/mbsrtoc32s (Depends-on): Update conditions.
(configure.ac): Compile mbsrtoc32s-state.c unconditionally.
* lib/mbsnrtoc32s.c (mbsnrtoc32s): If the char32_t encoding and the
wchar_t encoding may differ, use mbrtoc32, not mbsnrtowcs.
* modules/mbsnrtoc32s (Depends-on): Update conditions.
(configure.ac): Compile mbsrtoc32s-state.c unconditionally.
-rw-r--r-- ChangeLog 25
-rw-r--r-- doc/posix-functions/mbrtoc32.texi 4
-rw-r--r-- lib/btoc32.c 20
-rw-r--r-- lib/mbrtoc32.c 53
-rw-r--r-- lib/mbsnrtoc32s.c 4
-rw-r--r-- lib/mbsrtoc32s.c 4
-rw-r--r-- m4/mbrtoc32.m4 102
-rw-r--r-- modules/btoc32 1
-rw-r--r-- modules/mbrtoc32 1
-rw-r--r-- modules/mbsnrtoc32s 10
-rw-r--r-- modules/mbsrtoc32s 8
11 files changed, 204 insertions, 28 deletions
diff --git a/ChangeLog b/ChangeLog
index ea35e7e95e..4b5a419562 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,28 @@
+2020-01-08 Bruno Haible <bruno@clisp.org>
+
+ mbrtoc32: Use the system's mbrtoc32 if it exists and basically works.
+ * m4/mbrtoc32.m4 (gl_MBRTOC32_SANITYCHECK): New macro.
+ (gl_FUNC_MBRTOC32): Require it. Set REPLACE_MBRTOC32 if mbrtoc32 exists
+ but is not working.
+ * lib/mbrtoc32.c: Include hard-locale.h, <locale.h>.
+ (mbrtoc32): If the char32_t encoding and the wchar_t encoding may
+ differ, use the system's mbrtoc32, adding workarounds.
+ * modules/mbrtoc32 (Depends-on): Add hard-locale.
+ * doc/posix-functions/mbrtoc32.texi: Mention the Solaris and native
+ Windows problem.
+ * lib/btoc32.c: Include <stdio.h>, <string.h>.
+ (btoc32): If the char32_t encoding and the wchar_t encoding may differ,
+ use mbrtoc32, not btowc.
+ * modules/btoc32 (Depends-on): Add mbrtoc32.
+ * lib/mbsrtoc32s.c (mbsrtoc32s): If the char32_t encoding and the
+ wchar_t encoding may differ, use mbrtoc32, not mbsrtowcs.
+ * modules/mbsrtoc32s (Depends-on): Update conditions.
+ (configure.ac): Compile mbsrtoc32s-state.c unconditionally.
+ * lib/mbsnrtoc32s.c (mbsnrtoc32s): If the char32_t encoding and the
+ wchar_t encoding may differ, use mbrtoc32, not mbsnrtowcs.
+ * modules/mbsnrtoc32s (Depends-on): Update conditions.
+ (configure.ac): Compile mbsrtoc32s-state.c unconditionally.
+
2020-01-07 Bruno Haible <bruno@clisp.org>
wcrtomb: Make multithread-safe, except possibly on IRIX.
diff --git a/doc/posix-functions/mbrtoc32.texi b/doc/posix-functions/mbrtoc32.texi
index 1aa15a3434..9789bef130 100644
--- a/doc/posix-functions/mbrtoc32.texi
+++ b/doc/posix-functions/mbrtoc32.texi
@@ -17,6 +17,10 @@ glibc 2.23.
This function returns 0 instead of @code{(size_t) -2} when the input
is empty:
glibc 2.19.
+@item
+This function does not recognize multibyte sequences that @code{mbrtowc}
+recognizes on some platforms:
+Solaris 11.4, mingw, MSVC 14.
@end itemize
Portability problems not fixed by Gnulib:
diff --git a/lib/btoc32.c b/lib/btoc32.c
index 8b27875813..d8ce087ec6 100644
--- a/lib/btoc32.c
+++ b/lib/btoc32.c
@@ -21,10 +21,30 @@
/* Specification. */
#include <uchar.h>
+#include <stdio.h>
+#include <string.h>
+
wint_t
btoc32 (int c)
{
+#if HAVE_WORKING_MBRTOC32 && !defined __GLIBC__
+ /* The char32_t encoding of a multibyte character may be different than its
+ wchar_t encoding. */
+ if (c != EOF)
+ {
+ mbstate_t state;
+ char s[1];
+ char32_t wc;
+
+ memset (&state, '\0', sizeof (mbstate_t));
+ s[0] = (unsigned char) c;
+ if (mbrtoc32 (&wc, s, 1, &state) <= 1)
+ return wc;
+ }
+ return WEOF;
+#else
/* In all known locale encodings, unibyte characters correspond only to
characters in the BMP. */
return btowc (c);
+#endif
}
diff --git a/lib/mbrtoc32.c b/lib/mbrtoc32.c
index f2cf71ec11..facf28bc54 100644
--- a/lib/mbrtoc32.c
+++ b/lib/mbrtoc32.c
@@ -24,13 +24,13 @@
#include <errno.h>
#include <stdlib.h>
-# ifndef FALLTHROUGH
-# if __GNUC__ < 7
-# define FALLTHROUGH ((void) 0)
-# else
-# define FALLTHROUGH __attribute__ ((__fallthrough__))
-# endif
+#ifndef FALLTHROUGH
+# if __GNUC__ < 7
+# define FALLTHROUGH ((void) 0)
+# else
+# define FALLTHROUGH __attribute__ ((__fallthrough__))
# endif
+#endif
#if GNULIB_defined_mbstate_t /* AIX, IRIX */
/* Implement mbrtoc32() on top of mbtowc() for the non-UTF-8 locales
@@ -74,17 +74,23 @@ mbrtoc32 (char32_t *pwc, const char *s, size_t n, mbstate_t *ps)
#else /* glibc, macOS, FreeBSD, NetBSD, OpenBSD, HP-UX, Solaris, Cygwin, mingw, MSVC, Minix, Android */
-/* Implement mbrtoc32() based on mbrtowc(). */
+/* Implement mbrtoc32() based on the original mbrtoc32() or on mbrtowc(). */
# include <wchar.h>
# include "localcharset.h"
# include "streq.h"
+# if MBRTOC32_IN_C_LOCALE_MAYBE_EILSEQ
+# include "hard-locale.h"
+# include <locale.h>
+# endif
+
static mbstate_t internal_state;
size_t
mbrtoc32 (char32_t *pwc, const char *s, size_t n, mbstate_t *ps)
+# undef mbrtoc32
{
/* It's simpler to handle the case s == NULL upfront, than to worry about
this case later, before every test of pwc and n. */
@@ -103,7 +109,31 @@ mbrtoc32 (char32_t *pwc, const char *s, size_t n, mbstate_t *ps)
if (ps == NULL)
ps = &internal_state;
-# if _GL_LARGE_CHAR32_T
+# if HAVE_WORKING_MBRTOC32
+ /* mbrtoc32() may produce different values for wc than mbrtowc(). Therefore
+ use mbrtoc32(). */
+
+# if defined _WIN32 && !defined __CYGWIN__
+ char32_t wc;
+ size_t ret = mbrtoc32 (&wc, s, n, ps);
+ if (ret < (size_t) -2 && pwc != NULL)
+ *pwc = wc;
+# else
+ size_t ret = mbrtoc32 (pwc, s, n, ps);
+# endif
+
+# if MBRTOC32_IN_C_LOCALE_MAYBE_EILSEQ
+ if ((size_t) -2 <= ret && n != 0 && ! hard_locale (LC_CTYPE))
+ {
+ if (pwc != NULL)
+ *pwc = (unsigned char) *s;
+ return 1;
+ }
+# endif
+
+ return ret;
+
+# elif _GL_LARGE_CHAR32_T
/* Special-case all encodings that may produce wide character values
> WCHAR_MAX. */
@@ -209,12 +239,7 @@ mbrtoc32 (char32_t *pwc, const char *s, size_t n, mbstate_t *ps)
# else
- /* char32_t and wchar_t are equivalent.
- Two implementations are possible:
- - We can call the original mbrtoc32 (if it exists) and handle
- MBRTOC32_IN_C_LOCALE_MAYBE_EILSEQ.
- - We can call mbrtowc.
- The latter is simpler. */
+ /* char32_t and wchar_t are equivalent. Use mbrtowc(). */
wchar_t wc;
size_t ret = mbrtowc (&wc, s, n, ps);
if (ret < (size_t) -2 && pwc != NULL)
diff --git a/lib/mbsnrtoc32s.c b/lib/mbsnrtoc32s.c
index 7ba0415949..c0f6e1fb8d 100644
--- a/lib/mbsnrtoc32s.c
+++ b/lib/mbsnrtoc32s.c
@@ -22,7 +22,9 @@
#include <wchar.h>
-#if _GL_LARGE_CHAR32_T
+#if (HAVE_WORKING_MBRTOC32 && !defined __GLIBC__) || _GL_LARGE_CHAR32_T
+/* The char32_t encoding of a multibyte character may be different than its
+ wchar_t encoding, or char32_t is wider than wchar_t. */
/* For Cygwin >= 1.7 it would be possible to speed this up a bit by cutting
the source into chunks, calling mbsnrtowcs on a chunk, then u16_to_u32 on
diff --git a/lib/mbsrtoc32s.c b/lib/mbsrtoc32s.c
index 432ffaf08e..8887ddf74a 100644
--- a/lib/mbsrtoc32s.c
+++ b/lib/mbsrtoc32s.c
@@ -22,7 +22,9 @@
#include <wchar.h>
-#if _GL_LARGE_CHAR32_T
+#if (HAVE_WORKING_MBRTOC32 && !defined __GLIBC__) || _GL_LARGE_CHAR32_T
+/* The char32_t encoding of a multibyte character may be different than its
+ wchar_t encoding, or char32_t is wider than wchar_t. */
# include <errno.h>
# include <limits.h>
diff --git a/m4/mbrtoc32.m4 b/m4/mbrtoc32.m4
index 5039fc718c..3dee9006e1 100644
--- a/m4/mbrtoc32.m4
+++ b/m4/mbrtoc32.m4
@@ -1,4 +1,4 @@
-# mbrtoc32.m4 serial 1
+# mbrtoc32.m4 serial 2
dnl Copyright (C) 2014-2020 Free Software Foundation, Inc.
dnl This file is free software; the Free Software Foundation
dnl gives unlimited permission to copy and/or distribute it,
@@ -11,6 +11,8 @@ AC_DEFUN([gl_FUNC_MBRTOC32],
AC_REQUIRE([AC_TYPE_MBSTATE_T])
gl_MBSTATE_T_BROKEN
+ AC_REQUIRE([gl_MBRTOC32_SANITYCHECK])
+
AC_CHECK_FUNCS_ONCE([mbrtoc32])
if test $ac_cv_func_mbrtoc32 = no; then
HAVE_MBRTOC32=0
@@ -35,6 +37,9 @@ AC_DEFUN([gl_FUNC_MBRTOC32],
;;
esac
fi
+ if test $HAVE_WORKING_MBRTOC32 = 0; then
+ REPLACE_MBRTOC32=1
+ fi
fi
])
@@ -111,6 +116,101 @@ AC_DEFUN([gl_MBRTOC32_C_LOCALE],
])
])
+dnl Test whether mbrtoc32 works not worse than mbrtowc.
+dnl Result is HAVE_WORKING_MBRTOC32.
+
+AC_DEFUN([gl_MBRTOC32_SANITYCHECK],
+[
+ AC_REQUIRE([AC_PROG_CC])
+ AC_CHECK_FUNCS_ONCE([mbrtoc32])
+ AC_REQUIRE([gt_LOCALE_FR])
+ AC_REQUIRE([gt_LOCALE_ZH_CN])
+ AC_REQUIRE([AC_CANONICAL_HOST]) dnl for cross-compiles
+ if test $ac_cv_func_mbrtoc32 = no; then
+ HAVE_WORKING_MBRTOC32=0
+ else
+ AC_CACHE_CHECK([whether mbrtoc32 works as well as mbrtowc],
+ [gl_cv_func_mbrtoc32_sanitycheck],
+ [
+ dnl Initial guess, used when cross-compiling or when no suitable locale
+ dnl is present.
+changequote(,)dnl
+ case "$host_os" in
+ # Guess no on Solaris, native Windows.
+ solaris* | mingw*) gl_cv_func_mbrtoc32_sanitycheck="guessing no" ;;
+ # Guess yes otherwise.
+ *) gl_cv_func_mbrtoc32_sanitycheck="guessing yes" ;;
+ esac
+changequote([,])dnl
+ if test $LOCALE_FR != none || test $LOCALE_ZH_CN != none; then
+ AC_RUN_IFELSE(
+ [AC_LANG_SOURCE([[
+#include <locale.h>
+#include <stdlib.h>
+#include <string.h>
+/* Tru64 with Desktop Toolkit C has a bug: <stdio.h> must be included before
+ <wchar.h>.
+ BSD/OS 4.0.1 has a bug: <stddef.h>, <stdio.h> and <time.h> must be
+ included before <wchar.h>. */
+#include <stddef.h>
+#include <stdio.h>
+#include <time.h>
+#include <wchar.h>
+#include <uchar.h>
+int main ()
+{
+ int result = 0;
+ /* This fails on native Windows:
+ mbrtoc32 returns (size_t)-1.
+ mbrtowc returns 1 (correct). */
+ if (setlocale (LC_ALL, "$LOCALE_FR") != NULL)
+ {
+ mbstate_t state;
+ wchar_t wc = (wchar_t) 0xBADFACE;
+ memset (&state, '\0', sizeof (mbstate_t));
+ if (mbrtowc (&wc, "\374", 1, &state) == 1)
+ {
+ char32_t c32 = (wchar_t) 0xBADFACE;
+ memset (&state, '\0', sizeof (mbstate_t));
+ if (mbrtoc32 (&c32, "\374", 1, &state) != 1)
+ result |= 1;
+ }
+ }
+ /* This fails on Solaris 11.4:
+ mbrtoc32 returns (size_t)-1.
+ mbrtowc returns 4 (correct). */
+ if (setlocale (LC_ALL, "$LOCALE_ZH_CN") != NULL)
+ {
+ mbstate_t state;
+ wchar_t wc = (wchar_t) 0xBADFACE;
+ memset (&state, '\0', sizeof (mbstate_t));
+ if (mbrtowc (&wc, "\224\071\375\067", 4, &state) == 4)
+ {
+ char32_t c32 = (wchar_t) 0xBADFACE;
+ memset (&state, '\0', sizeof (mbstate_t));
+ if (mbrtoc32 (&c32, "\224\071\375\067", 4, &state) != 4)
+ result |= 2;
+ }
+ }
+ return result;
+}]])],
+ [gl_cv_func_mbrtoc32_sanitycheck=yes],
+ [gl_cv_func_mbrtoc32_sanitycheck=no],
+ [:])
+ fi
+ ])
+ case "$gl_cv_func_mbrtoc32_sanitycheck" in
+ *yes)
+ HAVE_WORKING_MBRTOC32=1
+ AC_DEFINE([HAVE_WORKING_MBRTOC32], [1],
+ [Define if the mbrtoc32 function basically works.])
+ ;;
+ *) HAVE_WORKING_MBRTOC32=0 ;;
+ esac
+ fi
+ AC_SUBST([HAVE_WORKING_MBRTOC32])
+])
+
# Prerequisites of lib/mbrtoc32.c and lib/lc-charset-dispatch.c.
AC_DEFUN([gl_PREREQ_MBRTOC32], [
:
diff --git a/modules/btoc32 b/modules/btoc32
index 5e5d4a94e2..caf36d346b 100644
--- a/modules/btoc32
+++ b/modules/btoc32
@@ -6,6 +6,7 @@ lib/btoc32.c
Depends-on:
uchar
+mbrtoc32
btowc
configure.ac:
diff --git a/modules/mbrtoc32 b/modules/mbrtoc32
index 2575394ce2..cf418464eb 100644
--- a/modules/mbrtoc32
+++ b/modules/mbrtoc32
@@ -18,6 +18,7 @@ m4/visibility.m4
Depends-on:
uchar
+hard-locale [{ test $HAVE_MBRTOC32 = 0 || test $REPLACE_MBRTOC32 = 1; } && test $REPLACE_MBSTATE_T = 0]
mbrtowc [{ test $HAVE_MBRTOC32 = 0 || test $REPLACE_MBRTOC32 = 1; } && test $REPLACE_MBSTATE_T = 0]
localcharset [test $HAVE_MBRTOC32 = 0 || test $REPLACE_MBRTOC32 = 1]
streq [test $HAVE_MBRTOC32 = 0 || test $REPLACE_MBRTOC32 = 1]
diff --git a/modules/mbsnrtoc32s b/modules/mbsnrtoc32s
index 44784d8076..ac464a8950 100644
--- a/modules/mbsnrtoc32s
+++ b/modules/mbsnrtoc32s
@@ -10,16 +10,14 @@ Depends-on:
uchar
wchar
verify
-mbrtoc32 [test $SMALL_WCHAR_T = 1]
-minmax [test $SMALL_WCHAR_T = 1]
-strnlen1 [test $SMALL_WCHAR_T = 1]
+mbrtoc32
+minmax
+strnlen1
mbsnrtowcs [test $SMALL_WCHAR_T = 0]
configure.ac:
AC_REQUIRE([gl_UCHAR_H])
-if test $SMALL_WCHAR_T = 1; then
- AC_LIBOBJ([mbsrtoc32s-state])
-fi
+AC_LIBOBJ([mbsrtoc32s-state])
gl_UCHAR_MODULE_INDICATOR([mbsnrtoc32s])
Makefile.am:
diff --git a/modules/mbsrtoc32s b/modules/mbsrtoc32s
index e7e5ee245b..64892cf2a7 100644
--- a/modules/mbsrtoc32s
+++ b/modules/mbsrtoc32s
@@ -10,15 +10,13 @@ Depends-on:
uchar
wchar
verify
-mbrtoc32 [test $SMALL_WCHAR_T = 1]
-strnlen1 [test $SMALL_WCHAR_T = 1]
+mbrtoc32
+strnlen1
mbsrtowcs [test $SMALL_WCHAR_T = 0]
configure.ac:
AC_REQUIRE([gl_UCHAR_H])
-if test $SMALL_WCHAR_T = 1; then
- AC_LIBOBJ([mbsrtoc32s-state])
-fi
+AC_LIBOBJ([mbsrtoc32s-state])
gl_UCHAR_MODULE_INDICATOR([mbsrtoc32s])
Makefile.am: