summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 16
-rw-r--r-- doc/posix-functions/c32rtomb.texi 11
-rw-r--r-- doc/posix-functions/wcrtomb.texi 7
-rw-r--r-- lib/c32rtomb.c 124
-rw-r--r-- lib/uchar.in.h 25
-rw-r--r-- m4/c32rtomb.m4 55
-rw-r--r-- m4/uchar.m4 7
-rw-r--r-- modules/c32rtomb 32
-rw-r--r-- modules/uchar 3
-rw-r--r-- tests/test-uchar-c++.cc 5
10 files changed, 277 insertions, 8 deletions
diff --git a/ChangeLog b/ChangeLog
index 4b5a419562..3ad99ff912 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,21 @@
2020-01-08 Bruno Haible <bruno@clisp.org>
+ c32rtomb: New module.
+ * lib/uchar.in.h (c32rtomb): New declaration.
+ * lib/c32rtomb.c: New file, based on lib/unistr/u8-uctomb-aux.c.
+ * m4/c32rtomb.m4: New file.
+ * m4/uchar.m4 (gl_UCHAR_H): Test whether c32rtomb is declared.
+ (gl_UCHAR_H_DEFAULTS): Initialize GNULIB_C32RTOMB, HAVE_C32RTOMB,
+ REPLACE_C32RTOMB.
+ * modules/uchar (Makefile.am): Substitute GNULIB_C32RTOMB,
+ HAVE_C32RTOMB, REPLACE_C32RTOMB.
+ * modules/c32rtomb: New file.
+ * tests/test-uchar-c++.cc: Test the signature of c32rtomb.
+ * doc/posix-functions/c32rtomb.texi: Document the new module.
+ * doc/posix-functions/wcrtomb.texi: Mention the new module.
+
+2020-01-08 Bruno Haible <bruno@clisp.org>
+
mbrtoc32: Use the system's mbrtoc32 if it exists and basically works.
* m4/mbrtoc32.m4 (gl_MBRTOC32_SANITYCHECK): New macro.
(gl_FUNC_MBRTOC32): Require it. Set REPLACE_MBRTOC32 if mbrtoc32 exists
diff --git a/doc/posix-functions/c32rtomb.texi b/doc/posix-functions/c32rtomb.texi
index 392bbe97d5..4a1a617bab 100644
--- a/doc/posix-functions/c32rtomb.texi
+++ b/doc/posix-functions/c32rtomb.texi
@@ -2,15 +2,18 @@
@section @code{c32rtomb}
@findex c32rtomb
-Gnulib module: ---
+Gnulib module: c32rtomb
Portability problems fixed by Gnulib:
@itemize
+@item
+This function is missing on most non-glibc platforms:
+glibc 2.15, Mac OS X 10.5, FreeBSD 6.4, NetBSD 5.0, OpenBSD 3.8, Minix 3.1.8, AIX 7.1, HP-UX 11.31, IRIX 6.5, Solaris 11.3, Cygwin, mingw, MSVC 9, Android 4.4.
+@item
+This function returns 0 when the first argument is NULL in some locales on some platforms:
+AIX 7.2.
@end itemize
Portability problems not fixed by Gnulib:
@itemize
-@item
-This function is missing on most non-glibc platforms:
-glibc 2.15, Mac OS X 10.5, FreeBSD 6.4, NetBSD 5.0, OpenBSD 3.8, Minix 3.1.8, AIX 7.1, HP-UX 11.31, IRIX 6.5, Solaris 11.3, Cygwin, mingw, MSVC 9, Android 4.4.
@end itemize
diff --git a/doc/posix-functions/wcrtomb.texi b/doc/posix-functions/wcrtomb.texi
index 232bea4278..28b8dfe851 100644
--- a/doc/posix-functions/wcrtomb.texi
+++ b/doc/posix-functions/wcrtomb.texi
@@ -25,6 +25,9 @@ MSVC 14.
Portability problems not fixed by Gnulib:
@itemize
@item
-On Windows and 32-bit AIX platforms, @code{wchar_t} is a 16-bit type and therefore cannot
-accommodate all Unicode characters.
+On Windows and 32-bit AIX platforms, @code{wchar_t} is a 16-bit type and
+therefore cannot accommodate all Unicode characters.
+However, the ISO C11 function @code{c32rtomb}, provided by Gnulib module
+@code{c32rtomb}, operates on 32-bit wide characters and therefore does not have
+this limitation.
@end itemize
diff --git a/lib/c32rtomb.c b/lib/c32rtomb.c
new file mode 100644
index 0000000000..ba399295ad
--- /dev/null
+++ b/lib/c32rtomb.c
@@ -0,0 +1,124 @@
+/* Convert 32-bit wide character to multibyte character.
+ Copyright (C) 2020 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>. */
+
+/* Written by Bruno Haible <bruno@clisp.org>, 2020. */
+
+#include <config.h>
+
+/* Specification. */
+#include <uchar.h>
+
+#include <errno.h>
+#include <wchar.h>
+
+#include "localcharset.h"
+#include "streq.h"
+
+#ifndef FALLTHROUGH
+# if __GNUC__ < 7
+# define FALLTHROUGH ((void) 0)
+# else
+# define FALLTHROUGH __attribute__ ((__fallthrough__))
+# endif
+#endif
+
+size_t
+c32rtomb (char *s, char32_t wc, mbstate_t *ps)
+#undef c32rtomb /* Presumably: when <uchar.h> has mapped c32rtomb to
+ rpl_c32rtomb, drop that mapping here so that the call in
+ the body reaches the system's function instead of
+ recursing into this replacement. */
+{ /* Converts the 32-bit wide character WC to its multibyte representation.
+ The failure paths coded here return (size_t)(-1) with errno set to
+ EILSEQ; results of the delegated calls (system c32rtomb, wcrtomb) are
+ passed through unchanged. PS is only forwarded to the delegated
+ function; the UTF-8 path below does not use it.
+ Exactly one of the three strategies below gets compiled. */
+#if HAVE_WORKING_MBRTOC32 /* Strategy 1: wrap the system's c32rtomb. */
+
+# if C32RTOMB_RETVAL_BUG
+ if (s == NULL)
+ /* We know the NUL wide character corresponds to the NUL character.
+ The system function is known to report a different value for a NULL S
+ in this configuration, so answer 1 without calling it. */
+ return 1;
+# endif
+
+ return c32rtomb (s, wc, ps); /* the system's function, see #undef above */
+
+#elif _GL_LARGE_CHAR32_T /* Strategy 2. Presumably set when char32_t can hold values that wchar_t cannot - TODO confirm in uchar.in.h. */
+
+ if (s == NULL)
+ return wcrtomb (NULL, 0, ps); /* forward the NULL-buffer request as is */
+ else
+ {
+ /* Special-case all encodings that may produce wide character values
+ > WCHAR_MAX. */
+ const char *encoding = locale_charset ();
+ if (STREQ_OPT (encoding, "UTF-8", 'U', 'T', 'F', '-', '8', 0, 0, 0, 0))
+ {
+ /* Special-case the UTF-8 encoding. Assume that the wide-character
+ encoding in a UTF-8 locale is UCS-2 or, equivalently, UTF-16.
+ WC is therefore encoded to UTF-8 right here, without going
+ through wcrtomb. */
+ if (wc < 0x80)
+ {
+ s[0] = (unsigned char) wc; /* single byte, value unchanged */
+ return 1;
+ }
+ else
+ {
+ int count; /* number of bytes of the UTF-8 sequence */
+
+ if (wc < 0x800)
+ count = 2;
+ else if (wc < 0x10000)
+ {
+ if (wc < 0xd800 || wc >= 0xe000)
+ count = 3;
+ else
+ {
+ /* 0xd800..0xdfff (the surrogate range) is rejected. */
+ errno = EILSEQ;
+ return (size_t)(-1);
+ }
+ }
+ else if (wc < 0x110000)
+ count = 4;
+ else
+ {
+ /* Values of 0x110000 and above are rejected. */
+ errno = EILSEQ;
+ return (size_t)(-1);
+ }
+
+ switch (count) /* note: code falls through cases!
+ Bytes are written from last to first. Each case
+ stores the low 6 bits as a 10xxxxxx byte, shifts
+ them out, and ORs in a marker bit that, after the
+ remaining shifts, lands in the lead byte's prefix
+ (0xc0, 0xe0 or 0xf0 for count 2, 3 or 4). */
+ {
+ case 4: s[3] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0x10000;
+ FALLTHROUGH;
+ case 3: s[2] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0x800;
+ FALLTHROUGH;
+ case 2: s[1] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0xc0;
+ /*case 1:*/ s[0] = wc; /* what remains is the lead byte */
+ }
+ return count;
+ }
+ }
+ else
+ {
+ if ((wchar_t) wc == wc) /* value survives the narrowing to wchar_t */
+ return wcrtomb (s, (wchar_t) wc, ps);
+ else
+ {
+ errno = EILSEQ;
+ return (size_t)(-1);
+ }
+ }
+ }
+
+#else
+
+ /* char32_t and wchar_t are equivalent.
+ Strategy 3: hand the value to wcrtomb unchanged. */
+ return wcrtomb (s, (wchar_t) wc, ps);
+
+#endif
+}
diff --git a/lib/uchar.in.h b/lib/uchar.in.h
index 513fa8c0ba..dbbfc30bc3 100644
--- a/lib/uchar.in.h
+++ b/lib/uchar.in.h
@@ -68,6 +68,31 @@ _GL_CXXALIASWARN (btoc32);
#endif
+/* Converts a 32-bit wide character to a multibyte character.
+ When the c32rtomb module is not in use but GNULIB_POSIXCHECK is defined,
+ uses of the system's c32rtomb are flagged as unportable instead. */
+#if @GNULIB_C32RTOMB@
+# if @REPLACE_C32RTOMB@
+# if !(defined __cplusplus && defined GNULIB_NAMESPACE)
+# undef c32rtomb
+# define c32rtomb rpl_c32rtomb
+# endif
+_GL_FUNCDECL_RPL (c32rtomb, size_t, (char *s, char32_t wc, mbstate_t *ps));
+_GL_CXXALIAS_RPL (c32rtomb, size_t, (char *s, char32_t wc, mbstate_t *ps));
+# else
+# if !@HAVE_C32RTOMB@
+_GL_FUNCDECL_SYS (c32rtomb, size_t, (char *s, char32_t wc, mbstate_t *ps));
+# endif
+_GL_CXXALIAS_SYS (c32rtomb, size_t, (char *s, char32_t wc, mbstate_t *ps));
+# endif
+_GL_CXXALIASWARN (c32rtomb);
+#elif defined GNULIB_POSIXCHECK
+# undef c32rtomb
+# if HAVE_RAW_DECL_C32RTOMB
+_GL_WARN_ON_USE (c32rtomb, "c32rtomb is not portable - "
+ "use gnulib module c32rtomb for portability");
+# endif
+#endif
+
+
/* Converts a 32-bit wide character to unibyte character.
Returns the single-byte representation of WC if it exists,
or EOF otherwise. */
diff --git a/m4/c32rtomb.m4 b/m4/c32rtomb.m4
new file mode 100644
index 0000000000..4cf0e4deda
--- /dev/null
+++ b/m4/c32rtomb.m4
@@ -0,0 +1,55 @@
+# c32rtomb.m4 serial 1
+dnl Copyright (C) 2020 Free Software Foundation, Inc.
+dnl This file is free software; the Free Software Foundation
+dnl gives unlimited permission to copy and/or distribute it,
+dnl with or without modifications, as long as this notice is preserved.
+
+dnl gl_FUNC_C32RTOMB
+dnl Sets HAVE_C32RTOMB to 0 when the system lacks c32rtomb, and sets
+dnl REPLACE_C32RTOMB to 1 when the system function exists but must be
+dnl overridden: either because mbrtoc32 is being overridden, or because
+dnl c32rtomb (NULL, 0, NULL) does not return 1 (C32RTOMB_RETVAL_BUG).
+AC_DEFUN([gl_FUNC_C32RTOMB],
+[
+ AC_REQUIRE([gl_UCHAR_H_DEFAULTS])
+
+ AC_REQUIRE([gl_MBRTOC32_SANITYCHECK])
+
+ AC_CHECK_FUNCS_ONCE([c32rtomb])
+ if test $ac_cv_func_c32rtomb = no; then
+ HAVE_C32RTOMB=0
+ else
+ dnl When we override mbrtoc32, redefining the meaning of the char32_t
+ dnl values, we need to override c32rtomb as well, for consistency.
+ if test $HAVE_WORKING_MBRTOC32 = 0; then
+ REPLACE_C32RTOMB=1
+ fi
+ AC_CACHE_CHECK([whether c32rtomb return value is correct],
+ [gl_cv_func_c32rtomb_retval],
+ [
+ dnl Initial guess, used when cross-compiling.
+changequote(,)dnl
+ case "$host_os" in
+ # Guess no on AIX.
+ aix*) gl_cv_func_c32rtomb_retval="guessing no" ;;
+ # Guess yes otherwise.
+ *) gl_cv_func_c32rtomb_retval="guessing yes" ;;
+ esac
+changequote([,])dnl
+ dnl <stddef.h> is included for NULL, which <uchar.h> is not required to
+ dnl define; a compile failure here would be mistaken for the bug.
+ AC_RUN_IFELSE(
+ [AC_LANG_SOURCE([[
+#include <stddef.h>
+#include <uchar.h>
+int main ()
+{
+ int result = 0;
+ if (c32rtomb (NULL, 0, NULL) != 1)
+ result |= 1;
+ return result;
+}]])],
+ [gl_cv_func_c32rtomb_retval=yes],
+ [gl_cv_func_c32rtomb_retval=no],
+ [:])
+ ])
+ case "$gl_cv_func_c32rtomb_retval" in
+ *yes) ;;
+ *) AC_DEFINE([C32RTOMB_RETVAL_BUG], [1],
+ [Define if the c32rtomb function has an incorrect return value.])
+ REPLACE_C32RTOMB=1 ;;
+ esac
+ fi
+])
diff --git a/m4/uchar.m4 b/m4/uchar.m4
index 0b5c662953..be71196619 100644
--- a/m4/uchar.m4
+++ b/m4/uchar.m4
@@ -1,4 +1,4 @@
-# uchar.m4 serial 8
+# uchar.m4 serial 9
dnl Copyright (C) 2019-2020 Free Software Foundation, Inc.
dnl This file is free software; the Free Software Foundation
dnl gives unlimited permission to copy and/or distribute it,
@@ -33,7 +33,7 @@ AC_DEFUN_ONCE([gl_UCHAR_H],
dnl corresponding gnulib module is not in use, and which is not
dnl guaranteed by C11.
gl_WARN_ON_USE_PREPARE([[#include <uchar.h>
- ]], [mbrtoc32])
+ ]], [c32rtomb mbrtoc32])
])
AC_DEFUN([gl_UCHAR_MODULE_INDICATOR],
@@ -48,12 +48,15 @@ AC_DEFUN([gl_UCHAR_MODULE_INDICATOR],
AC_DEFUN([gl_UCHAR_H_DEFAULTS],
[
GNULIB_BTOC32=0; AC_SUBST([GNULIB_BTOC32])
+ GNULIB_C32RTOMB=0; AC_SUBST([GNULIB_C32RTOMB])
GNULIB_C32TOB=0; AC_SUBST([GNULIB_C32TOB])
GNULIB_MBRTOC32=0; AC_SUBST([GNULIB_MBRTOC32])
GNULIB_MBSNRTOC32S=0; AC_SUBST([GNULIB_MBSNRTOC32S])
GNULIB_MBSRTOC32S=0; AC_SUBST([GNULIB_MBSRTOC32S])
GNULIB_MBSTOC32S=0; AC_SUBST([GNULIB_MBSTOC32S])
dnl Assume proper GNU behavior unless another module says otherwise.
+ HAVE_C32RTOMB=1; AC_SUBST([HAVE_C32RTOMB])
HAVE_MBRTOC32=1; AC_SUBST([HAVE_MBRTOC32])
+ REPLACE_C32RTOMB=0; AC_SUBST([REPLACE_C32RTOMB])
REPLACE_MBRTOC32=0; AC_SUBST([REPLACE_MBRTOC32])
])
diff --git a/modules/c32rtomb b/modules/c32rtomb
new file mode 100644
index 0000000000..ea227dfd5d
--- /dev/null
+++ b/modules/c32rtomb
@@ -0,0 +1,32 @@
+Description:
+c32rtomb() function: convert 32-bit wide character to multibyte character.
+
+Files:
+lib/c32rtomb.c
+m4/c32rtomb.m4
+m4/mbrtoc32.m4
+
+Depends-on:
+uchar
+wchar [test $HAVE_C32RTOMB = 0 || test $REPLACE_C32RTOMB = 1]
+wcrtomb [test $HAVE_C32RTOMB = 0 || test $REPLACE_C32RTOMB = 1]
+localcharset [{ test $HAVE_C32RTOMB = 0 || test $REPLACE_C32RTOMB = 1; } && test $SMALL_WCHAR_T = 1]
+streq [{ test $HAVE_C32RTOMB = 0 || test $REPLACE_C32RTOMB = 1; } && test $SMALL_WCHAR_T = 1]
+
+configure.ac:
+gl_FUNC_C32RTOMB
+if test $HAVE_C32RTOMB = 0 || test $REPLACE_C32RTOMB = 1; then
+ AC_LIBOBJ([c32rtomb])
+fi
+gl_UCHAR_MODULE_INDICATOR([c32rtomb])
+
+Makefile.am:
+
+Include:
+<uchar.h>
+
+License:
+LGPLv2+
+
+Maintainer:
+Bruno Haible
diff --git a/modules/uchar b/modules/uchar
index 29bc7ae3ea..cab45181a0 100644
--- a/modules/uchar
+++ b/modules/uchar
@@ -29,12 +29,15 @@ uchar.h: uchar.in.h $(top_builddir)/config.status $(CXXDEFS_H)
-e 's|@''NEXT_UCHAR_H''@|$(NEXT_UCHAR_H)|g' \
-e 's|@''SMALL_WCHAR_T''@|$(SMALL_WCHAR_T)|g' \
-e 's/@''GNULIB_BTOC32''@/$(GNULIB_BTOC32)/g' \
+ -e 's/@''GNULIB_C32RTOMB''@/$(GNULIB_C32RTOMB)/g' \
-e 's/@''GNULIB_C32TOB''@/$(GNULIB_C32TOB)/g' \
-e 's/@''GNULIB_MBRTOC32''@/$(GNULIB_MBRTOC32)/g' \
-e 's/@''GNULIB_MBSNRTOC32S''@/$(GNULIB_MBSNRTOC32S)/g' \
-e 's/@''GNULIB_MBSRTOC32S''@/$(GNULIB_MBSRTOC32S)/g' \
-e 's/@''GNULIB_MBSTOC32S''@/$(GNULIB_MBSTOC32S)/g' \
+ -e 's|@''HAVE_C32RTOMB''@|$(HAVE_C32RTOMB)|g' \
-e 's|@''HAVE_MBRTOC32''@|$(HAVE_MBRTOC32)|g' \
+ -e 's|@''REPLACE_C32RTOMB''@|$(REPLACE_C32RTOMB)|g' \
-e 's|@''REPLACE_MBRTOC32''@|$(REPLACE_MBRTOC32)|g' \
-e '/definitions of _GL_FUNCDECL_RPL/r $(CXXDEFS_H)' \
< $(srcdir)/uchar.in.h; \
diff --git a/tests/test-uchar-c++.cc b/tests/test-uchar-c++.cc
index 3e71c89c82..ed45da2e83 100644
--- a/tests/test-uchar-c++.cc
+++ b/tests/test-uchar-c++.cc
@@ -28,6 +28,11 @@
SIGNATURE_CHECK (GNULIB_NAMESPACE::btoc32, wint_t, (int));
#endif
+#if GNULIB_TEST_C32RTOMB
+SIGNATURE_CHECK (GNULIB_NAMESPACE::c32rtomb, size_t,
+ (char *, char32_t , mbstate_t *));
+#endif
+
#if GNULIB_TEST_C32TOB
SIGNATURE_CHECK (GNULIB_NAMESPACE::c32tob, int, (wint_t));
#endif