From 4b440d3568b01dd9acd5242bea8b63fc43428f5a Mon Sep 17 00:00:00 2001 From: Bruno Haible Date: Wed, 19 Apr 2023 02:14:09 +0200 Subject: wcscmp: Work around two ISO C compliance bugs on several platforms. * lib/wchar.in.h (wcscmp): Consider REPLACE_WCSCMP. * lib/wcscmp-impl.h (wcscmp): Don't assume that the two wide characters are in the range 0..INT_MAX. * m4/wcscmp.m4 (gl_FUNC_WCSCMP): Test whether wcscmp works for all wide characters. Set REPLACE_WCSCMP. * m4/wchar_h.m4 (gl_WCHAR_H_DEFAULTS): Initialize REPLACE_WCSCMP. * modules/wchar (Makefile.am): Substitute REPLACE_WCSCMP. * modules/wcscmp (Status, Notice): Un-obsolete this module. (configure.ac): Consider REPLACE_WCSCMP. * doc/posix-functions/wcscmp.texi: Mention the two bugs. --- ChangeLog | 14 ++++++++++ doc/posix-functions/wcscmp.texi | 8 ++++++ lib/wchar.in.h | 14 ++++++++-- lib/wcscmp-impl.h | 5 ++-- m4/wchar_h.m4 | 3 ++- m4/wcscmp.m4 | 58 ++++++++++++++++++++++++++++++++++++++++- modules/wchar | 1 + modules/wcscmp | 9 ++----- 8 files changed, 99 insertions(+), 13 deletions(-) diff --git a/ChangeLog b/ChangeLog index 17596c4b23..bc02f2d5f5 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,17 @@ +2023-04-18 Bruno Haible + + wcscmp: Work around two ISO C compliance bugs on several platforms. + * lib/wchar.in.h (wcscmp): Consider REPLACE_WCSCMP. + * lib/wcscmp-impl.h (wcscmp): Don't assume that the two wide characters + are in the range 0..INT_MAX. + * m4/wcscmp.m4 (gl_FUNC_WCSCMP): Test whether wcscmp works for all wide + characters. Set REPLACE_WCSCMP. + * m4/wchar_h.m4 (gl_WCHAR_H_DEFAULTS): Initialize REPLACE_WCSCMP. + * modules/wchar (Makefile.am): Substitute REPLACE_WCSCMP. + * modules/wcscmp (Status, Notice): Un-obsolete this module. + (configure.ac): Consider REPLACE_WCSCMP. + * doc/posix-functions/wcscmp.texi: Mention the two bugs. + 2023-04-18 Bruno Haible wmemcmp: Add tests. 
diff --git a/doc/posix-functions/wcscmp.texi b/doc/posix-functions/wcscmp.texi index 4c4de8c6dc..bc64d28f56 100644 --- a/doc/posix-functions/wcscmp.texi +++ b/doc/posix-functions/wcscmp.texi @@ -8,6 +8,14 @@ Gnulib module: wcscmp Portability problems fixed by Gnulib: @itemize +@item +This function compares the wide characters as if they were unsigned, although +@code{wchar_t} is signed, on some platforms: +glibc 2.14.1 on x86 or x86_64, musl libc 1.2.3, macOS 12.5, FreeBSD 13.2, NetBSD 9.0, OpenBSD 7.2, Solaris 11.4. +@item +This function may return a wrong result if the two arguments are of different +length, on some platforms: +AIX 7.2 in 64-bit mode. @end itemize Portability problems not fixed by Gnulib: diff --git a/lib/wchar.in.h b/lib/wchar.in.h index 6a5b18d39d..c347256368 100644 --- a/lib/wchar.in.h +++ b/lib/wchar.in.h @@ -938,11 +938,21 @@ _GL_WARN_ON_USE (wcsncat, "wcsncat is unportable - " /* Compare S1 and S2. */ #if @GNULIB_WCSCMP@ -# if !@HAVE_WCSCMP@ +# if @REPLACE_WCSCMP@ +# if !(defined __cplusplus && defined GNULIB_NAMESPACE) +# undef wcscmp +# define wcscmp rpl_wcscmp +# endif +_GL_FUNCDECL_RPL (wcscmp, int, (const wchar_t *s1, const wchar_t *s2) + _GL_ATTRIBUTE_PURE); +_GL_CXXALIAS_RPL (wcscmp, int, (const wchar_t *s1, const wchar_t *s2)); +# else +# if !@HAVE_WCSCMP@ _GL_FUNCDECL_SYS (wcscmp, int, (const wchar_t *s1, const wchar_t *s2) _GL_ATTRIBUTE_PURE); -# endif +# endif _GL_CXXALIAS_SYS (wcscmp, int, (const wchar_t *s1, const wchar_t *s2)); +# endif # if __GLIBC__ >= 2 _GL_CXXALIASWARN (wcscmp); # endif diff --git a/lib/wcscmp-impl.h b/lib/wcscmp-impl.h index ba96db4f77..bc17c75d94 100644 --- a/lib/wcscmp-impl.h +++ b/lib/wcscmp-impl.h @@ -24,8 +24,9 @@ wcscmp (const wchar_t *s1, const wchar_t *s2) wchar_t wc2 = *s2++; if (wc1 != (wchar_t)'\0' && wc1 == wc2) continue; - /* Note that wc1 and wc2 each have at most 31 bits. */ - return (int)wc1 - (int)wc2; + /* ISO C requires wcscmp to work with all wchar_t values. 
+ We cannot assume that wc1 and wc2 are in the range 0..INT_MAX. */ + return _GL_CMP (wc1, wc2); /* > 0 if wc1 > wc2, < 0 if wc1 < wc2, = 0 if wc1 and wc2 are both '\0'. */ } diff --git a/m4/wchar_h.m4 b/m4/wchar_h.m4 index dfd154f8e9..b9fa7cec84 100644 --- a/m4/wchar_h.m4 +++ b/m4/wchar_h.m4 @@ -7,7 +7,7 @@ dnl with or without modifications, as long as this notice is preserved. dnl Written by Eric Blake. -# wchar_h.m4 serial 58 +# wchar_h.m4 serial 59 AC_DEFUN_ONCE([gl_WCHAR_H], [ @@ -253,6 +253,7 @@ AC_DEFUN([gl_WCHAR_H_DEFAULTS], REPLACE_WCWIDTH=0; AC_SUBST([REPLACE_WCWIDTH]) REPLACE_WCSWIDTH=0; AC_SUBST([REPLACE_WCSWIDTH]) REPLACE_WCSFTIME=0; AC_SUBST([REPLACE_WCSFTIME]) + REPLACE_WCSCMP=0; AC_SUBST([REPLACE_WCSCMP]) REPLACE_WCSSTR=0; AC_SUBST([REPLACE_WCSSTR]) REPLACE_WCSTOK=0; AC_SUBST([REPLACE_WCSTOK]) REPLACE_WMEMCMP=0; AC_SUBST([REPLACE_WMEMCMP]) diff --git a/m4/wcscmp.m4 b/m4/wcscmp.m4 index 61fd800e4d..a3000ed275 100644 --- a/m4/wcscmp.m4 +++ b/m4/wcscmp.m4 @@ -1,4 +1,4 @@ -# wcscmp.m4 serial 2 +# wcscmp.m4 serial 3 dnl Copyright (C) 2011-2023 Free Software Foundation, Inc. dnl This file is free software; the Free Software Foundation dnl gives unlimited permission to copy and/or distribute it, @@ -10,5 +10,61 @@ AC_DEFUN([gl_FUNC_WCSCMP], AC_CHECK_FUNCS_ONCE([wcscmp]) if test $ac_cv_func_wcscmp = no; then HAVE_WCSCMP=0 + else + AC_CACHE_CHECK([whether wcscmp works for all wide characters], + [gl_cv_func_wcscmp_works], + [AC_RUN_IFELSE( + [AC_LANG_SOURCE([[ + #include <wchar.h> + int main () + { + int result = 0; + { /* This test fails on glibc < 2.15, musl libc 1.2.3, macOS 12.5, + FreeBSD 13.2, NetBSD 9.0, OpenBSD 7.2, Solaris 11.4. */ + wchar_t a[2] = { (wchar_t) 0x76547654, 0 }; + wchar_t b[2] = { (wchar_t) 0x9abc9abc, 0 }; + int cmp = wcscmp (a, b); + if (!((wchar_t)-1 < 0 ? cmp > 0 : cmp < 0)) + result |= 1; + } + { /* This test fails on AIX in 64-bit mode. 
*/ + wchar_t c[2] = { (wchar_t) 'x', 0 }; + wchar_t d[3] = { (wchar_t) 'x', (wchar_t) 0x9abc9abc, 0 }; + int cmp = wcscmp (c, d); + if (!((wchar_t)-1 < 0 ? cmp > 0 : cmp < 0)) + result |= 2; + } + return result; + } + ]]) + ], + [gl_cv_func_wcscmp_works=yes], + [gl_cv_func_wcscmp_works=no], + [case "$host_os" in + # Guess no on glibc versions < 2.15. + *-gnu* | gnu*) + AC_EGREP_CPP([Unlucky], + [ +#include <features.h> +#ifdef __GNU_LIBRARY__ + #if (__GLIBC__ == 2 && __GLIBC_MINOR__ < 15) + Unlucky GNU user + #endif +#endif + ], + [gl_cv_func_wcscmp_works="guessing no"], + [gl_cv_func_wcscmp_works="guessing yes"]) + ;; + # Guess no on musl systems. + *-musl* | midipix*) gl_cv_func_wcscmp_works="guessing no" ;; + # If we don't know, obey --enable-cross-guesses. + *) gl_cv_func_wcscmp_works="$gl_cross_guess_normal" ;; + esac + ]) + ]) + case "$gl_cv_func_wcscmp_works" in + *yes) ;; + *) REPLACE_WCSCMP=1 ;; + esac fi ]) diff --git a/modules/wchar b/modules/wchar index 180c94309c..88b442b525 100644 --- a/modules/wchar +++ b/modules/wchar @@ -142,6 +142,7 @@ wchar.h: wchar.in.h $(top_builddir)/config.status $(CXXDEFS_H) $(ARG_NONNULL_H) -e 's|@''REPLACE_WCWIDTH''@|$(REPLACE_WCWIDTH)|g' \ -e 's|@''REPLACE_WCSWIDTH''@|$(REPLACE_WCSWIDTH)|g' \ -e 's|@''REPLACE_WCSFTIME''@|$(REPLACE_WCSFTIME)|g' \ + -e 's|@''REPLACE_WCSCMP''@|$(REPLACE_WCSCMP)|g' \ -e 's|@''REPLACE_WCSSTR''@|$(REPLACE_WCSSTR)|g' \ -e 's|@''REPLACE_WCSTOK''@|$(REPLACE_WCSTOK)|g' \ -e 's|@''REPLACE_WMEMCMP''@|$(REPLACE_WMEMCMP)|g' \ diff --git a/modules/wcscmp b/modules/wcscmp index 1b4fde704a..6df70f0543 100644 --- a/modules/wcscmp +++ b/modules/wcscmp @@ -1,12 +1,6 @@ Description: wcscmp() function: compare two wide strings. -Status: -obsolete - -Notice: -This module is obsolete. 
- Files: lib/wcscmp.c lib/wcscmp-impl.h @@ -17,7 +11,8 @@ wchar configure.ac: gl_FUNC_WCSCMP -gl_CONDITIONAL([GL_COND_OBJ_WCSCMP], [test $HAVE_WCSCMP = 0]) +gl_CONDITIONAL([GL_COND_OBJ_WCSCMP], + [test $HAVE_WCSCMP = 0 || test $REPLACE_WCSCMP = 1]) gl_WCHAR_MODULE_INDICATOR([wcscmp]) Makefile.am: -- cgit v1.2.1