summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBruno Haible <bruno@clisp.org>2018-06-24 16:19:31 +0200
committerBruno Haible <bruno@clisp.org>2018-06-24 16:19:31 +0200
commit7903cdbf6022f3f28a94d484926672e865311905 (patch)
treeeeb583547973a92c3daa3562466d7ce0c23024ae
parent58ee14c1ec71f63018db2e8f2d0e3d9cde4f3019 (diff)
downloadgnulib-7903cdbf6022f3f28a94d484926672e865311905.tar.gz
mbrtowc, wcwidth: Fix MT-safety bug (regression from 2018-06-23).
* lib/mbrtowc.c (enc_t): New enum type. (locale_enc, locale_enc_cached): New functions. (mbrtowc): Eliminate static variables. Use locale_enc_cached instead. * lib/wcwidth.c (is_locale_utf8, is_locale_utf8_cached): New functions. (wcwidth): Eliminate static variables. Use is_locale_utf8_cached instead. * m4/mbrtowc.m4 (gl_PREREQ_MBRTOWC): Require AC_C_INLINE. * m4/wcwidth.m4 (gl_PREREQ_WCWIDTH): New macro. * modules/wcwidth (configure.ac): Invoke it.
-rw-r--r--ChangeLog13
-rw-r--r--lib/mbrtowc.c104
-rw-r--r--lib/wcwidth.c38
-rw-r--r--m4/mbrtowc.m43
-rw-r--r--m4/wcwidth.m48
-rw-r--r--modules/wcwidth1
6 files changed, 117 insertions, 50 deletions
diff --git a/ChangeLog b/ChangeLog
index f5ea912415..125c66b1bf 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,18 @@
2018-06-24 Bruno Haible <bruno@clisp.org>
+ mbrtowc, wcwidth: Fix MT-safety bug (regression from 2018-06-23).
+ * lib/mbrtowc.c (enc_t): New enum type.
+ (locale_enc, locale_enc_cached): New functions.
+ (mbrtowc): Eliminate static variables. Use locale_enc_cached instead.
+ * lib/wcwidth.c (is_locale_utf8, is_locale_utf8_cached): New functions.
+ (wcwidth): Eliminate static variables. Use is_locale_utf8_cached
+ instead.
+ * m4/mbrtowc.m4 (gl_PREREQ_MBRTOWC): Require AC_C_INLINE.
+ * m4/wcwidth.m4 (gl_PREREQ_WCWIDTH): New macro.
+ * modules/wcwidth (configure.ac): Invoke it.
+
+2018-06-24 Bruno Haible <bruno@clisp.org>
+
wchar-single: Fix test failure in wcwidth tests.
* tests/test-wcwidth.c (main): If the wchar-single module is present,
skip the tests in the C locale.
diff --git a/lib/mbrtowc.c b/lib/mbrtowc.c
index dc3597f29d..2f6df287a5 100644
--- a/lib/mbrtowc.c
+++ b/lib/mbrtowc.c
@@ -35,12 +35,60 @@
# include "streq.h"
# include "verify.h"
-#ifndef FALLTHROUGH
-# if __GNUC__ < 7
-# define FALLTHROUGH ((void) 0)
-# else
-# define FALLTHROUGH __attribute__ ((__fallthrough__))
+# ifndef FALLTHROUGH
+# if __GNUC__ < 7
+# define FALLTHROUGH ((void) 0)
+# else
+# define FALLTHROUGH __attribute__ ((__fallthrough__))
+# endif
# endif
+
+/* Returns a classification of special values of the encoding of the current
+ locale. */
+typedef enum {
+ enc_other, /* other */
+ enc_utf8, /* UTF-8 */
+ enc_eucjp, /* EUC-JP */
+ enc_94, /* EUC-KR, GB2312, BIG5 */
+ enc_euctw, /* EUC-TW */
+ enc_gb18030, /* GB18030 */
+ enc_sjis /* SJIS */
+} enc_t;
+static inline enc_t
+locale_enc (void)
+{
+ const char *encoding = locale_charset ();
+ if (STREQ_OPT (encoding, "UTF-8", 'U', 'T', 'F', '-', '8', 0, 0, 0, 0))
+ return enc_utf8;
+ if (STREQ_OPT (encoding, "EUC-JP", 'E', 'U', 'C', '-', 'J', 'P', 0, 0, 0))
+ return enc_eucjp;
+ if (STREQ_OPT (encoding, "EUC-KR", 'E', 'U', 'C', '-', 'K', 'R', 0, 0, 0)
+ || STREQ_OPT (encoding, "GB2312", 'G', 'B', '2', '3', '1', '2', 0, 0, 0)
+ || STREQ_OPT (encoding, "BIG5", 'B', 'I', 'G', '5', 0, 0, 0, 0, 0))
+ return enc_94;
+ if (STREQ_OPT (encoding, "EUC-TW", 'E', 'U', 'C', '-', 'T', 'W', 0, 0, 0))
+ return enc_euctw;
+ if (STREQ_OPT (encoding, "GB18030", 'G', 'B', '1', '8', '0', '3', '0', 0, 0))
+ return enc_gb18030;
+ if (STREQ_OPT (encoding, "SJIS", 'S', 'J', 'I', 'S', 0, 0, 0, 0, 0))
+ return enc_sjis;
+ return enc_other;
+}
+
+#if GNULIB_WCHAR_SINGLE
+/* When we know that the locale does not change, provide a speedup by
+ caching the value of locale_enc. */
+static int cached_locale_enc = -1;
+static inline enc_t
+locale_enc_cached (void)
+{
+ if (cached_locale_enc < 0)
+ cached_locale_enc = locale_enc ();
+ return cached_locale_enc;
+}
+#else
+/* By default, don't make assumptions, hence no caching. */
+# define locale_enc_cached locale_enc
#endif
verify (sizeof (mbstate_t) >= 4);
@@ -137,20 +185,9 @@ mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps)
if (m >= 4 || m >= MB_CUR_MAX)
goto invalid;
/* Here MB_CUR_MAX > 1 and 0 < m < 4. */
- {
- static int utf8_charset = -1;
- static const char *encoding;
-
-# if GNULIB_WCHAR_SINGLE
- if (utf8_charset == -1)
-# endif
- {
- encoding = locale_charset ();
- utf8_charset = STREQ_OPT (encoding,
- "UTF-8", 'U', 'T', 'F', '-', '8', 0, 0, 0 ,0);
- }
-
- if (utf8_charset)
+ switch (locale_enc_cached ())
+ {
+ case enc_utf8: /* UTF-8 */
{
/* Cf. unistr/u8-mblen.c. */
unsigned char c = (unsigned char) p[0];
@@ -207,8 +244,7 @@ mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps)
/* As a reference for this code, you can use the GNU libiconv
implementation. Look for uses of the RET_TOOFEW macro. */
- if (STREQ_OPT (encoding,
- "EUC-JP", 'E', 'U', 'C', '-', 'J', 'P', 0, 0, 0))
+ case enc_eucjp: /* EUC-JP */
{
if (m == 1)
{
@@ -231,12 +267,8 @@ mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps)
}
goto invalid;
}
- if (STREQ_OPT (encoding,
- "EUC-KR", 'E', 'U', 'C', '-', 'K', 'R', 0, 0, 0)
- || STREQ_OPT (encoding,
- "GB2312", 'G', 'B', '2', '3', '1', '2', 0, 0, 0)
- || STREQ_OPT (encoding,
- "BIG5", 'B', 'I', 'G', '5', 0, 0, 0, 0, 0))
+
+ case enc_94: /* EUC-KR, GB2312, BIG5 */
{
if (m == 1)
{
@@ -247,8 +279,8 @@ mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps)
}
goto invalid;
}
- if (STREQ_OPT (encoding,
- "EUC-TW", 'E', 'U', 'C', '-', 'T', 'W', 0, 0, 0))
+
+ case enc_euctw: /* EUC-TW */
{
if (m == 1)
{
@@ -266,8 +298,8 @@ mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps)
}
goto invalid;
}
- if (STREQ_OPT (encoding,
- "GB18030", 'G', 'B', '1', '8', '0', '3', '0', 0, 0))
+
+ case enc_gb18030: /* GB18030 */
{
if (m == 1)
{
@@ -300,7 +332,8 @@ mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps)
}
goto invalid;
}
- if (STREQ_OPT (encoding, "SJIS", 'S', 'J', 'I', 'S', 0, 0, 0, 0, 0))
+
+ case enc_sjis: /* SJIS */
{
if (m == 1)
{
@@ -313,9 +346,10 @@ mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps)
goto invalid;
}
- /* An unknown multibyte encoding. */
- goto incomplete;
- }
+ default:
+ /* An unknown multibyte encoding. */
+ goto incomplete;
+ }
incomplete:
{
diff --git a/lib/wcwidth.c b/lib/wcwidth.c
index d03a50f3eb..d33b6a9a55 100644
--- a/lib/wcwidth.c
+++ b/lib/wcwidth.c
@@ -26,28 +26,40 @@
#include "streq.h"
#include "uniwidth.h"
-int
-wcwidth (wchar_t wc)
-#undef wcwidth
+/* Returns 1 if the current locale is an UTF-8 locale, 0 otherwise. */
+static inline int
+is_locale_utf8 (void)
{
- static int utf8_charset = -1;
- static const char *encoding;
+ const char *encoding = locale_charset ();
+ return STREQ_OPT (encoding, "UTF-8", 'U', 'T', 'F', '-', '8', 0, 0, 0, 0);
+}
#if GNULIB_WCHAR_SINGLE
- if (utf8_charset == -1)
+/* When we know that the locale does not change, provide a speedup by
+ caching the value of is_locale_utf8. */
+static int cached_is_locale_utf8 = -1;
+static inline int
+is_locale_utf8_cached (void)
+{
+ if (cached_is_locale_utf8 < 0)
+ cached_is_locale_utf8 = is_locale_utf8 ();
+ return cached_is_locale_utf8;
+}
+#else
+/* By default, don't make assumptions, hence no caching. */
+# define is_locale_utf8_cached is_locale_utf8
#endif
- {
- encoding = locale_charset ();
- utf8_charset = STREQ_OPT (encoding,
- "UTF-8", 'U', 'T', 'F', '-', '8', 0, 0, 0 ,0);
- }
+int
+wcwidth (wchar_t wc)
+#undef wcwidth
+{
/* In UTF-8 locales, use a Unicode aware width function. */
- if (utf8_charset)
+ if (is_locale_utf8_cached ())
{
/* We assume that in a UTF-8 locale, a wide character is the same as a
Unicode character. */
- return uc_width (wc, encoding);
+ return uc_width (wc, "UTF-8");
}
else
{
diff --git a/m4/mbrtowc.m4 b/m4/mbrtowc.m4
index f789875eb4..c706d04bca 100644
--- a/m4/mbrtowc.m4
+++ b/m4/mbrtowc.m4
@@ -1,4 +1,4 @@
-# mbrtowc.m4 serial 30 -*- coding: utf-8 -*-
+# mbrtowc.m4 serial 31 -*- coding: utf-8 -*-
dnl Copyright (C) 2001-2002, 2004-2005, 2008-2018 Free Software Foundation,
dnl Inc.
dnl This file is free software; the Free Software Foundation
@@ -634,6 +634,7 @@ AC_DEFUN([gl_MBRTOWC_C_LOCALE],
# Prerequisites of lib/mbrtowc.c.
AC_DEFUN([gl_PREREQ_MBRTOWC], [
+ AC_REQUIRE([AC_C_INLINE])
:
])
diff --git a/m4/wcwidth.m4 b/m4/wcwidth.m4
index 0605ce8c90..1cd489b667 100644
--- a/m4/wcwidth.m4
+++ b/m4/wcwidth.m4
@@ -1,4 +1,4 @@
-# wcwidth.m4 serial 26
+# wcwidth.m4 serial 27
dnl Copyright (C) 2006-2018 Free Software Foundation, Inc.
dnl This file is free software; the Free Software Foundation
dnl gives unlimited permission to copy and/or distribute it,
@@ -115,3 +115,9 @@ changequote([,])dnl
dnl We don't substitute HAVE_WCWIDTH. We assume that if the system does not
dnl have the wcwidth function, then it does not declare it.
])
+
+# Prerequisites of lib/wcwidth.c.
+AC_DEFUN([gl_PREREQ_WCWIDTH], [
+ AC_REQUIRE([AC_C_INLINE])
+ :
+])
diff --git a/modules/wcwidth b/modules/wcwidth
index 5a27713dcc..372c21041a 100644
--- a/modules/wcwidth
+++ b/modules/wcwidth
@@ -19,6 +19,7 @@ configure.ac:
gl_FUNC_WCWIDTH
if test $HAVE_WCWIDTH = 0 || test $REPLACE_WCWIDTH = 1; then
AC_LIBOBJ([wcwidth])
+ gl_PREREQ_WCWIDTH
fi
gl_WCHAR_MODULE_INDICATOR([wcwidth])