diff options
author | Bruno Haible <bruno@clisp.org> | 2018-06-24 16:19:31 +0200 |
---|---|---|
committer | Bruno Haible <bruno@clisp.org> | 2018-06-24 16:19:31 +0200 |
commit | 7903cdbf6022f3f28a94d484926672e865311905 (patch) | |
tree | eeb583547973a92c3daa3562466d7ce0c23024ae /lib/mbrtowc.c | |
parent | 58ee14c1ec71f63018db2e8f2d0e3d9cde4f3019 (diff) | |
download | gnulib-7903cdbf6022f3f28a94d484926672e865311905.tar.gz |
mbrtowc, wcwidth: Fix MT-safety bug (regression from 2018-06-23).
* lib/mbrtowc.c (enc_t): New enum type.
(locale_enc, locale_enc_cached): New functions.
(mbrtowc): Eliminate static variables. Use locale_enc_cached instead.
* lib/wcwidth.c (is_locale_utf8, is_locale_utf8_cached): New functions.
(wcwidth): Eliminate static variables. Use is_locale_utf8_cached
instead.
* m4/mbrtowc.m4 (gl_PREREQ_MBRTOWC): Require AC_C_INLINE.
* m4/wcwidth.m4 (gl_PREREQ_WCWIDTH): New macro.
* modules/wcwidth (configure.ac): Invoke it.
Diffstat (limited to 'lib/mbrtowc.c')
-rw-r--r-- | lib/mbrtowc.c | 104 |
1 files changed, 69 insertions, 35 deletions
diff --git a/lib/mbrtowc.c b/lib/mbrtowc.c index dc3597f29d..2f6df287a5 100644 --- a/lib/mbrtowc.c +++ b/lib/mbrtowc.c @@ -35,12 +35,60 @@ # include "streq.h" # include "verify.h" -#ifndef FALLTHROUGH -# if __GNUC__ < 7 -# define FALLTHROUGH ((void) 0) -# else -# define FALLTHROUGH __attribute__ ((__fallthrough__)) +# ifndef FALLTHROUGH +# if __GNUC__ < 7 +# define FALLTHROUGH ((void) 0) +# else +# define FALLTHROUGH __attribute__ ((__fallthrough__)) +# endif # endif + +/* Returns a classification of special values of the encoding of the current + locale. */ +typedef enum { + enc_other, /* other */ + enc_utf8, /* UTF-8 */ + enc_eucjp, /* EUC-JP */ + enc_94, /* EUC-KR, GB2312, BIG5 */ + enc_euctw, /* EUC-TW */ + enc_gb18030, /* GB18030 */ + enc_sjis /* SJIS */ +} enc_t; +static inline enc_t +locale_enc (void) +{ + const char *encoding = locale_charset (); + if (STREQ_OPT (encoding, "UTF-8", 'U', 'T', 'F', '-', '8', 0, 0, 0, 0)) + return enc_utf8; + if (STREQ_OPT (encoding, "EUC-JP", 'E', 'U', 'C', '-', 'J', 'P', 0, 0, 0)) + return enc_eucjp; + if (STREQ_OPT (encoding, "EUC-KR", 'E', 'U', 'C', '-', 'K', 'R', 0, 0, 0) + || STREQ_OPT (encoding, "GB2312", 'G', 'B', '2', '3', '1', '2', 0, 0, 0) + || STREQ_OPT (encoding, "BIG5", 'B', 'I', 'G', '5', 0, 0, 0, 0, 0)) + return enc_94; + if (STREQ_OPT (encoding, "EUC-TW", 'E', 'U', 'C', '-', 'T', 'W', 0, 0, 0)) + return enc_euctw; + if (STREQ_OPT (encoding, "GB18030", 'G', 'B', '1', '8', '0', '3', '0', 0, 0)) + return enc_gb18030; + if (STREQ_OPT (encoding, "SJIS", 'S', 'J', 'I', 'S', 0, 0, 0, 0, 0)) + return enc_sjis; + return enc_other; +} + +#if GNULIB_WCHAR_SINGLE +/* When we know that the locale does not change, provide a speedup by + caching the value of locale_enc. */ +static int cached_locale_enc = -1; +static inline enc_t +locale_enc_cached (void) +{ + if (cached_locale_enc < 0) + cached_locale_enc = locale_enc (); + return cached_locale_enc; +} +#else +/* By default, don't make assumptions, hence no caching. */ +# define locale_enc_cached locale_enc #endif verify (sizeof (mbstate_t) >= 4); @@ -137,20 +185,9 @@ mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps) if (m >= 4 || m >= MB_CUR_MAX) goto invalid; /* Here MB_CUR_MAX > 1 and 0 < m < 4. */ - { - static int utf8_charset = -1; - static const char *encoding; - -# if GNULIB_WCHAR_SINGLE - if (utf8_charset == -1) -# endif - { - encoding = locale_charset (); - utf8_charset = STREQ_OPT (encoding, - "UTF-8", 'U', 'T', 'F', '-', '8', 0, 0, 0 ,0); - } - - if (utf8_charset) + switch (locale_enc_cached ()) + { + case enc_utf8: /* UTF-8 */ { /* Cf. unistr/u8-mblen.c. */ unsigned char c = (unsigned char) p[0]; @@ -207,8 +244,7 @@ mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps) /* As a reference for this code, you can use the GNU libiconv implementation. Look for uses of the RET_TOOFEW macro. */ - if (STREQ_OPT (encoding, - "EUC-JP", 'E', 'U', 'C', '-', 'J', 'P', 0, 0, 0)) + case enc_eucjp: /* EUC-JP */ { if (m == 1) { @@ -231,12 +267,8 @@ mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps) } goto invalid; } - if (STREQ_OPT (encoding, - "EUC-KR", 'E', 'U', 'C', '-', 'K', 'R', 0, 0, 0) - || STREQ_OPT (encoding, - "GB2312", 'G', 'B', '2', '3', '1', '2', 0, 0, 0) - || STREQ_OPT (encoding, - "BIG5", 'B', 'I', 'G', '5', 0, 0, 0, 0, 0)) + + case enc_94: /* EUC-KR, GB2312, BIG5 */ { if (m == 1) { @@ -247,8 +279,8 @@ mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps) } goto invalid; } - if (STREQ_OPT (encoding, - "EUC-TW", 'E', 'U', 'C', '-', 'T', 'W', 0, 0, 0)) + + case enc_euctw: /* EUC-TW */ { if (m == 1) { @@ -266,8 +298,8 @@ mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps) } goto invalid; } - if (STREQ_OPT (encoding, - "GB18030", 'G', 'B', '1', '8', '0', '3', '0', 0, 0)) + + case enc_gb18030: /* GB18030 */ { if (m == 1) { @@ -300,7 +332,8 @@ mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps) } goto invalid; } - if (STREQ_OPT (encoding, "SJIS", 'S', 'J', 'I', 'S', 0, 0, 0, 0, 0)) + + case enc_sjis: /* SJIS */ { if (m == 1) { @@ -313,9 +346,10 @@ mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps) goto invalid; } - /* An unknown multibyte encoding. */ - goto incomplete; - } + default: + /* An unknown multibyte encoding. */ + goto incomplete; + } incomplete: { |