/* Test of conversion of string to 32-bit wide string. Copyright (C) 2008-2023 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ /* Written by Bruno Haible , 2008. */ #include #include #include "signature.h" SIGNATURE_CHECK (mbstoc32s, size_t, (char32_t *, const char *, size_t)); #include #include #include #include "macros.h" int main (int argc, char *argv[]) { char32_t wc; size_t ret; /* configure should already have checked that the locale is supported. */ if (setlocale (LC_ALL, "") == NULL) return 1; /* Test NUL byte input. */ { const char *src; src = ""; ret = mbstoc32s (NULL, src, 0); ASSERT (ret == 0); src = ""; ret = mbstoc32s (NULL, src, 1); ASSERT (ret == 0); wc = (char32_t) 0xBADFACE; src = ""; ret = mbstoc32s (&wc, src, 0); ASSERT (ret == 0); ASSERT (wc == (char32_t) 0xBADFACE); wc = (char32_t) 0xBADFACE; src = ""; ret = mbstoc32s (&wc, src, 1); ASSERT (ret == 0); ASSERT (wc == 0); } #ifdef __ANDROID__ /* On Android ≥ 5.0, the default locale is the "C.UTF-8" locale, not the "C" locale. Furthermore, when you attempt to set the "C" or "POSIX" locale via setlocale(), what you get is a "C" locale with UTF-8 encoding, that is, effectively the "C.UTF-8" locale. */ if (argc > 1 && strcmp (argv[1], "5") == 0 && MB_CUR_MAX > 1) argv[1] = "2"; #endif if (argc > 1) { int unlimited; for (unlimited = 0; unlimited < 2; unlimited++) { #define BUFSIZE 10 char32_t buf[BUFSIZE]; const char *src; { size_t i; for (i = 0; i < BUFSIZE; i++) buf[i] = (char32_t) 0xBADFACE; } switch (argv[1][0]) { case '1': /* Locale encoding is ISO-8859-1 or ISO-8859-15. */ { char input[] = "B\374\337er"; /* "Büßer" */ wc = (char32_t) 0xBADFACE; ret = mbstoc32s (&wc, input, 1); ASSERT (ret == 1); ASSERT (wc == 'B'); input[0] = '\0'; wc = (char32_t) 0xBADFACE; ret = mbstoc32s (&wc, input + 1, 1); ASSERT (ret == 1); ASSERT (c32tob (wc) == (unsigned char) '\374'); input[1] = '\0'; src = input + 2; ret = mbstoc32s (NULL, src, unlimited ? BUFSIZE : 1); ASSERT (ret == 3); src = input + 2; ret = mbstoc32s (buf, src, unlimited ? BUFSIZE : 1); ASSERT (ret == (unlimited ? 3 : 1)); ASSERT (c32tob (buf[0]) == (unsigned char) '\337'); if (unlimited) { ASSERT (buf[1] == 'e'); ASSERT (buf[2] == 'r'); ASSERT (buf[3] == 0); ASSERT (buf[4] == (char32_t) 0xBADFACE); } else ASSERT (buf[1] == (char32_t) 0xBADFACE); } break; case '2': /* Locale encoding is UTF-8. */ { char input[] = "s\303\274\303\237\360\237\230\213!"; /* "süß😋!" */ wc = (char32_t) 0xBADFACE; ret = mbstoc32s (&wc, input, 1); ASSERT (ret == 1); ASSERT (wc == 's'); input[0] = '\0'; wc = (char32_t) 0xBADFACE; ret = mbstoc32s (&wc, input + 1, 1); ASSERT (ret == 1); ASSERT (wc == 0x00FC); /* expect Unicode encoding */ input[1] = '\0'; input[2] = '\0'; src = input + 3; ret = mbstoc32s (NULL, src, unlimited ? BUFSIZE : 2); ASSERT (ret == 3); src = input + 3; ret = mbstoc32s (buf, src, unlimited ? BUFSIZE : 2); ASSERT (ret == (unlimited ? 3 : 2)); ASSERT (buf[0] == 0x00DF); /* expect Unicode encoding */ ASSERT (buf[1] == 0x1F60B); /* expect Unicode encoding */ if (unlimited) { ASSERT (buf[2] == '!'); ASSERT (buf[3] == 0); ASSERT (buf[4] == (char32_t) 0xBADFACE); } else ASSERT (buf[2] == (char32_t) 0xBADFACE); } break; case '3': /* Locale encoding is EUC-JP. */ { char input[] = "<\306\374\313\334\270\354>"; /* "<日本語>" */ wc = (char32_t) 0xBADFACE; ret = mbstoc32s (&wc, input, 1); ASSERT (ret == 1); ASSERT (wc == '<'); input[0] = '\0'; wc = (char32_t) 0xBADFACE; ret = mbstoc32s (&wc, input + 1, 1); ASSERT (ret == 1); ASSERT (c32tob (wc) == EOF); input[1] = '\0'; input[2] = '\0'; src = input + 3; ret = mbstoc32s (NULL, src, unlimited ? BUFSIZE : 2); ASSERT (ret == 3); src = input + 3; ret = mbstoc32s (buf, src, unlimited ? BUFSIZE : 2); ASSERT (ret == (unlimited ? 3 : 2)); ASSERT (c32tob (buf[0]) == EOF); ASSERT (c32tob (buf[1]) == EOF); if (unlimited) { ASSERT (buf[2] == '>'); ASSERT (buf[3] == 0); ASSERT (buf[4] == (char32_t) 0xBADFACE); } else ASSERT (buf[2] == (char32_t) 0xBADFACE); } break; case '4': /* Locale encoding is GB18030. */ { char input[] = "s\250\271\201\060\211\070\224\071\375\067!"; /* "süß😋!" */ wc = (char32_t) 0xBADFACE; ret = mbstoc32s (&wc, input, 1); ASSERT (ret == 1); ASSERT (wc == 's'); input[0] = '\0'; wc = (char32_t) 0xBADFACE; ret = mbstoc32s (&wc, input + 1, 1); ASSERT (ret == 1); ASSERT (c32tob (wc) == EOF); input[1] = '\0'; src = input + 3; ret = mbstoc32s (NULL, src, unlimited ? BUFSIZE : 2); ASSERT (ret == 3); src = input + 3; ret = mbstoc32s (buf, src, unlimited ? BUFSIZE : 2); ASSERT (ret == (unlimited ? 3 : 2)); ASSERT (c32tob (buf[0]) == EOF); ASSERT (c32tob (buf[1]) == EOF); if (unlimited) { ASSERT (buf[2] == '!'); ASSERT (buf[3] == 0); ASSERT (buf[4] == (char32_t) 0xBADFACE); } else ASSERT (buf[2] == (char32_t) 0xBADFACE); } break; case '5': /* C or POSIX locale. */ { char input[] = "n/a"; src = input; ret = mbstoc32s (NULL, src, unlimited ? BUFSIZE : 1); ASSERT (ret == 3); src = input; ret = mbstoc32s (buf, src, unlimited ? BUFSIZE : 1); ASSERT (ret == (unlimited ? 3 : 1)); ASSERT (buf[0] == 'n'); if (unlimited) { ASSERT (buf[1] == '/'); ASSERT (buf[2] == 'a'); ASSERT (buf[3] == 0); ASSERT (buf[4] == (char32_t) 0xBADFACE); } else ASSERT (buf[1] == (char32_t) 0xBADFACE); } { int c; char input[2]; for (c = 0; c < 0x100; c++) if (c != 0) { /* We are testing all nonnull bytes. */ input[0] = c; input[1] = '\0'; src = input; ret = mbstoc32s (NULL, src, unlimited ? BUFSIZE : 1); ASSERT (ret == 1); buf[0] = buf[1] = (char32_t) 0xBADFACE; src = input; ret = mbstoc32s (buf, src, unlimited ? BUFSIZE : 1); /* POSIX:2018 says regarding mbstowcs: "In the POSIX locale an [EILSEQ] error cannot occur since all byte values are valid characters." It is reasonable to expect mbstoc32s to behave in the same way. */ ASSERT (ret == 1); if (c < 0x80) /* c is an ASCII character. */ ASSERT (buf[0] == c); else /* On most platforms, the bytes 0x80..0xFF map to U+0080..U+00FF. But on musl libc, the bytes 0x80..0xFF map to U+DF80..U+DFFF. */ ASSERT (buf[0] == (btoc32 (c) == 0xDF00 + c ? btoc32 (c) : c)); } } break; default: return 1; } } return 0; } return 1; }