diff options
author | Vladislav Vaintroub <wlad@mariadb.com> | 2019-06-17 09:56:00 +0100 |
---|---|---|
committer | Vladislav Vaintroub <wlad@mariadb.com> | 2019-06-17 18:04:47 +0100 |
commit | 5804bb4ef0acd7ac42e628c2d8c404957dc86cf9 (patch) | |
tree | d8180213953c76959d0a19aeb9c3e688722b23e2 /mysys/charset.c | |
parent | 81f60e8adeb241ed845427e0792349f3a05af99b (diff) | |
download | mariadb-git-5804bb4ef0acd7ac42e628c2d8c404957dc86cf9.tar.gz |
MDEV-19750 mysql command wrong encoding
Restore the detection of default charset in command line utilities.
It worked up to 10.1, but was broken by Connector/C.
Moved the code for detecting the default charset from sql-common/client.c
to mysys, and made the command-line utilities use this code if the charset
was not specified on the command line.
Diffstat (limited to 'mysys/charset.c')
-rw-r--r-- | mysys/charset.c | 218 |
1 files changed, 217 insertions, 1 deletions
/*
  Code added to mysys/charset.c: optional locale headers, a table that maps
  operating-system character set names to MySQL character set names, and
  my_default_csname(), which uses that table to choose a default character
  set name.
*/
#ifdef HAVE_LANGINFO_H
#include <langinfo.h>
#endif
#ifdef HAVE_LOCALE_H
#include <locale.h>
#endif


/* How closely a MySQL character set corresponds to an OS character set. */
typedef enum my_cs_match_type_enum
{
  /* MySQL and OS charsets are fully compatible */
  my_cs_exact,
  /* MySQL charset is very close to OS charset */
  my_cs_approx,
  /*
    MySQL knows this charset, but it is not supported as client character set.
  */
  my_cs_unsupp
} my_cs_match_type;


/* One row of the OS-name to MySQL-name mapping table. */
typedef struct str2str_st
{
  const char *os_name;     /* name reported by the OS; NULL ends the table */
  const char *my_name;     /* MySQL character set name to use for it */
  my_cs_match_type param;  /* quality of the correspondence */
} MY_CSET_OS_NAME;


/*
  Mapping table, terminated by a row whose os_name is NULL.
  On Windows the OS name is "cp" followed by the code page number;
  elsewhere it is matched against the nl_langinfo(CODESET) string.
  Lookups compare os_name case-insensitively.
*/
static const MY_CSET_OS_NAME charsets[]=
{
#ifdef _WIN32
  {"cp437",          "cp850",    my_cs_approx},
  {"cp850",          "cp850",    my_cs_exact},
  {"cp852",          "cp852",    my_cs_exact},
  {"cp858",          "cp850",    my_cs_approx},
  {"cp866",          "cp866",    my_cs_exact},
  {"cp874",          "tis620",   my_cs_approx},
  {"cp932",          "cp932",    my_cs_exact},
  {"cp936",          "gbk",      my_cs_approx},
  {"cp949",          "euckr",    my_cs_approx},
  {"cp950",          "big5",     my_cs_exact},
  {"cp1200",         "utf16le",  my_cs_unsupp},
  {"cp1201",         "utf16",    my_cs_unsupp},
  {"cp1250",         "cp1250",   my_cs_exact},
  {"cp1251",         "cp1251",   my_cs_exact},
  {"cp1252",         "latin1",   my_cs_exact},
  {"cp1253",         "greek",    my_cs_exact},
  {"cp1254",         "latin5",   my_cs_exact},
  {"cp1255",         "hebrew",   my_cs_approx},
  {"cp1256",         "cp1256",   my_cs_exact},
  {"cp1257",         "cp1257",   my_cs_exact},
  {"cp10000",        "macroman", my_cs_exact},
  {"cp10001",        "sjis",     my_cs_approx},
  {"cp10002",        "big5",     my_cs_approx},
  {"cp10008",        "gb2312",   my_cs_approx},
  {"cp10021",        "tis620",   my_cs_approx},
  {"cp10029",        "macce",    my_cs_exact},
  {"cp12001",        "utf32",    my_cs_unsupp},
  {"cp20107",        "swe7",     my_cs_exact},
  {"cp20127",        "latin1",   my_cs_approx},
  {"cp20866",        "koi8r",    my_cs_exact},
  {"cp20932",        "ujis",     my_cs_exact},
  {"cp20936",        "gb2312",   my_cs_approx},
  {"cp20949",        "euckr",    my_cs_approx},
  {"cp21866",        "koi8u",    my_cs_exact},
  {"cp28591",        "latin1",   my_cs_approx},
  {"cp28592",        "latin2",   my_cs_exact},
  {"cp28597",        "greek",    my_cs_exact},
  {"cp28598",        "hebrew",   my_cs_exact},
  {"cp28599",        "latin5",   my_cs_exact},
  {"cp28603",        "latin7",   my_cs_exact},
#ifdef UNCOMMENT_THIS_WHEN_WL_4579_IS_DONE
  {"cp28605",        "latin9",   my_cs_exact},
#endif
  {"cp38598",        "hebrew",   my_cs_exact},
  {"cp51932",        "ujis",     my_cs_exact},
  {"cp51936",        "gb2312",   my_cs_exact},
  {"cp51949",        "euckr",    my_cs_exact},
  {"cp51950",        "big5",     my_cs_exact},
#ifdef UNCOMMENT_THIS_WHEN_WL_WL_4024_IS_DONE
  {"cp54936",        "gb18030",  my_cs_exact},
#endif
  {"cp65001",        "utf8",     my_cs_exact},

#else /* not Windows */

  {"646",            "latin1",   my_cs_approx}, /* Default on Solaris */
  {"ANSI_X3.4-1968", "latin1",   my_cs_approx},
  {"ansi1251",       "cp1251",   my_cs_exact},
  {"armscii8",       "armscii8", my_cs_exact},
  {"armscii-8",      "armscii8", my_cs_exact},
  {"ASCII",          "latin1",   my_cs_approx},
  {"Big5",           "big5",     my_cs_exact},
  {"cp1251",         "cp1251",   my_cs_exact},
  {"cp1255",         "hebrew",   my_cs_approx},
  {"CP866",          "cp866",    my_cs_exact},
  {"eucCN",          "gb2312",   my_cs_exact},
  {"euc-CN",         "gb2312",   my_cs_exact},
  {"eucJP",          "ujis",     my_cs_exact},
  {"euc-JP",         "ujis",     my_cs_exact},
  {"eucKR",          "euckr",    my_cs_exact},
  {"euc-KR",         "euckr",    my_cs_exact},
#ifdef UNCOMMENT_THIS_WHEN_WL_WL_4024_IS_DONE
  {"gb18030",        "gb18030",  my_cs_exact},
#endif
  {"gb2312",         "gb2312",   my_cs_exact},
  {"gbk",            "gbk",      my_cs_exact},
  {"georgianps",     "geostd8",  my_cs_exact},
  {"georgian-ps",    "geostd8",  my_cs_exact},
  /*
    Code page 1252 is named "latin1" on the MySQL side (see the Windows
    "cp1252" row above); "cp1252" itself is not a MySQL character set name.
  */
  {"IBM-1252",       "latin1",   my_cs_exact},

  {"iso88591",       "latin1",   my_cs_approx},
  {"ISO_8859-1",     "latin1",   my_cs_approx},
  {"ISO8859-1",      "latin1",   my_cs_approx},
  {"ISO-8859-1",     "latin1",   my_cs_approx},

  {"iso885913",      "latin7",   my_cs_exact},
  {"ISO_8859-13",    "latin7",   my_cs_exact},
  {"ISO8859-13",     "latin7",   my_cs_exact},
  {"ISO-8859-13",    "latin7",   my_cs_exact},

#ifdef UNCOMMENT_THIS_WHEN_WL_4579_IS_DONE
  {"iso885915",      "latin9",   my_cs_exact},
  {"ISO_8859-15",    "latin9",   my_cs_exact},
  {"ISO8859-15",     "latin9",   my_cs_exact},
  {"ISO-8859-15",    "latin9",   my_cs_exact},
#endif

  {"iso88592",       "latin2",   my_cs_exact},
  {"ISO_8859-2",     "latin2",   my_cs_exact},
  {"ISO8859-2",      "latin2",   my_cs_exact},
  {"ISO-8859-2",     "latin2",   my_cs_exact},

  {"iso88597",       "greek",    my_cs_exact},
  {"ISO_8859-7",     "greek",    my_cs_exact},
  {"ISO8859-7",      "greek",    my_cs_exact},
  {"ISO-8859-7",     "greek",    my_cs_exact},

  {"iso88598",       "hebrew",   my_cs_exact},
  {"ISO_8859-8",     "hebrew",   my_cs_exact},
  {"ISO8859-8",      "hebrew",   my_cs_exact},
  {"ISO-8859-8",     "hebrew",   my_cs_exact},

  {"iso88599",       "latin5",   my_cs_exact},
  {"ISO_8859-9",     "latin5",   my_cs_exact},
  {"ISO8859-9",      "latin5",   my_cs_exact},
  {"ISO-8859-9",     "latin5",   my_cs_exact},

  {"koi8r",          "koi8r",    my_cs_exact},
  {"KOI8-R",         "koi8r",    my_cs_exact},
  {"koi8u",          "koi8u",    my_cs_exact},
  {"KOI8-U",         "koi8u",    my_cs_exact},

  {"roman8",         "hp8",      my_cs_exact}, /* Default on HP UX */

  {"Shift_JIS",      "sjis",     my_cs_exact},
  {"SJIS",           "sjis",     my_cs_exact},
  {"shiftjisx0213",  "sjis",     my_cs_exact},

  {"tis620",         "tis620",   my_cs_exact},
  {"tis-620",        "tis620",   my_cs_exact},

  {"ujis",           "ujis",     my_cs_exact},

  {"US-ASCII",       "latin1",   my_cs_approx},

  {"utf8",           "utf8",     my_cs_exact},
  {"utf-8",          "utf8",     my_cs_exact},
#endif
  {NULL,             NULL,       0}             /* end-of-table marker */
};


/*
  Translate an OS character set name into a MySQL character set name.

  @param csname  OS character set name; compared case-insensitively
                 against the os_name column of charsets[]. May be NULL.

  @return  the my_name of the first matching row when that row is marked
           my_cs_exact or my_cs_approx; NULL when csname is NULL, when no
           row matches, or when the matching row is marked my_cs_unsupp.
           The returned pointer refers to a string literal in the table.
*/
static const char *
my_os_charset_to_mysql_charset(const char *csname)
{
  const MY_CSET_OS_NAME *csp;

  /* strcasecmp() must not be handed a NULL pointer. */
  if (!csname)
    return NULL;

  for (csp= charsets; csp->os_name; csp++)
  {
    if (strcasecmp(csp->os_name, csname) != 0)
      continue;

    /* The first matching row decides the result. */
    switch (csp->param)
    {
    case my_cs_exact:
      return csp->my_name;

    case my_cs_approx:
      /*
        Maybe we should print a warning eventually:
        character set correspondence is not exact.
      */
      return csp->my_name;

    default:
      return NULL;
    }
  }
  return NULL;
}


/*
  Determine the default character set name from the environment.

  On Windows the console code page is looked up in charsets[], with the
  value of GetACP() used instead when GetConsoleCP() returns 0.
  Where both setlocale() and nl_langinfo() are available, LC_CTYPE is
  set from the environment (this changes the process locale) and the
  CODESET name is looked up.

  @return  the mapped MySQL character set name, or
           MYSQL_DEFAULT_CHARSET_NAME when detection is unavailable or
           the OS name has no usable mapping. Never NULL as long as
           MYSQL_DEFAULT_CHARSET_NAME is not NULL.
*/
const char *my_default_csname(void)
{
  const char *csname= NULL;
#ifdef _WIN32
  char cpbuf[64];
  int cp= GetConsoleCP();
  if (cp == 0)
    cp= GetACP();
  /* Table keys for Windows have the form "cp<number>". */
  snprintf(cpbuf, sizeof(cpbuf), "cp%d", (int) cp);
  csname= my_os_charset_to_mysql_charset(cpbuf);
#elif defined(HAVE_SETLOCALE) && defined(HAVE_NL_LANGINFO)
  if (setlocale(LC_CTYPE, "") && (csname= nl_langinfo(CODESET)))
    csname= my_os_charset_to_mysql_charset(csname);
#endif
  return csname ? csname : MYSQL_DEFAULT_CHARSET_NAME;
}
\ No newline at end of file |