diff options
author | Alexander Barkov <bar@mysql.com> | 2009-10-21 17:59:47 +0500 |
---|---|---|
committer | Alexander Barkov <bar@mysql.com> | 2009-10-21 17:59:47 +0500 |
commit | 99eae48a97fdf4399ca55b7e28ff97a9332f0f23 (patch) | |
tree | 9daa322eb14aa0cd7af60162e5b2b6c77446def6 /sql-common/client.c | |
parent | 1b26332898746bd3738fec850eb1206114c19448 (diff) | |
download | mariadb-git-99eae48a97fdf4399ca55b7e28ff97a9332f0f23.tar.gz |
WL#1349 Use operating system localization to send it as a default client character set
Diffstat (limited to 'sql-common/client.c')
-rw-r--r-- | sql-common/client.c | 320 |
1 files changed, 283 insertions, 37 deletions
diff --git a/sql-common/client.c b/sql-common/client.c index 3ee6c600387..5ae3a2816a5 100644 --- a/sql-common/client.c +++ b/sql-common/client.c @@ -1819,52 +1819,298 @@ static MYSQL_METHODS client_methods= #endif }; + + +typedef enum my_cs_match_type_enum +{ + /* MySQL and OS charsets are fully compatible */ + my_cs_exact, + /* MySQL charset is very close to OS charset */ + my_cs_approx, + /* + MySQL knows this charset, but it is not supported as client character set. + */ + my_cs_unsupp +} my_cs_match_type; + + +typedef struct str2str_st +{ + const char *os_name; + const char *my_name; + my_cs_match_type param; +} MY_CSET_OS_NAME; + +const MY_CSET_OS_NAME charsets[]= +{ +#ifdef __WIN__ + {"cp437", "cp850", my_cs_approx}, + {"cp850", "cp850", my_cs_exact}, + {"cp852", "cp852", my_cs_exact}, + {"cp858", "cp850", my_cs_approx}, + {"cp866", "cp866", my_cs_exact}, + {"cp874", "tis620", my_cs_approx}, + {"cp932", "cp932", my_cs_exact}, + {"cp936", "gbk", my_cs_approx}, + {"cp949", "euckr", my_cs_approx}, + {"cp950", "big5", my_cs_exact}, + {"cp1200", "utf16le", my_cs_unsupp}, + {"cp1201", "utf16", my_cs_unsupp}, + {"cp1250", "cp1250", my_cs_exact}, + {"cp1251", "cp1251", my_cs_exact}, + {"cp1252", "latin1", my_cs_exact}, + {"cp1253", "greek", my_cs_exact}, + {"cp1254", "latin5", my_cs_exact}, + {"cp1255", "hebrew", my_cs_approx}, + {"cp1256", "cp1256", my_cs_exact}, + {"cp1257", "cp1257", my_cs_exact}, + {"cp10000", "macroman", my_cs_exact}, + {"cp10001", "sjis", my_cs_approx}, + {"cp10002", "big5", my_cs_approx}, + {"cp10008", "gb2312", my_cs_approx}, + {"cp10021", "tis620", my_cs_approx}, + {"cp10029", "macce", my_cs_exact}, + {"cp12001", "utf32", my_cs_unsupp}, + {"cp20107", "swe7", my_cs_exact}, + {"cp20127", "ascii", my_cs_exact}, + {"cp20866", "koi8r", my_cs_exact}, + {"cp20932", "ujis", my_cs_exact}, + {"cp20936", "gb2312", my_cs_approx}, + {"cp20949", "euckr", my_cs_approx}, + {"cp21866", "koi8u", my_cs_exact}, + {"cp28591", "latin1", my_cs_approx}, + {"cp28592", "latin2", my_cs_exact}, + {"cp28597", "greek", my_cs_exact}, + {"cp28598", "hebrew", my_cs_exact}, + {"cp28599", "latin5", my_cs_exact}, + {"cp28603", "latin7", my_cs_exact}, +#ifdef UNCOMMENT_THIS_WHEN_WL_4579_IS_DONE + {"cp28605", "latin9", my_cs_exact}, +#endif + {"cp38598", "hebrew", my_cs_exact}, + {"cp51932", "ujis", my_cs_exact}, + {"cp51936", "gb2312", my_cs_exact}, + {"cp51949", "euckr", my_cs_exact}, + {"cp51950", "big5", my_cs_exact}, +#ifdef UNCOMMENT_THIS_WHEN_WL_WL_4024_IS_DONE + {"cp54936", "gb18030", my_cs_exact}, +#endif + {"cp65001", "utf8", my_cs_exact}, + +#else /* not Windows */ + + {"646", "latin1", my_cs_approx}, /* Default on Solaris */ + {"ANSI_X3.4-1968", "ascii", my_cs_exact}, + {"ansi1251", "cp1251", my_cs_exact}, + {"armscii8", "armscii8", my_cs_exact}, + {"armscii-8", "armscii8", my_cs_exact}, + {"ASCII", "ascii", my_cs_exact}, + {"Big5", "big5", my_cs_exact}, + {"cp1251", "cp1251", my_cs_exact}, + {"cp1255", "hebrew", my_cs_approx}, + {"CP866", "cp866", my_cs_exact}, + {"eucCN", "gb2312", my_cs_exact}, + {"euc-CN", "gb2312", my_cs_exact}, + {"eucJP", "ujis", my_cs_exact}, + {"euc-JP", "ujis", my_cs_exact}, + {"eucKR", "euckr", my_cs_exact}, + {"euc-KR", "euckr", my_cs_exact}, +#ifdef UNCOMMENT_THIS_WHEN_WL_WL_4024_IS_DONE + {"gb18030", "gb18030", my_cs_exact}, +#endif + {"gb2312", "gb2312", my_cs_exact}, + {"gbk", "gbk", my_cs_exact}, + {"georgianps", "geostd8", my_cs_exact}, + {"georgian-ps", "geostd8", my_cs_exact}, + {"IBM-1252", "cp1252", my_cs_exact}, + + {"iso88591", "latin1", my_cs_approx}, + {"ISO_8859-1", "latin1", my_cs_approx}, + {"ISO8859-1", "latin1", my_cs_approx}, + {"ISO-8859-1", "latin1", my_cs_approx}, + + {"iso885913", "latin7", my_cs_exact}, + {"ISO_8859-13", "latin7", my_cs_exact}, + {"ISO8859-13", "latin7", my_cs_exact}, + {"ISO-8859-13", "latin7", my_cs_exact}, + +#ifdef UNCOMMENT_THIS_WHEN_WL_4579_IS_DONE + {"iso885915", "latin9", my_cs_exact}, + {"ISO_8859-15", "latin9", my_cs_exact}, + {"ISO8859-15", "latin9", my_cs_exact}, + {"ISO-8859-15", "latin9", my_cs_exact}, +#endif + + {"iso88592", "latin2", my_cs_exact}, + {"ISO_8859-2", "latin2", my_cs_exact}, + {"ISO8859-2", "latin2", my_cs_exact}, + {"ISO-8859-2", "latin2", my_cs_exact}, + + {"iso88597", "greek", my_cs_exact}, + {"ISO_8859-7", "greek", my_cs_exact}, + {"ISO8859-7", "greek", my_cs_exact}, + {"ISO-8859-7", "greek", my_cs_exact}, + + {"iso88598", "hebrew", my_cs_exact}, + {"ISO_8859-8", "hebrew", my_cs_exact}, + {"ISO8859-8", "hebrew", my_cs_exact}, + {"ISO-8859-8", "hebrew", my_cs_exact}, + + {"iso88599", "latin5", my_cs_exact}, + {"ISO_8859-9", "latin5", my_cs_exact}, + {"ISO8859-9", "latin5", my_cs_exact}, + {"ISO-8859-9", "latin5", my_cs_exact}, + + {"koi8r", "koi8r", my_cs_exact}, + {"KOI8-R", "koi8r", my_cs_exact}, + {"koi8u", "koi8u", my_cs_exact}, + {"KOI8-U", "koi8u", my_cs_exact}, + + {"roman8", "hp8", my_cs_exact}, /* Default on HP UX */ + + {"Shift_JIS", "sjis", my_cs_exact}, + {"SJIS", "sjis", my_cs_exact}, + {"shiftjisx0213", "sjis", my_cs_exact}, + + {"tis620", "tis620", my_cs_exact}, + {"tis-620", "tis620", my_cs_exact}, + + {"ujis", "ujis", my_cs_exact}, + + {"US-ASCII", "ascii", my_cs_exact}, + + {"utf8", "utf8", my_cs_exact}, + {"utf-8", "utf8", my_cs_exact}, +#endif + {NULL, NULL, 0} +}; + + +static const char * +my_os_charset_to_mysql_charset(const char *csname) +{ + const MY_CSET_OS_NAME *csp; + for (csp= charsets; csp->os_name; csp++) + { + if (!my_strcasecmp(&my_charset_latin1, csp->os_name, csname)) + { + switch (csp->param) + { + case my_cs_exact: + return csp->my_name; + + case my_cs_approx: + /* + Maybe we should print a warning eventually: + character set correspondence is not exact. + */ + return csp->my_name; + + default: + my_printf_error(ER_UNKNOWN_ERROR, + "OS character set '%s'" + " is not supported by MySQL client", + MYF(0), csp->my_name); + goto def; + } + } + } + + my_printf_error(ER_UNKNOWN_ERROR, + "Unknown OS character set '%s'.", + MYF(0), csname); + +def: + csname= MYSQL_DEFAULT_CHARSET_NAME; + my_printf_error(ER_UNKNOWN_ERROR, + "Switching to the default character set '%s'.", + MYF(0), csname); + return csname; +} + + +#ifndef __WIN__ +#include <stdlib.h> /* for getenv() */ +#ifdef HAVE_LANGINFO_H +#include <langinfo.h> +#endif +#ifdef HAVE_LOCALE_H +#include <locale.h> +#endif +#endif /* __WIN__ */ + + +static int +mysql_autodetect_character_set(MYSQL *mysql) +{ + const char *csname= MYSQL_DEFAULT_CHARSET_NAME; + +#ifdef __WIN__ + char cpbuf[64]; + { + my_snprintf(cpbuf, sizeof(cpbuf), "cp%d", (int) GetConsoleCP()); + csname= my_os_charset_to_mysql_charset(cpbuf); + } +#elif defined(HAVE_SETLOCALE) && defined(HAVE_NL_LANGINFO) + { + if (setlocale(LC_CTYPE, "") && (csname= nl_langinfo(CODESET))) + csname= my_os_charset_to_mysql_charset(csname); + } +#endif + + if (!(mysql->options.charset_name= my_strdup(csname, MYF(MY_WME)))) + return 1; + return 0; +} + + +static void +mysql_set_character_set_with_default_collation(MYSQL *mysql) +{ + const char *save= charsets_dir; + if (mysql->options.charset_dir) + charsets_dir=mysql->options.charset_dir; + + if ((mysql->charset= get_charset_by_csname(mysql->options.charset_name, + MY_CS_PRIMARY, MYF(MY_WME)))) + { + /* Try to set compiled default collation when it's possible. */ + CHARSET_INFO *collation; + if ((collation= + get_charset_by_name(MYSQL_DEFAULT_COLLATION_NAME, MYF(MY_WME))) && + my_charset_same(mysql->charset, collation)) + { + mysql->charset= collation; + } + else + { + /* + Default compiled collation not found, or is not applicable + to the requested character set. + Continue with the default collation of the character set. + */ + } + } + charsets_dir= save; +} + + C_MODE_START int mysql_init_character_set(MYSQL *mysql) { - const char *default_collation_name; - /* Set character set */ if (!mysql->options.charset_name) { - default_collation_name= MYSQL_DEFAULT_COLLATION_NAME; if (!(mysql->options.charset_name= my_strdup(MYSQL_DEFAULT_CHARSET_NAME,MYF(MY_WME)))) - return 1; - } - else - default_collation_name= NULL; - - { - const char *save= charsets_dir; - if (mysql->options.charset_dir) - charsets_dir=mysql->options.charset_dir; - mysql->charset=get_charset_by_csname(mysql->options.charset_name, - MY_CS_PRIMARY, MYF(MY_WME)); - if (mysql->charset && default_collation_name) - { - CHARSET_INFO *collation; - if ((collation= - get_charset_by_name(default_collation_name, MYF(MY_WME)))) - { - if (!my_charset_same(mysql->charset, collation)) - { - my_printf_error(ER_UNKNOWN_ERROR, - "COLLATION %s is not valid for CHARACTER SET %s", - MYF(0), - default_collation_name, mysql->options.charset_name); - mysql->charset= NULL; - } - else - { - mysql->charset= collation; - } - } - else - mysql->charset= NULL; - } - charsets_dir= save; + return 1; } + else if (!strcmp(mysql->options.charset_name, + MYSQL_AUTODETECT_CHARSET_NAME) && + mysql_autodetect_character_set(mysql)) + return 1; + + mysql_set_character_set_with_default_collation(mysql); if (!mysql->charset) { |