summaryrefslogtreecommitdiff
path: root/mysys/charset.c
diff options
context:
space:
mode:
authorVladislav Vaintroub <wlad@mariadb.com>2019-06-17 09:56:00 +0100
committerVladislav Vaintroub <wlad@mariadb.com>2019-06-17 18:04:47 +0100
commit5804bb4ef0acd7ac42e628c2d8c404957dc86cf9 (patch)
treed8180213953c76959d0a19aeb9c3e688722b23e2 /mysys/charset.c
parent81f60e8adeb241ed845427e0792349f3a05af99b (diff)
downloadmariadb-git-5804bb4ef0acd7ac42e628c2d8c404957dc86cf9.tar.gz
MDEV-19750 mysql command wrong encoding
Restore the detection of default charset in command line utilities. It worked up to 10.1, but was broken by Connector/C. Moved code for detection of default charset from sql-common/client.c to mysys, and make command line utilities to use this code if charset was not specified on the command line.
Diffstat (limited to 'mysys/charset.c')
-rw-r--r--mysys/charset.c218
1 files changed, 217 insertions, 1 deletions
diff --git a/mysys/charset.c b/mysys/charset.c
index 51afbb16cf0..f44dc7606c1 100644
--- a/mysys/charset.c
+++ b/mysys/charset.c
@@ -20,7 +20,12 @@
#include <m_string.h>
#include <my_dir.h>
#include <my_xml.h>
-
+#ifdef HAVE_LANGINFO_H
+#include <langinfo.h>
+#endif
+#ifdef HAVE_LOCALE_H
+#include <locale.h>
+#endif
/*
The code below implements this functionality:
@@ -1216,3 +1221,214 @@ size_t escape_quotes_for_mysql(CHARSET_INFO *charset_info,
*to= 0;
return overflow ? (ulong)~0 : (ulong) (to - to_start);
}
+
+
+typedef enum my_cs_match_type_enum
+{
+ /* MySQL and OS charsets are fully compatible */
+ my_cs_exact,
+ /* MySQL charset is very close to OS charset */
+ my_cs_approx,
+ /*
+ MySQL knows this charset, but it is not supported as client character set.
+ */
+ my_cs_unsupp
+} my_cs_match_type;
+
+
+typedef struct str2str_st
+{
+ const char* os_name;
+ const char* my_name;
+ my_cs_match_type param;
+} MY_CSET_OS_NAME;
+
+static const MY_CSET_OS_NAME charsets[] =
+{
+#ifdef _WIN32
+ {"cp437", "cp850", my_cs_approx},
+ {"cp850", "cp850", my_cs_exact},
+ {"cp852", "cp852", my_cs_exact},
+ {"cp858", "cp850", my_cs_approx},
+ {"cp866", "cp866", my_cs_exact},
+ {"cp874", "tis620", my_cs_approx},
+ {"cp932", "cp932", my_cs_exact},
+ {"cp936", "gbk", my_cs_approx},
+ {"cp949", "euckr", my_cs_approx},
+ {"cp950", "big5", my_cs_exact},
+ {"cp1200", "utf16le", my_cs_unsupp},
+ {"cp1201", "utf16", my_cs_unsupp},
+ {"cp1250", "cp1250", my_cs_exact},
+ {"cp1251", "cp1251", my_cs_exact},
+ {"cp1252", "latin1", my_cs_exact},
+ {"cp1253", "greek", my_cs_exact},
+ {"cp1254", "latin5", my_cs_exact},
+ {"cp1255", "hebrew", my_cs_approx},
+ {"cp1256", "cp1256", my_cs_exact},
+ {"cp1257", "cp1257", my_cs_exact},
+ {"cp10000", "macroman", my_cs_exact},
+ {"cp10001", "sjis", my_cs_approx},
+ {"cp10002", "big5", my_cs_approx},
+ {"cp10008", "gb2312", my_cs_approx},
+ {"cp10021", "tis620", my_cs_approx},
+ {"cp10029", "macce", my_cs_exact},
+ {"cp12001", "utf32", my_cs_unsupp},
+ {"cp20107", "swe7", my_cs_exact},
+ {"cp20127", "latin1", my_cs_approx},
+ {"cp20866", "koi8r", my_cs_exact},
+ {"cp20932", "ujis", my_cs_exact},
+ {"cp20936", "gb2312", my_cs_approx},
+ {"cp20949", "euckr", my_cs_approx},
+ {"cp21866", "koi8u", my_cs_exact},
+ {"cp28591", "latin1", my_cs_approx},
+ {"cp28592", "latin2", my_cs_exact},
+ {"cp28597", "greek", my_cs_exact},
+ {"cp28598", "hebrew", my_cs_exact},
+ {"cp28599", "latin5", my_cs_exact},
+ {"cp28603", "latin7", my_cs_exact},
+#ifdef UNCOMMENT_THIS_WHEN_WL_4579_IS_DONE
+ {"cp28605", "latin9", my_cs_exact},
+#endif
+ {"cp38598", "hebrew", my_cs_exact},
+ {"cp51932", "ujis", my_cs_exact},
+ {"cp51936", "gb2312", my_cs_exact},
+ {"cp51949", "euckr", my_cs_exact},
+ {"cp51950", "big5", my_cs_exact},
+#ifdef UNCOMMENT_THIS_WHEN_WL_WL_4024_IS_DONE
+ {"cp54936", "gb18030", my_cs_exact},
+#endif
+ {"cp65001", "utf8", my_cs_exact},
+
+#else /* not Windows */
+
+ {"646", "latin1", my_cs_approx}, /* Default on Solaris */
+ {"ANSI_X3.4-1968", "latin1", my_cs_approx},
+ {"ansi1251", "cp1251", my_cs_exact},
+ {"armscii8", "armscii8", my_cs_exact},
+ {"armscii-8", "armscii8", my_cs_exact},
+ {"ASCII", "latin1", my_cs_approx},
+ {"Big5", "big5", my_cs_exact},
+ {"cp1251", "cp1251", my_cs_exact},
+ {"cp1255", "hebrew", my_cs_approx},
+ {"CP866", "cp866", my_cs_exact},
+ {"eucCN", "gb2312", my_cs_exact},
+ {"euc-CN", "gb2312", my_cs_exact},
+ {"eucJP", "ujis", my_cs_exact},
+ {"euc-JP", "ujis", my_cs_exact},
+ {"eucKR", "euckr", my_cs_exact},
+ {"euc-KR", "euckr", my_cs_exact},
+#ifdef UNCOMMENT_THIS_WHEN_WL_WL_4024_IS_DONE
+ {"gb18030", "gb18030", my_cs_exact},
+#endif
+ {"gb2312", "gb2312", my_cs_exact},
+ {"gbk", "gbk", my_cs_exact},
+ {"georgianps", "geostd8", my_cs_exact},
+ {"georgian-ps", "geostd8", my_cs_exact},
+ {"IBM-1252", "cp1252", my_cs_exact},
+
+ {"iso88591", "latin1", my_cs_approx},
+ {"ISO_8859-1", "latin1", my_cs_approx},
+ {"ISO8859-1", "latin1", my_cs_approx},
+ {"ISO-8859-1", "latin1", my_cs_approx},
+
+ {"iso885913", "latin7", my_cs_exact},
+ {"ISO_8859-13", "latin7", my_cs_exact},
+ {"ISO8859-13", "latin7", my_cs_exact},
+ {"ISO-8859-13", "latin7", my_cs_exact},
+
+#ifdef UNCOMMENT_THIS_WHEN_WL_4579_IS_DONE
+ {"iso885915", "latin9", my_cs_exact},
+ {"ISO_8859-15", "latin9", my_cs_exact},
+ {"ISO8859-15", "latin9", my_cs_exact},
+ {"ISO-8859-15", "latin9", my_cs_exact},
+#endif
+
+ {"iso88592", "latin2", my_cs_exact},
+ {"ISO_8859-2", "latin2", my_cs_exact},
+ {"ISO8859-2", "latin2", my_cs_exact},
+ {"ISO-8859-2", "latin2", my_cs_exact},
+
+ {"iso88597", "greek", my_cs_exact},
+ {"ISO_8859-7", "greek", my_cs_exact},
+ {"ISO8859-7", "greek", my_cs_exact},
+ {"ISO-8859-7", "greek", my_cs_exact},
+
+ {"iso88598", "hebrew", my_cs_exact},
+ {"ISO_8859-8", "hebrew", my_cs_exact},
+ {"ISO8859-8", "hebrew", my_cs_exact},
+ {"ISO-8859-8", "hebrew", my_cs_exact},
+
+ {"iso88599", "latin5", my_cs_exact},
+ {"ISO_8859-9", "latin5", my_cs_exact},
+ {"ISO8859-9", "latin5", my_cs_exact},
+ {"ISO-8859-9", "latin5", my_cs_exact},
+
+ {"koi8r", "koi8r", my_cs_exact},
+ {"KOI8-R", "koi8r", my_cs_exact},
+ {"koi8u", "koi8u", my_cs_exact},
+ {"KOI8-U", "koi8u", my_cs_exact},
+
+ {"roman8", "hp8", my_cs_exact}, /* Default on HP UX */
+
+ {"Shift_JIS", "sjis", my_cs_exact},
+ {"SJIS", "sjis", my_cs_exact},
+ {"shiftjisx0213", "sjis", my_cs_exact},
+
+ {"tis620", "tis620", my_cs_exact},
+ {"tis-620", "tis620", my_cs_exact},
+
+ {"ujis", "ujis", my_cs_exact},
+
+ {"US-ASCII", "latin1", my_cs_approx},
+
+ {"utf8", "utf8", my_cs_exact},
+ {"utf-8", "utf8", my_cs_exact},
+#endif
+ {NULL, NULL, 0}
+};
+
+
+static const char*
+my_os_charset_to_mysql_charset(const char* csname)
+{
+ const MY_CSET_OS_NAME* csp;
+ for (csp = charsets; csp->os_name; csp++)
+ {
+ if (!strcasecmp(csp->os_name, csname))
+ {
+ switch (csp->param)
+ {
+ case my_cs_exact:
+ return csp->my_name;
+
+ case my_cs_approx:
+ /*
+ Maybe we should print a warning eventually:
+ character set correspondence is not exact.
+ */
+ return csp->my_name;
+
+ default:
+ return NULL;
+ }
+ }
+ }
+ return NULL;
+}
+
+const char* my_default_csname()
+{
+ const char* csname = NULL;
+#ifdef _WIN32
+ char cpbuf[64];
+ int cp = GetConsoleCP();
+ if (cp == 0)
+ cp = GetACP();
+ snprintf(cpbuf, sizeof(cpbuf), "cp%d", (int)cp);
+ csname = my_os_charset_to_mysql_charset(cpbuf);
+#elif defined(HAVE_SETLOCALE) && defined(HAVE_NL_LANGINFO)
+ if (setlocale(LC_CTYPE, "") && (csname = nl_langinfo(CODESET)))
+ csname = my_os_charset_to_mysql_charset(csname);
+#endif
+ return csname ? csname : MYSQL_DEFAULT_CHARSET_NAME;
+} \ No newline at end of file