summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorVladislav Vaintroub <wlad@mariadb.com>2021-11-22 12:29:15 +0100
committerSergei Golubchik <serg@mariadb.org>2021-12-15 19:13:57 +0100
commitba9d231b5ab75bc3614e53bdd95026e5fe9dd565 (patch)
tree4995ed2d51b72babc3865c2f474ff8d292b30959
parent4d3ac328482ea2e363cb0be00fd8654d0af5cce0 (diff)
downloadmariadb-git-ba9d231b5ab75bc3614e53bdd95026e5fe9dd565.tar.gz
MDEV-26713 Set activeCodePage=UTF8 for windows programs
- Use the corresponding entry in the manifest, as described in
  https://docs.microsoft.com/en-us/windows/apps/design/globalizing/use-utf8-code-page
- If the ANSI codepage is UTF8 (i.e. on Windows 1903 and later):
    - Use UTF8 as the default client charset
    - Set the console codepage(s) to UTF8, in case the process is using a console
- Allow some previously disabled MTR tests, which used Unicode in "exec",
  to run on recent Windows versions
-rw-r--r--cmake/win_compatibility.manifest5
-rw-r--r--mysql-test/include/check_utf8_cli.inc3
-rw-r--r--mysql-test/include/no_utf8_cli.inc3
-rw-r--r--mysql-test/main/charset_client_win.test1
-rw-r--r--mysql-test/main/charset_client_win_utf8mb4.result2
-rw-r--r--mysql-test/main/charset_client_win_utf8mb4.test3
-rw-r--r--mysql-test/main/grant_utf8_cli.result (renamed from mysql-test/main/grant_not_windows.result)0
-rw-r--r--mysql-test/main/grant_utf8_cli.test (renamed from mysql-test/main/grant_not_windows.test)3
-rw-r--r--mysql-test/suite.pm22
-rw-r--r--mysys/charset.c12
-rw-r--r--mysys/get_password.c2
-rw-r--r--mysys/my_getopt.c53
-rw-r--r--mysys/my_init.c75
13 files changed, 174 insertions, 10 deletions
diff --git a/cmake/win_compatibility.manifest b/cmake/win_compatibility.manifest
index 2e4b27a6dc4..0e7ce667d68 100644
--- a/cmake/win_compatibility.manifest
+++ b/cmake/win_compatibility.manifest
@@ -19,4 +19,9 @@
</application>
</compatibility>
+ <application>
+ <windowsSettings>
+ <activeCodePage xmlns="http://schemas.microsoft.com/SMI/2019/WindowsSettings">UTF-8</activeCodePage>
+ </windowsSettings>
+ </application>
</asmv1:assembly>
diff --git a/mysql-test/include/check_utf8_cli.inc b/mysql-test/include/check_utf8_cli.inc
new file mode 100644
index 00000000000..a1fac216446
--- /dev/null
+++ b/mysql-test/include/check_utf8_cli.inc
@@ -0,0 +1,3 @@
+# Check if utf8 can be used on the command line for --exec
+# The real check is done in the suite.pm
+#
diff --git a/mysql-test/include/no_utf8_cli.inc b/mysql-test/include/no_utf8_cli.inc
new file mode 100644
index 00000000000..19f9aa6df42
--- /dev/null
+++ b/mysql-test/include/no_utf8_cli.inc
@@ -0,0 +1,3 @@
+# Check if utf8 can't be used on the command line for --exec
+# The real check is done in the suite.pm
+#
diff --git a/mysql-test/main/charset_client_win.test b/mysql-test/main/charset_client_win.test
index b4a21d4f0a4..c3f649cb7d4 100644
--- a/mysql-test/main/charset_client_win.test
+++ b/mysql-test/main/charset_client_win.test
@@ -1,2 +1,3 @@
--source include/windows.inc
+--source include/no_utf8_cli.inc
--exec chcp 1257 > NUL && $MYSQL --default-character-set=auto -e "select @@character_set_client"
diff --git a/mysql-test/main/charset_client_win_utf8mb4.result b/mysql-test/main/charset_client_win_utf8mb4.result
new file mode 100644
index 00000000000..f7b5d376f9a
--- /dev/null
+++ b/mysql-test/main/charset_client_win_utf8mb4.result
@@ -0,0 +1,2 @@
+@@character_set_client
+utf8mb4
diff --git a/mysql-test/main/charset_client_win_utf8mb4.test b/mysql-test/main/charset_client_win_utf8mb4.test
new file mode 100644
index 00000000000..2baf0d7c050
--- /dev/null
+++ b/mysql-test/main/charset_client_win_utf8mb4.test
@@ -0,0 +1,3 @@
+--source include/windows.inc
+--source include/check_utf8_cli.inc
+--exec $MYSQL --default-character-set=auto -e "select @@character_set_client"
diff --git a/mysql-test/main/grant_not_windows.result b/mysql-test/main/grant_utf8_cli.result
index fedfaf984b2..fedfaf984b2 100644
--- a/mysql-test/main/grant_not_windows.result
+++ b/mysql-test/main/grant_utf8_cli.result
diff --git a/mysql-test/main/grant_not_windows.test b/mysql-test/main/grant_utf8_cli.test
index 55b09232edc..bc811d5298e 100644
--- a/mysql-test/main/grant_not_windows.test
+++ b/mysql-test/main/grant_utf8_cli.test
@@ -1,6 +1,5 @@
- # UTF8 parameters to mysql client do not work on Windows
---source include/not_windows.inc
--source include/not_embedded.inc
+--source include/check_utf8_cli.inc
#
# Bug#21432 Database/Table name limited to 64 bytes, not chars, problems with multi-byte
diff --git a/mysql-test/suite.pm b/mysql-test/suite.pm
index 4cc6b410fa1..ad67117a229 100644
--- a/mysql-test/suite.pm
+++ b/mysql-test/suite.pm
@@ -87,6 +87,28 @@ sub skip_combinations {
$skip{'main/ssl_verify_ip.test'} = 'x509v3 support required'
unless $openssl_ver ge "1.0.2";
+ sub utf8_command_line_ok() {
+   # Returns 1 if UTF8 text can be used on the command line, 0 otherwise.
+   # Outside of Windows the answer is always 1.
+   if (IS_WINDOWS) {
+     # Can use UTF8 on command line since Windows 10 1903 (10.0.18362)
+     # or if OS codepage is set to UTF8
+     my($os_name, $os_major, $os_minor, $os_build, $os_id) = Win32::GetOSVersion();
+     # The version parts are numbers and must be compared numerically.
+     # The string operators (lt/gt/ge) compare character by character,
+     # so e.g. "6" lt "10" is false and "6" gt "10" is true.
+     if ($os_major < 10) {
+       return 0;
+     } elsif ($os_major > 10 or $os_minor > 0 or $os_build >= 18362) {
+       return 1;
+     } elsif (Win32::GetACP() == 65001) {
+       return 1;
+     }
+     return 0;
+   }
+   return 1;
+ }
+
+ $skip{'include/check_utf8_cli.inc'} = 'No utf8 command line support'
+ unless utf8_command_line_ok();
+
+ $skip{'include/no_utf8_cli.inc'} = 'Not tested with utf8 command line support'
+ unless !utf8_command_line_ok();
%skip;
}
diff --git a/mysys/charset.c b/mysys/charset.c
index 19cad76fdf4..2a8ac6e1ca5 100644
--- a/mysys/charset.c
+++ b/mysys/charset.c
@@ -1517,9 +1517,15 @@ const char* my_default_csname()
const char* csname = NULL;
#ifdef _WIN32
char cpbuf[64];
- int cp = GetConsoleCP();
- if (cp == 0)
- cp = GetACP();
+ UINT cp;
+ if (GetACP() == CP_UTF8)
+ cp= CP_UTF8;
+ else
+ {
+ cp= GetConsoleCP();
+ if (cp == 0)
+ cp= GetACP();
+ }
snprintf(cpbuf, sizeof(cpbuf), "cp%d", (int)cp);
csname = my_os_charset_to_mysql_charset(cpbuf);
#elif defined(HAVE_SETLOCALE) && defined(HAVE_NL_LANGINFO)
diff --git a/mysys/get_password.c b/mysys/get_password.c
index bdd20d0349b..18286fd9e39 100644
--- a/mysys/get_password.c
+++ b/mysys/get_password.c
@@ -100,7 +100,7 @@ char *get_tty_password(const char *opt_message)
/*
Allocate output string, and convert UTF16 password to output codepage.
*/
- cp= GetConsoleCP();
+ cp= GetACP() == CP_UTF8 ? CP_UTF8 : GetConsoleCP();
if (!(to_len= WideCharToMultiByte(cp, 0, wbuf, -1, NULL, 0, NULL, NULL)))
DBUG_RETURN(NULL);
diff --git a/mysys/my_getopt.c b/mysys/my_getopt.c
index 3fe025ba808..6e9c6334620 100644
--- a/mysys/my_getopt.c
+++ b/mysys/my_getopt.c
@@ -38,7 +38,7 @@ static double getopt_double(char *arg, const struct my_option *optp, int *err);
static void init_variables(const struct my_option *, init_func_p);
static void init_one_value(const struct my_option *, void *, longlong);
static void fini_one_value(const struct my_option *, void *, longlong);
-static int setval(const struct my_option *, void *, char *, my_bool);
+static int setval(const struct my_option *, void *, char *, my_bool, const char *);
static char *check_struct_option(char *cur_arg, char *key_name);
/*
@@ -133,6 +133,50 @@ double getopt_ulonglong2double(ulonglong v)
return u.dbl;
}
+#ifdef _WIN32
+/**
+  Warn about an option value that is not well-formed UTF8.
+
+  The check is only done on Windows, and only if the program is running
+  in UTF8 mode (ANSI codepage is UTF8). A non-UTF8 value will most likely
+  be a sign of an old "ANSI" my.ini, and it is likely that something will
+  go wrong later, e.g. a file access error.
+
+  @param key       option name, used in the warning text
+  @param value     option value to check
+  @param filename  option file the value was read from; NULL or empty
+                   if it did not come from a file
+*/
+static void validate_value(const char *key, const char *value,
+                           const char *filename)
+{
+  const struct charset_info_st *utf8_cs= &my_charset_utf8mb4_bin;
+  MY_STRCOPY_STATUS check_status;
+  size_t value_len;
+
+  /* Nothing to validate unless the ANSI codepage is UTF8 */
+  if (GetACP() != CP_UTF8)
+    return;
+
+  value_len= strlen(value);
+  if (value_len == 0)
+    return;
+
+  utf8_cs->cset->well_formed_char_length(utf8_cs, value, value + value_len,
+                                         value_len, &check_status);
+  /* No error position recorded: the whole value is well-formed */
+  if (!check_status.m_well_formed_error_pos)
+    return;
+
+  if (!filename || !*filename)
+  {
+    /*
+      Should never happen, non-UTF8 can be read from option's
+      file only.
+    */
+    DBUG_ASSERT(0);
+    my_getopt_error_reporter(
+        WARNING_LEVEL, "%s: invalid (non-UTF8) characters for option %s",
+        my_progname, key);
+    return;
+  }
+
+  my_getopt_error_reporter(WARNING_LEVEL,
+                           "%s: invalid (non-UTF8) characters found for option '%s'"
+                           " in file '%s'",
+                           my_progname, key, filename);
+}
+#else
+#define validate_value(key, value, filename) (void)filename
+#endif
+
/**
Handle command line options.
Sort options.
@@ -564,7 +608,7 @@ int handle_options(int *argc, char ***argv, const struct my_option *longopts,
}
}
if ((error= setval(optp, optp->value, argument,
- set_maximum_value)))
+ set_maximum_value,filename)))
DBUG_RETURN(error);
if (get_one_option(optp, argument, filename))
DBUG_RETURN(EXIT_UNSPECIFIED_ERROR);
@@ -610,7 +654,7 @@ int handle_options(int *argc, char ***argv, const struct my_option *longopts,
continue;
}
if ((!option_is_autoset) &&
- ((error= setval(optp, value, argument, set_maximum_value))) &&
+ ((error= setval(optp, value, argument, set_maximum_value,filename))) &&
!option_is_loose)
DBUG_RETURN(error);
if (get_one_option(optp, argument, filename))
@@ -711,7 +755,7 @@ static my_bool get_bool_argument(const struct my_option *opts,
*/
static int setval(const struct my_option *opts, void *value, char *argument,
- my_bool set_maximum_value)
+ my_bool set_maximum_value, const char *option_file)
{
int err= 0, res= 0;
DBUG_ENTER("setval");
@@ -858,6 +902,7 @@ static int setval(const struct my_option *opts, void *value, char *argument,
goto ret;
};
}
+ validate_value(opts->name, argument, option_file);
DBUG_RETURN(0);
ret:
diff --git a/mysys/my_init.c b/mysys/my_init.c
index d201d45a4ee..2f21bcb735f 100644
--- a/mysys/my_init.c
+++ b/mysys/my_init.c
@@ -34,6 +34,7 @@
#endif
static void my_win_init(void);
static my_bool win32_init_tcp_ip();
+static void setup_codepages();
#else
#define my_win_init()
#endif
@@ -67,6 +68,69 @@ static ulong atoi_octal(const char *str)
MYSQL_FILE *mysql_stdin= NULL;
static MYSQL_FILE instrumented_stdin;
+#ifdef _WIN32
+static UINT orig_console_cp, orig_console_output_cp;
+
+/*
+  Put the console input and output codepages back to the values saved
+  in orig_console_cp and orig_console_output_cp.
+*/
+static void reset_console_cp(void)
+{
+  UINT current_cp;
+
+  /*
+    Each codepage is only set if it differs from the saved one. We try not
+    to call SetConsoleCP unnecessarily, to work around a bug on older
+    Windows 10 (1803), which could switch truetype console fonts to raster,
+    even though SetConsoleCP would be a no-op (switch from UTF8 to UTF8).
+  */
+  current_cp= GetConsoleCP();
+  if (current_cp != orig_console_cp)
+    SetConsoleCP(orig_console_cp);
+
+  current_cp= GetConsoleOutputCP();
+  if (current_cp != orig_console_output_cp)
+    SetConsoleOutputCP(orig_console_output_cp);
+}
+
+/*
+  Fix discrepancies between console output and command line
+  parameter encoding.
+
+  The command line is in the ANSI codepage, while output to the console
+  is by default in the OEM codepage, but we like them to be in the same
+  encoding.
+
+  We do this only if the current ANSI codepage is UTF8, i.e. when we
+  know we're on a Windows that can handle UTF8 well. In that case the C
+  locale is switched to UTF8 and, if stdout is a terminal, the console
+  input and output codepages are switched to UTF8 as well.
+
+  If stdout is a terminal, the console codepages found on entry are saved,
+  and reset_console_cp() is registered with atexit() to restore them,
+  provided both could be read.
+*/
+static void setup_codepages(void)
+{
+  UINT acp;
+  BOOL is_a_tty= fileno(stdout) >= 0 && isatty(fileno(stdout));
+
+  if (is_a_tty)
+  {
+    /*
+      Save console codepages, in case we change them,
+      to restore them on exit.
+    */
+    orig_console_cp= GetConsoleCP();
+    orig_console_output_cp= GetConsoleOutputCP();
+    if (orig_console_cp && orig_console_output_cp)
+      atexit(reset_console_cp);
+  }
+
+  if ((acp= GetACP()) != CP_UTF8)
+    return;
+
+  /*
+    Use setlocale to make mbstowcs/mkdir/getcwd behave, see
+    https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/setlocale-wsetlocale
+  */
+  setlocale(LC_ALL, "en_US.UTF8");
+
+  if (!is_a_tty)
+    return;
+
+  /*
+    If ANSI codepage is UTF8, we actually want to switch console
+    to it as well.
+
+    Each codepage is switched only if it really differs. A no-op
+    SetConsoleCP call (UTF8 to UTF8) could switch truetype console
+    fonts to raster on older Windows 10 (1803).
+  */
+  if (orig_console_cp != acp)
+    SetConsoleCP(acp);
+  if (orig_console_output_cp != acp)
+    SetConsoleOutputCP(acp);
+}
+#endif
+
/**
Initialize my_sys functions, resources and variables
@@ -337,6 +401,17 @@ static void my_win_init(void)
_tzset();
+ /*
+ We do not want text translation (LF->CRLF)
+ when stdout is console/terminal, it is buggy
+ */
+ if (fileno(stdout) >= 0 && isatty(fileno(stdout)))
+ (void)setmode(fileno(stdout), O_BINARY);
+
+ if (fileno(stderr) >= 0 && isatty(fileno(stderr)))
+ (void) setmode(fileno(stderr), O_BINARY);
+
+ setup_codepages();
DBUG_VOID_RETURN;
}