diff options
author | Alexander Barkov <bar@mariadb.org> | 2014-09-18 12:40:55 +0400 |
---|---|---|
committer | Alexander Barkov <bar@mariadb.org> | 2014-09-18 12:40:55 +0400 |
commit | 8286bcd721a4555a1b49502d83caafc54c1752a6 (patch) | |
tree | 50d2ffae19ff3537631c709a829317b83c08fc29 | |
parent | 391fddf6604c733f271b96189caa366049cc6ee4 (diff) | |
download | mariadb-git-8286bcd721a4555a1b49502d83caafc54c1752a6.tar.gz |
MDEV-6752 Trailing incomplete characters are not replaced to question marks on conversion
-rw-r--r-- | mysql-test/r/ctype_latin1.result | 55 | ||||
-rw-r--r-- | mysql-test/suite/sys_vars/r/character_set_client_func.result | 2 | ||||
-rw-r--r-- | mysql-test/t/ctype_latin1.test | 23 | ||||
-rw-r--r-- | sql/sql_string.cc | 11 | ||||
-rw-r--r-- | strings/ctype.c | 9 |
5 files changed, 96 insertions, 4 deletions
diff --git a/mysql-test/r/ctype_latin1.result b/mysql-test/r/ctype_latin1.result index eb8af191657..fac9824401f 100644 --- a/mysql-test/r/ctype_latin1.result +++ b/mysql-test/r/ctype_latin1.result @@ -7660,5 +7660,60 @@ DROP FUNCTION iswellformed; DROP TABLE allbytes; # End of ctype_backslash.inc # +# MDEV-6752 Trailing incomplete characters are not replaced to question marks on conversion +# +SET NAMES utf8, character_set_connection=latin1; +SELECT 'Â'; +? +? +SELECT HEX('Â'); +HEX('Â') +3F +SELECT HEX(CAST('Â' AS CHAR CHARACTER SET utf8)); +HEX(CAST('Â' AS CHAR CHARACTER SET utf8)) +3F +SELECT HEX(CAST('Â' AS CHAR CHARACTER SET latin1)); +HEX(CAST('Â' AS CHAR CHARACTER SET latin1)) +3F +SELECT HEX(CONVERT('Â' USING utf8)); +HEX(CONVERT('Â' USING utf8)) +3F +SELECT HEX(CONVERT('Â' USING latin1)); +HEX(CONVERT('Â' USING latin1)) +3F +SELECT 'Âx'; +?x +?x +SELECT HEX('Âx'); +HEX('Âx') +3F78 +SELECT HEX(CAST('Âx' AS CHAR CHARACTER SET utf8)); +HEX(CAST('Âx' AS CHAR CHARACTER SET utf8)) +3F78 +SELECT HEX(CAST('Âx' AS CHAR CHARACTER SET latin1)); +HEX(CAST('Âx' AS CHAR CHARACTER SET latin1)) +3F78 +SELECT HEX(CONVERT('Âx' USING utf8)); +HEX(CONVERT('Âx' USING utf8)) +3F78 +SELECT HEX(CONVERT('Âx' USING latin1)); +HEX(CONVERT('Âx' USING latin1)) +3F78 +SET NAMES utf8; +CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET latin1); +INSERT INTO t1 VALUES ('Â'),('Â#'); +Warnings: +Warning 1366 Incorrect string value: '\xC2' for column 'a' at row 1 +Warning 1366 Incorrect string value: '\xC2#' for column 'a' at row 2 +SHOW WARNINGS; +Level Code Message +Warning 1366 Incorrect string value: '\xC2' for column 'a' at row 1 +Warning 1366 Incorrect string value: '\xC2#' for column 'a' at row 2 +SELECT HEX(a),a FROM t1; +HEX(a) a +3F ? +3F23 ?# +DROP TABLE t1; +# # End of 10.0 tests # diff --git a/mysql-test/suite/sys_vars/r/character_set_client_func.result b/mysql-test/suite/sys_vars/r/character_set_client_func.result index 82c1548d438..3e39b24c8db 100644 --- a/mysql-test/suite/sys_vars/r/character_set_client_func.result +++ b/mysql-test/suite/sys_vars/r/character_set_client_func.result @@ -30,7 +30,7 @@ SET @@session.character_set_client = utf8; INSERT INTO t1 values('è'); SELECT hex(a),CHAR_LENGTH(a) FROM t1; hex(a) CHAR_LENGTH(a) -03 1 +033F 2 DELETE FROM t1; DROP TABLE IF EXISTS t1; SET @@global.character_set_client = @global_character_set_client; diff --git a/mysql-test/t/ctype_latin1.test b/mysql-test/t/ctype_latin1.test index c0db8dd1fd1..5da1534029b 100644 --- a/mysql-test/t/ctype_latin1.test +++ b/mysql-test/t/ctype_latin1.test @@ -211,5 +211,28 @@ let $ctype_unescape_combinations=selected; --source include/ctype_unescape.inc --echo # +--echo # MDEV-6752 Trailing incomplete characters are not replaced to question marks on conversion +--echo # +SET NAMES utf8, character_set_connection=latin1; +SELECT 'Â'; +SELECT HEX('Â'); +SELECT HEX(CAST('Â' AS CHAR CHARACTER SET utf8)); +SELECT HEX(CAST('Â' AS CHAR CHARACTER SET latin1)); +SELECT HEX(CONVERT('Â' USING utf8)); +SELECT HEX(CONVERT('Â' USING latin1)); +SELECT 'Âx'; +SELECT HEX('Âx'); +SELECT HEX(CAST('Âx' AS CHAR CHARACTER SET utf8)); +SELECT HEX(CAST('Âx' AS CHAR CHARACTER SET latin1)); +SELECT HEX(CONVERT('Âx' USING utf8)); +SELECT HEX(CONVERT('Âx' USING latin1)); +SET NAMES utf8; +CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET latin1); +INSERT INTO t1 VALUES ('Â'),('Â#'); +SHOW WARNINGS; +SELECT HEX(a),a FROM t1; +DROP TABLE t1; + +--echo # --echo # End of 10.0 tests --echo # diff --git a/sql/sql_string.cc b/sql/sql_string.cc index 55bf0f9b1c7..a7bfa6c1455 100644 --- a/sql/sql_string.cc +++ b/sql/sql_string.cc @@ -1022,8 +1022,15 @@ well_formed_copy_nchars(CHARSET_INFO *to_cs, wc= '?'; } else - break; // Not enough characters - + { + if ((uchar *) from >= from_end) + break; // End of line + // Incomplete byte sequence + if (!*well_formed_error_pos) + *well_formed_error_pos= from; + from++; + wc= '?'; + } outp: if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0) to+= cnvres; diff --git a/strings/ctype.c b/strings/ctype.c index 38c377c6da5..048fbe3d368 100644 --- a/strings/ctype.c +++ b/strings/ctype.c @@ -1066,7 +1066,14 @@ my_convert_internal(char *to, uint32 to_length, wc= '?'; } else - break; // Not enough characters + { + if ((uchar *) from >= from_end) + break; /* End of line */ + /* Incomplete byte sequence */ + error_count++; + from++; + wc= '?'; + } outp: if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0) |