summary refs log tree commit diff
diff options
context:
space:
mode:
author Alexander Barkov <bar@mariadb.org> 2014-09-18 12:40:55 +0400
committer Alexander Barkov <bar@mariadb.org> 2014-09-18 12:40:55 +0400
commit 8286bcd721a4555a1b49502d83caafc54c1752a6 (patch)
tree 50d2ffae19ff3537631c709a829317b83c08fc29
parent 391fddf6604c733f271b96189caa366049cc6ee4 (diff)
download mariadb-git-8286bcd721a4555a1b49502d83caafc54c1752a6.tar.gz
MDEV-6752 Trailing incomplete characters are not replaced to question marks on conversion
-rw-r--r-- mysql-test/r/ctype_latin1.result 55
-rw-r--r-- mysql-test/suite/sys_vars/r/character_set_client_func.result 2
-rw-r--r-- mysql-test/t/ctype_latin1.test 23
-rw-r--r-- sql/sql_string.cc 11
-rw-r--r-- strings/ctype.c 9
5 files changed, 96 insertions, 4 deletions
diff --git a/mysql-test/r/ctype_latin1.result b/mysql-test/r/ctype_latin1.result
index eb8af191657..fac9824401f 100644
--- a/mysql-test/r/ctype_latin1.result
+++ b/mysql-test/r/ctype_latin1.result
@@ -7660,5 +7660,60 @@ DROP FUNCTION iswellformed;
DROP TABLE allbytes;
# End of ctype_backslash.inc
#
+# MDEV-6752 Trailing incomplete characters are not replaced to question marks on conversion
+#
+SET NAMES utf8, character_set_connection=latin1;
+SELECT 'Â';
+?
+?
+SELECT HEX('Â');
+HEX('Â')
+3F
+SELECT HEX(CAST('Â' AS CHAR CHARACTER SET utf8));
+HEX(CAST('Â' AS CHAR CHARACTER SET utf8))
+3F
+SELECT HEX(CAST('Â' AS CHAR CHARACTER SET latin1));
+HEX(CAST('Â' AS CHAR CHARACTER SET latin1))
+3F
+SELECT HEX(CONVERT('Â' USING utf8));
+HEX(CONVERT('Â' USING utf8))
+3F
+SELECT HEX(CONVERT('Â' USING latin1));
+HEX(CONVERT('Â' USING latin1))
+3F
+SELECT 'Âx';
+?x
+?x
+SELECT HEX('Âx');
+HEX('Âx')
+3F78
+SELECT HEX(CAST('Âx' AS CHAR CHARACTER SET utf8));
+HEX(CAST('Âx' AS CHAR CHARACTER SET utf8))
+3F78
+SELECT HEX(CAST('Âx' AS CHAR CHARACTER SET latin1));
+HEX(CAST('Âx' AS CHAR CHARACTER SET latin1))
+3F78
+SELECT HEX(CONVERT('Âx' USING utf8));
+HEX(CONVERT('Âx' USING utf8))
+3F78
+SELECT HEX(CONVERT('Âx' USING latin1));
+HEX(CONVERT('Âx' USING latin1))
+3F78
+SET NAMES utf8;
+CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET latin1);
+INSERT INTO t1 VALUES ('Â'),('Â#');
+Warnings:
+Warning 1366 Incorrect string value: '\xC2' for column 'a' at row 1
+Warning 1366 Incorrect string value: '\xC2#' for column 'a' at row 2
+SHOW WARNINGS;
+Level Code Message
+Warning 1366 Incorrect string value: '\xC2' for column 'a' at row 1
+Warning 1366 Incorrect string value: '\xC2#' for column 'a' at row 2
+SELECT HEX(a),a FROM t1;
+HEX(a) a
+3F ?
+3F23 ?#
+DROP TABLE t1;
+#
# End of 10.0 tests
#
diff --git a/mysql-test/suite/sys_vars/r/character_set_client_func.result b/mysql-test/suite/sys_vars/r/character_set_client_func.result
index 82c1548d438..3e39b24c8db 100644
--- a/mysql-test/suite/sys_vars/r/character_set_client_func.result
+++ b/mysql-test/suite/sys_vars/r/character_set_client_func.result
@@ -30,7 +30,7 @@ SET @@session.character_set_client = utf8;
INSERT INTO t1 values('è');
SELECT hex(a),CHAR_LENGTH(a) FROM t1;
hex(a) CHAR_LENGTH(a)
-03 1
+033F 2
DELETE FROM t1;
DROP TABLE IF EXISTS t1;
SET @@global.character_set_client = @global_character_set_client;
diff --git a/mysql-test/t/ctype_latin1.test b/mysql-test/t/ctype_latin1.test
index c0db8dd1fd1..5da1534029b 100644
--- a/mysql-test/t/ctype_latin1.test
+++ b/mysql-test/t/ctype_latin1.test
@@ -211,5 +211,28 @@ let $ctype_unescape_combinations=selected;
--source include/ctype_unescape.inc
--echo #
+--echo # MDEV-6752 Trailing incomplete characters are not replaced to question marks on conversion
+--echo #
+SET NAMES utf8, character_set_connection=latin1;
+SELECT 'Â';
+SELECT HEX('Â');
+SELECT HEX(CAST('Â' AS CHAR CHARACTER SET utf8));
+SELECT HEX(CAST('Â' AS CHAR CHARACTER SET latin1));
+SELECT HEX(CONVERT('Â' USING utf8));
+SELECT HEX(CONVERT('Â' USING latin1));
+SELECT 'Âx';
+SELECT HEX('Âx');
+SELECT HEX(CAST('Âx' AS CHAR CHARACTER SET utf8));
+SELECT HEX(CAST('Âx' AS CHAR CHARACTER SET latin1));
+SELECT HEX(CONVERT('Âx' USING utf8));
+SELECT HEX(CONVERT('Âx' USING latin1));
+SET NAMES utf8;
+CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET latin1);
+INSERT INTO t1 VALUES ('Â'),('Â#');
+SHOW WARNINGS;
+SELECT HEX(a),a FROM t1;
+DROP TABLE t1;
+
+--echo #
--echo # End of 10.0 tests
--echo #
diff --git a/sql/sql_string.cc b/sql/sql_string.cc
index 55bf0f9b1c7..a7bfa6c1455 100644
--- a/sql/sql_string.cc
+++ b/sql/sql_string.cc
@@ -1022,8 +1022,15 @@ well_formed_copy_nchars(CHARSET_INFO *to_cs,
wc= '?';
}
else
- break; // Not enough characters
-
+ {
+ if ((uchar *) from >= from_end)
+ break; // End of line
+ // Incomplete byte sequence
+ if (!*well_formed_error_pos)
+ *well_formed_error_pos= from;
+ from++;
+ wc= '?';
+ }
outp:
if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0)
to+= cnvres;
diff --git a/strings/ctype.c b/strings/ctype.c
index 38c377c6da5..048fbe3d368 100644
--- a/strings/ctype.c
+++ b/strings/ctype.c
@@ -1066,7 +1066,14 @@ my_convert_internal(char *to, uint32 to_length,
wc= '?';
}
else
- break; // Not enough characters
+ {
+ if ((uchar *) from >= from_end)
+ break; /* End of line */
+ /* Incomplete byte sequence */
+ error_count++;
+ from++;
+ wc= '?';
+ }
outp:
if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0)