summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Alexander Barkov <bar@mariadb.org> 2016-04-06 09:13:49 +0400
committer Alexander Barkov <bar@mariadb.org> 2016-04-06 09:13:49 +0400
commit d516a2ae0cbd09d3b5b1667ec62b421330ab9902 (patch)
tree 8180665f03dcf47a233a56346f2724cfa141a93f
parent bddd63cfbaa5dba96e934166a2a234e13e20db4b (diff)
download mariadb-git-d516a2ae0cbd09d3b5b1667ec62b421330ab9902.tar.gz
MDEV-9823 LOAD DATA INFILE silently truncates incomplete byte sequences
-rw-r--r-- mysql-test/r/ctype_eucjpms.result 21
-rw-r--r-- mysql-test/r/ctype_ujis.result 21
-rw-r--r-- mysql-test/r/ctype_utf8.result 22
-rw-r--r-- mysql-test/r/ctype_utf8mb4.result 27
-rw-r--r-- mysql-test/std_data/loaddata/mdev9823.ujis.txt 11
-rw-r--r-- mysql-test/std_data/loaddata/mdev9823.utf8mb4.txt 12
-rw-r--r-- mysql-test/t/ctype_eucjpms.test 16
-rw-r--r-- mysql-test/t/ctype_ujis.test 17
-rw-r--r-- mysql-test/t/ctype_utf8.test 8
-rw-r--r-- mysql-test/t/ctype_utf8mb4.test 17
-rw-r--r-- sql/sql_load.cc 48
-rw-r--r-- strings/ctype-eucjpms.c 1
-rw-r--r-- strings/ctype-mb.ic 6
-rw-r--r-- strings/ctype-ujis.c 1
14 files changed, 202 insertions, 26 deletions
diff --git a/mysql-test/r/ctype_eucjpms.result b/mysql-test/r/ctype_eucjpms.result
index f9cb4f1eecc..8d4d8f6d5f9 100644
--- a/mysql-test/r/ctype_eucjpms.result
+++ b/mysql-test/r/ctype_eucjpms.result
@@ -33913,3 +33913,24 @@ DROP TABLE t1;
#
# End of 10.1 tests
#
+#
+# End of 10.2 tests
+#
+#
+# MDEV-9842 LOAD DATA INFILE does not work well with a TEXT column when using sjis
+#
+CREATE TABLE t1 (a TEXT CHARACTER SET eucjpms);
+LOAD DATA INFILE '../../std_data/loaddata/mdev9823.ujis.txt' INTO TABLE t1 CHARACTER SET eucjpms IGNORE 4 LINES;
+SELECT HEX(a) FROM t1;
+HEX(a)
+3F
+78787831
+3F3F
+78787832
+8FA1A1
+78787833
+3F3F
+DROP TABLE t1;
+#
+# End of 10.2 tests
+#
diff --git a/mysql-test/r/ctype_ujis.result b/mysql-test/r/ctype_ujis.result
index 61541ec7678..5eb9a3e1db5 100644
--- a/mysql-test/r/ctype_ujis.result
+++ b/mysql-test/r/ctype_ujis.result
@@ -26218,3 +26218,24 @@ DROP TABLE t1;
#
# End of 10.1 tests
#
+#
+# End of 10.2 tests
+#
+#
+# MDEV-9842 LOAD DATA INFILE does not work well with a TEXT column when using sjis
+#
+CREATE TABLE t1 (a TEXT CHARACTER SET ujis);
+LOAD DATA INFILE '../../std_data/loaddata/mdev9823.ujis.txt' INTO TABLE t1 CHARACTER SET ujis IGNORE 4 LINES;
+SELECT HEX(a) FROM t1;
+HEX(a)
+3F
+78787831
+3F3F
+78787832
+8FA1A1
+78787833
+3F3F
+DROP TABLE t1;
+#
+# End of 10.2 tests
+#
diff --git a/mysql-test/r/ctype_utf8.result b/mysql-test/r/ctype_utf8.result
index f52e08a676f..af85841725b 100644
--- a/mysql-test/r/ctype_utf8.result
+++ b/mysql-test/r/ctype_utf8.result
@@ -10426,5 +10426,27 @@ b
c
DROP TABLE t1;
#
+# MDEV-9842 LOAD DATA INFILE does not work well with a TEXT column when using sjis
+#
+CREATE TABLE t1 (a TEXT CHARACTER SET utf8);
+LOAD DATA INFILE '../../std_data/loaddata/mdev9823.utf8mb4.txt' INTO TABLE t1 CHARACTER SET utf8 IGNORE 4 LINES;
+Warnings:
+Warning 1366 Incorrect string value: '\xD0' for column 'a' at row 1
+Warning 1366 Incorrect string value: '\xE1\x80' for column 'a' at row 3
+Warning 1366 Incorrect string value: '\xF0\x9F\x98' for column 'a' at row 5
+Warning 1366 Incorrect string value: '\xF0\x9F\x98\x8E' for column 'a' at row 7
+Warning 1366 Incorrect string value: '\xF0\x9F\x98' for column 'a' at row 8
+SELECT HEX(a) FROM t1;
+HEX(a)
+3F
+78787831
+3F3F
+78787832
+3F3F3F
+78787833
+3F3F3F3F
+3F3F3F
+DROP TABLE t1;
+#
# End of 10.2 tests
#
diff --git a/mysql-test/r/ctype_utf8mb4.result b/mysql-test/r/ctype_utf8mb4.result
index 10d77ae1502..558aba9c466 100644
--- a/mysql-test/r/ctype_utf8mb4.result
+++ b/mysql-test/r/ctype_utf8mb4.result
@@ -3398,3 +3398,30 @@ DROP FUNCTION f1;
#
# End of 10.1 tests
#
+#
+# End of 10.2 tests
+#
+#
+# MDEV-9842 LOAD DATA INFILE does not work well with a TEXT column when using sjis
+#
+CREATE TABLE t1 (a TEXT CHARACTER SET utf8mb4);
+LOAD DATA INFILE '../../std_data/loaddata/mdev9823.utf8mb4.txt' INTO TABLE t1 CHARACTER SET utf8mb4 IGNORE 4 LINES;
+Warnings:
+Warning 1366 Incorrect string value: '\xD0' for column 'a' at row 1
+Warning 1366 Incorrect string value: '\xE1\x80' for column 'a' at row 3
+Warning 1366 Incorrect string value: '\xF0\x9F\x98' for column 'a' at row 5
+Warning 1366 Incorrect string value: '\xF0\x9F\x98' for column 'a' at row 8
+SELECT HEX(a) FROM t1;
+HEX(a)
+3F
+78787831
+3F3F
+78787832
+3F3F3F
+78787833
+F09F988E
+3F3F3F
+DROP TABLE t1;
+#
+# End of 10.2 tests
+#
diff --git a/mysql-test/std_data/loaddata/mdev9823.ujis.txt b/mysql-test/std_data/loaddata/mdev9823.ujis.txt
new file mode 100644
index 00000000000..5468c999585
--- /dev/null
+++ b/mysql-test/std_data/loaddata/mdev9823.ujis.txt
@@ -0,0 +1,11 @@
+# This file has incomplete UJIS sequences {8F}, {8FA1},
+# has a valid UJIS sequence {8FA1A1},
+# and has no NL at the end:
+# {8F} \n xxx1 \n {8FA1} \n xxx2 \n {8FA1A1} \n xxx3 \n {8FA1} EOF
+xxx1
+ก
+xxx2
+กก
+xxx3
+ก \ No newline at end of file
diff --git a/mysql-test/std_data/loaddata/mdev9823.utf8mb4.txt b/mysql-test/std_data/loaddata/mdev9823.utf8mb4.txt
new file mode 100644
index 00000000000..87739567de1
--- /dev/null
+++ b/mysql-test/std_data/loaddata/mdev9823.utf8mb4.txt
@@ -0,0 +1,12 @@
+# This file has incomplete utf8mb4 sequences {D0}, {E180}, {F09F98},
+# has a valid utf8mb4 sequence {F09F988E}
+# and has no NL at the end:
+# {D0} \n xxx1 \n {E180} \n xxx2 \n {F09F98} \n xxx3 \n {F09F988E} \n {F09F98} EOF
+ะ
+xxx1
+แ€
+xxx2
+๐Ÿ˜
+xxx3
+๐Ÿ˜Ž
+๐Ÿ˜ \ No newline at end of file
diff --git a/mysql-test/t/ctype_eucjpms.test b/mysql-test/t/ctype_eucjpms.test
index d533e38b2a2..b5bd92d1d07 100644
--- a/mysql-test/t/ctype_eucjpms.test
+++ b/mysql-test/t/ctype_eucjpms.test
@@ -566,3 +566,19 @@ DROP TABLE t1;
--echo #
--echo # End of 10.1 tests
--echo #
+
+--echo #
+--echo # End of 10.2 tests
+--echo #
+
+--echo #
+--echo # MDEV-9842 LOAD DATA INFILE does not work well with a TEXT column when using sjis
+--echo #
+CREATE TABLE t1 (a TEXT CHARACTER SET eucjpms);
+LOAD DATA INFILE '../../std_data/loaddata/mdev9823.ujis.txt' INTO TABLE t1 CHARACTER SET eucjpms IGNORE 4 LINES;
+SELECT HEX(a) FROM t1;
+DROP TABLE t1;
+
+--echo #
+--echo # End of 10.2 tests
+--echo #
diff --git a/mysql-test/t/ctype_ujis.test b/mysql-test/t/ctype_ujis.test
index 3f444580b13..db85585efd9 100644
--- a/mysql-test/t/ctype_ujis.test
+++ b/mysql-test/t/ctype_ujis.test
@@ -1396,3 +1396,20 @@ SELECT HEX(a) FROM t1 ORDER BY a;DROP TABLE t1;
--echo #
--echo # End of 10.1 tests
--echo #
+
+
+--echo #
+--echo # End of 10.2 tests
+--echo #
+
+--echo #
+--echo # MDEV-9842 LOAD DATA INFILE does not work well with a TEXT column when using sjis
+--echo #
+CREATE TABLE t1 (a TEXT CHARACTER SET ujis);
+LOAD DATA INFILE '../../std_data/loaddata/mdev9823.ujis.txt' INTO TABLE t1 CHARACTER SET ujis IGNORE 4 LINES;
+SELECT HEX(a) FROM t1;
+DROP TABLE t1;
+
+--echo #
+--echo # End of 10.2 tests
+--echo #
diff --git a/mysql-test/t/ctype_utf8.test b/mysql-test/t/ctype_utf8.test
index f3a9e63b57d..edf66f87eba 100644
--- a/mysql-test/t/ctype_utf8.test
+++ b/mysql-test/t/ctype_utf8.test
@@ -1967,5 +1967,13 @@ SELECT c1 FROM t1;
DROP TABLE t1;
--echo #
+--echo # MDEV-9842 LOAD DATA INFILE does not work well with a TEXT column when using sjis
+--echo #
+CREATE TABLE t1 (a TEXT CHARACTER SET utf8);
+LOAD DATA INFILE '../../std_data/loaddata/mdev9823.utf8mb4.txt' INTO TABLE t1 CHARACTER SET utf8 IGNORE 4 LINES;
+SELECT HEX(a) FROM t1;
+DROP TABLE t1;
+
+--echo #
--echo # End of 10.2 tests
--echo #
diff --git a/mysql-test/t/ctype_utf8mb4.test b/mysql-test/t/ctype_utf8mb4.test
index 2fe9b5e6544..74e39a80e5b 100644
--- a/mysql-test/t/ctype_utf8mb4.test
+++ b/mysql-test/t/ctype_utf8mb4.test
@@ -1919,3 +1919,20 @@ DROP FUNCTION f1;
--echo #
--echo # End of 10.1 tests
--echo #
+
+
+--echo #
+--echo # End of 10.2 tests
+--echo #
+
+--echo #
+--echo # MDEV-9842 LOAD DATA INFILE does not work well with a TEXT column when using sjis
+--echo #
+CREATE TABLE t1 (a TEXT CHARACTER SET utf8mb4);
+LOAD DATA INFILE '../../std_data/loaddata/mdev9823.utf8mb4.txt' INTO TABLE t1 CHARACTER SET utf8mb4 IGNORE 4 LINES;
+SELECT HEX(a) FROM t1;
+DROP TABLE t1;
+
+--echo #
+--echo # End of 10.2 tests
+--echo #
diff --git a/sql/sql_load.cc b/sql/sql_load.cc
index e2d579bac2c..a1bb84cf328 100644
--- a/sql/sql_load.cc
+++ b/sql/sql_load.cc
@@ -1589,38 +1589,34 @@ int READ_INFO::read_field()
return 0;
}
}
-#ifdef USE_MB
- if (my_mbcharlen(read_charset, chr) > 1)
+ data.append(chr);
+ if (use_mb(read_charset))
{
- uint32 length0= data.length();
- int ml= my_mbcharlen(read_charset, chr);
- data.append(chr);
-
- for (int i= 1; i < ml; i++)
+ int chlen;
+ if ((chlen= my_charlen(read_charset, data.end() - 1,
+ data.end())) != 1)
{
- chr= GET;
- if (chr == my_b_EOF)
+ for (uint32 length0= data.length() - 1 ; MY_CS_IS_TOOSMALL(chlen); )
{
- /*
- Need to back up the bytes already ready from illformed
- multi-byte char
- */
- data.length(length0);
- goto found_eof;
+ chr= GET;
+ if (chr == my_b_EOF)
+ goto found_eof;
+ data.append(chr);
+ chlen= my_charlen(read_charset, data.ptr() + length0, data.end());
+ if (chlen == MY_CS_ILSEQ)
+ {
+ /**
+ It has been an incomplete (but a valid) sequence so far,
+ but the last byte turned it into a bad byte sequence.
+ Unget the very last byte.
+ */
+ data.length(data.length() - 1);
+ PUSH(chr);
+ break;
+ }
}
- data.append(chr);
}
- if (my_ismbchar(read_charset,
- (const char *) data.ptr() + length0,
- (const char *) data.end()))
- continue;
- for (int i= 0; i < ml; i++)
- PUSH(data.end()[-1 - i]);
- data.length(length0);
- chr= GET;
}
-#endif
- data.append(chr);
}
/*
** We come here if buffer is too small. Enlarge it and continue
diff --git a/strings/ctype-eucjpms.c b/strings/ctype-eucjpms.c
index 52494b7dfb3..469d3a5be6c 100644
--- a/strings/ctype-eucjpms.c
+++ b/strings/ctype-eucjpms.c
@@ -199,6 +199,7 @@ static const uchar sort_order_eucjpms[]=
#define IS_MB2_KATA(x,y) (iseucjpms_ss2(x) && iskata(y))
#define IS_MB2_CHAR(x,y) (IS_MB2_KATA(x,y) || IS_MB2_JIS(x,y))
#define IS_MB3_CHAR(x,y,z) (iseucjpms_ss3(x) && IS_MB2_JIS(y,z))
+#define IS_MB_PREFIX2(x,y) (iseucjpms_ss3(x) && iseucjpms(y))
#define DEFINE_ASIAN_ROUTINES
#include "ctype-mb.ic"
diff --git a/strings/ctype-mb.ic b/strings/ctype-mb.ic
index 6fc4d6e3db4..2df9c9d5e49 100644
--- a/strings/ctype-mb.ic
+++ b/strings/ctype-mb.ic
@@ -75,7 +75,13 @@ MY_FUNCTION_NAME(charlen)(CHARSET_INFO *cs __attribute__((unused)),
#ifdef IS_MB3_CHAR
if (b + 3 > e)
+ {
+#ifdef IS_MB_PREFIX2
+ if (!IS_MB_PREFIX2(b[0], b[1]))
+ return MY_CS_ILSEQ;
+#endif
return MY_CS_TOOSMALLN(3);
+ }
if (IS_MB3_CHAR(b[0], b[1], b[2]))
return 3; /* Three-byte character */
#endif
diff --git a/strings/ctype-ujis.c b/strings/ctype-ujis.c
index 67e68901573..b24fdb3075f 100644
--- a/strings/ctype-ujis.c
+++ b/strings/ctype-ujis.c
@@ -198,6 +198,7 @@ static const uchar sort_order_ujis[]=
#define IS_MB2_KATA(x,y) (isujis_ss2(x) && iskata(y))
#define IS_MB2_CHAR(x, y) (IS_MB2_KATA(x,y) || IS_MB2_JIS(x,y))
#define IS_MB3_CHAR(x, y, z) (isujis_ss3(x) && IS_MB2_JIS(y,z))
+#define IS_MB_PREFIX2(x,y) (isujis_ss3(x) && isujis(y))
#define DEFINE_ASIAN_ROUTINES
#include "ctype-mb.ic"