summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- mysql-test/r/ctype_big5.result 13
-rw-r--r-- mysql-test/t/ctype_big5.test 18
-rw-r--r-- sql/sql_class.cc 59
-rw-r--r-- strings/ctype-big5.c 4
-rw-r--r-- strings/ctype-gbk.c 4
-rw-r--r-- strings/ctype-sjis.c 4
6 files changed, 89 insertions, 13 deletions
diff --git a/mysql-test/r/ctype_big5.result b/mysql-test/r/ctype_big5.result
index 6574908101c..3f1a87838cf 100644
--- a/mysql-test/r/ctype_big5.result
+++ b/mysql-test/r/ctype_big5.result
@@ -192,3 +192,16 @@ drop table t1;
select hex(convert(_big5 0xC84041 using ucs2));
hex(convert(_big5 0xC84041 using ucs2))
003F0041
+End of 4.1 tests
+create table t1 (a blob);
+insert into t1 values (0xEE00);
+delete from t1;
+select hex(load_file('test/t1.txt'));
+hex(load_file('test/t1.txt'))
+5CEE5C300A
+load data infile 't1.txt' into table t1;
+select hex(a) from t1;
+hex(a)
+EE00
+drop table t1;
+End of 5.0 tests
diff --git a/mysql-test/t/ctype_big5.test b/mysql-test/t/ctype_big5.test
index 200002cd235..8e17a27c550 100644
--- a/mysql-test/t/ctype_big5.test
+++ b/mysql-test/t/ctype_big5.test
@@ -63,4 +63,20 @@ drop table t1;
#
select hex(convert(_big5 0xC84041 using ucs2));
-# End of 4.1 tests
+--echo End of 4.1 tests
+
+#
+# Bug#26711 "binary content 0x00 sometimes becomes 0x5C 0x00 after dump/load"
+#
+create table t1 (a blob);
+insert into t1 values (0xEE00);
+--exec $MYSQL_DUMP --default-character-set=big5 -T $MYSQLTEST_VARDIR/master-data/test test t1
+delete from t1;
+select hex(load_file('test/t1.txt'));
+load data infile 't1.txt' into table t1;
+select hex(a) from t1;
+--exec rm $MYSQLTEST_VARDIR/master-data/test/t1.txt
+--exec rm $MYSQLTEST_VARDIR/master-data/test/t1.sql
+drop table t1;
+
+--echo End of 5.0 tests
diff --git a/sql/sql_class.cc b/sql/sql_class.cc
index 010dc101e0d..788026d2f67 100644
--- a/sql/sql_class.cc
+++ b/sql/sql_class.cc
@@ -1221,6 +1221,11 @@ select_export::prepare(List<Item> &list, SELECT_LEX_UNIT *u)
}
+#define NEED_ESCAPING(x) ((int) (uchar) (x) == escape_char || /* byte equals the escape character */ \
+ (int) (uchar) (x) == field_sep_char || /* byte equals the field separator */ \
+ (int) (uchar) (x) == line_sep_char || /* byte equals the line separator */ \
+ !(x)) /* byte is 0x00, which send_data() writes as escape_char followed by '0' */
+
bool select_export::send_data(List<Item> &items)
{
@@ -1280,14 +1285,20 @@ bool select_export::send_data(List<Item> &items)
used_length=res->length();
if (result_type == STRING_RESULT && escape_char != -1)
{
- char *pos,*start,*end;
-
+ char *pos, *start, *end;
+ CHARSET_INFO *res_charset= res->charset();
+ CHARSET_INFO *character_set_client= thd->variables.
+ character_set_client;
+ bool check_second_byte= (res_charset == &my_charset_bin) &&
+ character_set_client->
+ escape_with_backslash_is_dangerous;
+ DBUG_ASSERT(character_set_client->mbmaxlen == 2 ||
+ !character_set_client->escape_with_backslash_is_dangerous);
for (start=pos=(char*) res->ptr(),end=pos+used_length ;
pos != end ;
pos++)
{
#ifdef USE_MB
- CHARSET_INFO *res_charset=res->charset();
if (use_mb(res_charset))
{
int l;
@@ -1298,9 +1309,45 @@ bool select_export::send_data(List<Item> &items)
}
}
#endif
- if ((int) *pos == escape_char || (int) *pos == field_sep_char ||
- (int) *pos == line_sep_char || !*pos)
- {
+
+ /*
+ Special case when dumping BINARY/VARBINARY/BLOB values
+ for the clients with character sets big5, cp932, gbk and sjis,
+ which can have the escape character (0x5C "\" by default)
+ as the second byte of a multi-byte sequence.
+
+ If
+ - pos[0] is a valid multi-byte head (e.g. 0xEE) and
+ - pos[1] is 0x00, which will be escaped as "\0",
+
+ then we'll get "0xEE + 0x5C + 0x30" in the output file.
+
+ If this file is later loaded using this sequence of commands:
+
+ mysql> create table t1 (a varchar(128)) character set big5;
+ mysql> LOAD DATA INFILE 'dump.txt' INTO TABLE t1;
+
+ then 0x5C will be misinterpreted as the second byte
+ of a multi-byte character "0xEE + 0x5C", instead of
+ as the escape character for 0x00.
+
+ To avoid this confusion, we'll escape the multi-byte
+ head character too, so the sequence "0xEE + 0x00" will be
+ dumped as "0x5C + 0xEE + 0x5C + 0x30".
+
+ Note, in the condition below we only check whether
+ mbcharlen is equal to 2, because there are no
+ character sets with mbmaxlen greater than 2
+ that have escape_with_backslash_is_dangerous set.
+ The DBUG_ASSERT before the loop guarantees this.
+ */
+
+ if (NEED_ESCAPING(*pos) ||
+ (check_second_byte &&
+ my_mbcharlen(character_set_client, (uchar) *pos) == 2 &&
+ pos + 1 < end &&
+ NEED_ESCAPING(pos[1])))
+ {
char tmp_buff[2];
tmp_buff[0]= escape_char;
tmp_buff[1]= *pos ? *pos : '0';
diff --git a/strings/ctype-big5.c b/strings/ctype-big5.c
index be04d19a756..8bbbcac63e4 100644
--- a/strings/ctype-big5.c
+++ b/strings/ctype-big5.c
@@ -6400,7 +6400,7 @@ CHARSET_INFO my_charset_big5_chinese_ci=
0, /* min_sort_char */
255, /* max_sort_char */
' ', /* pad char */
- 0, /* escape_with_backslash_is_dangerous */
+ 1, /* escape_with_backslash_is_dangerous */
&my_charset_big5_handler,
&my_collation_big5_chinese_ci_handler
};
@@ -6433,7 +6433,7 @@ CHARSET_INFO my_charset_big5_bin=
0, /* min_sort_char */
255, /* max_sort_char */
' ', /* pad char */
- 0, /* escape_with_backslash_is_dangerous */
+ 1, /* escape_with_backslash_is_dangerous */
&my_charset_big5_handler,
&my_collation_mb_bin_handler
};
diff --git a/strings/ctype-gbk.c b/strings/ctype-gbk.c
index c7160e923a9..8ac7d62c9da 100644
--- a/strings/ctype-gbk.c
+++ b/strings/ctype-gbk.c
@@ -10046,7 +10046,7 @@ CHARSET_INFO my_charset_gbk_chinese_ci=
0, /* min_sort_char */
255, /* max_sort_char */
' ', /* pad char */
- 0, /* escape_with_backslash_is_dangerous */
+ 1, /* escape_with_backslash_is_dangerous */
&my_charset_handler,
&my_collation_ci_handler
};
@@ -10078,7 +10078,7 @@ CHARSET_INFO my_charset_gbk_bin=
0, /* min_sort_char */
255, /* max_sort_char */
' ', /* pad char */
- 0, /* escape_with_backslash_is_dangerous */
+ 1, /* escape_with_backslash_is_dangerous */
&my_charset_handler,
&my_collation_mb_bin_handler
};
diff --git a/strings/ctype-sjis.c b/strings/ctype-sjis.c
index a66a7a000a6..6a7c7d7c0a0 100644
--- a/strings/ctype-sjis.c
+++ b/strings/ctype-sjis.c
@@ -4694,7 +4694,7 @@ CHARSET_INFO my_charset_sjis_japanese_ci=
0, /* min_sort_char */
255, /* max_sort_char */
' ', /* pad char */
- 0, /* escape_with_backslash_is_dangerous */
+ 1, /* escape_with_backslash_is_dangerous */
&my_charset_handler,
&my_collation_ci_handler
};
@@ -4726,7 +4726,7 @@ CHARSET_INFO my_charset_sjis_bin=
0, /* min_sort_char */
255, /* max_sort_char */
' ', /* pad char */
- 0, /* escape_with_backslash_is_dangerous */
+ 1, /* escape_with_backslash_is_dangerous */
&my_charset_handler,
&my_collation_mb_bin_handler
};