summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Alexander Barkov <bar@mariadb.com> 2020-05-07 19:20:17 +0400
committer: Alexander Barkov <bar@mariadb.com> 2020-05-09 16:01:30 +0400
commit: cfe5ee90c8e4b9dfa98a41fcd299197a59261be7 (patch)
tree: 35fdaabac55d4b36d228bc9600112e986850b162
parent: c675886dcdecd29571bd08605a409325ee81004c (diff)
download: mariadb-git-cfe5ee90c8e4b9dfa98a41fcd299197a59261be7.tar.gz
MDEV-22043 Special character leads to assertion in my_wc_to_printable_generic on 10.5.2 (debug)
The code did not take into account that:
- U+005C (backslash) can occupy more than mbminlen bytes (e.g. in sjis)
- Some character sets do not have a code for U+005C (e.g. swe7)

Adding a new function wc_to_printable into MY_CHARSET_HANDLER to cover all special cases more easily.
-rw-r--r-- include/m_ctype.h | 9
-rw-r--r-- mysql-test/main/ctype_filename.result | 13
-rw-r--r-- mysql-test/main/ctype_filename.test | 19
-rw-r--r-- mysql-test/main/ctype_sjis.result | 22
-rw-r--r-- mysql-test/main/ctype_sjis.test | 30
-rw-r--r-- mysql-test/main/ctype_swe7.result | 18
-rw-r--r-- mysql-test/main/ctype_swe7.test | 26
-rw-r--r-- sql/sql_error.cc | 4
-rw-r--r-- sql/sql_string.cc | 2
-rw-r--r-- strings/ctype-big5.c | 1
-rw-r--r-- strings/ctype-bin.c | 1
-rw-r--r-- strings/ctype-cp932.c | 1
-rw-r--r-- strings/ctype-euc_kr.c | 1
-rw-r--r-- strings/ctype-eucjpms.c | 1
-rw-r--r-- strings/ctype-gb2312.c | 1
-rw-r--r-- strings/ctype-gbk.c | 1
-rw-r--r-- strings/ctype-latin1.c | 1
-rw-r--r-- strings/ctype-simple.c | 1
-rw-r--r-- strings/ctype-sjis.c | 10
-rw-r--r-- strings/ctype-tis620.c | 1
-rw-r--r-- strings/ctype-ucs2.c | 4
-rw-r--r-- strings/ctype-ujis.c | 1
-rw-r--r-- strings/ctype-utf8.c | 13
-rw-r--r-- strings/ctype.c | 64
-rw-r--r-- strings/strings_def.h | 15
25 files changed, 242 insertions, 18 deletions
diff --git a/include/m_ctype.h b/include/m_ctype.h
index 802c00908a9..8440efa971c 100644
--- a/include/m_ctype.h
+++ b/include/m_ctype.h
@@ -541,6 +541,7 @@ struct my_charset_handler_st
my_ci_native_to_mb() rather than my_ci_wc_mb().
*/
my_charset_conv_wc_mb native_to_mb;
+ my_charset_conv_wc_mb wc_to_printable;
};
extern MY_CHARSET_HANDLER my_charset_8bit_handler;
@@ -660,6 +661,11 @@ struct charset_info_st
return (cset->native_to_mb)(this, wc, s, e);
}
+ int wc_to_printable(my_wc_t wc, uchar *s, uchar *e) const /* Member wrapper around the charset handler's wc_to_printable function. */
+ {
+ return (cset->wc_to_printable)(this, wc, s, e); /* forwards wc and the buffer bounds s..e unchanged, passing this charset as the first argument */
+ }
+
int ctype(int *to, const uchar *s, const uchar *e) const
{
return (cset->ctype)(this, to, s, e);
@@ -1249,9 +1255,6 @@ int my_wc_mb_bin(CHARSET_INFO *cs,my_wc_t wc, uchar *s, uchar *e);
int my_mb_ctype_8bit(CHARSET_INFO *,int *, const uchar *,const uchar *);
int my_mb_ctype_mb(CHARSET_INFO *,int *, const uchar *,const uchar *);
-int my_wc_to_printable_generic(CHARSET_INFO *cs, my_wc_t wc,
- uchar *s, uchar *e);
-
size_t my_scan_8bit(CHARSET_INFO *cs, const char *b, const char *e, int sq);
size_t my_snprintf_8bit(CHARSET_INFO *, char *to, size_t n,
diff --git a/mysql-test/main/ctype_filename.result b/mysql-test/main/ctype_filename.result
index c6d7d1e39b9..ec4a14c4a23 100644
--- a/mysql-test/main/ctype_filename.result
+++ b/mysql-test/main/ctype_filename.result
@@ -21,3 +21,16 @@ SET NAMES utf8;
SELECT @a:=CONVERT('aя' USING filename) AS `@a`, BINARY @a, REVERSE(@a), HEX(@a), HEX(REVERSE(@a));
@a BINARY @a REVERSE(@a) HEX(@a) HEX(REVERSE(@a))
aя a@r1 яa 61407231 40723161
+#
+# Start of 10.5 tests
+#
+#
+# MDEV-22043 Special character leads to assertion in my_wc_to_printable_generic on 10.5.2 (debug)
+#
+SET NAMES filename;
+EXECUTE IMMEDIATE _latin1 0x01;
+ERROR 42000: You@0020have@0020an@0020error@0020in@0020your@0020SQL@0020syntax@003b@0020check@0020the@0020manual@0020that@0020corresponds@0020to@0020your@0020MariaDB@0020server@0020version@0020for@0020the@0020right@0020syntax@0020to@0020use@0020near@0020@0027@005c0001@0027@0020at@0020line@00201
+SET NAMES utf8;
+#
+# End of 10.5 tests
+#
diff --git a/mysql-test/main/ctype_filename.test b/mysql-test/main/ctype_filename.test
index 7ec07293a2b..bd82714ca0e 100644
--- a/mysql-test/main/ctype_filename.test
+++ b/mysql-test/main/ctype_filename.test
@@ -27,3 +27,22 @@ select convert(convert(',' using filename) using binary);
--echo #
SET NAMES utf8;
SELECT @a:=CONVERT('aя' USING filename) AS `@a`, BINARY @a, REVERSE(@a), HEX(@a), HEX(REVERSE(@a));
+
+
+--echo #
+--echo # Start of 10.5 tests
+--echo #
+
+--echo #
+--echo # MDEV-22043 Special character leads to assertion in my_wc_to_printable_generic on 10.5.2 (debug)
+--echo #
+
+SET NAMES filename;
+--error ER_PARSE_ERROR
+EXECUTE IMMEDIATE _latin1 0x01;
+SET NAMES utf8;
+
+
+--echo #
+--echo # End of 10.5 tests
+--echo #
diff --git a/mysql-test/main/ctype_sjis.result b/mysql-test/main/ctype_sjis.result
index 4917a1973a1..fae022b60e1 100644
--- a/mysql-test/main/ctype_sjis.result
+++ b/mysql-test/main/ctype_sjis.result
@@ -19296,3 +19296,25 @@ SET DEFAULT_STORAGE_ENGINE=Default;
#
# End of 10.2 tests
#
+#
+# Start of 10.5 tests
+#
+#
+# MDEV-22043 Special character leads to assertion in my_wc_to_printable_generic on 10.5.2 (debug)
+#
+SET NAMES sjis;
+SET @@CHARACTER_SET_CLIENT='cp1257';
+(a(b 'т'));
+ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near 'a(b '_0143_201A'))' at line 1
+SET NAMES sjis;
+SET @@CHARACTER_SET_CLIENT='cp1257';
+'т';
+ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near ''_0143_201A'' at line 1
+SET NAMES sjis;
+SET @@CHARACTER_SET_CLIENT='cp1257';
+EXECUTE IMMEDIATE _cp1257 0xD182;
+ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near '_0143_201A' at line 1
+SET NAMES sjis;
+#
+# End of 10.5 tests
+#
diff --git a/mysql-test/main/ctype_sjis.test b/mysql-test/main/ctype_sjis.test
index 9a8ce414c14..804ba557faa 100644
--- a/mysql-test/main/ctype_sjis.test
+++ b/mysql-test/main/ctype_sjis.test
@@ -260,3 +260,33 @@ let $coll_pad='sjis_bin';
--echo #
--echo # End of 10.2 tests
--echo #
+
+--echo #
+--echo # Start of 10.5 tests
+--echo #
+
+--echo #
+--echo # MDEV-22043 Special character leads to assertion in my_wc_to_printable_generic on 10.5.2 (debug)
+--echo #
+
+SET NAMES sjis;
+SET @@CHARACTER_SET_CLIENT='cp1257';
+--error ER_PARSE_ERROR
+(a(b 'т'));
+
+SET NAMES sjis;
+SET @@CHARACTER_SET_CLIENT='cp1257';
+--error ER_PARSE_ERROR
+'т';
+
+SET NAMES sjis;
+SET @@CHARACTER_SET_CLIENT='cp1257';
+--error ER_PARSE_ERROR
+EXECUTE IMMEDIATE _cp1257 0xD182;
+
+SET NAMES sjis;
+
+
+--echo #
+--echo # End of 10.5 tests
+--echo #
diff --git a/mysql-test/main/ctype_swe7.result b/mysql-test/main/ctype_swe7.result
index e1913fa5dcd..db65b6b8a1a 100644
--- a/mysql-test/main/ctype_swe7.result
+++ b/mysql-test/main/ctype_swe7.result
@@ -3635,3 +3635,21 @@ SET DEFAULT_STORAGE_ENGINE=Default;
#
# End of 10.2 tests
#
+#
+# Start of 10.5 tests
+#
+#
+# MDEV-22043 Special character leads to assertion in my_wc_to_printable_generic on 10.5.2 (debug)
+#
+SET NAMES swe7;
+SELECT `T`;
+ERROR HY000: Invalid swe7 character string: '.xEF.xBC.xB4'
+SET NAMES swe7;
+SELECT `龔`;
+ERROR HY000: Invalid swe7 character string: '.xE9.xBE.x94'
+SET NAMES swe7;
+EXECUTE IMMEDIATE _swe7 0x01;
+ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near '.0001' at line 1
+#
+# End of 10.5 tests
+#
diff --git a/mysql-test/main/ctype_swe7.test b/mysql-test/main/ctype_swe7.test
index 2e648a89406..a8d79935aa6 100644
--- a/mysql-test/main/ctype_swe7.test
+++ b/mysql-test/main/ctype_swe7.test
@@ -38,3 +38,29 @@ let $coll_pad='swe7_bin';
--echo #
--echo # End of 10.2 tests
--echo #
+
+
+--echo #
+--echo # Start of 10.5 tests
+--echo #
+
+--echo #
+--echo # MDEV-22043 Special character leads to assertion in my_wc_to_printable_generic on 10.5.2 (debug)
+--echo #
+
+SET NAMES swe7;
+--error ER_INVALID_CHARACTER_STRING
+SELECT `T`;
+
+SET NAMES swe7;
+--error ER_INVALID_CHARACTER_STRING
+SELECT `龔`;
+
+SET NAMES swe7;
+--error ER_PARSE_ERROR
+EXECUTE IMMEDIATE _swe7 0x01;
+
+
+--echo #
+--echo # End of 10.5 tests
+--echo #
diff --git a/sql/sql_error.cc b/sql/sql_error.cc
index e5dda9904d2..b3ef0d89a98 100644
--- a/sql/sql_error.cc
+++ b/sql/sql_error.cc
@@ -850,7 +850,7 @@ extern "C" int my_wc_mb_utf8_null_terminated(CHARSET_INFO *cs,
my_wc_t wc, uchar *r, uchar *e)
{
return wc == '\0' ?
- my_wc_to_printable_generic(cs, wc, r, e) :
+ cs->wc_to_printable(wc, r, e) :
my_charset_utf8mb3_handler.wc_mb(cs, wc, r, e);
}
@@ -951,7 +951,7 @@ size_t convert_error_message(char *to, size_t to_length, CHARSET_INFO *to_cs,
to_cs= system_charset_info;
uint32 cnv_length= my_convert_using_func(to, to_length,
to_cs,
- my_wc_to_printable_generic,
+ to_cs->cset->wc_to_printable,
from, from_length,
from_cs, from_cs->cset->mb_wc,
errors);
diff --git a/sql/sql_string.cc b/sql/sql_string.cc
index 2fc6ae0ad7d..030075aa793 100644
--- a/sql/sql_string.cc
+++ b/sql/sql_string.cc
@@ -791,7 +791,7 @@ bool Binary_string::copy_printable_hhhh(CHARSET_INFO *to_cs,
if (bytes_needed >= UINT_MAX32 || alloc((size_t) bytes_needed))
return true;
str_length= my_convert_using_func(Ptr, Alloced_length, to_cs,
- my_wc_to_printable_generic,
+ to_cs->cset->wc_to_printable,
from, from_length,
from_cs,
from_cs->cset->mb_wc,
diff --git a/strings/ctype-big5.c b/strings/ctype-big5.c
index 3991a219ab5..945bbdfdc62 100644
--- a/strings/ctype-big5.c
+++ b/strings/ctype-big5.c
@@ -6800,6 +6800,7 @@ static MY_CHARSET_HANDLER my_charset_big5_handler=
my_well_formed_char_length_big5,
my_copy_fix_mb,
my_native_to_mb_big5,
+ my_wc_to_printable_generic
};
struct charset_info_st my_charset_big5_chinese_ci=
diff --git a/strings/ctype-bin.c b/strings/ctype-bin.c
index 0324c0665e2..fe28752a3f7 100644
--- a/strings/ctype-bin.c
+++ b/strings/ctype-bin.c
@@ -560,6 +560,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
my_well_formed_char_length_8bit,
my_copy_8bit,
my_wc_mb_bin,
+ my_wc_to_printable_generic
};
diff --git a/strings/ctype-cp932.c b/strings/ctype-cp932.c
index bf97d1feb83..45b5bde9510 100644
--- a/strings/ctype-cp932.c
+++ b/strings/ctype-cp932.c
@@ -34756,6 +34756,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
my_well_formed_char_length_cp932,
my_copy_fix_mb,
my_native_to_mb_cp932,
+ my_wc_to_printable_generic
};
diff --git a/strings/ctype-euc_kr.c b/strings/ctype-euc_kr.c
index deb13957900..0362f799fc6 100644
--- a/strings/ctype-euc_kr.c
+++ b/strings/ctype-euc_kr.c
@@ -10046,6 +10046,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
my_well_formed_char_length_euckr,
my_copy_fix_mb,
my_native_to_mb_euckr,
+ my_wc_to_printable_generic
};
diff --git a/strings/ctype-eucjpms.c b/strings/ctype-eucjpms.c
index 118e8286703..1dd179fed57 100644
--- a/strings/ctype-eucjpms.c
+++ b/strings/ctype-eucjpms.c
@@ -67584,6 +67584,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
my_well_formed_char_length_eucjpms,
my_copy_fix_mb,
my_native_to_mb_eucjpms,
+ my_wc_to_printable_generic
};
diff --git a/strings/ctype-gb2312.c b/strings/ctype-gb2312.c
index 166619bf5cc..266799f32a3 100644
--- a/strings/ctype-gb2312.c
+++ b/strings/ctype-gb2312.c
@@ -6451,6 +6451,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
my_well_formed_char_length_gb2312,
my_copy_fix_mb,
my_native_to_mb_gb2312,
+ my_wc_to_printable_generic
};
diff --git a/strings/ctype-gbk.c b/strings/ctype-gbk.c
index efaa2e5c728..fa6dba9bfb5 100644
--- a/strings/ctype-gbk.c
+++ b/strings/ctype-gbk.c
@@ -10733,6 +10733,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
my_well_formed_char_length_gbk,
my_copy_fix_mb,
my_native_to_mb_gbk,
+ my_wc_to_printable_generic
};
diff --git a/strings/ctype-latin1.c b/strings/ctype-latin1.c
index f9fa1488aa6..53ce27e491e 100644
--- a/strings/ctype-latin1.c
+++ b/strings/ctype-latin1.c
@@ -423,6 +423,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
my_well_formed_char_length_8bit,
my_copy_8bit,
my_wc_mb_bin, /* native_to_mb */
+ my_wc_to_printable_generic
};
diff --git a/strings/ctype-simple.c b/strings/ctype-simple.c
index 975cb503872..eac05ea68f5 100644
--- a/strings/ctype-simple.c
+++ b/strings/ctype-simple.c
@@ -2088,6 +2088,7 @@ MY_CHARSET_HANDLER my_charset_8bit_handler=
my_well_formed_char_length_8bit,
my_copy_8bit,
my_wc_mb_bin, /* native_to_mb */
+ my_wc_to_printable_8bit
};
MY_COLLATION_HANDLER my_collation_8bit_simple_ci_handler =
diff --git a/strings/ctype-sjis.c b/strings/ctype-sjis.c
index 902034b435d..e1c6a871772 100644
--- a/strings/ctype-sjis.c
+++ b/strings/ctype-sjis.c
@@ -34004,6 +34004,15 @@ size_t my_numcells_sjis(CHARSET_INFO *cs __attribute__((unused)),
}
+static int /* wc_to_printable handler installed for sjis: a thin wrapper over my_wc_to_printable_ex */
+my_wc_to_printable_sjis(CHARSET_INFO *cs, my_wc_t wc,
+ uchar *str, uchar *end)
+{
+ return my_wc_to_printable_ex(cs, wc, str, end,
+ '\\', 2, 1); /* escape char is backslash; bslen=2 because the backslash is expected to encode to two bytes in sjis, diglen=1 byte per hex digit */
+}
+
+
/*
sjis_chinese_ci and sjis_bin sort character blocks in this order:
1. [00..7F] - 7BIT characters (ASCII)
@@ -34135,6 +34144,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
my_well_formed_char_length_sjis,
my_copy_fix_mb,
my_native_to_mb_sjis,
+ my_wc_to_printable_sjis
};
diff --git a/strings/ctype-tis620.c b/strings/ctype-tis620.c
index 6a351c05823..772294fb5c0 100644
--- a/strings/ctype-tis620.c
+++ b/strings/ctype-tis620.c
@@ -905,6 +905,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
my_well_formed_char_length_8bit,
my_copy_8bit,
my_wc_mb_bin, /* native_to_mb */
+ my_wc_to_printable_generic
};
diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c
index e4234a9582a..d764849c01e 100644
--- a/strings/ctype-ucs2.c
+++ b/strings/ctype-ucs2.c
@@ -1591,6 +1591,7 @@ MY_CHARSET_HANDLER my_charset_utf16_handler=
my_well_formed_char_length_utf16,
my_copy_fix_mb2_or_mb4,
my_uni_utf16,
+ my_wc_to_printable_generic
};
@@ -1931,6 +1932,7 @@ static MY_CHARSET_HANDLER my_charset_utf16le_handler=
my_well_formed_char_length_utf16,
my_copy_fix_mb2_or_mb4,
my_uni_utf16le,
+ my_wc_to_printable_generic
};
@@ -2753,6 +2755,7 @@ MY_CHARSET_HANDLER my_charset_utf32_handler=
my_well_formed_char_length_utf32,
my_copy_fix_mb2_or_mb4,
my_uni_utf32,
+ my_wc_to_printable_generic
};
@@ -3343,6 +3346,7 @@ MY_CHARSET_HANDLER my_charset_ucs2_handler=
my_well_formed_char_length_ucs2,
my_copy_fix_mb2_or_mb4,
my_uni_ucs2,
+ my_wc_to_printable_generic
};
diff --git a/strings/ctype-ujis.c b/strings/ctype-ujis.c
index 949f3aadc36..9ec3b578549 100644
--- a/strings/ctype-ujis.c
+++ b/strings/ctype-ujis.c
@@ -67328,6 +67328,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
my_well_formed_char_length_ujis,
my_copy_fix_mb,
my_native_to_mb_ujis,
+ my_wc_to_printable_generic
};
diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c
index 3329b6d23ef..b8e71b1f7a9 100644
--- a/strings/ctype-utf8.c
+++ b/strings/ctype-utf8.c
@@ -5466,6 +5466,7 @@ MY_CHARSET_HANDLER my_charset_utf8mb3_handler=
my_well_formed_char_length_utf8mb3,
my_copy_fix_mb,
my_uni_utf8mb3,
+ my_wc_to_printable_generic
};
@@ -7030,6 +7031,16 @@ my_charlen_filename(CHARSET_INFO *cs, const uchar *str, const uchar *end)
}
+static int /* wc_to_printable handler installed for the filename charset: a thin wrapper over my_wc_to_printable_ex */
+my_wc_to_printable_filename(CHARSET_INFO *cs, my_wc_t wc,
+ uchar *str, uchar *end)
+{
+ return my_wc_to_printable_ex(cs, wc, str, end,
+ '\\', 5, 1); /* escape char is backslash; bslen=5 because filename encodes the backslash as the 5-byte sequence @005c (see the ctype_filename test result), diglen=1 byte per hex digit */
+}
+
+
+
#define MY_FUNCTION_NAME(x) my_ ## x ## _filename
#define CHARLEN(cs,str,end) my_charlen_filename(cs,str,end)
#define DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN
@@ -7102,6 +7113,7 @@ static MY_CHARSET_HANDLER my_charset_filename_handler=
my_well_formed_char_length_filename,
my_copy_fix_mb,
my_wc_mb_filename,
+ my_wc_to_printable_filename
};
@@ -7792,6 +7804,7 @@ MY_CHARSET_HANDLER my_charset_utf8mb4_handler=
my_well_formed_char_length_utf8mb4,
my_copy_fix_mb,
my_wc_mb_utf8mb4,
+ my_wc_to_printable_generic
};
diff --git a/strings/ctype.c b/strings/ctype.c
index 3fbe4143da2..4df9b9c2f09 100644
--- a/strings/ctype.c
+++ b/strings/ctype.c
@@ -1020,7 +1020,7 @@ my_is_printable(my_wc_t wc)
}
-static uint to_printable_8bit(uchar *dst, my_wc_t wc)
+static uint to_printable_8bit(uchar *dst, my_wc_t wc, uint bs)
{
/*
This function is used only in context of error messages for now.
@@ -1028,7 +1028,7 @@ static uint to_printable_8bit(uchar *dst, my_wc_t wc)
when a message is put into diagnostics area.
*/
DBUG_ASSERT(wc < 0x10000);
- *dst++= '\\';
+ *dst++= (char) bs;
*dst++= _dig_vec_upper[(wc >> 12) & 0x0F];
*dst++= _dig_vec_upper[(wc >> 8) & 0x0F];
*dst++= _dig_vec_upper[(wc >> 4) & 0x0F];
@@ -1037,18 +1037,25 @@ static uint to_printable_8bit(uchar *dst, my_wc_t wc)
}
+static uint my_printable_length(uint bslen, uint diglen) /* Byte length of one escaped character, given the encoded size of the escape char (bslen) and of each following digit (diglen). */
+{
+ return bslen + (MY_CS_PRINTABLE_CHAR_LENGTH - 1) * diglen; /* one escape char plus the remaining MY_CS_PRINTABLE_CHAR_LENGTH - 1 characters, which to_printable_8bit emits as hex digits */
+}
+
+
/**
Encode a Unicode character "wc" into a printable string.
This function is suitable for any character set, including
ASCII-incompatible multi-byte character sets, e.g. ucs2, utf16, utf32.
*/
int
-my_wc_to_printable_generic(CHARSET_INFO *cs, my_wc_t wc,
- uchar *str, uchar *end)
+my_wc_to_printable_ex(CHARSET_INFO *cs, my_wc_t wc,
+ uchar *str, uchar *end,
+ uint bs, uint bslen, uint diglen)
{
uchar *str0;
uint i, length;
- uchar tmp[MY_CS_PRINTABLE_CHAR_LENGTH];
+ uchar tmp[MY_CS_PRINTABLE_CHAR_LENGTH * MY_CS_MBMAXLEN];
if (my_is_printable(wc))
{
@@ -1057,27 +1064,62 @@ my_wc_to_printable_generic(CHARSET_INFO *cs, my_wc_t wc,
return mblen;
}
- if (str + MY_CS_PRINTABLE_CHAR_LENGTH * cs->mbminlen > end)
- return MY_CS_TOOSMALLN(MY_CS_PRINTABLE_CHAR_LENGTH * cs->mbminlen);
+ if (str + my_printable_length(bslen, diglen) > end)
+ return MY_CS_TOOSMALLN(my_printable_length(bslen, diglen));
if ((cs->state & MY_CS_NONASCII) == 0)
- return to_printable_8bit(str, wc);
+ return to_printable_8bit(str, wc, bs);
- length= to_printable_8bit(tmp, wc);
+ length= to_printable_8bit(tmp, wc, bs);
str0= str;
for (i= 0; i < length; i++)
{
- if (my_ci_wc_mb(cs, tmp[i], str, end) != (int) cs->mbminlen)
+ uint expected_length= i == 0 ? bslen : diglen;
+ if (my_ci_wc_mb(cs, tmp[i], str, end) != (int) expected_length)
{
DBUG_ASSERT(0);
return MY_CS_ILSEQ;
}
- str+= cs->mbminlen;
+ str+= expected_length;
}
return (int) (str - str0);
}
+int /* wc_to_printable handler used by my_charset_8bit_handler; picks the escape character and delegates to my_wc_to_printable_ex */
+my_wc_to_printable_8bit(CHARSET_INFO *cs, my_wc_t wc,
+ uchar *str, uchar *end)
+{
+ /*
+ Special case: swe7 does not have the backslash character.
+ Use a dot instead of a backslash for escaping whenever the charset's
+ tab_to_uni table does not map byte 0x5C to U+005C.
+ */
+ uint bs= cs->tab_to_uni && cs->tab_to_uni['\\'] != '\\' ? '.' : '\\';
+ DBUG_ASSERT(cs->mbminlen == 1);
+ /*
+ Additionally, if the original swe7 string contains backslashes,
+ replace them with dots, so this error message:
+ Invalid swe7 character string: '\xEF\xBC\xB4'
+ is displayed as:
+ Invalid swe7 character string: '.xEF.xBC.xB4'
+ which is more readable than what would happen without '\'-to-dot mapping:
+ Invalid swe7 character string: '.005CxEF.005CxBC.005CxB4'
+ */
+ if (bs == '.' && wc == '\\')
+ wc= '.';
+ return my_wc_to_printable_ex(cs, wc, str, end, bs, 1, 1); /* bslen=1 and diglen=1: escape char and digits each take one byte here */
+}
+
+
+int /* Default wc_to_printable handler: a thin wrapper over my_wc_to_printable_ex using backslash as the escape character */
+my_wc_to_printable_generic(CHARSET_INFO *cs, my_wc_t wc,
+ uchar *str, uchar *end)
+{
+ return my_wc_to_printable_ex(cs, wc, str, end, '\\',
+ cs->mbminlen, cs->mbminlen); /* both the backslash and each hex digit are expected to encode to exactly cs->mbminlen bytes */
+}
+
+
/*
Convert a string between two character sets.
'to' must be large enough to store (form_length * to_cs->mbmaxlen) bytes.
diff --git a/strings/strings_def.h b/strings/strings_def.h
index b3727321e19..d4f51bcd0a5 100644
--- a/strings/strings_def.h
+++ b/strings/strings_def.h
@@ -117,4 +117,17 @@ uint my_8bit_collation_flags_from_data(CHARSET_INFO *cs);
#define MY_HASH_ADD_16(A, B, value) \
do { MY_HASH_ADD(A, B, ((value) & 0xFF)) ; MY_HASH_ADD(A, B, ((value >>8 ))); } while(0)
-#endif
+
+#define my_wc_t ulong
+
+int my_wc_to_printable_ex(CHARSET_INFO *cs, my_wc_t wc,
+ uchar *s, uchar *e,
+ uint bs, uint bslen, uint diglen);
+
+int my_wc_to_printable_generic(CHARSET_INFO *cs, my_wc_t wc,
+ uchar *s, uchar *e);
+
+int my_wc_to_printable_8bit(CHARSET_INFO *cs, my_wc_t wc,
+ uchar *s, uchar *e);
+
+#endif /*STRINGS_DEF_INCLUDED */