diff options
-rw-r--r-- | mysql-test/r/ctype_many.result | 21 | ||||
-rw-r--r-- | mysql-test/t/ctype_many.test | 19 | ||||
-rw-r--r-- | strings/ctype-simple.c | 23 |
3 files changed, 62 insertions, 1 deletion
diff --git a/mysql-test/r/ctype_many.result b/mysql-test/r/ctype_many.result index 3c1c96fc47c..f0f98b15e11 100644 --- a/mysql-test/r/ctype_many.result +++ b/mysql-test/r/ctype_many.result @@ -1787,3 +1787,24 @@ SELECT * FROM t2 WHERE a=(SELECT a FROM t1) AND a=_LATIN1'x'; ERROR 21000: Subquery returns more than 1 row DROP TABLE t2; DROP TABLE t1; +# +# Start of 10.1 tests +# +# +# MDEV-8362 dash '-' is not recognized in charset armscii8 on select where query +# +CREATE TABLE t1 (a VARCHAR(64) CHARACTER SET armscii8); +INSERT INTO t1 VALUES ('abc-def'); +SELECT * FROM t1 WHERE a='abc-def'; +a +abc-def +SELECT * FROM t1 WHERE a LIKE 'abc%'; +a +abc-def +DROP TABLE t1; +SELECT HEX(CONVERT(_utf8 0x2728292C2D2E USING armscii8)); +HEX(CONVERT(_utf8 0x2728292C2D2E USING armscii8)) +2728292C2D2E +# +# End of 10.1 tests +# diff --git a/mysql-test/t/ctype_many.test b/mysql-test/t/ctype_many.test index 11ad942e19c..a51ead67eb2 100644 --- a/mysql-test/t/ctype_many.test +++ b/mysql-test/t/ctype_many.test @@ -331,3 +331,22 @@ CREATE TABLE t2 (a BINARY(1)); SELECT * FROM t2 WHERE a=(SELECT a FROM t1) AND a=_LATIN1'x'; DROP TABLE t2; DROP TABLE t1; + + +--echo # +--echo # Start of 10.1 tests +--echo # + +--echo # +--echo # MDEV-8362 dash '-' is not recognized in charset armscii8 on select where query +--echo # +CREATE TABLE t1 (a VARCHAR(64) CHARACTER SET armscii8); +INSERT INTO t1 VALUES ('abc-def'); +SELECT * FROM t1 WHERE a='abc-def'; +SELECT * FROM t1 WHERE a LIKE 'abc%'; +DROP TABLE t1; +SELECT HEX(CONVERT(_utf8 0x2728292C2D2E USING armscii8)); + +--echo # +--echo # End of 10.1 tests +--echo # diff --git a/strings/ctype-simple.c b/strings/ctype-simple.c index d7a1b3f33b4..394924c8209 100644 --- a/strings/ctype-simple.c +++ b/strings/ctype-simple.c @@ -1303,7 +1303,28 @@ create_fromuni(struct charset_info_st *cs, if (wc >= idx[i].uidx.from && wc <= idx[i].uidx.to && wc) { int ofs= wc - idx[i].uidx.from; - tab[ofs]= ch; + if (!tab[ofs] || tab[ofs] > 0x7F) /* Prefer ASCII*/ + { 
+ /* + Some character sets can have double encoding. For example, + in ARMSCII8, the following characters are encoded twice: + + Encoding#1 Encoding#2 Unicode Character Name + ---------- ---------- ------- -------------- + 0x27 0xFF U+0027 APOSTROPHE + 0x28 0xA5 U+0028 LEFT PARENTHESIS + 0x29 0xA4 U+0029 RIGHT PARENTHESIS + 0x2C 0xAB U+002C COMMA + 0x2D 0xAC U+002D HYPHEN-MINUS + 0x2E 0xA9 U+002E FULL STOP + + That is, both 0x27 and 0xFF convert to Unicode U+0027. + When converting back from Unicode to ARMSCII, + we prefer the ASCII range, that is we want U+0027 + to convert to 0x27 rather than to 0xFF. + */ + tab[ofs]= ch; + } } } } |