summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- mysql-test/r/ctype_many.result 21
-rw-r--r-- mysql-test/t/ctype_many.test 19
-rw-r--r-- strings/ctype-simple.c 23
3 files changed, 62 insertions, 1 deletion
diff --git a/mysql-test/r/ctype_many.result b/mysql-test/r/ctype_many.result
index 3c1c96fc47c..f0f98b15e11 100644
--- a/mysql-test/r/ctype_many.result
+++ b/mysql-test/r/ctype_many.result
@@ -1787,3 +1787,24 @@ SELECT * FROM t2 WHERE a=(SELECT a FROM t1) AND a=_LATIN1'x';
ERROR 21000: Subquery returns more than 1 row
DROP TABLE t2;
DROP TABLE t1;
+#
+# Start of 10.1 tests
+#
+#
+# MDEV-8362 dash '-' is not recognized in charset armscii8 on select where query
+#
+CREATE TABLE t1 (a VARCHAR(64) CHARACTER SET armscii8);
+INSERT INTO t1 VALUES ('abc-def');
+SELECT * FROM t1 WHERE a='abc-def';
+a
+abc-def
+SELECT * FROM t1 WHERE a LIKE 'abc%';
+a
+abc-def
+DROP TABLE t1;
+SELECT HEX(CONVERT(_utf8 0x2728292C2D2E USING armscii8));
+HEX(CONVERT(_utf8 0x2728292C2D2E USING armscii8))
+2728292C2D2E
+#
+# End of 10.1 tests
+#
diff --git a/mysql-test/t/ctype_many.test b/mysql-test/t/ctype_many.test
index 11ad942e19c..a51ead67eb2 100644
--- a/mysql-test/t/ctype_many.test
+++ b/mysql-test/t/ctype_many.test
@@ -331,3 +331,22 @@ CREATE TABLE t2 (a BINARY(1));
SELECT * FROM t2 WHERE a=(SELECT a FROM t1) AND a=_LATIN1'x';
DROP TABLE t2;
DROP TABLE t1;
+
+
+--echo #
+--echo # Start of 10.1 tests
+--echo #
+
+--echo #
+--echo # MDEV-8362 dash '-' is not recognized in charset armscii8 on select where query
+--echo #
+CREATE TABLE t1 (a VARCHAR(64) CHARACTER SET armscii8);
+INSERT INTO t1 VALUES ('abc-def');
+SELECT * FROM t1 WHERE a='abc-def';
+SELECT * FROM t1 WHERE a LIKE 'abc%';
+DROP TABLE t1;
+SELECT HEX(CONVERT(_utf8 0x2728292C2D2E USING armscii8));
+
+--echo #
+--echo # End of 10.1 tests
+--echo #
diff --git a/strings/ctype-simple.c b/strings/ctype-simple.c
index d7a1b3f33b4..394924c8209 100644
--- a/strings/ctype-simple.c
+++ b/strings/ctype-simple.c
@@ -1303,7 +1303,28 @@ create_fromuni(struct charset_info_st *cs,
if (wc >= idx[i].uidx.from && wc <= idx[i].uidx.to && wc)
{
int ofs= wc - idx[i].uidx.from;
- tab[ofs]= ch;
+ if (!tab[ofs] || tab[ofs] > 0x7F) /* Prefer ASCII*/
+ {
+ /*
+ Some character sets can have double encoding. For example,
+ in ARMSCII8, the following characters are encoded twice:
+
+ Encoding#1 Encoding#2 Unicode Character Name
+ ---------- ---------- ------- --------------
+ 0x27 0xFF U+0027 APOSTROPHE
+ 0x28 0xA5 U+0028 LEFT PARENTHESIS
+ 0x29 0xA4 U+0029 RIGHT PARENTHESIS
+ 0x2C 0xAB U+002C COMMA
+ 0x2D 0xAC U+002D HYPHEN-MINUS
+ 0x2E 0xA9 U+002E FULL STOP
+
+ That is, both 0x27 and 0xFF convert to Unicode U+0027.
+ When converting back from Unicode to ARMSCII,
+ we prefer the ASCII range, that is we want U+0027
+ to convert to 0x27 rather than to 0xFF.
+ */
+ tab[ofs]= ch;
+ }
}
}
}