Bug #3928 regexp [[:>:]] and UTF-8

author: bar@bar.intranet.mysql.r18.ru <> 2004-06-07 12:51:18 +0500
committer: bar@bar.intranet.mysql.r18.ru <> 2004-06-07 12:51:18 +0500
commit: b30b1ccc6ae5027ac315de1b60c5ec00858698f9 (patch)
tree: b95e6a3d5908a015cf9163612fd462eb0d498c8e /strings
parent: dabc0e774eeb6e70f2fda743c50f19a4c72f8d3d (diff)
download: mariadb-git-b30b1ccc6ae5027ac315de1b60c5ec00858698f9.tar.gz
1 files changed, 13 insertions, 7 deletions
diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c
index 29d2c5d1358..09b918b0777 100644
--- a/strings/ctype-utf8.c
+++ b/strings/ctype-utf8.c
@@ -1524,8 +1524,12 @@ MY_UNICASE_INFO *uni_plane[256]={
 
 #ifdef HAVE_CHARSET_utf8
 
-/* These arrays are taken from usa7 implementation */
-
+/* 
+  We consider bytes with code more than 127 as letters.
+  This guarantees that word boundaries work fine with regular
+  expressions. Note, there is no need to mark byte 255 as a
+  letter, it is an illegal byte in UTF8.
+*/
 static uchar ctype_utf8[] = {
     0,
    32, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 32, 32,
@@ -1536,16 +1540,18 @@ static uchar ctype_utf8[] = {
     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1, 16, 16, 16, 16, 16,
    16,130,130,130,130,130,130,  2,  2,  2,  2,  2,  2,  2,  2,  2,
     2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2, 16, 16, 16, 16, 32,
-    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-    0,  0,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
+    3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
+    3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
+    3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
+    3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
+    3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
     3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
     3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
     3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  0
 };
 
+/* The arrays below are taken from the usa7 implementation */
+
 static uchar to_lower_utf8[] = {
     0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
    16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
author	bar@bar.intranet.mysql.r18.ru <>	2004-06-07 12:51:18 +0500
committer	bar@bar.intranet.mysql.r18.ru <>	2004-06-07 12:51:18 +0500
commit	b30b1ccc6ae5027ac315de1b60c5ec00858698f9 (patch)
tree	b95e6a3d5908a015cf9163612fd462eb0d498c8e /strings
parent	dabc0e774eeb6e70f2fda743c50f19a4c72f8d3d (diff)
download	mariadb-git-b30b1ccc6ae5027ac315de1b60c5ec00858698f9.tar.gz