summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: unknown <svoj@mysql.com/june.mysql.com> 2007-06-27 18:10:19 +0500
committer: unknown <svoj@mysql.com/june.mysql.com> 2007-06-27 18:10:19 +0500
commit: 030d98d3971aaabbbeb37ed641629944a97186f5 (patch)
tree: 8521349d513536e124d936118690ca0abb5320a9
parent: b3b8d5165d76de270532120dd8ce2a216308c0bd (diff)
download: mariadb-git-030d98d3971aaabbbeb37ed641629944a97186f5.tar.gz
BUG#29299 - repeatable myisam fulltext index corruption
A fulltext index could become corrupted by certain gbk characters. The problem was that, when skipping leading non-true-word characters, we assumed that these characters are always 1 byte long. This is not the case with the gbk character set, where non-true-word characters may be 2 bytes long. Affects 5.0 only.

myisam/ft_parser.c: Leading non-true-word characters may also be multi-byte (e.g. in the gbk character set).
mysql-test/r/fulltext2.result: A test case for BUG#29299.
mysql-test/t/fulltext2.test: A test case for BUG#29299.
-rw-r--r-- myisam/ft_parser.c 6
-rw-r--r-- mysql-test/r/fulltext2.result 12
-rw-r--r-- mysql-test/t/fulltext2.test 12
3 files changed, 28 insertions, 2 deletions
diff --git a/myisam/ft_parser.c b/myisam/ft_parser.c
index 6c79f9249cf..6d68542e4e2 100644
--- a/myisam/ft_parser.c
+++ b/myisam/ft_parser.c
@@ -111,7 +111,7 @@ byte ft_get_word(CHARSET_INFO *cs, byte **start, byte *end,
while (doc<end)
{
- for (;doc<end;doc++)
+ for (; doc < end; doc+= mbl)
{
if (true_word_char(cs,*doc)) break;
if (*doc == FTB_RQUOT && param->quot)
@@ -120,6 +120,7 @@ byte ft_get_word(CHARSET_INFO *cs, byte **start, byte *end,
*start=doc+1;
return 3; /* FTB_RBR */
}
+ mbl= my_mbcharlen(cs, *(uchar *)doc);
if (!param->quot)
{
if (*doc == FTB_LBR || *doc == FTB_RBR || *doc == FTB_LQUOT)
@@ -187,10 +188,11 @@ byte ft_simple_get_word(CHARSET_INFO *cs, byte **start, const byte *end,
do
{
- for (;; doc++)
+ for (;; doc+= mbl)
{
if (doc >= end) DBUG_RETURN(0);
if (true_word_char(cs, *doc)) break;
+ mbl= my_mbcharlen(cs, *(uchar *)doc);
}
mwc= length= 0;
diff --git a/mysql-test/r/fulltext2.result b/mysql-test/r/fulltext2.result
index f6a4b20bc22..f6bb4e4965a 100644
--- a/mysql-test/r/fulltext2.result
+++ b/mysql-test/r/fulltext2.result
@@ -241,3 +241,15 @@ select * from t1 where match a against('ab c' in boolean mode);
a
drop table t1;
set names latin1;
+CREATE TABLE t1(a VARCHAR(255) CHARACTER SET gbk, FULLTEXT(a));
+SET NAMES utf8;
+INSERT INTO t1 VALUES(0xF043616161),(0xBEF361616197C22061616161);
+SELECT HEX(a) FROM t1 WHERE MATCH(a) AGAINST(0x97C22061616161 IN BOOLEAN MODE);
+HEX(a)
+BEF361616197C22061616161
+DELETE FROM t1 LIMIT 1;
+CHECK TABLE t1;
+Table Op Msg_type Msg_text
+test.t1 check status OK
+SET NAMES latin1;
+DROP TABLE t1;
diff --git a/mysql-test/t/fulltext2.test b/mysql-test/t/fulltext2.test
index fd97f795534..99209dc8543 100644
--- a/mysql-test/t/fulltext2.test
+++ b/mysql-test/t/fulltext2.test
@@ -220,4 +220,16 @@ select * from t1 where match a against('ab c' in boolean mode);
drop table t1;
set names latin1;
+#
+# BUG#29299 - repeatable myisam fulltext index corruption
+#
+CREATE TABLE t1(a VARCHAR(255) CHARACTER SET gbk, FULLTEXT(a));
+SET NAMES utf8;
+INSERT INTO t1 VALUES(0xF043616161),(0xBEF361616197C22061616161);
+SELECT HEX(a) FROM t1 WHERE MATCH(a) AGAINST(0x97C22061616161 IN BOOLEAN MODE);
+DELETE FROM t1 LIMIT 1;
+CHECK TABLE t1;
+SET NAMES latin1;
+DROP TABLE t1;
+
# End of 4.1 tests