summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorThirunarayanan Balathandayuthapani <thiru@mariadb.com>2022-07-05 18:21:21 +0530
committerThirunarayanan Balathandayuthapani <thiru@mariadb.com>2022-07-05 18:21:46 +0530
commit68d7f7a4baacc8d93a87eb53d6b698b640d2534d (patch)
tree6a66b48d226d76b4d7ce9f7c89233e4aebf708b4
parent990cde800a4aafc5f5647eb06db3eec461fd172a (diff)
downloadmariadb-git-bb-10.3-MDEV-20797.tar.gz
MDEV-20797 FULLTEXT search with apostrophe, and mandatory words (branch: bb-10.3-MDEV-20797)
- InnoDB should ignore a single-character word followed by an apostrophe while tokenising the document. For example, if the input string is O'brien, InnoDB currently separates it into two tokens, O and brien. After this patch, InnoDB ignores the token 'O' and considers only 'brien'.
-rw-r--r--mysql-test/suite/innodb_fts/r/fulltext.result11
-rw-r--r--mysql-test/suite/innodb_fts/t/fulltext.test9
-rw-r--r--storage/innobase/handler/ha_innodb.cc14
3 files changed, 33 insertions, 1 deletions
diff --git a/mysql-test/suite/innodb_fts/r/fulltext.result b/mysql-test/suite/innodb_fts/r/fulltext.result
index 7775a46e0d8..6a889f503b4 100644
--- a/mysql-test/suite/innodb_fts/r/fulltext.result
+++ b/mysql-test/suite/innodb_fts/r/fulltext.result
@@ -732,4 +732,15 @@ ALTER TABLE t1 DROP KEY `ftidx` ;
INSERT INTO t1 (col_int, col_text) VALUES ( 1255, NULL);
DROP TABLE t1;
SET @@global.innodb_file_per_table = @save;
+#
+# MDEV-20797 FULLTEXT search with apostrophe,
+# and mandatory words
+#
+CREATE TABLE t1(f1 TINYTEXT NOT NULL, FULLTEXT(f1))ENGINE=InnoDB;
+INSERT INTO t1 VALUES('O''Brien'), ('O Brien');
+SELECT * FROM t1 WHERE MATCH (f1) AGAINST ("+O'Brien" IN BOOLEAN MODE);
+f1
+O'Brien
+O Brien
+DROP TABLE t1;
# End of 10.3 tests
diff --git a/mysql-test/suite/innodb_fts/t/fulltext.test b/mysql-test/suite/innodb_fts/t/fulltext.test
index 4878a0bee3f..43c8de3fd5e 100644
--- a/mysql-test/suite/innodb_fts/t/fulltext.test
+++ b/mysql-test/suite/innodb_fts/t/fulltext.test
@@ -757,4 +757,13 @@ INSERT INTO t1 (col_int, col_text) VALUES ( 1255, NULL);
DROP TABLE t1;
SET @@global.innodb_file_per_table = @save;
+--echo #
+--echo # MDEV-20797 FULLTEXT search with apostrophe,
+--echo # and mandatory words
+--echo #
+CREATE TABLE t1(f1 TINYTEXT NOT NULL, FULLTEXT(f1))ENGINE=InnoDB;
+INSERT INTO t1 VALUES('O''Brien'), ('O Brien');
+SELECT * FROM t1 WHERE MATCH (f1) AGAINST ("+O'Brien" IN BOOLEAN MODE);
+DROP TABLE t1;
+
--echo # End of 10.3 tests
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
index 514e6fa31a2..e1b645c1cc3 100644
--- a/storage/innobase/handler/ha_innodb.cc
+++ b/storage/innobase/handler/ha_innodb.cc
@@ -6912,7 +6912,8 @@ innobase_mysql_fts_get_token(
ulint mwc = 0;
ulint length = 0;
-
+ bool reset_token_str = false;
+reset:
token->f_str = const_cast<byte*>(doc);
while (doc < end) {
@@ -6923,6 +6924,9 @@ innobase_mysql_fts_get_token(
cs, &ctype, (uchar*) doc, (uchar*) end);
if (true_word_char(ctype, *doc)) {
mwc = 0;
+ } else if (*doc == '\'' && length == 1) {
+ /* Could be apostrophe */
+ reset_token_str = true;
} else if (!misc_word_char(*doc) || mwc) {
break;
} else {
@@ -6932,6 +6936,14 @@ innobase_mysql_fts_get_token(
++length;
doc += mbl > 0 ? mbl : (mbl < 0 ? -mbl : 1);
+ if (reset_token_str) {
+ /* Reset the token if the single character
+ followed by apostrophe */
+ mwc = 0;
+ length = 0;
+ reset_token_str = false;
+ goto reset;
+ }
}
token->f_len = (uint) (doc - token->f_str) - mwc;