diff options
author | Thirunarayanan Balathandayuthapani <thiru@mariadb.com> | 2022-07-05 18:21:21 +0530 |
---|---|---|
committer | Thirunarayanan Balathandayuthapani <thiru@mariadb.com> | 2022-07-05 18:21:46 +0530 |
commit | 68d7f7a4baacc8d93a87eb53d6b698b640d2534d (patch) | |
tree | 6a66b48d226d76b4d7ce9f7c89233e4aebf708b4 | |
parent | 990cde800a4aafc5f5647eb06db3eec461fd172a (diff) | |
download | mariadb-git-bb-10.3-MDEV-20797.tar.gz |
MDEV-20797 FULLTEXT search with apostrophe, and mandatory words [bb-10.3-MDEV-20797]
- InnoDB should ignore a single-character word followed by an apostrophe
while tokenising the document. For example, if the input string is
O'brien, InnoDB currently separates it into two tokens, O and brien.
After this patch, InnoDB ignores the token 'O' and considers
only 'brien'.
-rw-r--r-- | mysql-test/suite/innodb_fts/r/fulltext.result | 11 | ||||
-rw-r--r-- | mysql-test/suite/innodb_fts/t/fulltext.test | 9 | ||||
-rw-r--r-- | storage/innobase/handler/ha_innodb.cc | 14 |
3 files changed, 33 insertions, 1 deletions
diff --git a/mysql-test/suite/innodb_fts/r/fulltext.result b/mysql-test/suite/innodb_fts/r/fulltext.result index 7775a46e0d8..6a889f503b4 100644 --- a/mysql-test/suite/innodb_fts/r/fulltext.result +++ b/mysql-test/suite/innodb_fts/r/fulltext.result @@ -732,4 +732,15 @@ ALTER TABLE t1 DROP KEY `ftidx` ; INSERT INTO t1 (col_int, col_text) VALUES ( 1255, NULL); DROP TABLE t1; SET @@global.innodb_file_per_table = @save; +# +# MDEV-20797 FULLTEXT search with apostrophe, +# and mandatory words +# +CREATE TABLE t1(f1 TINYTEXT NOT NULL, FULLTEXT(f1))ENGINE=InnoDB; +INSERT INTO t1 VALUES('O''Brien'), ('O Brien'); +SELECT * FROM t1 WHERE MATCH (f1) AGAINST ("+O'Brien" IN BOOLEAN MODE); +f1 +O'Brien +O Brien +DROP TABLE t1; # End of 10.3 tests diff --git a/mysql-test/suite/innodb_fts/t/fulltext.test b/mysql-test/suite/innodb_fts/t/fulltext.test index 4878a0bee3f..43c8de3fd5e 100644 --- a/mysql-test/suite/innodb_fts/t/fulltext.test +++ b/mysql-test/suite/innodb_fts/t/fulltext.test @@ -757,4 +757,13 @@ INSERT INTO t1 (col_int, col_text) VALUES ( 1255, NULL); DROP TABLE t1; SET @@global.innodb_file_per_table = @save; +--echo # +--echo # MDEV-20797 FULLTEXT search with apostrophe, +--echo # and mandatory words +--echo # +CREATE TABLE t1(f1 TINYTEXT NOT NULL, FULLTEXT(f1))ENGINE=InnoDB; +INSERT INTO t1 VALUES('O''Brien'), ('O Brien'); +SELECT * FROM t1 WHERE MATCH (f1) AGAINST ("+O'Brien" IN BOOLEAN MODE); +DROP TABLE t1; + --echo # End of 10.3 tests diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 514e6fa31a2..e1b645c1cc3 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -6912,7 +6912,8 @@ innobase_mysql_fts_get_token( ulint mwc = 0; ulint length = 0; - + bool reset_token_str = false; +reset: token->f_str = const_cast<byte*>(doc); while (doc < end) { @@ -6923,6 +6924,9 @@ innobase_mysql_fts_get_token( cs, &ctype, (uchar*) doc, (uchar*) end); if (true_word_char(ctype, *doc)) { mwc = 0; 
+ } else if (*doc == '\'' && length == 1) { + /* Could be apostrophe */ + reset_token_str = true; } else if (!misc_word_char(*doc) || mwc) { break; } else { @@ -6932,6 +6936,14 @@ innobase_mysql_fts_get_token( ++length; doc += mbl > 0 ? mbl : (mbl < 0 ? -mbl : 1); + if (reset_token_str) { + /* Reset the token if the single character + followed by apostrophe */ + mwc = 0; + length = 0; + reset_token_str = false; + goto reset; + } } token->f_len = (uint) (doc - token->f_str) - mwc; |