From e391417f0fdbd746e23808b3d15d6cbe5a3b0aac Mon Sep 17 00:00:00 2001 From: Sachin Agarwal Date: Mon, 26 Oct 2020 12:21:29 +0200 Subject: Bug #30933728 INNODB FTS PHRASE SEARCH HIT AN ASSERT Problem: In full-text phrase search, we filter out rows that do not contain all the tokens in the phrase. If we do not filter out doc_ids that do not appear in all the tokens' doc_id lists, then we hit an assert. Fix: if any of the tokens has its last doc_id equal to the ith doc_id of the first token's doc_id list, then filter out the rest of the higher doc_ids. RB: 24909 Reviewed by : Annamalai Gurusami This is a cherry-pick of mysql/mysql-server@5aa075277dfe84a17a0331c57a6fe9b91dafb4cf but without a test case, because the test case depends on an n-gram tokenizer that will be missing from MariaDB until MDEV-10267 is added. --- storage/innobase/fts/fts0que.cc | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) (limited to 'storage/innobase/fts') diff --git a/storage/innobase/fts/fts0que.cc b/storage/innobase/fts/fts0que.cc index be758c13d52..df2b330fe4b 100644 --- a/storage/innobase/fts/fts0que.cc +++ b/storage/innobase/fts/fts0que.cc @@ -4421,24 +4421,27 @@ fts_phrase_or_proximity_search( if (k == ib_vector_size(query->match_array[j])) { end_list = TRUE; - if (match[j]->doc_id != match[0]->doc_id) { - /* no match */ - if (query->flags & FTS_PHRASE) { - ulint s; + if (query->flags & FTS_PHRASE) { + ulint s; + /* Since i is the last doc id in the + match_array[j], remove all doc ids > i + from the match_array[0]. 
*/ + fts_match_t* match_temp; + for (s = i + 1; s < n_matched; s++) { + match_temp = static_cast< + fts_match_t*>(ib_vector_get( + query->match_array[0], s)); + match_temp->doc_id = 0; + } + if (match[j]->doc_id != + match[0]->doc_id) { + /* no match */ match[0]->doc_id = 0; - - for (s = i + 1; s < n_matched; - s++) { - match[0] = static_cast< - fts_match_t*>( - ib_vector_get( - query->match_array[0], - s)); - match[0]->doc_id = 0; - } } + } + if (match[j]->doc_id != match[0]->doc_id) { goto func_exit; } } -- cgit v1.2.1 From 1ff8588c3f1f07e7f6e1b00fb5f485488324cc0a Mon Sep 17 00:00:00 2001 From: Sachin Agarwal Date: Fri, 28 Aug 2020 18:18:25 +0530 Subject: Bug #31228694 FTS QUERY WITH LIMIT HIT AN ASSERT Problem: 1. The server terminates abnormally when phrase search doesn't filter out doc_ids correctly. This problem has been fixed in Bug #30933728. 2. Wrong query result: It's a regression from the bug #22709692 fix. That fix optimized full-text search queries with a LIMIT clause: when the FTS expression involves only union operations, we fetch only the number of doc_ids specified by the LIMIT clause. Full-text phrase search is not a union operation, but we considered phrase search with a plugin parser to be a union operation. In a phrase search with a LIMIT clause, we fetch a limited number of doc_ids for each token, and if any of the selected doc_ids does not contain all tokens in the correct order, then we do not include that row_id in the result set. Therefore phrase search returns fewer rows than the number of qualifying rows that exist in the table. Fix: Added a condition that phrase search with a plugin parser is not a union operation. RB: 24925 Reviewed by : Annamalai Gurusami This is a cherry-pick of mysql/mysql-server@5549920b7a33ef33034461d973a9ecb17ce49799 without a test case, because the test case depends on an n-gram tokenizer that will be missing from MariaDB until MDEV-10267 is added. 
--- storage/innobase/fts/fts0ast.cc | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'storage/innobase/fts') diff --git a/storage/innobase/fts/fts0ast.cc b/storage/innobase/fts/fts0ast.cc index e22613a265b..6be4fb0d52b 100644 --- a/storage/innobase/fts/fts0ast.cc +++ b/storage/innobase/fts/fts0ast.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2007, 2018, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2007, 2020, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2018, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under @@ -557,8 +557,7 @@ fts_ast_node_check_union( fts_ast_node_t* node) { if (node->type == FTS_AST_LIST - || node->type == FTS_AST_SUBEXP_LIST - || node->type == FTS_AST_PARSER_PHRASE_LIST) { + || node->type == FTS_AST_SUBEXP_LIST) { for (node = node->list.head; node; node = node->next) { if (!fts_ast_node_check_union(node)) { @@ -566,6 +565,9 @@ fts_ast_node_check_union( } } + } else if (node->type == FTS_AST_PARSER_PHRASE_LIST) { + /* Phrase search for plugin parser */ + return(false); } else if (node->type == FTS_AST_OPER && (node->oper == FTS_IGNORE || node->oper == FTS_EXIST)) { -- cgit v1.2.1