diff options
author | Sergei Golubchik <sergii@pisem.net> | 2014-05-07 17:33:33 +0200 |
---|---|---|
committer | Sergei Golubchik <sergii@pisem.net> | 2014-05-07 17:33:33 +0200 |
commit | a2807e41e8fcac00711cf4465e910327bfd69fe2 (patch) | |
tree | c94b0a32226b09e8675f8b9b559a610c554cda6b /storage/xtradb/fts/fts0que.cc | |
parent | 8ee9d19607d84aeebf97b704a19453f6a772299b (diff) | |
parent | 6cb3146af896eb7d27aed6815428008f105e8ae8 (diff) | |
download | mariadb-git-a2807e41e8fcac00711cf4465e910327bfd69fe2.tar.gz |
xtradb 5.6.17-65.0
Diffstat (limited to 'storage/xtradb/fts/fts0que.cc')
-rw-r--r-- | storage/xtradb/fts/fts0que.cc | 58 |
1 files changed, 31 insertions, 27 deletions
diff --git a/storage/xtradb/fts/fts0que.cc b/storage/xtradb/fts/fts0que.cc index 189c43768cd..c5c5f954789 100644 --- a/storage/xtradb/fts/fts0que.cc +++ b/storage/xtradb/fts/fts0que.cc @@ -47,9 +47,7 @@ Completed 2011/7/10 Sunny and Jimmy Yang #define RANK_DOWNGRADE (-1.0F) #define RANK_UPGRADE (1.0F) -/* Maximum number of words supported in a proximity search. -FIXME, this limitation can be removed easily. Need to see -if we want to enforce such limitation */ +/* Maximum number of words supported in a phrase or proximity search. */ #define MAX_PROXIMITY_ITEM 128 /* Memory used by rbt itself for create and node add */ @@ -183,6 +181,8 @@ struct fts_select_t { the FTS index */ }; +typedef std::vector<ulint> pos_vector_t; + /** structure defines a set of ranges for original documents, each of which has a minimum position and maximum position. Text in such range should contain all words in the proximity search. We will need to count the @@ -192,9 +192,9 @@ struct fts_proximity_t { ulint n_pos; /*!< number of position set, defines a range (min to max) containing all matching words */ - ulint* min_pos; /*!< the minimum position (in bytes) + pos_vector_t min_pos; /*!< the minimum position (in bytes) of the range */ - ulint* max_pos; /*!< the maximum position (in bytes) + pos_vector_t max_pos; /*!< the maximum position (in bytes) of the range */ }; @@ -758,7 +758,7 @@ fts_query_union_doc_id( fts_update_t* array = (fts_update_t*) query->deleted->doc_ids->data; /* Check if the doc id is deleted and it's not already in our set. */ - if (fts_bsearch(array, 0, size, doc_id) < 0 + if (fts_bsearch(array, 0, static_cast<int>(size), doc_id) < 0 && rbt_search(query->doc_ids, &parent, &doc_id) != 0) { fts_ranking_t ranking; @@ -789,7 +789,7 @@ fts_query_remove_doc_id( fts_update_t* array = (fts_update_t*) query->deleted->doc_ids->data; /* Check if the doc id is deleted and it's in our set. */ - if (fts_bsearch(array, 0, size, doc_id) < 0 + if (fts_bsearch(array, 0, static_cast<int>(size), doc_id) < 0 && rbt_search(query->doc_ids, &parent, &doc_id) == 0) { ut_free(rbt_remove_node(query->doc_ids, parent.last)); @@ -819,7 +819,7 @@ fts_query_change_ranking( fts_update_t* array = (fts_update_t*) query->deleted->doc_ids->data; /* Check if the doc id is deleted and it's in our set. */ - if (fts_bsearch(array, 0, size, doc_id) < 0 + if (fts_bsearch(array, 0, static_cast<int>(size), doc_id) < 0 && rbt_search(query->doc_ids, &parent, &doc_id) == 0) { fts_ranking_t* ranking; @@ -865,7 +865,7 @@ fts_query_intersect_doc_id( if it matches 'b' and it's in doc_ids.(multi_exist = true). */ /* Check if the doc id is deleted and it's in our set */ - if (fts_bsearch(array, 0, size, doc_id) < 0) { + if (fts_bsearch(array, 0, static_cast<int>(size), doc_id) < 0) { fts_ranking_t new_ranking; if (rbt_search(query->doc_ids, &parent, &doc_id) != 0) { @@ -1705,6 +1705,9 @@ fts_proximity_is_word_in_range( { fts_proximity_t* proximity_pos = phrase->proximity_pos; + ut_ad(proximity_pos->n_pos == proximity_pos->min_pos.size()); + ut_ad(proximity_pos->n_pos == proximity_pos->max_pos.size()); + /* Search each matched position pair (with min and max positions) and count the number of words in the range */ for (ulint i = 0; i < proximity_pos->n_pos; i++) { @@ -1922,6 +1925,7 @@ fts_query_fetch_document( if (cur_len != UNIV_SQL_NULL && cur_len != 0) { if (phrase->proximity_pos) { + ut_ad(prev_len + cur_len <= total_len); memcpy(document_text + prev_len, data, cur_len); } else { /* For phrase search */ @@ -1932,17 +1936,18 @@ fts_query_fetch_document( cur_len, prev_len, phrase->heap); } + + /* Document positions are calculated from the beginning + of the first field, need to save the length for each + searched field to adjust the doc position when search + phrases. */ + prev_len += cur_len + 1; } if (phrase->found) { break; } - /* Document positions are calculated from the beginning - of the first field, need to save the length for each - searched field to adjust the doc position when search - phrases. */ - prev_len += cur_len + 1; exp = que_node_get_next(exp); } @@ -2588,6 +2593,11 @@ fts_query_phrase_search( } num_token = ib_vector_size(tokens); + if (num_token > MAX_PROXIMITY_ITEM) { + query->error = DB_FTS_TOO_MANY_WORDS_IN_PHRASE; + goto func_exit; + } + ut_ad(ib_vector_size(orig_tokens) >= num_token); /* Ignore empty strings. */ @@ -2613,7 +2623,7 @@ fts_query_phrase_search( heap_alloc, sizeof(fts_match_t), 64); } else { - ut_a(num_token < MAX_PROXIMITY_ITEM); + ut_a(num_token <= MAX_PROXIMITY_ITEM); query->match_array = (ib_vector_t**) mem_heap_alloc( heap, @@ -3497,14 +3507,14 @@ fts_query_prepare_result( doc_freq = rbt_value(fts_doc_freq_t, node); /* Don't put deleted docs into result */ - if (fts_bsearch(array, 0, size, doc_freq->doc_id) + if (fts_bsearch(array, 0, static_cast<int>(size), doc_freq->doc_id) >= 0) { continue; } ranking.doc_id = doc_freq->doc_id; - ranking.rank = doc_freq->freq * word_freq->idf - * word_freq->idf; + ranking.rank = static_cast<fts_rank_t>( + doc_freq->freq * word_freq->idf * word_freq->idf); ranking.words = NULL; fts_query_add_ranking(query, result->rankings_by_id, @@ -4236,10 +4246,6 @@ fts_phrase_or_proximity_search( ulint j; ulint k = 0; fts_proximity_t qualified_pos; - ulint qualified_pos_buf[MAX_PROXIMITY_ITEM * 2]; - - qualified_pos.min_pos = &qualified_pos_buf[0]; - qualified_pos.max_pos = &qualified_pos_buf[MAX_PROXIMITY_ITEM]; match[0] = static_cast<fts_match_t*>( ib_vector_get(query->match_array[0], i)); @@ -4371,7 +4377,7 @@ fts_proximity_get_positions( qualified_pos->n_pos = 0; - ut_a(num_match < MAX_PROXIMITY_ITEM); + ut_a(num_match <= MAX_PROXIMITY_ITEM); /* Each word could appear multiple times in a doc. So we need to walk through each word's position list, and find @@ -4426,8 +4432,8 @@ fts_proximity_get_positions( length encoding, record the min_pos and max_pos, we will need to verify the actual number of characters */ - qualified_pos->min_pos[qualified_pos->n_pos] = min_pos; - qualified_pos->max_pos[qualified_pos->n_pos] = max_pos; + qualified_pos->min_pos.push_back(min_pos); + qualified_pos->max_pos.push_back(max_pos); qualified_pos->n_pos++; } @@ -4436,7 +4442,5 @@ fts_proximity_get_positions( idx[min_idx]++; } - ut_ad(qualified_pos->n_pos <= MAX_PROXIMITY_ITEM); - return(qualified_pos->n_pos != 0); } |