summaryrefslogtreecommitdiff
path: root/storage/xtradb/fts/fts0que.cc
diff options
context:
space:
mode:
authorSergei Golubchik <sergii@pisem.net>2014-05-07 17:33:33 +0200
committerSergei Golubchik <sergii@pisem.net>2014-05-07 17:33:33 +0200
commita2807e41e8fcac00711cf4465e910327bfd69fe2 (patch)
treec94b0a32226b09e8675f8b9b559a610c554cda6b /storage/xtradb/fts/fts0que.cc
parent8ee9d19607d84aeebf97b704a19453f6a772299b (diff)
parent6cb3146af896eb7d27aed6815428008f105e8ae8 (diff)
downloadmariadb-git-a2807e41e8fcac00711cf4465e910327bfd69fe2.tar.gz
xtradb 5.6.17-65.0
Diffstat (limited to 'storage/xtradb/fts/fts0que.cc')
-rw-r--r--storage/xtradb/fts/fts0que.cc58
1 files changed, 31 insertions, 27 deletions
diff --git a/storage/xtradb/fts/fts0que.cc b/storage/xtradb/fts/fts0que.cc
index 189c43768cd..c5c5f954789 100644
--- a/storage/xtradb/fts/fts0que.cc
+++ b/storage/xtradb/fts/fts0que.cc
@@ -47,9 +47,7 @@ Completed 2011/7/10 Sunny and Jimmy Yang
#define RANK_DOWNGRADE (-1.0F)
#define RANK_UPGRADE (1.0F)
-/* Maximum number of words supported in a proximity search.
-FIXME, this limitation can be removed easily. Need to see
-if we want to enforce such limitation */
+/* Maximum number of words supported in a phrase or proximity search. */
#define MAX_PROXIMITY_ITEM 128
/* Memory used by rbt itself for create and node add */
@@ -183,6 +181,8 @@ struct fts_select_t {
the FTS index */
};
+typedef std::vector<ulint> pos_vector_t;
+
/** structure defines a set of ranges for original documents, each of which
has a minimum position and maximum position. Text in such range should
contain all words in the proximity search. We will need to count the
@@ -192,9 +192,9 @@ struct fts_proximity_t {
ulint n_pos; /*!< number of position set, defines
a range (min to max) containing all
matching words */
- ulint* min_pos; /*!< the minimum position (in bytes)
+ pos_vector_t min_pos; /*!< the minimum position (in bytes)
of the range */
- ulint* max_pos; /*!< the maximum position (in bytes)
+ pos_vector_t max_pos; /*!< the maximum position (in bytes)
of the range */
};
@@ -758,7 +758,7 @@ fts_query_union_doc_id(
fts_update_t* array = (fts_update_t*) query->deleted->doc_ids->data;
/* Check if the doc id is deleted and it's not already in our set. */
- if (fts_bsearch(array, 0, size, doc_id) < 0
+ if (fts_bsearch(array, 0, static_cast<int>(size), doc_id) < 0
&& rbt_search(query->doc_ids, &parent, &doc_id) != 0) {
fts_ranking_t ranking;
@@ -789,7 +789,7 @@ fts_query_remove_doc_id(
fts_update_t* array = (fts_update_t*) query->deleted->doc_ids->data;
/* Check if the doc id is deleted and it's in our set. */
- if (fts_bsearch(array, 0, size, doc_id) < 0
+ if (fts_bsearch(array, 0, static_cast<int>(size), doc_id) < 0
&& rbt_search(query->doc_ids, &parent, &doc_id) == 0) {
ut_free(rbt_remove_node(query->doc_ids, parent.last));
@@ -819,7 +819,7 @@ fts_query_change_ranking(
fts_update_t* array = (fts_update_t*) query->deleted->doc_ids->data;
/* Check if the doc id is deleted and it's in our set. */
- if (fts_bsearch(array, 0, size, doc_id) < 0
+ if (fts_bsearch(array, 0, static_cast<int>(size), doc_id) < 0
&& rbt_search(query->doc_ids, &parent, &doc_id) == 0) {
fts_ranking_t* ranking;
@@ -865,7 +865,7 @@ fts_query_intersect_doc_id(
if it matches 'b' and it's in doc_ids.(multi_exist = true). */
/* Check if the doc id is deleted and it's in our set */
- if (fts_bsearch(array, 0, size, doc_id) < 0) {
+ if (fts_bsearch(array, 0, static_cast<int>(size), doc_id) < 0) {
fts_ranking_t new_ranking;
if (rbt_search(query->doc_ids, &parent, &doc_id) != 0) {
@@ -1705,6 +1705,9 @@ fts_proximity_is_word_in_range(
{
fts_proximity_t* proximity_pos = phrase->proximity_pos;
+ ut_ad(proximity_pos->n_pos == proximity_pos->min_pos.size());
+ ut_ad(proximity_pos->n_pos == proximity_pos->max_pos.size());
+
/* Search each matched position pair (with min and max positions)
and count the number of words in the range */
for (ulint i = 0; i < proximity_pos->n_pos; i++) {
@@ -1922,6 +1925,7 @@ fts_query_fetch_document(
if (cur_len != UNIV_SQL_NULL && cur_len != 0) {
if (phrase->proximity_pos) {
+ ut_ad(prev_len + cur_len <= total_len);
memcpy(document_text + prev_len, data, cur_len);
} else {
/* For phrase search */
@@ -1932,17 +1936,18 @@ fts_query_fetch_document(
cur_len, prev_len,
phrase->heap);
}
+
+ /* Document positions are calculated from the beginning
+ of the first field, need to save the length for each
+ searched field to adjust the doc position when search
+ phrases. */
+ prev_len += cur_len + 1;
}
if (phrase->found) {
break;
}
- /* Document positions are calculated from the beginning
- of the first field, need to save the length for each
- searched field to adjust the doc position when search
- phrases. */
- prev_len += cur_len + 1;
exp = que_node_get_next(exp);
}
@@ -2588,6 +2593,11 @@ fts_query_phrase_search(
}
num_token = ib_vector_size(tokens);
+ if (num_token > MAX_PROXIMITY_ITEM) {
+ query->error = DB_FTS_TOO_MANY_WORDS_IN_PHRASE;
+ goto func_exit;
+ }
+
ut_ad(ib_vector_size(orig_tokens) >= num_token);
/* Ignore empty strings. */
@@ -2613,7 +2623,7 @@ fts_query_phrase_search(
heap_alloc, sizeof(fts_match_t),
64);
} else {
- ut_a(num_token < MAX_PROXIMITY_ITEM);
+ ut_a(num_token <= MAX_PROXIMITY_ITEM);
query->match_array =
(ib_vector_t**) mem_heap_alloc(
heap,
@@ -3497,14 +3507,14 @@ fts_query_prepare_result(
doc_freq = rbt_value(fts_doc_freq_t, node);
/* Don't put deleted docs into result */
- if (fts_bsearch(array, 0, size, doc_freq->doc_id)
+ if (fts_bsearch(array, 0, static_cast<int>(size), doc_freq->doc_id)
>= 0) {
continue;
}
ranking.doc_id = doc_freq->doc_id;
- ranking.rank = doc_freq->freq * word_freq->idf
- * word_freq->idf;
+ ranking.rank = static_cast<fts_rank_t>(
+ doc_freq->freq * word_freq->idf * word_freq->idf);
ranking.words = NULL;
fts_query_add_ranking(query, result->rankings_by_id,
@@ -4236,10 +4246,6 @@ fts_phrase_or_proximity_search(
ulint j;
ulint k = 0;
fts_proximity_t qualified_pos;
- ulint qualified_pos_buf[MAX_PROXIMITY_ITEM * 2];
-
- qualified_pos.min_pos = &qualified_pos_buf[0];
- qualified_pos.max_pos = &qualified_pos_buf[MAX_PROXIMITY_ITEM];
match[0] = static_cast<fts_match_t*>(
ib_vector_get(query->match_array[0], i));
@@ -4371,7 +4377,7 @@ fts_proximity_get_positions(
qualified_pos->n_pos = 0;
- ut_a(num_match < MAX_PROXIMITY_ITEM);
+ ut_a(num_match <= MAX_PROXIMITY_ITEM);
/* Each word could appear multiple times in a doc. So
we need to walk through each word's position list, and find
@@ -4426,8 +4432,8 @@ fts_proximity_get_positions(
length encoding, record the min_pos and
max_pos, we will need to verify the actual
number of characters */
- qualified_pos->min_pos[qualified_pos->n_pos] = min_pos;
- qualified_pos->max_pos[qualified_pos->n_pos] = max_pos;
+ qualified_pos->min_pos.push_back(min_pos);
+ qualified_pos->max_pos.push_back(max_pos);
qualified_pos->n_pos++;
}
@@ -4436,7 +4442,5 @@ fts_proximity_get_positions(
idx[min_idx]++;
}
- ut_ad(qualified_pos->n_pos <= MAX_PROXIMITY_ITEM);
-
return(qualified_pos->n_pos != 0);
}