xtradb 5.6.17-65.0

author: Sergei Golubchik <sergii@pisem.net> 2014-05-07 17:33:33 +0200
committer: Sergei Golubchik <sergii@pisem.net> 2014-05-07 17:33:33 +0200
commit: a2807e41e8fcac00711cf4465e910327bfd69fe2 (patch)
tree: c94b0a32226b09e8675f8b9b559a610c554cda6b /storage/xtradb/fts/fts0que.cc
parent: 8ee9d19607d84aeebf97b704a19453f6a772299b (diff)
parent: 6cb3146af896eb7d27aed6815428008f105e8ae8 (diff)
download: mariadb-git-a2807e41e8fcac00711cf4465e910327bfd69fe2.tar.gz
1 files changed, 31 insertions, 27 deletions
diff --git a/storage/xtradb/fts/fts0que.cc b/storage/xtradb/fts/fts0que.cc
index 189c43768cd..c5c5f954789 100644
--- a/storage/xtradb/fts/fts0que.cc
+++ b/storage/xtradb/fts/fts0que.cc
@@ -47,9 +47,7 @@ Completed 2011/7/10 Sunny and Jimmy Yang
 #define RANK_DOWNGRADE		(-1.0F)
 #define RANK_UPGRADE		(1.0F)
 
-/* Maximum number of words supported in a proximity search.
-FIXME, this limitation can be removed easily. Need to see
-if we want to enforce such limitation */
+/* Maximum number of words supported in a phrase or proximity search. */
 #define MAX_PROXIMITY_ITEM	128
 
 /* Memory used by rbt itself for create and node add */
@@ -183,6 +181,8 @@ struct fts_select_t {
 					the FTS index */
 };
 
+typedef std::vector<ulint>       pos_vector_t;
+
 /** structure defines a set of ranges for original documents, each of which
 has a minimum position and maximum position. Text in such range should
 contain all words in the proximity search. We will need to count the
@@ -192,9 +192,9 @@ struct fts_proximity_t {
 	ulint		n_pos;		/*!< number of position set, defines
 					a range (min to max) containing all
 					matching words */
-	ulint*		min_pos;	/*!< the minimum position (in bytes)
+	pos_vector_t	min_pos;	/*!< the minimum position (in bytes)
 					of the range */
-	ulint*		max_pos;	/*!< the maximum position (in bytes)
+	pos_vector_t	max_pos;	/*!< the maximum position (in bytes)
 					of the range */
 };
 
@@ -758,7 +758,7 @@ fts_query_union_doc_id(
 	fts_update_t*	array = (fts_update_t*) query->deleted->doc_ids->data;
 
 	/* Check if the doc id is deleted and it's not already in our set. */
-	if (fts_bsearch(array, 0, size, doc_id) < 0
+	if (fts_bsearch(array, 0, static_cast<int>(size), doc_id) < 0
 	    && rbt_search(query->doc_ids, &parent, &doc_id) != 0) {
 
 		fts_ranking_t	ranking;
@@ -789,7 +789,7 @@ fts_query_remove_doc_id(
 	fts_update_t*	array = (fts_update_t*) query->deleted->doc_ids->data;
 
 	/* Check if the doc id is deleted and it's in our set. */
-	if (fts_bsearch(array, 0, size, doc_id) < 0
+	if (fts_bsearch(array, 0, static_cast<int>(size), doc_id) < 0
 	    && rbt_search(query->doc_ids, &parent, &doc_id) == 0) {
 		ut_free(rbt_remove_node(query->doc_ids, parent.last));
 
@@ -819,7 +819,7 @@ fts_query_change_ranking(
 	fts_update_t*	array = (fts_update_t*) query->deleted->doc_ids->data;
 
 	/* Check if the doc id is deleted and it's in our set. */
-	if (fts_bsearch(array, 0, size, doc_id) < 0
+	if (fts_bsearch(array, 0, static_cast<int>(size), doc_id) < 0
 	    && rbt_search(query->doc_ids, &parent, &doc_id) == 0) {
 
 		fts_ranking_t*	ranking;
@@ -865,7 +865,7 @@ fts_query_intersect_doc_id(
 	      if it matches 'b' and it's in doc_ids.(multi_exist = true). */
 
 	/* Check if the doc id is deleted and it's in our set */
-	if (fts_bsearch(array, 0, size, doc_id) < 0) {
+	if (fts_bsearch(array, 0, static_cast<int>(size), doc_id) < 0) {
 		fts_ranking_t	new_ranking;
 
 		if (rbt_search(query->doc_ids, &parent, &doc_id) != 0) {
@@ -1705,6 +1705,9 @@ fts_proximity_is_word_in_range(
 {
 	fts_proximity_t*	proximity_pos = phrase->proximity_pos;
 
+	ut_ad(proximity_pos->n_pos == proximity_pos->min_pos.size());
+	ut_ad(proximity_pos->n_pos == proximity_pos->max_pos.size());
+
 	/* Search each matched position pair (with min and max positions)
 	and count the number of words in the range */
 	for (ulint i = 0; i < proximity_pos->n_pos; i++) {
@@ -1922,6 +1925,7 @@ fts_query_fetch_document(
 
 		if (cur_len != UNIV_SQL_NULL && cur_len != 0) {
 			if (phrase->proximity_pos) {
+				ut_ad(prev_len + cur_len <= total_len);
 				memcpy(document_text + prev_len, data, cur_len);
 			} else {
 				/* For phrase search */
@@ -1932,17 +1936,18 @@ fts_query_fetch_document(
 						cur_len, prev_len,
 						phrase->heap);
 			}
+
+			/* Document positions are calculated from the beginning
+			of the first field, so we need to save the length of
+			each searched field in order to adjust the document
+			position when searching for phrases. */
+			prev_len += cur_len + 1;
 		}
 
 		if (phrase->found) {
 			break;
 		}
 
-		/* Document positions are calculated from the beginning
-		of the first field, need to save the length for each
-		searched field to adjust the doc position when search
-		phrases. */
-		prev_len += cur_len + 1;
 		exp = que_node_get_next(exp);
 	}
 
@@ -2588,6 +2593,11 @@ fts_query_phrase_search(
 	}
 
 	num_token = ib_vector_size(tokens);
+	if (num_token > MAX_PROXIMITY_ITEM) {
+		query->error = DB_FTS_TOO_MANY_WORDS_IN_PHRASE;
+		goto func_exit;
+	}
+
 	ut_ad(ib_vector_size(orig_tokens) >= num_token);
 
 	/* Ignore empty strings. */
@@ -2613,7 +2623,7 @@ fts_query_phrase_search(
 					heap_alloc, sizeof(fts_match_t),
 					64);
 			} else {
-				ut_a(num_token < MAX_PROXIMITY_ITEM);
+				ut_a(num_token <= MAX_PROXIMITY_ITEM);
 				query->match_array =
 					(ib_vector_t**) mem_heap_alloc(
 						heap,
@@ -3497,14 +3507,14 @@ fts_query_prepare_result(
 			doc_freq = rbt_value(fts_doc_freq_t, node);
 
 			/* Don't put deleted docs into result */
-			if (fts_bsearch(array, 0, size, doc_freq->doc_id)
+			if (fts_bsearch(array, 0, static_cast<int>(size), doc_freq->doc_id)
 			    >= 0) {
 				continue;
 			}
 
 			ranking.doc_id = doc_freq->doc_id;
-			ranking.rank = doc_freq->freq * word_freq->idf
-				* word_freq->idf;
+			ranking.rank = static_cast<fts_rank_t>(
+				doc_freq->freq * word_freq->idf * word_freq->idf);
 			ranking.words = NULL;
 
 			fts_query_add_ranking(query, result->rankings_by_id,
@@ -4236,10 +4246,6 @@ fts_phrase_or_proximity_search(
 		ulint		j;
 		ulint		k = 0;
 		fts_proximity_t	qualified_pos;
-		ulint		qualified_pos_buf[MAX_PROXIMITY_ITEM * 2];
-
-		qualified_pos.min_pos = &qualified_pos_buf[0];
-		qualified_pos.max_pos = &qualified_pos_buf[MAX_PROXIMITY_ITEM];
 
 		match[0] = static_cast<fts_match_t*>(
 			ib_vector_get(query->match_array[0], i));
@@ -4371,7 +4377,7 @@ fts_proximity_get_positions(
 
 	qualified_pos->n_pos = 0;
 
-	ut_a(num_match < MAX_PROXIMITY_ITEM);
+	ut_a(num_match <= MAX_PROXIMITY_ITEM);
 
 	/* Each word could appear multiple times in a doc. So
 	we need to walk through each word's position list, and find
@@ -4426,8 +4432,8 @@ fts_proximity_get_positions(
 			length encoding, record the min_pos and
 			max_pos, we will need to verify the actual
 			number of characters */
-			qualified_pos->min_pos[qualified_pos->n_pos] = min_pos;
-			qualified_pos->max_pos[qualified_pos->n_pos] = max_pos;
+			qualified_pos->min_pos.push_back(min_pos);
+			qualified_pos->max_pos.push_back(max_pos);
 			qualified_pos->n_pos++;
 		}
 
@@ -4436,7 +4442,5 @@ fts_proximity_get_positions(
 		idx[min_idx]++;
 	}
 
-	ut_ad(qualified_pos->n_pos <= MAX_PROXIMITY_ITEM);
-
 	return(qualified_pos->n_pos != 0);
 }
author	Sergei Golubchik <sergii@pisem.net>	2014-05-07 17:33:33 +0200
committer	Sergei Golubchik <sergii@pisem.net>	2014-05-07 17:33:33 +0200
commit	a2807e41e8fcac00711cf4465e910327bfd69fe2 (patch)
tree	c94b0a32226b09e8675f8b9b559a610c554cda6b /storage/xtradb/fts/fts0que.cc
parent	8ee9d19607d84aeebf97b704a19453f6a772299b (diff)
parent	6cb3146af896eb7d27aed6815428008f105e8ae8 (diff)
download	mariadb-git-a2807e41e8fcac00711cf4465e910327bfd69fe2.tar.gz