summary refs log tree commit diff
path: root/storage/xtradb/row/row0sel.cc
diff options
context:
space:
mode:
author Vicențiu Ciorbaru <vicentiu@mariadb.org> 2017-05-16 14:16:11 +0300
committer Vicențiu Ciorbaru <vicentiu@mariadb.org> 2017-05-16 14:16:11 +0300
commit 360a4a037271d65ab6471f7ab3f9b6a893d90a31 (patch)
tree e3ecdbb7930396ec2f7069a1d81b7ff169a288c3 /storage/xtradb/row/row0sel.cc
parent 8d69ce7b821a88b5ba8749a08489839050283a63 (diff)
download mariadb-git-360a4a037271d65ab6471f7ab3f9b6a893d90a31.tar.gz
5.6.36-82.0
Diffstat (limited to 'storage/xtradb/row/row0sel.cc')
-rw-r--r-- storage/xtradb/row/row0sel.cc 361
1 files changed, 309 insertions, 52 deletions
diff --git a/storage/xtradb/row/row0sel.cc b/storage/xtradb/row/row0sel.cc
index d2821abdc2e..9f66d12283a 100644
--- a/storage/xtradb/row/row0sel.cc
+++ b/storage/xtradb/row/row0sel.cc
@@ -62,6 +62,9 @@ Created 12/19/1997 Heikki Tuuri
#include "my_sys.h" /* DEBUG_SYNC_C */
#include "my_compare.h" /* enum icp_result */
+#include "thr_lock.h"
+#include "handler.h"
+#include "ha_innodb.h"
/* Maximum number of rows to prefetch; MySQL interface has another parameter */
#define SEL_MAX_N_PREFETCH 16
@@ -2733,7 +2736,8 @@ row_sel_field_store_in_mysql_format_func(
|| !(templ->mysql_col_len % templ->mbmaxlen));
ut_ad(len * templ->mbmaxlen >= templ->mysql_col_len
|| (field_no == templ->icp_rec_field_no
- && field->prefix_len > 0));
+ && field->prefix_len > 0)
+ || templ->rec_field_is_prefix);
ut_ad(!(field->prefix_len % templ->mbmaxlen));
if (templ->mbminlen == 1 && templ->mbmaxlen != 1) {
@@ -2768,34 +2772,43 @@ row_sel_field_store_in_mysql_format_func(
#ifdef UNIV_DEBUG
/** Convert a field from Innobase format to MySQL format. */
-# define row_sel_store_mysql_field(m,p,r,i,o,f,t) \
- row_sel_store_mysql_field_func(m,p,r,i,o,f,t)
+# define row_sel_store_mysql_field(m,p,r,i,o,f,t,c) \
+ row_sel_store_mysql_field_func(m,p,r,i,o,f,t,c)
#else /* UNIV_DEBUG */
/** Convert a field from Innobase format to MySQL format. */
-# define row_sel_store_mysql_field(m,p,r,i,o,f,t) \
- row_sel_store_mysql_field_func(m,p,r,o,f,t)
+# define row_sel_store_mysql_field(m,p,r,i,o,f,t,c) \
+ row_sel_store_mysql_field_func(m,p,r,o,f,t,c)
#endif /* UNIV_DEBUG */
-/**************************************************************//**
-Convert a field in the Innobase format to a field in the MySQL format. */
+/** Convert a field in the Innobase format to a field in the MySQL format.
+@param[out] mysql_rec record in the MySQL format
+@param[in,out] prebuilt prebuilt struct
+@param[in] rec InnoDB record; must be protected
+ by a page latch
+@param[in] index index of rec
+@param[in] offsets array returned by rec_get_offsets()
+@param[in] field_no templ->rec_field_no or
+ templ->clust_rec_field_no
+ or templ->icp_rec_field_no
+ or sec field no if clust_templ_for_sec
+ is TRUE
+@param[in] templ row template
+@param[in] clust_templ_for_sec TRUE if rec belongs to secondary index
+ but prebuilt template is in clustered
+ index format and used only for end
+ range comparison. */
static MY_ATTRIBUTE((warn_unused_result))
ibool
row_sel_store_mysql_field_func(
-/*===========================*/
- byte* mysql_rec, /*!< out: record in the
- MySQL format */
- row_prebuilt_t* prebuilt, /*!< in/out: prebuilt struct */
- const rec_t* rec, /*!< in: InnoDB record;
- must be protected by
- a page latch */
+ byte* mysql_rec,
+ row_prebuilt_t* prebuilt,
+ const rec_t* rec,
#ifdef UNIV_DEBUG
- const dict_index_t* index, /*!< in: index of rec */
+ const dict_index_t* index,
#endif
- const ulint* offsets, /*!< in: array returned by
- rec_get_offsets() */
- ulint field_no, /*!< in: templ->rec_field_no or
- templ->clust_rec_field_no or
- templ->icp_rec_field_no */
- const mysql_row_templ_t*templ) /*!< in: row template */
+ const ulint* offsets,
+ ulint field_no,
+ const mysql_row_templ_t*templ,
+ bool clust_templ_for_sec)
{
const byte* data;
ulint len;
@@ -2804,10 +2817,12 @@ row_sel_store_mysql_field_func(
ut_ad(templ);
ut_ad(templ >= prebuilt->mysql_template);
ut_ad(templ < &prebuilt->mysql_template[prebuilt->n_template]);
- ut_ad(field_no == templ->clust_rec_field_no
+ ut_ad(clust_templ_for_sec
+ || field_no == templ->clust_rec_field_no
|| field_no == templ->rec_field_no
|| field_no == templ->icp_rec_field_no);
- ut_ad(rec_offs_validate(rec, index, offsets));
+ ut_ad(rec_offs_validate(rec,
+ clust_templ_for_sec == true ? prebuilt->index : index, offsets));
if (UNIV_UNLIKELY(rec_offs_nth_extern(offsets, field_no))) {
@@ -2924,30 +2939,37 @@ row_sel_store_mysql_field_func(
return(TRUE);
}
-/**************************************************************//**
-Convert a row in the Innobase format to a row in the MySQL format.
+/** Convert a row in the Innobase format to a row in the MySQL format.
Note that the template in prebuilt may advise us to copy only a few
columns to mysql_rec, other columns are left blank. All columns may not
be needed in the query.
+@param[out] mysql_rec row in the MySQL format
+@param[in] prebuilt prebuilt structure
+@param[in] rec Innobase record in the index
+ which was described in prebuilt's
+ template, or in the clustered index;
+ must be protected by a page latch
+@param[in] rec_clust TRUE if the rec is in the clustered index
+@param[in] index index of rec
+@param[in] offsets array returned by rec_get_offsets(rec)
+@param[in] clust_templ_for_sec TRUE if rec belongs to secondary index
+ but the prebuilt->template is in
+ clustered index format and it is
+ used only for end range comparison
@return TRUE on success, FALSE if not all columns could be retrieved */
static MY_ATTRIBUTE((warn_unused_result))
ibool
row_sel_store_mysql_rec(
-/*====================*/
- byte* mysql_rec, /*!< out: row in the MySQL format */
- row_prebuilt_t* prebuilt, /*!< in: prebuilt struct */
- const rec_t* rec, /*!< in: Innobase record in the index
- which was described in prebuilt's
- template, or in the clustered index;
- must be protected by a page latch */
- ibool rec_clust, /*!< in: TRUE if rec is in the
- clustered index instead of
- prebuilt->index */
- const dict_index_t* index, /*!< in: index of rec */
- const ulint* offsets) /*!< in: array returned by
- rec_get_offsets(rec) */
+ byte* mysql_rec,
+ row_prebuilt_t* prebuilt,
+ const rec_t* rec,
+ ibool rec_clust,
+ const dict_index_t* index,
+ const ulint* offsets,
+ bool clust_templ_for_sec)
{
- ulint i;
+ ulint i;
+ std::vector<ulint> template_col;
ut_ad(rec_clust || index == prebuilt->index);
ut_ad(!rec_clust || dict_index_is_clust(index));
@@ -2960,20 +2982,48 @@ row_sel_store_mysql_rec(
if (UNIV_LIKELY_NULL(prebuilt->compress_heap))
mem_heap_empty(prebuilt->compress_heap);
+ if (clust_templ_for_sec) {
+ /* Store all clustered index fields of the
+ secondary index record. */
+ for (i = 0; i < dict_index_get_n_fields(
+ prebuilt->index); i++) {
+ ulint sec_field = dict_index_get_nth_field_pos(
+ index, prebuilt->index, i);
+ template_col.push_back(sec_field);
+ }
+ }
+
for (i = 0; i < prebuilt->n_template; i++) {
const mysql_row_templ_t*templ = &prebuilt->mysql_template[i];
- const ulint field_no
+ ulint field_no
= rec_clust
? templ->clust_rec_field_no
: templ->rec_field_no;
/* We should never deliver column prefixes to MySQL,
- except for evaluating innobase_index_cond(). */
+ except for evaluating innobase_index_cond() and if the prefix
+ index is longer than the actual row data. */
+
ut_ad(dict_index_get_nth_field(index, field_no)->prefix_len
- == 0);
+ == 0 || templ->rec_field_is_prefix);
+
+ if (clust_templ_for_sec) {
+ std::vector<ulint>::iterator it;
+ it = std::find(template_col.begin(),
+ template_col.end(), field_no);
+
+ if (it == template_col.end()) {
+ continue;
+ }
+
+ ut_ad(templ->rec_field_no == templ->clust_rec_field_no);
+
+ field_no = it - template_col.begin();
+ }
if (!row_sel_store_mysql_field(mysql_rec, prebuilt,
rec, index, offsets,
- field_no, templ)) {
+ field_no, templ,
+ clust_templ_for_sec)) {
return(FALSE);
}
}
@@ -3063,6 +3113,8 @@ row_sel_get_clust_rec_for_mysql(
dberr_t err;
trx_t* trx;
+ os_atomic_increment_ulint(&srv_sec_rec_cluster_reads, 1);
+
*out_rec = NULL;
trx = thr_get_trx(thr);
@@ -3613,7 +3665,7 @@ row_search_idx_cond_check(
if (!row_sel_store_mysql_field(mysql_rec, prebuilt,
rec, prebuilt->index, offsets,
templ->icp_rec_field_no,
- templ)) {
+ templ, false)) {
return(ICP_NO_MATCH);
}
}
@@ -3634,7 +3686,7 @@ row_search_idx_cond_check(
|| dict_index_is_clust(prebuilt->index)) {
if (!row_sel_store_mysql_rec(
mysql_rec, prebuilt, rec, FALSE,
- prebuilt->index, offsets)) {
+ prebuilt->index, offsets, false)) {
ut_ad(dict_index_is_clust(prebuilt->index));
return(ICP_NO_MATCH);
}
@@ -3653,6 +3705,50 @@ row_search_idx_cond_check(
return(result);
}
+/** Check the pushed down end range condition to avoid extra traversal
+if records are not within view and also to avoid prefetching in the
+cache buffer.
+@param[in] mysql_rec record in MySQL format
+@param[in,out] handler the MySQL handler performing the scan
+@retval true if the row in mysql_rec is out of range
+@retval false if the row in mysql_rec is in range */
+static
+bool
+row_search_end_range_check(
+ const byte* mysql_rec,
+ ha_innobase* handler)
+{
+ if (handler->end_range &&
+ handler->compare_key_in_buffer(mysql_rec) > 0) {
+ return(true);
+ }
+
+ return(false);
+}
+
+/** Return the record field length in characters.
+@param[in] col table column of the field
+@param[in] field_no field number
+@param[in] rec physical record
+@param[in] offsets field offsets in the physical record
+
+@return field length in characters */
+static
+size_t
+rec_field_len_in_chars(const dict_col_t &col,
+ const ulint field_no,
+ const rec_t *rec,
+ const ulint *offsets)
+{
+ const ulint cset = dtype_get_charset_coll(col.prtype);
+ const CHARSET_INFO* cs = all_charsets[cset];
+ ulint rec_field_len;
+ const char* rec_field = reinterpret_cast<const char *>(
+ rec_get_nth_field(
+ rec, offsets, field_no, &rec_field_len));
+ return(cs->cset->numchars(cs, rec_field, rec_field + rec_field_len));
+}
+
/********************************************************************//**
Searches for rows in the database. This is used in the interface to
MySQL. This function opens a cursor, and also implements fetch next
@@ -3690,7 +3786,9 @@ row_search_for_mysql(
trx_t* trx = prebuilt->trx;
dict_index_t* clust_index;
que_thr_t* thr;
- const rec_t* rec;
+ const rec_t* prev_rec = NULL;
+ const rec_t* rec = NULL;
+ byte* end_range_cache = NULL;
const rec_t* result_rec = NULL;
const rec_t* clust_rec;
dberr_t err = DB_SUCCESS;
@@ -3715,6 +3813,8 @@ row_search_for_mysql(
ulint* offsets = offsets_;
ibool table_lock_waited = FALSE;
byte* next_buf = 0;
+ ulint end_loop = 0;
+ bool use_clustered_index = false;
rec_offs_init(offsets_);
@@ -3838,6 +3938,10 @@ row_search_for_mysql(
err = DB_SUCCESS;
goto func_exit;
+ } else if (prebuilt->end_range == true) {
+ prebuilt->end_range = false;
+ err = DB_RECORD_NOT_FOUND;
+ goto func_exit;
}
if (prebuilt->fetch_cache_first > 0
@@ -3970,7 +4074,8 @@ row_search_for_mysql(
if (!row_sel_store_mysql_rec(
buf, prebuilt,
- rec, FALSE, index, offsets)) {
+ rec, FALSE, index,
+ offsets, false)) {
/* Only fresh inserts may contain
incomplete externally stored
columns. Pretend that such
@@ -4224,11 +4329,62 @@ rec_loop:
and neither can a record lock be placed on it: we skip such
a record. */
+ prev_rec = NULL;
goto next_rec;
}
if (page_rec_is_supremum(rec)) {
+ /** Compare the last record of the page with the end range
+ passed to InnoDB. This is done only when there is no ICP and
+ row_search_for_mysql has already looped at least 100 times over
+ rows that were found but not reported (due to search views etc.). */
+ if (prev_rec != NULL
+ && prebuilt->mysql_handler->end_range != NULL
+ && prebuilt->idx_cond == NULL
+ && end_loop >= 100) {
+
+ dict_index_t* key_index = prebuilt->index;
+ bool clust_templ_for_sec = false;
+
+ if (end_range_cache == NULL) {
+ end_range_cache = static_cast<byte*>(
+ ut_malloc(prebuilt->mysql_row_len));
+ }
+
+ if (index != clust_index
+ && prebuilt->need_to_access_clustered) {
+ /** Secondary index record but the template
+ based on PK. */
+ key_index = clust_index;
+ clust_templ_for_sec = true;
+ }
+
+ /** Create offsets based on prebuilt index. */
+ offsets = rec_get_offsets(prev_rec, prebuilt->index,
+ offsets, ULINT_UNDEFINED, &heap);
+
+ if (row_sel_store_mysql_rec(
+ end_range_cache, prebuilt, prev_rec,
+ clust_templ_for_sec, key_index, offsets,
+ clust_templ_for_sec)) {
+
+ if (row_search_end_range_check(
+ end_range_cache,
+ prebuilt->mysql_handler)) {
+
+ /** In case of prebuilt->fetch,
+ set the error in prebuilt->end_range. */
+ if (prebuilt->n_fetch_cached > 0) {
+ prebuilt->end_range = true;
+ }
+
+ err = DB_RECORD_NOT_FOUND;
+ goto normal_return;
+ }
+ }
+ }
+
if (set_also_gap_locks
&& !(srv_locks_unsafe_for_binlog
|| trx->isolation_level <= TRX_ISO_READ_COMMITTED)
@@ -4260,6 +4416,7 @@ rec_loop:
/* A page supremum record cannot be in the result set: skip
it now that we have placed a possible lock on it */
+ prev_rec = NULL;
goto next_rec;
}
@@ -4334,6 +4491,7 @@ wrong_offs:
btr_pcur_move_to_last_on_page(pcur, &mtr);
+ prev_rec = NULL;
goto next_rec;
}
}
@@ -4362,10 +4520,13 @@ wrong_offs:
fputs(". We try to skip the record.\n",
stderr);
+ prev_rec = NULL;
goto next_rec;
}
}
+ prev_rec = rec;
+
/* Note that we cannot trust the up_match value in the cursor at this
place because we can arrive here after moving the cursor! Thus
we have to recompare rec and search_tuple to determine if they
@@ -4590,6 +4751,7 @@ no_gap_lock:
did_semi_consistent_read = TRUE;
rec = old_vers;
+ prev_rec = rec;
break;
default:
@@ -4636,6 +4798,7 @@ no_gap_lock:
}
rec = old_vers;
+ prev_rec = rec;
}
} else {
/* We are looking into a non-clustered index,
@@ -4729,10 +4892,97 @@ locks_ok:
}
/* Get the clustered index record if needed, if we did not do the
- search using the clustered index. */
+ search using the clustered index... */
+
+ use_clustered_index =
+ (index != clust_index && prebuilt->need_to_access_clustered);
+
+ if (use_clustered_index && prebuilt->n_template <= index->n_fields) {
+ /* ...but, perhaps avoid the clustered index lookup if
+ all of the following are true:
+ 1) all columns are in the secondary index
+ 2) all values for columns that are prefix-only
+ indexes are shorter than the prefix size
+ This optimization can avoid many IOs for certain schemas.
+ */
+ bool row_contains_all_values = true;
+ unsigned int i;
+ for (i = 0; i < prebuilt->n_template; i++) {
+ /* Condition (1) from above: is the field in the
+ index (prefix or not)? */
+ const mysql_row_templ_t* templ =
+ prebuilt->mysql_template + i;
+ ulint secondary_index_field_no =
+ templ->rec_prefix_field_no;
+ if (secondary_index_field_no == ULINT_UNDEFINED) {
+ row_contains_all_values = false;
+ break;
+ }
+ /* Condition (2) from above: if this is a
+ prefix, is this row's value size shorter
+ than the prefix? */
+ if (templ->rec_field_is_prefix) {
+ ulint record_size = rec_offs_nth_size(
+ offsets,
+ secondary_index_field_no);
+ const dict_field_t *field =
+ dict_index_get_nth_field(
+ index,
+ secondary_index_field_no);
+ ut_a(field->prefix_len > 0);
+ if (record_size
+ < field->prefix_len / templ->mbmaxlen) {
+
+ /* Record in bytes shorter than the
+ index prefix length in characters */
+ continue;
+
+ } else if (record_size * templ->mbminlen
+ >= field->prefix_len) {
+
+ /* The shortest representable string by
+ the byte length of the record is longer
+ than the maximum possible index
+ prefix. */
+ row_contains_all_values = false;
+ break;
+ } else {
- if (index != clust_index && prebuilt->need_to_access_clustered) {
+ /* The record could or could not fit
+ into the index prefix, calculate length
+ to find out */
+
+ if (rec_field_len_in_chars(
+ *field->col,
+ secondary_index_field_no,
+ rec, offsets)
+ >= (field->prefix_len
+ / templ->mbmaxlen)) {
+
+ row_contains_all_values = false;
+ break;
+ }
+ }
+ }
+ }
+ /* If (1) and (2) were true for all columns above, use
+ rec_prefix_field_no instead of rec_field_no, and skip
+ the clustered lookup below. */
+ if (row_contains_all_values) {
+ for (i = 0; i < prebuilt->n_template; i++) {
+ mysql_row_templ_t* templ =
+ prebuilt->mysql_template + i;
+ templ->rec_field_no =
+ templ->rec_prefix_field_no;
+ ut_a(templ->rec_field_no != ULINT_UNDEFINED);
+ }
+ use_clustered_index = false;
+ os_atomic_increment_ulint(
+ &srv_sec_rec_cluster_reads_avoided, 1);
+ }
+ }
+ if (use_clustered_index) {
requires_clust_rec:
ut_ad(index != clust_index);
/* We use a 'goto' to the preceding label if a consistent
@@ -4813,7 +5063,7 @@ requires_clust_rec:
appropriate version of the clustered index record. */
if (!row_sel_store_mysql_rec(
buf, prebuilt, result_rec,
- TRUE, clust_index, offsets)) {
+ TRUE, clust_index, offsets, false)) {
goto next_rec;
}
}
@@ -4881,7 +5131,7 @@ requires_clust_rec:
next_buf, prebuilt, result_rec,
result_rec != rec,
result_rec != rec ? clust_index : index,
- offsets)) {
+ offsets, false)) {
if (next_buf == buf) {
ut_a(prebuilt->n_fetch_cached == 0);
@@ -4936,7 +5186,7 @@ requires_clust_rec:
buf, prebuilt, result_rec,
result_rec != rec,
result_rec != rec ? clust_index : index,
- offsets)) {
+ offsets, false)) {
/* Only fresh inserts may contain
incomplete externally stored
columns. Pretend that such records do
@@ -4988,6 +5238,8 @@ idx_cond_failed:
goto normal_return;
next_rec:
+ end_loop++;
+
/* Reset the old and new "did semi-consistent read" flags. */
if (UNIV_UNLIKELY(prebuilt->row_read_type
== ROW_READ_DID_SEMI_CONSISTENT)) {
@@ -5174,6 +5426,11 @@ normal_return:
func_exit:
trx->op_info = "";
+
+ if (end_range_cache != NULL) {
+ ut_free(end_range_cache);
+ }
+
if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap);
}