diff options
author | Vicențiu Ciorbaru <vicentiu@mariadb.org> | 2017-05-16 14:16:11 +0300 |
---|---|---|
committer | Vicențiu Ciorbaru <vicentiu@mariadb.org> | 2017-05-16 14:16:11 +0300 |
commit | 360a4a037271d65ab6471f7ab3f9b6a893d90a31 (patch) | |
tree | e3ecdbb7930396ec2f7069a1d81b7ff169a288c3 /storage/xtradb/row/row0sel.cc | |
parent | 8d69ce7b821a88b5ba8749a08489839050283a63 (diff) | |
download | mariadb-git-360a4a037271d65ab6471f7ab3f9b6a893d90a31.tar.gz |
5.6.36-82.0
Diffstat (limited to 'storage/xtradb/row/row0sel.cc')
-rw-r--r-- | storage/xtradb/row/row0sel.cc | 361 |
1 files changed, 309 insertions, 52 deletions
diff --git a/storage/xtradb/row/row0sel.cc b/storage/xtradb/row/row0sel.cc index d2821abdc2e..9f66d12283a 100644 --- a/storage/xtradb/row/row0sel.cc +++ b/storage/xtradb/row/row0sel.cc @@ -62,6 +62,9 @@ Created 12/19/1997 Heikki Tuuri #include "my_sys.h" /* DEBUG_SYNC_C */ #include "my_compare.h" /* enum icp_result */ +#include "thr_lock.h" +#include "handler.h" +#include "ha_innodb.h" /* Maximum number of rows to prefetch; MySQL interface has another parameter */ #define SEL_MAX_N_PREFETCH 16 @@ -2733,7 +2736,8 @@ row_sel_field_store_in_mysql_format_func( || !(templ->mysql_col_len % templ->mbmaxlen)); ut_ad(len * templ->mbmaxlen >= templ->mysql_col_len || (field_no == templ->icp_rec_field_no - && field->prefix_len > 0)); + && field->prefix_len > 0) + || templ->rec_field_is_prefix); ut_ad(!(field->prefix_len % templ->mbmaxlen)); if (templ->mbminlen == 1 && templ->mbmaxlen != 1) { @@ -2768,34 +2772,43 @@ row_sel_field_store_in_mysql_format_func( #ifdef UNIV_DEBUG /** Convert a field from Innobase format to MySQL format. */ -# define row_sel_store_mysql_field(m,p,r,i,o,f,t) \ - row_sel_store_mysql_field_func(m,p,r,i,o,f,t) +# define row_sel_store_mysql_field(m,p,r,i,o,f,t,c) \ + row_sel_store_mysql_field_func(m,p,r,i,o,f,t,c) #else /* UNIV_DEBUG */ /** Convert a field from Innobase format to MySQL format. */ -# define row_sel_store_mysql_field(m,p,r,i,o,f,t) \ - row_sel_store_mysql_field_func(m,p,r,o,f,t) +# define row_sel_store_mysql_field(m,p,r,i,o,f,t,c) \ + row_sel_store_mysql_field_func(m,p,r,o,f,t,c) #endif /* UNIV_DEBUG */ -/**************************************************************//** -Convert a field in the Innobase format to a field in the MySQL format. */ +/** Convert a field in the Innobase format to a field in the MySQL format. 
+@param[out] mysql_rec record in the MySQL format +@param[in,out] prebuilt prebuilt struct +@param[in] rec InnoDB record; must be protected + by a page latch +@param[in] index index of rec +@param[in] offsets array returned by rec_get_offsets() +@param[in] field_no templ->rec_field_no or + templ->clust_rec_field_no + or templ->icp_rec_field_no + or sec field no if clust_templ_for_sec + is TRUE +@param[in] templ row template +@param[in] clust_templ_for_sec TRUE if rec belongs to secondary index + but prebuilt template is in clustered + index format and used only for end + range comparison. */ static MY_ATTRIBUTE((warn_unused_result)) ibool row_sel_store_mysql_field_func( -/*===========================*/ - byte* mysql_rec, /*!< out: record in the - MySQL format */ - row_prebuilt_t* prebuilt, /*!< in/out: prebuilt struct */ - const rec_t* rec, /*!< in: InnoDB record; - must be protected by - a page latch */ + byte* mysql_rec, + row_prebuilt_t* prebuilt, + const rec_t* rec, #ifdef UNIV_DEBUG - const dict_index_t* index, /*!< in: index of rec */ + const dict_index_t* index, #endif - const ulint* offsets, /*!< in: array returned by - rec_get_offsets() */ - ulint field_no, /*!< in: templ->rec_field_no or - templ->clust_rec_field_no or - templ->icp_rec_field_no */ - const mysql_row_templ_t*templ) /*!< in: row template */ + const ulint* offsets, + ulint field_no, + const mysql_row_templ_t*templ, + bool clust_templ_for_sec) { const byte* data; ulint len; @@ -2804,10 +2817,12 @@ row_sel_store_mysql_field_func( ut_ad(templ); ut_ad(templ >= prebuilt->mysql_template); ut_ad(templ < &prebuilt->mysql_template[prebuilt->n_template]); - ut_ad(field_no == templ->clust_rec_field_no + ut_ad(clust_templ_for_sec + || field_no == templ->clust_rec_field_no || field_no == templ->rec_field_no || field_no == templ->icp_rec_field_no); - ut_ad(rec_offs_validate(rec, index, offsets)); + ut_ad(rec_offs_validate(rec, + clust_templ_for_sec == true ? 
prebuilt->index : index, offsets)); if (UNIV_UNLIKELY(rec_offs_nth_extern(offsets, field_no))) { @@ -2924,30 +2939,37 @@ row_sel_store_mysql_field_func( return(TRUE); } -/**************************************************************//** -Convert a row in the Innobase format to a row in the MySQL format. +/** Convert a row in the Innobase format to a row in the MySQL format. Note that the template in prebuilt may advise us to copy only a few columns to mysql_rec, other columns are left blank. All columns may not be needed in the query. +@param[out] mysql_rec row in the MySQL format +@param[in] prebuilt prebuilt structure +@param[in] rec Innobase record in the index + which was described in prebuilt's + template, or in the clustered index; + must be protected by a page latch +@param[in] rec_clust TRUE if the rec in the clustered index +@param[in] index index of rec +@param[in] offsets array returned by rec_get_offsets(rec) +@param[in] clust_templ_for_sec TRUE if rec belongs to secondary index + but the prebuilt->template is in + clustered index format and it is + used only for end range comparison @return TRUE on success, FALSE if not all columns could be retrieved */ static MY_ATTRIBUTE((warn_unused_result)) ibool row_sel_store_mysql_rec( -/*====================*/ - byte* mysql_rec, /*!< out: row in the MySQL format */ - row_prebuilt_t* prebuilt, /*!< in: prebuilt struct */ - const rec_t* rec, /*!< in: Innobase record in the index - which was described in prebuilt's - template, or in the clustered index; - must be protected by a page latch */ - ibool rec_clust, /*!< in: TRUE if rec is in the - clustered index instead of - prebuilt->index */ - const dict_index_t* index, /*!< in: index of rec */ - const ulint* offsets) /*!< in: array returned by - rec_get_offsets(rec) */ + byte* mysql_rec, + row_prebuilt_t* prebuilt, + const rec_t* rec, + ibool rec_clust, + const dict_index_t* index, + const ulint* offsets, + bool clust_templ_for_sec) { - ulint i; + ulint i; + 
std::vector<ulint> template_col; ut_ad(rec_clust || index == prebuilt->index); ut_ad(!rec_clust || dict_index_is_clust(index)); @@ -2960,20 +2982,48 @@ row_sel_store_mysql_rec( if (UNIV_LIKELY_NULL(prebuilt->compress_heap)) mem_heap_empty(prebuilt->compress_heap); + if (clust_templ_for_sec) { + /* Store all clustered index field of + secondary index record. */ + for (i = 0; i < dict_index_get_n_fields( + prebuilt->index); i++) { + ulint sec_field = dict_index_get_nth_field_pos( + index, prebuilt->index, i); + template_col.push_back(sec_field); + } + } + for (i = 0; i < prebuilt->n_template; i++) { const mysql_row_templ_t*templ = &prebuilt->mysql_template[i]; - const ulint field_no + ulint field_no = rec_clust ? templ->clust_rec_field_no : templ->rec_field_no; /* We should never deliver column prefixes to MySQL, - except for evaluating innobase_index_cond(). */ + except for evaluating innobase_index_cond() and if the prefix + index is longer than the actual row data. */ + ut_ad(dict_index_get_nth_field(index, field_no)->prefix_len - == 0); + == 0 || templ->rec_field_is_prefix); + + if (clust_templ_for_sec) { + std::vector<ulint>::iterator it; + it = std::find(template_col.begin(), + template_col.end(), field_no); + + if (it == template_col.end()) { + continue; + } + + ut_ad(templ->rec_field_no == templ->clust_rec_field_no); + + field_no = it - template_col.begin(); + } if (!row_sel_store_mysql_field(mysql_rec, prebuilt, rec, index, offsets, - field_no, templ)) { + field_no, templ, + clust_templ_for_sec)) { return(FALSE); } } @@ -3063,6 +3113,8 @@ row_sel_get_clust_rec_for_mysql( dberr_t err; trx_t* trx; + os_atomic_increment_ulint(&srv_sec_rec_cluster_reads, 1); + *out_rec = NULL; trx = thr_get_trx(thr); @@ -3613,7 +3665,7 @@ row_search_idx_cond_check( if (!row_sel_store_mysql_field(mysql_rec, prebuilt, rec, prebuilt->index, offsets, templ->icp_rec_field_no, - templ)) { + templ, false)) { return(ICP_NO_MATCH); } } @@ -3634,7 +3686,7 @@ row_search_idx_cond_check( || 
dict_index_is_clust(prebuilt->index)) { if (!row_sel_store_mysql_rec( mysql_rec, prebuilt, rec, FALSE, - prebuilt->index, offsets)) { + prebuilt->index, offsets, false)) { ut_ad(dict_index_is_clust(prebuilt->index)); return(ICP_NO_MATCH); } @@ -3653,6 +3705,50 @@ row_search_idx_cond_check( return(result); } +/** Check the pushed down end range condition to avoid extra traversal +if records are not within view and also to avoid prefetching in the +cache buffer. +@param[in] mysql_rec record in MySQL format +@param[in,out] handler the MySQL handler performing the scan +@retval true if the row in mysql_rec is out of range +@retval false if the row in mysql_rec is in range */ +static +bool +row_search_end_range_check( + const byte* mysql_rec, + ha_innobase* handler) +{ + if (handler->end_range && + handler->compare_key_in_buffer(mysql_rec) > 0) { + return(true); + } + + return(false); +} + +/** Return the record field length in characters. +@param[in] col table column of the field +@param[in] field_no field number +@param[in] rec physical record +@param[in] offsets field offsets in the physical record + +@return field length in characters */ +static +size_t +rec_field_len_in_chars(const dict_col_t &col, + const ulint field_no, + const rec_t *rec, + const ulint *offsets) +{ + const ulint cset = dtype_get_charset_coll(col.prtype); + const CHARSET_INFO* cs = all_charsets[cset]; + ulint rec_field_len; + const char* rec_field = reinterpret_cast<const char *>( + rec_get_nth_field( + rec, offsets, field_no, &rec_field_len)); + return(cs->cset->numchars(cs, rec_field, rec_field + rec_field_len)); +} + /********************************************************************//** Searches for rows in the database. This is used in the interface to MySQL. 
This function opens a cursor, and also implements fetch next @@ -3690,7 +3786,9 @@ row_search_for_mysql( trx_t* trx = prebuilt->trx; dict_index_t* clust_index; que_thr_t* thr; - const rec_t* rec; + const rec_t* prev_rec = NULL; + const rec_t* rec = NULL; + byte* end_range_cache = NULL; const rec_t* result_rec = NULL; const rec_t* clust_rec; dberr_t err = DB_SUCCESS; @@ -3715,6 +3813,8 @@ row_search_for_mysql( ulint* offsets = offsets_; ibool table_lock_waited = FALSE; byte* next_buf = 0; + ulint end_loop = 0; + bool use_clustered_index = false; rec_offs_init(offsets_); @@ -3838,6 +3938,10 @@ row_search_for_mysql( err = DB_SUCCESS; goto func_exit; + } else if (prebuilt->end_range == true) { + prebuilt->end_range = false; + err = DB_RECORD_NOT_FOUND; + goto func_exit; } if (prebuilt->fetch_cache_first > 0 @@ -3970,7 +4074,8 @@ row_search_for_mysql( if (!row_sel_store_mysql_rec( buf, prebuilt, - rec, FALSE, index, offsets)) { + rec, FALSE, index, + offsets, false)) { /* Only fresh inserts may contain incomplete externally stored columns. Pretend that such @@ -4224,11 +4329,62 @@ rec_loop: and neither can a record lock be placed on it: we skip such a record. */ + prev_rec = NULL; goto next_rec; } if (page_rec_is_supremum(rec)) { + /** Compare the last record of the page with end range + passed to InnoDB when there is no ICP and number of loops + in row_search_for_mysql for rows found but not + reporting due to search views etc. */ + if (prev_rec != NULL + && prebuilt->mysql_handler->end_range != NULL + && prebuilt->idx_cond == NULL + && end_loop >= 100) { + + dict_index_t* key_index = prebuilt->index; + bool clust_templ_for_sec = false; + + if (end_range_cache == NULL) { + end_range_cache = static_cast<byte*>( + ut_malloc(prebuilt->mysql_row_len)); + } + + if (index != clust_index + && prebuilt->need_to_access_clustered) { + /** Secondary index record but the template + based on PK. 
*/ + key_index = clust_index; + clust_templ_for_sec = true; + } + + /** Create offsets based on prebuilt index. */ + offsets = rec_get_offsets(prev_rec, prebuilt->index, + offsets, ULINT_UNDEFINED, &heap); + + if (row_sel_store_mysql_rec( + end_range_cache, prebuilt, prev_rec, + clust_templ_for_sec, key_index, offsets, + clust_templ_for_sec)) { + + if (row_search_end_range_check( + end_range_cache, + prebuilt->mysql_handler)) { + + /** In case of prebuilt->fetch, + set the error in prebuilt->end_range. */ + if (prebuilt->n_fetch_cached > 0) { + prebuilt->end_range = true; + } + + err = DB_RECORD_NOT_FOUND; + goto normal_return; + } + } + } + if (set_also_gap_locks && !(srv_locks_unsafe_for_binlog || trx->isolation_level <= TRX_ISO_READ_COMMITTED) @@ -4260,6 +4416,7 @@ rec_loop: /* A page supremum record cannot be in the result set: skip it now that we have placed a possible lock on it */ + prev_rec = NULL; goto next_rec; } @@ -4334,6 +4491,7 @@ wrong_offs: btr_pcur_move_to_last_on_page(pcur, &mtr); + prev_rec = NULL; goto next_rec; } } @@ -4362,10 +4520,13 @@ wrong_offs: fputs(". We try to skip the record.\n", stderr); + prev_rec = NULL; goto next_rec; } } + prev_rec = rec; + /* Note that we cannot trust the up_match value in the cursor at this place because we can arrive here after moving the cursor! Thus we have to recompare rec and search_tuple to determine if they @@ -4590,6 +4751,7 @@ no_gap_lock: did_semi_consistent_read = TRUE; rec = old_vers; + prev_rec = rec; break; default: @@ -4636,6 +4798,7 @@ no_gap_lock: } rec = old_vers; + prev_rec = rec; } } else { /* We are looking into a non-clustered index, @@ -4729,10 +4892,97 @@ locks_ok: } /* Get the clustered index record if needed, if we did not do the - search using the clustered index. */ + search using the clustered index... 
*/ + + use_clustered_index = + (index != clust_index && prebuilt->need_to_access_clustered); + + if (use_clustered_index && prebuilt->n_template <= index->n_fields) { + /* ...but, perhaps avoid the clustered index lookup if + all of the following are true: + 1) all columns are in the secondary index + 2) all values for columns that are prefix-only + indexes are shorter than the prefix size + This optimization can avoid many IOs for certain schemas. + */ + bool row_contains_all_values = true; + unsigned int i; + for (i = 0; i < prebuilt->n_template; i++) { + /* Condition (1) from above: is the field in the + index (prefix or not)? */ + const mysql_row_templ_t* templ = + prebuilt->mysql_template + i; + ulint secondary_index_field_no = + templ->rec_prefix_field_no; + if (secondary_index_field_no == ULINT_UNDEFINED) { + row_contains_all_values = false; + break; + } + /* Condition (2) from above: if this is a + prefix, is this row's value size shorter + than the prefix? */ + if (templ->rec_field_is_prefix) { + ulint record_size = rec_offs_nth_size( + offsets, + secondary_index_field_no); + const dict_field_t *field = + dict_index_get_nth_field( + index, + secondary_index_field_no); + ut_a(field->prefix_len > 0); + if (record_size + < field->prefix_len / templ->mbmaxlen) { + + /* Record in bytes shorter than the + index prefix length in characters */ + continue; + + } else if (record_size * templ->mbminlen + >= field->prefix_len) { + + /* The shortest representable string by + the byte length of the record is longer + than the maximum possible index + prefix. 
*/ + row_contains_all_values = false; + break; + } else { - if (index != clust_index && prebuilt->need_to_access_clustered) { + /* The record could or could not fit + into the index prefix, calculate length + to find out */ + + if (rec_field_len_in_chars( + *field->col, + secondary_index_field_no, + rec, offsets) + >= (field->prefix_len + / templ->mbmaxlen)) { + + row_contains_all_values = false; + break; + } + } + } + } + /* If (1) and (2) were true for all columns above, use + rec_prefix_field_no instead of rec_field_no, and skip + the clustered lookup below. */ + if (row_contains_all_values) { + for (i = 0; i < prebuilt->n_template; i++) { + mysql_row_templ_t* templ = + prebuilt->mysql_template + i; + templ->rec_field_no = + templ->rec_prefix_field_no; + ut_a(templ->rec_field_no != ULINT_UNDEFINED); + } + use_clustered_index = false; + os_atomic_increment_ulint( + &srv_sec_rec_cluster_reads_avoided, 1); + } + } + if (use_clustered_index) { requires_clust_rec: ut_ad(index != clust_index); /* We use a 'goto' to the preceding label if a consistent @@ -4813,7 +5063,7 @@ requires_clust_rec: appropriate version of the clustered index record. */ if (!row_sel_store_mysql_rec( buf, prebuilt, result_rec, - TRUE, clust_index, offsets)) { + TRUE, clust_index, offsets, false)) { goto next_rec; } } @@ -4881,7 +5131,7 @@ requires_clust_rec: next_buf, prebuilt, result_rec, result_rec != rec, result_rec != rec ? clust_index : index, - offsets)) { + offsets, false)) { if (next_buf == buf) { ut_a(prebuilt->n_fetch_cached == 0); @@ -4936,7 +5186,7 @@ requires_clust_rec: buf, prebuilt, result_rec, result_rec != rec, result_rec != rec ? clust_index : index, - offsets)) { + offsets, false)) { /* Only fresh inserts may contain incomplete externally stored columns. Pretend that such records do @@ -4988,6 +5238,8 @@ idx_cond_failed: goto normal_return; next_rec: + end_loop++; + /* Reset the old and new "did semi-consistent read" flags. 
*/ if (UNIV_UNLIKELY(prebuilt->row_read_type == ROW_READ_DID_SEMI_CONSISTENT)) { @@ -5174,6 +5426,11 @@ normal_return: func_exit: trx->op_info = ""; + + if (end_range_cache != NULL) { + ut_free(end_range_cache); + } + if (UNIV_LIKELY_NULL(heap)) { mem_heap_free(heap); } |