diff options
author | Igor Babaev <igor@askmonty.org> | 2011-06-01 20:49:37 -0700 |
---|---|---|
committer | Igor Babaev <igor@askmonty.org> | 2011-06-01 20:49:37 -0700 |
commit | 6dab04bda02cab4510f4c594c0625e0801a5a7b7 (patch) | |
tree | cf6f8865cb56ad38f44206cc90934b40d69c60a5 /storage | |
parent | 8bf69ab7bf71ee8e1cbed98d20b04ebf1e5567c1 (diff) | |
parent | 9a1e54658ebd073978f36bd535754768641f70a5 (diff) | |
download | mariadb-git-6dab04bda02cab4510f4c594c0625e0801a5a7b7.tar.gz |
Merge.
Diffstat (limited to 'storage')
-rw-r--r-- | storage/xtradb/dict/dict0dict.c | 13 | ||||
-rw-r--r-- | storage/xtradb/handler/ha_innodb.cc | 615 | ||||
-rw-r--r-- | storage/xtradb/handler/ha_innodb.h | 88 | ||||
-rw-r--r-- | storage/xtradb/include/dict0dict.h | 12 | ||||
-rw-r--r-- | storage/xtradb/include/dict0dict.ic | 14 | ||||
-rw-r--r-- | storage/xtradb/include/ha_prototypes.h | 9 | ||||
-rw-r--r-- | storage/xtradb/include/row0mysql.h | 29 | ||||
-rw-r--r-- | storage/xtradb/row/row0sel.c | 580 |
8 files changed, 846 insertions, 514 deletions
diff --git a/storage/xtradb/dict/dict0dict.c b/storage/xtradb/dict/dict0dict.c index 18880a5c72c..8d4bd76c32c 100644 --- a/storage/xtradb/dict/dict0dict.c +++ b/storage/xtradb/dict/dict0dict.c @@ -472,10 +472,12 @@ Looks for column n in an index. ULINT_UNDEFINED if not contained */ UNIV_INTERN ulint -dict_index_get_nth_col_pos( -/*=======================*/ - const dict_index_t* index, /*!< in: index */ - ulint n) /*!< in: column number */ +dict_index_get_nth_col_or_prefix_pos( +/*=================================*/ + const dict_index_t* index, /*!< in: index */ + ulint n, /*!< in: column number */ + ibool inc_prefix) /*!< in: TRUE=consider + column prefixes too */ { const dict_field_t* field; const dict_col_t* col; @@ -497,7 +499,8 @@ dict_index_get_nth_col_pos( for (pos = 0; pos < n_fields; pos++) { field = dict_index_get_nth_field(index, pos); - if (col == field->col && field->prefix_len == 0) { + if (col == field->col + && (inc_prefix || field->prefix_len == 0)) { return(pos); } diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc index c37bfe6a2c4..48ecc0dfebe 100644 --- a/storage/xtradb/handler/ha_innodb.cc +++ b/storage/xtradb/handler/ha_innodb.cc @@ -124,11 +124,6 @@ static pthread_cond_t commit_cond; static pthread_mutex_t commit_cond_m; static bool innodb_inited = 0; -C_MODE_START -static xtradb_icp_result_t index_cond_func_innodb(void *arg); -C_MODE_END - - #define INSIDE_HA_INNOBASE_CC @@ -1986,14 +1981,21 @@ trx_is_strict( /**************************************************************//** Resets some fields of a prebuilt struct. The template is used in fast retrieval of just those column values MySQL needs in its processing. */ -static void -reset_template( -/*===========*/ - row_prebuilt_t* prebuilt) /*!< in/out: prebuilt struct */ +inline +ha_innobase::reset_template(void) +/*=============================*/ { + ut_ad(prebuilt->magic_n == ROW_PREBUILT_ALLOCATED); + ut_ad(prebuilt->magic_n2 == prebuilt->magic_n); + prebuilt->keep_other_fields_on_keyread = 0; prebuilt->read_just_key = 0; + /* Reset index condition pushdown state */ + prebuilt->idx_cond = NULL; + prebuilt->idx_cond_n_cols = 0; + pushed_idx_cond = NULL; + pushed_idx_cond_keyno = MAX_KEY; } /*****************************************************************//** @@ -2055,7 +2057,7 @@ ha_innobase::init_table_handle_for_HANDLER(void) we???? */ prebuilt->used_in_HANDLER = TRUE; - reset_template(prebuilt); + reset_template(); } /*********************************************************************//** @@ -4243,8 +4245,8 @@ static inline uint get_field_offset( /*=============*/ - TABLE* table, /*!< in: MySQL table object */ - Field* field) /*!< in: MySQL field object */ + const TABLE* table, /*!< in: MySQL table object */ + const Field* field) /*!< in: MySQL field object */ { return((uint) (field->ptr - table->record[0])); } @@ -4810,44 +4812,170 @@ ha_innobase::store_key_val_for_row( } /**************************************************************//** +Determines if a field is needed in a prebuilt struct 'template'. +@return field to use, or NULL if the field is not needed */ +static +const Field* +build_template_needs_field( +/*=======================*/ + ibool index_contains, /*!< in: + dict_index_contains_col_or_prefix( + index, i) */ + ibool read_just_key, /*!< in: TRUE when MySQL calls + ha_innobase::extra with the + argument HA_EXTRA_KEYREAD; it is enough + to read just columns defined in + the index (i.e., no read of the + clustered index record necessary) */ + ibool fetch_all_in_key, + /*!< in: true=fetch all fields in + the index */ + ibool fetch_primary_key_cols, + /*!< in: true=fetch the + primary key columns */ + dict_index_t* index, /*!< in: InnoDB index to use */ + const TABLE* table, /*!< in: MySQL table object */ + ulint i, /*!< in: field index in InnoDB table */ + ulint sql_idx) /*!< in: field index in SQL table */ +{ + const Field* field = table->field[sql_idx]; + + ut_ad(index_contains == dict_index_contains_col_or_prefix(index, i)); + + if (!index_contains) { + if (read_just_key) { + /* If this is a 'key read', we do not need + columns that are not in the key */ + + return(NULL); + } + } else if (fetch_all_in_key) { + /* This field is needed in the query */ + + return(field); + } + + if (bitmap_is_set(table->read_set, sql_idx) + || bitmap_is_set(table->write_set, sql_idx)) { + /* This field is needed in the query */ + + return(field); + } + + if (fetch_primary_key_cols + && dict_table_col_in_clustered_key(index->table, i)) { + /* This field is needed in the query */ + + return(field); + } + + /* This field is not needed in the query, skip it */ + + return(NULL); +} + +/**************************************************************//** +Adds a field is to a prebuilt struct 'template'. +@return the field template */ +static +mysql_row_templ_t* +build_template_field( +/*=================*/ + row_prebuilt_t* prebuilt, /*!< in/out: template */ + dict_index_t* clust_index, /*!< in: InnoDB clustered index */ + dict_index_t* index, /*!< in: InnoDB index to use */ + TABLE* table, /*!< in: MySQL table object */ + const Field* field, /*!< in: field in MySQL table */ + ulint i) /*!< in: field index in InnoDB table */ +{ + mysql_row_templ_t* templ; + const dict_col_t* col; + + ut_ad(field == table->field[i]); + ut_ad(clust_index->table == index->table); + + col = dict_table_get_nth_col(index->table, i); + + templ = prebuilt->mysql_template + prebuilt->n_template++; + UNIV_MEM_INVALID(templ, sizeof *templ); + templ->col_no = i; + templ->clust_rec_field_no = dict_col_get_clust_pos(col, clust_index); + ut_a(templ->clust_rec_field_no != ULINT_UNDEFINED); + + if (dict_index_is_clust(index)) { + templ->rec_field_no = templ->clust_rec_field_no; + } else { + templ->rec_field_no = dict_index_get_nth_col_pos(index, i); + } + + if (field->null_ptr) { + templ->mysql_null_byte_offset = + (ulint) ((char*) field->null_ptr + - (char*) table->record[0]); + + templ->mysql_null_bit_mask = (ulint) field->null_bit; + } else { + templ->mysql_null_bit_mask = 0; + } + + templ->mysql_col_offset = (ulint) get_field_offset(table, field); + + templ->mysql_col_len = (ulint) field->pack_length(); + templ->type = col->mtype; + templ->mysql_type = (ulint)field->type(); + + if (templ->mysql_type == DATA_MYSQL_TRUE_VARCHAR) { + templ->mysql_length_bytes = (ulint) + (((Field_varstring*)field)->length_bytes); + } + + templ->charset = dtype_get_charset_coll(col->prtype); + templ->mbminlen = col->mbminlen; + templ->mbmaxlen = col->mbmaxlen; + templ->is_unsigned = col->prtype & DATA_UNSIGNED; + + if (!dict_index_is_clust(index) + && templ->rec_field_no == ULINT_UNDEFINED) { + prebuilt->need_to_access_clustered = TRUE; + } + + if (prebuilt->mysql_prefix_len < templ->mysql_col_offset + + templ->mysql_col_len) { + prebuilt->mysql_prefix_len = templ->mysql_col_offset + + templ->mysql_col_len; + } + + if (templ->type == DATA_BLOB) { + prebuilt->templ_contains_blob = TRUE; + } + + return(templ); +} + +/**************************************************************//** Builds a 'template' to the prebuilt struct. The template is used in fast retrieval of just those column values MySQL needs in its processing. */ -static +UNIV_INTERN void -build_template( -/*===========*/ - row_prebuilt_t* prebuilt, /*!< in/out: prebuilt struct */ - THD* thd, /*!< in: current user thread, used - only if templ_type is - ROW_MYSQL_REC_FIELDS */ - TABLE* table, /* in: MySQL table */ - ha_innobase* file, /* in: ha_innobase handler */ - uint templ_type) /* in: ROW_MYSQL_WHOLE_ROW or - ROW_MYSQL_REC_FIELDS */ +ha_innobase::build_template( +/*========================*/ + bool whole_row) /*!< in: true=ROW_MYSQL_WHOLE_ROW, + false=ROW_MYSQL_REC_FIELDS */ { dict_index_t* index; dict_index_t* clust_index; - mysql_row_templ_t* templ; - Field* field; ulint n_fields, n_stored_fields; - ulint n_requested_fields = 0; ibool fetch_all_in_key = FALSE; ibool fetch_primary_key_cols = FALSE; - ulint sql_idx, innodb_idx=0; - /* byte offset of the end of last requested column */ - ulint mysql_prefix_len = 0; - ibool do_idx_cond_push= FALSE; - ibool need_second_pass= FALSE; - + ulint i, sql_idx; + if (prebuilt->select_lock_type == LOCK_X) { /* We always retrieve the whole clustered index record if we use exclusive row level locks, for example, if the read is done in an UPDATE statement. */ - templ_type = ROW_MYSQL_WHOLE_ROW; - } - - if (templ_type == ROW_MYSQL_REC_FIELDS) { + whole_row = true; + } else if (!whole_row) { if (prebuilt->hint_need_to_fetch_extra_cols == ROW_RETRIEVE_ALL_COLS) { @@ -4864,7 +4992,7 @@ build_template( fetch_all_in_key = TRUE; } else { - templ_type = ROW_MYSQL_WHOLE_ROW; + whole_row = true; } } else if (prebuilt->hint_need_to_fetch_extra_cols == ROW_RETRIEVE_PRIMARY_KEY) { @@ -4881,19 +5009,12 @@ build_template( clust_index = dict_table_get_first_index(prebuilt->table); - if (templ_type == ROW_MYSQL_REC_FIELDS) { - index = prebuilt->index; - } else { - index = clust_index; - } + index = whole_row ? clust_index : prebuilt->index; - if (index == clust_index) { - prebuilt->need_to_access_clustered = TRUE; - } else { - prebuilt->need_to_access_clustered = FALSE; - /* Below we check column by column if we need to access - the clustered index */ - } + prebuilt->need_to_access_clustered = (index == clust_index); + + /* Below we check column by column if we need to access + the clustered index. */ n_fields = (ulint)table->s->fields; /* number of columns */ n_stored_fields= (ulint)table->s->stored_fields; /* number of stored columns */ @@ -4903,160 +5024,206 @@ build_template( mem_alloc(n_stored_fields * sizeof(mysql_row_templ_t)); } - prebuilt->template_type = templ_type; + prebuilt->template_type = whole_row + ? ROW_MYSQL_WHOLE_ROW : ROW_MYSQL_REC_FIELDS; prebuilt->null_bitmap_len = table->s->null_bytes; + /* Prepare to build prebuilt->mysql_template[]. */ prebuilt->templ_contains_blob = FALSE; + prebuilt->mysql_prefix_len = 0; + prebuilt->n_template = 0; + prebuilt->idx_cond_n_cols = 0; + + /* Note that in InnoDB, i is the column number in the table. + MySQL calls columns 'fields'. */ + + if (active_index != MAX_KEY && active_index == pushed_idx_cond_keyno) { + /* Push down an index condition or an end_range check. */ + for (i = 0, sql_idx = 0; i < n_stored_fields; i++, sql_idx++) { + + while (!table->field[sql_idx]->stored_in_db) { + sql_idx++; + } + + const ibool index_contains + = dict_index_contains_col_or_prefix(index, i); + + /* Test if an end_range or an index condition + refers to the field. Note that "index" and + "index_contains" may refer to the clustered index. + Index condition pushdown is relative to prebuilt->index + (the index that is being looked up first). */ + + /* When join_read_always_key() invokes this + code via handler::ha_index_init() and + ha_innobase::index_init(), end_range is not + yet initialized. Because of that, we must + always check for index_contains, instead of + the subset + field->part_of_key.is_set(active_index) + which would be acceptable if end_range==NULL. */ + if (index == prebuilt->index + ? index_contains + : dict_index_contains_col_or_prefix( + prebuilt->index, i)) { + /* Needed in ICP */ + const Field* field; + mysql_row_templ_t* templ; + + if (whole_row) { + field = table->field[sql_idx]; + } else { + field = build_template_needs_field( + index_contains, + prebuilt->read_just_key, + fetch_all_in_key, + fetch_primary_key_cols, + index, table, i, sql_idx); + if (!field) { + continue; + } + } - - /* - Setup index condition pushdown (note: we don't need to check if - this is a scan on primary key as that is checked in idx_cond_push) - */ - if (file->active_index == file->pushed_idx_cond_keyno && - file->active_index != MAX_KEY && - templ_type == ROW_MYSQL_REC_FIELDS) - do_idx_cond_push= need_second_pass= TRUE; - - /* Note that in InnoDB, i is the column number. MySQL calls columns - 'fields'. */ - for (sql_idx = 0; sql_idx < n_fields; sql_idx++) { - templ = prebuilt->mysql_template + n_requested_fields; - field = table->field[sql_idx]; - if (!field->stored_in_db) - goto skip_field; - - if (UNIV_LIKELY(templ_type == ROW_MYSQL_REC_FIELDS)) { - /* Decide which columns we should fetch - and which we can skip. */ - register const ibool index_contains_field = - dict_index_contains_col_or_prefix(index, innodb_idx); - register const ibool index_covers_field = - field->part_of_key.is_set(file->active_index); - - if (!index_contains_field && prebuilt->read_just_key) { - /* If this is a 'key read', we do not need - columns that are not in the key */ - - goto skip_field; - } - - if (index_contains_field && fetch_all_in_key) { - /* This field is needed in the query */ - - goto include_field; - } - - if (bitmap_is_set(table->read_set, sql_idx) || - bitmap_is_set(table->write_set, sql_idx)) { - /* This field is needed in the query */ - - goto include_field; - } + templ = build_template_field( + prebuilt, clust_index, index, + table, field, i); + prebuilt->idx_cond_n_cols++; + ut_ad(prebuilt->idx_cond_n_cols + == prebuilt->n_template); + + if (index == prebuilt->index) { + templ->icp_rec_field_no + = templ->rec_field_no; + } else { + templ->icp_rec_field_no + = dict_index_get_nth_col_pos( + prebuilt->index, i); + } - if (fetch_primary_key_cols - && dict_table_col_in_clustered_key( - index->table, innodb_idx)) { - /* This field is needed in the query */ + if (dict_index_is_clust(prebuilt->index)) { + ut_ad(templ->icp_rec_field_no + != ULINT_UNDEFINED); + /* If the primary key includes + a column prefix, use it in + index condition pushdown, + because the condition is + evaluated before fetching any + off-page (externally stored) + columns. */ + if (templ->icp_rec_field_no + < prebuilt->index->n_uniq) { + /* This is a key column; + all set. */ + continue; + } + } else if (templ->icp_rec_field_no + != ULINT_UNDEFINED) { + continue; + } - goto include_field; + /* This is a column prefix index. + The column prefix can be used in + an end_range comparison. */ + + templ->icp_rec_field_no + = dict_index_get_nth_col_or_prefix_pos( + prebuilt->index, i, TRUE); + ut_ad(templ->icp_rec_field_no + != ULINT_UNDEFINED); + + /* Index condition pushdown can be used on + all columns of a secondary index, and on + the PRIMARY KEY columns. */ + /* TODO: enable this assertion + (but first ensure that end_range is + valid here and use an accurate condition + for end_range) + ut_ad(!dict_index_is_clust(prebuilt->index) + || templ->rec_field_no + < prebuilt->index->n_uniq); + */ } - - /* This field is not needed in the query, skip it */ - - goto skip_field; -include_field: - if (do_idx_cond_push && - ((need_second_pass && !index_covers_field) || - (!need_second_pass && index_covers_field))) - goto skip_field; } - n_requested_fields++; - templ->col_no = innodb_idx; - templ->clust_rec_field_no = dict_col_get_clust_pos( - &index->table->cols[innodb_idx], clust_index); - ut_ad(templ->clust_rec_field_no != ULINT_UNDEFINED); + ut_ad(prebuilt->idx_cond_n_cols > 0); + ut_ad(prebuilt->idx_cond_n_cols == prebuilt->n_template); + + /* Include the fields that are not needed in index condition + pushdown. */ + for (i = 0, sql_idx = 0; i < n_stored_fields; i++, sql_idx++) { + + while (!table->field[sql_idx]->stored_in_db) { + sql_idx++; + } + + const ibool index_contains + = dict_index_contains_col_or_prefix(index, i); + + if (index == prebuilt->index + ? !index_contains + : !dict_index_contains_col_or_prefix( + prebuilt->index, i)) { + /* Not needed in ICP */ + const Field* field; + + if (whole_row) { + field = table->field[sql_idx]; + } else { + field = build_template_needs_field( + index_contains, + prebuilt->read_just_key, + fetch_all_in_key, + fetch_primary_key_cols, + index, table, i, sql_idx); + if (!field) { + continue; + } + } - if (index == clust_index) { - templ->rec_field_no = templ->clust_rec_field_no; - } else { - templ->rec_field_no = dict_index_get_nth_col_pos( - index, innodb_idx); - if (templ->rec_field_no == ULINT_UNDEFINED) { - prebuilt->need_to_access_clustered = TRUE; + build_template_field(prebuilt, + clust_index, index, + table, field, i); } } - if (field->null_ptr) { - templ->mysql_null_byte_offset = - (ulint) ((char*) field->null_ptr - - (char*) table->record[0]); + prebuilt->idx_cond = this; + } else { + /* No index condition pushdown */ + prebuilt->idx_cond = NULL; - templ->mysql_null_bit_mask = (ulint) field->null_bit; - } else { - templ->mysql_null_bit_mask = 0; - } - - templ->mysql_col_offset = (ulint) - get_field_offset(table, field); + for (i = 0, sql_idx = 0; i < n_stored_fields; i++, sql_idx++) { + const Field* field; - templ->mysql_col_len = (ulint) field->pack_length(); - if (mysql_prefix_len < templ->mysql_col_offset - + templ->mysql_col_len) { - mysql_prefix_len = templ->mysql_col_offset - + templ->mysql_col_len; - } - templ->type = index->table->cols[innodb_idx].mtype; - templ->mysql_type = (ulint)field->type(); + while (!table->field[sql_idx]->stored_in_db) { + sql_idx++; + } - if (templ->mysql_type == DATA_MYSQL_TRUE_VARCHAR) { - templ->mysql_length_bytes = (ulint) - (((Field_varstring*)field)->length_bytes); - } + if (whole_row) { + field = table->field[sql_idx]; + } else { + field = build_template_needs_field( + dict_index_contains_col_or_prefix( + index, i), + prebuilt->read_just_key, + fetch_all_in_key, + fetch_primary_key_cols, + index, table, i, sql_idx); + if (!field) { + continue; + } + } - templ->charset = dtype_get_charset_coll( - index->table->cols[innodb_idx].prtype); - templ->mbminlen = index->table->cols[innodb_idx].mbminlen; - templ->mbmaxlen = index->table->cols[innodb_idx].mbmaxlen; - templ->is_unsigned = index->table->cols[innodb_idx].prtype - & DATA_UNSIGNED; - if (templ->type == DATA_BLOB) { - prebuilt->templ_contains_blob = TRUE; + build_template_field(prebuilt, clust_index, index, + table, field, i); } -skip_field: - if (need_second_pass && (sql_idx+1 == n_fields)) - { - prebuilt->n_index_fields= n_requested_fields; - need_second_pass= FALSE; - sql_idx= (~(ulint)0); /* to start from 0 */ - innodb_idx= (~(ulint)0); /* to start from 0 */ ///psergey-merge-merge-last-change - } - if (field->stored_in_db) { - innodb_idx++; - } - } - - prebuilt->n_template = n_requested_fields; - prebuilt->mysql_prefix_len = mysql_prefix_len; - - if (do_idx_cond_push) - { - prebuilt->idx_cond_func= index_cond_func_innodb; - prebuilt->idx_cond_func_arg= file; - } - else - { - prebuilt->idx_cond_func= NULL; - prebuilt->n_index_fields= n_requested_fields; } if (index != clust_index && prebuilt->need_to_access_clustered) { /* Change rec_field_no's to correspond to the clustered index record */ - for (ulint i = do_idx_cond_push? prebuilt->n_index_fields : 0; - i < n_requested_fields; i++) { - templ = prebuilt->mysql_template + i; + for (i = 0; i < prebuilt->n_template; i++) { + mysql_row_templ_t* templ + = &prebuilt->mysql_template[i]; templ->rec_field_no = templ->clust_rec_field_no; } } @@ -5319,7 +5486,7 @@ no_commit: /* Build the template used in converting quickly between the two database formats */ - build_template(prebuilt, NULL, table, this, ROW_MYSQL_WHOLE_ROW); + build_template(true); } innodb_srv_conc_enter_innodb(prebuilt->trx); @@ -6020,8 +6187,7 @@ ha_innobase::index_read( necessarily prebuilt->index, but can also be the clustered index */ if (prebuilt->sql_stat_start) { - build_template(prebuilt, user_thd, table, this, - ROW_MYSQL_REC_FIELDS); + build_template(false); } if (key_ptr) { @@ -6236,7 +6402,7 @@ ha_innobase::change_active_index( the flag ROW_MYSQL_WHOLE_ROW below, but that caused unnecessary copying. Starting from MySQL-4.1 we use a more efficient flag here. */ - build_template(prebuilt, user_thd, table, this, ROW_MYSQL_REC_FIELDS); + build_template(false); DBUG_RETURN(0); } @@ -8631,7 +8797,7 @@ ha_innobase::check( /* Build the template; we will use a dummy template in index scans done in checking */ - build_template(prebuilt, NULL, table, this, ROW_MYSQL_WHOLE_ROW); + build_template(true); } if (prebuilt->table->ibd_file_missing) { @@ -9123,12 +9289,7 @@ ha_innobase::extra( } break; case HA_EXTRA_RESET_STATE: - reset_template(prebuilt); - /* Reset index condition pushdown state */ - pushed_idx_cond= FALSE; - pushed_idx_cond_keyno= MAX_KEY; - prebuilt->idx_cond_func= NULL; - in_range_check_pushed_down= FALSE; + reset_template(); break; case HA_EXTRA_NO_KEYREAD: prebuilt->read_just_key = 0; @@ -9174,14 +9335,8 @@ ha_innobase::reset() row_mysql_prebuilt_free_blob_heap(prebuilt); } - reset_template(prebuilt); - - /* Reset index condition pushdown state */ - pushed_idx_cond_keyno= MAX_KEY; - pushed_idx_cond= NULL; - in_range_check_pushed_down= FALSE; + reset_template(); ds_mrr.dsmrr_close(); - prebuilt->idx_cond_func= NULL; /* TODO: This should really be reset in reset_template() but for now it's safer to do it explicitly here. */ @@ -9231,7 +9386,7 @@ ha_innobase::start_stmt( prebuilt->sql_stat_start = TRUE; prebuilt->hint_need_to_fetch_extra_cols = 0; - reset_template(prebuilt); + reset_template(); if (!prebuilt->mysql_has_locked) { /* This handle is for a temporary table created inside @@ -9350,7 +9505,7 @@ ha_innobase::external_lock( prebuilt->sql_stat_start = TRUE; prebuilt->hint_need_to_fetch_extra_cols = 0; - reset_template(prebuilt); + reset_template(); if (lock_type == F_WRLCK) { @@ -9533,7 +9688,7 @@ ha_innobase::transactional_table_lock( prebuilt->sql_stat_start = TRUE; prebuilt->hint_need_to_fetch_extra_cols = 0; - reset_template(prebuilt); + reset_template(); if (lock_type == F_WRLCK) { prebuilt->select_lock_type = LOCK_X; @@ -12355,39 +12510,47 @@ bool ha_innobase::is_thd_killed() * Index Condition Pushdown interface implementation */ -C_MODE_START - -/* - Index condition check function to be called from within Innobase. - See note on ICP_RESULT for return values description. -*/ - -static xtradb_icp_result_t index_cond_func_innodb(void *arg) +/*************************************************************//** +InnoDB index push-down condition check +@return ICP_NO_MATCH, ICP_MATCH, or ICP_OUT_OF_RANGE */ +extern "C" UNIV_INTERN +enum icp_result +innobase_index_cond( +/*================*/ + void* file) /*!< in/out: pointer to ha_innobase */ { - ha_innobase *h= (ha_innobase*)arg; + ha_innobase *h= (ha_innobase*) file; + if (h->is_thd_killed()) - return XTRADB_ICP_ABORTED_BY_USER; + return ICP_ABORTED_BY_USER; if (h->end_range) { if (h->compare_key2(h->end_range) > 0) - return XTRADB_ICP_OUT_OF_RANGE; /* caller should return HA_ERR_END_OF_FILE already */ + return ICP_OUT_OF_RANGE; /* caller should return HA_ERR_END_OF_FILE already */ } - return h->pushed_idx_cond->val_int()? XTRADB_ICP_MATCH : XTRADB_ICP_NO_MATCH; + return h->pushed_idx_cond->val_int()? ICP_MATCH : ICP_NO_MATCH; } -C_MODE_END - - -Item *ha_innobase::idx_cond_push(uint keyno_arg, Item* idx_cond_arg) -{ - if (keyno_arg != primary_key && prebuilt->select_lock_type != LOCK_X) - { - pushed_idx_cond_keyno= keyno_arg; - pushed_idx_cond= idx_cond_arg; - in_range_check_pushed_down= TRUE; - return NULL; /* Table handler will check the entire condition */ - } - return idx_cond_arg; /* Table handler will not make any checks */ +/** Attempt to push down an index condition. +* @param[in] keyno MySQL key number +* @param[in] idx_cond Index condition to be checked +* @return idx_cond if pushed; NULL if not pushed +*/ +UNIV_INTERN +class Item* +ha_innobase::idx_cond_push( + uint keyno, + class Item* idx_cond) +{ + DBUG_ENTER("ha_innobase::idx_cond_push"); + DBUG_ASSERT(keyno != MAX_KEY); + DBUG_ASSERT(idx_cond != NULL); + + pushed_idx_cond = idx_cond; + pushed_idx_cond_keyno = keyno; + in_range_check_pushed_down = TRUE; + /* Table handler will check the entire condition */ + DBUG_RETURN(NULL); } diff --git a/storage/xtradb/handler/ha_innodb.h b/storage/xtradb/handler/ha_innodb.h index 599b48287e3..7b263db2537 100644 --- a/storage/xtradb/handler/ha_innodb.h +++ b/storage/xtradb/handler/ha_innodb.h @@ -223,27 +223,81 @@ class ha_innobase: public handler bool check_if_incompatible_data(HA_CREATE_INFO *info, uint table_changes); bool check_if_supported_virtual_columns(void) { return TRUE; } +private: + /** Builds a 'template' to the prebuilt struct. + + The template is used in fast retrieval of just those column + values MySQL needs in its processing. + @param whole_row true if access is needed to a whole row, + false if accessing individual fields is enough */ + void build_template(bool whole_row); + /** Resets a query execution 'template'. + @see build_template() */ + inline void reset_template(); + public: - /** - * Multi Range Read interface - */ - int multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param, - uint n_ranges, uint mode, HANDLER_BUFFER *buf); - int multi_range_read_next(range_id_t *range_info); - ha_rows multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq, - void *seq_init_param, - uint n_ranges, uint *bufsz, + /** @name Multi Range Read interface @{ */ + /** Initialize multi range read @see DsMrr_impl::dsmrr_init + * @param seq + * @param seq_init_param + * @param n_ranges + * @param mode + * @param buf + */ + int multi_range_read_init(RANGE_SEQ_IF* seq, + void* seq_init_param, + uint n_ranges, uint mode, + HANDLER_BUFFER *buf); + /** Process next multi range read @see DsMrr_impl::dsmrr_next + * @param range_info + */ + int multi_range_read_next(range_id_t *range_info); + /** Initialize multi range read and get information. + * @see ha_myisam::multi_range_read_info_const + * @see DsMrr_impl::dsmrr_info_const + * @param keyno + * @param seq + * @param seq_init_param + * @param n_ranges + * @param bufsz + * @param flags + * @param cost + */ + ha_rows multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq, + void *seq_init_param, + uint n_ranges, uint *bufsz, + uint *flags, COST_VECT *cost); + /** Initialize multi range read and get information. + * @see DsMrr_impl::dsmrr_info + * @param keyno + * @param n_ranges + * @param keys + * @param key_parts + * @param bufsz + * @param flags + * @param cost + */ + ha_rows multi_range_read_info(uint keyno, uint n_ranges, uint keys, + uint key_parts, uint *bufsz, uint *flags, COST_VECT *cost); - ha_rows multi_range_read_info(uint keyno, uint n_ranges, uint keys, - uint key_parts, uint *bufsz, - uint *flags, COST_VECT *cost); - int multi_range_read_explain_info(uint mrr_mode, char *str, size_t size); - DsMrr_impl ds_mrr; + int multi_range_read_explain_info(uint mrr_mode, + char *str, size_t size); + + /** Attempt to push down an index condition. + * @param[in] keyno MySQL key number + * @param[in] idx_cond Index condition to be checked + * @return idx_cond if pushed; NULL if not pushed + */ + class Item* idx_cond_push(uint keyno, class Item* idx_cond); + + /* An helper function for index_cond_func_innodb: */ + bool is_thd_killed(); - Item *idx_cond_push(uint keyno, Item* idx_cond); +private: + /** The multi range read session object */ + DsMrr_impl ds_mrr; - /* An helper function for index_cond_func_innodb: */ - bool is_thd_killed(); + /* @} */ }; /* Some accessor functions which the InnoDB plugin needs, but which diff --git a/storage/xtradb/include/dict0dict.h b/storage/xtradb/include/dict0dict.h index 2baecdc958a..f94ab32eb9b 100644 --- a/storage/xtradb/include/dict0dict.h +++ b/storage/xtradb/include/dict0dict.h @@ -839,6 +839,18 @@ dict_index_get_nth_col_pos( const dict_index_t* index, /*!< in: index */ ulint n); /*!< in: column number */ /********************************************************************//** +Looks for column n in an index. +@return position in internal representation of the index; +ULINT_UNDEFINED if not contained */ +UNIV_INTERN +ulint +dict_index_get_nth_col_or_prefix_pos( +/*=================================*/ + const dict_index_t* index, /*!< in: index */ + ulint n, /*!< in: column number */ + ibool inc_prefix); /*!< in: TRUE=consider + column prefixes too */ +/********************************************************************//** Returns TRUE if the index contains a column or a prefix of that column. @return TRUE if contains the column or its prefix */ UNIV_INTERN diff --git a/storage/xtradb/include/dict0dict.ic b/storage/xtradb/include/dict0dict.ic index bd7534dc7e2..02527d8edd2 100644 --- a/storage/xtradb/include/dict0dict.ic +++ b/storage/xtradb/include/dict0dict.ic @@ -656,6 +656,20 @@ dict_index_get_nth_col_no( return(dict_col_get_no(dict_index_get_nth_col(index, pos))); } +/********************************************************************//** +Looks for column n in an index. +@return position in internal representation of the index; +ULINT_UNDEFINED if not contained */ +UNIV_INLINE +ulint +dict_index_get_nth_col_pos( +/*=======================*/ + const dict_index_t* index, /*!< in: index */ + ulint n) /*!< in: column number */ +{ + return(dict_index_get_nth_col_or_prefix_pos(index, n, FALSE)); +} + #ifndef UNIV_HOTBACKUP /********************************************************************//** Returns the minimum data size of an index record. diff --git a/storage/xtradb/include/ha_prototypes.h b/storage/xtradb/include/ha_prototypes.h index 445d94eeabb..db71c37afc3 100644 --- a/storage/xtradb/include/ha_prototypes.h +++ b/storage/xtradb/include/ha_prototypes.h @@ -247,6 +247,15 @@ innobase_get_at_most_n_mbchars( ulint data_len, /*!< in: length of the string in bytes */ const char* str); /*!< in: character string */ +/*************************************************************//** +InnoDB index push-down condition check +@return ICP_NO_MATCH, ICP_MATCH, or ICP_OUT_OF_RANGE */ +UNIV_INTERN +enum icp_result +innobase_index_cond( +/*================*/ + void* file) /*!< in/out: pointer to ha_innobase */ + __attribute__((nonnull, warn_unused_result)); /******************************************************************//** Returns true if the thread supports XA, global value of innodb_supports_xa if thd is NULL. diff --git a/storage/xtradb/include/row0mysql.h b/storage/xtradb/include/row0mysql.h index cd9dec2f089..b18e1fca5dc 100644 --- a/storage/xtradb/include/row0mysql.h +++ b/storage/xtradb/include/row0mysql.h @@ -547,6 +547,10 @@ struct mysql_row_templ_struct { Innobase record in the clustered index; not defined if template_type is ROW_MYSQL_WHOLE_ROW */ + ulint icp_rec_field_no; /*!< field number of the column in an + Innobase record in the current index; + not defined unless + index condition pushdown is used */ ulint mysql_col_offset; /*!< offset of the column in the MySQL row format */ ulint mysql_col_len; /*!< length of the column in the MySQL @@ -585,16 +589,6 @@ struct mysql_row_templ_struct { #define ROW_PREBUILT_ALLOCATED 78540783 #define ROW_PREBUILT_FREED 26423527 - -typedef enum xtradb_icp_result { - XTRADB_ICP_ERROR=-1, - XTRADB_ICP_NO_MATCH=0, - XTRADB_ICP_MATCH=1, - XTRADB_ICP_OUT_OF_RANGE=2, - XTRADB_ICP_ABORTED_BY_USER=3, -} xtradb_icp_result_t; - -typedef xtradb_icp_result_t (*index_cond_func_t)(void *param); /** A struct for (sometimes lazily) prebuilt structures in an Innobase table handle used within MySQL; these are used to save CPU time. */ @@ -792,16 +786,15 @@ struct row_prebuilt_struct { store it here so that we can return it to MySQL */ /*----------------------*/ + void* idx_cond; /*!< In ICP, pointer to a ha_innobase, + passed to innobase_index_cond(). + NULL if index condition pushdown is + not used. */ + ulint idx_cond_n_cols;/*!< Number of fields in idx_cond_cols. + 0 if and only if idx_cond == NULL. */ + /*----------------------*/ ulint magic_n2; /*!< this should be the same as magic_n */ - /*----------------------*/ - index_cond_func_t idx_cond_func;/* Index Condition Pushdown function, - or NULL if there is none set */ - void* idx_cond_func_arg;/* ICP function argument */ - ulint n_index_fields; /* Number of fields at the start of - mysql_template. Valid only when using - ICP. */ - /*----------------------*/ }; #define ROW_PREBUILT_FETCH_MAGIC_N 465765687 diff --git a/storage/xtradb/row/row0sel.c b/storage/xtradb/row/row0sel.c index 13dccdffb96..1dbd9e3a42d 100644 --- a/storage/xtradb/row/row0sel.c +++ b/storage/xtradb/row/row0sel.c @@ -58,6 +58,8 @@ Created 12/19/1997 Heikki Tuuri #include "buf0lru.h" #include "ha_prototypes.h" +#include "my_compare.h" /* enum icp_result */ + /* Maximum number of rows to prefetch; MySQL interface has another parameter */ #define SEL_MAX_N_PREFETCH 16 @@ -2674,144 +2676,96 @@ row_sel_field_store_in_mysql_format( } /**************************************************************//** -Convert a row in the Innobase format to a row in the MySQL format. -Note that the template in prebuilt may advise us to copy only a few -columns to mysql_rec, other columns are left blank. All columns may not -be needed in the query. -@return TRUE on success, FALSE if not all columns could be retrieved */ +Convert a field in the Innobase format to a field in the MySQL format. */ static __attribute__((warn_unused_result)) ibool -row_sel_store_mysql_rec( -/*====================*/ - byte* mysql_rec, /*!< out: row in the MySQL format */ - row_prebuilt_t* prebuilt, /*!< in: prebuilt struct */ - const rec_t* rec, /*!< in: Innobase record in the index - which was described in prebuilt's - template, or in the clustered index; - must be protected by a page latch */ - ibool rec_clust, /*!< in: TRUE if rec is in the - clustered index instead of - prebuilt->index */ - const ulint* offsets, /* in: array returned by - rec_get_offsets() */ - ulint start_field_no, /* in: start from this field */ - ulint end_field_no) /* in: end at this field */ +row_sel_store_mysql_field( +/*======================*/ + byte* mysql_rec, /*!< out: record in the + MySQL format */ + row_prebuilt_t* prebuilt, /*!< in/out: prebuilt struct */ + const rec_t* rec, /*!< in: InnoDB record; + must be protected by + a page latch */ + const ulint* offsets, /*!< in: array returned by + rec_get_offsets() */ + ulint field_no, /*!< in: templ->rec_field_no or + templ->clust_rec_field_no */ + const mysql_row_templ_t*templ) /*!< in: row template */ { - mem_heap_t* extern_field_heap = NULL; - mem_heap_t* heap; - ulint i; + const byte* data; + ulint len; - ut_ad(prebuilt->mysql_template); ut_ad(prebuilt->default_rec); + ut_ad(templ); + ut_ad(templ >= prebuilt->mysql_template); + ut_ad(templ < &prebuilt->mysql_template[prebuilt->n_template]); + ut_ad(field_no == templ->clust_rec_field_no + || field_no == templ->rec_field_no + || field_no == templ->icp_rec_field_no); ut_ad(rec_offs_validate(rec, NULL, offsets)); - ut_ad(!rec_get_deleted_flag(rec, rec_offs_comp(offsets))); - if (UNIV_LIKELY_NULL(prebuilt->blob_heap)) { - mem_heap_free(prebuilt->blob_heap); - prebuilt->blob_heap = NULL; - } - - for (i = start_field_no; i < end_field_no /* prebuilt->n_template */ ; i++) { - - const mysql_row_templ_t*templ = prebuilt->mysql_template + i; - const byte* data; - ulint len; - ulint field_no; - - field_no = rec_clust - ? templ->clust_rec_field_no : templ->rec_field_no; + if (UNIV_UNLIKELY(rec_offs_nth_extern(offsets, field_no))) { - if (UNIV_UNLIKELY(rec_offs_nth_extern(offsets, field_no))) { + mem_heap_t* heap; + /* Copy an externally stored field to a temporary heap */ - /* Copy an externally stored field to the temporary - heap */ + ut_a(!prebuilt->trx->has_search_latch); + ut_ad(field_no == templ->clust_rec_field_no); - ut_a(!prebuilt->trx->has_search_latch); + if (UNIV_UNLIKELY(templ->type == DATA_BLOB)) { + if (prebuilt->blob_heap == NULL) { + prebuilt->blob_heap = mem_heap_create( + UNIV_PAGE_SIZE); + } - if (UNIV_UNLIKELY(templ->type == DATA_BLOB)) { - if (prebuilt->blob_heap == NULL) { - prebuilt->blob_heap = mem_heap_create( - UNIV_PAGE_SIZE); - } + heap = prebuilt->blob_heap; + } else { + heap = mem_heap_create(UNIV_PAGE_SIZE); + } - heap = prebuilt->blob_heap; - } else { - extern_field_heap - = mem_heap_create(UNIV_PAGE_SIZE); + /* NOTE: if we are retrieving a big BLOB, we may + already run out of memory in the next call, which + causes an assert */ - heap = extern_field_heap; - } + data = btr_rec_copy_externally_stored_field( + rec, offsets, + dict_table_zip_size(prebuilt->table), + field_no, &len, heap); - /* NOTE: if we are retrieving a big BLOB, we may - already run out of memory in the next call, which - causes an assert */ - - data = btr_rec_copy_externally_stored_field( - rec, offsets, - dict_table_zip_size(prebuilt->table), - field_no, &len, heap); - - if (UNIV_UNLIKELY(!data)) { - /* The externally stored field - was not written yet. This - record should only be seen by - recv_recovery_rollback_active() - or any TRX_ISO_READ_UNCOMMITTED - transactions. */ - - if (extern_field_heap) { - mem_heap_free(extern_field_heap); - } + if (UNIV_UNLIKELY(!data)) { + /* The externally stored field was not written + yet. This record should only be seen by + recv_recovery_rollback_active() or any + TRX_ISO_READ_UNCOMMITTED transactions. */ - return(FALSE); + if (heap != prebuilt->blob_heap) { + mem_heap_free(heap); } - ut_a(len != UNIV_SQL_NULL); - } else { - /* Field is stored in the row. */ - - data = rec_get_nth_field(rec, offsets, field_no, &len); + ut_a(prebuilt->trx->isolation_level + == TRX_ISO_READ_UNCOMMITTED); + return(FALSE); + } - if (UNIV_UNLIKELY(templ->type == DATA_BLOB) - && len != UNIV_SQL_NULL) { + ut_a(len != UNIV_SQL_NULL); - /* It is a BLOB field locally stored in the - InnoDB record: we MUST copy its contents to - prebuilt->blob_heap here because later code - assumes all BLOB values have been copied to a - safe place. */ + row_sel_field_store_in_mysql_format( + mysql_rec + templ->mysql_col_offset, + templ, data, len); - if (prebuilt->blob_heap == NULL) { - prebuilt->blob_heap = mem_heap_create( - UNIV_PAGE_SIZE); - } - - data = memcpy(mem_heap_alloc( - prebuilt->blob_heap, len), - data, len); - } + if (heap != prebuilt->blob_heap) { + mem_heap_free(heap); } + } else { + /* Field is stored in the row. */ - if (len != UNIV_SQL_NULL) { - row_sel_field_store_in_mysql_format( - mysql_rec + templ->mysql_col_offset, - templ, data, len); - - /* Cleanup */ - if (extern_field_heap) { - mem_heap_free(extern_field_heap); - extern_field_heap = NULL; - } + data = rec_get_nth_field(rec, offsets, field_no, &len); - if (templ->mysql_null_bit_mask) { - /* It is a nullable column with a non-NULL - value */ - mysql_rec[templ->mysql_null_byte_offset] - &= ~(byte) templ->mysql_null_bit_mask; - } - } else { + if (len == UNIV_SQL_NULL) { /* MySQL assumes that the field for an SQL NULL value is set to the default value. */ + ut_ad(templ->mysql_null_bit_mask); UNIV_MEM_ASSERT_RW(prebuilt->default_rec + templ->mysql_col_offset, @@ -2822,6 +2776,85 @@ row_sel_store_mysql_rec( (const byte*) prebuilt->default_rec + templ->mysql_col_offset, templ->mysql_col_len); + return(TRUE); + } + + if (UNIV_UNLIKELY(templ->type == DATA_BLOB)) { + + /* It is a BLOB field locally stored in the + InnoDB record: we MUST copy its contents to + prebuilt->blob_heap here because + row_sel_field_store_in_mysql_format() stores a + pointer to the data, and the data passed to us + will be invalid as soon as the + mini-transaction is committed and the page + latch on the clustered index page is + released. */ + + if (prebuilt->blob_heap == NULL) { + prebuilt->blob_heap = mem_heap_create( + UNIV_PAGE_SIZE); + } + + data = mem_heap_dup(prebuilt->blob_heap, data, len); + } + + row_sel_field_store_in_mysql_format( + mysql_rec + templ->mysql_col_offset, + templ, data, len); + } + + ut_ad(len != UNIV_SQL_NULL); + + if (templ->mysql_null_bit_mask) { + /* It is a nullable column with a non-NULL + value */ + mysql_rec[templ->mysql_null_byte_offset] + &= ~(byte) templ->mysql_null_bit_mask; + } + + return(TRUE); +} + +/**************************************************************//** +Convert a row in the Innobase format to a row in the MySQL format. +Note that the template in prebuilt may advise us to copy only a few +columns to mysql_rec, other columns are left blank. All columns may not +be needed in the query. +@return TRUE on success, FALSE if not all columns could be retrieved */ +static __attribute__((warn_unused_result)) +ibool +row_sel_store_mysql_rec( +/*====================*/ + byte* mysql_rec, /*!< out: row in the MySQL format */ + row_prebuilt_t* prebuilt, /*!< in: prebuilt struct */ + const rec_t* rec, /*!< in: Innobase record in the index + which was described in prebuilt's + template, or in the clustered index; + must be protected by a page latch */ + ibool rec_clust, /*!< in: TRUE if rec is in the + clustered index instead of + prebuilt->index */ + const ulint* offsets) /*!< in: array returned by + rec_get_offsets(rec) */ +{ + ulint i; + + if (UNIV_LIKELY_NULL(prebuilt->blob_heap)) { + mem_heap_free(prebuilt->blob_heap); + prebuilt->blob_heap = NULL; + } + + for (i = 0; i < prebuilt->n_template; i++) { + const mysql_row_templ_t*templ = &prebuilt->mysql_template[i]; + + if (!row_sel_store_mysql_field(mysql_rec, prebuilt, + rec, offsets, + rec_clust + ? templ->clust_rec_field_no + : templ->rec_field_no, + templ)) { + return(FALSE); } } @@ -3192,31 +3225,19 @@ UNIV_INLINE __attribute__((warn_unused_result)) ibool row_sel_push_cache_row_for_mysql( /*=============================*/ - row_prebuilt_t* prebuilt, /*!< in: prebuilt struct */ - const rec_t* rec, /*!< in: record to push, in the index - which was described in prebuilt's - template, or in the clustered index; - must be protected by a page latch */ - ibool rec_clust, /*!< in: TRUE if rec is in the - clustered index instead of - prebuilt->index */ - const ulint* offsets, /* in: rec_get_offsets() */ - ulint start_field_no, /* in: start from this field */ - byte* remainder_buf) /* in: if start_field_no !=0, - where to take prev fields */ + byte* mysql_rec, /*!< in/out: MySQL record */ + row_prebuilt_t* prebuilt) /*!< in/out: prebuilt struct */ { - byte* buf; - ulint i; - ut_ad(prebuilt->n_fetch_cached < MYSQL_FETCH_CACHE_SIZE); - ut_ad(rec_offs_validate(rec, NULL, offsets)); - ut_ad(!rec_get_deleted_flag(rec, rec_offs_comp(offsets))); ut_a(!prebuilt->templ_contains_blob); - if (prebuilt->fetch_cache[0] == NULL) { + if (UNIV_UNLIKELY(prebuilt->fetch_cache[0] == NULL)) { + ulint i; /* Allocate memory for the fetch cache */ + ut_ad(prebuilt->n_fetch_cached == 0); for (i = 0; i < MYSQL_FETCH_CACHE_SIZE; i++) { + byte* buf; /* A user has reported memory corruption in these buffers in Linux. Put magic numbers there to help @@ -3236,46 +3257,14 @@ row_sel_push_cache_row_for_mysql( UNIV_MEM_INVALID(prebuilt->fetch_cache[prebuilt->n_fetch_cached], prebuilt->mysql_row_len); - if (UNIV_UNLIKELY(!row_sel_store_mysql_rec( - prebuilt->fetch_cache[ - prebuilt->n_fetch_cached], - prebuilt, - rec, - rec_clust, - offsets, - start_field_no, - prebuilt->n_template))) { - return(FALSE); - } - - if (start_field_no) { - - for (i=0; i < start_field_no; i++) { - register ulint offs; - mysql_row_templ_t* templ; - register byte * null_byte; - - templ = prebuilt->mysql_template + i; + memcpy(prebuilt->fetch_cache[prebuilt->n_fetch_cached], + mysql_rec, prebuilt->mysql_row_len); - if (templ->mysql_null_bit_mask) { - offs = templ->mysql_null_byte_offset; - - null_byte= prebuilt->fetch_cache[ - prebuilt->n_fetch_cached]+offs; - (*null_byte)&= ~templ->mysql_null_bit_mask; - (*null_byte)|= (*(remainder_buf + offs) & - templ->mysql_null_bit_mask); - } - - offs = templ->mysql_col_offset; - memcpy(prebuilt->fetch_cache[prebuilt->n_fetch_cached] - + offs, - remainder_buf + offs, - templ->mysql_col_len); - } + if (++prebuilt->n_fetch_cached < MYSQL_FETCH_CACHE_SIZE) { + return(FALSE); } - prebuilt->n_fetch_cached++; + row_sel_pop_cached_row_for_mysql(mysql_rec, prebuilt); return(TRUE); } @@ -3353,6 +3342,81 @@ row_sel_try_search_shortcut_for_mysql( return(SEL_FOUND); } +/*********************************************************************//** +Check a pushed-down index condition. +@return ICP_NO_MATCH, ICP_MATCH, or ICP_OUT_OF_RANGE */ +static +enum icp_result +row_search_idx_cond_check( +/*======================*/ + byte* mysql_rec, /*!< out: record + in MySQL format (invalid unless + prebuilt->idx_cond!=NULL and + we return ICP_MATCH) */ + row_prebuilt_t* prebuilt, /*!< in/out: prebuilt struct + for the table handle */ + const rec_t* rec, /*!< in: InnoDB record */ + const ulint* offsets) /*!< in: rec_get_offsets() */ +{ + enum icp_result result; + ulint i; + + ut_ad(rec_offs_validate(rec, prebuilt->index, offsets)); + + if (!prebuilt->idx_cond) { + return(ICP_MATCH); + } + + /* Convert to MySQL format those fields that are needed for + evaluating the index condition. */ + + if (UNIV_LIKELY_NULL(prebuilt->blob_heap)) { + mem_heap_empty(prebuilt->blob_heap); + } + + for (i = 0; i < prebuilt->idx_cond_n_cols; i++) { + const mysql_row_templ_t*templ = &prebuilt->mysql_template[i]; + + if (!row_sel_store_mysql_field(mysql_rec, prebuilt, + rec, offsets, + templ->icp_rec_field_no, + templ)) { + return(ICP_NO_MATCH); + } + } + + /* We assume that the index conditions on + case-insensitive columns are case-insensitive. The + case of such columns may be wrong in a secondary + index, if the case of the column has been updated in + the past, or a record has been deleted and a record + inserted in a different case. */ + result = innobase_index_cond(prebuilt->idx_cond); + switch (result) { + case ICP_MATCH: + /* Convert the remaining fields to MySQL format. + If this is a secondary index record, we must defer + this until we have fetched the clustered index record. */ + if (!prebuilt->need_to_access_clustered + || dict_index_is_clust(prebuilt->index)) { + if (!row_sel_store_mysql_rec(mysql_rec, prebuilt, + rec, + FALSE, offsets)) { + ut_ad(dict_index_is_clust(prebuilt->index)); + result = ICP_NO_MATCH; + } + } + /* fall through */ + case ICP_NO_MATCH: + case ICP_OUT_OF_RANGE: + case ICP_ABORTED_BY_USER: + return(result); + default: ; + } + + ut_error; +} + /********************************************************************//** Searches for rows in the database. This is used in the interface to MySQL. This function opens a cursor, and also implements fetch next @@ -3415,10 +3479,8 @@ row_search_for_mysql( mem_heap_t* heap = NULL; ulint offsets_[REC_OFFS_NORMAL_SIZE]; ulint* offsets = offsets_; - ibool some_fields_in_buffer; ibool table_lock_waited = FALSE; ibool problematic_use = FALSE; - ibool get_clust_rec = 0; rec_offs_init(offsets_); @@ -3681,10 +3743,24 @@ row_search_for_mysql( mtr_commit(&mtr). */ ut_ad(!rec_get_deleted_flag(rec, comp)); + if (prebuilt->idx_cond) { + switch (row_search_idx_cond_check( + buf, prebuilt, + rec, offsets)) { + case ICP_NO_MATCH: + case ICP_OUT_OF_RANGE: + case ICP_ABORTED_BY_USER: + goto shortcut_mismatch; + case ICP_MATCH: + goto shortcut_match; + default: ; + } + ut_error; + } + if (!row_sel_store_mysql_rec(buf, prebuilt, rec, FALSE, - offsets, 0, - prebuilt->n_template)) { + offsets)) { /* Only fresh inserts may contain incomplete externally stored columns. Pretend that such @@ -3695,13 +3771,12 @@ row_search_for_mysql( rolling back a recovered transaction. Rollback happens at a lower level, not here. */ - ut_a(trx->isolation_level - == TRX_ISO_READ_UNCOMMITTED); /* Proceed as in case SEL_RETRY. */ break; } + shortcut_match: mtr_commit(&mtr); /* ut_print_name(stderr, index->name); @@ -3713,6 +3788,7 @@ row_search_for_mysql( goto release_search_latch_if_needed; case SEL_EXHAUSTED: + shortcut_mismatch: mtr_commit(&mtr); /* ut_print_name(stderr, index->name); @@ -3804,8 +3880,9 @@ retry_check: if (!prebuilt->sql_stat_start) { /* No need to set an intention lock or assign a read view */ - if (trx->read_view == NULL - && prebuilt->select_lock_type == LOCK_NONE) { + if (UNIV_UNLIKELY + (trx->read_view == NULL + && prebuilt->select_lock_type == LOCK_NONE)) { fputs("InnoDB: Error: MySQL is trying to" " perform a consistent read\n" @@ -4265,6 +4342,16 @@ no_gap_lock: if (UNIV_LIKELY(trx->wait_lock != NULL)) { lock_cancel_waiting_and_release( trx->wait_lock); + mutex_exit(&kernel_mutex); + + if (old_vers == NULL) { + /* The row was not yet committed */ + + goto next_rec; + } + + did_semi_consistent_read = TRUE; + rec = old_vers; } else { mutex_exit(&kernel_mutex); @@ -4275,19 +4362,7 @@ no_gap_lock: offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap); - err = DB_SUCCESS; - break; } - mutex_exit(&kernel_mutex); - - if (old_vers == NULL) { - /* The row was not yet committed */ - - goto next_rec; - } - - did_semi_consistent_read = TRUE; - rec = old_vers; break; default: @@ -4346,8 +4421,27 @@ no_gap_lock: if (!lock_sec_rec_cons_read_sees( rec, trx->read_view)) { - get_clust_rec = TRUE; - goto idx_cond_check; + /* We should look at the clustered index. + However, as this is a non-locking read, + we can skip the clustered index lookup if + the condition does not match the secondary + index entry. */ + switch (row_search_idx_cond_check( + buf, prebuilt, rec, offsets)) { + case ICP_NO_MATCH: + goto next_rec; + case ICP_OUT_OF_RANGE: + err = DB_RECORD_NOT_FOUND; + goto idx_cond_failed; + case ICP_ABORTED_BY_USER: + err = DB_SEARCH_ABORTED_BY_USER; + goto idx_cond_failed; + case ICP_MATCH: + goto requires_clust_rec; + default: ; + } + + ut_error; } } } @@ -4392,38 +4486,31 @@ no_gap_lock: goto next_rec; } - -idx_cond_check: - if (prebuilt->idx_cond_func) { - int res; - ibool ib_res; - ut_ad(prebuilt->template_type != ROW_MYSQL_DUMMY_TEMPLATE); - offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap); - ib_res= row_sel_store_mysql_rec(buf, prebuilt, rec, FALSE, - offsets, 0, prebuilt->n_index_fields); - /* - The above call will fail and return FALSE when requested to - store an "externally stored column" (afaiu, a blob). Index - Condition Pushdown is not supported for indexes with blob - columns, so we should never get this error. - */ - ut_ad(ib_res); - res= prebuilt->idx_cond_func(prebuilt->idx_cond_func_arg); - if (res == XTRADB_ICP_NO_MATCH) - goto next_rec; - else if (res != XTRADB_ICP_MATCH) { - err= (res == XTRADB_ICP_ABORTED_BY_USER ? - DB_SEARCH_ABORTED_BY_USER : - DB_RECORD_NOT_FOUND); - goto idx_cond_failed; - } - /* res == XTRADB_ICP_MATCH */ - } + /* Check if the record matches the index condition. */ + switch (row_search_idx_cond_check(buf, prebuilt, rec, offsets)) { + case ICP_NO_MATCH: + if (did_semi_consistent_read) { + row_unlock_for_mysql(prebuilt, TRUE); + } + goto next_rec; + case ICP_ABORTED_BY_USER: + err = DB_SEARCH_ABORTED_BY_USER; + goto idx_cond_failed; + case ICP_OUT_OF_RANGE: + err = DB_RECORD_NOT_FOUND; + goto idx_cond_failed; + case ICP_MATCH: + break; + default: + ut_error; + } /* Get the clustered index record if needed, if we did not do the search using the clustered index. */ - if (get_clust_rec || (index != clust_index - && prebuilt->need_to_access_clustered)) { + if (index != clust_index && prebuilt->need_to_access_clustered) { + +requires_clust_rec: + ut_ad(index != clust_index); /* We use a 'goto' to the preceding label if a consistent read of a secondary index record requires us to look up old @@ -4487,6 +4574,19 @@ idx_cond_check: result_rec = clust_rec; ut_ad(rec_offs_validate(result_rec, clust_index, offsets)); + + if (prebuilt->idx_cond) { + /* Convert the remaining fields to + MySQL format. We were unable to do + this in row_search_idx_cond_check(), + because the condition is on the + secondary index and the requested + column is in the clustered index. */ + if (!row_sel_store_mysql_rec(buf, prebuilt, result_rec, + TRUE, offsets)) { + goto next_rec; + } + } } else { result_rec = rec; } @@ -4520,15 +4620,10 @@ idx_cond_check: are BLOBs in the fields to be fetched. In HANDLER we do not cache rows because there the cursor is a scrollable cursor. */ - some_fields_in_buffer = (index != clust_index - && prebuilt->idx_cond_func); - - if (!row_sel_push_cache_row_for_mysql(prebuilt, result_rec, - result_rec != rec, - offsets, - some_fields_in_buffer? - prebuilt->n_index_fields : 0, - buf)) { + + if (!prebuilt->idx_cond + && !row_sel_store_mysql_rec(buf, prebuilt, result_rec, + result_rec != rec, offsets)) { /* Only fresh inserts may contain incomplete externally stored columns. Pretend that such records do not exist. Such records may only be @@ -4536,14 +4631,10 @@ idx_cond_check: level or when rolling back a recovered transaction. Rollback happens at a lower level, not here. */ - ut_a(trx->isolation_level == TRX_ISO_READ_UNCOMMITTED); - } else if (prebuilt->n_fetch_cached - == MYSQL_FETCH_CACHE_SIZE) { - - goto got_row; + goto next_rec; + } else if (row_sel_push_cache_row_for_mysql(buf, prebuilt)) { + goto next_rec; } - - goto next_rec; } else { if (UNIV_UNLIKELY (prebuilt->template_type == ROW_MYSQL_DUMMY_TEMPLATE)) { @@ -4564,15 +4655,11 @@ idx_cond_check: rec_offs_size(offsets)); mach_write_to_4(buf, rec_offs_extra_size(offsets) + 4); - } else { - /* Returning a row to MySQL */ - - if (!row_sel_store_mysql_rec(buf, prebuilt, result_rec, - result_rec != rec, - offsets, - prebuilt->idx_cond_func? - prebuilt->n_index_fields: 0, - prebuilt->n_template)) { + } else if (!prebuilt->idx_cond) { + /* The record was not yet converted to MySQL format. */ + if (!row_sel_store_mysql_rec( + buf, prebuilt, + result_rec, result_rec != rec, offsets)) { /* Only fresh inserts may contain incomplete externally stored columns. Pretend that such records do @@ -4581,8 +4668,6 @@ idx_cond_check: isolation level or when rolling back a recovered transaction. Rollback happens at a lower level, not here. */ - ut_a(trx->isolation_level - == TRX_ISO_READ_UNCOMMITTED); goto next_rec; } } @@ -4600,7 +4685,6 @@ idx_cond_check: /* From this point on, 'offsets' are invalid. */ -got_row: /* We have an optimization to save CPU time: if this is a consistent read on a unique condition on the clustered index, then we do not store the pcur position, because any fetch next or prev will anyway @@ -4624,7 +4708,6 @@ idx_cond_failed: next_rec: /* Reset the old and new "did semi-consistent read" flags. */ - get_clust_rec = FALSE; if (UNIV_UNLIKELY(prebuilt->row_read_type == ROW_READ_DID_SEMI_CONSISTENT)) { prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT; @@ -4635,6 +4718,7 @@ next_rec: /*-------------------------------------------------------------*/ /* PHASE 5: Move the cursor to the next index record */ + /*TODO: with ICP, do this when switching pages, every N pages */ if (UNIV_UNLIKELY(mtr_has_extra_clust_latch)) { /* We must commit mtr if we are moving to the next non-clustered index record, because we could break the |