diff options
author | Jimmy Yang <jimmy.yang@oracle.com> | 2011-05-31 02:12:32 -0700 |
---|---|---|
committer | Jimmy Yang <jimmy.yang@oracle.com> | 2011-05-31 02:12:32 -0700 |
commit | 9e2b7fa7d5f0cbe4920be5567314b6de1af660a4 (patch) | |
tree | 21100612140d5618d083e91268a4594a0836953c /storage | |
parent | 53e9aabe126ad73845958818f5872fcd4425588c (diff) | |
download | mariadb-git-9e2b7fa7d5f0cbe4920be5567314b6de1af660a4.tar.gz |
Implement worklog #5743 InnoDB: Lift the limit of index key prefixes.
With this change, the maximum index prefix column length is raised from 767
bytes to 3072 bytes if "innodb_large_prefix" is set to "true".
rb://603 approved by Marko
Diffstat (limited to 'storage')
22 files changed, 287 insertions, 83 deletions
diff --git a/storage/innobase/data/data0data.c b/storage/innobase/data/data0data.c index 0ef0cfa554a..6d07fc249fa 100644 --- a/storage/innobase/data/data0data.c +++ b/storage/innobase/data/data0data.c @@ -585,7 +585,8 @@ dtuple_convert_big_rec( if (dict_table_get_format(index->table) < DICT_TF_FORMAT_ZIP) { /* up to MySQL 5.1: store a 768-byte prefix locally */ - local_len = BTR_EXTERN_FIELD_REF_SIZE + DICT_MAX_INDEX_COL_LEN; + local_len = BTR_EXTERN_FIELD_REF_SIZE + + DICT_ANTELOPE_MAX_INDEX_COL_LEN; } else { /* new-format table: do not store any BLOB prefix locally */ local_len = BTR_EXTERN_FIELD_REF_SIZE; @@ -757,7 +758,10 @@ dtuple_convert_back_big_rec( local_len -= BTR_EXTERN_FIELD_REF_SIZE; - ut_ad(local_len <= DICT_MAX_INDEX_COL_LEN); + /* Only in REDUNDANT and COMPACT format, we store + up to DICT_ANTELOPE_MAX_INDEX_COL_LEN (768) bytes + locally */ + ut_ad(local_len <= DICT_ANTELOPE_MAX_INDEX_COL_LEN); dfield_set_data(dfield, (char*) b->data - local_len, diff --git a/storage/innobase/dict/dict0dict.c b/storage/innobase/dict/dict0dict.c index df9db4d7428..1e3aed92cf7 100644 --- a/storage/innobase/dict/dict0dict.c +++ b/storage/innobase/dict/dict0dict.c @@ -1352,36 +1352,63 @@ dict_index_too_big_for_undo( ulint fixed_size = dict_col_get_fixed_size(col, dict_table_is_comp(table)); + ulint max_prefix + = col->max_prefix; if (fixed_size) { /* Fixed-size columns are stored locally. */ max_size = fixed_size; } else if (max_size <= BTR_EXTERN_FIELD_REF_SIZE * 2) { /* Short columns are stored locally. */ - } else if (!col->ord_part) { + } else if (!col->ord_part + || (col->max_prefix + < (ulint) DICT_MAX_FIELD_LEN_BY_FORMAT(table))) { /* See if col->ord_part would be set - because of new_index. */ + because of new_index. 
Also check if the new + index could have longer prefix on columns + that already had ord_part set */ ulint j; for (j = 0; j < new_index->n_uniq; j++) { if (dict_index_get_nth_col( new_index, j) == col) { + const dict_field_t* field + = dict_index_get_nth_field( + new_index, j); + + if (field->prefix_len + > col->max_prefix) { + max_prefix = + field->prefix_len; + } goto is_ord_part; } } + if (col->ord_part) { + goto is_ord_part; + } + /* This is not an ordering column in any index. Thus, it can be stored completely externally. */ max_size = BTR_EXTERN_FIELD_REF_SIZE; } else { + ulint max_field_len; is_ord_part: + max_field_len = DICT_MAX_FIELD_LEN_BY_FORMAT(table); + /* This is an ordering column in some index. A long enough prefix must be written to the undo log. See trx_undo_page_fetch_ext(). */ + max_size = ut_min(max_size, max_field_len); + + /* We only store the needed prefix length in undo log */ + if (max_prefix) { + ut_ad(dict_table_get_format(table) + >= DICT_TF_FORMAT_ZIP); - if (max_size > REC_MAX_INDEX_COL_LEN) { - max_size = REC_MAX_INDEX_COL_LEN; + max_size = ut_min(max_prefix, max_size); } max_size += BTR_EXTERN_FIELD_REF_SIZE; @@ -1635,15 +1662,16 @@ too_big: /* In dtuple_convert_big_rec(), variable-length columns that are longer than BTR_EXTERN_FIELD_REF_SIZE * 2 may be chosen for external storage. If the column appears - in an ordering column of an index, a longer prefix of - REC_MAX_INDEX_COL_LEN will be copied to the undo log - by trx_undo_page_report_modify() and + in an ordering column of an index, a longer prefix determined + by dict_max_field_len_store_undo() will be copied to the undo + log by trx_undo_page_report_modify() and trx_undo_page_fetch_ext(). It suffices to check the capacity of the undo log whenever new_index includes a column prefix on a column that may be stored externally. 
*/ if (field->prefix_len /* prefix index */ - && !col->ord_part /* not yet ordering column */ + && (!col->ord_part /* not yet ordering column */ + || field->prefix_len > col->max_prefix) && !dict_col_get_fixed_size(col, TRUE) /* variable-length */ && dict_col_get_max_size(col) > BTR_EXTERN_FIELD_REF_SIZE * 2 /* long enough */) { @@ -1660,11 +1688,17 @@ too_big: } undo_size_ok: - /* Flag the ordering columns */ + /* Flag the ordering columns and also set column max_prefix */ for (i = 0; i < n_ord; i++) { + const dict_field_t* field + = dict_index_get_nth_field(new_index, i); - dict_index_get_nth_field(new_index, i)->col->ord_part = 1; + field->col->ord_part = 1; + + if (field->prefix_len > field->col->max_prefix) { + field->col->max_prefix = field->prefix_len; + } } /* Add the new index as the last index for the table */ @@ -1867,14 +1901,14 @@ dict_index_add_col( variable-length fields, so that the extern flag can be embedded in the length word. */ - if (field->fixed_len > DICT_MAX_INDEX_COL_LEN) { + if (field->fixed_len > DICT_MAX_FIXED_COL_LEN) { field->fixed_len = 0; } -#if DICT_MAX_INDEX_COL_LEN != 768 +#if DICT_MAX_FIXED_COL_LEN != 768 /* The comparison limit above must be constant. If it were changed, the disk format of some fixed-length columns would change, which would be a disaster. 
*/ -# error "DICT_MAX_INDEX_COL_LEN != 768" +# error "DICT_MAX_FIXED_COL_LEN != 768" #endif if (!(col->prtype & DATA_NOT_NULL)) { diff --git a/storage/innobase/dict/dict0load.c b/storage/innobase/dict/dict0load.c index 7d531c26157..ab1fb16361e 100644 --- a/storage/innobase/dict/dict0load.c +++ b/storage/innobase/dict/dict0load.c @@ -1103,7 +1103,7 @@ err_len: goto err_len; } - if (prefix_len >= DICT_MAX_INDEX_COL_LEN) { + if (prefix_len > REC_VERSION_56_MAX_INDEX_COL_LEN) { if (addition_err_str) { ut_snprintf(addition_err_str, err_str_len, "index field '%s' has a prefix length" @@ -1205,7 +1205,7 @@ dict_load_fields( " innodb_force_recovery to load" " the table\n", index->name, addition_err_str, - (ulong) (DICT_MAX_INDEX_COL_LEN - 1)); + (ulong) (REC_VERSION_56_MAX_INDEX_COL_LEN)); } else { fprintf(stderr, "InnoDB: %s\n", err_msg); diff --git a/storage/innobase/dict/dict0mem.c b/storage/innobase/dict/dict0mem.c index 8785dfb57ed..982cca5a796 100644 --- a/storage/innobase/dict/dict0mem.c +++ b/storage/innobase/dict/dict0mem.c @@ -232,6 +232,7 @@ dict_mem_fill_column_struct( column->ind = (unsigned int) col_pos; column->ord_part = 0; + column->max_prefix = 0; column->mtype = (unsigned int) mtype; column->prtype = (unsigned int) prtype; column->len = (unsigned int) col_len; diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 8efa0523927..34ee3c75946 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -166,6 +166,7 @@ static my_bool innobase_locks_unsafe_for_binlog = FALSE; static my_bool innobase_rollback_on_timeout = FALSE; static my_bool innobase_create_status_file = FALSE; static my_bool innobase_stats_on_metadata = TRUE; +static my_bool innobase_large_prefix = FALSE; static char* internal_innobase_data_file_path = NULL; @@ -904,7 +905,7 @@ int convert_error_code_to_mysql( /*========================*/ int error, /*!< in: InnoDB error code */ - ulint flags, /*!< in: InnoDB 
table flags, or 0 */ + ulint flags, /*!< in: InnoDB table flags, or 0 */ THD* thd) /*!< in: user thread handle or NULL */ { switch (error) { @@ -1008,6 +1009,11 @@ convert_error_code_to_mysql( & DICT_TF_COMPACT) / 2); return(HA_ERR_TO_BIG_ROW); + case DB_TOO_BIG_INDEX_COL: + my_error(ER_INDEX_COLUMN_TOO_LONG, MYF(0), + DICT_MAX_FIELD_LEN_BY_FORMAT_FLAG(flags)); + return(HA_ERR_INDEX_COL_TOO_LONG); + case DB_NO_SAVEPOINT: return(HA_ERR_NO_SAVEPOINT); @@ -3946,7 +3952,11 @@ UNIV_INTERN uint ha_innobase::max_supported_key_part_length() const { - return(DICT_MAX_INDEX_COL_LEN - 1); + /* A table format specific index column length check will be performed + at ha_innobase::add_index() and row_create_index_for_mysql() */ + return(innobase_large_prefix + ? REC_VERSION_56_MAX_INDEX_COL_LEN + : REC_ANTELOPE_MAX_INDEX_COL_LEN - 1); } /******************************************************************//** @@ -7010,8 +7020,8 @@ ha_innobase::create( if (i != (uint) primary_key_no) { - if ((error = create_index(trx, form, flags, norm_name, - i))) { + if ((error = create_index(trx, form, flags, + norm_name, i))) { goto cleanup; } } @@ -11076,6 +11086,11 @@ static MYSQL_SYSVAR_STR(flush_method, innobase_file_flush_method, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, "With which method to flush data.", NULL, NULL, NULL); +static MYSQL_SYSVAR_BOOL(large_prefix, innobase_large_prefix, + PLUGIN_VAR_NOCMDARG, + "Support large index prefix length of REC_VERSION_56_MAX_INDEX_COL_LEN (3072) bytes.", + NULL, NULL, FALSE); + static MYSQL_SYSVAR_BOOL(locks_unsafe_for_binlog, innobase_locks_unsafe_for_binlog, PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, "Force InnoDB to not use next-key locking, to use only row-level locking.", @@ -11329,6 +11344,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(flush_log_at_trx_commit), MYSQL_SYSVAR(flush_method), MYSQL_SYSVAR(force_recovery), + MYSQL_SYSVAR(large_prefix), MYSQL_SYSVAR(locks_unsafe_for_binlog), 
MYSQL_SYSVAR(lock_wait_timeout), #ifdef UNIV_LOG_ARCHIVE diff --git a/storage/innobase/handler/handler0alter.cc b/storage/innobase/handler/handler0alter.cc index cc7e48ebd44..52607a49bac 100644 --- a/storage/innobase/handler/handler0alter.cc +++ b/storage/innobase/handler/handler0alter.cc @@ -539,7 +539,7 @@ innobase_create_key_def( if (!new_primary && (key_info->flags & HA_NOSAME) && (!(key_info->flags & HA_KEY_HAS_PART_KEY_SEG)) && row_table_got_default_clust_index(table)) { - uint key_part = key_info->key_parts; + uint key_part = key_info->key_parts; new_primary = TRUE; @@ -595,6 +595,27 @@ innobase_create_key_def( } /*******************************************************************//** +Check each index column size, make sure they do not exceed the max limit +@return HA_ERR_INDEX_COL_TOO_LONG if index column size exceeds limit */ +static +int +innobase_check_column_length( +/*=========================*/ + const dict_table_t*table, /*!< in: table definition */ + const KEY* key_info) /*!< in: Indexes to be created */ +{ + ulint max_col_len = DICT_MAX_FIELD_LEN_BY_FORMAT(table); + + for (ulint key_part = 0; key_part < key_info->key_parts; key_part++) { + if (key_info->key_part[key_part].length > max_col_len) { + my_error(ER_INDEX_COLUMN_TOO_LONG, MYF(0), max_col_len); + return(HA_ERR_INDEX_COL_TOO_LONG); + } + } + return(0); +} + +/*******************************************************************//** Create a temporary tablename using query id, thread id, and id @return temporary tablename */ static @@ -676,6 +697,17 @@ ha_innobase::add_index( DBUG_RETURN(error); } + /* Check each index's column length to make sure they do not + exceed limit */ + for (ulint i = 0; i < num_of_keys; i++) { + error = innobase_check_column_length(innodb_table, + &key_info[i]); + + if (error) { + DBUG_RETURN(error); + } + } + heap = mem_heap_create(1024); trx_start_if_not_started(prebuilt->trx); diff --git a/storage/innobase/include/db0err.h b/storage/innobase/include/db0err.h 
index 8a71fa6511a..74a2354bce3 100644 --- a/storage/innobase/include/db0err.h +++ b/storage/innobase/include/db0err.h @@ -110,6 +110,8 @@ enum db_err { DB_PARENT_NO_INDEX, /* the parent table does not have an index that contains the foreign keys as its prefix columns */ + DB_TOO_BIG_INDEX_COL, /* index column size exceeds maximum + limit */ /* The following are partial failure codes */ DB_FAIL = 1000, diff --git a/storage/innobase/include/dict0dict.h b/storage/innobase/include/dict0dict.h index d6f2bebae3a..f979d0fcc96 100644 --- a/storage/innobase/include/dict0dict.h +++ b/storage/innobase/include/dict0dict.h @@ -136,6 +136,19 @@ dict_col_copy_type( /*===============*/ const dict_col_t* col, /*!< in: column */ dtype_t* type); /*!< out: data type */ +/**********************************************************************//** +Determine bytes of column prefix to be stored in the undo log. Please +note if the table format is UNIV_FORMAT_A (< DICT_TF_FORMAT_ZIP), no prefix +needs to be stored in the undo log. +@return bytes of column prefix to be stored in the undo log */ +UNIV_INLINE +ulint +dict_max_field_len_store_undo( +/*==========================*/ + dict_table_t* table, /*!< in: table */ + const dict_col_t* col); /*!< in: column which index prefix + is based on */ + #endif /* !UNIV_HOTBACKUP */ #ifdef UNIV_DEBUG /*********************************************************************//** diff --git a/storage/innobase/include/dict0dict.ic b/storage/innobase/include/dict0dict.ic index 59606af7056..59811568556 100644 --- a/storage/innobase/include/dict0dict.ic +++ b/storage/innobase/include/dict0dict.ic @@ -911,4 +911,30 @@ dict_table_get_on_id_low( return(table); } + +/**********************************************************************//** +Determine bytes of column prefix to be stored in the undo log. Please +note if the table format is UNIV_FORMAT_A (< DICT_TF_FORMAT_ZIP), no prefix +needs to be stored in the undo log. 
+@return bytes of column prefix to be stored in the undo log */ +UNIV_INLINE +ulint +dict_max_field_len_store_undo( +/*==========================*/ + dict_table_t* table, /*!< in: table */ + const dict_col_t* col) /*!< in: column which index prefix + is based on */ +{ + ulint prefix_len = 0; + + if (dict_table_get_format(table) >= DICT_TF_FORMAT_ZIP) + { + prefix_len = col->max_prefix + ? col->max_prefix + : DICT_MAX_FIELD_LEN_BY_FORMAT(table); + } + + return(prefix_len); +} + #endif /* !UNIV_HOTBACKUP */ diff --git a/storage/innobase/include/dict0mem.h b/storage/innobase/include/dict0mem.h index 51c9c2d1797..3a475fa85fc 100644 --- a/storage/innobase/include/dict0mem.h +++ b/storage/innobase/include/dict0mem.h @@ -302,32 +302,58 @@ struct dict_col_struct{ unsigned ord_part:1; /*!< nonzero if this column appears in the ordering fields of an index */ + unsigned max_prefix:12; /*!< maximum index prefix length on + this column. Our current max limit is + 3072 for Barracuda table */ }; -/** @brief DICT_MAX_INDEX_COL_LEN is measured in bytes and is the maximum -indexed column length (or indexed prefix length). +/** @brief DICT_ANTELOPE_MAX_INDEX_COL_LEN is measured in bytes and +is the maximum indexed column length (or indexed prefix length) in +ROW_FORMAT=REDUNDANT and ROW_FORMAT=COMPACT. Also, in any format, +any fixed-length field that is longer than this will be encoded as +a variable-length field. It is set to 3*256, so that one can create a column prefix index on 256 characters of a TEXT or VARCHAR column also in the UTF-8 charset. In that charset, a character may take at most 3 bytes. This constant MUST NOT BE CHANGED, or the compatibility of InnoDB data files would be at risk! */ -#define DICT_MAX_INDEX_COL_LEN REC_MAX_INDEX_COL_LEN +#define DICT_ANTELOPE_MAX_INDEX_COL_LEN REC_ANTELOPE_MAX_INDEX_COL_LEN + +/** Find out maximum indexed column length by its table format. 
+For ROW_FORMAT=REDUNDANT and ROW_FORMAT=COMPACT, the maximum +field length is REC_ANTELOPE_MAX_INDEX_COL_LEN - 1 (767). For new +barracuda format, the length could be REC_VERSION_56_MAX_INDEX_COL_LEN +(3072) bytes */ +#define DICT_MAX_FIELD_LEN_BY_FORMAT(table) \ + ((dict_table_get_format(table) < DICT_TF_FORMAT_ZIP) \ + ? (REC_ANTELOPE_MAX_INDEX_COL_LEN - 1) \ + : REC_VERSION_56_MAX_INDEX_COL_LEN) + +#define DICT_MAX_FIELD_LEN_BY_FORMAT_FLAG(flags) \ + ((((flags & DICT_TF_FORMAT_MASK) >> DICT_TF_FORMAT_SHIFT)\ + < DICT_TF_FORMAT_ZIP) \ + ? (REC_ANTELOPE_MAX_INDEX_COL_LEN - 1) \ + : REC_VERSION_56_MAX_INDEX_COL_LEN) + +/** Defines the maximum fixed length column size */ +#define DICT_MAX_FIXED_COL_LEN DICT_ANTELOPE_MAX_INDEX_COL_LEN /** Data structure for a field in an index */ struct dict_field_struct{ dict_col_t* col; /*!< pointer to the table column */ const char* name; /*!< name of the column */ - unsigned prefix_len:10; /*!< 0 or the length of the column + unsigned prefix_len:12; /*!< 0 or the length of the column prefix in bytes in a MySQL index of type, e.g., INDEX (textcol(25)); must be smaller than - DICT_MAX_INDEX_COL_LEN; NOTE that - in the UTF-8 charset, MySQL sets this - to 3 * the prefix len in UTF-8 chars */ + DICT_MAX_FIELD_LEN_BY_FORMAT; + NOTE that in the UTF-8 charset, MySQL + sets this to (mbmaxlen * the prefix len) + in UTF-8 chars */ unsigned fixed_len:10; /*!< 0 or the fixed length of the column if smaller than - DICT_MAX_INDEX_COL_LEN */ + DICT_ANTELOPE_MAX_INDEX_COL_LEN */ }; /** Data structure for an index. 
Most fields will be diff --git a/storage/innobase/include/rem0types.h b/storage/innobase/include/rem0types.h index 8b84d4af233..7afd595be90 100644 --- a/storage/innobase/include/rem0types.h +++ b/storage/innobase/include/rem0types.h @@ -34,13 +34,21 @@ typedef byte rec_t; #define REC_MAX_HEAP_NO (2 * 8192 - 1) #define REC_MAX_N_OWNED (16 - 1) -/* REC_MAX_INDEX_COL_LEN is measured in bytes and is the maximum -indexed column length (or indexed prefix length). It is set to 3*256, -so that one can create a column prefix index on 256 characters of a -TEXT or VARCHAR column also in the UTF-8 charset. In that charset, -a character may take at most 3 bytes. +/* REC_ANTELOPE_MAX_INDEX_COL_LEN is measured in bytes and is the maximum +indexed field length (or indexed prefix length) for indexes on tables of +ROW_FORMAT=REDUNDANT and ROW_FORMAT=COMPACT format. +Before we support UTF-8 encodings with mbmaxlen = 4, a UTF-8 character +may take at most 3 bytes. So the limit was set to 3*256, so that one +can create a column prefix index on 256 characters of a TEXT or VARCHAR +column also in the UTF-8 charset. This constant MUST NOT BE CHANGED, or the compatibility of InnoDB data files would be at risk! */ -#define REC_MAX_INDEX_COL_LEN 768 +#define REC_ANTELOPE_MAX_INDEX_COL_LEN 768 + +/** Maximum indexed field length for table format DICT_TF_FORMAT_ZIP and +beyond. +This (3072) is the maximum index row length allowed, so we cannot create index +prefix column longer than that. 
*/ +#define REC_VERSION_56_MAX_INDEX_COL_LEN 3072 #endif diff --git a/storage/innobase/include/row0ext.h b/storage/innobase/include/row0ext.h index 43d82d644e6..557da2c4a82 100644 --- a/storage/innobase/include/row0ext.h +++ b/storage/innobase/include/row0ext.h @@ -30,6 +30,7 @@ Created September 2006 Marko Makela #include "row0types.h" #include "data0types.h" #include "mem0mem.h" +#include "dict0types.h" /********************************************************************//** Creates a cache of column prefixes of externally stored columns. @@ -43,13 +44,13 @@ row_ext_create( in the InnoDB table object, as reported by dict_col_get_no(); NOT relative to the records in the clustered index */ + ulint flags, /*!< in: table->flags */ const dtuple_t* tuple, /*!< in: data tuple containing the field references of the externally stored columns; must be indexed by col_no; the clustered index record must be covered by a lock or a page latch to prevent deletion (rollback or purge). */ - ulint zip_size,/*!< compressed page size in bytes, or 0 */ mem_heap_t* heap); /*!< in: heap where created */ /********************************************************************//** @@ -63,7 +64,8 @@ row_ext_lookup_ith( const row_ext_t* ext, /*!< in/out: column prefix cache */ ulint i, /*!< in: index of ext->ext[] */ ulint* len); /*!< out: length of prefix, in bytes, - at most REC_MAX_INDEX_COL_LEN */ + at most the length determined by + DICT_MAX_FIELD_LEN_BY_FORMAT() */ /********************************************************************//** Looks up a column prefix of an externally stored column. 
@return column prefix, or NULL if the column is not stored externally, @@ -78,13 +80,18 @@ row_ext_lookup( dict_col_get_no(); NOT relative to the records in the clustered index */ ulint* len); /*!< out: length of prefix, in bytes, - at most REC_MAX_INDEX_COL_LEN */ + at most the length determined by + DICT_MAX_FIELD_LEN_BY_FORMAT() */ /** Prefixes of externally stored columns */ struct row_ext_struct{ ulint n_ext; /*!< number of externally stored columns */ const ulint* ext; /*!< col_no's of externally stored columns */ byte* buf; /*!< backing store of the column prefix cache */ + ulint max_len;/*!< maximum prefix length, it could be + REC_ANTELOPE_MAX_INDEX_COL_LEN or + REC_VERSION_56_MAX_INDEX_COL_LEN depending + on row format */ ulint len[1]; /*!< prefix lengths; 0 if not cached */ }; diff --git a/storage/innobase/include/row0ext.ic b/storage/innobase/include/row0ext.ic index 82771a9312a..466046b2821 100644 --- a/storage/innobase/include/row0ext.ic +++ b/storage/innobase/include/row0ext.ic @@ -37,7 +37,7 @@ row_ext_lookup_ith( const row_ext_t* ext, /*!< in/out: column prefix cache */ ulint i, /*!< in: index of ext->ext[] */ ulint* len) /*!< out: length of prefix, in bytes, - at most REC_MAX_INDEX_COL_LEN */ + at most ext->max_len */ { ut_ad(ext); ut_ad(len); @@ -45,11 +45,14 @@ row_ext_lookup_ith( *len = ext->len[i]; + ut_ad(*len <= ext->max_len); + ut_ad(ext->max_len > 0); + if (UNIV_UNLIKELY(*len == 0)) { /* The BLOB could not be fetched to the cache. 
*/ return(field_ref_zero); } else { - return(ext->buf + i * REC_MAX_INDEX_COL_LEN); + return(ext->buf + i * ext->max_len); } } @@ -67,7 +70,7 @@ row_ext_lookup( dict_col_get_no(); NOT relative to the records in the clustered index */ ulint* len) /*!< out: length of prefix, in bytes, - at most REC_MAX_INDEX_COL_LEN */ + at most ext->max_len */ { ulint i; diff --git a/storage/innobase/page/page0zip.c b/storage/innobase/page/page0zip.c index f077a56b087..c92ad9dc25b 100644 --- a/storage/innobase/page/page0zip.c +++ b/storage/innobase/page/page0zip.c @@ -464,7 +464,7 @@ page_zip_fields_encode( if (fixed_sum && UNIV_UNLIKELY (fixed_sum + field->fixed_len - > DICT_MAX_INDEX_COL_LEN)) { + > DICT_MAX_FIXED_COL_LEN)) { /* Write out the length of the preceding non-nullable fields, to avoid exceeding the maximum diff --git a/storage/innobase/rem/rem0rec.c b/storage/innobase/rem/rem0rec.c index a54b5013155..5a96e608ab5 100644 --- a/storage/innobase/rem/rem0rec.c +++ b/storage/innobase/rem/rem0rec.c @@ -1174,7 +1174,7 @@ rec_convert_dtuple_to_rec_comp( } else if (dfield_is_ext(field)) { ut_ad(ifield->col->len >= 256 || ifield->col->mtype == DATA_BLOB); - ut_ad(len <= REC_MAX_INDEX_COL_LEN + ut_ad(len <= REC_ANTELOPE_MAX_INDEX_COL_LEN + BTR_EXTERN_FIELD_REF_SIZE); *lens-- = (byte) (len >> 8) | 0xc0; *lens-- = (byte) len; diff --git a/storage/innobase/row/row0ext.c b/storage/innobase/row/row0ext.c index 7320f5b1dca..07e970cf485 100644 --- a/storage/innobase/row/row0ext.c +++ b/storage/innobase/row/row0ext.c @@ -44,8 +44,9 @@ row_ext_cache_fill( { const byte* field = dfield_get_data(dfield); ulint f_len = dfield_get_len(dfield); - byte* buf = ext->buf + i * REC_MAX_INDEX_COL_LEN; + byte* buf = ext->buf + i * ext->max_len; + ut_ad(ext->max_len > 0); ut_ad(i < ext->n_ext); ut_ad(dfield_is_ext(dfield)); ut_a(f_len >= BTR_EXTERN_FIELD_REF_SIZE); @@ -56,14 +57,14 @@ row_ext_cache_fill( /* The BLOB pointer is not set: we cannot fetch it */ ext->len[i] = 0; } else { - /* Fetch at most 
REC_MAX_INDEX_COL_LEN of the column. + /* Fetch at most ext->max_len of the column. The column should be non-empty. However, trx_rollback_or_clean_all_recovered() may try to access a half-deleted BLOB if the server previously crashed during the execution of btr_free_externally_stored_field(). */ ext->len[i] = btr_copy_externally_stored_field_prefix( - buf, REC_MAX_INDEX_COL_LEN, zip_size, field, f_len); + buf, ext->max_len, zip_size, field, f_len); } } @@ -79,16 +80,18 @@ row_ext_create( in the InnoDB table object, as reported by dict_col_get_no(); NOT relative to the records in the clustered index */ + ulint flags, /*!< in: table->flags */ const dtuple_t* tuple, /*!< in: data tuple containing the field references of the externally stored columns; must be indexed by col_no; the clustered index record must be covered by a lock or a page latch to prevent deletion (rollback or purge). */ - ulint zip_size,/*!< compressed page size in bytes, or 0 */ mem_heap_t* heap) /*!< in: heap where created */ { ulint i; + ulint zip_size = dict_table_flags_to_zip_size(flags); + row_ext_t* ret = mem_heap_alloc(heap, (sizeof *ret) + (n_ext - 1) * sizeof ret->len); @@ -97,10 +100,12 @@ row_ext_create( ret->n_ext = n_ext; ret->ext = ext; - ret->buf = mem_heap_alloc(heap, n_ext * REC_MAX_INDEX_COL_LEN); + ret->max_len = DICT_MAX_FIELD_LEN_BY_FORMAT_FLAG(flags); + + ret->buf = mem_heap_alloc(heap, n_ext * ret->max_len); #ifdef UNIV_DEBUG - memset(ret->buf, 0xaa, n_ext * REC_MAX_INDEX_COL_LEN); - UNIV_MEM_ALLOC(ret->buf, n_ext * REC_MAX_INDEX_COL_LEN); + memset(ret->buf, 0xaa, n_ext * ret->max_len); + UNIV_MEM_ALLOC(ret->buf, n_ext * ret->max_len); #endif /* Fetch the BLOB prefixes */ diff --git a/storage/innobase/row/row0mysql.c b/storage/innobase/row/row0mysql.c index aef4595f5fe..e1ada387729 100644 --- a/storage/innobase/row/row0mysql.c +++ b/storage/innobase/row/row0mysql.c @@ -1997,6 +1997,7 @@ row_create_index_for_mysql( ulint i; ulint len; char* table_name; + dict_table_t* table; 
#ifdef UNIV_SYNC_DEBUG ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); @@ -2010,6 +2011,8 @@ row_create_index_for_mysql( que_run_threads()) and thus index->table_name is not available. */ table_name = mem_strdup(index->table_name); + table = dict_table_get_low(table_name); + trx_start_if_not_started(trx); /* Check that the same column does not appear twice in the index. @@ -2042,7 +2045,7 @@ row_create_index_for_mysql( } /* Check also that prefix_len and actual length - < DICT_MAX_INDEX_COL_LEN */ + is less than that from DICT_MAX_FIELD_LEN_BY_FORMAT() */ len = dict_index_get_nth_field(index, i)->prefix_len; @@ -2050,8 +2053,9 @@ row_create_index_for_mysql( len = ut_max(len, field_lengths[i]); } - if (len >= DICT_MAX_INDEX_COL_LEN) { - err = DB_TOO_BIG_RECORD; + /* Column or prefix length exceeds maximum column length */ + if (len > (ulint) DICT_MAX_FIELD_LEN_BY_FORMAT(table)) { + err = DB_TOO_BIG_INDEX_COL; goto error_handling; } @@ -2076,6 +2080,7 @@ row_create_index_for_mysql( que_graph_free((que_t*) que_node_get_parent(thr)); error_handling: + if (err != DB_SUCCESS) { /* We have special error handling here */ diff --git a/storage/innobase/row/row0row.c b/storage/innobase/row/row0row.c index 58b4ca6bd87..8aa4195a70f 100644 --- a/storage/innobase/row/row0row.c +++ b/storage/innobase/row/row0row.c @@ -122,8 +122,6 @@ row_build_index_entry( } else if (dfield_is_ext(dfield)) { ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE); len -= BTR_EXTERN_FIELD_REF_SIZE; - ut_a(ind_field->prefix_len <= len - || dict_index_is_clust(index)); } len = dtype_get_at_most_n_mbchars( @@ -272,8 +270,7 @@ row_build( ut_ad(dict_table_get_format(index->table) < DICT_TF_FORMAT_ZIP); } else if (j) { - *ext = row_ext_create(j, ext_cols, row, - dict_table_zip_size(index->table), + *ext = row_ext_create(j, ext_cols, index->table->flags, row, heap); } else { *ext = NULL; diff --git a/storage/innobase/row/row0sel.c b/storage/innobase/row/row0sel.c index 66aff528f38..53d0c2ec232 100644 --- 
a/storage/innobase/row/row0sel.c +++ b/storage/innobase/row/row0sel.c @@ -99,10 +99,12 @@ row_sel_sec_rec_is_for_blob( ulint clust_len, /*!< in: length of clust_field */ const byte* sec_field, /*!< in: column in secondary index */ ulint sec_len, /*!< in: length of sec_field */ - ulint zip_size) /*!< in: compressed page size, or 0 */ + dict_table_t* table) /*!< in: table */ { ulint len; - byte buf[DICT_MAX_INDEX_COL_LEN]; + byte buf[REC_VERSION_56_MAX_INDEX_COL_LEN]; + ulint zip_size = dict_table_flags_to_zip_size(table->flags); + ulint max_prefix_len = DICT_MAX_FIELD_LEN_BY_FORMAT(table); ut_a(clust_len >= BTR_EXTERN_FIELD_REF_SIZE); @@ -116,7 +118,7 @@ row_sel_sec_rec_is_for_blob( return(FALSE); } - len = btr_copy_externally_stored_field_prefix(buf, sizeof buf, + len = btr_copy_externally_stored_field_prefix(buf, max_prefix_len, zip_size, clust_field, clust_len); @@ -222,8 +224,7 @@ row_sel_sec_rec_is_for_clust_rec( col->mbminmaxlen, clust_field, clust_len, sec_field, sec_len, - dict_table_zip_size( - clust_index->table))) { + clust_index->table)) { goto inequal; } diff --git a/storage/innobase/row/row0upd.c b/storage/innobase/row/row0upd.c index ea71a16bed2..04b3dcb3a4a 100644 --- a/storage/innobase/row/row0upd.c +++ b/storage/innobase/row/row0upd.c @@ -1211,8 +1211,8 @@ row_upd_replace( } if (n_ext_cols) { - *ext = row_ext_create(n_ext_cols, ext_cols, row, - dict_table_zip_size(table), heap); + *ext = row_ext_create(n_ext_cols, ext_cols, table->flags, row, + heap); } else { *ext = NULL; } diff --git a/storage/innobase/trx/trx0rec.c b/storage/innobase/trx/trx0rec.c index e7e9a008db4..5a3b79d0ad5 100644 --- a/storage/innobase/trx/trx0rec.c +++ b/storage/innobase/trx/trx0rec.c @@ -351,10 +351,10 @@ trx_undo_rec_get_col_val( ut_ad(*orig_len >= BTR_EXTERN_FIELD_REF_SIZE); ut_ad(*len > *orig_len); /* @see dtuple_convert_big_rec() */ - ut_ad(*len >= BTR_EXTERN_FIELD_REF_SIZE * 2); + ut_ad(*len >= BTR_EXTERN_FIELD_REF_SIZE); /* we do not have access to index->table here 
ut_ad(dict_table_get_format(index->table) >= DICT_TF_FORMAT_ZIP - || *len >= REC_MAX_INDEX_COL_LEN + || *len >= col->max_prefix + BTR_EXTERN_FIELD_REF_SIZE); */ @@ -456,9 +456,10 @@ static byte* trx_undo_page_fetch_ext( /*====================*/ - byte* ext_buf, /*!< in: a buffer of - REC_MAX_INDEX_COL_LEN - + BTR_EXTERN_FIELD_REF_SIZE */ + byte* ext_buf, /*!< in: buffer to hold the prefix + data and BLOB pointer */ + ulint prefix_len, /*!< in: prefix size to store + in the undo log */ ulint zip_size, /*!< compressed page size in bytes, or 0 for uncompressed BLOB */ const byte* field, /*!< in: an externally stored column */ @@ -467,7 +468,7 @@ trx_undo_page_fetch_ext( { /* Fetch the BLOB. */ ulint ext_len = btr_copy_externally_stored_field_prefix( - ext_buf, REC_MAX_INDEX_COL_LEN, zip_size, field, *len); + ext_buf, prefix_len, zip_size, field, *len); /* BLOBs should always be nonempty. */ ut_a(ext_len); /* Append the BLOB pointer to the prefix. */ @@ -488,10 +489,11 @@ trx_undo_page_report_modify_ext( byte* ptr, /*!< in: undo log position, at least 15 bytes must be available */ byte* ext_buf, /*!< in: a buffer of - REC_MAX_INDEX_COL_LEN - + BTR_EXTERN_FIELD_REF_SIZE, + DICT_MAX_FIELD_LEN_BY_FORMAT() size, or NULL when should not fetch a longer prefix */ + ulint prefix_len, /*!< prefix size to store in the + undo log */ ulint zip_size, /*!< compressed page size in bytes, or 0 for uncompressed BLOB */ const byte** field, /*!< in/out: the locally stored part of @@ -499,6 +501,8 @@ trx_undo_page_report_modify_ext( ulint* len) /*!< in/out: length of field, in bytes */ { if (ext_buf) { + ut_a(prefix_len > 0); + /* If an ordering column is externally stored, we will have to store a longer prefix of the field. 
In this case, write to the log a marker followed by the @@ -507,7 +511,7 @@ trx_undo_page_report_modify_ext( ptr += mach_write_compressed(ptr, *len); - *field = trx_undo_page_fetch_ext(ext_buf, zip_size, + *field = trx_undo_page_fetch_ext(ext_buf, prefix_len, zip_size, *field, len); ptr += mach_write_compressed(ptr, *len); @@ -553,7 +557,7 @@ trx_undo_page_report_modify( ulint i; trx_id_t trx_id; ibool ignore_prefix = FALSE; - byte ext_buf[REC_MAX_INDEX_COL_LEN + byte ext_buf[REC_VERSION_56_MAX_INDEX_COL_LEN + BTR_EXTERN_FIELD_REF_SIZE]; ut_a(dict_index_is_clust(index)); @@ -665,6 +669,7 @@ trx_undo_page_report_modify( /* Save to the undo log the old values of the columns to be updated. */ if (update) { + if (trx_undo_left(undo_page, ptr) < 5) { return(0); @@ -693,13 +698,21 @@ trx_undo_page_report_modify( } if (rec_offs_nth_extern(offsets, pos)) { + const dict_col_t* col + = dict_index_get_nth_col(index, pos); + ulint prefix_len + = dict_max_field_len_store_undo( + table, col); + + ut_ad(prefix_len + BTR_EXTERN_FIELD_REF_SIZE + <= sizeof ext_buf); + ptr = trx_undo_page_report_modify_ext( ptr, - dict_index_get_nth_col(index, pos) - ->ord_part + col->ord_part && !ignore_prefix - && flen < REC_MAX_INDEX_COL_LEN - ? ext_buf : NULL, + && flen < REC_ANTELOPE_MAX_INDEX_COL_LEN + ? ext_buf : NULL, prefix_len, dict_table_zip_size(table), &field, &flen); @@ -778,11 +791,20 @@ trx_undo_page_report_modify( &flen); if (rec_offs_nth_extern(offsets, pos)) { + const dict_col_t* col = + dict_index_get_nth_col( + index, pos); + ulint prefix_len = + dict_max_field_len_store_undo( + table, col); + + ut_a(prefix_len < sizeof ext_buf); + ptr = trx_undo_page_report_modify_ext( ptr, - flen < REC_MAX_INDEX_COL_LEN + flen < REC_ANTELOPE_MAX_INDEX_COL_LEN && !ignore_prefix - ? ext_buf : NULL, + ? ext_buf : NULL, prefix_len, dict_table_zip_size(table), &field, &flen); } else { @@ -1082,11 +1104,11 @@ trx_undo_rec_get_partial_row( undo log record. 
*/ if (!ignore_prefix && col->ord_part) { ut_a(dfield_get_len(dfield) - >= 2 * BTR_EXTERN_FIELD_REF_SIZE); + >= BTR_EXTERN_FIELD_REF_SIZE); ut_a(dict_table_get_format(index->table) >= DICT_TF_FORMAT_ZIP || dfield_get_len(dfield) - >= REC_MAX_INDEX_COL_LEN + >= REC_ANTELOPE_MAX_INDEX_COL_LEN + BTR_EXTERN_FIELD_REF_SIZE); } } diff --git a/storage/innobase/ut/ut0ut.c b/storage/innobase/ut/ut0ut.c index cd0894b132a..a9c0d381e16 100644 --- a/storage/innobase/ut/ut0ut.c +++ b/storage/innobase/ut/ut0ut.c @@ -662,6 +662,8 @@ ut_strerr( return("Table is being used"); case DB_TOO_BIG_RECORD: return("Record too big"); + case DB_TOO_BIG_INDEX_COL: + return("Index columns size too big"); case DB_LOCK_WAIT_TIMEOUT: return("Lock wait timeout"); case DB_NO_REFERENCED_ROW: |