diff options
author | Sergei Golubchik <vuvova@gmail.com> | 2015-05-04 19:15:28 +0200 |
---|---|---|
committer | Sergei Golubchik <vuvova@gmail.com> | 2015-05-04 19:15:28 +0200 |
commit | 14a142fca67b9e1fb3f0250fda093f5b967f0138 (patch) | |
tree | dd49e0666c863d80b5c50642e36a9c945ea12b8a /storage/xtradb/dict | |
parent | dfb001edcd4b16bd4370b08b0176df78c4c5523f (diff) | |
download | mariadb-git-14a142fca67b9e1fb3f0250fda093f5b967f0138.tar.gz |
move to storage/xtradb
Diffstat (limited to 'storage/xtradb/dict')
-rw-r--r-- | storage/xtradb/dict/dict0boot.cc | 522 | ||||
-rw-r--r-- | storage/xtradb/dict/dict0crea.cc | 1845 | ||||
-rw-r--r-- | storage/xtradb/dict/dict0dict.cc | 6750 | ||||
-rw-r--r-- | storage/xtradb/dict/dict0load.cc | 3149 | ||||
-rw-r--r-- | storage/xtradb/dict/dict0mem.cc | 755 | ||||
-rw-r--r-- | storage/xtradb/dict/dict0stats.cc | 4182 | ||||
-rw-r--r-- | storage/xtradb/dict/dict0stats_bg.cc | 367 |
7 files changed, 17570 insertions, 0 deletions
diff --git a/storage/xtradb/dict/dict0boot.cc b/storage/xtradb/dict/dict0boot.cc new file mode 100644 index 00000000000..b57a8873bd5 --- /dev/null +++ b/storage/xtradb/dict/dict0boot.cc @@ -0,0 +1,522 @@ +/***************************************************************************** + +Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/**************************************************//** +@file dict/dict0boot.cc +Data dictionary creation and booting + +Created 4/18/1996 Heikki Tuuri +*******************************************************/ + +#include "dict0boot.h" + +#ifdef UNIV_NONINL +#include "dict0boot.ic" +#endif + +#include "dict0crea.h" +#include "btr0btr.h" +#include "btr0sea.h" +#include "dict0load.h" +#include "trx0trx.h" +#include "srv0srv.h" +#include "ibuf0ibuf.h" +#include "buf0flu.h" +#include "log0recv.h" +#include "os0file.h" + +/**********************************************************************//** +Gets a pointer to the dictionary header and x-latches its page. 
+@return pointer to the dictionary header, page x-latched */ +UNIV_INTERN +dict_hdr_t* +dict_hdr_get( +/*=========*/ + mtr_t* mtr) /*!< in: mtr */ +{ + buf_block_t* block; + dict_hdr_t* header; + + block = buf_page_get(DICT_HDR_SPACE, 0, DICT_HDR_PAGE_NO, + RW_X_LATCH, mtr); + header = DICT_HDR + buf_block_get_frame(block); + + buf_block_dbg_add_level(block, SYNC_DICT_HEADER); + + return(header); +} + +/**********************************************************************//** +Returns a new table, index, or space id. */ +UNIV_INTERN +void +dict_hdr_get_new_id( +/*================*/ + table_id_t* table_id, /*!< out: table id + (not assigned if NULL) */ + index_id_t* index_id, /*!< out: index id + (not assigned if NULL) */ + ulint* space_id) /*!< out: space id + (not assigned if NULL) */ +{ + dict_hdr_t* dict_hdr; + ib_id_t id; + mtr_t mtr; + + mtr_start(&mtr); + + dict_hdr = dict_hdr_get(&mtr); + + if (table_id) { + id = mach_read_from_8(dict_hdr + DICT_HDR_TABLE_ID); + id++; + mlog_write_ull(dict_hdr + DICT_HDR_TABLE_ID, id, &mtr); + *table_id = id; + } + + if (index_id) { + id = mach_read_from_8(dict_hdr + DICT_HDR_INDEX_ID); + id++; + mlog_write_ull(dict_hdr + DICT_HDR_INDEX_ID, id, &mtr); + *index_id = id; + } + + if (space_id) { + *space_id = mtr_read_ulint(dict_hdr + DICT_HDR_MAX_SPACE_ID, + MLOG_4BYTES, &mtr); + if (fil_assign_new_space_id(space_id)) { + mlog_write_ulint(dict_hdr + DICT_HDR_MAX_SPACE_ID, + *space_id, MLOG_4BYTES, &mtr); + } + } + + mtr_commit(&mtr); +} + +/**********************************************************************//** +Writes the current value of the row id counter to the dictionary header file +page. 
*/ +UNIV_INTERN +void +dict_hdr_flush_row_id(void) +/*=======================*/ +{ + dict_hdr_t* dict_hdr; + row_id_t id; + mtr_t mtr; + + ut_ad(mutex_own(&(dict_sys->mutex))); + + id = dict_sys->row_id; + + mtr_start(&mtr); + + dict_hdr = dict_hdr_get(&mtr); + + mlog_write_ull(dict_hdr + DICT_HDR_ROW_ID, id, &mtr); + + mtr_commit(&mtr); +} + +/*****************************************************************//** +Creates the file page for the dictionary header. This function is +called only at the database creation. +@return TRUE if succeed */ +static +ibool +dict_hdr_create( +/*============*/ + mtr_t* mtr) /*!< in: mtr */ +{ + buf_block_t* block; + dict_hdr_t* dict_header; + ulint root_page_no; + + ut_ad(mtr); + + /* Create the dictionary header file block in a new, allocated file + segment in the system tablespace */ + block = fseg_create(DICT_HDR_SPACE, 0, + DICT_HDR + DICT_HDR_FSEG_HEADER, mtr); + + ut_a(DICT_HDR_PAGE_NO == buf_block_get_page_no(block)); + + dict_header = dict_hdr_get(mtr); + + /* Start counting row, table, index, and tree ids from + DICT_HDR_FIRST_ID */ + mlog_write_ull(dict_header + DICT_HDR_ROW_ID, + DICT_HDR_FIRST_ID, mtr); + + mlog_write_ull(dict_header + DICT_HDR_TABLE_ID, + DICT_HDR_FIRST_ID, mtr); + + mlog_write_ull(dict_header + DICT_HDR_INDEX_ID, + DICT_HDR_FIRST_ID, mtr); + + mlog_write_ulint(dict_header + DICT_HDR_MAX_SPACE_ID, + 0, MLOG_4BYTES, mtr); + + /* Obsolete, but we must initialize it anyway. 
*/ + mlog_write_ulint(dict_header + DICT_HDR_MIX_ID_LOW, + DICT_HDR_FIRST_ID, MLOG_4BYTES, mtr); + + /* Create the B-tree roots for the clustered indexes of the basic + system tables */ + + /*--------------------------*/ + root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE, + DICT_HDR_SPACE, 0, DICT_TABLES_ID, + dict_ind_redundant, mtr); + if (root_page_no == FIL_NULL) { + + return(FALSE); + } + + mlog_write_ulint(dict_header + DICT_HDR_TABLES, root_page_no, + MLOG_4BYTES, mtr); + /*--------------------------*/ + root_page_no = btr_create(DICT_UNIQUE, DICT_HDR_SPACE, 0, + DICT_TABLE_IDS_ID, + dict_ind_redundant, mtr); + if (root_page_no == FIL_NULL) { + + return(FALSE); + } + + mlog_write_ulint(dict_header + DICT_HDR_TABLE_IDS, root_page_no, + MLOG_4BYTES, mtr); + /*--------------------------*/ + root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE, + DICT_HDR_SPACE, 0, DICT_COLUMNS_ID, + dict_ind_redundant, mtr); + if (root_page_no == FIL_NULL) { + + return(FALSE); + } + + mlog_write_ulint(dict_header + DICT_HDR_COLUMNS, root_page_no, + MLOG_4BYTES, mtr); + /*--------------------------*/ + root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE, + DICT_HDR_SPACE, 0, DICT_INDEXES_ID, + dict_ind_redundant, mtr); + if (root_page_no == FIL_NULL) { + + return(FALSE); + } + + mlog_write_ulint(dict_header + DICT_HDR_INDEXES, root_page_no, + MLOG_4BYTES, mtr); + /*--------------------------*/ + root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE, + DICT_HDR_SPACE, 0, DICT_FIELDS_ID, + dict_ind_redundant, mtr); + if (root_page_no == FIL_NULL) { + + return(FALSE); + } + + mlog_write_ulint(dict_header + DICT_HDR_FIELDS, root_page_no, + MLOG_4BYTES, mtr); + /*--------------------------*/ + + return(TRUE); +} + +/*****************************************************************//** +Initializes the data dictionary memory structures when the database is +started. This function is also called when the data dictionary is created. +@return DB_SUCCESS or error code. 
*/ +UNIV_INTERN +dberr_t +dict_boot(void) +/*===========*/ +{ + dict_table_t* table; + dict_index_t* index; + dict_hdr_t* dict_hdr; + mem_heap_t* heap; + mtr_t mtr; + dberr_t error; + + /* Be sure these constants do not ever change. To avoid bloat, + only check the *NUM_FIELDS* in each table */ + + ut_ad(DICT_NUM_COLS__SYS_TABLES == 8); + ut_ad(DICT_NUM_FIELDS__SYS_TABLES == 10); + ut_ad(DICT_NUM_FIELDS__SYS_TABLE_IDS == 2); + ut_ad(DICT_NUM_COLS__SYS_COLUMNS == 7); + ut_ad(DICT_NUM_FIELDS__SYS_COLUMNS == 9); + ut_ad(DICT_NUM_COLS__SYS_INDEXES == 7); + ut_ad(DICT_NUM_FIELDS__SYS_INDEXES == 9); + ut_ad(DICT_NUM_COLS__SYS_FIELDS == 3); + ut_ad(DICT_NUM_FIELDS__SYS_FIELDS == 5); + ut_ad(DICT_NUM_COLS__SYS_FOREIGN == 4); + ut_ad(DICT_NUM_FIELDS__SYS_FOREIGN == 6); + ut_ad(DICT_NUM_FIELDS__SYS_FOREIGN_FOR_NAME == 2); + ut_ad(DICT_NUM_COLS__SYS_FOREIGN_COLS == 4); + ut_ad(DICT_NUM_FIELDS__SYS_FOREIGN_COLS == 6); + + mtr_start(&mtr); + + /* Create the hash tables etc. */ + dict_init(); + + heap = mem_heap_create(450); + + mutex_enter(&(dict_sys->mutex)); + + /* Get the dictionary header */ + dict_hdr = dict_hdr_get(&mtr); + + /* Because we only write new row ids to disk-based data structure + (dictionary header) when it is divisible by + DICT_HDR_ROW_ID_WRITE_MARGIN, in recovery we will not recover + the latest value of the row id counter. Therefore we advance + the counter at the database startup to avoid overlapping values. + Note that when a user after database startup first time asks for + a new row id, then because the counter is now divisible by + ..._MARGIN, it will immediately be updated to the disk-based + header. 
*/ + + dict_sys->row_id = DICT_HDR_ROW_ID_WRITE_MARGIN + + ut_uint64_align_up(mach_read_from_8(dict_hdr + DICT_HDR_ROW_ID), + DICT_HDR_ROW_ID_WRITE_MARGIN); + + /* Insert into the dictionary cache the descriptions of the basic + system tables */ + /*-------------------------*/ + table = dict_mem_table_create("SYS_TABLES", DICT_HDR_SPACE, 8, 0, 0, + false); + + dict_mem_table_add_col(table, heap, "NAME", DATA_BINARY, 0, 0); + dict_mem_table_add_col(table, heap, "ID", DATA_BINARY, 0, 0); + /* ROW_FORMAT = (N_COLS >> 31) ? COMPACT : REDUNDANT */ + dict_mem_table_add_col(table, heap, "N_COLS", DATA_INT, 0, 4); + /* The low order bit of TYPE is always set to 1. If the format + is UNIV_FORMAT_B or higher, this field matches table->flags. */ + dict_mem_table_add_col(table, heap, "TYPE", DATA_INT, 0, 4); + dict_mem_table_add_col(table, heap, "MIX_ID", DATA_BINARY, 0, 0); + /* MIX_LEN may contain additional table flags when + ROW_FORMAT!=REDUNDANT. Currently, these flags include + DICT_TF2_TEMPORARY. 
*/ + dict_mem_table_add_col(table, heap, "MIX_LEN", DATA_INT, 0, 4); + dict_mem_table_add_col(table, heap, "CLUSTER_NAME", DATA_BINARY, 0, 0); + dict_mem_table_add_col(table, heap, "SPACE", DATA_INT, 0, 4); + + table->id = DICT_TABLES_ID; + + dict_table_add_to_cache(table, FALSE, heap); + dict_sys->sys_tables = table; + mem_heap_empty(heap); + + index = dict_mem_index_create("SYS_TABLES", "CLUST_IND", + DICT_HDR_SPACE, + DICT_UNIQUE | DICT_CLUSTERED, 1); + + dict_mem_index_add_field(index, "NAME", 0); + + index->id = DICT_TABLES_ID; + btr_search_index_init(index); + + error = dict_index_add_to_cache(table, index, + mtr_read_ulint(dict_hdr + + DICT_HDR_TABLES, + MLOG_4BYTES, &mtr), + FALSE); + ut_a(error == DB_SUCCESS); + + /*-------------------------*/ + index = dict_mem_index_create("SYS_TABLES", "ID_IND", + DICT_HDR_SPACE, DICT_UNIQUE, 1); + dict_mem_index_add_field(index, "ID", 0); + + index->id = DICT_TABLE_IDS_ID; + btr_search_index_init(index); + error = dict_index_add_to_cache(table, index, + mtr_read_ulint(dict_hdr + + DICT_HDR_TABLE_IDS, + MLOG_4BYTES, &mtr), + FALSE); + ut_a(error == DB_SUCCESS); + + /*-------------------------*/ + table = dict_mem_table_create("SYS_COLUMNS", DICT_HDR_SPACE, 7, 0, 0, + false); + + dict_mem_table_add_col(table, heap, "TABLE_ID", DATA_BINARY, 0, 0); + dict_mem_table_add_col(table, heap, "POS", DATA_INT, 0, 4); + dict_mem_table_add_col(table, heap, "NAME", DATA_BINARY, 0, 0); + dict_mem_table_add_col(table, heap, "MTYPE", DATA_INT, 0, 4); + dict_mem_table_add_col(table, heap, "PRTYPE", DATA_INT, 0, 4); + dict_mem_table_add_col(table, heap, "LEN", DATA_INT, 0, 4); + dict_mem_table_add_col(table, heap, "PREC", DATA_INT, 0, 4); + + table->id = DICT_COLUMNS_ID; + + dict_table_add_to_cache(table, FALSE, heap); + dict_sys->sys_columns = table; + mem_heap_empty(heap); + + index = dict_mem_index_create("SYS_COLUMNS", "CLUST_IND", + DICT_HDR_SPACE, + DICT_UNIQUE | DICT_CLUSTERED, 2); + + dict_mem_index_add_field(index, "TABLE_ID", 
0); + dict_mem_index_add_field(index, "POS", 0); + + index->id = DICT_COLUMNS_ID; + btr_search_index_init(index); + error = dict_index_add_to_cache(table, index, + mtr_read_ulint(dict_hdr + + DICT_HDR_COLUMNS, + MLOG_4BYTES, &mtr), + FALSE); + ut_a(error == DB_SUCCESS); + + /*-------------------------*/ + table = dict_mem_table_create("SYS_INDEXES", DICT_HDR_SPACE, 7, 0, 0, + false); + + dict_mem_table_add_col(table, heap, "TABLE_ID", DATA_BINARY, 0, 0); + dict_mem_table_add_col(table, heap, "ID", DATA_BINARY, 0, 0); + dict_mem_table_add_col(table, heap, "NAME", DATA_BINARY, 0, 0); + dict_mem_table_add_col(table, heap, "N_FIELDS", DATA_INT, 0, 4); + dict_mem_table_add_col(table, heap, "TYPE", DATA_INT, 0, 4); + dict_mem_table_add_col(table, heap, "SPACE", DATA_INT, 0, 4); + dict_mem_table_add_col(table, heap, "PAGE_NO", DATA_INT, 0, 4); + + table->id = DICT_INDEXES_ID; + + dict_table_add_to_cache(table, FALSE, heap); + dict_sys->sys_indexes = table; + mem_heap_empty(heap); + + index = dict_mem_index_create("SYS_INDEXES", "CLUST_IND", + DICT_HDR_SPACE, + DICT_UNIQUE | DICT_CLUSTERED, 2); + + dict_mem_index_add_field(index, "TABLE_ID", 0); + dict_mem_index_add_field(index, "ID", 0); + + index->id = DICT_INDEXES_ID; + btr_search_index_init(index); + error = dict_index_add_to_cache(table, index, + mtr_read_ulint(dict_hdr + + DICT_HDR_INDEXES, + MLOG_4BYTES, &mtr), + FALSE); + ut_a(error == DB_SUCCESS); + + /*-------------------------*/ + table = dict_mem_table_create("SYS_FIELDS", DICT_HDR_SPACE, 3, 0, 0, + false); + + dict_mem_table_add_col(table, heap, "INDEX_ID", DATA_BINARY, 0, 0); + dict_mem_table_add_col(table, heap, "POS", DATA_INT, 0, 4); + dict_mem_table_add_col(table, heap, "COL_NAME", DATA_BINARY, 0, 0); + + table->id = DICT_FIELDS_ID; + + dict_table_add_to_cache(table, FALSE, heap); + dict_sys->sys_fields = table; + mem_heap_free(heap); + + index = dict_mem_index_create("SYS_FIELDS", "CLUST_IND", + DICT_HDR_SPACE, + DICT_UNIQUE | DICT_CLUSTERED, 2); + + 
dict_mem_index_add_field(index, "INDEX_ID", 0); + dict_mem_index_add_field(index, "POS", 0); + + index->id = DICT_FIELDS_ID; + btr_search_index_init(index); + error = dict_index_add_to_cache(table, index, + mtr_read_ulint(dict_hdr + + DICT_HDR_FIELDS, + MLOG_4BYTES, &mtr), + FALSE); + ut_a(error == DB_SUCCESS); + + mtr_commit(&mtr); + + /*-------------------------*/ + + /* Initialize the insert buffer table and index for each tablespace */ + + ibuf_init_at_db_start(); + + dberr_t err = DB_SUCCESS; + + if (srv_read_only_mode && !ibuf_is_empty()) { + + ib_logf(IB_LOG_LEVEL_ERROR, + "Change buffer must be empty when --innodb-read-only " + "is set!"); + + err = DB_ERROR; + } else { + /* Load definitions of other indexes on system tables */ + + dict_load_sys_table(dict_sys->sys_tables); + dict_load_sys_table(dict_sys->sys_columns); + dict_load_sys_table(dict_sys->sys_indexes); + dict_load_sys_table(dict_sys->sys_fields); + } + + mutex_exit(&(dict_sys->mutex)); + + return(err); +} + +/*****************************************************************//** +Inserts the basic system table data into themselves in the database +creation. */ +static +void +dict_insert_initial_data(void) +/*==========================*/ +{ + /* Does nothing yet */ +} + +/*****************************************************************//** +Creates and initializes the data dictionary at the server bootstrap. +@return DB_SUCCESS or error code. 
*/ +UNIV_INTERN +dberr_t +dict_create(void) +/*=============*/ +{ + mtr_t mtr; + + mtr_start(&mtr); + + dict_hdr_create(&mtr); + + mtr_commit(&mtr); + + dberr_t err = dict_boot(); + + if (err == DB_SUCCESS) { + dict_insert_initial_data(); + } + + return(err); +} diff --git a/storage/xtradb/dict/dict0crea.cc b/storage/xtradb/dict/dict0crea.cc new file mode 100644 index 00000000000..30523ff2af4 --- /dev/null +++ b/storage/xtradb/dict/dict0crea.cc @@ -0,0 +1,1845 @@ +/***************************************************************************** + +Copyright (c) 1996, 2014, Oracle and/or its affiliates. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/**************************************************//** +@file dict/dict0crea.cc +Database object creation + +Created 1/8/1996 Heikki Tuuri +*******************************************************/ + +#include "dict0crea.h" + +#ifdef UNIV_NONINL +#include "dict0crea.ic" +#endif + +#include "btr0pcur.h" +#include "btr0btr.h" +#include "page0page.h" +#include "mach0data.h" +#include "dict0boot.h" +#include "dict0dict.h" +#include "que0que.h" +#include "row0ins.h" +#include "row0mysql.h" +#include "pars0pars.h" +#include "trx0roll.h" +#include "usr0sess.h" +#include "ut0vec.h" +#include "dict0priv.h" +#include "fts0priv.h" +#include "ha_prototypes.h" + +/*****************************************************************//** +Based on a table object, this function builds the entry to be inserted +in the SYS_TABLES system table. 
+@return the tuple which should be inserted */ +static +dtuple_t* +dict_create_sys_tables_tuple( +/*=========================*/ + const dict_table_t* table, /*!< in: table */ + mem_heap_t* heap) /*!< in: memory heap from + which the memory for the built + tuple is allocated */ +{ + dict_table_t* sys_tables; + dtuple_t* entry; + dfield_t* dfield; + byte* ptr; + ulint type; + + ut_ad(table); + ut_ad(heap); + + sys_tables = dict_sys->sys_tables; + + entry = dtuple_create(heap, 8 + DATA_N_SYS_COLS); + + dict_table_copy_types(entry, sys_tables); + + /* 0: NAME -----------------------------*/ + dfield = dtuple_get_nth_field( + entry, DICT_COL__SYS_TABLES__NAME); + + dfield_set_data(dfield, table->name, ut_strlen(table->name)); + + /* 1: DB_TRX_ID added later */ + /* 2: DB_ROLL_PTR added later */ + /* 3: ID -------------------------------*/ + dfield = dtuple_get_nth_field( + entry, DICT_COL__SYS_TABLES__ID); + + ptr = static_cast<byte*>(mem_heap_alloc(heap, 8)); + mach_write_to_8(ptr, table->id); + + dfield_set_data(dfield, ptr, 8); + + /* 4: N_COLS ---------------------------*/ + dfield = dtuple_get_nth_field( + entry, DICT_COL__SYS_TABLES__N_COLS); + + ptr = static_cast<byte*>(mem_heap_alloc(heap, 4)); + mach_write_to_4(ptr, table->n_def + | ((table->flags & DICT_TF_COMPACT) << 31)); + dfield_set_data(dfield, ptr, 4); + + /* 5: TYPE (table flags) -----------------------------*/ + dfield = dtuple_get_nth_field( + entry, DICT_COL__SYS_TABLES__TYPE); + + ptr = static_cast<byte*>(mem_heap_alloc(heap, 4)); + + /* Validate the table flags and convert them to what is saved in + SYS_TABLES.TYPE. Table flag values 0 and 1 are both written to + SYS_TABLES.TYPE as 1. 
*/ + type = dict_tf_to_sys_tables_type(table->flags); + mach_write_to_4(ptr, type); + + dfield_set_data(dfield, ptr, 4); + + /* 6: MIX_ID (obsolete) ---------------------------*/ + dfield = dtuple_get_nth_field( + entry, DICT_COL__SYS_TABLES__MIX_ID); + + ptr = static_cast<byte*>(mem_heap_zalloc(heap, 8)); + + dfield_set_data(dfield, ptr, 8); + + /* 7: MIX_LEN (additional flags) --------------------------*/ + dfield = dtuple_get_nth_field( + entry, DICT_COL__SYS_TABLES__MIX_LEN); + + ptr = static_cast<byte*>(mem_heap_alloc(heap, 4)); + /* Be sure all non-used bits are zero. */ + ut_a(!(table->flags2 & ~DICT_TF2_BIT_MASK)); + mach_write_to_4(ptr, table->flags2); + + dfield_set_data(dfield, ptr, 4); + + /* 8: CLUSTER_NAME ---------------------*/ + dfield = dtuple_get_nth_field( + entry, DICT_COL__SYS_TABLES__CLUSTER_ID); + dfield_set_null(dfield); /* not supported */ + + /* 9: SPACE ----------------------------*/ + dfield = dtuple_get_nth_field( + entry, DICT_COL__SYS_TABLES__SPACE); + + ptr = static_cast<byte*>(mem_heap_alloc(heap, 4)); + mach_write_to_4(ptr, table->space); + + dfield_set_data(dfield, ptr, 4); + /*----------------------------------*/ + + return(entry); +} + +/*****************************************************************//** +Based on a table object, this function builds the entry to be inserted +in the SYS_COLUMNS system table. 
+@return the tuple which should be inserted */ +static +dtuple_t* +dict_create_sys_columns_tuple( +/*==========================*/ + const dict_table_t* table, /*!< in: table */ + ulint i, /*!< in: column number */ + mem_heap_t* heap) /*!< in: memory heap from + which the memory for the built + tuple is allocated */ +{ + dict_table_t* sys_columns; + dtuple_t* entry; + const dict_col_t* column; + dfield_t* dfield; + byte* ptr; + const char* col_name; + + ut_ad(table); + ut_ad(heap); + + column = dict_table_get_nth_col(table, i); + + sys_columns = dict_sys->sys_columns; + + entry = dtuple_create(heap, 7 + DATA_N_SYS_COLS); + + dict_table_copy_types(entry, sys_columns); + + /* 0: TABLE_ID -----------------------*/ + dfield = dtuple_get_nth_field(entry, DICT_COL__SYS_COLUMNS__TABLE_ID); + + ptr = static_cast<byte*>(mem_heap_alloc(heap, 8)); + mach_write_to_8(ptr, table->id); + + dfield_set_data(dfield, ptr, 8); + + /* 1: POS ----------------------------*/ + dfield = dtuple_get_nth_field(entry, DICT_COL__SYS_COLUMNS__POS); + + ptr = static_cast<byte*>(mem_heap_alloc(heap, 4)); + mach_write_to_4(ptr, i); + + dfield_set_data(dfield, ptr, 4); + + /* 2: DB_TRX_ID added later */ + /* 3: DB_ROLL_PTR added later */ + /* 4: NAME ---------------------------*/ + dfield = dtuple_get_nth_field(entry, DICT_COL__SYS_COLUMNS__NAME); + + col_name = dict_table_get_col_name(table, i); + dfield_set_data(dfield, col_name, ut_strlen(col_name)); + + /* 5: MTYPE --------------------------*/ + dfield = dtuple_get_nth_field(entry, DICT_COL__SYS_COLUMNS__MTYPE); + + ptr = static_cast<byte*>(mem_heap_alloc(heap, 4)); + mach_write_to_4(ptr, column->mtype); + + dfield_set_data(dfield, ptr, 4); + + /* 6: PRTYPE -------------------------*/ + dfield = dtuple_get_nth_field(entry, DICT_COL__SYS_COLUMNS__PRTYPE); + + ptr = static_cast<byte*>(mem_heap_alloc(heap, 4)); + mach_write_to_4(ptr, column->prtype); + + dfield_set_data(dfield, ptr, 4); + + /* 7: LEN ----------------------------*/ + dfield = 
dtuple_get_nth_field(entry, DICT_COL__SYS_COLUMNS__LEN); + + ptr = static_cast<byte*>(mem_heap_alloc(heap, 4)); + mach_write_to_4(ptr, column->len); + + dfield_set_data(dfield, ptr, 4); + + /* 8: PREC ---------------------------*/ + dfield = dtuple_get_nth_field(entry, DICT_COL__SYS_COLUMNS__PREC); + + ptr = static_cast<byte*>(mem_heap_alloc(heap, 4)); + mach_write_to_4(ptr, 0/* unused */); + + dfield_set_data(dfield, ptr, 4); + /*---------------------------------*/ + + return(entry); +} + +/***************************************************************//** +Builds a table definition to insert. +@return DB_SUCCESS or error code */ +static __attribute__((nonnull, warn_unused_result)) +dberr_t +dict_build_table_def_step( +/*======================*/ + que_thr_t* thr, /*!< in: query thread */ + tab_node_t* node) /*!< in: table create node */ +{ + dict_table_t* table; + dtuple_t* row; + dberr_t error; + const char* path; + mtr_t mtr; + ulint space = 0; + bool use_tablespace; + + ut_ad(mutex_own(&(dict_sys->mutex))); + + table = node->table; + use_tablespace = DICT_TF2_FLAG_IS_SET(table, DICT_TF2_USE_TABLESPACE); + + dict_hdr_get_new_id(&table->id, NULL, NULL); + + thr_get_trx(thr)->table_id = table->id; + + /* Always set this bit for all new created tables */ + DICT_TF2_FLAG_SET(table, DICT_TF2_FTS_AUX_HEX_NAME); + DBUG_EXECUTE_IF("innodb_test_wrong_fts_aux_table_name", + DICT_TF2_FLAG_UNSET(table, + DICT_TF2_FTS_AUX_HEX_NAME);); + + if (use_tablespace) { + /* This table will not use the system tablespace. + Get a new space id. */ + dict_hdr_get_new_id(NULL, NULL, &space); + + DBUG_EXECUTE_IF( + "ib_create_table_fail_out_of_space_ids", + space = ULINT_UNDEFINED; + ); + + if (UNIV_UNLIKELY(space == ULINT_UNDEFINED)) { + return(DB_ERROR); + } + + /* We create a new single-table tablespace for the table. 
+ We initially let it be 4 pages: + - page 0 is the fsp header and an extent descriptor page, + - page 1 is an ibuf bitmap page, + - page 2 is the first inode page, + - page 3 will contain the root of the clustered index of the + table we create here. */ + + path = table->data_dir_path ? table->data_dir_path + : table->dir_path_of_temp_table; + + ut_ad(dict_table_get_format(table) <= UNIV_FORMAT_MAX); + ut_ad(!dict_table_zip_size(table) + || dict_table_get_format(table) >= UNIV_FORMAT_B); + + error = fil_create_new_single_table_tablespace( + space, table->name, path, + dict_tf_to_fsp_flags(table->flags), + table->flags2, + FIL_IBD_FILE_INITIAL_SIZE); + + table->space = (unsigned int) space; + + if (error != DB_SUCCESS) { + + return(error); + } + + mtr_start(&mtr); + + fsp_header_init(table->space, FIL_IBD_FILE_INITIAL_SIZE, &mtr); + + mtr_commit(&mtr); + } else { + /* Create in the system tablespace: disallow Barracuda + features by keeping only the first bit which says whether + the row format is redundant or compact */ + table->flags &= DICT_TF_COMPACT; + } + + row = dict_create_sys_tables_tuple(table, node->heap); + + ins_node_set_new_row(node->tab_def, row); + + return(DB_SUCCESS); +} + +/***************************************************************//** +Builds a column definition to insert. */ +static +void +dict_build_col_def_step( +/*====================*/ + tab_node_t* node) /*!< in: table create node */ +{ + dtuple_t* row; + + row = dict_create_sys_columns_tuple(node->table, node->col_no, + node->heap); + ins_node_set_new_row(node->col_def, row); +} + +/*****************************************************************//** +Based on an index object, this function builds the entry to be inserted +in the SYS_INDEXES system table. 
+@return the tuple which should be inserted */ +static +dtuple_t* +dict_create_sys_indexes_tuple( +/*==========================*/ + const dict_index_t* index, /*!< in: index */ + mem_heap_t* heap) /*!< in: memory heap from + which the memory for the built + tuple is allocated */ +{ + dict_table_t* sys_indexes; + dict_table_t* table; + dtuple_t* entry; + dfield_t* dfield; + byte* ptr; + + ut_ad(mutex_own(&(dict_sys->mutex))); + ut_ad(index); + ut_ad(heap); + + sys_indexes = dict_sys->sys_indexes; + + table = dict_table_get_low(index->table_name); + + entry = dtuple_create(heap, 7 + DATA_N_SYS_COLS); + + dict_table_copy_types(entry, sys_indexes); + + /* 0: TABLE_ID -----------------------*/ + dfield = dtuple_get_nth_field( + entry, DICT_COL__SYS_INDEXES__TABLE_ID); + + ptr = static_cast<byte*>(mem_heap_alloc(heap, 8)); + mach_write_to_8(ptr, table->id); + + dfield_set_data(dfield, ptr, 8); + + /* 1: ID ----------------------------*/ + dfield = dtuple_get_nth_field( + entry, DICT_COL__SYS_INDEXES__ID); + + ptr = static_cast<byte*>(mem_heap_alloc(heap, 8)); + mach_write_to_8(ptr, index->id); + + dfield_set_data(dfield, ptr, 8); + + /* 2: DB_TRX_ID added later */ + /* 3: DB_ROLL_PTR added later */ + /* 4: NAME --------------------------*/ + dfield = dtuple_get_nth_field( + entry, DICT_COL__SYS_INDEXES__NAME); + + dfield_set_data(dfield, index->name, ut_strlen(index->name)); + + /* 5: N_FIELDS ----------------------*/ + dfield = dtuple_get_nth_field( + entry, DICT_COL__SYS_INDEXES__N_FIELDS); + + ptr = static_cast<byte*>(mem_heap_alloc(heap, 4)); + mach_write_to_4(ptr, index->n_fields); + + dfield_set_data(dfield, ptr, 4); + + /* 6: TYPE --------------------------*/ + dfield = dtuple_get_nth_field( + entry, DICT_COL__SYS_INDEXES__TYPE); + + ptr = static_cast<byte*>(mem_heap_alloc(heap, 4)); + mach_write_to_4(ptr, index->type); + + dfield_set_data(dfield, ptr, 4); + + /* 7: SPACE --------------------------*/ + + dfield = dtuple_get_nth_field( + entry, 
DICT_COL__SYS_INDEXES__SPACE); + + ptr = static_cast<byte*>(mem_heap_alloc(heap, 4)); + mach_write_to_4(ptr, index->space); + + dfield_set_data(dfield, ptr, 4); + + /* 8: PAGE_NO --------------------------*/ + + dfield = dtuple_get_nth_field( + entry, DICT_COL__SYS_INDEXES__PAGE_NO); + + ptr = static_cast<byte*>(mem_heap_alloc(heap, 4)); + mach_write_to_4(ptr, FIL_NULL); + + dfield_set_data(dfield, ptr, 4); + + /*--------------------------------*/ + + return(entry); +} + +/*****************************************************************//** +Based on an index object, this function builds the entry to be inserted +in the SYS_FIELDS system table. +@return the tuple which should be inserted */ +static +dtuple_t* +dict_create_sys_fields_tuple( +/*=========================*/ + const dict_index_t* index, /*!< in: index */ + ulint fld_no, /*!< in: field number */ + mem_heap_t* heap) /*!< in: memory heap from + which the memory for the built + tuple is allocated */ +{ + dict_table_t* sys_fields; + dtuple_t* entry; + dict_field_t* field; + dfield_t* dfield; + byte* ptr; + ibool index_contains_column_prefix_field = FALSE; + ulint j; + + ut_ad(index); + ut_ad(heap); + + for (j = 0; j < index->n_fields; j++) { + if (dict_index_get_nth_field(index, j)->prefix_len > 0) { + index_contains_column_prefix_field = TRUE; + break; + } + } + + field = dict_index_get_nth_field(index, fld_no); + + sys_fields = dict_sys->sys_fields; + + entry = dtuple_create(heap, 3 + DATA_N_SYS_COLS); + + dict_table_copy_types(entry, sys_fields); + + /* 0: INDEX_ID -----------------------*/ + dfield = dtuple_get_nth_field(entry, DICT_COL__SYS_FIELDS__INDEX_ID); + + ptr = static_cast<byte*>(mem_heap_alloc(heap, 8)); + mach_write_to_8(ptr, index->id); + + dfield_set_data(dfield, ptr, 8); + + /* 1: POS; FIELD NUMBER & PREFIX LENGTH -----------------------*/ + + dfield = dtuple_get_nth_field(entry, DICT_COL__SYS_FIELDS__POS); + + ptr = static_cast<byte*>(mem_heap_alloc(heap, 4)); + + if 
(index_contains_column_prefix_field) { + /* If there are column prefix fields in the index, then + we store the number of the field to the 2 HIGH bytes + and the prefix length to the 2 low bytes, */ + + mach_write_to_4(ptr, (fld_no << 16) + field->prefix_len); + } else { + /* Else we store the number of the field to the 2 LOW bytes. + This is to keep the storage format compatible with + InnoDB versions < 4.0.14. */ + + mach_write_to_4(ptr, fld_no); + } + + dfield_set_data(dfield, ptr, 4); + + /* 2: DB_TRX_ID added later */ + /* 3: DB_ROLL_PTR added later */ + /* 4: COL_NAME -------------------------*/ + dfield = dtuple_get_nth_field(entry, DICT_COL__SYS_FIELDS__COL_NAME); + + dfield_set_data(dfield, field->name, + ut_strlen(field->name)); + /*---------------------------------*/ + + return(entry); +} + +/*****************************************************************//** +Creates the tuple with which the index entry is searched for writing the index +tree root page number, if such a tree is created. +@return the tuple for search */ +static +dtuple_t* +dict_create_search_tuple( +/*=====================*/ + const dtuple_t* tuple, /*!< in: the tuple inserted in the SYS_INDEXES + table */ + mem_heap_t* heap) /*!< in: memory heap from which the memory for + the built tuple is allocated */ +{ + dtuple_t* search_tuple; + const dfield_t* field1; + dfield_t* field2; + + ut_ad(tuple && heap); + + search_tuple = dtuple_create(heap, 2); + + field1 = dtuple_get_nth_field(tuple, 0); + field2 = dtuple_get_nth_field(search_tuple, 0); + + dfield_copy(field2, field1); + + field1 = dtuple_get_nth_field(tuple, 1); + field2 = dtuple_get_nth_field(search_tuple, 1); + + dfield_copy(field2, field1); + + ut_ad(dtuple_validate(search_tuple)); + + return(search_tuple); +} + +/***************************************************************//** +Builds an index definition row to insert. 
+@return DB_SUCCESS or error code */ +static __attribute__((nonnull, warn_unused_result)) +dberr_t +dict_build_index_def_step( +/*======================*/ + que_thr_t* thr, /*!< in: query thread */ + ind_node_t* node) /*!< in: index create node */ +{ + dict_table_t* table; + dict_index_t* index; + dtuple_t* row; + trx_t* trx; + + ut_ad(mutex_own(&(dict_sys->mutex))); + + trx = thr_get_trx(thr); + + index = node->index; + + table = dict_table_get_low(index->table_name); + + if (table == NULL) { + return(DB_TABLE_NOT_FOUND); + } + + if (!trx->table_id) { + /* Record only the first table id. */ + trx->table_id = table->id; + } + + node->table = table; + + ut_ad((UT_LIST_GET_LEN(table->indexes) > 0) + || dict_index_is_clust(index)); + + dict_hdr_get_new_id(NULL, &index->id, NULL); + + /* Inherit the space id from the table; we store all indexes of a + table in the same tablespace */ + + index->space = table->space; + node->page_no = FIL_NULL; + row = dict_create_sys_indexes_tuple(index, node->heap); + node->ind_row = row; + + ins_node_set_new_row(node->ind_def, row); + + /* Note that the index was created by this transaction. */ + index->trx_id = trx->id; + ut_ad(table->def_trx_id <= trx->id); + table->def_trx_id = trx->id; + + return(DB_SUCCESS); +} + +/***************************************************************//** +Builds a field definition row to insert. */ +static +void +dict_build_field_def_step( +/*======================*/ + ind_node_t* node) /*!< in: index create node */ +{ + dict_index_t* index; + dtuple_t* row; + + index = node->index; + + row = dict_create_sys_fields_tuple(index, node->field_no, node->heap); + + ins_node_set_new_row(node->field_def, row); +} + +/***************************************************************//** +Creates an index tree for the index if it is not a member of a cluster. 
+@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ +static __attribute__((nonnull, warn_unused_result)) +dberr_t +dict_create_index_tree_step( +/*========================*/ + ind_node_t* node) /*!< in: index create node */ +{ + dict_index_t* index; + dict_table_t* sys_indexes; + dtuple_t* search_tuple; + btr_pcur_t pcur; + mtr_t mtr; + + ut_ad(mutex_own(&(dict_sys->mutex))); + + index = node->index; + + sys_indexes = dict_sys->sys_indexes; + + if (index->type == DICT_FTS) { + /* FTS index does not need an index tree */ + return(DB_SUCCESS); + } + + /* Run a mini-transaction in which the index tree is allocated for + the index and its root address is written to the index entry in + sys_indexes */ + + mtr_start(&mtr); + + search_tuple = dict_create_search_tuple(node->ind_row, node->heap); + + btr_pcur_open(UT_LIST_GET_FIRST(sys_indexes->indexes), + search_tuple, PAGE_CUR_L, BTR_MODIFY_LEAF, + &pcur, &mtr); + + btr_pcur_move_to_next_user_rec(&pcur, &mtr); + + + dberr_t err = DB_SUCCESS; + ulint zip_size = dict_table_zip_size(index->table); + + if (node->index->table->ibd_file_missing + || dict_table_is_discarded(node->index->table)) { + + node->page_no = FIL_NULL; + } else { + node->page_no = btr_create( + index->type, index->space, zip_size, + index->id, index, &mtr); + + if (node->page_no == FIL_NULL) { + err = DB_OUT_OF_FILE_SPACE; + } + + DBUG_EXECUTE_IF("ib_import_create_index_failure_1", + node->page_no = FIL_NULL; + err = DB_OUT_OF_FILE_SPACE; ); + } + + page_rec_write_field( + btr_pcur_get_rec(&pcur), DICT_FLD__SYS_INDEXES__PAGE_NO, + node->page_no, &mtr); + + btr_pcur_close(&pcur); + + mtr_commit(&mtr); + + return(err); +} + +/*******************************************************************//** +Drops the index tree associated with a row in SYS_INDEXES table. 
*/
UNIV_INTERN
void
dict_drop_index_tree(
/*=================*/
	rec_t*	rec,	/*!< in/out: record in the clustered index
			of SYS_INDEXES table */
	mtr_t*	mtr)	/*!< in: mtr having the latch on the record page */
{
	ulint	len;

	ut_ad(mutex_own(&(dict_sys->mutex)));
	ut_a(!dict_table_is_comp(dict_sys->sys_indexes));

	const byte*	page_no_field = rec_get_nth_field_old(
		rec, DICT_FLD__SYS_INDEXES__PAGE_NO, &len);

	ut_ad(len == 4);

	const ulint	root_page_no = mtr_read_ulint(
		page_no_field, MLOG_4BYTES, mtr);

	if (root_page_no == FIL_NULL) {
		/* The tree has already been freed */

		return;
	}

	const byte*	space_field = rec_get_nth_field_old(
		rec, DICT_FLD__SYS_INDEXES__SPACE, &len);

	ut_ad(len == 4);

	const ulint	space = mtr_read_ulint(space_field, MLOG_4BYTES, mtr);
	const ulint	zip_size = fil_space_get_zip_size(space);

	if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) {
		/* It is a single table tablespace and the .ibd file is
		missing: do nothing */

		return;
	}

	/* First release every page except the root; this may take
	several mini-transactions. */

	btr_free_but_not_root(space, zip_size, root_page_no);

	/* The root page is released in the caller's mini-transaction,
	the same one that writes FIL_NULL to the PAGE_NO field of the
	SYS_INDEXES record: that mini-transaction marks the B-tree
	totally freed. */

	btr_free_root(space, zip_size, root_page_no, mtr);

	page_rec_write_field(rec, DICT_FLD__SYS_INDEXES__PAGE_NO,
			     FIL_NULL, mtr);
}

/*******************************************************************//**
Truncates the index tree associated with a row in SYS_INDEXES table.
The existing tree (if any) is freed, FIL_NULL is temporarily written to the
PAGE_NO field, the mini-transaction is committed and restarted, and a new
tree is then created with btr_create(). This function does not write the
new root page number back to the SYS_INDEXES record.
@return new root page number, or FIL_NULL on failure */
UNIV_INTERN
ulint
dict_truncate_index_tree(
/*=====================*/
	dict_table_t*	table,	/*!< in: the table the index belongs to */
	ulint		space,	/*!< in: 0=truncate,
				nonzero=create the index tree in the
				given tablespace */
	btr_pcur_t*	pcur,	/*!< in/out: persistent cursor pointing to
				record in the clustered index of
				SYS_INDEXES table. The cursor may be
				repositioned in this call. */
	mtr_t*		mtr)	/*!< in: mtr having the latch
				on the record page. The mtr may be
				committed and restarted in this call. */
{
	ulint		root_page_no;
	ibool		drop = !space;
	ulint		zip_size;
	ulint		type;
	index_id_t	index_id;
	rec_t*		rec;
	const byte*	ptr;
	ulint		len;
	dict_index_t*	index;
	bool		has_been_dropped = false;

	ut_ad(mutex_own(&(dict_sys->mutex)));
	ut_a(!dict_table_is_comp(dict_sys->sys_indexes));
	rec = btr_pcur_get_rec(pcur);
	ptr = rec_get_nth_field_old(
		rec, DICT_FLD__SYS_INDEXES__PAGE_NO, &len);

	ut_ad(len == 4);

	root_page_no = mtr_read_ulint(ptr, MLOG_4BYTES, mtr);

	/* A truncate request for a tree whose root is already FIL_NULL:
	there is nothing to free, so only the create step is run. */
	if (drop && root_page_no == FIL_NULL) {
		has_been_dropped = true;
		drop = FALSE;
	}

	ptr = rec_get_nth_field_old(
		rec, DICT_FLD__SYS_INDEXES__SPACE, &len);

	ut_ad(len == 4);

	/* The space id stored in the record is used only when an
	existing tree is being dropped; otherwise the caller's value is
	kept.
	NOTE(review): when has_been_dropped is set, 'space' is still the
	0 passed by the caller at this point - confirm that creating the
	new tree in that space is intended. */
	if (drop) {
		space = mtr_read_ulint(ptr, MLOG_4BYTES, mtr);
	}

	zip_size = fil_space_get_zip_size(space);

	if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) {
		/* It is a single table tablespace and the .ibd file is
		missing: do nothing */

		ut_print_timestamp(stderr);
		fprintf(stderr, " InnoDB: Trying to TRUNCATE"
			" a missing .ibd file of table %s!\n", table->name);
		return(FIL_NULL);
	}

	/* Remember the index type and id from the record now; they are
	needed for btr_create() after the mini-transaction restart. */
	ptr = rec_get_nth_field_old(
		rec, DICT_FLD__SYS_INDEXES__TYPE, &len);
	ut_ad(len == 4);
	type = mach_read_from_4(ptr);

	ptr = rec_get_nth_field_old(rec, DICT_FLD__SYS_INDEXES__ID, &len);
	ut_ad(len == 8);
	index_id = mach_read_from_8(ptr);

	if (!drop) {

		goto create;
	}

	/* We free all the pages but the root page first; this operation
	may span several mini-transactions */

	btr_free_but_not_root(space, zip_size, root_page_no);

	/* Then we free the root page in the same mini-transaction where
	we create the b-tree and write its new root page number to the
	appropriate field in the SYS_INDEXES record: this mini-transaction
	marks the B-tree totally truncated */

	btr_block_get(space, zip_size, root_page_no, RW_X_LATCH, NULL, mtr);

	btr_free_root(space, zip_size, root_page_no, mtr);
create:
	/* We will temporarily write FIL_NULL to the PAGE_NO field
	in SYS_INDEXES, so that the database will not get into an
	inconsistent state in case it crashes between the mtr_commit()
	below and the following mtr_commit() call. */
	page_rec_write_field(rec, DICT_FLD__SYS_INDEXES__PAGE_NO,
			     FIL_NULL, mtr);

	/* We will need to commit the mini-transaction in order to avoid
	deadlocks in the btr_create() call, because otherwise we would
	be freeing and allocating pages in the same mini-transaction. */
	btr_pcur_store_position(pcur, mtr);
	mtr_commit(mtr);

	mtr_start(mtr);
	btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur, mtr);

	/* Find the index corresponding to this SYS_INDEXES record. */
	for (index = UT_LIST_GET_FIRST(table->indexes);
	     index;
	     index = UT_LIST_GET_NEXT(indexes, index)) {
		if (index->id == index_id) {
			if (index->type & DICT_FTS) {
				/* No tree is created for an FTS index. */
				return(FIL_NULL);
			} else {
				if (has_been_dropped) {
					fprintf(stderr, " InnoDB: Trying to"
						" TRUNCATE a missing index of"
						" table %s!\n",
						index->table->name);
				}

				root_page_no = btr_create(type, space, zip_size,
							  index_id, index, mtr);
				/* Keep the cached index object in step with
				the newly created root page. */
				index->page = (unsigned int) root_page_no;
				return(root_page_no);
			}
		}
	}

	/* No cached index of the table carries the id read from the
	SYS_INDEXES record. */
	ut_print_timestamp(stderr);
	fprintf(stderr,
		" InnoDB: Index %llu of table %s is missing\n"
		"InnoDB: from the data dictionary during TRUNCATE!\n",
		(ullint) index_id,
		table->name);

	return(FIL_NULL);
}

/*********************************************************************//**
Creates a table create graph. The node itself and its two insert nodes
(for SYS_TABLES and SYS_COLUMNS) are allocated from 'heap'; the node
additionally gets a private heap created with mem_heap_create(256).
@return own: table create node */
UNIV_INTERN
tab_node_t*
tab_create_graph_create(
/*====================*/
	dict_table_t*	table,	/*!< in: table to create, built as a memory data
				structure */
	mem_heap_t*	heap,	/*!< in: heap where created */
	bool		commit)	/*!< in: true if the commit node should be
				added to the query graph */
{
	tab_node_t*	node;

	node = static_cast<tab_node_t*>(
		mem_heap_alloc(heap, sizeof(tab_node_t)));

	node->common.type = QUE_NODE_CREATE_TABLE;

	node->table = table;

	/* Initial state of the table create state machine. */
	node->state = TABLE_BUILD_TABLE_DEF;
	node->heap = mem_heap_create(256);

	node->tab_def = ins_node_create(INS_DIRECT, dict_sys->sys_tables,
					heap);
	node->tab_def->common.parent = node;

	node->col_def = ins_node_create(INS_DIRECT, dict_sys->sys_columns,
					heap);
	node->col_def->common.parent = node;

	if (commit) {
		node->commit_node = trx_commit_node_create(heap);
		node->commit_node->common.parent = node;
	} else {
		node->commit_node = 0;
	}

	return(node);
}

/*********************************************************************//**
Creates an index create graph.
+@return own: index create node */ +UNIV_INTERN +ind_node_t* +ind_create_graph_create( +/*====================*/ + dict_index_t* index, /*!< in: index to create, built as a memory data + structure */ + mem_heap_t* heap, /*!< in: heap where created */ + bool commit) /*!< in: true if the commit node should be + added to the query graph */ +{ + ind_node_t* node; + + node = static_cast<ind_node_t*>( + mem_heap_alloc(heap, sizeof(ind_node_t))); + + node->common.type = QUE_NODE_CREATE_INDEX; + + node->index = index; + + node->state = INDEX_BUILD_INDEX_DEF; + node->page_no = FIL_NULL; + node->heap = mem_heap_create(256); + + node->ind_def = ins_node_create(INS_DIRECT, + dict_sys->sys_indexes, heap); + node->ind_def->common.parent = node; + + node->field_def = ins_node_create(INS_DIRECT, + dict_sys->sys_fields, heap); + node->field_def->common.parent = node; + + if (commit) { + node->commit_node = trx_commit_node_create(heap); + node->commit_node->common.parent = node; + } else { + node->commit_node = 0; + } + + return(node); +} + +/***********************************************************//** +Creates a table. This is a high-level function used in SQL execution graphs. 
+@return query thread to run next or NULL */ +UNIV_INTERN +que_thr_t* +dict_create_table_step( +/*===================*/ + que_thr_t* thr) /*!< in: query thread */ +{ + tab_node_t* node; + dberr_t err = DB_ERROR; + trx_t* trx; + + ut_ad(thr); + ut_ad(mutex_own(&(dict_sys->mutex))); + + trx = thr_get_trx(thr); + + node = static_cast<tab_node_t*>(thr->run_node); + + ut_ad(que_node_get_type(node) == QUE_NODE_CREATE_TABLE); + + if (thr->prev_node == que_node_get_parent(node)) { + node->state = TABLE_BUILD_TABLE_DEF; + } + + if (node->state == TABLE_BUILD_TABLE_DEF) { + + /* DO THE CHECKS OF THE CONSISTENCY CONSTRAINTS HERE */ + + err = dict_build_table_def_step(thr, node); + + if (err != DB_SUCCESS) { + + goto function_exit; + } + + node->state = TABLE_BUILD_COL_DEF; + node->col_no = 0; + + thr->run_node = node->tab_def; + + return(thr); + } + + if (node->state == TABLE_BUILD_COL_DEF) { + + if (node->col_no < (node->table)->n_def) { + + dict_build_col_def_step(node); + + node->col_no++; + + thr->run_node = node->col_def; + + return(thr); + } else { + node->state = TABLE_COMMIT_WORK; + } + } + + if (node->state == TABLE_COMMIT_WORK) { + + /* Table was correctly defined: do NOT commit the transaction + (CREATE TABLE does NOT do an implicit commit of the current + transaction) */ + + node->state = TABLE_ADD_TO_CACHE; + + /* thr->run_node = node->commit_node; + + return(thr); */ + } + + if (node->state == TABLE_ADD_TO_CACHE) { + + dict_table_add_to_cache(node->table, TRUE, node->heap); + + err = DB_SUCCESS; + } + +function_exit: + trx->error_state = err; + + if (err == DB_SUCCESS) { + /* Ok: do nothing */ + + } else if (err == DB_LOCK_WAIT) { + + return(NULL); + } else { + /* SQL error detected */ + + return(NULL); + } + + thr->run_node = que_node_get_parent(node); + + return(thr); +} + +/***********************************************************//** +Creates an index. This is a high-level function used in SQL execution +graphs. 
Each call advances the create-index state machine: the SYS_INDEXES row,
then one SYS_FIELDS row per field, then the index is added to the
dictionary cache and finally its tree is created. On any error the error
code is stored in trx->error_state and NULL is returned.
@return query thread to run next or NULL */
UNIV_INTERN
que_thr_t*
dict_create_index_step(
/*===================*/
	que_thr_t*	thr)	/*!< in: query thread */
{
	ind_node_t*	node;
	dberr_t		err = DB_ERROR;
	trx_t*		trx;

	ut_ad(thr);
	ut_ad(mutex_own(&(dict_sys->mutex)));

	trx = thr_get_trx(thr);

	node = static_cast<ind_node_t*>(thr->run_node);

	ut_ad(que_node_get_type(node) == QUE_NODE_CREATE_INDEX);

	/* Entered from the parent node: restart the state machine. */
	if (thr->prev_node == que_node_get_parent(node)) {
		node->state = INDEX_BUILD_INDEX_DEF;
	}

	if (node->state == INDEX_BUILD_INDEX_DEF) {
		/* DO THE CHECKS OF THE CONSISTENCY CONSTRAINTS HERE */
		err = dict_build_index_def_step(thr, node);

		if (err != DB_SUCCESS) {

			goto function_exit;
		}

		node->state = INDEX_BUILD_FIELD_DEF;
		node->field_no = 0;

		/* Run the SYS_INDEXES insert next. */
		thr->run_node = node->ind_def;

		return(thr);
	}

	if (node->state == INDEX_BUILD_FIELD_DEF) {

		if (node->field_no < (node->index)->n_fields) {

			dict_build_field_def_step(node);

			node->field_no++;

			/* Run the SYS_FIELDS insert next. */
			thr->run_node = node->field_def;

			return(thr);
		} else {
			node->state = INDEX_ADD_TO_CACHE;
		}
	}

	if (node->state == INDEX_ADD_TO_CACHE) {

		/* Remember the id: node->index is re-fetched from the
		cache by id after dict_index_add_to_cache(). */
		index_id_t	index_id = node->index->id;

		err = dict_index_add_to_cache(
			node->table, node->index, FIL_NULL,
			trx_is_strict(trx)
			|| dict_table_get_format(node->table)
			>= UNIV_FORMAT_B);

		node->index = dict_index_get_if_in_cache_low(index_id);
		/* The index must be in the cache exactly when adding it
		succeeded. */
		ut_a(!node->index == (err != DB_SUCCESS));

		if (err != DB_SUCCESS) {

			goto function_exit;
		}

		node->state = INDEX_CREATE_INDEX_TREE;
	}

	if (node->state == INDEX_CREATE_INDEX_TREE) {

		err = dict_create_index_tree_step(node);

		/* Debug-only failure injection. */
		DBUG_EXECUTE_IF("ib_dict_create_index_tree_fail",
				err = DB_OUT_OF_MEMORY;);

		if (err != DB_SUCCESS) {
			/* If this is a FTS index, we will need to remove
			it from fts->cache->indexes list as well */
			if ((node->index->type & DICT_FTS)
			    && node->table->fts) {
				fts_index_cache_t*	index_cache;

				rw_lock_x_lock(
					&node->table->fts->cache->init_lock);

				/* NOTE(review): the result is dereferenced
				without a NULL check - confirm that
				fts_find_index_cache() cannot fail here. */
				index_cache = (fts_index_cache_t*)
					fts_find_index_cache(
						node->table->fts->cache,
						node->index);

				if (index_cache->words) {
					rbt_free(index_cache->words);
					index_cache->words = 0;
				}

				ib_vector_remove(
					node->table->fts->cache->indexes,
					*reinterpret_cast<void**>(index_cache));

				rw_lock_x_unlock(
					&node->table->fts->cache->init_lock);
			}

			/* Undo the cache insertion done in the
			INDEX_ADD_TO_CACHE state. */
			dict_index_remove_from_cache(node->table, node->index);
			node->index = NULL;

			goto function_exit;
		}

		node->index->page = node->page_no;
		/* These should have been set in
		dict_build_index_def_step() and
		dict_index_add_to_cache(). */
		ut_ad(node->index->trx_id == trx->id);
		ut_ad(node->index->table->def_trx_id == trx->id);
		node->state = INDEX_COMMIT_WORK;
	}

	if (node->state == INDEX_COMMIT_WORK) {

		/* Index was correctly defined: do NOT commit the transaction
		(CREATE INDEX does NOT currently do an implicit commit of
		the current transaction) */

		node->state = INDEX_CREATE_INDEX_TREE;

		/* thr->run_node = node->commit_node;

		return(thr); */
	}

function_exit:
	trx->error_state = err;

	if (err == DB_SUCCESS) {
		/* Ok: do nothing */

	} else if (err == DB_LOCK_WAIT) {

		return(NULL);
	} else {
		/* SQL error detected */

		return(NULL);
	}

	thr->run_node = que_node_get_parent(node);

	return(thr);
}

/****************************************************************//**
Check whether a system table exists. Additionally, if it exists,
move it to the non-LRU end of the table LRU list. This is only used
for system tables that can be upgraded or added to an older database,
which include SYS_FOREIGN, SYS_FOREIGN_COLS, SYS_TABLESPACES and
SYS_DATAFILES.
+@return DB_SUCCESS if the sys table exists, DB_CORRUPTION if it exists +but is not current, DB_TABLE_NOT_FOUND if it does not exist*/ +static +dberr_t +dict_check_if_system_table_exists( +/*==============================*/ + const char* tablename, /*!< in: name of table */ + ulint num_fields, /*!< in: number of fields */ + ulint num_indexes) /*!< in: number of indexes */ +{ + dict_table_t* sys_table; + dberr_t error = DB_SUCCESS; + + ut_a(srv_get_active_thread_type() == SRV_NONE); + + mutex_enter(&dict_sys->mutex); + + sys_table = dict_table_get_low(tablename); + + if (sys_table == NULL) { + error = DB_TABLE_NOT_FOUND; + + } else if (UT_LIST_GET_LEN(sys_table->indexes) != num_indexes + || sys_table->n_cols != num_fields) { + error = DB_CORRUPTION; + + } else { + /* This table has already been created, and it is OK. + Ensure that it can't be evicted from the table LRU cache. */ + + dict_table_move_from_lru_to_non_lru(sys_table); + } + + mutex_exit(&dict_sys->mutex); + + return(error); +} + +/****************************************************************//** +Creates the foreign key constraints system tables inside InnoDB +at server bootstrap or server start if they are not found or are +not of the right form. +@return DB_SUCCESS or error code */ +UNIV_INTERN +dberr_t +dict_create_or_check_foreign_constraint_tables(void) +/*================================================*/ +{ + trx_t* trx; + my_bool srv_file_per_table_backup; + dberr_t err; + dberr_t sys_foreign_err; + dberr_t sys_foreign_cols_err; + + ut_a(srv_get_active_thread_type() == SRV_NONE); + + /* Note: The master thread has not been started at this point. 
*/ + + + sys_foreign_err = dict_check_if_system_table_exists( + "SYS_FOREIGN", DICT_NUM_FIELDS__SYS_FOREIGN + 1, 3); + sys_foreign_cols_err = dict_check_if_system_table_exists( + "SYS_FOREIGN_COLS", DICT_NUM_FIELDS__SYS_FOREIGN_COLS + 1, 1); + + if (sys_foreign_err == DB_SUCCESS + && sys_foreign_cols_err == DB_SUCCESS) { + return(DB_SUCCESS); + } + + trx = trx_allocate_for_mysql(); + + trx_set_dict_operation(trx, TRX_DICT_OP_TABLE); + + trx->op_info = "creating foreign key sys tables"; + + row_mysql_lock_data_dictionary(trx); + + /* Check which incomplete table definition to drop. */ + + if (sys_foreign_err == DB_CORRUPTION) { + ib_logf(IB_LOG_LEVEL_WARN, + "Dropping incompletely created " + "SYS_FOREIGN table."); + row_drop_table_for_mysql("SYS_FOREIGN", trx, TRUE); + } + + if (sys_foreign_cols_err == DB_CORRUPTION) { + ib_logf(IB_LOG_LEVEL_WARN, + "Dropping incompletely created " + "SYS_FOREIGN_COLS table."); + + row_drop_table_for_mysql("SYS_FOREIGN_COLS", trx, TRUE); + } + + ib_logf(IB_LOG_LEVEL_WARN, + "Creating foreign key constraint system tables."); + + /* NOTE: in dict_load_foreigns we use the fact that + there are 2 secondary indexes on SYS_FOREIGN, and they + are defined just like below */ + + /* NOTE: when designing InnoDB's foreign key support in 2001, we made + an error and made the table names and the foreign key id of type + 'CHAR' (internally, really a VARCHAR). We should have made the type + VARBINARY, like in other InnoDB system tables, to get a clean + design. */ + + srv_file_per_table_backup = srv_file_per_table; + + /* We always want SYSTEM tables to be created inside the system + tablespace. 
*/ + + srv_file_per_table = 0; + + err = que_eval_sql( + NULL, + "PROCEDURE CREATE_FOREIGN_SYS_TABLES_PROC () IS\n" + "BEGIN\n" + "CREATE TABLE\n" + "SYS_FOREIGN(ID CHAR, FOR_NAME CHAR," + " REF_NAME CHAR, N_COLS INT);\n" + "CREATE UNIQUE CLUSTERED INDEX ID_IND" + " ON SYS_FOREIGN (ID);\n" + "CREATE INDEX FOR_IND" + " ON SYS_FOREIGN (FOR_NAME);\n" + "CREATE INDEX REF_IND" + " ON SYS_FOREIGN (REF_NAME);\n" + "CREATE TABLE\n" + "SYS_FOREIGN_COLS(ID CHAR, POS INT," + " FOR_COL_NAME CHAR, REF_COL_NAME CHAR);\n" + "CREATE UNIQUE CLUSTERED INDEX ID_IND" + " ON SYS_FOREIGN_COLS (ID, POS);\n" + "END;\n", + FALSE, trx); + + if (err != DB_SUCCESS) { + ib_logf(IB_LOG_LEVEL_ERROR, + "Creation of SYS_FOREIGN and SYS_FOREIGN_COLS " + "has failed with error %lu. Tablespace is full. " + "Dropping incompletely created tables.", + (ulong) err); + + ut_ad(err == DB_OUT_OF_FILE_SPACE + || err == DB_TOO_MANY_CONCURRENT_TRXS); + + row_drop_table_for_mysql("SYS_FOREIGN", trx, TRUE); + row_drop_table_for_mysql("SYS_FOREIGN_COLS", trx, TRUE); + + if (err == DB_OUT_OF_FILE_SPACE) { + err = DB_MUST_GET_MORE_FILE_SPACE; + } + } + + trx_commit_for_mysql(trx); + + row_mysql_unlock_data_dictionary(trx); + + trx_free_for_mysql(trx); + + srv_file_per_table = srv_file_per_table_backup; + + if (err == DB_SUCCESS) { + ib_logf(IB_LOG_LEVEL_INFO, + "Foreign key constraint system tables created"); + } + + /* Note: The master thread has not been started at this point. */ + /* Confirm and move to the non-LRU part of the table LRU list. */ + sys_foreign_err = dict_check_if_system_table_exists( + "SYS_FOREIGN", DICT_NUM_FIELDS__SYS_FOREIGN + 1, 3); + ut_a(sys_foreign_err == DB_SUCCESS); + + sys_foreign_cols_err = dict_check_if_system_table_exists( + "SYS_FOREIGN_COLS", DICT_NUM_FIELDS__SYS_FOREIGN_COLS + 1, 1); + ut_a(sys_foreign_cols_err == DB_SUCCESS); + + return(err); +} + +/****************************************************************//** +Evaluate the given foreign key SQL statement. 
+@return error code or DB_SUCCESS */ +static __attribute__((nonnull, warn_unused_result)) +dberr_t +dict_foreign_eval_sql( +/*==================*/ + pars_info_t* info, /*!< in: info struct */ + const char* sql, /*!< in: SQL string to evaluate */ + const char* name, /*!< in: table name (for diagnostics) */ + const char* id, /*!< in: foreign key id */ + trx_t* trx) /*!< in/out: transaction */ +{ + dberr_t error; + FILE* ef = dict_foreign_err_file; + + error = que_eval_sql(info, sql, FALSE, trx); + + if (error == DB_DUPLICATE_KEY) { + mutex_enter(&dict_foreign_err_mutex); + rewind(ef); + ut_print_timestamp(ef); + fputs(" Error in foreign key constraint creation for table ", + ef); + ut_print_name(ef, trx, TRUE, name); + fputs(".\nA foreign key constraint of name ", ef); + ut_print_name(ef, trx, TRUE, id); + fputs("\nalready exists." + " (Note that internally InnoDB adds 'databasename'\n" + "in front of the user-defined constraint name.)\n" + "Note that InnoDB's FOREIGN KEY system tables store\n" + "constraint names as case-insensitive, with the\n" + "MySQL standard latin1_swedish_ci collation. If you\n" + "create tables or databases whose names differ only in\n" + "the character case, then collisions in constraint\n" + "names can occur. 
Workaround: name your constraints\n" + "explicitly with unique names.\n", + ef); + + mutex_exit(&dict_foreign_err_mutex); + + return(error); + } + + if (error != DB_SUCCESS) { + fprintf(stderr, + "InnoDB: Foreign key constraint creation failed:\n" + "InnoDB: internal error number %lu\n", (ulong) error); + + mutex_enter(&dict_foreign_err_mutex); + ut_print_timestamp(ef); + fputs(" Internal error in foreign key constraint creation" + " for table ", ef); + ut_print_name(ef, trx, TRUE, name); + fputs(".\n" + "See the MySQL .err log in the datadir" + " for more information.\n", ef); + mutex_exit(&dict_foreign_err_mutex); + + return(error); + } + + return(DB_SUCCESS); +} + +/********************************************************************//** +Add a single foreign key field definition to the data dictionary tables in +the database. +@return error code or DB_SUCCESS */ +static __attribute__((nonnull, warn_unused_result)) +dberr_t +dict_create_add_foreign_field_to_dictionary( +/*========================================*/ + ulint field_nr, /*!< in: field number */ + const char* table_name, /*!< in: table name */ + const dict_foreign_t* foreign, /*!< in: foreign */ + trx_t* trx) /*!< in/out: transaction */ +{ + pars_info_t* info = pars_info_create(); + + pars_info_add_str_literal(info, "id", foreign->id); + + pars_info_add_int4_literal(info, "pos", field_nr); + + pars_info_add_str_literal(info, "for_col_name", + foreign->foreign_col_names[field_nr]); + + pars_info_add_str_literal(info, "ref_col_name", + foreign->referenced_col_names[field_nr]); + + return(dict_foreign_eval_sql( + info, + "PROCEDURE P () IS\n" + "BEGIN\n" + "INSERT INTO SYS_FOREIGN_COLS VALUES" + "(:id, :pos, :for_col_name, :ref_col_name);\n" + "END;\n", + table_name, foreign->id, trx)); +} + +/********************************************************************//** +Add a foreign key definition to the data dictionary tables. 
+@return error code or DB_SUCCESS */ +UNIV_INTERN +dberr_t +dict_create_add_foreign_to_dictionary( +/*==================================*/ + const char* name, /*!< in: table name */ + const dict_foreign_t* foreign,/*!< in: foreign key */ + trx_t* trx) /*!< in/out: dictionary transaction */ +{ + dberr_t error; + pars_info_t* info = pars_info_create(); + + pars_info_add_str_literal(info, "id", foreign->id); + + pars_info_add_str_literal(info, "for_name", name); + + pars_info_add_str_literal(info, "ref_name", + foreign->referenced_table_name); + + pars_info_add_int4_literal(info, "n_cols", + foreign->n_fields + (foreign->type << 24)); + + error = dict_foreign_eval_sql(info, + "PROCEDURE P () IS\n" + "BEGIN\n" + "INSERT INTO SYS_FOREIGN VALUES" + "(:id, :for_name, :ref_name, :n_cols);\n" + "END;\n" + , name, foreign->id, trx); + + if (error != DB_SUCCESS) { + + return(error); + } + + for (ulint i = 0; i < foreign->n_fields; i++) { + error = dict_create_add_foreign_field_to_dictionary( + i, name, foreign, trx); + + if (error != DB_SUCCESS) { + + return(error); + } + } + + return(error); +} + +/** Adds the given set of foreign key objects to the dictionary tables +in the database. This function does not modify the dictionary cache. The +caller must ensure that all foreign key objects contain a valid constraint +name in foreign->id. 
+@param[in] local_fk_set set of foreign key objects, to be added to +the dictionary tables +@param[in] table table to which the foreign key objects in +local_fk_set belong to +@param[in,out] trx transaction +@return error code or DB_SUCCESS */ +UNIV_INTERN +dberr_t +dict_create_add_foreigns_to_dictionary( +/*===================================*/ + const dict_foreign_set& local_fk_set, + const dict_table_t* table, + trx_t* trx) +{ + dict_foreign_t* foreign; + dberr_t error; + + ut_ad(mutex_own(&(dict_sys->mutex))); + + if (NULL == dict_table_get_low("SYS_FOREIGN")) { + fprintf(stderr, + "InnoDB: table SYS_FOREIGN not found" + " in internal data dictionary\n"); + + return(DB_ERROR); + } + + for (dict_foreign_set::const_iterator it = local_fk_set.begin(); + it != local_fk_set.end(); + ++it) { + + foreign = *it; + ut_ad(foreign->id != NULL); + + error = dict_create_add_foreign_to_dictionary(table->name, + foreign, trx); + + if (error != DB_SUCCESS) { + + return(error); + } + } + + trx->op_info = "committing foreign key definitions"; + + trx_commit(trx); + + trx->op_info = ""; + + return(DB_SUCCESS); +} + +/****************************************************************//** +Creates the tablespaces and datafiles system tables inside InnoDB +at server bootstrap or server start if they are not found or are +not of the right form. +@return DB_SUCCESS or error code */ +UNIV_INTERN +dberr_t +dict_create_or_check_sys_tablespace(void) +/*=====================================*/ +{ + trx_t* trx; + my_bool srv_file_per_table_backup; + dberr_t err; + dberr_t sys_tablespaces_err; + dberr_t sys_datafiles_err; + + ut_a(srv_get_active_thread_type() == SRV_NONE); + + /* Note: The master thread has not been started at this point. 
*/ + + sys_tablespaces_err = dict_check_if_system_table_exists( + "SYS_TABLESPACES", DICT_NUM_FIELDS__SYS_TABLESPACES + 1, 1); + sys_datafiles_err = dict_check_if_system_table_exists( + "SYS_DATAFILES", DICT_NUM_FIELDS__SYS_DATAFILES + 1, 1); + + if (sys_tablespaces_err == DB_SUCCESS + && sys_datafiles_err == DB_SUCCESS) { + return(DB_SUCCESS); + } + + trx = trx_allocate_for_mysql(); + + trx_set_dict_operation(trx, TRX_DICT_OP_TABLE); + + trx->op_info = "creating tablepace and datafile sys tables"; + + row_mysql_lock_data_dictionary(trx); + + /* Check which incomplete table definition to drop. */ + + if (sys_tablespaces_err == DB_CORRUPTION) { + ib_logf(IB_LOG_LEVEL_WARN, + "Dropping incompletely created " + "SYS_TABLESPACES table."); + row_drop_table_for_mysql("SYS_TABLESPACES", trx, TRUE); + } + + if (sys_datafiles_err == DB_CORRUPTION) { + ib_logf(IB_LOG_LEVEL_WARN, + "Dropping incompletely created " + "SYS_DATAFILES table."); + + row_drop_table_for_mysql("SYS_DATAFILES", trx, TRUE); + } + + ib_logf(IB_LOG_LEVEL_INFO, + "Creating tablespace and datafile system tables."); + + /* We always want SYSTEM tables to be created inside the system + tablespace. */ + srv_file_per_table_backup = srv_file_per_table; + srv_file_per_table = 0; + + err = que_eval_sql( + NULL, + "PROCEDURE CREATE_SYS_TABLESPACE_PROC () IS\n" + "BEGIN\n" + "CREATE TABLE SYS_TABLESPACES(\n" + " SPACE INT, NAME CHAR, FLAGS INT);\n" + "CREATE UNIQUE CLUSTERED INDEX SYS_TABLESPACES_SPACE" + " ON SYS_TABLESPACES (SPACE);\n" + "CREATE TABLE SYS_DATAFILES(\n" + " SPACE INT, PATH CHAR);\n" + "CREATE UNIQUE CLUSTERED INDEX SYS_DATAFILES_SPACE" + " ON SYS_DATAFILES (SPACE);\n" + "END;\n", + FALSE, trx); + + if (err != DB_SUCCESS) { + ib_logf(IB_LOG_LEVEL_ERROR, + "Creation of SYS_TABLESPACES and SYS_DATAFILES " + "has failed with error %lu. Tablespace is full. 
" + "Dropping incompletely created tables.", + (ulong) err); + + ut_a(err == DB_OUT_OF_FILE_SPACE + || err == DB_TOO_MANY_CONCURRENT_TRXS); + + row_drop_table_for_mysql("SYS_TABLESPACES", trx, TRUE); + row_drop_table_for_mysql("SYS_DATAFILES", trx, TRUE); + + if (err == DB_OUT_OF_FILE_SPACE) { + err = DB_MUST_GET_MORE_FILE_SPACE; + } + } + + trx_commit_for_mysql(trx); + + row_mysql_unlock_data_dictionary(trx); + + trx_free_for_mysql(trx); + + srv_file_per_table = srv_file_per_table_backup; + + if (err == DB_SUCCESS) { + ib_logf(IB_LOG_LEVEL_INFO, + "Tablespace and datafile system tables created."); + } + + /* Note: The master thread has not been started at this point. */ + /* Confirm and move to the non-LRU part of the table LRU list. */ + + sys_tablespaces_err = dict_check_if_system_table_exists( + "SYS_TABLESPACES", DICT_NUM_FIELDS__SYS_TABLESPACES + 1, 1); + ut_a(sys_tablespaces_err == DB_SUCCESS); + + sys_datafiles_err = dict_check_if_system_table_exists( + "SYS_DATAFILES", DICT_NUM_FIELDS__SYS_DATAFILES + 1, 1); + ut_a(sys_datafiles_err == DB_SUCCESS); + + return(err); +} + +/********************************************************************//** +Add a single tablespace definition to the data dictionary tables in the +database. 
+@return error code or DB_SUCCESS */ +UNIV_INTERN +dberr_t +dict_create_add_tablespace_to_dictionary( +/*=====================================*/ + ulint space, /*!< in: tablespace id */ + const char* name, /*!< in: tablespace name */ + ulint flags, /*!< in: tablespace flags */ + const char* path, /*!< in: tablespace path */ + trx_t* trx, /*!< in/out: transaction */ + bool commit) /*!< in: if true then commit the + transaction once both rows have been inserted */ +{ + dberr_t error; + + pars_info_t* info = pars_info_create(); + + ut_a(space > TRX_SYS_SPACE); /* only ids above TRX_SYS_SPACE are accepted */ + + pars_info_add_int4_literal(info, "space", space); + + pars_info_add_str_literal(info, "name", name); + + pars_info_add_int4_literal(info, "flags", flags); + + pars_info_add_str_literal(info, "path", path); + + /* Insert one row into SYS_TABLESPACES and one into SYS_DATAFILES, using the literals bound above. */ error = que_eval_sql(info, + "PROCEDURE P () IS\n" + "BEGIN\n" + "INSERT INTO SYS_TABLESPACES VALUES" + "(:space, :name, :flags);\n" + "INSERT INTO SYS_DATAFILES VALUES" + "(:space, :path);\n" + "END;\n", + FALSE, trx); + + if (error != DB_SUCCESS) { /* failed: return without committing; trx->op_info is left as it was */ + return(error); + } + + if (commit) { + trx->op_info = "committing tablespace and datafile definition"; + trx_commit(trx); + } + + trx->op_info = ""; + + return(error); +} diff --git a/storage/xtradb/dict/dict0dict.cc b/storage/xtradb/dict/dict0dict.cc new file mode 100644 index 00000000000..87a1caa31bb --- /dev/null +++ b/storage/xtradb/dict/dict0dict.cc @@ -0,0 +1,6750 @@ +/***************************************************************************** + +Copyright (c) 1996, 2014, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2012, Facebook Inc. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/******************************************************************//** +@file dict/dict0dict.cc +Data dictionary system + +Created 1/8/1996 Heikki Tuuri +***********************************************************************/ + +#include "dict0dict.h" +#include "fts0fts.h" +#include "fil0fil.h" +#include <algorithm> + +#ifdef UNIV_NONINL +#include "dict0dict.ic" +#include "dict0priv.ic" +#endif + +/** dummy index for ROW_FORMAT=REDUNDANT supremum and infimum records */ +UNIV_INTERN dict_index_t* dict_ind_redundant; +/** dummy index for ROW_FORMAT=COMPACT supremum and infimum records */ +UNIV_INTERN dict_index_t* dict_ind_compact; + +#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG +/** Flag to control insert buffer debugging. */ +extern UNIV_INTERN uint ibuf_debug; +#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */ + +/********************************************************************** +Issue a warning that the row is too big. 
*/ +void +ib_warn_row_too_big(const dict_table_t* table); + +#ifndef UNIV_HOTBACKUP +#include "buf0buf.h" +#include "data0type.h" +#include "mach0data.h" +#include "dict0boot.h" +#include "dict0mem.h" +#include "dict0crea.h" +#include "dict0stats.h" +#include "trx0undo.h" +#include "btr0btr.h" +#include "btr0cur.h" +#include "btr0sea.h" +#include "os0once.h" +#include "page0zip.h" +#include "page0page.h" +#include "pars0pars.h" +#include "pars0sym.h" +#include "que0que.h" +#include "rem0cmp.h" +#include "fts0fts.h" +#include "fts0types.h" +#include "m_ctype.h" /* my_isspace() */ +#include "ha_prototypes.h" /* innobase_strcasecmp(), innobase_casedn_str() */ +#include "srv0mon.h" +#include "srv0start.h" +#include "lock0lock.h" +#include "dict0priv.h" +#include "row0upd.h" +#include "row0mysql.h" +#include "row0merge.h" +#include "row0log.h" +#include "ut0ut.h" /* ut_format_name() */ +#include "m_string.h" +#include "my_sys.h" +#include "mysqld.h" /* system_charset_info */ +#include "strfunc.h" /* strconvert() */ + +#include <ctype.h> + +/** the dictionary system */ +UNIV_INTERN dict_sys_t* dict_sys = NULL; + +/** @brief the data dictionary rw-latch protecting dict_sys + +table create, drop, etc. 
reserve this in X-mode; implicit or +background operations purge, rollback, foreign key checks reserve this +in S-mode; we cannot trust that MySQL protects implicit or background +operations from a table drop since MySQL does not know of them; therefore +we need this; NOTE: a transaction which reserves this must keep book +on the mode in trx_t::dict_operation_lock_mode */ +UNIV_INTERN rw_lock_t dict_operation_lock; + +/** Percentage of compression failures that are allowed in a single +round */ +UNIV_INTERN ulong zip_failure_threshold_pct = 5; + +/** Maximum percentage of a page that can be allowed as a pad to avoid +compression failures */ +UNIV_INTERN ulong zip_pad_max = 50; + +/* Keys to register rwlocks and mutexes with performance schema */ +#ifdef UNIV_PFS_RWLOCK +UNIV_INTERN mysql_pfs_key_t dict_operation_lock_key; +UNIV_INTERN mysql_pfs_key_t index_tree_rw_lock_key; +UNIV_INTERN mysql_pfs_key_t index_online_log_key; +UNIV_INTERN mysql_pfs_key_t dict_table_stats_key; +#endif /* UNIV_PFS_RWLOCK */ + +#ifdef UNIV_PFS_MUTEX +UNIV_INTERN mysql_pfs_key_t zip_pad_mutex_key; +UNIV_INTERN mysql_pfs_key_t dict_sys_mutex_key; +UNIV_INTERN mysql_pfs_key_t dict_foreign_err_mutex_key; +#endif /* UNIV_PFS_MUTEX */ + +#define DICT_HEAP_SIZE 100 /*!< initial memory heap size when + creating a table or index object */ +#define DICT_POOL_PER_TABLE_HASH 512 /*!< buffer pool max size per table + hash table fixed size in bytes */ +#define DICT_POOL_PER_VARYING 4 /*!< buffer pool max size per data + dictionary varying size in bytes */ + +/** Identifies generated InnoDB foreign key names */ +static char dict_ibfk[] = "_ibfk_"; + +/*******************************************************************//** +Tries to find column names for the index and sets the col field of the +index. 
+@return TRUE if the column names were found */ +static +ibool +dict_index_find_cols( +/*=================*/ + dict_table_t* table, /*!< in: table */ + dict_index_t* index); /*!< in: index */ +/*******************************************************************//** +Builds the internal dictionary cache representation for a clustered +index, containing also system fields not defined by the user. +@return own: the internal representation of the clustered index */ +static +dict_index_t* +dict_index_build_internal_clust( +/*============================*/ + const dict_table_t* table, /*!< in: table */ + dict_index_t* index); /*!< in: user representation of + a clustered index */ +/*******************************************************************//** +Builds the internal dictionary cache representation for a non-clustered +index, containing also system fields not defined by the user. +@return own: the internal representation of the non-clustered index */ +static +dict_index_t* +dict_index_build_internal_non_clust( +/*================================*/ + const dict_table_t* table, /*!< in: table */ + dict_index_t* index); /*!< in: user representation of + a non-clustered index */ +/**********************************************************************//** +Builds the internal dictionary cache representation for an FTS index. +@return own: the internal representation of the FTS index */ +static +dict_index_t* +dict_index_build_internal_fts( +/*==========================*/ + dict_table_t* table, /*!< in: table */ + dict_index_t* index); /*!< in: user representation of an FTS index */ +/**********************************************************************//** +Prints the data of a column. */ +static +void +dict_col_print_low( +/*===============*/ + const dict_table_t* table, /*!< in: table */ + const dict_col_t* col); /*!< in: column */ +/**********************************************************************//** +Prints the data of an index. 
*/ +static +void +dict_index_print_low( +/*=================*/ + dict_index_t* index); /*!< in: index */ +/**********************************************************************//** +Prints the data of a field. */ +static +void +dict_field_print_low( +/*=================*/ + const dict_field_t* field); /*!< in: field */ + +/**********************************************************************//** +Removes an index from the dictionary cache. */ +static +void +dict_index_remove_from_cache_low( +/*=============================*/ + dict_table_t* table, /*!< in/out: table */ + dict_index_t* index, /*!< in, own: index */ + ibool lru_evict); /*!< in: TRUE if the removal is part of an eviction + to make room in the table LRU list */ +/**********************************************************************//** +Removes a table object from the dictionary cache. */ +static +void +dict_table_remove_from_cache_low( +/*=============================*/ + dict_table_t* table, /*!< in, own: table */ + ibool lru_evict); /*!< in: TRUE if evicting from LRU */ +#ifdef UNIV_DEBUG +/**********************************************************************//** +Validate the dictionary table LRU list. +@return TRUE if validate OK */ +static +ibool +dict_lru_validate(void); +/*===================*/ +/**********************************************************************//** +Check if table is in the dictionary table LRU list. +@return TRUE if table found */ +static +ibool +dict_lru_find_table( +/*================*/ + const dict_table_t* find_table); /*!< in: table to find */ +/**********************************************************************//** +Check if a table exists in the dict table non-LRU list. +@return TRUE if table found */ +static +ibool +dict_non_lru_find_table( +/*====================*/ + const dict_table_t* find_table); /*!< in: table to find */ +#endif /* UNIV_DEBUG */ + +/* Stream for storing detailed information about the latest foreign key +and unique key errors. 
Only created if !srv_read_only_mode */ +UNIV_INTERN FILE* dict_foreign_err_file = NULL; +/* mutex protecting the foreign and unique error buffers */ +UNIV_INTERN ib_mutex_t dict_foreign_err_mutex; + +/******************************************************************//** +Makes all characters in a NUL-terminated UTF-8 string lower case. */ +UNIV_INTERN +void +dict_casedn_str( +/*============*/ + char* a) /*!< in/out: string to put in lower case */ +{ + innobase_casedn_str(a); +} + +/********************************************************************//** +Checks if the database name in two table names is the same. +The names are compared character by character up to the first '/'. +@return TRUE if both names agree up to and including the first '/', +FALSE at the first differing character */ +UNIV_INTERN +ibool +dict_tables_have_same_db( +/*=====================*/ + const char* name1, /*!< in: table name in the form + dbname '/' tablename */ + const char* name2) /*!< in: table name in the form + dbname '/' tablename */ +{ + for (; *name1 == *name2; name1++, name2++) { + if (*name1 == '/') { + return(TRUE); + } + ut_a(*name1); /* the names must contain '/' */ + } + return(FALSE); +} + +/********************************************************************//** +Returns the part of a table name that follows the first '/', i.e. the +name with dbname and '/' removed. Asserts that the name contains '/'. +@return pointer into name, just past the first '/' */ +UNIV_INTERN +const char* +dict_remove_db_name( +/*================*/ + const char* name) /*!< in: table name in the form + dbname '/' tablename */ +{ + const char* s = strchr(name, '/'); + ut_a(s); + + return(s + 1); +} + +/********************************************************************//** +Get the database name length in a table name. +Asserts that the name contains '/'. +@return database name length, i.e. the offset of the first '/' */ +UNIV_INTERN +ulint +dict_get_db_name_len( +/*=================*/ + const char* name) /*!< in: table name in the form + dbname '/' tablename */ +{ + const char* s; + s = strchr(name, '/'); + ut_a(s); + return(s - name); +} + +/********************************************************************//** +Reserves the dictionary system mutex for MySQL. 
*/ +UNIV_INTERN +void +dict_mutex_enter_for_mysql(void) +/*============================*/ +{ + mutex_enter(&(dict_sys->mutex)); +} + +/********************************************************************//** +Releases the dictionary system mutex for MySQL. */ +UNIV_INTERN +void +dict_mutex_exit_for_mysql(void) +/*===========================*/ +{ + mutex_exit(&(dict_sys->mutex)); +} + +/** Allocate and init a dict_table_t's stats latch. +This function must not be called concurrently on the same table object. +The allocation uses new(std::nothrow) and a failure is fatal (ut_a). +@param[in,out] table_void table whose stats latch to create */ +static +void +dict_table_stats_latch_alloc( + void* table_void) +{ + dict_table_t* table = static_cast<dict_table_t*>(table_void); + + table->stats_latch = new(std::nothrow) rw_lock_t; + + ut_a(table->stats_latch != NULL); + + rw_lock_create(dict_table_stats_key, table->stats_latch, + SYNC_INDEX_TREE); +} + +/** Deinit and free a dict_table_t's stats latch. +This function must not be called concurrently on the same table object. +@param[in,out] table table whose stats latch to free */ +static +void +dict_table_stats_latch_free( + dict_table_t* table) +{ + rw_lock_free(table->stats_latch); + delete table->stats_latch; +} + +/** Create a dict_table_t's stats latch or delay for lazy creation. +This function is only called from either single threaded environment +or from a thread that has not shared the table object with other threads. +@param[in,out] table table whose stats latch to create +@param[in] enabled if false then the latch is disabled +and dict_table_stats_lock()/unlock() become noop on this table. */ + +void +dict_table_stats_latch_create( + dict_table_t* table, + bool enabled) +{ + if (!enabled) { /* disabled: a NULL latch marked as DONE is never allocated later */ + table->stats_latch = NULL; + table->stats_latch_created = os_once::DONE; + return; + } + +#ifdef HAVE_ATOMIC_BUILTINS + /* We create this lazily the first time it is used. 
*/ + table->stats_latch = NULL; + table->stats_latch_created = os_once::NEVER_DONE; +#else /* HAVE_ATOMIC_BUILTINS */ + + dict_table_stats_latch_alloc(table); + + table->stats_latch_created = os_once::DONE; +#endif /* HAVE_ATOMIC_BUILTINS */ +} + +/** Destroy a dict_table_t's stats latch. +This function is only called from either single threaded environment +or from a thread that has not shared the table object with other threads. +Does nothing unless the latch was marked as created and is not NULL. +@param[in,out] table table whose stats latch to destroy */ + +void +dict_table_stats_latch_destroy( + dict_table_t* table) +{ + if (table->stats_latch_created == os_once::DONE + && table->stats_latch != NULL) { + + dict_table_stats_latch_free(table); + } +} + +/**********************************************************************//** +Lock the appropriate latch to protect a given table's statistics. +With HAVE_ATOMIC_BUILTINS the latch is allocated here on first use. */ +UNIV_INTERN +void +dict_table_stats_lock( +/*==================*/ + dict_table_t* table, /*!< in: table */ + ulint latch_mode) /*!< in: RW_S_LATCH or RW_X_LATCH; any other value ends in ut_error when the table has a latch */ +{ + ut_ad(table != NULL); + ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); + +#ifdef HAVE_ATOMIC_BUILTINS + os_once::do_or_wait_for_done( + &table->stats_latch_created, + dict_table_stats_latch_alloc, table); +#else /* HAVE_ATOMIC_BUILTINS */ + ut_ad(table->stats_latch_created == os_once::DONE); +#endif /* HAVE_ATOMIC_BUILTINS */ + + if (table->stats_latch == NULL) { + /* This is a dummy table object that is private in the current + thread and is not shared between multiple threads, thus we + skip any locking. 
*/ + return; + } + + switch (latch_mode) { + case RW_S_LATCH: + rw_lock_s_lock(table->stats_latch); + break; + case RW_X_LATCH: + rw_lock_x_lock(table->stats_latch); + break; + case RW_NO_LATCH: + /* fall through */ + default: + ut_error; + } +} + +/**********************************************************************//** +Unlock the latch that has been locked by dict_table_stats_lock() */ +UNIV_INTERN +void +dict_table_stats_unlock( +/*====================*/ + dict_table_t* table, /*!< in: table */ + ulint latch_mode) /*!< in: RW_S_LATCH or + RW_X_LATCH; any other value ends in ut_error when the table has a latch */ +{ + ut_ad(table != NULL); + ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); + + if (table->stats_latch == NULL) { + /* This is a dummy table object that is private in the current + thread and is not shared between multiple threads, thus we + skip any locking. */ + return; + } + + switch (latch_mode) { + case RW_S_LATCH: + rw_lock_s_unlock(table->stats_latch); + break; + case RW_X_LATCH: + rw_lock_x_unlock(table->stats_latch); + break; + case RW_NO_LATCH: + /* fall through */ + default: + ut_error; + } +} + +/**********************************************************************//** +Try to drop any indexes after an aborted index creation. +This can also be after a server kill during DROP INDEX. 
*/ +static +void +dict_table_try_drop_aborted( +/*========================*/ + dict_table_t* table, /*!< in: table, or NULL if it + needs to be looked up again */ + table_id_t table_id, /*!< in: table identifier */ + ulint ref_count) /*!< in: expected table->n_ref_count */ +{ + trx_t* trx; + + trx = trx_allocate_for_background(); + trx->op_info = "try to drop any indexes after an aborted index creation"; + row_mysql_lock_data_dictionary(trx); + trx_set_dict_operation(trx, TRX_DICT_OP_INDEX); + + if (table == NULL) { + table = dict_table_open_on_id_low( + table_id, DICT_ERR_IGNORE_NONE); + } else { + ut_ad(table->id == table_id); + } + + /* Act only if the table was found, its reference count equals the expected value and it is still flagged drop_aborted. */ if (table && table->n_ref_count == ref_count && table->drop_aborted) { + /* Silence a debug assertion in row_merge_drop_indexes(). */ + ut_d(table->n_ref_count++); + row_merge_drop_indexes(trx, table, TRUE); + ut_d(table->n_ref_count--); + ut_ad(table->n_ref_count == ref_count); + trx_commit_for_mysql(trx); + } + + row_mysql_unlock_data_dictionary(trx); + trx_free_for_background(trx); +} + +/**********************************************************************//** +When opening a table, +try to drop any indexes after an aborted index creation. +Release the dict_sys->mutex. */ +static +void +dict_table_try_drop_aborted_and_mutex_exit( +/*=======================================*/ + dict_table_t* table, /*!< in: table (may be NULL) */ + ibool try_drop) /*!< in: TRUE if should try to + drop indexes whose online creation + was aborted */ +{ + if (try_drop + && table != NULL + && table->drop_aborted + && table->n_ref_count == 1 + && dict_table_get_first_index(table)) { + + /* Attempt to drop the indexes whose online creation + was aborted. */ + table_id_t table_id = table->id; + + mutex_exit(&dict_sys->mutex); + + dict_table_try_drop_aborted(table, table_id, 1); + } else { + mutex_exit(&dict_sys->mutex); + } +} + +/********************************************************************//** +Decrements the count of open handles to a table. 
*/ +UNIV_INTERN +void +dict_table_close( +/*=============*/ + dict_table_t* table, /*!< in/out: table */ + ibool dict_locked, /*!< in: TRUE=data dictionary locked */ + ibool try_drop) /*!< in: TRUE=try to drop any orphan + indexes after an aborted online + index creation; only consulted + when dict_locked is FALSE */ +{ + if (!dict_locked) { + mutex_enter(&dict_sys->mutex); + } + + ut_ad(mutex_own(&dict_sys->mutex)); + ut_a(table->n_ref_count > 0); + + --table->n_ref_count; + + /* Force persistent stats re-read upon next open of the table + so that FLUSH TABLE can be used to forcibly fetch stats from disk + if they have been manually modified. We reset table->stat_initialized + only if table reference count is 0 because we do not want too frequent + stats re-reads (e.g. in other cases than FLUSH TABLE). */ + if (strchr(table->name, '/') != NULL + && table->n_ref_count == 0 + && dict_stats_is_persistent_enabled(table)) { + + dict_stats_deinit(table); + } + + MONITOR_DEC(MONITOR_TABLE_REFERENCE); + + ut_ad(dict_lru_validate()); + +#ifdef UNIV_DEBUG + if (table->can_be_evicted) { + ut_ad(dict_lru_find_table(table)); + } else { + ut_ad(dict_non_lru_find_table(table)); + } +#endif /* UNIV_DEBUG */ + + if (!dict_locked) { + table_id_t table_id = table->id; + ibool drop_aborted; + + /* NOTE(review): this requires n_ref_count == 1 after the decrement above, while dict_table_try_drop_aborted() is given an expected count of 0 below and only acts when the counts match -- confirm the two values are meant to differ. */ drop_aborted = try_drop + && table->drop_aborted + && table->n_ref_count == 1 + && dict_table_get_first_index(table); + + mutex_exit(&dict_sys->mutex); + + if (drop_aborted) { /* the table is looked up again by id, hence the NULL table argument */ + dict_table_try_drop_aborted(NULL, table_id, 0); + } + } +} +#endif /* !UNIV_HOTBACKUP */ + +/**********************************************************************//** +Returns a column's name. +@return column name. NOTE: not guaranteed to stay valid if table is +modified in any way (columns added, etc.). 
*/ +UNIV_INTERN +const char* +dict_table_get_col_name( +/*====================*/ + const dict_table_t* table, /*!< in: table */ + ulint col_nr) /*!< in: column number */ +{ + ulint i; + const char* s; + + ut_ad(table); + ut_ad(col_nr < table->n_def); + ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); + + s = table->col_names; + if (s) { /* names are walked as consecutive NUL-terminated strings: skip col_nr of them */ + for (i = 0; i < col_nr; i++) { + s += strlen(s) + 1; + } + } + + /* s is NULL here if table->col_names was NULL */ return(s); +} + +#ifndef UNIV_HOTBACKUP +/********************************************************************//** +Acquire the autoinc lock. */ +UNIV_INTERN +void +dict_table_autoinc_lock( +/*====================*/ + dict_table_t* table) /*!< in/out: table */ +{ + mutex_enter(&table->autoinc_mutex); +} + +/********************************************************************//** +Unconditionally set the autoinc counter. +The caller is expected to own table->autoinc_mutex (checked with ut_ad). */ +UNIV_INTERN +void +dict_table_autoinc_initialize( +/*==========================*/ + dict_table_t* table, /*!< in/out: table */ + ib_uint64_t value) /*!< in: next value to assign to a row */ +{ + ut_ad(mutex_own(&table->autoinc_mutex)); + + table->autoinc = value; +} + +/************************************************************************ +Get all the FTS indexes on a table. +Pointers to the indexes whose type equals DICT_FTS are pushed to the vector. +@return number of FTS indexes */ +UNIV_INTERN +ulint +dict_table_get_all_fts_indexes( +/*===========================*/ + dict_table_t* table, /*!< in: table */ + ib_vector_t* indexes) /*!< in/out: must be empty on entry; + receives all FTS indexes on this + table */ +{ + dict_index_t* index; + + ut_a(ib_vector_size(indexes) == 0); + + for (index = dict_table_get_first_index(table); + index; + index = dict_table_get_next_index(index)) { + + if (index->type == DICT_FTS) { + ib_vector_push(indexes, &index); + } + } + + return(ib_vector_size(indexes)); +} + +/********************************************************************//** +Reads the next autoinc value (== autoinc counter value), 0 if not yet +initialized. 
+@return value for a new row, or 0 */ +UNIV_INTERN +ib_uint64_t +dict_table_autoinc_read( +/*====================*/ + const dict_table_t* table) /*!< in: table */ +{ + ut_ad(mutex_own(&table->autoinc_mutex)); + + return(table->autoinc); +} + +/********************************************************************//** +Updates the autoinc counter if the value supplied is greater than the +current value. An equal or smaller value leaves the counter unchanged. */ +UNIV_INTERN +void +dict_table_autoinc_update_if_greater( +/*=================================*/ + + dict_table_t* table, /*!< in/out: table */ + ib_uint64_t value) /*!< in: value which was assigned to a row */ +{ + ut_ad(mutex_own(&table->autoinc_mutex)); + + if (value > table->autoinc) { + + table->autoinc = value; + } +} + +/********************************************************************//** +Release the autoinc lock. */ +UNIV_INTERN +void +dict_table_autoinc_unlock( +/*======================*/ + dict_table_t* table) /*!< in/out: table */ +{ + mutex_exit(&table->autoinc_mutex); +} +#endif /* !UNIV_HOTBACKUP */ + +/********************************************************************//** +Looks for column n in an index. 
+@return position in internal representation of the index; +ULINT_UNDEFINED if not contained */ +UNIV_INTERN +ulint +dict_index_get_nth_col_or_prefix_pos( +/*=================================*/ + const dict_index_t* index, /*!< in: index */ + ulint n, /*!< in: column number */ + ibool inc_prefix) /*!< in: TRUE=consider + column prefixes too */ +{ + const dict_field_t* field; + const dict_col_t* col; + ulint pos; + ulint n_fields; + + ut_ad(index); + ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); + + col = dict_table_get_nth_col(index->table, n); + + if (dict_index_is_clust(index)) { /* clustered index: the position comes from dict_col_get_clust_pos(); inc_prefix is not consulted */ + + return(dict_col_get_clust_pos(col, index)); + } + + n_fields = dict_index_get_n_fields(index); + + for (pos = 0; pos < n_fields; pos++) { + field = dict_index_get_nth_field(index, pos); + + if (col == field->col + && (inc_prefix || field->prefix_len == 0)) { + + return(pos); + } + } + + return(ULINT_UNDEFINED); +} + +#ifndef UNIV_HOTBACKUP +/********************************************************************//** +Returns TRUE if the index contains a column or a prefix of that column. +@return TRUE if contains the column or its prefix */ +UNIV_INTERN +ibool +dict_index_contains_col_or_prefix( +/*==============================*/ + const dict_index_t* index, /*!< in: index */ + ulint n) /*!< in: column number */ +{ + const dict_field_t* field; + const dict_col_t* col; + ulint pos; + ulint n_fields; + + ut_ad(index); + ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); + + if (dict_index_is_clust(index)) { /* a clustered index is reported as containing every column, without looking at n */ + + return(TRUE); + } + + col = dict_table_get_nth_col(index->table, n); + + n_fields = dict_index_get_n_fields(index); + + for (pos = 0; pos < n_fields; pos++) { + field = dict_index_get_nth_field(index, pos); + + if (col == field->col) { + + return(TRUE); + } + } + + return(FALSE); +} + +/********************************************************************//** +Looks for a matching field in an index. The column has to be the same. 
The +column in index must be complete, or must contain a prefix at least as long as the +column prefix in index2. That is, we must be able to construct the prefix in index2 +from the prefix in index. +@return position in internal representation of the index; +ULINT_UNDEFINED if not contained */ +UNIV_INTERN +ulint +dict_index_get_nth_field_pos( +/*=========================*/ + const dict_index_t* index, /*!< in: index from which to search */ + const dict_index_t* index2, /*!< in: index */ + ulint n) /*!< in: field number in index2 */ +{ + const dict_field_t* field; + const dict_field_t* field2; + ulint n_fields; + ulint pos; + + ut_ad(index); + ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); + + field2 = dict_index_get_nth_field(index2, n); + + n_fields = dict_index_get_n_fields(index); + + for (pos = 0; pos < n_fields; pos++) { + field = dict_index_get_nth_field(index, pos); + + /* match: same column, and either the whole column is indexed here (prefix_len 0) or index2 uses a non-zero prefix no longer than ours */ if (field->col == field2->col + && (field->prefix_len == 0 + || (field->prefix_len >= field2->prefix_len + && field2->prefix_len != 0))) { + + return(pos); + } + } + + return(ULINT_UNDEFINED); +} + +/**********************************************************************//** +Returns a table object based on table id. +A table that is found has its n_ref_count incremented. +@return table, NULL if does not exist */ +UNIV_INTERN +dict_table_t* +dict_table_open_on_id( +/*==================*/ + table_id_t table_id, /*!< in: table id */ + ibool dict_locked, /*!< in: TRUE=data dictionary locked */ + dict_table_op_t table_op) /*!< in: operation to perform */ +{ + dict_table_t* table; + + if (!dict_locked) { + mutex_enter(&dict_sys->mutex); + } + + ut_ad(mutex_own(&dict_sys->mutex)); + + table = dict_table_open_on_id_low( + table_id, + table_op == DICT_TABLE_OP_LOAD_TABLESPACE + ? 
DICT_ERR_IGNORE_RECOVER_LOCK + : DICT_ERR_IGNORE_NONE); + + if (table != NULL) { + + if (table->can_be_evicted) { + dict_move_to_mru(table); + } + + ++table->n_ref_count; + + MONITOR_INC(MONITOR_TABLE_REFERENCE); + } + + if (!dict_locked) { /* the mutex taken above is released here; orphan indexes are only tried for DICT_TABLE_OP_DROP_ORPHAN */ + dict_table_try_drop_aborted_and_mutex_exit( + table, table_op == DICT_TABLE_OP_DROP_ORPHAN); + } + + return(table); +} + +/********************************************************************//** +Looks for column n position in the clustered index. +@return position in internal representation of the clustered index */ +UNIV_INTERN +ulint +dict_table_get_nth_col_pos( +/*=======================*/ + const dict_table_t* table, /*!< in: table */ + ulint n) /*!< in: column number */ +{ + return(dict_index_get_nth_col_pos(dict_table_get_first_index(table), + n)); +} + +/********************************************************************//** +Checks if a column is in the ordering columns of the clustered index of a +table. Column prefixes are treated like whole columns. +@return TRUE if the column, or its prefix, is in the clustered key */ +UNIV_INTERN +ibool +dict_table_col_in_clustered_key( +/*============================*/ + const dict_table_t* table, /*!< in: table */ + ulint n) /*!< in: column number */ +{ + const dict_index_t* index; + const dict_field_t* field; + const dict_col_t* col; + ulint pos; + ulint n_fields; + + ut_ad(table); + + col = dict_table_get_nth_col(table, n); + + index = dict_table_get_first_index(table); + + n_fields = dict_index_get_n_unique(index); /* only the first n_unique fields of the first index are examined */ + + for (pos = 0; pos < n_fields; pos++) { + field = dict_index_get_nth_field(index, pos); + + if (col == field->col) { + + return(TRUE); + } + } + + return(FALSE); +} + +/**********************************************************************//** +Inits the data dictionary module. 
*/ +UNIV_INTERN +void +dict_init(void) +/*===========*/ +{ + dict_sys = static_cast<dict_sys_t*>(mem_zalloc(sizeof(*dict_sys))); + + mutex_create(dict_sys_mutex_key, &dict_sys->mutex, SYNC_DICT); + + /* Both hash tables are created with the same size argument, derived from the current buffer pool size. */ dict_sys->table_hash = hash_create(buf_pool_get_curr_size() + / (DICT_POOL_PER_TABLE_HASH + * UNIV_WORD_SIZE)); + dict_sys->table_id_hash = hash_create(buf_pool_get_curr_size() + / (DICT_POOL_PER_TABLE_HASH + * UNIV_WORD_SIZE)); + rw_lock_create(dict_operation_lock_key, + &dict_operation_lock, SYNC_DICT_OPERATION); + + if (!srv_read_only_mode) { /* the error file and its mutex are created only when not in read-only mode */ + dict_foreign_err_file = os_file_create_tmpfile(); + ut_a(dict_foreign_err_file); + + mutex_create(dict_foreign_err_mutex_key, + &dict_foreign_err_mutex, SYNC_NO_ORDER_CHECK); + } +} + +/**********************************************************************//** +Move to the most recently used segment of the LRU list. +The table must be evictable (asserted with ut_a). */ +UNIV_INTERN +void +dict_move_to_mru( +/*=============*/ + dict_table_t* table) /*!< in: table to move to MRU */ +{ + ut_ad(mutex_own(&dict_sys->mutex)); + ut_ad(dict_lru_validate()); + ut_ad(dict_lru_find_table(table)); + + ut_a(table->can_be_evicted); + + /* unlink the table and re-insert it at the head of table_LRU */ UT_LIST_REMOVE(table_LRU, dict_sys->table_LRU, table); + + UT_LIST_ADD_FIRST(table_LRU, dict_sys->table_LRU, table); + + ut_ad(dict_lru_validate()); +} + +/**********************************************************************//** +Returns a table object and increment its open handle count. +NOTE! This is a high-level function to be used mainly from outside the +'dict' module. Inside this directory dict_table_get_low +is usually the appropriate function. 
+@return table, NULL if does not exist */ +UNIV_INTERN +dict_table_t* +dict_table_open_on_name( +/*====================*/ + const char* table_name, /*!< in: table name */ + ibool dict_locked, /*!< in: TRUE=data dictionary locked */ + ibool try_drop, /*!< in: TRUE=try to drop any orphan + indexes after an aborted online + index creation */ + dict_err_ignore_t + ignore_err) /*!< in: error to be ignored when + loading a table definition */ +{ + dict_table_t* table; + + if (!dict_locked) { + mutex_enter(&(dict_sys->mutex)); + } + + ut_ad(table_name); + ut_ad(mutex_own(&dict_sys->mutex)); + + /* try the cache first and load the definition only on a miss */ table = dict_table_check_if_in_cache_low(table_name); + + if (table == NULL) { + table = dict_load_table(table_name, TRUE, ignore_err); + } + + ut_ad(!table || table->cached); + + if (table != NULL) { + + /* If table is corrupted and no error is to be ignored, return NULL; the reference count is not incremented on this path */ + if (ignore_err == DICT_ERR_IGNORE_NONE + && table->corrupted) { + + /* Make life easy for drop table. */ + if (table->can_be_evicted) { + dict_table_move_from_lru_to_non_lru(table); + } + + if (!dict_locked) { + mutex_exit(&dict_sys->mutex); + } + + ut_print_timestamp(stderr); + + fprintf(stderr, " InnoDB: table "); + ut_print_name(stderr, NULL, TRUE, table->name); + fprintf(stderr, "is corrupted. Please drop the table " + "and recreate\n"); + + return(NULL); + } + + if (table->can_be_evicted) { + dict_move_to_mru(table); + } + + ++table->n_ref_count; + + MONITOR_INC(MONITOR_TABLE_REFERENCE); + } + + ut_ad(dict_lru_validate()); + + if (!dict_locked) { /* releases the mutex taken above, after optionally trying to drop aborted indexes */ + dict_table_try_drop_aborted_and_mutex_exit(table, try_drop); + } + + return(table); +} +#endif /* !UNIV_HOTBACKUP */ + +/**********************************************************************//** +Adds system columns to a table object. 
*/ +UNIV_INTERN +void +dict_table_add_system_columns( +/*==========================*/ + dict_table_t* table, /*!< in/out: table */ + mem_heap_t* heap) /*!< in: temporary heap */ +{ + ut_ad(table); + ut_ad(table->n_def == table->n_cols - DATA_N_SYS_COLS); + ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); + ut_ad(!table->cached); + + /* NOTE: the system columns MUST be added in the following order + (so that they can be indexed by the numerical value of DATA_ROW_ID, + etc.) and as the last columns of the table memory object. + The clustered index will not always physically contain all + system columns. */ + + dict_mem_table_add_col(table, heap, "DB_ROW_ID", DATA_SYS, + DATA_ROW_ID | DATA_NOT_NULL, + DATA_ROW_ID_LEN); +#if DATA_ROW_ID != 0 +#error "DATA_ROW_ID != 0" +#endif + dict_mem_table_add_col(table, heap, "DB_TRX_ID", DATA_SYS, + DATA_TRX_ID | DATA_NOT_NULL, + DATA_TRX_ID_LEN); +#if DATA_TRX_ID != 1 +#error "DATA_TRX_ID != 1" +#endif + dict_mem_table_add_col(table, heap, "DB_ROLL_PTR", DATA_SYS, + DATA_ROLL_PTR | DATA_NOT_NULL, + DATA_ROLL_PTR_LEN); +#if DATA_ROLL_PTR != 2 +#error "DATA_ROLL_PTR != 2" +#endif + + /* This check reminds that if a new system column is added to + the program, it should be dealt with here */ +#if DATA_N_SYS_COLS != 3 +#error "DATA_N_SYS_COLS != 3" +#endif +} + +#ifndef UNIV_HOTBACKUP +/**********************************************************************//** +Adds a table object to the dictionary cache. 
*/ +UNIV_INTERN +void +dict_table_add_to_cache( +/*====================*/ + dict_table_t* table, /*!< in: table */ + ibool can_be_evicted, /*!< in: TRUE if can be evicted */ + mem_heap_t* heap) /*!< in: temporary heap */ +{ + ulint fold; + ulint id_fold; + ulint i; + ulint row_len; + + ut_ad(dict_lru_validate()); + + /* The lower limit for what we consider a "big" row */ +#define BIG_ROW_SIZE 1024 + + ut_ad(mutex_own(&(dict_sys->mutex))); + + dict_table_add_system_columns(table, heap); + + table->cached = TRUE; + + fold = ut_fold_string(table->name); + id_fold = ut_fold_ull(table->id); + + row_len = 0; + for (i = 0; i < table->n_def; i++) { + ulint col_len = dict_col_get_max_size( + dict_table_get_nth_col(table, i)); + + row_len += col_len; + + /* If we have a single unbounded field, or several gigantic + fields, mark the maximum row size as BIG_ROW_SIZE. */ + if (row_len >= BIG_ROW_SIZE || col_len >= BIG_ROW_SIZE) { + row_len = BIG_ROW_SIZE; + + break; + } + } + + table->big_rows = row_len >= BIG_ROW_SIZE; + + /* Look for a table with the same name: error if such exists */ + { + dict_table_t* table2; + HASH_SEARCH(name_hash, dict_sys->table_hash, fold, + dict_table_t*, table2, ut_ad(table2->cached), + ut_strcmp(table2->name, table->name) == 0); + ut_a(table2 == NULL); + +#ifdef UNIV_DEBUG + /* Look for the same table pointer with a different name */ + HASH_SEARCH_ALL(name_hash, dict_sys->table_hash, + dict_table_t*, table2, ut_ad(table2->cached), + table2 == table); + ut_ad(table2 == NULL); +#endif /* UNIV_DEBUG */ + } + + /* Look for a table with the same id: error if such exists */ + { + dict_table_t* table2; + HASH_SEARCH(id_hash, dict_sys->table_id_hash, id_fold, + dict_table_t*, table2, ut_ad(table2->cached), + table2->id == table->id); + ut_a(table2 == NULL); + +#ifdef UNIV_DEBUG + /* Look for the same table pointer with a different id */ + HASH_SEARCH_ALL(id_hash, dict_sys->table_id_hash, + dict_table_t*, table2, ut_ad(table2->cached), + table2 == table); + 
ut_ad(table2 == NULL); +#endif /* UNIV_DEBUG */ + } + + /* Add table to hash table of tables */ + HASH_INSERT(dict_table_t, name_hash, dict_sys->table_hash, fold, + table); + + /* Add table to hash table of tables based on table id */ + HASH_INSERT(dict_table_t, id_hash, dict_sys->table_id_hash, id_fold, + table); + + table->can_be_evicted = can_be_evicted; + + if (table->can_be_evicted) { + UT_LIST_ADD_FIRST(table_LRU, dict_sys->table_LRU, table); + } else { + UT_LIST_ADD_FIRST(table_LRU, dict_sys->table_non_LRU, table); + } + + ut_ad(dict_lru_validate()); + + dict_sys->size += mem_heap_get_size(table->heap) + + strlen(table->name) + 1; +} + +/**********************************************************************//** +Test whether a table can be evicted from the LRU cache. +@return TRUE if table can be evicted. */ +static +ibool +dict_table_can_be_evicted( +/*======================*/ + const dict_table_t* table) /*!< in: table to test */ +{ + ut_ad(mutex_own(&dict_sys->mutex)); +#ifdef UNIV_SYNC_DEBUG + ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); +#endif /* UNIV_SYNC_DEBUG */ + + ut_a(table->can_be_evicted); + ut_a(table->foreign_set.empty()); + ut_a(table->referenced_set.empty()); + + if (table->n_ref_count == 0) { + dict_index_t* index; + + /* The transaction commit and rollback are called from + outside the handler interface. This means that there is + a window where the table->n_ref_count can be zero but + the table instance is in "use". */ + + if (lock_table_has_locks(table)) { + return(FALSE); + } + + for (index = dict_table_get_first_index(table); + index != NULL; + index = dict_table_get_next_index(index)) { + + btr_search_t* info = btr_search_get_info(index); + + /* We are not allowed to free the in-memory index + struct dict_index_t until all entries in the adaptive + hash index that point to any of the page belonging to + his b-tree index are dropped. This is so because + dropping of these entries require access to + dict_index_t struct. 
To avoid such scenario we keep + a count of number of such pages in the search_info and + only free the dict_index_t struct when this count + drops to zero. + + See also: dict_index_remove_from_cache_low() */ + + if (btr_search_info_get_ref_count(info, index) > 0) { + return(FALSE); + } + } + + return(TRUE); + } + + return(FALSE); +} + +/**********************************************************************//** +Make room in the table cache by evicting an unused table. The unused table +should not be part of FK relationship and currently not used in any user +transaction. There is no guarantee that it will remove a table. +@return number of tables evicted. If the number of tables in the dict_LRU +is less than max_tables it will not do anything. */ +UNIV_INTERN +ulint +dict_make_room_in_cache( +/*====================*/ + ulint max_tables, /*!< in: max tables allowed in cache */ + ulint pct_check) /*!< in: max percent to check */ +{ + ulint i; + ulint len; + dict_table_t* table; + ulint check_up_to; + ulint n_evicted = 0; + + ut_a(pct_check > 0); + ut_a(pct_check <= 100); + ut_ad(mutex_own(&dict_sys->mutex)); +#ifdef UNIV_SYNC_DEBUG + ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); +#endif /* UNIV_SYNC_DEBUG */ + ut_ad(dict_lru_validate()); + + i = len = UT_LIST_GET_LEN(dict_sys->table_LRU); + + if (len < max_tables) { + return(0); + } + + check_up_to = len - ((len * pct_check) / 100); + + /* Check for overflow */ + ut_a(i == 0 || check_up_to <= i); + + /* Find a suitable candidate to evict from the cache. Don't scan the + entire LRU list. Only scan pct_check list entries. 
*/ + + for (table = UT_LIST_GET_LAST(dict_sys->table_LRU); + table != NULL + && i > check_up_to + && (len - n_evicted) > max_tables; + --i) { + + dict_table_t* prev_table; + + prev_table = UT_LIST_GET_PREV(table_LRU, table); + + if (dict_table_can_be_evicted(table)) { + + dict_table_remove_from_cache_low(table, TRUE); + + ++n_evicted; + } + + table = prev_table; + } + + return(n_evicted); +} + +/**********************************************************************//** +Move a table to the non-LRU list from the LRU list. */ +UNIV_INTERN +void +dict_table_move_from_lru_to_non_lru( +/*================================*/ + dict_table_t* table) /*!< in: table to move from LRU to non-LRU */ +{ + ut_ad(mutex_own(&dict_sys->mutex)); + ut_ad(dict_lru_find_table(table)); + + ut_a(table->can_be_evicted); + + UT_LIST_REMOVE(table_LRU, dict_sys->table_LRU, table); + + UT_LIST_ADD_LAST(table_LRU, dict_sys->table_non_LRU, table); + + table->can_be_evicted = FALSE; +} + +/**********************************************************************//** +Move a table to the LRU list from the non-LRU list. */ +UNIV_INTERN +void +dict_table_move_from_non_lru_to_lru( +/*================================*/ + dict_table_t* table) /*!< in: table to move from non-LRU to LRU */ +{ + ut_ad(mutex_own(&dict_sys->mutex)); + ut_ad(dict_non_lru_find_table(table)); + + ut_a(!table->can_be_evicted); + + UT_LIST_REMOVE(table_LRU, dict_sys->table_non_LRU, table); + + UT_LIST_ADD_LAST(table_LRU, dict_sys->table_LRU, table); + + table->can_be_evicted = TRUE; +} + +/**********************************************************************//** +Looks for an index with the given id given a table instance. 
+@return index or NULL */ +static +dict_index_t* +dict_table_find_index_on_id( +/*========================*/ + const dict_table_t* table, /*!< in: table instance */ + index_id_t id) /*!< in: index id */ +{ + dict_index_t* index; + + for (index = dict_table_get_first_index(table); + index != NULL; + index = dict_table_get_next_index(index)) { + + if (id == index->id) { + /* Found */ + + return(index); + } + } + + return(NULL); +} + +/**********************************************************************//** +Looks for an index with the given id. NOTE that we do not reserve +the dictionary mutex: this function is for emergency purposes like +printing info of a corrupt database page! +@return index or NULL if not found in cache */ +UNIV_INTERN +dict_index_t* +dict_index_find_on_id_low( +/*======================*/ + index_id_t id) /*!< in: index id */ +{ + dict_table_t* table; + + /* This can happen if the system tablespace is the wrong page size */ + if (dict_sys == NULL) { + return(NULL); + } + + for (table = UT_LIST_GET_FIRST(dict_sys->table_LRU); + table != NULL; + table = UT_LIST_GET_NEXT(table_LRU, table)) { + + dict_index_t* index = dict_table_find_index_on_id(table, id); + + if (index != NULL) { + return(index); + } + } + + for (table = UT_LIST_GET_FIRST(dict_sys->table_non_LRU); + table != NULL; + table = UT_LIST_GET_NEXT(table_LRU, table)) { + + dict_index_t* index = dict_table_find_index_on_id(table, id); + + if (index != NULL) { + return(index); + } + } + + return(NULL); +} + +/** Function object to remove a foreign key constraint from the +referenced_set of the referenced table. The foreign key object is +also removed from the dictionary cache. The foreign key constraint +is not removed from the foreign_set of the table containing the +constraint. 
*/ +struct dict_foreign_remove_partial +{ + void operator()(dict_foreign_t* foreign) { + dict_table_t* table = foreign->referenced_table; + if (table != NULL) { + table->referenced_set.erase(foreign); + } + dict_foreign_free(foreign); + } +}; + +/**********************************************************************//** +Renames a table object. +@return TRUE if success */ +UNIV_INTERN +dberr_t +dict_table_rename_in_cache( +/*=======================*/ + dict_table_t* table, /*!< in/out: table */ + const char* new_name, /*!< in: new name */ + ibool rename_also_foreigns)/*!< in: in ALTER TABLE we want + to preserve the original table name + in constraints which reference it */ +{ + dict_foreign_t* foreign; + dict_index_t* index; + ulint fold; + char old_name[MAX_FULL_NAME_LEN + 1]; + + ut_ad(mutex_own(&(dict_sys->mutex))); + + /* store the old/current name to an automatic variable */ + if (strlen(table->name) + 1 <= sizeof(old_name)) { + memcpy(old_name, table->name, strlen(table->name) + 1); + } else { + ut_print_timestamp(stderr); + fprintf(stderr, "InnoDB: too long table name: '%s', " + "max length is %d\n", table->name, + MAX_FULL_NAME_LEN); + ut_error; + } + + fold = ut_fold_string(new_name); + + /* Look for a table with the same name: error if such exists */ + dict_table_t* table2; + HASH_SEARCH(name_hash, dict_sys->table_hash, fold, + dict_table_t*, table2, ut_ad(table2->cached), + (ut_strcmp(table2->name, new_name) == 0)); + DBUG_EXECUTE_IF("dict_table_rename_in_cache_failure", + if (table2 == NULL) { + table2 = (dict_table_t*) -1; + } ); + if (table2) { + ib_logf(IB_LOG_LEVEL_ERROR, + "Cannot rename table '%s' to '%s' since the " + "dictionary cache already contains '%s'.", + old_name, new_name, new_name); + return(DB_ERROR); + } + + /* If the table is stored in a single-table tablespace, rename the + .ibd file and rebuild the .isl file if needed. 
*/ + + if (dict_table_is_discarded(table)) { + os_file_type_t type; + ibool exists; + char* filepath; + + ut_ad(table->space != TRX_SYS_SPACE); + + if (DICT_TF_HAS_DATA_DIR(table->flags)) { + + dict_get_and_save_data_dir_path(table, true); + ut_a(table->data_dir_path); + + filepath = os_file_make_remote_pathname( + table->data_dir_path, table->name, "ibd"); + } else { + filepath = fil_make_ibd_name(table->name, false); + } + + fil_delete_tablespace(table->space, BUF_REMOVE_ALL_NO_WRITE); + + /* Delete any temp file hanging around. */ + if (os_file_status(filepath, &exists, &type) + && exists + && !os_file_delete_if_exists(innodb_file_temp_key, + filepath)) { + + ib_logf(IB_LOG_LEVEL_INFO, + "Delete of %s failed.", filepath); + } + + mem_free(filepath); + + } else if (table->space != TRX_SYS_SPACE) { + char* new_path = NULL; + + if (table->dir_path_of_temp_table != NULL) { + ut_print_timestamp(stderr); + fputs(" InnoDB: Error: trying to rename a" + " TEMPORARY TABLE ", stderr); + ut_print_name(stderr, NULL, TRUE, old_name); + fputs(" (", stderr); + ut_print_filename(stderr, + table->dir_path_of_temp_table); + fputs(" )\n", stderr); + return(DB_ERROR); + + } else if (DICT_TF_HAS_DATA_DIR(table->flags)) { + char* old_path; + + old_path = fil_space_get_first_path(table->space); + + new_path = os_file_make_new_pathname( + old_path, new_name); + + mem_free(old_path); + + dberr_t err = fil_create_link_file( + new_name, new_path); + + if (err != DB_SUCCESS) { + mem_free(new_path); + return(DB_TABLESPACE_EXISTS); + } + } + + ibool success = fil_rename_tablespace( + old_name, table->space, new_name, new_path); + + /* If the tablespace is remote, a new .isl file was created + If success, delete the old one. If not, delete the new one. */ + if (new_path) { + + mem_free(new_path); + fil_delete_link_file(success ? 
old_name : new_name); + } + + if (!success) { + return(DB_ERROR); + } + } + + /* Remove table from the hash tables of tables */ + HASH_DELETE(dict_table_t, name_hash, dict_sys->table_hash, + ut_fold_string(old_name), table); + + if (strlen(new_name) > strlen(table->name)) { + /* We allocate MAX_FULL_NAME_LEN + 1 bytes here to avoid + memory fragmentation, we assume a repeated calls of + ut_realloc() with the same size do not cause fragmentation */ + ut_a(strlen(new_name) <= MAX_FULL_NAME_LEN); + + table->name = static_cast<char*>( + ut_realloc(table->name, MAX_FULL_NAME_LEN + 1)); + } + memcpy(table->name, new_name, strlen(new_name) + 1); + + /* Add table to hash table of tables */ + HASH_INSERT(dict_table_t, name_hash, dict_sys->table_hash, fold, + table); + + dict_sys->size += strlen(new_name) - strlen(old_name); + ut_a(dict_sys->size > 0); + + /* Update the table_name field in indexes */ + for (index = dict_table_get_first_index(table); + index != NULL; + index = dict_table_get_next_index(index)) { + + index->table_name = table->name; + } + + if (!rename_also_foreigns) { + /* In ALTER TABLE we think of the rename table operation + in the direction table -> temporary table (#sql...) + as dropping the table with the old name and creating + a new with the new name. Thus we kind of drop the + constraints from the dictionary cache here. The foreign key + constraints will be inherited to the new table from the + system tables through a call of dict_load_foreigns. 
*/ + + /* Remove the foreign constraints from the cache */ + std::for_each(table->foreign_set.begin(), + table->foreign_set.end(), + dict_foreign_remove_partial()); + table->foreign_set.clear(); + + /* Reset table field in referencing constraints */ + for (dict_foreign_set::iterator it + = table->referenced_set.begin(); + it != table->referenced_set.end(); + ++it) { + + foreign = *it; + foreign->referenced_table = NULL; + foreign->referenced_index = NULL; + + } + + /* Make the set of referencing constraints empty */ + table->referenced_set.clear(); + + return(DB_SUCCESS); + } + + /* Update the table name fields in foreign constraints, and update also + the constraint id of new format >= 4.0.18 constraints. Note that at + this point we have already changed table->name to the new name. */ + + dict_foreign_set fk_set; + + for (;;) { + + dict_foreign_set::iterator it + = table->foreign_set.begin(); + + if (it == table->foreign_set.end()) { + break; + } + + foreign = *it; + + if (foreign->referenced_table) { + foreign->referenced_table->referenced_set.erase(foreign); + } + + if (ut_strlen(foreign->foreign_table_name) + < ut_strlen(table->name)) { + /* Allocate a longer name buffer; + TODO: store buf len to save memory */ + + foreign->foreign_table_name = mem_heap_strdup( + foreign->heap, table->name); + dict_mem_foreign_table_name_lookup_set(foreign, TRUE); + } else { + strcpy(foreign->foreign_table_name, table->name); + dict_mem_foreign_table_name_lookup_set(foreign, FALSE); + } + if (strchr(foreign->id, '/')) { + /* This is a >= 4.0.18 format id */ + + ulint db_len; + char* old_id; + char old_name_cs_filename[MAX_TABLE_NAME_LEN+20]; + uint errors = 0; + + /* All table names are internally stored in charset + my_charset_filename (except the temp tables and the + partition identifier suffix in partition tables). The + foreign key constraint names are internally stored + in UTF-8 charset. 
The variable fkid here is used + to store foreign key constraint name in charset + my_charset_filename for comparison further below. */ + char fkid[MAX_TABLE_NAME_LEN+20]; + ibool on_tmp = FALSE; + + /* The old table name in my_charset_filename is stored + in old_name_cs_filename */ + + strncpy(old_name_cs_filename, old_name, + MAX_TABLE_NAME_LEN); + if (strstr(old_name, TEMP_TABLE_PATH_PREFIX) == NULL) { + + innobase_convert_to_system_charset( + strchr(old_name_cs_filename, '/') + 1, + strchr(old_name, '/') + 1, + MAX_TABLE_NAME_LEN, &errors); + + if (errors) { + /* There has been an error to convert + old table into UTF-8. This probably + means that the old table name is + actually in UTF-8. */ + innobase_convert_to_filename_charset( + strchr(old_name_cs_filename, + '/') + 1, + strchr(old_name, '/') + 1, + MAX_TABLE_NAME_LEN); + } else { + /* Old name already in + my_charset_filename */ + strncpy(old_name_cs_filename, old_name, + MAX_TABLE_NAME_LEN); + } + } + + strncpy(fkid, foreign->id, MAX_TABLE_NAME_LEN); + + if (strstr(fkid, TEMP_TABLE_PATH_PREFIX) == NULL) { + innobase_convert_to_filename_charset( + strchr(fkid, '/') + 1, + strchr(foreign->id, '/') + 1, + MAX_TABLE_NAME_LEN+20); + } else { + on_tmp = TRUE; + } + + old_id = mem_strdup(foreign->id); + + if (ut_strlen(fkid) > ut_strlen(old_name_cs_filename) + + ((sizeof dict_ibfk) - 1) + && !memcmp(fkid, old_name_cs_filename, + ut_strlen(old_name_cs_filename)) + && !memcmp(fkid + ut_strlen(old_name_cs_filename), + dict_ibfk, (sizeof dict_ibfk) - 1)) { + + /* This is a generated >= 4.0.18 format id */ + + char table_name[MAX_TABLE_NAME_LEN] = ""; + uint errors = 0; + + if (strlen(table->name) > strlen(old_name)) { + foreign->id = static_cast<char*>( + mem_heap_alloc( + foreign->heap, + strlen(table->name) + + strlen(old_id) + 1)); + } + + /* Convert the table name to UTF-8 */ + strncpy(table_name, table->name, + MAX_TABLE_NAME_LEN); + innobase_convert_to_system_charset( + strchr(table_name, '/') + 1, + 
strchr(table->name, '/') + 1, + MAX_TABLE_NAME_LEN, &errors); + + if (errors) { + /* Table name could not be converted + from charset my_charset_filename to + UTF-8. This means that the table name + is already in UTF-8 (#mysql#50). */ + strncpy(table_name, table->name, + MAX_TABLE_NAME_LEN); + } + + /* Replace the prefix 'databasename/tablename' + with the new names */ + strcpy(foreign->id, table_name); + if (on_tmp) { + strcat(foreign->id, + old_id + ut_strlen(old_name)); + } else { + sprintf(strchr(foreign->id, '/') + 1, + "%s%s", + strchr(table_name, '/') +1, + strstr(old_id, "_ibfk_") ); + } + + } else { + /* This is a >= 4.0.18 format id where the user + gave the id name */ + db_len = dict_get_db_name_len(table->name) + 1; + + if (dict_get_db_name_len(table->name) + > dict_get_db_name_len(foreign->id)) { + + foreign->id = static_cast<char*>( + mem_heap_alloc( + foreign->heap, + db_len + strlen(old_id) + 1)); + } + + /* Replace the database prefix in id with the + one from table->name */ + + ut_memcpy(foreign->id, table->name, db_len); + + strcpy(foreign->id + db_len, + dict_remove_db_name(old_id)); + } + + mem_free(old_id); + } + + table->foreign_set.erase(it); + fk_set.insert(foreign); + + if (foreign->referenced_table) { + foreign->referenced_table->referenced_set.insert(foreign); + } + } + + ut_a(table->foreign_set.empty()); + table->foreign_set.swap(fk_set); + + for (dict_foreign_set::iterator it = table->referenced_set.begin(); + it != table->referenced_set.end(); + ++it) { + + foreign = *it; + + if (ut_strlen(foreign->referenced_table_name) + < ut_strlen(table->name)) { + /* Allocate a longer name buffer; + TODO: store buf len to save memory */ + + foreign->referenced_table_name = mem_heap_strdup( + foreign->heap, table->name); + + dict_mem_referenced_table_name_lookup_set( + foreign, TRUE); + } else { + /* Use the same buffer */ + strcpy(foreign->referenced_table_name, table->name); + + dict_mem_referenced_table_name_lookup_set( + foreign, FALSE); + } + 
} + + return(DB_SUCCESS); +} + +/**********************************************************************//** +Change the id of a table object in the dictionary cache. This is used in +DISCARD TABLESPACE. */ +UNIV_INTERN +void +dict_table_change_id_in_cache( +/*==========================*/ + dict_table_t* table, /*!< in/out: table object already in cache */ + table_id_t new_id) /*!< in: new id to set */ +{ + ut_ad(table); + ut_ad(mutex_own(&(dict_sys->mutex))); + ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); + + /* Remove the table from the hash table of id's */ + + HASH_DELETE(dict_table_t, id_hash, dict_sys->table_id_hash, + ut_fold_ull(table->id), table); + table->id = new_id; + + /* Add the table back to the hash table */ + HASH_INSERT(dict_table_t, id_hash, dict_sys->table_id_hash, + ut_fold_ull(table->id), table); +} + +/**********************************************************************//** +Removes a table object from the dictionary cache. */ +static +void +dict_table_remove_from_cache_low( +/*=============================*/ + dict_table_t* table, /*!< in, own: table */ + ibool lru_evict) /*!< in: TRUE if table being evicted + to make room in the table LRU list */ +{ + dict_foreign_t* foreign; + dict_index_t* index; + ulint size; + + ut_ad(table); + ut_ad(dict_lru_validate()); + ut_a(table->n_ref_count == 0); + ut_a(table->n_rec_locks == 0); + ut_ad(mutex_own(&(dict_sys->mutex))); + ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); + + /* Remove the foreign constraints from the cache */ + std::for_each(table->foreign_set.begin(), table->foreign_set.end(), + dict_foreign_remove_partial()); + table->foreign_set.clear(); + + /* Reset table field in referencing constraints */ + for (dict_foreign_set::iterator it = table->referenced_set.begin(); + it != table->referenced_set.end(); + ++it) { + + foreign = *it; + foreign->referenced_table = NULL; + foreign->referenced_index = NULL; + } + + /* Remove the indexes from the cache */ + + for (index = 
UT_LIST_GET_LAST(table->indexes); + index != NULL; + index = UT_LIST_GET_LAST(table->indexes)) { + + dict_index_remove_from_cache_low(table, index, lru_evict); + } + + /* Remove table from the hash tables of tables */ + + HASH_DELETE(dict_table_t, name_hash, dict_sys->table_hash, + ut_fold_string(table->name), table); + + HASH_DELETE(dict_table_t, id_hash, dict_sys->table_id_hash, + ut_fold_ull(table->id), table); + + /* Remove table from LRU or non-LRU list. */ + if (table->can_be_evicted) { + ut_ad(dict_lru_find_table(table)); + UT_LIST_REMOVE(table_LRU, dict_sys->table_LRU, table); + } else { + ut_ad(dict_non_lru_find_table(table)); + UT_LIST_REMOVE(table_LRU, dict_sys->table_non_LRU, table); + } + + ut_ad(dict_lru_validate()); + + if (lru_evict && table->drop_aborted) { + /* Do as dict_table_try_drop_aborted() does. */ + + trx_t* trx = trx_allocate_for_background(); + + ut_ad(mutex_own(&dict_sys->mutex)); +#ifdef UNIV_SYNC_DEBUG + ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); +#endif /* UNIV_SYNC_DEBUG */ + /* Mimic row_mysql_lock_data_dictionary(). */ + trx->dict_operation_lock_mode = RW_X_LATCH; + + trx_set_dict_operation(trx, TRX_DICT_OP_INDEX); + + /* Silence a debug assertion in row_merge_drop_indexes(). */ + ut_d(table->n_ref_count++); + row_merge_drop_indexes(trx, table, TRUE); + ut_d(table->n_ref_count--); + ut_ad(table->n_ref_count == 0); + trx_commit_for_mysql(trx); + trx->dict_operation_lock_mode = 0; + trx_free_for_background(trx); + } + + size = mem_heap_get_size(table->heap) + strlen(table->name) + 1; + + ut_ad(dict_sys->size >= size); + + dict_sys->size -= size; + + dict_mem_table_free(table); +} + +/**********************************************************************//** +Removes a table object from the dictionary cache. 
*/ +UNIV_INTERN +void +dict_table_remove_from_cache( +/*=========================*/ + dict_table_t* table) /*!< in, own: table */ +{ + dict_table_remove_from_cache_low(table, FALSE); +} + +/****************************************************************//** +If the given column name is reserved for InnoDB system columns, return +TRUE. +@return TRUE if name is reserved */ +UNIV_INTERN +ibool +dict_col_name_is_reserved( +/*======================*/ + const char* name) /*!< in: column name */ +{ + /* This check reminds that if a new system column is added to + the program, it should be dealt with here. */ +#if DATA_N_SYS_COLS != 3 +#error "DATA_N_SYS_COLS != 3" +#endif + + static const char* reserved_names[] = { + "DB_ROW_ID", "DB_TRX_ID", "DB_ROLL_PTR" + }; + + ulint i; + + for (i = 0; i < UT_ARR_SIZE(reserved_names); i++) { + if (innobase_strcasecmp(name, reserved_names[i]) == 0) { + + return(TRUE); + } + } + + return(FALSE); +} + +#if 1 /* This function is not very accurate at determining + whether an UNDO record will be too big. See innodb_4k.test, + Bug 13336585, for a testcase that shows an index that can + be created but cannot be updated. */ + +/****************************************************************//** +If an undo log record for this table might not fit on a single page, +return TRUE. +@return TRUE if the undo log record could become too big */ +static +ibool +dict_index_too_big_for_undo( +/*========================*/ + const dict_table_t* table, /*!< in: table */ + const dict_index_t* new_index) /*!< in: index */ +{ + /* Make sure that all column prefixes will fit in the undo log record + in trx_undo_page_report_modify() right after trx_undo_page_init(). 
*/ + + ulint i; + const dict_index_t* clust_index + = dict_table_get_first_index(table); + ulint undo_page_len + = TRX_UNDO_PAGE_HDR - TRX_UNDO_PAGE_HDR_SIZE + + 2 /* next record pointer */ + + 1 /* type_cmpl */ + + 11 /* trx->undo_no */ + 11 /* table->id */ + + 1 /* rec_get_info_bits() */ + + 11 /* DB_TRX_ID */ + + 11 /* DB_ROLL_PTR */ + + 10 + FIL_PAGE_DATA_END /* trx_undo_left() */ + + 2/* pointer to previous undo log record */; + + /* FTS index consists of auxiliary tables, they shall be excluded from + index row size check */ + if (new_index->type & DICT_FTS) { + return(false); + } + + if (!clust_index) { + ut_a(dict_index_is_clust(new_index)); + clust_index = new_index; + } + + /* Add the size of the ordering columns in the + clustered index. */ + for (i = 0; i < clust_index->n_uniq; i++) { + const dict_col_t* col + = dict_index_get_nth_col(clust_index, i); + + /* Use the maximum output size of + mach_write_compressed(), although the encoded + length should always fit in 2 bytes. */ + undo_page_len += 5 + dict_col_get_max_size(col); + } + + /* Add the old values of the columns to be updated. + First, the amount and the numbers of the columns. + These are written by mach_write_compressed() whose + maximum output length is 5 bytes. However, given that + the quantities are below REC_MAX_N_FIELDS (10 bits), + the maximum length is 2 bytes per item. */ + undo_page_len += 2 * (dict_table_get_n_cols(table) + 1); + + for (i = 0; i < clust_index->n_def; i++) { + const dict_col_t* col + = dict_index_get_nth_col(clust_index, i); + ulint max_size + = dict_col_get_max_size(col); + ulint fixed_size + = dict_col_get_fixed_size(col, + dict_table_is_comp(table)); + ulint max_prefix + = col->max_prefix; + + if (fixed_size) { + /* Fixed-size columns are stored locally. */ + max_size = fixed_size; + } else if (max_size <= BTR_EXTERN_FIELD_REF_SIZE * 2) { + /* Short columns are stored locally. 
*/ + } else if (!col->ord_part + || (col->max_prefix + < (ulint) DICT_MAX_FIELD_LEN_BY_FORMAT(table))) { + /* See if col->ord_part would be set + because of new_index. Also check if the new + index could have longer prefix on columns + that already had ord_part set */ + ulint j; + + for (j = 0; j < new_index->n_uniq; j++) { + if (dict_index_get_nth_col( + new_index, j) == col) { + const dict_field_t* field + = dict_index_get_nth_field( + new_index, j); + + if (field->prefix_len + > col->max_prefix) { + max_prefix = + field->prefix_len; + } + + goto is_ord_part; + } + } + + if (col->ord_part) { + goto is_ord_part; + } + + /* This is not an ordering column in any index. + Thus, it can be stored completely externally. */ + max_size = BTR_EXTERN_FIELD_REF_SIZE; + } else { + ulint max_field_len; +is_ord_part: + max_field_len = DICT_MAX_FIELD_LEN_BY_FORMAT(table); + + /* This is an ordering column in some index. + A long enough prefix must be written to the + undo log. See trx_undo_page_fetch_ext(). */ + max_size = ut_min(max_size, max_field_len); + + /* We only store the needed prefix length in undo log */ + if (max_prefix) { + ut_ad(dict_table_get_format(table) + >= UNIV_FORMAT_B); + + max_size = ut_min(max_prefix, max_size); + } + + max_size += BTR_EXTERN_FIELD_REF_SIZE; + } + + undo_page_len += 5 + max_size; + } + + return(undo_page_len >= UNIV_PAGE_SIZE); +} +#endif + +/****************************************************************//** +If a record of this index might not fit on a single B-tree page, +return TRUE. 
@return TRUE if the index record could become too big */
static
ibool
dict_index_too_big_for_tree(
/*========================*/
	const dict_table_t*	table,		/*!< in: table */
	const dict_index_t*	new_index)	/*!< in: index */
{
	ulint	zip_size;
	ulint	comp;
	ulint	i;
	/* maximum possible storage size of a record */
	ulint	rec_max_size;
	/* maximum allowed size of a record on a leaf page */
	ulint	page_rec_max;
	/* maximum allowed size of a node pointer record */
	ulint	page_ptr_max;

	/* FTS index consists of auxiliary tables, they shall be excluded from
	index row size check */
	if (new_index->type & DICT_FTS) {
		return(false);
	}

	/* Debug injection point: when "ib_force_create_table" is set,
	report that the record fits without computing anything. */
	DBUG_EXECUTE_IF(
		"ib_force_create_table",
		return(FALSE););

	comp = dict_table_is_comp(table);
	zip_size = dict_table_zip_size(table);

	if (zip_size && zip_size < UNIV_PAGE_SIZE) {
		/* On a compressed page, two records must fit in the
		uncompressed page modification log.  On compressed
		pages with zip_size == UNIV_PAGE_SIZE, this limit will
		never be reached. */
		ut_ad(comp);
		/* The maximum allowed record size is the size of
		an empty page, minus a byte for recording the heap
		number in the page modification log.  The maximum
		allowed node pointer size is half that. */
		page_rec_max = page_zip_empty_size(new_index->n_fields,
						   zip_size);
		if (page_rec_max) {
			page_rec_max--;
		}
		page_ptr_max = page_rec_max / 2;
		/* On a compressed page, there is a two-byte entry in
		the dense page directory for every record.  But there
		is no record header. */
		rec_max_size = 2;
	} else {
		/* The maximum allowed record size is half a B-tree
		page.  No additional sparse page directory entry will
		be generated for the first few user records. */
		page_rec_max = page_get_free_space_of_empty(comp) / 2;
		page_ptr_max = page_rec_max;
		/* Each record has a header. */
		rec_max_size = comp
			? REC_N_NEW_EXTRA_BYTES
			: REC_N_OLD_EXTRA_BYTES;
	}

	if (comp) {
		/* Include the "null" flags in the
		maximum possible record size. */
		rec_max_size += UT_BITS_IN_BYTES(new_index->n_nullable);
	} else {
		/* For each column, include a 2-byte offset and a
		"null" flag.  The 1-byte format is only used in short
		records that do not contain externally stored columns.
		Such records could never exceed the page limit, even
		when using the 2-byte format. */
		rec_max_size += 2 * new_index->n_fields;
	}

	/* Compute the maximum possible record size.  The running total
	is checked after every field, so the function returns TRUE as
	soon as either limit is reached. */
	for (i = 0; i < new_index->n_fields; i++) {
		const dict_field_t*	field
			= dict_index_get_nth_field(new_index, i);
		const dict_col_t*	col
			= dict_field_get_col(field);
		ulint			field_max_size;
		ulint			field_ext_max_size;

		/* In dtuple_convert_big_rec(), variable-length columns
		that are longer than BTR_EXTERN_FIELD_REF_SIZE * 2
		may be chosen for external storage.

		Fixed-length columns, and all columns of secondary
		index records are always stored inline. */

		/* Determine the maximum length of the index field.
		The field_ext_max_size should be computed as the worst
		case in rec_get_converted_size_comp() for
		REC_STATUS_ORDINARY records. */

		field_max_size = dict_col_get_fixed_size(col, comp);
		if (field_max_size) {
			/* dict_index_add_col() should guarantee this */
			ut_ad(!field->prefix_len
			      || field->fixed_len == field->prefix_len);
			/* Fixed lengths are not encoded
			in ROW_FORMAT=COMPACT. */
			field_ext_max_size = 0;
			/* Skip the length-byte accounting below. */
			goto add_field_size;
		}

		field_max_size = dict_col_get_max_size(col);
		field_ext_max_size = field_max_size < 256 ? 1 : 2;

		if (field->prefix_len) {
			if (field->prefix_len < field_max_size) {
				field_max_size = field->prefix_len;
			}
		} else if (field_max_size > BTR_EXTERN_FIELD_REF_SIZE * 2
			   && dict_index_is_clust(new_index)) {

			/* In the worst case, we have a locally stored
			column of BTR_EXTERN_FIELD_REF_SIZE * 2 bytes.
			The length can be stored in one byte.  If the
			column were stored externally, the lengths in
			the clustered index page would be
			BTR_EXTERN_FIELD_REF_SIZE and 2. */
			field_max_size = BTR_EXTERN_FIELD_REF_SIZE * 2;
			field_ext_max_size = 1;
		}

		if (comp) {
			/* Add the extra size for ROW_FORMAT=COMPACT.
			For ROW_FORMAT=REDUNDANT, these bytes were
			added to rec_max_size before this loop. */
			rec_max_size += field_ext_max_size;
		}
add_field_size:
		rec_max_size += field_max_size;

		/* Check the size limit on leaf pages. */
		if (UNIV_UNLIKELY(rec_max_size >= page_rec_max)) {

			return(TRUE);
		}

		/* Check the size limit on non-leaf pages.  Records
		stored in non-leaf B-tree pages consist of the unique
		columns of the record (the key columns of the B-tree)
		and a node pointer field.  When we have processed the
		unique columns, rec_max_size equals the size of the
		node pointer record minus the node pointer column. */
		if (i + 1 == dict_index_get_n_unique_in_tree(new_index)
		    && rec_max_size + REC_NODE_PTR_SIZE >= page_ptr_max) {

			return(TRUE);
		}
	}

	return(FALSE);
}

/**********************************************************************//**
Adds an index to the dictionary cache.  The caller-supplied index object
is always freed here, on success and on every error path; on success the
cache holds a newly built internal copy instead.
@return DB_SUCCESS, DB_TOO_BIG_RECORD, or DB_CORRUPTION */
UNIV_INTERN
dberr_t
dict_index_add_to_cache(
/*====================*/
	dict_table_t*	table,	/*!< in: table on which the index is */
	dict_index_t*	index,	/*!< in, own: index; NOTE! The index memory
				object is freed in this function! */
	ulint		page_no,/*!< in: root page number of the index */
	ibool		strict)	/*!< in: TRUE=refuse to create the index
				if records could be too big to fit in
				a B-tree page */
{
	dict_index_t*	new_index;
	ulint		n_ord;
	ulint		i;

	ut_ad(index);
	ut_ad(mutex_own(&(dict_sys->mutex)));
	ut_ad(index->n_def == index->n_fields);
	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
	ut_ad(!dict_index_is_online_ddl(index));

	ut_ad(mem_heap_validate(index->heap));
	/* A clustered index may only be added to a table that has
	no indexes yet. */
	ut_a(!dict_index_is_clust(index)
	     || UT_LIST_GET_LEN(table->indexes) == 0);

	if (!dict_index_find_cols(table, index)) {

		dict_mem_index_free(index);
		return(DB_CORRUPTION);
	}

	/* Build the cache internal representation of the index,
	containing also the added system fields */

	if (index->type == DICT_FTS) {
		new_index = dict_index_build_internal_fts(table, index);
	} else if (dict_index_is_clust(index)) {
		new_index = dict_index_build_internal_clust(table, index);
	} else {
		new_index = dict_index_build_internal_non_clust(table, index);
	}

	/* Set the n_fields value in new_index to the actual defined
	number of fields in the cache internal representation */

	new_index->n_fields = new_index->n_def;
	new_index->trx_id = index->trx_id;

	if (dict_index_too_big_for_tree(table, new_index)) {

		if (strict) {
			/* NOTE: this label is also the target of the
			"goto too_big" in the undo log size check below.
			That jump lands inside this block, so an index
			that is too big for the undo log is rejected
			whatever the value of strict. */
too_big:
			dict_mem_index_free(new_index);
			dict_mem_index_free(index);
			return(DB_TOO_BIG_RECORD);
		} else {

			ib_warn_row_too_big(table);

		}
	}

	/* n_ord is the number of leading fields treated as ordering
	fields in the loops below. */
	if (dict_index_is_univ(index)) {
		n_ord = new_index->n_fields;
	} else {
		n_ord = new_index->n_uniq;
	}

#if 1	/* The following code predetermines whether to call
	dict_index_too_big_for_undo(). This function is not
	accurate. See innodb_4k.test, Bug 13336585, for a
	testcase that shows an index that can be created but
	cannot be updated. */

	switch (dict_table_get_format(table)) {
	case UNIV_FORMAT_A:
		/* ROW_FORMAT=REDUNDANT and ROW_FORMAT=COMPACT store
		prefixes of externally stored columns locally within
		the record.  There are no special considerations for
		the undo log record size. */
		goto undo_size_ok;

	case UNIV_FORMAT_B:
		/* In ROW_FORMAT=DYNAMIC and ROW_FORMAT=COMPRESSED,
		column prefix indexes require that prefixes of
		externally stored columns are written to the undo log.
		This may make the undo log record bigger than the
		record on the B-tree page.  The maximum size of an
		undo log record is the page size.  That must be
		checked for below. */
		break;

#if UNIV_FORMAT_B != UNIV_FORMAT_MAX
# error "UNIV_FORMAT_B != UNIV_FORMAT_MAX"
#endif
	}

	for (i = 0; i < n_ord; i++) {
		const dict_field_t*	field
			= dict_index_get_nth_field(new_index, i);
		const dict_col_t*	col
			= dict_field_get_col(field);

		/* In dtuple_convert_big_rec(), variable-length columns
		that are longer than BTR_EXTERN_FIELD_REF_SIZE * 2
		may be chosen for external storage.  If the column appears
		in an ordering column of an index, a longer prefix determined
		by dict_max_field_len_store_undo() will be copied to the undo
		log by trx_undo_page_report_modify() and
		trx_undo_page_fetch_ext().  It suffices to check the
		capacity of the undo log whenever new_index includes
		a column prefix on a column that may be stored externally. */

		if (field->prefix_len /* prefix index */
		    && (!col->ord_part /* not yet ordering column */
			|| field->prefix_len > col->max_prefix)
		    && !dict_col_get_fixed_size(col, TRUE) /* variable-length */
		    && dict_col_get_max_size(col)
		    > BTR_EXTERN_FIELD_REF_SIZE * 2 /* long enough */) {

			if (dict_index_too_big_for_undo(table, new_index)) {
				/* An undo log record might not fit in
				a single page.  Refuse to create this index. */

				goto too_big;
			}

			/* The undo check covers the whole index, so it
			is made at most once: stop at the first field
			that triggers it. */
			break;
		}
	}

undo_size_ok:
#endif
	/* Flag the ordering columns and also set column max_prefix */

	for (i = 0; i < n_ord; i++) {
		const dict_field_t*	field
			= dict_index_get_nth_field(new_index, i);

		field->col->ord_part = 1;

		if (field->prefix_len > field->col->max_prefix) {
			field->col->max_prefix = field->prefix_len;
		}
	}

	if (!dict_index_is_univ(new_index)) {

		/* Allocate zero-filled statistics arrays from the index
		heap, one element per unique field of the index. */

		new_index->stat_n_diff_key_vals =
			static_cast<ib_uint64_t*>(mem_heap_zalloc(
			new_index->heap,
			dict_index_get_n_unique(new_index)
			* sizeof(*new_index->stat_n_diff_key_vals)));

		new_index->stat_n_sample_sizes =
			static_cast<ib_uint64_t*>(mem_heap_zalloc(
			new_index->heap,
			dict_index_get_n_unique(new_index)
			* sizeof(*new_index->stat_n_sample_sizes)));

		new_index->stat_n_non_null_key_vals =
			static_cast<ib_uint64_t*>(mem_heap_zalloc(
			new_index->heap,
			dict_index_get_n_unique(new_index)
			* sizeof(*new_index->stat_n_non_null_key_vals)));
	}

	new_index->stat_index_size = 1;
	new_index->stat_n_leaf_pages = 1;

	/* Add the new index as the last index for the table */

	UT_LIST_ADD_LAST(indexes, table->indexes, new_index);
	new_index->table = table;
	new_index->table_name = table->name;
	new_index->search_info = btr_search_info_create(new_index->heap);

	new_index->page = page_no;
	rw_lock_create(index_tree_rw_lock_key, &new_index->lock,
		       dict_index_is_ibuf(index)
		       ? SYNC_IBUF_INDEX_TREE : SYNC_INDEX_TREE);

	/* Account for the memory of the cached index object. */
	dict_sys->size += mem_heap_get_size(new_index->heap);

	/* The user representation is no longer needed. */
	dict_mem_index_free(index);

	return(DB_SUCCESS);
}

/**********************************************************************//**
Removes an index from the dictionary cache.
*/ +static +void +dict_index_remove_from_cache_low( +/*=============================*/ + dict_table_t* table, /*!< in/out: table */ + dict_index_t* index, /*!< in, own: index */ + ibool lru_evict) /*!< in: TRUE if index being evicted + to make room in the table LRU list */ +{ + ulint size; + ulint retries = 0; + btr_search_t* info; + + ut_ad(table && index); + ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); + ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); + ut_ad(mutex_own(&(dict_sys->mutex))); + + /* No need to acquire the dict_index_t::lock here because + there can't be any active operations on this index (or table). */ + + if (index->online_log) { + ut_ad(index->online_status == ONLINE_INDEX_CREATION); + row_log_free(index->online_log); + } + + /* We always create search info whether or not adaptive + hash index is enabled or not. */ + info = btr_search_get_info(index); + ut_ad(info); + + /* We are not allowed to free the in-memory index struct + dict_index_t until all entries in the adaptive hash index + that point to any of the page belonging to his b-tree index + are dropped. This is so because dropping of these entries + require access to dict_index_t struct. To avoid such scenario + We keep a count of number of such pages in the search_info and + only free the dict_index_t struct when this count drops to + zero. See also: dict_table_can_be_evicted() */ + + do { + ulint ref_count = btr_search_info_get_ref_count(info, + index); + + if (ref_count == 0) { + break; + } + + /* Sleep for 10ms before trying again. */ + os_thread_sleep(10000); + ++retries; + + if (retries % 500 == 0) { + /* No luck after 5 seconds of wait. */ + fprintf(stderr, "InnoDB: Error: Waited for" + " %lu secs for hash index" + " ref_count (%lu) to drop" + " to 0.\n" + "index: \"%s\"" + " table: \"%s\"\n", + retries/100, + ref_count, + index->name, + table->name); + } + + /* To avoid a hang here we commit suicide if the + ref_count doesn't drop to zero in 600 seconds. 
*/ + if (retries >= 60000) { + ut_error; + } + } while (srv_shutdown_state == SRV_SHUTDOWN_NONE || !lru_evict); + + rw_lock_free(&index->lock); + + /* Remove the index from the list of indexes of the table */ + UT_LIST_REMOVE(indexes, table->indexes, index); + + size = mem_heap_get_size(index->heap); + + ut_ad(dict_sys->size >= size); + + dict_sys->size -= size; + + dict_mem_index_free(index); +} + +/**********************************************************************//** +Removes an index from the dictionary cache. */ +UNIV_INTERN +void +dict_index_remove_from_cache( +/*=========================*/ + dict_table_t* table, /*!< in/out: table */ + dict_index_t* index) /*!< in, own: index */ +{ + dict_index_remove_from_cache_low(table, index, FALSE); +} + +/*******************************************************************//** +Tries to find column names for the index and sets the col field of the +index. +@return TRUE if the column names were found */ +static +ibool +dict_index_find_cols( +/*=================*/ + dict_table_t* table, /*!< in: table */ + dict_index_t* index) /*!< in: index */ +{ + ulint i; + + ut_ad(table && index); + ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); + ut_ad(mutex_own(&(dict_sys->mutex))); + + for (i = 0; i < index->n_fields; i++) { + ulint j; + dict_field_t* field = dict_index_get_nth_field(index, i); + + for (j = 0; j < table->n_cols; j++) { + if (!strcmp(dict_table_get_col_name(table, j), + field->name)) { + field->col = dict_table_get_nth_col(table, j); + + goto found; + } + } + +#ifdef UNIV_DEBUG + /* It is an error not to find a matching column. 
*/ + fputs("InnoDB: Error: no matching column for ", stderr); + ut_print_name(stderr, NULL, FALSE, field->name); + fputs(" in ", stderr); + dict_index_name_print(stderr, NULL, index); + fputs("!\n", stderr); +#endif /* UNIV_DEBUG */ + return(FALSE); + +found: + ; + } + + return(TRUE); +} +#endif /* !UNIV_HOTBACKUP */ + +/*******************************************************************//** +Adds a column to index. */ +UNIV_INTERN +void +dict_index_add_col( +/*===============*/ + dict_index_t* index, /*!< in/out: index */ + const dict_table_t* table, /*!< in: table */ + dict_col_t* col, /*!< in: column */ + ulint prefix_len) /*!< in: column prefix length */ +{ + dict_field_t* field; + const char* col_name; + + col_name = dict_table_get_col_name(table, dict_col_get_no(col)); + + dict_mem_index_add_field(index, col_name, prefix_len); + + field = dict_index_get_nth_field(index, index->n_def - 1); + + field->col = col; + field->fixed_len = (unsigned int) dict_col_get_fixed_size( + col, dict_table_is_comp(table)); + + if (prefix_len && field->fixed_len > prefix_len) { + field->fixed_len = (unsigned int) prefix_len; + } + + /* Long fixed-length fields that need external storage are treated as + variable-length fields, so that the extern flag can be embedded in + the length word. */ + + if (field->fixed_len > DICT_MAX_FIXED_COL_LEN) { + field->fixed_len = 0; + } +#if DICT_MAX_FIXED_COL_LEN != 768 + /* The comparison limit above must be constant. If it were + changed, the disk format of some fixed-length columns would + change, which would be a disaster. */ +# error "DICT_MAX_FIXED_COL_LEN != 768" +#endif + + if (!(col->prtype & DATA_NOT_NULL)) { + index->n_nullable++; + } +} + +#ifndef UNIV_HOTBACKUP +/*******************************************************************//** +Copies fields contained in index2 to index1. 
*/ +static +void +dict_index_copy( +/*============*/ + dict_index_t* index1, /*!< in: index to copy to */ + dict_index_t* index2, /*!< in: index to copy from */ + const dict_table_t* table, /*!< in: table */ + ulint start, /*!< in: first position to copy */ + ulint end) /*!< in: last position to copy */ +{ + dict_field_t* field; + ulint i; + + /* Copy fields contained in index2 */ + + for (i = start; i < end; i++) { + + field = dict_index_get_nth_field(index2, i); + dict_index_add_col(index1, table, field->col, + field->prefix_len); + } +} + +/*******************************************************************//** +Copies types of fields contained in index to tuple. */ +UNIV_INTERN +void +dict_index_copy_types( +/*==================*/ + dtuple_t* tuple, /*!< in/out: data tuple */ + const dict_index_t* index, /*!< in: index */ + ulint n_fields) /*!< in: number of + field types to copy */ +{ + ulint i; + + if (dict_index_is_univ(index)) { + dtuple_set_types_binary(tuple, n_fields); + + return; + } + + for (i = 0; i < n_fields; i++) { + const dict_field_t* ifield; + dtype_t* dfield_type; + + ifield = dict_index_get_nth_field(index, i); + dfield_type = dfield_get_type(dtuple_get_nth_field(tuple, i)); + dict_col_copy_type(dict_field_get_col(ifield), dfield_type); + } +} + +/*******************************************************************//** +Copies types of columns contained in table to tuple and sets all +fields of the tuple to the SQL NULL value. This function should +be called right after dtuple_create(). 
*/
UNIV_INTERN
void
dict_table_copy_types(
/*==================*/
	dtuple_t*		tuple,	/*!< in/out: data tuple */
	const dict_table_t*	table)	/*!< in: table */
{
	ulint		i;

	/* Tuple field i takes its type from table column i. */
	for (i = 0; i < dtuple_get_n_fields(tuple); i++) {

		dfield_t*	dfield	= dtuple_get_nth_field(tuple, i);
		dtype_t*	dtype	= dfield_get_type(dfield);

		dfield_set_null(dfield);
		dict_col_copy_type(dict_table_get_nth_col(table, i), dtype);
	}
}

/********************************************************************//**
Wait until all the background threads of the given table have exited, i.e.,
bg_threads == 0. Note: bg_threads_mutex must be reserved when
calling this.  The mutex is released while sleeping and is held again
when the function returns. */
UNIV_INTERN
void
dict_table_wait_for_bg_threads_to_exit(
/*===================================*/
	dict_table_t*	table,	/*!< in: table */
	ulint		delay)	/*!< in: time in microseconds to wait between
				checks of bg_threads. */
{
	fts_t*		fts = table->fts;

#ifdef UNIV_SYNC_DEBUG
	ut_ad(mutex_own(&fts->bg_threads_mutex));
#endif /* UNIV_SYNC_DEBUG */

	while (fts->bg_threads > 0) {
		/* Release the mutex while sleeping, then re-acquire it
		before checking the counter again. */
		mutex_exit(&fts->bg_threads_mutex);

		os_thread_sleep(delay);

		mutex_enter(&fts->bg_threads_mutex);
	}
}

/*******************************************************************//**
Builds the internal dictionary cache representation for a clustered
index, containing also system fields not defined by the user.
@return own: the internal representation of the clustered index */
static
dict_index_t*
dict_index_build_internal_clust(
/*============================*/
	const dict_table_t*	table,	/*!< in: table */
	dict_index_t*		index)	/*!< in: user representation of
					a clustered index */
{
	dict_index_t*	new_index;
	dict_field_t*	field;
	ulint		trx_id_pos;
	ulint		i;
	ibool*		indexed;

	ut_ad(table && index);
	ut_ad(dict_index_is_clust(index));
	ut_ad(mutex_own(&(dict_sys->mutex)));
	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);

	/* Create a new index object with certainly enough fields */
	new_index = dict_mem_index_create(table->name,
					  index->name, table->space,
					  index->type,
					  index->n_fields + table->n_cols);

	/* Copy other relevant data from the old index struct to the new
	struct: it inherits the values */

	new_index->n_user_defined_cols = index->n_fields;

	new_index->id = index->id;
	btr_search_index_init(new_index);

	/* Copy the fields of index */
	dict_index_copy(new_index, index, table, 0, index->n_fields);

	if (dict_index_is_univ(index)) {
		/* No fixed number of fields determines an entry uniquely */

		new_index->n_uniq = REC_MAX_N_FIELDS;

	} else if (dict_index_is_unique(index)) {
		/* Only the fields defined so far are needed to identify
		the index entry uniquely */

		new_index->n_uniq = new_index->n_def;
	} else {
		/* Also the row id is needed to identify the entry */
		new_index->n_uniq = 1 + new_index->n_def;
	}

	new_index->trx_id_offset = 0;

	if (!dict_index_is_ibuf(index)) {
		/* Add system columns, trx id first */

		/* Position of the trx id field: directly after the
		user-defined fields, or one later when a row id field
		is added below. */
		trx_id_pos = new_index->n_def;

#if DATA_ROW_ID != 0
# error "DATA_ROW_ID != 0"
#endif
#if DATA_TRX_ID != 1
# error "DATA_TRX_ID != 1"
#endif
#if DATA_ROLL_PTR != 2
# error "DATA_ROLL_PTR != 2"
#endif

		if (!dict_index_is_unique(index)) {
			dict_index_add_col(new_index, table,
					   dict_table_get_sys_col(
						   table, DATA_ROW_ID),
					   0);
			trx_id_pos++;
		}

		dict_index_add_col(new_index, table,
				   dict_table_get_sys_col(table, DATA_TRX_ID),
				   0);

		dict_index_add_col(new_index, table,
				   dict_table_get_sys_col(table,
							  DATA_ROLL_PTR),
				   0);

		/* Sum the fixed sizes of the fields preceding the trx id
		field into trx_id_offset.  The offset is left at 0 as soon
		as one of them is variable-length or a column prefix. */
		for (i = 0; i < trx_id_pos; i++) {

			ulint	fixed_size = dict_col_get_fixed_size(
				dict_index_get_nth_col(new_index, i),
				dict_table_is_comp(table));

			if (fixed_size == 0) {
				new_index->trx_id_offset = 0;

				break;
			}

			if (dict_index_get_nth_field(new_index, i)->prefix_len
			    > 0) {
				new_index->trx_id_offset = 0;

				break;
			}

			/* Add fixed_size to new_index->trx_id_offset.
			Because the latter is a bit-field, an overflow
			can theoretically occur. Check for it. */
			fixed_size += new_index->trx_id_offset;

			new_index->trx_id_offset = fixed_size;

			/* Reading the bit-field back detects truncation
			of the value that was just stored. */
			if (new_index->trx_id_offset != fixed_size) {
				/* Overflow. Pretend that this is a
				variable-length PRIMARY KEY. */
				ut_ad(0);
				new_index->trx_id_offset = 0;
				break;
			}
		}

	}

	/* Remember the table columns already contained in new_index */
	indexed = static_cast<ibool*>(
		mem_zalloc(table->n_cols * sizeof *indexed));

	/* Mark the table columns already contained in new_index */
	for (i = 0; i < new_index->n_def; i++) {

		field = dict_index_get_nth_field(new_index, i);

		/* If there is only a prefix of the column in the index
		field, do not mark the column as contained in the index */

		if (field->prefix_len == 0) {

			indexed[field->col->ind] = TRUE;
		}
	}

	/* Add to new_index non-system columns of table not yet included
	there */
	for (i = 0; i + DATA_N_SYS_COLS < (ulint) table->n_cols; i++) {

		dict_col_t*	col = dict_table_get_nth_col(table, i);
		ut_ad(col->mtype != DATA_SYS);

		if (!indexed[col->ind]) {
			dict_index_add_col(new_index, table, col, 0);
		}
	}

	mem_free(indexed);

	ut_ad(dict_index_is_ibuf(index)
	      || (UT_LIST_GET_LEN(table->indexes) == 0));

	new_index->cached = TRUE;

	return(new_index);
}
+/*******************************************************************//** +Builds the internal dictionary cache representation for a non-clustered +index, containing also system fields not defined by the user. +@return own: the internal representation of the non-clustered index */ +static +dict_index_t* +dict_index_build_internal_non_clust( +/*================================*/ + const dict_table_t* table, /*!< in: table */ + dict_index_t* index) /*!< in: user representation of + a non-clustered index */ +{ + dict_field_t* field; + dict_index_t* new_index; + dict_index_t* clust_index; + ulint i; + ibool* indexed; + + ut_ad(table && index); + ut_ad(!dict_index_is_clust(index)); + ut_ad(mutex_own(&(dict_sys->mutex))); + ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); + + /* The clustered index should be the first in the list of indexes */ + clust_index = UT_LIST_GET_FIRST(table->indexes); + + ut_ad(clust_index); + ut_ad(dict_index_is_clust(clust_index)); + ut_ad(!dict_index_is_univ(clust_index)); + + /* Create a new index */ + new_index = dict_mem_index_create( + table->name, index->name, index->space, index->type, + index->n_fields + 1 + clust_index->n_uniq); + + /* Copy other relevant data from the old index + struct to the new struct: it inherits the values */ + + new_index->n_user_defined_cols = index->n_fields; + + new_index->id = index->id; + btr_search_index_init(new_index); + + /* Copy fields from index to new_index */ + dict_index_copy(new_index, index, table, 0, index->n_fields); + + /* Remember the table columns already contained in new_index */ + indexed = static_cast<ibool*>( + mem_zalloc(table->n_cols * sizeof *indexed)); + + /* Mark the table columns already contained in new_index */ + for (i = 0; i < new_index->n_def; i++) { + + field = dict_index_get_nth_field(new_index, i); + + /* If there is only a prefix of the column in the index + field, do not mark the column as contained in the index */ + + if (field->prefix_len == 0) { + + 
indexed[field->col->ind] = TRUE; + } + } + + /* Add to new_index the columns necessary to determine the clustered + index entry uniquely */ + + for (i = 0; i < clust_index->n_uniq; i++) { + + field = dict_index_get_nth_field(clust_index, i); + + if (!indexed[field->col->ind]) { + dict_index_add_col(new_index, table, field->col, + field->prefix_len); + } + } + + mem_free(indexed); + + if (dict_index_is_unique(index)) { + new_index->n_uniq = index->n_fields; + } else { + new_index->n_uniq = new_index->n_def; + } + + /* Set the n_fields value in new_index to the actual defined + number of fields */ + + new_index->n_fields = new_index->n_def; + + new_index->cached = TRUE; + + return(new_index); +} + +/*********************************************************************** +Builds the internal dictionary cache representation for an FTS index. +@return own: the internal representation of the FTS index */ +static +dict_index_t* +dict_index_build_internal_fts( +/*==========================*/ + dict_table_t* table, /*!< in: table */ + dict_index_t* index) /*!< in: user representation of an FTS index */ +{ + dict_index_t* new_index; + + ut_ad(table && index); + ut_ad(index->type == DICT_FTS); +#ifdef UNIV_SYNC_DEBUG + ut_ad(mutex_own(&(dict_sys->mutex))); +#endif /* UNIV_SYNC_DEBUG */ + ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); + + /* Create a new index */ + new_index = dict_mem_index_create( + table->name, index->name, index->space, index->type, + index->n_fields); + + /* Copy other relevant data from the old index struct to the new + struct: it inherits the values */ + + new_index->n_user_defined_cols = index->n_fields; + + new_index->id = index->id; + btr_search_index_init(new_index); + + /* Copy fields from index to new_index */ + dict_index_copy(new_index, index, table, 0, index->n_fields); + + new_index->n_uniq = 0; + new_index->cached = TRUE; + + if (table->fts->cache == NULL) { + table->fts->cache = fts_cache_create(table); + } + + 
rw_lock_x_lock(&table->fts->cache->init_lock); + /* Notify the FTS cache about this index. */ + fts_cache_index_cache_create(table, new_index); + rw_lock_x_unlock(&table->fts->cache->init_lock); + + return(new_index); +} +/*====================== FOREIGN KEY PROCESSING ========================*/ + +/*********************************************************************//** +Checks if a table is referenced by foreign keys. +@return TRUE if table is referenced by a foreign key */ +UNIV_INTERN +ibool +dict_table_is_referenced_by_foreign_key( +/*====================================*/ + const dict_table_t* table) /*!< in: InnoDB table */ +{ + return(!table->referenced_set.empty()); +} + +/*********************************************************************//** +Check if the index is referenced by a foreign key, if TRUE return foreign +else return NULL +@return pointer to foreign key struct if index is defined for foreign +key, otherwise NULL */ +UNIV_INTERN +dict_foreign_t* +dict_table_get_referenced_constraint( +/*=================================*/ + dict_table_t* table, /*!< in: InnoDB table */ + dict_index_t* index) /*!< in: InnoDB index */ +{ + dict_foreign_t* foreign; + + ut_ad(index != NULL); + ut_ad(table != NULL); + + for (dict_foreign_set::iterator it = table->referenced_set.begin(); + it != table->referenced_set.end(); + ++it) { + + foreign = *it; + + if (foreign->referenced_index == index) { + + return(foreign); + } + } + + return(NULL); +} + +/*********************************************************************//** +Checks if a index is defined for a foreign key constraint. Index is a part +of a foreign key constraint if the index is referenced by foreign key +or index is a foreign key index. 
+@return pointer to foreign key struct if index is defined for foreign +key, otherwise NULL */ +UNIV_INTERN +dict_foreign_t* +dict_table_get_foreign_constraint( +/*==============================*/ + dict_table_t* table, /*!< in: InnoDB table */ + dict_index_t* index) /*!< in: InnoDB index */ +{ + dict_foreign_t* foreign; + + ut_ad(index != NULL); + ut_ad(table != NULL); + + for (dict_foreign_set::iterator it = table->foreign_set.begin(); + it != table->foreign_set.end(); + ++it) { + + foreign = *it; + + if (foreign->foreign_index == index) { + + return(foreign); + } + } + + return(NULL); +} + +/**********************************************************************//** +Removes a foreign constraint struct from the dictionary cache. */ +UNIV_INTERN +void +dict_foreign_remove_from_cache( +/*===========================*/ + dict_foreign_t* foreign) /*!< in, own: foreign constraint */ +{ + ut_ad(mutex_own(&(dict_sys->mutex))); + ut_a(foreign); + + if (foreign->referenced_table != NULL) { + foreign->referenced_table->referenced_set.erase(foreign); + } + + if (foreign->foreign_table != NULL) { + foreign->foreign_table->foreign_set.erase(foreign); + } + + dict_foreign_free(foreign); +} + +/**********************************************************************//** +Looks for the foreign constraint from the foreign and referenced lists +of a table. 
+@return foreign constraint */ +static +dict_foreign_t* +dict_foreign_find( +/*==============*/ + dict_table_t* table, /*!< in: table object */ + dict_foreign_t* foreign) /*!< in: foreign constraint */ +{ + ut_ad(mutex_own(&(dict_sys->mutex))); + + ut_ad(dict_foreign_set_validate(table->foreign_set)); + ut_ad(dict_foreign_set_validate(table->referenced_set)); + + dict_foreign_set::iterator it = table->foreign_set.find(foreign); + + if (it != table->foreign_set.end()) { + return(*it); + } + + it = table->referenced_set.find(foreign); + + if (it != table->referenced_set.end()) { + return(*it); + } + + return(NULL); +} + + +/*********************************************************************//** +Tries to find an index whose first fields are the columns in the array, +in the same order and is not marked for deletion and is not the same +as types_idx. +@return matching index, NULL if not found */ +UNIV_INTERN +dict_index_t* +dict_foreign_find_index( +/*====================*/ + const dict_table_t* table, /*!< in: table */ + const char** col_names, + /*!< in: column names, or NULL + to use table->col_names */ + const char** columns,/*!< in: array of column names */ + ulint n_cols, /*!< in: number of columns */ + const dict_index_t* types_idx, + /*!< in: NULL or an index + whose types the column types + must match */ + bool check_charsets, + /*!< in: whether to check + charsets. 
only has an effect + if types_idx != NULL */ + ulint check_null) + /*!< in: nonzero if none of + the columns must be declared + NOT NULL */ +{ + dict_index_t* index; + + ut_ad(mutex_own(&dict_sys->mutex)); + + index = dict_table_get_first_index(table); + + while (index != NULL) { + if (types_idx != index + && !(index->type & DICT_FTS) + && !index->to_be_dropped + && dict_foreign_qualify_index( + table, col_names, columns, n_cols, + index, types_idx, + check_charsets, check_null)) { + return(index); + } + + index = dict_table_get_next_index(index); + } + + return(NULL); +} + +/**********************************************************************//** +Report an error in a foreign key definition. */ +static +void +dict_foreign_error_report_low( +/*==========================*/ + FILE* file, /*!< in: output stream */ + const char* name) /*!< in: table name */ +{ + rewind(file); + ut_print_timestamp(file); + fprintf(file, " Error in foreign key constraint of table %s:\n", + name); +} + +/**********************************************************************//** +Report an error in a foreign key definition. 
*/
static
void
dict_foreign_error_report(
/*======================*/
	FILE*		file,	/*!< in: output stream */
	dict_foreign_t*	fk,	/*!< in: foreign key constraint */
	const char*	msg)	/*!< in: the error message */
{
	/* The whole report is written while holding
	dict_foreign_err_mutex. */
	mutex_enter(&dict_foreign_err_mutex);
	dict_foreign_error_report_low(file, fk->foreign_table_name);
	fputs(msg, file);
	fputs(" Constraint:\n", file);
	dict_print_info_on_foreign_key_in_create_format(file, NULL, fk, TRUE);
	putc('\n', file);
	/* The index name and the manual reference are only printed
	when the constraint has a foreign index. */
	if (fk->foreign_index) {
		fputs("The index in the foreign key in table is ", file);
		ut_print_name(file, NULL, FALSE, fk->foreign_index->name);
		fputs("\n"
		      "See " REFMAN "innodb-foreign-key-constraints.html\n"
		      "for correct foreign key definition.\n",
		      file);
	}
	mutex_exit(&dict_foreign_err_mutex);
}

/**********************************************************************//**
Adds a foreign key constraint object to the dictionary cache. May free
the object if there already is an object with the same identifier in.
At least one of the foreign table and the referenced table must already
be in the dictionary cache!
@return DB_SUCCESS or error code */
UNIV_INTERN
dberr_t
dict_foreign_add_to_cache(
/*======================*/
	dict_foreign_t*		foreign,
				/*!< in, own: foreign key constraint */
	const char**		col_names,
				/*!< in: column names, or NULL to use
				foreign->foreign_table->col_names */
	bool			check_charsets,
				/*!< in: whether to check charset
				compatibility */
	dict_err_ignore_t	ignore_err)
				/*!< in: error to be ignored */
{
	dict_table_t*	for_table;
	dict_table_t*	ref_table;
	dict_foreign_t*	for_in_cache		= NULL;
	dict_index_t*	index;
	ibool		added_to_referenced_list= FALSE;
	FILE*		ef			= dict_foreign_err_file;

	ut_ad(mutex_own(&(dict_sys->mutex)));

	for_table = dict_table_check_if_in_cache_low(
		foreign->foreign_table_name_lookup);

	ref_table = dict_table_check_if_in_cache_low(
		foreign->referenced_table_name_lookup);
	ut_a(for_table || ref_table);

	/* Look for an already cached constraint equal to this one,
	first through the foreign table, then through the referenced
	table. */
	if (for_table) {
		for_in_cache = dict_foreign_find(for_table, foreign);
	}

	if (!for_in_cache && ref_table) {
		for_in_cache = dict_foreign_find(ref_table, foreign);
	}

	if (for_in_cache) {
		/* Free the foreign object */
		mem_heap_free(foreign->heap);
	} else {
		for_in_cache = foreign;
	}

	/* From here on, for_in_cache == foreign means that the object
	passed in is the one being added; the error paths below free it
	only in that case. */

	if (ref_table && !for_in_cache->referenced_table) {
		index = dict_foreign_find_index(
			ref_table, NULL,
			for_in_cache->referenced_col_names,
			for_in_cache->n_fields, for_in_cache->foreign_index,
			check_charsets, false);

		if (index == NULL
		    && !(ignore_err & DICT_ERR_IGNORE_FK_NOKEY)) {
			dict_foreign_error_report(
				ef, for_in_cache,
				"there is no index in referenced table"
				" which would contain\n"
				"the columns as the first columns,"
				" or the data types in the\n"
				"referenced table do not match"
				" the ones in table.");

			if (for_in_cache == foreign) {
				mem_heap_free(foreign->heap);
			}

			return(DB_CANNOT_ADD_CONSTRAINT);
		}

		/* index may be NULL here when DICT_ERR_IGNORE_FK_NOKEY
		is set in ignore_err. */
		for_in_cache->referenced_table = ref_table;
		for_in_cache->referenced_index = index;

		std::pair<dict_foreign_set::iterator, bool>	ret
			= ref_table->referenced_set.insert(for_in_cache);

		ut_a(ret.second);	/* second is true if the insertion
					took place */
		added_to_referenced_list = TRUE;
	}

	if (for_table && !for_in_cache->foreign_table) {
		/* The last argument (check_null) is nonzero when the
		constraint type has an ON DELETE SET NULL or
		ON UPDATE SET NULL action. */
		index = dict_foreign_find_index(
			for_table, col_names,
			for_in_cache->foreign_col_names,
			for_in_cache->n_fields,
			for_in_cache->referenced_index, check_charsets,
			for_in_cache->type
			& (DICT_FOREIGN_ON_DELETE_SET_NULL
			   | DICT_FOREIGN_ON_UPDATE_SET_NULL));

		if (index == NULL
		    && !(ignore_err & DICT_ERR_IGNORE_FK_NOKEY)) {
			dict_foreign_error_report(
				ef, for_in_cache,
				"there is no index in the table"
				" which would contain\n"
				"the columns as the first columns,"
				" or the data types in the\n"
				"table do not match"
				" the ones in the referenced table\n"
				"or one of the ON ... SET NULL columns"
				" is declared NOT NULL.");

			if (for_in_cache == foreign) {
				/* Undo the insertion into the referenced
				set made above before freeing the object. */
				if (added_to_referenced_list) {
					const dict_foreign_set::size_type n
						= ref_table->referenced_set
						  .erase(for_in_cache);

					ut_a(n == 1);	/* the number of
							elements removed must
							be one */
				}

				mem_heap_free(foreign->heap);
			}

			return(DB_CANNOT_ADD_CONSTRAINT);
		}

		for_in_cache->foreign_table = for_table;
		for_in_cache->foreign_index = index;
		std::pair<dict_foreign_set::iterator, bool>	ret
			= for_table->foreign_set.insert(for_in_cache);

		ut_a(ret.second);	/* second is true if the insertion
					took place */
	}

	/* We need to move the table to the non-LRU end of the table LRU
	list. Otherwise it will be evicted from the cache. */

	if (ref_table != NULL && ref_table->can_be_evicted) {
		dict_table_move_from_lru_to_non_lru(ref_table);
	}

	if (for_table != NULL && for_table->can_be_evicted) {
		dict_table_move_from_lru_to_non_lru(for_table);
	}

	ut_ad(dict_lru_validate());

	return(DB_SUCCESS);
}

/*********************************************************************//**
Scans from pointer onwards.
Stops if is at the start of a copy of +'string' where characters are compared without case sensitivity, and +only outside `` or "" quotes. Stops also at NUL. +@return scanned up to this */ +static +const char* +dict_scan_to( +/*=========*/ + const char* ptr, /*!< in: scan from */ + const char* string) /*!< in: look for this */ +{ + char quote = '\0'; + bool escape = false; + + for (; *ptr; ptr++) { + if (*ptr == quote) { + /* Closing quote character: do not look for + starting quote or the keyword. */ + + /* If the quote character is escaped by a + backslash, ignore it. */ + if (escape) { + escape = false; + } else { + quote = '\0'; + } + } else if (quote) { + /* Within quotes: do nothing. */ + if (escape) { + escape = false; + } else if (*ptr == '\\') { + escape = true; + } + } else if (*ptr == '`' || *ptr == '"' || *ptr == '\'') { + /* Starting quote: remember the quote character. */ + quote = *ptr; + } else { + /* Outside quotes: look for the keyword. */ + ulint i; + for (i = 0; string[i]; i++) { + if (toupper((int)(unsigned char)(ptr[i])) + != toupper((int)(unsigned char) + (string[i]))) { + goto nomatch; + } + } + break; +nomatch: + ; + } + } + + return(ptr); +} + +/*********************************************************************//** +Accepts a specified string. Comparisons are case-insensitive. 
+@return if string was accepted, the pointer is moved after that, else +ptr is returned */ +static +const char* +dict_accept( +/*========*/ + struct charset_info_st* cs,/*!< in: the character set of ptr */ + const char* ptr, /*!< in: scan from this */ + const char* string, /*!< in: accept only this string as the next + non-whitespace string */ + ibool* success)/*!< out: TRUE if accepted */ +{ + const char* old_ptr = ptr; + const char* old_ptr2; + + *success = FALSE; + + while (my_isspace(cs, *ptr)) { + ptr++; + } + + old_ptr2 = ptr; + + ptr = dict_scan_to(ptr, string); + + if (*ptr == '\0' || old_ptr2 != ptr) { + return(old_ptr); + } + + *success = TRUE; + + return(ptr + ut_strlen(string)); +} + +/*********************************************************************//** +Scans an id. For the lexical definition of an 'id', see the code below. +Strips backquotes or double quotes from around the id. +@return scanned to */ +static +const char* +dict_scan_id( +/*=========*/ + struct charset_info_st* cs,/*!< in: the character set of ptr */ + const char* ptr, /*!< in: scanned to */ + mem_heap_t* heap, /*!< in: heap where to allocate the id + (NULL=id will not be allocated, but it + will point to string near ptr) */ + const char** id, /*!< out,own: the id; NULL if no id was + scannable */ + ibool table_id,/*!< in: TRUE=convert the allocated id + as a table name; FALSE=convert to UTF-8 */ + ibool accept_also_dot) + /*!< in: TRUE if also a dot can appear in a + non-quoted id; in a quoted id it can appear + always */ +{ + char quote = '\0'; + ulint len = 0; + const char* s; + char* str; + char* dst; + + *id = NULL; + + while (my_isspace(cs, *ptr)) { + ptr++; + } + + if (*ptr == '\0') { + + return(ptr); + } + + if (*ptr == '`' || *ptr == '"') { + quote = *ptr++; + } + + s = ptr; + + if (quote) { + for (;;) { + if (!*ptr) { + /* Syntax error */ + return(ptr); + } + if (*ptr == quote) { + ptr++; + if (*ptr != quote) { + break; + } + } + ptr++; + len++; + } + } else { + while 
(!my_isspace(cs, *ptr) && *ptr != '(' && *ptr != ')' + && (accept_also_dot || *ptr != '.') + && *ptr != ',' && *ptr != '\0') { + + ptr++; + } + + len = ptr - s; + } + + if (UNIV_UNLIKELY(!heap)) { + /* no heap given: id will point to source string */ + *id = s; + return(ptr); + } + + if (quote) { + char* d; + + str = d = static_cast<char*>( + mem_heap_alloc(heap, len + 1)); + + while (len--) { + if ((*d++ = *s++) == quote) { + s++; + } + } + *d++ = 0; + len = d - str; + ut_ad(*s == quote); + ut_ad(s + 1 == ptr); + } else { + str = mem_heap_strdupl(heap, s, len); + } + + if (!table_id) { +convert_id: + /* Convert the identifier from connection character set + to UTF-8. */ + len = 3 * len + 1; + *id = dst = static_cast<char*>(mem_heap_alloc(heap, len)); + + innobase_convert_from_id(cs, dst, str, len); + } else if (!strncmp(str, srv_mysql50_table_name_prefix, + sizeof(srv_mysql50_table_name_prefix) - 1)) { + /* This is a pre-5.1 table name + containing chars other than [A-Za-z0-9]. + Discard the prefix and use raw UTF-8 encoding. */ + str += sizeof(srv_mysql50_table_name_prefix) - 1; + len -= sizeof(srv_mysql50_table_name_prefix) - 1; + goto convert_id; + } else { + /* Encode using filename-safe characters. */ + len = 5 * len + 1; + *id = dst = static_cast<char*>(mem_heap_alloc(heap, len)); + + innobase_convert_from_table_id(cs, dst, str, len); + } + + return(ptr); +} + +/*********************************************************************//** +Tries to scan a column name. 
+@return scanned to */ +static +const char* +dict_scan_col( +/*==========*/ + struct charset_info_st* cs, /*!< in: the character set of ptr */ + const char* ptr, /*!< in: scanned to */ + ibool* success,/*!< out: TRUE if success */ + dict_table_t* table, /*!< in: table in which the column is */ + const dict_col_t** column, /*!< out: pointer to column if success */ + mem_heap_t* heap, /*!< in: heap where to allocate */ + const char** name) /*!< out,own: the column name; + NULL if no name was scannable */ +{ + ulint i; + + *success = FALSE; + + ptr = dict_scan_id(cs, ptr, heap, name, FALSE, TRUE); + + if (*name == NULL) { + + return(ptr); /* Syntax error */ + } + + if (table == NULL) { + *success = TRUE; + *column = NULL; + } else { + for (i = 0; i < dict_table_get_n_cols(table); i++) { + + const char* col_name = dict_table_get_col_name( + table, i); + + if (0 == innobase_strcasecmp(col_name, *name)) { + /* Found */ + + *success = TRUE; + *column = dict_table_get_nth_col(table, i); + strcpy((char*) *name, col_name); + + break; + } + } + } + + return(ptr); +} + + +/*********************************************************************//** +Open a table from its database and table name, this is currently used by +foreign constraint parser to get the referenced table. 
+@return complete table name with database and table name, allocated from +heap memory passed in */ +UNIV_INTERN +char* +dict_get_referenced_table( +/*======================*/ + const char* name, /*!< in: foreign key table name */ + const char* database_name, /*!< in: table db name */ + ulint database_name_len, /*!< in: db name length */ + const char* table_name, /*!< in: table name */ + ulint table_name_len, /*!< in: table name length */ + dict_table_t** table, /*!< out: table object or NULL */ + mem_heap_t* heap) /*!< in/out: heap memory */ +{ + char* ref; + const char* db_name; + + if (!database_name) { + /* Use the database name of the foreign key table */ + + db_name = name; + database_name_len = dict_get_db_name_len(name); + } else { + db_name = database_name; + } + + /* Copy database_name, '/', table_name, '\0' */ + ref = static_cast<char*>( + mem_heap_alloc(heap, database_name_len + table_name_len + 2)); + + memcpy(ref, db_name, database_name_len); + ref[database_name_len] = '/'; + memcpy(ref + database_name_len + 1, table_name, table_name_len + 1); + + /* Values; 0 = Store and compare as given; case sensitive + 1 = Store and compare in lower; case insensitive + 2 = Store as given, compare in lower; case semi-sensitive */ + if (innobase_get_lower_case_table_names() == 2) { + innobase_casedn_str(ref); + *table = dict_table_get_low(ref); + memcpy(ref, db_name, database_name_len); + ref[database_name_len] = '/'; + memcpy(ref + database_name_len + 1, table_name, table_name_len + 1); + + } else { +#ifndef __WIN__ + if (innobase_get_lower_case_table_names() == 1) { + innobase_casedn_str(ref); + } +#else + innobase_casedn_str(ref); +#endif /* !__WIN__ */ + *table = dict_table_get_low(ref); + } + + return(ref); +} +/*********************************************************************//** +Scans a table name from an SQL string. 
+@return scanned to */ +static +const char* +dict_scan_table_name( +/*=================*/ + struct charset_info_st* cs,/*!< in: the character set of ptr */ + const char* ptr, /*!< in: scanned to */ + dict_table_t** table, /*!< out: table object or NULL */ + const char* name, /*!< in: foreign key table name */ + ibool* success,/*!< out: TRUE if ok name found */ + mem_heap_t* heap, /*!< in: heap where to allocate the id */ + const char** ref_name)/*!< out,own: the table name; + NULL if no name was scannable */ +{ + const char* database_name = NULL; + ulint database_name_len = 0; + const char* table_name = NULL; + const char* scan_name; + + *success = FALSE; + *table = NULL; + + ptr = dict_scan_id(cs, ptr, heap, &scan_name, TRUE, FALSE); + + if (scan_name == NULL) { + + return(ptr); /* Syntax error */ + } + + if (*ptr == '.') { + /* We scanned the database name; scan also the table name */ + + ptr++; + + database_name = scan_name; + database_name_len = strlen(database_name); + + ptr = dict_scan_id(cs, ptr, heap, &table_name, TRUE, FALSE); + + if (table_name == NULL) { + + return(ptr); /* Syntax error */ + } + } else { + /* To be able to read table dumps made with InnoDB-4.0.17 or + earlier, we must allow the dot separator between the database + name and the table name also to appear within a quoted + identifier! InnoDB used to print a constraint as: + ... REFERENCES `databasename.tablename` ... + starting from 4.0.18 it is + ... REFERENCES `databasename`.`tablename` ... */ + const char* s; + + for (s = scan_name; *s; s++) { + if (*s == '.') { + database_name = scan_name; + database_name_len = s - scan_name; + scan_name = ++s; + break;/* to do: multiple dots? */ + } + } + + table_name = scan_name; + } + + *ref_name = dict_get_referenced_table( + name, database_name, database_name_len, + table_name, strlen(table_name), table, heap); + + *success = TRUE; + return(ptr); +} + +/*********************************************************************//** +Skips one id. 
The id is allowed to contain also '.'. +@return scanned to */ +static +const char* +dict_skip_word( +/*===========*/ + struct charset_info_st* cs,/*!< in: the character set of ptr */ + const char* ptr, /*!< in: scanned to */ + ibool* success)/*!< out: TRUE if success, FALSE if just spaces + left in string or a syntax error */ +{ + const char* start; + + *success = FALSE; + + ptr = dict_scan_id(cs, ptr, NULL, &start, FALSE, TRUE); + + if (start) { + *success = TRUE; + } + + return(ptr); +} + +/*********************************************************************//** +Removes MySQL comments from an SQL string. A comment is either +(a) '#' to the end of the line, +(b) '--[space]' to the end of the line, or +(c) '[slash][asterisk]' till the next '[asterisk][slash]' (like the familiar +C comment syntax). +@return own: SQL string stripped from comments; the caller must free +this with mem_free()! */ +static +char* +dict_strip_comments( +/*================*/ + const char* sql_string, /*!< in: SQL string */ + size_t sql_length) /*!< in: length of sql_string */ +{ + char* str; + const char* sptr; + const char* eptr = sql_string + sql_length; + char* ptr; + /* unclosed quote character (0 if none) */ + char quote = 0; + bool escape = false; + + DBUG_ENTER("dict_strip_comments"); + + DBUG_PRINT("dict_strip_comments", ("%s", sql_string)); + + str = static_cast<char*>(mem_alloc(sql_length + 1)); + + sptr = sql_string; + ptr = str; + + for (;;) { +scan_more: + if (sptr >= eptr || *sptr == '\0') { +end_of_string: + *ptr = '\0'; + + ut_a(ptr <= str + sql_length); + + DBUG_PRINT("dict_strip_comments", ("%s", str)); + DBUG_RETURN(str); + } + + if (*sptr == quote) { + /* Closing quote character: do not look for + starting quote or comments. */ + + /* If the quote character is escaped by a + backslash, ignore it. */ + if (escape) { + escape = false; + } else { + quote = 0; + } + } else if (quote) { + /* Within quotes: do not look for + starting quotes or comments. 
*/ + if (escape) { + escape = false; + } else if (*sptr == '\\') { + escape = true; + } + } else if (*sptr == '"' || *sptr == '`' || *sptr == '\'') { + /* Starting quote: remember the quote character. */ + quote = *sptr; + } else if (*sptr == '#' + || (sptr[0] == '-' && sptr[1] == '-' + && sptr[2] == ' ')) { + for (;;) { + if (++sptr >= eptr) { + goto end_of_string; + } + + /* In Unix a newline is 0x0A while in Windows + it is 0x0D followed by 0x0A */ + + switch (*sptr) { + case (char) 0X0A: + case (char) 0x0D: + case '\0': + goto scan_more; + } + } + } else if (!quote && *sptr == '/' && *(sptr + 1) == '*') { + sptr += 2; + for (;;) { + if (sptr >= eptr) { + goto end_of_string; + } + + switch (*sptr) { + case '\0': + goto scan_more; + case '*': + if (sptr[1] == '/') { + sptr += 2; + goto scan_more; + } + } + + sptr++; + } + } + + *ptr = *sptr; + + ptr++; + sptr++; + } +} + +/*********************************************************************//** +Finds the highest [number] for foreign key constraints of the table. Looks +only at the >= 4.0.18-format id's, which are of the form +databasename/tablename_ibfk_[number]. 
+@return highest number, 0 if table has no new format foreign key constraints */ +UNIV_INTERN +ulint +dict_table_get_highest_foreign_id( +/*==============================*/ + dict_table_t* table) /*!< in: table in the dictionary memory cache */ +{ + dict_foreign_t* foreign; + char* endp; + ulint biggest_id = 0; + ulint id; + ulint len; + + ut_a(table); + + len = ut_strlen(table->name); + + for (dict_foreign_set::iterator it = table->foreign_set.begin(); + it != table->foreign_set.end(); + ++it) { + foreign = *it; + + if (ut_strlen(foreign->id) > ((sizeof dict_ibfk) - 1) + len + && 0 == ut_memcmp(foreign->id, table->name, len) + && 0 == ut_memcmp(foreign->id + len, + dict_ibfk, (sizeof dict_ibfk) - 1) + && foreign->id[len + ((sizeof dict_ibfk) - 1)] != '0') { + /* It is of the >= 4.0.18 format */ + + id = strtoul(foreign->id + len + + ((sizeof dict_ibfk) - 1), + &endp, 10); + if (*endp == '\0') { + ut_a(id != biggest_id); + + if (id > biggest_id) { + biggest_id = id; + } + } + } + } + + return(biggest_id); +} + +/*********************************************************************//** +Reports a simple foreign key create clause syntax error. */ +static +void +dict_foreign_report_syntax_err( +/*===========================*/ + const char* name, /*!< in: table name */ + const char* start_of_latest_foreign, + /*!< in: start of the foreign key clause + in the SQL string */ + const char* ptr) /*!< in: place of the syntax error */ +{ + ut_ad(!srv_read_only_mode); + + FILE* ef = dict_foreign_err_file; + + mutex_enter(&dict_foreign_err_mutex); + dict_foreign_error_report_low(ef, name); + fprintf(ef, "%s:\nSyntax error close to:\n%s\n", + start_of_latest_foreign, ptr); + mutex_exit(&dict_foreign_err_mutex); +} + +/*********************************************************************//** +Scans a table create SQL string and adds to the data dictionary the foreign +key constraints declared in the string. 
This function should be called after +the indexes for a table have been created. Each foreign key constraint must +be accompanied with indexes in both participating tables. The indexes are +allowed to contain more fields than mentioned in the constraint. +@return error code or DB_SUCCESS */ +static +dberr_t +dict_create_foreign_constraints_low( +/*================================*/ + trx_t* trx, /*!< in: transaction */ + mem_heap_t* heap, /*!< in: memory heap */ + struct charset_info_st* cs,/*!< in: the character set of sql_string */ + const char* sql_string, + /*!< in: CREATE TABLE or ALTER TABLE statement + where foreign keys are declared like: + FOREIGN KEY (a, b) REFERENCES table2(c, d), + table2 can be written also with the database + name before it: test.table2; the default + database is the database of parameter name */ + const char* name, /*!< in: table full name in the normalized form + database_name/table_name */ + ibool reject_fks) + /*!< in: if TRUE, fail with error code + DB_CANNOT_ADD_CONSTRAINT if any foreign + keys are found. 
*/ +{ + dict_table_t* table; + dict_table_t* referenced_table; + dict_table_t* table_to_alter; + ulint highest_id_so_far = 0; + ulint number = 1; + dict_index_t* index; + dict_foreign_t* foreign; + const char* ptr = sql_string; + const char* start_of_latest_foreign = sql_string; + FILE* ef = dict_foreign_err_file; + const char* constraint_name; + ibool success; + dberr_t error; + const char* ptr1; + const char* ptr2; + ulint i; + ulint j; + ibool is_on_delete; + ulint n_on_deletes; + ulint n_on_updates; + const dict_col_t*columns[500]; + const char* column_names[500]; + const char* referenced_table_name; + dict_foreign_set local_fk_set; + dict_foreign_set_free local_fk_set_free(local_fk_set); + + ut_ad(!srv_read_only_mode); + ut_ad(mutex_own(&(dict_sys->mutex))); + + table = dict_table_get_low(name); + + if (table == NULL) { + mutex_enter(&dict_foreign_err_mutex); + dict_foreign_error_report_low(ef, name); + fprintf(ef, + "Cannot find the table in the internal" + " data dictionary of InnoDB.\n" + "Create table statement:\n%s\n", sql_string); + mutex_exit(&dict_foreign_err_mutex); + + return(DB_ERROR); + } + + /* First check if we are actually doing an ALTER TABLE, and in that + case look for the table being altered */ + + ptr = dict_accept(cs, ptr, "ALTER", &success); + + if (!success) { + + goto loop; + } + + ptr = dict_accept(cs, ptr, "TABLE", &success); + + if (!success) { + + goto loop; + } + + /* We are doing an ALTER TABLE: scan the table name we are altering */ + + ptr = dict_scan_table_name(cs, ptr, &table_to_alter, name, + &success, heap, &referenced_table_name); + if (!success) { + fprintf(stderr, + "InnoDB: Error: could not find" + " the table being ALTERED in:\n%s\n", + sql_string); + + return(DB_ERROR); + } + + /* Starting from 4.0.18 and 4.1.2, we generate foreign key id's in the + format databasename/tablename_ibfk_[number], where [number] is local + to the table; look for the highest [number] for table_to_alter, so + that we can assign to new 
constraints higher numbers. */ + + /* If we are altering a temporary table, the table name after ALTER + TABLE does not correspond to the internal table name, and + table_to_alter is NULL. TODO: should we fix this somehow? */ + + if (table_to_alter == NULL) { + highest_id_so_far = 0; + } else { + highest_id_so_far = dict_table_get_highest_foreign_id( + table_to_alter); + } + + number = highest_id_so_far + 1; + /* Scan for foreign key declarations in a loop */ +loop: + /* Scan either to "CONSTRAINT" or "FOREIGN", whichever is closer */ + + ptr1 = dict_scan_to(ptr, "CONSTRAINT"); + ptr2 = dict_scan_to(ptr, "FOREIGN"); + + constraint_name = NULL; + + if (ptr1 < ptr2) { + /* The user may have specified a constraint name. Pick it so + that we can store 'databasename/constraintname' as the id of + of the constraint to system tables. */ + ptr = ptr1; + + ptr = dict_accept(cs, ptr, "CONSTRAINT", &success); + + ut_a(success); + + if (!my_isspace(cs, *ptr) && *ptr != '"' && *ptr != '`') { + goto loop; + } + + while (my_isspace(cs, *ptr)) { + ptr++; + } + + /* read constraint name unless got "CONSTRAINT FOREIGN" */ + if (ptr != ptr2) { + ptr = dict_scan_id(cs, ptr, heap, + &constraint_name, FALSE, FALSE); + } + } else { + ptr = ptr2; + } + + if (*ptr == '\0') { + /* The proper way to reject foreign keys for temporary + tables would be to split the lexing and syntactical + analysis of foreign key clauses from the actual adding + of them, so that ha_innodb.cc could first parse the SQL + command, determine if there are any foreign keys, and + if so, immediately reject the command if the table is a + temporary one. For now, this kludge will work. 
*/ + if (reject_fks && !local_fk_set.empty()) { + + return(DB_CANNOT_ADD_CONSTRAINT); + } + + /**********************************************************/ + /* The following call adds the foreign key constraints + to the data dictionary system tables on disk */ + + error = dict_create_add_foreigns_to_dictionary( + local_fk_set, table, trx); + + if (error == DB_SUCCESS) { + + table->foreign_set.insert(local_fk_set.begin(), + local_fk_set.end()); + std::for_each(local_fk_set.begin(), + local_fk_set.end(), + dict_foreign_add_to_referenced_table()); + local_fk_set.clear(); + } + return(error); + } + + start_of_latest_foreign = ptr; + + ptr = dict_accept(cs, ptr, "FOREIGN", &success); + + if (!success) { + goto loop; + } + + if (!my_isspace(cs, *ptr)) { + goto loop; + } + + ptr = dict_accept(cs, ptr, "KEY", &success); + + if (!success) { + goto loop; + } + + ptr = dict_accept(cs, ptr, "(", &success); + + if (!success) { + /* MySQL allows also an index id before the '('; we + skip it */ + ptr = dict_skip_word(cs, ptr, &success); + + if (!success) { + dict_foreign_report_syntax_err( + name, start_of_latest_foreign, ptr); + + return(DB_CANNOT_ADD_CONSTRAINT); + } + + ptr = dict_accept(cs, ptr, "(", &success); + + if (!success) { + /* We do not flag a syntax error here because in an + ALTER TABLE we may also have DROP FOREIGN KEY abc */ + + goto loop; + } + } + + i = 0; + + /* Scan the columns in the first list */ +col_loop1: + ut_a(i < (sizeof column_names) / sizeof *column_names); + ptr = dict_scan_col(cs, ptr, &success, table, columns + i, + heap, column_names + i); + if (!success) { + mutex_enter(&dict_foreign_err_mutex); + dict_foreign_error_report_low(ef, name); + fprintf(ef, "%s:\nCannot resolve column name close to:\n%s\n", + start_of_latest_foreign, ptr); + mutex_exit(&dict_foreign_err_mutex); + + return(DB_CANNOT_ADD_CONSTRAINT); + } + + i++; + + ptr = dict_accept(cs, ptr, ",", &success); + + if (success) { + goto col_loop1; + } + + ptr = dict_accept(cs, ptr, ")", 
&success); + + if (!success) { + dict_foreign_report_syntax_err( + name, start_of_latest_foreign, ptr); + return(DB_CANNOT_ADD_CONSTRAINT); + } + + /* Try to find an index which contains the columns + as the first fields and in the right order. There is + no need to check column type match (on types_idx), since + the referenced table can be NULL if foreign_key_checks is + set to 0 */ + + index = dict_foreign_find_index( + table, NULL, column_names, i, NULL, TRUE, FALSE); + + if (!index) { + mutex_enter(&dict_foreign_err_mutex); + dict_foreign_error_report_low(ef, name); + fputs("There is no index in table ", ef); + ut_print_name(ef, NULL, TRUE, name); + fprintf(ef, " where the columns appear\n" + "as the first columns. Constraint:\n%s\n" + "See " REFMAN "innodb-foreign-key-constraints.html\n" + "for correct foreign key definition.\n", + start_of_latest_foreign); + mutex_exit(&dict_foreign_err_mutex); + + return(DB_CHILD_NO_INDEX); + } + ptr = dict_accept(cs, ptr, "REFERENCES", &success); + + if (!success || !my_isspace(cs, *ptr)) { + dict_foreign_report_syntax_err( + name, start_of_latest_foreign, ptr); + return(DB_CANNOT_ADD_CONSTRAINT); + } + + /* Let us create a constraint struct */ + + foreign = dict_mem_foreign_create(); + + if (constraint_name) { + ulint db_len; + + /* Catenate 'databasename/' to the constraint name specified + by the user: we conceive the constraint as belonging to the + same MySQL 'database' as the table itself. We store the name + to foreign->id. 
*/ + + db_len = dict_get_db_name_len(table->name); + + foreign->id = static_cast<char*>(mem_heap_alloc( + foreign->heap, db_len + strlen(constraint_name) + 2)); + + ut_memcpy(foreign->id, table->name, db_len); + foreign->id[db_len] = '/'; + strcpy(foreign->id + db_len + 1, constraint_name); + } + + if (foreign->id == NULL) { + error = dict_create_add_foreign_id(&number, + table->name, foreign); + if (error != DB_SUCCESS) { + dict_foreign_free(foreign); + return(error); + } + } + + std::pair<dict_foreign_set::iterator, bool> ret + = local_fk_set.insert(foreign); + + if (!ret.second) { + /* A duplicate foreign key name has been found */ + dict_foreign_free(foreign); + return(DB_CANNOT_ADD_CONSTRAINT); + } + + foreign->foreign_table = table; + foreign->foreign_table_name = mem_heap_strdup( + foreign->heap, table->name); + dict_mem_foreign_table_name_lookup_set(foreign, TRUE); + + foreign->foreign_index = index; + foreign->n_fields = (unsigned int) i; + + foreign->foreign_col_names = static_cast<const char**>( + mem_heap_alloc(foreign->heap, i * sizeof(void*))); + + for (i = 0; i < foreign->n_fields; i++) { + foreign->foreign_col_names[i] = mem_heap_strdup( + foreign->heap, + dict_table_get_col_name(table, + dict_col_get_no(columns[i]))); + } + + ptr = dict_scan_table_name(cs, ptr, &referenced_table, name, + &success, heap, &referenced_table_name); + + /* Note that referenced_table can be NULL if the user has suppressed + checking of foreign key constraints! 
*/ + + if (!success || (!referenced_table && trx->check_foreigns)) { + mutex_enter(&dict_foreign_err_mutex); + dict_foreign_error_report_low(ef, name); + fprintf(ef, "%s:\nCannot resolve table name close to:\n" + "%s\n", + start_of_latest_foreign, ptr); + mutex_exit(&dict_foreign_err_mutex); + + return(DB_CANNOT_ADD_CONSTRAINT); + } + + ptr = dict_accept(cs, ptr, "(", &success); + + if (!success) { + dict_foreign_report_syntax_err(name, start_of_latest_foreign, + ptr); + return(DB_CANNOT_ADD_CONSTRAINT); + } + + /* Scan the columns in the second list */ + i = 0; + +col_loop2: + ptr = dict_scan_col(cs, ptr, &success, referenced_table, columns + i, + heap, column_names + i); + i++; + + if (!success) { + + mutex_enter(&dict_foreign_err_mutex); + dict_foreign_error_report_low(ef, name); + fprintf(ef, "%s:\nCannot resolve column name close to:\n" + "%s\n", + start_of_latest_foreign, ptr); + mutex_exit(&dict_foreign_err_mutex); + + return(DB_CANNOT_ADD_CONSTRAINT); + } + + ptr = dict_accept(cs, ptr, ",", &success); + + if (success) { + goto col_loop2; + } + + ptr = dict_accept(cs, ptr, ")", &success); + + if (!success || foreign->n_fields != i) { + + dict_foreign_report_syntax_err(name, start_of_latest_foreign, + ptr); + return(DB_CANNOT_ADD_CONSTRAINT); + } + + n_on_deletes = 0; + n_on_updates = 0; + +scan_on_conditions: + /* Loop here as long as we can find ON ... 
conditions */ + + ptr = dict_accept(cs, ptr, "ON", &success); + + if (!success) { + + goto try_find_index; + } + + ptr = dict_accept(cs, ptr, "DELETE", &success); + + if (!success) { + ptr = dict_accept(cs, ptr, "UPDATE", &success); + + if (!success) { + + dict_foreign_report_syntax_err( + name, start_of_latest_foreign, ptr); + return(DB_CANNOT_ADD_CONSTRAINT); + } + + is_on_delete = FALSE; + n_on_updates++; + } else { + is_on_delete = TRUE; + n_on_deletes++; + } + + ptr = dict_accept(cs, ptr, "RESTRICT", &success); + + if (success) { + goto scan_on_conditions; + } + + ptr = dict_accept(cs, ptr, "CASCADE", &success); + + if (success) { + if (is_on_delete) { + foreign->type |= DICT_FOREIGN_ON_DELETE_CASCADE; + } else { + foreign->type |= DICT_FOREIGN_ON_UPDATE_CASCADE; + } + + goto scan_on_conditions; + } + + ptr = dict_accept(cs, ptr, "NO", &success); + + if (success) { + ptr = dict_accept(cs, ptr, "ACTION", &success); + + if (!success) { + dict_foreign_report_syntax_err( + name, start_of_latest_foreign, ptr); + + return(DB_CANNOT_ADD_CONSTRAINT); + } + + if (is_on_delete) { + foreign->type |= DICT_FOREIGN_ON_DELETE_NO_ACTION; + } else { + foreign->type |= DICT_FOREIGN_ON_UPDATE_NO_ACTION; + } + + goto scan_on_conditions; + } + + ptr = dict_accept(cs, ptr, "SET", &success); + + if (!success) { + dict_foreign_report_syntax_err(name, start_of_latest_foreign, + ptr); + return(DB_CANNOT_ADD_CONSTRAINT); + } + + ptr = dict_accept(cs, ptr, "NULL", &success); + + if (!success) { + dict_foreign_report_syntax_err(name, start_of_latest_foreign, + ptr); + return(DB_CANNOT_ADD_CONSTRAINT); + } + + for (j = 0; j < foreign->n_fields; j++) { + if ((dict_index_get_nth_col(foreign->foreign_index, j)->prtype) + & DATA_NOT_NULL) { + + /* It is not sensible to define SET NULL + if the column is not allowed to be NULL! 
*/ + + mutex_enter(&dict_foreign_err_mutex); + dict_foreign_error_report_low(ef, name); + fprintf(ef, "%s:\n" + "You have defined a SET NULL condition" + " though some of the\n" + "columns are defined as NOT NULL.\n", + start_of_latest_foreign); + mutex_exit(&dict_foreign_err_mutex); + + return(DB_CANNOT_ADD_CONSTRAINT); + } + } + + if (is_on_delete) { + foreign->type |= DICT_FOREIGN_ON_DELETE_SET_NULL; + } else { + foreign->type |= DICT_FOREIGN_ON_UPDATE_SET_NULL; + } + + goto scan_on_conditions; + +try_find_index: + if (n_on_deletes > 1 || n_on_updates > 1) { + /* It is an error to define more than 1 action */ + + mutex_enter(&dict_foreign_err_mutex); + dict_foreign_error_report_low(ef, name); + fprintf(ef, "%s:\n" + "You have twice an ON DELETE clause" + " or twice an ON UPDATE clause.\n", + start_of_latest_foreign); + mutex_exit(&dict_foreign_err_mutex); + + return(DB_CANNOT_ADD_CONSTRAINT); + } + + /* Try to find an index which contains the columns as the first fields + and in the right order, and the types are the same as in + foreign->foreign_index */ + + if (referenced_table) { + index = dict_foreign_find_index(referenced_table, NULL, + column_names, i, + foreign->foreign_index, + TRUE, FALSE); + if (!index) { + mutex_enter(&dict_foreign_err_mutex); + dict_foreign_error_report_low(ef, name); + fprintf(ef, "%s:\n" + "Cannot find an index in the" + " referenced table where the\n" + "referenced columns appear as the" + " first columns, or column types\n" + "in the table and the referenced table" + " do not match for constraint.\n" + "Note that the internal storage type of" + " ENUM and SET changed in\n" + "tables created with >= InnoDB-4.1.12," + " and such columns in old tables\n" + "cannot be referenced by such columns" + " in new tables.\n" + "See " REFMAN + "innodb-foreign-key-constraints.html\n" + "for correct foreign key definition.\n", + start_of_latest_foreign); + mutex_exit(&dict_foreign_err_mutex); + + return(DB_PARENT_NO_INDEX); + } + } else { + 
ut_a(trx->check_foreigns == FALSE); + index = NULL; + } + + foreign->referenced_index = index; + foreign->referenced_table = referenced_table; + + foreign->referenced_table_name = mem_heap_strdup( + foreign->heap, referenced_table_name); + dict_mem_referenced_table_name_lookup_set(foreign, TRUE); + + foreign->referenced_col_names = static_cast<const char**>( + mem_heap_alloc(foreign->heap, i * sizeof(void*))); + + for (i = 0; i < foreign->n_fields; i++) { + foreign->referenced_col_names[i] + = mem_heap_strdup(foreign->heap, column_names[i]); + } + + goto loop; +} +/************************************************************************** +Determines whether a string starts with the specified keyword. +@return TRUE if str starts with keyword */ +UNIV_INTERN +ibool +dict_str_starts_with_keyword( +/*=========================*/ + THD* thd, /*!< in: MySQL thread handle */ + const char* str, /*!< in: string to scan for keyword */ + const char* keyword) /*!< in: keyword to look for */ +{ + struct charset_info_st* cs = innobase_get_charset(thd); + ibool success; + + dict_accept(cs, str, keyword, &success); + return(success); +} + +/*********************************************************************//** +Scans a table create SQL string and adds to the data dictionary the foreign +key constraints declared in the string. This function should be called after +the indexes for a table have been created. Each foreign key constraint must +be accompanied with indexes in both participating tables. The indexes are +allowed to contain more fields than mentioned in the constraint. 
+@return error code or DB_SUCCESS */ +UNIV_INTERN +dberr_t +dict_create_foreign_constraints( +/*============================*/ + trx_t* trx, /*!< in: transaction */ + const char* sql_string, /*!< in: table create statement where + foreign keys are declared like: + FOREIGN KEY (a, b) REFERENCES + table2(c, d), table2 can be written + also with the database + name before it: test.table2; the + default database id the database of + parameter name */ + size_t sql_length, /*!< in: length of sql_string */ + const char* name, /*!< in: table full name in the + normalized form + database_name/table_name */ + ibool reject_fks) /*!< in: if TRUE, fail with error + code DB_CANNOT_ADD_CONSTRAINT if + any foreign keys are found. */ +{ + char* str; + dberr_t err; + mem_heap_t* heap; + + ut_a(trx); + ut_a(trx->mysql_thd); + + str = dict_strip_comments(sql_string, sql_length); + heap = mem_heap_create(10000); + + err = dict_create_foreign_constraints_low( + trx, heap, innobase_get_charset(trx->mysql_thd), str, name, + reject_fks); + + mem_heap_free(heap); + mem_free(str); + + return(err); +} + +/**********************************************************************//** +Parses the CONSTRAINT id's to be dropped in an ALTER TABLE statement. 
+@return DB_SUCCESS or DB_CANNOT_DROP_CONSTRAINT if syntax error or the +constraint id does not match */ +UNIV_INTERN +dberr_t +dict_foreign_parse_drop_constraints( +/*================================*/ + mem_heap_t* heap, /*!< in: heap from which we can + allocate memory */ + trx_t* trx, /*!< in: transaction */ + dict_table_t* table, /*!< in: table */ + ulint* n, /*!< out: number of constraints + to drop */ + const char*** constraints_to_drop) /*!< out: id's of the + constraints to drop */ +{ + ibool success; + char* str; + size_t len; + const char* ptr; + const char* id; + struct charset_info_st* cs; + + ut_a(trx); + ut_a(trx->mysql_thd); + + cs = innobase_get_charset(trx->mysql_thd); + + *n = 0; + + *constraints_to_drop = static_cast<const char**>( + mem_heap_alloc(heap, 1000 * sizeof(char*))); + + ptr = innobase_get_stmt(trx->mysql_thd, &len); + + str = dict_strip_comments(ptr, len); + + ptr = str; + + ut_ad(mutex_own(&(dict_sys->mutex))); +loop: + ptr = dict_scan_to(ptr, "DROP"); + + if (*ptr == '\0') { + mem_free(str); + + return(DB_SUCCESS); + } + + ptr = dict_accept(cs, ptr, "DROP", &success); + + if (!my_isspace(cs, *ptr)) { + + goto loop; + } + + ptr = dict_accept(cs, ptr, "FOREIGN", &success); + + if (!success || !my_isspace(cs, *ptr)) { + + goto loop; + } + + ptr = dict_accept(cs, ptr, "KEY", &success); + + if (!success) { + + goto syntax_error; + } + + ptr = dict_scan_id(cs, ptr, heap, &id, FALSE, TRUE); + + if (id == NULL) { + + goto syntax_error; + } + + ut_a(*n < 1000); + (*constraints_to_drop)[*n] = id; + (*n)++; + + if (std::find_if(table->foreign_set.begin(), + table->foreign_set.end(), + dict_foreign_matches_id(id)) + == table->foreign_set.end()) { + + if (!srv_read_only_mode) { + FILE* ef = dict_foreign_err_file; + + mutex_enter(&dict_foreign_err_mutex); + rewind(ef); + ut_print_timestamp(ef); + fputs(" Error in dropping of a foreign key " + "constraint of table ", ef); + ut_print_name(ef, NULL, TRUE, table->name); + fputs(",\nin SQL 
command\n", ef); + fputs(str, ef); + fputs("\nCannot find a constraint with the " + "given id ", ef); + ut_print_name(ef, NULL, FALSE, id); + fputs(".\n", ef); + mutex_exit(&dict_foreign_err_mutex); + } + + mem_free(str); + + return(DB_CANNOT_DROP_CONSTRAINT); + } + + goto loop; + +syntax_error: + if (!srv_read_only_mode) { + FILE* ef = dict_foreign_err_file; + + mutex_enter(&dict_foreign_err_mutex); + rewind(ef); + ut_print_timestamp(ef); + fputs(" Syntax error in dropping of a" + " foreign key constraint of table ", ef); + ut_print_name(ef, NULL, TRUE, table->name); + fprintf(ef, ",\n" + "close to:\n%s\n in SQL command\n%s\n", ptr, str); + mutex_exit(&dict_foreign_err_mutex); + } + + mem_free(str); + + return(DB_CANNOT_DROP_CONSTRAINT); +} + +/*==================== END OF FOREIGN KEY PROCESSING ====================*/ + +/**********************************************************************//** +Returns an index object if it is found in the dictionary cache. +Assumes that dict_sys->mutex is already being held. +@return index, NULL if not found */ +UNIV_INTERN +dict_index_t* +dict_index_get_if_in_cache_low( +/*===========================*/ + index_id_t index_id) /*!< in: index id */ +{ + ut_ad(mutex_own(&(dict_sys->mutex))); + + return(dict_index_find_on_id_low(index_id)); +} + +#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG +/**********************************************************************//** +Returns an index object if it is found in the dictionary cache. 
+@return index, NULL if not found */ +UNIV_INTERN +dict_index_t* +dict_index_get_if_in_cache( +/*=======================*/ + index_id_t index_id) /*!< in: index id */ +{ + dict_index_t* index; + + if (dict_sys == NULL) { + return(NULL); + } + + mutex_enter(&(dict_sys->mutex)); + + index = dict_index_get_if_in_cache_low(index_id); + + mutex_exit(&(dict_sys->mutex)); + + return(index); +} +#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ + +#ifdef UNIV_DEBUG +/**********************************************************************//** +Checks that a tuple has n_fields_cmp value in a sensible range, so that +no comparison can occur with the page number field in a node pointer. +@return TRUE if ok */ +UNIV_INTERN +ibool +dict_index_check_search_tuple( +/*==========================*/ + const dict_index_t* index, /*!< in: index tree */ + const dtuple_t* tuple) /*!< in: tuple used in a search */ +{ + ut_a(index); + ut_a(dtuple_get_n_fields_cmp(tuple) + <= dict_index_get_n_unique_in_tree(index)); + return(TRUE); +} +#endif /* UNIV_DEBUG */ + +/**********************************************************************//** +Builds a node pointer out of a physical record and a page number. 
+@return own: node pointer */ +UNIV_INTERN +dtuple_t* +dict_index_build_node_ptr( +/*======================*/ + const dict_index_t* index, /*!< in: index */ + const rec_t* rec, /*!< in: record for which to build node + pointer */ + ulint page_no,/*!< in: page number to put in node + pointer */ + mem_heap_t* heap, /*!< in: memory heap where pointer + created */ + ulint level) /*!< in: level of rec in tree: + 0 means leaf level */ +{ + dtuple_t* tuple; + dfield_t* field; + byte* buf; + ulint n_unique; + + if (dict_index_is_univ(index)) { + /* In a universal index tree, we take the whole record as + the node pointer if the record is on the leaf level, + on non-leaf levels we remove the last field, which + contains the page number of the child page */ + + ut_a(!dict_table_is_comp(index->table)); + n_unique = rec_get_n_fields_old(rec); + + if (level > 0) { + ut_a(n_unique > 1); + n_unique--; + } + } else { + n_unique = dict_index_get_n_unique_in_tree(index); + } + + tuple = dtuple_create(heap, n_unique + 1); + + /* When searching in the tree for the node pointer, we must not do + comparison on the last field, the page number field, as on upper + levels in the tree there may be identical node pointers with a + different page number; therefore, we set the n_fields_cmp to one + less: */ + + dtuple_set_n_fields_cmp(tuple, n_unique); + + dict_index_copy_types(tuple, index, n_unique); + + buf = static_cast<byte*>(mem_heap_alloc(heap, 4)); + + mach_write_to_4(buf, page_no); + + field = dtuple_get_nth_field(tuple, n_unique); + dfield_set_data(field, buf, 4); + + dtype_set(dfield_get_type(field), DATA_SYS_CHILD, DATA_NOT_NULL, 4); + + rec_copy_prefix_to_dtuple(tuple, rec, index, n_unique, heap); + dtuple_set_info_bits(tuple, dtuple_get_info_bits(tuple) + | REC_STATUS_NODE_PTR); + + ut_ad(dtuple_check_typed(tuple)); + + return(tuple); +} + +/**********************************************************************//** +Copies an initial segment of a physical record, long enough to 
specify an +index entry uniquely. +@return pointer to the prefix record */ +UNIV_INTERN +rec_t* +dict_index_copy_rec_order_prefix( +/*=============================*/ + const dict_index_t* index, /*!< in: index */ + const rec_t* rec, /*!< in: record for which to + copy prefix */ + ulint* n_fields,/*!< out: number of fields copied */ + byte** buf, /*!< in/out: memory buffer for the + copied prefix, or NULL */ + ulint* buf_size)/*!< in/out: buffer size */ +{ + ulint n; + + UNIV_PREFETCH_R(rec); + + if (dict_index_is_univ(index)) { + ut_a(!dict_table_is_comp(index->table)); + n = rec_get_n_fields_old(rec); + } else { + n = dict_index_get_n_unique_in_tree(index); + } + + *n_fields = n; + return(rec_copy_prefix_to_buf(rec, index, n, buf, buf_size)); +} + +/**********************************************************************//** +Builds a typed data tuple out of a physical record. +@return own: data tuple */ +UNIV_INTERN +dtuple_t* +dict_index_build_data_tuple( +/*========================*/ + dict_index_t* index, /*!< in: index tree */ + rec_t* rec, /*!< in: record for which to build data tuple */ + ulint n_fields,/*!< in: number of data fields */ + mem_heap_t* heap) /*!< in: memory heap where tuple created */ +{ + dtuple_t* tuple; + + ut_ad(dict_table_is_comp(index->table) + || n_fields <= rec_get_n_fields_old(rec)); + + tuple = dtuple_create(heap, n_fields); + + dict_index_copy_types(tuple, index, n_fields); + + rec_copy_prefix_to_dtuple(tuple, rec, index, n_fields, heap); + + ut_ad(dtuple_check_typed(tuple)); + + return(tuple); +} + +/*********************************************************************//** +Calculates the minimum record length in an index. 
*/ +UNIV_INTERN +ulint +dict_index_calc_min_rec_len( +/*========================*/ + const dict_index_t* index) /*!< in: index */ +{ + ulint sum = 0; + ulint i; + ulint comp = dict_table_is_comp(index->table); + + if (comp) { + ulint nullable = 0; + sum = REC_N_NEW_EXTRA_BYTES; + for (i = 0; i < dict_index_get_n_fields(index); i++) { + const dict_col_t* col + = dict_index_get_nth_col(index, i); + ulint size = dict_col_get_fixed_size(col, comp); + sum += size; + if (!size) { + size = col->len; + sum += size < 128 ? 1 : 2; + } + if (!(col->prtype & DATA_NOT_NULL)) { + nullable++; + } + } + + /* round the NULL flags up to full bytes */ + sum += UT_BITS_IN_BYTES(nullable); + + return(sum); + } + + for (i = 0; i < dict_index_get_n_fields(index); i++) { + sum += dict_col_get_fixed_size( + dict_index_get_nth_col(index, i), comp); + } + + if (sum > 127) { + sum += 2 * dict_index_get_n_fields(index); + } else { + sum += dict_index_get_n_fields(index); + } + + sum += REC_N_OLD_EXTRA_BYTES; + + return(sum); +} + +/**********************************************************************//** +Prints info of a foreign key constraint. */ +static +void +dict_foreign_print_low( +/*===================*/ + dict_foreign_t* foreign) /*!< in: foreign key constraint */ +{ + ulint i; + + ut_ad(mutex_own(&(dict_sys->mutex))); + + fprintf(stderr, " FOREIGN KEY CONSTRAINT %s: %s (", + foreign->id, foreign->foreign_table_name); + + for (i = 0; i < foreign->n_fields; i++) { + fprintf(stderr, " %s", foreign->foreign_col_names[i]); + } + + fprintf(stderr, " )\n" + " REFERENCES %s (", + foreign->referenced_table_name); + + for (i = 0; i < foreign->n_fields; i++) { + fprintf(stderr, " %s", foreign->referenced_col_names[i]); + } + + fputs(" )\n", stderr); +} + +/**********************************************************************//** +Prints a table data. 
*/ +UNIV_INTERN +void +dict_table_print( +/*=============*/ + dict_table_t* table) /*!< in: table */ +{ + dict_index_t* index; + ulint i; + + ut_ad(mutex_own(&(dict_sys->mutex))); + + dict_table_stats_lock(table, RW_X_LATCH); + + if (!table->stat_initialized) { + dict_stats_update_transient(table); + } + + fprintf(stderr, + "--------------------------------------\n" + "TABLE: name %s, id %llu, flags %lx, columns %lu," + " indexes %lu, appr.rows " UINT64PF "\n" + " COLUMNS: ", + table->name, + (ullint) table->id, + (ulong) table->flags, + (ulong) table->n_cols, + (ulong) UT_LIST_GET_LEN(table->indexes), + table->stat_n_rows); + + for (i = 0; i < (ulint) table->n_cols; i++) { + dict_col_print_low(table, dict_table_get_nth_col(table, i)); + fputs("; ", stderr); + } + + putc('\n', stderr); + + index = UT_LIST_GET_FIRST(table->indexes); + + while (index != NULL) { + dict_index_print_low(index); + index = UT_LIST_GET_NEXT(indexes, index); + } + + dict_table_stats_unlock(table, RW_X_LATCH); + + std::for_each(table->foreign_set.begin(), + table->foreign_set.end(), + dict_foreign_print_low); + + std::for_each(table->referenced_set.begin(), + table->referenced_set.end(), + dict_foreign_print_low); +} + +/**********************************************************************//** +Prints a column data. */ +static +void +dict_col_print_low( +/*===============*/ + const dict_table_t* table, /*!< in: table */ + const dict_col_t* col) /*!< in: column */ +{ + dtype_t type; + + ut_ad(mutex_own(&(dict_sys->mutex))); + + dict_col_copy_type(col, &type); + fprintf(stderr, "%s: ", dict_table_get_col_name(table, + dict_col_get_no(col))); + + dtype_print(&type); +} + +/**********************************************************************//** +Prints an index data. 
*/ +static +void +dict_index_print_low( +/*=================*/ + dict_index_t* index) /*!< in: index */ +{ + ib_int64_t n_vals; + ulint i; + + ut_a(index->table->stat_initialized); + + ut_ad(mutex_own(&(dict_sys->mutex))); + + if (index->n_user_defined_cols > 0) { + n_vals = index->stat_n_diff_key_vals[ + index->n_user_defined_cols - 1]; + } else { + n_vals = index->stat_n_diff_key_vals[0]; + } + + fprintf(stderr, + " INDEX: name %s, id %llu, fields %lu/%lu," + " uniq %lu, type %lu\n" + " root page %lu, appr.key vals %lu," + " leaf pages %lu, size pages %lu\n" + " FIELDS: ", + index->name, + (ullint) index->id, + (ulong) index->n_user_defined_cols, + (ulong) index->n_fields, + (ulong) index->n_uniq, + (ulong) index->type, + (ulong) index->page, + (ulong) n_vals, + (ulong) index->stat_n_leaf_pages, + (ulong) index->stat_index_size); + + for (i = 0; i < index->n_fields; i++) { + dict_field_print_low(dict_index_get_nth_field(index, i)); + } + + putc('\n', stderr); + +#ifdef UNIV_BTR_PRINT + btr_print_size(index); + + btr_print_index(index, 7); +#endif /* UNIV_BTR_PRINT */ +} + +/**********************************************************************//** +Prints a field data. */ +static +void +dict_field_print_low( +/*=================*/ + const dict_field_t* field) /*!< in: field */ +{ + ut_ad(mutex_own(&(dict_sys->mutex))); + + fprintf(stderr, " %s", field->name); + + if (field->prefix_len != 0) { + fprintf(stderr, "(%lu)", (ulong) field->prefix_len); + } +} + +/**********************************************************************//** +Outputs info on a foreign key of a table in a format suitable for +CREATE TABLE. 
*/ +UNIV_INTERN +void +dict_print_info_on_foreign_key_in_create_format( +/*============================================*/ + FILE* file, /*!< in: file where to print */ + trx_t* trx, /*!< in: transaction */ + dict_foreign_t* foreign, /*!< in: foreign key constraint */ + ibool add_newline) /*!< in: whether to add a newline */ +{ + const char* stripped_id; + ulint i; + + if (strchr(foreign->id, '/')) { + /* Strip the preceding database name from the constraint id */ + stripped_id = foreign->id + 1 + + dict_get_db_name_len(foreign->id); + } else { + stripped_id = foreign->id; + } + + putc(',', file); + + if (add_newline) { + /* SHOW CREATE TABLE wants constraints each printed nicely + on its own line, while error messages want no newlines + inserted. */ + fputs("\n ", file); + } + + fputs(" CONSTRAINT ", file); + ut_print_name(file, trx, FALSE, stripped_id); + fputs(" FOREIGN KEY (", file); + + for (i = 0;;) { + ut_print_name(file, trx, FALSE, foreign->foreign_col_names[i]); + if (++i < foreign->n_fields) { + fputs(", ", file); + } else { + break; + } + } + + fputs(") REFERENCES ", file); + + if (dict_tables_have_same_db(foreign->foreign_table_name_lookup, + foreign->referenced_table_name_lookup)) { + /* Do not print the database name of the referenced table */ + ut_print_name(file, trx, TRUE, + dict_remove_db_name( + foreign->referenced_table_name)); + } else { + ut_print_name(file, trx, TRUE, + foreign->referenced_table_name); + } + + putc(' ', file); + putc('(', file); + + for (i = 0;;) { + ut_print_name(file, trx, FALSE, + foreign->referenced_col_names[i]); + if (++i < foreign->n_fields) { + fputs(", ", file); + } else { + break; + } + } + + putc(')', file); + + if (foreign->type & DICT_FOREIGN_ON_DELETE_CASCADE) { + fputs(" ON DELETE CASCADE", file); + } + + if (foreign->type & DICT_FOREIGN_ON_DELETE_SET_NULL) { + fputs(" ON DELETE SET NULL", file); + } + + if (foreign->type & DICT_FOREIGN_ON_DELETE_NO_ACTION) { + fputs(" ON DELETE NO ACTION", file); + } + + if 
(foreign->type & DICT_FOREIGN_ON_UPDATE_CASCADE) { + fputs(" ON UPDATE CASCADE", file); + } + + if (foreign->type & DICT_FOREIGN_ON_UPDATE_SET_NULL) { + fputs(" ON UPDATE SET NULL", file); + } + + if (foreign->type & DICT_FOREIGN_ON_UPDATE_NO_ACTION) { + fputs(" ON UPDATE NO ACTION", file); + } +} + +/**********************************************************************//** +Outputs info on foreign keys of a table. */ +UNIV_INTERN +void +dict_print_info_on_foreign_keys( +/*============================*/ + ibool create_table_format, /*!< in: if TRUE then print in + a format suitable to be inserted into + a CREATE TABLE, otherwise in the format + of SHOW TABLE STATUS */ + FILE* file, /*!< in: file where to print */ + trx_t* trx, /*!< in: transaction */ + dict_table_t* table) /*!< in: table */ +{ + dict_foreign_t* foreign; + + mutex_enter(&(dict_sys->mutex)); + + for (dict_foreign_set::iterator it = table->foreign_set.begin(); + it != table->foreign_set.end(); + ++it) { + + foreign = *it; + + if (create_table_format) { + dict_print_info_on_foreign_key_in_create_format( + file, trx, foreign, TRUE); + } else { + ulint i; + fputs("; (", file); + + for (i = 0; i < foreign->n_fields; i++) { + if (i) { + putc(' ', file); + } + + ut_print_name(file, trx, FALSE, + foreign->foreign_col_names[i]); + } + + fputs(") REFER ", file); + ut_print_name(file, trx, TRUE, + foreign->referenced_table_name); + putc('(', file); + + for (i = 0; i < foreign->n_fields; i++) { + if (i) { + putc(' ', file); + } + ut_print_name( + file, trx, FALSE, + foreign->referenced_col_names[i]); + } + + putc(')', file); + + if (foreign->type == DICT_FOREIGN_ON_DELETE_CASCADE) { + fputs(" ON DELETE CASCADE", file); + } + + if (foreign->type == DICT_FOREIGN_ON_DELETE_SET_NULL) { + fputs(" ON DELETE SET NULL", file); + } + + if (foreign->type & DICT_FOREIGN_ON_DELETE_NO_ACTION) { + fputs(" ON DELETE NO ACTION", file); + } + + if (foreign->type & DICT_FOREIGN_ON_UPDATE_CASCADE) { + fputs(" ON UPDATE CASCADE", 
file); + } + + if (foreign->type & DICT_FOREIGN_ON_UPDATE_SET_NULL) { + fputs(" ON UPDATE SET NULL", file); + } + + if (foreign->type & DICT_FOREIGN_ON_UPDATE_NO_ACTION) { + fputs(" ON UPDATE NO ACTION", file); + } + } + } + + mutex_exit(&(dict_sys->mutex)); +} + +/********************************************************************//** +Displays the names of the index and the table. */ +UNIV_INTERN +void +dict_index_name_print( +/*==================*/ + FILE* file, /*!< in: output stream */ + const trx_t* trx, /*!< in: transaction */ + const dict_index_t* index) /*!< in: index to print */ +{ + fputs("index ", file); + ut_print_name(file, trx, FALSE, index->name); + fputs(" of table ", file); + ut_print_name(file, trx, TRUE, index->table_name); +} + +/**********************************************************************//** +Find a table in dict_sys->table_LRU list with specified space id +@return table if found, NULL if not */ +static +dict_table_t* +dict_find_table_by_space( +/*=====================*/ + ulint space_id) /*!< in: space ID */ +{ + dict_table_t* table; + ulint num_item; + ulint count = 0; + + ut_ad(space_id > 0); + + if (dict_sys == NULL) { + /* This could happen when it's in redo processing. */ + return(NULL); + } + + table = UT_LIST_GET_FIRST(dict_sys->table_LRU); + num_item = UT_LIST_GET_LEN(dict_sys->table_LRU); + + /* This function intentionally does not acquire mutex as it is used + by error handling code in deep call stack as last means to avoid + killing the server, so it worth to risk some consequencies for + the action. 
*/ + while (table && count < num_item) { + if (table->space == space_id) { + return(table); + } + + table = UT_LIST_GET_NEXT(table_LRU, table); + count++; + } + + return(NULL); +} + +/**********************************************************************//** +Flags a table with specified space_id corrupted in the data dictionary +cache +@return TRUE if successful */ +UNIV_INTERN +ibool +dict_set_corrupted_by_space( +/*========================*/ + ulint space_id) /*!< in: space ID */ +{ + dict_table_t* table; + + table = dict_find_table_by_space(space_id); + + if (!table) { + return(FALSE); + } + + /* mark the table->corrupted bit only, since the caller + could be too deep in the stack for SYS_INDEXES update */ + table->corrupted = TRUE; + + return(TRUE); +} + +/**********************************************************************//** +Flags an index corrupted both in the data dictionary cache +and in the SYS_INDEXES */ +UNIV_INTERN +void +dict_set_corrupted( +/*===============*/ + dict_index_t* index, /*!< in/out: index */ + trx_t* trx, /*!< in/out: transaction */ + const char* ctx) /*!< in: context */ +{ + mem_heap_t* heap; + mtr_t mtr; + dict_index_t* sys_index; + dtuple_t* tuple; + dfield_t* dfield; + byte* buf; + char* table_name; + const char* status; + btr_cur_t cursor; + bool locked = RW_X_LATCH == trx->dict_operation_lock_mode; + + if (!locked) { + row_mysql_lock_data_dictionary(trx); + } + + ut_ad(index); + ut_ad(mutex_own(&dict_sys->mutex)); + ut_ad(!dict_table_is_comp(dict_sys->sys_tables)); + ut_ad(!dict_table_is_comp(dict_sys->sys_indexes)); + +#ifdef UNIV_SYNC_DEBUG + ut_ad(sync_thread_levels_empty_except_dict()); +#endif + + /* Mark the table as corrupted only if the clustered index + is corrupted */ + if (dict_index_is_clust(index)) { + index->table->corrupted = TRUE; + } + + if (index->type & DICT_CORRUPT) { + /* The index was already flagged corrupted. 
*/ + ut_ad(!dict_index_is_clust(index) || index->table->corrupted); + goto func_exit; + } + + heap = mem_heap_create(sizeof(dtuple_t) + 2 * (sizeof(dfield_t) + + sizeof(que_fork_t) + sizeof(upd_node_t) + + sizeof(upd_t) + 12)); + mtr_start(&mtr); + index->type |= DICT_CORRUPT; + + sys_index = UT_LIST_GET_FIRST(dict_sys->sys_indexes->indexes); + + /* Find the index row in SYS_INDEXES */ + tuple = dtuple_create(heap, 2); + + dfield = dtuple_get_nth_field(tuple, 0); + buf = static_cast<byte*>(mem_heap_alloc(heap, 8)); + mach_write_to_8(buf, index->table->id); + dfield_set_data(dfield, buf, 8); + + dfield = dtuple_get_nth_field(tuple, 1); + buf = static_cast<byte*>(mem_heap_alloc(heap, 8)); + mach_write_to_8(buf, index->id); + dfield_set_data(dfield, buf, 8); + + dict_index_copy_types(tuple, sys_index, 2); + + btr_cur_search_to_nth_level(sys_index, 0, tuple, PAGE_CUR_LE, + BTR_MODIFY_LEAF, + &cursor, 0, __FILE__, __LINE__, &mtr); + + if (cursor.low_match == dtuple_get_n_fields(tuple)) { + /* UPDATE SYS_INDEXES SET TYPE=index->type + WHERE TABLE_ID=index->table->id AND INDEX_ID=index->id */ + ulint len; + byte* field = rec_get_nth_field_old( + btr_cur_get_rec(&cursor), + DICT_FLD__SYS_INDEXES__TYPE, &len); + if (len != 4) { + goto fail; + } + mlog_write_ulint(field, index->type, MLOG_4BYTES, &mtr); + status = "Flagged"; + } else { +fail: + status = "Unable to flag"; + } + + mtr_commit(&mtr); + mem_heap_empty(heap); + table_name = static_cast<char*>(mem_heap_alloc(heap, FN_REFLEN + 1)); + *innobase_convert_name( + table_name, FN_REFLEN, + index->table_name, strlen(index->table_name), + NULL, TRUE) = 0; + + ib_logf(IB_LOG_LEVEL_ERROR, "%s corruption of %s in table %s in %s", + status, index->name, table_name, ctx); + + mem_heap_free(heap); + +func_exit: + if (!locked) { + row_mysql_unlock_data_dictionary(trx); + } +} + +/**********************************************************************//** +Flags an index corrupted in the data dictionary cache only. 
This +is used mostly to mark a corrupted index when index's own dictionary +is corrupted, and we force to load such index for repair purpose */ +UNIV_INTERN +void +dict_set_corrupted_index_cache_only( +/*================================*/ + dict_index_t* index, /*!< in/out: index */ + dict_table_t* table) /*!< in/out: table */ +{ + ut_ad(index); + ut_ad(mutex_own(&dict_sys->mutex)); + ut_ad(!dict_table_is_comp(dict_sys->sys_tables)); + ut_ad(!dict_table_is_comp(dict_sys->sys_indexes)); + + /* Mark the table as corrupted only if the clustered index + is corrupted */ + if (dict_index_is_clust(index)) { + dict_table_t* corrupt_table; + + corrupt_table = table ? table : index->table; + ut_ad(!index->table || !table || index->table == table); + + if (corrupt_table) { + corrupt_table->corrupted = TRUE; + } + } + + index->type |= DICT_CORRUPT; +} + +/************************************************************************* +set is_corrupt flag by space_id*/ + +void +dict_table_set_corrupt_by_space( +/*============================*/ + ulint space_id, + ibool need_mutex) +{ + dict_table_t* table; + ibool found = FALSE; + + ut_a(space_id != 0 && space_id < SRV_LOG_SPACE_FIRST_ID); + + if (need_mutex) + mutex_enter(&(dict_sys->mutex)); + + table = UT_LIST_GET_FIRST(dict_sys->table_LRU); + + while (table) { + if (table->space == space_id) { + table->is_corrupt = TRUE; + found = TRUE; + } + + table = UT_LIST_GET_NEXT(table_LRU, table); + } + + if (need_mutex) + mutex_exit(&(dict_sys->mutex)); + + if (!found) { + fprintf(stderr, "InnoDB: space to be marked as " + "crashed was not found for id " ULINTPF ".\n", + space_id); + } +} +#endif /* !UNIV_HOTBACKUP */ + +/**********************************************************************//** +Inits dict_ind_redundant and dict_ind_compact. 
*/ +UNIV_INTERN +void +dict_ind_init(void) +/*===============*/ +{ + dict_table_t* table; + + /* create dummy table and index for REDUNDANT infimum and supremum */ + table = dict_mem_table_create("SYS_DUMMY1", DICT_HDR_SPACE, 1, 0, 0, + true); + dict_mem_table_add_col(table, NULL, NULL, DATA_CHAR, + DATA_ENGLISH | DATA_NOT_NULL, 8); + + dict_ind_redundant = dict_mem_index_create("SYS_DUMMY1", "SYS_DUMMY1", + DICT_HDR_SPACE, 0, 1); + dict_index_add_col(dict_ind_redundant, table, + dict_table_get_nth_col(table, 0), 0); + dict_ind_redundant->table = table; + + /* create dummy table and index for COMPACT infimum and supremum */ + table = dict_mem_table_create("SYS_DUMMY2", + DICT_HDR_SPACE, 1, + DICT_TF_COMPACT, 0, true); + dict_mem_table_add_col(table, NULL, NULL, DATA_CHAR, + DATA_ENGLISH | DATA_NOT_NULL, 8); + dict_ind_compact = dict_mem_index_create("SYS_DUMMY2", "SYS_DUMMY2", + DICT_HDR_SPACE, 0, 1); + dict_index_add_col(dict_ind_compact, table, + dict_table_get_nth_col(table, 0), 0); + dict_ind_compact->table = table; + + /* avoid ut_ad(index->cached) in dict_index_get_n_unique_in_tree */ + dict_ind_redundant->cached = dict_ind_compact->cached = TRUE; +} + +#ifndef UNIV_HOTBACKUP +/**********************************************************************//** +Frees dict_ind_redundant and dict_ind_compact. 
*/ +static +void +dict_ind_free(void) +/*===============*/ +{ + dict_table_t* table; + + table = dict_ind_compact->table; + dict_mem_index_free(dict_ind_compact); + dict_ind_compact = NULL; + dict_mem_table_free(table); + + table = dict_ind_redundant->table; + dict_mem_index_free(dict_ind_redundant); + dict_ind_redundant = NULL; + dict_mem_table_free(table); +} + +/**********************************************************************//** +Get index by name +@return index, NULL if does not exist */ +UNIV_INTERN +dict_index_t* +dict_table_get_index_on_name( +/*=========================*/ + dict_table_t* table, /*!< in: table */ + const char* name) /*!< in: name of the index to find */ +{ + dict_index_t* index; + + /* If name is NULL, just return */ + if (!name) { + return(NULL); + } + + index = dict_table_get_first_index(table); + + while (index != NULL) { + if (innobase_strcasecmp(index->name, name) == 0) { + + return(index); + } + + index = dict_table_get_next_index(index); + } + + return(NULL); +} + +/**********************************************************************//** +Replace the index passed in with another equivalent index in the +foreign key lists of the table. 
+@return whether all replacements were found */ +UNIV_INTERN +bool +dict_foreign_replace_index( +/*=======================*/ + dict_table_t* table, /*!< in/out: table */ + const char** col_names, + /*!< in: column names, or NULL + to use table->col_names */ + const dict_index_t* index) /*!< in: index to be replaced */ +{ + bool found = true; + dict_foreign_t* foreign; + + ut_ad(index->to_be_dropped); + ut_ad(index->table == table); + + for (dict_foreign_set::iterator it = table->foreign_set.begin(); + it != table->foreign_set.end(); + ++it) { + + foreign = *it; + if (foreign->foreign_index == index) { + ut_ad(foreign->foreign_table == index->table); + + dict_index_t* new_index = dict_foreign_find_index( + foreign->foreign_table, col_names, + foreign->foreign_col_names, + foreign->n_fields, index, + /*check_charsets=*/TRUE, /*check_null=*/FALSE); + if (new_index) { + ut_ad(new_index->table == index->table); + ut_ad(!new_index->to_be_dropped); + } else { + found = false; + } + + foreign->foreign_index = new_index; + } + } + + for (dict_foreign_set::iterator it = table->referenced_set.begin(); + it != table->referenced_set.end(); + ++it) { + + foreign = *it; + if (foreign->referenced_index == index) { + ut_ad(foreign->referenced_table == index->table); + + dict_index_t* new_index = dict_foreign_find_index( + foreign->referenced_table, NULL, + foreign->referenced_col_names, + foreign->n_fields, index, + /*check_charsets=*/TRUE, /*check_null=*/FALSE); + /* There must exist an alternative index, + since this must have been checked earlier. */ + if (new_index) { + ut_ad(new_index->table == index->table); + ut_ad(!new_index->to_be_dropped); + } else { + found = false; + } + + foreign->referenced_index = new_index; + } + } + + return(found); +} + +/**********************************************************************//** +In case there is more than one index with the same name return the index +with the min(id). 
+@return index, NULL if does not exist */ +UNIV_INTERN +dict_index_t* +dict_table_get_index_on_name_and_min_id( +/*=====================================*/ + dict_table_t* table, /*!< in: table */ + const char* name) /*!< in: name of the index to find */ +{ + dict_index_t* index; + dict_index_t* min_index; /* Index with matching name and min(id) */ + + min_index = NULL; + index = dict_table_get_first_index(table); + + while (index != NULL) { + if (ut_strcmp(index->name, name) == 0) { + if (!min_index || index->id < min_index->id) { + + min_index = index; + } + } + + index = dict_table_get_next_index(index); + } + + return(min_index); + +} + +#ifdef UNIV_DEBUG +/**********************************************************************//** +Check for duplicate index entries in a table [using the index name] */ +UNIV_INTERN +void +dict_table_check_for_dup_indexes( +/*=============================*/ + const dict_table_t* table, /*!< in: Check for dup indexes + in this table */ + enum check_name check) /*!< in: whether and when to allow + temporary index names */ +{ + /* Check for duplicates, ignoring indexes that are marked + as to be dropped */ + + const dict_index_t* index1; + const dict_index_t* index2; + + ut_ad(mutex_own(&dict_sys->mutex)); + + /* The primary index _must_ exist */ + ut_a(UT_LIST_GET_LEN(table->indexes) > 0); + + index1 = UT_LIST_GET_FIRST(table->indexes); + + do { + if (*index1->name == TEMP_INDEX_PREFIX) { + ut_a(!dict_index_is_clust(index1)); + + switch (check) { + case CHECK_ALL_COMPLETE: + ut_error; + case CHECK_ABORTED_OK: + switch (dict_index_get_online_status(index1)) { + case ONLINE_INDEX_COMPLETE: + case ONLINE_INDEX_CREATION: + ut_error; + break; + case ONLINE_INDEX_ABORTED: + case ONLINE_INDEX_ABORTED_DROPPED: + break; + } + /* fall through */ + case CHECK_PARTIAL_OK: + break; + } + } + + for (index2 = UT_LIST_GET_NEXT(indexes, index1); + index2 != NULL; + index2 = UT_LIST_GET_NEXT(indexes, index2)) { + ut_ad(ut_strcmp(index1->name, 
index2->name)); + } + + index1 = UT_LIST_GET_NEXT(indexes, index1); + } while (index1); +} +#endif /* UNIV_DEBUG */ + +/** Auxiliary macro used inside dict_table_schema_check(). */ +#define CREATE_TYPES_NAMES() \ + dtype_sql_name((unsigned) req_schema->columns[i].mtype, \ + (unsigned) req_schema->columns[i].prtype_mask, \ + (unsigned) req_schema->columns[i].len, \ + req_type, sizeof(req_type)); \ + dtype_sql_name(table->cols[j].mtype, \ + table->cols[j].prtype, \ + table->cols[j].len, \ + actual_type, sizeof(actual_type)) + +/*********************************************************************//** +Checks whether a table exists and whether it has the given structure. +The table must have the same number of columns with the same names and +types. The order of the columns does not matter. +The caller must own the dictionary mutex. +dict_table_schema_check() @{ +@return DB_SUCCESS if the table exists and contains the necessary columns */ +UNIV_INTERN +dberr_t +dict_table_schema_check( +/*====================*/ + dict_table_schema_t* req_schema, /*!< in/out: required table + schema */ + char* errstr, /*!< out: human readable error + message if != DB_SUCCESS is + returned */ + size_t errstr_sz) /*!< in: errstr size */ +{ + char buf[MAX_FULL_NAME_LEN]; + char req_type[64]; + char actual_type[64]; + dict_table_t* table; + ulint i; + + ut_ad(mutex_own(&dict_sys->mutex)); + + table = dict_table_get_low(req_schema->table_name); + + if (table == NULL) { + /* no such table */ + + ut_snprintf(errstr, errstr_sz, + "Table %s not found.", + ut_format_name(req_schema->table_name, + TRUE, buf, sizeof(buf))); + + return(DB_TABLE_NOT_FOUND); + } + + if (table->ibd_file_missing) { + /* missing tablespace */ + + ut_snprintf(errstr, errstr_sz, + "Tablespace for table %s is missing.", + ut_format_name(req_schema->table_name, + TRUE, buf, sizeof(buf))); + + return(DB_TABLE_NOT_FOUND); + } + + if ((ulint) table->n_def - DATA_N_SYS_COLS != req_schema->n_cols) { + /* the table has a 
different number of columns than + required */ + + ut_snprintf(errstr, errstr_sz, + "%s has %d columns but should have %lu.", + ut_format_name(req_schema->table_name, + TRUE, buf, sizeof(buf)), + table->n_def - DATA_N_SYS_COLS, + req_schema->n_cols); + + return(DB_ERROR); + } + + /* For each column from req_schema->columns[] search + whether it is present in table->cols[]. + The following algorithm is O(n_cols^2), but is optimized to + be O(n_cols) if the columns are in the same order in both arrays. */ + + for (i = 0; i < req_schema->n_cols; i++) { + ulint j; + + /* check if i'th column is the same in both arrays */ + if (innobase_strcasecmp(req_schema->columns[i].name, + dict_table_get_col_name(table, i)) == 0) { + + /* we found the column in table->cols[] quickly */ + j = i; + } else { + + /* columns in both arrays are not in the same order, + do a full scan of the second array */ + for (j = 0; j < table->n_def; j++) { + const char* name; + + name = dict_table_get_col_name(table, j); + + if (innobase_strcasecmp(name, + req_schema->columns[i].name) == 0) { + + /* found the column on j'th + position */ + break; + } + } + + if (j == table->n_def) { + + ut_snprintf(errstr, errstr_sz, + "required column %s " + "not found in table %s.", + req_schema->columns[i].name, + ut_format_name( + req_schema->table_name, + TRUE, buf, sizeof(buf))); + + return(DB_ERROR); + } + } + + /* we found a column with the same name on j'th position, + compare column types and flags */ + + /* check length for exact match */ + if (req_schema->columns[i].len != table->cols[j].len) { + + CREATE_TYPES_NAMES(); + + ut_snprintf(errstr, errstr_sz, + "Column %s in table %s is %s " + "but should be %s (length mismatch).", + req_schema->columns[i].name, + ut_format_name(req_schema->table_name, + TRUE, buf, sizeof(buf)), + actual_type, req_type); + + return(DB_ERROR); + } + + /* check mtype for exact match */ + if (req_schema->columns[i].mtype != table->cols[j].mtype) { + + CREATE_TYPES_NAMES(); + + 
ut_snprintf(errstr, errstr_sz, + "Column %s in table %s is %s " + "but should be %s (type mismatch).", + req_schema->columns[i].name, + ut_format_name(req_schema->table_name, + TRUE, buf, sizeof(buf)), + actual_type, req_type); + + return(DB_ERROR); + } + + /* check whether required prtype mask is set */ + if (req_schema->columns[i].prtype_mask != 0 + && (table->cols[j].prtype + & req_schema->columns[i].prtype_mask) + != req_schema->columns[i].prtype_mask) { + + CREATE_TYPES_NAMES(); + + ut_snprintf(errstr, errstr_sz, + "Column %s in table %s is %s " + "but should be %s (flags mismatch).", + req_schema->columns[i].name, + ut_format_name(req_schema->table_name, + TRUE, buf, sizeof(buf)), + actual_type, req_type); + + return(DB_ERROR); + } + } + + if (req_schema->n_foreign != table->foreign_set.size()) { + ut_snprintf( + errstr, errstr_sz, + "Table %s has " ULINTPF " foreign key(s) pointing" + " to other tables, but it must have %lu.", + ut_format_name(req_schema->table_name, + TRUE, buf, sizeof(buf)), + static_cast<ulint>(table->foreign_set.size()), + req_schema->n_foreign); + return(DB_ERROR); + } + + if (req_schema->n_referenced != table->referenced_set.size()) { + ut_snprintf( + errstr, errstr_sz, + "There are " ULINTPF " foreign key(s) pointing to %s, " + "but there must be %lu.", + static_cast<ulint>(table->referenced_set.size()), + ut_format_name(req_schema->table_name, + TRUE, buf, sizeof(buf)), + req_schema->n_referenced); + return(DB_ERROR); + } + + return(DB_SUCCESS); +} +/* @} */ + +/*********************************************************************//** +Converts a database and table name from filesystem encoding +(e.g. d@i1b/a@q1b@1Kc, same format as used in dict_table_t::name) in two +strings in UTF8 encoding (e.g. dцb and aюbØc). The output buffers must be +at least MAX_DB_UTF8_LEN and MAX_TABLE_UTF8_LEN bytes. */ +UNIV_INTERN +void +dict_fs2utf8( +/*=========*/ + const char* db_and_table, /*!< in: database and table names, + e.g. 
d@i1b/a@q1b@1Kc */ + char* db_utf8, /*!< out: database name, e.g. dцb */ + size_t db_utf8_size, /*!< in: dbname_utf8 size */ + char* table_utf8, /*!< out: table name, e.g. aюbØc */ + size_t table_utf8_size)/*!< in: table_utf8 size */ +{ + char db[MAX_DATABASE_NAME_LEN + 1]; + ulint db_len; + uint errors; + + db_len = dict_get_db_name_len(db_and_table); + + ut_a(db_len <= sizeof(db)); + + memcpy(db, db_and_table, db_len); + db[db_len] = '\0'; + + strconvert( + &my_charset_filename, db, system_charset_info, + db_utf8, static_cast<uint>(db_utf8_size), &errors); + + /* convert each # to @0023 in table name and store the result in buf */ + const char* table = dict_remove_db_name(db_and_table); + const char* table_p; + char buf[MAX_TABLE_NAME_LEN * 5 + 1]; + char* buf_p; + for (table_p = table, buf_p = buf; table_p[0] != '\0'; table_p++) { + if (table_p[0] != '#') { + buf_p[0] = table_p[0]; + buf_p++; + } else { + buf_p[0] = '@'; + buf_p[1] = '0'; + buf_p[2] = '0'; + buf_p[3] = '2'; + buf_p[4] = '3'; + buf_p += 5; + } + ut_a((size_t) (buf_p - buf) < sizeof(buf)); + } + buf_p[0] = '\0'; + + errors = 0; + strconvert( + &my_charset_filename, buf, system_charset_info, + table_utf8, static_cast<uint>(table_utf8_size), + &errors); + + if (errors != 0) { + ut_snprintf(table_utf8, table_utf8_size, "%s%s", + srv_mysql50_table_name_prefix, table); + } +} + +/**********************************************************************//** +Closes the data dictionary module. */ +UNIV_INTERN +void +dict_close(void) +/*============*/ +{ + ulint i; + + /* Free the hash elements. We don't remove them from the table + because we are going to destroy the table anyway. 
*/ + for (i = 0; i < hash_get_n_cells(dict_sys->table_hash); i++) { + dict_table_t* table; + + table = static_cast<dict_table_t*>( + HASH_GET_FIRST(dict_sys->table_hash, i)); + + while (table) { + dict_table_t* prev_table = table; + + table = static_cast<dict_table_t*>( + HASH_GET_NEXT(name_hash, prev_table)); +#ifdef UNIV_DEBUG + ut_a(prev_table->magic_n == DICT_TABLE_MAGIC_N); +#endif + /* Acquire only because it's a pre-condition. */ + mutex_enter(&dict_sys->mutex); + + dict_table_remove_from_cache(prev_table); + + mutex_exit(&dict_sys->mutex); + } + } + + hash_table_free(dict_sys->table_hash); + + /* The elements are the same instance as in dict_sys->table_hash, + therefore we don't delete the individual elements. */ + hash_table_free(dict_sys->table_id_hash); + + dict_ind_free(); + + mutex_free(&dict_sys->mutex); + + rw_lock_free(&dict_operation_lock); + memset(&dict_operation_lock, 0x0, sizeof(dict_operation_lock)); + + if (!srv_read_only_mode) { + mutex_free(&dict_foreign_err_mutex); + } + + mem_free(dict_sys); + dict_sys = NULL; +} + +#ifdef UNIV_DEBUG +/**********************************************************************//** +Validate the dictionary table LRU list. +@return TRUE if valid */ +static +ibool +dict_lru_validate(void) +/*===================*/ +{ + dict_table_t* table; + + ut_ad(mutex_own(&dict_sys->mutex)); + + for (table = UT_LIST_GET_FIRST(dict_sys->table_LRU); + table != NULL; + table = UT_LIST_GET_NEXT(table_LRU, table)) { + + ut_a(table->can_be_evicted); + } + + for (table = UT_LIST_GET_FIRST(dict_sys->table_non_LRU); + table != NULL; + table = UT_LIST_GET_NEXT(table_LRU, table)) { + + ut_a(!table->can_be_evicted); + } + + return(TRUE); +} + +/**********************************************************************//** +Check if a table exists in the dict table LRU list. 
+@return TRUE if table found in LRU list */ +static +ibool +dict_lru_find_table( +/*================*/ + const dict_table_t* find_table) /*!< in: table to find */ +{ + dict_table_t* table; + + ut_ad(find_table != NULL); + ut_ad(mutex_own(&dict_sys->mutex)); + + for (table = UT_LIST_GET_FIRST(dict_sys->table_LRU); + table != NULL; + table = UT_LIST_GET_NEXT(table_LRU, table)) { + + ut_a(table->can_be_evicted); + + if (table == find_table) { + return(TRUE); + } + } + + return(FALSE); +} + +/**********************************************************************//** +Check if a table exists in the dict table non-LRU list. +@return TRUE if table found in non-LRU list */ +static +ibool +dict_non_lru_find_table( +/*====================*/ + const dict_table_t* find_table) /*!< in: table to find */ +{ + dict_table_t* table; + + ut_ad(find_table != NULL); + ut_ad(mutex_own(&dict_sys->mutex)); + + for (table = UT_LIST_GET_FIRST(dict_sys->table_non_LRU); + table != NULL; + table = UT_LIST_GET_NEXT(table_LRU, table)) { + + ut_a(!table->can_be_evicted); + + if (table == find_table) { + return(TRUE); + } + } + + return(FALSE); +} +#endif /* UNIV_DEBUG */ +/*********************************************************************//** +Check an index to see whether its first fields are the columns in the array, +in the same order and is not marked for deletion and is not the same +as types_idx. +@return true if the index qualifies, otherwise false */ +UNIV_INTERN +bool +dict_foreign_qualify_index( +/*=======================*/ + const dict_table_t* table, /*!< in: table */ + const char** col_names, + /*!< in: column names, or NULL + to use table->col_names */ + const char** columns,/*!< in: array of column names */ + ulint n_cols, /*!< in: number of columns */ + const dict_index_t* index, /*!< in: index to check */ + const dict_index_t* types_idx, + /*!< in: NULL or an index + whose types the column types + must match */ + bool check_charsets, + /*!< in: whether to check + charsets. 
only has an effect + if types_idx != NULL */ + ulint check_null) + /*!< in: nonzero if none of + the columns must be declared + NOT NULL */ +{ + if (dict_index_get_n_fields(index) < n_cols) { + return(false); + } + + for (ulint i = 0; i < n_cols; i++) { + dict_field_t* field; + const char* col_name; + ulint col_no; + + field = dict_index_get_nth_field(index, i); + col_no = dict_col_get_no(field->col); + + if (field->prefix_len != 0) { + /* We do not accept column prefix + indexes here */ + return(false); + } + + if (check_null + && (field->col->prtype & DATA_NOT_NULL)) { + return(false); + } + + col_name = col_names + ? col_names[col_no] + : dict_table_get_col_name(table, col_no); + + if (0 != innobase_strcasecmp(columns[i], col_name)) { + return(false); + } + + if (types_idx && !cmp_cols_are_equal( + dict_index_get_nth_col(index, i), + dict_index_get_nth_col(types_idx, i), + check_charsets)) { + return(false); + } + } + + return(true); +} + +/*********************************************************************//** +Update the state of compression failure padding heuristics. This is +called whenever a compression operation succeeds or fails. +The caller must be holding info->mutex */ +static +void +dict_index_zip_pad_update( +/*======================*/ + zip_pad_info_t* info, /*<! in/out: info to be updated */ + ulint zip_threshold) /*<! in: zip threshold value */ +{ + ulint total; + ulint fail_pct; + + ut_ad(info); + + total = info->success + info->failure; + + ut_ad(total > 0); + + if(zip_threshold == 0) { + /* User has just disabled the padding. */ + return; + } + + if (total < ZIP_PAD_ROUND_LEN) { + /* We are in middle of a round. Do nothing. */ + return; + } + + /* We are at a 'round' boundary. Reset the values but first + calculate fail rate for our heuristic. */ + fail_pct = (info->failure * 100) / total; + info->failure = 0; + info->success = 0; + + if (fail_pct > zip_threshold) { + /* Compression failures are more then user defined + threshold. 
Increase the pad size to reduce chances of + compression failures. */ + ut_ad(info->pad % ZIP_PAD_INCR == 0); + + /* Only do increment if it won't increase padding + beyond max pad size. */ + if (info->pad + ZIP_PAD_INCR + < (UNIV_PAGE_SIZE * zip_pad_max) / 100) { +#ifdef HAVE_ATOMIC_BUILTINS + /* Use atomics even though we have the mutex. + This is to ensure that we are able to read + info->pad atomically where atomics are + supported. */ + os_atomic_increment_ulint(&info->pad, ZIP_PAD_INCR); +#else /* HAVE_ATOMIC_BUILTINS */ + info->pad += ZIP_PAD_INCR; +#endif /* HAVE_ATOMIC_BUILTINS */ + + MONITOR_INC(MONITOR_PAD_INCREMENTS); + } + + info->n_rounds = 0; + + } else { + /* Failure rate was OK. Another successful round + completed. */ + ++info->n_rounds; + + /* If enough successful rounds are completed with + compression failure rate in control, decrease the + padding. */ + if (info->n_rounds >= ZIP_PAD_SUCCESSFUL_ROUND_LIMIT + && info->pad > 0) { + + ut_ad(info->pad % ZIP_PAD_INCR == 0); +#ifdef HAVE_ATOMIC_BUILTINS + /* Use atomics even though we have the mutex. + This is to ensure that we are able to read + info->pad atomically where atomics are + supported. */ + os_atomic_decrement_ulint(&info->pad, ZIP_PAD_INCR); +#else /* HAVE_ATOMIC_BUILTINS */ + info->pad -= ZIP_PAD_INCR; +#endif /* HAVE_ATOMIC_BUILTINS */ + + info->n_rounds = 0; + + MONITOR_INC(MONITOR_PAD_DECREMENTS); + } + } +} + +/*********************************************************************//** +This function should be called whenever a page is successfully +compressed. Updates the compression padding information. */ +UNIV_INTERN +void +dict_index_zip_success( +/*===================*/ + dict_index_t* index) /*!< in/out: index to be updated. */ +{ + ut_ad(index); + + ulint zip_threshold = zip_failure_threshold_pct; + if (!zip_threshold) { + /* Disabled by user. 
*/ + return; + } + + os_fast_mutex_lock(&index->zip_pad.mutex); + ++index->zip_pad.success; + dict_index_zip_pad_update(&index->zip_pad, zip_threshold); + os_fast_mutex_unlock(&index->zip_pad.mutex); +} + +/*********************************************************************//** +This function should be called whenever a page compression attempt +fails. Updates the compression padding information. */ +UNIV_INTERN +void +dict_index_zip_failure( +/*===================*/ + dict_index_t* index) /*!< in/out: index to be updated. */ +{ + ut_ad(index); + + ulint zip_threshold = zip_failure_threshold_pct; + if (!zip_threshold) { + /* Disabled by user. */ + return; + } + + os_fast_mutex_lock(&index->zip_pad.mutex); + ++index->zip_pad.failure; + dict_index_zip_pad_update(&index->zip_pad, zip_threshold); + os_fast_mutex_unlock(&index->zip_pad.mutex); +} + + +/*********************************************************************//** +Return the optimal page size, for which page will likely compress. +@return page size beyond which page might not compress */ +UNIV_INTERN +ulint +dict_index_zip_pad_optimal_page_size( +/*=================================*/ + dict_index_t* index) /*!< in: index for which page size + is requested */ +{ + ulint pad; + ulint min_sz; + ulint sz; + + ut_ad(index); + + if (!zip_failure_threshold_pct) { + /* Disabled by user. */ + return(UNIV_PAGE_SIZE); + } + + /* We use atomics to read index->zip_pad.pad. Here we use zero + as increment as are not changing the value of the 'pad'. On + platforms where atomics are not available we grab the mutex. */ + +#ifdef HAVE_ATOMIC_BUILTINS + pad = os_atomic_increment_ulint(&index->zip_pad.pad, 0); +#else /* HAVE_ATOMIC_BUILTINS */ + os_fast_mutex_lock(&index->zip_pad.mutex); + pad = index->zip_pad.pad; + os_fast_mutex_unlock(&index->zip_pad.mutex); +#endif /* HAVE_ATOMIC_BUILTINS */ + + ut_ad(pad < UNIV_PAGE_SIZE); + sz = UNIV_PAGE_SIZE - pad; + + /* Min size allowed by user. 
*/ + ut_ad(zip_pad_max < 100); + min_sz = (UNIV_PAGE_SIZE * (100 - zip_pad_max)) / 100; + + return(ut_max(sz, min_sz)); +} + +/*************************************************************//** +Convert table flag to row format string. +@return row format name. */ +UNIV_INTERN +const char* +dict_tf_to_row_format_string( +/*=========================*/ + ulint table_flag) /*!< in: row format setting */ +{ + switch (dict_tf_get_rec_format(table_flag)) { + case REC_FORMAT_REDUNDANT: + return("ROW_TYPE_REDUNDANT"); + case REC_FORMAT_COMPACT: + return("ROW_TYPE_COMPACT"); + case REC_FORMAT_COMPRESSED: + return("ROW_TYPE_COMPRESSED"); + case REC_FORMAT_DYNAMIC: + return("ROW_TYPE_DYNAMIC"); + } + + ut_error; + return(0); +} +#endif /* !UNIV_HOTBACKUP */ diff --git a/storage/xtradb/dict/dict0load.cc b/storage/xtradb/dict/dict0load.cc new file mode 100644 index 00000000000..874614bfb5c --- /dev/null +++ b/storage/xtradb/dict/dict0load.cc @@ -0,0 +1,3149 @@ +/***************************************************************************** + +Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/**************************************************//** +@file dict/dict0load.cc +Loads to the memory cache database object definitions +from dictionary tables + +Created 4/24/1996 Heikki Tuuri +*******************************************************/ + +#include "dict0load.h" +#include "mysql_version.h" + +#ifdef UNIV_NONINL +#include "dict0load.ic" +#endif + +#include "btr0pcur.h" +#include "btr0btr.h" +#include "page0page.h" +#include "mach0data.h" +#include "dict0dict.h" +#include "dict0boot.h" +#include "dict0stats.h" +#include "rem0cmp.h" +#include "srv0start.h" +#include "srv0srv.h" +#include "dict0crea.h" +#include "dict0priv.h" +#include "ha_prototypes.h" /* innobase_casedn_str() */ +#include "fts0priv.h" + +/** Following are the InnoDB system tables. The positions in +this array are referenced by enum dict_system_table_id. */ +static const char* SYSTEM_TABLE_NAME[] = { + "SYS_TABLES", + "SYS_INDEXES", + "SYS_COLUMNS", + "SYS_FIELDS", + "SYS_FOREIGN", + "SYS_FOREIGN_COLS", + "SYS_TABLESPACES", + "SYS_DATAFILES" +}; + +/* If this flag is TRUE, then we will load the cluster index's (and tables') +metadata even if it is marked as "corrupted". */ +UNIV_INTERN my_bool srv_load_corrupted = FALSE; + +#ifdef UNIV_DEBUG +/****************************************************************//** +Compare the name of an index column. +@return TRUE if the i'th column of index is 'name'. 
*/ +static +ibool +name_of_col_is( +/*===========*/ + const dict_table_t* table, /*!< in: table */ + const dict_index_t* index, /*!< in: index */ + ulint i, /*!< in: index field offset */ + const char* name) /*!< in: name to compare to */ +{ + ulint tmp = dict_col_get_no(dict_field_get_col( + dict_index_get_nth_field( + index, i))); + + return(strcmp(name, dict_table_get_col_name(table, tmp)) == 0); +} +#endif /* UNIV_DEBUG */ + +/********************************************************************//** +Finds the first table name in the given database. +@return own: table name, NULL if does not exist; the caller must free +the memory in the string! */ +UNIV_INTERN +char* +dict_get_first_table_name_in_db( +/*============================*/ + const char* name) /*!< in: database name which ends in '/' */ +{ + dict_table_t* sys_tables; + btr_pcur_t pcur; + dict_index_t* sys_index; + dtuple_t* tuple; + mem_heap_t* heap; + dfield_t* dfield; + const rec_t* rec; + const byte* field; + ulint len; + mtr_t mtr; + + ut_ad(mutex_own(&(dict_sys->mutex))); + + heap = mem_heap_create(1000); + + mtr_start(&mtr); + + sys_tables = dict_table_get_low("SYS_TABLES"); + sys_index = UT_LIST_GET_FIRST(sys_tables->indexes); + ut_ad(!dict_table_is_comp(sys_tables)); + + tuple = dtuple_create(heap, 1); + dfield = dtuple_get_nth_field(tuple, 0); + + dfield_set_data(dfield, name, ut_strlen(name)); + dict_index_copy_types(tuple, sys_index, 1); + + btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE, + BTR_SEARCH_LEAF, &pcur, &mtr); +loop: + rec = btr_pcur_get_rec(&pcur); + + if (!btr_pcur_is_on_user_rec(&pcur)) { + /* Not found */ + + btr_pcur_close(&pcur); + mtr_commit(&mtr); + mem_heap_free(heap); + + return(NULL); + } + + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_TABLES__NAME, &len); + + if (len < strlen(name) + || ut_memcmp(name, field, strlen(name)) != 0) { + /* Not found */ + + btr_pcur_close(&pcur); + mtr_commit(&mtr); + mem_heap_free(heap); + + return(NULL); + } + + if 
(!rec_get_deleted_flag(rec, 0)) { + + /* We found one */ + + char* table_name = mem_strdupl((char*) field, len); + + btr_pcur_close(&pcur); + mtr_commit(&mtr); + mem_heap_free(heap); + + return(table_name); + } + + btr_pcur_move_to_next_user_rec(&pcur, &mtr); + + goto loop; +} + +/********************************************************************//** +Prints to the standard output information on all tables found in the data +dictionary system table. */ +UNIV_INTERN +void +dict_print(void) +/*============*/ +{ + dict_table_t* table; + btr_pcur_t pcur; + const rec_t* rec; + mem_heap_t* heap; + mtr_t mtr; + + /* Enlarge the fatal semaphore wait timeout during the InnoDB table + monitor printout */ + + os_increment_counter_by_amount( + server_mutex, + srv_fatal_semaphore_wait_threshold, + SRV_SEMAPHORE_WAIT_EXTENSION); + + heap = mem_heap_create(1000); + mutex_enter(&(dict_sys->mutex)); + mtr_start(&mtr); + + rec = dict_startscan_system(&pcur, &mtr, SYS_TABLES); + + while (rec) { + const char* err_msg; + + err_msg = static_cast<const char*>( + dict_process_sys_tables_rec_and_mtr_commit( + heap, rec, &table, DICT_TABLE_LOAD_FROM_CACHE, + &mtr)); + + if (!err_msg) { + dict_table_print(table); + } else { + ut_print_timestamp(stderr); + fprintf(stderr, " InnoDB: %s\n", err_msg); + } + + mem_heap_empty(heap); + + mtr_start(&mtr); + rec = dict_getnext_system(&pcur, &mtr); + } + + mtr_commit(&mtr); + mutex_exit(&(dict_sys->mutex)); + mem_heap_free(heap); + + /* Restore the fatal semaphore wait timeout */ + os_decrement_counter_by_amount( + server_mutex, + srv_fatal_semaphore_wait_threshold, + SRV_SEMAPHORE_WAIT_EXTENSION); +} + +/********************************************************************//** +This function gets the next system table record as it scans the table. 
+@return the next record if found, NULL if end of scan */ +static +const rec_t* +dict_getnext_system_low( +/*====================*/ + btr_pcur_t* pcur, /*!< in/out: persistent cursor to the + record*/ + mtr_t* mtr) /*!< in: the mini-transaction */ +{ + rec_t* rec = NULL; + + while (!rec || rec_get_deleted_flag(rec, 0)) { + btr_pcur_move_to_next_user_rec(pcur, mtr); + + rec = btr_pcur_get_rec(pcur); + + if (!btr_pcur_is_on_user_rec(pcur)) { + /* end of index */ + btr_pcur_close(pcur); + + return(NULL); + } + } + + /* Get a record, let's save the position */ + btr_pcur_store_position(pcur, mtr); + + return(rec); +} + +/********************************************************************//** +This function opens a system table, and returns the first record. +@return first record of the system table */ +UNIV_INTERN +const rec_t* +dict_startscan_system( +/*==================*/ + btr_pcur_t* pcur, /*!< out: persistent cursor to + the record */ + mtr_t* mtr, /*!< in: the mini-transaction */ + dict_system_id_t system_id) /*!< in: which system table to open */ +{ + dict_table_t* system_table; + dict_index_t* clust_index; + const rec_t* rec; + + ut_a(system_id < SYS_NUM_SYSTEM_TABLES); + + system_table = dict_table_get_low(SYSTEM_TABLE_NAME[system_id]); + + clust_index = UT_LIST_GET_FIRST(system_table->indexes); + + btr_pcur_open_at_index_side(true, clust_index, BTR_SEARCH_LEAF, pcur, + true, 0, mtr); + + rec = dict_getnext_system_low(pcur, mtr); + + return(rec); +} + +/********************************************************************//** +This function gets the next system table record as it scans the table. 
+@return the next record if found, NULL if end of scan */ +UNIV_INTERN +const rec_t* +dict_getnext_system( +/*================*/ + btr_pcur_t* pcur, /*!< in/out: persistent cursor + to the record */ + mtr_t* mtr) /*!< in: the mini-transaction */ +{ + const rec_t* rec; + + /* Restore the position */ + btr_pcur_restore_position(BTR_SEARCH_LEAF, pcur, mtr); + + /* Get the next record */ + rec = dict_getnext_system_low(pcur, mtr); + + return(rec); +} + +/********************************************************************//** +This function processes one SYS_TABLES record and populate the dict_table_t +struct for the table. Extracted out of dict_print() to be used by +both monitor table output and information schema innodb_sys_tables output. +@return error message, or NULL on success */ +UNIV_INTERN +const char* +dict_process_sys_tables_rec_and_mtr_commit( +/*=======================================*/ + mem_heap_t* heap, /*!< in/out: temporary memory heap */ + const rec_t* rec, /*!< in: SYS_TABLES record */ + dict_table_t** table, /*!< out: dict_table_t to fill */ + dict_table_info_t status, /*!< in: status bit controls + options such as whether we shall + look for dict_table_t from cache + first */ + mtr_t* mtr) /*!< in/out: mini-transaction, + will be committed */ +{ + ulint len; + const char* field; + const char* err_msg = NULL; + char* table_name; + + field = (const char*) rec_get_nth_field_old( + rec, DICT_FLD__SYS_TABLES__NAME, &len); + + ut_a(!rec_get_deleted_flag(rec, 0)); + + ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX)); + + /* Get the table name */ + table_name = mem_heap_strdupl(heap, field, len); + + /* If DICT_TABLE_LOAD_FROM_CACHE is set, first check + whether there is cached dict_table_t struct */ + if (status & DICT_TABLE_LOAD_FROM_CACHE) { + + /* Commit before load the table again */ + mtr_commit(mtr); + + *table = dict_table_get_low(table_name); + + if (!(*table)) { + err_msg = "Table not found in cache"; + } + } else { + err_msg = 
dict_load_table_low(table_name, rec, table); + mtr_commit(mtr); + } + + if (err_msg) { + return(err_msg); + } + + return(NULL); +} + +/********************************************************************//** +This function parses a SYS_INDEXES record and populate a dict_index_t +structure with the information from the record. For detail information +about SYS_INDEXES fields, please refer to dict_boot() function. +@return error message, or NULL on success */ +UNIV_INTERN +const char* +dict_process_sys_indexes_rec( +/*=========================*/ + mem_heap_t* heap, /*!< in/out: heap memory */ + const rec_t* rec, /*!< in: current SYS_INDEXES rec */ + dict_index_t* index, /*!< out: index to be filled */ + table_id_t* table_id) /*!< out: index table id */ +{ + const char* err_msg; + byte* buf; + + buf = static_cast<byte*>(mem_heap_alloc(heap, 8)); + + /* Parse the record, and get "dict_index_t" struct filled */ + err_msg = dict_load_index_low(buf, NULL, + heap, rec, FALSE, &index); + + *table_id = mach_read_from_8(buf); + + return(err_msg); +} + +/********************************************************************//** +This function parses a SYS_COLUMNS record and populate a dict_column_t +structure with the information from the record. 
+@return error message, or NULL on success */ +UNIV_INTERN +const char* +dict_process_sys_columns_rec( +/*=========================*/ + mem_heap_t* heap, /*!< in/out: heap memory */ + const rec_t* rec, /*!< in: current SYS_COLUMNS rec */ + dict_col_t* column, /*!< out: dict_col_t to be filled */ + table_id_t* table_id, /*!< out: table id */ + const char** col_name) /*!< out: column name */ +{ + const char* err_msg; + + /* Parse the record, and get "dict_col_t" struct filled */ + err_msg = dict_load_column_low(NULL, heap, column, + table_id, col_name, rec); + + return(err_msg); +} + +/********************************************************************//** +This function parses a SYS_FIELDS record and populates a dict_field_t +structure with the information from the record. +@return error message, or NULL on success */ +UNIV_INTERN +const char* +dict_process_sys_fields_rec( +/*========================*/ + mem_heap_t* heap, /*!< in/out: heap memory */ + const rec_t* rec, /*!< in: current SYS_FIELDS rec */ + dict_field_t* sys_field, /*!< out: dict_field_t to be + filled */ + ulint* pos, /*!< out: Field position */ + index_id_t* index_id, /*!< out: current index id */ + index_id_t last_id) /*!< in: previous index id */ +{ + byte* buf; + byte* last_index_id; + const char* err_msg; + + buf = static_cast<byte*>(mem_heap_alloc(heap, 8)); + + last_index_id = static_cast<byte*>(mem_heap_alloc(heap, 8)); + mach_write_to_8(last_index_id, last_id); + + err_msg = dict_load_field_low(buf, NULL, sys_field, + pos, last_index_id, heap, rec); + + *index_id = mach_read_from_8(buf); + + return(err_msg); + +} + +/********************************************************************//** +This function parses a SYS_FOREIGN record and populate a dict_foreign_t +structure with the information from the record. For detail information +about SYS_FOREIGN fields, please refer to dict_load_foreign() function. 
+@return error message, or NULL on success */ +UNIV_INTERN +const char* +dict_process_sys_foreign_rec( +/*=========================*/ + mem_heap_t* heap, /*!< in/out: heap memory */ + const rec_t* rec, /*!< in: current SYS_FOREIGN rec */ + dict_foreign_t* foreign) /*!< out: dict_foreign_t struct + to be filled */ +{ + ulint len; + const byte* field; + ulint n_fields_and_type; + + if (rec_get_deleted_flag(rec, 0)) { + return("delete-marked record in SYS_FOREIGN"); + } + + if (rec_get_n_fields_old(rec) != DICT_NUM_FIELDS__SYS_FOREIGN) { + return("wrong number of columns in SYS_FOREIGN record"); + } + + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_FOREIGN__ID, &len); + if (len == 0 || len == UNIV_SQL_NULL) { +err_len: + return("incorrect column length in SYS_FOREIGN"); + } + + /* This recieves a dict_foreign_t* that points to a stack variable. + So mem_heap_free(foreign->heap) is not used as elsewhere. + Since the heap used here is freed elsewhere, foreign->heap + is not assigned. */ + foreign->id = mem_heap_strdupl(heap, (const char*) field, len); + + rec_get_nth_field_offs_old( + rec, DICT_FLD__SYS_FOREIGN__DB_TRX_ID, &len); + if (len != DATA_TRX_ID_LEN && len != UNIV_SQL_NULL) { + goto err_len; + } + rec_get_nth_field_offs_old( + rec, DICT_FLD__SYS_FOREIGN__DB_ROLL_PTR, &len); + if (len != DATA_ROLL_PTR_LEN && len != UNIV_SQL_NULL) { + goto err_len; + } + + /* The _lookup versions of the referenced and foreign table names + are not assigned since they are not used in this dict_foreign_t */ + + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_FOREIGN__FOR_NAME, &len); + if (len == 0 || len == UNIV_SQL_NULL) { + goto err_len; + } + foreign->foreign_table_name = mem_heap_strdupl( + heap, (const char*) field, len); + + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_FOREIGN__REF_NAME, &len); + if (len == 0 || len == UNIV_SQL_NULL) { + goto err_len; + } + foreign->referenced_table_name = mem_heap_strdupl( + heap, (const char*) field, len); + + field = 
rec_get_nth_field_old( + rec, DICT_FLD__SYS_FOREIGN__N_COLS, &len); + if (len != 4) { + goto err_len; + } + n_fields_and_type = mach_read_from_4(field); + + foreign->type = (unsigned int) (n_fields_and_type >> 24); + foreign->n_fields = (unsigned int) (n_fields_and_type & 0x3FFUL); + + return(NULL); +} + +/********************************************************************//** +This function parses a SYS_FOREIGN_COLS record and extract necessary +information from the record and return to caller. +@return error message, or NULL on success */ +UNIV_INTERN +const char* +dict_process_sys_foreign_col_rec( +/*=============================*/ + mem_heap_t* heap, /*!< in/out: heap memory */ + const rec_t* rec, /*!< in: current SYS_FOREIGN_COLS rec */ + const char** name, /*!< out: foreign key constraint name */ + const char** for_col_name, /*!< out: referencing column name */ + const char** ref_col_name, /*!< out: referenced column name + in referenced table */ + ulint* pos) /*!< out: column position */ +{ + ulint len; + const byte* field; + + if (rec_get_deleted_flag(rec, 0)) { + return("delete-marked record in SYS_FOREIGN_COLS"); + } + + if (rec_get_n_fields_old(rec) != DICT_NUM_FIELDS__SYS_FOREIGN_COLS) { + return("wrong number of columns in SYS_FOREIGN_COLS record"); + } + + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_FOREIGN_COLS__ID, &len); + if (len == 0 || len == UNIV_SQL_NULL) { +err_len: + return("incorrect column length in SYS_FOREIGN_COLS"); + } + *name = mem_heap_strdupl(heap, (char*) field, len); + + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_FOREIGN_COLS__POS, &len); + if (len != 4) { + goto err_len; + } + *pos = mach_read_from_4(field); + + rec_get_nth_field_offs_old( + rec, DICT_FLD__SYS_FOREIGN_COLS__DB_TRX_ID, &len); + if (len != DATA_TRX_ID_LEN && len != UNIV_SQL_NULL) { + goto err_len; + } + rec_get_nth_field_offs_old( + rec, DICT_FLD__SYS_FOREIGN_COLS__DB_ROLL_PTR, &len); + if (len != DATA_ROLL_PTR_LEN && len != UNIV_SQL_NULL) { + 
goto err_len; + } + + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_FOREIGN_COLS__FOR_COL_NAME, &len); + if (len == 0 || len == UNIV_SQL_NULL) { + goto err_len; + } + *for_col_name = mem_heap_strdupl(heap, (char*) field, len); + + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_FOREIGN_COLS__REF_COL_NAME, &len); + if (len == 0 || len == UNIV_SQL_NULL) { + goto err_len; + } + *ref_col_name = mem_heap_strdupl(heap, (char*) field, len); + + return(NULL); +} + +/********************************************************************//** +This function parses a SYS_TABLESPACES record, extracts necessary +information from the record and returns to caller. +@return error message, or NULL on success */ +UNIV_INTERN +const char* +dict_process_sys_tablespaces( +/*=========================*/ + mem_heap_t* heap, /*!< in/out: heap memory */ + const rec_t* rec, /*!< in: current SYS_TABLESPACES rec */ + ulint* space, /*!< out: space id */ + const char** name, /*!< out: tablespace name */ + ulint* flags) /*!< out: tablespace flags */ +{ + ulint len; + const byte* field; + + /* Initialize the output values */ + *space = ULINT_UNDEFINED; + *name = NULL; + *flags = ULINT_UNDEFINED; + + if (rec_get_deleted_flag(rec, 0)) { + return("delete-marked record in SYS_TABLESPACES"); + } + + if (rec_get_n_fields_old(rec) != DICT_NUM_FIELDS__SYS_TABLESPACES) { + return("wrong number of columns in SYS_TABLESPACES record"); + } + + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_TABLESPACES__SPACE, &len); + if (len != DICT_FLD_LEN_SPACE) { +err_len: + return("incorrect column length in SYS_TABLESPACES"); + } + *space = mach_read_from_4(field); + + rec_get_nth_field_offs_old( + rec, DICT_FLD__SYS_TABLESPACES__DB_TRX_ID, &len); + if (len != DATA_TRX_ID_LEN && len != UNIV_SQL_NULL) { + goto err_len; + } + + rec_get_nth_field_offs_old( + rec, DICT_FLD__SYS_TABLESPACES__DB_ROLL_PTR, &len); + if (len != DATA_ROLL_PTR_LEN && len != UNIV_SQL_NULL) { + goto err_len; + } + + field = 
rec_get_nth_field_old( + rec, DICT_FLD__SYS_TABLESPACES__NAME, &len); + if (len == 0 || len == UNIV_SQL_NULL) { + goto err_len; + } + *name = mem_heap_strdupl(heap, (char*) field, len); + + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_TABLESPACES__FLAGS, &len); + if (len != DICT_FLD_LEN_FLAGS) { + goto err_len; + } + *flags = mach_read_from_4(field); + + return(NULL); +} + +/********************************************************************//** +This function parses a SYS_DATAFILES record, extracts necessary +information from the record and returns it to the caller. +@return error message, or NULL on success */ +UNIV_INTERN +const char* +dict_process_sys_datafiles( +/*=======================*/ + mem_heap_t* heap, /*!< in/out: heap memory */ + const rec_t* rec, /*!< in: current SYS_DATAFILES rec */ + ulint* space, /*!< out: space id */ + const char** path) /*!< out: datafile paths */ +{ + ulint len; + const byte* field; + + if (rec_get_deleted_flag(rec, 0)) { + return("delete-marked record in SYS_DATAFILES"); + } + + if (rec_get_n_fields_old(rec) != DICT_NUM_FIELDS__SYS_DATAFILES) { + return("wrong number of columns in SYS_DATAFILES record"); + } + + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_DATAFILES__SPACE, &len); + if (len != DICT_FLD_LEN_SPACE) { +err_len: + return("incorrect column length in SYS_DATAFILES"); + } + *space = mach_read_from_4(field); + + rec_get_nth_field_offs_old( + rec, DICT_FLD__SYS_DATAFILES__DB_TRX_ID, &len); + if (len != DATA_TRX_ID_LEN && len != UNIV_SQL_NULL) { + goto err_len; + } + + rec_get_nth_field_offs_old( + rec, DICT_FLD__SYS_DATAFILES__DB_ROLL_PTR, &len); + if (len != DATA_ROLL_PTR_LEN && len != UNIV_SQL_NULL) { + goto err_len; + } + + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_DATAFILES__PATH, &len); + if (len == 0 || len == UNIV_SQL_NULL) { + goto err_len; + } + *path = mem_heap_strdupl(heap, (char*) field, len); + + return(NULL); +} + 
+/********************************************************************//** +Determine the flags of a table as stored in SYS_TABLES.TYPE and N_COLS. +@return ULINT_UNDEFINED if error, else a valid dict_table_t::flags. */ +static +ulint +dict_sys_tables_get_flags( +/*======================*/ + const rec_t* rec) /*!< in: a record of SYS_TABLES */ +{ + const byte* field; + ulint len; + ulint type; + ulint n_cols; + + /* read the 4 byte flags from the TYPE field */ + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_TABLES__TYPE, &len); + ut_a(len == 4); + type = mach_read_from_4(field); + + /* The low order bit of SYS_TABLES.TYPE is always set to 1. But in + dict_table_t::flags the low order bit is used to determine if the + row format is Redundant or Compact when the format is Antelope. + Read the 4 byte N_COLS field and look at the high order bit. It + should be set for COMPACT and later. It should not be set for + REDUNDANT. */ + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_TABLES__N_COLS, &len); + ut_a(len == 4); + n_cols = mach_read_from_4(field); + + /* This validation function also combines the DICT_N_COLS_COMPACT + flag in n_cols into the type field to effectively make it a + dict_table_t::flags. */ + + if (ULINT_UNDEFINED == dict_sys_tables_type_validate(type, n_cols)) { + return(ULINT_UNDEFINED); + } + + return(dict_sys_tables_type_to_tf(type, n_cols)); +} + +/********************************************************************//** +Gets the filepath for a spaceid from SYS_DATAFILES and checks it against +the contents of a link file. This function is called when there is no +fil_node_t entry for this space ID so both durable locations on disk +must be checked and compared. +We use a temporary heap here for the table lookup, but not for the path +returned which the caller must free. +This function can return NULL if the space ID is not found in SYS_DATAFILES, +then the caller will assume that the ibd file is in the normal datadir. 
+@return own: A copy of the first datafile found in SYS_DATAFILES.PATH for +the given space ID. NULL if space ID is zero or not found. */ +UNIV_INTERN +char* +dict_get_first_path( +/*================*/ + ulint space, /*!< in: space id */ + const char* name) /*!< in: tablespace name */ +{ + mtr_t mtr; + dict_table_t* sys_datafiles; + dict_index_t* sys_index; + dtuple_t* tuple; + dfield_t* dfield; + byte* buf; + btr_pcur_t pcur; + const rec_t* rec; + const byte* field; + ulint len; + char* dict_filepath = NULL; + mem_heap_t* heap = mem_heap_create(1024); + + ut_ad(mutex_own(&(dict_sys->mutex))); + + mtr_start(&mtr); + + sys_datafiles = dict_table_get_low("SYS_DATAFILES"); + sys_index = UT_LIST_GET_FIRST(sys_datafiles->indexes); + ut_ad(!dict_table_is_comp(sys_datafiles)); + ut_ad(name_of_col_is(sys_datafiles, sys_index, + DICT_FLD__SYS_DATAFILES__SPACE, "SPACE")); + ut_ad(name_of_col_is(sys_datafiles, sys_index, + DICT_FLD__SYS_DATAFILES__PATH, "PATH")); + + tuple = dtuple_create(heap, 1); + dfield = dtuple_get_nth_field(tuple, DICT_FLD__SYS_DATAFILES__SPACE); + + buf = static_cast<byte*>(mem_heap_alloc(heap, 4)); + mach_write_to_4(buf, space); + + dfield_set_data(dfield, buf, 4); + dict_index_copy_types(tuple, sys_index, 1); + + btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE, + BTR_SEARCH_LEAF, &pcur, &mtr); + + rec = btr_pcur_get_rec(&pcur); + + /* If the file-per-table tablespace was created with + an earlier version of InnoDB, then this record is not + in SYS_DATAFILES. But a link file still might exist. */ + + if (btr_pcur_is_on_user_rec(&pcur)) { + /* A record for this space ID was found. 
*/ + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_DATAFILES__PATH, &len); + ut_a(len > 0 || len == UNIV_SQL_NULL); + ut_a(len < OS_FILE_MAX_PATH); + dict_filepath = mem_strdupl((char*) field, len); + ut_a(dict_filepath); + } + + btr_pcur_close(&pcur); + mtr_commit(&mtr); + mem_heap_free(heap); + + return(dict_filepath); +} + +/********************************************************************//** +Update the record for space_id in SYS_TABLESPACES to this filepath. +@return DB_SUCCESS if OK, dberr_t if the insert failed */ +UNIV_INTERN +dberr_t +dict_update_filepath( +/*=================*/ + ulint space_id, /*!< in: space id */ + const char* filepath) /*!< in: filepath */ +{ + dberr_t err = DB_SUCCESS; + trx_t* trx; + +#ifdef UNIV_SYNC_DEBUG + ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); +#endif /* UNIV_SYNC_DEBUG */ + ut_ad(mutex_own(&(dict_sys->mutex))); + + trx = trx_allocate_for_background(); + trx->op_info = "update filepath"; + trx->dict_operation_lock_mode = RW_X_LATCH; + trx_start_for_ddl(trx, TRX_DICT_OP_INDEX); + + pars_info_t* info = pars_info_create(); + + pars_info_add_int4_literal(info, "space", space_id); + pars_info_add_str_literal(info, "path", filepath); + + err = que_eval_sql(info, + "PROCEDURE UPDATE_FILEPATH () IS\n" + "BEGIN\n" + "UPDATE SYS_DATAFILES" + " SET PATH = :path\n" + " WHERE SPACE = :space;\n" + "END;\n", FALSE, trx); + + trx_commit_for_mysql(trx); + trx->dict_operation_lock_mode = 0; + trx_free_for_background(trx); + + if (err == DB_SUCCESS) { + /* We just updated SYS_DATAFILES due to the contents in + a link file. Make a note that we did this. 
*/ + ib_logf(IB_LOG_LEVEL_INFO, + "The InnoDB data dictionary table SYS_DATAFILES " + "for tablespace ID %lu was updated to use file %s.", + (ulong) space_id, filepath); + } else { + ib_logf(IB_LOG_LEVEL_WARN, + "Problem updating InnoDB data dictionary table " + "SYS_DATAFILES for tablespace ID %lu to file %s.", + (ulong) space_id, filepath); + } + + return(err); +} + +/********************************************************************//** +Insert records into SYS_TABLESPACES and SYS_DATAFILES. +@return DB_SUCCESS if OK, dberr_t if the insert failed */ +UNIV_INTERN +dberr_t +dict_insert_tablespace_and_filepath( +/*================================*/ + ulint space, /*!< in: space id */ + const char* name, /*!< in: talespace name */ + const char* filepath, /*!< in: filepath */ + ulint fsp_flags) /*!< in: tablespace flags */ +{ + dberr_t err = DB_SUCCESS; + trx_t* trx; + +#ifdef UNIV_SYNC_DEBUG + ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); +#endif /* UNIV_SYNC_DEBUG */ + ut_ad(mutex_own(&(dict_sys->mutex))); + ut_ad(filepath); + + trx = trx_allocate_for_background(); + trx->op_info = "insert tablespace and filepath"; + trx->dict_operation_lock_mode = RW_X_LATCH; + trx_start_for_ddl(trx, TRX_DICT_OP_INDEX); + + /* A record for this space ID was not found in + SYS_DATAFILES. Assume the record is also missing in + SYS_TABLESPACES. Insert records onto them both. */ + err = dict_create_add_tablespace_to_dictionary( + space, name, fsp_flags, filepath, trx, false); + + trx_commit_for_mysql(trx); + trx->dict_operation_lock_mode = 0; + trx_free_for_background(trx); + + return(err); +} + +/********************************************************************//** +This function looks at each table defined in SYS_TABLES. It checks the +tablespace for any table with a space_id > 0. It looks up the tablespace +in SYS_DATAFILES to ensure the correct path. + +In a crash recovery we already have all the tablespace objects created. 
+This function compares the space id information in the InnoDB data dictionary +to what we already read with fil_load_single_table_tablespaces(). + +In a normal startup, we create the tablespace objects for every table in +InnoDB's data dictionary, if the corresponding .ibd file exists. +We also scan the biggest space id, and store it to fil_system. */ +UNIV_INTERN +void +dict_check_tablespaces_and_store_max_id( +/*====================================*/ + dict_check_t dict_check) /*!< in: how to check */ +{ + dict_table_t* sys_tables; + dict_index_t* sys_index; + btr_pcur_t pcur; + const rec_t* rec; + ulint max_space_id; + mtr_t mtr; + + rw_lock_x_lock(&dict_operation_lock); + mutex_enter(&(dict_sys->mutex)); + + mtr_start(&mtr); + + sys_tables = dict_table_get_low("SYS_TABLES"); + sys_index = UT_LIST_GET_FIRST(sys_tables->indexes); + ut_ad(!dict_table_is_comp(sys_tables)); + + max_space_id = mtr_read_ulint(dict_hdr_get(&mtr) + + DICT_HDR_MAX_SPACE_ID, + MLOG_4BYTES, &mtr); + fil_set_max_space_id_if_bigger(max_space_id); + + btr_pcur_open_at_index_side(true, sys_index, BTR_SEARCH_LEAF, &pcur, + true, 0, &mtr); +loop: + btr_pcur_move_to_next_user_rec(&pcur, &mtr); + + rec = btr_pcur_get_rec(&pcur); + + if (!btr_pcur_is_on_user_rec(&pcur)) { + /* end of index */ + + btr_pcur_close(&pcur); + mtr_commit(&mtr); + + /* We must make the tablespace cache aware of the biggest + known space id */ + + /* printf("Biggest space id in data dictionary %lu\n", + max_space_id); */ + fil_set_max_space_id_if_bigger(max_space_id); + + mutex_exit(&(dict_sys->mutex)); + rw_lock_x_unlock(&dict_operation_lock); + + return; + } + + if (!rec_get_deleted_flag(rec, 0)) { + + /* We found one */ + const byte* field; + ulint len; + ulint space_id; + ulint flags; + char* name; + + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_TABLES__NAME, &len); + + name = mem_strdupl((char*) field, len); + + char table_name[MAX_FULL_NAME_LEN + 1]; + + innobase_format_name( + table_name, 
sizeof(table_name), name, FALSE); + + flags = dict_sys_tables_get_flags(rec); + if (UNIV_UNLIKELY(flags == ULINT_UNDEFINED)) { + /* Read again the 4 bytes from rec. */ + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_TABLES__TYPE, &len); + ut_ad(len == 4); /* this was checked earlier */ + flags = mach_read_from_4(field); + + ib_logf(IB_LOG_LEVEL_ERROR, + "Table '%s' in InnoDB data dictionary" + " has unknown type %lx", table_name, flags); + mem_free(name); + goto loop; + } + + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_TABLES__SPACE, &len); + ut_a(len == 4); + + space_id = mach_read_from_4(field); + + btr_pcur_store_position(&pcur, &mtr); + + mtr_commit(&mtr); + + /* For tables created with old versions of InnoDB, + SYS_TABLES.MIX_LEN may contain garbage. Such tables + would always be in ROW_FORMAT=REDUNDANT. Pretend that + all such tables are non-temporary. That is, do not + suppress error printouts about temporary or discarded + tablespaces not being found. */ + + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_TABLES__MIX_LEN, &len); + + bool is_temp = false; + bool discarded = false; + ib_uint32_t flags2 = static_cast<ib_uint32_t>( + mach_read_from_4(field)); + + /* Check that the tablespace (the .ibd file) really + exists; print a warning to the .err log if not. + Do not print warnings for temporary tables or for + tablespaces that have been discarded. */ + + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_TABLES__N_COLS, &len); + + /* MIX_LEN valid only for ROW_FORMAT > REDUNDANT. */ + if (mach_read_from_4(field) & DICT_N_COLS_COMPACT) { + + is_temp = !!(flags2 & DICT_TF2_TEMPORARY); + discarded = !!(flags2 & DICT_TF2_DISCARDED); + } + + if (space_id == 0) { + /* The system tablespace always exists. */ + ut_ad(!discarded); + goto next_tablespace; + } + + switch (dict_check) { + case DICT_CHECK_ALL_LOADED: + /* All tablespaces should have been found in + fil_load_single_table_tablespaces(). 
*/ + if (fil_space_for_table_exists_in_mem( + space_id, name, TRUE, !(is_temp || discarded), + false, NULL, 0) + && !(is_temp || discarded)) { + /* If user changes the path of .ibd files in + *.isl files before doing crash recovery , + then this leads to inconsistency in + SYS_DATAFILES system table because the + tables are loaded from the updated path + but the SYS_DATAFILES still points to the + old path.Therefore after crash recovery + update SYS_DATAFILES with the updated path.*/ + ut_ad(space_id); + ut_ad(recv_needed_recovery); + char *dict_path = dict_get_first_path(space_id, + name); + char *remote_path = fil_read_link_file(name); + if(dict_path && remote_path) { + if(strcmp(dict_path,remote_path)) { + dict_update_filepath(space_id, + remote_path); + } + } + if(dict_path) + mem_free(dict_path); + if(remote_path) + mem_free(remote_path); + } + break; + + case DICT_CHECK_SOME_LOADED: + /* Some tablespaces may have been opened in + trx_resurrect_table_locks(). */ + if (fil_space_for_table_exists_in_mem( + space_id, name, FALSE, FALSE, + false, NULL, 0)) { + break; + } + /* fall through */ + case DICT_CHECK_NONE_LOADED: + if (discarded) { + ib_logf(IB_LOG_LEVEL_INFO, + "DISCARD flag set for table '%s'," + " ignored.", + table_name); + break; + } + + /* It is a normal database startup: create the + space object and check that the .ibd file exists. + If the table uses a remote tablespace, look for the + space_id in SYS_DATAFILES to find the filepath */ + + /* Use the remote filepath if known. */ + char* filepath = NULL; + if (DICT_TF_HAS_DATA_DIR(flags)) { + filepath = dict_get_first_path( + space_id, name); + } + + /* We set the 2nd param (fix_dict = true) + here because we already have an x-lock on + dict_operation_lock and dict_sys->mutex. Besides, + this is at startup and we are now single threaded. + If the filepath is not known, it will need to + be discovered. */ + dberr_t err = fil_open_single_table_tablespace( + false, srv_read_only_mode ? 
false : true, + space_id, dict_tf_to_fsp_flags(flags), + name, filepath); + + if (err != DB_SUCCESS) { + ib_logf(IB_LOG_LEVEL_ERROR, + "Tablespace open failed for '%s', " + "ignored.", table_name); + } + + if (filepath) { + mem_free(filepath); + } + + break; + } + + if (space_id > max_space_id) { + max_space_id = space_id; + } + +next_tablespace: + mem_free(name); + mtr_start(&mtr); + + btr_pcur_restore_position(BTR_SEARCH_LEAF, &pcur, &mtr); + } + + goto loop; +} + +/********************************************************************//** +Loads a table column definition from a SYS_COLUMNS record to +dict_table_t. +@return error message, or NULL on success */ +UNIV_INTERN +const char* +dict_load_column_low( +/*=================*/ + dict_table_t* table, /*!< in/out: table, could be NULL + if we just populate a dict_column_t + struct with information from + a SYS_COLUMNS record */ + mem_heap_t* heap, /*!< in/out: memory heap + for temporary storage */ + dict_col_t* column, /*!< out: dict_column_t to fill, + or NULL if table != NULL */ + table_id_t* table_id, /*!< out: table id */ + const char** col_name, /*!< out: column name */ + const rec_t* rec) /*!< in: SYS_COLUMNS record */ +{ + char* name; + const byte* field; + ulint len; + ulint mtype; + ulint prtype; + ulint col_len; + ulint pos; + + ut_ad(table || column); + + if (rec_get_deleted_flag(rec, 0)) { + return("delete-marked record in SYS_COLUMNS"); + } + + if (rec_get_n_fields_old(rec) != DICT_NUM_FIELDS__SYS_COLUMNS) { + return("wrong number of columns in SYS_COLUMNS record"); + } + + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_COLUMNS__TABLE_ID, &len); + if (len != 8) { +err_len: + return("incorrect column length in SYS_COLUMNS"); + } + + if (table_id) { + *table_id = mach_read_from_8(field); + } else if (table->id != mach_read_from_8(field)) { + return("SYS_COLUMNS.TABLE_ID mismatch"); + } + + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_COLUMNS__POS, &len); + if (len != 4) { + + goto err_len; 
+ } + + pos = mach_read_from_4(field); + + if (table && table->n_def != pos) { + return("SYS_COLUMNS.POS mismatch"); + } + + rec_get_nth_field_offs_old( + rec, DICT_FLD__SYS_COLUMNS__DB_TRX_ID, &len); + if (len != DATA_TRX_ID_LEN && len != UNIV_SQL_NULL) { + goto err_len; + } + rec_get_nth_field_offs_old( + rec, DICT_FLD__SYS_COLUMNS__DB_ROLL_PTR, &len); + if (len != DATA_ROLL_PTR_LEN && len != UNIV_SQL_NULL) { + goto err_len; + } + + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_COLUMNS__NAME, &len); + if (len == 0 || len == UNIV_SQL_NULL) { + goto err_len; + } + + name = mem_heap_strdupl(heap, (const char*) field, len); + + if (col_name) { + *col_name = name; + } + + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_COLUMNS__MTYPE, &len); + if (len != 4) { + goto err_len; + } + + mtype = mach_read_from_4(field); + + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_COLUMNS__PRTYPE, &len); + if (len != 4) { + goto err_len; + } + prtype = mach_read_from_4(field); + + if (dtype_get_charset_coll(prtype) == 0 + && dtype_is_string_type(mtype)) { + /* The table was created with < 4.1.2. */ + + if (dtype_is_binary_string_type(mtype, prtype)) { + /* Use the binary collation for + string columns of binary type. */ + + prtype = dtype_form_prtype( + prtype, + DATA_MYSQL_BINARY_CHARSET_COLL); + } else { + /* Use the default charset for + other than binary columns. 
*/ + + prtype = dtype_form_prtype( + prtype, + data_mysql_default_charset_coll); + } + } + + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_COLUMNS__LEN, &len); + if (len != 4) { + goto err_len; + } + col_len = mach_read_from_4(field); + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_COLUMNS__PREC, &len); + if (len != 4) { + goto err_len; + } + + if (!column) { + dict_mem_table_add_col(table, heap, name, mtype, + prtype, col_len); + } else { + dict_mem_fill_column_struct(column, pos, mtype, + prtype, col_len); + } + + return(NULL); +} + +/********************************************************************//** +Loads definitions for table columns. */ +static +void +dict_load_columns( +/*==============*/ + dict_table_t* table, /*!< in/out: table */ + mem_heap_t* heap) /*!< in/out: memory heap + for temporary storage */ +{ + dict_table_t* sys_columns; + dict_index_t* sys_index; + btr_pcur_t pcur; + dtuple_t* tuple; + dfield_t* dfield; + const rec_t* rec; + byte* buf; + ulint i; + mtr_t mtr; + + ut_ad(mutex_own(&(dict_sys->mutex))); + + mtr_start(&mtr); + + sys_columns = dict_table_get_low("SYS_COLUMNS"); + sys_index = UT_LIST_GET_FIRST(sys_columns->indexes); + ut_ad(!dict_table_is_comp(sys_columns)); + + ut_ad(name_of_col_is(sys_columns, sys_index, + DICT_FLD__SYS_COLUMNS__NAME, "NAME")); + ut_ad(name_of_col_is(sys_columns, sys_index, + DICT_FLD__SYS_COLUMNS__PREC, "PREC")); + + tuple = dtuple_create(heap, 1); + dfield = dtuple_get_nth_field(tuple, 0); + + buf = static_cast<byte*>(mem_heap_alloc(heap, 8)); + mach_write_to_8(buf, table->id); + + dfield_set_data(dfield, buf, 8); + dict_index_copy_types(tuple, sys_index, 1); + + btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE, + BTR_SEARCH_LEAF, &pcur, &mtr); + for (i = 0; i + DATA_N_SYS_COLS < (ulint) table->n_cols; i++) { + const char* err_msg; + const char* name = NULL; + + rec = btr_pcur_get_rec(&pcur); + + ut_a(btr_pcur_is_on_user_rec(&pcur)); + + err_msg = dict_load_column_low(table, heap, NULL, 
NULL, + &name, rec); + + if (err_msg) { + fprintf(stderr, "InnoDB: %s\n", err_msg); + ut_error; + } + + /* Note: Currently we have one DOC_ID column that is + shared by all FTS indexes on a table. */ + if (innobase_strcasecmp(name, + FTS_DOC_ID_COL_NAME) == 0) { + dict_col_t* col; + /* As part of normal loading of tables the + table->flag is not set for tables with FTS + till after the FTS indexes are loaded. So we + create the fts_t instance here if there isn't + one already created. + + This case does not arise for table create as + the flag is set before the table is created. */ + if (table->fts == NULL) { + table->fts = fts_create(table); + fts_optimize_add_table(table); + } + + ut_a(table->fts->doc_col == ULINT_UNDEFINED); + + col = dict_table_get_nth_col(table, i); + + ut_ad(col->len == sizeof(doc_id_t)); + + if (col->prtype & DATA_FTS_DOC_ID) { + DICT_TF2_FLAG_SET( + table, DICT_TF2_FTS_HAS_DOC_ID); + DICT_TF2_FLAG_UNSET( + table, DICT_TF2_FTS_ADD_DOC_ID); + } + + table->fts->doc_col = i; + } + + btr_pcur_move_to_next_user_rec(&pcur, &mtr); + } + + btr_pcur_close(&pcur); + mtr_commit(&mtr); +} + +/** Error message for a delete-marked record in dict_load_field_low() */ +static const char* dict_load_field_del = "delete-marked record in SYS_FIELDS"; + +/********************************************************************//** +Loads an index field definition from a SYS_FIELDS record to +dict_index_t. 
+@return error message, or NULL on success */ +UNIV_INTERN +const char* +dict_load_field_low( +/*================*/ + byte* index_id, /*!< in/out: index id (8 bytes) + an "in" value if index != NULL + and "out" if index == NULL */ + dict_index_t* index, /*!< in/out: index, could be NULL + if we just populate a dict_field_t + struct with information from + a SYS_FIELDS record */ + dict_field_t* sys_field, /*!< out: dict_field_t to be + filled */ + ulint* pos, /*!< out: Field position */ + byte* last_index_id, /*!< in: last index id */ + mem_heap_t* heap, /*!< in/out: memory heap + for temporary storage */ + const rec_t* rec) /*!< in: SYS_FIELDS record */ +{ + const byte* field; + ulint len; + ulint pos_and_prefix_len; + ulint prefix_len; + ibool first_field; + ulint position; + + /* Either index or sys_field is supplied, not both */ + ut_a((!index) || (!sys_field)); + + if (rec_get_deleted_flag(rec, 0)) { + return(dict_load_field_del); + } + + if (rec_get_n_fields_old(rec) != DICT_NUM_FIELDS__SYS_FIELDS) { + return("wrong number of columns in SYS_FIELDS record"); + } + + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_FIELDS__INDEX_ID, &len); + if (len != 8) { +err_len: + return("incorrect column length in SYS_FIELDS"); + } + + if (!index) { + ut_a(last_index_id); + memcpy(index_id, (const char*) field, 8); + first_field = memcmp(index_id, last_index_id, 8); + } else { + first_field = (index->n_def == 0); + if (memcmp(field, index_id, 8)) { + return("SYS_FIELDS.INDEX_ID mismatch"); + } + } + + /* The next field stores the field position in the index and a + possible column prefix length if the index field does not + contain the whole column. The storage format is like this: if + there is at least one prefix field in the index, then the HIGH + 2 bytes contain the field number (index->n_def) and the low 2 + bytes the prefix length for the field. Otherwise the field + number (index->n_def) is contained in the 2 LOW bytes. 
*/ + + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_FIELDS__POS, &len); + if (len != 4) { + goto err_len; + } + + pos_and_prefix_len = mach_read_from_4(field); + + if (index && UNIV_UNLIKELY + ((pos_and_prefix_len & 0xFFFFUL) != index->n_def + && (pos_and_prefix_len >> 16 & 0xFFFF) != index->n_def)) { + return("SYS_FIELDS.POS mismatch"); + } + + if (first_field || pos_and_prefix_len > 0xFFFFUL) { + prefix_len = pos_and_prefix_len & 0xFFFFUL; + position = (pos_and_prefix_len & 0xFFFF0000UL) >> 16; + } else { + prefix_len = 0; + position = pos_and_prefix_len & 0xFFFFUL; + } + + rec_get_nth_field_offs_old( + rec, DICT_FLD__SYS_FIELDS__DB_TRX_ID, &len); + if (len != DATA_TRX_ID_LEN && len != UNIV_SQL_NULL) { + goto err_len; + } + rec_get_nth_field_offs_old( + rec, DICT_FLD__SYS_FIELDS__DB_ROLL_PTR, &len); + if (len != DATA_ROLL_PTR_LEN && len != UNIV_SQL_NULL) { + goto err_len; + } + + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_FIELDS__COL_NAME, &len); + if (len == 0 || len == UNIV_SQL_NULL) { + goto err_len; + } + + if (index) { + dict_mem_index_add_field( + index, mem_heap_strdupl(heap, (const char*) field, len), + prefix_len); + } else { + ut_a(sys_field); + ut_a(pos); + + sys_field->name = mem_heap_strdupl( + heap, (const char*) field, len); + sys_field->prefix_len = prefix_len; + *pos = position; + } + + return(NULL); +} + +/********************************************************************//** +Loads definitions for index fields. 
+@return DB_SUCCESS if ok, DB_CORRUPTION if corruption */ +static +ulint +dict_load_fields( +/*=============*/ + dict_index_t* index, /*!< in/out: index whose fields to load */ + mem_heap_t* heap) /*!< in: memory heap for temporary storage */ +{ + dict_table_t* sys_fields; + dict_index_t* sys_index; + btr_pcur_t pcur; + dtuple_t* tuple; + dfield_t* dfield; + const rec_t* rec; + byte* buf; + ulint i; + mtr_t mtr; + dberr_t error; + + ut_ad(mutex_own(&(dict_sys->mutex))); + + mtr_start(&mtr); + + sys_fields = dict_table_get_low("SYS_FIELDS"); + sys_index = UT_LIST_GET_FIRST(sys_fields->indexes); + ut_ad(!dict_table_is_comp(sys_fields)); + ut_ad(name_of_col_is(sys_fields, sys_index, + DICT_FLD__SYS_FIELDS__COL_NAME, "COL_NAME")); + + tuple = dtuple_create(heap, 1); + dfield = dtuple_get_nth_field(tuple, 0); + + buf = static_cast<byte*>(mem_heap_alloc(heap, 8)); + mach_write_to_8(buf, index->id); + + dfield_set_data(dfield, buf, 8); + dict_index_copy_types(tuple, sys_index, 1); + + btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE, + BTR_SEARCH_LEAF, &pcur, &mtr); + for (i = 0; i < index->n_fields; i++) { + const char* err_msg; + + rec = btr_pcur_get_rec(&pcur); + + ut_a(btr_pcur_is_on_user_rec(&pcur)); + + err_msg = dict_load_field_low(buf, index, NULL, NULL, NULL, + heap, rec); + + if (err_msg == dict_load_field_del) { + /* There could be delete marked records in + SYS_FIELDS because SYS_FIELDS.INDEX_ID can be + updated by ALTER TABLE ADD INDEX. 
*/ + + goto next_rec; + } else if (err_msg) { + fprintf(stderr, "InnoDB: %s\n", err_msg); + error = DB_CORRUPTION; + goto func_exit; + } +next_rec: + btr_pcur_move_to_next_user_rec(&pcur, &mtr); + } + + error = DB_SUCCESS; +func_exit: + btr_pcur_close(&pcur); + mtr_commit(&mtr); + return(error); +} + +/** Error message for a delete-marked record in dict_load_index_low() */ +static const char* dict_load_index_del = "delete-marked record in SYS_INDEXES"; +/** Error message for table->id mismatch in dict_load_index_low() */ +static const char* dict_load_index_id_err = "SYS_INDEXES.TABLE_ID mismatch"; + +/********************************************************************//** +Loads an index definition from a SYS_INDEXES record to dict_index_t. +If allocate=TRUE, we will create a dict_index_t structure and fill it +accordingly. If allocated=FALSE, the dict_index_t will be supplied by +the caller and filled with information read from the record. @return +error message, or NULL on success */ +UNIV_INTERN +const char* +dict_load_index_low( +/*================*/ + byte* table_id, /*!< in/out: table id (8 bytes), + an "in" value if allocate=TRUE + and "out" when allocate=FALSE */ + const char* table_name, /*!< in: table name */ + mem_heap_t* heap, /*!< in/out: temporary memory heap */ + const rec_t* rec, /*!< in: SYS_INDEXES record */ + ibool allocate, /*!< in: TRUE=allocate *index, + FALSE=fill in a pre-allocated + *index */ + dict_index_t** index) /*!< out,own: index, or NULL */ +{ + const byte* field; + ulint len; + ulint name_len; + char* name_buf; + index_id_t id; + ulint n_fields; + ulint type; + ulint space; + + if (allocate) { + /* If allocate=TRUE, no dict_index_t will + be supplied. 
Initialize "*index" to NULL */ + *index = NULL; + } + + if (rec_get_deleted_flag(rec, 0)) { + return(dict_load_index_del); + } + + if (rec_get_n_fields_old(rec) != DICT_NUM_FIELDS__SYS_INDEXES) { + return("wrong number of columns in SYS_INDEXES record"); + } + + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_INDEXES__TABLE_ID, &len); + if (len != 8) { +err_len: + return("incorrect column length in SYS_INDEXES"); + } + + if (!allocate) { + /* We are reading a SYS_INDEXES record. Copy the table_id */ + memcpy(table_id, (const char*) field, 8); + } else if (memcmp(field, table_id, 8)) { + /* Caller supplied table_id, verify it is the same + id as on the index record */ + return(dict_load_index_id_err); + } + + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_INDEXES__ID, &len); + if (len != 8) { + goto err_len; + } + + id = mach_read_from_8(field); + + rec_get_nth_field_offs_old( + rec, DICT_FLD__SYS_INDEXES__DB_TRX_ID, &len); + if (len != DATA_TRX_ID_LEN && len != UNIV_SQL_NULL) { + goto err_len; + } + rec_get_nth_field_offs_old( + rec, DICT_FLD__SYS_INDEXES__DB_ROLL_PTR, &len); + if (len != DATA_ROLL_PTR_LEN && len != UNIV_SQL_NULL) { + goto err_len; + } + + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_INDEXES__NAME, &name_len); + if (name_len == UNIV_SQL_NULL) { + goto err_len; + } + + name_buf = mem_heap_strdupl(heap, (const char*) field, + name_len); + + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_INDEXES__N_FIELDS, &len); + if (len != 4) { + goto err_len; + } + n_fields = mach_read_from_4(field); + + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_INDEXES__TYPE, &len); + if (len != 4) { + goto err_len; + } + type = mach_read_from_4(field); + if (type & (~0 << DICT_IT_BITS)) { + return("unknown SYS_INDEXES.TYPE bits"); + } + + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_INDEXES__SPACE, &len); + if (len != 4) { + goto err_len; + } + space = mach_read_from_4(field); + + field = rec_get_nth_field_old( + rec, 
DICT_FLD__SYS_INDEXES__PAGE_NO, &len); + if (len != 4) { + goto err_len; + } + + if (allocate) { + *index = dict_mem_index_create(table_name, name_buf, + space, type, n_fields); + } else { + ut_a(*index); + + dict_mem_fill_index_struct(*index, NULL, NULL, name_buf, + space, type, n_fields); + } + + (*index)->id = id; + (*index)->page = mach_read_from_4(field); + btr_search_index_init(*index); + ut_ad((*index)->page); + + return(NULL); +} + +/********************************************************************//** +Loads definitions for table indexes. Adds them to the data dictionary +cache. +@return DB_SUCCESS if ok, DB_CORRUPTION if corruption of dictionary +table or DB_UNSUPPORTED if table has unknown index type */ +static __attribute__((nonnull)) +dberr_t +dict_load_indexes( +/*==============*/ + dict_table_t* table, /*!< in/out: table */ + mem_heap_t* heap, /*!< in: memory heap for temporary storage */ + dict_err_ignore_t ignore_err) + /*!< in: error to be ignored when + loading the index definition */ +{ + dict_table_t* sys_indexes; + dict_index_t* sys_index; + btr_pcur_t pcur; + dtuple_t* tuple; + dfield_t* dfield; + const rec_t* rec; + byte* buf; + mtr_t mtr; + dberr_t error = DB_SUCCESS; + + ut_ad(mutex_own(&(dict_sys->mutex))); + + mtr_start(&mtr); + + sys_indexes = dict_table_get_low("SYS_INDEXES"); + sys_index = UT_LIST_GET_FIRST(sys_indexes->indexes); + ut_ad(!dict_table_is_comp(sys_indexes)); + ut_ad(name_of_col_is(sys_indexes, sys_index, + DICT_FLD__SYS_INDEXES__NAME, "NAME")); + ut_ad(name_of_col_is(sys_indexes, sys_index, + DICT_FLD__SYS_INDEXES__PAGE_NO, "PAGE_NO")); + + tuple = dtuple_create(heap, 1); + dfield = dtuple_get_nth_field(tuple, 0); + + buf = static_cast<byte*>(mem_heap_alloc(heap, 8)); + mach_write_to_8(buf, table->id); + + dfield_set_data(dfield, buf, 8); + dict_index_copy_types(tuple, sys_index, 1); + + btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE, + BTR_SEARCH_LEAF, &pcur, &mtr); + for (;;) { + dict_index_t* index = NULL; 
+ const char* err_msg; + + if (!btr_pcur_is_on_user_rec(&pcur)) { + + /* We should allow the table to open even + without index when DICT_ERR_IGNORE_CORRUPT is set. + DICT_ERR_IGNORE_CORRUPT is currently only set + for drop table */ + if (dict_table_get_first_index(table) == NULL + && !(ignore_err & DICT_ERR_IGNORE_CORRUPT)) { + ib_logf(IB_LOG_LEVEL_WARN, + "Cannot load table %s " + "because it has no indexes in " + "InnoDB internal data dictionary.", + table->name); + error = DB_CORRUPTION; + goto func_exit; + } + + break; + } + + rec = btr_pcur_get_rec(&pcur); + + if ((ignore_err & DICT_ERR_IGNORE_RECOVER_LOCK) + && rec_get_n_fields_old(rec) + == DICT_NUM_FIELDS__SYS_INDEXES) { + const byte* field; + ulint len; + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_INDEXES__NAME, &len); + + if (len != UNIV_SQL_NULL + && char(*field) == char(TEMP_INDEX_PREFIX)) { + /* Skip indexes whose name starts with + TEMP_INDEX_PREFIX, because they will + be dropped during crash recovery. */ + goto next_rec; + } + } + + err_msg = dict_load_index_low(buf, table->name, heap, rec, + TRUE, &index); + ut_ad((index == NULL && err_msg != NULL) + || (index != NULL && err_msg == NULL)); + + if (err_msg == dict_load_index_id_err) { + /* TABLE_ID mismatch means that we have + run out of index definitions for the table. */ + + if (dict_table_get_first_index(table) == NULL + && !(ignore_err & DICT_ERR_IGNORE_CORRUPT)) { + ib_logf(IB_LOG_LEVEL_WARN, + "Failed to load the " + "clustered index for table %s " + "because of the following error: %s. " + "Refusing to load the rest of the " + "indexes (if any) and the whole table " + "altogether.", table->name, err_msg); + error = DB_CORRUPTION; + goto func_exit; + } + + break; + } else if (err_msg == dict_load_index_del) { + /* Skip delete-marked records. 
*/ + goto next_rec; + } else if (err_msg) { + fprintf(stderr, "InnoDB: %s\n", err_msg); + if (ignore_err & DICT_ERR_IGNORE_CORRUPT) { + goto next_rec; + } + error = DB_CORRUPTION; + goto func_exit; + } + + ut_ad(index); + + /* Check whether the index is corrupted */ + if (dict_index_is_corrupted(index)) { + ut_print_timestamp(stderr); + fputs(" InnoDB: ", stderr); + dict_index_name_print(stderr, NULL, index); + fputs(" is corrupted\n", stderr); + + if (!srv_load_corrupted + && !(ignore_err & DICT_ERR_IGNORE_CORRUPT) + && dict_index_is_clust(index)) { + dict_mem_index_free(index); + + error = DB_INDEX_CORRUPT; + goto func_exit; + } else { + /* We will load the index if + 1) srv_load_corrupted is TRUE + 2) ignore_err is set with + DICT_ERR_IGNORE_CORRUPT + 3) if the index corrupted is a secondary + index */ + ut_print_timestamp(stderr); + fputs(" InnoDB: load corrupted index ", stderr); + dict_index_name_print(stderr, NULL, index); + putc('\n', stderr); + } + } + + if (index->type & DICT_FTS + && !DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS)) { + /* This should have been created by now. */ + ut_a(table->fts != NULL); + DICT_TF2_FLAG_SET(table, DICT_TF2_FTS); + } + + /* We check for unsupported types first, so that the + subsequent checks are relevant for the supported types. 
*/ + if (index->type & ~(DICT_CLUSTERED | DICT_UNIQUE + | DICT_CORRUPT | DICT_FTS)) { + ib_logf(IB_LOG_LEVEL_ERROR, + "Unknown type %lu of index %s of table %s", + (ulong) index->type, index->name, table->name); + + error = DB_UNSUPPORTED; + dict_mem_index_free(index); + goto func_exit; + } else if (index->page == FIL_NULL + && !table->ibd_file_missing + && (!(index->type & DICT_FTS))) { + + fprintf(stderr, + "InnoDB: Error: trying to load index %s" + " for table %s\n" + "InnoDB: but the index tree has been freed!\n", + index->name, table->name); + + if (ignore_err & DICT_ERR_IGNORE_INDEX_ROOT) { + /* If caller can tolerate this error, + we will continue to load the index and + let caller deal with this error. However + mark the index and table corrupted. We + only need to mark such in the index + dictionary cache for such metadata corruption, + since we would always be able to set it + when loading the dictionary cache */ + dict_set_corrupted_index_cache_only( + index, table); + + fprintf(stderr, + "InnoDB: Index is corrupt but forcing" + " load into data dictionary\n"); + } else { +corrupted: + dict_mem_index_free(index); + error = DB_CORRUPTION; + goto func_exit; + } + } else if (!dict_index_is_clust(index) + && NULL == dict_table_get_first_index(table)) { + + fputs("InnoDB: Error: trying to load index ", + stderr); + ut_print_name(stderr, NULL, FALSE, index->name); + fputs(" for table ", stderr); + ut_print_name(stderr, NULL, TRUE, table->name); + fputs("\nInnoDB: but the first index" + " is not clustered!\n", stderr); + + goto corrupted; + } else if (dict_is_sys_table(table->id) + && (dict_index_is_clust(index) + || ((table == dict_sys->sys_tables) + && !strcmp("ID_IND", index->name)))) { + + /* The index was created in memory already at booting + of the database server */ + dict_mem_index_free(index); + } else { + dict_load_fields(index, heap); + + error = dict_index_add_to_cache( + table, index, index->page, FALSE); + + /* The data dictionary tables should 
never contain + invalid index definitions. If we ignored this error + and simply did not load this index definition, the + .frm file would disagree with the index definitions + inside InnoDB. */ + if (UNIV_UNLIKELY(error != DB_SUCCESS)) { + + goto func_exit; + } + } +next_rec: + btr_pcur_move_to_next_user_rec(&pcur, &mtr); + } + + /* If the table contains FTS indexes, populate table->fts->indexes */ + if (DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS)) { + /* table->fts->indexes should have been created. */ + ut_a(table->fts->indexes != NULL); + dict_table_get_all_fts_indexes(table, table->fts->indexes); + } + +func_exit: + btr_pcur_close(&pcur); + mtr_commit(&mtr); + + return(error); +} + +/********************************************************************//** +Loads a table definition from a SYS_TABLES record to dict_table_t. +Does not load any columns or indexes. +@return error message, or NULL on success */ +UNIV_INTERN +const char* +dict_load_table_low( +/*================*/ + const char* name, /*!< in: table name */ + const rec_t* rec, /*!< in: SYS_TABLES record */ + dict_table_t** table) /*!< out,own: table, or NULL */ +{ + const byte* field; + ulint len; + ulint space; + ulint n_cols; + ulint flags = 0; + ulint flags2; + + if (rec_get_deleted_flag(rec, 0)) { + return("delete-marked record in SYS_TABLES"); + } + + if (rec_get_n_fields_old(rec) != DICT_NUM_FIELDS__SYS_TABLES) { + return("wrong number of columns in SYS_TABLES record"); + } + + rec_get_nth_field_offs_old( + rec, DICT_FLD__SYS_TABLES__NAME, &len); + if (len == 0 || len == UNIV_SQL_NULL) { +err_len: + return("incorrect column length in SYS_TABLES"); + } + rec_get_nth_field_offs_old( + rec, DICT_FLD__SYS_TABLES__DB_TRX_ID, &len); + if (len != DATA_TRX_ID_LEN && len != UNIV_SQL_NULL) { + goto err_len; + } + rec_get_nth_field_offs_old( + rec, DICT_FLD__SYS_TABLES__DB_ROLL_PTR, &len); + if (len != DATA_ROLL_PTR_LEN && len != UNIV_SQL_NULL) { + goto err_len; + } + + rec_get_nth_field_offs_old(rec, 
DICT_FLD__SYS_TABLES__ID, &len); + if (len != 8) { + goto err_len; + } + + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_TABLES__N_COLS, &len); + if (len != 4) { + goto err_len; + } + + n_cols = mach_read_from_4(field); + + rec_get_nth_field_offs_old(rec, DICT_FLD__SYS_TABLES__TYPE, &len); + if (len != 4) { + goto err_len; + } + + rec_get_nth_field_offs_old( + rec, DICT_FLD__SYS_TABLES__MIX_ID, &len); + if (len != 8) { + goto err_len; + } + + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_TABLES__MIX_LEN, &len); + if (len != 4) { + goto err_len; + } + + /* MIX_LEN may hold additional flags in post-antelope file formats. */ + flags2 = mach_read_from_4(field); + + /* DICT_TF2_FTS will be set when indexes is being loaded */ + flags2 &= ~DICT_TF2_FTS; + + rec_get_nth_field_offs_old( + rec, DICT_FLD__SYS_TABLES__CLUSTER_ID, &len); + if (len != UNIV_SQL_NULL) { + goto err_len; + } + + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_TABLES__SPACE, &len); + if (len != 4) { + goto err_len; + } + + space = mach_read_from_4(field); + + /* Check if the tablespace exists and has the right name */ + flags = dict_sys_tables_get_flags(rec); + + if (UNIV_UNLIKELY(flags == ULINT_UNDEFINED)) { + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_TABLES__TYPE, &len); + ut_ad(len == 4); /* this was checked earlier */ + flags = mach_read_from_4(field); + + ut_print_timestamp(stderr); + fputs(" InnoDB: Error: table ", stderr); + ut_print_filename(stderr, name); + fprintf(stderr, "\n" + "InnoDB: in InnoDB data dictionary" + " has unknown type %lx.\n", + (ulong) flags); + return("incorrect flags in SYS_TABLES"); + } + + /* The high-order bit of N_COLS is the "compact format" flag. + For tables in that format, MIX_LEN may hold additional flags. 
*/ + if (n_cols & DICT_N_COLS_COMPACT) { + ut_ad(flags & DICT_TF_COMPACT); + + if (flags2 & ~DICT_TF2_BIT_MASK) { + ut_print_timestamp(stderr); + fputs(" InnoDB: Warning: table ", stderr); + ut_print_filename(stderr, name); + fprintf(stderr, "\n" + "InnoDB: in InnoDB data dictionary" + " has unknown flags %lx.\n", + (ulong) flags2); + + /* Clean it up and keep going */ + flags2 &= DICT_TF2_BIT_MASK; + } + } else { + /* Do not trust the MIX_LEN field when the + row format is Redundant. */ + flags2 = 0; + } + + /* See if the tablespace is available. */ + *table = dict_mem_table_create( + name, space, n_cols & ~DICT_N_COLS_COMPACT, flags, flags2, + false); + + field = rec_get_nth_field_old(rec, DICT_FLD__SYS_TABLES__ID, &len); + ut_ad(len == 8); /* this was checked earlier */ + + (*table)->id = mach_read_from_8(field); + + (*table)->ibd_file_missing = FALSE; + + return(NULL); +} + +/********************************************************************//** +Using the table->heap, copy the null-terminated filepath into +table->data_dir_path and replace the 'databasename/tablename.ibd' +portion with 'tablename'. +This allows SHOW CREATE TABLE to return the correct DATA DIRECTORY path. +Make this data directory path only if it has not yet been saved. */ +UNIV_INTERN +void +dict_save_data_dir_path( +/*====================*/ + dict_table_t* table, /*!< in/out: table */ + char* filepath) /*!< in: filepath of tablespace */ +{ + ut_ad(mutex_own(&(dict_sys->mutex))); + ut_a(DICT_TF_HAS_DATA_DIR(table->flags)); + + ut_a(!table->data_dir_path); + ut_a(filepath); + + /* Be sure this filepath is not the default filepath. 
*/ + char* default_filepath = fil_make_ibd_name(table->name, false); + if (strcmp(filepath, default_filepath)) { + ulint pathlen = strlen(filepath); + ut_a(pathlen < OS_FILE_MAX_PATH); + ut_a(0 == strcmp(filepath + pathlen - 4, ".ibd")); + + table->data_dir_path = mem_heap_strdup(table->heap, filepath); + os_file_make_data_dir_path(table->data_dir_path); + } else { + /* This does not change SYS_DATAFILES or SYS_TABLES + or FSP_FLAGS on the header page of the tablespace, + but it makes dict_table_t consistent */ + table->flags &= ~DICT_TF_MASK_DATA_DIR; + } + mem_free(default_filepath); +} + +/*****************************************************************//** +Make sure the data_file_name is saved in dict_table_t if needed. Try to +read it from the file dictionary first, then from SYS_DATAFILES. */ +UNIV_INTERN +void +dict_get_and_save_data_dir_path( +/*============================*/ + dict_table_t* table, /*!< in/out: table */ + bool dict_mutex_own) /*!< in: true if dict_sys->mutex + is owned already */ +{ + if (DICT_TF_HAS_DATA_DIR(table->flags) + && (!table->data_dir_path)) { + char* path = fil_space_get_first_path(table->space); + + if (!dict_mutex_own) { + dict_mutex_enter_for_mysql(); + } + if (!path) { + path = dict_get_first_path( + table->space, table->name); + } + + if (path) { + dict_save_data_dir_path(table, path); + mem_free(path); + } + + if (!dict_mutex_own) { + dict_mutex_exit_for_mysql(); + } + } +} + +/********************************************************************//** +Loads a table definition and also all its index definitions, and also +the cluster definition if the table is a member in a cluster. Also loads +all foreign key constraints where the foreign key is in the table or where +a foreign key references columns in this table. Adds all these to the data +dictionary cache. 
+@return table, NULL if does not exist; if the table is stored in an +.ibd file, but the file does not exist, then we set the +ibd_file_missing flag TRUE in the table object we return */ +UNIV_INTERN +dict_table_t* +dict_load_table( +/*============*/ + const char* name, /*!< in: table name in the + databasename/tablename format */ + ibool cached, /*!< in: TRUE=add to cache, FALSE=do not */ + dict_err_ignore_t ignore_err) + /*!< in: error to be ignored when loading + table and its indexes' definition */ +{ + dberr_t err; + dict_table_t* table; + dict_table_t* sys_tables; + btr_pcur_t pcur; + dict_index_t* sys_index; + dtuple_t* tuple; + mem_heap_t* heap; + dfield_t* dfield; + const rec_t* rec; + const byte* field; + ulint len; + char* filepath = NULL; + const char* err_msg; + mtr_t mtr; + + ut_ad(mutex_own(&(dict_sys->mutex))); + + heap = mem_heap_create(32000); + + mtr_start(&mtr); + + sys_tables = dict_table_get_low("SYS_TABLES"); + sys_index = UT_LIST_GET_FIRST(sys_tables->indexes); + ut_ad(!dict_table_is_comp(sys_tables)); + ut_ad(name_of_col_is(sys_tables, sys_index, + DICT_FLD__SYS_TABLES__ID, "ID")); + ut_ad(name_of_col_is(sys_tables, sys_index, + DICT_FLD__SYS_TABLES__N_COLS, "N_COLS")); + ut_ad(name_of_col_is(sys_tables, sys_index, + DICT_FLD__SYS_TABLES__TYPE, "TYPE")); + ut_ad(name_of_col_is(sys_tables, sys_index, + DICT_FLD__SYS_TABLES__MIX_LEN, "MIX_LEN")); + ut_ad(name_of_col_is(sys_tables, sys_index, + DICT_FLD__SYS_TABLES__SPACE, "SPACE")); + + tuple = dtuple_create(heap, 1); + dfield = dtuple_get_nth_field(tuple, 0); + + dfield_set_data(dfield, name, ut_strlen(name)); + dict_index_copy_types(tuple, sys_index, 1); + + btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE, + BTR_SEARCH_LEAF, &pcur, &mtr); + rec = btr_pcur_get_rec(&pcur); + + if (!btr_pcur_is_on_user_rec(&pcur) + || rec_get_deleted_flag(rec, 0)) { + /* Not found */ +err_exit: + btr_pcur_close(&pcur); + mtr_commit(&mtr); + mem_heap_free(heap); + + return(NULL); + } + + field = 
rec_get_nth_field_old( + rec, DICT_FLD__SYS_TABLES__NAME, &len); + + /* Check if the table name in record is the searched one */ + if (len != ut_strlen(name) || ut_memcmp(name, field, len) != 0) { + + goto err_exit; + } + + err_msg = dict_load_table_low(name, rec, &table); + + if (err_msg) { + + ut_print_timestamp(stderr); + fprintf(stderr, " InnoDB: %s\n", err_msg); + goto err_exit; + } + + char table_name[MAX_FULL_NAME_LEN + 1]; + + innobase_format_name(table_name, sizeof(table_name), name, FALSE); + + btr_pcur_close(&pcur); + mtr_commit(&mtr); + + if (table->space == 0) { + /* The system tablespace is always available. */ + } else if (table->flags2 & DICT_TF2_DISCARDED) { + + ib_logf(IB_LOG_LEVEL_WARN, + "Table '%s' tablespace is set as discarded.", + table_name); + + table->ibd_file_missing = TRUE; + + } else if (!fil_space_for_table_exists_in_mem( + table->space, name, FALSE, FALSE, true, heap, + table->id)) { + + if (DICT_TF2_FLAG_IS_SET(table, DICT_TF2_TEMPORARY)) { + /* Do not bother to retry opening temporary tables. */ + table->ibd_file_missing = TRUE; + + } else { + if (!(ignore_err & DICT_ERR_IGNORE_RECOVER_LOCK)) { + ib_logf(IB_LOG_LEVEL_ERROR, + "Failed to find tablespace for " + "table '%s' in the cache. " + "Attempting to load the tablespace " + "with space id %lu.", + table_name, (ulong) table->space); + } + + /* Use the remote filepath if needed. */ + if (DICT_TF_HAS_DATA_DIR(table->flags)) { + /* This needs to be added to the table + from SYS_DATAFILES */ + dict_get_and_save_data_dir_path(table, true); + + if (table->data_dir_path) { + filepath = os_file_make_remote_pathname( + table->data_dir_path, + table->name, "ibd"); + } + } + + /* Try to open the tablespace. 
We set the + 2nd param (fix_dict = false) here because we + do not have an x-lock on dict_operation_lock */ + err = fil_open_single_table_tablespace( + true, false, table->space, + dict_tf_to_fsp_flags(table->flags), + name, filepath); + + if (err != DB_SUCCESS) { + /* We failed to find a sensible + tablespace file */ + + table->ibd_file_missing = TRUE; + } + if (filepath) { + mem_free(filepath); + } + } + } + + dict_load_columns(table, heap); + + if (cached) { + dict_table_add_to_cache(table, TRUE, heap); + } else { + dict_table_add_system_columns(table, heap); + } + + mem_heap_empty(heap); + + /* If there is no tablespace for the table then we only need to + load the index definitions. So that we can IMPORT the tablespace + later. When recovering table locks for resurrected incomplete + transactions, the tablespace should exist, because DDL operations + were not allowed while the table is being locked by a transaction. */ + dict_err_ignore_t index_load_err = + !(ignore_err & DICT_ERR_IGNORE_RECOVER_LOCK) + && table->ibd_file_missing + ? DICT_ERR_IGNORE_ALL + : ignore_err; + err = dict_load_indexes(table, heap, index_load_err); + + if (err == DB_INDEX_CORRUPT) { + /* Refuse to load the table if the table has a corrupted + cluster index */ + if (!srv_load_corrupted) { + fprintf(stderr, "InnoDB: Error: Load table "); + ut_print_name(stderr, NULL, TRUE, table->name); + fprintf(stderr, " failed, the table has corrupted" + " clustered indexes. Turn on" + " 'innodb_force_load_corrupted'" + " to drop it\n"); + + dict_table_remove_from_cache(table); + table = NULL; + goto func_exit; + } else { + dict_index_t* clust_index; + clust_index = dict_table_get_first_index(table); + + if (dict_index_is_corrupted(clust_index)) { + table->corrupted = TRUE; + } + } + } + + /* Initialize table foreign_child value. 
Its value could be + changed when dict_load_foreigns() is called below */ + table->fk_max_recusive_level = 0; + + /* If the force recovery flag is set, we open the table irrespective + of the error condition, since the user may want to dump data from the + clustered index. However we load the foreign key information only if + all indexes were loaded. */ + if (!cached || table->ibd_file_missing) { + /* Don't attempt to load the indexes from disk. */ + } else if (err == DB_SUCCESS) { + err = dict_load_foreigns(table->name, NULL, true, true, + ignore_err); + + if (err != DB_SUCCESS) { + ib_logf(IB_LOG_LEVEL_WARN, + "Load table '%s' failed, the table has missing " + "foreign key indexes. Turn off " + "'foreign_key_checks' and try again.", + table->name); + + dict_table_remove_from_cache(table); + table = NULL; + } else { + table->fk_max_recusive_level = 0; + } + } else { + dict_index_t* index; + + /* Make sure that at least the clustered index was loaded. + Otherwise refuse to load the table */ + index = dict_table_get_first_index(table); + + if (!srv_force_recovery + || !index + || !dict_index_is_clust(index)) { + + dict_table_remove_from_cache(table); + table = NULL; + + } else if (dict_index_is_corrupted(index) + && !table->ibd_file_missing) { + + /* It is possible we force to load a corrupted + clustered index if srv_load_corrupted is set. 
+ Mark the table as corrupted in this case */ + table->corrupted = TRUE; + } + } + +func_exit: + mem_heap_free(heap); + + ut_ad(!table + || ignore_err != DICT_ERR_IGNORE_NONE + || table->ibd_file_missing + || !table->corrupted); + + if (table && table->fts) { + if (!(dict_table_has_fts_index(table) + || DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID) + || DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_ADD_DOC_ID))) { + /* the table->fts could be created in dict_load_column + when a user defined FTS_DOC_ID is present, but no + FTS */ + fts_free(table); + } else { + fts_optimize_add_table(table); + } + } + + ut_ad(err != DB_SUCCESS || dict_foreign_set_validate(*table)); + + return(table); +} + +/***********************************************************************//** +Loads a table object based on the table id. +@return table; NULL if table does not exist */ +UNIV_INTERN +dict_table_t* +dict_load_table_on_id( +/*==================*/ + table_id_t table_id, /*!< in: table id */ + dict_err_ignore_t ignore_err) /*!< in: errors to ignore + when loading the table */ +{ + byte id_buf[8]; + btr_pcur_t pcur; + mem_heap_t* heap; + dtuple_t* tuple; + dfield_t* dfield; + dict_index_t* sys_table_ids; + dict_table_t* sys_tables; + const rec_t* rec; + const byte* field; + ulint len; + dict_table_t* table; + mtr_t mtr; + + ut_ad(mutex_own(&(dict_sys->mutex))); + + table = NULL; + + /* NOTE that the operation of this function is protected by + the dictionary mutex, and therefore no deadlocks can occur + with other dictionary operations. 
*/ + + mtr_start(&mtr); + /*---------------------------------------------------*/ + /* Get the secondary index based on ID for table SYS_TABLES */ + sys_tables = dict_sys->sys_tables; + sys_table_ids = dict_table_get_next_index( + dict_table_get_first_index(sys_tables)); + ut_ad(!dict_table_is_comp(sys_tables)); + ut_ad(!dict_index_is_clust(sys_table_ids)); + heap = mem_heap_create(256); + + tuple = dtuple_create(heap, 1); + dfield = dtuple_get_nth_field(tuple, 0); + + /* Write the table id in byte format to id_buf */ + mach_write_to_8(id_buf, table_id); + + dfield_set_data(dfield, id_buf, 8); + dict_index_copy_types(tuple, sys_table_ids, 1); + + btr_pcur_open_on_user_rec(sys_table_ids, tuple, PAGE_CUR_GE, + BTR_SEARCH_LEAF, &pcur, &mtr); + +check_rec: + rec = btr_pcur_get_rec(&pcur); + + if (page_rec_is_user_rec(rec)) { + /*---------------------------------------------------*/ + /* Now we have the record in the secondary index + containing the table ID and NAME */ + + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_TABLE_IDS__ID, &len); + ut_ad(len == 8); + + /* Check if the table id in record is the one searched for */ + if (table_id == mach_read_from_8(field)) { + if (rec_get_deleted_flag(rec, 0)) { + /* Until purge has completed, there + may be delete-marked duplicate records + for the same SYS_TABLES.ID. + Due to Bug #60049, some delete-marked + records may survive the purge forever. 
*/ + if (btr_pcur_move_to_next(&pcur, &mtr)) { + + goto check_rec; + } + } else { + /* Now we get the table name from the record */ + field = rec_get_nth_field_old(rec, + DICT_FLD__SYS_TABLE_IDS__NAME, &len); + /* Load the table definition to memory */ + table = dict_load_table( + mem_heap_strdupl( + heap, (char*) field, len), + TRUE, ignore_err); + } + } + } + + btr_pcur_close(&pcur); + mtr_commit(&mtr); + mem_heap_free(heap); + + return(table); +} + +/********************************************************************//** +This function is called when the database is booted. Loads system table +index definitions except for the clustered index which is added to the +dictionary cache at booting before calling this function. */ +UNIV_INTERN +void +dict_load_sys_table( +/*================*/ + dict_table_t* table) /*!< in: system table */ +{ + mem_heap_t* heap; + + ut_ad(mutex_own(&(dict_sys->mutex))); + + heap = mem_heap_create(1000); + + dict_load_indexes(table, heap, DICT_ERR_IGNORE_NONE); + + mem_heap_free(heap); +} + +/********************************************************************//** +Loads foreign key constraint col names (also for the referenced table). 
+Members that must be set (and valid) in foreign: +foreign->heap +foreign->n_fields +foreign->id ('\0'-terminated) +Members that will be created and set by this function: +foreign->foreign_col_names[i] +foreign->referenced_col_names[i] +(for i=0..foreign->n_fields-1) */ +static +void +dict_load_foreign_cols( +/*===================*/ + dict_foreign_t* foreign)/*!< in/out: foreign constraint object */ +{ + dict_table_t* sys_foreign_cols; + dict_index_t* sys_index; + btr_pcur_t pcur; + dtuple_t* tuple; + dfield_t* dfield; + const rec_t* rec; + const byte* field; + ulint len; + ulint i; + mtr_t mtr; + size_t id_len; + + ut_ad(mutex_own(&(dict_sys->mutex))); + + id_len = strlen(foreign->id); + + foreign->foreign_col_names = static_cast<const char**>( + mem_heap_alloc(foreign->heap, + foreign->n_fields * sizeof(void*))); + + foreign->referenced_col_names = static_cast<const char**>( + mem_heap_alloc(foreign->heap, + foreign->n_fields * sizeof(void*))); + + mtr_start(&mtr); + + sys_foreign_cols = dict_table_get_low("SYS_FOREIGN_COLS"); + + sys_index = UT_LIST_GET_FIRST(sys_foreign_cols->indexes); + ut_ad(!dict_table_is_comp(sys_foreign_cols)); + + tuple = dtuple_create(foreign->heap, 1); + dfield = dtuple_get_nth_field(tuple, 0); + + dfield_set_data(dfield, foreign->id, id_len); + dict_index_copy_types(tuple, sys_index, 1); + + btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE, + BTR_SEARCH_LEAF, &pcur, &mtr); + for (i = 0; i < foreign->n_fields; i++) { + + rec = btr_pcur_get_rec(&pcur); + + ut_a(btr_pcur_is_on_user_rec(&pcur)); + ut_a(!rec_get_deleted_flag(rec, 0)); + + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_FOREIGN_COLS__ID, &len); + + if (len != id_len || ut_memcmp(foreign->id, field, len) != 0) { + const rec_t* pos; + ulint pos_len; + const rec_t* for_col_name; + ulint for_col_name_len; + const rec_t* ref_col_name; + ulint ref_col_name_len; + + pos = rec_get_nth_field_old( + rec, DICT_FLD__SYS_FOREIGN_COLS__POS, + &pos_len); + + for_col_name = 
rec_get_nth_field_old( + rec, DICT_FLD__SYS_FOREIGN_COLS__FOR_COL_NAME, + &for_col_name_len); + + ref_col_name = rec_get_nth_field_old( + rec, DICT_FLD__SYS_FOREIGN_COLS__REF_COL_NAME, + &ref_col_name_len); + + ib_logf(IB_LOG_LEVEL_ERROR, + "Unable to load columns names for foreign " + "key '%s' because it was not found in " + "InnoDB internal table SYS_FOREIGN_COLS. The " + "closest entry we found is: " + "(ID='%.*s', POS=%lu, FOR_COL_NAME='%.*s', " + "REF_COL_NAME='%.*s')", + foreign->id, + (int) len, field, + mach_read_from_4(pos), + (int) for_col_name_len, for_col_name, + (int) ref_col_name_len, ref_col_name); + + ut_error; + } + + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_FOREIGN_COLS__POS, &len); + ut_a(len == 4); + ut_a(i == mach_read_from_4(field)); + + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_FOREIGN_COLS__FOR_COL_NAME, &len); + foreign->foreign_col_names[i] = mem_heap_strdupl( + foreign->heap, (char*) field, len); + + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_FOREIGN_COLS__REF_COL_NAME, &len); + foreign->referenced_col_names[i] = mem_heap_strdupl( + foreign->heap, (char*) field, len); + + btr_pcur_move_to_next_user_rec(&pcur, &mtr); + } + + btr_pcur_close(&pcur); + mtr_commit(&mtr); +} + +/***********************************************************************//** +Loads a foreign key constraint to the dictionary cache. 
+@return DB_SUCCESS or error code */ +static __attribute__((nonnull(1), warn_unused_result)) +dberr_t +dict_load_foreign( +/*==============*/ + const char* id, + /*!< in: foreign constraint id, must be + '\0'-terminated */ + const char** col_names, + /*!< in: column names, or NULL + to use foreign->foreign_table->col_names */ + bool check_recursive, + /*!< in: whether to record the foreign table + parent count to avoid unlimited recursive + load of chained foreign tables */ + bool check_charsets, + /*!< in: whether to check charset + compatibility */ + dict_err_ignore_t ignore_err) + /*!< in: error to be ignored */ +{ + dict_foreign_t* foreign; + dict_table_t* sys_foreign; + btr_pcur_t pcur; + dict_index_t* sys_index; + dtuple_t* tuple; + mem_heap_t* heap2; + dfield_t* dfield; + const rec_t* rec; + const byte* field; + ulint len; + ulint n_fields_and_type; + mtr_t mtr; + dict_table_t* for_table; + dict_table_t* ref_table; + size_t id_len; + + ut_ad(mutex_own(&(dict_sys->mutex))); + + id_len = strlen(id); + + heap2 = mem_heap_create(1000); + + mtr_start(&mtr); + + sys_foreign = dict_table_get_low("SYS_FOREIGN"); + + sys_index = UT_LIST_GET_FIRST(sys_foreign->indexes); + ut_ad(!dict_table_is_comp(sys_foreign)); + + tuple = dtuple_create(heap2, 1); + dfield = dtuple_get_nth_field(tuple, 0); + + dfield_set_data(dfield, id, id_len); + dict_index_copy_types(tuple, sys_index, 1); + + btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE, + BTR_SEARCH_LEAF, &pcur, &mtr); + rec = btr_pcur_get_rec(&pcur); + + if (!btr_pcur_is_on_user_rec(&pcur) + || rec_get_deleted_flag(rec, 0)) { + /* Not found */ + + fprintf(stderr, + "InnoDB: Error: cannot load foreign constraint " + "%s: could not find the relevant record in " + "SYS_FOREIGN\n", id); + + btr_pcur_close(&pcur); + mtr_commit(&mtr); + mem_heap_free(heap2); + + return(DB_ERROR); + } + + field = rec_get_nth_field_old(rec, DICT_FLD__SYS_FOREIGN__ID, &len); + + /* Check if the id in record is the searched one */ + if (len != 
id_len || ut_memcmp(id, field, len) != 0) { + + fprintf(stderr, + "InnoDB: Error: cannot load foreign constraint " + "%s: found %.*s instead in SYS_FOREIGN\n", + id, (int) len, field); + + btr_pcur_close(&pcur); + mtr_commit(&mtr); + mem_heap_free(heap2); + + return(DB_ERROR); + } + + /* Read the table names and the number of columns associated + with the constraint */ + + mem_heap_free(heap2); + + foreign = dict_mem_foreign_create(); + + n_fields_and_type = mach_read_from_4( + rec_get_nth_field_old( + rec, DICT_FLD__SYS_FOREIGN__N_COLS, &len)); + + ut_a(len == 4); + + /* We store the type in the bits 24..29 of n_fields_and_type. */ + + foreign->type = (unsigned int) (n_fields_and_type >> 24); + foreign->n_fields = (unsigned int) (n_fields_and_type & 0x3FFUL); + + foreign->id = mem_heap_strdupl(foreign->heap, id, id_len); + + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_FOREIGN__FOR_NAME, &len); + + foreign->foreign_table_name = mem_heap_strdupl( + foreign->heap, (char*) field, len); + dict_mem_foreign_table_name_lookup_set(foreign, TRUE); + + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_FOREIGN__REF_NAME, &len); + foreign->referenced_table_name = mem_heap_strdupl( + foreign->heap, (char*) field, len); + dict_mem_referenced_table_name_lookup_set(foreign, TRUE); + + btr_pcur_close(&pcur); + mtr_commit(&mtr); + + dict_load_foreign_cols(foreign); + + ref_table = dict_table_check_if_in_cache_low( + foreign->referenced_table_name_lookup); + + /* We could possibly wind up in a deep recursive calls if + we call dict_table_get_low() again here if there + is a chain of tables concatenated together with + foreign constraints. In such case, each table is + both a parent and child of the other tables, and + act as a "link" in such table chains. + To avoid such scenario, we would need to check the + number of ancesters the current table has. If that + exceeds DICT_FK_MAX_CHAIN_LEN, we will stop loading + the child table. 
+ Foreign constraints are loaded in a Breath First fashion, + that is, the index on FOR_NAME is scanned first, and then + index on REF_NAME. So foreign constrains in which + current table is a child (foreign table) are loaded first, + and then those constraints where current table is a + parent (referenced) table. + Thus we could check the parent (ref_table) table's + reference count (fk_max_recusive_level) to know how deep the + recursive call is. If the parent table (ref_table) is already + loaded, and its fk_max_recusive_level is larger than + DICT_FK_MAX_CHAIN_LEN, we will stop the recursive loading + by skipping loading the child table. It will not affect foreign + constraint check for DMLs since child table will be loaded + at that time for the constraint check. */ + if (!ref_table + || ref_table->fk_max_recusive_level < DICT_FK_MAX_RECURSIVE_LOAD) { + + /* If the foreign table is not yet in the dictionary cache, we + have to load it so that we are able to make type comparisons + in the next function call. */ + + for_table = dict_table_get_low(foreign->foreign_table_name_lookup); + + if (for_table && ref_table && check_recursive) { + /* This is to record the longest chain of ancesters + this table has, if the parent has more ancesters + than this table has, record it after add 1 (for this + parent */ + if (ref_table->fk_max_recusive_level + >= for_table->fk_max_recusive_level) { + for_table->fk_max_recusive_level = + ref_table->fk_max_recusive_level + 1; + } + } + } + + /* Note that there may already be a foreign constraint object in + the dictionary cache for this constraint: then the following + call only sets the pointers in it to point to the appropriate table + and index objects and frees the newly created object foreign. + Adding to the cache should always succeed since we are not creating + a new foreign key constraint but loading one from the data + dictionary. 
*/ + + return(dict_foreign_add_to_cache(foreign, col_names, check_charsets, + ignore_err)); +} + +/***********************************************************************//** +Loads foreign key constraints where the table is either the foreign key +holder or where the table is referenced by a foreign key. Adds these +constraints to the data dictionary. Note that we know that the dictionary +cache already contains all constraints where the other relevant table is +already in the dictionary cache. +@return DB_SUCCESS or error code */ +UNIV_INTERN +dberr_t +dict_load_foreigns( +/*===============*/ + const char* table_name, /*!< in: table name */ + const char** col_names, /*!< in: column names, or NULL + to use table->col_names */ + bool check_recursive,/*!< in: Whether to check + recursive load of tables + chained by FK */ + bool check_charsets, /*!< in: whether to check + charset compatibility */ + dict_err_ignore_t ignore_err) /*!< in: error to be ignored */ +{ + ulint tuple_buf[(DTUPLE_EST_ALLOC(1) + sizeof(ulint) - 1) + / sizeof(ulint)]; + btr_pcur_t pcur; + dtuple_t* tuple; + dfield_t* dfield; + dict_index_t* sec_index; + dict_table_t* sys_foreign; + const rec_t* rec; + const byte* field; + ulint len; + dberr_t err; + mtr_t mtr; + + ut_ad(mutex_own(&(dict_sys->mutex))); + + sys_foreign = dict_table_get_low("SYS_FOREIGN"); + + if (sys_foreign == NULL) { + /* No foreign keys defined yet in this database */ + + fprintf(stderr, + "InnoDB: Error: no foreign key system tables" + " in the database\n"); + + return(DB_ERROR); + } + + ut_ad(!dict_table_is_comp(sys_foreign)); + mtr_start(&mtr); + + /* Get the secondary index based on FOR_NAME from table + SYS_FOREIGN */ + + sec_index = dict_table_get_next_index( + dict_table_get_first_index(sys_foreign)); + ut_ad(!dict_index_is_clust(sec_index)); +start_load: + + tuple = dtuple_create_from_mem(tuple_buf, sizeof(tuple_buf), 1); + dfield = dtuple_get_nth_field(tuple, 0); + + dfield_set_data(dfield, table_name, 
ut_strlen(table_name)); + dict_index_copy_types(tuple, sec_index, 1); + + btr_pcur_open_on_user_rec(sec_index, tuple, PAGE_CUR_GE, + BTR_SEARCH_LEAF, &pcur, &mtr); +loop: + rec = btr_pcur_get_rec(&pcur); + + if (!btr_pcur_is_on_user_rec(&pcur)) { + /* End of index */ + + goto load_next_index; + } + + /* Now we have the record in the secondary index containing a table + name and a foreign constraint ID */ + + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_FOREIGN_FOR_NAME__NAME, &len); + + /* Check if the table name in the record is the one searched for; the + following call does the comparison in the latin1_swedish_ci + charset-collation, in a case-insensitive way. */ + + if (0 != cmp_data_data(dfield_get_type(dfield)->mtype, + dfield_get_type(dfield)->prtype, + static_cast<const byte*>( + dfield_get_data(dfield)), + dfield_get_len(dfield), + field, len)) { + + goto load_next_index; + } + + /* Since table names in SYS_FOREIGN are stored in a case-insensitive + order, we have to check that the table name matches also in a binary + string comparison. On Unix, MySQL allows table names that only differ + in character case. If lower_case_table_names=2 then what is stored + may not be the same case, but the previous comparison showed that they + match with no-case. */ + + if (rec_get_deleted_flag(rec, 0)) { + goto next_rec; + } + + if ((innobase_get_lower_case_table_names() != 2) + && (0 != ut_memcmp(field, table_name, len))) { + goto next_rec; + } + + /* Now we get a foreign key constraint id */ + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_FOREIGN_FOR_NAME__ID, &len); + + /* Copy the string because the page may be modified or evicted + after mtr_commit() below. 
*/ + char fk_id[MAX_TABLE_NAME_LEN + 1]; + + ut_a(len <= MAX_TABLE_NAME_LEN); + memcpy(fk_id, field, len); + fk_id[len] = '\0'; + + btr_pcur_store_position(&pcur, &mtr); + + mtr_commit(&mtr); + + /* Load the foreign constraint definition to the dictionary cache */ + + err = dict_load_foreign(fk_id, col_names, + check_recursive, check_charsets, ignore_err); + + if (err != DB_SUCCESS) { + btr_pcur_close(&pcur); + + return(err); + } + + mtr_start(&mtr); + + btr_pcur_restore_position(BTR_SEARCH_LEAF, &pcur, &mtr); +next_rec: + btr_pcur_move_to_next_user_rec(&pcur, &mtr); + + goto loop; + +load_next_index: + btr_pcur_close(&pcur); + mtr_commit(&mtr); + + sec_index = dict_table_get_next_index(sec_index); + + if (sec_index != NULL) { + + mtr_start(&mtr); + + /* Switch to scan index on REF_NAME, fk_max_recusive_level + already been updated when scanning FOR_NAME index, no need to + update again */ + check_recursive = FALSE; + + goto start_load; + } + + return(DB_SUCCESS); +} diff --git a/storage/xtradb/dict/dict0mem.cc b/storage/xtradb/dict/dict0mem.cc new file mode 100644 index 00000000000..44b074dd718 --- /dev/null +++ b/storage/xtradb/dict/dict0mem.cc @@ -0,0 +1,755 @@ +/***************************************************************************** + +Copyright (c) 1996, 2014, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2012, Facebook Inc. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/******************************************************************//** +@file dict/dict0mem.cc +Data dictionary memory object creation + +Created 1/8/1996 Heikki Tuuri +***********************************************************************/ + +#include "dict0mem.h" + +#ifdef UNIV_NONINL +#include "dict0mem.ic" +#endif + +#include "rem0rec.h" +#include "data0type.h" +#include "mach0data.h" +#include "dict0dict.h" +#include "fts0priv.h" +#include "ut0crc32.h" +#ifndef UNIV_HOTBACKUP +# include "ha_prototypes.h" /* innobase_casedn_str(), + innobase_get_lower_case_table_names */ +# include "mysql_com.h" /* NAME_LEN */ +# include "lock0lock.h" +#endif /* !UNIV_HOTBACKUP */ +#ifdef UNIV_BLOB_DEBUG +# include "ut0rbt.h" +#endif /* UNIV_BLOB_DEBUG */ +#include <iostream> + +#define DICT_HEAP_SIZE 100 /*!< initial memory heap size when + creating a table or index object */ + +#ifdef UNIV_PFS_MUTEX +/* Key to register autoinc_mutex with performance schema */ +UNIV_INTERN mysql_pfs_key_t autoinc_mutex_key; +#endif /* UNIV_PFS_MUTEX */ + +/** An integer randomly initialized at startup used to make a temporary +table name as unique as possible. */ +static ib_uint32_t dict_temp_file_num; + +/**********************************************************************//** +Creates a table memory object. 
+@return own: table object */ +UNIV_INTERN +dict_table_t* +dict_mem_table_create( +/*==================*/ + const char* name, /*!< in: table name */ + ulint space, /*!< in: space where the clustered index of + the table is placed */ + ulint n_cols, /*!< in: number of columns */ + ulint flags, /*!< in: table flags */ + ulint flags2, /*!< in: table flags2 */ + bool nonshared)/*!< in: whether the table object is a dummy + one that does not need the initialization of + locking-related fields. */ +{ + dict_table_t* table; + mem_heap_t* heap; + + ut_ad(name); + ut_a(dict_tf_is_valid(flags)); + ut_a(!(flags2 & ~DICT_TF2_BIT_MASK)); + + heap = mem_heap_create(DICT_HEAP_SIZE); + + table = static_cast<dict_table_t*>( + mem_heap_zalloc(heap, sizeof(dict_table_t))); + + table->heap = heap; + + table->flags = (unsigned int) flags; + table->flags2 = (unsigned int) flags2; + table->name = static_cast<char*>(ut_malloc(strlen(name) + 1)); + memcpy(table->name, name, strlen(name) + 1); + table->space = (unsigned int) space; + table->n_cols = (unsigned int) (n_cols + DATA_N_SYS_COLS); + + table->cols = static_cast<dict_col_t*>( + mem_heap_alloc(heap, + (n_cols + DATA_N_SYS_COLS) + * sizeof(dict_col_t))); + + ut_d(table->magic_n = DICT_TABLE_MAGIC_N); + + /* true means that the stats latch will be enabled - + dict_table_stats_lock() will not be noop. */ + dict_table_stats_latch_create(table, true); + +#ifndef UNIV_HOTBACKUP + + if (!nonshared) { + + table->autoinc_lock = static_cast<ib_lock_t*>( + mem_heap_alloc(heap, lock_get_size())); + + mutex_create(autoinc_mutex_key, + &table->autoinc_mutex, SYNC_DICT_AUTOINC_MUTEX); + } else { + + table->autoinc_lock = NULL; + } + + table->autoinc = 0; + + /* The number of transactions that are either waiting on the + AUTOINC lock or have been granted the lock. 
*/ + table->n_waiting_or_granted_auto_inc_locks = 0; + + /* If the table has an FTS index or we are in the process + of building one, create the table->fts */ + if (dict_table_has_fts_index(table) + || DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID) + || DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_ADD_DOC_ID)) { + table->fts = fts_create(table); + table->fts->cache = fts_cache_create(table); + } else { + table->fts = NULL; + } + + table->is_corrupt = FALSE; + +#endif /* !UNIV_HOTBACKUP */ + + new(&table->foreign_set) dict_foreign_set(); + new(&table->referenced_set) dict_foreign_set(); + + return(table); +} + +/****************************************************************//** +Free a table memory object. */ +UNIV_INTERN +void +dict_mem_table_free( +/*================*/ + dict_table_t* table) /*!< in: table */ +{ + ut_ad(table); + ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); + ut_d(table->cached = FALSE); + + if (dict_table_has_fts_index(table) + || DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID) + || DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_ADD_DOC_ID)) { + if (table->fts) { + if (table->cached) { + fts_optimize_remove_table(table); + } + + fts_free(table); + } + } +#ifndef UNIV_HOTBACKUP + if (table->autoinc_lock) { + + mutex_free(&(table->autoinc_mutex)); + } +#endif /* UNIV_HOTBACKUP */ + + dict_table_stats_latch_destroy(table); + + table->foreign_set.~dict_foreign_set(); + table->referenced_set.~dict_foreign_set(); + + ut_free(table->name); + mem_heap_free(table->heap); +} + +/****************************************************************//** +Append 'name' to 'col_names'. 
@see dict_table_t::col_names +@return new column names array */ +static +const char* +dict_add_col_name( +/*==============*/ + const char* col_names, /*!< in: existing column names, or + NULL */ + ulint cols, /*!< in: number of existing columns */ + const char* name, /*!< in: new column name */ + mem_heap_t* heap) /*!< in: heap */ +{ + ulint old_len; + ulint new_len; + ulint total_len; + char* res; + + ut_ad(!cols == !col_names); + + /* Find out length of existing array. */ + if (col_names) { + const char* s = col_names; + ulint i; + + for (i = 0; i < cols; i++) { + s += strlen(s) + 1; + } + + old_len = s - col_names; + } else { + old_len = 0; + } + + new_len = strlen(name) + 1; + total_len = old_len + new_len; + + res = static_cast<char*>(mem_heap_alloc(heap, total_len)); + + if (old_len > 0) { + memcpy(res, col_names, old_len); + } + + memcpy(res + old_len, name, new_len); + + return(res); +} + +/**********************************************************************//** +Adds a column definition to a table. */ +UNIV_INTERN +void +dict_mem_table_add_col( +/*===================*/ + dict_table_t* table, /*!< in: table */ + mem_heap_t* heap, /*!< in: temporary memory heap, or NULL */ + const char* name, /*!< in: column name, or NULL */ + ulint mtype, /*!< in: main datatype */ + ulint prtype, /*!< in: precise type */ + ulint len) /*!< in: precision */ +{ + dict_col_t* col; + ulint i; + + ut_ad(table); + ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); + ut_ad(!heap == !name); + + i = table->n_def++; + + if (name) { + if (UNIV_UNLIKELY(table->n_def == table->n_cols)) { + heap = table->heap; + } + if (UNIV_LIKELY(i) && UNIV_UNLIKELY(!table->col_names)) { + /* All preceding column names are empty. 
*/ + char* s = static_cast<char*>( + mem_heap_zalloc(heap, table->n_def)); + + table->col_names = s; + } + + table->col_names = dict_add_col_name(table->col_names, + i, name, heap); + } + + col = dict_table_get_nth_col(table, i); + + dict_mem_fill_column_struct(col, i, mtype, prtype, len); +} + +/**********************************************************************//** +Renames a column of a table in the data dictionary cache. */ +static __attribute__((nonnull)) +void +dict_mem_table_col_rename_low( +/*==========================*/ + dict_table_t* table, /*!< in/out: table */ + unsigned i, /*!< in: column offset corresponding to s */ + const char* to, /*!< in: new column name */ + const char* s) /*!< in: pointer to table->col_names */ +{ + size_t from_len = strlen(s), to_len = strlen(to); + + ut_ad(i < table->n_def); + ut_ad(from_len <= NAME_LEN); + ut_ad(to_len <= NAME_LEN); + + if (from_len == to_len) { + /* The easy case: simply replace the column name in + table->col_names. */ + strcpy(const_cast<char*>(s), to); + } else { + /* We need to adjust all affected index->field + pointers, as in dict_index_add_col(). First, copy + table->col_names. */ + ulint prefix_len = s - table->col_names; + + for (; i < table->n_def; i++) { + s += strlen(s) + 1; + } + + ulint full_len = s - table->col_names; + char* col_names; + + if (to_len > from_len) { + col_names = static_cast<char*>( + mem_heap_alloc( + table->heap, + full_len + to_len - from_len)); + + memcpy(col_names, table->col_names, prefix_len); + } else { + col_names = const_cast<char*>(table->col_names); + } + + memcpy(col_names + prefix_len, to, to_len); + memmove(col_names + prefix_len + to_len, + table->col_names + (prefix_len + from_len), + full_len - (prefix_len + from_len)); + + /* Replace the field names in every index. 
*/ + for (dict_index_t* index = dict_table_get_first_index(table); + index != NULL; + index = dict_table_get_next_index(index)) { + ulint n_fields = dict_index_get_n_fields(index); + + for (ulint i = 0; i < n_fields; i++) { + dict_field_t* field + = dict_index_get_nth_field( + index, i); + ulint name_ofs + = field->name - table->col_names; + if (name_ofs <= prefix_len) { + field->name = col_names + name_ofs; + } else { + ut_a(name_ofs < full_len); + field->name = col_names + + name_ofs + to_len - from_len; + } + } + } + + table->col_names = col_names; + } + + dict_foreign_t* foreign; + + /* Replace the field names in every foreign key constraint. */ + for (dict_foreign_set::iterator it = table->foreign_set.begin(); + it != table->foreign_set.end(); + ++it) { + + foreign = *it; + + for (unsigned f = 0; f < foreign->n_fields; f++) { + /* These can point straight to + table->col_names, because the foreign key + constraints will be freed at the same time + when the table object is freed. */ + foreign->foreign_col_names[f] + = dict_index_get_nth_field( + foreign->foreign_index, f)->name; + } + } + + for (dict_foreign_set::iterator it = table->referenced_set.begin(); + it != table->referenced_set.end(); + ++it) { + + foreign = *it; + + for (unsigned f = 0; f < foreign->n_fields; f++) { + /* foreign->referenced_col_names[] need to be + copies, because the constraint may become + orphan when foreign_key_checks=0 and the + parent table is dropped. 
*/ + + const char* col_name = dict_index_get_nth_field( + foreign->referenced_index, f)->name; + + if (strcmp(foreign->referenced_col_names[f], + col_name)) { + char** rc = const_cast<char**>( + foreign->referenced_col_names + f); + size_t col_name_len_1 = strlen(col_name) + 1; + + if (col_name_len_1 <= strlen(*rc) + 1) { + memcpy(*rc, col_name, col_name_len_1); + } else { + *rc = static_cast<char*>( + mem_heap_dup( + foreign->heap, + col_name, + col_name_len_1)); + } + } + } + } +} + +/**********************************************************************//** +Renames a column of a table in the data dictionary cache. */ +UNIV_INTERN +void +dict_mem_table_col_rename( +/*======================*/ + dict_table_t* table, /*!< in/out: table */ + unsigned nth_col,/*!< in: column index */ + const char* from, /*!< in: old column name */ + const char* to) /*!< in: new column name */ +{ + const char* s = table->col_names; + + ut_ad(nth_col < table->n_def); + + for (unsigned i = 0; i < nth_col; i++) { + size_t len = strlen(s); + ut_ad(len > 0); + s += len + 1; + } + + /* This could fail if the data dictionaries are out of sync. + Proceed with the renaming anyway. */ + ut_ad(!strcmp(from, s)); + + dict_mem_table_col_rename_low(table, nth_col, to, s); +} + +/**********************************************************************//** +This function populates a dict_col_t memory structure with +supplied information. 
*/ +UNIV_INTERN +void +dict_mem_fill_column_struct( +/*========================*/ + dict_col_t* column, /*!< out: column struct to be + filled */ + ulint col_pos, /*!< in: column position */ + ulint mtype, /*!< in: main data type */ + ulint prtype, /*!< in: precise type */ + ulint col_len) /*!< in: column length */ +{ +#ifndef UNIV_HOTBACKUP + ulint mbminlen; + ulint mbmaxlen; +#endif /* !UNIV_HOTBACKUP */ + + column->ind = (unsigned int) col_pos; + column->ord_part = 0; + column->max_prefix = 0; + column->mtype = (unsigned int) mtype; + column->prtype = (unsigned int) prtype; + column->len = (unsigned int) col_len; +#ifndef UNIV_HOTBACKUP + dtype_get_mblen(mtype, prtype, &mbminlen, &mbmaxlen); + dict_col_set_mbminmaxlen(column, mbminlen, mbmaxlen); +#endif /* !UNIV_HOTBACKUP */ +} + +/**********************************************************************//** +Creates an index memory object. +@return own: index object */ +UNIV_INTERN +dict_index_t* +dict_mem_index_create( +/*==================*/ + const char* table_name, /*!< in: table name */ + const char* index_name, /*!< in: index name */ + ulint space, /*!< in: space where the index tree is + placed, ignored if the index is of + the clustered type */ + ulint type, /*!< in: DICT_UNIQUE, + DICT_CLUSTERED, ... ORed */ + ulint n_fields) /*!< in: number of fields */ +{ + dict_index_t* index; + mem_heap_t* heap; + + ut_ad(table_name && index_name); + + heap = mem_heap_create(DICT_HEAP_SIZE); + + index = static_cast<dict_index_t*>( + mem_heap_zalloc(heap, sizeof(*index))); + + dict_mem_fill_index_struct(index, heap, table_name, index_name, + space, type, n_fields); + + os_fast_mutex_init(zip_pad_mutex_key, &index->zip_pad.mutex); + + return(index); +} + +#ifndef UNIV_HOTBACKUP +/**********************************************************************//** +Creates and initializes a foreign constraint memory object. 
+@return own: foreign constraint struct */ +UNIV_INTERN +dict_foreign_t* +dict_mem_foreign_create(void) +/*=========================*/ +{ + dict_foreign_t* foreign; + mem_heap_t* heap; + + heap = mem_heap_create(100); + + foreign = static_cast<dict_foreign_t*>( + mem_heap_zalloc(heap, sizeof(dict_foreign_t))); + + foreign->heap = heap; + + return(foreign); +} + +/**********************************************************************//** +Sets the foreign_table_name_lookup pointer based on the value of +lower_case_table_names. If that is 0 or 1, foreign_table_name_lookup +will point to foreign_table_name. If 2, then another string is +allocated from foreign->heap and set to lower case. */ +UNIV_INTERN +void +dict_mem_foreign_table_name_lookup_set( +/*===================================*/ + dict_foreign_t* foreign, /*!< in/out: foreign struct */ + ibool do_alloc) /*!< in: is an alloc needed */ +{ + if (innobase_get_lower_case_table_names() == 2) { + if (do_alloc) { + ulint len; + + len = strlen(foreign->foreign_table_name) + 1; + + foreign->foreign_table_name_lookup = + static_cast<char*>( + mem_heap_alloc(foreign->heap, len)); + } + strcpy(foreign->foreign_table_name_lookup, + foreign->foreign_table_name); + innobase_casedn_str(foreign->foreign_table_name_lookup); + } else { + foreign->foreign_table_name_lookup + = foreign->foreign_table_name; + } +} + +/**********************************************************************//** +Sets the referenced_table_name_lookup pointer based on the value of +lower_case_table_names. If that is 0 or 1, referenced_table_name_lookup +will point to referenced_table_name. If 2, then another string is +allocated from foreign->heap and set to lower case. 
*/ +UNIV_INTERN +void +dict_mem_referenced_table_name_lookup_set( +/*======================================*/ + dict_foreign_t* foreign, /*!< in/out: foreign struct */ + ibool do_alloc) /*!< in: is an alloc needed */ +{ + if (innobase_get_lower_case_table_names() == 2) { + if (do_alloc) { + ulint len; + + len = strlen(foreign->referenced_table_name) + 1; + + foreign->referenced_table_name_lookup = + static_cast<char*>( + mem_heap_alloc(foreign->heap, len)); + } + strcpy(foreign->referenced_table_name_lookup, + foreign->referenced_table_name); + innobase_casedn_str(foreign->referenced_table_name_lookup); + } else { + foreign->referenced_table_name_lookup + = foreign->referenced_table_name; + } +} +#endif /* !UNIV_HOTBACKUP */ + +/**********************************************************************//** +Adds a field definition to an index. NOTE: does not take a copy +of the column name if the field is a column. The memory occupied +by the column name may be released only after publishing the index. */ +UNIV_INTERN +void +dict_mem_index_add_field( +/*=====================*/ + dict_index_t* index, /*!< in: index */ + const char* name, /*!< in: column name */ + ulint prefix_len) /*!< in: 0 or the column prefix length + in a MySQL index like + INDEX (textcol(25)) */ +{ + dict_field_t* field; + + ut_ad(index); + ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); + + index->n_def++; + + field = dict_index_get_nth_field(index, index->n_def - 1); + + field->name = name; + field->prefix_len = (unsigned int) prefix_len; +} + +/**********************************************************************//** +Frees an index memory object. 
*/ +UNIV_INTERN +void +dict_mem_index_free( +/*================*/ + dict_index_t* index) /*!< in: index */ +{ + ut_ad(index); + ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); +#ifdef UNIV_BLOB_DEBUG + if (index->blobs) { + mutex_free(&index->blobs_mutex); + rbt_free(index->blobs); + } +#endif /* UNIV_BLOB_DEBUG */ + + os_fast_mutex_free(&index->zip_pad.mutex); + + mem_heap_free(index->heap); +} + +/** Create a temporary tablename like "#sql-ibtid-inc where + tid = the Table ID + inc = a randomly initialized number that is incremented for each file +The table ID is a 64 bit integer, can use up to 20 digits, and is +initialized at bootstrap. The second number is 32 bits, can use up to 10 +digits, and is initialized at startup to a randomly distributed number. +It is hoped that the combination of these two numbers will provide a +reasonably unique temporary file name. +@param[in] heap A memory heap +@param[in] dbtab Table name in the form database/table name +@param[in] id Table id +@return A unique temporary tablename suitable for InnoDB use */ +UNIV_INTERN +char* +dict_mem_create_temporary_tablename( + mem_heap_t* heap, + const char* dbtab, + table_id_t id) +{ + size_t size; + char* name; + const char* dbend = strchr(dbtab, '/'); + ut_ad(dbend); + size_t dblen = dbend - dbtab + 1; + +#ifdef HAVE_ATOMIC_BUILTINS + /* Increment a randomly initialized number for each temp file. 
*/ + os_atomic_increment_uint32(&dict_temp_file_num, 1); +#else /* HAVE_ATOMIC_BUILTINS */ + dict_temp_file_num++; +#endif /* HAVE_ATOMIC_BUILTINS */ + + size = tmp_file_prefix_length + 3 + 20 + 1 + 10 + dblen; + name = static_cast<char*>(mem_heap_alloc(heap, size)); + memcpy(name, dbtab, dblen); + ut_snprintf(name + dblen, size - dblen, + TEMP_FILE_PREFIX_INNODB UINT64PF "-" UINT32PF, + id, dict_temp_file_num); + + return(name); +} + +/** Initialize dict memory variables */ + +void +dict_mem_init(void) +{ + /* Initialize a randomly distributed temporary file number */ + ib_uint32_t now = static_cast<ib_uint32_t>(ut_time()); + + const byte* buf = reinterpret_cast<const byte*>(&now); + ut_ad(ut_crc32 != NULL); + + dict_temp_file_num = ut_crc32(buf, sizeof(now)); + + DBUG_PRINT("dict_mem_init", + ("Starting Temporary file number is " UINT32PF, + dict_temp_file_num)); +} + +/** Validate the search order in the foreign key set. +@param[in] fk_set the foreign key set to be validated +@return true if search order is fine in the set, false otherwise. */ +bool +dict_foreign_set_validate( + const dict_foreign_set& fk_set) +{ + dict_foreign_not_exists not_exists(fk_set); + + dict_foreign_set::iterator it = std::find_if( + fk_set.begin(), fk_set.end(), not_exists); + + if (it == fk_set.end()) { + return(true); + } + + dict_foreign_t* foreign = *it; + std::cerr << "Foreign key lookup failed: " << *foreign; + std::cerr << fk_set; + ut_ad(0); + return(false); +} + +/** Validate the search order in the foreign key sets of the table +(foreign_set and referenced_set). +@param[in] table table whose foreign key sets are to be validated +@return true if foreign key sets are fine, false otherwise. 
*/ +bool +dict_foreign_set_validate( + const dict_table_t& table) +{ + return(dict_foreign_set_validate(table.foreign_set) + && dict_foreign_set_validate(table.referenced_set)); +} + +std::ostream& +operator<< (std::ostream& out, const dict_foreign_t& foreign) +{ + out << "[dict_foreign_t: id='" << foreign.id << "'"; + + if (foreign.foreign_table_name != NULL) { + out << ",for: '" << foreign.foreign_table_name << "'"; + } + + out << "]"; + return(out); +} + +std::ostream& +operator<< (std::ostream& out, const dict_foreign_set& fk_set) +{ + out << "[dict_foreign_set:"; + std::for_each(fk_set.begin(), fk_set.end(), dict_foreign_print(out)); + out << "]" << std::endl; + return(out); +} + diff --git a/storage/xtradb/dict/dict0stats.cc b/storage/xtradb/dict/dict0stats.cc new file mode 100644 index 00000000000..9cd909686ed --- /dev/null +++ b/storage/xtradb/dict/dict0stats.cc @@ -0,0 +1,4182 @@ +/***************************************************************************** + +Copyright (c) 2009, 2014, Oracle and/or its affiliates. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/**************************************************//** +@file dict/dict0stats.cc +Code used for calculating and manipulating table statistics. 
+ +Created Jan 06, 2010 Vasil Dimov +*******************************************************/ + +#ifndef UNIV_HOTBACKUP + +#include "univ.i" + +#include "btr0btr.h" /* btr_get_size() */ +#include "btr0cur.h" /* btr_estimate_number_of_different_key_vals() */ +#include "dict0dict.h" /* dict_table_get_first_index(), dict_fs2utf8() */ +#include "dict0mem.h" /* DICT_TABLE_MAGIC_N */ +#include "dict0stats.h" +#include "data0type.h" /* dtype_t */ +#include "db0err.h" /* dberr_t */ +#include "page0page.h" /* page_align() */ +#include "pars0pars.h" /* pars_info_create() */ +#include "pars0types.h" /* pars_info_t */ +#include "que0que.h" /* que_eval_sql() */ +#include "rem0cmp.h" /* REC_MAX_N_FIELDS,cmp_rec_rec_with_match() */ +#include "row0sel.h" /* sel_node_t */ +#include "row0types.h" /* sel_node_t */ +#include "trx0trx.h" /* trx_create() */ +#include "trx0roll.h" /* trx_rollback_to_savepoint() */ +#include "ut0rnd.h" /* ut_rnd_interval() */ +#include "ut0ut.h" /* ut_format_name(), ut_time() */ + +#include <algorithm> +#include <map> +#include <vector> + +/* Sampling algorithm description @{ + +The algorithm is controlled by one number - N_SAMPLE_PAGES(index), +let it be A, which is the number of leaf pages to analyze for a given index +for each n-prefix (if the index is on 3 columns, then 3*A leaf pages will be +analyzed). + +Let the total number of leaf pages in the table be T. +Level 0 - leaf pages, level H - root. + +Definition: N-prefix-boring record is a record on a non-leaf page that equals +the next (to the right, cross page boundaries, skipping the supremum and +infimum) record on the same level when looking at the first n-prefix columns. +The last (user) record on a level is not boring (it does not match the +non-existent user record to the right). We call the records boring because all +the records on the page below a boring record are equal to that boring record. 
+ +We avoid diving below boring records when searching for a leaf page to +estimate the number of distinct records because we know that such a leaf +page will have number of distinct records == 1. + +For each n-prefix: start from the root level and full scan subsequent lower +levels until a level that contains at least A*10 distinct records is found. +Lets call this level LA. +As an optimization the search is canceled if it has reached level 1 (never +descend to the level 0 (leaf)) and also if the next level to be scanned +would contain more than A pages. The latter is because the user has asked +to analyze A leaf pages and it does not make sense to scan much more than +A non-leaf pages with the sole purpose of finding a good sample of A leaf +pages. + +After finding the appropriate level LA with >A*10 distinct records (or less in +the exceptions described above), divide it into groups of equal records and +pick A such groups. Then pick the last record from each group. For example, +let the level be: + +index: 0,1,2,3,4,5,6,7,8,9,10 +record: 1,1,1,2,2,7,7,7,7,7,9 + +There are 4 groups of distinct records and if A=2 random ones are selected, +e.g. 1,1,1 and 7,7,7,7,7, then records with indexes 2 and 9 will be selected. + +After selecting A records as described above, dive below them to find A leaf +pages and analyze them, finding the total number of distinct records. The +dive to the leaf level is performed by selecting a non-boring record from +each page and diving below it. + +This way, a total of A leaf pages are analyzed for the given n-prefix. + +Let the number of different key values found in each leaf page i be Pi (i=1..A). +Let N_DIFF_AVG_LEAF be (P1 + P2 + ... + PA) / A. +Let the number of different key values on level LA be N_DIFF_LA. +Let the total number of records on level LA be TOTAL_LA. +Let R be N_DIFF_LA / TOTAL_LA, we assume this ratio is the same on the +leaf level. +Let the number of leaf pages be N. 
+Then the total number of different key values on the leaf level is: +N * R * N_DIFF_AVG_LEAF. +See REF01 for the implementation. + +The above describes how to calculate the cardinality of an index. +This algorithm is executed for each n-prefix of a multi-column index +where n=1..n_uniq. +@} */ + +/* names of the tables from the persistent statistics storage */ +#define TABLE_STATS_NAME "mysql/innodb_table_stats" +#define TABLE_STATS_NAME_PRINT "mysql.innodb_table_stats" +#define INDEX_STATS_NAME "mysql/innodb_index_stats" +#define INDEX_STATS_NAME_PRINT "mysql.innodb_index_stats" + +#ifdef UNIV_STATS_DEBUG +#define DEBUG_PRINTF(fmt, ...) printf(fmt, ## __VA_ARGS__) +#else /* UNIV_STATS_DEBUG */ +#define DEBUG_PRINTF(fmt, ...) /* noop */ +#endif /* UNIV_STATS_DEBUG */ + +/* Gets the number of leaf pages to sample in persistent stats estimation */ +#define N_SAMPLE_PAGES(index) \ + static_cast<ib_uint64_t>( \ + (index)->table->stats_sample_pages != 0 \ + ? (index)->table->stats_sample_pages \ + : srv_stats_persistent_sample_pages) + +/* number of distinct records on a given level that are required to stop +descending to lower levels and fetch N_SAMPLE_PAGES(index) records +from that level */ +#define N_DIFF_REQUIRED(index) (N_SAMPLE_PAGES(index) * 10) + +/* A dynamic array where we store the boundaries of each distinct group +of keys. For example if a btree level is: +index: 0,1,2,3,4,5,6,7,8,9,10,11,12 +data: b,b,b,b,b,b,g,g,j,j,j, x, y +then we would store 5,7,10,11,12 in the array. */ +typedef std::vector<ib_uint64_t> boundaries_t; + +/* This is used to arrange the index based on the index name. +@return true if index_name1 is smaller than index_name2. 
*/
+struct index_cmp
+{
+	bool operator()(const char* index_name1, const char* index_name2) const {
+		const int	cmp = strcmp(index_name1, index_name2);
+
+		return(cmp < 0);
+	}
+};
+
+/* Indexes keyed by their name, ordered by index_cmp (strcmp() order). */
+typedef std::map<const char*, dict_index_t*, index_cmp>	index_map_t;
+
+/*********************************************************************//**
+Checks whether an index should be ignored in stats manipulations:
+* stats fetch
+* stats recalc
+* stats save
+An index is ignored when it is a DICT_FTS index, is corrupted, is marked
+to be dropped or its name starts with TEMP_INDEX_PREFIX.
+@return true if the index should be ignored */
+UNIV_INLINE
+bool
+dict_stats_should_ignore_index(
+/*===========================*/
+	const dict_index_t*	index)	/*!< in: index */
+{
+	/* The tests are made one by one, in the same order in which a
+	chain of "||" would evaluate them. */
+	if (index->type & DICT_FTS) {
+		return(true);
+	}
+
+	if (dict_index_is_corrupted(index)) {
+		return(true);
+	}
+
+	if (index->to_be_dropped) {
+		return(true);
+	}
+
+	return(*index->name == TEMP_INDEX_PREFIX);
+}
+
+/*********************************************************************//**
+Checks whether the persistent statistics storage exists and that all
+tables have the proper structure. TABLE_STATS_NAME is checked first and
+INDEX_STATS_NAME only if the first check succeeded. On failure the error
+text produced by dict_table_schema_check() is printed to stderr.
+@return true if exists and all tables are ok */
+static
+bool
+dict_stats_persistent_storage_check(
+/*================================*/
+	bool	caller_has_dict_sys_mutex)	/*!< in: true if the caller
+						owns dict_sys->mutex */
+{
+	/* expected columns of the table TABLE_STATS_NAME */
+	dict_col_meta_t	table_stats_columns[] = {
+		{"database_name", DATA_VARMYSQL,
+			DATA_NOT_NULL, 192},
+
+		{"table_name", DATA_VARMYSQL,
+			DATA_NOT_NULL, 192},
+
+		{"last_update", DATA_FIXBINARY,
+			DATA_NOT_NULL, 4},
+
+		{"n_rows", DATA_INT,
+			DATA_NOT_NULL | DATA_UNSIGNED, 8},
+
+		{"clustered_index_size", DATA_INT,
+			DATA_NOT_NULL | DATA_UNSIGNED, 8},
+
+		{"sum_of_other_index_sizes", DATA_INT,
+			DATA_NOT_NULL | DATA_UNSIGNED, 8}
+	};
+	dict_table_schema_t	table_stats_schema = {
+		TABLE_STATS_NAME,
+		UT_ARR_SIZE(table_stats_columns),
+		table_stats_columns,
+		0 /* n_foreign */,
+		0 /* n_referenced */
+	};
+
+	/* expected columns of the table INDEX_STATS_NAME */
+	dict_col_meta_t	index_stats_columns[] = {
+		{"database_name", DATA_VARMYSQL,
+			DATA_NOT_NULL, 192},
+
+		{"table_name", DATA_VARMYSQL,
+			DATA_NOT_NULL, 192},
+
+		{"index_name", DATA_VARMYSQL,
+			DATA_NOT_NULL, 192},
+
+		{"last_update", DATA_FIXBINARY,
+			DATA_NOT_NULL, 4},
+
+		{"stat_name", DATA_VARMYSQL,
+			DATA_NOT_NULL, 64*3},
+
+		{"stat_value", DATA_INT,
+			DATA_NOT_NULL | DATA_UNSIGNED, 8},
+
+		{"sample_size", DATA_INT,
+			DATA_UNSIGNED, 8},
+
+		{"stat_description", DATA_VARMYSQL,
+			DATA_NOT_NULL, 1024*3}
+	};
+	dict_table_schema_t	index_stats_schema = {
+		INDEX_STATS_NAME,
+		UT_ARR_SIZE(index_stats_columns),
+		index_stats_columns,
+		0 /* n_foreign */,
+		0 /* n_referenced */
+	};
+
+	/* the mutex is acquired here only if the caller does not hold it */
+	const bool	take_mutex = !caller_has_dict_sys_mutex;
+	char		errstr[512];
+	dberr_t		ret;
+
+	if (take_mutex) {
+		mutex_enter(&(dict_sys->mutex));
+	}
+
+	ut_ad(mutex_own(&dict_sys->mutex));
+
+	/* table_stats first, index_stats only if table_stats is ok */
+	ret = dict_table_schema_check(&table_stats_schema, errstr,
+				      sizeof(errstr));
+
+	if (ret == DB_SUCCESS) {
+		ret = dict_table_schema_check(&index_stats_schema, errstr,
+					      sizeof(errstr));
+	}
+
+	if (take_mutex) {
+		mutex_exit(&(dict_sys->mutex));
+	}
+
+	if (ret == DB_SUCCESS) {
+		return(true);
+	}
+
+	ut_print_timestamp(stderr);
+	fprintf(stderr, " InnoDB: Error: %s\n", errstr);
+
+	return(false);
+}
+
+/** Executes a given SQL statement using the InnoDB internal SQL parser.
+This function will free the pinfo object.
+@param[in,out]	pinfo	pinfo to pass to que_eval_sql() must already
+have any literals bound to it
+@param[in]	sql	SQL string to execute
+@param[in,out]	trx	in case of NULL the function will allocate and
+free the trx object. If it is not NULL then it will be rolled back
+only in the case of error, but not freed.
+@return DB_SUCCESS or error code */ +static +dberr_t +dict_stats_exec_sql( + pars_info_t* pinfo, + const char* sql, + trx_t* trx) +{ + dberr_t err; + bool trx_started = false; +#ifdef UNIV_SYNC_DEBUG + ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); +#endif /* UNIV_SYNC_DEBUG */ + ut_ad(mutex_own(&dict_sys->mutex)); + + if (!dict_stats_persistent_storage_check(true)) { + pars_info_free(pinfo); + return(DB_STATS_DO_NOT_EXIST); + } + + if (trx == NULL) { + trx = trx_allocate_for_background(); + trx_start_if_not_started(trx); + trx_started = true; + } + + err = que_eval_sql(pinfo, sql, FALSE, trx); /* pinfo is freed here */ + + DBUG_EXECUTE_IF("stats_index_error", + if (!trx_started) { + err = DB_STATS_DO_NOT_EXIST; + trx->error_state = DB_STATS_DO_NOT_EXIST; + }); + + if (!trx_started && err == DB_SUCCESS) { + return(DB_SUCCESS); + } + + if (err == DB_SUCCESS) { + trx_commit_for_mysql(trx); + } else { + trx->op_info = "rollback of internal trx on stats tables"; + trx->dict_operation_lock_mode = RW_X_LATCH; + trx_rollback_to_savepoint(trx, NULL); + trx->dict_operation_lock_mode = 0; + trx->op_info = ""; + ut_a(trx->error_state == DB_SUCCESS); + } + + if (trx_started) { + trx_free_for_background(trx); + } + + return(err); +} + +/*********************************************************************//** +Duplicate a table object and its indexes. 
+This function creates a dummy dict_table_t object and initializes the
+following table and index members:
+dict_table_t::id (copied)
+dict_table_t::heap (newly created)
+dict_table_t::name (copied)
+dict_table_t::corrupted (copied)
+dict_table_t::indexes<> (newly created)
+dict_table_t::magic_n
+The stats latch of the new object is set up with
+dict_table_stats_latch_create(t, false).
+for each entry in dict_table_t::indexes, the following are initialized:
+(indexes for which dict_stats_should_ignore_index() returns true are skipped)
+dict_index_t::id (copied)
+dict_index_t::name (copied)
+dict_index_t::table_name (points to the copied table name)
+dict_index_t::table (points to the above semi-initialized object)
+dict_index_t::type (copied)
+dict_index_t::to_be_dropped (set to 0)
+dict_index_t::online_status (set to ONLINE_INDEX_COMPLETE)
+dict_index_t::n_uniq (copied)
+dict_index_t::fields[] (newly created, only first n_uniq, only fields[i].name)
+dict_index_t::indexes<> (newly created)
+dict_index_t::stat_n_diff_key_vals[] (only allocated, left uninitialized)
+dict_index_t::stat_n_sample_sizes[] (only allocated, left uninitialized)
+dict_index_t::stat_n_non_null_key_vals[] (only allocated, left uninitialized)
+dict_index_t::magic_n
+The returned object should be freed with dict_stats_table_clone_free()
+when no longer needed.
+@return incomplete table object */
+static
+dict_table_t*
+dict_stats_table_clone_create(
+/*==========================*/
+	const dict_table_t*	table)	/*!< in: table whose stats to copy */
+{
+	size_t		heap_size;
+	dict_index_t*	index;
+
+	/* Estimate the size needed for the table and all of its indexes.
+	This pass must add up the same pieces that the second pass below
+	allocates from the heap. */
+
+	heap_size = 0;
+	heap_size += sizeof(dict_table_t);
+	heap_size += strlen(table->name) + 1;
+
+	for (index = dict_table_get_first_index(table);
+	     index != NULL;
+	     index = dict_table_get_next_index(index)) {
+
+		if (dict_stats_should_ignore_index(index)) {
+			continue;
+		}
+
+		ut_ad(!dict_index_is_univ(index));
+
+		ulint	n_uniq = dict_index_get_n_unique(index);
+
+		heap_size += sizeof(dict_index_t);
+		heap_size += strlen(index->name) + 1;
+		heap_size += n_uniq * sizeof(index->fields[0]);
+		for (ulint i = 0; i < n_uniq; i++) {
+			heap_size += strlen(index->fields[i].name) + 1;
+		}
+		heap_size += n_uniq * sizeof(index->stat_n_diff_key_vals[0]);
+		heap_size += n_uniq * sizeof(index->stat_n_sample_sizes[0]);
+		heap_size += n_uniq * sizeof(index->stat_n_non_null_key_vals[0]);
+	}
+
+	/* Allocate the memory and copy the members */
+
+	mem_heap_t*	heap;
+
+	heap = mem_heap_create(heap_size);
+
+	dict_table_t*	t;
+
+	/* the table object itself lives in the heap it owns, so that
+	freeing t->heap releases everything allocated here */
+	t = (dict_table_t*) mem_heap_alloc(heap, sizeof(*t));
+
+	UNIV_MEM_ASSERT_RW_ABORT(&table->id, sizeof(table->id));
+	t->id = table->id;
+
+	t->heap = heap;
+
+	UNIV_MEM_ASSERT_RW_ABORT(table->name, strlen(table->name) + 1);
+	t->name = (char*) mem_heap_strdup(heap, table->name);
+
+	t->corrupted = table->corrupted;
+
+	/* This private object "t" is not shared with other threads, so
+	we do not need the stats_latch (thus we pass false below). The
+	dict_table_stats_lock()/unlock() routines will do nothing. */
+	dict_table_stats_latch_create(t, false);
+
+	UT_LIST_INIT(t->indexes);
+
+	/* second pass: clone every index that was counted above */
+	for (index = dict_table_get_first_index(table);
+	     index != NULL;
+	     index = dict_table_get_next_index(index)) {
+
+		if (dict_stats_should_ignore_index(index)) {
+			continue;
+		}
+
+		ut_ad(!dict_index_is_univ(index));
+
+		dict_index_t*	idx;
+
+		idx = (dict_index_t*) mem_heap_alloc(heap, sizeof(*idx));
+
+		UNIV_MEM_ASSERT_RW_ABORT(&index->id, sizeof(index->id));
+		idx->id = index->id;
+
+		UNIV_MEM_ASSERT_RW_ABORT(index->name, strlen(index->name) + 1);
+		idx->name = (char*) mem_heap_strdup(heap, index->name);
+
+		idx->table_name = t->name;
+
+		idx->table = t;
+
+		idx->type = index->type;
+
+		/* these two are not copied from the source index: the
+		clone is always marked as not being dropped and as
+		completely built */
+		idx->to_be_dropped = 0;
+
+		idx->online_status = ONLINE_INDEX_COMPLETE;
+
+		idx->n_uniq = index->n_uniq;
+
+		idx->fields = (dict_field_t*) mem_heap_alloc(
+			heap, idx->n_uniq * sizeof(idx->fields[0]));
+
+		/* only the names of the first n_uniq fields are set, the
+		other members of fields[i] are left uninitialized */
+		for (ulint i = 0; i < idx->n_uniq; i++) {
+			UNIV_MEM_ASSERT_RW_ABORT(index->fields[i].name, strlen(index->fields[i].name) + 1);
+			idx->fields[i].name = (char*) mem_heap_strdup(
+				heap, index->fields[i].name);
+		}
+
+		/* hook idx into t->indexes */
+		UT_LIST_ADD_LAST(indexes, t->indexes, idx);
+
+		/* the stats arrays are only allocated here, their
+		contents are left for the caller to fill in */
+		idx->stat_n_diff_key_vals = (ib_uint64_t*) mem_heap_alloc(
+			heap,
+			idx->n_uniq * sizeof(idx->stat_n_diff_key_vals[0]));
+
+		idx->stat_n_sample_sizes = (ib_uint64_t*) mem_heap_alloc(
+			heap,
+			idx->n_uniq * sizeof(idx->stat_n_sample_sizes[0]));
+
+		idx->stat_n_non_null_key_vals = (ib_uint64_t*) mem_heap_alloc(
+			heap,
+			idx->n_uniq * sizeof(idx->stat_n_non_null_key_vals[0]));
+		ut_d(idx->magic_n = DICT_INDEX_MAGIC_N);
+	}
+
+	ut_d(t->magic_n = DICT_TABLE_MAGIC_N);
+
+	return(t);
+}
+
+/*********************************************************************//**
+Free the resources occupied by an object returned by
+dict_stats_table_clone_create().
*/
+static
+void
+dict_stats_table_clone_free(
+/*========================*/
+	dict_table_t*	t)	/*!< in: dummy table object to free */
+{
+	/* "t" itself is allocated from t->heap, so remember the heap
+	before tearing the object down. */
+	mem_heap_t*	heap = t->heap;
+
+	dict_table_stats_latch_destroy(t);
+	mem_heap_free(heap);
+}
+
+/*********************************************************************//**
+Reset the statistics members of an index to the values that describe an
+empty index: 0 distinct and 0 non-null key values and a sample size of 1
+for each of the n_uniq prefixes, an index size of 1 and 1 leaf page.
+The caller must own index's table stats latch in X mode
+(dict_table_stats_lock(table, RW_X_LATCH)) */
+static
+void
+dict_stats_empty_index(
+/*===================*/
+	dict_index_t*	index)	/*!< in/out: index */
+{
+	ut_ad(!(index->type & DICT_FTS));
+	ut_ad(!dict_index_is_univ(index));
+
+	/* the size members get 1, not 0 */
+	index->stat_index_size = 1;
+	index->stat_n_leaf_pages = 1;
+
+	const ulint	n_prefixes = index->n_uniq;
+	ulint		pos = 0;
+
+	while (pos < n_prefixes) {
+		index->stat_n_diff_key_vals[pos] = 0;
+		index->stat_n_sample_sizes[pos] = 1;
+		index->stat_n_non_null_key_vals[pos] = 0;
+		pos++;
+	}
+}
+
+/*********************************************************************//**
+Write all zeros (or 1 where it makes sense) into a table and its indexes'
+statistics members. The resulting stats correspond to an empty table.
*/
+static
+void
+dict_stats_empty_table(
+/*===================*/
+	dict_table_t*	table)	/*!< in/out: table */
+{
+	/* Zero the stats members */
+
+	dict_table_stats_lock(table, RW_X_LATCH);
+
+	table->stat_n_rows = 0;
+	table->stat_clustered_index_size = 1;
+
+	/* 1 page for each index, not counting the clustered. This
+	function is also called for tables that have no indexes at all
+	(dict_stats_update_transient() does so when the first index is
+	NULL); do not let "length - 1" wrap around in that case. */
+	const ulint	n_indexes = UT_LIST_GET_LEN(table->indexes);
+
+	table->stat_sum_of_other_index_sizes
+		= n_indexes > 0 ? n_indexes - 1 : 0;
+	table->stat_modified_counter = 0;
+
+	dict_index_t*	index;
+
+	for (index = dict_table_get_first_index(table);
+	     index != NULL;
+	     index = dict_table_get_next_index(index)) {
+
+		/* dict_stats_empty_index() asserts that it is not given
+		a DICT_FTS index */
+		if (index->type & DICT_FTS) {
+			continue;
+		}
+
+		ut_ad(!dict_index_is_univ(index));
+
+		dict_stats_empty_index(index);
+	}
+
+	table->stat_initialized = TRUE;
+
+	dict_table_stats_unlock(table, RW_X_LATCH);
+}
+
+/*********************************************************************//**
+Check whether index's stats are initialized (assert if they are not).
+The three per-prefix arrays are checked for n_uniq elements each, followed
+by stat_index_size and stat_n_leaf_pages. */
+static
+void
+dict_stats_assert_initialized_index(
+/*================================*/
+	const dict_index_t*	index)	/*!< in: index */
+{
+	UNIV_MEM_ASSERT_RW_ABORT(
+		index->stat_n_diff_key_vals,
+		index->n_uniq * sizeof(index->stat_n_diff_key_vals[0]));
+
+	UNIV_MEM_ASSERT_RW_ABORT(
+		index->stat_n_sample_sizes,
+		index->n_uniq * sizeof(index->stat_n_sample_sizes[0]));
+
+	UNIV_MEM_ASSERT_RW_ABORT(
+		index->stat_n_non_null_key_vals,
+		index->n_uniq * sizeof(index->stat_n_non_null_key_vals[0]));
+
+	UNIV_MEM_ASSERT_RW_ABORT(
+		&index->stat_index_size,
+		sizeof(index->stat_index_size));
+
+	UNIV_MEM_ASSERT_RW_ABORT(
+		&index->stat_n_leaf_pages,
+		sizeof(index->stat_n_leaf_pages));
+}
+
+/*********************************************************************//**
+Check whether table's stats are initialized (assert if they are not).
*/ +static +void +dict_stats_assert_initialized( +/*==========================*/ + const dict_table_t* table) /*!< in: table */ +{ + ut_a(table->stat_initialized); + + UNIV_MEM_ASSERT_RW_ABORT(&table->stats_last_recalc, + sizeof(table->stats_last_recalc)); + + UNIV_MEM_ASSERT_RW_ABORT(&table->stat_persistent, + sizeof(table->stat_persistent)); + + UNIV_MEM_ASSERT_RW_ABORT(&table->stats_auto_recalc, + sizeof(table->stats_auto_recalc)); + + UNIV_MEM_ASSERT_RW_ABORT(&table->stats_sample_pages, + sizeof(table->stats_sample_pages)); + + UNIV_MEM_ASSERT_RW_ABORT(&table->stat_n_rows, + sizeof(table->stat_n_rows)); + + UNIV_MEM_ASSERT_RW_ABORT(&table->stat_clustered_index_size, + sizeof(table->stat_clustered_index_size)); + + UNIV_MEM_ASSERT_RW_ABORT(&table->stat_sum_of_other_index_sizes, + sizeof(table->stat_sum_of_other_index_sizes)); + + UNIV_MEM_ASSERT_RW_ABORT(&table->stat_modified_counter, + sizeof(table->stat_modified_counter)); + + UNIV_MEM_ASSERT_RW_ABORT(&table->stats_bg_flag, + sizeof(table->stats_bg_flag)); + + for (dict_index_t* index = dict_table_get_first_index(table); + index != NULL; + index = dict_table_get_next_index(index)) { + + if (!dict_stats_should_ignore_index(index)) { + dict_stats_assert_initialized_index(index); + } + } +} + +#define INDEX_EQ(i1, i2) \ + ((i1) != NULL \ + && (i2) != NULL \ + && (i1)->id == (i2)->id \ + && strcmp((i1)->name, (i2)->name) == 0) + +/*********************************************************************//** +Copy table and index statistics from one table to another, including index +stats. Extra indexes in src are ignored and extra indexes in dst are +initialized to correspond to an empty index. 
*/
+static
+void
+dict_stats_copy(
+/*============*/
+	dict_table_t*		dst,	/*!< in/out: destination table */
+	const dict_table_t*	src)	/*!< in: source table */
+{
+	/* table-level members first */
+	dst->stats_last_recalc = src->stats_last_recalc;
+	dst->stat_n_rows = src->stat_n_rows;
+	dst->stat_clustered_index_size = src->stat_clustered_index_size;
+	dst->stat_sum_of_other_index_sizes = src->stat_sum_of_other_index_sizes;
+	dst->stat_modified_counter = src->stat_modified_counter;
+
+	dict_index_t*	dst_idx;
+	dict_index_t*	src_idx;
+
+	/* Walk both index lists in parallel. The loop is driven by dst_idx;
+	src_idx is advanced in the increment expression only while it is
+	not NULL, so once the source list is exhausted it stays NULL until
+	the search below repositions it. */
+	for (dst_idx = dict_table_get_first_index(dst),
+	     src_idx = dict_table_get_first_index(src);
+	     dst_idx != NULL;
+	     dst_idx = dict_table_get_next_index(dst_idx),
+	     (src_idx != NULL
+	      && (src_idx = dict_table_get_next_index(src_idx)))) {
+
+		if (dict_stats_should_ignore_index(dst_idx)) {
+			continue;
+		}
+
+		ut_ad(!dict_index_is_univ(dst_idx));
+
+		/* If the index at the current position in src is not the
+		counterpart of dst_idx, search all of src's indexes for
+		it. src_idx ends up NULL if there is no match. */
+		if (!INDEX_EQ(src_idx, dst_idx)) {
+			for (src_idx = dict_table_get_first_index(src);
+			     src_idx != NULL;
+			     src_idx = dict_table_get_next_index(src_idx)) {
+
+				if (INDEX_EQ(src_idx, dst_idx)) {
+					break;
+				}
+			}
+		}
+
+		/* no counterpart in src: dst_idx gets empty stats */
+		if (!INDEX_EQ(src_idx, dst_idx)) {
+			dict_stats_empty_index(dst_idx);
+			continue;
+		}
+
+		/* number of per-prefix elements to copy: the smaller of
+		the two n_uniq values */
+		ulint	n_copy_el;
+
+		if (dst_idx->n_uniq > src_idx->n_uniq) {
+			n_copy_el = src_idx->n_uniq;
+			/* Since src is smaller some elements in dst
+			will remain untouched by the following memmove(),
+			thus we init all of them here. */
+			dict_stats_empty_index(dst_idx);
+		} else {
+			n_copy_el = dst_idx->n_uniq;
+		}
+
+		memmove(dst_idx->stat_n_diff_key_vals,
+			src_idx->stat_n_diff_key_vals,
+			n_copy_el * sizeof(dst_idx->stat_n_diff_key_vals[0]));
+
+		memmove(dst_idx->stat_n_sample_sizes,
+			src_idx->stat_n_sample_sizes,
+			n_copy_el * sizeof(dst_idx->stat_n_sample_sizes[0]));
+
+		memmove(dst_idx->stat_n_non_null_key_vals,
+			src_idx->stat_n_non_null_key_vals,
+			n_copy_el * sizeof(dst_idx->stat_n_non_null_key_vals[0]));
+
+		dst_idx->stat_index_size = src_idx->stat_index_size;
+
+		dst_idx->stat_n_leaf_pages = src_idx->stat_n_leaf_pages;
+	}
+
+	dst->stat_initialized = TRUE;
+}
+
+/*********************************************************************//**
+Duplicate the stats of a table and its indexes.
+This function creates a dummy dict_table_t object and copies the input
+table's stats into it. The returned table object is not in the dictionary
+cache and cannot be accessed by any other threads. In addition to the
+members copied in dict_stats_table_clone_create() this function initializes
+the following:
+dict_table_t::stat_initialized
+dict_table_t::stat_persistent
+dict_table_t::stats_auto_recalc
+dict_table_t::stats_sample_pages
+dict_table_t::stats_bg_flag
+dict_table_t::stats_last_recalc
+dict_table_t::stat_n_rows
+dict_table_t::stat_clustered_index_size
+dict_table_t::stat_sum_of_other_index_sizes
+dict_table_t::stat_modified_counter
+dict_index_t::stat_n_diff_key_vals[]
+dict_index_t::stat_n_sample_sizes[]
+dict_index_t::stat_n_non_null_key_vals[]
+dict_index_t::stat_index_size
+dict_index_t::stat_n_leaf_pages
+The returned object should be freed with dict_stats_snapshot_free()
+when no longer needed.
+@return incomplete table object */
+static
+dict_table_t*
+dict_stats_snapshot_create(
+/*=======================*/
+	dict_table_t*	table)	/*!< in: table whose stats to copy */
+{
+	/* dict_sys->mutex and the table's stats latch (in S mode) are held
+	for the whole time the clone is built and filled in */
+	mutex_enter(&dict_sys->mutex);
+
+	dict_table_stats_lock(table, RW_S_LATCH);
+
+	dict_stats_assert_initialized(table);
+
+	dict_table_t*	t;
+
+	t = dict_stats_table_clone_create(table);
+
+	dict_stats_copy(t, table);
+
+	/* members that are set neither by dict_stats_table_clone_create()
+	nor by dict_stats_copy() */
+	t->stat_persistent = table->stat_persistent;
+	t->stats_auto_recalc = table->stats_auto_recalc;
+	t->stats_sample_pages = table->stats_sample_pages;
+	t->stats_bg_flag = table->stats_bg_flag;
+
+	dict_table_stats_unlock(table, RW_S_LATCH);
+
+	mutex_exit(&dict_sys->mutex);
+
+	return(t);
+}
+
+/*********************************************************************//**
+Free the resources occupied by an object returned by
+dict_stats_snapshot_create(). */
+static
+void
+dict_stats_snapshot_free(
+/*=====================*/
+	dict_table_t*	t)	/*!< in: dummy table object to free */
+{
+	dict_stats_table_clone_free(t);
+}
+
+/*********************************************************************//**
+Calculates new estimates for index statistics. This function is
+relatively quick and is used to calculate transient statistics that
+are not saved on disk. This was the only way to calculate statistics
+before the Persistent Statistics feature was introduced.
+If btr_get_size() returns ULINT_UNDEFINED the index gets the stats of
+an empty index instead. */
+static
+void
+dict_stats_update_transient_for_index(
+/*==================================*/
+	dict_index_t*	index)	/*!< in/out: index */
+{
+	if (srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO
+	    && (srv_force_recovery >= SRV_FORCE_NO_LOG_REDO
+		|| !dict_index_is_clust(index))) {
+		/* If we have set a high innodb_force_recovery
+		level, do not calculate statistics, as a badly
+		corrupted index can cause a crash in it.
+		Initialize some bogus index cardinality
+		statistics, so that the data can be queried in
+		various means, also via secondary indexes. */
+		dict_stats_empty_index(index);
+#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
+	} else if (ibuf_debug && !dict_index_is_clust(index)) {
+		dict_stats_empty_index(index);
+#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
+	} else {
+		mtr_t	mtr;
+		ulint	size;
+		mtr_start(&mtr);
+		mtr_s_lock(dict_index_get_lock(index), &mtr);
+
+		size = btr_get_size(index, BTR_TOTAL_SIZE, &mtr);
+
+		/* "size" is reused: after this block it holds either
+		ULINT_UNDEFINED or the number of leaf pages */
+		if (size != ULINT_UNDEFINED) {
+			index->stat_index_size = size;
+
+			size = btr_get_size(
+				index, BTR_N_LEAF_PAGES, &mtr);
+		}
+
+		mtr_commit(&mtr);
+
+		switch (size) {
+		case ULINT_UNDEFINED:
+			dict_stats_empty_index(index);
+			return;
+		case 0:
+			/* The root node of the tree is a leaf */
+			size = 1;
+		}
+
+		index->stat_n_leaf_pages = size;
+
+		btr_estimate_number_of_different_key_vals(index);
+	}
+}
+
+/*********************************************************************//**
+Calculates new estimates for table and index statistics. This function
+is relatively quick and is used to calculate transient statistics that
+are not saved on disk.
+This was the only way to calculate statistics before the
+Persistent Statistics feature was introduced.
+A discarded table and a table without any index get the stats of an
+empty table. */
+UNIV_INTERN
+void
+dict_stats_update_transient(
+/*========================*/
+	dict_table_t*	table)	/*!< in/out: table */
+{
+	dict_index_t*	index;
+	ulint		sum_of_index_sizes	= 0;
+
+	/* Find out the sizes of the indexes and how many different values
+	for the key they approximately have */
+
+	index = dict_table_get_first_index(table);
+
+	if (dict_table_is_discarded(table)) {
+		/* Nothing to do. */
+		dict_stats_empty_table(table);
+		return;
+	} else if (index == NULL) {
+		/* Table definition is corrupt */
+
+		char	buf[MAX_FULL_NAME_LEN];
+		ut_print_timestamp(stderr);
+		fprintf(stderr, " InnoDB: table %s has no indexes. "
+			"Cannot calculate statistics.\n",
+			ut_format_name(table->name, TRUE, buf, sizeof(buf)));
+		dict_stats_empty_table(table);
+		return;
+	}
+
+	for (; index != NULL; index = dict_table_get_next_index(index)) {
+
+		ut_ad(!dict_index_is_univ(index));
+
+		if (index->type & DICT_FTS) {
+			continue;
+		}
+
+		/* Reset the stats before the "ignore" test, so that an
+		ignored (non-FTS) index is left with empty stats and does
+		not contribute to sum_of_index_sizes. */
+		dict_stats_empty_index(index);
+
+		if (dict_stats_should_ignore_index(index)) {
+			continue;
+		}
+
+		dict_stats_update_transient_for_index(index);
+
+		sum_of_index_sizes += index->stat_index_size;
+	}
+
+	/* the table-level members are derived from the first index of
+	the table */
+	index = dict_table_get_first_index(table);
+
+	table->stat_n_rows = index->stat_n_diff_key_vals[
+		dict_index_get_n_unique(index) - 1];
+
+	table->stat_clustered_index_size = index->stat_index_size;
+
+	table->stat_sum_of_other_index_sizes = sum_of_index_sizes
+		- index->stat_index_size;
+
+	table->stats_last_recalc = ut_time();
+
+	table->stat_modified_counter = 0;
+
+	table->stat_initialized = TRUE;
+}
+
+/* @{ Pseudo code about the relation between the following functions
+
+let N = N_SAMPLE_PAGES(index)
+
+dict_stats_analyze_index()
+  for each n_prefix
+    search for good enough level:
+      dict_stats_analyze_index_level() // only called if level has <= N pages
+        // full scan of the level in one mtr
+        collect statistics about the given level
+      if we are not satisfied with the level, search next lower level
+    we have found a good enough level here
+    dict_stats_analyze_index_for_n_prefix(that level, stats collected above)
+      // full scan of the level in one mtr
+      dive below some records and analyze the leaf page there:
+      dict_stats_analyze_index_below_cur()
+@} */
+
+/*********************************************************************//**
+Find the total number and the number of distinct keys on a given level in
+an index. Each of the 1..n_uniq prefixes are looked up and the results are
+saved in the array n_diff[0] .. n_diff[n_uniq - 1]. The total number of
+records on the level is saved in total_recs.
+Also, the index of the last record in each group of equal records is saved
+in n_diff_boundaries[0..n_uniq - 1], records indexing starts from the leftmost
+record on the level and continues across page boundaries, counting from 0. */
+static
+void
+dict_stats_analyze_index_level(
+/*===========================*/
+	dict_index_t*	index,		/*!< in: index */
+	ulint		level,		/*!< in: level */
+	ib_uint64_t*	n_diff,		/*!< out: array for number of
+					distinct keys for all prefixes */
+	ib_uint64_t*	total_recs,	/*!< out: total number of records;
+					delete-marked records on level 0
+					are not counted */
+	ib_uint64_t*	total_pages,	/*!< out: total number of pages */
+	boundaries_t*	n_diff_boundaries,/*!< out: boundaries of the groups
+					of distinct keys; may be NULL, in
+					which case no boundaries are saved */
+	mtr_t*		mtr)		/*!< in/out: mini-transaction */
+{
+	ulint		n_uniq;
+	mem_heap_t*	heap;
+	btr_pcur_t	pcur;
+	const page_t*	page;
+	const rec_t*	rec;
+	const rec_t*	prev_rec;
+	bool		prev_rec_is_copied;
+	byte*		prev_rec_buf = NULL;
+	ulint		prev_rec_buf_size = 0;
+	ulint*		rec_offsets;
+	ulint*		prev_rec_offsets;
+	ulint		i;
+
+	DEBUG_PRINTF(" %s(table=%s, index=%s, level=%lu)\n", __func__,
+		     index->table->name, index->name, level);
+
+	ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
+				MTR_MEMO_S_LOCK));
+
+	n_uniq = dict_index_get_n_unique(index);
+
+	/* elements in the n_diff array are 0..n_uniq-1 (inclusive) */
+	memset(n_diff, 0x0, n_uniq * sizeof(n_diff[0]));
+
+	/* Allocate space for the offsets header (the allocation size at
+	offsets[0] and the REC_OFFS_HEADER_SIZE bytes), and n_fields + 1,
+	so that this will never be less than the size calculated in
+	rec_get_offsets_func(). */
+	i = (REC_OFFS_HEADER_SIZE + 1 + 1) + index->n_fields;
+
+	heap = mem_heap_create((2 * sizeof *rec_offsets) * i);
+	rec_offsets = static_cast<ulint*>(
+		mem_heap_alloc(heap, i * sizeof *rec_offsets));
+	prev_rec_offsets = static_cast<ulint*>(
+		mem_heap_alloc(heap, i * sizeof *prev_rec_offsets));
+	rec_offs_set_n_alloc(rec_offsets, i);
+	rec_offs_set_n_alloc(prev_rec_offsets, i);
+
+	/* reset the dynamic arrays n_diff_boundaries[0..n_uniq-1] */
+	if (n_diff_boundaries != NULL) {
+		for (i = 0; i < n_uniq; i++) {
+			n_diff_boundaries[i].erase(
+				n_diff_boundaries[i].begin(),
+				n_diff_boundaries[i].end());
+		}
+	}
+
+	/* Position pcur on the leftmost record on the leftmost page
+	on the desired level. */
+
+	btr_pcur_open_at_index_side(
+		true, index, BTR_SEARCH_LEAF | BTR_ALREADY_S_LATCHED,
+		&pcur, true, level, mtr);
+	btr_pcur_move_to_next_on_page(&pcur);
+
+	page = btr_pcur_get_page(&pcur);
+
+	/* The page must not be empty, except when
+	it is the root page (and the whole index is empty). */
+	ut_ad(btr_pcur_is_on_user_rec(&pcur) || page_is_leaf(page));
+	ut_ad(btr_pcur_get_rec(&pcur)
+	      == page_rec_get_next_const(page_get_infimum_rec(page)));
+
+	/* check that we are indeed on the desired level */
+	ut_a(btr_page_get_level(page, mtr) == level);
+
+	/* there should not be any pages on the left */
+	ut_a(btr_page_get_prev(page, mtr) == FIL_NULL);
+
+	/* check whether the first record on the leftmost page is marked
+	as such, if we are on a non-leaf level */
+	ut_a((level == 0)
+	     == !(REC_INFO_MIN_REC_FLAG & rec_get_info_bits(
+			  btr_pcur_get_rec(&pcur), page_is_comp(page))));
+
+	prev_rec = NULL;
+	prev_rec_is_copied = false;
+
+	/* no records by default */
+	*total_recs = 0;
+
+	*total_pages = 0;
+
+	/* iterate over all user records on this level
+	and compare each two adjacent ones, even the last on page
+	X and the first on page X+1 */
+	for (;
+	     btr_pcur_is_on_user_rec(&pcur);
+	     btr_pcur_move_to_next_user_rec(&pcur, mtr)) {
+
+		ulint	matched_fields = 0;
+		ulint	matched_bytes = 0;
+		bool	rec_is_last_on_page;
+
+		rec = btr_pcur_get_rec(&pcur);
+
+		/* If rec and prev_rec are on different pages, then prev_rec
+		must have been copied, because we hold latch only on the page
+		where rec resides. */
+		if (prev_rec != NULL
+		    && page_align(rec) != page_align(prev_rec)) {
+
+			ut_a(prev_rec_is_copied);
+		}
+
+		rec_is_last_on_page =
+			page_rec_is_supremum(page_rec_get_next_const(rec));
+
+		/* increment the pages counter at the end of each page */
+		if (rec_is_last_on_page) {
+
+			(*total_pages)++;
+		}
+
+		/* Skip delete-marked records on the leaf level. If we
+		do not skip them, then ANALYZE quickly after DELETE
+		could count them or not (purge may have already wiped
+		them away) which brings non-determinism. We skip only
+		leaf-level delete marks because delete marks on
+		non-leaf level do not make sense. */
+		if (level == 0 &&
+		    rec_get_deleted_flag(
+			    rec,
+			    page_is_comp(btr_pcur_get_page(&pcur)))) {
+
+			/* The skipped record is the last one on its page,
+			so the cursor is about to leave the page. If
+			prev_rec still points into this page, copy it to
+			the private buffer before that happens. */
+			if (rec_is_last_on_page
+			    && !prev_rec_is_copied
+			    && prev_rec != NULL) {
+				/* copy prev_rec */
+
+				prev_rec_offsets = rec_get_offsets(
+					prev_rec, index, prev_rec_offsets,
+					n_uniq, &heap);
+
+				prev_rec = rec_copy_prefix_to_buf(
+					prev_rec, index,
+					rec_offs_n_fields(prev_rec_offsets),
+					&prev_rec_buf, &prev_rec_buf_size);
+
+				prev_rec_is_copied = true;
+			}
+
+			continue;
+		}
+
+		rec_offsets = rec_get_offsets(
+			rec, index, rec_offsets, n_uniq, &heap);
+
+		(*total_recs)++;
+
+		if (prev_rec != NULL) {
+			prev_rec_offsets = rec_get_offsets(
+				prev_rec, index, prev_rec_offsets,
+				n_uniq, &heap);
+
+			cmp_rec_rec_with_match(rec,
+					       prev_rec,
+					       rec_offsets,
+					       prev_rec_offsets,
+					       index,
+					       FALSE,
+					       &matched_fields,
+					       &matched_bytes);
+
+			/* rec differs from prev_rec for every prefix that
+			is longer than the number of matched fields */
+			for (i = matched_fields; i < n_uniq; i++) {
+
+				if (n_diff_boundaries != NULL) {
+					/* push the index of the previous
+					record, that is - the last one from
+					a group of equal keys */
+
+					ib_uint64_t	idx;
+
+					/* the index of the current record
+					is total_recs - 1, the index of the
+					previous record is total_recs - 2;
+					we know that idx is not going to
+					become negative here because if we
+					are in this branch then there is a
+					previous record and thus
+					total_recs >= 2 */
+					idx = *total_recs - 2;
+
+					n_diff_boundaries[i].push_back(idx);
+				}
+
+				/* increment the number of different keys
+				for n_prefix=i+1 (e.g. if i=0 then we increment
+				for n_prefix=1 which is stored in n_diff[0]) */
+				n_diff[i]++;
+			}
+		} else {
+			/* this is the first non-delete marked record */
+			for (i = 0; i < n_uniq; i++) {
+				n_diff[i] = 1;
+			}
+		}
+
+		if (rec_is_last_on_page) {
+			/* end of a page has been reached */
+
+			/* we need to copy the record instead of assigning
+			like prev_rec = rec; because when we traverse the
+			records on this level at some point we will jump from
+			one page to the next and then rec and prev_rec will
+			be on different pages and
+			btr_pcur_move_to_next_user_rec() will release the
+			latch on the page that prev_rec is on */
+			prev_rec = rec_copy_prefix_to_buf(
+				rec, index, rec_offs_n_fields(rec_offsets),
+				&prev_rec_buf, &prev_rec_buf_size);
+			prev_rec_is_copied = true;
+
+		} else {
+			/* still on the same page, the next call to
+			btr_pcur_move_to_next_user_rec() will not jump
+			on the next page, we can simply assign pointers
+			instead of copying the records like above */
+
+			prev_rec = rec;
+			prev_rec_is_copied = false;
+		}
+	}
+
+	/* if *total_pages is left untouched then the above loop was not
+	entered at all and there is one page in the whole tree which is
+	empty or the loop was entered but this is level 0, contains one page
+	and all records are delete-marked */
+	if (*total_pages == 0) {
+
+		ut_ad(level == 0);
+		ut_ad(*total_recs == 0);
+
+		*total_pages = 1;
+	}
+
+	/* if there are records on this level and boundaries
+	should be saved */
+	if (*total_recs > 0 && n_diff_boundaries != NULL) {
+
+		/* remember the index of the last record on the level as the
+		last one from the last group of equal keys; this holds for
+		all possible prefixes */
+		for (i = 0; i < n_uniq; i++) {
+			ib_uint64_t	idx;
+
+			idx = *total_recs - 1;
+
+			n_diff_boundaries[i].push_back(idx);
+		}
+	}
+
+	/* now in n_diff_boundaries[i] there are exactly n_diff[i] integers,
+	for i=0..n_uniq-1 */
+
+#ifdef UNIV_STATS_DEBUG
+	for (i = 0; i < n_uniq; i++) {
+
+		DEBUG_PRINTF(" %s(): total recs: " UINT64PF
+			     ", total pages: " UINT64PF
+			     ", n_diff[%lu]: " UINT64PF "\n",
+			     __func__, *total_recs,
+			     *total_pages,
+			     i, n_diff[i]);
+
+#if 0
+		if (n_diff_boundaries != NULL) {
+			ib_uint64_t	j;
+
+			DEBUG_PRINTF(" %s(): boundaries[%lu]: ",
+				     __func__, i);
+
+			for (j = 0; j < n_diff[i]; j++) {
+				ib_uint64_t	idx;
+
+				idx = n_diff_boundaries[i][j];
+
+				DEBUG_PRINTF(UINT64PF "=" UINT64PF ", ",
+					     j, idx);
+			}
+			DEBUG_PRINTF("\n");
+		}
+#endif
+	}
+#endif /* UNIV_STATS_DEBUG */
+
+	/* Release the latch on the last page, because that is not done by
+	btr_pcur_close(). This function works also for non-leaf pages. */
+	btr_leaf_page_release(btr_pcur_get_block(&pcur), BTR_SEARCH_LEAF, mtr);
+
+	btr_pcur_close(&pcur);
+
+	/* the buffer filled by rec_copy_prefix_to_buf() is not part of
+	"heap" and is freed separately */
+	if (prev_rec_buf != NULL) {
+
+		mem_free(prev_rec_buf);
+	}
+
+	mem_heap_free(heap);
+}
+
+/* aux enum for controlling the behavior of dict_stats_scan_page() @{ */
+enum page_scan_method_t {
+	COUNT_ALL_NON_BORING_AND_SKIP_DEL_MARKED,/* scan all records on
+				the given page and count the number of
+				distinct ones, also ignore delete marked
+				records */
+	QUIT_ON_FIRST_NON_BORING/* quit when the first record that differs
+				from its right neighbor is found */
+};
+/* @} */
+
+/** Scan a page, reading records from left to right and counting the number
+of distinct records (looking only at the first n_prefix
+columns) and the number of external pages pointed by records from this page.
+If scan_method is QUIT_ON_FIRST_NON_BORING then the function
+will return as soon as it finds a record that does not match its neighbor
+to the right, which means that in the case of QUIT_ON_FIRST_NON_BORING the
+returned n_diff can either be 0 (empty page), 1 (the whole page has all keys
+equal) or 2 (the function found a non-boring record and returned).
+@param[out] out_rec record, or NULL +@param[out] offsets1 rec_get_offsets() working space (must +be big enough) +@param[out] offsets2 rec_get_offsets() working space (must +be big enough) +@param[in] index index of the page +@param[in] page the page to scan +@param[in] n_prefix look at the first n_prefix columns +@param[in] scan_method scan to the end of the page or not +@param[out] n_diff number of distinct records encountered +@param[out] n_external_pages if this is non-NULL then it will be set +to the number of externally stored pages which were encountered +@return offsets1 or offsets2 (the offsets of *out_rec), +or NULL if the page is empty and does not contain user records. */ +UNIV_INLINE +ulint* +dict_stats_scan_page( + const rec_t** out_rec, + ulint* offsets1, + ulint* offsets2, + dict_index_t* index, + const page_t* page, + ulint n_prefix, + page_scan_method_t scan_method, + ib_uint64_t* n_diff, + ib_uint64_t* n_external_pages) +{ + ulint* offsets_rec = offsets1; + ulint* offsets_next_rec = offsets2; + const rec_t* rec; + const rec_t* next_rec; + /* A dummy heap, to be passed to rec_get_offsets(). + Because offsets1,offsets2 should be big enough, + this memory heap should never be used. 
*/ + mem_heap_t* heap = NULL; + const rec_t* (*get_next)(const rec_t*); + + if (scan_method == COUNT_ALL_NON_BORING_AND_SKIP_DEL_MARKED) { + get_next = page_rec_get_next_non_del_marked; + } else { + get_next = page_rec_get_next_const; + } + + const bool should_count_external_pages = n_external_pages != NULL; + + if (should_count_external_pages) { + *n_external_pages = 0; + } + + rec = get_next(page_get_infimum_rec(page)); + + if (page_rec_is_supremum(rec)) { + /* the page is empty or contains only delete-marked records */ + *n_diff = 0; + *out_rec = NULL; + return(NULL); + } + + offsets_rec = rec_get_offsets(rec, index, offsets_rec, + ULINT_UNDEFINED, &heap); + + if (should_count_external_pages) { + *n_external_pages += btr_rec_get_externally_stored_len( + rec, offsets_rec); + } + + next_rec = get_next(rec); + + *n_diff = 1; + + while (!page_rec_is_supremum(next_rec)) { + + ulint matched_fields = 0; + ulint matched_bytes = 0; + + offsets_next_rec = rec_get_offsets(next_rec, index, + offsets_next_rec, + ULINT_UNDEFINED, + &heap); + + /* check whether rec != next_rec when looking at + the first n_prefix fields */ + cmp_rec_rec_with_match(rec, next_rec, + offsets_rec, offsets_next_rec, + index, FALSE, &matched_fields, + &matched_bytes); + + if (matched_fields < n_prefix) { + /* rec != next_rec, => rec is non-boring */ + + (*n_diff)++; + + if (scan_method == QUIT_ON_FIRST_NON_BORING) { + goto func_exit; + } + } + + rec = next_rec; + { + /* Assign offsets_rec = offsets_next_rec + so that offsets_rec matches with rec which + was just assigned rec = next_rec above. + Also need to point offsets_next_rec to the + place where offsets_rec was pointing before + because we have just 2 placeholders where + data is actually stored: + offsets_onstack1 and offsets_onstack2 and we + are using them in circular fashion + (offsets[_next]_rec are just pointers to + those placeholders). 
*/ + ulint* offsets_tmp; + offsets_tmp = offsets_rec; + offsets_rec = offsets_next_rec; + offsets_next_rec = offsets_tmp; + } + + if (should_count_external_pages) { + *n_external_pages += btr_rec_get_externally_stored_len( + rec, offsets_rec); + } + + next_rec = get_next(next_rec); + } + +func_exit: + /* offsets1,offsets2 should have been big enough */ + ut_a(heap == NULL); + *out_rec = rec; + return(offsets_rec); +} + +/** Dive below the current position of a cursor and calculate the number of +distinct records on the leaf page, when looking at the fist n_prefix +columns. Also calculate the number of external pages pointed by records +on the leaf page. +@param[in] cur cursor +@param[in] n_prefix look at the first n_prefix columns +when comparing records +@param[out] n_diff number of distinct records +@param[out] n_external_pages number of external pages +@param[in,out] mtr mini-transaction +@return number of distinct records on the leaf page */ +static +void +dict_stats_analyze_index_below_cur( + const btr_cur_t* cur, + ulint n_prefix, + ib_uint64_t* n_diff, + ib_uint64_t* n_external_pages, + mtr_t* mtr) +{ + dict_index_t* index; + ulint space; + ulint zip_size; + buf_block_t* block; + ulint page_no; + const page_t* page; + mem_heap_t* heap; + const rec_t* rec; + ulint* offsets1; + ulint* offsets2; + ulint* offsets_rec; + ulint size; + + index = btr_cur_get_index(cur); + + /* Allocate offsets for the record and the node pointer, for + node pointer records. In a secondary index, the node pointer + record will consist of all index fields followed by a child + page number. + Allocate space for the offsets header (the allocation size at + offsets[0] and the REC_OFFS_HEADER_SIZE bytes), and n_fields + 1, + so that this will never be less than the size calculated in + rec_get_offsets_func(). 
*/ + size = (1 + REC_OFFS_HEADER_SIZE) + 1 + dict_index_get_n_fields(index); + + heap = mem_heap_create(size * (sizeof *offsets1 + sizeof *offsets2)); + + offsets1 = static_cast<ulint*>(mem_heap_alloc( + heap, size * sizeof *offsets1)); + + offsets2 = static_cast<ulint*>(mem_heap_alloc( + heap, size * sizeof *offsets2)); + + rec_offs_set_n_alloc(offsets1, size); + rec_offs_set_n_alloc(offsets2, size); + + space = dict_index_get_space(index); + zip_size = dict_table_zip_size(index->table); + + rec = btr_cur_get_rec(cur); + + offsets_rec = rec_get_offsets(rec, index, offsets1, + ULINT_UNDEFINED, &heap); + + page_no = btr_node_ptr_get_child_page_no(rec, offsets_rec); + + /* assume no external pages by default - in case we quit from this + function without analyzing any leaf pages */ + *n_external_pages = 0; + + /* descend to the leaf level on the B-tree */ + for (;;) { + + block = buf_page_get_gen(space, zip_size, page_no, RW_S_LATCH, + NULL /* no guessed block */, + BUF_GET, __FILE__, __LINE__, mtr); + + page = buf_block_get_frame(block); + + if (btr_page_get_level(page, mtr) == 0) { + /* leaf level */ + break; + } + /* else */ + + /* search for the first non-boring record on the page */ + offsets_rec = dict_stats_scan_page( + &rec, offsets1, offsets2, index, page, n_prefix, + QUIT_ON_FIRST_NON_BORING, n_diff, NULL); + + /* pages on level > 0 are not allowed to be empty */ + ut_a(offsets_rec != NULL); + /* if page is not empty (offsets_rec != NULL) then n_diff must + be > 0, otherwise there is a bug in dict_stats_scan_page() */ + ut_a(*n_diff > 0); + + if (*n_diff == 1) { + /* page has all keys equal and the end of the page + was reached by dict_stats_scan_page(), no need to + descend to the leaf level */ + mem_heap_free(heap); + /* can't get an estimate for n_external_pages here + because we do not dive to the leaf level, assume no + external pages (*n_external_pages was assigned to 0 + above). 
*/ + return; + } + /* else */ + + /* when we instruct dict_stats_scan_page() to quit on the + first non-boring record it finds, then the returned n_diff + can either be 0 (empty page), 1 (page has all keys equal) or + 2 (non-boring record was found) */ + ut_a(*n_diff == 2); + + /* we have a non-boring record in rec, descend below it */ + + page_no = btr_node_ptr_get_child_page_no(rec, offsets_rec); + } + + /* make sure we got a leaf page as a result from the above loop */ + ut_ad(btr_page_get_level(page, mtr) == 0); + + /* scan the leaf page and find the number of distinct keys, + when looking only at the first n_prefix columns; also estimate + the number of externally stored pages pointed by records on this + page */ + + offsets_rec = dict_stats_scan_page( + &rec, offsets1, offsets2, index, page, n_prefix, + COUNT_ALL_NON_BORING_AND_SKIP_DEL_MARKED, n_diff, + n_external_pages); + +#if 0 + DEBUG_PRINTF(" %s(): n_diff below page_no=%lu: " UINT64PF "\n", + __func__, page_no, n_diff); +#endif + + mem_heap_free(heap); +} + +/** Input data that is used to calculate dict_index_t::stat_n_diff_key_vals[] +for each n-columns prefix (n from 1 to n_uniq). */ +struct n_diff_data_t { + /** Index of the level on which the descent through the btree + stopped. level 0 is the leaf level. This is >= 1 because we + avoid scanning the leaf level because it may contain too many + pages and doing so is useless when combined with the random dives - + if we are to scan the leaf level, this means a full scan and we can + simply do that instead of fiddling with picking random records higher + in the tree and to dive below them. At the start of the analyzing + we may decide to do full scan of the leaf level, but then this + structure is not used in that code path. */ + ulint level; + + /** Number of records on the level where the descend through the btree + stopped. 
When we scan the btree from the root, we stop at some mid + level, choose some records from it and dive below them towards a leaf + page to analyze. */ + ib_uint64_t n_recs_on_level; + + /** Number of different key values that were found on the mid level. */ + ib_uint64_t n_diff_on_level; + + /** Number of leaf pages that are analyzed. This is also the same as + the number of records that we pick from the mid level and dive below + them. */ + ib_uint64_t n_leaf_pages_to_analyze; + + /** Cumulative sum of the number of different key values that were + found on all analyzed pages. */ + ib_uint64_t n_diff_all_analyzed_pages; + + /** Cumulative sum of the number of external pages (stored outside of + the btree but in the same file segment). */ + ib_uint64_t n_external_pages_sum; +}; + +/** Estimate the number of different key values in an index when looking at +the first n_prefix columns. For a given level in an index select +n_diff_data->n_leaf_pages_to_analyze records from that level and dive below +them to the corresponding leaf pages, then scan those leaf pages and save the +sampling results in n_diff_data->n_diff_all_analyzed_pages. +@param[in] index index +@param[in] n_prefix look at first 'n_prefix' columns when +comparing records +@param[in] boundaries a vector that contains +n_diff_data->n_diff_on_level integers each of which represents the index (on +level 'level', counting from left/smallest to right/biggest from 0) of the +last record from each group of distinct keys +@param[in,out] n_diff_data n_diff_all_analyzed_pages and +n_external_pages_sum in this structure will be set by this function. 
The +members level, n_diff_on_level and n_leaf_pages_to_analyze must be set by the +caller in advance - they are used by some calculations inside this function +@param[in,out] mtr mini-transaction */ +static +void +dict_stats_analyze_index_for_n_prefix( + dict_index_t* index, + ulint n_prefix, + const boundaries_t* boundaries, + n_diff_data_t* n_diff_data, + mtr_t* mtr) +{ + btr_pcur_t pcur; + const page_t* page; + ib_uint64_t rec_idx; + ib_uint64_t i; + +#if 0 + DEBUG_PRINTF(" %s(table=%s, index=%s, level=%lu, n_prefix=%lu, " + "n_diff_on_level=" UINT64PF ")\n", + __func__, index->table->name, index->name, level, + n_prefix, n_diff_data->n_diff_on_level); +#endif + + ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index), + MTR_MEMO_S_LOCK)); + + /* Position pcur on the leftmost record on the leftmost page + on the desired level. */ + + btr_pcur_open_at_index_side( + true, index, BTR_SEARCH_LEAF | BTR_ALREADY_S_LATCHED, + &pcur, true, n_diff_data->level, mtr); + btr_pcur_move_to_next_on_page(&pcur); + + page = btr_pcur_get_page(&pcur); + + const rec_t* first_rec = btr_pcur_get_rec(&pcur); + + /* We shouldn't be scanning the leaf level. The caller of this function + should have stopped the descend on level 1 or higher. */ + ut_ad(n_diff_data->level > 0); + ut_ad(!page_is_leaf(page)); + + /* The page must not be empty, except when + it is the root page (and the whole index is empty). 
*/ + ut_ad(btr_pcur_is_on_user_rec(&pcur)); + ut_ad(first_rec == page_rec_get_next_const(page_get_infimum_rec(page))); + + /* check that we are indeed on the desired level */ + ut_a(btr_page_get_level(page, mtr) == n_diff_data->level); + + /* there should not be any pages on the left */ + ut_a(btr_page_get_prev(page, mtr) == FIL_NULL); + + /* check whether the first record on the leftmost page is marked + as such; we are on a non-leaf level */ + ut_a(rec_get_info_bits(first_rec, page_is_comp(page)) + & REC_INFO_MIN_REC_FLAG); + + const ib_uint64_t last_idx_on_level = boundaries->at( + static_cast<unsigned>(n_diff_data->n_diff_on_level - 1)); + + rec_idx = 0; + + n_diff_data->n_diff_all_analyzed_pages = 0; + n_diff_data->n_external_pages_sum = 0; + + for (i = 0; i < n_diff_data->n_leaf_pages_to_analyze; i++) { + /* there are n_diff_on_level elements + in 'boundaries' and we divide those elements + into n_leaf_pages_to_analyze segments, for example: + + let n_diff_on_level=100, n_leaf_pages_to_analyze=4, then: + segment i=0: [0, 24] + segment i=1: [25, 49] + segment i=2: [50, 74] + segment i=3: [75, 99] or + + let n_diff_on_level=1, n_leaf_pages_to_analyze=1, then: + segment i=0: [0, 0] or + + let n_diff_on_level=2, n_leaf_pages_to_analyze=2, then: + segment i=0: [0, 0] + segment i=1: [1, 1] or + + let n_diff_on_level=13, n_leaf_pages_to_analyze=7, then: + segment i=0: [0, 0] + segment i=1: [1, 2] + segment i=2: [3, 4] + segment i=3: [5, 6] + segment i=4: [7, 8] + segment i=5: [9, 10] + segment i=6: [11, 12] + + then we select a random record from each segment and dive + below it */ + const ib_uint64_t n_diff = n_diff_data->n_diff_on_level; + const ib_uint64_t n_pick + = n_diff_data->n_leaf_pages_to_analyze; + + const ib_uint64_t left = n_diff * i / n_pick; + const ib_uint64_t right = n_diff * (i + 1) / n_pick - 1; + + ut_a(left <= right); + ut_a(right <= last_idx_on_level); + + /* we do not pass (left, right) because we do not want to ask + ut_rnd_interval() to work 
with too big numbers since + ib_uint64_t could be bigger than ulint */ + const ulint rnd = ut_rnd_interval( + 0, static_cast<ulint>(right - left)); + + const ib_uint64_t dive_below_idx + = boundaries->at(static_cast<unsigned>(left + rnd)); + +#if 0 + DEBUG_PRINTF(" %s(): dive below record with index=" + UINT64PF "\n", __func__, dive_below_idx); +#endif + + /* seek to the record with index dive_below_idx */ + while (rec_idx < dive_below_idx + && btr_pcur_is_on_user_rec(&pcur)) { + + btr_pcur_move_to_next_user_rec(&pcur, mtr); + rec_idx++; + } + + /* if the level has finished before the record we are + searching for, this means that the B-tree has changed in + the meantime, quit our sampling and use whatever stats + we have collected so far */ + if (rec_idx < dive_below_idx) { + + ut_ad(!btr_pcur_is_on_user_rec(&pcur)); + break; + } + + /* it could be that the tree has changed in such a way that + the record under dive_below_idx is the supremum record, in + this case rec_idx == dive_below_idx and pcur is positioned + on the supremum, we do not want to dive below it */ + if (!btr_pcur_is_on_user_rec(&pcur)) { + break; + } + + ut_a(rec_idx == dive_below_idx); + + ib_uint64_t n_diff_on_leaf_page; + ib_uint64_t n_external_pages; + + dict_stats_analyze_index_below_cur(btr_pcur_get_btr_cur(&pcur), + n_prefix, + &n_diff_on_leaf_page, + &n_external_pages, + mtr); + + /* We adjust n_diff_on_leaf_page here to avoid counting + one record twice - once as the last on some page and once + as the first on another page. Consider the following example: + Leaf level: + page: (2,2,2,2,3,3) + ... many pages like (3,3,3,3,3,3) ... + page: (3,3,3,3,5,5) + ... many pages like (5,5,5,5,5,5) ... + page: (5,5,5,5,8,8) + page: (8,8,8,8,9,9) + our algo would (correctly) get an estimate that there are + 2 distinct records per page (average). Having 4 pages below + non-boring records, it would (wrongly) estimate the number + of distinct records to 8. 
*/ + if (n_diff_on_leaf_page > 0) { + n_diff_on_leaf_page--; + } + + n_diff_data->n_diff_all_analyzed_pages += n_diff_on_leaf_page; + + n_diff_data->n_external_pages_sum += n_external_pages; + } + + btr_pcur_close(&pcur); +} + +/** Set dict_index_t::stat_n_diff_key_vals[] and stat_n_sample_sizes[]. +@param[in] n_diff_data input data to use to derive the results +@param[in,out] index index whose stat_n_diff_key_vals[] to set */ +UNIV_INLINE +void +dict_stats_index_set_n_diff( + const n_diff_data_t* n_diff_data, + dict_index_t* index) +{ + for (ulint n_prefix = dict_index_get_n_unique(index); + n_prefix >= 1; + n_prefix--) { + /* n_diff_all_analyzed_pages can be 0 here if + all the leaf pages sampled contained only + delete-marked records. In this case we should assign + 0 to index->stat_n_diff_key_vals[n_prefix - 1], which + the formula below does. */ + + const n_diff_data_t* data = &n_diff_data[n_prefix - 1]; + + ut_ad(data->n_leaf_pages_to_analyze > 0); + ut_ad(data->n_recs_on_level > 0); + + ulint n_ordinary_leaf_pages; + + if (data->level == 1) { + /* If we know the number of records on level 1, then + this number is the same as the number of pages on + level 0 (leaf). */ + n_ordinary_leaf_pages = data->n_recs_on_level; + } else { + /* If we analyzed D ordinary leaf pages and found E + external pages in total linked from those D ordinary + leaf pages, then this means that the ratio + ordinary/external is D/E. Then the ratio ordinary/total + is D / (D + E). Knowing that the total number of pages + is T (including ordinary and external) then we estimate + that the total number of ordinary leaf pages is + T * D / (D + E). 
*/ + n_ordinary_leaf_pages + = index->stat_n_leaf_pages + * data->n_leaf_pages_to_analyze + / (data->n_leaf_pages_to_analyze + + data->n_external_pages_sum); + } + + /* See REF01 for an explanation of the algorithm */ + index->stat_n_diff_key_vals[n_prefix - 1] + = n_ordinary_leaf_pages + + * data->n_diff_on_level + / data->n_recs_on_level + + * data->n_diff_all_analyzed_pages + / data->n_leaf_pages_to_analyze; + + index->stat_n_sample_sizes[n_prefix - 1] + = data->n_leaf_pages_to_analyze; + + DEBUG_PRINTF(" %s(): n_diff=" UINT64PF " for n_prefix=%lu" + " (%lu" + " * " UINT64PF " / " UINT64PF + " * " UINT64PF " / " UINT64PF ")\n", + __func__, + index->stat_n_diff_key_vals[n_prefix - 1], + n_prefix, + index->stat_n_leaf_pages, + data->n_diff_on_level, + data->n_recs_on_level, + data->n_diff_all_analyzed_pages, + data->n_leaf_pages_to_analyze); + } +} + +/*********************************************************************//** +Calculates new statistics for a given index and saves them to the index +members stat_n_diff_key_vals[], stat_n_sample_sizes[], stat_index_size and +stat_n_leaf_pages. This function could be slow. 
*/
static
void
dict_stats_analyze_index(
/*=====================*/
	dict_index_t*	index)	/*!< in/out: index to analyze */
{
	ulint		root_level;
	ulint		level;
	bool		level_is_analyzed;
	ulint		n_uniq;
	ulint		n_prefix;
	ib_uint64_t	total_recs;
	ib_uint64_t	total_pages;
	mtr_t		mtr;
	ulint		size;
	DBUG_ENTER("dict_stats_analyze_index");

	DBUG_PRINT("info", ("index: %s, online status: %d", index->name,
			    dict_index_get_online_status(index)));

	DEBUG_PRINTF(" %s(index=%s)\n", __func__, index->name);

	dict_stats_empty_index(index);

	mtr_start(&mtr);

	mtr_s_lock(dict_index_get_lock(index), &mtr);

	size = btr_get_size(index, BTR_TOTAL_SIZE, &mtr);

	if (size != ULINT_UNDEFINED) {
		index->stat_index_size = size;
		size = btr_get_size(index, BTR_N_LEAF_PAGES, &mtr);
	}

	/* Release the X locks on the root page taken by btr_get_size() */
	mtr_commit(&mtr);

	switch (size) {
	case ULINT_UNDEFINED:
		/* the size could not be obtained: leave the stats as
		dict_stats_empty_index() set them */
		dict_stats_assert_initialized_index(index);
		DBUG_VOID_RETURN;
	case 0:
		/* The root node of the tree is a leaf */
		size = 1;
	}

	index->stat_n_leaf_pages = size;

	mtr_start(&mtr);

	mtr_s_lock(dict_index_get_lock(index), &mtr);

	root_level = btr_height_get(index, &mtr);

	n_uniq = dict_index_get_n_unique(index);

	/* If the tree has just one level (and one page) or if the user
	has requested to sample too many pages then do full scan.

	For each n-column prefix (for n=1..n_uniq) N_SAMPLE_PAGES(index)
	will be sampled, so in total N_SAMPLE_PAGES(index) * n_uniq leaf
	pages will be sampled. If that number is bigger than the total
	number of leaf pages then do full scan of the leaf level instead
	since it will be faster and will give better results. */

	if (root_level == 0
	    || N_SAMPLE_PAGES(index) * n_uniq > index->stat_n_leaf_pages) {

		if (root_level == 0) {
			DEBUG_PRINTF(" %s(): just one page, "
				     "doing full scan\n", __func__);
		} else {
			DEBUG_PRINTF(" %s(): too many pages requested for "
				     "sampling, doing full scan\n", __func__);
		}

		/* do full scan of level 0; save results directly
		into the index */

		dict_stats_analyze_index_level(index,
					       0 /* leaf level */,
					       index->stat_n_diff_key_vals,
					       &total_recs,
					       &total_pages,
					       NULL /* boundaries not needed */,
					       &mtr);

		for (ulint i = 0; i < n_uniq; i++) {
			index->stat_n_sample_sizes[i] = total_pages;
		}

		mtr_commit(&mtr);

		dict_stats_assert_initialized_index(index);
		DBUG_VOID_RETURN;
	}

	/* For each level that is being scanned in the btree, this contains the
	number of different key values for all possible n-column prefixes. */
	ib_uint64_t*	n_diff_on_level = new ib_uint64_t[n_uniq];

	/* For each level that is being scanned in the btree, this contains the
	index of the last record from each group of equal records (when
	comparing only the first n columns, n=1..n_uniq). */
	boundaries_t*	n_diff_boundaries = new boundaries_t[n_uniq];

	/* For each n-column prefix this array contains the input data that is
	used to calculate dict_index_t::stat_n_diff_key_vals[]. */
	n_diff_data_t*	n_diff_data = new n_diff_data_t[n_uniq];

	/* total_recs is also used to estimate the number of pages on one
	level below, so at the start we have 1 page (the root) */
	total_recs = 1;

	/* Here we use the following optimization:
	If we find that level L is the first one (searching from the
	root) that contains at least D distinct keys when looking at
	the first n_prefix columns, then:
	if we look at the first n_prefix-1 columns then the first
	level that contains D distinct keys will be either L or a
	lower one.
	So if we find that the first level containing D distinct
	keys (on n_prefix columns) is L, we continue from L when
	searching for D distinct keys on n_prefix-1 columns. */
	level = root_level;
	level_is_analyzed = false;

	for (n_prefix = n_uniq; n_prefix >= 1; n_prefix--) {

		DEBUG_PRINTF(" %s(): searching level with >=%llu "
			     "distinct records, n_prefix=%lu\n",
			     __func__, N_DIFF_REQUIRED(index), n_prefix);

		/* Commit the mtr to release the tree S lock to allow
		other threads to do some work too. */
		mtr_commit(&mtr);
		mtr_start(&mtr);
		mtr_s_lock(dict_index_get_lock(index), &mtr);
		if (root_level != btr_height_get(index, &mtr)) {
			/* Just quit if the tree has changed beyond
			recognition here. The old stats from previous
			runs will remain in the values that we have
			not calculated yet. Initially when the index
			object is created the stats members are given
			some sensible values so leaving them untouched
			here even the first time will not cause us to
			read uninitialized memory later. */
			break;
		}

		/* check whether we should pick the current level;
		we pick level 1 even if it does not have enough
		distinct records because we do not want to scan the
		leaf level because it may contain too many records */
		if (level_is_analyzed
		    && (n_diff_on_level[n_prefix - 1] >= N_DIFF_REQUIRED(index)
			|| level == 1)) {

			goto found_level;
		}

		/* search for a level that contains enough distinct records */

		if (level_is_analyzed && level > 1) {

			/* if this does not hold we should be on
			"found_level" instead of here */
			ut_ad(n_diff_on_level[n_prefix - 1]
			      < N_DIFF_REQUIRED(index));

			level--;
			level_is_analyzed = false;
		}

		/* descend into the tree, searching for "good enough" level */
		for (;;) {

			/* make sure we do not scan the leaf level
			accidentally, it may contain too many pages */
			ut_ad(level > 0);

			/* scanning the same level twice is an optimization
			bug */
			ut_ad(!level_is_analyzed);

			/* Do not scan if this would read too many pages.
			Here we use the following fact:
			the number of pages on level L equals the number
			of records on level L+1, thus we deduce that the
			following call would scan total_recs pages, because
			total_recs is left from the previous iteration when
			we scanned one level above or we have not scanned any
			levels yet in which case total_recs is 1. */
			if (total_recs > N_SAMPLE_PAGES(index)) {

				/* if the above cond is true then we are
				not at the root level since on the root
				level total_recs == 1 (set before we
				enter the n-prefix loop) and cannot
				be > N_SAMPLE_PAGES(index) */
				ut_a(level != root_level);

				/* step one level back and be satisfied with
				whatever it contains */
				level++;
				level_is_analyzed = true;

				break;
			}

			dict_stats_analyze_index_level(index,
						       level,
						       n_diff_on_level,
						       &total_recs,
						       &total_pages,
						       n_diff_boundaries,
						       &mtr);

			level_is_analyzed = true;

			if (level == 1
			    || n_diff_on_level[n_prefix - 1]
			    >= N_DIFF_REQUIRED(index)) {
				/* we have reached the last level we could scan
				or we found a good level with many distinct
				records */
				break;
			}

			level--;
			level_is_analyzed = false;
		}
found_level:

		DEBUG_PRINTF(" %s(): found level %lu that has " UINT64PF
			     " distinct records for n_prefix=%lu\n",
			     __func__, level, n_diff_on_level[n_prefix - 1],
			     n_prefix);
		/* here we are either on level 1 or the level that we are on
		contains >= N_DIFF_REQUIRED distinct keys or we did not scan
		deeper levels because they would contain too many pages */

		ut_ad(level > 0);

		ut_ad(level_is_analyzed);

		/* if any of these is 0 then there is exactly one page in the
		B-tree and it is empty and we should have done full scan and
		should not be here */
		ut_ad(total_recs > 0);
		ut_ad(n_diff_on_level[n_prefix - 1] > 0);

		ut_ad(N_SAMPLE_PAGES(index) > 0);

		n_diff_data_t*	data = &n_diff_data[n_prefix - 1];

		data->level = level;

		data->n_recs_on_level = total_recs;

		data->n_diff_on_level = n_diff_on_level[n_prefix - 1];

		/* never dive below more records than there are distinct
		keys on the chosen level */
		data->n_leaf_pages_to_analyze = std::min(
			N_SAMPLE_PAGES(index),
			n_diff_on_level[n_prefix - 1]);

		/* pick some records from this level and dive below them for
		the given n_prefix */

		dict_stats_analyze_index_for_n_prefix(
			index, n_prefix, &n_diff_boundaries[n_prefix - 1],
			data, &mtr);
	}

mtr_commit(&mtr); + + delete[] n_diff_boundaries; + + delete[] n_diff_on_level; + + /* n_prefix == 0 means that the above loop did not end up prematurely + due to tree being changed and so n_diff_data[] is set up. */ + if (n_prefix == 0) { + dict_stats_index_set_n_diff(n_diff_data, index); + } + + delete[] n_diff_data; + + dict_stats_assert_initialized_index(index); + DBUG_VOID_RETURN; +} + +/*********************************************************************//** +Calculates new estimates for table and index statistics. This function +is relatively slow and is used to calculate persistent statistics that +will be saved on disk. +@return DB_SUCCESS or error code */ +static +dberr_t +dict_stats_update_persistent( +/*=========================*/ + dict_table_t* table) /*!< in/out: table */ +{ + dict_index_t* index; + + DEBUG_PRINTF("%s(table=%s)\n", __func__, table->name); + + dict_table_stats_lock(table, RW_X_LATCH); + + /* analyze the clustered index first */ + + index = dict_table_get_first_index(table); + + if (index == NULL + || dict_index_is_corrupted(index) + || (index->type | DICT_UNIQUE) != (DICT_CLUSTERED | DICT_UNIQUE)) { + + /* Table definition is corrupt */ + dict_table_stats_unlock(table, RW_X_LATCH); + dict_stats_empty_table(table); + + return(DB_CORRUPTION); + } + + ut_ad(!dict_index_is_univ(index)); + + dict_stats_analyze_index(index); + + ulint n_unique = dict_index_get_n_unique(index); + + table->stat_n_rows = index->stat_n_diff_key_vals[n_unique - 1]; + + table->stat_clustered_index_size = index->stat_index_size; + + /* analyze other indexes from the table, if any */ + + table->stat_sum_of_other_index_sizes = 0; + + for (index = dict_table_get_next_index(index); + index != NULL; + index = dict_table_get_next_index(index)) { + + ut_ad(!dict_index_is_univ(index)); + + if (index->type & DICT_FTS) { + continue; + } + + dict_stats_empty_index(index); + + if (dict_stats_should_ignore_index(index)) { + continue; + } + + if (!(table->stats_bg_flag & 
BG_STAT_SHOULD_QUIT)) { + dict_stats_analyze_index(index); + } + + table->stat_sum_of_other_index_sizes + += index->stat_index_size; + } + + table->stats_last_recalc = ut_time(); + + table->stat_modified_counter = 0; + + table->stat_initialized = TRUE; + + dict_stats_assert_initialized(table); + + dict_table_stats_unlock(table, RW_X_LATCH); + + return(DB_SUCCESS); +} + +#include "mysql_com.h" +/** Save an individual index's statistic into the persistent statistics +storage. +@param[in] index index to be updated +@param[in] last_update timestamp of the stat +@param[in] stat_name name of the stat +@param[in] stat_value value of the stat +@param[in] sample_size n pages sampled or NULL +@param[in] stat_description description of the stat +@param[in,out] trx in case of NULL the function will +allocate and free the trx object. If it is not NULL then it will be +rolled back only in the case of error, but not freed. +@return DB_SUCCESS or error code */ +static +dberr_t +dict_stats_save_index_stat( + dict_index_t* index, + lint last_update, + const char* stat_name, + ib_uint64_t stat_value, + ib_uint64_t* sample_size, + const char* stat_description, + trx_t* trx) +{ + pars_info_t* pinfo; + dberr_t ret; + char db_utf8[MAX_DB_UTF8_LEN]; + char table_utf8[MAX_TABLE_UTF8_LEN]; + +#ifdef UNIV_SYNC_DEBUG + ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); +#endif /* UNIV_SYNC_DEBUG */ + ut_ad(mutex_own(&dict_sys->mutex)); + + dict_fs2utf8(index->table->name, db_utf8, sizeof(db_utf8), + table_utf8, sizeof(table_utf8)); + + pinfo = pars_info_create(); + pars_info_add_str_literal(pinfo, "database_name", db_utf8); + pars_info_add_str_literal(pinfo, "table_name", table_utf8); + UNIV_MEM_ASSERT_RW_ABORT(index->name, strlen(index->name)); + pars_info_add_str_literal(pinfo, "index_name", index->name); + UNIV_MEM_ASSERT_RW_ABORT(&last_update, 4); + pars_info_add_int4_literal(pinfo, "last_update", last_update); + UNIV_MEM_ASSERT_RW_ABORT(stat_name, strlen(stat_name)); + 
pars_info_add_str_literal(pinfo, "stat_name", stat_name); + UNIV_MEM_ASSERT_RW_ABORT(&stat_value, 8); + pars_info_add_ull_literal(pinfo, "stat_value", stat_value); + if (sample_size != NULL) { + UNIV_MEM_ASSERT_RW_ABORT(sample_size, 8); + pars_info_add_ull_literal(pinfo, "sample_size", *sample_size); + } else { + pars_info_add_literal(pinfo, "sample_size", NULL, + UNIV_SQL_NULL, DATA_FIXBINARY, 0); + } + UNIV_MEM_ASSERT_RW_ABORT(stat_description, strlen(stat_description)); + pars_info_add_str_literal(pinfo, "stat_description", + stat_description); + + ret = dict_stats_exec_sql( + pinfo, + "PROCEDURE INDEX_STATS_SAVE () IS\n" + "BEGIN\n" + + "DELETE FROM \"" INDEX_STATS_NAME "\"\n" + "WHERE\n" + "database_name = :database_name AND\n" + "table_name = :table_name AND\n" + "index_name = :index_name AND\n" + "stat_name = :stat_name;\n" + + "INSERT INTO \"" INDEX_STATS_NAME "\"\n" + "VALUES\n" + "(\n" + ":database_name,\n" + ":table_name,\n" + ":index_name,\n" + ":last_update,\n" + ":stat_name,\n" + ":stat_value,\n" + ":sample_size,\n" + ":stat_description\n" + ");\n" + "END;", trx); + + if (ret != DB_SUCCESS) { + char buf_table[MAX_FULL_NAME_LEN]; + char buf_index[MAX_FULL_NAME_LEN]; + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: Cannot save index statistics for table " + "%s, index %s, stat name \"%s\": %s\n", + ut_format_name(index->table->name, TRUE, + buf_table, sizeof(buf_table)), + ut_format_name(index->name, FALSE, + buf_index, sizeof(buf_index)), + stat_name, ut_strerr(ret)); + } + + return(ret); +} + +/** Save the table's statistics into the persistent statistics storage. 
+@param[in] table_orig table whose stats to save +@param[in] only_for_index if this is non-NULL, then stats for indexes +that are not equal to it will not be saved, if NULL, then all +indexes' stats are saved +@return DB_SUCCESS or error code */ +static +dberr_t +dict_stats_save( +/*============*/ + dict_table_t* table_orig, + const index_id_t* only_for_index) +{ + pars_info_t* pinfo; + lint now; + dberr_t ret; + dict_table_t* table; + char db_utf8[MAX_DB_UTF8_LEN]; + char table_utf8[MAX_TABLE_UTF8_LEN]; + + table = dict_stats_snapshot_create(table_orig); + + dict_fs2utf8(table->name, db_utf8, sizeof(db_utf8), + table_utf8, sizeof(table_utf8)); + + rw_lock_x_lock(&dict_operation_lock); + mutex_enter(&dict_sys->mutex); + + /* MySQL's timestamp is 4 byte, so we use + pars_info_add_int4_literal() which takes a lint arg, so "now" is + lint */ + now = (lint) ut_time(); + + pinfo = pars_info_create(); + + pars_info_add_str_literal(pinfo, "database_name", db_utf8); + pars_info_add_str_literal(pinfo, "table_name", table_utf8); + pars_info_add_int4_literal(pinfo, "last_update", now); + pars_info_add_ull_literal(pinfo, "n_rows", table->stat_n_rows); + pars_info_add_ull_literal(pinfo, "clustered_index_size", + table->stat_clustered_index_size); + pars_info_add_ull_literal(pinfo, "sum_of_other_index_sizes", + table->stat_sum_of_other_index_sizes); + + ret = dict_stats_exec_sql( + pinfo, + "PROCEDURE TABLE_STATS_SAVE () IS\n" + "BEGIN\n" + + "DELETE FROM \"" TABLE_STATS_NAME "\"\n" + "WHERE\n" + "database_name = :database_name AND\n" + "table_name = :table_name;\n" + + "INSERT INTO \"" TABLE_STATS_NAME "\"\n" + "VALUES\n" + "(\n" + ":database_name,\n" + ":table_name,\n" + ":last_update,\n" + ":n_rows,\n" + ":clustered_index_size,\n" + ":sum_of_other_index_sizes\n" + ");\n" + "END;", NULL); + + if (ret != DB_SUCCESS) { + char buf[MAX_FULL_NAME_LEN]; + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: Cannot save table statistics for table " + "%s: %s\n", + 
ut_format_name(table->name, TRUE, buf, sizeof(buf)), + ut_strerr(ret)); + + mutex_exit(&dict_sys->mutex); + rw_lock_x_unlock(&dict_operation_lock); + + dict_stats_snapshot_free(table); + + return(ret); + } + + trx_t* trx = trx_allocate_for_background(); + trx_start_if_not_started(trx); + + dict_index_t* index; + index_map_t indexes; + + /* Below we do all the modifications in innodb_index_stats in a single + transaction for performance reasons. Modifying more than one row in a + single transaction may deadlock with other transactions if they + lock the rows in different order. Other transaction could be for + example when we DROP a table and do + DELETE FROM innodb_index_stats WHERE database_name = '...' + AND table_name = '...'; which will affect more than one row. To + prevent deadlocks we always lock the rows in the same order - the + order of the PK, which is (database_name, table_name, index_name, + stat_name). This is why below we sort the indexes by name and then + for each index, do the mods ordered by stat_name. 
*/ + + for (index = dict_table_get_first_index(table); + index != NULL; + index = dict_table_get_next_index(index)) { + + indexes[index->name] = index; + } + + index_map_t::const_iterator it; + + for (it = indexes.begin(); it != indexes.end(); ++it) { + + index = it->second; + + if (only_for_index != NULL && index->id != *only_for_index) { + continue; + } + + if (dict_stats_should_ignore_index(index)) { + continue; + } + + ut_ad(!dict_index_is_univ(index)); + + for (ulint i = 0; i < index->n_uniq; i++) { + + char stat_name[16]; + char stat_description[1024]; + ulint j; + + ut_snprintf(stat_name, sizeof(stat_name), + "n_diff_pfx%02lu", i + 1); + + /* craft a string that contains the columns names */ + ut_snprintf(stat_description, + sizeof(stat_description), + "%s", index->fields[0].name); + for (j = 1; j <= i; j++) { + size_t len; + + len = strlen(stat_description); + + ut_snprintf(stat_description + len, + sizeof(stat_description) - len, + ",%s", index->fields[j].name); + } + + ret = dict_stats_save_index_stat( + index, now, stat_name, + index->stat_n_diff_key_vals[i], + &index->stat_n_sample_sizes[i], + stat_description, trx); + + if (ret != DB_SUCCESS) { + goto end; + } + } + + ret = dict_stats_save_index_stat(index, now, "n_leaf_pages", + index->stat_n_leaf_pages, + NULL, + "Number of leaf pages " + "in the index", trx); + if (ret != DB_SUCCESS) { + goto end; + } + + ret = dict_stats_save_index_stat(index, now, "size", + index->stat_index_size, + NULL, + "Number of pages " + "in the index", trx); + if (ret != DB_SUCCESS) { + goto end; + } + } + + trx_commit_for_mysql(trx); + +end: + trx_free_for_background(trx); + + mutex_exit(&dict_sys->mutex); + rw_lock_x_unlock(&dict_operation_lock); + + dict_stats_snapshot_free(table); + + return(ret); +} + +/*********************************************************************//** +Called for the row that is selected by +SELECT ... FROM mysql.innodb_table_stats WHERE table='...' 
+The second argument is a pointer to the table and the fetched stats are +written to it. +@return non-NULL dummy */ +static +ibool +dict_stats_fetch_table_stats_step( +/*==============================*/ + void* node_void, /*!< in: select node */ + void* table_void) /*!< out: table */ +{ + sel_node_t* node = (sel_node_t*) node_void; + dict_table_t* table = (dict_table_t*) table_void; + que_common_t* cnode; + int i; + + /* this should loop exactly 3 times - for + n_rows,clustered_index_size,sum_of_other_index_sizes */ + for (cnode = static_cast<que_common_t*>(node->select_list), i = 0; + cnode != NULL; + cnode = static_cast<que_common_t*>(que_node_get_next(cnode)), + i++) { + + const byte* data; + dfield_t* dfield = que_node_get_val(cnode); + dtype_t* type = dfield_get_type(dfield); + ulint len = dfield_get_len(dfield); + + data = static_cast<const byte*>(dfield_get_data(dfield)); + + switch (i) { + case 0: /* mysql.innodb_table_stats.n_rows */ + + ut_a(dtype_get_mtype(type) == DATA_INT); + ut_a(len == 8); + + table->stat_n_rows = mach_read_from_8(data); + + break; + + case 1: /* mysql.innodb_table_stats.clustered_index_size */ + + ut_a(dtype_get_mtype(type) == DATA_INT); + ut_a(len == 8); + + table->stat_clustered_index_size + = (ulint) mach_read_from_8(data); + + break; + + case 2: /* mysql.innodb_table_stats.sum_of_other_index_sizes */ + + ut_a(dtype_get_mtype(type) == DATA_INT); + ut_a(len == 8); + + table->stat_sum_of_other_index_sizes + = (ulint) mach_read_from_8(data); + + break; + + default: + + /* someone changed SELECT + n_rows,clustered_index_size,sum_of_other_index_sizes + to select more columns from innodb_table_stats without + adjusting here */ + ut_error; + } + } + + /* if i < 3 this means someone changed the + SELECT n_rows,clustered_index_size,sum_of_other_index_sizes + to select less columns from innodb_table_stats without adjusting here; + if i > 3 we would have ut_error'ed earlier */ + ut_a(i == 3 
/*n_rows,clustered_index_size,sum_of_other_index_sizes*/); + + /* XXX this is not used but returning non-NULL is necessary */ + return(TRUE); +} + +/** Aux struct used to pass a table and a boolean to +dict_stats_fetch_index_stats_step(). */ +struct index_fetch_t { + dict_table_t* table; /*!< table whose indexes are to be modified */ + bool stats_were_modified; /*!< will be set to true if at + least one index stats were modified */ +}; + +/*********************************************************************//** +Called for the rows that are selected by +SELECT ... FROM mysql.innodb_index_stats WHERE table='...' +The second argument is a pointer to the table and the fetched stats are +written to its indexes. +Let a table has N indexes and each index has Ui unique columns for i=1..N, +then mysql.innodb_index_stats will have SUM(Ui) i=1..N rows for that table. +So this function will be called SUM(Ui) times where SUM(Ui) is of magnitude +N*AVG(Ui). In each call it searches for the currently fetched index into +table->indexes linearly, assuming this list is not sorted. Thus, overall, +fetching all indexes' stats from mysql.innodb_index_stats is O(N^2) where N +is the number of indexes. +This can be improved if we sort table->indexes in a temporary area just once +and then search in that sorted list. Then the complexity will be O(N*log(N)). +We assume a table will not have more than 100 indexes, so we go with the +simpler N^2 algorithm. 
+@return non-NULL dummy */ +static +ibool +dict_stats_fetch_index_stats_step( +/*==============================*/ + void* node_void, /*!< in: select node */ + void* arg_void) /*!< out: table + a flag that tells if we + modified anything */ +{ + sel_node_t* node = (sel_node_t*) node_void; + index_fetch_t* arg = (index_fetch_t*) arg_void; + dict_table_t* table = arg->table; + dict_index_t* index = NULL; + que_common_t* cnode; + const char* stat_name = NULL; + ulint stat_name_len = ULINT_UNDEFINED; + ib_uint64_t stat_value = UINT64_UNDEFINED; + ib_uint64_t sample_size = UINT64_UNDEFINED; + int i; + + /* this should loop exactly 4 times - for the columns that + were selected: index_name,stat_name,stat_value,sample_size */ + for (cnode = static_cast<que_common_t*>(node->select_list), i = 0; + cnode != NULL; + cnode = static_cast<que_common_t*>(que_node_get_next(cnode)), + i++) { + + const byte* data; + dfield_t* dfield = que_node_get_val(cnode); + dtype_t* type = dfield_get_type(dfield); + ulint len = dfield_get_len(dfield); + + data = static_cast<const byte*>(dfield_get_data(dfield)); + + switch (i) { + case 0: /* mysql.innodb_index_stats.index_name */ + + ut_a(dtype_get_mtype(type) == DATA_VARMYSQL); + + /* search for index in table's indexes whose name + matches data; the fetched index name is in data, + has no terminating '\0' and has length len */ + for (index = dict_table_get_first_index(table); + index != NULL; + index = dict_table_get_next_index(index)) { + + if (strlen(index->name) == len + && memcmp(index->name, data, len) == 0) { + /* the corresponding index was found */ + break; + } + } + + /* if index is NULL here this means that + mysql.innodb_index_stats contains more rows than the + number of indexes in the table; this is ok, we just + return ignoring those extra rows; in other words + dict_stats_fetch_index_stats_step() has been called + for a row from index_stats with unknown index_name + column */ + if (index == NULL) { + + return(TRUE); + } + + 
break; + + case 1: /* mysql.innodb_index_stats.stat_name */ + + ut_a(dtype_get_mtype(type) == DATA_VARMYSQL); + + ut_a(index != NULL); + + stat_name = (const char*) data; + stat_name_len = len; + + break; + + case 2: /* mysql.innodb_index_stats.stat_value */ + + ut_a(dtype_get_mtype(type) == DATA_INT); + ut_a(len == 8); + + ut_a(index != NULL); + ut_a(stat_name != NULL); + ut_a(stat_name_len != ULINT_UNDEFINED); + + stat_value = mach_read_from_8(data); + + break; + + case 3: /* mysql.innodb_index_stats.sample_size */ + + ut_a(dtype_get_mtype(type) == DATA_INT); + ut_a(len == 8 || len == UNIV_SQL_NULL); + + ut_a(index != NULL); + ut_a(stat_name != NULL); + ut_a(stat_name_len != ULINT_UNDEFINED); + ut_a(stat_value != UINT64_UNDEFINED); + + if (len == UNIV_SQL_NULL) { + break; + } + /* else */ + + sample_size = mach_read_from_8(data); + + break; + + default: + + /* someone changed + SELECT index_name,stat_name,stat_value,sample_size + to select more columns from innodb_index_stats without + adjusting here */ + ut_error; + } + } + + /* if i < 4 this means someone changed the + SELECT index_name,stat_name,stat_value,sample_size + to select less columns from innodb_index_stats without adjusting here; + if i > 4 we would have ut_error'ed earlier */ + ut_a(i == 4 /* index_name,stat_name,stat_value,sample_size */); + + ut_a(index != NULL); + ut_a(stat_name != NULL); + ut_a(stat_name_len != ULINT_UNDEFINED); + ut_a(stat_value != UINT64_UNDEFINED); + /* sample_size could be UINT64_UNDEFINED here, if it is NULL */ + +#define PFX "n_diff_pfx" +#define PFX_LEN 10 + + if (stat_name_len == 4 /* strlen("size") */ + && strncasecmp("size", stat_name, stat_name_len) == 0) { + index->stat_index_size = (ulint) stat_value; + arg->stats_were_modified = true; + } else if (stat_name_len == 12 /* strlen("n_leaf_pages") */ + && strncasecmp("n_leaf_pages", stat_name, stat_name_len) + == 0) { + index->stat_n_leaf_pages = (ulint) stat_value; + arg->stats_were_modified = true; + } else if 
(stat_name_len > PFX_LEN /* e.g. stat_name=="n_diff_pfx01" */ + && strncasecmp(PFX, stat_name, PFX_LEN) == 0) { + + const char* num_ptr; + unsigned long n_pfx; + + /* point num_ptr into "1" from "n_diff_pfx12..." */ + num_ptr = stat_name + PFX_LEN; + + /* stat_name should have exactly 2 chars appended to PFX + and they should be digits */ + if (stat_name_len != PFX_LEN + 2 + || num_ptr[0] < '0' || num_ptr[0] > '9' + || num_ptr[1] < '0' || num_ptr[1] > '9') { + + char db_utf8[MAX_DB_UTF8_LEN]; + char table_utf8[MAX_TABLE_UTF8_LEN]; + + dict_fs2utf8(table->name, db_utf8, sizeof(db_utf8), + table_utf8, sizeof(table_utf8)); + + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: Ignoring strange row from " + "%s WHERE " + "database_name = '%s' AND " + "table_name = '%s' AND " + "index_name = '%s' AND " + "stat_name = '%.*s'; because stat_name " + "is malformed\n", + INDEX_STATS_NAME_PRINT, + db_utf8, + table_utf8, + index->name, + (int) stat_name_len, + stat_name); + return(TRUE); + } + /* else */ + + /* extract 12 from "n_diff_pfx12..." 
into n_pfx + note that stat_name does not have a terminating '\0' */ + n_pfx = (num_ptr[0] - '0') * 10 + (num_ptr[1] - '0'); + + ulint n_uniq = index->n_uniq; + + if (n_pfx == 0 || n_pfx > n_uniq) { + + char db_utf8[MAX_DB_UTF8_LEN]; + char table_utf8[MAX_TABLE_UTF8_LEN]; + + dict_fs2utf8(table->name, db_utf8, sizeof(db_utf8), + table_utf8, sizeof(table_utf8)); + + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: Ignoring strange row from " + "%s WHERE " + "database_name = '%s' AND " + "table_name = '%s' AND " + "index_name = '%s' AND " + "stat_name = '%.*s'; because stat_name is " + "out of range, the index has %lu unique " + "columns\n", + INDEX_STATS_NAME_PRINT, + db_utf8, + table_utf8, + index->name, + (int) stat_name_len, + stat_name, + n_uniq); + return(TRUE); + } + /* else */ + + index->stat_n_diff_key_vals[n_pfx - 1] = stat_value; + + if (sample_size != UINT64_UNDEFINED) { + index->stat_n_sample_sizes[n_pfx - 1] = sample_size; + } else { + /* hmm, strange... the user must have UPDATEd the + table manually and SET sample_size = NULL */ + index->stat_n_sample_sizes[n_pfx - 1] = 0; + } + + index->stat_n_non_null_key_vals[n_pfx - 1] = 0; + + arg->stats_were_modified = true; + } else { + /* silently ignore rows with unknown stat_name, the + user may have developed her own stats */ + } + + /* XXX this is not used but returning non-NULL is necessary */ + return(TRUE); +} + +/*********************************************************************//** +Read table's statistics from the persistent statistics storage. 
+@return DB_SUCCESS or error code */ +static +dberr_t +dict_stats_fetch_from_ps( +/*=====================*/ + dict_table_t* table) /*!< in/out: table */ +{ + index_fetch_t index_fetch_arg; + trx_t* trx; + pars_info_t* pinfo; + dberr_t ret; + char db_utf8[MAX_DB_UTF8_LEN]; + char table_utf8[MAX_TABLE_UTF8_LEN]; + + ut_ad(!mutex_own(&dict_sys->mutex)); + + /* Initialize all stats to dummy values before fetching because if + the persistent storage contains incomplete stats (e.g. missing stats + for some index) then we would end up with (partially) uninitialized + stats. */ + dict_stats_empty_table(table); + + trx = trx_allocate_for_background(); + + /* Use 'read-uncommitted' so that the SELECTs we execute + do not get blocked in case some user has locked the rows we + are SELECTing */ + + trx->isolation_level = TRX_ISO_READ_UNCOMMITTED; + + trx_start_if_not_started(trx); + + dict_fs2utf8(table->name, db_utf8, sizeof(db_utf8), + table_utf8, sizeof(table_utf8)); + + pinfo = pars_info_create(); + + pars_info_add_str_literal(pinfo, "database_name", db_utf8); + + pars_info_add_str_literal(pinfo, "table_name", table_utf8); + + pars_info_bind_function(pinfo, + "fetch_table_stats_step", + dict_stats_fetch_table_stats_step, + table); + + index_fetch_arg.table = table; + index_fetch_arg.stats_were_modified = false; + pars_info_bind_function(pinfo, + "fetch_index_stats_step", + dict_stats_fetch_index_stats_step, + &index_fetch_arg); + + ret = que_eval_sql(pinfo, + "PROCEDURE FETCH_STATS () IS\n" + "found INT;\n" + "DECLARE FUNCTION fetch_table_stats_step;\n" + "DECLARE FUNCTION fetch_index_stats_step;\n" + "DECLARE CURSOR table_stats_cur IS\n" + " SELECT\n" + /* if you change the selected fields, be + sure to adjust + dict_stats_fetch_table_stats_step() */ + " n_rows,\n" + " clustered_index_size,\n" + " sum_of_other_index_sizes\n" + " FROM \"" TABLE_STATS_NAME "\"\n" + " WHERE\n" + " database_name = :database_name AND\n" + " table_name = :table_name;\n" + "DECLARE CURSOR 
index_stats_cur IS\n" + " SELECT\n" + /* if you change the selected fields, be + sure to adjust + dict_stats_fetch_index_stats_step() */ + " index_name,\n" + " stat_name,\n" + " stat_value,\n" + " sample_size\n" + " FROM \"" INDEX_STATS_NAME "\"\n" + " WHERE\n" + " database_name = :database_name AND\n" + " table_name = :table_name;\n" + + "BEGIN\n" + + "OPEN table_stats_cur;\n" + "FETCH table_stats_cur INTO\n" + " fetch_table_stats_step();\n" + "IF (SQL % NOTFOUND) THEN\n" + " CLOSE table_stats_cur;\n" + " RETURN;\n" + "END IF;\n" + "CLOSE table_stats_cur;\n" + + "OPEN index_stats_cur;\n" + "found := 1;\n" + "WHILE found = 1 LOOP\n" + " FETCH index_stats_cur INTO\n" + " fetch_index_stats_step();\n" + " IF (SQL % NOTFOUND) THEN\n" + " found := 0;\n" + " END IF;\n" + "END LOOP;\n" + "CLOSE index_stats_cur;\n" + + "END;", + TRUE, trx); + /* pinfo is freed by que_eval_sql() */ + + trx_commit_for_mysql(trx); + + trx_free_for_background(trx); + + if (!index_fetch_arg.stats_were_modified) { + return(DB_STATS_DO_NOT_EXIST); + } + + return(ret); +} + +/*********************************************************************//** +Fetches or calculates new estimates for index statistics. 
*/
UNIV_INTERN
void
dict_stats_update_for_index(
/*========================*/
	dict_index_t*	index)	/*!< in/out: index */
{
	DBUG_ENTER("dict_stats_update_for_index");

	ut_ad(!mutex_own(&dict_sys->mutex));

	if (dict_stats_is_persistent_enabled(index->table)) {

		if (!dict_stats_persistent_storage_check(false)) {

			/* Fall back to transient stats since the persistent
			storage is not present or is corrupted */
			char	table_buf[MAX_FULL_NAME_LEN];
			char	index_buf[MAX_FULL_NAME_LEN];

			ut_print_timestamp(stderr);
			fprintf(stderr,
				" InnoDB: Recalculation of persistent statistics "
				"requested for table %s index %s but the required "
				"persistent statistics storage is not present or is "
				"corrupted. Using transient stats instead.\n",
				ut_format_name(index->table->name, TRUE,
					       table_buf, sizeof(table_buf)),
				ut_format_name(index->name, FALSE,
					       index_buf, sizeof(index_buf)));
		} else {
			/* Persistent path: analyze under the table stats
			latch, then save the stats of this index only. The
			result of dict_stats_save() is not examined here. */
			dict_table_stats_lock(index->table, RW_X_LATCH);
			dict_stats_analyze_index(index);
			dict_table_stats_unlock(index->table, RW_X_LATCH);

			dict_stats_save(index->table, &index->id);

			DBUG_VOID_RETURN;
		}
	}

	/* Transient path: persistent stats are either not enabled for
	the table or their storage is unusable. */
	dict_table_stats_lock(index->table, RW_X_LATCH);
	dict_stats_update_transient_for_index(index);
	dict_table_stats_unlock(index->table, RW_X_LATCH);

	DBUG_VOID_RETURN;
}

/*********************************************************************//**
Calculates new estimates for table and index statistics. The statistics
are used in query optimization.
+@return DB_SUCCESS or error code */ +UNIV_INTERN +dberr_t +dict_stats_update( +/*==============*/ + dict_table_t* table, /*!< in/out: table */ + dict_stats_upd_option_t stats_upd_option) + /*!< in: whether to (re) calc + the stats or to fetch them from + the persistent statistics + storage */ +{ + char buf[MAX_FULL_NAME_LEN]; + + ut_ad(!mutex_own(&dict_sys->mutex)); + + if (table->ibd_file_missing) { + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: cannot calculate statistics for table %s " + "because the .ibd file is missing. For help, please " + "refer to " REFMAN "innodb-troubleshooting.html\n", + ut_format_name(table->name, TRUE, buf, sizeof(buf))); + dict_stats_empty_table(table); + return(DB_TABLESPACE_DELETED); + } else if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) { + /* If we have set a high innodb_force_recovery level, do + not calculate statistics, as a badly corrupted index can + cause a crash in it. */ + dict_stats_empty_table(table); + return(DB_SUCCESS); + } + + switch (stats_upd_option) { + case DICT_STATS_RECALC_PERSISTENT: + + if (srv_read_only_mode) { + goto transient; + } + + /* Persistent recalculation requested, called from + 1) ANALYZE TABLE, or + 2) the auto recalculation background thread, or + 3) open table if stats do not exist on disk and auto recalc + is enabled */ + + /* InnoDB internal tables (e.g. 
SYS_TABLES) cannot have + persistent stats enabled */ + ut_a(strchr(table->name, '/') != NULL); + + /* check if the persistent statistics storage exists + before calling the potentially slow function + dict_stats_update_persistent(); that is a + prerequisite for dict_stats_save() succeeding */ + if (dict_stats_persistent_storage_check(false)) { + + dberr_t err; + + err = dict_stats_update_persistent(table); + + if (err != DB_SUCCESS) { + return(err); + } + + err = dict_stats_save(table, NULL); + + return(err); + } + + /* Fall back to transient stats since the persistent + storage is not present or is corrupted */ + + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: Recalculation of persistent statistics " + "requested for table %s but the required persistent " + "statistics storage is not present or is corrupted. " + "Using transient stats instead.\n", + ut_format_name(table->name, TRUE, buf, sizeof(buf))); + + goto transient; + + case DICT_STATS_RECALC_TRANSIENT: + + goto transient; + + case DICT_STATS_EMPTY_TABLE: + + dict_stats_empty_table(table); + + /* If table is using persistent stats, + then save the stats on disk */ + + if (dict_stats_is_persistent_enabled(table)) { + + if (dict_stats_persistent_storage_check(false)) { + + return(dict_stats_save(table, NULL)); + } + + return(DB_STATS_DO_NOT_EXIST); + } + + return(DB_SUCCESS); + + case DICT_STATS_FETCH_ONLY_IF_NOT_IN_MEMORY: + + /* fetch requested, either fetch from persistent statistics + storage or use the old method */ + + if (table->stat_initialized) { + return(DB_SUCCESS); + } + + /* InnoDB internal tables (e.g. 
SYS_TABLES) cannot have + persistent stats enabled */ + ut_a(strchr(table->name, '/') != NULL); + + if (!dict_stats_persistent_storage_check(false)) { + /* persistent statistics storage does not exist + or is corrupted, calculate the transient stats */ + + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: Error: Fetch of persistent " + "statistics requested for table %s but the " + "required system tables %s and %s are not " + "present or have unexpected structure. " + "Using transient stats instead.\n", + ut_format_name(table->name, TRUE, + buf, sizeof(buf)), + TABLE_STATS_NAME_PRINT, + INDEX_STATS_NAME_PRINT); + + goto transient; + } + + dict_table_t* t; + + /* Create a dummy table object with the same name and + indexes, suitable for fetching the stats into it. */ + t = dict_stats_table_clone_create(table); + + dberr_t err = dict_stats_fetch_from_ps(t); + + t->stats_last_recalc = table->stats_last_recalc; + t->stat_modified_counter = 0; + + switch (err) { + case DB_SUCCESS: + + dict_table_stats_lock(table, RW_X_LATCH); + + /* Initialize all stats to dummy values before + copying because dict_stats_table_clone_create() does + skip corrupted indexes so our dummy object 't' may + have less indexes than the real object 'table'. */ + dict_stats_empty_table(table); + + dict_stats_copy(table, t); + + dict_stats_assert_initialized(table); + + dict_table_stats_unlock(table, RW_X_LATCH); + + dict_stats_table_clone_free(t); + + return(DB_SUCCESS); + case DB_STATS_DO_NOT_EXIST: + + dict_stats_table_clone_free(t); + + if (srv_read_only_mode) { + goto transient; + } + + if (dict_stats_auto_recalc_is_enabled(table)) { + return(dict_stats_update( + table, + DICT_STATS_RECALC_PERSISTENT)); + } + + ut_format_name(table->name, TRUE, buf, sizeof(buf)); + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: Trying to use table %s which has " + "persistent statistics enabled, but auto " + "recalculation turned off and the statistics " + "do not exist in %s and %s. 
Please either run " + "\"ANALYZE TABLE %s;\" manually or enable the " + "auto recalculation with " + "\"ALTER TABLE %s STATS_AUTO_RECALC=1;\". " + "InnoDB will now use transient statistics for " + "%s.\n", + buf, TABLE_STATS_NAME, INDEX_STATS_NAME, buf, + buf, buf); + + goto transient; + default: + + dict_stats_table_clone_free(t); + + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: Error fetching persistent statistics " + "for table %s from %s and %s: %s. " + "Using transient stats method instead.\n", + ut_format_name(table->name, TRUE, buf, + sizeof(buf)), + TABLE_STATS_NAME, + INDEX_STATS_NAME, + ut_strerr(err)); + + goto transient; + } + /* no "default:" in order to produce a compilation warning + about unhandled enumeration value */ + } + +transient: + + dict_table_stats_lock(table, RW_X_LATCH); + + dict_stats_update_transient(table); + + dict_table_stats_unlock(table, RW_X_LATCH); + + return(DB_SUCCESS); +} + +/*********************************************************************//** +Removes the information for a particular index's stats from the persistent +storage if it exists and if there is data stored for this index. +This function creates its own trx and commits it. +A note from Marko why we cannot edit user and sys_* tables in one trx: +marko: The problem is that ibuf merges should be disabled while we are +rolling back dict transactions. +marko: If ibuf merges are not disabled, we need to scan the *.ibd files. +But we shouldn't open *.ibd files before we have rolled back dict +transactions and opened the SYS_* records for the *.ibd files. +@return DB_SUCCESS or error code */ +UNIV_INTERN +dberr_t +dict_stats_drop_index( +/*==================*/ + const char* db_and_table,/*!< in: db and table, e.g. 
'db/table' */ + const char* iname, /*!< in: index name */ + char* errstr, /*!< out: error message if != DB_SUCCESS + is returned */ + ulint errstr_sz)/*!< in: size of the errstr buffer */ +{ + char db_utf8[MAX_DB_UTF8_LEN]; + char table_utf8[MAX_TABLE_UTF8_LEN]; + pars_info_t* pinfo; + dberr_t ret; + + ut_ad(!mutex_own(&dict_sys->mutex)); + + /* skip indexes whose table names do not contain a database name + e.g. if we are dropping an index from SYS_TABLES */ + if (strchr(db_and_table, '/') == NULL) { + + return(DB_SUCCESS); + } + + dict_fs2utf8(db_and_table, db_utf8, sizeof(db_utf8), + table_utf8, sizeof(table_utf8)); + + pinfo = pars_info_create(); + + pars_info_add_str_literal(pinfo, "database_name", db_utf8); + + pars_info_add_str_literal(pinfo, "table_name", table_utf8); + + pars_info_add_str_literal(pinfo, "index_name", iname); + + rw_lock_x_lock(&dict_operation_lock); + mutex_enter(&dict_sys->mutex); + + ret = dict_stats_exec_sql( + pinfo, + "PROCEDURE DROP_INDEX_STATS () IS\n" + "BEGIN\n" + "DELETE FROM \"" INDEX_STATS_NAME "\" WHERE\n" + "database_name = :database_name AND\n" + "table_name = :table_name AND\n" + "index_name = :index_name;\n" + "END;\n", NULL); + + mutex_exit(&dict_sys->mutex); + rw_lock_x_unlock(&dict_operation_lock); + + if (ret == DB_STATS_DO_NOT_EXIST) { + ret = DB_SUCCESS; + } + + if (ret != DB_SUCCESS) { + ut_snprintf(errstr, errstr_sz, + "Unable to delete statistics for index %s " + "from %s%s: %s. They can be deleted later using " + "DELETE FROM %s WHERE " + "database_name = '%s' AND " + "table_name = '%s' AND " + "index_name = '%s';", + iname, + INDEX_STATS_NAME_PRINT, + (ret == DB_LOCK_WAIT_TIMEOUT + ? 
" because the rows are locked" + : ""), + ut_strerr(ret), + INDEX_STATS_NAME_PRINT, + db_utf8, + table_utf8, + iname); + + ut_print_timestamp(stderr); + fprintf(stderr, " InnoDB: %s\n", errstr); + } + + return(ret); +} + +/*********************************************************************//** +Executes +DELETE FROM mysql.innodb_table_stats +WHERE database_name = '...' AND table_name = '...'; +Creates its own transaction and commits it. +@return DB_SUCCESS or error code */ +UNIV_INLINE +dberr_t +dict_stats_delete_from_table_stats( +/*===============================*/ + const char* database_name, /*!< in: database name, e.g. 'db' */ + const char* table_name) /*!< in: table name, e.g. 'table' */ +{ + pars_info_t* pinfo; + dberr_t ret; + +#ifdef UNIV_SYNC_DEBUG + ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); +#endif /* UNIV_SYNC_DEBUG */ + ut_ad(mutex_own(&dict_sys->mutex)); + + pinfo = pars_info_create(); + + pars_info_add_str_literal(pinfo, "database_name", database_name); + pars_info_add_str_literal(pinfo, "table_name", table_name); + + ret = dict_stats_exec_sql( + pinfo, + "PROCEDURE DELETE_FROM_TABLE_STATS () IS\n" + "BEGIN\n" + "DELETE FROM \"" TABLE_STATS_NAME "\" WHERE\n" + "database_name = :database_name AND\n" + "table_name = :table_name;\n" + "END;\n", NULL); + + return(ret); +} + +/*********************************************************************//** +Executes +DELETE FROM mysql.innodb_index_stats +WHERE database_name = '...' AND table_name = '...'; +Creates its own transaction and commits it. +@return DB_SUCCESS or error code */ +UNIV_INLINE +dberr_t +dict_stats_delete_from_index_stats( +/*===============================*/ + const char* database_name, /*!< in: database name, e.g. 'db' */ + const char* table_name) /*!< in: table name, e.g. 
'table' */ +{ + pars_info_t* pinfo; + dberr_t ret; + +#ifdef UNIV_SYNC_DEBUG + ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); +#endif /* UNIV_SYNC_DEBUG */ + ut_ad(mutex_own(&dict_sys->mutex)); + + pinfo = pars_info_create(); + + pars_info_add_str_literal(pinfo, "database_name", database_name); + pars_info_add_str_literal(pinfo, "table_name", table_name); + + ret = dict_stats_exec_sql( + pinfo, + "PROCEDURE DELETE_FROM_INDEX_STATS () IS\n" + "BEGIN\n" + "DELETE FROM \"" INDEX_STATS_NAME "\" WHERE\n" + "database_name = :database_name AND\n" + "table_name = :table_name;\n" + "END;\n", NULL); + + return(ret); +} + +/*********************************************************************//** +Removes the statistics for a table and all of its indexes from the +persistent statistics storage if it exists and if there is data stored for +the table. This function creates its own transaction and commits it. +@return DB_SUCCESS or error code */ +UNIV_INTERN +dberr_t +dict_stats_drop_table( +/*==================*/ + const char* db_and_table, /*!< in: db and table, e.g. 'db/table' */ + char* errstr, /*!< out: error message + if != DB_SUCCESS is returned */ + ulint errstr_sz) /*!< in: size of errstr buffer */ +{ + char db_utf8[MAX_DB_UTF8_LEN]; + char table_utf8[MAX_TABLE_UTF8_LEN]; + dberr_t ret; + +#ifdef UNIV_SYNC_DEBUG + ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); +#endif /* UNIV_SYNC_DEBUG */ + ut_ad(mutex_own(&dict_sys->mutex)); + + /* skip tables that do not contain a database name + e.g. 
if we are dropping SYS_TABLES */ + if (strchr(db_and_table, '/') == NULL) { + + return(DB_SUCCESS); + } + + /* skip innodb_table_stats and innodb_index_stats themselves */ + if (strcmp(db_and_table, TABLE_STATS_NAME) == 0 + || strcmp(db_and_table, INDEX_STATS_NAME) == 0) { + + return(DB_SUCCESS); + } + + dict_fs2utf8(db_and_table, db_utf8, sizeof(db_utf8), + table_utf8, sizeof(table_utf8)); + + ret = dict_stats_delete_from_table_stats(db_utf8, table_utf8); + + if (ret == DB_SUCCESS) { + ret = dict_stats_delete_from_index_stats(db_utf8, table_utf8); + } + + if (ret == DB_STATS_DO_NOT_EXIST) { + ret = DB_SUCCESS; + } + + if (ret != DB_SUCCESS) { + + ut_snprintf(errstr, errstr_sz, + "Unable to delete statistics for table %s.%s: %s. " + "They can be deleted later using " + + "DELETE FROM %s WHERE " + "database_name = '%s' AND " + "table_name = '%s'; " + + "DELETE FROM %s WHERE " + "database_name = '%s' AND " + "table_name = '%s';", + + db_utf8, table_utf8, + ut_strerr(ret), + + INDEX_STATS_NAME_PRINT, + db_utf8, table_utf8, + + TABLE_STATS_NAME_PRINT, + db_utf8, table_utf8); + } + + return(ret); +} + +/*********************************************************************//** +Executes +UPDATE mysql.innodb_table_stats SET +database_name = '...', table_name = '...' +WHERE database_name = '...' AND table_name = '...'; +Creates its own transaction and commits it. +@return DB_SUCCESS or error code */ +UNIV_INLINE +dberr_t +dict_stats_rename_in_table_stats( +/*=============================*/ + const char* old_dbname_utf8,/*!< in: database name, e.g. 'olddb' */ + const char* old_tablename_utf8,/*!< in: table name, e.g. 'oldtable' */ + const char* new_dbname_utf8,/*!< in: database name, e.g. 'newdb' */ + const char* new_tablename_utf8)/*!< in: table name, e.g. 
'newtable' */ +{ + pars_info_t* pinfo; + dberr_t ret; + +#ifdef UNIV_SYNC_DEBUG + ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); +#endif /* UNIV_SYNC_DEBUG */ + ut_ad(mutex_own(&dict_sys->mutex)); + + pinfo = pars_info_create(); + + pars_info_add_str_literal(pinfo, "old_dbname_utf8", old_dbname_utf8); + pars_info_add_str_literal(pinfo, "old_tablename_utf8", old_tablename_utf8); + pars_info_add_str_literal(pinfo, "new_dbname_utf8", new_dbname_utf8); + pars_info_add_str_literal(pinfo, "new_tablename_utf8", new_tablename_utf8); + + ret = dict_stats_exec_sql( + pinfo, + "PROCEDURE RENAME_IN_TABLE_STATS () IS\n" + "BEGIN\n" + "UPDATE \"" TABLE_STATS_NAME "\" SET\n" + "database_name = :new_dbname_utf8,\n" + "table_name = :new_tablename_utf8\n" + "WHERE\n" + "database_name = :old_dbname_utf8 AND\n" + "table_name = :old_tablename_utf8;\n" + "END;\n", NULL); + + return(ret); +} + +/*********************************************************************//** +Executes +UPDATE mysql.innodb_index_stats SET +database_name = '...', table_name = '...' +WHERE database_name = '...' AND table_name = '...'; +Creates its own transaction and commits it. +@return DB_SUCCESS or error code */ +UNIV_INLINE +dberr_t +dict_stats_rename_in_index_stats( +/*=============================*/ + const char* old_dbname_utf8,/*!< in: database name, e.g. 'olddb' */ + const char* old_tablename_utf8,/*!< in: table name, e.g. 'oldtable' */ + const char* new_dbname_utf8,/*!< in: database name, e.g. 'newdb' */ + const char* new_tablename_utf8)/*!< in: table name, e.g. 
'newtable' */ +{ + pars_info_t* pinfo; + dberr_t ret; + +#ifdef UNIV_SYNC_DEBUG + ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); +#endif /* UNIV_SYNC_DEBUG */ + ut_ad(mutex_own(&dict_sys->mutex)); + + pinfo = pars_info_create(); + + pars_info_add_str_literal(pinfo, "old_dbname_utf8", old_dbname_utf8); + pars_info_add_str_literal(pinfo, "old_tablename_utf8", old_tablename_utf8); + pars_info_add_str_literal(pinfo, "new_dbname_utf8", new_dbname_utf8); + pars_info_add_str_literal(pinfo, "new_tablename_utf8", new_tablename_utf8); + + ret = dict_stats_exec_sql( + pinfo, + "PROCEDURE RENAME_IN_INDEX_STATS () IS\n" + "BEGIN\n" + "UPDATE \"" INDEX_STATS_NAME "\" SET\n" + "database_name = :new_dbname_utf8,\n" + "table_name = :new_tablename_utf8\n" + "WHERE\n" + "database_name = :old_dbname_utf8 AND\n" + "table_name = :old_tablename_utf8;\n" + "END;\n", NULL); + + return(ret); +} + +/*********************************************************************//** +Renames a table in InnoDB persistent stats storage. +This function creates its own transaction and commits it. +@return DB_SUCCESS or error code */ +UNIV_INTERN +dberr_t +dict_stats_rename_table( +/*====================*/ + const char* old_name, /*!< in: old name, e.g. 'db/table' */ + const char* new_name, /*!< in: new name, e.g. 
'db/table' */ + char* errstr, /*!< out: error string if != DB_SUCCESS + is returned */ + size_t errstr_sz) /*!< in: errstr size */ +{ + char old_db_utf8[MAX_DB_UTF8_LEN]; + char new_db_utf8[MAX_DB_UTF8_LEN]; + char old_table_utf8[MAX_TABLE_UTF8_LEN]; + char new_table_utf8[MAX_TABLE_UTF8_LEN]; + dberr_t ret; + +#ifdef UNIV_SYNC_DEBUG + ut_ad(!rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); +#endif /* UNIV_SYNC_DEBUG */ + ut_ad(!mutex_own(&dict_sys->mutex)); + + /* skip innodb_table_stats and innodb_index_stats themselves */ + if (strcmp(old_name, TABLE_STATS_NAME) == 0 + || strcmp(old_name, INDEX_STATS_NAME) == 0 + || strcmp(new_name, TABLE_STATS_NAME) == 0 + || strcmp(new_name, INDEX_STATS_NAME) == 0) { + + return(DB_SUCCESS); + } + + dict_fs2utf8(old_name, old_db_utf8, sizeof(old_db_utf8), + old_table_utf8, sizeof(old_table_utf8)); + + dict_fs2utf8(new_name, new_db_utf8, sizeof(new_db_utf8), + new_table_utf8, sizeof(new_table_utf8)); + + rw_lock_x_lock(&dict_operation_lock); + mutex_enter(&dict_sys->mutex); + + ulint n_attempts = 0; + do { + n_attempts++; + + ret = dict_stats_rename_in_table_stats( + old_db_utf8, old_table_utf8, + new_db_utf8, new_table_utf8); + + if (ret == DB_DUPLICATE_KEY) { + dict_stats_delete_from_table_stats( + new_db_utf8, new_table_utf8); + } + + if (ret == DB_STATS_DO_NOT_EXIST) { + ret = DB_SUCCESS; + } + + if (ret != DB_SUCCESS) { + mutex_exit(&dict_sys->mutex); + rw_lock_x_unlock(&dict_operation_lock); + os_thread_sleep(200000 /* 0.2 sec */); + rw_lock_x_lock(&dict_operation_lock); + mutex_enter(&dict_sys->mutex); + } + } while ((ret == DB_DEADLOCK + || ret == DB_DUPLICATE_KEY + || ret == DB_LOCK_WAIT_TIMEOUT) + && n_attempts < 5); + + if (ret != DB_SUCCESS) { + ut_snprintf(errstr, errstr_sz, + "Unable to rename statistics from " + "%s.%s to %s.%s in %s: %s. 
" + "They can be renamed later using " + + "UPDATE %s SET " + "database_name = '%s', " + "table_name = '%s' " + "WHERE " + "database_name = '%s' AND " + "table_name = '%s';", + + old_db_utf8, old_table_utf8, + new_db_utf8, new_table_utf8, + TABLE_STATS_NAME_PRINT, + ut_strerr(ret), + + TABLE_STATS_NAME_PRINT, + new_db_utf8, new_table_utf8, + old_db_utf8, old_table_utf8); + mutex_exit(&dict_sys->mutex); + rw_lock_x_unlock(&dict_operation_lock); + return(ret); + } + /* else */ + + n_attempts = 0; + do { + n_attempts++; + + ret = dict_stats_rename_in_index_stats( + old_db_utf8, old_table_utf8, + new_db_utf8, new_table_utf8); + + if (ret == DB_DUPLICATE_KEY) { + dict_stats_delete_from_index_stats( + new_db_utf8, new_table_utf8); + } + + if (ret == DB_STATS_DO_NOT_EXIST) { + ret = DB_SUCCESS; + } + + if (ret != DB_SUCCESS) { + mutex_exit(&dict_sys->mutex); + rw_lock_x_unlock(&dict_operation_lock); + os_thread_sleep(200000 /* 0.2 sec */); + rw_lock_x_lock(&dict_operation_lock); + mutex_enter(&dict_sys->mutex); + } + } while ((ret == DB_DEADLOCK + || ret == DB_DUPLICATE_KEY + || ret == DB_LOCK_WAIT_TIMEOUT) + && n_attempts < 5); + + mutex_exit(&dict_sys->mutex); + rw_lock_x_unlock(&dict_operation_lock); + + if (ret != DB_SUCCESS) { + ut_snprintf(errstr, errstr_sz, + "Unable to rename statistics from " + "%s.%s to %s.%s in %s: %s. " + "They can be renamed later using " + + "UPDATE %s SET " + "database_name = '%s', " + "table_name = '%s' " + "WHERE " + "database_name = '%s' AND " + "table_name = '%s';", + + old_db_utf8, old_table_utf8, + new_db_utf8, new_table_utf8, + INDEX_STATS_NAME_PRINT, + ut_strerr(ret), + + INDEX_STATS_NAME_PRINT, + new_db_utf8, new_table_utf8, + old_db_utf8, old_table_utf8); + } + + return(ret); +} + +/* tests @{ */ +#ifdef UNIV_COMPILE_TEST_FUNCS + +/* The following unit tests test some of the functions in this file +individually, such testing cannot be performed by the mysql-test framework +via SQL. 
*/ + +/* test_dict_table_schema_check() @{ */ +void +test_dict_table_schema_check() +{ + /* + CREATE TABLE tcheck ( + c01 VARCHAR(123), + c02 INT, + c03 INT NOT NULL, + c04 INT UNSIGNED, + c05 BIGINT, + c06 BIGINT UNSIGNED NOT NULL, + c07 TIMESTAMP + ) ENGINE=INNODB; + */ + /* definition for the table 'test/tcheck' */ + dict_col_meta_t columns[] = { + {"c01", DATA_VARCHAR, 0, 123}, + {"c02", DATA_INT, 0, 4}, + {"c03", DATA_INT, DATA_NOT_NULL, 4}, + {"c04", DATA_INT, DATA_UNSIGNED, 4}, + {"c05", DATA_INT, 0, 8}, + {"c06", DATA_INT, DATA_NOT_NULL | DATA_UNSIGNED, 8}, + {"c07", DATA_INT, 0, 4}, + {"c_extra", DATA_INT, 0, 4} + }; + dict_table_schema_t schema = { + "test/tcheck", + 0 /* will be set individually for each test below */, + columns + }; + char errstr[512]; + + ut_snprintf(errstr, sizeof(errstr), "Table not found"); + + /* prevent any data dictionary modifications while we are checking + the tables' structure */ + + mutex_enter(&(dict_sys->mutex)); + + /* check that a valid table is reported as valid */ + schema.n_cols = 7; + if (dict_table_schema_check(&schema, errstr, sizeof(errstr)) + == DB_SUCCESS) { + printf("OK: test.tcheck ok\n"); + } else { + printf("ERROR: %s\n", errstr); + printf("ERROR: test.tcheck not present or corrupted\n"); + goto test_dict_table_schema_check_end; + } + + /* check columns with wrong length */ + schema.columns[1].len = 8; + if (dict_table_schema_check(&schema, errstr, sizeof(errstr)) + != DB_SUCCESS) { + printf("OK: test.tcheck.c02 has different length and is " + "reported as corrupted\n"); + } else { + printf("OK: test.tcheck.c02 has different length but is " + "reported as ok\n"); + goto test_dict_table_schema_check_end; + } + schema.columns[1].len = 4; + + /* request that c02 is NOT NULL while actually it does not have + this flag set */ + schema.columns[1].prtype_mask |= DATA_NOT_NULL; + if (dict_table_schema_check(&schema, errstr, sizeof(errstr)) + != DB_SUCCESS) { + printf("OK: test.tcheck.c02 does not have NOT NULL while 
" + "it should and is reported as corrupted\n"); + } else { + printf("ERROR: test.tcheck.c02 does not have NOT NULL while " + "it should and is not reported as corrupted\n"); + goto test_dict_table_schema_check_end; + } + schema.columns[1].prtype_mask &= ~DATA_NOT_NULL; + + /* check a table that contains some extra columns */ + schema.n_cols = 6; + if (dict_table_schema_check(&schema, errstr, sizeof(errstr)) + == DB_SUCCESS) { + printf("ERROR: test.tcheck has more columns but is not " + "reported as corrupted\n"); + goto test_dict_table_schema_check_end; + } else { + printf("OK: test.tcheck has more columns and is " + "reported as corrupted\n"); + } + + /* check a table that has some columns missing */ + schema.n_cols = 8; + if (dict_table_schema_check(&schema, errstr, sizeof(errstr)) + != DB_SUCCESS) { + printf("OK: test.tcheck has missing columns and is " + "reported as corrupted\n"); + } else { + printf("ERROR: test.tcheck has missing columns but is " + "reported as ok\n"); + goto test_dict_table_schema_check_end; + } + + /* check non-existent table */ + schema.table_name = "test/tcheck_nonexistent"; + if (dict_table_schema_check(&schema, errstr, sizeof(errstr)) + != DB_SUCCESS) { + printf("OK: test.tcheck_nonexistent is not present\n"); + } else { + printf("ERROR: test.tcheck_nonexistent is present!?\n"); + goto test_dict_table_schema_check_end; + } + +test_dict_table_schema_check_end: + + mutex_exit(&(dict_sys->mutex)); +} +/* @} */ + +/* save/fetch aux macros @{ */ +#define TEST_DATABASE_NAME "foobardb" +#define TEST_TABLE_NAME "test_dict_stats" + +#define TEST_N_ROWS 111 +#define TEST_CLUSTERED_INDEX_SIZE 222 +#define TEST_SUM_OF_OTHER_INDEX_SIZES 333 + +#define TEST_IDX1_NAME "tidx1" +#define TEST_IDX1_COL1_NAME "tidx1_col1" +#define TEST_IDX1_INDEX_SIZE 123 +#define TEST_IDX1_N_LEAF_PAGES 234 +#define TEST_IDX1_N_DIFF1 50 +#define TEST_IDX1_N_DIFF1_SAMPLE_SIZE 500 + +#define TEST_IDX2_NAME "tidx2" +#define TEST_IDX2_COL1_NAME "tidx2_col1" +#define 
TEST_IDX2_COL2_NAME "tidx2_col2" +#define TEST_IDX2_COL3_NAME "tidx2_col3" +#define TEST_IDX2_COL4_NAME "tidx2_col4" +#define TEST_IDX2_INDEX_SIZE 321 +#define TEST_IDX2_N_LEAF_PAGES 432 +#define TEST_IDX2_N_DIFF1 60 +#define TEST_IDX2_N_DIFF1_SAMPLE_SIZE 600 +#define TEST_IDX2_N_DIFF2 61 +#define TEST_IDX2_N_DIFF2_SAMPLE_SIZE 610 +#define TEST_IDX2_N_DIFF3 62 +#define TEST_IDX2_N_DIFF3_SAMPLE_SIZE 620 +#define TEST_IDX2_N_DIFF4 63 +#define TEST_IDX2_N_DIFF4_SAMPLE_SIZE 630 +/* @} */ + +/* test_dict_stats_save() @{ */ +void +test_dict_stats_save() +{ + dict_table_t table; + dict_index_t index1; + dict_field_t index1_fields[1]; + ib_uint64_t index1_stat_n_diff_key_vals[1]; + ib_uint64_t index1_stat_n_sample_sizes[1]; + dict_index_t index2; + dict_field_t index2_fields[4]; + ib_uint64_t index2_stat_n_diff_key_vals[4]; + ib_uint64_t index2_stat_n_sample_sizes[4]; + dberr_t ret; + + /* craft a dummy dict_table_t */ + table.name = (char*) (TEST_DATABASE_NAME "/" TEST_TABLE_NAME); + table.stat_n_rows = TEST_N_ROWS; + table.stat_clustered_index_size = TEST_CLUSTERED_INDEX_SIZE; + table.stat_sum_of_other_index_sizes = TEST_SUM_OF_OTHER_INDEX_SIZES; + UT_LIST_INIT(table.indexes); + UT_LIST_ADD_LAST(indexes, table.indexes, &index1); + UT_LIST_ADD_LAST(indexes, table.indexes, &index2); + ut_d(table.magic_n = DICT_TABLE_MAGIC_N); + ut_d(index1.magic_n = DICT_INDEX_MAGIC_N); + + index1.name = TEST_IDX1_NAME; + index1.table = &table; + index1.cached = 1; + index1.n_uniq = 1; + index1.fields = index1_fields; + index1.stat_n_diff_key_vals = index1_stat_n_diff_key_vals; + index1.stat_n_sample_sizes = index1_stat_n_sample_sizes; + index1.stat_index_size = TEST_IDX1_INDEX_SIZE; + index1.stat_n_leaf_pages = TEST_IDX1_N_LEAF_PAGES; + index1_fields[0].name = TEST_IDX1_COL1_NAME; + index1_stat_n_diff_key_vals[0] = TEST_IDX1_N_DIFF1; + index1_stat_n_sample_sizes[0] = TEST_IDX1_N_DIFF1_SAMPLE_SIZE; + + ut_d(index2.magic_n = DICT_INDEX_MAGIC_N); + index2.name = TEST_IDX2_NAME; + 
index2.table = &table; + index2.cached = 1; + index2.n_uniq = 4; + index2.fields = index2_fields; + index2.stat_n_diff_key_vals = index2_stat_n_diff_key_vals; + index2.stat_n_sample_sizes = index2_stat_n_sample_sizes; + index2.stat_index_size = TEST_IDX2_INDEX_SIZE; + index2.stat_n_leaf_pages = TEST_IDX2_N_LEAF_PAGES; + index2_fields[0].name = TEST_IDX2_COL1_NAME; + index2_fields[1].name = TEST_IDX2_COL2_NAME; + index2_fields[2].name = TEST_IDX2_COL3_NAME; + index2_fields[3].name = TEST_IDX2_COL4_NAME; + index2_stat_n_diff_key_vals[0] = TEST_IDX2_N_DIFF1; + index2_stat_n_diff_key_vals[1] = TEST_IDX2_N_DIFF2; + index2_stat_n_diff_key_vals[2] = TEST_IDX2_N_DIFF3; + index2_stat_n_diff_key_vals[3] = TEST_IDX2_N_DIFF4; + index2_stat_n_sample_sizes[0] = TEST_IDX2_N_DIFF1_SAMPLE_SIZE; + index2_stat_n_sample_sizes[1] = TEST_IDX2_N_DIFF2_SAMPLE_SIZE; + index2_stat_n_sample_sizes[2] = TEST_IDX2_N_DIFF3_SAMPLE_SIZE; + index2_stat_n_sample_sizes[3] = TEST_IDX2_N_DIFF4_SAMPLE_SIZE; + + ret = dict_stats_save(&table, NULL); + + ut_a(ret == DB_SUCCESS); + + printf("\nOK: stats saved successfully, now go ahead and read " + "what's inside %s and %s:\n\n", + TABLE_STATS_NAME_PRINT, + INDEX_STATS_NAME_PRINT); + + printf("SELECT COUNT(*) = 1 AS table_stats_saved_successfully\n" + "FROM %s\n" + "WHERE\n" + "database_name = '%s' AND\n" + "table_name = '%s' AND\n" + "n_rows = %d AND\n" + "clustered_index_size = %d AND\n" + "sum_of_other_index_sizes = %d;\n" + "\n", + TABLE_STATS_NAME_PRINT, + TEST_DATABASE_NAME, + TEST_TABLE_NAME, + TEST_N_ROWS, + TEST_CLUSTERED_INDEX_SIZE, + TEST_SUM_OF_OTHER_INDEX_SIZES); + + printf("SELECT COUNT(*) = 3 AS tidx1_stats_saved_successfully\n" + "FROM %s\n" + "WHERE\n" + "database_name = '%s' AND\n" + "table_name = '%s' AND\n" + "index_name = '%s' AND\n" + "(\n" + " (stat_name = 'size' AND stat_value = %d AND" + " sample_size IS NULL) OR\n" + " (stat_name = 'n_leaf_pages' AND stat_value = %d AND" + " sample_size IS NULL) OR\n" + " (stat_name = 
'n_diff_pfx01' AND stat_value = %d AND" + " sample_size = '%d' AND stat_description = '%s')\n" + ");\n" + "\n", + INDEX_STATS_NAME_PRINT, + TEST_DATABASE_NAME, + TEST_TABLE_NAME, + TEST_IDX1_NAME, + TEST_IDX1_INDEX_SIZE, + TEST_IDX1_N_LEAF_PAGES, + TEST_IDX1_N_DIFF1, + TEST_IDX1_N_DIFF1_SAMPLE_SIZE, + TEST_IDX1_COL1_NAME); + + printf("SELECT COUNT(*) = 6 AS tidx2_stats_saved_successfully\n" + "FROM %s\n" + "WHERE\n" + "database_name = '%s' AND\n" + "table_name = '%s' AND\n" + "index_name = '%s' AND\n" + "(\n" + " (stat_name = 'size' AND stat_value = %d AND" + " sample_size IS NULL) OR\n" + " (stat_name = 'n_leaf_pages' AND stat_value = %d AND" + " sample_size IS NULL) OR\n" + " (stat_name = 'n_diff_pfx01' AND stat_value = %d AND" + " sample_size = '%d' AND stat_description = '%s') OR\n" + " (stat_name = 'n_diff_pfx02' AND stat_value = %d AND" + " sample_size = '%d' AND stat_description = '%s,%s') OR\n" + " (stat_name = 'n_diff_pfx03' AND stat_value = %d AND" + " sample_size = '%d' AND stat_description = '%s,%s,%s') OR\n" + " (stat_name = 'n_diff_pfx04' AND stat_value = %d AND" + " sample_size = '%d' AND stat_description = '%s,%s,%s,%s')\n" + ");\n" + "\n", + INDEX_STATS_NAME_PRINT, + TEST_DATABASE_NAME, + TEST_TABLE_NAME, + TEST_IDX2_NAME, + TEST_IDX2_INDEX_SIZE, + TEST_IDX2_N_LEAF_PAGES, + TEST_IDX2_N_DIFF1, + TEST_IDX2_N_DIFF1_SAMPLE_SIZE, TEST_IDX2_COL1_NAME, + TEST_IDX2_N_DIFF2, + TEST_IDX2_N_DIFF2_SAMPLE_SIZE, + TEST_IDX2_COL1_NAME, TEST_IDX2_COL2_NAME, + TEST_IDX2_N_DIFF3, + TEST_IDX2_N_DIFF3_SAMPLE_SIZE, + TEST_IDX2_COL1_NAME, TEST_IDX2_COL2_NAME, TEST_IDX2_COL3_NAME, + TEST_IDX2_N_DIFF4, + TEST_IDX2_N_DIFF4_SAMPLE_SIZE, + TEST_IDX2_COL1_NAME, TEST_IDX2_COL2_NAME, TEST_IDX2_COL3_NAME, + TEST_IDX2_COL4_NAME); +} +/* @} */ + +/* test_dict_stats_fetch_from_ps() @{ */ +void +test_dict_stats_fetch_from_ps() +{ + dict_table_t table; + dict_index_t index1; + ib_uint64_t index1_stat_n_diff_key_vals[1]; + ib_uint64_t index1_stat_n_sample_sizes[1]; + dict_index_t 
index2; + ib_uint64_t index2_stat_n_diff_key_vals[4]; + ib_uint64_t index2_stat_n_sample_sizes[4]; + dberr_t ret; + + /* craft a dummy dict_table_t */ + table.name = (char*) (TEST_DATABASE_NAME "/" TEST_TABLE_NAME); + UT_LIST_INIT(table.indexes); + UT_LIST_ADD_LAST(indexes, table.indexes, &index1); + UT_LIST_ADD_LAST(indexes, table.indexes, &index2); + ut_d(table.magic_n = DICT_TABLE_MAGIC_N); + + index1.name = TEST_IDX1_NAME; + ut_d(index1.magic_n = DICT_INDEX_MAGIC_N); + index1.cached = 1; + index1.n_uniq = 1; + index1.stat_n_diff_key_vals = index1_stat_n_diff_key_vals; + index1.stat_n_sample_sizes = index1_stat_n_sample_sizes; + + index2.name = TEST_IDX2_NAME; + ut_d(index2.magic_n = DICT_INDEX_MAGIC_N); + index2.cached = 1; + index2.n_uniq = 4; + index2.stat_n_diff_key_vals = index2_stat_n_diff_key_vals; + index2.stat_n_sample_sizes = index2_stat_n_sample_sizes; + + ret = dict_stats_fetch_from_ps(&table); + + ut_a(ret == DB_SUCCESS); + + ut_a(table.stat_n_rows == TEST_N_ROWS); + ut_a(table.stat_clustered_index_size == TEST_CLUSTERED_INDEX_SIZE); + ut_a(table.stat_sum_of_other_index_sizes + == TEST_SUM_OF_OTHER_INDEX_SIZES); + + ut_a(index1.stat_index_size == TEST_IDX1_INDEX_SIZE); + ut_a(index1.stat_n_leaf_pages == TEST_IDX1_N_LEAF_PAGES); + ut_a(index1_stat_n_diff_key_vals[0] == TEST_IDX1_N_DIFF1); + ut_a(index1_stat_n_sample_sizes[0] == TEST_IDX1_N_DIFF1_SAMPLE_SIZE); + + ut_a(index2.stat_index_size == TEST_IDX2_INDEX_SIZE); + ut_a(index2.stat_n_leaf_pages == TEST_IDX2_N_LEAF_PAGES); + ut_a(index2_stat_n_diff_key_vals[0] == TEST_IDX2_N_DIFF1); + ut_a(index2_stat_n_sample_sizes[0] == TEST_IDX2_N_DIFF1_SAMPLE_SIZE); + ut_a(index2_stat_n_diff_key_vals[1] == TEST_IDX2_N_DIFF2); + ut_a(index2_stat_n_sample_sizes[1] == TEST_IDX2_N_DIFF2_SAMPLE_SIZE); + ut_a(index2_stat_n_diff_key_vals[2] == TEST_IDX2_N_DIFF3); + ut_a(index2_stat_n_sample_sizes[2] == TEST_IDX2_N_DIFF3_SAMPLE_SIZE); + ut_a(index2_stat_n_diff_key_vals[3] == TEST_IDX2_N_DIFF4); + 
ut_a(index2_stat_n_sample_sizes[3] == TEST_IDX2_N_DIFF4_SAMPLE_SIZE); + + printf("OK: fetch successful\n"); +} +/* @} */ + +/* test_dict_stats_all() @{ */ +void +test_dict_stats_all() +{ + test_dict_table_schema_check(); + + test_dict_stats_save(); + + test_dict_stats_fetch_from_ps(); +} +/* @} */ + +#endif /* UNIV_COMPILE_TEST_FUNCS */ +/* @} */ + +#endif /* UNIV_HOTBACKUP */ diff --git a/storage/xtradb/dict/dict0stats_bg.cc b/storage/xtradb/dict/dict0stats_bg.cc new file mode 100644 index 00000000000..9e1f75a13a9 --- /dev/null +++ b/storage/xtradb/dict/dict0stats_bg.cc @@ -0,0 +1,367 @@ +/***************************************************************************** + +Copyright (c) 2012, 2013, Oracle and/or its affiliates. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/**************************************************//** +@file dict/dict0stats_bg.cc +Code used for background table and index stats gathering. 
+ +Created Apr 25, 2012 Vasil Dimov +*******************************************************/ + +#include "row0mysql.h" +#include "srv0start.h" +#include "dict0stats.h" +#include "dict0stats_bg.h" + +#ifdef UNIV_NONINL +# include "dict0stats_bg.ic" +#endif + +#include <vector> + +/** Minimum time interval between stats recalc for a given table */ +#define MIN_RECALC_INTERVAL 10 /* seconds */ + +#define SHUTTING_DOWN() (srv_shutdown_state != SRV_SHUTDOWN_NONE) + +/** Event to wake up the stats thread */ +UNIV_INTERN os_event_t dict_stats_event = NULL; + +/** This mutex protects the "recalc_pool" variable. */ +static ib_mutex_t recalc_pool_mutex; +#ifdef HAVE_PSI_INTERFACE +static mysql_pfs_key_t recalc_pool_mutex_key; +#endif /* HAVE_PSI_INTERFACE */ + +/** The number of tables that can be added to "recalc_pool" before +it is enlarged */ +static const ulint RECALC_POOL_INITIAL_SLOTS = 128; + +/** The multitude of tables whose stats are to be automatically +recalculated - an STL vector */ +typedef std::vector<table_id_t> recalc_pool_t; +static recalc_pool_t recalc_pool; + +typedef recalc_pool_t::iterator recalc_pool_iterator_t; + +/*****************************************************************//** +Initialize the recalc pool, called once during thread initialization. */ +static +void +dict_stats_recalc_pool_init() +/*=========================*/ +{ + ut_ad(!srv_read_only_mode); + + recalc_pool.reserve(RECALC_POOL_INITIAL_SLOTS); +} + +/*****************************************************************//** +Free the resources occupied by the recalc pool, called once during +thread de-initialization. */ +static +void +dict_stats_recalc_pool_deinit() +/*===========================*/ +{ + ut_ad(!srv_read_only_mode); + + recalc_pool_t().swap(recalc_pool); /* empties the pool and, unlike clear(), also releases its buffer */ +} + +/*****************************************************************//** +Add a table to the recalc pool, which is processed by the +background stats gathering thread. 
Only the table id is added to the +list, so the table can be closed after being enqueued and it will be +opened when needed. If the table does not exist later (has been DROPped), +then it will be removed from the pool and skipped. */ +UNIV_INTERN +void +dict_stats_recalc_pool_add( +/*=======================*/ + const dict_table_t* table) /*!< in: table to add */ +{ + ut_ad(!srv_read_only_mode); + + mutex_enter(&recalc_pool_mutex); + + /* quit if already in the list */ + for (recalc_pool_iterator_t iter = recalc_pool.begin(); + iter != recalc_pool.end(); + ++iter) { + + if (*iter == table->id) { + mutex_exit(&recalc_pool_mutex); + return; + } + } + + recalc_pool.push_back(table->id); + + mutex_exit(&recalc_pool_mutex); + + os_event_set(dict_stats_event); +} + +/*****************************************************************//** +Get a table from the auto recalc pool. The returned table id is removed +from the pool. +@return true if the pool was non-empty and "id" was set, false otherwise */ +static +bool +dict_stats_recalc_pool_get( +/*=======================*/ + table_id_t* id) /*!< out: table id, or unmodified if list is + empty */ +{ + ut_ad(!srv_read_only_mode); + + mutex_enter(&recalc_pool_mutex); + + if (recalc_pool.empty()) { + mutex_exit(&recalc_pool_mutex); + return(false); + } + + *id = recalc_pool[0]; + + recalc_pool.erase(recalc_pool.begin()); + + mutex_exit(&recalc_pool_mutex); + + return(true); +} + +/*****************************************************************//** +Delete a given table from the auto recalc pool. 
+dict_stats_recalc_pool_del() */ +UNIV_INTERN +void +dict_stats_recalc_pool_del( +/*=======================*/ + const dict_table_t* table) /*!< in: table to remove */ +{ + ut_ad(!srv_read_only_mode); + ut_ad(mutex_own(&dict_sys->mutex)); + + mutex_enter(&recalc_pool_mutex); + + ut_ad(table->id > 0); + + for (recalc_pool_iterator_t iter = recalc_pool.begin(); + iter != recalc_pool.end(); + ++iter) { + + if (*iter == table->id) { + /* erase() invalidates the iterator */ + recalc_pool.erase(iter); + break; + } + } + + mutex_exit(&recalc_pool_mutex); +} + +/*****************************************************************//** +Wait until background stats thread has stopped using the specified table. +The caller must have locked the data dictionary using +row_mysql_lock_data_dictionary() and this function may unlock it temporarily +and restore the lock before it exits. +The background stats thread is guaranteed not to start using the specified +table after this function returns and before the caller unlocks the data +dictionary because it sets the BG_STAT_IN_PROGRESS bit in table->stats_bg_flag +under dict_sys->mutex. */ +UNIV_INTERN +void +dict_stats_wait_bg_to_stop_using_table( +/*===================================*/ + dict_table_t* table, /*!< in/out: table */ + trx_t* trx) /*!< in/out: transaction to use for + unlocking/locking the data dict */ +{ + while (!dict_stats_stop_bg(table)) { + DICT_STATS_BG_YIELD(trx); + } +} + +/*****************************************************************//** +Initialize global variables needed for the operation of dict_stats_thread() +Must be called before dict_stats_thread() is started. 
*/ +UNIV_INTERN +void +dict_stats_thread_init() +/*====================*/ +{ + ut_a(!srv_read_only_mode); + + dict_stats_event = os_event_create(); + + /* The recalc_pool_mutex is acquired from: + 1) the background stats gathering thread before any other latch + and released without latching anything else in between (thus + any level would do here) + 2) from row_update_statistics_if_needed() + and released without latching anything else in between. We know + that dict_sys->mutex (SYNC_DICT) is not acquired when + row_update_statistics_if_needed() is called and it may be acquired + inside that function (thus a level <=SYNC_DICT would do). + 3) from row_drop_table_for_mysql() after dict_sys->mutex (SYNC_DICT) + and dict_operation_lock (SYNC_DICT_OPERATION) have been locked + (thus a level <SYNC_DICT && <SYNC_DICT_OPERATION would do) + So we choose SYNC_STATS_AUTO_RECALC to be about below SYNC_DICT. */ + mutex_create(recalc_pool_mutex_key, &recalc_pool_mutex, + SYNC_STATS_AUTO_RECALC); + + dict_stats_recalc_pool_init(); +} + +/*****************************************************************//** +Free resources allocated by dict_stats_thread_init(), must be called +after dict_stats_thread() has exited. */ +UNIV_INTERN +void +dict_stats_thread_deinit() +/*======================*/ +{ + ut_a(!srv_read_only_mode); + ut_ad(!srv_dict_stats_thread_active); + + dict_stats_recalc_pool_deinit(); + + mutex_free(&recalc_pool_mutex); + memset(&recalc_pool_mutex, 0x0, sizeof(recalc_pool_mutex)); + + os_event_free(dict_stats_event); + dict_stats_event = NULL; +} + +/*****************************************************************//** +Get the first table that has been added for auto recalc and eventually +update its stats. 
*/ +static +void +dict_stats_process_entry_from_recalc_pool() +/*=======================================*/ +{ + table_id_t table_id; + + ut_ad(!srv_read_only_mode); + + /* pop the first table from the auto recalc pool */ + if (!dict_stats_recalc_pool_get(&table_id)) { + /* no tables for auto recalc */ + return; + } + + dict_table_t* table; + + mutex_enter(&dict_sys->mutex); + + table = dict_table_open_on_id(table_id, TRUE, DICT_TABLE_OP_NORMAL); + + if (table == NULL) { + /* table does not exist, must have been DROPped + after its id was enqueued */ + mutex_exit(&dict_sys->mutex); + return; + } + + /* Check whether table is corrupted */ + if (table->corrupted) { + dict_table_close(table, TRUE, FALSE); + mutex_exit(&dict_sys->mutex); + return; + } + + table->stats_bg_flag = BG_STAT_IN_PROGRESS; + + mutex_exit(&dict_sys->mutex); + + /* ut_time() could be expensive, the current function + is called once every time a table has been changed more than 10% and + on a system with lots of small tables, this could become hot. If we + find out that this is a problem, then the check below could eventually + be replaced with something else, though a time interval is the natural + approach. */ + + if (ut_difftime(ut_time(), table->stats_last_recalc) + < MIN_RECALC_INTERVAL) { + + /* Stats were (re)calculated not long ago. To avoid + too frequent stats updates we put back the table on + the auto recalc list and do nothing. */ + + dict_stats_recalc_pool_add(table); + + } else { + + dict_stats_update(table, DICT_STATS_RECALC_PERSISTENT); + } + + mutex_enter(&dict_sys->mutex); + + table->stats_bg_flag = BG_STAT_NONE; + + dict_table_close(table, TRUE, FALSE); + + mutex_exit(&dict_sys->mutex); +} + +/*****************************************************************//** +This is the thread for background stats gathering. It pops tables from +the auto recalc list and processes them, eventually recalculating their +statistics. 
+@return this function does not return, it calls os_thread_exit() */ +extern "C" UNIV_INTERN +os_thread_ret_t +DECLARE_THREAD(dict_stats_thread)( +/*==============================*/ + void* arg __attribute__((unused))) /*!< in: a dummy parameter + required by os_thread_create */ +{ + ut_a(!srv_read_only_mode); + + srv_dict_stats_thread_active = TRUE; + + while (!SHUTTING_DOWN()) { + + /* Wake up periodically even if not signaled. This is + because we may lose an event - if the below call to + dict_stats_process_entry_from_recalc_pool() puts the entry back + in the list, the os_event_set() will be lost by the subsequent + os_event_reset(). */ + os_event_wait_time( + dict_stats_event, MIN_RECALC_INTERVAL * 1000000); + + if (SHUTTING_DOWN()) { + break; + } + + dict_stats_process_entry_from_recalc_pool(); + + os_event_reset(dict_stats_event); + } + + srv_dict_stats_thread_active = FALSE; + + /* We count the number of threads in os_thread_exit(). A created + thread should always use that to exit instead of return(). */ + os_thread_exit(NULL); + + OS_THREAD_DUMMY_RETURN; +} |