diff options
Diffstat (limited to 'storage/innobase/row/row0trunc.cc')
-rw-r--r-- | storage/innobase/row/row0trunc.cc | 3060 |
1 files changed, 3060 insertions, 0 deletions
diff --git a/storage/innobase/row/row0trunc.cc b/storage/innobase/row/row0trunc.cc new file mode 100644 index 00000000000..ff70e0ff31e --- /dev/null +++ b/storage/innobase/row/row0trunc.cc @@ -0,0 +1,3060 @@ +/***************************************************************************** + +Copyright (c) 2013, 2015, Oracle and/or its affiliates. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/**************************************************//** +@file row/row0trunc.cc +TRUNCATE implementation + +Created 2013-04-12 Sunny Bains +*******************************************************/ + +#include "row0mysql.h" +#include "pars0pars.h" +#include "dict0crea.h" +#include "dict0boot.h" +#include "dict0stats.h" +#include "dict0stats_bg.h" +#include "lock0lock.h" +#include "fts0fts.h" +#include "fsp0sysspace.h" +#include "srv0start.h" +#include "row0trunc.h" +#include "os0file.h" +#include <vector> + +bool truncate_t::s_fix_up_active = false; +truncate_t::tables_t truncate_t::s_tables; +truncate_t::truncated_tables_t truncate_t::s_truncated_tables; + +/** +Iterator over the the raw records in an index, doesn't support MVCC. 
*/ +class IndexIterator { + +public: + /** + Iterate over an indexes records + @param index index to iterate over */ + explicit IndexIterator(dict_index_t* index) + : + m_index(index) + { + /* Do nothing */ + } + + /** + Search for key. Position the cursor on a record GE key. + @return DB_SUCCESS or error code. */ + dberr_t search(dtuple_t& key, bool noredo) + { + mtr_start(&m_mtr); + + if (noredo) { + mtr_set_log_mode(&m_mtr, MTR_LOG_NO_REDO); + } + + btr_pcur_open_on_user_rec( + m_index, + &key, + PAGE_CUR_GE, + BTR_MODIFY_LEAF, + &m_pcur, &m_mtr); + + return(DB_SUCCESS); + } + + /** + Iterate over all the records + @return DB_SUCCESS or error code */ + template <typename Callback> + dberr_t for_each(Callback& callback) + { + dberr_t err = DB_SUCCESS; + + for (;;) { + + if (!btr_pcur_is_on_user_rec(&m_pcur) + || !callback.match(&m_mtr, &m_pcur)) { + + /* The end of of the index has been reached. */ + err = DB_END_OF_INDEX; + break; + } + + rec_t* rec = btr_pcur_get_rec(&m_pcur); + + if (!rec_get_deleted_flag(rec, FALSE)) { + + err = callback(&m_mtr, &m_pcur); + + if (err != DB_SUCCESS) { + break; + } + } + + btr_pcur_move_to_next_user_rec(&m_pcur, &m_mtr); + } + + btr_pcur_close(&m_pcur); + mtr_commit(&m_mtr); + + return(err == DB_END_OF_INDEX ? DB_SUCCESS : err); + } + +private: + // Disable copying + IndexIterator(const IndexIterator&); + IndexIterator& operator=(const IndexIterator&); + +private: + mtr_t m_mtr; + btr_pcur_t m_pcur; + dict_index_t* m_index; +}; + +/** SysIndex table iterator, iterate over records for a table. */ +class SysIndexIterator { + +public: + /** + Iterate over all the records that match the table id. 
+ @return DB_SUCCESS or error code */ + template <typename Callback> + dberr_t for_each(Callback& callback) const + { + dict_index_t* sys_index; + byte buf[DTUPLE_EST_ALLOC(1)]; + dtuple_t* tuple = + dtuple_create_from_mem(buf, sizeof(buf), 1, 0); + dfield_t* dfield = dtuple_get_nth_field(tuple, 0); + + dfield_set_data( + dfield, + callback.table_id(), + sizeof(*callback.table_id())); + + sys_index = dict_table_get_first_index(dict_sys->sys_indexes); + + dict_index_copy_types(tuple, sys_index, 1); + + IndexIterator iterator(sys_index); + + /* Search on the table id and position the cursor + on GE table_id. */ + iterator.search(*tuple, callback.get_logging_status()); + + return(iterator.for_each(callback)); + } +}; + +/** Generic callback abstract class. */ +class Callback +{ + +public: + /** + Constructor + @param table_id id of the table being operated. + @param noredo if true turn off logging. */ + Callback(table_id_t table_id, bool noredo) + : + m_id(), + m_noredo(noredo) + { + /* Convert to storage byte order. */ + mach_write_to_8(&m_id, table_id); + } + + /** + Destructor */ + virtual ~Callback() + { + /* Do nothing */ + } + + /** + @param mtr mini-transaction covering the iteration + @param pcur persistent cursor used for iteration + @return true if the table id column matches. */ + bool match(mtr_t* mtr, btr_pcur_t* pcur) const + { + ulint len; + const byte* field; + rec_t* rec = btr_pcur_get_rec(pcur); + + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_INDEXES__TABLE_ID, &len); + + ut_ad(len == 8); + + return(memcmp(&m_id, field, len) == 0); + } + + /** + @return pointer to table id storage format buffer */ + const table_id_t* table_id() const + { + return(&m_id); + } + + /** + @return return if logging needs to be turned off. 
*/ + bool get_logging_status() const + { + return(m_noredo); + } + +protected: + // Disably copying + Callback(const Callback&); + Callback& operator=(const Callback&); + +protected: + /** Table id in storage format */ + table_id_t m_id; + + /** Turn off logging. */ + const bool m_noredo; +}; + +/** +Creates a TRUNCATE log record with space id, table name, data directory path, +tablespace flags, table format, index ids, index types, number of index fields +and index field information of the table. */ +class TruncateLogger : public Callback { + +public: + /** + Constructor + + @param table Table to truncate + @param flags tablespace falgs */ + TruncateLogger( + dict_table_t* table, + ulint flags, + table_id_t new_table_id) + : + Callback(table->id, false), + m_table(table), + m_flags(flags), + m_truncate(table->id, new_table_id, table->data_dir_path), + m_log_file_name() + { + /* Do nothing */ + } + + /** + Initialize Truncate Logger by constructing Truncate Log File Name. + + @return DB_SUCCESS or error code. */ + dberr_t init() + { + /* Construct log file name. 
*/ + ulint log_file_name_buf_sz = + strlen(srv_log_group_home_dir) + 22 + 22 + 1 /* NUL */ + + strlen(TruncateLogger::s_log_prefix) + + strlen(TruncateLogger::s_log_ext); + + m_log_file_name = UT_NEW_ARRAY_NOKEY(char, log_file_name_buf_sz); + if (m_log_file_name == NULL) { + return(DB_OUT_OF_MEMORY); + } + memset(m_log_file_name, 0, log_file_name_buf_sz); + + strcpy(m_log_file_name, srv_log_group_home_dir); + ulint log_file_name_len = strlen(m_log_file_name); + if (m_log_file_name[log_file_name_len - 1] + != OS_PATH_SEPARATOR) { + + m_log_file_name[log_file_name_len] + = OS_PATH_SEPARATOR; + log_file_name_len = strlen(m_log_file_name); + } + + ut_snprintf(m_log_file_name + log_file_name_len, + log_file_name_buf_sz - log_file_name_len, + "%s%lu_%lu_%s", + TruncateLogger::s_log_prefix, + (ulong) m_table->space, + (ulong) m_table->id, + TruncateLogger::s_log_ext); + + return(DB_SUCCESS); + + } + + /** + Destructor */ + ~TruncateLogger() + { + if (m_log_file_name != NULL) { + bool exist; + os_file_delete_if_exists( + innodb_log_file_key, m_log_file_name, &exist); + UT_DELETE_ARRAY(m_log_file_name); + m_log_file_name = NULL; + } + } + + /** + @param mtr mini-transaction covering the read + @param pcur persistent cursor used for reading + @return DB_SUCCESS or error code */ + dberr_t operator()(mtr_t* mtr, btr_pcur_t* pcur); + + /** Called after iteratoring over the records. + @return true if invariant satisfied. */ + bool debug() const + { + /* We must find all the index entries on disk. 
*/ + return(UT_LIST_GET_LEN(m_table->indexes) + == m_truncate.indexes()); + } + + /** + Write the TRUNCATE log + @return DB_SUCCESS or error code */ + dberr_t log() const + { + dberr_t err = DB_SUCCESS; + + if (m_log_file_name == 0) { + return(DB_ERROR); + } + + bool ret; + os_file_t handle = os_file_create( + innodb_log_file_key, m_log_file_name, + OS_FILE_CREATE, OS_FILE_NORMAL, + OS_LOG_FILE, srv_read_only_mode, &ret); + if (!ret) { + return(DB_IO_ERROR); + } + + + ulint sz = UNIV_PAGE_SIZE; + void* buf = ut_zalloc_nokey(sz + UNIV_PAGE_SIZE); + if (buf == 0) { + os_file_close(handle); + return(DB_OUT_OF_MEMORY); + } + + /* Align the memory for file i/o if we might have O_DIRECT set*/ + byte* log_buf = static_cast<byte*>( + ut_align(buf, UNIV_PAGE_SIZE)); + + lsn_t lsn = log_get_lsn(); + + /* Generally loop should exit in single go but + just for those 1% of rare cases we need to assume + corner case. */ + do { + /* First 4 bytes are reserved for magic number + which is currently 0. */ + err = m_truncate.write( + log_buf + 4, log_buf + sz - 4, + m_table->space, m_table->name.m_name, + m_flags, m_table->flags, lsn); + + DBUG_EXECUTE_IF("ib_err_trunc_oom_logging", + err = DB_FAIL;); + + if (err != DB_SUCCESS) { + ut_ad(err == DB_FAIL); + ut_free(buf); + sz *= 2; + buf = ut_zalloc_nokey(sz + UNIV_PAGE_SIZE); + DBUG_EXECUTE_IF("ib_err_trunc_oom_logging", + ut_free(buf); + buf = 0;); + if (buf == 0) { + os_file_close(handle); + return(DB_OUT_OF_MEMORY); + } + log_buf = static_cast<byte*>( + ut_align(buf, UNIV_PAGE_SIZE)); + } + + } while (err != DB_SUCCESS); + + dberr_t io_err; + + IORequest request(IORequest::WRITE); + + request.disable_compression(); + + io_err = os_file_write( + request, m_log_file_name, handle, log_buf, 0, sz); + + if (io_err != DB_SUCCESS) { + + ib::error() + << "IO: Failed to write the file size to '" + << m_log_file_name << "'"; + + /* Preserve the original error code */ + if (err == DB_SUCCESS) { + err = io_err; + } + } + + 
os_file_flush(handle); + os_file_close(handle); + + ut_free(buf); + + /* Why we need MLOG_TRUNCATE when we have truncate_log for + recovery? + - truncate log can protect us if crash happens while truncate + is active. Once truncate is done truncate log is removed. + - If crash happens post truncate and system is yet to + checkpoint, on recovery we would see REDO records from action + before truncate (unless we explicitly checkpoint before + returning from truncate API. Costly alternative so rejected). + - These REDO records may reference a page that doesn't exist + post truncate so we need a mechanism to skip all such REDO + records. MLOG_TRUNCATE records space_id and lsn that exactly + serve the purpose. + - If checkpoint happens post truncate and crash happens post + this point then neither MLOG_TRUNCATE nor REDO record + from action before truncate are accessible. */ + if (!is_system_tablespace(m_table->space)) { + mtr_t mtr; + byte* log_ptr; + + mtr_start(&mtr); + + log_ptr = mlog_open(&mtr, 11 + 8); + log_ptr = mlog_write_initial_log_record_low( + MLOG_TRUNCATE, m_table->space, 0, + log_ptr, &mtr); + + mach_write_to_8(log_ptr, lsn); + log_ptr += 8; + + mlog_close(&mtr, log_ptr); + mtr_commit(&mtr); + } + + return(err); + } + + /** + Indicate completion of truncate log by writing magic-number. + File will be removed from the system but to protect against + unlink (File-System) anomalies we ensure we write magic-number. */ + void done() + { + if (m_log_file_name == 0) { + return; + } + + bool ret; + os_file_t handle = os_file_create_simple_no_error_handling( + innodb_log_file_key, m_log_file_name, + OS_FILE_OPEN, OS_FILE_READ_WRITE, + srv_read_only_mode, &ret); + DBUG_EXECUTE_IF("ib_err_trunc_writing_magic_number", + os_file_close(handle); + ret = false;); + if (!ret) { + ib::error() << "Failed to open truncate log file " + << m_log_file_name << "." 
+ " If server crashes before truncate log is" + " removed make sure it is manually removed" + " before restarting server"; + os_file_delete(innodb_log_file_key, m_log_file_name); + return; + } + + byte buffer[sizeof(TruncateLogger::s_magic)]; + mach_write_to_4(buffer, TruncateLogger::s_magic); + + dberr_t err; + + IORequest request(IORequest::WRITE); + + request.disable_compression(); + + err = os_file_write( + request, + m_log_file_name, handle, buffer, 0, sizeof(buffer)); + + if (err != DB_SUCCESS) { + + ib::error() + << "IO: Failed to write the magic number to '" + << m_log_file_name << "'"; + } + + DBUG_EXECUTE_IF("ib_trunc_crash_after_updating_magic_no", + DBUG_SUICIDE();); + os_file_flush(handle); + os_file_close(handle); + DBUG_EXECUTE_IF("ib_trunc_crash_after_logging_complete", + log_buffer_flush_to_disk(); + os_thread_sleep(1000000); + DBUG_SUICIDE();); + os_file_delete(innodb_log_file_key, m_log_file_name); + } + +private: + // Disably copying + TruncateLogger(const TruncateLogger&); + TruncateLogger& operator=(const TruncateLogger&); + +private: + /** Lookup the index using the index id. + @return index instance if found else NULL */ + const dict_index_t* find(index_id_t id) const + { + for (const dict_index_t* index = UT_LIST_GET_FIRST( + m_table->indexes); + index != NULL; + index = UT_LIST_GET_NEXT(indexes, index)) { + + if (index->id == id) { + return(index); + } + } + + return(NULL); + } + +private: + /** Table to be truncated */ + dict_table_t* m_table; + + /** Tablespace flags */ + ulint m_flags; + + /** Collect table to truncate information */ + truncate_t m_truncate; + + /** Truncate log file name. */ + char* m_log_file_name; + +public: + /** Magic Number to indicate truncate action is complete. */ + const static ib_uint32_t s_magic; + + /** Truncate Log file Prefix. */ + const static char* s_log_prefix; + + /** Truncate Log file Extension. 
*/ + const static char* s_log_ext; +}; + +const ib_uint32_t TruncateLogger::s_magic = 32743712; +const char* TruncateLogger::s_log_prefix = "ib_"; +const char* TruncateLogger::s_log_ext = "trunc.log"; + +/** +Scan to find out truncate log file from the given directory path. + +@param dir_path look for log directory in following path. +@param log_files cache to hold truncate log file name found. +@return DB_SUCCESS or error code. */ +dberr_t +TruncateLogParser::scan( + const char* dir_path, + trunc_log_files_t& log_files) +{ + os_file_dir_t dir; + os_file_stat_t fileinfo; + dberr_t err = DB_SUCCESS; + ulint ext_len = strlen(TruncateLogger::s_log_ext); + ulint prefix_len = strlen(TruncateLogger::s_log_prefix); + ulint dir_len = strlen(dir_path); + + /* Scan and look out for the truncate log files. */ + dir = os_file_opendir(dir_path, true); + if (dir == NULL) { + return(DB_IO_ERROR); + } + + while (fil_file_readdir_next_file( + &err, dir_path, dir, &fileinfo) == 0) { + + ulint nm_len = strlen(fileinfo.name); + + if (fileinfo.type == OS_FILE_TYPE_FILE + && nm_len > ext_len + prefix_len + && (0 == strncmp(fileinfo.name + nm_len - ext_len, + TruncateLogger::s_log_ext, ext_len)) + && (0 == strncmp(fileinfo.name, + TruncateLogger::s_log_prefix, + prefix_len))) { + + if (fileinfo.size == 0) { + /* Truncate log not written. Remove the file. 
*/ + os_file_delete( + innodb_log_file_key, fileinfo.name); + continue; + } + + /* Construct file name by appending directory path */ + ulint sz = dir_len + 22 + 22 + 1 + ext_len + prefix_len; + char* log_file_name = UT_NEW_ARRAY_NOKEY(char, sz); + if (log_file_name == NULL) { + err = DB_OUT_OF_MEMORY; + break; + } + memset(log_file_name, 0, sz); + + strncpy(log_file_name, dir_path, dir_len); + ulint log_file_name_len = strlen(log_file_name); + if (log_file_name[log_file_name_len - 1] + != OS_PATH_SEPARATOR) { + + log_file_name[log_file_name_len] + = OS_PATH_SEPARATOR; + log_file_name_len = strlen(log_file_name); + } + strcat(log_file_name, fileinfo.name); + log_files.push_back(log_file_name); + } + } + + os_file_closedir(dir); + + return(err); +} + +/** +Parse the log file and populate table to truncate information. +(Add this table to truncate information to central vector that is then + used by truncate fix-up routine to fix-up truncate action of the table.) + +@param log_file_name log file to parse +@return DB_SUCCESS or error code. */ +dberr_t +TruncateLogParser::parse( + const char* log_file_name) +{ + dberr_t err = DB_SUCCESS; + truncate_t* truncate = NULL; + + /* Open the file and read magic-number to findout if truncate action + was completed. 
*/ + bool ret; + os_file_t handle = os_file_create_simple( + innodb_log_file_key, log_file_name, + OS_FILE_OPEN, OS_FILE_READ_ONLY, srv_read_only_mode, &ret); + if (!ret) { + ib::error() << "Error opening truncate log file: " + << log_file_name; + return(DB_IO_ERROR); + } + + ulint sz = UNIV_PAGE_SIZE; + void* buf = ut_zalloc_nokey(sz + UNIV_PAGE_SIZE); + if (buf == 0) { + os_file_close(handle); + return(DB_OUT_OF_MEMORY); + } + + IORequest request(IORequest::READ); + + request.disable_compression(); + + /* Align the memory for file i/o if we might have O_DIRECT set*/ + byte* log_buf = static_cast<byte*>(ut_align(buf, UNIV_PAGE_SIZE)); + + do { + err = os_file_read(request, handle, log_buf, 0, sz); + + if (err != DB_SUCCESS) { + os_file_close(handle); + break; + } + + ulint magic_n = mach_read_from_4(log_buf); + if (magic_n == TruncateLogger::s_magic) { + + /* Truncate action completed. Avoid parsing the file. */ + os_file_close(handle); + + os_file_delete(innodb_log_file_key, log_file_name); + break; + } + + if (truncate == NULL) { + truncate = UT_NEW_NOKEY(truncate_t(log_file_name)); + if (truncate == NULL) { + os_file_close(handle); + err = DB_OUT_OF_MEMORY; + break; + } + } + + err = truncate->parse(log_buf + 4, log_buf + sz - 4); + + if (err != DB_SUCCESS) { + + ut_ad(err == DB_FAIL); + + ut_free(buf); + buf = 0; + + sz *= 2; + + buf = ut_zalloc_nokey(sz + UNIV_PAGE_SIZE); + + if (buf == 0) { + os_file_close(handle); + err = DB_OUT_OF_MEMORY; + UT_DELETE(truncate); + truncate = NULL; + break; + } + + log_buf = static_cast<byte*>( + ut_align(buf, UNIV_PAGE_SIZE)); + } + } while (err != DB_SUCCESS); + + ut_free(buf); + + if (err == DB_SUCCESS && truncate != NULL) { + truncate_t::add(truncate); + os_file_close(handle); + } + + return(err); +} + +/** +Scan and Parse truncate log files. + +@param dir_path look for log directory in following path +@return DB_SUCCESS or error code. 
*/ +dberr_t +TruncateLogParser::scan_and_parse( + const char* dir_path) +{ + dberr_t err; + trunc_log_files_t log_files; + + /* Scan and trace all the truncate log files. */ + err = TruncateLogParser::scan(dir_path, log_files); + + /* Parse truncate lof files if scan was successful. */ + if (err == DB_SUCCESS) { + + for (ulint i = 0; + i < log_files.size() && err == DB_SUCCESS; + i++) { + err = TruncateLogParser::parse(log_files[i]); + } + } + + trunc_log_files_t::const_iterator end = log_files.end(); + for (trunc_log_files_t::const_iterator it = log_files.begin(); + it != end; + ++it) { + if (*it != NULL) { + UT_DELETE_ARRAY(*it); + } + } + log_files.clear(); + + return(err); +} + +/** Callback to drop indexes during TRUNCATE */ +class DropIndex : public Callback { + +public: + /** + Constructor + + @param[in,out] table Table to truncate + @param[in] noredo whether to disable redo logging */ + DropIndex(dict_table_t* table, bool noredo) + : + Callback(table->id, noredo), + m_table(table) + { + /* No op */ + } + + /** + @param mtr mini-transaction covering the read + @param pcur persistent cursor used for reading + @return DB_SUCCESS or error code */ + dberr_t operator()(mtr_t* mtr, btr_pcur_t* pcur) const; + +private: + /** Table to be truncated */ + dict_table_t* m_table; +}; + +/** Callback to create the indexes during TRUNCATE */ +class CreateIndex : public Callback { + +public: + /** + Constructor + + @param[in,out] table Table to truncate + @param[in] noredo whether to disable redo logging */ + CreateIndex(dict_table_t* table, bool noredo) + : + Callback(table->id, noredo), + m_table(table) + { + /* No op */ + } + + /** + Create the new index and update the root page number in the + SysIndex table. 
+ + @param mtr mini-transaction covering the read + @param pcur persistent cursor used for reading + @return DB_SUCCESS or error code */ + dberr_t operator()(mtr_t* mtr, btr_pcur_t* pcur) const; + +private: + // Disably copying + CreateIndex(const CreateIndex&); + CreateIndex& operator=(const CreateIndex&); + +private: + /** Table to be truncated */ + dict_table_t* m_table; +}; + +/** Check for presence of table-id in SYS_XXXX tables. */ +class TableLocator : public Callback { + +public: + /** + Constructor + @param table_id table_id to look for */ + explicit TableLocator(table_id_t table_id) + : + Callback(table_id, false), + m_table_found() + { + /* No op */ + } + + /** + @return true if table is found */ + bool is_table_found() const + { + return(m_table_found); + } + + /** + Look for table-id in SYS_XXXX tables without loading the table. + + @param mtr mini-transaction covering the read + @param pcur persistent cursor used for reading + @return DB_SUCCESS or error code */ + dberr_t operator()(mtr_t* mtr, btr_pcur_t* pcur); + +private: + // Disably copying + TableLocator(const TableLocator&); + TableLocator& operator=(const TableLocator&); + +private: + /** Set to true if table is present */ + bool m_table_found; +}; + +/** +@param mtr mini-transaction covering the read +@param pcur persistent cursor used for reading +@return DB_SUCCESS or error code */ +dberr_t +TruncateLogger::operator()(mtr_t* mtr, btr_pcur_t* pcur) +{ + ulint len; + const byte* field; + rec_t* rec = btr_pcur_get_rec(pcur); + truncate_t::index_t index; + + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_INDEXES__TYPE, &len); + ut_ad(len == 4); + index.m_type = mach_read_from_4(field); + + field = rec_get_nth_field_old(rec, DICT_FLD__SYS_INDEXES__ID, &len); + ut_ad(len == 8); + index.m_id = mach_read_from_8(field); + + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_INDEXES__PAGE_NO, &len); + ut_ad(len == 4); + index.m_root_page_no = mach_read_from_4(field); + + /* For compressed 
tables we need to store extra meta-data + required during btr_create(). */ + if (fsp_flags_is_compressed(m_flags)) { + + const dict_index_t* dict_index = find(index.m_id); + + if (dict_index != NULL) { + + dberr_t err = index.set(dict_index); + + if (err != DB_SUCCESS) { + m_truncate.clear(); + return(err); + } + + } else { + ib::warn() << "Index id " << index.m_id + << " not found"; + } + } + + m_truncate.add(index); + + return(DB_SUCCESS); +} + +/** +Drop an index in the table. + +@param mtr mini-transaction covering the read +@param pcur persistent cursor used for reading +@return DB_SUCCESS or error code */ +dberr_t +DropIndex::operator()(mtr_t* mtr, btr_pcur_t* pcur) const +{ + rec_t* rec = btr_pcur_get_rec(pcur); + + bool freed = dict_drop_index_tree(rec, pcur, mtr); + +#ifdef UNIV_DEBUG + { + ulint len; + const byte* field; + ulint index_type; + + field = rec_get_nth_field_old( + btr_pcur_get_rec(pcur), DICT_FLD__SYS_INDEXES__TYPE, + &len); + ut_ad(len == 4); + + index_type = mach_read_from_4(field); + + if (index_type & DICT_CLUSTERED) { + /* Clustered index */ + DBUG_EXECUTE_IF("ib_trunc_crash_on_drop_of_clust_index", + log_buffer_flush_to_disk(); + os_thread_sleep(2000000); + DBUG_SUICIDE();); + } else if (index_type & DICT_UNIQUE) { + /* Unique index */ + DBUG_EXECUTE_IF("ib_trunc_crash_on_drop_of_uniq_index", + log_buffer_flush_to_disk(); + os_thread_sleep(2000000); + DBUG_SUICIDE();); + } else if (index_type == 0) { + /* Secondary index */ + DBUG_EXECUTE_IF("ib_trunc_crash_on_drop_of_sec_index", + log_buffer_flush_to_disk(); + os_thread_sleep(2000000); + DBUG_SUICIDE();); + } + } +#endif /* UNIV_DEBUG */ + + DBUG_EXECUTE_IF("ib_err_trunc_drop_index", + freed = false;); + + if (freed) { + + /* We will need to commit and restart the + mini-transaction in order to avoid deadlocks. 
+ The dict_drop_index_tree() call has freed + a page in this mini-transaction, and the rest + of this loop could latch another index page.*/ + const mtr_log_t log_mode = mtr->get_log_mode(); + mtr_commit(mtr); + + mtr_start(mtr); + mtr->set_log_mode(log_mode); + + btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur, mtr); + } else { + /* Check if the .ibd file is missing. */ + bool found; + + fil_space_get_page_size(m_table->space, &found); + + DBUG_EXECUTE_IF("ib_err_trunc_drop_index", + found = false;); + + if (!found) { + return(DB_ERROR); + } + } + + return(DB_SUCCESS); +} + +/** +Create the new index and update the root page number in the +SysIndex table. + +@param mtr mini-transaction covering the read +@param pcur persistent cursor used for reading +@return DB_SUCCESS or error code */ +dberr_t +CreateIndex::operator()(mtr_t* mtr, btr_pcur_t* pcur) const +{ + ulint root_page_no; + + root_page_no = dict_recreate_index_tree(m_table, pcur, mtr); + +#ifdef UNIV_DEBUG + { + ulint len; + const byte* field; + ulint index_type; + + field = rec_get_nth_field_old( + btr_pcur_get_rec(pcur), DICT_FLD__SYS_INDEXES__TYPE, + &len); + ut_ad(len == 4); + + index_type = mach_read_from_4(field); + + if (index_type & DICT_CLUSTERED) { + /* Clustered index */ + DBUG_EXECUTE_IF( + "ib_trunc_crash_on_create_of_clust_index", + log_buffer_flush_to_disk(); + os_thread_sleep(2000000); + DBUG_SUICIDE();); + } else if (index_type & DICT_UNIQUE) { + /* Unique index */ + DBUG_EXECUTE_IF( + "ib_trunc_crash_on_create_of_uniq_index", + log_buffer_flush_to_disk(); + os_thread_sleep(2000000); + DBUG_SUICIDE();); + } else if (index_type == 0) { + /* Secondary index */ + DBUG_EXECUTE_IF( + "ib_trunc_crash_on_create_of_sec_index", + log_buffer_flush_to_disk(); + os_thread_sleep(2000000); + DBUG_SUICIDE();); + } + } +#endif /* UNIV_DEBUG */ + + DBUG_EXECUTE_IF("ib_err_trunc_create_index", + root_page_no = FIL_NULL;); + + if (root_page_no != FIL_NULL) { + + rec_t* rec = btr_pcur_get_rec(pcur); + + 
page_rec_write_field( + rec, DICT_FLD__SYS_INDEXES__PAGE_NO, + root_page_no, mtr); + + /* We will need to commit and restart the + mini-transaction in order to avoid deadlocks. + The dict_create_index_tree() call has allocated + a page in this mini-transaction, and the rest of + this loop could latch another index page. */ + mtr_commit(mtr); + + mtr_start(mtr); + + btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur, mtr); + + } else { + bool found; + fil_space_get_page_size(m_table->space, &found); + + DBUG_EXECUTE_IF("ib_err_trunc_create_index", + found = false;); + + if (!found) { + return(DB_ERROR); + } + } + + return(DB_SUCCESS); +} + +/** +Look for table-id in SYS_XXXX tables without loading the table. + +@param mtr mini-transaction covering the read +@param pcur persistent cursor used for reading +@return DB_SUCCESS */ +dberr_t +TableLocator::operator()(mtr_t* mtr, btr_pcur_t* pcur) +{ + m_table_found = true; + + return(DB_SUCCESS); +} + +/** +Rollback the transaction and release the index locks. +Drop indexes if table is corrupted so that drop/create +sequence works as expected. + +@param table table to truncate +@param trx transaction covering the TRUNCATE +@param new_id new table id that was suppose to get assigned + to the table if truncate executed successfully. 
+@param has_internal_doc_id indicate existence of fts index +@param no_redo if true, turn-off redo logging +@param corrupted table corrupted status +@param unlock_index if true then unlock indexes before action */ +static +void +row_truncate_rollback( + dict_table_t* table, + trx_t* trx, + table_id_t new_id, + bool has_internal_doc_id, + bool no_redo, + bool corrupted, + bool unlock_index) +{ + if (unlock_index) { + dict_table_x_unlock_indexes(table); + } + + trx->error_state = DB_SUCCESS; + + trx_rollback_to_savepoint(trx, NULL); + + trx->error_state = DB_SUCCESS; + + if (corrupted && !dict_table_is_temporary(table)) { + + /* Cleanup action to ensure we don't left over stale entries + if we are marking table as corrupted. This will ensure + it can be recovered using drop/create sequence. */ + dict_table_x_lock_indexes(table); + + DropIndex dropIndex(table, no_redo); + + SysIndexIterator().for_each(dropIndex); + + dict_table_x_unlock_indexes(table); + + for (dict_index_t* index = UT_LIST_GET_FIRST(table->indexes); + index != NULL; + index = UT_LIST_GET_NEXT(indexes, index)) { + + dict_set_corrupted(index, trx, "TRUNCATE TABLE"); + } + + if (has_internal_doc_id) { + + ut_ad(!trx_is_started(trx)); + + table_id_t id = table->id; + + table->id = new_id; + + fts_drop_tables(trx, table); + + table->id = id; + + ut_ad(trx_is_started(trx)); + + trx_commit_for_mysql(trx); + } + + } else if (corrupted && dict_table_is_temporary(table)) { + + dict_table_x_lock_indexes(table); + + for (dict_index_t* index = UT_LIST_GET_FIRST(table->indexes); + index != NULL; + index = UT_LIST_GET_NEXT(indexes, index)) { + + dict_drop_index_tree_in_mem(index, index->page); + + index->page = FIL_NULL; + } + + dict_table_x_unlock_indexes(table); + } + + table->corrupted = corrupted; +} + +/** +Finish the TRUNCATE operations for both commit and rollback. 
+ +@param table table being truncated +@param trx transaction covering the truncate +@param fsp_flags tablespace flags +@param logger table to truncate information logger +@param err status of truncate operation + +@return DB_SUCCESS or error code */ +static __attribute__((warn_unused_result)) +dberr_t +row_truncate_complete( + dict_table_t* table, + trx_t* trx, + ulint fsp_flags, + TruncateLogger* &logger, + dberr_t err) +{ + bool is_file_per_table = dict_table_is_file_per_table(table); + + if (table->memcached_sync_count == DICT_TABLE_IN_DDL) { + /* We need to set the memcached sync back to 0, unblock + memcached operations. */ + table->memcached_sync_count = 0; + } + + row_mysql_unlock_data_dictionary(trx); + + DEBUG_SYNC_C("ib_trunc_table_trunc_completing"); + + if (!dict_table_is_temporary(table)) { + + DBUG_EXECUTE_IF("ib_trunc_crash_before_log_removal", + log_buffer_flush_to_disk(); + os_thread_sleep(500000); + DBUG_SUICIDE();); + + /* Note: We don't log-checkpoint instead we have written + a special REDO log record MLOG_TRUNCATE that is used to + avoid applying REDO records before truncate for crash + that happens post successful truncate completion. */ + + if (logger != NULL) { + logger->done(); + UT_DELETE(logger); + logger = NULL; + } + } + + /* If non-temp file-per-table tablespace... */ + if (is_file_per_table + && !dict_table_is_temporary(table) + && fsp_flags != ULINT_UNDEFINED) { + + /* This function will reset back the stop_new_ops + and is_being_truncated so that fil-ops can re-start. */ + dberr_t err2 = truncate_t::truncate( + table->space, + table->data_dir_path, + table->name.m_name, fsp_flags, false); + + if (err2 != DB_SUCCESS) { + return(err2); + } + } + + if (err == DB_SUCCESS) { + dict_stats_update(table, DICT_STATS_EMPTY_TABLE); + } + + trx->op_info = ""; + + /* For temporary tables or if there was an error, we need to reset + the dict operation flags. 
*/ + trx->ddl = false; + trx->dict_operation = TRX_DICT_OP_NONE; + + ut_ad(!trx_is_started(trx)); + + srv_wake_master_thread(); + + DBUG_EXECUTE_IF("ib_trunc_crash_after_truncate_done", + DBUG_SUICIDE();); + + return(err); +} + +/** +Handle FTS truncate issues. +@param table table being truncated +@param new_id new id for the table +@param trx transaction covering the truncate +@return DB_SUCCESS or error code. */ +static __attribute__((warn_unused_result)) +dberr_t +row_truncate_fts( + dict_table_t* table, + table_id_t new_id, + trx_t* trx) +{ + dict_table_t fts_table; + + fts_table.id = new_id; + fts_table.name = table->name; + fts_table.flags2 = table->flags2; + fts_table.flags = table->flags; + fts_table.tablespace = table->tablespace; + fts_table.space = table->space; + + /* table->data_dir_path is used for FTS AUX table + creation. */ + if (DICT_TF_HAS_DATA_DIR(table->flags) + && table->data_dir_path == NULL) { + dict_get_and_save_data_dir_path(table, true); + ut_ad(table->data_dir_path != NULL); + } + + /* table->tablespace() may not be always populated or + if table->tablespace() uses "innodb_general" name, + fetch the real name. 
*/ + if (DICT_TF_HAS_SHARED_SPACE(table->flags) + && (table->tablespace() == NULL + || dict_table_has_temp_general_tablespace_name( + table->tablespace()))) { + dict_get_and_save_space_name(table, true); + ut_ad(table->tablespace() != NULL); + ut_ad(!dict_table_has_temp_general_tablespace_name( + table->tablespace())); + } + + fts_table.tablespace = table->tablespace(); + fts_table.data_dir_path = table->data_dir_path; + + dberr_t err; + + err = fts_create_common_tables( + trx, &fts_table, table->name.m_name, TRUE); + + for (ulint i = 0; + i < ib_vector_size(table->fts->indexes) && err == DB_SUCCESS; + i++) { + + dict_index_t* fts_index; + + fts_index = static_cast<dict_index_t*>( + ib_vector_getp(table->fts->indexes, i)); + + err = fts_create_index_tables_low( + trx, fts_index, table->name.m_name, new_id); + } + + DBUG_EXECUTE_IF("ib_err_trunc_during_fts_trunc", + err = DB_ERROR;); + + if (err != DB_SUCCESS) { + + trx->error_state = DB_SUCCESS; + trx_rollback_to_savepoint(trx, NULL); + trx->error_state = DB_SUCCESS; + + ib::error() << "Unable to truncate FTS index for table " + << table->name; + } else { + + ut_ad(trx_is_started(trx)); + } + + return(err); +} + +/** +Update system table to reflect new table id. +@param old_table_id old table id +@param new_table_id new table id +@param reserve_dict_mutex if TRUE, acquire/release + dict_sys->mutex around call to pars_sql. +@param trx transaction +@return error code or DB_SUCCESS */ +static __attribute__((warn_unused_result)) +dberr_t +row_truncate_update_table_id( + table_id_t old_table_id, + table_id_t new_table_id, + ibool reserve_dict_mutex, + trx_t* trx) +{ + pars_info_t* info = NULL; + dberr_t err = DB_SUCCESS; + + /* Scan the SYS_XXXX table and update to reflect new table-id. 
*/ + info = pars_info_create(); + pars_info_add_ull_literal(info, "old_id", old_table_id); + pars_info_add_ull_literal(info, "new_id", new_table_id); + + err = que_eval_sql( + info, + "PROCEDURE RENUMBER_TABLE_ID_PROC () IS\n" + "BEGIN\n" + "UPDATE SYS_TABLES" + " SET ID = :new_id\n" + " WHERE ID = :old_id;\n" + "UPDATE SYS_COLUMNS SET TABLE_ID = :new_id\n" + " WHERE TABLE_ID = :old_id;\n" + "UPDATE SYS_INDEXES" + " SET TABLE_ID = :new_id\n" + " WHERE TABLE_ID = :old_id;\n" + "UPDATE SYS_VIRTUAL" + " SET TABLE_ID = :new_id\n" + " WHERE TABLE_ID = :old_id;\n" + "END;\n", reserve_dict_mutex, trx); + + return(err); +} + +/** +Get the table id to truncate. +@param truncate_t old/new table id of table to truncate +@return table_id_t table_id to use in SYS_XXXX table update. */ +static __attribute__((warn_unused_result)) +table_id_t +row_truncate_get_trunc_table_id( + const truncate_t& truncate) +{ + TableLocator tableLocator(truncate.old_table_id()); + + SysIndexIterator().for_each(tableLocator); + + return(tableLocator.is_table_found() ? + truncate.old_table_id(): truncate.new_table_id()); +} + +/** +Update system table to reflect new table id and root page number. +@param truncate_t old/new table id of table to truncate + and updated root_page_no of indexes. +@param new_table_id new table id +@param reserve_dict_mutex if TRUE, acquire/release + dict_sys->mutex around call to pars_sql. +@param mark_index_corrupted if true, then mark index corrupted. 
+@return error code or DB_SUCCESS */ +static __attribute__((warn_unused_result)) +dberr_t +row_truncate_update_sys_tables_during_fix_up( + const truncate_t& truncate, + table_id_t new_table_id, + ibool reserve_dict_mutex, + bool mark_index_corrupted) +{ + trx_t* trx = trx_allocate_for_background(); + + trx_set_dict_operation(trx, TRX_DICT_OP_TABLE); + + table_id_t table_id = row_truncate_get_trunc_table_id(truncate); + + /* Step-1: Update the root-page-no */ + + dberr_t err; + + err = truncate.update_root_page_no( + trx, table_id, reserve_dict_mutex, mark_index_corrupted); + + if (err != DB_SUCCESS) { + return(err); + } + + /* Step-2: Update table-id. */ + + err = row_truncate_update_table_id( + table_id, new_table_id, reserve_dict_mutex, trx); + + if (err == DB_SUCCESS) { + trx_commit_for_mysql(trx); + trx_free_for_background(trx); + } + + return(err); +} + +/** +Truncate also results in assignment of new table id, update the system +SYSTEM TABLES with the new id. +@param table, table being truncated +@param new_id, new table id +@param has_internal_doc_id, has doc col (fts) +@param no_redo if true, turn-off redo logging +@param trx transaction handle +@return error code or DB_SUCCESS */ +static __attribute__((warn_unused_result)) +dberr_t +row_truncate_update_system_tables( + dict_table_t* table, + table_id_t new_id, + bool has_internal_doc_id, + bool no_redo, + trx_t* trx) +{ + dberr_t err = DB_SUCCESS; + + ut_a(!dict_table_is_temporary(table)); + + err = row_truncate_update_table_id(table->id, new_id, FALSE, trx); + + DBUG_EXECUTE_IF("ib_err_trunc_during_sys_table_update", + err = DB_ERROR;); + + if (err != DB_SUCCESS) { + + row_truncate_rollback( + table, trx, new_id, has_internal_doc_id, + no_redo, true, false); + + ib::error() << "Unable to assign a new identifier to table " + << table->name << " after truncating it. Marked the" + " table as corrupted. 
In-memory representation is now" + " different from the on-disk representation."; + err = DB_ERROR; + } else { + /* Drop the old FTS index */ + if (has_internal_doc_id) { + + ut_ad(trx_is_started(trx)); + + fts_drop_tables(trx, table); + + DBUG_EXECUTE_IF("ib_truncate_crash_while_fts_cleanup", + DBUG_SUICIDE();); + + ut_ad(trx_is_started(trx)); + } + + DBUG_EXECUTE_IF("ib_trunc_crash_after_fts_drop", + log_buffer_flush_to_disk(); + os_thread_sleep(2000000); + DBUG_SUICIDE();); + + dict_table_change_id_in_cache(table, new_id); + + /* Reset the Doc ID in cache to 0 */ + if (has_internal_doc_id && table->fts->cache != NULL) { + table->fts->fts_status |= TABLE_DICT_LOCKED; + fts_update_next_doc_id(trx, table, NULL, 0); + fts_cache_clear(table->fts->cache); + fts_cache_init(table->fts->cache); + table->fts->fts_status &= ~TABLE_DICT_LOCKED; + } + } + + return(err); +} + +/** +Prepare for the truncate process. On success all of the table's indexes will +be locked in X mode. +@param table table to truncate +@param flags tablespace flags +@return error code or DB_SUCCESS */ +static __attribute__((warn_unused_result)) +dberr_t +row_truncate_prepare(dict_table_t* table, ulint* flags) +{ + ut_ad(!dict_table_is_temporary(table)); + ut_ad(dict_table_is_file_per_table(table)); + + *flags = fil_space_get_flags(table->space); + + ut_ad(!dict_table_is_temporary(table)); + + dict_get_and_save_data_dir_path(table, true); + + dict_get_and_save_space_name(table, true); + + if (*flags != ULINT_UNDEFINED) { + + dberr_t err = fil_prepare_for_truncate(table->space); + + if (err != DB_SUCCESS) { + return(err); + } + } + + return(DB_SUCCESS); +} + +/** +Do foreign key checks before starting TRUNCATE. 
+@param table table being truncated +@param trx transaction covering the truncate +@return DB_SUCCESS or error code */ +static __attribute__((warn_unused_result)) +dberr_t +row_truncate_foreign_key_checks( + const dict_table_t* table, + const trx_t* trx) +{ + /* Check if the table is referenced by foreign key constraints from + some other table (not the table itself) */ + + dict_foreign_set::iterator it + = std::find_if(table->referenced_set.begin(), + table->referenced_set.end(), + dict_foreign_different_tables()); + + if (!srv_read_only_mode + && it != table->referenced_set.end() + && trx->check_foreigns) { + + dict_foreign_t* foreign = *it; + + FILE* ef = dict_foreign_err_file; + + /* We only allow truncating a referenced table if + FOREIGN_KEY_CHECKS is set to 0 */ + + mutex_enter(&dict_foreign_err_mutex); + + rewind(ef); + + ut_print_timestamp(ef); + + fputs(" Cannot truncate table ", ef); + ut_print_name(ef, trx, table->name.m_name); + fputs(" by DROP+CREATE\n" + "InnoDB: because it is referenced by ", ef); + ut_print_name(ef, trx, foreign->foreign_table_name); + putc('\n', ef); + + mutex_exit(&dict_foreign_err_mutex); + + return(DB_ERROR); + } + + /* TODO: could we replace the counter n_foreign_key_checks_running + with lock checks on the table? Acquire here an exclusive lock on the + table, and rewrite lock0lock.cc and the lock wait in srv0srv.cc so that + they can cope with the table having been truncated here? Foreign key + checks take an IS or IX lock on the table. */ + + if (table->n_foreign_key_checks_running > 0) { + ib::warn() << "Cannot truncate table " << table->name + << " because there is a foreign key check running on" + " it."; + + return(DB_ERROR); + } + + return(DB_SUCCESS); +} + +/** +Do some sanity checks before starting the actual TRUNCATE. 
+@param table table being truncated +@return DB_SUCCESS or error code */ +static __attribute__((warn_unused_result)) +dberr_t +row_truncate_sanity_checks( + const dict_table_t* table) +{ + if (dict_table_is_discarded(table)) { + + return(DB_TABLESPACE_DELETED); + + } else if (table->ibd_file_missing) { + + return(DB_TABLESPACE_NOT_FOUND); + + } else if (dict_table_is_corrupted(table)) { + + return(DB_TABLE_CORRUPT); + } + + return(DB_SUCCESS); +} + +/** +Truncates a table for MySQL. +@param table table being truncated +@param trx transaction covering the truncate +@return error code or DB_SUCCESS */ +dberr_t +row_truncate_table_for_mysql( + dict_table_t* table, + trx_t* trx) +{ + bool is_file_per_table = dict_table_is_file_per_table(table); + dberr_t err; +#ifdef UNIV_DEBUG + ulint old_space = table->space; +#endif /* UNIV_DEBUG */ + TruncateLogger* logger = NULL; + + /* Understanding the truncate flow. + + Step-1: Perform initial sanity checks to ensure the table can be truncated. + This includes checks for tablespace discard status, ibd file + missing, etc .... + + Step-2: Start transaction (only for non-temp tables; temp tables don't + modify any data on disk and so don't need a transaction object). + + Step-3: Validate ownership of needed locks (Exclusive lock). + Ownership will also ensure there are no active SQL queries, INSERT, + SELECT, ..... + + Step-4: Stop all the background processes associated with the table. + + Step-5: There are a few foreign key related constraints under which + we can't truncate the table (due to referential integrity unless it is + turned off). Ensure this condition is satisfied. + + Step-6: Truncate operation can be rolled back in case of error + till some point. Associate rollback segment to record undo log. + + Step-7: Generate new table-id. + Why do we need a new table-id ? + Purge and rollback case: we assign a new table id for the table. 
+ Since purge and rollback look for the table based on the table id, + they see the table as 'dropped' and discard their operations. + + Step-8: Log information about tablespace which includes + table and index information. If there is a crash in the next step + then during recovery we will attempt to fixup the operation. + + Step-9: Drop all indexes (this includes freeing of the pages + associated with them). + + Step-10: Re-create new indexes. + + Step-11: Update new table-id to in-memory cache (dictionary), + on-disk (INNODB_SYS_TABLES). INNODB_SYS_INDEXES also needs to + be updated to reflect updated root-page-no of new index created + and updated table-id. + + Step-12: Cleanup Stage. Reset auto-inc value to 1. + Release all the locks. + Commit the transaction. Update trx operation state. + + Notes: + - On error, log checkpoint is done followed by writing of magic number to + truncate log file. If the server crashes after truncate, fix-up action + will not be applied. + + - log checkpoint is done before starting truncate table to ensure + that previous REDO log entries are not applied if current truncate + crashes. Consider following use-case: + - create table .... insert/load table .... truncate table (crash) + - on restart table is restored .... truncate table (crash) + - on restart (assuming default log checkpoint is not done) will have + 2 REDO log entries for same table. (Note 2 REDO log entries + for different tables is not an issue). + For system-tablespace we can't truncate the tablespace so we need + to initiate a local cleanup that involves dropping of indexes and + re-creating them. If we apply a stale entry we might end-up issuing + drop on wrong indexes. + + - Insert buffer: TRUNCATE TABLE is analogous to DROP TABLE, + so we do not have to remove insert buffer records, as the + insert buffer works at a low level. If a freed page is later + reallocated, the allocator will remove the ibuf entries for + it. 
+ When we prepare to truncate *.ibd files, we remove all entries + for the table in the insert buffer tree. This is not strictly + necessary, but we can free up some space in the system tablespace. + + - Linear readahead and random readahead: we use the same + method as in 3) to discard ongoing operations. (This is only + relevant for TRUNCATE TABLE by TRUNCATE TABLESPACE.) + Ensure that the table will be dropped by trx_rollback_active() in + case of a crash. + */ + + /*-----------------------------------------------------------------*/ + /* Step-1: Perform initial sanity checks to ensure the table can be + truncated. This includes checks for tablespace discard status, + ibd file missing, etc .... */ + err = row_truncate_sanity_checks(table); + if (err != DB_SUCCESS) { + return(err); + + } + + /* Step-2: Start transaction (only for non-temp tables; temp tables + don't modify any data on disk and so don't need a transaction object). */ + if (!dict_table_is_temporary(table)) { + /* Avoid transaction overhead for temporary table DDL. */ + trx_start_for_ddl(trx, TRX_DICT_OP_TABLE); + } + + /* Step-3: Validate ownership of needed locks (Exclusive lock). + Ownership will also ensure there are no active SQL queries, INSERT, + SELECT, .....*/ + trx->op_info = "truncating table"; + ut_a(trx->dict_operation_lock_mode == 0); + row_mysql_lock_data_dictionary(trx); + ut_ad(mutex_own(&dict_sys->mutex)); + ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_X)); + + /* Step-4: Stop all the background processes associated with the table. */ + dict_stats_wait_bg_to_stop_using_table(table, trx); + + /* Step-5: There are a few foreign key related constraints under which + we can't truncate the table (due to referential integrity unless it is + turned off). Ensure this condition is satisfied. 
*/ + ulint fsp_flags = ULINT_UNDEFINED; + err = row_truncate_foreign_key_checks(table, trx); + if (err != DB_SUCCESS) { + trx_rollback_to_savepoint(trx, NULL); + return(row_truncate_complete( + table, trx, fsp_flags, logger, err)); + } + + /* Check if memcached DML is running on this table. If it is, we don't + allow truncating this table. */ + if (table->memcached_sync_count != 0) { + ib::error() << "Cannot truncate table " + << table->name + << " by DROP+CREATE because there are memcached" + " operations running on it."; + err = DB_ERROR; + trx_rollback_to_savepoint(trx, NULL); + return(row_truncate_complete( + table, trx, fsp_flags, logger, err)); + } else { + /* We need to set this counter to DICT_TABLE_IN_DDL for blocking + memcached operations; row_truncate_complete() sets it back to 0. */ + table->memcached_sync_count = DICT_TABLE_IN_DDL; + } + + /* Remove all locks except the table-level X lock. */ + lock_remove_all_on_table(table, FALSE); + trx->table_id = table->id; + trx_set_dict_operation(trx, TRX_DICT_OP_TABLE); + + /* Step-6: Truncate operation can be rolled back in case of error + till some point. Associate rollback segment to record undo log. */ + if (!dict_table_is_temporary(table)) { + + /* Temporary tables don't need undo logging for autocommit stmt. + On crash (i.e. mysql restart) temporary tables are anyway not + accessible. */ + mutex_enter(&trx->undo_mutex); + + err = trx_undo_assign_undo( + trx, &trx->rsegs.m_redo, TRX_UNDO_UPDATE); + + mutex_exit(&trx->undo_mutex); + + DBUG_EXECUTE_IF("ib_err_trunc_assigning_undo_log", + err = DB_ERROR;); + if (err != DB_SUCCESS) { + trx_rollback_to_savepoint(trx, NULL); + return(row_truncate_complete( + table, trx, fsp_flags, logger, err)); + } + } + + /* Step-7: Generate new table-id. + Why do we need a new table-id ? + Purge and rollback: we assign a new table id for the + table. Since purge and rollback look for the table based on + the table id, they see the table as 'dropped' and discard + their operations. 
*/ + table_id_t new_id; + dict_hdr_get_new_id(&new_id, NULL, NULL, table, false); + + /* Check if table involves FTS index. */ + bool has_internal_doc_id = + dict_table_has_fts_index(table) + || DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID); + + bool no_redo = is_file_per_table && !has_internal_doc_id; + + /* Step-8: Log information about tablespace which includes + table and index information. If there is a crash in the next step + then during recovery we will attempt to fixup the operation. */ + + /* Lock all index trees for this table, as we will truncate + the table/index and possibly change their metadata. All + DML/DDL are blocked by table level X lock, with a few exceptions + such as queries into information schema about the table, + MySQL could try to access index stats for this kind of query, + we need to use index locks to sync up */ + dict_table_x_lock_indexes(table); + + if (!dict_table_is_temporary(table) && !has_internal_doc_id) { + + if (is_file_per_table) { + + err = row_truncate_prepare(table, &fsp_flags); + + DBUG_EXECUTE_IF("ib_err_trunc_preparing_for_truncate", + err = DB_ERROR;); + + if (err != DB_SUCCESS) { + row_truncate_rollback( + table, trx, new_id, + has_internal_doc_id, + no_redo, false, true); + return(row_truncate_complete( + table, trx, fsp_flags, logger, err)); + } + } else { + fsp_flags = fil_space_get_flags(table->space); + + DBUG_EXECUTE_IF("ib_err_trunc_preparing_for_truncate", + fsp_flags = ULINT_UNDEFINED;); + + if (fsp_flags == ULINT_UNDEFINED) { + row_truncate_rollback( + table, trx, new_id, + has_internal_doc_id, + no_redo, false, true); + return(row_truncate_complete( + table, trx, fsp_flags, + logger, DB_ERROR)); + } + } + + logger = UT_NEW_NOKEY(TruncateLogger( + table, fsp_flags, new_id)); + + err = logger->init(); + if (err != DB_SUCCESS) { + row_truncate_rollback( + table, trx, new_id, has_internal_doc_id, + no_redo, false, true); + return(row_truncate_complete( + table, trx, fsp_flags, logger, DB_ERROR)); + + } 
+ + err = SysIndexIterator().for_each(*logger); + if (err != DB_SUCCESS) { + row_truncate_rollback( + table, trx, new_id, has_internal_doc_id, + no_redo, false, true); + return(row_truncate_complete( + table, trx, fsp_flags, logger, DB_ERROR)); + + } + + ut_ad(logger->debug()); + + err = logger->log(); + + if (err != DB_SUCCESS) { + row_truncate_rollback( + table, trx, new_id, has_internal_doc_id, + no_redo, false, true); + return(row_truncate_complete( + table, trx, fsp_flags, logger, DB_ERROR)); + } + } + + DBUG_EXECUTE_IF("ib_trunc_crash_after_redo_log_write_complete", + log_buffer_flush_to_disk(); + os_thread_sleep(3000000); + DBUG_SUICIDE();); + + /* Step-9: Drop all indexes (free index pages associated with these + indexes) */ + if (!dict_table_is_temporary(table)) { + + DropIndex dropIndex(table, no_redo); + + err = SysIndexIterator().for_each(dropIndex); + + if (err != DB_SUCCESS) { + + row_truncate_rollback( + table, trx, new_id, has_internal_doc_id, + no_redo, true, true); + + return(row_truncate_complete( + table, trx, fsp_flags, logger, err)); + } + + } else { + /* For temporary tables we don't have entries in SYSTEM TABLES*/ + for (dict_index_t* index = UT_LIST_GET_FIRST(table->indexes); + index != NULL; + index = UT_LIST_GET_NEXT(indexes, index)) { + + err = dict_truncate_index_tree_in_mem(index); + + if (err != DB_SUCCESS) { + row_truncate_rollback( + table, trx, new_id, has_internal_doc_id, + no_redo, true, true); + return(row_truncate_complete( + table, trx, fsp_flags, logger, err)); + } + + DBUG_EXECUTE_IF( + "ib_trunc_crash_during_drop_index_temp_table", + log_buffer_flush_to_disk(); + os_thread_sleep(2000000); + DBUG_SUICIDE();); + } + } + + if (is_file_per_table + && !dict_table_is_temporary(table) + && fsp_flags != ULINT_UNDEFINED) { + + fil_reinit_space_header( + table->space, + table->indexes.count + FIL_IBD_FILE_INITIAL_SIZE + 1); + } + + DBUG_EXECUTE_IF("ib_trunc_crash_with_intermediate_log_checkpoint", + log_buffer_flush_to_disk(); + 
os_thread_sleep(2000000); + log_checkpoint(TRUE, TRUE); + os_thread_sleep(1000000); + DBUG_SUICIDE();); + + DBUG_EXECUTE_IF("ib_trunc_crash_drop_reinit_done_create_to_start", + log_buffer_flush_to_disk(); + os_thread_sleep(2000000); + DBUG_SUICIDE();); + + /* Step-10: Re-create new indexes. */ + if (!dict_table_is_temporary(table)) { + + CreateIndex createIndex(table, no_redo); + + err = SysIndexIterator().for_each(createIndex); + + if (err != DB_SUCCESS) { + + row_truncate_rollback( + table, trx, new_id, has_internal_doc_id, + no_redo, true, true); + + return(row_truncate_complete( + table, trx, fsp_flags, logger, err)); + } + } + + /* Done with index truncation, release index tree locks, + subsequent work relates to table level metadata change */ + dict_table_x_unlock_indexes(table); + + if (has_internal_doc_id) { + + err = row_truncate_fts(table, new_id, trx); + + if (err != DB_SUCCESS) { + + row_truncate_rollback( + table, trx, new_id, has_internal_doc_id, + no_redo, true, false); + + return(row_truncate_complete( + table, trx, fsp_flags, logger, err)); + } + } + + /* Step-11: Update new table-id to in-memory cache (dictionary), + on-disk (INNODB_SYS_TABLES). INNODB_SYS_INDEXES also needs to + be updated to reflect updated root-page-no of new index created + and updated table-id. */ + if (dict_table_is_temporary(table)) { + + dict_table_change_id_in_cache(table, new_id); + err = DB_SUCCESS; + + } else { + + /* If this fails then we are in an inconsistent state and + the results are undefined. */ + ut_ad(old_space == table->space); + + err = row_truncate_update_system_tables( + table, new_id, has_internal_doc_id, no_redo, trx); + + if (err != DB_SUCCESS) { + return(row_truncate_complete( + table, trx, fsp_flags, logger, err)); + } + } + + DBUG_EXECUTE_IF("ib_trunc_crash_on_updating_dict_sys_info", + log_buffer_flush_to_disk(); + os_thread_sleep(2000000); + DBUG_SUICIDE();); + + /* Step-12: Cleanup Stage. Reset auto-inc value to 1. + Release all the locks. 
+ Commit the transaction. Update trx operation state. */ + dict_table_autoinc_lock(table); + dict_table_autoinc_initialize(table, 1); + dict_table_autoinc_unlock(table); + + if (trx_is_started(trx)) { + + trx_commit_for_mysql(trx); + } + + return(row_truncate_complete(table, trx, fsp_flags, logger, err)); +} + +/** +Fix the table truncate by applying information parsed from TRUNCATE log. +Fix-up includes re-creating table (drop and re-create indexes) +@return error code or DB_SUCCESS */ +dberr_t +truncate_t::fixup_tables_in_system_tablespace() +{ + dberr_t err = DB_SUCCESS; + + /* Using the info cached during REDO log scan phase fix the + table truncate. Only entries whose space id is TRX_SYS_SPACE are + handled (and erased from s_tables) here; the others are skipped. */ + + for (tables_t::iterator it = s_tables.begin(); + it != s_tables.end();) { + + if ((*it)->m_space_id == TRX_SYS_SPACE) { + /* Step-1: Drop and re-create indexes. */ + ib::info() << "Completing truncate for table with " + "id (" << (*it)->m_old_table_id << ") " + "residing in the system tablespace."; + + err = fil_recreate_table( + (*it)->m_space_id, + (*it)->m_format_flags, + (*it)->m_tablespace_flags, + (*it)->m_tablename, + **it); + + /* Step-2: Update the SYS_XXXX tables to reflect + this new table_id and root_page_no. If step-1 failed, + the indexes are marked corrupted instead. */ + table_id_t new_id; + + dict_hdr_get_new_id(&new_id, NULL, NULL, NULL, true); + + err = row_truncate_update_sys_tables_during_fix_up( + **it, new_id, TRUE, + (err == DB_SUCCESS) ? false : true); + + if (err != DB_SUCCESS) { + break; + } + + os_file_delete( + innodb_log_file_key, (*it)->m_log_file_name); + UT_DELETE(*it); + it = s_tables.erase(it); + } else { + ++it; + } + } + + /* Also clear the map used to track truncated tablespaces. */ + s_truncated_tables.clear(); + + return(err); +} + +/** +Fix the table truncate by applying information parsed from TRUNCATE log. +Fix-up includes re-creating tablespace. 
+@return error code or DB_SUCCESS */ +dberr_t +truncate_t::fixup_tables_in_non_system_tablespace() +{ + dberr_t err = DB_SUCCESS; + + /* Using the info cached during REDO log scan phase fix the + table truncate. */ + tables_t::iterator end = s_tables.end(); + + for (tables_t::iterator it = s_tables.begin(); it != end; ++it) { + + /* All tables in the system tablespace have already been + done and erased from this list. */ + ut_a((*it)->m_space_id != TRX_SYS_SPACE); + + /* Step-1: Drop tablespace (only for single-tablespace), + drop indexes and re-create indexes. */ + + if (fsp_is_file_per_table((*it)->m_space_id, + (*it)->m_tablespace_flags)) { + /* The table is file_per_table */ + + ib::info() << "Completing truncate for table with " + "id (" << (*it)->m_old_table_id << ") " + "residing in file-per-table tablespace with " + "id (" << (*it)->m_space_id << ")"; + + if (!fil_space_get((*it)->m_space_id)) { + + /* Create the database directory for name, + if it does not exist yet */ + fil_create_directory_for_tablename( + (*it)->m_tablename); + + err = fil_ibd_create( + (*it)->m_space_id, + (*it)->m_tablename, + (*it)->m_dir_path, + (*it)->m_tablespace_flags, + FIL_IBD_FILE_INITIAL_SIZE, + (*it)->m_encryption, + (*it)->m_key_id); + if (err != DB_SUCCESS) { + /* If checkpoint is not yet done + and table is dropped and then we might + still have REDO entries for this table + which are INVALID. Ignore them. 
*/ + ib::warn() << "Failed to create" + " tablespace for " + << (*it)->m_space_id + << " space-id"; + err = DB_ERROR; + break; + } + } + + ut_ad(fil_space_get((*it)->m_space_id)); + + err = fil_recreate_tablespace( + (*it)->m_space_id, + (*it)->m_format_flags, + (*it)->m_tablespace_flags, + (*it)->m_tablename, + **it, log_get_lsn()); + + } else { + /* Table is in a shared tablespace */ + + ib::info() << "Completing truncate for table with " + "id (" << (*it)->m_old_table_id << ") " + "residing in shared tablespace with " + "id (" << (*it)->m_space_id << ")"; + + /* Temp-tables in temp-tablespace are never restored.*/ + ut_ad((*it)->m_space_id != srv_tmp_space.space_id()); + + err = fil_recreate_table( + (*it)->m_space_id, + (*it)->m_format_flags, + (*it)->m_tablespace_flags, + (*it)->m_tablename, + **it); + } + + /* Step-2: Update the SYS_XXXX tables to reflect new + table-id and root_page_no. */ + table_id_t new_id; + + dict_hdr_get_new_id(&new_id, NULL, NULL, NULL, true); + + err = row_truncate_update_sys_tables_during_fix_up( + **it, new_id, TRUE, (err == DB_SUCCESS) ? 
false : true); + + if (err != DB_SUCCESS) { + break; + } + } + + if (err == DB_SUCCESS && s_tables.size() > 0) { + + log_make_checkpoint_at(LSN_MAX, TRUE); + } + + for (ulint i = 0; i < s_tables.size(); ++i) { + os_file_delete( + innodb_log_file_key, s_tables[i]->m_log_file_name); + UT_DELETE(s_tables[i]); + } + + s_tables.clear(); + + return(err); +} + +/** +Constructor + +@param old_table_id old table id assigned to table before truncate +@param new_table_id new table id that will be assigned to table + after truncate +@param dir_path directory path */ + +truncate_t::truncate_t( + table_id_t old_table_id, + table_id_t new_table_id, + const char* dir_path) + : + m_space_id(), + m_old_table_id(old_table_id), + m_new_table_id(new_table_id), + m_dir_path(), + m_tablename(), + m_tablespace_flags(), + m_format_flags(), + m_indexes(), + m_log_lsn(), + m_log_file_name(), + /* JAN: TODO: Encryption */ + m_encryption(FIL_SPACE_ENCRYPTION_DEFAULT), + m_key_id(FIL_DEFAULT_ENCRYPTION_KEY) +{ + if (dir_path != NULL) { + m_dir_path = mem_strdup(dir_path); + } +} + +/** +Consturctor + +@param log_file_name parse the log file during recovery to populate + information related to table to truncate */ +truncate_t::truncate_t( + const char* log_file_name) + : + m_space_id(), + m_old_table_id(), + m_new_table_id(), + m_dir_path(), + m_tablename(), + m_tablespace_flags(), + m_format_flags(), + m_indexes(), + m_log_lsn(), + m_log_file_name(), + /* JAN: TODO: Encryption */ + m_encryption(FIL_SPACE_ENCRYPTION_DEFAULT), + m_key_id(FIL_DEFAULT_ENCRYPTION_KEY) + +{ + m_log_file_name = mem_strdup(log_file_name); + if (m_log_file_name == NULL) { + ib::fatal() << "Failed creating truncate_t; out of memory"; + } +} + +/** Constructor */ + +truncate_t::index_t::index_t() + : + m_id(), + m_type(), + m_root_page_no(FIL_NULL), + m_new_root_page_no(FIL_NULL), + m_n_fields(), + m_trx_id_pos(ULINT_UNDEFINED), + m_fields() +{ + /* Do nothing */ +} + +/** Destructor */ + +truncate_t::~truncate_t() +{ + 
if (m_dir_path != NULL) { + ut_free(m_dir_path); + m_dir_path = NULL; + } + + if (m_tablename != NULL) { + ut_free(m_tablename); + m_tablename = NULL; + } + + if (m_log_file_name != NULL) { + ut_free(m_log_file_name); + m_log_file_name = NULL; + } + + m_indexes.clear(); +} + +/** +@return number of indexes parsed from the log record */ + +size_t +truncate_t::indexes() const +{ + return(m_indexes.size()); +} + +/** +Update root page number in SYS_XXXX tables. + +@param trx transaction object +@param table_id table id for which information needs to + be updated. +@param reserve_dict_mutex if TRUE, acquire/release + dict_sys->mutex around call to pars_sql. +@param mark_index_corrupted if true, then mark index corrupted. +@return DB_SUCCESS or error code */ + +dberr_t +truncate_t::update_root_page_no( + trx_t* trx, + table_id_t table_id, + ibool reserve_dict_mutex, + bool mark_index_corrupted) const +{ + indexes_t::const_iterator end = m_indexes.end(); + + dberr_t err = DB_SUCCESS; + + for (indexes_t::const_iterator it = m_indexes.begin(); + it != end; + ++it) { + + pars_info_t* info = pars_info_create(); + + pars_info_add_int4_literal( + info, "page_no", it->m_new_root_page_no); + + pars_info_add_ull_literal(info, "table_id", table_id); + + pars_info_add_ull_literal( + info, "index_id", + (mark_index_corrupted ? 
-1 : it->m_id)); + + err = que_eval_sql( + info, + "PROCEDURE RENUMBER_IDX_PAGE_NO_PROC () IS\n" + "BEGIN\n" + "UPDATE SYS_INDEXES" + " SET PAGE_NO = :page_no\n" + " WHERE TABLE_ID = :table_id" + " AND ID = :index_id;\n" + "END;\n", reserve_dict_mutex, trx); + + if (err != DB_SUCCESS) { + break; + } + } + + return(err); +} + +/** +Check whether a tablespace was truncated during recovery +@param space_id tablespace id to check +@return true if the tablespace was truncated */ + +bool +truncate_t::is_tablespace_truncated(ulint space_id) +{ + tables_t::iterator end = s_tables.end(); + + for (tables_t::iterator it = s_tables.begin(); it != end; ++it) { + + if ((*it)->m_space_id == space_id) { + + return(true); + } + } + + return(false); +} + +/** Was tablespace truncated (on crash before checkpoint). +If the MLOG_TRUNCATE redo-record is still available then tablespace +was truncated and checkpoint is yet to happen. +@param[in] space_id tablespace id to check. +@return true if tablespace is was truncated. */ +bool +truncate_t::was_tablespace_truncated(ulint space_id) +{ + return(s_truncated_tables.find(space_id) != s_truncated_tables.end()); +} + +/** Get the lsn associated with space. +@param[in] space_id tablespace id to check. +@return associated lsn. */ +lsn_t +truncate_t::get_truncated_tablespace_init_lsn(ulint space_id) +{ + ut_ad(was_tablespace_truncated(space_id)); + + return(s_truncated_tables.find(space_id)->second); +} + +/** +Parses log record during recovery +@param start_ptr buffer containing log body to parse +@param end_ptr buffer end + +@return DB_SUCCESS or error code */ + +dberr_t +truncate_t::parse( + byte* start_ptr, + const byte* end_ptr) +{ + /* Parse lsn, space-id, format-flags and tablespace-flags. 
*/ + if (end_ptr < start_ptr + (8 + 4 + 4 + 4)) { + return(DB_FAIL); + } + + m_log_lsn = mach_read_from_8(start_ptr); + start_ptr += 8; + + m_space_id = mach_read_from_4(start_ptr); + start_ptr += 4; + + m_format_flags = mach_read_from_4(start_ptr); + start_ptr += 4; + + m_tablespace_flags = mach_read_from_4(start_ptr); + start_ptr += 4; + + /* Parse table-name. */ + if (end_ptr < start_ptr + (2)) { + return(DB_FAIL); + } + + ulint n_tablename_len = mach_read_from_2(start_ptr); + start_ptr += 2; + + if (n_tablename_len > 0) { + if (end_ptr < start_ptr + n_tablename_len) { + return(DB_FAIL); + } + m_tablename = mem_strdup(reinterpret_cast<char*>(start_ptr)); + ut_ad(m_tablename[n_tablename_len - 1] == 0); + start_ptr += n_tablename_len; + } + + + /* Parse and read old/new table-id, number of indexes */ + if (end_ptr < start_ptr + (8 + 8 + 2 + 2)) { + return(DB_FAIL); + } + + ut_ad(m_indexes.empty()); + + m_old_table_id = mach_read_from_8(start_ptr); + start_ptr += 8; + + m_new_table_id = mach_read_from_8(start_ptr); + start_ptr += 8; + + ulint n_indexes = mach_read_from_2(start_ptr); + start_ptr += 2; + + /* Parse the remote directory from TRUNCATE log record */ + { + ulint n_tabledirpath_len = mach_read_from_2(start_ptr); + start_ptr += 2; + + if (end_ptr < start_ptr + n_tabledirpath_len) { + return(DB_FAIL); + } + + if (n_tabledirpath_len > 0) { + + m_dir_path = mem_strdup(reinterpret_cast<char*>(start_ptr)); + ut_ad(m_dir_path[n_tabledirpath_len - 1] == 0); + start_ptr += n_tabledirpath_len; + } + } + + /* Parse index ids and types from TRUNCATE log record */ + for (ulint i = 0; i < n_indexes; ++i) { + index_t index; + + if (end_ptr < start_ptr + (8 + 4 + 4 + 4)) { + return(DB_FAIL); + } + + index.m_id = mach_read_from_8(start_ptr); + start_ptr += 8; + + index.m_type = mach_read_from_4(start_ptr); + start_ptr += 4; + + index.m_root_page_no = mach_read_from_4(start_ptr); + start_ptr += 4; + + index.m_trx_id_pos = mach_read_from_4(start_ptr); + start_ptr += 4; + + 
m_indexes.push_back(index); + } + + ut_ad(!m_indexes.empty()); + + if (fsp_flags_is_compressed(m_tablespace_flags)) { + + /* Parse the number of index fields from TRUNCATE log record */ + for (ulint i = 0; i < m_indexes.size(); ++i) { + + if (end_ptr < start_ptr + (2 + 2)) { + return(DB_FAIL); + } + + m_indexes[i].m_n_fields = mach_read_from_2(start_ptr); + start_ptr += 2; + + ulint len = mach_read_from_2(start_ptr); + start_ptr += 2; + + if (end_ptr < start_ptr + len) { + return(DB_FAIL); + } + + index_t& index = m_indexes[i]; + + /* Should be NUL terminated. */ + ut_ad((start_ptr)[len - 1] == 0); + + index_t::fields_t::iterator end; + + end = index.m_fields.end(); + + index.m_fields.insert( + end, start_ptr, &(start_ptr)[len]); + + start_ptr += len; + } + } + + return(DB_SUCCESS); +} + +/** Parse log record from REDO log file during recovery. +@param[in,out] start_ptr buffer containing log body to parse +@param[in] end_ptr buffer end +@param[in] space_id tablespace identifier +@return parsed upto or NULL. */ +byte* +truncate_t::parse_redo_entry( + byte* start_ptr, + const byte* end_ptr, + ulint space_id) +{ + lsn_t lsn; + + /* Parse space-id, lsn */ + if (end_ptr < (start_ptr + 8)) { + return(NULL); + } + + lsn = mach_read_from_8(start_ptr); + start_ptr += 8; + + /* Tablespace can't exist in both state. + (scheduled-for-truncate, was-truncated). */ + if (!is_tablespace_truncated(space_id)) { + + truncated_tables_t::iterator it = + s_truncated_tables.find(space_id); + + if (it == s_truncated_tables.end()) { + s_truncated_tables.insert( + std::pair<ulint, lsn_t>(space_id, lsn)); + } else { + it->second = lsn; + } + } + + return(start_ptr); +} + +/** +Set the truncate log values for a compressed table. 
+@param index index from which recreate infoormation needs to be extracted +@return DB_SUCCESS or error code */ + +dberr_t +truncate_t::index_t::set( + const dict_index_t* index) +{ + /* Get trx-id column position (set only for clustered index) */ + if (dict_index_is_clust(index)) { + m_trx_id_pos = dict_index_get_sys_col_pos(index, DATA_TRX_ID); + ut_ad(m_trx_id_pos > 0); + ut_ad(m_trx_id_pos != ULINT_UNDEFINED); + } else { + m_trx_id_pos = 0; + } + + /* Original logic set this field differently if page is not leaf. + For truncate case this being first page to get created it is + always a leaf page and so we don't need that condition here. */ + m_n_fields = dict_index_get_n_fields(index); + + /* See requirements of page_zip_fields_encode for size. */ + ulint encoded_buf_size = (m_n_fields + 1) * 2; + byte* encoded_buf = UT_NEW_ARRAY_NOKEY(byte, encoded_buf_size); + + if (encoded_buf == NULL) { + return(DB_OUT_OF_MEMORY); + } + + ulint len = page_zip_fields_encode( + m_n_fields, index, m_trx_id_pos, encoded_buf); + ut_a(len <= encoded_buf_size); + + /* Append the encoded fields data. */ + m_fields.insert(m_fields.end(), &encoded_buf[0], &encoded_buf[len]); + + /* NUL terminate the encoded data */ + m_fields.push_back(0); + + UT_DELETE_ARRAY(encoded_buf); + + return(DB_SUCCESS); +} + +/** Create an index for a table. 
+@param[in] table_name table name, for which to create +the index +@param[in] space_id space id where we have to +create the index +@param[in] page_size page size of the .ibd file +@param[in] index_type type of index to truncate +@param[in] index_id id of index to truncate +@param[in] btr_redo_create_info control info for ::btr_create() +@param[in,out] mtr mini-transaction covering the +create index +@return root page no or FIL_NULL on failure */ +ulint +truncate_t::create_index( + const char* table_name, + ulint space_id, + const page_size_t& page_size, + ulint index_type, + index_id_t index_id, + const btr_create_t& btr_redo_create_info, + mtr_t* mtr) const +{ + ulint root_page_no = btr_create( + index_type, space_id, page_size, index_id, + NULL, &btr_redo_create_info, mtr); + + if (root_page_no == FIL_NULL) { + + ib::info() << "innodb_force_recovery was set to " + << srv_force_recovery << ". Continuing crash recovery" + " even though we failed to create index " << index_id + << " for compressed table '" << table_name << "' with" + " tablespace " << space_id << " during recovery"; + } + + return(root_page_no); +} + +/** Check if index has been modified since TRUNCATE log snapshot +was recorded. +@param space_id space_id where table/indexes resides. +@param root_page_no root page of index that needs to be verified. +@return true if modified else false */ + +bool +truncate_t::is_index_modified_since_logged( + ulint space_id, + ulint root_page_no) const +{ + mtr_t mtr; + bool found; + const page_size_t& page_size = fil_space_get_page_size(space_id, + &found); + dberr_t err = DB_SUCCESS; + + ut_ad(found); + + mtr_start(&mtr); + + /* Root page could be in free state if truncate crashed after drop_index + and page was not allocated for any other object. 
*/ + buf_block_t* block= buf_page_get_gen( + page_id_t(space_id, root_page_no), page_size, RW_X_LATCH, NULL, + BUF_GET_POSSIBLY_FREED, __FILE__, __LINE__, &mtr, &err); + + page_t* root = buf_block_get_frame(block); + +#ifdef UNIV_DEBUG + /* If the root page has been freed as part of truncate drop_index action + and not yet allocated for any object still the pagelsn > snapshot lsn */ + if (block->page.file_page_was_freed) { + ut_ad(mach_read_from_8(root + FIL_PAGE_LSN) > m_log_lsn); + } +#endif /* UNIV_DEBUG */ + + lsn_t page_lsn = mach_read_from_8(root + FIL_PAGE_LSN); + + mtr_commit(&mtr); + + if (page_lsn > m_log_lsn) { + return(true); + } + + return(false); +} + +/** Drop indexes for a table. +@param space_id space_id where table/indexes resides. */ + +void +truncate_t::drop_indexes( + ulint space_id) const +{ + mtr_t mtr; + ulint root_page_no = FIL_NULL; + + indexes_t::const_iterator end = m_indexes.end(); + + for (indexes_t::const_iterator it = m_indexes.begin(); + it != end; + ++it) { + + root_page_no = it->m_root_page_no; + + bool found; + const page_size_t& page_size + = fil_space_get_page_size(space_id, &found); + + ut_ad(found); + + if (is_index_modified_since_logged( + space_id, root_page_no)) { + /* Page has been modified since TRUNCATE log snapshot + was recorded so not safe to drop the index. */ + continue; + } + + mtr_start(&mtr); + + if (space_id != TRX_SYS_SPACE) { + /* Do not log changes for single-table + tablespaces, we are in recovery mode. */ + mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO); + } + + if (root_page_no != FIL_NULL) { + const page_id_t root_page_id(space_id, root_page_no); + + btr_free_if_exists( + root_page_id, page_size, it->m_id, &mtr); + } + + /* If tree is already freed then we might return immediately + in which case we need to release the lock we have acquired + on root_page. 
*/ + mtr_commit(&mtr); + } +} + + +/** Create the indexes for a table +@param[in] table_name table name, for which to create the indexes +@param[in] space_id space id where we have to create the indexes +@param[in] page_size page size of the .ibd file +@param[in] flags tablespace flags +@param[in] format_flags page format flags +@return DB_SUCCESS or error code. */ +dberr_t +truncate_t::create_indexes( + const char* table_name, + ulint space_id, + const page_size_t& page_size, + ulint flags, + ulint format_flags) +{ + mtr_t mtr; + + mtr_start(&mtr); + + if (space_id != TRX_SYS_SPACE) { + /* Do not log changes for single-table tablespaces, we + are in recovery mode. */ + mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO); + } + + /* Create all new index trees with table format, index ids, index + types, number of index fields and index field information taken + out from the TRUNCATE log record. */ + + ulint root_page_no = FIL_NULL; + indexes_t::iterator end = m_indexes.end(); + for (indexes_t::iterator it = m_indexes.begin(); + it != end; + ++it) { + + btr_create_t btr_redo_create_info( + fsp_flags_is_compressed(flags) + ? &it->m_fields[0] : NULL); + + btr_redo_create_info.format_flags = format_flags; + + if (fsp_flags_is_compressed(flags)) { + + btr_redo_create_info.n_fields = it->m_n_fields; + /* Skip the NUL appended field */ + btr_redo_create_info.field_len = + it->m_fields.size() - 1; + btr_redo_create_info.trx_id_pos = it->m_trx_id_pos; + } + + root_page_no = create_index( + table_name, space_id, page_size, it->m_type, it->m_id, + btr_redo_create_info, &mtr); + + if (root_page_no == FIL_NULL) { + break; + } + + it->m_new_root_page_no = root_page_no; + } + + mtr_commit(&mtr); + + return(root_page_no == FIL_NULL ? DB_ERROR : DB_SUCCESS); +} + +/** +Write a TRUNCATE log record for fixing up table if truncate crashes. 
+@param start_ptr buffer to write log record +@param end_ptr buffer end +@param space_id space id +@param tablename the table name in the usual databasename/tablename + format of InnoDB +@param flags tablespace flags +@param format_flags page format +@param lsn lsn while logging +@return DB_SUCCESS or error code */ + +dberr_t +truncate_t::write( + byte* start_ptr, + byte* end_ptr, + ulint space_id, + const char* tablename, + ulint flags, + ulint format_flags, + lsn_t lsn) const +{ + if (end_ptr < start_ptr) { + return(DB_FAIL); + } + + /* LSN, Type, Space-ID, format-flag (also know as log_flag. + Stored in page_no field), tablespace flags */ + if (end_ptr < (start_ptr + (8 + 4 + 4 + 4))) { + return(DB_FAIL); + } + + mach_write_to_8(start_ptr, lsn); + start_ptr += 8; + + mach_write_to_4(start_ptr, space_id); + start_ptr += 4; + + mach_write_to_4(start_ptr, format_flags); + start_ptr += 4; + + mach_write_to_4(start_ptr, flags); + start_ptr += 4; + + /* Name of the table. */ + /* Include the NUL in the log record. */ + ulint len = strlen(tablename) + 1; + if (end_ptr < (start_ptr + (len + 2))) { + return(DB_FAIL); + } + + mach_write_to_2(start_ptr, len); + start_ptr += 2; + + memcpy(start_ptr, tablename, len - 1); + start_ptr += len; + + DBUG_EXECUTE_IF("ib_trunc_crash_while_writing_redo_log", + DBUG_SUICIDE();); + + /* Old/New Table-ID, Number of Indexes and Tablespace dir-path-name. */ + /* Write the remote directory of the table into mtr log */ + len = m_dir_path != NULL ? strlen(m_dir_path) + 1 : 0; + if (end_ptr < (start_ptr + (len + 8 + 8 + 2 + 2))) { + return(DB_FAIL); + } + + /* Write out old-table-id. */ + mach_write_to_8(start_ptr, m_old_table_id); + start_ptr += 8; + + /* Write out new-table-id. */ + mach_write_to_8(start_ptr, m_new_table_id); + start_ptr += 8; + + /* Write out the number of indexes. */ + mach_write_to_2(start_ptr, m_indexes.size()); + start_ptr += 2; + + /* Write the length (NUL included) of the .ibd path. 
*/ + mach_write_to_2(start_ptr, len); + start_ptr += 2; + + if (m_dir_path != NULL) { + memcpy(start_ptr, m_dir_path, len - 1); + start_ptr += len; + } + + /* Indexes information (id, type) */ + /* Write index ids, type, root-page-no into mtr log */ + for (ulint i = 0; i < m_indexes.size(); ++i) { + + if (end_ptr < (start_ptr + (8 + 4 + 4 + 4))) { + return(DB_FAIL); + } + + mach_write_to_8(start_ptr, m_indexes[i].m_id); + start_ptr += 8; + + mach_write_to_4(start_ptr, m_indexes[i].m_type); + start_ptr += 4; + + mach_write_to_4(start_ptr, m_indexes[i].m_root_page_no); + start_ptr += 4; + + mach_write_to_4(start_ptr, m_indexes[i].m_trx_id_pos); + start_ptr += 4; + } + + /* If tablespace compressed then field info of each index. */ + if (fsp_flags_is_compressed(flags)) { + + for (ulint i = 0; i < m_indexes.size(); ++i) { + + ulint len = m_indexes[i].m_fields.size(); + if (end_ptr < (start_ptr + (len + 2 + 2))) { + return(DB_FAIL); + } + + mach_write_to_2( + start_ptr, m_indexes[i].m_n_fields); + start_ptr += 2; + + mach_write_to_2(start_ptr, len); + start_ptr += 2; + + const byte* ptr = &m_indexes[i].m_fields[0]; + memcpy(start_ptr, ptr, len - 1); + start_ptr += len; + } + } + + return(DB_SUCCESS); +} + |