diff options
author | Marko Mäkelä <marko.makela@mariadb.com> | 2018-09-07 22:15:06 +0300 |
---|---|---|
committer | Marko Mäkelä <marko.makela@mariadb.com> | 2018-09-07 22:15:06 +0300 |
commit | 5a1868b58d26b286b6ad433096e7184895953311 (patch) | |
tree | 9cee54b5852a4e33897d4fdf22c8c85619d12662 /storage | |
parent | 4901f31c13f91e130f077f2f77b32c40b0036e32 (diff) | |
parent | 980d1bf1a921a270423ab36bd5d1ce2a1cd7590b (diff) | |
download | mariadb-git-5a1868b58d26b286b6ad433096e7184895953311.tar.gz |
MDEV-13564 Mariabackup does not work with TRUNCATE
This is a merge from 10.2, but the 10.2 version of this change will
not be pushed into 10.2 yet, because it would include
backports of MDEV-14717 and MDEV-14585, which would introduce
a crash recovery regression: tables could be lost during
table-rebuilding DDL operations, such as ALTER TABLE,
OPTIMIZE TABLE or this new backup-friendly TRUNCATE TABLE.
The test innodb.truncate_crash occasionally loses the table due to
the following bug:
MDEV-17158 log_write_up_to() sometimes fails
Diffstat (limited to 'storage')
38 files changed, 745 insertions, 1924 deletions
diff --git a/storage/innobase/buf/buf0dblwr.cc b/storage/innobase/buf/buf0dblwr.cc index 8c473b0c658..043a5ffd426 100644 --- a/storage/innobase/buf/buf0dblwr.cc +++ b/storage/innobase/buf/buf0dblwr.cc @@ -560,7 +560,8 @@ buf_dblwr_process() is scheduled for truncation or was truncated and we have parsed an MLOG_TRUNCATE record. */ if (!srv_is_tablespace_truncated(space_id) - && !srv_was_tablespace_truncated(space)) { + && !srv_was_tablespace_truncated(space) + && !srv_is_undo_tablespace(space_id)) { ib::warn() << "A copy of page " << page_id << " in the doublewrite buffer slot " << page_no_dblwr diff --git a/storage/innobase/buf/buf0lru.cc b/storage/innobase/buf/buf0lru.cc index 125c5beb47c..f9b2189c79a 100644 --- a/storage/innobase/buf/buf0lru.cc +++ b/storage/innobase/buf/buf0lru.cc @@ -557,13 +557,15 @@ the list as they age towards the tail of the LRU. @param[in] id tablespace identifier @param[in] observer flush observer (to check for interrupt), or NULL if the files should not be written to -@return whether all dirty pages were freed */ +@param[in] first first page to be flushed or evicted +@return whether all matching dirty pages were removed */ static MY_ATTRIBUTE((warn_unused_result)) bool buf_flush_or_remove_pages( buf_pool_t* buf_pool, ulint id, - FlushObserver* observer) + FlushObserver* observer, + ulint first) { buf_page_t* prev; buf_page_t* bpage; @@ -604,6 +606,8 @@ rescan: } else if (id != bpage->id.space()) { /* Skip this block, because it is for a different tablespace. */ + } else if (bpage->id.page_no() < first) { + /* Skip this block, because it is below the limit. */ } else if (!buf_flush_or_remove_page( buf_pool, bpage, observer != NULL)) { @@ -667,18 +671,20 @@ the tail of the LRU list. 
@param[in] id tablespace identifier @param[in] observer flush observer, or NULL if the files should not be written to -*/ +@param[in] first first page to be flushed or evicted */ static void buf_flush_dirty_pages( buf_pool_t* buf_pool, ulint id, - FlushObserver* observer) + FlushObserver* observer, + ulint first) { for (;;) { buf_pool_mutex_enter(buf_pool); - bool freed = buf_flush_or_remove_pages(buf_pool, id, observer); + bool freed = buf_flush_or_remove_pages(buf_pool, id, observer, + first); buf_pool_mutex_exit(buf_pool); @@ -693,20 +699,24 @@ buf_flush_dirty_pages( } ut_ad((observer && observer->is_interrupted()) + || first || buf_pool_get_dirty_pages_count(buf_pool, id, observer) == 0); } /** Empty the flush list for all pages belonging to a tablespace. @param[in] id tablespace identifier @param[in] observer flush observer, - or NULL if nothing is to be written */ -void buf_LRU_flush_or_remove_pages(ulint id, FlushObserver* observer) + or NULL if nothing is to be written +@param[in] first first page to be flushed or evicted */ +void buf_LRU_flush_or_remove_pages(ulint id, FlushObserver* observer, + ulint first) { /* Pages in the system tablespace must never be discarded. */ ut_ad(id || observer); for (ulint i = 0; i < srv_buf_pool_instances; i++) { - buf_flush_dirty_pages(buf_pool_from_array(i), id, observer); + buf_flush_dirty_pages(buf_pool_from_array(i), id, observer, + first); } if (observer && !observer->is_interrupted()) { diff --git a/storage/innobase/buf/buf0rea.cc b/storage/innobase/buf/buf0rea.cc index 00b2ac378db..14d1d3d4706 100644 --- a/storage/innobase/buf/buf0rea.cc +++ b/storage/innobase/buf/buf0rea.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2015, 2017, MariaDB Corporation. +Copyright (c) 2015, 2018, MariaDB Corporation. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -176,17 +176,6 @@ buf_read_page_low( dst = ((buf_block_t*) bpage)->frame; } - DBUG_EXECUTE_IF( - "innodb_invalid_read_after_truncate", - if (fil_space_t* space = fil_space_acquire(page_id.space())) { - if (!strcmp(space->name, "test/t1") - && page_id.page_no() == space->size - 1) { - type = 0; - sync = true; - } - space->release(); - }); - IORequest request(type | IORequest::READ); *err = fil_io( @@ -332,19 +321,6 @@ buf_read_ahead_random( that is, reside near the start of the LRU list. */ for (i = low; i < high; i++) { - DBUG_EXECUTE_IF( - "innodb_invalid_read_after_truncate", - if (fil_space_t* space = fil_space_acquire( - page_id.space())) { - bool skip = !strcmp(space->name, "test/t1"); - space->release(); - if (skip) { - high = space->size; - buf_pool_mutex_exit(buf_pool); - goto read_ahead; - } - }); - const buf_page_t* bpage = buf_page_hash_get( buf_pool, page_id_t(page_id.space(), i)); diff --git a/storage/innobase/dict/dict0crea.cc b/storage/innobase/dict/dict0crea.cc index 801d96fd410..1d7ee29d019 100644 --- a/storage/innobase/dict/dict0crea.cc +++ b/storage/innobase/dict/dict0crea.cc @@ -1510,16 +1510,11 @@ dict_create_or_check_foreign_constraint_tables(void) /* Check which incomplete table definition to drop. 
*/ if (sys_foreign_err == DB_CORRUPTION) { - ib::warn() << "Dropping incompletely created" - " SYS_FOREIGN table."; - row_drop_table_for_mysql("SYS_FOREIGN", trx, TRUE, TRUE); + row_drop_table_after_create_fail("SYS_FOREIGN", trx); } if (sys_foreign_cols_err == DB_CORRUPTION) { - ib::warn() << "Dropping incompletely created" - " SYS_FOREIGN_COLS table."; - - row_drop_table_for_mysql("SYS_FOREIGN_COLS", trx, TRUE, TRUE); + row_drop_table_after_create_fail("SYS_FOREIGN_COLS", trx); } ib::info() << "Creating foreign key constraint system tables."; @@ -1571,8 +1566,8 @@ dict_create_or_check_foreign_constraint_tables(void) ut_ad(err == DB_OUT_OF_FILE_SPACE || err == DB_TOO_MANY_CONCURRENT_TRXS); - row_drop_table_for_mysql("SYS_FOREIGN", trx, TRUE, TRUE); - row_drop_table_for_mysql("SYS_FOREIGN_COLS", trx, TRUE, TRUE); + row_drop_table_after_create_fail("SYS_FOREIGN", trx); + row_drop_table_after_create_fail("SYS_FOREIGN_COLS", trx); if (err == DB_OUT_OF_FILE_SPACE) { err = DB_MUST_GET_MORE_FILE_SPACE; @@ -1640,9 +1635,7 @@ dict_create_or_check_sys_virtual() /* Check which incomplete table definition to drop. */ if (err == DB_CORRUPTION) { - ib::warn() << "Dropping incompletely created" - " SYS_VIRTUAL table."; - row_drop_table_for_mysql("SYS_VIRTUAL", trx, false, TRUE); + row_drop_table_after_create_fail("SYS_VIRTUAL", trx); } ib::info() << "Creating sys_virtual system tables."; @@ -1676,7 +1669,7 @@ dict_create_or_check_sys_virtual() ut_ad(err == DB_OUT_OF_FILE_SPACE || err == DB_TOO_MANY_CONCURRENT_TRXS); - row_drop_table_for_mysql("SYS_VIRTUAL", trx, false, TRUE); + row_drop_table_after_create_fail("SYS_VIRTUAL", trx); if (err == DB_OUT_OF_FILE_SPACE) { err = DB_MUST_GET_MORE_FILE_SPACE; @@ -2296,16 +2289,11 @@ dict_create_or_check_sys_tablespace(void) /* Check which incomplete table definition to drop. 
*/ if (sys_tablespaces_err == DB_CORRUPTION) { - ib::warn() << "Dropping incompletely created" - " SYS_TABLESPACES table."; - row_drop_table_for_mysql("SYS_TABLESPACES", trx, TRUE, TRUE); + row_drop_table_after_create_fail("SYS_TABLESPACES", trx); } if (sys_datafiles_err == DB_CORRUPTION) { - ib::warn() << "Dropping incompletely created" - " SYS_DATAFILES table."; - - row_drop_table_for_mysql("SYS_DATAFILES", trx, TRUE, TRUE); + row_drop_table_after_create_fail("SYS_DATAFILES", trx); } ib::info() << "Creating tablespace and datafile system tables."; @@ -2340,8 +2328,8 @@ dict_create_or_check_sys_tablespace(void) || err == DB_DUPLICATE_KEY || err == DB_TOO_MANY_CONCURRENT_TRXS); - row_drop_table_for_mysql("SYS_TABLESPACES", trx, TRUE, TRUE); - row_drop_table_for_mysql("SYS_DATAFILES", trx, TRUE, TRUE); + row_drop_table_after_create_fail("SYS_TABLESPACES", trx); + row_drop_table_after_create_fail("SYS_DATAFILES", trx); if (err == DB_OUT_OF_FILE_SPACE) { err = DB_MUST_GET_MORE_FILE_SPACE; diff --git a/storage/innobase/fil/fil0crypt.cc b/storage/innobase/fil/fil0crypt.cc index 76c65fca841..86bad03f7fb 100644 --- a/storage/innobase/fil/fil0crypt.cc +++ b/storage/innobase/fil/fil0crypt.cc @@ -942,8 +942,8 @@ fil_crypt_read_crypt_data(fil_space_t* space) /* The encryption metadata has already been read, or the tablespace is not encrypted and the file has been opened already, or the file cannot be accessed, - likely due to a concurrent TRUNCATE or - RENAME or DROP (possibly as part of ALTER TABLE). + likely due to a concurrent DROP + (possibly as part of TRUNCATE or ALTER TABLE). FIXME: The file can become unaccessible any time after this check! We should really remove this function and instead make crypt_data an integral @@ -1627,7 +1627,7 @@ fil_crypt_get_page_throttle_func( ut_ad(space->referenced()); /* Before reading from tablespace we need to make sure that - the tablespace is not about to be dropped or truncated. */ + the tablespace is not about to be dropped. 
*/ if (space->is_stopping()) { return NULL; } diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc index ac7a84c90f8..dc8d2cddf5b 100644 --- a/storage/innobase/fil/fil0fil.cc +++ b/storage/innobase/fil/fil0fil.cc @@ -2131,13 +2131,14 @@ fil_op_write_log( byte* log_ptr; ulint len; - ut_ad(first_page_no == 0); + ut_ad(first_page_no == 0 || type == MLOG_FILE_CREATE2); ut_ad(fsp_flags_is_valid(flags, space_id)); /* fil_name_parse() requires that there be at least one path separator and that the file path end with ".ibd". */ ut_ad(strchr(path, OS_PATH_SEPARATOR) != NULL); - ut_ad(strcmp(&path[strlen(path) - strlen(DOT_IBD)], DOT_IBD) == 0); + ut_ad(first_page_no /* trimming an undo tablespace */ + || !strcmp(&path[strlen(path) - strlen(DOT_IBD)], DOT_IBD)); log_ptr = mlog_open(mtr, 11 + 4 + 2 + 1); @@ -2351,7 +2352,7 @@ fil_op_replay_rename( enum fil_operation_t { FIL_OPERATION_DELETE, /*!< delete a single-table tablespace */ FIL_OPERATION_CLOSE, /*!< close a single-table tablespace */ - FIL_OPERATION_TRUNCATE /*!< truncate a single-table tablespace */ + FIL_OPERATION_TRUNCATE /*!< truncate an undo tablespace */ }; /** Check for pending operations. @@ -2484,8 +2485,6 @@ fil_check_pending_operations( /* Check for pending IO. */ - *path = 0; - for (;;) { sp = fil_space_get_by_id(id); @@ -2498,7 +2497,7 @@ fil_check_pending_operations( count = fil_check_pending_io(operation, sp, &node, count); - if (count == 0) { + if (count == 0 && path) { *path = mem_strdup(node->name); } @@ -2586,7 +2585,8 @@ fil_close_tablespace( not (necessarily) protected by meta-data locks. (Rollback would generally be protected, but rollback of FOREIGN KEY CASCADE/SET NULL is not protected by meta-data locks -but only by InnoDB table locks, which may be broken by TRUNCATE TABLE.) +but only by InnoDB table locks, which may be broken by +lock_remove_all_on_table().) 
@param[in] table persistent table checked @return whether the table is accessible */ bool @@ -2728,85 +2728,33 @@ fil_delete_tablespace( return(err); } -/** Truncate the tablespace to needed size. -@param[in,out] space tablespace truncate -@param[in] size_in_pages truncate size. -@return true if truncate was successful. */ -bool fil_truncate_tablespace(fil_space_t* space, ulint size_in_pages) +/** Prepare to truncate an undo tablespace. +@param[in] space_id undo tablespace id +@return the tablespace +@retval NULL if tablespace not found */ +fil_space_t* fil_truncate_prepare(ulint space_id) { - /* Step-1: Prepare tablespace for truncate. This involves - stopping all the new operations + IO on that tablespace - and ensuring that related pages are flushed to disk. */ - if (fil_prepare_for_truncate(space->id) != DB_SUCCESS) { - return(false); - } - - /* Step-2: Invalidate buffer pool pages belonging to the tablespace - to re-create. Remove all insert buffer entries for the tablespace */ - buf_LRU_flush_or_remove_pages(space->id, NULL); - - /* Step-3: Truncate the tablespace and accordingly update - the fil_space_t handler that is used to access this tablespace. */ - mutex_enter(&fil_system.mutex); - - /* The following code must change when InnoDB supports - multiple datafiles per tablespace. */ - ut_a(UT_LIST_GET_LEN(space->chain) == 1); - - fil_node_t* node = UT_LIST_GET_FIRST(space->chain); - - ut_ad(node->is_open()); - - space->size = node->size = size_in_pages; - - bool success = os_file_truncate(node->name, node->handle, 0); - if (success) { - - os_offset_t size = os_offset_t(size_in_pages) - << srv_page_size_shift; - - success = os_file_set_size( - node->name, node->handle, size, - FSP_FLAGS_HAS_PAGE_COMPRESSION(space->flags)); - - if (success) { - space->stop_new_ops = false; - space->is_being_truncated = false; - } + /* Stop all I/O on the tablespace and ensure that related + pages are flushed to disk. 
*/ + fil_space_t* space; + if (fil_check_pending_operations(space_id, FIL_OPERATION_TRUNCATE, + &space, NULL) != DB_SUCCESS) { + return NULL; } - - mutex_exit(&fil_system.mutex); - - return(success); + ut_ad(space != NULL); + return space; } -/*******************************************************************//** -Prepare for truncating a single-table tablespace. -1) Check pending operations on a tablespace; -2) Remove all insert buffer entries for the tablespace; -@return DB_SUCCESS or error */ -dberr_t -fil_prepare_for_truncate( -/*=====================*/ - ulint id) /*!< in: space id */ +/** Write log about an undo tablespace truncate operation. */ +void fil_truncate_log(fil_space_t* space, ulint size, mtr_t* mtr) { - char* path = 0; - fil_space_t* space = 0; - - ut_a(!is_system_tablespace(id)); - - dberr_t err = fil_check_pending_operations( - id, FIL_OPERATION_TRUNCATE, &space, &path); - - ut_free(path); - - if (err == DB_TABLESPACE_NOT_FOUND) { - ib::error() << "Cannot truncate tablespace " << id - << " because it is not found in the tablespace" - " memory cache."; - } - - return(err); + /* Write a MLOG_FILE_CREATE2 record with the new size, so that + recovery and backup will ignore any preceding redo log records + for writing pages that are after the new end of the tablespace. */ + ut_ad(UT_LIST_GET_LEN(space->chain) == 1); + const fil_node_t* file = UT_LIST_GET_FIRST(space->chain); + fil_op_write_log(MLOG_FILE_CREATE2, space->id, size, file->name, + NULL, space->flags & ~FSP_FLAGS_MEM_MASK, mtr); } /*******************************************************************//** @@ -4392,7 +4340,6 @@ fil_io( if (space->id != TRX_SYS_SPACE && UT_LIST_GET_LEN(space->chain) == 1 && (srv_is_tablespace_truncated(space->id) - || space->is_being_truncated || srv_was_tablespace_truncated(space)) && req_type.is_read()) { @@ -5004,7 +4951,7 @@ fil_space_validate_for_mtr_commit( fil_space_t::release() after mtr_commit(). 
This is why n_pending_ops should not be zero if stop_new_ops is set. */ ut_ad(!space->stop_new_ops - || space->is_being_truncated /* TRUNCATE sets stop_new_ops */ + || space->is_being_truncated /* fil_truncate_prepare() */ || space->referenced()); } #endif /* UNIV_DEBUG */ @@ -5230,7 +5177,6 @@ truncate_t::truncate( } space->stop_new_ops = false; - space->is_being_truncated = false; /* If we opened the file in this function, close it. */ if (!already_open) { @@ -5405,7 +5351,7 @@ fil_space_keyrotate_next( } /* Skip spaces that are being created by fil_ibd_create(), - or dropped or truncated. Note that rotation_list contains only + or dropped. Note that rotation_list contains only space->purpose == FIL_TYPE_TABLESPACE. */ while (space != NULL && (UT_LIST_GET_LEN(space->chain) == 0 diff --git a/storage/innobase/fsp/fsp0fsp.cc b/storage/innobase/fsp/fsp0fsp.cc index e46cb0d7cf1..26502086d81 100644 --- a/storage/innobase/fsp/fsp0fsp.cc +++ b/storage/innobase/fsp/fsp0fsp.cc @@ -625,7 +625,6 @@ fsp_space_modify_check( ut_ad(space->purpose == FIL_TYPE_TEMPORARY || space->purpose == FIL_TYPE_IMPORT || my_atomic_loadlint(&space->redo_skipped_count) - || space->is_being_truncated || srv_is_tablespace_truncated(space->id)); return; case MTR_LOG_ALL: diff --git a/storage/innobase/fts/fts0fts.cc b/storage/innobase/fts/fts0fts.cc index 7c26614b309..104700bc913 100644 --- a/storage/innobase/fts/fts0fts.cc +++ b/storage/innobase/fts/fts0fts.cc @@ -1469,7 +1469,8 @@ fts_drop_table( /* Pass nonatomic=false (dont allow data dict unlock), because the transaction may hold locks on SYS_* tables from previous calls to fts_drop_table(). 
*/ - error = row_drop_table_for_mysql(table_name, trx, true, false, false); + error = row_drop_table_for_mysql(table_name, trx, + SQLCOM_DROP_DB, false, false); if (error != DB_SUCCESS) { ib::error() << "Unable to drop FTS index aux table " @@ -1943,8 +1944,8 @@ func_exit: if (error != DB_SUCCESS) { for (it = common_tables.begin(); it != common_tables.end(); ++it) { - row_drop_table_for_mysql( - (*it)->name.m_name, trx, true, FALSE); + row_drop_table_for_mysql((*it)->name.m_name, trx, + SQLCOM_DROP_DB); } } @@ -2113,8 +2114,8 @@ fts_create_index_tables(trx_t* trx, const dict_index_t* index, table_id_t id) for (it = aux_idx_tables.begin(); it != aux_idx_tables.end(); ++it) { - row_drop_table_for_mysql( - (*it)->name.m_name, trx, true, FALSE); + row_drop_table_for_mysql((*it)->name.m_name, trx, + SQLCOM_DROP_DB); } } @@ -6689,7 +6690,8 @@ fts_drop_obsolete_aux_table_from_vector( trx_start_for_ddl(trx_drop, TRX_DICT_OP_TABLE); err = row_drop_table_for_mysql( - aux_drop_table->name, trx_drop, false, true); + aux_drop_table->name, trx_drop, + SQLCOM_DROP_TABLE, true); trx_drop->dict_operation_lock_mode = 0; diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index d2cb4439293..b65266fbf2e 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -1068,6 +1068,8 @@ static SHOW_VAR innodb_status_variables[]= { (char*) &export_vars.innodb_truncated_status_writes, SHOW_LONG}, {"available_undo_logs", (char*) &export_vars.innodb_available_undo_logs, SHOW_LONG}, + {"undo_truncations", + (char*) &export_vars.innodb_undo_truncations, SHOW_LONG}, /* Status variables for page compression */ {"page_compression_saved", @@ -12372,7 +12374,8 @@ create_table_info_t::create_table() trx_rollback_to_savepoint(m_trx, NULL); m_trx->error_state = DB_SUCCESS; - row_drop_table_for_mysql(m_table_name, m_trx, true, FALSE); + row_drop_table_for_mysql(m_table_name, m_trx, + SQLCOM_DROP_DB); m_trx->error_state = 
DB_SUCCESS; DBUG_RETURN(error); @@ -12548,17 +12551,21 @@ create_table_info_t::allocate_trx() @param[in] name Table name, format: "db/table_name". @param[in] form Table format; columns and index information. @param[in] create_info Create info (including create statement string). +@param[in] file_per_table whether to create .ibd file +@param[in,out] trx dictionary transaction, or NULL to create new @return 0 if success else error number. */ -int +inline int ha_innobase::create( const char* name, TABLE* form, - HA_CREATE_INFO* create_info) + HA_CREATE_INFO* create_info, + bool file_per_table, + trx_t* trx) { int error; char norm_name[FN_REFLEN]; /* {database}/{tablename} */ char remote_path[FN_REFLEN]; /* Absolute path of table */ - trx_t* trx; + DBUG_ENTER("ha_innobase::create"); DBUG_ASSERT(form->s == table_share); @@ -12569,7 +12576,8 @@ ha_innobase::create( form, create_info, norm_name, - remote_path); + remote_path, + file_per_table, trx); /* Initialize the object. */ if ((error = info.initialize())) { @@ -12581,9 +12589,11 @@ ha_innobase::create( DBUG_RETURN(error); } - info.allocate_trx(); - - trx = info.trx(); + bool own_trx = !trx; + if (own_trx) { + info.allocate_trx(); + trx = info.trx(); + } /* Latch the InnoDB data dictionary exclusively so that no deadlocks or lock waits can happen in it during a table create operation. 
@@ -12591,10 +12601,16 @@ ha_innobase::create( row_mysql_lock_data_dictionary(trx); if ((error = info.create_table())) { - goto cleanup; + if (own_trx) { + trx_rollback_for_mysql(trx); + } + row_mysql_unlock_data_dictionary(trx); + goto func_exit; } - innobase_commit_low(trx); + if (own_trx) { + innobase_commit_low(trx); + } ut_ad(!srv_read_only_mode); row_mysql_unlock_data_dictionary(trx); @@ -12609,17 +12625,26 @@ ha_innobase::create( utility threads: */ srv_active_wake_master_thread(); - - trx_free(trx); +func_exit: + if (own_trx) { + trx_free(trx); + } DBUG_RETURN(error); +} -cleanup: - trx_rollback_for_mysql(trx); - row_mysql_unlock_data_dictionary(trx); - trx_free(trx); - - DBUG_RETURN(error); +/** Create a new table to an InnoDB database. +@param[in] name Table name, format: "db/table_name". +@param[in] form Table format; columns and index information. +@param[in] create_info Create info (including create statement string). +@return 0 if success else error number. */ +int +ha_innobase::create( + const char* name, + TABLE* form, + HA_CREATE_INFO* create_info) +{ + return create(name, form, create_info, srv_file_per_table); } /*****************************************************************//** @@ -12742,74 +12767,16 @@ ha_innobase::discard_or_import_tablespace( DBUG_RETURN(convert_error_code_to_mysql(err, dict_table->flags, NULL)); } -/*****************************************************************//** -Deletes all rows of an InnoDB table. 
-@return error number */ - -int -ha_innobase::truncate() -/*===================*/ -{ - DBUG_ENTER("ha_innobase::truncate"); - - if (high_level_read_only) { - DBUG_RETURN(HA_ERR_TABLE_READONLY); - } - - /* Get the transaction associated with the current thd, or create one - if not yet created, and update m_prebuilt->trx */ - - update_thd(ha_thd()); - - m_prebuilt->trx->ddl = true; - trx_start_if_not_started(m_prebuilt->trx, true); - - dberr_t err = row_mysql_lock_table(m_prebuilt->trx, m_prebuilt->table, - LOCK_X, "truncate table"); - if (err == DB_SUCCESS) { - err = row_truncate_table_for_mysql(m_prebuilt->table, - m_prebuilt->trx); - } - - switch (err) { - case DB_FORCED_ABORT: - case DB_DEADLOCK: - thd_mark_transaction_to_rollback(m_user_thd, 1); - DBUG_RETURN(HA_ERR_LOCK_DEADLOCK); - case DB_LOCK_TABLE_FULL: - thd_mark_transaction_to_rollback(m_user_thd, 1); - DBUG_RETURN(HA_ERR_LOCK_TABLE_FULL); - case DB_LOCK_WAIT_TIMEOUT: - DBUG_RETURN(HA_ERR_LOCK_WAIT_TIMEOUT); - case DB_TABLESPACE_DELETED: - case DB_TABLESPACE_NOT_FOUND: - ib_senderrf( - m_prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR, - (err == DB_TABLESPACE_DELETED ? - ER_TABLESPACE_DISCARDED : ER_TABLESPACE_MISSING), - table->s->table_name.str); - table->status = STATUS_NOT_FOUND; - DBUG_RETURN(HA_ERR_TABLESPACE_MISSING); - default: - table->status = STATUS_NOT_FOUND; - DBUG_RETURN(convert_error_code_to_mysql( - err, m_prebuilt->table->flags, - m_user_thd)); - } -} - -/*****************************************************************//** +/** Drops a table from an InnoDB database. Before calling this function, MySQL calls innobase_commit to commit the transaction of the current user. Then the current user cannot have locks set on the table. Drop table operation inside InnoDB will remove all locks any user has on the table inside InnoDB. +@param[in] name table name +@param[in] sqlcom SQLCOM_DROP_DB, SQLCOM_TRUNCATE, ... 
@return error number */ - -int -ha_innobase::delete_table( -/*======================*/ - const char* name) /*!< in: table name */ +inline int ha_innobase::delete_table(const char* name, enum_sql_command sqlcom) { dberr_t err; THD* thd = ha_thd(); @@ -12873,9 +12840,7 @@ ha_innobase::delete_table( /* Drop the table in InnoDB */ - err = row_drop_table_for_mysql( - norm_name, trx, thd_sql_command(thd) == SQLCOM_DROP_DB, - false); + err = row_drop_table_for_mysql(norm_name, trx, sqlcom); if (err == DB_TABLE_NOT_FOUND && innobase_get_lower_case_table_names() == 1) { @@ -12899,9 +12864,7 @@ ha_innobase::delete_table( par_case_name, name, FALSE); #endif err = row_drop_table_for_mysql( - par_case_name, trx, - thd_sql_command(thd) == SQLCOM_DROP_DB, - FALSE); + par_case_name, trx, sqlcom); } } @@ -12964,9 +12927,7 @@ ha_innobase::delete_table( par_case_name, name, FALSE); #endif /* _WIN32 */ err = row_drop_table_for_mysql( - par_case_name, trx, - thd_sql_command(thd) == SQLCOM_DROP_DB, - true); + par_case_name, trx, sqlcom, true); } } @@ -12984,6 +12945,24 @@ ha_innobase::delete_table( DBUG_RETURN(convert_error_code_to_mysql(err, 0, NULL)); } +/** Drop an InnoDB table. +@param[in] name table name +@return error number */ +int ha_innobase::delete_table(const char* name) +{ + enum_sql_command sqlcom = enum_sql_command(thd_sql_command(ha_thd())); + + if (sqlcom == SQLCOM_TRUNCATE + && thd_killed(ha_thd()) + && (m_prebuilt == NULL || m_prebuilt->table->is_temporary())) { + sqlcom = SQLCOM_DROP_TABLE; + } + + /* SQLCOM_TRUNCATE will be passed via ha_innobase::truncate() only. */ + DBUG_ASSERT(sqlcom != SQLCOM_TRUNCATE); + return delete_table(name, sqlcom); +} + /** Remove all tables in the named database inside InnoDB. @param[in] hton handlerton from InnoDB @param[in] path Database path; Inside InnoDB the name of the last @@ -13058,7 +13037,7 @@ innobase_drop_database( /*********************************************************************//** Renames an InnoDB table. 
@return DB_SUCCESS or error code */ -inline MY_ATTRIBUTE((warn_unused_result)) +inline dberr_t innobase_rename_table( /*==================*/ @@ -13071,7 +13050,8 @@ innobase_rename_table( char norm_from[FN_REFLEN]; DBUG_ENTER("innobase_rename_table"); - DBUG_ASSERT(trx_get_dict_operation(trx) == TRX_DICT_OP_INDEX); + DBUG_ASSERT(trx_get_dict_operation(trx) == TRX_DICT_OP_INDEX + || trx_get_dict_operation(trx) == TRX_DICT_OP_TABLE); ut_ad(!srv_read_only_mode); @@ -13178,6 +13158,79 @@ innobase_rename_table( DBUG_RETURN(error); } +/** TRUNCATE TABLE +@return error code +@retval 0 on success */ +int ha_innobase::truncate() +{ + DBUG_ENTER("ha_innobase::truncate"); + + if (high_level_read_only) { + DBUG_RETURN(HA_ERR_TABLE_READONLY); + } + + update_thd(); + + HA_CREATE_INFO info; + mem_heap_t* heap = mem_heap_create(1000); + dict_table_t* ib_table = m_prebuilt->table; + const time_t update_time = ib_table->update_time; + const ulint stored_lock = m_prebuilt->stored_select_lock_type; + memset(&info, 0, sizeof info); + update_create_info_from_table(&info, table); + + if (ib_table->is_temporary()) { + info.options|= HA_LEX_CREATE_TMP_TABLE; + } else { + dict_get_and_save_data_dir_path(ib_table, false); + } + + char* data_file_name = ib_table->data_dir_path; + + if (data_file_name) { + info.data_file_name = data_file_name + = mem_heap_strdup(heap, data_file_name); + } + + const char* temp_name = dict_mem_create_temporary_tablename( + heap, ib_table->name.m_name, ib_table->id); + const char* name = mem_heap_strdup(heap, ib_table->name.m_name); + trx_t* trx = innobase_trx_allocate(m_user_thd); + + ++trx->will_lock; + trx_set_dict_operation(trx, TRX_DICT_OP_TABLE); + int err = convert_error_code_to_mysql( + innobase_rename_table(trx, ib_table->name.m_name, temp_name), + ib_table->flags, m_user_thd); + if (!err) { + err = create(name, table, &info, + dict_table_is_file_per_table(ib_table), trx); + } + + if (err) { + innobase_rename_table(trx, temp_name, name); + 
trx_rollback_to_savepoint(trx, NULL); + } + + innobase_commit_low(trx); + trx_free(trx); + + if (!err) { + /* Reopen the newly created table, and drop the + original table that was renamed to temp_name. */ + close(); + err = open(name, 0, 0); + if (!err) { + m_prebuilt->stored_select_lock_type = stored_lock; + m_prebuilt->table->update_time = update_time; + delete_table(temp_name, SQLCOM_TRUNCATE); + } + } + + mem_heap_free(heap); + DBUG_RETURN(err); +} + /*********************************************************************//** Renames an InnoDB table. @return 0 or error code */ diff --git a/storage/innobase/handler/ha_innodb.h b/storage/innobase/handler/ha_innodb.h index c98779f6823..fd8ea696faa 100644 --- a/storage/innobase/handler/ha_innodb.h +++ b/storage/innobase/handler/ha_innodb.h @@ -26,6 +26,9 @@ extern const char innobase_index_reserve_name[]; /** Prebuilt structures in an InnoDB table handle used within MySQL */ struct row_prebuilt_t; +/** InnoDB transaction */ +struct trx_t; + /** Engine specific table options are defined using this struct */ struct ha_table_option_struct { @@ -188,6 +191,13 @@ public: void update_create_info(HA_CREATE_INFO* create_info); + inline int create( + const char* name, + TABLE* form, + HA_CREATE_INFO* create_info, + bool file_per_table, + trx_t* trx = NULL); + int create( const char* name, TABLE* form, @@ -196,6 +206,8 @@ public: const char* check_table_options(THD *thd, TABLE* table, HA_CREATE_INFO* create_info, const bool use_tablespace, const ulint file_format); + inline int delete_table(const char* name, enum_sql_command sqlcom); + int truncate(); int delete_table(const char *name); @@ -645,13 +657,16 @@ public: TABLE* form, HA_CREATE_INFO* create_info, char* table_name, - char* remote_path) + char* remote_path, + bool file_per_table, + trx_t* trx = NULL) :m_thd(thd), + m_trx(trx), m_form(form), m_create_info(create_info), m_table_name(table_name), m_table(NULL), m_remote_path(remote_path), - 
m_innodb_file_per_table(srv_file_per_table) + m_innodb_file_per_table(file_per_table) {} /** Initialize the object. */ diff --git a/storage/innobase/handler/handler0alter.cc b/storage/innobase/handler/handler0alter.cc index f387d809956..d0afac185f3 100644 --- a/storage/innobase/handler/handler0alter.cc +++ b/storage/innobase/handler/handler0alter.cc @@ -6285,7 +6285,8 @@ ha_innobase::prepare_inplace_alter_table( altered_table, ha_alter_info->create_info, NULL, - NULL); + NULL, + srv_file_per_table); info.set_tablespace_type(indexed_table->space != fil_system.sys_space); diff --git a/storage/innobase/ibuf/ibuf0ibuf.cc b/storage/innobase/ibuf/ibuf0ibuf.cc index 13d58e45ee8..30e66c1110a 100644 --- a/storage/innobase/ibuf/ibuf0ibuf.cc +++ b/storage/innobase/ibuf/ibuf0ibuf.cc @@ -4779,7 +4779,7 @@ reset_bit: /*********************************************************************//** Deletes all entries in the insert buffer for a given space id. This is used -in DISCARD TABLESPACE, IMPORT TABLESPACE and TRUNCATE TABLESPACE. +in DISCARD TABLESPACE, IMPORT TABLESPACE, and 5.7 TRUNCATE TABLE recovery. NOTE: this does not update the page free bitmaps in the space. The space will become CORRUPT when you call this function! */ void diff --git a/storage/innobase/include/buf0lru.h b/storage/innobase/include/buf0lru.h index dd7129a86ac..d3e953ad9c7 100644 --- a/storage/innobase/include/buf0lru.h +++ b/storage/innobase/include/buf0lru.h @@ -65,8 +65,10 @@ bool buf_LRU_drop_page_hash_for_tablespace(dict_table_t* table) /** Empty the flush list for all pages belonging to a tablespace. 
@param[in] id tablespace identifier @param[in,out] observer flush observer, - or NULL if nothing is to be written */ -void buf_LRU_flush_or_remove_pages(ulint id, FlushObserver* observer); + or NULL if nothing is to be written +@param[in] first first page to be flushed or evicted */ +void buf_LRU_flush_or_remove_pages(ulint id, FlushObserver* observer, + ulint first = 0); #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG /********************************************************************//** diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h index eb00239ca4e..890684af67e 100644 --- a/storage/innobase/include/fil0fil.h +++ b/storage/innobase/include/fil0fil.h @@ -97,10 +97,8 @@ struct fil_space_t { new write operations because we don't check this flag when doing flush batches. */ + /** whether undo tablespace truncation is in progress */ bool is_being_truncated; - /*!< this is set to true when we prepare to - truncate a single-table tablespace and its - .ibd file */ #ifdef UNIV_DEBUG ulint redo_skipped_count; /*!< reference count for operations who want @@ -181,12 +179,8 @@ struct fil_space_t { ulint magic_n;/*!< FIL_SPACE_MAGIC_N */ - /** @return whether the tablespace is about to be dropped or - truncated */ - bool is_stopping() const - { - return stop_new_ops || is_being_truncated; - } + /** @return whether the tablespace is about to be dropped */ + bool is_stopping() const { return stop_new_ops; } /** @return whether doublewrite buffering is needed */ bool use_doublewrite() const @@ -881,7 +875,8 @@ fil_op_replay_rename( not (necessarily) protected by meta-data locks. (Rollback would generally be protected, but rollback of FOREIGN KEY CASCADE/SET NULL is not protected by meta-data locks -but only by InnoDB table locks, which may be broken by TRUNCATE TABLE.) +but only by InnoDB table locks, which may be broken by +lock_remove_all_on_table().) 
@param[in] table persistent table checked @return whether the table is accessible */ bool @@ -899,22 +894,15 @@ fil_delete_tablespace( #endif /* BTR_CUR_HASH_ADAPT */ ); -/** Truncate the tablespace to needed size. -@param[in,out] space tablespace truncate -@param[in] size_in_pages truncate size. -@return true if truncate was successful. */ -bool fil_truncate_tablespace(fil_space_t* space, ulint size_in_pages); +/** Prepare to truncate an undo tablespace. +@param[in] space_id undo tablespace id +@return the tablespace +@retval NULL if the tablespace does not exist */ +fil_space_t* fil_truncate_prepare(ulint space_id); -/*******************************************************************//** -Prepare for truncating a single-table tablespace. The tablespace -must be cached in the memory cache. -1) Check pending operations on a tablespace; -2) Remove all insert buffer entries for the tablespace; -@return DB_SUCCESS or error */ -dberr_t -fil_prepare_for_truncate( -/*=====================*/ - ulint id); /*!< in: space id */ +/** Write log about an undo tablespace truncate operation. */ +void fil_truncate_log(fil_space_t* space, ulint size, mtr_t* mtr) + MY_ATTRIBUTE((nonnull)); /*******************************************************************//** Closes a single-table tablespace. The tablespace must be cached in the diff --git a/storage/innobase/include/log0log.h b/storage/innobase/include/log0log.h index fd8295cfe12..b215ba34a77 100644 --- a/storage/innobase/include/log0log.h +++ b/storage/innobase/include/log0log.h @@ -450,10 +450,12 @@ to this checkpoint, or 0 if the information has not been written */ This used to be called LOG_GROUP_ID and always written as 0, because InnoDB never supported more than one copy of the redo log. */ #define LOG_HEADER_FORMAT 0 -/** 4 unused (zero-initialized) bytes. In format version 0, the +/** Redo log subformat (originally 0). 
In format version 0, the LOG_FILE_START_LSN started here, 4 bytes earlier than LOG_HEADER_START_LSN, -which the LOG_FILE_START_LSN was renamed to. */ -#define LOG_HEADER_PAD1 4 +which the LOG_FILE_START_LSN was renamed to. +Subformat 1 is for the fully redo-logged TRUNCATE +(no MLOG_TRUNCATE records or extra log checkpoints or log files) */ +#define LOG_HEADER_SUBFORMAT 4 /** LSN of the start of data in this log file (with format version 1; in format version 0, it was called LOG_FILE_START_LSN and at offset 4). */ #define LOG_HEADER_START_LSN 8 @@ -474,11 +476,18 @@ or the MySQL version that created the redo log file. */ #define LOG_HEADER_FORMAT_3_23 0 /** The MySQL 5.7.9/MariaDB 10.2.2 log format */ #define LOG_HEADER_FORMAT_10_2 1 -/** The MariaDB 10.3.2 log format */ +/** The MariaDB 10.3.2 log format. +To prevent crash-downgrade to earlier 10.2 due to the inability to +roll back a retroactively introduced TRX_UNDO_RENAME_TABLE undo log record, +MariaDB 10.2.18 and later will use the 10.3 format, but LOG_HEADER_SUBFORMAT +1 instead of 0. MariaDB 10.3 will use subformat 0 (5.7-style TRUNCATE) or 2 +(MDEV-13564 backup-friendly TRUNCATE). */ #define LOG_HEADER_FORMAT_10_3 103 /** The redo log format identifier corresponding to the current format version. Stored in LOG_HEADER_FORMAT. 
*/ #define LOG_HEADER_FORMAT_CURRENT LOG_HEADER_FORMAT_10_3 +/** Future MariaDB 10.4 log format */ +#define LOG_HEADER_FORMAT_10_4 104 /** Encrypted MariaDB redo log */ #define LOG_HEADER_FORMAT_ENCRYPTED (1U<<31) @@ -549,7 +558,10 @@ struct log_t{ /** number of files */ ulint n_files; /** format of the redo log: e.g., LOG_HEADER_FORMAT_CURRENT */ - ulint format; + uint32_t format; + /** redo log subformat: 0 with separately logged TRUNCATE, + 2 with fully redo-logged TRUNCATE (1 in MariaDB 10.2) */ + uint32_t subformat; /** individual log file size in bytes, including the header */ lsn_t file_size; /** lsn used to fix coordinates within the log group */ diff --git a/storage/innobase/include/log0recv.h b/storage/innobase/include/log0recv.h index 99530a3799c..d15ec19d86b 100644 --- a/storage/innobase/include/log0recv.h +++ b/storage/innobase/include/log0recv.h @@ -267,6 +267,15 @@ struct recv_sys_t{ ulint n_addrs;/*!< number of not processed hashed file addresses in the hash table */ + /** Undo tablespaces for which truncate has been logged + (indexed by id - srv_undo_space_id_start) */ + struct trunc { + /** log sequence number of MLOG_FILE_CREATE2, or 0 if none */ + lsn_t lsn; + /** truncated size of the tablespace, or 0 if not truncated */ + unsigned pages; + } truncated_undo_spaces[127]; + recv_dblwr_t dblwr; /** Lastly added LSN to the hash table of log records. */ diff --git a/storage/innobase/include/os0file.h b/storage/innobase/include/os0file.h index 20cab0ca7e9..71da751ad25 100644 --- a/storage/innobase/include/os0file.h +++ b/storage/innobase/include/os0file.h @@ -1241,17 +1241,18 @@ bool os_file_set_eof( FILE* file); /*!< in: file to be truncated */ -/** Truncates a file to a specified size in bytes. Do nothing if the size -preserved is smaller or equal than current size of file. +/** Truncate a file to a specified size in bytes. 
@param[in] pathname file path @param[in] file file to be truncated @param[in] size size preserved in bytes +@param[in] allow_shrink whether to allow the file to become smaller @return true if success */ bool os_file_truncate( const char* pathname, os_file_t file, - os_offset_t size); + os_offset_t size, + bool allow_shrink = false); /** NOTE! Use the corresponding macro os_file_flush(), not directly this function! diff --git a/storage/innobase/include/row0mysql.h b/storage/innobase/include/row0mysql.h index 4d8b055e13f..c59248d88c4 100644 --- a/storage/innobase/include/row0mysql.h +++ b/storage/innobase/include/row0mysql.h @@ -29,6 +29,8 @@ Created 9/17/2000 Heikki Tuuri #define row0mysql_h #include "ha_prototypes.h" +#include "sql_list.h" +#include "sql_cmd.h" #include "data0data.h" #include "que0types.h" @@ -451,32 +453,28 @@ row_mysql_lock_table( const char* op_info) /*!< in: string for trx->op_info */ MY_ATTRIBUTE((nonnull, warn_unused_result)); -/*********************************************************************//** -Truncates a table for MySQL. -@return error code or DB_SUCCESS */ -dberr_t -row_truncate_table_for_mysql( -/*=========================*/ - dict_table_t* table, /*!< in: table handle */ - trx_t* trx) /*!< in: transaction handle */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/*********************************************************************//** -Drops a table for MySQL. If the data dictionary was not already locked -by the transaction, the transaction will be committed. Otherwise, the -data dictionary will remain locked. -@return error code or DB_SUCCESS */ +/** Drop a table. +If the data dictionary was not already locked by the transaction, +the transaction will be committed. Otherwise, the data dictionary +will remain locked. +@param[in] name Table name +@param[in,out] trx Transaction handle +@param[in] sqlcom type of SQL operation +@param[in] create_failed true=create table failed + because e.g. 
foreign key column +@param[in] nonatomic Whether it is permitted to release + and reacquire dict_operation_lock +@return error code */ dberr_t row_drop_table_for_mysql( -/*=====================*/ - const char* name, /*!< in: table name */ - trx_t* trx, /*!< in: dictionary transaction handle */ - bool drop_db,/*!< in: true=dropping whole database */ - ibool create_failed,/*!<in: TRUE=create table failed - because e.g. foreign key column - type mismatch. */ - bool nonatomic = true); - /*!< in: whether it is permitted - to release and reacquire dict_operation_lock */ + const char* name, + trx_t* trx, + enum_sql_command sqlcom, + bool create_failed = false, + bool nonatomic = true); + +/** Drop a table after failed CREATE TABLE. */ +dberr_t row_drop_table_after_create_fail(const char* name, trx_t* trx); /*********************************************************************//** Discards the tablespace of a table which stored in an .ibd file. Discarding diff --git a/storage/innobase/include/row0trunc.h b/storage/innobase/include/row0trunc.h index 94b6b7046b4..993dac295da 100644 --- a/storage/innobase/include/row0trunc.h +++ b/storage/innobase/include/row0trunc.h @@ -414,14 +414,4 @@ private: const char* log_file_name); }; - -/** -Truncates a table for MySQL. 
-@param table table being truncated -@param trx transaction covering the truncate -@return error code or DB_SUCCESS */ -dberr_t -row_truncate_table_for_mysql(dict_table_t* table, trx_t* trx); - #endif /* row0trunc_h */ - diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h index ee82381032c..422b8ef39e4 100644 --- a/storage/innobase/include/srv0srv.h +++ b/storage/innobase/include/srv0srv.h @@ -997,6 +997,8 @@ struct export_var_t{ ulint innodb_truncated_status_writes; /*!< srv_truncated_status_writes */ ulint innodb_available_undo_logs; /*!< srv_available_undo_logs */ + /** Number of undo tablespace truncation operations */ + ulong innodb_undo_truncations; ulint innodb_defragment_compression_failures; /*!< Number of defragment re-compression failures */ diff --git a/storage/innobase/include/trx0purge.h b/storage/innobase/include/trx0purge.h index 171f1d2ce86..27807321212 100644 --- a/storage/innobase/include/trx0purge.h +++ b/storage/innobase/include/trx0purge.h @@ -140,27 +140,6 @@ namespace undo { typedef std::vector<ulint> undo_spaces_t; typedef std::vector<trx_rseg_t*> rseg_for_trunc_t; - /** Magic Number to indicate truncate action is complete. */ - const ib_uint32_t s_magic = 76845412; - - /** Truncate Log file Prefix. */ - const char* const s_log_prefix = "undo_"; - - /** Truncate Log file Extension. */ - const char* const s_log_ext = "trunc.log"; - - /** Populate log file name based on space_id - @param[in] space_id id of the undo tablespace. - @return DB_SUCCESS or error code */ - dberr_t populate_log_file_name( - ulint space_id, - char*& log_file_name); - - /** Create the truncate log file. - @param[in] space_id id of the undo tablespace to truncate. - @return DB_SUCCESS or error code. */ - dberr_t init(ulint space_id); - /** Mark completion of undo truncate action by writing magic number to the log file and then removing it from the disk. If we are going to remove it from disk then why write magic number ? 
@@ -322,23 +301,6 @@ namespace undo { return(m_purge_rseg_truncate_frequency); } - /* Start writing log information to a special file. - On successfull completion, file is removed. - On crash, file is used to complete the truncate action. - @param space_id space id of undo tablespace - @return DB_SUCCESS or error code. */ - dberr_t start_logging(ulint space_id) - { - return(init(space_id)); - } - - /* Mark completion of logging./ - @param space_id space id of undo tablespace */ - void done_logging(ulint space_id) - { - return(done(space_id)); - } - private: /** UNDO tablespace is mark for truncate. */ ulint m_undo_for_trunc; diff --git a/storage/innobase/include/trx0undo.h b/storage/innobase/include/trx0undo.h index 7d4e632d3ce..16e2a384424 100644 --- a/storage/innobase/include/trx0undo.h +++ b/storage/innobase/include/trx0undo.h @@ -260,18 +260,6 @@ trx_undo_commit_cleanup(trx_undo_t* undo, bool is_temp); void trx_undo_free_at_shutdown(trx_t *trx); -/* Forward declaration. */ -namespace undo { - class Truncate; -}; - -/** Truncate UNDO tablespace, reinitialize header and rseg. -@param[in] undo_trunc UNDO tablespace handler -@return true if success else false. */ -bool -trx_undo_truncate_tablespace( - undo::Truncate* undo_trunc); - /** Parse MLOG_UNDO_INIT. @param[in] ptr log record @param[in] end_ptr end of log record buffer diff --git a/storage/innobase/log/log0log.cc b/storage/innobase/log/log0log.cc index c871acb5389..29c781bcce7 100644 --- a/storage/innobase/log/log0log.cc +++ b/storage/innobase/log/log0log.cc @@ -670,6 +670,7 @@ void log_t::files::create(ulint n_files) format= srv_encrypt_log ? 
LOG_HEADER_FORMAT_CURRENT | LOG_HEADER_FORMAT_ENCRYPTED : LOG_HEADER_FORMAT_CURRENT; + subformat= 2; file_size= srv_log_file_size; lsn= LOG_START_LSN; lsn_offset= LOG_FILE_HDR_SIZE; @@ -708,6 +709,7 @@ log_file_header_flush( memset(buf, 0, OS_FILE_LOG_BLOCK_SIZE); mach_write_to_4(buf + LOG_HEADER_FORMAT, log_sys.log.format); + mach_write_to_4(buf + LOG_HEADER_SUBFORMAT, log_sys.log.subformat); mach_write_to_8(buf + LOG_HEADER_START_LSN, start_lsn); strcpy(reinterpret_cast<char*>(buf) + LOG_HEADER_CREATOR, LOG_HEADER_CREATOR_CURRENT); diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index fe42de2b7a2..07acb9dec18 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -131,8 +131,7 @@ bool recv_writer_thread_active; /** Return string name of the redo log record type. @param[in] type record log record enum @return string name of record log record */ -const char* -get_mlog_string(mlog_id_t type); +static const char* get_mlog_string(mlog_id_t type); #endif /* !DBUG_OFF */ /** Tablespace item during recovery */ @@ -219,6 +218,75 @@ void (*log_file_op)(ulint space_id, const byte* flags, const byte* name, ulint len, const byte* new_name, ulint new_len); +/** Process a MLOG_CREATE2 record that indicates that a tablespace +is being shrunk in size. 
+@param[in] space_id tablespace identifier +@param[in] pages trimmed size of the file, in pages +@param[in] lsn log sequence number of the operation */ +static void recv_addr_trim(ulint space_id, unsigned pages, lsn_t lsn) +{ + DBUG_ENTER("recv_addr_trim"); + DBUG_LOG("ib_log", + "discarding log beyond end of tablespace " + << page_id_t(space_id, pages) << " before LSN " << lsn); + ut_ad(mutex_own(&recv_sys->mutex)); + for (ulint i = recv_sys->addr_hash->n_cells; i--; ) { + hash_cell_t* const cell = hash_get_nth_cell( + recv_sys->addr_hash, i); + for (recv_addr_t* addr = static_cast<recv_addr_t*>(cell->node), + *prev = NULL, *next; + addr; + prev = addr, addr = next) { + next = static_cast<recv_addr_t*>(addr->addr_hash); + + if (addr->space != space_id || addr->page_no < pages) { + continue; + } + + for (recv_t* recv = UT_LIST_GET_FIRST(addr->rec_list); + recv; ) { + recv_t* n = UT_LIST_GET_NEXT(rec_list, recv); + if (recv->start_lsn < lsn) { + DBUG_PRINT("ib_log", + ("Discarding %s for" + " page %u:%u at " LSN_PF, + get_mlog_string( + recv->type), + addr->space, addr->page_no, + recv->start_lsn)); + UT_LIST_REMOVE(addr->rec_list, recv); + } + recv = n; + } + + if (UT_LIST_GET_LEN(addr->rec_list)) { + DBUG_PRINT("ib_log", + ("preserving " ULINTPF + " records for page %u:%u", + UT_LIST_GET_LEN(addr->rec_list), + addr->space, addr->page_no)); + } else { + ut_ad(recv_sys->n_addrs); + --recv_sys->n_addrs; + if (addr == cell->node) { + cell->node = next; + } else { + prev->addr_hash = next; + } + } + } + } + if (fil_space_t* space = fil_space_get(space_id)) { + ut_ad(UT_LIST_GET_LEN(space->chain) == 1); + fil_node_t* file = UT_LIST_GET_FIRST(space->chain); + ut_ad(file->is_open()); + os_file_truncate(file->name, file->handle, + os_offset_t(pages) << srv_page_size_shift, + true); + } + DBUG_VOID_RETURN; +} + /** Process a file name from a MLOG_FILE_* record. 
@param[in,out] name file name @param[in] len length of the file name @@ -391,9 +459,8 @@ fil_name_parse( user-created tablespaces. The name must be long enough and end in .ibd. */ bool corrupt = is_predefined_tablespace(space_id) - || first_page_no != 0 // TODO: multi-file user tablespaces || len < sizeof "/a.ibd\0" - || memcmp(ptr + len - 5, DOT_IBD, 5) != 0 + || (!first_page_no != !memcmp(ptr + len - 5, DOT_IBD, 5)) || memchr(ptr, OS_PATH_SEPARATOR, len) == NULL; byte* end_ptr = ptr + len; @@ -422,7 +489,18 @@ fil_name_parse( reinterpret_cast<char*>(ptr), len, space_id, true); /* fall through */ case MLOG_FILE_CREATE2: - if (log_file_op) { + if (first_page_no) { + ut_ad(first_page_no + == SRV_UNDO_TABLESPACE_SIZE_IN_PAGES); + ut_a(srv_is_undo_tablespace(space_id)); + compile_time_assert( + UT_ARR_SIZE(recv_sys->truncated_undo_spaces) + == TRX_SYS_MAX_UNDO_SPACES); + recv_sys_t::trunc& t = recv_sys->truncated_undo_spaces[ + space_id - srv_undo_space_id_start]; + t.lsn = recv_sys->recovered_lsn; + t.pages = uint32_t(first_page_no); + } else if (log_file_op) { log_file_op(space_id, type == MLOG_FILE_CREATE2 ? ptr - 4 : NULL, ptr, len, NULL, 0); @@ -969,6 +1047,54 @@ static dberr_t recv_log_format_0_recover(lsn_t lsn, bool crypt) return(DB_SUCCESS); } +/** Determine if a redo log from MariaDB 10.4 is clean. 
+@return error code +@retval DB_SUCCESS if the redo log is clean +@retval DB_CORRUPTION if the redo log is corrupted +@retval DB_ERROR if the redo log is not empty */ +static dberr_t recv_log_recover_10_4() +{ + ut_ad(!log_sys.is_encrypted()); + const lsn_t lsn = log_sys.log.lsn; + log_mutex_enter(); + const lsn_t source_offset = log_sys.log.calc_lsn_offset(lsn); + log_mutex_exit(); + const ulint page_no + = (ulint) (source_offset / univ_page_size.physical()); + byte* buf = log_sys.buf; + + fil_io(IORequestLogRead, true, + page_id_t(SRV_LOG_SPACE_FIRST_ID, page_no), + univ_page_size, + (ulint) ((source_offset & ~(OS_FILE_LOG_BLOCK_SIZE - 1)) + % univ_page_size.physical()), + OS_FILE_LOG_BLOCK_SIZE, buf, NULL); + + if (log_block_calc_checksum(buf) != log_block_get_checksum(buf)) { + return DB_CORRUPTION; + } + + /* On a clean shutdown, the redo log will be logically empty + after the checkpoint lsn. */ + + if (log_block_get_data_len(buf) + != (source_offset & (OS_FILE_LOG_BLOCK_SIZE - 1))) { + return DB_ERROR; + } + + /* Mark the redo log for downgrading. */ + srv_log_file_size = 0; + recv_sys->parse_start_lsn = recv_sys->recovered_lsn + = recv_sys->scanned_lsn + = recv_sys->mlog_checkpoint_lsn = lsn; + log_sys.last_checkpoint_lsn = log_sys.next_checkpoint_lsn + = log_sys.lsn = log_sys.write_lsn + = log_sys.current_flush_lsn = log_sys.flushed_to_disk_lsn + = lsn; + log_sys.next_checkpoint_no = 0; + return DB_SUCCESS; +} + /** Find the latest checkpoint in the log header. @param[out] max_field LOG_CHECKPOINT_1 or LOG_CHECKPOINT_2 @return error code or DB_SUCCESS */ @@ -989,6 +1115,9 @@ recv_find_max_checkpoint(ulint* max_field) /* Check the header page checksum. There was no checksum in the first redo log format (version 0). */ log_sys.log.format = mach_read_from_4(buf + LOG_HEADER_FORMAT); + log_sys.log.subformat = log_sys.log.format != LOG_HEADER_FORMAT_3_23 + ? 
mach_read_from_4(buf + LOG_HEADER_SUBFORMAT) + : 0; if (log_sys.log.format != LOG_HEADER_FORMAT_3_23 && !recv_check_log_header_checksum(buf)) { ib::error() << "Invalid redo log header checksum."; @@ -1008,6 +1137,9 @@ recv_find_max_checkpoint(ulint* max_field) case LOG_HEADER_FORMAT_10_2 | LOG_HEADER_FORMAT_ENCRYPTED: case LOG_HEADER_FORMAT_CURRENT: case LOG_HEADER_FORMAT_CURRENT | LOG_HEADER_FORMAT_ENCRYPTED: + case LOG_HEADER_FORMAT_10_4: + /* We can only parse the unencrypted LOG_HEADER_FORMAT_10_4. + The encrypted format uses a larger redo log block trailer. */ break; default: ib::error() << "Unsupported redo log format." @@ -1072,7 +1204,19 @@ recv_find_max_checkpoint(ulint* max_field) return(DB_ERROR); } - return(DB_SUCCESS); + if (log_sys.log.format == LOG_HEADER_FORMAT_10_4) { + dberr_t err = recv_log_recover_10_4(); + if (err != DB_SUCCESS) { + ib::error() + << "Downgrade after a crash is not supported." + " The redo log was created with " << creator + << (err == DB_ERROR + ? "." : ", and it appears corrupted."); + } + return err; + } + + return DB_SUCCESS; } /** Try to parse a single log record body and also applies it if @@ -2020,6 +2164,14 @@ recv_apply_hashed_log_recs(bool last_batch) recv_sys->apply_log_recs = TRUE; recv_sys->apply_batch_on = TRUE; + for (ulint id = srv_undo_tablespaces_open; id--; ) { + recv_sys_t::trunc& t = recv_sys->truncated_undo_spaces[id]; + if (t.lsn) { + recv_addr_trim(id + srv_undo_space_id_start, t.pages, + t.lsn); + } + } + for (ulint i = 0; i < hash_get_n_cells(recv_sys->addr_hash); i++) { for (recv_addr_t* recv_addr = static_cast<recv_addr_t*>( HASH_GET_FIRST(recv_sys->addr_hash, i)); @@ -3638,8 +3790,7 @@ recv_dblwr_t::find_page(ulint space_id, ulint page_no) /** Return string name of the redo log record type. 
@param[in] type record log record enum @return string name of record log record */ -const char* -get_mlog_string(mlog_id_t type) +static const char* get_mlog_string(mlog_id_t type) { switch (type) { case MLOG_SINGLE_REC_FLAG: diff --git a/storage/innobase/os/os0file.cc b/storage/innobase/os/os0file.cc index 208fe356fbe..5d0f3f4fc9c 100644 --- a/storage/innobase/os/os0file.cc +++ b/storage/innobase/os/os0file.cc @@ -5407,25 +5407,27 @@ fallback: return(current_size >= size && os_file_flush(file)); } -/** Truncates a file to a specified size in bytes. -Do nothing if the size to preserve is greater or equal to the current -size of the file. +/** Truncate a file to a specified size in bytes. @param[in] pathname file path @param[in] file file to be truncated -@param[in] size size to preserve in bytes +@param[in] size size preserved in bytes +@param[in] allow_shrink whether to allow the file to become smaller @return true if success */ bool os_file_truncate( const char* pathname, os_file_t file, - os_offset_t size) + os_offset_t size, + bool allow_shrink) { - /* Do nothing if the size preserved is larger than or equal to the - current size of file */ - os_offset_t size_bytes = os_file_get_size(file); + if (!allow_shrink) { + /* Do nothing if the size preserved is larger than or + equal to the current size of file */ + os_offset_t size_bytes = os_file_get_size(file); - if (size >= size_bytes) { - return(true); + if (size >= size_bytes) { + return(true); + } } #ifdef _WIN32 diff --git a/storage/innobase/row/row0ins.cc b/storage/innobase/row/row0ins.cc index c6ac98f1082..37ae828d09d 100644 --- a/storage/innobase/row/row0ins.cc +++ b/storage/innobase/row/row0ins.cc @@ -1660,8 +1660,7 @@ row_ins_check_foreign_constraint( if (check_table == NULL || !check_table->is_readable() - || check_index == NULL - || check_table->space->is_being_truncated) { + || check_index == NULL) { if (!srv_read_only_mode && check_ref) { FILE* ef = dict_foreign_err_file; diff --git 
a/storage/innobase/row/row0merge.cc b/storage/innobase/row/row0merge.cc index 000da4b0562..eaddb8b0432 100644 --- a/storage/innobase/row/row0merge.cc +++ b/storage/innobase/row/row0merge.cc @@ -4516,7 +4516,7 @@ row_merge_drop_table( ut_a(table->get_ref_count() == 0); return(row_drop_table_for_mysql(table->name.m_name, - trx, false, false, false)); + trx, SQLCOM_DROP_TABLE, false, false)); } /** Write an MLOG_INDEX_LOAD record to indicate in the redo-log diff --git a/storage/innobase/row/row0mysql.cc b/storage/innobase/row/row0mysql.cc index 7c68bf6f7c2..a1f5743007a 100644 --- a/storage/innobase/row/row0mysql.cc +++ b/storage/innobase/row/row0mysql.cc @@ -2608,7 +2608,8 @@ error_handling: trx_rollback_to_savepoint(trx, NULL); } - row_drop_table_for_mysql(table->name.m_name, trx, FALSE, true); + row_drop_table_for_mysql(table->name.m_name, trx, + SQLCOM_DROP_TABLE, true); if (trx_is_started(trx)) { @@ -2691,7 +2692,7 @@ row_table_add_foreign_constraints( trx_rollback_to_savepoint(trx, NULL); } - row_drop_table_for_mysql(name, trx, FALSE, true); + row_drop_table_for_mysql(name, trx, SQLCOM_DROP_TABLE, true); if (trx_is_started(trx)) { @@ -2731,7 +2732,7 @@ row_drop_table_for_mysql_in_background( /* Try to drop the table in InnoDB */ - error = row_drop_table_for_mysql(name, trx, FALSE, FALSE); + error = row_drop_table_for_mysql(name, trx, SQLCOM_TRUNCATE); trx_commit_for_mysql(trx); @@ -2877,8 +2878,8 @@ row_mysql_drop_garbage_tables() if (dict_load_table(table_name, true, DICT_ERR_IGNORE_ALL)) { - row_drop_table_for_mysql( - table_name, trx, FALSE, FALSE); + row_drop_table_for_mysql(table_name, trx, + SQLCOM_DROP_TABLE); trx_commit_for_mysql(trx); } @@ -2942,6 +2943,7 @@ func_exit: @param[in,out] trx transaction @param[out] new_id new table id @return error code or DB_SUCCESS */ +static dberr_t row_mysql_table_id_reassign( dict_table_t* table, @@ -3421,20 +3423,20 @@ If the data dictionary was not already locked by the transaction, the transaction will be committed. 
Otherwise, the data dictionary will remain locked. @param[in] name Table name -@param[in] trx Transaction handle -@param[in] drop_db true=dropping whole database -@param[in] create_failed TRUE=create table failed +@param[in,out] trx Transaction handle +@param[in] sqlcom type of SQL operation +@param[in] create_failed true=create table failed because e.g. foreign key column @param[in] nonatomic Whether it is permitted to release and reacquire dict_operation_lock @return error code or DB_SUCCESS */ dberr_t row_drop_table_for_mysql( - const char* name, - trx_t* trx, - bool drop_db, - ibool create_failed, - bool nonatomic) + const char* name, + trx_t* trx, + enum_sql_command sqlcom, + bool create_failed, + bool nonatomic) { dberr_t err; dict_foreign_t* foreign; @@ -3603,7 +3605,7 @@ row_drop_table_for_mysql( foreign = *it; - const bool ref_ok = drop_db + const bool ref_ok = sqlcom == SQLCOM_DROP_DB && dict_tables_have_same_db( name, foreign->foreign_table_name_lookup); @@ -3741,12 +3743,11 @@ defer: dict_drop_index_tree(). */ info = pars_info_create(); pars_info_add_str_literal(info, "table_name", name); - err = que_eval_sql( + err = (sqlcom == SQLCOM_TRUNCATE) ? 
DB_SUCCESS : que_eval_sql( info, - "PROCEDURE DROP_TABLE_PROC () IS\n" + "PROCEDURE DROP_FOREIGN_PROC () IS\n" "sys_foreign_id CHAR;\n" "table_id CHAR;\n" - "index_id CHAR;\n" "foreign_id CHAR;\n" "space_id INT;\n" "found INT;\n" @@ -3756,19 +3757,14 @@ defer: "WHERE FOR_NAME = :table_name\n" "AND TO_BINARY(FOR_NAME)\n" " = TO_BINARY(:table_name)\n" - "LOCK IN SHARE MODE;\n" - - "DECLARE CURSOR cur_idx IS\n" - "SELECT ID FROM SYS_INDEXES\n" - "WHERE TABLE_ID = table_id\n" - "LOCK IN SHARE MODE;\n" + "FOR UPDATE;\n" "BEGIN\n" "SELECT ID INTO table_id\n" "FROM SYS_TABLES\n" "WHERE NAME = :table_name\n" - "LOCK IN SHARE MODE;\n" + "FOR UPDATE;\n" "IF (SQL % NOTFOUND) THEN\n" " RETURN;\n" "END IF;\n" @@ -3784,7 +3780,7 @@ defer: "SELECT ID INTO sys_foreign_id\n" "FROM SYS_TABLES\n" "WHERE NAME = 'SYS_FOREIGN'\n" - "LOCK IN SHARE MODE;\n" + "FOR UPDATE;\n" "IF (SQL % NOTFOUND) THEN\n" " found := 0;\n" "END IF;\n" @@ -3811,36 +3807,65 @@ defer: "END LOOP;\n" "CLOSE cur_fk;\n" - "found := 1;\n" - "OPEN cur_idx;\n" - "WHILE found = 1 LOOP\n" - " FETCH cur_idx INTO index_id;\n" - " IF (SQL % NOTFOUND) THEN\n" - " found := 0;\n" - " ELSE\n" - " DELETE FROM SYS_FIELDS\n" - " WHERE INDEX_ID = index_id;\n" - " DELETE FROM SYS_INDEXES\n" - " WHERE ID = index_id\n" - " AND TABLE_ID = table_id;\n" - " END IF;\n" - "END LOOP;\n" - "CLOSE cur_idx;\n" + "END;\n", + FALSE, trx); + if (err == DB_SUCCESS) { + if (sqlcom != SQLCOM_TRUNCATE) { + info = pars_info_create(); + pars_info_add_str_literal(info, "table_name", name); + } - "DELETE FROM SYS_COLUMNS\n" - "WHERE TABLE_ID = table_id;\n" - "DELETE FROM SYS_TABLES\n" - "WHERE NAME = :table_name;\n" + err = que_eval_sql( + info, + "PROCEDURE DROP_TABLE_PROC () IS\n" + "table_id CHAR;\n" + "space_id INT;\n" + "index_id CHAR;\n" - "DELETE FROM SYS_TABLESPACES\n" - "WHERE SPACE = space_id;\n" - "DELETE FROM SYS_DATAFILES\n" - "WHERE SPACE = space_id;\n" + "DECLARE CURSOR cur_idx IS\n" + "SELECT ID FROM SYS_INDEXES\n" + "WHERE TABLE_ID = 
table_id\n" + "FOR UPDATE;\n" - "DELETE FROM SYS_VIRTUAL\n" - "WHERE TABLE_ID = table_id;\n" - "END;\n", - FALSE, trx); + "BEGIN\n" + "SELECT ID, SPACE INTO table_id,space_id\n" + "FROM SYS_TABLES\n" + "WHERE NAME = :table_name FOR UPDATE;\n" + "IF (SQL % NOTFOUND) THEN\n" + " RETURN;\n" + "END IF;\n" + + "DELETE FROM SYS_COLUMNS\n" + "WHERE TABLE_ID = table_id;\n" + "DELETE FROM SYS_TABLES\n" + "WHERE NAME = :table_name;\n" + + "DELETE FROM SYS_TABLESPACES\n" + "WHERE SPACE = space_id;\n" + "DELETE FROM SYS_DATAFILES\n" + "WHERE SPACE = space_id;\n" + + "DELETE FROM SYS_VIRTUAL\n" + "WHERE TABLE_ID = table_id;\n" + + "OPEN cur_idx;\n" + "WHILE 1 = 1 LOOP\n" + " FETCH cur_idx INTO index_id;\n" + " IF (SQL % NOTFOUND) THEN\n" + " EXIT;\n" + " ELSE\n" + " DELETE FROM SYS_FIELDS\n" + " WHERE INDEX_ID = index_id;\n" + " DELETE FROM SYS_INDEXES\n" + " WHERE ID = index_id\n" + " AND TABLE_ID = table_id;\n" + " END IF;\n" + "END LOOP;\n" + "CLOSE cur_idx;\n" + + "END;\n", + FALSE, trx); + } switch (err) { fil_space_t* space; @@ -3968,6 +3993,13 @@ funct_exit_all_freed: DBUG_RETURN(err); } +/** Drop a table after failed CREATE TABLE. */ +dberr_t row_drop_table_after_create_fail(const char* name, trx_t* trx) +{ + ib::warn() << "Dropping incompletely created " << name << " table."; + return row_drop_table_for_mysql(name, trx, SQLCOM_DROP_DB, true); +} + /*******************************************************************//** Drop all foreign keys in a database, see Bug#18942. Called at the end of row_drop_database_for_mysql(). 
@@ -4155,7 +4187,8 @@ loop: goto loop; } - err = row_drop_table_for_mysql(table_name, trx, TRUE, FALSE); + err = row_drop_table_for_mysql( + table_name, trx, SQLCOM_DROP_DB); trx_commit_for_mysql(trx); if (err != DB_SUCCESS) { diff --git a/storage/innobase/row/row0purge.cc b/storage/innobase/row/row0purge.cc index 30eacbfc518..dc575c09ad9 100644 --- a/storage/innobase/row/row0purge.cc +++ b/storage/innobase/row/row0purge.cc @@ -1110,6 +1110,7 @@ try_again: /* The table was corrupt in the data dictionary. dict_set_corrupted() works on an index, and we do not have an index to call it with. */ +close_exit: dict_table_close(node->table, FALSE, FALSE); node->table = NULL; err_exit: diff --git a/storage/innobase/row/row0trunc.cc b/storage/innobase/row/row0trunc.cc index 65170a10f57..39487d2749c 100644 --- a/storage/innobase/row/row0trunc.cc +++ b/storage/innobase/row/row0trunc.cc @@ -239,333 +239,6 @@ protected: }; /** -Creates a TRUNCATE log record with space id, table name, data directory path, -tablespace flags, table format, index ids, index types, number of index fields -and index field information of the table. */ -class TruncateLogger : public Callback { - -public: - /** - Constructor - - @param table Table to truncate - @param flags tablespace falgs */ - TruncateLogger( - dict_table_t* table, - ulint flags, - table_id_t new_table_id) - : - Callback(table->id, false), - m_table(table), - m_flags(flags), - m_truncate(table->id, new_table_id, table->data_dir_path), - m_log_file_name() - { - /* Do nothing */ - } - - /** - Initialize Truncate Logger by constructing Truncate Log File Name. - - @return DB_SUCCESS or error code. */ - dberr_t init() - { - /* Construct log file name. 
*/ - ulint log_file_name_buf_sz = - strlen(srv_log_group_home_dir) + 22 + 22 + 1 /* NUL */ - + strlen(TruncateLogger::s_log_prefix) - + strlen(TruncateLogger::s_log_ext); - - m_log_file_name = UT_NEW_ARRAY_NOKEY(char, log_file_name_buf_sz); - if (m_log_file_name == NULL) { - return(DB_OUT_OF_MEMORY); - } - memset(m_log_file_name, 0, log_file_name_buf_sz); - - strcpy(m_log_file_name, srv_log_group_home_dir); - ulint log_file_name_len = strlen(m_log_file_name); - if (m_log_file_name[log_file_name_len - 1] - != OS_PATH_SEPARATOR) { - - m_log_file_name[log_file_name_len] - = OS_PATH_SEPARATOR; - log_file_name_len = strlen(m_log_file_name); - } - - snprintf(m_log_file_name + log_file_name_len, - log_file_name_buf_sz - log_file_name_len, - "%s" ULINTPF "_" IB_ID_FMT "_%s", - TruncateLogger::s_log_prefix, - m_table->space_id, m_table->id, - TruncateLogger::s_log_ext); - - return(DB_SUCCESS); - } - - /** - Destructor */ - ~TruncateLogger() - { - if (m_log_file_name != NULL) { - bool exist; - os_file_delete_if_exists( - innodb_log_file_key, m_log_file_name, &exist); - UT_DELETE_ARRAY(m_log_file_name); - m_log_file_name = NULL; - } - } - - /** - @param mtr mini-transaction covering the read - @param pcur persistent cursor used for reading - @return DB_SUCCESS or error code */ - dberr_t operator()(mtr_t* mtr, btr_pcur_t* pcur); - - /** Called after iteratoring over the records. - @return true if invariant satisfied. */ - bool debug() const - { - /* We must find all the index entries on disk. 
*/ - return(UT_LIST_GET_LEN(m_table->indexes) - == m_truncate.indexes()); - } - - /** - Write the TRUNCATE log - @return DB_SUCCESS or error code */ - dberr_t log() const - { - dberr_t err = DB_SUCCESS; - - if (m_log_file_name == 0) { - return(DB_ERROR); - } - - bool ret; - os_file_t handle = os_file_create( - innodb_log_file_key, m_log_file_name, - OS_FILE_CREATE, OS_FILE_NORMAL, - OS_LOG_FILE, srv_read_only_mode, &ret); - if (!ret) { - return(DB_IO_ERROR); - } - - - ulint sz = srv_page_size; - void* buf = ut_zalloc_nokey(sz + srv_page_size); - if (buf == 0) { - os_file_close(handle); - return(DB_OUT_OF_MEMORY); - } - - /* Align the memory for file i/o if we might have O_DIRECT set*/ - byte* log_buf = static_cast<byte*>( - ut_align(buf, srv_page_size)); - - lsn_t lsn = log_get_lsn(); - - /* Generally loop should exit in single go but - just for those 1% of rare cases we need to assume - corner case. */ - do { - /* First 4 bytes are reserved for magic number - which is currently 0. */ - err = m_truncate.write( - log_buf + 4, log_buf + sz - 4, - m_table->space_id, m_table->name.m_name, - m_flags, m_table->flags, lsn); - - DBUG_EXECUTE_IF("ib_err_trunc_oom_logging", - err = DB_FAIL;); - - if (err != DB_SUCCESS) { - ut_ad(err == DB_FAIL); - ut_free(buf); - sz *= 2; - buf = ut_zalloc_nokey(sz + srv_page_size); - DBUG_EXECUTE_IF("ib_err_trunc_oom_logging", - ut_free(buf); - buf = 0;); - if (buf == 0) { - os_file_close(handle); - return(DB_OUT_OF_MEMORY); - } - log_buf = static_cast<byte*>( - ut_align(buf, srv_page_size)); - } - - } while (err != DB_SUCCESS); - - dberr_t io_err; - - IORequest request(IORequest::WRITE); - - io_err = os_file_write( - request, m_log_file_name, handle, log_buf, 0, sz); - - if (io_err != DB_SUCCESS) { - - ib::error() - << "IO: Failed to write the file size to '" - << m_log_file_name << "'"; - - /* Preserve the original error code */ - if (err == DB_SUCCESS) { - err = io_err; - } - } - - os_file_flush(handle); - os_file_close(handle); - - 
ut_free(buf); - - /* Why we need MLOG_TRUNCATE when we have truncate_log for - recovery? - - truncate log can protect us if crash happens while truncate - is active. Once truncate is done truncate log is removed. - - If crash happens post truncate and system is yet to - checkpoint, on recovery we would see REDO records from action - before truncate (unless we explicitly checkpoint before - returning from truncate API. Costly alternative so rejected). - - These REDO records may reference a page that doesn't exist - post truncate so we need a mechanism to skip all such REDO - records. MLOG_TRUNCATE records space_id and lsn that exactly - serve the purpose. - - If checkpoint happens post truncate and crash happens post - this point then neither MLOG_TRUNCATE nor REDO record - from action before truncate are accessible. */ - if (!is_system_tablespace(m_table->space_id)) { - mtr_t mtr; - byte* log_ptr; - - mtr_start(&mtr); - - log_ptr = mlog_open(&mtr, 11 + 8); - log_ptr = mlog_write_initial_log_record_low( - MLOG_TRUNCATE, m_table->space_id, 0, - log_ptr, &mtr); - - mach_write_to_8(log_ptr, lsn); - log_ptr += 8; - - mlog_close(&mtr, log_ptr); - mtr_commit(&mtr); - } - - return(err); - } - - /** - Indicate completion of truncate log by writing magic-number. - File will be removed from the system but to protect against - unlink (File-System) anomalies we ensure we write magic-number. */ - void done() - { - if (m_log_file_name == 0) { - return; - } - - bool ret; - os_file_t handle = os_file_create_simple_no_error_handling( - innodb_log_file_key, m_log_file_name, - OS_FILE_OPEN, OS_FILE_READ_WRITE, - srv_read_only_mode, &ret); - DBUG_EXECUTE_IF("ib_err_trunc_writing_magic_number", - os_file_close(handle); - ret = false;); - if (!ret) { - ib::error() << "Failed to open truncate log file " - << m_log_file_name << "." 
- " If server crashes before truncate log is" - " removed make sure it is manually removed" - " before restarting server"; - os_file_delete(innodb_log_file_key, m_log_file_name); - return; - } - - byte buffer[sizeof(TruncateLogger::s_magic)]; - mach_write_to_4(buffer, TruncateLogger::s_magic); - - dberr_t err; - - IORequest request(IORequest::WRITE); - - err = os_file_write( - request, - m_log_file_name, handle, buffer, 0, sizeof(buffer)); - - if (err != DB_SUCCESS) { - - ib::error() - << "IO: Failed to write the magic number to '" - << m_log_file_name << "'"; - } - - DBUG_EXECUTE_IF("ib_trunc_crash_after_updating_magic_no", - DBUG_SUICIDE();); - os_file_flush(handle); - os_file_close(handle); - DBUG_EXECUTE_IF("ib_trunc_crash_after_logging_complete", - log_buffer_flush_to_disk(); - os_thread_sleep(1000000); - DBUG_SUICIDE();); - os_file_delete(innodb_log_file_key, m_log_file_name); - } - -private: - // Disably copying - TruncateLogger(const TruncateLogger&); - TruncateLogger& operator=(const TruncateLogger&); - -private: - /** Lookup the index using the index id. - @return index instance if found else NULL */ - const dict_index_t* find(index_id_t id) const - { - for (const dict_index_t* index = UT_LIST_GET_FIRST( - m_table->indexes); - index != NULL; - index = UT_LIST_GET_NEXT(indexes, index)) { - - if (index->id == id) { - return(index); - } - } - - return(NULL); - } - -private: - /** Table to be truncated */ - dict_table_t* m_table; - - /** Tablespace flags */ - ulint m_flags; - - /** Collect table to truncate information */ - truncate_t m_truncate; - - /** Truncate log file name. */ - char* m_log_file_name; - - -public: - /** Magic Number to indicate truncate action is complete. */ - const static ib_uint32_t s_magic; - - /** Truncate Log file Prefix. */ - const static char* s_log_prefix; - - /** Truncate Log file Extension. 
*/ - const static char* s_log_ext; -}; - -const ib_uint32_t TruncateLogger::s_magic = 32743712; -const char* TruncateLogger::s_log_prefix = "ib_"; -const char* TruncateLogger::s_log_ext = "trunc.log"; - -/** Scan to find out truncate log file from the given directory path. @param dir_path look for log directory in following path. @@ -579,9 +252,7 @@ TruncateLogParser::scan( os_file_dir_t dir; os_file_stat_t fileinfo; dberr_t err = DB_SUCCESS; - ulint ext_len = strlen(TruncateLogger::s_log_ext); - ulint prefix_len = strlen(TruncateLogger::s_log_prefix); - ulint dir_len = strlen(dir_path); + const ulint dir_len = strlen(dir_path); /* Scan and look out for the truncate log files. */ dir = os_file_opendir(dir_path, true); @@ -595,12 +266,11 @@ TruncateLogParser::scan( ulint nm_len = strlen(fileinfo.name); if (fileinfo.type == OS_FILE_TYPE_FILE - && nm_len > ext_len + prefix_len - && (0 == strncmp(fileinfo.name + nm_len - ext_len, - TruncateLogger::s_log_ext, ext_len)) - && (0 == strncmp(fileinfo.name, - TruncateLogger::s_log_prefix, - prefix_len))) { + && nm_len > sizeof "ib_trunc.log" + && (0 == strncmp(fileinfo.name + nm_len + - ((sizeof "trunc.log") - 1), + "trunc.log", (sizeof "trunc.log") - 1)) + && (0 == strncmp(fileinfo.name, "ib_", 3))) { if (fileinfo.size == 0) { /* Truncate log not written. Remove the file. */ @@ -610,7 +280,7 @@ TruncateLogParser::scan( } /* Construct file name by appending directory path */ - ulint sz = dir_len + 22 + 22 + 1 + ext_len + prefix_len; + ulint sz = dir_len + 22 + 22 + sizeof "ib_trunc.log"; char* log_file_name = UT_NEW_ARRAY_NOKEY(char, sz); if (log_file_name == NULL) { err = DB_OUT_OF_MEMORY; @@ -683,8 +353,7 @@ TruncateLogParser::parse( break; } - ulint magic_n = mach_read_from_4(log_buf); - if (magic_n == TruncateLogger::s_magic) { + if (mach_read_from_4(log_buf) == 32743712) { /* Truncate action completed. Avoid parsing the file. 
*/ os_file_close(handle); @@ -879,57 +548,6 @@ private: }; /** -@param pcur persistent cursor used for reading -@return DB_SUCCESS or error code */ -dberr_t -TruncateLogger::operator()(mtr_t*, btr_pcur_t* pcur) -{ - ulint len; - const byte* field; - rec_t* rec = btr_pcur_get_rec(pcur); - truncate_t::index_t index; - - field = rec_get_nth_field_old( - rec, DICT_FLD__SYS_INDEXES__TYPE, &len); - ut_ad(len == 4); - index.m_type = mach_read_from_4(field); - - field = rec_get_nth_field_old(rec, DICT_FLD__SYS_INDEXES__ID, &len); - ut_ad(len == 8); - index.m_id = mach_read_from_8(field); - - field = rec_get_nth_field_old( - rec, DICT_FLD__SYS_INDEXES__PAGE_NO, &len); - ut_ad(len == 4); - index.m_root_page_no = mach_read_from_4(field); - - /* For compressed tables we need to store extra meta-data - required during btr_create(). */ - if (FSP_FLAGS_GET_ZIP_SSIZE(m_flags)) { - - const dict_index_t* dict_index = find(index.m_id); - - if (dict_index != NULL) { - - dberr_t err = index.set(dict_index); - - if (err != DB_SUCCESS) { - m_truncate.clear(); - return(err); - } - - } else { - ib::warn() << "Index id " << index.m_id - << " not found"; - } - } - - m_truncate.add(index); - - return(DB_SUCCESS); -} - -/** Drop an index in the table. @param mtr mini-transaction covering the read @@ -1085,233 +703,6 @@ CreateIndex::operator()(mtr_t* mtr, btr_pcur_t* pcur) const } /** -Rollback the transaction and release the index locks. -Drop indexes if table is corrupted so that drop/create -sequence works as expected. - -@param table table to truncate -@param trx transaction covering the TRUNCATE -@param new_id new table id that was suppose to get assigned - to the table if truncate executed successfully. 
-@param has_internal_doc_id indicate existence of fts index -@param no_redo if true, turn-off redo logging -@param corrupted table corrupted status -@param unlock_index if true then unlock indexes before action */ -static -void -row_truncate_rollback( - dict_table_t* table, - trx_t* trx, - table_id_t new_id, - bool has_internal_doc_id, - bool no_redo, - bool corrupted, - bool unlock_index) -{ - ut_ad(!table->is_temporary()); - if (unlock_index) { - dict_table_x_unlock_indexes(table); - } - - trx->error_state = DB_SUCCESS; - - trx_rollback_to_savepoint(trx, NULL); - - trx->error_state = DB_SUCCESS; - - if (corrupted) { - - /* Cleanup action to ensure we don't left over stale entries - if we are marking table as corrupted. This will ensure - it can be recovered using drop/create sequence. */ - dict_table_x_lock_indexes(table); - - DropIndex dropIndex(table, no_redo); - - SysIndexIterator().for_each(dropIndex); - - dict_table_x_unlock_indexes(table); - - for (dict_index_t* index = UT_LIST_GET_FIRST(table->indexes); - index != NULL; - index = UT_LIST_GET_NEXT(indexes, index)) { - - dict_set_corrupted(index, trx, "TRUNCATE TABLE"); - } - - if (has_internal_doc_id) { - - ut_ad(!trx_is_started(trx)); - - table_id_t id = table->id; - - table->id = new_id; - - fts_drop_tables(trx, table); - - table->id = id; - - ut_ad(trx_is_started(trx)); - - trx_commit_for_mysql(trx); - } - } - - table->corrupted = corrupted; -} - -/** -Finish the TRUNCATE operations for both commit and rollback. 
- -@param table table being truncated -@param trx transaction covering the truncate -@param fsp_flags tablespace flags -@param logger table to truncate information logger -@param err status of truncate operation - -@return DB_SUCCESS or error code */ -static MY_ATTRIBUTE((warn_unused_result)) -dberr_t -row_truncate_complete( - dict_table_t* table, - trx_t* trx, - ulint fsp_flags, - TruncateLogger* &logger, - dberr_t err) -{ - bool is_file_per_table = dict_table_is_file_per_table(table); - - /* Add the table back to FTS optimize background thread. */ - if (table->fts) { - fts_optimize_add_table(table); - } - - row_mysql_unlock_data_dictionary(trx); - - DEBUG_SYNC_C("ib_trunc_table_trunc_completing"); - - if (!table->is_temporary()) { - - DBUG_EXECUTE_IF("ib_trunc_crash_before_log_removal", - log_buffer_flush_to_disk(); - os_thread_sleep(500000); - DBUG_SUICIDE();); - - /* Note: We don't log-checkpoint instead we have written - a special REDO log record MLOG_TRUNCATE that is used to - avoid applying REDO records before truncate for crash - that happens post successful truncate completion. */ - - if (logger != NULL) { - logger->done(); - UT_DELETE(logger); - logger = NULL; - } - } - - /* If non-temp file-per-table tablespace... */ - if (is_file_per_table - && !table->is_temporary() - && fsp_flags != ULINT_UNDEFINED) { - - /* This function will reset back the stop_new_ops - and is_being_truncated so that fil-ops can re-start. */ - dberr_t err2 = truncate_t::truncate( - table->space_id, - table->data_dir_path, - table->name.m_name, fsp_flags, false); - - if (err2 != DB_SUCCESS) { - return(err2); - } - } - - if (err == DB_SUCCESS) { - dict_stats_update(table, DICT_STATS_EMPTY_TABLE); - } - - trx->op_info = ""; - - /* For temporary tables or if there was an error, we need to reset - the dict operation flags. 
*/ - trx->ddl = false; - trx->dict_operation = TRX_DICT_OP_NONE; - - ut_ad(!trx_is_started(trx)); - - srv_wake_master_thread(); - - DBUG_EXECUTE_IF("ib_trunc_crash_after_truncate_done", - DBUG_SUICIDE();); - - return(err); -} - -/** -Handle FTS truncate issues. -@param table table being truncated -@param new_id new id for the table -@param trx transaction covering the truncate -@return DB_SUCCESS or error code. */ -static MY_ATTRIBUTE((warn_unused_result)) -dberr_t -row_truncate_fts( - dict_table_t* table, - table_id_t new_id, - trx_t* trx) -{ - dict_table_t fts_table; - - fts_table.id = new_id; - fts_table.name = table->name; - fts_table.flags2 = table->flags2; - fts_table.flags = table->flags; - fts_table.space = table->space; - - /* table->data_dir_path is used for FTS AUX table - creation. */ - if (DICT_TF_HAS_DATA_DIR(table->flags) - && table->data_dir_path == NULL) { - dict_get_and_save_data_dir_path(table, true); - ut_ad(table->data_dir_path != NULL); - } - - fts_table.data_dir_path = table->data_dir_path; - - dberr_t err = fts_create_common_tables(trx, &fts_table, true); - - for (ulint i = 0; - i < ib_vector_size(table->fts->indexes) && err == DB_SUCCESS; - i++) { - - dict_index_t* fts_index; - - fts_index = static_cast<dict_index_t*>( - ib_vector_getp(table->fts->indexes, i)); - - err = fts_create_index_tables(trx, fts_index, new_id); - } - - DBUG_EXECUTE_IF("ib_err_trunc_during_fts_trunc", - err = DB_ERROR;); - - if (err != DB_SUCCESS) { - - trx->error_state = DB_SUCCESS; - trx_rollback_to_savepoint(trx, NULL); - trx->error_state = DB_SUCCESS; - - ib::error() << "Unable to truncate FTS index for table " - << table->name; - } else { - - ut_ad(trx_is_started(trx)); - } - - return(err); -} - -/** Update system table to reflect new table id. 
@param old_table_id old table id @param new_table_id new table id @@ -1451,659 +842,6 @@ row_truncate_update_sys_tables_during_fix_up( return(err); } -/** -Truncate also results in assignment of new table id, update the system -SYSTEM TABLES with the new id. -@param table, table being truncated -@param new_id, new table id -@param has_internal_doc_id, has doc col (fts) -@param no_redo if true, turn-off redo logging -@param trx transaction handle -@return error code or DB_SUCCESS */ -static MY_ATTRIBUTE((warn_unused_result)) -dberr_t -row_truncate_update_system_tables( - dict_table_t* table, - table_id_t new_id, - bool has_internal_doc_id, - bool no_redo, - trx_t* trx) -{ - dberr_t err = DB_SUCCESS; - - ut_a(!table->is_temporary()); - - err = row_truncate_update_table_id(table->id, new_id, FALSE, trx); - - DBUG_EXECUTE_IF("ib_err_trunc_during_sys_table_update", - err = DB_ERROR;); - - if (err != DB_SUCCESS) { - - row_truncate_rollback( - table, trx, new_id, has_internal_doc_id, - no_redo, true, false); - - ib::error() << "Unable to assign a new identifier to table " - << table->name << " after truncating it. Marked the" - " table as corrupted. 
In-memory representation is now" - " different from the on-disk representation."; - err = DB_ERROR; - } else { - /* Drop the old FTS index */ - if (has_internal_doc_id) { - - ut_ad(trx_is_started(trx)); - - fts_drop_tables(trx, table); - - DBUG_EXECUTE_IF("ib_truncate_crash_while_fts_cleanup", - DBUG_SUICIDE();); - - ut_ad(trx_is_started(trx)); - } - - DBUG_EXECUTE_IF("ib_trunc_crash_after_fts_drop", - log_buffer_flush_to_disk(); - os_thread_sleep(2000000); - DBUG_SUICIDE();); - - dict_table_change_id_in_cache(table, new_id); - - /* Reset the Doc ID in cache to 0 */ - if (has_internal_doc_id && table->fts->cache != NULL) { - DBUG_EXECUTE_IF("ib_trunc_sleep_before_fts_cache_clear", - os_thread_sleep(10000000);); - - table->fts->fts_status |= TABLE_DICT_LOCKED; - fts_update_next_doc_id(trx, table, NULL, 0); - fts_cache_clear(table->fts->cache); - fts_cache_init(table->fts->cache); - table->fts->fts_status &= uint(~TABLE_DICT_LOCKED); - } - } - - return(err); -} - -/** -Do foreign key checks before starting TRUNCATE. 
-@param table table being truncated -@param trx transaction covering the truncate -@return DB_SUCCESS or error code */ -static MY_ATTRIBUTE((warn_unused_result)) -dberr_t -row_truncate_foreign_key_checks( - const dict_table_t* table, - const trx_t* trx) -{ - /* Check if the table is referenced by foreign key constraints from - some other table (not the table itself) */ - - dict_foreign_set::iterator it - = std::find_if(table->referenced_set.begin(), - table->referenced_set.end(), - dict_foreign_different_tables()); - - if (!srv_read_only_mode - && it != table->referenced_set.end() - && trx->check_foreigns) { - - dict_foreign_t* foreign = *it; - - FILE* ef = dict_foreign_err_file; - - /* We only allow truncating a referenced table if - FOREIGN_KEY_CHECKS is set to 0 */ - - mutex_enter(&dict_foreign_err_mutex); - - rewind(ef); - - ut_print_timestamp(ef); - - fputs(" Cannot truncate table ", ef); - ut_print_name(ef, trx, table->name.m_name); - fputs(" by DROP+CREATE\n" - "InnoDB: because it is referenced by ", ef); - ut_print_name(ef, trx, foreign->foreign_table_name); - putc('\n', ef); - - mutex_exit(&dict_foreign_err_mutex); - - return(DB_ERROR); - } - - ut_ad(!table->n_foreign_key_checks_running); - - return(DB_SUCCESS); -} - -/** -Do some sanity checks before starting the actual TRUNCATE. 
-@param table table being truncated -@return DB_SUCCESS or error code */ -static MY_ATTRIBUTE((warn_unused_result)) -dberr_t -row_truncate_sanity_checks( - const dict_table_t* table) -{ - if (!table->space) { - - return(DB_TABLESPACE_DELETED); - - } else if (!table->is_readable()) { - if (!table->space) { - return(DB_TABLESPACE_NOT_FOUND); - - } else { - return(DB_DECRYPTION_FAILED); - } - } else if (dict_table_is_corrupted(table)) { - - return(DB_TABLE_CORRUPT); - } - - return(DB_SUCCESS); -} - -/** Reinitialize the original tablespace header with the same space id -for single tablespace -@param[in] table table belongs to tablespace -@param[in] size size in blocks -@param[in] trx Transaction covering truncate */ -static void -fil_reinit_space_header_for_table( - dict_table_t* table, - ulint size, - trx_t* trx) -{ - fil_space_t* space = table->space; - ut_a(!is_system_tablespace(space->id)); - ut_ad(space->id == table->space_id); - - /* Invalidate in the buffer pool all pages belonging - to the tablespace. The buffer pool scan may take long - time to complete, therefore we release dict_sys->mutex - and the dict operation lock during the scan and aquire - it again after the buffer pool scan.*/ - - /* Release the lock on the indexes too. So that - they won't violate the latch ordering. 
*/ - dict_table_x_unlock_indexes(table); - row_mysql_unlock_data_dictionary(trx); - - /* Lock the search latch in shared mode to prevent user - from disabling AHI during the scan */ - btr_search_s_lock_all(); - DEBUG_SYNC_C("buffer_pool_scan"); - buf_LRU_flush_or_remove_pages(space->id, NULL); - btr_search_s_unlock_all(); - - row_mysql_lock_data_dictionary(trx); - - dict_table_x_lock_indexes(table); - - /* Remove all insert buffer entries for the tablespace */ - ibuf_delete_for_discarded_space(space->id); - - mtr_t mtr; - - mtr.start(); - mtr.set_named_space(space); - mtr_x_lock(&space->latch, &mtr); - - ut_ad(UT_LIST_GET_LEN(space->chain) == 1); - space->size = UT_LIST_GET_FIRST(space->chain)->size = size; - fsp_header_init(space, size, &mtr); - - mtr.commit(); -} - -/** -Truncates a table for MySQL. -@param table table being truncated -@param trx transaction covering the truncate -@return error code or DB_SUCCESS */ -dberr_t -row_truncate_table_for_mysql( - dict_table_t* table, - trx_t* trx) -{ - bool is_file_per_table = dict_table_is_file_per_table(table); - dberr_t err; - TruncateLogger* logger = NULL; - ut_d(const fil_space_t* old_space = table->space); - - /* Understanding the truncate flow. - - Step-1: Perform intiial sanity check to ensure table can be truncated. - This would include check for tablespace discard status, ibd file - missing, etc .... - - Step-3: Validate ownership of needed locks (Exclusive lock). - Ownership will also ensure there is no active SQL queries, INSERT, - SELECT, ..... - - Step-4: Stop all the background process associated with table. - - Step-5: There are few foreign key related constraint under which - we can't truncate table (due to referential integrity unless it is - turned off). Ensure this condition is satisfied. - - Step-6: Truncate operation can be rolled back in case of error - till some point. Associate rollback segment to record undo log. - - Step-7: Generate new table-id. - Why we need new table-id ? 
- Purge and rollback case: we assign a new table id for the table. - Since purge and rollback look for the table based on the table id, - they see the table as 'dropped' and discard their operations. - - Step-8: Log information about tablespace which includes - table and index information. If there is a crash in the next step - then during recovery we will attempt to fixup the operation. - - Step-9: Drop all indexes (this include freeing of the pages - associated with them). - - Step-10: Re-create new indexes. - - Step-11: Update new table-id to in-memory cache (dictionary), - on-disk (INNODB_SYS_TABLES). INNODB_SYS_INDEXES also needs to - be updated to reflect updated root-page-no of new index created - and updated table-id. - - Step-12: Cleanup Stage. Reset auto-inc value to 1. - Release all the locks. - Commit the transaction. Update trx operation state. - - Notes: - - On error, log checkpoint is done followed writing of magic number to - truncate log file. If servers crashes after truncate, fix-up action - will not be applied. - - - log checkpoint is done before starting truncate table to ensure - that previous REDO log entries are not applied if current truncate - crashes. Consider following use-case: - - create table .... insert/load table .... truncate table (crash) - - on restart table is restored .... truncate table (crash) - - on restart (assuming default log checkpoint is not done) will have - 2 REDO log entries for same table. (Note 2 REDO log entries - for different table is not an issue). - For system-tablespace we can't truncate the tablespace so we need - to initiate a local cleanup that involves dropping of indexes and - re-creating them. If we apply stale entry we might end-up issuing - drop on wrong indexes. - - - Insert buffer: TRUNCATE TABLE is analogous to DROP TABLE, - so we do not have to remove insert buffer records, as the - insert buffer works at a low level. 
If a freed page is later - reallocated, the allocator will remove the ibuf entries for - it. When we prepare to truncate *.ibd files, we remove all entries - for the table in the insert buffer tree. This is not strictly - necessary, but we can free up some space in the system tablespace. - - - Linear readahead and random readahead: we use the same - method as in 3) to discard ongoing operations. (This is only - relevant for TRUNCATE TABLE by TRUNCATE TABLESPACE.) - Ensure that the table will be dropped by trx_rollback_active() in - case of a crash. - */ - - /*-----------------------------------------------------------------*/ - /* Step-1: Perform intiial sanity check to ensure table can be - truncated. This would include check for tablespace discard status, - ibd file missing, etc .... */ - err = row_truncate_sanity_checks(table); - if (err != DB_SUCCESS) { - return(err); - - } - - if (!table->is_temporary()) { - if (table->fts) { - fts_optimize_remove_table(table); - } - - trx_set_dict_operation(trx, TRX_DICT_OP_TABLE); - } - - DEBUG_SYNC_C("row_trunc_before_dict_lock"); - - /* Step-3: Validate ownership of needed locks (Exclusive lock). - Ownership will also ensure there is no active SQL queries, INSERT, - SELECT, .....*/ - trx->op_info = "truncating table"; - ut_a(trx->dict_operation_lock_mode == 0); - row_mysql_lock_data_dictionary(trx); - ut_ad(mutex_own(&dict_sys->mutex)); - ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_X)); - - /* Step-4: Stop all the background process associated with table. */ - dict_stats_wait_bg_to_stop_using_table(table, trx); - - /* Step-5: There are few foreign key related constraint under which - we can't truncate table (due to referential integrity unless it is - turned off). Ensure this condition is satisfied. 
*/ - ulint fsp_flags = ULINT_UNDEFINED; - err = row_truncate_foreign_key_checks(table, trx); - if (err != DB_SUCCESS) { - trx_rollback_to_savepoint(trx, NULL); - return(row_truncate_complete( - table, trx, fsp_flags, logger, err)); - } - - trx->table_id = table->id; - trx_set_dict_operation(trx, TRX_DICT_OP_TABLE); - - /* Step-6: Truncate operation can be rolled back in case of error - till some point. Associate rollback segment to record undo log. */ - if (!table->is_temporary()) { - mtr_t mtr; - mtr.start(); - trx_undo_assign(trx, &err, &mtr); - mtr.commit(); - - DBUG_EXECUTE_IF("ib_err_trunc_assigning_undo_log", - err = DB_ERROR;); - if (err != DB_SUCCESS) { - trx_rollback_to_savepoint(trx, NULL); - return(row_truncate_complete( - table, trx, fsp_flags, logger, err)); - } - } - - /* Step-7: Generate new table-id. - Why we need new table-id ? - Purge and rollback: we assign a new table id for the - table. Since purge and rollback look for the table based on - the table id, they see the table as 'dropped' and discard - their operations. */ - table_id_t new_id; - dict_hdr_get_new_id(&new_id, NULL, NULL, table, false); - - /* Check if table involves FTS index. */ - bool has_internal_doc_id = - dict_table_has_fts_index(table) - || DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID); - - bool no_redo = is_file_per_table && !has_internal_doc_id; - - /* Step-8: Log information about tablespace which includes - table and index information. If there is a crash in the next step - then during recovery we will attempt to fixup the operation. */ - - /* Lock all index trees for this table, as we will truncate - the table/index and possibly change their metadata. 
All - DML/DDL are blocked by table level X lock, with a few exceptions - such as queries into information schema about the table, - MySQL could try to access index stats for this kind of query, - we need to use index locks to sync up */ - dict_table_x_lock_indexes(table); - - if (!table->is_temporary()) { - fsp_flags = table->space - ? table->space->flags - : ULINT_UNDEFINED; - - if (is_file_per_table) { - ut_ad(!table->is_temporary()); - ut_ad(dict_table_is_file_per_table(table)); - - dict_get_and_save_data_dir_path(table, true); - err = table->space - ? fil_prepare_for_truncate(table->space_id) - : DB_TABLESPACE_NOT_FOUND; - - DBUG_EXECUTE_IF("ib_err_trunc_preparing_for_truncate", - err = DB_ERROR;); - - if (err != DB_SUCCESS) { - row_truncate_rollback( - table, trx, new_id, - has_internal_doc_id, - no_redo, false, true); - return(row_truncate_complete( - table, trx, fsp_flags, logger, err)); - } - } else { - DBUG_EXECUTE_IF("ib_err_trunc_preparing_for_truncate", - fsp_flags = ULINT_UNDEFINED;); - - if (fsp_flags == ULINT_UNDEFINED) { - row_truncate_rollback( - table, trx, new_id, - has_internal_doc_id, - no_redo, false, true); - return(row_truncate_complete( - table, trx, fsp_flags, - logger, DB_ERROR)); - } - } - - logger = UT_NEW_NOKEY(TruncateLogger( - table, fsp_flags, new_id)); - - err = logger->init(); - if (err != DB_SUCCESS) { - row_truncate_rollback( - table, trx, new_id, has_internal_doc_id, - no_redo, false, true); - return(row_truncate_complete( - table, trx, fsp_flags, logger, DB_ERROR)); - - } - - err = SysIndexIterator().for_each(*logger); - if (err != DB_SUCCESS) { - row_truncate_rollback( - table, trx, new_id, has_internal_doc_id, - no_redo, false, true); - return(row_truncate_complete( - table, trx, fsp_flags, logger, DB_ERROR)); - - } - - ut_ad(logger->debug()); - - err = logger->log(); - - if (err != DB_SUCCESS) { - row_truncate_rollback( - table, trx, new_id, has_internal_doc_id, - no_redo, false, true); - return(row_truncate_complete( - 
table, trx, fsp_flags, logger, DB_ERROR)); - } - - DBUG_EXECUTE_IF("ib_trunc_crash_after_redo_log_write_complete", - log_buffer_flush_to_disk(); - os_thread_sleep(3000000); - DBUG_SUICIDE();); - - DropIndex dropIndex(table, no_redo); - - err = SysIndexIterator().for_each(dropIndex); - - if (err != DB_SUCCESS) { - - row_truncate_rollback( - table, trx, new_id, has_internal_doc_id, - no_redo, true, true); - - return(row_truncate_complete( - table, trx, fsp_flags, logger, err)); - } - - dict_table_get_first_index(table)->remove_instant(); - } else { - ut_ad(!table->is_instant()); - ut_ad(table->space == fil_system.temp_space); - bool fail = false; - for (dict_index_t* index = UT_LIST_GET_FIRST(table->indexes); - index != NULL; - index = UT_LIST_GET_NEXT(indexes, index)) { - if (index->page != FIL_NULL) { - btr_free(page_id_t(SRV_TMP_SPACE_ID, - index->page), - univ_page_size); - } - - mtr_t mtr; - mtr.start(); - mtr.set_log_mode(MTR_LOG_NO_REDO); - index->page = btr_create( - index->type, table->space, index->id, index, - NULL, &mtr); - DBUG_EXECUTE_IF("ib_err_trunc_temp_recreate_index", - index->page = FIL_NULL;); - mtr.commit(); - if (index->page == FIL_NULL) { - fail = true; - break; - } - } - if (fail) { - for (dict_index_t* index = UT_LIST_GET_FIRST( - table->indexes); - index != NULL; - index = UT_LIST_GET_NEXT(indexes, index)) { - if (index->page != FIL_NULL) { - btr_free(page_id_t(SRV_TMP_SPACE_ID, - index->page), - univ_page_size); - index->page = FIL_NULL; - } - } - } - - table->corrupted = fail; - if (fail) { - return row_truncate_complete( - table, trx, fsp_flags, logger, DB_ERROR); - } - - DBUG_EXECUTE_IF( - "ib_trunc_crash_during_drop_index_temp_table", - log_buffer_flush_to_disk(); - DBUG_SUICIDE();); - } - - if (is_file_per_table && fsp_flags != ULINT_UNDEFINED) { - /* A single-table tablespace has initially - FIL_IBD_FILE_INITIAL_SIZE number of pages allocated and an - extra page is allocated for each of the indexes present. 
But in - the case of clust index 2 pages are allocated and as one is - covered in the calculation as part of table->indexes.count we - take care of the other page by adding 1. */ - ulint space_size = table->indexes.count + - FIL_IBD_FILE_INITIAL_SIZE + 1; - - if (has_internal_doc_id) { - /* Since aux tables are created for fts indexes and - they use seperate tablespaces. */ - space_size -= ib_vector_size(table->fts->indexes); - } - - fil_reinit_space_header_for_table(table, space_size, trx); - } - - DBUG_EXECUTE_IF("ib_trunc_crash_with_intermediate_log_checkpoint", - log_buffer_flush_to_disk(); - os_thread_sleep(2000000); - log_checkpoint(TRUE, TRUE); - os_thread_sleep(1000000); - DBUG_SUICIDE();); - - DBUG_EXECUTE_IF("ib_trunc_crash_drop_reinit_done_create_to_start", - log_buffer_flush_to_disk(); - os_thread_sleep(2000000); - DBUG_SUICIDE();); - - /* Step-10: Re-create new indexes. */ - if (!table->is_temporary()) { - - CreateIndex createIndex(table, no_redo); - - err = SysIndexIterator().for_each(createIndex); - - if (err != DB_SUCCESS) { - - row_truncate_rollback( - table, trx, new_id, has_internal_doc_id, - no_redo, true, true); - - return(row_truncate_complete( - table, trx, fsp_flags, logger, err)); - } - } - - /* Done with index truncation, release index tree locks, - subsequent work relates to table level metadata change */ - dict_table_x_unlock_indexes(table); - - if (has_internal_doc_id) { - - err = row_truncate_fts(table, new_id, trx); - - if (err != DB_SUCCESS) { - - row_truncate_rollback( - table, trx, new_id, has_internal_doc_id, - no_redo, true, false); - - return(row_truncate_complete( - table, trx, fsp_flags, logger, err)); - } - } - - /* Step-11: Update new table-id to in-memory cache (dictionary), - on-disk (INNODB_SYS_TABLES). INNODB_SYS_INDEXES also needs to - be updated to reflect updated root-page-no of new index created - and updated table-id. 
*/ - if (table->is_temporary()) { - - dict_table_change_id_in_cache(table, new_id); - err = DB_SUCCESS; - - } else { - - /* If this fails then we are in an inconsistent state and - the results are undefined. */ - ut_ad(old_space == table->space); - - err = row_truncate_update_system_tables( - table, new_id, has_internal_doc_id, no_redo, trx); - - if (err != DB_SUCCESS) { - return(row_truncate_complete( - table, trx, fsp_flags, logger, err)); - } - } - - DBUG_EXECUTE_IF("ib_trunc_crash_on_updating_dict_sys_info", - log_buffer_flush_to_disk(); - os_thread_sleep(2000000); - DBUG_SUICIDE();); - - /* Step-12: Cleanup Stage. Reset auto-inc value to 1. - Release all the locks. - Commit the transaction. Update trx operation state. */ - dict_table_autoinc_lock(table); - dict_table_autoinc_initialize(table, 1); - dict_table_autoinc_unlock(table); - - if (trx_is_started(trx)) { - - trx_commit_for_mysql(trx); - } - - ut_ad(!table->is_instant()); - - return(row_truncate_complete(table, trx, fsp_flags, logger, err)); -} - /********************************************************//** Recreates table indexes by applying TRUNCATE log record during recovery. @@ -3226,4 +1964,3 @@ truncate_t::write( return(DB_SUCCESS); } - diff --git a/storage/innobase/row/row0uins.cc b/storage/innobase/row/row0uins.cc index 2067e957f3c..23867b7395a 100644 --- a/storage/innobase/row/row0uins.cc +++ b/storage/innobase/row/row0uins.cc @@ -444,8 +444,8 @@ row_undo_ins_parse_undo_rec( close_table: /* Normally, tables should not disappear or become unaccessible during ROLLBACK, because they should be - protected by InnoDB table locks. TRUNCATE TABLE - or table corruption could be valid exceptions. + protected by InnoDB table locks. Corruption could be + a valid exception. 
FIXME: When running out of temporary tablespace, it would probably be better to just drop all temporary diff --git a/storage/innobase/row/row0umod.cc b/storage/innobase/row/row0umod.cc index 90bce6c8be6..b46d3d83bb0 100644 --- a/storage/innobase/row/row0umod.cc +++ b/storage/innobase/row/row0umod.cc @@ -1182,8 +1182,8 @@ row_undo_mod_parse_undo_rec( close_table: /* Normally, tables should not disappear or become unaccessible during ROLLBACK, because they should be - protected by InnoDB table locks. TRUNCATE TABLE - or table corruption could be valid exceptions. + protected by InnoDB table locks. Corruption could be + a valid exception. FIXME: When running out of temporary tablespace, it would probably be better to just drop all temporary diff --git a/storage/innobase/row/row0upd.cc b/storage/innobase/row/row0upd.cc index 447132dcf86..7070066efc2 100644 --- a/storage/innobase/row/row0upd.cc +++ b/storage/innobase/row/row0upd.cc @@ -286,29 +286,7 @@ row_upd_check_references_constraints( FALSE, FALSE, DICT_ERR_IGNORE_NONE); } - /* dict_operation_lock is held both here - (UPDATE or DELETE with FOREIGN KEY) and by TRUNCATE - TABLE operations. - If a TRUNCATE TABLE operation is in progress, - there can be 2 possible conditions: - 1) row_truncate_table_for_mysql() is not yet called. - 2) Truncate releases dict_operation_lock - during eviction of pages from buffer pool - for a file-per-table tablespace. - - In case of (1), truncate will wait for FK operation - to complete. - In case of (2), truncate will be rolled forward even - if it is interrupted. So if the foreign table is - undergoing a truncate, ignore the FK check. 
*/ - if (foreign_table) { - if (foreign_table->space - && foreign_table->space - ->is_being_truncated) { - continue; - } - foreign_table->inc_fk_checks(); } diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc index 08b21bcdd7d..d35df24865e 100644 --- a/storage/innobase/srv/srv0start.cc +++ b/storage/innobase/srv/srv0start.cc @@ -1098,8 +1098,7 @@ srv_undo_tablespaces_init(bool create_new_db) buf_LRU_flush_or_remove_pages(*it, &dummy2); /* Remove the truncate redo log file. */ - undo::Truncate undo_trunc; - undo_trunc.done_logging(*it); + undo::done(*it); } } @@ -1330,6 +1329,12 @@ srv_prepare_to_delete_redo_log_files( ulint pending_io = 0; ulint count = 0; + if ((log_sys.log.format & ~LOG_HEADER_FORMAT_ENCRYPTED) + != LOG_HEADER_FORMAT_CURRENT + || log_sys.log.subformat != 2) { + srv_log_file_size = 0; + } + do { /* Clean the buffer pool. */ buf_flush_sync_all_buf_pools(); @@ -1345,11 +1350,12 @@ srv_prepare_to_delete_redo_log_files( { ib::info info; - if (srv_log_file_size == 0 - || (log_sys.log.format - & ~LOG_HEADER_FORMAT_ENCRYPTED) - != LOG_HEADER_FORMAT_CURRENT) { - info << "Upgrading redo log: "; + if (srv_log_file_size == 0) { + info << ((log_sys.log.format + & ~LOG_HEADER_FORMAT_ENCRYPTED) + != LOG_HEADER_FORMAT_10_4 + ? "Upgrading redo log: " + : "Downgrading redo log: "); } else if (n_files != srv_n_log_files || srv_log_file_size != srv_log_file_size_requested) { @@ -2176,8 +2182,10 @@ files_checked: == (srv_encrypt_log ? LOG_HEADER_FORMAT_CURRENT | LOG_HEADER_FORMAT_ENCRYPTED - : LOG_HEADER_FORMAT_CURRENT)) { - /* No need to upgrade or resize the redo log. */ + : LOG_HEADER_FORMAT_CURRENT) + && log_sys.log.subformat == 2) { + /* No need to add or remove encryption, + upgrade, downgrade, or resize. 
*/ } else { /* Prepare to delete the old redo log files */ flushed_lsn = srv_prepare_to_delete_redo_log_files(i); diff --git a/storage/innobase/trx/trx0purge.cc b/storage/innobase/trx/trx0purge.cc index 8f6e585f66b..da4084f49d9 100644 --- a/storage/innobase/trx/trx0purge.cc +++ b/storage/innobase/trx/trx0purge.cc @@ -519,18 +519,22 @@ truncate of the UNDO is in progress. This file is required during recovery to complete the truncate. */ namespace undo { + /** Magic Number to indicate truncate action is complete. */ + static const ib_uint32_t s_magic = 76845412; /** Populate log file name based on space_id @param[in] space_id id of the undo tablespace. @return DB_SUCCESS or error code */ - dberr_t populate_log_file_name( + static dberr_t populate_log_file_name( ulint space_id, char*& log_file_name) { - ulint log_file_name_sz = - strlen(srv_log_group_home_dir) + 22 + 1 /* NUL */ - + strlen(undo::s_log_prefix) - + strlen(undo::s_log_ext); + static const char s_log_prefix[] = "undo_"; + static const char s_log_ext[] = "trunc.log"; + + ulint log_file_name_sz = strlen(srv_log_group_home_dir) + + (22 - 1 /* NUL */ + + sizeof s_log_prefix + sizeof s_log_ext); log_file_name = new (std::nothrow) char[log_file_name_sz]; if (log_file_name == 0) { @@ -552,63 +556,12 @@ namespace undo { snprintf(log_file_name + log_file_name_len, log_file_name_sz - log_file_name_len, - "%s%lu_%s", undo::s_log_prefix, - (ulong) space_id, s_log_ext); + "%s" ULINTPF "_%s", s_log_prefix, + space_id, s_log_ext); return(DB_SUCCESS); } - /** Create the truncate log file. - @param[in] space_id id of the undo tablespace to truncate. - @return DB_SUCCESS or error code. */ - dberr_t init(ulint space_id) - { - dberr_t err; - char* log_file_name; - - /* Step-1: Create the log file name using the pre-decided - prefix/suffix and table id of undo tablepsace to truncate. 
*/ - err = populate_log_file_name(space_id, log_file_name); - if (err != DB_SUCCESS) { - return(err); - } - - /* Step-2: Create the log file, open it and write 0 to - indicate init phase. */ - bool ret; - os_file_t handle = os_file_create( - innodb_log_file_key, log_file_name, OS_FILE_CREATE, - OS_FILE_NORMAL, OS_LOG_FILE, srv_read_only_mode, &ret); - if (!ret) { - delete[] log_file_name; - return(DB_IO_ERROR); - } - - ulint sz = srv_page_size; - void* buf = ut_zalloc_nokey(sz + srv_page_size); - if (buf == NULL) { - os_file_close(handle); - delete[] log_file_name; - return(DB_OUT_OF_MEMORY); - } - - byte* log_buf = static_cast<byte*>( - ut_align(buf, srv_page_size)); - - IORequest request(IORequest::WRITE); - - err = os_file_write( - request, log_file_name, handle, log_buf, 0, sz); - - os_file_flush(handle); - os_file_close(handle); - - ut_free(buf); - delete[] log_file_name; - - return(err); - } - /** Mark completion of undo truncate action by writing magic number to the log file and then removing it from the disk. If we are going to remove it from disk then why write magic number ? @@ -967,43 +920,137 @@ trx_purge_initiate_truncate( /* Step-3: Start the actual truncate. - a. log-checkpoint - b. Write the DDL log to protect truncate action from CRASH - c. Remove rseg instance if added to purge queue before we + a. Remove rseg instance if added to purge queue before we initiate truncate. - d. Execute actual truncate - e. Remove the DDL log. */ - - /* After truncate if server crashes then redo logging done for this - undo tablespace might not stand valid as tablespace has been - truncated. */ - log_make_checkpoint_at(LSN_MAX, TRUE); + b. 
Execute actual truncate */ const ulint space_id = undo_trunc->get_marked_space_id(); ib::info() << "Truncating UNDO tablespace " << space_id; -#ifdef UNIV_DEBUG - dberr_t err = -#endif /* UNIV_DEBUG */ - undo_trunc->start_logging(space_id); - ut_ad(err == DB_SUCCESS); + trx_purge_cleanse_purge_queue(undo_trunc); - DBUG_EXECUTE_IF("ib_undo_trunc_before_truncate", - ib::info() << "ib_undo_trunc_before_truncate"; - DBUG_SUICIDE();); + ut_a(srv_is_undo_tablespace(space_id)); - trx_purge_cleanse_purge_queue(undo_trunc); + fil_space_t* space = fil_space_get(space_id); - if (!trx_undo_truncate_tablespace(undo_trunc)) { - /* Note: In case of error we don't enable the rsegs - and neither unmark the tablespace so the tablespace - continue to remain inactive. */ - ib::error() << "Failed to truncate UNDO tablespace " - << space_id; + if (!space) { +not_found: + ib::error() << "Failed to find UNDO tablespace " << space_id; return; } + /* Flush all to-be-discarded pages of the tablespace. + + During truncation, we do not want any writes to the + to-be-discarded area, because we must set the space->size + early in order to have deterministic page allocation. + + If a log checkpoint was completed at LSN earlier than our + mini-transaction commit and the server was killed, then + discarding the to-be-trimmed pages without flushing would + break crash recovery. So, we cannot avoid the write. */ + { + FlushObserver observer( + space, + UT_LIST_GET_FIRST(purge_sys.query->thrs)->graph->trx, + NULL); + buf_LRU_flush_or_remove_pages(space_id, &observer); + } + + log_free_check(); + + /* Adjust the tablespace metadata. */ + space = fil_truncate_prepare(space_id); + + if (!space) { + goto not_found; + } + + /* Undo tablespace always are a single file. */ + ut_a(UT_LIST_GET_LEN(space->chain) == 1); + fil_node_t* file = UT_LIST_GET_FIRST(space->chain); + /* The undo tablespace files are never closed. */ + ut_ad(file->is_open()); + + /* Re-initialize tablespace, in a single mini-transaction. 
*/ + mtr_t mtr; + const ulint size = SRV_UNDO_TABLESPACE_SIZE_IN_PAGES; + mtr.start(); + mtr_x_lock(&space->latch, &mtr); + fil_truncate_log(space, size, &mtr); + fsp_header_init(space, size, &mtr); + mutex_enter(&fil_system.mutex); + space->size = file->size = size; + mutex_exit(&fil_system.mutex); + + buf_block_t* sys_header = trx_sysf_get(&mtr); + + for (ulint i = 0; i < undo_trunc->rsegs_size(); ++i) { + trx_rsegf_t* rseg_header; + + trx_rseg_t* rseg = undo_trunc->get_ith_rseg(i); + + rseg->page_no = trx_rseg_header_create( + space, rseg->id, sys_header, &mtr); + + rseg_header = trx_rsegf_get_new( + space_id, rseg->page_no, &mtr); + + /* Before re-initialization ensure that we free the existing + structure. There can't be any active transactions. */ + ut_a(UT_LIST_GET_LEN(rseg->undo_list) == 0); + ut_a(UT_LIST_GET_LEN(rseg->old_insert_list) == 0); + + trx_undo_t* next_undo; + + for (trx_undo_t* undo = UT_LIST_GET_FIRST(rseg->undo_cached); + undo != NULL; + undo = next_undo) { + + next_undo = UT_LIST_GET_NEXT(undo_list, undo); + UT_LIST_REMOVE(rseg->undo_cached, undo); + MONITOR_DEC(MONITOR_NUM_UNDO_SLOT_CACHED); + ut_free(undo); + } + + UT_LIST_INIT(rseg->undo_list, &trx_undo_t::undo_list); + UT_LIST_INIT(rseg->undo_cached, &trx_undo_t::undo_list); + UT_LIST_INIT(rseg->old_insert_list, &trx_undo_t::undo_list); + + /* These were written by trx_rseg_header_create(). */ + ut_ad(!mach_read_from_4(rseg_header + TRX_RSEG_FORMAT)); + ut_ad(!mach_read_from_4(rseg_header + TRX_RSEG_HISTORY_SIZE)); + + /* Initialize the undo log lists according to the rseg header */ + rseg->curr_size = 1; + rseg->trx_ref_count = 0; + rseg->last_page_no = FIL_NULL; + rseg->last_offset = 0; + rseg->last_commit = 0; + rseg->needs_purge = false; + } + + mtr.commit(); + /* Write-ahead the redo log record. */ + log_write_up_to(mtr.commit_lsn(), true); + + /* Trim the file size. 
*/ + os_file_truncate(file->name, file->handle, + os_offset_t(size) << srv_page_size_shift, true); + + /* This is only executed by the srv_coordinator_thread. */ + export_vars.innodb_undo_truncations++; + + /* TODO: PUNCH_HOLE the garbage (with write-ahead logging) */ + + mutex_enter(&fil_system.mutex); + ut_ad(space->stop_new_ops); + ut_ad(space->is_being_truncated); + space->stop_new_ops = false; + space->is_being_truncated = false; + mutex_exit(&fil_system.mutex); + if (purge_sys.rseg != NULL && purge_sys.rseg->last_page_no == FIL_NULL) { /* If purge_sys.rseg is pointing to rseg that was recently @@ -1017,14 +1064,11 @@ trx_purge_initiate_truncate( purge_sys.rseg = NULL; } - DBUG_EXECUTE_IF("ib_undo_trunc_before_ddl_log_end", - ib::info() << "ib_undo_trunc_before_ddl_log_end"; + DBUG_EXECUTE_IF("ib_undo_trunc", + ib::info() << "ib_undo_trunc"; + log_write_up_to(LSN_MAX, true); DBUG_SUICIDE();); - log_make_checkpoint_at(LSN_MAX, TRUE); - - undo_trunc->done_logging(space_id); - /* Completed truncate. Now it is safe to re-use the tablespace. */ for (ulint i = 0; i < undo_trunc->rsegs_size(); ++i) { trx_rseg_t* rseg = undo_trunc->get_ith_rseg(i); @@ -1035,10 +1079,6 @@ trx_purge_initiate_truncate( undo_trunc->reset(); undo::Truncate::clear_trunc_list(); - - DBUG_EXECUTE_IF("ib_undo_trunc_trunc_done", - ib::info() << "ib_undo_trunc_trunc_done"; - DBUG_SUICIDE();); } /** diff --git a/storage/innobase/trx/trx0rseg.cc b/storage/innobase/trx/trx0rseg.cc index 0e56af81ed4..199a1b69f0b 100644 --- a/storage/innobase/trx/trx0rseg.cc +++ b/storage/innobase/trx/trx0rseg.cc @@ -306,7 +306,9 @@ trx_rseg_header_create( /* Reset the undo log slots */ for (ulint i = 0; i < TRX_RSEG_N_SLOTS; i++) { - + /* FIXME: This is generating a lot of redo log. + Why not just let it remain zero-initialized, + and adjust trx_rsegf_undo_find_free() and friends? 
*/ trx_rsegf_set_nth_undo(rsegf, i, FIL_NULL, mtr); } diff --git a/storage/innobase/trx/trx0undo.cc b/storage/innobase/trx/trx0undo.cc index 63ec73fc2b3..0c31299486b 100644 --- a/storage/innobase/trx/trx0undo.cc +++ b/storage/innobase/trx/trx0undo.cc @@ -1684,87 +1684,3 @@ trx_undo_free_at_shutdown(trx_t *trx) undo = NULL; } } - -/** Truncate UNDO tablespace, reinitialize header and rseg. -@param[in] undo_trunc UNDO tablespace handler -@return true if success else false. */ -bool -trx_undo_truncate_tablespace( - undo::Truncate* undo_trunc) - -{ - fil_space_t* space = fil_space_acquire( - undo_trunc->get_marked_space_id()); - if (!space) return false; - - /* Step-1: Truncate tablespace. */ - if (!fil_truncate_tablespace( - space, SRV_UNDO_TABLESPACE_SIZE_IN_PAGES)) { - space->release(); - return false; - } - - /* Step-2: Re-initialize tablespace header. - Avoid REDO logging as we don't want to apply the action if server - crashes. For fix-up we have UNDO-truncate-ddl-log. */ - mtr_t mtr; - mtr_start(&mtr); - mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO); - fsp_header_init(space, SRV_UNDO_TABLESPACE_SIZE_IN_PAGES, &mtr); - mtr_commit(&mtr); - - /* Step-3: Re-initialize rollback segment header that resides - in truncated tablespaced. */ - mtr_start(&mtr); - mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO); - mtr_x_lock(&space->latch, &mtr); - buf_block_t* sys_header = trx_sysf_get(&mtr); - - for (ulint i = 0; i < undo_trunc->rsegs_size(); ++i) { - trx_rsegf_t* rseg_header; - - trx_rseg_t* rseg = undo_trunc->get_ith_rseg(i); - - rseg->page_no = trx_rseg_header_create( - space, rseg->id, sys_header, &mtr); - - rseg_header = trx_rsegf_get_new(space->id, rseg->page_no, - &mtr); - - /* Before re-initialization ensure that we free the existing - structure. There can't be any active transactions. 
*/ - ut_a(UT_LIST_GET_LEN(rseg->undo_list) == 0); - - trx_undo_t* next_undo; - - for (trx_undo_t* undo = UT_LIST_GET_FIRST(rseg->undo_cached); - undo != NULL; - undo = next_undo) { - - next_undo = UT_LIST_GET_NEXT(undo_list, undo); - UT_LIST_REMOVE(rseg->undo_cached, undo); - MONITOR_DEC(MONITOR_NUM_UNDO_SLOT_CACHED); - ut_free(undo); - } - - UT_LIST_INIT(rseg->undo_list, &trx_undo_t::undo_list); - UT_LIST_INIT(rseg->undo_cached, &trx_undo_t::undo_list); - - /* Initialize the undo log lists according to the rseg header */ - rseg->curr_size = mtr_read_ulint( - rseg_header + TRX_RSEG_HISTORY_SIZE, MLOG_4BYTES, &mtr) - + 1; - - ut_ad(rseg->curr_size == 1); - - rseg->trx_ref_count = 0; - rseg->last_page_no = FIL_NULL; - rseg->last_offset = 0; - rseg->last_commit = 0; - rseg->needs_purge = false; - } - mtr_commit(&mtr); - space->release(); - - return true; -} diff --git a/storage/mroonga/ha_mroonga.cpp b/storage/mroonga/ha_mroonga.cpp index 55c6b5d330e..624232f0130 100644 --- a/storage/mroonga/ha_mroonga.cpp +++ b/storage/mroonga/ha_mroonga.cpp @@ -12874,13 +12874,22 @@ int ha_mroonga::delete_all_rows() int ha_mroonga::wrapper_truncate() { int error = 0; + MRN_SHARE *tmp_share; MRN_DBUG_ENTER_METHOD(); + + if (!(tmp_share = mrn_get_share(table->s->table_name.str, table, &error))) + DBUG_RETURN(error); + MRN_SET_WRAP_SHARE_KEY(share, table->s); MRN_SET_WRAP_TABLE_KEY(this, table); - error = wrap_handler->ha_truncate(); + error = parse_engine_table_options(ha_thd(), tmp_share->hton, table->s) + ? MRN_GET_ERROR_NUMBER + : wrap_handler->ha_truncate(); MRN_SET_BASE_SHARE_KEY(share, table->s); MRN_SET_BASE_TABLE_KEY(this, table); + mrn_free_share(tmp_share); + if (!error && wrapper_have_target_index()) { error = wrapper_truncate_index(); } |