From 3c279d9a5a45d734c7cdd2b641520e199e342f95 Mon Sep 17 00:00:00 2001 From: Davi Arnaut Date: Tue, 25 May 2010 17:01:38 -0300 Subject: Bug#42643: InnoDB does not support replication of TRUNCATE TABLE The problem was that TRUNCATE TABLE didn't take a exclusive lock on a table if it resorted to truncating via delete of all rows in the table. Specifically for InnoDB tables, this could break proper isolation as InnoDB ends up aborting some granted locks when truncating a table. The solution is to take a exclusive metadata lock before TRUNCATE TABLE can proceed. This guarantees that no other transaction is using the table. Incompatible change: Truncate via delete no longer fails if sql_safe_updates is activated (this was a undocumented side effect). --- sql/sql_truncate.cc | 464 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 464 insertions(+) create mode 100644 sql/sql_truncate.cc (limited to 'sql/sql_truncate.cc') diff --git a/sql/sql_truncate.cc b/sql/sql_truncate.cc new file mode 100644 index 00000000000..47a98769d57 --- /dev/null +++ b/sql/sql_truncate.cc @@ -0,0 +1,464 @@ +/* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ + +#include "sql_truncate.h" +#include "sql_priv.h" +#include "transaction.h" +#include "debug_sync.h" +#include "records.h" // READ_RECORD +#include "table.h" // TABLE +#include "sql_class.h" // THD +#include "sql_base.h" // open_and_lock_tables +#include "sql_table.h" // write_bin_log +#include "sql_handler.h" // mysql_ha_rm_tables +#include "datadict.h" // dd_recreate_table() +#include "lock.h" // MYSQL_OPEN_TEMPORARY_ONLY + + +/* + Delete all rows of a locked table. + + @param thd Thread context. + @param table_list Table list element for the table. + @param rows_deleted Whether rows might have been deleted. + + @retval FALSE Success. + @retval TRUE Error. +*/ + +static bool +delete_all_rows(THD *thd, TABLE *table) +{ + int error; + READ_RECORD info; + bool is_bulk_delete; + bool some_rows_deleted= FALSE; + bool save_binlog_row_based= thd->is_current_stmt_binlog_format_row(); + DBUG_ENTER("delete_all_rows"); + + /* Replication of truncate table must be statement based. */ + thd->clear_current_stmt_binlog_format_row(); + + /* + Attempt to delete all rows in the table. + If it is unsupported, switch to row by row deletion. + */ + if (! (error= table->file->ha_delete_all_rows())) + goto end; + + if (error != HA_ERR_WRONG_COMMAND) + { + /* + If a transactional engine fails in the middle of deletion, + we expect it to be able to roll it back. Some reasons + for the engine to fail would be media failure or corrupted + data dictionary (i.e. in case of a partitioned table). We + have sufficiently strong metadata locks to rule out any + potential deadlocks. + + If a non-transactional engine fails here (that would + not be MyISAM, since MyISAM does TRUNCATE by recreate), + and binlog is on, replication breaks, since nothing gets + written to the binary log. (XXX: is this a bug?) + */ + table->file->print_error(error, MYF(0)); + goto end; + } + + /* + A workaround for Bug#53696 "Performance schema engine violates the + PSEA API by calling my_error()". + */ + if (thd->is_error()) + goto end; + + /* Handler didn't support fast delete. Delete rows one by one. */ + + init_read_record(&info, thd, table, NULL, TRUE, TRUE, FALSE); + + /* + Start bulk delete. If the engine does not support it, go on, + it's not an error. + */ + is_bulk_delete= ! table->file->start_bulk_delete(); + + table->mark_columns_needed_for_delete(); + + while (!(error= info.read_record(&info)) && !thd->killed) + { + if ((error= table->file->ha_delete_row(table->record[0]))) + { + table->file->print_error(error, MYF(0)); + break; + } + + some_rows_deleted= TRUE; + } + + /* HA_ERR_END_OF_FILE */ + if (error == -1) + error= 0; + + /* Close down the bulk delete. */ + if (is_bulk_delete) + { + int bulk_delete_error= table->file->end_bulk_delete(); + if (bulk_delete_error && !error) + { + table->file->print_error(bulk_delete_error, MYF(0)); + error= bulk_delete_error; + } + } + + end_read_record(&info); + + /* + Regardless of the error status, the query must be written to the + binary log if rows of the table is non-transactional. + */ + if (some_rows_deleted && !table->file->has_transactions()) + { + thd->transaction.stmt.modified_non_trans_table= TRUE; + thd->transaction.all.modified_non_trans_table= TRUE; + } + + if (error || thd->killed) + goto end; + + /* Truncate resets the auto-increment counter. */ + error= table->file->ha_reset_auto_increment(0); + if (error) + { + if (error != HA_ERR_WRONG_COMMAND) + table->file->print_error(error, MYF(0)); + else + error= 0; + } + +end: + if (save_binlog_row_based) + thd->set_current_stmt_binlog_format_row(); + + DBUG_RETURN(error); +} + + +/* + Close and recreate a temporary table. In case of success, + write truncate statement into the binary log if in statement + mode. + + @param thd Thread context. + @param table The temporary table. + + @retval FALSE Success. + @retval TRUE Error. +*/ + +static bool recreate_temporary_table(THD *thd, TABLE *table) +{ + bool error= TRUE; + TABLE_SHARE *share= table->s; + HA_CREATE_INFO create_info; + handlerton *table_type= table->s->db_type(); + DBUG_ENTER("recreate_temporary_table"); + + memset(&create_info, 0, sizeof(create_info)); + + table->file->info(HA_STATUS_AUTO | HA_STATUS_NO_LOCK); + + /* Don't free share. */ + close_temporary_table(thd, table, FALSE, FALSE); + + /* + We must use share->normalized_path.str since for temporary tables it + differs from what dd_recreate_table() would generate based + on table and schema names. + */ + ha_create_table(thd, share->normalized_path.str, share->db.str, + share->table_name.str, &create_info, 1); + + if (open_temporary_table(thd, share->path.str, share->db.str, + share->table_name.str, 1)) + { + error= FALSE; + thd->thread_specific_used= TRUE; + } + else + rm_temporary_table(table_type, share->path.str); + + free_table_share(share); + my_free(table, MYF(0)); + + DBUG_RETURN(error); +} + + +/* + Handle opening and locking if a base table for truncate. + + @param[in] thd Thread context. + @param[in] table_ref Table list element for the table to + be truncated. + @param[out] hton_can_recreate Set to TRUE if table can be dropped + and recreated. + @param[out] ticket_downgrade Set if a lock must be downgraded after + truncate is done. + + @retval FALSE Success. + @retval TRUE Error. +*/ + +static bool open_and_lock_table_for_truncate(THD *thd, TABLE_LIST *table_ref, + bool *hton_can_recreate, + MDL_ticket **ticket_downgrade) +{ + TABLE *table= NULL; + MDL_ticket *mdl_ticket= NULL; + DBUG_ENTER("open_and_lock_table_for_truncate"); + + /* + Before doing anything else, acquire a metadata lock on the table, + or ensure we have one. We don't use open_and_lock_tables() + right away because we want to be able to truncate (and recreate) + corrupted tables, those that we can't fully open. + + MySQL manual documents that TRUNCATE can be used to repair a + damaged table, i.e. a table that can not be fully "opened". + In particular MySQL manual says: As long as the table format + file tbl_name.frm is valid, the table can be re-created as + an empty table with TRUNCATE TABLE, even if the data or index + files have become corrupted. + */ + if (thd->locked_tables_mode) + { + if (!(table= find_table_for_mdl_upgrade(thd->open_tables, table_ref->db, + table_ref->table_name, FALSE))) + DBUG_RETURN(TRUE); + + *hton_can_recreate= ha_check_storage_engine_flag(table->s->db_type(), + HTON_CAN_RECREATE); + } + else + { + /* + Even though we could use the previous execution branch here just as + well, we must not try to open the table: + */ + MDL_request mdl_global_request, mdl_request; + MDL_request_list mdl_requests; + + mdl_global_request.init(MDL_key::GLOBAL, "", "", MDL_INTENTION_EXCLUSIVE); + mdl_request.init(MDL_key::TABLE, table_ref->db, table_ref->table_name, + MDL_SHARED_NO_READ_WRITE); + mdl_requests.push_front(&mdl_request); + mdl_requests.push_front(&mdl_global_request); + + if (thd->mdl_context.acquire_locks(&mdl_requests, + thd->variables.lock_wait_timeout)) + DBUG_RETURN(TRUE); + + mdl_ticket= mdl_request.ticket; + + if (dd_check_storage_engine_flag(thd, table_ref->db, table_ref->table_name, + HTON_CAN_RECREATE, hton_can_recreate)) + DBUG_RETURN(TRUE); + } + + DEBUG_SYNC(thd, "lock_table_for_truncate"); + + if (*hton_can_recreate) + { + /* + Acquire an exclusive lock. The storage engine can recreate the + table only if there are no references to it from anywhere, i.e. + no cached TABLE in the table cache. To remove the table from the + cache we need an exclusive lock. + */ + if (thd->locked_tables_mode) + { + if (wait_while_table_is_used(thd, table, HA_EXTRA_FORCE_REOPEN)) + DBUG_RETURN(TRUE); + *ticket_downgrade= table->mdl_ticket; + close_all_tables_for_name(thd, table->s, FALSE); + } + else + { + ulong timeout= thd->variables.lock_wait_timeout; + if (thd->mdl_context.upgrade_shared_lock_to_exclusive(mdl_ticket, timeout)) + DBUG_RETURN(TRUE); + mysql_mutex_lock(&LOCK_open); + tdc_remove_table(thd, TDC_RT_REMOVE_ALL, table_ref->db, + table_ref->table_name); + mysql_mutex_unlock(&LOCK_open); + } + } + else + { + /* + Can't recreate, we must mechanically delete all rows in + the table. Our metadata lock guarantees that no transaction + is reading or writing into the table. Yet, to open a write + cursor we need a thr_lock lock. Use open_and_lock_tables() + to do the necessary job. + */ + + /* Allow to open base tables only. */ + table_ref->required_type= FRMTYPE_TABLE; + /* We don't need to load triggers. */ + DBUG_ASSERT(table_ref->trg_event_map == 0); + /* Work around partition parser rules using alter table's. */ + if (thd->lex->alter_info.flags & ALTER_ADMIN_PARTITION) + { + table_ref->lock_type= TL_WRITE; + table_ref->mdl_request.set_type(MDL_SHARED_WRITE); + } + /* Ensure proper lock types (e.g. from the parser). */ + DBUG_ASSERT(table_ref->lock_type == TL_WRITE); + DBUG_ASSERT(table_ref->mdl_request.type == MDL_SHARED_WRITE); + + /* + Open the table as it will handle some required preparations. + Ignore pending FLUSH TABLES since we don't want to release + the MDL lock taken above and otherwise there is no way to + wait for FLUSH TABLES in deadlock-free fashion. + */ + if (open_and_lock_tables(thd, table_ref, TL_WRITE, + MYSQL_OPEN_IGNORE_FLUSH | + MYSQL_OPEN_SKIP_TEMPORARY)) + DBUG_RETURN(TRUE); + } + + DBUG_RETURN(FALSE); +} + + +/* + Optimized delete of all rows by doing a full generate of the table. + + @remark Will work even if the .MYI and .MYD files are destroyed. + In other words, it works as long as the .FRM is intact and + the engine supports re-create. + + @param thd Thread context. + @param table_ref Table list element for the table to be truncated. + + @retval FALSE Success. + @retval TRUE Error. +*/ + +bool mysql_truncate_table(THD *thd, TABLE_LIST *table_ref) +{ + TABLE *table; + bool error= TRUE, binlog_stmt; + MDL_ticket *mdl_ticket= NULL; + DBUG_ENTER("mysql_truncate_table"); + + /* Remove tables from the HANDLER's hash. */ + mysql_ha_rm_tables(thd, table_ref); + + /* If it is a temporary table, no need to take locks. */ + if ((table= find_temporary_table(thd, table_ref))) + { + /* In RBR, the statement is not binlogged if the table is temporary. */ + binlog_stmt= !thd->is_current_stmt_binlog_format_row(); + + /* Note that a temporary table cannot be partitioned. */ + if (ha_check_storage_engine_flag(table->s->db_type(), HTON_CAN_RECREATE)) + { + if ((error= recreate_temporary_table(thd, table))) + binlog_stmt= FALSE; /* No need to binlog failed truncate-by-recreate. */ + + DBUG_ASSERT(! thd->transaction.stmt.modified_non_trans_table); + } + else + { + /* + The engine does not support truncate-by-recreate. Open the + table and delete all rows. In such a manner this can in fact + open several tables if it's a temporary MyISAMMRG table. + */ + if (open_and_lock_tables(thd, table_ref, TL_WRITE, + MYSQL_OPEN_TEMPORARY_ONLY)) + DBUG_RETURN(TRUE); + + error= delete_all_rows(thd, table_ref->table); + } + + /* + No need to invalidate the query cache, queries with temporary + tables are not in the cache. No need to write to the binary + log a failed row-by-row delete even if under RBR as the table + might not exist on the slave. + */ + } + else /* It's not a temporary table. */ + { + bool hton_can_recreate; + + if (open_and_lock_table_for_truncate(thd, table_ref, + &hton_can_recreate, &mdl_ticket)) + DBUG_RETURN(TRUE); + + if (hton_can_recreate) + { + /* + The storage engine can truncate the table by creating an + empty table with the same structure. + */ + error= dd_recreate_table(thd, table_ref->db, table_ref->table_name); + + if (thd->locked_tables_mode && thd->locked_tables_list.reopen_tables(thd)) + thd->locked_tables_list.unlink_all_closed_tables(thd, NULL, 0); + + /* No need to binlog a failed truncate-by-recreate. */ + binlog_stmt= !error; + } + else + { + error= delete_all_rows(thd, table_ref->table); + + /* + Regardless of the error status, the query must be written to the + binary log if rows of a non-transactional table were deleted. + */ + binlog_stmt= !error || thd->transaction.stmt.modified_non_trans_table; + } + + query_cache_invalidate3(thd, table_ref, FALSE); + } + + /* DDL is logged in statement format, regardless of binlog format. */ + if (binlog_stmt) + error|= write_bin_log(thd, !error, thd->query(), thd->query_length()); + + /* + All effects of a TRUNCATE TABLE operation are rolled back if a row + by row deletion fails. Otherwise, it is automatically committed at + the end. + */ + if (error) + { + trans_rollback_stmt(thd); + trans_rollback(thd); + } + + if (mdl_ticket) + mdl_ticket->downgrade_exclusive_lock(MDL_SHARED_NO_READ_WRITE); + + DBUG_PRINT("exit", ("error: %d", error)); + DBUG_RETURN(test(error)); +} + -- cgit v1.2.1