diff options
author | Alfranio Correia <alfranio.correia@sun.com> | 2009-12-17 21:43:35 +0000 |
---|---|---|
committer | Alfranio Correia <alfranio.correia@sun.com> | 2009-12-17 21:43:35 +0000 |
commit | 3dadf9564cbfd6e584db282b9105b8f5fb38f99b (patch) | |
tree | fb25bfc9b6c669b42e70afadf7790f0f8d7db7e3 /sql | |
parent | e852131f048df0c48d6779c40b23a347fbccc107 (diff) | |
parent | e83c9f627377f05e093f092cee227da1b43d8e94 (diff) | |
download | mariadb-git-3dadf9564cbfd6e584db282b9105b8f5fb38f99b.tar.gz |
merge mysql-5.1-rep+3 --> mysql-5.1-rep+2-delivery1
Diffstat (limited to 'sql')
36 files changed, 1991 insertions, 1039 deletions
diff --git a/sql/event_db_repository.cc b/sql/event_db_repository.cc index 9f3863eb2b0..a6f249d286f 100644 --- a/sql/event_db_repository.cc +++ b/sql/event_db_repository.cc @@ -1052,8 +1052,8 @@ update_timing_fields_for_event(THD *thd, Turn off row binlogging of event timing updates. These are not used for RBR of events replicated to the slave. */ - if (thd->current_stmt_binlog_row_based) - thd->clear_current_stmt_binlog_row_based(); + if (thd->is_current_stmt_binlog_format_row()) + thd->clear_current_stmt_binlog_format_row(); DBUG_ASSERT(thd->security_ctx->master_access & SUPER_ACL); diff --git a/sql/events.cc b/sql/events.cc index 458ad61718d..b0b69b8c334 100644 --- a/sql/events.cc +++ b/sql/events.cc @@ -430,8 +430,8 @@ Events::create_event(THD *thd, Event_parse_data *parse_data, Turn off row binlogging of this statement and use statement-based so that all supporting tables are updated for CREATE EVENT command. */ - if (thd->current_stmt_binlog_row_based) - thd->clear_current_stmt_binlog_row_based(); + if (thd->is_current_stmt_binlog_format_row()) + thd->clear_current_stmt_binlog_format_row(); pthread_mutex_lock(&LOCK_event_metadata); @@ -563,8 +563,8 @@ Events::update_event(THD *thd, Event_parse_data *parse_data, Turn off row binlogging of this statement and use statement-based so that all supporting tables are updated for UPDATE EVENT command. */ - if (thd->current_stmt_binlog_row_based) - thd->clear_current_stmt_binlog_row_based(); + if (thd->is_current_stmt_binlog_format_row()) + thd->clear_current_stmt_binlog_format_row(); pthread_mutex_lock(&LOCK_event_metadata); @@ -660,8 +660,8 @@ Events::drop_event(THD *thd, LEX_STRING dbname, LEX_STRING name, bool if_exists) Turn off row binlogging of this statement and use statement-based so that all supporting tables are updated for DROP EVENT command. */ - if (thd->current_stmt_binlog_row_based) - thd->clear_current_stmt_binlog_row_based(); + if (thd->is_current_stmt_binlog_format_row()) + thd->clear_current_stmt_binlog_format_row(); pthread_mutex_lock(&LOCK_event_metadata); /* On error conditions my_error() is called so no need to handle here */ diff --git a/sql/ha_ndbcluster_binlog.cc b/sql/ha_ndbcluster_binlog.cc index 27af3f2cf2f..0421c0ecc15 100644 --- a/sql/ha_ndbcluster_binlog.cc +++ b/sql/ha_ndbcluster_binlog.cc @@ -302,6 +302,7 @@ static void run_query(THD *thd, char *buf, char *end, thd->transaction.all= save_thd_transaction_all; thd->transaction.stmt= save_thd_transaction_stmt; thd->net= save_thd_net; + thd->set_current_stmt_binlog_format_row(); if (thd == injector_thd) { @@ -1855,7 +1856,7 @@ static void ndb_binlog_query(THD *thd, Cluster_schema *schema) thd->db= schema->db; int errcode = query_error_code(thd, thd->killed == THD::NOT_KILLED); thd->binlog_query(THD::STMT_QUERY_TYPE, schema->query, - schema->query_length, FALSE, + schema->query_length, FALSE, TRUE, schema->name[0] == 0 || thd->db[0] == 0, errcode); thd->server_id= thd_server_id_save; @@ -3647,6 +3648,7 @@ pthread_handler_t ndb_binlog_thread_func(void *arg) thd= new THD; /* note that contructor of THD uses DBUG_ */ THD_CHECK_SENTRY(thd); + thd->set_current_stmt_binlog_format_row(); /* We need to set thd->thread_id before thd->store_globals, or it will set an invalid value for thd->variables.pseudo_thread_id. diff --git a/sql/ha_partition.cc b/sql/ha_partition.cc index 451631ff373..11ec8e8d176 100644 --- a/sql/ha_partition.cc +++ b/sql/ha_partition.cc @@ -6331,7 +6331,7 @@ void ha_partition::get_auto_increment(ulonglong offset, ulonglong increment, if (!auto_increment_safe_stmt_log_lock && thd->lex->sql_command != SQLCOM_INSERT && mysql_bin_log.is_open() && - !thd->current_stmt_binlog_row_based && + !thd->is_current_stmt_binlog_format_row() && (thd->options & OPTION_BIN_LOG)) { DBUG_PRINT("info", ("locking auto_increment_safe_stmt_log_lock")); diff --git a/sql/handler.cc b/sql/handler.cc index 2414ba64196..888aa86088b 100644 --- a/sql/handler.cc +++ b/sql/handler.cc @@ -2475,7 +2475,7 @@ int handler::update_auto_increment() variables->auto_increment_increment); auto_inc_intervals_count++; /* Row-based replication does not need to store intervals in binlog */ - if (mysql_bin_log.is_open() && !thd->current_stmt_binlog_row_based) + if (mysql_bin_log.is_open() && !thd->is_current_stmt_binlog_format_row()) thd->auto_inc_intervals_in_cur_stmt_for_binlog.append(auto_inc_interval_for_cur_row.minimum(), auto_inc_interval_for_cur_row.values(), variables->auto_increment_increment); @@ -4472,7 +4472,7 @@ static bool check_table_binlog_row_based(THD *thd, TABLE *table) DBUG_ASSERT(table->s->cached_row_logging_check == 0 || table->s->cached_row_logging_check == 1); - return (thd->current_stmt_binlog_row_based && + return (thd->is_current_stmt_binlog_format_row() && table->s->cached_row_logging_check && (thd->options & OPTION_BIN_LOG) && mysql_bin_log.is_open()); @@ -4534,7 +4534,21 @@ static int write_locked_table_maps(THD *thd) if (table->current_lock == F_WRLCK && check_table_binlog_row_based(thd, table)) { - int const has_trans= table->file->has_transactions(); + /* + We need to have a transactional behavior for SQLCOM_CREATE_TABLE + (e.g. CREATE TABLE... SELECT * FROM TABLE) in order to keep a + compatible behavior with the STMT based replication even when + the table is not transactional. In other words, if the operation + fails while executing the insert phase nothing is written to the + binlog. + + Note that at this point, we check the type of a set of tables to + create the table map events. In the function binlog_log_row(), + which calls the current function, we check the type of the table + of the current row. + */ + bool const has_trans= thd->lex->sql_command == SQLCOM_CREATE_TABLE || + table->file->has_transactions(); int const error= thd->binlog_write_table_map(table, has_trans); /* If an error occurs, it is the responsibility of the caller to @@ -4583,10 +4597,20 @@ static int binlog_log_row(TABLE* table, { bitmap_set_all(&cols); if (likely(!(error= write_locked_table_maps(thd)))) - error= (*log_func)(thd, table, table->file->has_transactions(), - &cols, table->s->fields, + { + /* + We need to have a transactional behavior for SQLCOM_CREATE_TABLE + (i.e. CREATE TABLE... SELECT * FROM TABLE) in order to keep a + compatible behavior with the STMT based replication even when + the table is not transactional. In other words, if the operation + fails while executing the insert phase nothing is written to the + binlog. + */ + bool const has_trans= thd->lex->sql_command == SQLCOM_CREATE_TABLE || + table->file->has_transactions(); + error= (*log_func)(thd, table, has_trans, &cols, table->s->fields, before_record, after_record); - + } if (!use_bitbuf) bitmap_free(&cols); } diff --git a/sql/item_create.cc b/sql/item_create.cc index 53aa8081da1..323cdfc6492 100644 --- a/sql/item_create.cc +++ b/sql/item_create.cc @@ -2375,10 +2375,11 @@ Create_udf_func::create(THD *thd, udf_func *udf, List<Item> *item_list) Item *func= NULL; int arg_count= 0; + DBUG_ENTER("Create_udf_func::create"); if (item_list != NULL) arg_count= item_list->elements; - thd->lex->set_stmt_unsafe(); + thd->lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_UDF); DBUG_ASSERT( (udf->type == UDFTYPE_FUNCTION) || (udf->type == UDFTYPE_AGGREGATE)); @@ -2462,7 +2463,7 @@ Create_udf_func::create(THD *thd, udf_func *udf, List<Item> *item_list) } } thd->lex->safe_to_cache_query= 0; - return func; + DBUG_RETURN(func); } #endif @@ -3363,9 +3364,10 @@ Create_func_found_rows Create_func_found_rows::s_singleton; Item* Create_func_found_rows::create(THD *thd) { - thd->lex->set_stmt_unsafe(); + DBUG_ENTER("Create_func_found_rows::create"); + thd->lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION); thd->lex->safe_to_cache_query= 0; - return new (thd->mem_root) Item_func_found_rows(); + DBUG_RETURN(new (thd->mem_root) Item_func_found_rows()); } @@ -3524,7 +3526,7 @@ Create_func_get_lock Create_func_get_lock::s_singleton; Item* Create_func_get_lock::create(THD *thd, Item *arg1, Item *arg2) { - thd->lex->set_stmt_unsafe(); + thd->lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION); thd->lex->uncacheable(UNCACHEABLE_SIDEEFFECT); return new (thd->mem_root) Item_func_get_lock(arg1, arg2); } @@ -3636,7 +3638,7 @@ Create_func_is_free_lock Create_func_is_free_lock::s_singleton; Item* Create_func_is_free_lock::create(THD *thd, Item *arg1) { - thd->lex->set_stmt_unsafe(); + thd->lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION); thd->lex->uncacheable(UNCACHEABLE_SIDEEFFECT); return new (thd->mem_root) Item_func_is_free_lock(arg1); } @@ -3647,7 +3649,7 @@ Create_func_is_used_lock Create_func_is_used_lock::s_singleton; Item* Create_func_is_used_lock::create(THD *thd, Item *arg1) { - thd->lex->set_stmt_unsafe(); + thd->lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION); thd->lex->uncacheable(UNCACHEABLE_SIDEEFFECT); return new (thd->mem_root) Item_func_is_used_lock(arg1); } @@ -3794,9 +3796,10 @@ Create_func_load_file Create_func_load_file::s_singleton; Item* Create_func_load_file::create(THD *thd, Item *arg1) { - thd->lex->set_stmt_unsafe(); + DBUG_ENTER("Create_func_load_file::create"); + thd->lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION); thd->lex->uncacheable(UNCACHEABLE_SIDEEFFECT); - return new (thd->mem_root) Item_load_file(arg1); + DBUG_RETURN(new (thd->mem_root) Item_load_file(arg1)); } @@ -3964,7 +3967,7 @@ Create_func_master_pos_wait::create_native(THD *thd, LEX_STRING name, Item *func= NULL; int arg_count= 0; - thd->lex->set_stmt_unsafe(); + thd->lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION); if (item_list != NULL) arg_count= item_list->elements; @@ -4208,7 +4211,7 @@ Create_func_release_lock Create_func_release_lock::s_singleton; Item* Create_func_release_lock::create(THD *thd, Item *arg1) { - thd->lex->set_stmt_unsafe(); + thd->lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION); thd->lex->uncacheable(UNCACHEABLE_SIDEEFFECT); return new (thd->mem_root) Item_func_release_lock(arg1); } @@ -4266,9 +4269,10 @@ Create_func_row_count Create_func_row_count::s_singleton; Item* Create_func_row_count::create(THD *thd) { - thd->lex->set_stmt_unsafe(); + DBUG_ENTER("Create_func_row_count::create"); + thd->lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION); thd->lex->safe_to_cache_query= 0; - return new (thd->mem_root) Item_func_row_count(); + DBUG_RETURN(new (thd->mem_root) Item_func_row_count()); } @@ -4331,7 +4335,7 @@ Create_func_sleep Create_func_sleep::s_singleton; Item* Create_func_sleep::create(THD *thd, Item *arg1) { - thd->lex->set_stmt_unsafe(); + thd->lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION); thd->lex->uncacheable(UNCACHEABLE_SIDEEFFECT); return new (thd->mem_root) Item_func_sleep(arg1); } @@ -4576,9 +4580,10 @@ Create_func_uuid Create_func_uuid::s_singleton; Item* Create_func_uuid::create(THD *thd) { - thd->lex->set_stmt_unsafe(); + DBUG_ENTER("Create_func_uuid::create"); + thd->lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION); thd->lex->safe_to_cache_query= 0; - return new (thd->mem_root) Item_func_uuid(); + DBUG_RETURN(new (thd->mem_root) Item_func_uuid()); } @@ -4587,9 +4592,10 @@ Create_func_uuid_short Create_func_uuid_short::s_singleton; Item* Create_func_uuid_short::create(THD *thd) { - thd->lex->set_stmt_unsafe(); + DBUG_ENTER("Create_func_uuid_short::create"); + thd->lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION); thd->lex->safe_to_cache_query= 0; - return new (thd->mem_root) Item_func_uuid_short(); + DBUG_RETURN(new (thd->mem_root) Item_func_uuid_short()); } @@ -4598,7 +4604,7 @@ Create_func_version Create_func_version::s_singleton; Item* Create_func_version::create(THD *thd) { - thd->lex->set_stmt_unsafe(); + thd->lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION); return new (thd->mem_root) Item_static_string_func("version()", server_version, (uint) strlen(server_version), diff --git a/sql/log.cc b/sql/log.cc index 5a7d3368abd..c3823336b5d 100644 --- a/sql/log.cc +++ b/sql/log.cc @@ -177,115 +177,155 @@ private: }; /* - Helper class to store binary log transaction data. + Helper classes to store non-transactional and transactional data + before copying it to the binary log. */ -class binlog_trx_data { +class binlog_cache_data +{ public: - binlog_trx_data() - : at_least_one_stmt_committed(0), incident(FALSE), m_pending(0), - before_stmt_pos(MY_OFF_T_UNDEF) + binlog_cache_data(): m_pending(0), before_stmt_pos (MY_OFF_T_UNDEF), + incident(FALSE) { - trans_log.end_of_file= max_binlog_cache_size; + cache_log.end_of_file= max_binlog_cache_size; } - ~binlog_trx_data() + ~binlog_cache_data() { - DBUG_ASSERT(pending() == NULL); - close_cached_file(&trans_log); + DBUG_ASSERT(empty()); + close_cached_file(&cache_log); } - my_off_t position() const { - return my_b_tell(&trans_log); + bool empty() const + { + return pending() == NULL && my_b_tell(&cache_log) == 0; } - bool empty() const + Rows_log_event *pending() const { - return pending() == NULL && my_b_tell(&trans_log) == 0; + return m_pending; } - /* - Truncate the transaction cache to a certain position. This - includes deleting the pending event. - */ - void truncate(my_off_t pos) + void set_pending(Rows_log_event *const pending) { - DBUG_PRINT("info", ("truncating to position %lu", (ulong) pos)); - DBUG_PRINT("info", ("before_stmt_pos=%lu", (ulong) pos)); - if (pending()) - { - delete pending(); - } - set_pending(0); - reinit_io_cache(&trans_log, WRITE_CACHE, pos, 0, 0); - trans_log.end_of_file= max_binlog_cache_size; - if (pos < before_stmt_pos) - before_stmt_pos= MY_OFF_T_UNDEF; + m_pending= pending; + } - /* - The only valid positions that can be truncated to are at the - beginning of a statement. We are relying on this fact to be able - to set the at_least_one_stmt_committed flag correctly. In other word, if - we are truncating to the beginning of the transaction cache, - there will be no statements in the cache, otherwhise, we will - have at least one statement in the transaction cache. - */ - at_least_one_stmt_committed= (pos > 0); + void set_incident(void) + { + incident= TRUE; + } + + bool has_incident(void) + { + return(incident); } - /* - Reset the entire contents of the transaction cache, emptying it - completely. - */ - void reset() { - if (!empty()) - truncate(0); - before_stmt_pos= MY_OFF_T_UNDEF; + void reset() + { + truncate(0); incident= FALSE; - trans_log.end_of_file= max_binlog_cache_size; + before_stmt_pos= MY_OFF_T_UNDEF; + cache_log.end_of_file= max_binlog_cache_size; DBUG_ASSERT(empty()); } - Rows_log_event *pending() const + my_off_t get_byte_position() const { - return m_pending; + return my_b_tell(&cache_log); } - void set_pending(Rows_log_event *const pending) + my_off_t get_prev_position() { - m_pending= pending; + return(before_stmt_pos); } - IO_CACHE trans_log; // The transaction cache - - void set_incident(void) + void set_prev_position(my_off_t pos) { - incident= TRUE; + before_stmt_pos= pos; } - bool has_incident(void) + void restore_prev_position() { - return(incident); + truncate(before_stmt_pos); } - /** - Boolean that is true if there is at least one statement in the - transaction cache. + void restore_savepoint(my_off_t pos) + { + truncate(pos); + if (pos < before_stmt_pos) + before_stmt_pos= MY_OFF_T_UNDEF; + } + + /* + Cache to store data before copying it to the binary log. */ - bool at_least_one_stmt_committed; - bool incident; + IO_CACHE cache_log; private: /* - Pending binrows event. This event is the event where the rows are - currently written. + Pending binrows event. This event is the event where the rows are currently + written. */ Rows_log_event *m_pending; -public: /* Binlog position before the start of the current statement. */ my_off_t before_stmt_pos; + + /* + This indicates that some events did not get into the cache and most likely + it is corrupted. + */ + bool incident; + + /* + It truncates the cache to a certain position. This includes deleting the + pending event. + */ + void truncate(my_off_t pos) + { + DBUG_PRINT("info", ("truncating to position %lu", (ulong) pos)); + if (pending()) + { + delete pending(); + set_pending(0); + } + reinit_io_cache(&cache_log, WRITE_CACHE, pos, 0, 0); + cache_log.end_of_file= max_binlog_cache_size; + } + + binlog_cache_data& operator=(const binlog_cache_data& info); + binlog_cache_data(const binlog_cache_data& info); +}; + +class binlog_cache_mngr { +public: + binlog_cache_mngr() {} + + void reset_cache(binlog_cache_data* cache_data) + { + cache_data->reset(); + } + + binlog_cache_data* get_binlog_cache_data(bool is_transactional) + { + return (is_transactional ? &trx_cache : &stmt_cache); + } + + IO_CACHE* get_binlog_cache_log(bool is_transactional) + { + return (is_transactional ? &trx_cache.cache_log : &stmt_cache.cache_log); + } + + binlog_cache_data stmt_cache; + + binlog_cache_data trx_cache; + +private: + + binlog_cache_mngr& operator=(const binlog_cache_mngr& info); + binlog_cache_mngr(const binlog_cache_mngr& info); }; handlerton *binlog_hton; @@ -1341,26 +1381,6 @@ int LOGGER::set_handlers(uint error_log_printer, return 0; } -/** - This function checks if a transactional talbe was updated by the - current statement. - - @param thd The client thread that executed the current statement. - @return - @c true if a transactional table was updated, @false otherwise. -*/ -static bool stmt_has_updated_trans_table(THD *thd) -{ - Ha_trx_info *ha_info; - - for (ha_info= thd->transaction.stmt.ha_list; ha_info && ha_info->is_started(); ha_info= ha_info->next()) - { - if (ha_info->is_trx_read_write() && ha_info->ht() != binlog_hton) - return (TRUE); - } - return (FALSE); -} - /* Save position of binary log transaction cache. @@ -1383,10 +1403,10 @@ binlog_trans_log_savepos(THD *thd, my_off_t *pos) DBUG_ASSERT(pos != NULL); if (thd_get_ha_data(thd, binlog_hton) == NULL) thd->binlog_setup_trx_data(); - binlog_trx_data *const trx_data= - (binlog_trx_data*) thd_get_ha_data(thd, binlog_hton); + binlog_cache_mngr *const cache_mngr= + (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton); DBUG_ASSERT(mysql_bin_log.is_open()); - *pos= trx_data->position(); + *pos= cache_mngr->trx_cache.get_byte_position(); DBUG_PRINT("return", ("*pos: %lu", (ulong) *pos)); DBUG_VOID_RETURN; } @@ -1417,9 +1437,9 @@ binlog_trans_log_truncate(THD *thd, my_off_t pos) /* Only true if binlog_trans_log_savepos() wasn't called before */ DBUG_ASSERT(pos != ~(my_off_t) 0); - binlog_trx_data *const trx_data= - (binlog_trx_data*) thd_get_ha_data(thd, binlog_hton); - trx_data->truncate(pos); + binlog_cache_mngr *const cache_mngr= + (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton); + cache_mngr->trx_cache.restore_savepoint(pos); DBUG_VOID_RETURN; } @@ -1448,115 +1468,127 @@ int binlog_init(void *p) static int binlog_close_connection(handlerton *hton, THD *thd) { - binlog_trx_data *const trx_data= - (binlog_trx_data*) thd_get_ha_data(thd, binlog_hton); - DBUG_ASSERT(trx_data->empty()); + binlog_cache_mngr *const cache_mngr= + (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton); + DBUG_ASSERT(cache_mngr->trx_cache.empty() && cache_mngr->stmt_cache.empty()); thd_set_ha_data(thd, binlog_hton, NULL); - trx_data->~binlog_trx_data(); - my_free((uchar*)trx_data, MYF(0)); + cache_mngr->~binlog_cache_mngr(); + my_free((uchar*)cache_mngr, MYF(0)); return 0; } -/* - End a transaction. +/** + This function flushes a transactional cache upon commit/rollback. - SYNOPSIS - binlog_end_trans() + @param thd The thread whose transaction should be flushed + @param cache_mngr Pointer to the cache data to be flushed + @param end_ev The end event either commit/rollback. - thd The thread whose transaction should be ended - trx_data Pointer to the transaction data to use - end_ev The end event to use, or NULL - all True if the entire transaction should be ended, false if - only the statement transaction should be ended. + @return + nonzero if an error pops up when flushing the transactional cache. +*/ +static int +binlog_flush_trx_cache(THD *thd, binlog_cache_mngr *cache_mngr, + Log_event *end_ev) +{ + DBUG_ENTER("binlog_flush_trx_cache"); + int error=0; + IO_CACHE *cache_log= &cache_mngr->trx_cache.cache_log; - DESCRIPTION + /* + This function handles transactional changes and as such + this flag equals to true. + */ + bool const is_transactional= TRUE; - End the currently open transaction. The transaction can be either - a real transaction (if 'all' is true) or a statement transaction - (if 'all' is false). + if (thd->binlog_flush_pending_rows_event(TRUE, is_transactional)) + DBUG_RETURN(1); + /* + Doing a commit or a rollback including non-transactional tables, + i.e., ending a transaction where we might write the transaction + cache to the binary log. + + We can always end the statement when ending a transaction since + transactions are not allowed inside stored functions. If they + were, we would have to ensure that we're not ending a statement + inside a stored function. + */ + error= mysql_bin_log.write(thd, &cache_mngr->trx_cache.cache_log, end_ev, + cache_mngr->trx_cache.has_incident()); + cache_mngr->reset_cache(&cache_mngr->trx_cache); - If 'end_ev' is NULL, the transaction is a rollback of only - transactional tables, so the transaction cache will be truncated - to either just before the last opened statement transaction (if - 'all' is false), or reset completely (if 'all' is true). - */ + /* + We need to step the table map version after writing the + transaction cache to disk. + */ + mysql_bin_log.update_table_map_version(); + statistic_increment(binlog_cache_use, &LOCK_status); + if (cache_log->disk_writes != 0) + { + statistic_increment(binlog_cache_disk_use, &LOCK_status); + cache_log->disk_writes= 0; + } + + DBUG_ASSERT(cache_mngr->trx_cache.empty()); + DBUG_RETURN(error); +} + +/** + This function truncates the transactional cache upon committing or rolling + back either a transaction or a statement. + + @param thd The thread whose transaction should be flushed + @param cache_mngr Pointer to the cache data to be flushed + @param all @c true means truncate the transaction, otherwise the + statement must be truncated. + + @return + nonzero if an error pops up when truncating the transactional cache. +*/ static int -binlog_end_trans(THD *thd, binlog_trx_data *trx_data, - Log_event *end_ev, bool all) +binlog_truncate_trx_cache(THD *thd, binlog_cache_mngr *cache_mngr, bool all) { - DBUG_ENTER("binlog_end_trans"); + DBUG_ENTER("binlog_truncate_trx_cache"); int error=0; - IO_CACHE *trans_log= &trx_data->trans_log; - DBUG_PRINT("enter", ("transaction: %s end_ev: 0x%lx", - all ? "all" : "stmt", (long) end_ev)); - DBUG_PRINT("info", ("thd->options={ %s%s}", - FLAGSTR(thd->options, OPTION_NOT_AUTOCOMMIT), - FLAGSTR(thd->options, OPTION_BEGIN))); + /* + This function handles transactional changes and as such this flag + equals to true. + */ + bool const is_transactional= TRUE; + DBUG_PRINT("info", ("thd->options={ %s%s}, transaction: %s", + FLAGSTR(thd->options, OPTION_NOT_AUTOCOMMIT), + FLAGSTR(thd->options, OPTION_BEGIN), + all ? "all" : "stmt")); /* - NULL denotes ROLLBACK with nothing to replicate: i.e., rollback of - only transactional tables. If the transaction contain changes to - any non-transactiona tables, we need write the transaction and log - a ROLLBACK last. + If rolling back an entire transaction or a single statement not + inside a transaction, we reset the transaction cache. */ - if (end_ev != NULL) + thd->binlog_remove_pending_rows_event(TRUE, is_transactional); + if (all || !thd->in_multi_stmt_transaction()) { - if (thd->binlog_flush_pending_rows_event(TRUE)) - DBUG_RETURN(1); - /* - Doing a commit or a rollback including non-transactional tables, - i.e., ending a transaction where we might write the transaction - cache to the binary log. - - We can always end the statement when ending a transaction since - transactions are not allowed inside stored functions. If they - were, we would have to ensure that we're not ending a statement - inside a stored function. - */ - error= mysql_bin_log.write(thd, &trx_data->trans_log, end_ev, - trx_data->has_incident()); - trx_data->reset(); + if (cache_mngr->trx_cache.has_incident()) + error= mysql_bin_log.write_incident(thd, TRUE); - /* - We need to step the table map version after writing the - transaction cache to disk. - */ - mysql_bin_log.update_table_map_version(); - statistic_increment(binlog_cache_use, &LOCK_status); - if (trans_log->disk_writes != 0) - { - statistic_increment(binlog_cache_disk_use, &LOCK_status); - trans_log->disk_writes= 0; - } + cache_mngr->reset_cache(&cache_mngr->trx_cache); + + thd->clear_binlog_table_maps(); } + /* + If rolling back a statement in a transaction, we truncate the + transaction cache to remove the statement. + */ else - { - /* - If rolling back an entire transaction or a single statement not - inside a transaction, we reset the transaction cache. + cache_mngr->trx_cache.restore_prev_position(); - If rolling back a statement in a transaction, we truncate the - transaction cache to remove the statement. - */ - thd->binlog_remove_pending_rows_event(TRUE); - if (all || !(thd->options & (OPTION_BEGIN | OPTION_NOT_AUTOCOMMIT))) - { - if (trx_data->has_incident()) - mysql_bin_log.write_incident(thd, TRUE); - trx_data->reset(); - } - else // ...statement - trx_data->truncate(trx_data->before_stmt_pos); - - /* - We need to step the table map version on a rollback to ensure - that a new table map event is generated instead of the one that - was written to the thrown-away transaction cache. - */ - mysql_bin_log.update_table_map_version(); - } + /* + We need to step the table map version on a rollback to ensure that a new + table map event is generated instead of the one that was written to the + thrown-away transaction cache. + */ + mysql_bin_log.update_table_map_version(); - DBUG_ASSERT(thd->binlog_get_pending_rows_event() == NULL); + DBUG_ASSERT(thd->binlog_get_pending_rows_event(is_transactional) == NULL); DBUG_RETURN(error); } @@ -1572,10 +1604,56 @@ static int binlog_prepare(handlerton *hton, THD *thd, bool all) } /** + This function flushes the non-transactional to the binary log upon + committing or rolling back a statement. + + @param thd The thread whose transaction should be flushed + @param cache_mngr Pointer to the cache data to be flushed + + @return + nonzero if an error pops up when flushing the non-transactional cache. +*/ +static int +binlog_flush_stmt_cache(THD *thd, binlog_cache_mngr *cache_mngr) +{ + int error= 0; + DBUG_ENTER("binlog_flush_stmt_cache"); + /* + If we are flushing the statement cache, it means that the changes get + through otherwise the cache is empty and this routine should not be called. + */ + DBUG_ASSERT(cache_mngr->stmt_cache.has_incident() == FALSE); + /* + This function handles non-transactional changes and as such this flag equals + to false. + */ + bool const is_transactional= FALSE; + IO_CACHE *cache_log= &cache_mngr->stmt_cache.cache_log; + thd->binlog_flush_pending_rows_event(TRUE, is_transactional); + Query_log_event qev(thd, STRING_WITH_LEN("COMMIT"), TRUE, FALSE, TRUE, 0); + if ((error= mysql_bin_log.write(thd, cache_log, &qev, + cache_mngr->stmt_cache.has_incident()))) + DBUG_RETURN(error); + cache_mngr->reset_cache(&cache_mngr->stmt_cache); + + /* + We need to step the table map version after writing the + transaction cache to disk. + */ + mysql_bin_log.update_table_map_version(); + statistic_increment(binlog_cache_use, &LOCK_status); + if (cache_log->disk_writes != 0) + { + statistic_increment(binlog_cache_disk_use, &LOCK_status); + cache_log->disk_writes= 0; + } + DBUG_RETURN(error); +} + +/** This function is called once after each statement. - It has the responsibility to flush the transaction cache to the - binlog file on commits. + It has the responsibility to flush the caches to the binary log on commits. @param hton The binlog handlerton. @param thd The client thread that executes the transaction. @@ -1588,57 +1666,53 @@ static int binlog_commit(handlerton *hton, THD *thd, bool all) { int error= 0; DBUG_ENTER("binlog_commit"); - binlog_trx_data *const trx_data= - (binlog_trx_data*) thd_get_ha_data(thd, binlog_hton); + binlog_cache_mngr *const cache_mngr= + (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton); + bool const in_transaction= thd->in_multi_stmt_transaction(); - if (trx_data->empty()) + DBUG_PRINT("debug", + ("all: %d, in_transaction: %s, all.modified_non_trans_table: %s, stmt.modified_non_trans_table: %s", + all, + YESNO(in_transaction), + YESNO(thd->transaction.all.modified_non_trans_table), + YESNO(thd->transaction.stmt.modified_non_trans_table))); + + if (!cache_mngr->stmt_cache.empty()) + { + binlog_flush_stmt_cache(thd, cache_mngr); + } + + if (cache_mngr->trx_cache.empty()) { - // we're here because trans_log was flushed in MYSQL_BIN_LOG::log_xid() - trx_data->reset(); + /* + we're here because cache_log was flushed in MYSQL_BIN_LOG::log_xid() + */ + cache_mngr->reset_cache(&cache_mngr->trx_cache); DBUG_RETURN(0); } /* We commit the transaction if: - - We are not in a transaction and committing a statement, or - - - We are in a transaction and a full transaction is committed - - Otherwise, we accumulate the statement + - We are in a transaction and a full transaction is committed. + Otherwise, we accumulate the changes. */ - ulonglong const in_transaction= - thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN); - DBUG_PRINT("debug", - ("all: %d, empty: %s, in_transaction: %s, all.modified_non_trans_table: %s, stmt.modified_non_trans_table: %s", - all, - YESNO(trx_data->empty()), - YESNO(in_transaction), - YESNO(thd->transaction.all.modified_non_trans_table), - YESNO(thd->transaction.stmt.modified_non_trans_table))); - if (!in_transaction || all || - (!all && !trx_data->at_least_one_stmt_committed && - !stmt_has_updated_trans_table(thd) && - thd->transaction.stmt.modified_non_trans_table)) + if (!in_transaction || all) { - Query_log_event qev(thd, STRING_WITH_LEN("COMMIT"), TRUE, TRUE, 0); - error= binlog_end_trans(thd, trx_data, &qev, all); + Query_log_event qev(thd, STRING_WITH_LEN("COMMIT"), TRUE, FALSE, TRUE, 0); + error= binlog_flush_trx_cache(thd, cache_mngr, &qev); } - trx_data->at_least_one_stmt_committed = my_b_tell(&trx_data->trans_log) > 0; - + /* + This is part of the stmt rollback. + */ if (!all) - trx_data->before_stmt_pos = MY_OFF_T_UNDEF; // part of the stmt commit + cache_mngr->trx_cache.set_prev_position(MY_OFF_T_UNDEF); DBUG_RETURN(error); } /** - This function is called when a transaction involving a transactional - table is rolled back. - - It has the responsibility to flush the transaction cache to the - binlog file. However, if the transaction does not involve - non-transactional tables, nothing needs to be logged. + This function is called when a transaction or a statement is rolled back. @param hton The binlog handlerton. @param thd The client thread that executes the transaction. @@ -1651,18 +1725,38 @@ static int binlog_rollback(handlerton *hton, THD *thd, bool all) { DBUG_ENTER("binlog_rollback"); int error=0; - binlog_trx_data *const trx_data= - (binlog_trx_data*) thd_get_ha_data(thd, binlog_hton); - - if (trx_data->empty()) { - trx_data->reset(); - DBUG_RETURN(0); - } + binlog_cache_mngr *const cache_mngr= + (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton); DBUG_PRINT("debug", ("all: %s, all.modified_non_trans_table: %s, stmt.modified_non_trans_table: %s", YESNO(all), YESNO(thd->transaction.all.modified_non_trans_table), YESNO(thd->transaction.stmt.modified_non_trans_table))); + + /* + If an incident event is set we do not flush the content of the statement + cache because it may be corrupted. + */ + if (cache_mngr->stmt_cache.has_incident()) + { + mysql_bin_log.write_incident(thd, TRUE); + cache_mngr->reset_cache(&cache_mngr->stmt_cache); + } + else if (!cache_mngr->stmt_cache.empty()) + { + binlog_flush_stmt_cache(thd, cache_mngr); + } + + if (cache_mngr->trx_cache.empty()) + { + /* + we're here because cache_log was flushed in MYSQL_BIN_LOG::log_xid() + */ + cache_mngr->reset_cache(&cache_mngr->trx_cache); + DBUG_RETURN(0); + } + + if (mysql_bin_log.check_write_error(thd)) { /* @@ -1673,52 +1767,46 @@ static int binlog_rollback(handlerton *hton, THD *thd, bool all) */ DBUG_ASSERT(!all); /* - We reach this point if either only transactional tables were modified or - the effect of a statement that did not get into the binlog needs to be - rolled back. In the latter case, if a statement changed non-transactional - tables or had the OPTION_KEEP_LOG associated, we write an incident event - to the binlog in order to stop slaves and notify users that some changes - on the master did not get into the binlog and slaves will be inconsistent. - On the other hand, if a statement is transactional, we just safely roll it - back. + We reach this point if the effect of a statement did not properly get into + a cache and need to be rolled back. */ - if ((thd->transaction.stmt.modified_non_trans_table || - (thd->options & OPTION_KEEP_LOG)) && - mysql_bin_log.check_write_error(thd)) - trx_data->set_incident(); - error= binlog_end_trans(thd, trx_data, 0, all); + error= binlog_truncate_trx_cache(thd, cache_mngr, all); } else - { - /* - We flush the cache with a rollback, wrapped in a beging/rollback if: - . aborting a transaction that modified a non-transactional table; + { + /* + We flush the cache wrapped in a beging/rollback if: + . aborting a transcation that modified a non-transactional table or; . aborting a statement that modified both transactional and - non-transactional tables but which is not in the boundaries of any - transaction or there was no early change; + non-transctional tables but which is not in the boundaries of any + transaction; . the OPTION_KEEP_LOG is activate. */ - if ((all && thd->transaction.all.modified_non_trans_table) || + if (thd->variables.binlog_format == BINLOG_FORMAT_STMT && + ((all && thd->transaction.all.modified_non_trans_table) || (!all && thd->transaction.stmt.modified_non_trans_table && - !(thd->options & (OPTION_BEGIN | OPTION_NOT_AUTOCOMMIT))) || - (!all && thd->transaction.stmt.modified_non_trans_table && - !trx_data->at_least_one_stmt_committed && - thd->current_stmt_binlog_row_based) || - ((thd->options & OPTION_KEEP_LOG))) + !thd->in_multi_stmt_transaction()) || + (thd->options & OPTION_KEEP_LOG))) { - Query_log_event qev(thd, STRING_WITH_LEN("ROLLBACK"), TRUE, TRUE, 0); - error= binlog_end_trans(thd, trx_data, &qev, all); + Query_log_event qev(thd, STRING_WITH_LEN("ROLLBACK"), TRUE, FALSE, TRUE, 0); + error= binlog_flush_trx_cache(thd, cache_mngr, &qev); } /* Otherwise, we simply truncate the cache as there is no change on non-transactional tables as follows. */ - else if ((all && !thd->transaction.all.modified_non_trans_table) || - (!all && !thd->transaction.stmt.modified_non_trans_table)) - error= binlog_end_trans(thd, trx_data, 0, all); + else if (all || (!all && + (!thd->transaction.stmt.modified_non_trans_table || + !thd->in_multi_stmt_transaction() || + thd->variables.binlog_format != BINLOG_FORMAT_STMT))) + error= binlog_truncate_trx_cache(thd, cache_mngr, all); } + + /* + This is part of the stmt rollback. + */ if (!all) - trx_data->before_stmt_pos = MY_OFF_T_UNDEF; // part of the stmt rollback + cache_mngr->trx_cache.set_prev_position(MY_OFF_T_UNDEF); DBUG_RETURN(error); } @@ -1794,7 +1882,8 @@ static int binlog_savepoint_set(handlerton *hton, THD *thd, void *sv) int errcode= query_error_code(thd, thd->killed == THD::NOT_KILLED); int const error= thd->binlog_query(THD::STMT_QUERY_TYPE, - thd->query(), thd->query_length(), TRUE, FALSE, errcode); + thd->query(), thd->query_length(), TRUE, FALSE, FALSE, + errcode); DBUG_RETURN(error); } @@ -1813,7 +1902,8 @@ static int binlog_savepoint_rollback(handlerton *hton, THD *thd, void *sv) int errcode= query_error_code(thd, thd->killed == THD::NOT_KILLED); int error= thd->binlog_query(THD::STMT_QUERY_TYPE, - thd->query(), thd->query_length(), TRUE, FALSE, errcode); + thd->query(), thd->query_length(), TRUE, FALSE, FALSE, + errcode); DBUG_RETURN(error); } binlog_trans_log_truncate(thd, *(my_off_t*)sv); @@ -3876,27 +3966,67 @@ bool MYSQL_BIN_LOG::is_query_in_union(THD *thd, query_id_t query_id_param) int THD::binlog_setup_trx_data() { DBUG_ENTER("THD::binlog_setup_trx_data"); - binlog_trx_data *trx_data= - (binlog_trx_data*) thd_get_ha_data(this, binlog_hton); + binlog_cache_mngr *cache_mngr= + (binlog_cache_mngr*) thd_get_ha_data(this, binlog_hton); - if (trx_data) + if (cache_mngr) DBUG_RETURN(0); // Already set up - trx_data= (binlog_trx_data*) my_malloc(sizeof(binlog_trx_data), MYF(MY_ZEROFILL)); - if (!trx_data || - open_cached_file(&trx_data->trans_log, mysql_tmpdir, + cache_mngr= (binlog_cache_mngr*) my_malloc(sizeof(binlog_cache_mngr), MYF(MY_ZEROFILL)); + if (!cache_mngr || + open_cached_file(&cache_mngr->stmt_cache.cache_log, mysql_tmpdir, + LOG_PREFIX, binlog_cache_size, MYF(MY_WME)) || + open_cached_file(&cache_mngr->trx_cache.cache_log, mysql_tmpdir, LOG_PREFIX, binlog_cache_size, MYF(MY_WME))) { - my_free((uchar*)trx_data, MYF(MY_ALLOW_ZERO_PTR)); + my_free((uchar*)cache_mngr, MYF(MY_ALLOW_ZERO_PTR)); DBUG_RETURN(1); // Didn't manage to set it up } - thd_set_ha_data(this, binlog_hton, trx_data); + thd_set_ha_data(this, binlog_hton, cache_mngr); - trx_data= new (thd_get_ha_data(this, binlog_hton)) binlog_trx_data; + cache_mngr= new (thd_get_ha_data(this, binlog_hton)) binlog_cache_mngr; DBUG_RETURN(0); } +/** + This function checks if a transactional talbe was updated by the + current transaction. + + @param thd The client thread that executed the current statement. + @return + @c true if a transactional table was updated, @false otherwise. +*/ +bool +trans_has_updated_trans_table(THD* thd) +{ + binlog_cache_mngr *const cache_mngr= + (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton); + + return (cache_mngr ? my_b_tell (&cache_mngr->trx_cache.cache_log) : 0); +} + +/** + This function checks if a transactional talbe was updated by the + current statement. + + @param thd The client thread that executed the current statement. + @return + @c true if a transactional table was updated, @false otherwise. +*/ +bool +stmt_has_updated_trans_table(THD *thd) +{ + Ha_trx_info *ha_info; + + for (ha_info= thd->transaction.stmt.ha_list; ha_info; ha_info= ha_info->next()) + { + if (ha_info->is_trx_read_write() && ha_info->ht() != binlog_hton) + return (TRUE); + } + return (FALSE); +} + /* Function to start a statement and optionally a transaction for the binary log. @@ -3910,11 +4040,10 @@ int THD::binlog_setup_trx_data() - Start a transaction if not in autocommit mode or if a BEGIN statement has been seen. - - Start a statement transaction to allow us to truncate the binary - log. + - Start a statement transaction to allow us to truncate the cache. - Save the currrent binlog position so that we can roll back the - statement by truncating the transaction log. + statement by truncating the cache. We only update the saved position if the old one was undefined, the reason is that there are some cases (e.g., for CREATE-SELECT) @@ -3928,15 +4057,15 @@ int THD::binlog_setup_trx_data() void THD::binlog_start_trans_and_stmt() { - binlog_trx_data *trx_data= (binlog_trx_data*) thd_get_ha_data(this, binlog_hton); + binlog_cache_mngr *cache_mngr= (binlog_cache_mngr*) thd_get_ha_data(this, binlog_hton); DBUG_ENTER("binlog_start_trans_and_stmt"); - DBUG_PRINT("enter", ("trx_data: 0x%lx trx_data->before_stmt_pos: %lu", - (long) trx_data, - (trx_data ? (ulong) trx_data->before_stmt_pos : + DBUG_PRINT("enter", ("cache_mngr: %p cache_mngr->trx_cache.get_prev_position(): %lu", + cache_mngr, + (cache_mngr ? (ulong) cache_mngr->trx_cache.get_prev_position() : (ulong) 0))); - if (trx_data == NULL || - trx_data->before_stmt_pos == MY_OFF_T_UNDEF) + if (cache_mngr == NULL || + cache_mngr->trx_cache.get_prev_position() == MY_OFF_T_UNDEF) { this->binlog_set_stmt_begin(); if (options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) @@ -3957,27 +4086,35 @@ THD::binlog_start_trans_and_stmt() } void THD::binlog_set_stmt_begin() { - binlog_trx_data *trx_data= - (binlog_trx_data*) thd_get_ha_data(this, binlog_hton); + binlog_cache_mngr *cache_mngr= + (binlog_cache_mngr*) thd_get_ha_data(this, binlog_hton); /* - The call to binlog_trans_log_savepos() might create the trx_data + The call to binlog_trans_log_savepos() might create the cache_mngr structure, if it didn't exist before, so we save the position into an auto variable and then write it into the transaction - data for the binary log (i.e., trx_data). + data for the binary log (i.e., cache_mngr). */ my_off_t pos= 0; binlog_trans_log_savepos(this, &pos); - trx_data= (binlog_trx_data*) thd_get_ha_data(this, binlog_hton); - trx_data->before_stmt_pos= pos; + cache_mngr= (binlog_cache_mngr*) thd_get_ha_data(this, binlog_hton); + cache_mngr->trx_cache.set_prev_position(pos); } -/* - Write a table map to the binary log. - */ - -int THD::binlog_write_table_map(TABLE *table, bool is_trans) +/** + This function writes a table map to the binary log. + Note that in order to keep the signature uniform with related methods, + we use a redundant parameter to indicate whether a transactional table + was changed or not. + + @param table a pointer to the table. + @param is_transactional @c true indicates a transactional table, + otherwise @c false a non-transactional. + @return + nonzero if an error pops up when writing the table map event. +*/ +int THD::binlog_write_table_map(TABLE *table, bool is_transactional) { int error; DBUG_ENTER("THD::binlog_write_table_map"); @@ -3986,16 +4123,21 @@ int THD::binlog_write_table_map(TABLE *table, bool is_trans) table->s->table_map_id)); /* Pre-conditions */ - DBUG_ASSERT(current_stmt_binlog_row_based && mysql_bin_log.is_open()); + DBUG_ASSERT(is_current_stmt_binlog_format_row() && mysql_bin_log.is_open()); DBUG_ASSERT(table->s->table_map_id != ULONG_MAX); Table_map_log_event - the_event(this, table, table->s->table_map_id, is_trans); + the_event(this, table, table->s->table_map_id, is_transactional); - if (is_trans && binlog_table_maps == 0) + if (binlog_table_maps == 0) binlog_start_trans_and_stmt(); - if ((error= mysql_bin_log.write(&the_event))) + binlog_cache_mngr *const cache_mngr= + (binlog_cache_mngr*) thd_get_ha_data(this, binlog_hton); + + IO_CACHE *file= cache_mngr->get_binlog_cache_log(is_transactional); + + if ((error= the_event.write(file))) DBUG_RETURN(error); binlog_table_maps++; @@ -4003,144 +4145,163 @@ int THD::binlog_write_table_map(TABLE *table, bool is_trans) DBUG_RETURN(0); } +/** + This function retrieves a pending row event from a cache which is + specified through the parameter @c is_transactional. Respectively, when it + is @c true, the pending event is returned from the transactional cache. + Otherwise from the non-transactional cache. + + @param is_transactional @c true indicates a transactional cache, + otherwise @c false a non-transactional. + @return + The row event if any. +*/ Rows_log_event* -THD::binlog_get_pending_rows_event() const +THD::binlog_get_pending_rows_event(bool is_transactional) const { - binlog_trx_data *const trx_data= - (binlog_trx_data*) thd_get_ha_data(this, binlog_hton); + Rows_log_event* rows= NULL; + binlog_cache_mngr *const cache_mngr= + (binlog_cache_mngr*) thd_get_ha_data(this, binlog_hton); + /* - This is less than ideal, but here's the story: If there is no - trx_data, prepare_pending_rows_event() has never been called - (since the trx_data is set up there). In that case, we just return - NULL. + This is less than ideal, but here's the story: If there is no cache_mngr, + prepare_pending_rows_event() has never been called (since the cache_mngr + is set up there). In that case, we just return NULL. */ - return trx_data ? trx_data->pending() : NULL; + if (cache_mngr) + { + binlog_cache_data *cache_data= + cache_mngr->get_binlog_cache_data(is_transactional); + + rows= cache_data->pending(); + } + return (rows); } +/** + This function stores a pending row event into a cache which is specified + through the parameter @c is_transactional. Respectively, when it is @c + true, the pending event is stored into the transactional cache. Otherwise + into the non-transactional cache. + + @param evt a pointer to the row event. + @param is_transactional @c true indicates a transactional cache, + otherwise @c false a non-transactional. +*/ void -THD::binlog_set_pending_rows_event(Rows_log_event* ev) +THD::binlog_set_pending_rows_event(Rows_log_event* ev, bool is_transactional) { if (thd_get_ha_data(this, binlog_hton) == NULL) binlog_setup_trx_data(); - binlog_trx_data *const trx_data= - (binlog_trx_data*) thd_get_ha_data(this, binlog_hton); + binlog_cache_mngr *const cache_mngr= + (binlog_cache_mngr*) thd_get_ha_data(this, binlog_hton); + + DBUG_ASSERT(cache_mngr); + + binlog_cache_data *cache_data= + cache_mngr->get_binlog_cache_data(is_transactional); - DBUG_ASSERT(trx_data); - trx_data->set_pending(ev); + cache_data->set_pending(ev); } /** - Remove the pending rows event, discarding any outstanding rows. - - If there is no pending rows event available, this is effectively a + This function removes the pending rows event, discarding any outstanding + rows. If there is no pending rows event available, this is effectively a no-op. - */ + + @param thd a pointer to the user thread. + @param is_transactional @c true indicates a transactional cache, + otherwise @c false a non-transactional. +*/ int -MYSQL_BIN_LOG::remove_pending_rows_event(THD *thd) +MYSQL_BIN_LOG::remove_pending_rows_event(THD *thd, bool is_transactional) { DBUG_ENTER("MYSQL_BIN_LOG::remove_pending_rows_event"); - binlog_trx_data *const trx_data= - (binlog_trx_data*) thd_get_ha_data(thd, binlog_hton); + binlog_cache_mngr *const cache_mngr= + (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton); + + DBUG_ASSERT(cache_mngr); - DBUG_ASSERT(trx_data); + binlog_cache_data *cache_data= + cache_mngr->get_binlog_cache_data(is_transactional); - if (Rows_log_event* pending= trx_data->pending()) + if (Rows_log_event* pending= cache_data->pending()) { delete pending; - trx_data->set_pending(NULL); + cache_data->set_pending(NULL); } DBUG_RETURN(0); } /* - Moves the last bunch of rows from the pending Rows event to the binlog - (either cached binlog if transaction, or disk binlog). Sets a new pending - event. + Moves the last bunch of rows from the pending Rows event to a cache (either + transactional cache if is_transaction is @c true, or the non-transactional + cache otherwise. Sets a new pending event. + + @param thd a pointer to the user thread. + @param evt a pointer to the row event. + @param is_transactional @c true indicates a transactional cache, + otherwise @c false a non-transactional. */ int MYSQL_BIN_LOG::flush_and_set_pending_rows_event(THD *thd, - Rows_log_event* event) + Rows_log_event* event, + bool is_transactional) { DBUG_ENTER("MYSQL_BIN_LOG::flush_and_set_pending_rows_event(event)"); DBUG_ASSERT(mysql_bin_log.is_open()); DBUG_PRINT("enter", ("event: 0x%lx", (long) event)); int error= 0; + binlog_cache_mngr *const cache_mngr= + (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton); - binlog_trx_data *const trx_data= - (binlog_trx_data*) thd_get_ha_data(thd, binlog_hton); + DBUG_ASSERT(cache_mngr); - DBUG_ASSERT(trx_data); + binlog_cache_data *cache_data= + cache_mngr->get_binlog_cache_data(is_transactional); - DBUG_PRINT("info", ("trx_data->pending(): 0x%lx", (long) trx_data->pending())); + DBUG_PRINT("info", ("cache_mngr->pending(): 0x%lx", (long) cache_data->pending())); - if (Rows_log_event* pending= trx_data->pending()) + if (Rows_log_event* pending= cache_data->pending()) { - IO_CACHE *file= &log_file; + IO_CACHE *file= &cache_data->cache_log; /* - Decide if we should write to the log file directly or to the - transaction log. - */ - if (pending->get_cache_stmt() || my_b_tell(&trx_data->trans_log)) - file= &trx_data->trans_log; - - /* - If we are writing to the log file directly, we could avoid - locking the log. This does not work since we need to step the - m_table_map_version below, and that change has to be protected - by the LOCK_log mutex. - */ - pthread_mutex_lock(&LOCK_log); - - /* - Write pending event to log file or transaction cache + Write pending event to the cache. */ if (pending->write(file)) { - pthread_mutex_unlock(&LOCK_log); set_write_error(thd); + if (check_write_error(thd) && cache_data && + thd->transaction.stmt.modified_non_trans_table) + cache_data->set_incident(); DBUG_RETURN(1); } /* We step the table map version if we are writing an event - representing the end of a statement. We do this regardless of - wheather we write to the transaction cache or to directly to the - file. - - In an ideal world, we could avoid stepping the table map version - if we were writing to a transaction cache, since we could then - reuse the table map that was written earlier in the transaction - cache. This does not work since STMT_END_F implies closing all - table mappings on the slave side. + representing the end of a statement. + In an ideal world, we could avoid stepping the table map version, + since we could then reuse the table map that was written earlier + in the cache. This does not work since STMT_END_F implies closing + all table mappings on the slave side. + TODO: Find a solution so that table maps does not have to be written several times within a transaction. - */ + */ if (pending->get_flags(Rows_log_event::STMT_END_F)) ++m_table_map_version; delete pending; - - if (file == &log_file) - { - error= flush_and_sync(0); - if (!error) - { - signal_update(); - rotate_and_purge(RP_LOCK_LOG_IS_ALREADY_LOCKED); - } - } - - pthread_mutex_unlock(&LOCK_log); } - thd->binlog_set_pending_rows_event(event); + thd->binlog_set_pending_rows_event(event, is_transactional); DBUG_RETURN(error); } @@ -4154,6 +4315,7 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info) THD *thd= event_info->thd; bool error= 1; DBUG_ENTER("MYSQL_BIN_LOG::write(Log_event *)"); + binlog_cache_data *cache_data= 0; if (thd->binlog_evt_union.do_union) { @@ -4162,27 +4324,22 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info) We will log the function call to the binary log on function exit */ thd->binlog_evt_union.unioned_events= TRUE; - thd->binlog_evt_union.unioned_events_trans |= event_info->cache_stmt; + thd->binlog_evt_union.unioned_events_trans |= + event_info->use_trans_cache(); DBUG_RETURN(0); } /* - Flush the pending rows event to the transaction cache or to the - log file. Since this function potentially aquire the LOCK_log - mutex, we do this before aquiring the LOCK_log mutex in this - function. - We only end the statement if we are in a top-level statement. If we are inside a stored function, we do not end the statement since this will close all tables on the slave. */ bool const end_stmt= thd->prelocked_mode && thd->lex->requires_prelocking(); - if (thd->binlog_flush_pending_rows_event(end_stmt)) + if (thd->binlog_flush_pending_rows_event(end_stmt, + event_info->use_trans_cache())) DBUG_RETURN(error); - pthread_mutex_lock(&LOCK_log); - /* In most cases this is only called if 'is_open()' is true; in fact this is mostly called if is_open() *was* true a few instructions before, but it @@ -4190,7 +4347,6 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info) */ if (likely(is_open())) { - IO_CACHE *file= &log_file; #ifdef HAVE_REPLICATION /* In the future we need to add to the following if tests like @@ -4200,67 +4356,71 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info) const char *local_db= event_info->get_db(); if ((thd && !(thd->options & OPTION_BIN_LOG)) || (!binlog_filter->db_ok(local_db))) - { - VOID(pthread_mutex_unlock(&LOCK_log)); DBUG_RETURN(0); - } #endif /* HAVE_REPLICATION */ -#if defined(USING_TRANSACTIONS) - /* - Should we write to the binlog cache or to the binlog on disk? - Write to the binlog cache if: - - it is already not empty (meaning we're in a transaction; note that the - present event could be about a non-transactional table, but still we need - to write to the binlog cache in that case to handle updates to mixed - trans/non-trans table types the best possible in binlogging) - - or if the event asks for it (cache_stmt == TRUE). - */ - if (opt_using_transactions && thd) +#if defined(USING_TRANSACTIONS) + IO_CACHE *file= NULL; + + if (event_info->use_direct_logging()) + { + file= &log_file; + pthread_mutex_lock(&LOCK_log); + } + else { if (thd->binlog_setup_trx_data()) goto err; - binlog_trx_data *const trx_data= - (binlog_trx_data*) thd_get_ha_data(thd, binlog_hton); - IO_CACHE *trans_log= &trx_data->trans_log; - my_off_t trans_log_pos= my_b_tell(trans_log); - if (event_info->get_cache_stmt() || trans_log_pos != 0 || - stmt_has_updated_trans_table(thd)) + binlog_cache_mngr *const cache_mngr= + (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton); + + /* + If we are about to use write rows, we just need to check the type of + the event (either transactional or non-transactional) in order to + choose the cache. + */ + if (thd->is_current_stmt_binlog_format_row()) { - DBUG_PRINT("info", ("Using trans_log: cache: %d, trans_log_pos: %lu", - event_info->get_cache_stmt(), - (ulong) trans_log_pos)); - thd->binlog_start_trans_and_stmt(); - file= trans_log; + file= cache_mngr->get_binlog_cache_log(event_info->use_trans_cache()); + cache_data= cache_mngr->get_binlog_cache_data(event_info->use_trans_cache()); } /* - TODO as Mats suggested, for all the cases above where we write to - trans_log, it sounds unnecessary to lock LOCK_log. We should rather - test first if we want to write to trans_log, and if not, lock - LOCK_log. + However, if we are about to write statements we need to consider other + things. We use the non-transactional cache when: + + . the transactional cache is empty which means that there were no + early statement on behalf of the transaction. + . the respective event is tagged as non-transactional. */ + else if (cache_mngr->trx_cache.empty() && + !event_info->use_trans_cache()) + { + file= &cache_mngr->stmt_cache.cache_log; + cache_data= &cache_mngr->stmt_cache; + } + else + { + file= &cache_mngr->trx_cache.cache_log; + cache_data= &cache_mngr->trx_cache; + } + + thd->binlog_start_trans_and_stmt(); } #endif /* USING_TRANSACTIONS */ DBUG_PRINT("info",("event type: %d",event_info->get_type_code())); /* - No check for auto events flag here - this write method should - never be called if auto-events are enabled - */ - - /* - 1. Write first log events which describe the 'run environment' - of the SQL command - */ + No check for auto events flag here - this write method should + never be called if auto-events are enabled. - /* - If row-based binlogging, Insert_id, Rand and other kind of "setting - context" events are not needed. + Write first log events which describe the 'run environment' + of the SQL command. If row-based binlogging, Insert_id, Rand + and other kind of "setting context" events are not needed. */ if (thd) { - if (!thd->current_stmt_binlog_row_based) + if (!thd->is_current_stmt_binlog_format_row()) { if (thd->stmt_depends_on_first_successful_insert_id_in_prev_stmt) { @@ -4306,39 +4466,48 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info) } /* - Write the SQL command - */ - - if (event_info->write(file) || + Write the event. + */ + if (event_info->write(file) || DBUG_EVALUATE_IF("injecting_fault_writing", 1, 0)) goto err; - if (file == &log_file) // we are writing to the real log (disk) + error= 0; + +err: + if (event_info->use_direct_logging()) { - bool synced= 0; - if (flush_and_sync(&synced)) - goto err; + if (!error) + { + bool synced; + if ((error= flush_and_sync(&synced))) + goto unlock; - if (RUN_HOOK(binlog_storage, after_flush, - (thd, log_file_name, file->pos_in_file, synced))) { - sql_print_error("Failed to run 'after_flush' hooks"); - goto err; + if ((error= RUN_HOOK(binlog_storage, after_flush, + (thd, log_file_name, file->pos_in_file, synced)))) + { + sql_print_error("Failed to run 'after_flush' hooks"); + goto unlock; + } + signal_update(); + rotate_and_purge(RP_LOCK_LOG_IS_ALREADY_LOCKED); } - - signal_update(); - rotate_and_purge(RP_LOCK_LOG_IS_ALREADY_LOCKED); +unlock: + pthread_mutex_unlock(&LOCK_log); } - error=0; -err: if (error) + { set_write_error(thd); + if (check_write_error(thd) && cache_data && + thd->transaction.stmt.modified_non_trans_table) + cache_data->set_incident(); + } } if (event_info->flags & LOG_EVENT_UPDATE_TABLE_MAP_VERSION_F) ++m_table_map_version; - pthread_mutex_unlock(&LOCK_log); DBUG_RETURN(error); } @@ -4450,7 +4619,7 @@ uint MYSQL_BIN_LOG::next_file_id() write_cache() cache Cache to write to the binary log lock_log True if the LOCK_log mutex should be aquired, false otherwise - sync_log True if the log should be flushed and sync:ed + sync_log True if the log should be flushed and synced DESCRIPTION Write the contents of the cache to the binary log. The cache will @@ -4666,9 +4835,6 @@ bool MYSQL_BIN_LOG::write(THD *thd, IO_CACHE *cache, Log_event *commit_event, DBUG_ENTER("MYSQL_BIN_LOG::write(THD *, IO_CACHE *, Log_event *)"); VOID(pthread_mutex_lock(&LOCK_log)); - /* NULL would represent nothing to replicate after ROLLBACK */ - DBUG_ASSERT(commit_event != NULL); - DBUG_ASSERT(is_open()); if (likely(is_open())) // Should always be true { @@ -4683,19 +4849,9 @@ bool MYSQL_BIN_LOG::write(THD *thd, IO_CACHE *cache, Log_event *commit_event, transaction is either a BEGIN..COMMIT block or a single statement in autocommit mode. */ - Query_log_event qinfo(thd, STRING_WITH_LEN("BEGIN"), TRUE, TRUE, 0); - - /* - Now this Query_log_event has artificial log_pos 0. It must be - adjusted to reflect the real position in the log. Not doing it - would confuse the slave: it would prevent this one from - knowing where he is in the master's binlog, which would result - in wrong positions being shown to the user, MASTER_POS_WAIT - undue waiting etc. - */ + Query_log_event qinfo(thd, STRING_WITH_LEN("BEGIN"), TRUE, FALSE, TRUE, 0); if (qinfo.write(&log_file)) goto err; - DBUG_EXECUTE_IF("crash_before_writing_xid", { if ((write_error= write_cache(cache, false, true))) @@ -5793,13 +5949,13 @@ int TC_LOG_BINLOG::log_xid(THD *thd, my_xid xid) { DBUG_ENTER("TC_LOG_BINLOG::log"); Xid_log_event xle(thd, xid); - binlog_trx_data *trx_data= - (binlog_trx_data*) thd_get_ha_data(thd, binlog_hton); + binlog_cache_mngr *cache_mngr= + (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton); /* We always commit the entire transaction when writing an XID. Also note that the return value is inverted. */ - DBUG_RETURN(!binlog_end_trans(thd, trx_data, &xle, TRUE)); + DBUG_RETURN(!binlog_flush_trx_cache(thd, cache_mngr, &xle)); } void TC_LOG_BINLOG::unlog(ulong cookie, my_xid xid) diff --git a/sql/log.h b/sql/log.h index 76ca18c8288..60b065dfb39 100644 --- a/sql/log.h +++ b/sql/log.h @@ -20,6 +20,9 @@ class Relay_log_info; class Format_description_log_event; +bool trans_has_updated_trans_table(THD* thd); +bool stmt_has_updated_trans_table(THD *thd); + /* Transaction Coordinator log - a base abstract class for two different implementations @@ -342,8 +345,9 @@ public: ulonglong table_map_version() const { return m_table_map_version; } void update_table_map_version() { ++m_table_map_version; } - int flush_and_set_pending_rows_event(THD *thd, Rows_log_event* event); - int remove_pending_rows_event(THD *thd); + int flush_and_set_pending_rows_event(THD *thd, Rows_log_event* event, + bool is_transactional); + int remove_pending_rows_event(THD *thd, bool is_transactional); #endif /* !defined(MYSQL_CLIENT) */ void reset_bytes_written() @@ -381,7 +385,7 @@ public: /* Use this to start writing a new log file */ void new_file(); - bool write(Log_event* event_info); // binary log write + bool write(Log_event* event_info); bool write(THD *thd, IO_CACHE *cache, Log_event *commit_event, bool incident); bool write_incident(THD *thd, bool lock); diff --git a/sql/log_event.cc b/sql/log_event.cc index d0c8c245175..f97a13daf66 100644 --- a/sql/log_event.cc +++ b/sql/log_event.cc @@ -664,10 +664,11 @@ Log_event::Log_event(THD* thd_arg, uint16 flags_arg, bool using_trans) { server_id= thd->server_id; when= thd->start_time; - cache_stmt= using_trans; + cache_type= (using_trans || stmt_has_updated_trans_table(thd) + ? Log_event::EVENT_TRANSACTIONAL_CACHE : + Log_event::EVENT_STMT_CACHE); } - /** This minimal constructor is for when you are not even sure that there is a valid THD. For example in the server when we are shutting down or @@ -676,8 +677,8 @@ Log_event::Log_event(THD* thd_arg, uint16 flags_arg, bool using_trans) */ Log_event::Log_event() - :temp_buf(0), exec_time(0), flags(0), cache_stmt(0), - thd(0) + :temp_buf(0), exec_time(0), flags(0), + cache_type(Log_event::EVENT_INVALID_CACHE), thd(0) { server_id= ::server_id; /* @@ -696,7 +697,7 @@ Log_event::Log_event() Log_event::Log_event(const char* buf, const Format_description_log_event* description_event) - :temp_buf(0), cache_stmt(0) + :temp_buf(0), cache_type(Log_event::EVENT_INVALID_CACHE) { #ifndef MYSQL_CLIENT thd = 0; @@ -2360,7 +2361,7 @@ Query_log_event::Query_log_event() */ Query_log_event::Query_log_event(THD* thd_arg, const char* query_arg, ulong query_length, bool using_trans, - bool suppress_use, int errcode) + bool direct, bool suppress_use, int errcode) :Log_event(thd_arg, (thd_arg->thread_specific_used ? LOG_EVENT_THREAD_SPECIFIC_F : @@ -2439,6 +2440,95 @@ Query_log_event::Query_log_event(THD* thd_arg, const char* query_arg, } else time_zone_len= 0; + + /* + In what follows, we decide whether to write to the binary log or to use a + cache. + */ + LEX *lex= thd->lex; + bool implicit_commit= FALSE; + cache_type= Log_event::EVENT_INVALID_CACHE; + switch (lex->sql_command) + { + case SQLCOM_ALTER_DB: + case SQLCOM_CREATE_FUNCTION: + case SQLCOM_DROP_FUNCTION: + case SQLCOM_DROP_PROCEDURE: + case SQLCOM_INSTALL_PLUGIN: + case SQLCOM_UNINSTALL_PLUGIN: + case SQLCOM_ALTER_TABLESPACE: + implicit_commit= TRUE; + break; + case SQLCOM_DROP_TABLE: + implicit_commit= !(lex->drop_temporary && thd->in_multi_stmt_transaction()); + break; + case SQLCOM_ALTER_TABLE: + case SQLCOM_CREATE_TABLE: + implicit_commit= !((lex->create_info.options & HA_LEX_CREATE_TMP_TABLE) && + thd->in_multi_stmt_transaction()) && + !(lex->select_lex.item_list.elements && + thd->is_current_stmt_binlog_format_row()); + break; + case SQLCOM_SET_OPTION: + implicit_commit= (lex->autocommit ? TRUE : FALSE); + break; + /* + Replace what follows after CF_AUTO_COMMIT_TRANS is backported by: + + default: + implicit_commit= ((sql_command_flags[lex->sql_command] & + CF_AUTO_COMMIT_TRANS)); + break; + */ + case SQLCOM_CREATE_INDEX: + case SQLCOM_TRUNCATE: + case SQLCOM_CREATE_DB: + case SQLCOM_DROP_DB: + case SQLCOM_ALTER_DB_UPGRADE: + case SQLCOM_RENAME_TABLE: + case SQLCOM_DROP_INDEX: + case SQLCOM_CREATE_VIEW: + case SQLCOM_DROP_VIEW: + case SQLCOM_CREATE_TRIGGER: + case SQLCOM_DROP_TRIGGER: + case SQLCOM_CREATE_EVENT: + case SQLCOM_ALTER_EVENT: + case SQLCOM_DROP_EVENT: + case SQLCOM_REPAIR: + case SQLCOM_OPTIMIZE: + case SQLCOM_ANALYZE: + case SQLCOM_CREATE_USER: + case SQLCOM_DROP_USER: + case SQLCOM_RENAME_USER: + case SQLCOM_REVOKE_ALL: + case SQLCOM_REVOKE: + case SQLCOM_GRANT: + case SQLCOM_CREATE_PROCEDURE: + case SQLCOM_CREATE_SPFUNCTION: + case SQLCOM_ALTER_PROCEDURE: + case SQLCOM_ALTER_FUNCTION: + case SQLCOM_ASSIGN_TO_KEYCACHE: + case SQLCOM_PRELOAD_KEYS: + case SQLCOM_FLUSH: + case SQLCOM_CHECK: + implicit_commit= TRUE; + break; + default: + implicit_commit= FALSE; + break; + } + + if (implicit_commit || direct) + { + cache_type= Log_event::EVENT_NO_CACHE; + } + else + { + cache_type= (using_trans || stmt_has_updated_trans_table(thd) + ? Log_event::EVENT_TRANSACTIONAL_CACHE : + Log_event::EVENT_STMT_CACHE); + } + DBUG_ASSERT(cache_type != Log_event::EVENT_INVALID_CACHE); DBUG_PRINT("info",("Query_log_event has flags2: %lu sql_mode: %lu", (ulong) flags2, sql_mode)); } @@ -6704,9 +6794,9 @@ Execute_load_query_log_event(THD *thd_arg, const char* query_arg, ulong query_length_arg, uint fn_pos_start_arg, uint fn_pos_end_arg, enum_load_dup_handling dup_handling_arg, - bool using_trans, bool suppress_use, + bool using_trans, bool direct, bool suppress_use, int errcode): - Query_log_event(thd_arg, query_arg, query_length_arg, using_trans, + Query_log_event(thd_arg, query_arg, query_length_arg, using_trans, direct, suppress_use, errcode), file_id(thd_arg->file_id), fn_pos_start(fn_pos_start_arg), fn_pos_end(fn_pos_end_arg), dup_handling(dup_handling_arg) @@ -7275,7 +7365,7 @@ int Rows_log_event::do_apply_event(Relay_log_info const *rli) We also call the mysql_reset_thd_for_next_command(), since this is the logical start of the next "statement". Note that this - call might reset the value of current_stmt_binlog_row_based, so + call might reset the value of current_stmt_binlog_format, so we need to do any changes to that value after this function. */ lex_start(thd); @@ -7287,16 +7377,12 @@ int Rows_log_event::do_apply_event(Relay_log_info const *rli) */ thd->transaction.stmt.modified_non_trans_table= FALSE; /* - Check if the slave is set to use SBR. If so, it should switch - to using RBR until the end of the "statement", i.e., next - STMT_END_F or next error. + This is a row injection, so we flag the "statement" as + such. Note that this code is called both when the slave does row + injections and when the BINLOG statement is used to do row + injections. */ - if (!thd->current_stmt_binlog_row_based && - mysql_bin_log.is_open() && (thd->options & OPTION_BIN_LOG)) - { - thd->set_current_stmt_binlog_row_based(); - } - + thd->lex->set_stmt_row_injection(); /* There are a few flags that are replicated with each row event. @@ -7545,7 +7631,7 @@ int Rows_log_event::do_apply_event(Relay_log_info const *rli) error= 0; } - if (!cache_stmt) + if (!use_trans_cache()) { DBUG_PRINT("info", ("Marked that we need to keep log")); thd->options|= OPTION_KEEP_LOG; @@ -7564,7 +7650,14 @@ int Rows_log_event::do_apply_event(Relay_log_info const *rli) slave_rows_error_report(ERROR_LEVEL, error, rli, thd, table, get_type_str(), RPL_LOG_NAME, (ulong) log_pos); - thd->reset_current_stmt_binlog_row_based(); + /* + @todo We should probably not call + reset_current_stmt_binlog_format_row() from here. + + Note: this applies to log_event_old.cc too. + /Sven + */ + thd->reset_current_stmt_binlog_format_row(); const_cast<Relay_log_info*>(rli)->cleanup_context(thd, error); thd->is_slave_error= 1; DBUG_RETURN(error); @@ -7625,7 +7718,7 @@ static int rows_event_stmt_cleanup(Relay_log_info const *rli, THD * thd) (assume the last master's transaction is ignored by the slave because of replicate-ignore rules). */ - thd->binlog_flush_pending_rows_event(true); + thd->binlog_flush_pending_rows_event(TRUE); /* If this event is not in a transaction, the call below will, if some @@ -7648,7 +7741,17 @@ static int rows_event_stmt_cleanup(Relay_log_info const *rli, THD * thd) event flushed. */ - thd->reset_current_stmt_binlog_row_based(); + /* + @todo We should probably not call + reset_current_stmt_binlog_format_row() from here. + + Note: this applies to log_event_old.cc too + + Btw, the previous comment about transactional engines does not + seem related to anything that happens here. + /Sven + */ + thd->reset_current_stmt_binlog_format_row(); const_cast<Relay_log_info*>(rli)->cleanup_context(thd, 0); } @@ -7871,7 +7974,7 @@ int Table_map_log_event::save_field_metadata() #if !defined(MYSQL_CLIENT) Table_map_log_event::Table_map_log_event(THD *thd, TABLE *tbl, ulong tid, bool is_transactional) - : Log_event(thd, 0, true), + : Log_event(thd, 0, is_transactional), m_table(tbl), m_dbnam(tbl->s->db.str), m_dblen(m_dbnam ? tbl->s->db.length : 0), diff --git a/sql/log_event.h b/sql/log_event.h index d4168a87331..64489bd1f3f 100644 --- a/sql/log_event.h +++ b/sql/log_event.h @@ -870,6 +870,31 @@ public: EVENT_SKIP_COUNT }; + enum enum_event_cache_type + { + EVENT_INVALID_CACHE, + /* + If possible the event should use a non-transactional cache before + being flushed to the binary log. This means that it must be flushed + right after its correspondent statement is completed. + */ + EVENT_STMT_CACHE, + /* + The event should use a transactional cache before being flushed to + the binary log. This means that it must be flushed upon commit or + rollback. + */ + EVENT_TRANSACTIONAL_CACHE, + /* + The event must be written directly to the binary log without going + through a cache. + */ + EVENT_NO_CACHE, + /** + If there is a need for different types, introduce them before this. + */ + EVENT_CACHE_COUNT + }; /* The following type definition is to be used whenever data is placed @@ -920,8 +945,12 @@ public: LOG_EVENT_SUPPRESS_USE_F for notes. */ uint16 flags; - - bool cache_stmt; + + /* + Defines the type of the cache, if any, where the event will be + stored before being flushed to disk. + */ + uint16 cache_type; /** A storage to cache the global system variable's value. @@ -933,7 +962,7 @@ public: THD* thd; Log_event(); - Log_event(THD* thd_arg, uint16 flags_arg, bool cache_stmt); + Log_event(THD* thd_arg, uint16 flags_arg, bool is_transactional); /* read_log_event() functions read an event from a binlog or relay log; used by SHOW BINLOG EVENTS, the binlog_dump thread on the @@ -1031,7 +1060,18 @@ public: void set_relay_log_event() { flags |= LOG_EVENT_RELAY_LOG_F; } bool is_artificial_event() const { return flags & LOG_EVENT_ARTIFICIAL_F; } bool is_relay_log_event() const { return flags & LOG_EVENT_RELAY_LOG_F; } - inline bool get_cache_stmt() const { return cache_stmt; } + inline bool use_trans_cache() const + { + return (cache_type == Log_event::EVENT_TRANSACTIONAL_CACHE); + } + inline void set_direct_logging() + { + cache_type = Log_event::EVENT_NO_CACHE; + } + inline bool use_direct_logging() + { + return (cache_type == Log_event::EVENT_NO_CACHE); + } Log_event(const char* buf, const Format_description_log_event *description_event); virtual ~Log_event() { free_temp_buf();} @@ -1645,7 +1685,7 @@ public: #ifndef MYSQL_CLIENT Query_log_event(THD* thd_arg, const char* query_arg, ulong query_length, - bool using_trans, bool suppress_use, int error); + bool using_trans, bool direct, bool suppress_use, int error); const char* get_db() { return db; } #ifdef HAVE_REPLICATION void pack_info(Protocol* protocol); @@ -2895,8 +2935,8 @@ public: ulong query_length, uint fn_pos_start_arg, uint fn_pos_end_arg, enum_load_dup_handling dup_handling_arg, - bool using_trans, bool suppress_use, - int errcode); + bool using_trans, bool direct, + bool suppress_use, int errcode); #ifdef HAVE_REPLICATION void pack_info(Protocol* protocol); #endif /* HAVE_REPLICATION */ @@ -3326,10 +3366,10 @@ public: return new table_def(m_coltype, m_colcnt, m_field_metadata, m_field_metadata_size, m_null_bits, m_flags); } +#endif ulong get_table_id() const { return m_table_id; } const char *get_table_name() const { return m_tblnam; } const char *get_db_name() const { return m_dbnam; } -#endif virtual Log_event_type get_type_code() { return TABLE_MAP_EVENT; } virtual bool is_valid() const { return m_memory != NULL; /* we check malloc */ } @@ -3881,6 +3921,7 @@ public: DBUG_PRINT("enter", ("m_incident: %d", m_incident)); m_message.str= NULL; /* Just as a precaution */ m_message.length= 0; + set_direct_logging(); DBUG_VOID_RETURN; } @@ -3890,6 +3931,7 @@ public: DBUG_ENTER("Incident_log_event::Incident_log_event"); DBUG_PRINT("enter", ("m_incident: %d", m_incident)); m_message= msg; + set_direct_logging(); DBUG_VOID_RETURN; } #endif diff --git a/sql/log_event_old.cc b/sql/log_event_old.cc index 4edf060450c..59a1d2fc50a 100644 --- a/sql/log_event_old.cc +++ b/sql/log_event_old.cc @@ -59,22 +59,19 @@ Old_rows_log_event::do_apply_event(Old_rows_log_event *ev, const Relay_log_info We also call the mysql_reset_thd_for_next_command(), since this is the logical start of the next "statement". Note that this - call might reset the value of current_stmt_binlog_row_based, so + call might reset the value of current_stmt_binlog_format, so we need to do any changes to that value after this function. */ lex_start(thd); mysql_reset_thd_for_next_command(thd); /* - Check if the slave is set to use SBR. If so, it should switch - to using RBR until the end of the "statement", i.e., next - STMT_END_F or next error. + This is a row injection, so we flag the "statement" as + such. Note that this code is called both when the slave does row + injections and when the BINLOG statement is used to do row + injections. */ - if (!thd->current_stmt_binlog_row_based && - mysql_bin_log.is_open() && (thd->options & OPTION_BIN_LOG)) - { - thd->set_current_stmt_binlog_row_based(); - } + thd->lex->set_stmt_row_injection(); if (simple_open_n_lock_tables(thd, rli->tables_to_lock)) { @@ -239,7 +236,7 @@ Old_rows_log_event::do_apply_event(Old_rows_log_event *ev, const Relay_log_info DBUG_EXECUTE_IF("stop_slave_middle_group", const_cast<Relay_log_info*>(rli)->abort_slave= 1;); error= do_after_row_operations(table, error); - if (!ev->cache_stmt) + if (!ev->use_trans_cache()) { DBUG_PRINT("info", ("Marked that we need to keep log")); thd->options|= OPTION_KEEP_LOG; @@ -273,7 +270,7 @@ Old_rows_log_event::do_apply_event(Old_rows_log_event *ev, const Relay_log_info thread is certainly going to stop. rollback at the caller along with sbr. */ - thd->reset_current_stmt_binlog_row_based(); + thd->reset_current_stmt_binlog_format_row(); const_cast<Relay_log_info*>(rli)->cleanup_context(thd, error); thd->is_slave_error= 1; DBUG_RETURN(error); @@ -1523,7 +1520,7 @@ int Old_rows_log_event::do_apply_event(Relay_log_info const *rli) NOTE: For this new scheme there should be no pending event: need to add code to assert that is the case. */ - thd->binlog_flush_pending_rows_event(false); + thd->binlog_flush_pending_rows_event(FALSE); TABLE_LIST *tables= rli->tables_to_lock; close_tables_for_reopen(thd, &tables); @@ -1733,7 +1730,7 @@ int Old_rows_log_event::do_apply_event(Relay_log_info const *rli) DBUG_EXECUTE_IF("stop_slave_middle_group", const_cast<Relay_log_info*>(rli)->abort_slave= 1;); error= do_after_row_operations(rli, error); - if (!cache_stmt) + if (!use_trans_cache()) { DBUG_PRINT("info", ("Marked that we need to keep log")); thd->options|= OPTION_KEEP_LOG; @@ -1767,7 +1764,7 @@ int Old_rows_log_event::do_apply_event(Relay_log_info const *rli) thread is certainly going to stop. rollback at the caller along with sbr. */ - thd->reset_current_stmt_binlog_row_based(); + thd->reset_current_stmt_binlog_format_row(); const_cast<Relay_log_info*>(rli)->cleanup_context(thd, error); thd->is_slave_error= 1; DBUG_RETURN(error); @@ -1779,7 +1776,7 @@ int Old_rows_log_event::do_apply_event(Relay_log_info const *rli) last_event_start_time here instead. */ if (table && (table->s->primary_key == MAX_KEY) && - !cache_stmt && get_flags(STMT_END_F) == RLE_NO_FLAGS) + !use_trans_cache() && get_flags(STMT_END_F) == RLE_NO_FLAGS) { /* ------------ Temporary fix until WL#2975 is implemented --------- @@ -1817,7 +1814,7 @@ int Old_rows_log_event::do_apply_event(Relay_log_info const *rli) (assume the last master's transaction is ignored by the slave because of replicate-ignore rules). */ - thd->binlog_flush_pending_rows_event(true); + thd->binlog_flush_pending_rows_event(TRUE); /* If this event is not in a transaction, the call below will, if some @@ -1845,7 +1842,7 @@ int Old_rows_log_event::do_apply_event(Relay_log_info const *rli) event flushed. */ - thd->reset_current_stmt_binlog_row_based(); + thd->reset_current_stmt_binlog_format_row(); const_cast<Relay_log_info*>(rli)->cleanup_context(thd, 0); } diff --git a/sql/mysql_priv.h b/sql/mysql_priv.h index abd1d58d4e9..793fb3da4be 100644 --- a/sql/mysql_priv.h +++ b/sql/mysql_priv.h @@ -958,7 +958,8 @@ struct Query_cache_query_flags #endif /*HAVE_QUERY_CACHE*/ void write_bin_log(THD *thd, bool clear_error, - char const *query, ulong query_length); + char const *query, ulong query_length, + bool is_trans= FALSE); /* sql_connect.cc */ int check_user(THD *thd, enum enum_server_command command, @@ -1491,7 +1492,6 @@ TABLE *open_n_lock_single_table(THD *thd, TABLE_LIST *table_l, thr_lock_type lock_type); bool open_normal_and_derived_tables(THD *thd, TABLE_LIST *tables, uint flags); int lock_tables(THD *thd, TABLE_LIST *tables, uint counter, bool *need_reopen); -int decide_logging_format(THD *thd, TABLE_LIST *tables); TABLE *open_temporary_table(THD *thd, const char *path, const char *db, const char *table_name, bool link_in_list); bool rm_temporary_table(handlerton *base, char *path); diff --git a/sql/rpl_injector.cc b/sql/rpl_injector.cc index 684655d1c3b..8cdee89e164 100644 --- a/sql/rpl_injector.cc +++ b/sql/rpl_injector.cc @@ -36,8 +36,6 @@ injector::transaction::transaction(MYSQL_BIN_LOG *log, THD *thd) m_start_pos.m_file_pos= log_info.pos; begin_trans(m_thd); - - thd->set_current_stmt_binlog_row_based(); } injector::transaction::~transaction() diff --git a/sql/set_var.cc b/sql/set_var.cc index 54c305fa236..8a7bc5453b4 100644 --- a/sql/set_var.cc +++ b/sql/set_var.cc @@ -1326,7 +1326,12 @@ bool sys_var_thd_binlog_format::is_readonly() const void fix_binlog_format_after_update(THD *thd, enum_var_type type) { - thd->reset_current_stmt_binlog_row_based(); + /* + @todo This function should be eliminated. We should not set the + current binlog format anywhere else than in decide_logging_format. + /Sven + */ + thd->reset_current_stmt_binlog_format_row(); } diff --git a/sql/share/errmsg.txt b/sql/share/errmsg.txt index c3d9235bc30..de9822d382a 100644 --- a/sql/share/errmsg.txt +++ b/sql/share/errmsg.txt @@ -6076,8 +6076,7 @@ ER_SLAVE_INCIDENT ER_NO_PARTITION_FOR_GIVEN_VALUE_SILENT eng "Table has no partition for some existing values" ER_BINLOG_UNSAFE_STATEMENT - eng "Statement may not be safe to log in statement format." - swe "Detta är inte säkert att logga i statement-format." + eng "Unsafe statement binlogged in statement format since BINLOG_FORMAT = STATEMENT. Reason for unsafeness: %s" ER_SLAVE_FATAL_ERROR eng "Fatal error: %s" ER_SLAVE_RELAY_LOG_READ_FAILURE @@ -6163,9 +6162,6 @@ ER_SLAVE_HEARTBEAT_FAILURE ER_SLAVE_HEARTBEAT_VALUE_OUT_OF_RANGE eng "The requested value for the heartbeat period %s %s" -ER_SLAVE_IGNORE_SERVER_IDS - eng "The requested server id %d clashes with the slave startup option --replicate-same-server-id" - ER_NDB_REPLICATION_SCHEMA_ERROR eng "Bad schema for mysql.ndb_replication table. Message: %-.64s" ER_CONFLICT_FN_PARSE_ERROR @@ -6210,6 +6206,43 @@ ER_TOO_MANY_CONCURRENT_TRXS WARN_NON_ASCII_SEPARATOR_NOT_IMPLEMENTED eng "Non-ASCII separator arguments are not fully supported" +ER_BINLOG_ROW_ENGINE_AND_STMT_ENGINE + eng "Cannot execute statement: binlogging impossible since both row-incapable engines and statement-incapable engines are involved." +ER_BINLOG_ROW_MODE_AND_STMT_ENGINE + eng "Cannot execute statement: binlogging impossible since BINLOG_FORMAT = ROW and at least one table uses a storage engine limited to statement-logging." +ER_BINLOG_UNSAFE_AND_STMT_ENGINE + eng "Cannot execute statement: binlogging of unsafe statement is impossible when storage engine is limited to statement-logging and BINLOG_FORMAT = MIXED. Reason for unsafeness: %s" +ER_BINLOG_ROW_INJECTION_AND_STMT_ENGINE + eng "Cannot execute row injection: binlogging impossible since at least one table uses a storage engine limited to statement-logging." +ER_BINLOG_STMT_MODE_AND_ROW_ENGINE + eng "Cannot execute statement: binlogging impossible since BINLOG_FORMAT = STATEMENT and at least one table uses a storage engine limited to row-logging.%s" +ER_BINLOG_ROW_INJECTION_AND_STMT_MODE + eng "Cannot execute row injection: binlogging impossible since BINLOG_FORMAT = STATEMENT." +ER_BINLOG_MULTIPLE_ENGINES_AND_SELF_LOGGING_ENGINE + eng "Cannot execute statement: binlogging impossible since more than one engine is involved and at least one engine is self-logging." + +ER_BINLOG_UNSAFE_LIMIT + eng "Statement uses a LIMIT clause. This is unsafe because the set of rows included cannot be predicted." +ER_BINLOG_UNSAFE_INSERT_DELAYED + eng "Statement uses INSERT DELAYED. This is unsafe because the time when rows are inserted cannot be predicted." +ER_BINLOG_UNSAFE_SYSTEM_TABLE + eng "Statement uses the general_log or slow_log table. This is unsafe because system tables may differ on slave." +ER_BINLOG_UNSAFE_TWO_AUTOINC_COLUMNS + eng "Statement updates two AUTO_INCREMENT columns. This is unsafe because the generated value cannot be predicted by slave." +ER_BINLOG_UNSAFE_UDF + eng "Statement uses a UDF. It cannot be determined if the UDF will return the same value on slave." +ER_BINLOG_UNSAFE_SYSTEM_VARIABLE + eng "Statement uses a system variable whose value may differ on slave." +ER_BINLOG_UNSAFE_SYSTEM_FUNCTION + eng "Statement uses a system function whose value may differ on slave." +ER_BINLOG_UNSAFE_NONTRANS_AFTER_TRANS + eng "Non-transactional reads or writes are unsafe if they occur after transactional reads or writes inside a transaction." + +ER_MESSAGE_AND_STATEMENT + eng "%s Statement: %s" + +ER_SLAVE_IGNORE_SERVER_IDS + eng "The requested server id %d clashes with the slave startup option --replicate-same-server-id" ER_DEBUG_SYNC_TIMEOUT eng "debug sync point wait timed out" ger "Debug Sync Point Wartezeit überschritten" diff --git a/sql/slave.cc b/sql/slave.cc index 8e69066ed9b..c58d9738950 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -464,6 +464,7 @@ int terminate_slave_threads(Master_info* mi,int thread_mask,bool skip_lock) DBUG_RETURN(0); /* successfully do nothing */ int error,force_all = (thread_mask & SLAVE_FORCE_ALL); pthread_mutex_t *sql_lock = &mi->rli.run_lock, *io_lock = &mi->run_lock; + pthread_mutex_t *log_lock= mi->rli.relay_log.get_log_lock(); if (thread_mask & (SLAVE_IO|SLAVE_FORCE_ALL)) { @@ -475,6 +476,22 @@ int terminate_slave_threads(Master_info* mi,int thread_mask,bool skip_lock) skip_lock)) && !force_all) DBUG_RETURN(error); + + pthread_mutex_lock(log_lock); + + DBUG_PRINT("info",("Flushing relay log and master info file.")); + if (current_thd) + thd_proc_info(current_thd, "Flushing relay log and master info files."); + if (flush_master_info(mi, TRUE /* flush relay log */)) + DBUG_RETURN(ER_ERROR_DURING_FLUSH_LOGS); + + if (my_sync(mi->rli.relay_log.get_log_file()->file, MYF(MY_WME))) + DBUG_RETURN(ER_ERROR_DURING_FLUSH_LOGS); + + if (my_sync(mi->fd, MYF(MY_WME))) + DBUG_RETURN(ER_ERROR_DURING_FLUSH_LOGS); + + pthread_mutex_unlock(log_lock); } if (thread_mask & (SLAVE_SQL|SLAVE_FORCE_ALL)) { @@ -486,8 +503,21 @@ int terminate_slave_threads(Master_info* mi,int thread_mask,bool skip_lock) skip_lock)) && !force_all) DBUG_RETURN(error); + + pthread_mutex_lock(log_lock); + + DBUG_PRINT("info",("Flushing relay-log info file.")); + if (current_thd) + thd_proc_info(current_thd, "Flushing relay-log info file."); + if (flush_relay_log_info(&mi->rli)) + DBUG_RETURN(ER_ERROR_DURING_FLUSH_LOGS); + + if (my_sync(mi->rli.info_fd, MYF(MY_WME))) + DBUG_RETURN(ER_ERROR_DURING_FLUSH_LOGS); + + pthread_mutex_unlock(log_lock); } - DBUG_RETURN(0); + DBUG_RETURN(0); } @@ -4197,8 +4227,9 @@ MYSQL *rpl_connect_master(MYSQL *mysql) rli Relay log information NOTES - - As this is only called by the slave thread, we don't need to - have a lock on this. + - As this is only called by the slave thread or on STOP SLAVE, with the + log_lock grabbed and the slave thread stopped, we don't need to have + a lock here. - If there is an active transaction, then we don't update the position in the relay log. This is to ensure that we re-execute statements if we die in the middle of an transaction that was rolled back. @@ -4249,7 +4280,10 @@ bool flush_relay_log_info(Relay_log_info* rli) error=1; rli->sync_counter= 0; } - /* Flushing the relay log is done by the slave I/O thread */ + /* + Flushing the relay log is done by the slave I/O thread + or by the user on STOP SLAVE. + */ DBUG_RETURN(error); } diff --git a/sql/sp.cc b/sql/sp.cc index d3c5dfb96d0..8df3e7b6509 100644 --- a/sql/sp.cc +++ b/sql/sp.cc @@ -912,7 +912,7 @@ sp_create_routine(THD *thd, int type, sp_head *sp) row-based replication. The flag will be reset at the end of the statement. */ - thd->clear_current_stmt_binlog_row_based(); + thd->clear_current_stmt_binlog_format_row(); saved_count_cuted_fields= thd->count_cuted_fields; thd->count_cuted_fields= CHECK_FIELD_WARN; @@ -1104,9 +1104,9 @@ sp_create_routine(THD *thd, int type, sp_head *sp) /* restore sql_mode when binloging */ thd->variables.sql_mode= saved_mode; /* Such a statement can always go directly to binlog, no trans cache */ - thd->binlog_query(THD::MYSQL_QUERY_TYPE, + thd->binlog_query(THD::STMT_QUERY_TYPE, log_query.c_ptr(), log_query.length(), - FALSE, FALSE, 0); + FALSE, FALSE, FALSE, 0); thd->variables.sql_mode= 0; } @@ -1153,7 +1153,7 @@ sp_drop_routine(THD *thd, int type, sp_name *name) row-based replication. The flag will be reset at the end of the statement. */ - thd->clear_current_stmt_binlog_row_based(); + thd->clear_current_stmt_binlog_format_row(); if (!(table= open_proc_table_for_update(thd))) DBUG_RETURN(SP_OPEN_TABLE_FAILED); @@ -1207,7 +1207,7 @@ sp_update_routine(THD *thd, int type, sp_name *name, st_sp_chistics *chistics) row-based replication. The flag will be reset at the end of the statement. */ - thd->clear_current_stmt_binlog_row_based(); + thd->clear_current_stmt_binlog_format_row(); if (!(table= open_proc_table_for_update(thd))) DBUG_RETURN(SP_OPEN_TABLE_FAILED); @@ -2004,6 +2004,8 @@ sp_cache_routines_and_add_tables_for_triggers(THD *thd, LEX *lex, { int ret= 0; + DBUG_ENTER("sp_cache_routines_and_add_tables_for_triggers"); + Sroutine_hash_entry **last_cached_routine_ptr= (Sroutine_hash_entry **)lex->sroutines_list.next; @@ -2037,7 +2039,7 @@ sp_cache_routines_and_add_tables_for_triggers(THD *thd, LEX *lex, ret= sp_cache_routines_and_add_tables_aux(thd, lex, *last_cached_routine_ptr, FALSE); - return ret; + DBUG_RETURN(ret); } diff --git a/sql/sp_head.cc b/sql/sp_head.cc index 4320f037ac6..e776eeba16f 100644 --- a/sql/sp_head.cc +++ b/sql/sp_head.cc @@ -509,7 +509,7 @@ sp_head::operator delete(void *ptr, size_t size) throw() sp_head::sp_head() :Query_arena(&main_mem_root, INITIALIZED_FOR_SP), - m_flags(0), m_recursion_level(0), m_next_cached_sp(0), + m_flags(0), unsafe_flags(0), m_recursion_level(0), m_next_cached_sp(0), m_cont_level(0) { const LEX_STRING str_reset= { NULL, 0 }; @@ -1692,7 +1692,7 @@ sp_head::execute_function(THD *thd, Item **argp, uint argcount, each substatement be binlogged its way. */ need_binlog_call= mysql_bin_log.is_open() && - (thd->options & OPTION_BIN_LOG) && !thd->current_stmt_binlog_row_based; + (thd->options & OPTION_BIN_LOG) && !thd->is_current_stmt_binlog_format_row(); /* Remember the original arguments for unrolled replication of functions @@ -1781,7 +1781,7 @@ sp_head::execute_function(THD *thd, Item **argp, uint argcount, { int errcode = query_error_code(thd, thd->killed == THD::NOT_KILLED); Query_log_event qinfo(thd, binlog_buf.ptr(), binlog_buf.length(), - thd->binlog_evt_union.unioned_events_trans, FALSE, errcode); + thd->binlog_evt_union.unioned_events_trans, FALSE, FALSE, errcode); if (mysql_bin_log.write(&qinfo) && thd->binlog_evt_union.unioned_events_trans) { @@ -2114,13 +2114,10 @@ sp_head::restore_lex(THD *thd) oldlex->trg_table_fields.push_back(&sublex->trg_table_fields); - /* - If this substatement needs row-based, the entire routine does too (we - cannot switch from statement-based to row-based only for this - substatement). - */ - if (sublex->is_stmt_unsafe()) - m_flags|= BINLOG_ROW_BASED_IF_MIXED; + /* If this substatement is unsafe, the entire routine is too. */ + DBUG_PRINT("info", ("lex->get_stmt_unsafe_flags: 0x%x", + thd->lex->get_stmt_unsafe_flags())); + unsafe_flags|= sublex->get_stmt_unsafe_flags(); /* Add routines which are used by statement to respective set for diff --git a/sql/sp_head.h b/sql/sp_head.h index 00c96d44f70..138d0a2f506 100644 --- a/sql/sp_head.h +++ b/sql/sp_head.h @@ -165,9 +165,8 @@ public: HAS_COMMIT_OR_ROLLBACK= 128, LOG_SLOW_STATEMENTS= 256, // Used by events LOG_GENERAL_LOG= 512, // Used by events - BINLOG_ROW_BASED_IF_MIXED= 1024, - HAS_SQLCOM_RESET= 2048, - HAS_SQLCOM_FLUSH= 4096 + HAS_SQLCOM_RESET= 1024, + HAS_SQLCOM_FLUSH= 2048 }; /** TYPE_ENUM_FUNCTION, TYPE_ENUM_PROCEDURE or TYPE_ENUM_TRIGGER */ @@ -198,6 +197,11 @@ public: private: Stored_program_creation_ctx *m_creation_ctx; + /** + Boolean combination of (1<<flag), where flag is a member of + LEX::enum_binlog_stmt_unsafe. + */ + uint32 unsafe_flags; public: inline Stored_program_creation_ctx *get_creation_ctx() @@ -453,20 +457,24 @@ public: #endif /* - This method is intended for attributes of a routine which need - to propagate upwards to the LEX of the caller (when a property of a - sp_head needs to "taint" the caller). + This method is intended for attributes of a routine which need to + propagate upwards to the LEX of the caller. */ void propagate_attributes(LEX *lex) { + DBUG_ENTER("sp_head::propagate_attributes"); /* If this routine needs row-based binary logging, the entire top statement too (we cannot switch from statement-based to row-based only for this routine, as in statement-based the top-statement may be binlogged and the substatements not). */ - if (m_flags & BINLOG_ROW_BASED_IF_MIXED) - lex->set_stmt_unsafe(); + DBUG_PRINT("info", ("lex->get_stmt_unsafe_flags(): 0x%x", + lex->get_stmt_unsafe_flags())); + DBUG_PRINT("info", ("sp_head(0x%p=%s)->unsafe_flags: 0x%x", + this, name(), unsafe_flags)); + lex->set_stmt_unsafe_flags(unsafe_flags); + DBUG_VOID_RETURN; } diff --git a/sql/sql_acl.cc b/sql/sql_acl.cc index 77c72066429..0bed0e05455 100644 --- a/sql/sql_acl.cc +++ b/sql/sql_acl.cc @@ -1654,8 +1654,8 @@ bool change_password(THD *thd, const char *host, const char *user, acl_user->host.hostname ? acl_user->host.hostname : "", new_password)); thd->clear_error(); - thd->binlog_query(THD::MYSQL_QUERY_TYPE, buff, query_length, - FALSE, FALSE, 0); + thd->binlog_query(THD::STMT_QUERY_TYPE, buff, query_length, + FALSE, FALSE, FALSE, 0); } end: close_thread_tables(thd); @@ -3068,7 +3068,7 @@ int mysql_table_grant(THD *thd, TABLE_LIST *table_list, row-based replication. The flag will be reset at the end of the statement. */ - thd->clear_current_stmt_binlog_row_based(); + thd->clear_current_stmt_binlog_format_row(); #ifdef HAVE_REPLICATION /* @@ -3285,7 +3285,7 @@ bool mysql_routine_grant(THD *thd, TABLE_LIST *table_list, bool is_proc, row-based replication. The flag will be reset at the end of the statement. */ - thd->clear_current_stmt_binlog_row_based(); + thd->clear_current_stmt_binlog_format_row(); #ifdef HAVE_REPLICATION /* @@ -3424,7 +3424,7 @@ bool mysql_grant(THD *thd, const char *db, List <LEX_USER> &list, row-based replication. The flag will be reset at the end of the statement. */ - thd->clear_current_stmt_binlog_row_based(); + thd->clear_current_stmt_binlog_format_row(); #ifdef HAVE_REPLICATION /* @@ -5668,7 +5668,7 @@ bool mysql_create_user(THD *thd, List <LEX_USER> &list) row-based replication. The flag will be reset at the end of the statement. */ - thd->clear_current_stmt_binlog_row_based(); + thd->clear_current_stmt_binlog_format_row(); /* CREATE USER may be skipped on replication client. */ if ((result= open_grant_tables(thd, tables))) @@ -5748,7 +5748,7 @@ bool mysql_drop_user(THD *thd, List <LEX_USER> &list) row-based replication. The flag will be reset at the end of the statement. */ - thd->clear_current_stmt_binlog_row_based(); + thd->clear_current_stmt_binlog_format_row(); /* DROP USER may be skipped on replication client. */ if ((result= open_grant_tables(thd, tables))) @@ -5822,7 +5822,7 @@ bool mysql_rename_user(THD *thd, List <LEX_USER> &list) row-based replication. The flag will be reset at the end of the statement. */ - thd->clear_current_stmt_binlog_row_based(); + thd->clear_current_stmt_binlog_format_row(); /* RENAME USER may be skipped on replication client. */ if ((result= open_grant_tables(thd, tables))) @@ -5904,7 +5904,7 @@ bool mysql_revoke_all(THD *thd, List <LEX_USER> &list) row-based replication. The flag will be reset at the end of the statement. */ - thd->clear_current_stmt_binlog_row_based(); + thd->clear_current_stmt_binlog_format_row(); if ((result= open_grant_tables(thd, tables))) DBUG_RETURN(result != 1); @@ -6157,7 +6157,7 @@ bool sp_revoke_privileges(THD *thd, const char *sp_db, const char *sp_name, row-based replication. The flag will be reset at the end of the statement. */ - thd->clear_current_stmt_binlog_row_based(); + thd->clear_current_stmt_binlog_format_row(); /* Remove procedure access */ do diff --git a/sql/sql_base.cc b/sql/sql_base.cc index d1dc86b9f07..ad6048a156d 100644 --- a/sql/sql_base.cc +++ b/sql/sql_base.cc @@ -30,7 +30,6 @@ #include <io.h> #endif -#define FLAGSTR(S,F) ((S) & (F) ? #F " " : "") /** This internal handler is used to trap internally @@ -1444,7 +1443,7 @@ void close_temporary_tables(THD *thd) return; if (!mysql_bin_log.is_open() || - (thd->current_stmt_binlog_row_based && thd->variables.binlog_format == BINLOG_FORMAT_ROW)) + (thd->is_current_stmt_binlog_format_row() && thd->variables.binlog_format == BINLOG_FORMAT_ROW)) { TABLE *tmp_next; for (table= thd->temporary_tables; table; table= tmp_next) @@ -1546,7 +1545,7 @@ void close_temporary_tables(THD *thd) thd->variables.character_set_client= system_charset_info; Query_log_event qinfo(thd, s_query.ptr(), s_query.length() - 1 /* to remove trailing ',' */, - 0, FALSE, 0); + FALSE, TRUE, FALSE, 0); qinfo.db= db.ptr(); qinfo.db_len= db.length(); thd->variables.character_set_client= cs_save; @@ -4033,7 +4032,7 @@ retry: int errcode= query_error_code(thd, TRUE); thd->binlog_query(THD::STMT_QUERY_TYPE, query, (ulong)(end-query), - FALSE, FALSE, errcode); + FALSE, FALSE, FALSE, errcode); my_free(query, MYF(0)); } else @@ -4846,7 +4845,7 @@ static bool check_lock_and_start_stmt(THD *thd, TABLE *table, There may be more differences between open_n_lock_single_table() and open_ltable(). One known difference is that open_ltable() does - neither call decide_logging_format() nor handle some other logging + neither call thd->decide_logging_format() nor handle some other logging and locking issues because it does not call lock_tables(). */ @@ -5066,165 +5065,6 @@ static void mark_real_tables_as_free_for_reuse(TABLE_LIST *table) } -/** - Decide on logging format to use for the statement. - - Compute the capabilities vector for the involved storage engines - and mask out the flags for the binary log. Right now, the binlog - flags only include the capabilities of the storage engines, so this - is safe. - - We now have three alternatives that prevent the statement from - being loggable: - - 1. If there are no capabilities left (all flags are clear) it is - not possible to log the statement at all, so we roll back the - statement and report an error. - - 2. Statement mode is set, but the capabilities indicate that - statement format is not possible. - - 3. Row mode is set, but the capabilities indicate that row - format is not possible. - - 4. Statement is unsafe, but the capabilities indicate that row - format is not possible. - - If we are in MIXED mode, we then decide what logging format to use: - - 1. If the statement is unsafe, row-based logging is used. - - 2. If statement-based logging is not possible, row-based logging is - used. - - 3. Otherwise, statement-based logging is used. - - @param thd Client thread - @param tables Tables involved in the query - */ - -int decide_logging_format(THD *thd, TABLE_LIST *tables) -{ - /* - In SBR mode, we are only proceeding if we are binlogging this - statement, ie, the filtering rules won't later filter this out. - - This check here is needed to prevent some spurious error to be - raised in some cases (See BUG#42829). - */ - if (mysql_bin_log.is_open() && (thd->options & OPTION_BIN_LOG) && - (thd->variables.binlog_format != BINLOG_FORMAT_STMT || - binlog_filter->db_ok(thd->db))) - { - /* - Compute the starting vectors for the computations by creating a - set with all the capabilities bits set and one with no - capabilities bits set. - */ - handler::Table_flags flags_some_set= 0; - handler::Table_flags flags_all_set= - HA_BINLOG_ROW_CAPABLE | HA_BINLOG_STMT_CAPABLE; - - my_bool multi_engine= FALSE; - void* prev_ht= NULL; - for (TABLE_LIST *table= tables; table; table= table->next_global) - { - if (table->placeholder()) - continue; - if (table->table->s->table_category == TABLE_CATEGORY_PERFORMANCE) - thd->lex->set_stmt_unsafe(); - if (table->lock_type >= TL_WRITE_ALLOW_WRITE) - { - ulonglong const flags= table->table->file->ha_table_flags(); - DBUG_PRINT("info", ("table: %s; ha_table_flags: %s%s", - table->table_name, - FLAGSTR(flags, HA_BINLOG_STMT_CAPABLE), - FLAGSTR(flags, HA_BINLOG_ROW_CAPABLE))); - if (prev_ht && prev_ht != table->table->file->ht) - multi_engine= TRUE; - prev_ht= table->table->file->ht; - flags_all_set &= flags; - flags_some_set |= flags; - } - } - - DBUG_PRINT("info", ("flags_all_set: %s%s", - FLAGSTR(flags_all_set, HA_BINLOG_STMT_CAPABLE), - FLAGSTR(flags_all_set, HA_BINLOG_ROW_CAPABLE))); - DBUG_PRINT("info", ("flags_some_set: %s%s", - FLAGSTR(flags_some_set, HA_BINLOG_STMT_CAPABLE), - FLAGSTR(flags_some_set, HA_BINLOG_ROW_CAPABLE))); - DBUG_PRINT("info", ("thd->variables.binlog_format: %ld", - thd->variables.binlog_format)); - DBUG_PRINT("info", ("multi_engine: %s", - multi_engine ? "TRUE" : "FALSE")); - - int error= 0; - if (flags_all_set == 0) - { - my_error((error= ER_BINLOG_LOGGING_IMPOSSIBLE), MYF(0), - "Statement cannot be logged to the binary log in" - " row-based nor statement-based format"); - } - else if (thd->variables.binlog_format == BINLOG_FORMAT_STMT && - (flags_all_set & HA_BINLOG_STMT_CAPABLE) == 0) - { - my_error((error= ER_BINLOG_LOGGING_IMPOSSIBLE), MYF(0), - "Statement-based format required for this statement," - " but not allowed by this combination of engines"); - } - else if ((thd->variables.binlog_format == BINLOG_FORMAT_ROW || - thd->lex->is_stmt_unsafe()) && - (flags_all_set & HA_BINLOG_ROW_CAPABLE) == 0) - { - my_error((error= ER_BINLOG_LOGGING_IMPOSSIBLE), MYF(0), - "Row-based format required for this statement," - " but not allowed by this combination of engines"); - } - - /* - If more than one engine is involved in the statement and at - least one is doing it's own logging (is *self-logging*), the - statement cannot be logged atomically, so we generate an error - rather than allowing the binlog to become corrupt. - */ - if (multi_engine && - (flags_some_set & HA_HAS_OWN_BINLOGGING)) - { - error= ER_BINLOG_LOGGING_IMPOSSIBLE; - my_error(error, MYF(0), - "Statement cannot be written atomically since more" - " than one engine involved and at least one engine" - " is self-logging"); - } - - DBUG_PRINT("info", ("error: %d", error)); - - if (error) - return -1; - - /* - We switch to row-based format if we are in mixed mode and one of - the following are true: - - 1. If the statement is unsafe - 2. If statement format cannot be used - - Observe that point to cannot be decided before the tables - involved in a statement has been checked, i.e., we cannot put - this code in reset_current_stmt_binlog_row_based(), it has to be - here. - */ - if (thd->lex->is_stmt_unsafe() || - (flags_all_set & HA_BINLOG_STMT_CAPABLE) == 0) - { - thd->set_current_stmt_binlog_row_based_if_mixed(); - } - } - - return 0; -} - /* Lock all tables in list @@ -5266,7 +5106,7 @@ int lock_tables(THD *thd, TABLE_LIST *tables, uint count, bool *need_reopen) *need_reopen= FALSE; if (!tables && !thd->lex->requires_prelocking()) - DBUG_RETURN(decide_logging_format(thd, tables)); + DBUG_RETURN(thd->decide_logging_format(tables)); /* We need this extra check for thd->prelocked_mode because we want to avoid @@ -5305,7 +5145,8 @@ int lock_tables(THD *thd, TABLE_LIST *tables, uint count, bool *need_reopen) if (thd->variables.binlog_format != BINLOG_FORMAT_ROW && tables && has_write_table_with_auto_increment(thd->lex->first_not_own_table())) { - thd->lex->set_stmt_unsafe(); + thd->lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_TWO_AUTOINC_COLUMNS); + thd->set_current_stmt_binlog_format_row_if_mixed(); } } @@ -5428,7 +5269,7 @@ int lock_tables(THD *thd, TABLE_LIST *tables, uint count, bool *need_reopen) } } - DBUG_RETURN(decide_logging_format(thd, tables)); + DBUG_RETURN(thd->decide_logging_format(tables)); } diff --git a/sql/sql_class.cc b/sql/sql_class.cc index 10cea24a8e7..3fe4054c9e5 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -39,6 +39,7 @@ #include <io.h> #endif #include <mysys_err.h> +#include <limits.h> #include "sp_rcontext.h" #include "sp_cache.h" @@ -607,7 +608,7 @@ THD::THD() lock_id(&main_lock_id), user_time(0), in_sub_stmt(0), sql_log_bin_toplevel(false), - binlog_table_maps(0), binlog_flags(0UL), + binlog_unsafe_warning_flags(0), binlog_table_maps(0), table_map_for_update(0), arg_of_last_insert_id_function(FALSE), first_successful_insert_id_in_prev_stmt(0), @@ -861,7 +862,8 @@ void THD::init(void) else options &= ~OPTION_BIG_SELECTS; - transaction.all.modified_non_trans_table= transaction.stmt.modified_non_trans_table= FALSE; + transaction.all.modified_non_trans_table= + transaction.stmt.modified_non_trans_table= FALSE; open_options=ha_open_options; update_lock_default= (variables.low_priority_updates ? TL_WRITE_LOW_PRIORITY : @@ -871,7 +873,7 @@ void THD::init(void) bzero((char*) warn_count, sizeof(warn_count)); total_warn_count= 0; update_charset(); - reset_current_stmt_binlog_row_based(); + reset_current_stmt_binlog_format_row(); bzero((char *) &status_var, sizeof(status_var)); sql_log_bin_toplevel= options & OPTION_BIN_LOG; @@ -3149,13 +3151,13 @@ void THD::reset_sub_statement_state(Sub_statement_state *backup, first_successful_insert_id_in_cur_stmt; if ((!lex->requires_prelocking() || is_update_query(lex->sql_command)) && - !current_stmt_binlog_row_based) + !is_current_stmt_binlog_format_row()) { options&= ~OPTION_BIN_LOG; } if ((backup->options & OPTION_BIN_LOG) && is_update_query(lex->sql_command)&& - !current_stmt_binlog_row_based) + !is_current_stmt_binlog_format_row()) mysql_bin_log.start_union_events(this, this->query_id); /* Disable result sets */ @@ -3217,7 +3219,7 @@ void THD::restore_sub_statement_state(Sub_statement_state *backup) is_fatal_sub_stmt_error= FALSE; if ((options & OPTION_BIN_LOG) && is_update_query(lex->sql_command) && - !current_stmt_binlog_row_based) + !is_current_stmt_binlog_format_row()) mysql_bin_log.stop_union_events(this); /* @@ -3359,6 +3361,392 @@ void xid_cache_delete(XID_STATE *xid_state) pthread_mutex_unlock(&LOCK_xid_cache); } + +/** + Decide on logging format to use for the statement and issue errors + or warnings as needed. The decision depends on the following + parameters: + + - The logging mode, i.e., the value of binlog_format. Can be + statement, mixed, or row. + + - The type of statement. There are three types of statements: + "normal" safe statements; unsafe statements; and row injections. + An unsafe statement is one that, if logged in statement format, + might produce different results when replayed on the slave (e.g., + INSERT DELAYED). A row injection is either a BINLOG statement, or + a row event executed by the slave's SQL thread. + + - The capabilities of tables modified by the statement. The + *capabilities vector* for a table is a set of flags associated + with the table. Currently, it only includes two flags: *row + capability flag* and *statement capability flag*. + + The row capability flag is set if and only if the engine can + handle row-based logging. The statement capability flag is set if + and only if the table can handle statement-based logging. + + Decision table for logging format + --------------------------------- + + The following table summarizes how the format and generated + warning/error depends on the tables' capabilities, the statement + type, and the current binlog_format. + + Row capable N NNNNNNNNN YYYYYYYYY YYYYYYYYY + Statement capable N YYYYYYYYY NNNNNNNNN YYYYYYYYY + + Statement type * SSSUUUIII SSSUUUIII SSSUUUIII + + binlog_format * SMRSMRSMR SMRSMRSMR SMRSMRSMR + + Logged format - SS-S----- -RR-RR-RR SRRSRR-RR + Warning/Error 1 --2732444 5--5--6-- ---7--6-- + + Legend + ------ + + Row capable: N - Some table not row-capable, Y - All tables row-capable + Stmt capable: N - Some table not stmt-capable, Y - All tables stmt-capable + Statement type: (S)afe, (U)nsafe, or Row (I)njection + binlog_format: (S)TATEMENT, (M)IXED, or (R)OW + Logged format: (S)tatement or (R)ow + Warning/Error: Warnings and error messages are as follows: + + 1. Error: Cannot execute statement: binlogging impossible since both + row-incapable engines and statement-incapable engines are + involved. + + 2. Error: Cannot execute statement: binlogging impossible since + BINLOG_FORMAT = ROW and at least one table uses a storage engine + limited to statement-logging. + + 3. Error: Cannot execute statement: binlogging of unsafe statement + is impossible when storage engine is limited to statement-logging + and BINLOG_FORMAT = MIXED. + + 4. Error: Cannot execute row injection: binlogging impossible since + at least one table uses a storage engine limited to + statement-logging. + + 5. Error: Cannot execute statement: binlogging impossible since + BINLOG_FORMAT = STATEMENT and at least one table uses a storage + engine limited to row-logging. + + 6. Error: Cannot execute row injection: binlogging impossible since + BINLOG_FORMAT = STATEMENT. + + 7. Warning: Unsafe statement binlogged in statement format since + BINLOG_FORMAT = STATEMENT. + + In addition, we can produce the following error (not depending on + the variables of the decision diagram): + + 8. Error: Cannot execute statement: binlogging impossible since more + than one engine is involved and at least one engine is + self-logging. + + For each error case above, the statement is prevented from being + logged, we report an error, and roll back the statement. For + warnings, we set the thd->binlog_flags variable: the warning will be + printed only if the statement is successfully logged. + + @see THD::binlog_query + + @param[in] thd Client thread + @param[in] tables Tables involved in the query + + @retval 0 No error; statement can be logged. + @retval -1 One of the error conditions above applies (1, 2, 4, 5, or 6). +*/ + +int THD::decide_logging_format(TABLE_LIST *tables) +{ + DBUG_ENTER("THD::decide_logging_format"); + DBUG_PRINT("info", ("query: %s", query())); + DBUG_PRINT("info", ("variables.binlog_format: %ld", + variables.binlog_format)); + DBUG_PRINT("info", ("lex->get_stmt_unsafe_flags(): 0x%x", + lex->get_stmt_unsafe_flags())); + + /* + We should not decide logging format if the binlog is closed or + binlogging is off, or if the statement is filtered out from the + binlog by filtering rules. + */ + if (mysql_bin_log.is_open() && (options & OPTION_BIN_LOG) && + !(variables.binlog_format == BINLOG_FORMAT_STMT && + !binlog_filter->db_ok(db))) + { + /* + Compute one bit field with the union of all the engine + capabilities, and one with the intersection of all the engine + capabilities. + */ + handler::Table_flags flags_some_set= 0; + handler::Table_flags flags_all_set= + HA_BINLOG_ROW_CAPABLE | HA_BINLOG_STMT_CAPABLE; + + my_bool multi_engine= FALSE; + my_bool mixed_engine= FALSE; + my_bool all_trans_engines= TRUE; + TABLE* prev_write_table= NULL; + TABLE* prev_access_table= NULL; + +#ifndef DBUG_OFF + { + static const char *prelocked_mode_name[] = { + "NON_PRELOCKED", + "PRELOCKED", + "PRELOCKED_UNDER_LOCK_TABLES", + }; + DBUG_PRINT("debug", ("prelocked_mode: %s", + prelocked_mode_name[prelocked_mode])); + } +#endif + + /* + Get the capabilities vector for all involved storage engines and + mask out the flags for the binary log. + */ + for (TABLE_LIST *table= tables; table; table= table->next_global) + { + if (table->placeholder()) + continue; + if (table->table->s->table_category == TABLE_CATEGORY_PERFORMANCE) + lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_TABLE); + if (table->lock_type >= TL_WRITE_ALLOW_WRITE) + { + handler::Table_flags const flags= table->table->file->ha_table_flags(); + DBUG_PRINT("info", ("table: %s; ha_table_flags: 0x%llx", + table->table_name, flags)); + if (prev_write_table && prev_write_table->file->ht != + table->table->file->ht) + multi_engine= TRUE; + all_trans_engines= all_trans_engines && + table->table->file->has_transactions(); + prev_write_table= table->table; + flags_all_set &= flags; + flags_some_set |= flags; + } + if (prev_access_table && prev_access_table->file->ht != table->table->file->ht) + mixed_engine= mixed_engine || (prev_access_table->file->has_transactions() != + table->table->file->has_transactions()); + prev_access_table= table->table; + } + + /* + Set the statement as unsafe if: + + . it is a mixed statement, i.e. access transactional and non-transactional + tables, and updates at least one; + or + . an early statement updated a transactional table; + . and, the current statement updates a non-transactional table. + + Any mixed statement is classified as unsafe to ensure that mixed mode is + completely safe. Consider the following example to understand why we + decided to do this: + + Note that mixed statements such as + + 1: INSERT INTO myisam_t SELECT * FROM innodb_t; + + 2: INSERT INTO innodb_t SELECT * FROM myisam_t; + + are classified as unsafe to ensure that in mixed mode the execution is + completely safe and equivalent to the row mode. Consider the following + statements and sessions (connections) to understand the reason: + + con1: INSERT INTO innodb_t VALUES (1); + con1: INSERT INTO innodb_t VALUES (100); + + con1: BEGIN + con2: INSERT INTO myisam_t SELECT * FROM innodb_t; + con1: INSERT INTO innodb_t VALUES (200); + con1: COMMIT; + + The point is that the concurrent statements may be written into the binary log + in a way different from the execution. For example, + + BINARY LOG: + + con2: BEGIN; + con2: INSERT INTO myisam_t SELECT * FROM innodb_t; + con2: COMMIT; + con1: BEGIN + con1: INSERT INTO innodb_t VALUES (200); + con1: COMMIT; + + .... + + or + + BINARY LOG: + + con1: BEGIN + con1: INSERT INTO innodb_t VALUES (200); + con1: COMMIT; + con2: BEGIN; + con2: INSERT INTO myisam_t SELECT * FROM innodb_t; + con2: COMMIT; + + Clearly, this may become a problem in STMT mode and setting the statement + as unsafe will make rows to be written into the binary log in MIXED mode + and as such the problem will not stand. + + In STMT mode, although such statement is classified as unsafe, i.e. + + INSERT INTO myisam_t SELECT * FROM innodb_t; + + there is no enough information to avoid writing it outside the boundaries + of a transaction. This is not a problem if we are considering snapshot + isolation level but if we have pure repeatable read or serializable the + lock history on the slave will be different from the master. + */ + if (mixed_engine || + trans_has_updated_trans_table(this) && !all_trans_engines) + lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_NONTRANS_AFTER_TRANS); + + DBUG_PRINT("info", ("flags_all_set: 0x%llx", flags_all_set)); + DBUG_PRINT("info", ("flags_some_set: 0x%llx", flags_some_set)); + DBUG_PRINT("info", ("multi_engine: %d", multi_engine)); + + int error= 0; + int unsafe_flags; + + /* + If more than one engine is involved in the statement and at + least one is doing it's own logging (is *self-logging*), the + statement cannot be logged atomically, so we generate an error + rather than allowing the binlog to become corrupt. + */ + if (multi_engine && + (flags_some_set & HA_HAS_OWN_BINLOGGING)) + { + my_error((error= ER_BINLOG_MULTIPLE_ENGINES_AND_SELF_LOGGING_ENGINE), + MYF(0)); + } + + /* both statement-only and row-only engines involved */ + if ((flags_all_set & (HA_BINLOG_STMT_CAPABLE | HA_BINLOG_ROW_CAPABLE)) == 0) + { + /* + 1. Error: Binary logging impossible since both row-incapable + engines and statement-incapable engines are involved + */ + my_error((error= ER_BINLOG_ROW_ENGINE_AND_STMT_ENGINE), MYF(0)); + } + /* statement-only engines involved */ + else if ((flags_all_set & HA_BINLOG_ROW_CAPABLE) == 0) + { + if (lex->is_stmt_row_injection()) + { + /* + 4. Error: Cannot execute row injection since table uses + storage engine limited to statement-logging + */ + my_error((error= ER_BINLOG_ROW_INJECTION_AND_STMT_ENGINE), MYF(0)); + } + else if (variables.binlog_format == BINLOG_FORMAT_ROW) + { + /* + 2. Error: Cannot modify table that uses a storage engine + limited to statement-logging when BINLOG_FORMAT = ROW + */ + my_error((error= ER_BINLOG_ROW_MODE_AND_STMT_ENGINE), MYF(0)); + } + else if ((unsafe_flags= lex->get_stmt_unsafe_flags()) != 0) + { + /* + 3. Error: Cannot execute statement: binlogging of unsafe + statement is impossible when storage engine is limited to + statement-logging and BINLOG_FORMAT = MIXED. + */ + for (int unsafe_type= 0; + unsafe_type < LEX::BINLOG_STMT_UNSAFE_COUNT; + unsafe_type++) + if (unsafe_flags & (1 << unsafe_type)) + my_error((error= ER_BINLOG_UNSAFE_AND_STMT_ENGINE), MYF(0), + ER(LEX::binlog_stmt_unsafe_errcode[unsafe_type])); + } + /* log in statement format! */ + } + /* no statement-only engines */ + else + { + /* binlog_format = STATEMENT */ + if (variables.binlog_format == BINLOG_FORMAT_STMT) + { + if (lex->is_stmt_row_injection()) + { + /* + 6. Error: Cannot execute row injection since + BINLOG_FORMAT = STATEMENT + */ + my_error((error= ER_BINLOG_ROW_INJECTION_AND_STMT_MODE), MYF(0)); + } + else if ((flags_all_set & HA_BINLOG_STMT_CAPABLE) == 0) + { + /* + 5. Error: Cannot modify table that uses a storage engine + limited to row-logging when binlog_format = STATEMENT + */ + my_error((error= ER_BINLOG_STMT_MODE_AND_ROW_ENGINE), MYF(0), ""); + } + else if ((unsafe_flags= lex->get_stmt_unsafe_flags()) != 0) + { + /* + 7. Warning: Unsafe statement logged as statement due to + binlog_format = STATEMENT + */ + binlog_unsafe_warning_flags|= unsafe_flags; + DBUG_PRINT("info", ("Scheduling warning to be issued by " + "binlog_query: '%s'", + ER(ER_BINLOG_UNSAFE_STATEMENT))); + DBUG_PRINT("info", ("binlog_unsafe_warning_flags: 0x%x", + binlog_unsafe_warning_flags)); + } + /* log in statement format! */ + } + /* No statement-only engines and binlog_format != STATEMENT. + I.e., nothing prevents us from row logging if needed. */ + else + { + if (lex->is_stmt_unsafe() || lex->is_stmt_row_injection() + || (flags_all_set & HA_BINLOG_STMT_CAPABLE) == 0) + { + /* log in row format! */ + set_current_stmt_binlog_format_row_if_mixed(); + } + } + } + + if (error) { + DBUG_PRINT("info", ("decision: no logging since an error was generated")); + DBUG_RETURN(-1); + } + DBUG_PRINT("info", ("decision: logging in %s format", + is_current_stmt_binlog_format_row() ? + "ROW" : "STATEMENT")); + } +#ifndef DBUG_OFF + else + DBUG_PRINT("info", ("decision: no logging since " + "mysql_bin_log.is_open() = %d " + "and (options & OPTION_BIN_LOG) = 0x%llx " + "and binlog_format = %ld " + "and binlog_filter->db_ok(db) = %d", + mysql_bin_log.is_open(), + (options & OPTION_BIN_LOG), + variables.binlog_format, + binlog_filter->db_ok(db))); +#endif + + DBUG_RETURN(0); +} + + /* Implementation of interface to write rows to the binary log through the thread. The thread is responsible for writing the rows it has @@ -3410,7 +3798,7 @@ THD::binlog_prepare_pending_rows_event(TABLE* table, uint32 serv_id, if (binlog_setup_trx_data()) DBUG_RETURN(NULL); - Rows_log_event* pending= binlog_get_pending_rows_event(); + Rows_log_event* pending= binlog_get_pending_rows_event(is_transactional); if (unlikely(pending && !pending->is_valid())) DBUG_RETURN(NULL); @@ -3444,7 +3832,9 @@ THD::binlog_prepare_pending_rows_event(TABLE* table, uint32 serv_id, flush the pending event and replace it with the newly created event... */ - if (unlikely(mysql_bin_log.flush_and_set_pending_rows_event(this, ev))) + if (unlikely( + mysql_bin_log.flush_and_set_pending_rows_event(this, ev, + is_transactional))) { delete ev; DBUG_RETURN(NULL); @@ -3667,7 +4057,7 @@ int THD::binlog_write_row(TABLE* table, bool is_trans, MY_BITMAP const* cols, size_t colcnt, uchar const *record) { - DBUG_ASSERT(current_stmt_binlog_row_based && mysql_bin_log.is_open()); + DBUG_ASSERT(is_current_stmt_binlog_format_row() && mysql_bin_log.is_open()); /* Pack records into format for transfer. We are allocating more @@ -3697,7 +4087,7 @@ int THD::binlog_update_row(TABLE* table, bool is_trans, const uchar *before_record, const uchar *after_record) { - DBUG_ASSERT(current_stmt_binlog_row_based && mysql_bin_log.is_open()); + DBUG_ASSERT(is_current_stmt_binlog_format_row() && mysql_bin_log.is_open()); size_t const before_maxlen = max_row_length(table, before_record); size_t const after_maxlen = max_row_length(table, after_record); @@ -3742,7 +4132,7 @@ int THD::binlog_delete_row(TABLE* table, bool is_trans, MY_BITMAP const* cols, size_t colcnt, uchar const *record) { - DBUG_ASSERT(current_stmt_binlog_row_based && mysql_bin_log.is_open()); + DBUG_ASSERT(is_current_stmt_binlog_format_row() && mysql_bin_log.is_open()); /* Pack records into format for transfer. We are allocating more @@ -3768,14 +4158,15 @@ int THD::binlog_delete_row(TABLE* table, bool is_trans, } -int THD::binlog_remove_pending_rows_event(bool clear_maps) +int THD::binlog_remove_pending_rows_event(bool clear_maps, + bool is_transactional) { DBUG_ENTER("THD::binlog_remove_pending_rows_event"); if (!mysql_bin_log.is_open()) DBUG_RETURN(0); - mysql_bin_log.remove_pending_rows_event(this); + mysql_bin_log.remove_pending_rows_event(this, is_transactional); if (clear_maps) binlog_table_maps= 0; @@ -3783,7 +4174,7 @@ int THD::binlog_remove_pending_rows_event(bool clear_maps) DBUG_RETURN(0); } -int THD::binlog_flush_pending_rows_event(bool stmt_end) +int THD::binlog_flush_pending_rows_event(bool stmt_end, bool is_transactional) { DBUG_ENTER("THD::binlog_flush_pending_rows_event"); /* @@ -3799,7 +4190,7 @@ int THD::binlog_flush_pending_rows_event(bool stmt_end) flag is set. */ int error= 0; - if (Rows_log_event *pending= binlog_get_pending_rows_event()) + if (Rows_log_event *pending= binlog_get_pending_rows_event(is_transactional)) { if (stmt_end) { @@ -3808,7 +4199,8 @@ int THD::binlog_flush_pending_rows_event(bool stmt_end) binlog_table_maps= 0; } - error= mysql_bin_log.flush_and_set_pending_rows_event(this, 0); + error= mysql_bin_log.flush_and_set_pending_rows_event(this, 0, + is_transactional); } DBUG_RETURN(error); @@ -3824,8 +4216,6 @@ show_query_type(THD::enum_binlog_query_type qtype) return "ROW"; case THD::STMT_QUERY_TYPE: return "STMT"; - case THD::MYSQL_QUERY_TYPE: - return "MYSQL"; case THD::QUERY_TYPE_COUNT: default: DBUG_ASSERT(0 <= qtype && qtype < THD::QUERY_TYPE_COUNT); @@ -3837,32 +4227,97 @@ show_query_type(THD::enum_binlog_query_type qtype) #endif -/* - Member function that will log query, either row-based or - statement-based depending on the value of the 'current_stmt_binlog_row_based' - the value of the 'qtype' flag. +/** + Auxiliary method used by @c binlog_query() to raise warnings. - This function should be called after the all calls to ha_*_row() - functions have been issued, but before tables are unlocked and - closed. + The type of warning and the type of unsafeness is stored in + THD::binlog_unsafe_warning_flags. +*/ +void THD::issue_unsafe_warnings() +{ + DBUG_ENTER("issue_unsafe_warnings"); + /* + Ensure that binlog_unsafe_warning_flags is big enough to hold all + bits. This is actually a constant expression. + */ + DBUG_ASSERT(2 * LEX::BINLOG_STMT_UNSAFE_COUNT <= + sizeof(binlog_unsafe_warning_flags) * CHAR_BIT); - OBSERVE - There shall be no writes to any system table after calling - binlog_query(), so these writes has to be moved to before the call - of binlog_query() for correct functioning. + uint32 unsafe_type_flags= binlog_unsafe_warning_flags; - This is necessesary not only for RBR, but the master might crash - after binlogging the query but before changing the system tables. - This means that the slave and the master are not in the same state - (after the master has restarted), so therefore we have to - eliminate this problem. + /* + Clear: (1) bits above BINLOG_STMT_UNSAFE_COUNT; (2) bits for + warnings that have been printed already. + */ + unsafe_type_flags &= (LEX::BINLOG_STMT_UNSAFE_ALL_FLAGS ^ + (unsafe_type_flags >> LEX::BINLOG_STMT_UNSAFE_COUNT)); + /* If all warnings have been printed already, return. */ + if (unsafe_type_flags == 0) + DBUG_VOID_RETURN; - RETURN VALUE - Error code, or 0 if no error. + DBUG_PRINT("info", ("unsafe_type_flags: 0x%x", unsafe_type_flags)); + + /* + For each unsafe_type, check if the statement is unsafe in this way + and issue a warning. + */ + for (int unsafe_type=0; + unsafe_type < LEX::BINLOG_STMT_UNSAFE_COUNT; + unsafe_type++) + { + if ((unsafe_type_flags & (1 << unsafe_type)) != 0) + { + push_warning_printf(this, MYSQL_ERROR::WARN_LEVEL_NOTE, + ER_BINLOG_UNSAFE_STATEMENT, + ER(ER_BINLOG_UNSAFE_STATEMENT), + ER(LEX::binlog_stmt_unsafe_errcode[unsafe_type])); + if (global_system_variables.log_warnings) + { + char buf[MYSQL_ERRMSG_SIZE * 2]; + sprintf(buf, ER(ER_BINLOG_UNSAFE_STATEMENT), + ER(LEX::binlog_stmt_unsafe_errcode[unsafe_type])); + sql_print_warning(ER(ER_MESSAGE_AND_STATEMENT), buf, query()); + } + } + } + /* + Mark these unsafe types as already printed, to avoid printing + warnings for them again. + */ + binlog_unsafe_warning_flags|= + unsafe_type_flags << LEX::BINLOG_STMT_UNSAFE_COUNT; + DBUG_VOID_RETURN; +} + + +/** + Log the current query. + + The query will be logged in either row format or statement format + depending on the value of @c current_stmt_binlog_format_row field and + the value of the @c qtype parameter. + + This function must be called: + + - After the all calls to ha_*_row() functions have been issued. + + - After any writes to system tables. Rationale: if system tables + were written after a call to this function, and the master crashes + after the call to this function and before writing the system + tables, then the master and slave get out of sync. + + - Before tables are unlocked and closed. + + @see decide_logging_format + + @retval 0 Success + + @retval nonzero If there is a failure when writing the query (e.g., + write failure), then the error code is returned. */ int THD::binlog_query(THD::enum_binlog_query_type qtype, char const *query_arg, - ulong query_len, bool is_trans, bool suppress_use, - int errcode) + ulong query_len, bool is_trans, bool direct, + bool suppress_use, int errcode) { DBUG_ENTER("THD::binlog_query"); DBUG_PRINT("enter", ("qtype: %s query: '%s'", @@ -3879,59 +4334,53 @@ int THD::binlog_query(THD::enum_binlog_query_type qtype, char const *query_arg, top-most close_thread_tables(). */ if (this->prelocked_mode == NON_PRELOCKED) - if (int error= binlog_flush_pending_rows_event(TRUE)) + if (int error= binlog_flush_pending_rows_event(TRUE, is_trans)) DBUG_RETURN(error); /* - If we are in statement mode and trying to log an unsafe statement, - we should print a warning. + Warnings for unsafe statements logged in statement format are + printed here instead of in decide_logging_format(). This is + because the warnings should be printed only if the statement is + actually logged. When executing decide_logging_format(), we cannot + know for sure if the statement will be logged. */ - if (sql_log_bin_toplevel && lex->is_stmt_unsafe() && - variables.binlog_format == BINLOG_FORMAT_STMT && - binlog_filter->db_ok(this->db)) - { - /* - A warning can be elevated a error when STRICT sql mode. - But we don't want to elevate binlog warning to error here. - */ - push_warning(this, MYSQL_ERROR::WARN_LEVEL_NOTE, - ER_BINLOG_UNSAFE_STATEMENT, - ER(ER_BINLOG_UNSAFE_STATEMENT)); - if (global_system_variables.log_warnings && - !(binlog_flags & BINLOG_FLAG_UNSAFE_STMT_PRINTED)) - { - sql_print_warning("%s Statement: %.*s", - ER(ER_BINLOG_UNSAFE_STATEMENT), - MYSQL_ERRMSG_SIZE, query_arg); - binlog_flags|= BINLOG_FLAG_UNSAFE_STMT_PRINTED; - } - } + if (sql_log_bin_toplevel) + issue_unsafe_warnings(); switch (qtype) { + /* + ROW_QUERY_TYPE means that the statement may be logged either in + row format or in statement format. If + current_stmt_binlog_format is row, it means that the + statement has already been logged in row format and hence shall + not be logged again. + */ case THD::ROW_QUERY_TYPE: DBUG_PRINT("debug", - ("current_stmt_binlog_row_based: %d", - current_stmt_binlog_row_based)); - if (current_stmt_binlog_row_based) + ("is_current_stmt_binlog_format_row: %d", + is_current_stmt_binlog_format_row())); + if (is_current_stmt_binlog_format_row()) DBUG_RETURN(0); - /* Otherwise, we fall through */ - case THD::MYSQL_QUERY_TYPE: - /* - Using this query type is a conveniece hack, since we have been - moving back and forth between using RBR for replication of - system tables and not using it. + /* Fall through */ - Make sure to change in check_table_binlog_row_based() according - to how you treat this. + /* + STMT_QUERY_TYPE means that the query must be logged in statement + format; it cannot be logged in row format. This is typically + used by DDL statements. It is an error to use this query type + if current_stmt_binlog_format_row is row. + + @todo Currently there are places that call this method with + STMT_QUERY_TYPE and current_stmt_binlog_format is row. Fix those + places and add assert to ensure correct behavior. /Sven */ case THD::STMT_QUERY_TYPE: /* The MYSQL_LOG::write() function will set the STMT_END_F flag and flush the pending rows event if necessary. - */ + */ { - Query_log_event qinfo(this, query_arg, query_len, is_trans, suppress_use, - errcode); + Query_log_event qinfo(this, query_arg, query_len, is_trans, direct, + suppress_use, errcode); qinfo.flags|= LOG_EVENT_UPDATE_TABLE_MAP_VERSION_F; /* Binlog table maps will be irrelevant after a Query_log_event diff --git a/sql/sql_class.h b/sql/sql_class.h index 0ea1930c2d2..13ebb16095f 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -1288,6 +1288,7 @@ public: /* Used to execute base64 coded binlog events in MySQL server */ Relay_log_info* rli_fake; + void reset_for_next_command(); /* Constant for THD::where initialization in the beginning of every query. @@ -1462,32 +1463,81 @@ public: size_t needed, bool is_transactional, RowsEventT* hint); - Rows_log_event* binlog_get_pending_rows_event() const; - void binlog_set_pending_rows_event(Rows_log_event* ev); - int binlog_flush_pending_rows_event(bool stmt_end); - int binlog_remove_pending_rows_event(bool clear_maps); + Rows_log_event* binlog_get_pending_rows_event(bool is_transactional) const; + void binlog_set_pending_rows_event(Rows_log_event* ev, bool is_transactional); + inline int binlog_flush_pending_rows_event(bool stmt_end) + { + return (binlog_flush_pending_rows_event(stmt_end, FALSE) || + binlog_flush_pending_rows_event(stmt_end, TRUE)); + } + int binlog_flush_pending_rows_event(bool stmt_end, bool is_transactional); + int binlog_remove_pending_rows_event(bool clear_maps, bool is_transactional); + + /** + Determine the binlog format of the current statement. + + @retval 0 if the current statement will be logged in statement + format. + @retval nonzero if the current statement will be logged in row + format. + */ + int is_current_stmt_binlog_format_row() const { + DBUG_ASSERT(current_stmt_binlog_format == BINLOG_FORMAT_STMT || + current_stmt_binlog_format == BINLOG_FORMAT_ROW); + return current_stmt_binlog_format == BINLOG_FORMAT_ROW; + } private: + /** + Indicates the format in which the current statement will be + logged. This can only be set from @c decide_logging_format(). + */ + enum_binlog_format current_stmt_binlog_format; + + /** + Bit field for the state of binlog warnings. + + There are two groups of bits: + + - The first Lex::BINLOG_STMT_UNSAFE_COUNT bits list all types of + unsafeness that the current statement has. + + - The following Lex::BINLOG_STMT_UNSAFE_COUNT bits list all types + of unsafeness that the current statement has issued warnings + for. + + Hence, this variable must be big enough to hold + 2*Lex::BINLOG_STMT_UNSAFE_COUNT bits. This is asserted in @c + issue_unsafe_warnings(). + + The first and second groups of bits are set by @c + decide_logging_format() when it detects that a warning should be + issued. The third group of bits is set from @c binlog_query() + when a warning is issued. All bits are cleared at the end of the + top-level statement. + + This must be a member of THD and not of LEX, because warnings are + detected and issued in different places (@c + decide_logging_format() and @c binlog_query(), respectively). + Between these calls, the THD->lex object may change; e.g., if a + stored routine is invoked. Only THD persists between the calls. + */ + uint32 binlog_unsafe_warning_flags; + + void issue_unsafe_warnings(); + /* Number of outstanding table maps, i.e., table maps in the transaction cache. */ uint binlog_table_maps; - - enum enum_binlog_flag { - BINLOG_FLAG_UNSAFE_STMT_PRINTED, - BINLOG_FLAG_COUNT - }; - - /** - Flags with per-thread information regarding the status of the - binary log. - */ - uint32 binlog_flags; public: uint get_binlog_table_maps() const { return binlog_table_maps; } + void clear_binlog_table_maps() { + binlog_table_maps= 0; + } #endif /* MYSQL_CLIENT */ public: @@ -1792,8 +1842,6 @@ public: char scramble[SCRAMBLE_LENGTH+1]; bool slave_thread, one_shot_set; - /* tells if current statement should binlog row-based(1) or stmt-based(0) */ - bool current_stmt_binlog_row_based; bool locked, some_tables_deleted; bool last_cuted_field; bool no_errors, password; @@ -1950,27 +1998,18 @@ public: #ifndef MYSQL_CLIENT enum enum_binlog_query_type { - /* - The query can be logged row-based or statement-based - */ + /* The query can be logged in row format or in statement format. */ ROW_QUERY_TYPE, - /* - The query has to be logged statement-based - */ + /* The query has to be logged in statement format. */ STMT_QUERY_TYPE, - /* - The query represents a change to a table in the "mysql" - database and is currently mapped to ROW_QUERY_TYPE. - */ - MYSQL_QUERY_TYPE, QUERY_TYPE_COUNT }; int binlog_query(enum_binlog_query_type qtype, - char const *query, ulong query_len, - bool is_trans, bool suppress_use, + char const *query, ulong query_len, bool is_trans, + bool direct, bool suppress_use, int errcode); #endif @@ -2019,6 +2058,21 @@ public: return 0; #endif } + /** + Returns TRUE if session is in a multi-statement transaction mode. + + OPTION_NOT_AUTOCOMMIT: When autocommit is off, a multi-statement + transaction is implicitly started on the first statement after a + previous transaction has been ended. + + OPTION_BEGIN: Regardless of the autocommit status, a multi-statement + transaction can be explicitly started with the statements "START + TRANSACTION", "BEGIN [WORK]", "[COMMIT | ROLLBACK] AND CHAIN", etc. + */ + inline bool in_multi_stmt_transaction() + { + return options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN); + } inline bool fill_derived_tables() { return !stmt_arena->is_stmt_prepare() && !lex->only_view_structure(); @@ -2156,31 +2210,51 @@ public: void set_n_backup_active_arena(Query_arena *set, Query_arena *backup); void restore_active_arena(Query_arena *set, Query_arena *backup); - inline void set_current_stmt_binlog_row_based_if_mixed() + /* + @todo Make these methods private or remove them completely. Only + decide_logging_format should call them. /Sven + */ + inline void set_current_stmt_binlog_format_row_if_mixed() { + DBUG_ENTER("set_current_stmt_binlog_format_row_if_mixed"); + /* + This should only be called from decide_logging_format. + + @todo Once we have ensured this, uncomment the following + statement, remove the big comment below that, and remove the + in_sub_stmt==0 condition from the following 'if'. + */ + /* DBUG_ASSERT(in_sub_stmt == 0); */ /* If in a stored/function trigger, the caller should already have done the change. We test in_sub_stmt to prevent introducing bugs where people wouldn't ensure that, and would switch to row-based mode in the middle of executing a stored function/trigger (which is too late, see also - reset_current_stmt_binlog_row_based()); this condition will make their + reset_current_stmt_binlog_format_row()); this condition will make their tests fail and so force them to propagate the lex->binlog_row_based_if_mixed upwards to the caller. */ if ((variables.binlog_format == BINLOG_FORMAT_MIXED) && (in_sub_stmt == 0)) - current_stmt_binlog_row_based= TRUE; + set_current_stmt_binlog_format_row(); + + DBUG_VOID_RETURN; } - inline void set_current_stmt_binlog_row_based() + inline void set_current_stmt_binlog_format_row() { - current_stmt_binlog_row_based= TRUE; + DBUG_ENTER("set_current_stmt_binlog_format_row"); + current_stmt_binlog_format= BINLOG_FORMAT_ROW; + DBUG_VOID_RETURN; } - inline void clear_current_stmt_binlog_row_based() + inline void clear_current_stmt_binlog_format_row() { - current_stmt_binlog_row_based= FALSE; + DBUG_ENTER("clear_current_stmt_binlog_format_row"); + current_stmt_binlog_format= BINLOG_FORMAT_STMT; + DBUG_VOID_RETURN; } - inline void reset_current_stmt_binlog_row_based() + inline void reset_current_stmt_binlog_format_row() { + DBUG_ENTER("reset_current_stmt_binlog_format_row"); /* If there are temporary tables, don't reset back to statement-based. Indeed it could be that: @@ -2195,19 +2269,19 @@ public: or trigger is decided when it starts executing, depending for example on the caller (for a stored function: if caller is SELECT or INSERT/UPDATE/DELETE...). - - Don't reset binlog format for NDB binlog injector thread. */ DBUG_PRINT("debug", ("temporary_tables: %s, in_sub_stmt: %s, system_thread: %s", YESNO(temporary_tables), YESNO(in_sub_stmt), show_system_thread(system_thread))); - if ((temporary_tables == NULL) && (in_sub_stmt == 0) && - (system_thread != SYSTEM_THREAD_NDBCLUSTER_BINLOG)) + if ((temporary_tables == NULL) && (in_sub_stmt == 0)) { - current_stmt_binlog_row_based= - test(variables.binlog_format == BINLOG_FORMAT_ROW); + if (variables.binlog_format == BINLOG_FORMAT_ROW) + set_current_stmt_binlog_format_row(); + else + clear_current_stmt_binlog_format_row(); } + DBUG_VOID_RETURN; } /** @@ -2311,7 +2385,11 @@ public: Protected with LOCK_thd_data mutex. */ void set_query(char *query_arg, uint32 query_length_arg); + + int decide_logging_format(TABLE_LIST *tables); + private: + /** The current internal error handler for this thread, or NULL. */ Internal_error_handler *m_internal_handler; /** diff --git a/sql/sql_db.cc b/sql/sql_db.cc index e6ccd9aa594..64c9200533e 100644 --- a/sql/sql_db.cc +++ b/sql/sql_db.cc @@ -181,7 +181,7 @@ uchar* dboptions_get_key(my_dbopt_t *opt, size_t *length, static inline void write_to_binlog(THD *thd, char *query, uint q_len, char *db, uint db_len) { - Query_log_event qinfo(thd, query, q_len, 0, 0, 0); + Query_log_event qinfo(thd, query, q_len, FALSE, TRUE, FALSE, 0); qinfo.db= db; qinfo.db_len= db_len; mysql_bin_log.write(&qinfo); @@ -722,7 +722,7 @@ not_silent: if (mysql_bin_log.is_open()) { int errcode= query_error_code(thd, TRUE); - Query_log_event qinfo(thd, query, query_length, 0, + Query_log_event qinfo(thd, query, query_length, FALSE, TRUE, /* suppress_use */ TRUE, errcode); /* @@ -811,7 +811,7 @@ bool mysql_alter_db(THD *thd, const char *db, HA_CREATE_INFO *create_info) if (mysql_bin_log.is_open()) { int errcode= query_error_code(thd, TRUE); - Query_log_event qinfo(thd, thd->query(), thd->query_length(), 0, + Query_log_event qinfo(thd, thd->query(), thd->query_length(), FALSE, TRUE, /* suppress_use */ TRUE, errcode); /* @@ -963,7 +963,7 @@ bool mysql_rm_db(THD *thd,char *db,bool if_exists, bool silent) if (mysql_bin_log.is_open()) { int errcode= query_error_code(thd, TRUE); - Query_log_event qinfo(thd, query, query_length, 0, + Query_log_event qinfo(thd, query, query_length, FALSE, TRUE, /* suppress_use */ TRUE, errcode); /* Write should use the database being created as the "current @@ -1965,7 +1965,7 @@ bool mysql_upgrade_db(THD *thd, LEX_STRING *old_db) { int errcode= query_error_code(thd, TRUE); Query_log_event qinfo(thd, thd->query(), thd->query_length(), - 0, TRUE, errcode); + FALSE, TRUE, TRUE, errcode); thd->clear_error(); mysql_bin_log.write(&qinfo); } diff --git a/sql/sql_delete.cc b/sql/sql_delete.cc index 6b9a83e695b..157d5360a71 100644 --- a/sql/sql_delete.cc +++ b/sql/sql_delete.cc @@ -131,7 +131,7 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds, if (!using_limit && const_cond_result && !(specialflag & (SPECIAL_NO_NEW_FUNC | SPECIAL_SAFE_MODE)) && (thd->lex->sql_command == SQLCOM_TRUNCATE || - (!thd->current_stmt_binlog_row_based && + (!thd->is_current_stmt_binlog_format_row() && !(table->triggers && table->triggers->has_delete_triggers())))) { /* Update the table->file->stats.records number */ @@ -385,7 +385,8 @@ cleanup: transactional_table= table->file->has_transactions(); if (!transactional_table && deleted > 0) - thd->transaction.stmt.modified_non_trans_table= TRUE; + thd->transaction.stmt.modified_non_trans_table= + thd->transaction.all.modified_non_trans_table= TRUE; /* See similar binlogging code in sql_update.cc, for comments */ if ((error < 0) || thd->transaction.stmt.modified_non_trans_table) @@ -414,15 +415,13 @@ cleanup: */ int log_result= thd->binlog_query(query_type, thd->query(), thd->query_length(), - is_trans, FALSE, errcode); + is_trans, FALSE, FALSE, errcode); if (log_result) { error=1; } } - if (thd->transaction.stmt.modified_non_trans_table) - thd->transaction.all.modified_non_trans_table= TRUE; } DBUG_ASSERT(transactional_table || !deleted || thd->transaction.stmt.modified_non_trans_table); free_underlaid_joins(thd, select_lex); @@ -461,19 +460,6 @@ int mysql_prepare_delete(THD *thd, TABLE_LIST *table_list, Item **conds) DBUG_ENTER("mysql_prepare_delete"); List<Item> all_fields; - /* - Statement-based replication of DELETE ... LIMIT is not safe as order of - rows is not defined, so in mixed mode we go to row-based. - - Note that we may consider a statement as safe if ORDER BY primary_key - is present. However it may confuse users to see very similiar statements - replicated differently. - */ - if (thd->lex->current_select->select_limit) - { - thd->lex->set_stmt_unsafe(); - thd->set_current_stmt_binlog_row_based_if_mixed(); - } thd->lex->allow_sum_func= 0; if (setup_tables_and_check_access(thd, &thd->lex->select_lex.context, &thd->lex->select_lex.top_join_list, @@ -822,6 +808,9 @@ void multi_delete::abort() if (deleted) query_cache_invalidate3(thd, delete_tables, 1); + if (thd->transaction.stmt.modified_non_trans_table) + thd->transaction.all.modified_non_trans_table= TRUE; + /* If rows from the first table only has been deleted and it is transactional, just do rollback. @@ -852,9 +841,8 @@ void multi_delete::abort() int errcode= query_error_code(thd, thd->killed == THD::NOT_KILLED); thd->binlog_query(THD::ROW_QUERY_TYPE, thd->query(), thd->query_length(), - transactional_tables, FALSE, errcode); + transactional_tables, FALSE, FALSE, errcode); } - thd->transaction.all.modified_non_trans_table= true; } DBUG_VOID_RETURN; } @@ -1007,6 +995,9 @@ bool multi_delete::send_eof() /* reset used flags */ thd_proc_info(thd, "end"); + if (thd->transaction.stmt.modified_non_trans_table) + thd->transaction.all.modified_non_trans_table= TRUE; + /* We must invalidate the query cache before binlog writing and ha_autocommit_... @@ -1026,14 +1017,12 @@ bool multi_delete::send_eof() errcode= query_error_code(thd, killed_status == THD::NOT_KILLED); if (thd->binlog_query(THD::ROW_QUERY_TYPE, thd->query(), thd->query_length(), - transactional_tables, FALSE, errcode) && + transactional_tables, FALSE, FALSE, errcode) && !normal_tables) { local_error=1; // Log write failed: roll back the SQL statement } } - if (thd->transaction.stmt.modified_non_trans_table) - thd->transaction.all.modified_non_trans_table= TRUE; } if (local_error != 0) error_handled= TRUE; // to force early leave from ::send_error() @@ -1058,15 +1047,16 @@ bool multi_delete::send_eof() static bool mysql_truncate_by_delete(THD *thd, TABLE_LIST *table_list) { - bool error, save_binlog_row_based= thd->current_stmt_binlog_row_based; + bool error, save_binlog_row_based= thd->is_current_stmt_binlog_format_row(); DBUG_ENTER("mysql_truncate_by_delete"); table_list->lock_type= TL_WRITE; mysql_init_select(thd->lex); - thd->clear_current_stmt_binlog_row_based(); + thd->clear_current_stmt_binlog_format_row(); error= mysql_delete(thd, table_list, NULL, NULL, HA_POS_ERROR, LL(0), TRUE); ha_autocommit_or_rollback(thd, error); end_trans(thd, error ? ROLLBACK : COMMIT); - thd->current_stmt_binlog_row_based= save_binlog_row_based; + if (save_binlog_row_based) + thd->set_current_stmt_binlog_format_row(); DBUG_RETURN(error); } @@ -1166,7 +1156,11 @@ end: if (!error) { /* In RBR, the statement is not binlogged if the table is temporary. */ - if (!is_temporary_table || !thd->current_stmt_binlog_row_based) + if (!is_temporary_table || !thd->is_current_stmt_binlog_format_row()) + /* + TRUNCATE must always be statement-based binlogged (not row-based) so + we don't test current_stmt_binlog_format. + */ write_bin_log(thd, TRUE, thd->query(), thd->query_length()); my_ok(thd); // This should return record count } diff --git a/sql/sql_insert.cc b/sql/sql_insert.cc index e9a36629c66..dbec84b6d1a 100644 --- a/sql/sql_insert.cc +++ b/sql/sql_insert.cc @@ -872,6 +872,10 @@ bool mysql_insert(THD *thd,TABLE_LIST *table_list, */ query_cache_invalidate3(thd, table_list, 1); } + + if (thd->transaction.stmt.modified_non_trans_table) + thd->transaction.all.modified_non_trans_table= TRUE; + if ((changed && error <= 0) || thd->transaction.stmt.modified_non_trans_table || was_insert_delayed) @@ -910,15 +914,13 @@ bool mysql_insert(THD *thd,TABLE_LIST *table_list, */ DBUG_ASSERT(thd->killed != THD::KILL_BAD_DATA || error > 0); if (thd->binlog_query(THD::ROW_QUERY_TYPE, - thd->query(), thd->query_length(), - transactional_table, FALSE, - errcode)) + thd->query(), thd->query_length(), + transactional_table, FALSE, FALSE, + errcode)) { error=1; } } - if (thd->transaction.stmt.modified_non_trans_table) - thd->transaction.all.modified_non_trans_table= TRUE; } DBUG_ASSERT(transactional_table || !changed || thd->transaction.stmt.modified_non_trans_table); @@ -1739,6 +1741,7 @@ public: table(0),tables_in_use(0),stacked_inserts(0), status(0), dead(0), group_count(0) { + DBUG_ENTER("Delayed_insert constructor"); thd.security_ctx->user=thd.security_ctx->priv_user=(char*) delayed_user; thd.security_ctx->host=(char*) my_localhost; thd.current_tablenr=0; @@ -1747,11 +1750,21 @@ public: thd.lex->current_select= 0; // for my_message_sql thd.lex->sql_command= SQLCOM_INSERT; // For innodb::store_lock() /* - Statement-based replication of INSERT DELAYED has problems with RAND() - and user vars, so in mixed mode we go to row-based. + Statement-based replication of INSERT DELAYED has problems with + RAND() and user variables, so in mixed mode we go to row-based. + For normal commands, the unsafe flag is set at parse time. + However, since the flag is a member of the THD object, of which + the delayed_insert thread has its own copy, we must set the + statement to unsafe here and explicitly set row logging mode. + + @todo set_current_stmt_binlog_format_row_if_mixed should not be + called by anything else than thd->decide_logging_format(). When + we call set_current_blah here, none of the checks in + decide_logging_format is made. We should probably call + thd->decide_logging_format() directly instead. /Sven */ - thd.lex->set_stmt_unsafe(); - thd.set_current_stmt_binlog_row_based_if_mixed(); + thd.lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_INSERT_DELAYED); + thd.set_current_stmt_binlog_format_row_if_mixed(); bzero((char*) &thd.net, sizeof(thd.net)); // Safety bzero((char*) &table_list, sizeof(table_list)); // Safety @@ -1766,6 +1779,7 @@ public: delayed_lock= global_system_variables.low_priority_updates ? TL_WRITE_LOW_PRIORITY : TL_WRITE; VOID(pthread_mutex_unlock(&LOCK_thread_count)); + DBUG_VOID_RETURN; } ~Delayed_insert() { @@ -2310,12 +2324,6 @@ static void handle_delayed_insert_impl(THD *thd, Delayed_insert *di) */ lex_start(thd); thd->lex->sql_command= SQLCOM_INSERT; // For innodb::store_lock() - /* - Statement-based replication of INSERT DELAYED has problems with RAND() - and user vars, so in mixed mode we go to row-based. - */ - thd->lex->set_stmt_unsafe(); - thd->set_current_stmt_binlog_row_based_if_mixed(); /* Open table */ if (!(di->table= open_n_lock_single_table(thd, &di->table_list, @@ -2584,6 +2592,7 @@ bool Delayed_insert::handle_inserts(void) { int error; ulong max_rows; + bool has_trans = TRUE; bool using_ignore= 0, using_opt_replace= 0, using_bin_log= mysql_bin_log.is_open(); delayed_row *row; @@ -2736,7 +2745,7 @@ bool Delayed_insert::handle_inserts(void) */ thd.binlog_query(THD::ROW_QUERY_TYPE, row->query.str, row->query.length, - FALSE, FALSE, errcode); + FALSE, FALSE, FALSE, errcode); thd.time_zone_used = backup_time_zone_used; thd.variables.time_zone = backup_time_zone; @@ -2804,9 +2813,11 @@ bool Delayed_insert::handle_inserts(void) or trigger. TODO: Move the logging to last in the sequence of rows. - */ - if (thd.current_stmt_binlog_row_based) - thd.binlog_flush_pending_rows_event(TRUE); + */ + has_trans= thd.lex->sql_command == SQLCOM_CREATE_TABLE || + table->file->has_transactions(); + if (thd.is_current_stmt_binlog_format_row()) + thd.binlog_flush_pending_rows_event(TRUE, has_trans); if ((error=table->file->extra(HA_EXTRA_NO_CACHE))) { // This shouldn't happen @@ -2870,19 +2881,6 @@ bool mysql_insert_select_prepare(THD *thd) DBUG_ENTER("mysql_insert_select_prepare"); /* - Statement-based replication of INSERT ... SELECT ... LIMIT is not safe - as order of rows is not defined, so in mixed mode we go to row-based. - - Note that we may consider a statement as safe if ORDER BY primary_key - is present or we SELECT a constant. However it may confuse users to - see very similiar statements replicated differently. - */ - if (lex->current_select->select_limit) - { - lex->set_stmt_unsafe(); - thd->set_current_stmt_binlog_row_based_if_mixed(); - } - /* SELECT_LEX do not belong to INSERT statement, so we can't add WHERE clause if table is VIEW */ @@ -3246,9 +3244,11 @@ bool select_insert::send_eof() and ha_autocommit_or_rollback. */ query_cache_invalidate3(thd, table, 1); - if (thd->transaction.stmt.modified_non_trans_table) - thd->transaction.all.modified_non_trans_table= TRUE; } + + if (thd->transaction.stmt.modified_non_trans_table) + thd->transaction.all.modified_non_trans_table= TRUE; + DBUG_ASSERT(trans_table || !changed || thd->transaction.stmt.modified_non_trans_table); @@ -3267,7 +3267,7 @@ bool select_insert::send_eof() errcode= query_error_code(thd, killed_status == THD::NOT_KILLED); thd->binlog_query(THD::ROW_QUERY_TYPE, thd->query(), thd->query_length(), - trans_table, FALSE, errcode); + trans_table, FALSE, FALSE, errcode); } table->file->ha_release_auto_increment(); @@ -3333,15 +3333,16 @@ void select_insert::abort() { transactional_table= table->file->has_transactions(); if (thd->transaction.stmt.modified_non_trans_table) { + if (!can_rollback_data()) + thd->transaction.all.modified_non_trans_table= TRUE; + if (mysql_bin_log.is_open()) { int errcode= query_error_code(thd, thd->killed == THD::NOT_KILLED); thd->binlog_query(THD::ROW_QUERY_TYPE, thd->query(), thd->query_length(), - transactional_table, FALSE, errcode); + transactional_table, FALSE, FALSE, errcode); } - if (!thd->current_stmt_binlog_row_based && !can_rollback_data()) - thd->transaction.all.modified_non_trans_table= TRUE; if (changed) query_cache_invalidate3(thd, table, 1); } @@ -3585,11 +3586,11 @@ select_create::prepare(List<Item> &values, SELECT_LEX_UNIT *u) virtual int do_postlock(TABLE **tables, uint count) { THD *thd= const_cast<THD*>(ptr->get_thd()); - if (int error= decide_logging_format(thd, &all_tables)) + if (int error= thd->decide_logging_format(&all_tables)) return error; TABLE const *const table = *tables; - if (thd->current_stmt_binlog_row_based && + if (thd->is_current_stmt_binlog_format_row() && !table->s->tmp_table && !ptr->get_create_info()->table_existed) { @@ -3613,7 +3614,7 @@ select_create::prepare(List<Item> &values, SELECT_LEX_UNIT *u) temporary table, we need to start a statement transaction. */ if ((thd->lex->create_info.options & HA_LEX_CREATE_TMP_TABLE) == 0 && - thd->current_stmt_binlog_row_based && + thd->is_current_stmt_binlog_format_row() && mysql_bin_log.is_open()) { thd->binlog_start_trans_and_stmt(); @@ -3632,7 +3633,7 @@ select_create::prepare(List<Item> &values, SELECT_LEX_UNIT *u) push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_NOTE, ER_TABLE_EXISTS_ERROR, ER(ER_TABLE_EXISTS_ERROR), create_table->table_name); - if (thd->current_stmt_binlog_row_based) + if (thd->is_current_stmt_binlog_format_row()) binlog_show_create_table(&(create_table->table), 1); table= create_table->table; } @@ -3720,7 +3721,7 @@ select_create::binlog_show_create_table(TABLE **tables, uint count) schema that will do a close_thread_tables(), destroying the statement transaction cache. */ - DBUG_ASSERT(thd->current_stmt_binlog_row_based); + DBUG_ASSERT(thd->is_current_stmt_binlog_format_row()); DBUG_ASSERT(tables && *tables && count > 0); char buf[2048]; @@ -3742,6 +3743,7 @@ select_create::binlog_show_create_table(TABLE **tables, uint count) thd->binlog_query(THD::STMT_QUERY_TYPE, query.ptr(), query.length(), /* is_trans */ TRUE, + /* direct */ FALSE, /* suppress_use */ FALSE, errcode); } @@ -3760,7 +3762,7 @@ void select_create::send_error(uint errcode,const char *err) DBUG_PRINT("info", ("Current statement %s row-based", - thd->current_stmt_binlog_row_based ? "is" : "is NOT")); + thd->is_current_stmt_binlog_format_row() ? "is" : "is NOT")); DBUG_PRINT("info", ("Current table (at 0x%lu) %s a temporary (or non-existant) table", (ulong) table, @@ -3842,7 +3844,7 @@ void select_create::abort() select_insert::abort(); thd->transaction.stmt.modified_non_trans_table= FALSE; reenable_binlog(thd); - thd->binlog_flush_pending_rows_event(TRUE); + thd->binlog_flush_pending_rows_event(TRUE, TRUE); if (m_plock) { diff --git a/sql/sql_lex.cc b/sql/sql_lex.cc index 2adbc44eb12..a08338f4c76 100644 --- a/sql/sql_lex.cc +++ b/sql/sql_lex.cc @@ -31,6 +31,24 @@ sys_var *trg_new_row_fake_var= (sys_var*) 0x01; +/** + @note The order of the elements of this array must correspond to + the order of elements in enum_binlog_stmt_unsafe. +*/ +const int +Query_tables_list::binlog_stmt_unsafe_errcode[BINLOG_STMT_UNSAFE_COUNT] = +{ + ER_BINLOG_UNSAFE_LIMIT, + ER_BINLOG_UNSAFE_INSERT_DELAYED, + ER_BINLOG_UNSAFE_SYSTEM_TABLE, + ER_BINLOG_UNSAFE_TWO_AUTOINC_COLUMNS, + ER_BINLOG_UNSAFE_UDF, + ER_BINLOG_UNSAFE_SYSTEM_VARIABLE, + ER_BINLOG_UNSAFE_SYSTEM_FUNCTION, + ER_BINLOG_UNSAFE_NONTRANS_AFTER_TRANS +}; + + /* Longest standard keyword name */ #define TOCK_NAME_LENGTH 24 diff --git a/sql/sql_lex.h b/sql/sql_lex.h index bd4612a8819..04333f46db6 100644 --- a/sql/sql_lex.h +++ b/sql/sql_lex.h @@ -1048,25 +1048,156 @@ public: } } + /** - Has the parser/scanner detected that this statement is unsafe? - */ + Enumeration listing of all types of unsafe statement. + + @note The order of elements of this enumeration type must + correspond to the order of the elements of the @c explanations + array defined in the body of @c THD::issue_unsafe_warnings. + */ + enum enum_binlog_stmt_unsafe { + /** + SELECT..LIMIT is unsafe because the set of rows returned cannot + be predicted. + */ + BINLOG_STMT_UNSAFE_LIMIT= 0, + /** + INSERT DELAYED is unsafe because the time when rows are inserted + cannot be predicted. + */ + BINLOG_STMT_UNSAFE_INSERT_DELAYED, + /** + Access to log tables is unsafe because slave and master probably + log different things. + */ + BINLOG_STMT_UNSAFE_SYSTEM_TABLE, + /** + Update of two autoincrement columns is unsafe. With one + autoincrement column, we store the counter in the binlog so that + slave can restore the correct value. But we can only store one + such counter per statement, so updating more than one + autoincrement column is not safe. + */ + BINLOG_STMT_UNSAFE_TWO_AUTOINC_COLUMNS, + /** + Using a UDF (user-defined function) is unsafe. + */ + BINLOG_STMT_UNSAFE_UDF, + /** + Using most system variables is unsafe, because slave may run + with different options than master. + */ + BINLOG_STMT_UNSAFE_SYSTEM_VARIABLE, + /** + Using some functions is unsafe (e.g., UUID). + */ + BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION, + + /** + Mixing transactional and non-transactional statements are unsafe if + non-transactional reads or writes are occur after transactional + reads or writes inside a transaction. + */ + BINLOG_STMT_UNSAFE_NONTRANS_AFTER_TRANS, + + /* The last element of this enumeration type. */ + BINLOG_STMT_UNSAFE_COUNT + }; + /** + This has all flags from 0 (inclusive) to BINLOG_STMT_FLAG_COUNT + (exclusive) set. + */ + static const int BINLOG_STMT_UNSAFE_ALL_FLAGS= + ((1 << BINLOG_STMT_UNSAFE_COUNT) - 1); + + /** + Maps elements of enum_binlog_stmt_unsafe to error codes. + */ + static const int binlog_stmt_unsafe_errcode[BINLOG_STMT_UNSAFE_COUNT]; + + /** + Determine if this statement is marked as unsafe. + + @retval 0 if the statement is not marked as unsafe. + @retval nonzero if the statement is marked as unsafe. + */ inline bool is_stmt_unsafe() const { - return binlog_stmt_flags & (1U << BINLOG_STMT_FLAG_UNSAFE); + return get_stmt_unsafe_flags() != 0; } /** - Flag the current (top-level) statement as unsafe. + Flag the current (top-level) statement as unsafe. + The flag will be reset after the statement has finished. - The flag will be reset after the statement has finished. + @param unsafe_type The type of unsafety: one of the @c + BINLOG_STMT_FLAG_UNSAFE_* flags in @c enum_binlog_stmt_flag. + */ + inline void set_stmt_unsafe(enum_binlog_stmt_unsafe unsafe_type) { + DBUG_ENTER("set_stmt_unsafe"); + DBUG_ASSERT(unsafe_type >= 0 && unsafe_type < BINLOG_STMT_UNSAFE_COUNT); + binlog_stmt_flags|= (1U << unsafe_type); + DBUG_VOID_RETURN; + } - */ - inline void set_stmt_unsafe() { - binlog_stmt_flags|= (1U << BINLOG_STMT_FLAG_UNSAFE); + /** + Set the bits of binlog_stmt_flags determining the type of + unsafeness of the current statement. No existing bits will be + cleared, but new bits may be set. + + @param flags A binary combination of zero or more bits, (1<<flag) + where flag is a member of enum_binlog_stmt_unsafe. + */ + inline void set_stmt_unsafe_flags(uint32 flags) { + DBUG_ENTER("set_stmt_unsafe_flags"); + DBUG_ASSERT((flags & ~BINLOG_STMT_UNSAFE_ALL_FLAGS) == 0); + binlog_stmt_flags|= flags; + DBUG_VOID_RETURN; + } + + /** + Return a binary combination of all unsafe warnings for the + statement. If the statement has been marked as unsafe by the + 'flag' member of enum_binlog_stmt_unsafe, then the return value + from this function has bit (1<<flag) set to 1. + */ + inline uint32 get_stmt_unsafe_flags() const { + DBUG_ENTER("get_stmt_unsafe_flags"); + DBUG_RETURN(binlog_stmt_flags & BINLOG_STMT_UNSAFE_ALL_FLAGS); } + /** + Mark the current statement as safe; i.e., clear all bits in + binlog_stmt_flags that correspond to elements of + enum_binlog_stmt_unsafe. + */ inline void clear_stmt_unsafe() { - binlog_stmt_flags&= ~(1U << BINLOG_STMT_FLAG_UNSAFE); + DBUG_ENTER("clear_stmt_unsafe"); + binlog_stmt_flags&= ~BINLOG_STMT_UNSAFE_ALL_FLAGS; + DBUG_VOID_RETURN; + } + + /** + Determine if this statement is a row injection. + + @retval 0 if the statement is not a row injection + @retval nonzero if the statement is a row injection + */ + inline bool is_stmt_row_injection() const { + return binlog_stmt_flags & + (1U << (BINLOG_STMT_UNSAFE_COUNT + BINLOG_STMT_TYPE_ROW_INJECTION)); + } + + /** + Flag the statement as a row injection. A row injection is either + a BINLOG statement, or a row event in the relay log executed by + the slave SQL thread. + */ + inline void set_stmt_row_injection() { + DBUG_ENTER("set_stmt_row_injection"); + binlog_stmt_flags|= + (1U << (BINLOG_STMT_UNSAFE_COUNT + BINLOG_STMT_TYPE_ROW_INJECTION)); + DBUG_VOID_RETURN; } /** @@ -1077,16 +1208,37 @@ public: { return sroutines_list.elements != 0; } private: - enum enum_binlog_stmt_flag { - BINLOG_STMT_FLAG_UNSAFE, - BINLOG_STMT_FLAG_COUNT + + /** + Enumeration listing special types of statements. + + Currently, the only possible type is ROW_INJECTION. + */ + enum enum_binlog_stmt_type { + /** + The statement is a row injection (i.e., either a BINLOG + statement or a row event executed by the slave SQL thread). + */ + BINLOG_STMT_TYPE_ROW_INJECTION = 0, + + /** The last element of this enumeration type. */ + BINLOG_STMT_TYPE_COUNT }; - /* - Tells if the parsing stage detected properties of the statement, - for example: that some items require row-based binlogging to give - a reliable binlog/replication, or if we will use stored functions - or triggers which themselves need require row-based binlogging. + /** + Bit field indicating the type of statement. + + There are two groups of bits: + + - The low BINLOG_STMT_UNSAFE_COUNT bits indicate the types of + unsafeness that the current statement has. + + - The next BINLOG_STMT_TYPE_COUNT bits indicate if the statement + is of some special type. + + This must be a member of LEX, not of THD: each stored procedure + needs to remember its unsafeness state between calls and each + stored procedure has its own LEX object (but no own THD object). */ uint32 binlog_stmt_flags; }; @@ -1889,6 +2041,7 @@ typedef struct st_lex : public Query_tables_list } return FALSE; } + } LEX; diff --git a/sql/sql_load.cc b/sql/sql_load.cc index 8109ca4313e..2f3aff43ae6 100644 --- a/sql/sql_load.cc +++ b/sql/sql_load.cc @@ -540,8 +540,8 @@ int mysql_load(THD *thd,sql_exchange *ex,TABLE_LIST *table_list, version for the binary log to mark that table maps are invalid after this point. */ - if (thd->current_stmt_binlog_row_based) - thd->binlog_flush_pending_rows_event(true); + if (thd->is_current_stmt_binlog_format_row()) + thd->binlog_flush_pending_rows_event(TRUE, transactional_table); else { /* @@ -687,7 +687,7 @@ static bool write_execute_load_query_log_event(THD *thd, sql_exchange* ex, (uint) ((char*) fname_end - (char*) thd->query()), (duplicates == DUP_REPLACE) ? LOAD_DUP_REPLACE : (ignore ? LOAD_DUP_IGNORE : LOAD_DUP_ERROR), - transactional_table, FALSE, errcode); + transactional_table, FALSE, FALSE, errcode); e.flags|= LOG_EVENT_UPDATE_TABLE_MAP_VERSION_F; return mysql_bin_log.write(&e); } diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc index 2210fd87070..e6e2466e1ef 100644 --- a/sql/sql_parse.cc +++ b/sql/sql_parse.cc @@ -5487,21 +5487,26 @@ bool my_yyoverflow(short **yyss, YYSTYPE **yyvs, ulong *yystacksize) /** - Reset THD part responsible for command processing state. + Reset the part of THD responsible for the state of command + processing. - This needs to be called before execution of every statement - (prepared or conventional). - It is not called by substatements of routines. + This needs to be called before execution of every statement + (prepared or conventional). It is not called by substatements of + routines. - @todo - Make it a method of THD and align its name with the rest of - reset/end/start/init methods. - @todo - Call it after we use THD for queries, not before. -*/ + @todo Remove mysql_reset_thd_for_next_command and only use the + member function. + @todo Call it after we use THD for queries, not before. +*/ void mysql_reset_thd_for_next_command(THD *thd) { + thd->reset_for_next_command(); +} + +void THD::reset_for_next_command() +{ + THD *thd= this; DBUG_ENTER("mysql_reset_thd_for_next_command"); DBUG_ASSERT(!thd->spcont); /* not for substatements of routines */ DBUG_ASSERT(! thd->in_sub_stmt); @@ -5545,15 +5550,12 @@ void mysql_reset_thd_for_next_command(THD *thd) thd->rand_used= 0; thd->sent_row_count= thd->examined_row_count= 0; - /* - Because we come here only for start of top-statements, binlog format is - constant inside a complex statement (using stored functions) etc. - */ - thd->reset_current_stmt_binlog_row_based(); + thd->reset_current_stmt_binlog_format_row(); + thd->binlog_unsafe_warning_flags= 0; DBUG_PRINT("debug", - ("current_stmt_binlog_row_based: %d", - thd->current_stmt_binlog_row_based)); + ("is_current_stmt_binlog_format_row(): %d", + thd->is_current_stmt_binlog_format_row())); DBUG_VOID_RETURN; } diff --git a/sql/sql_repl.cc b/sql/sql_repl.cc index 8eb6115d3d7..f2bbc247eb1 100644 --- a/sql/sql_repl.cc +++ b/sql/sql_repl.cc @@ -1950,7 +1950,7 @@ int log_loaded_block(IO_CACHE* file) uchar* buffer= (uchar*) my_b_get_buffer_start(file); uint max_event_size= current_thd->variables.max_allowed_packet; lf_info= (LOAD_FILE_INFO*) file->arg; - if (lf_info->thd->current_stmt_binlog_row_based) + if (lf_info->thd->is_current_stmt_binlog_format_row()) DBUG_RETURN(0); if (lf_info->last_pos_in_file != HA_POS_ERROR && lf_info->last_pos_in_file >= my_b_get_pos_in_file(file)) diff --git a/sql/sql_table.cc b/sql/sql_table.cc index ca4f5c9f1bc..b05afd6776c 100644 --- a/sql/sql_table.cc +++ b/sql/sql_table.cc @@ -1732,7 +1732,7 @@ end: */ void write_bin_log(THD *thd, bool clear_error, - char const *query, ulong query_length) + char const *query, ulong query_length, bool is_trans) { if (mysql_bin_log.is_open()) { @@ -1741,8 +1741,9 @@ void write_bin_log(THD *thd, bool clear_error, thd->clear_error(); else errcode= query_error_code(thd, TRUE); + thd->binlog_query(THD::STMT_QUERY_TYPE, - query, query_length, FALSE, FALSE, errcode); + query, query_length, is_trans, FALSE, FALSE, errcode); } } @@ -1857,7 +1858,7 @@ int mysql_rm_table_part2(THD *thd, TABLE_LIST *tables, bool if_exists, LINT_INIT(alias); LINT_INIT(path_length); - if (thd->current_stmt_binlog_row_based && !dont_log_query) + if (thd->is_current_stmt_binlog_format_row() && !dont_log_query) { built_query.set_charset(system_charset_info); if (if_exists) @@ -1917,7 +1918,7 @@ int mysql_rm_table_part2(THD *thd, TABLE_LIST *tables, bool if_exists, // removed temporary table tmp_table_deleted= 1; if (thd->variables.binlog_format == BINLOG_FORMAT_MIXED && - thd->current_stmt_binlog_row_based) + thd->is_current_stmt_binlog_format_row()) { if (built_tmp_query.is_empty()) { @@ -1951,7 +1952,7 @@ int mysql_rm_table_part2(THD *thd, TABLE_LIST *tables, bool if_exists, being built. The string always end in a comma and the comma will be chopped off before being written to the binary log. */ - if (!drop_temporary && thd->current_stmt_binlog_row_based && !dont_log_query) + if (!drop_temporary && thd->is_current_stmt_binlog_format_row() && !dont_log_query) { non_temp_tables_count++; /* @@ -2079,7 +2080,7 @@ int mysql_rm_table_part2(THD *thd, TABLE_LIST *tables, bool if_exists, query_cache_invalidate3(thd, tables, 0); if (!dont_log_query) { - if (!thd->current_stmt_binlog_row_based || + if (!thd->is_current_stmt_binlog_format_row() || (non_temp_tables_count > 0 && !tmp_table_deleted)) { /* @@ -2091,7 +2092,7 @@ int mysql_rm_table_part2(THD *thd, TABLE_LIST *tables, bool if_exists, */ write_bin_log(thd, !error, thd->query(), thd->query_length()); } - else if (thd->current_stmt_binlog_row_based && + else if (thd->is_current_stmt_binlog_format_row() && tmp_table_deleted) { if (non_temp_tables_count > 0) @@ -2130,7 +2131,8 @@ int mysql_rm_table_part2(THD *thd, TABLE_LIST *tables, bool if_exists, */ built_tmp_query.chop(); // Chop of the last comma built_tmp_query.append(" /* generated by server */"); - write_bin_log(thd, !error, built_tmp_query.ptr(), built_tmp_query.length()); + write_bin_log(thd, !error, built_tmp_query.ptr(), built_tmp_query.length(), + thd->in_multi_stmt_transaction()); } } @@ -3551,8 +3553,8 @@ static inline void write_create_table_bin_log(THD *thd, Otherwise, the statement shall be binlogged. */ if (!internal_tmp_table && - (!thd->current_stmt_binlog_row_based || - (thd->current_stmt_binlog_row_based && + (!thd->is_current_stmt_binlog_format_row() || + (thd->is_current_stmt_binlog_format_row() && !(create_info->options & HA_LEX_CREATE_TMP_TABLE)))) write_bin_log(thd, TRUE, thd->query(), thd->query_length()); } @@ -5289,7 +5291,7 @@ binlog: /* We have to write the query before we unlock the tables. */ - if (thd->current_stmt_binlog_row_based) + if (thd->is_current_stmt_binlog_format_row()) { /* Since temporary tables are not replicated under row-based @@ -6450,7 +6452,7 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name, { thd->clear_error(); Query_log_event qinfo(thd, thd->query(), thd->query_length(), - 0, FALSE, 0); + FALSE, TRUE, FALSE, 0); mysql_bin_log.write(&qinfo); } my_ok(thd); @@ -7192,7 +7194,7 @@ view_err: if (rename_temporary_table(thd, new_table, new_db, new_name)) goto err1; /* We don't replicate alter table statement on temporary tables */ - if (!thd->current_stmt_binlog_row_based) + if (!thd->is_current_stmt_binlog_format_row()) write_bin_log(thd, TRUE, thd->query(), thd->query_length()); goto end_temporary; } @@ -7354,7 +7356,7 @@ view_err: db, table_name); DBUG_ASSERT(!(mysql_bin_log.is_open() && - thd->current_stmt_binlog_row_based && + thd->is_current_stmt_binlog_format_row() && (create_info->options & HA_LEX_CREATE_TMP_TABLE))); write_bin_log(thd, TRUE, thd->query(), thd->query_length()); diff --git a/sql/sql_udf.cc b/sql/sql_udf.cc index c6b41b59a3f..ab2628da1b3 100644 --- a/sql/sql_udf.cc +++ b/sql/sql_udf.cc @@ -437,8 +437,8 @@ int mysql_create_function(THD *thd,udf_func *udf) Turn off row binlogging of this statement and use statement-based so that all supporting tables are updated for CREATE FUNCTION command. */ - if (thd->current_stmt_binlog_row_based) - thd->clear_current_stmt_binlog_row_based(); + if (thd->is_current_stmt_binlog_format_row()) + thd->clear_current_stmt_binlog_format_row(); rw_wrlock(&THR_LOCK_udf); if ((hash_search(&udf_hash,(uchar*) udf->name.str, udf->name.length))) @@ -540,8 +540,8 @@ int mysql_drop_function(THD *thd,const LEX_STRING *udf_name) Turn off row binlogging of this statement and use statement-based so that all supporting tables are updated for DROP FUNCTION command. */ - if (thd->current_stmt_binlog_row_based) - thd->clear_current_stmt_binlog_row_based(); + if (thd->is_current_stmt_binlog_format_row()) + thd->clear_current_stmt_binlog_format_row(); rw_wrlock(&THR_LOCK_udf); if (!(udf=(udf_func*) hash_search(&udf_hash,(uchar*) udf_name->str, diff --git a/sql/sql_update.cc b/sql/sql_update.cc index cfa383ce9cb..7023e62e70d 100644 --- a/sql/sql_update.cc +++ b/sql/sql_update.cc @@ -789,6 +789,9 @@ int mysql_update(THD *thd, { query_cache_invalidate3(thd, table_list, 1); } + + if (thd->transaction.stmt.modified_non_trans_table) + thd->transaction.all.modified_non_trans_table= TRUE; /* error < 0 means really no error at all: we processed all rows until the @@ -811,13 +814,11 @@ int mysql_update(THD *thd, if (thd->binlog_query(THD::ROW_QUERY_TYPE, thd->query(), thd->query_length(), - transactional_table, FALSE, errcode)) + transactional_table, FALSE, FALSE, errcode)) { error=1; // Rollback update } } - if (thd->transaction.stmt.modified_non_trans_table) - thd->transaction.all.modified_non_trans_table= TRUE; } DBUG_ASSERT(transactional_table || !updated || thd->transaction.stmt.modified_non_trans_table); free_underlaid_joins(thd, select_lex); @@ -878,19 +879,6 @@ bool mysql_prepare_update(THD *thd, TABLE_LIST *table_list, SELECT_LEX *select_lex= &thd->lex->select_lex; DBUG_ENTER("mysql_prepare_update"); - /* - Statement-based replication of UPDATE ... LIMIT is not safe as order of - rows is not defined, so in mixed mode we go to row-based. - - Note that we may consider a statement as safe if ORDER BY primary_key - is present. However it may confuse users to see very similiar statements - replicated differently. - */ - if (thd->lex->current_select->select_limit) - { - thd->lex->set_stmt_unsafe(); - thd->set_current_stmt_binlog_row_based_if_mixed(); - } #ifndef NO_EMBEDDED_ACCESS_CHECKS table_list->grant.want_privilege= table->grant.want_privilege= (SELECT_ACL & ~table->grant.privilege); @@ -1753,10 +1741,10 @@ bool multi_update::send_data(List<Item> ¬_used_values) /* non-transactional or transactional table got modified */ /* either multi_update class' flag is raised in its branch */ if (table->file->has_transactions()) - transactional_tables= 1; + transactional_tables= TRUE; else { - trans_safe= 0; + trans_safe= FALSE; thd->transaction.stmt.modified_non_trans_table= TRUE; } } @@ -1866,7 +1854,7 @@ void multi_update::abort() int errcode= query_error_code(thd, thd->killed == THD::NOT_KILLED); thd->binlog_query(THD::ROW_QUERY_TYPE, thd->query(), thd->query_length(), - transactional_tables, FALSE, errcode); + transactional_tables, FALSE, FALSE, errcode); } thd->transaction.all.modified_non_trans_table= TRUE; } @@ -2004,10 +1992,10 @@ int multi_update::do_updates() if (updated != org_updated) { if (table->file->has_transactions()) - transactional_tables= 1; + transactional_tables= TRUE; else { - trans_safe= 0; // Can't do safe rollback + trans_safe= FALSE; // Can't do safe rollback thd->transaction.stmt.modified_non_trans_table= TRUE; } } @@ -2037,10 +2025,10 @@ err2: if (updated != org_updated) { if (table->file->has_transactions()) - transactional_tables= 1; + transactional_tables= TRUE; else { - trans_safe= 0; + trans_safe= FALSE; thd->transaction.stmt.modified_non_trans_table= TRUE; } } @@ -2086,8 +2074,9 @@ bool multi_update::send_eof() either from the query's list or via a stored routine: bug#13270,23333 */ - DBUG_ASSERT(trans_safe || !updated || - thd->transaction.stmt.modified_non_trans_table); + if (thd->transaction.stmt.modified_non_trans_table) + thd->transaction.all.modified_non_trans_table= TRUE; + if (local_error == 0 || thd->transaction.stmt.modified_non_trans_table) { if (mysql_bin_log.is_open()) @@ -2099,14 +2088,15 @@ bool multi_update::send_eof() errcode= query_error_code(thd, killed_status == THD::NOT_KILLED); if (thd->binlog_query(THD::ROW_QUERY_TYPE, thd->query(), thd->query_length(), - transactional_tables, FALSE, errcode)) + transactional_tables, FALSE, FALSE, errcode)) { local_error= 1; // Rollback update } } - if (thd->transaction.stmt.modified_non_trans_table) - thd->transaction.all.modified_non_trans_table= TRUE; } + DBUG_ASSERT(trans_safe || !updated || + thd->transaction.stmt.modified_non_trans_table); + if (local_error != 0) error_handled= TRUE; // to force early leave from ::send_error() diff --git a/sql/sql_view.cc b/sql/sql_view.cc index ae3af0640a3..69b5d3100b7 100644 --- a/sql/sql_view.cc +++ b/sql/sql_view.cc @@ -663,7 +663,7 @@ bool mysql_create_view(THD *thd, TABLE_LIST *views, int errcode= query_error_code(thd, TRUE); thd->binlog_query(THD::STMT_QUERY_TYPE, - buff.ptr(), buff.length(), FALSE, FALSE, errcode); + buff.ptr(), buff.length(), FALSE, FALSE, FALSE, errcode); } VOID(pthread_mutex_unlock(&LOCK_open)); @@ -1306,8 +1306,8 @@ bool mysql_make_view(THD *thd, File_parser *parser, TABLE_LIST *table, If the view's body needs row-based binlogging (e.g. the VIEW is created from SELECT UUID()), the top statement also needs it. */ - if (lex->is_stmt_unsafe()) - old_lex->set_stmt_unsafe(); + old_lex->set_stmt_unsafe_flags(lex->get_stmt_unsafe_flags()); + view_is_mergeable= (table->algorithm != VIEW_ALGORITHM_TMPTABLE && lex->can_be_merged()); diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy index d0bf24bd02b..977b9e622ce 100644 --- a/sql/sql_yacc.yy +++ b/sql/sql_yacc.yy @@ -7408,7 +7408,7 @@ function_call_keyword: $$= new (YYTHD->mem_root) Item_func_current_user(Lex->current_context()); if ($$ == NULL) MYSQL_YYABORT; - Lex->set_stmt_unsafe(); + Lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION); Lex->safe_to_cache_query= 0; } | DATE_SYM '(' expr ')' @@ -7563,7 +7563,7 @@ function_call_keyword: $$= new (YYTHD->mem_root) Item_func_user(); if ($$ == NULL) MYSQL_YYABORT; - Lex->set_stmt_unsafe(); + Lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION); Lex->safe_to_cache_query=0; } | YEAR_SYM '(' expr ')' @@ -7713,7 +7713,7 @@ function_call_nonkeyword: sysdate_is_now=1, because the slave may have sysdate_is_now=0. */ - Lex->set_stmt_unsafe(); + Lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION); if (global_system_variables.sysdate_is_now == 0) $$= new (YYTHD->mem_root) Item_func_sysdate_local(); else @@ -8307,7 +8307,7 @@ variable_aux: if (!($$= get_system_var(YYTHD, $2, $3, $4))) MYSQL_YYABORT; if (!((Item_func_get_system_var*) $$)->is_written_to_binlog()) - Lex->set_stmt_unsafe(); + Lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_VARIABLE); } ; @@ -9153,7 +9153,10 @@ opt_limit_clause: ; limit_clause: - LIMIT limit_options {} + LIMIT limit_options + { + Lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_LIMIT); + } ; limit_options: @@ -9215,6 +9218,7 @@ delete_limit_clause: { SELECT_LEX *sel= Select; sel->select_limit= $2; + Lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_LIMIT); sel->explicit_limit= 1; } ; @@ -9665,13 +9669,21 @@ insert_lock_option: #endif } | LOW_PRIORITY { $$= TL_WRITE_LOW_PRIORITY; } - | DELAYED_SYM { $$= TL_WRITE_DELAYED; } + | DELAYED_SYM + { + $$= TL_WRITE_DELAYED; + Lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_INSERT_DELAYED); + } | HIGH_PRIORITY { $$= TL_WRITE; } ; replace_lock_option: opt_low_priority { $$= $1; } - | DELAYED_SYM { $$= TL_WRITE_DELAYED; } + | DELAYED_SYM + { + $$= TL_WRITE_DELAYED; + Lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_INSERT_DELAYED); + } ; insert2: |