diff options
author | Nirbhay Choubey <nirbhay@mariadb.com> | 2015-05-09 17:09:21 -0400 |
---|---|---|
committer | Nirbhay Choubey <nirbhay@mariadb.com> | 2015-05-09 17:09:21 -0400 |
commit | e11cad9e9dd3ae0be61aec1bb50b0ddc867b10be (patch) | |
tree | d1266ef4e52851e73467a6d7bf4a3ca991b484fa /sql/log.cc | |
parent | 99f496ae65a56d587e24c88df85aae7e7cfce70e (diff) | |
parent | 0880284bf715b4916cc735e19b76d1062c2bfdcf (diff) | |
download | mariadb-git-e11cad9e9dd3ae0be61aec1bb50b0ddc867b10be.tar.gz |
Merge tag 'mariadb-10.0.19' into 10.0-galera
Diffstat (limited to 'sql/log.cc')
-rw-r--r-- | sql/log.cc | 220 |
1 files changed, 196 insertions, 24 deletions
diff --git a/sql/log.cc b/sql/log.cc index f042b1ab756..ae932221f3b 100644 --- a/sql/log.cc +++ b/sql/log.cc @@ -1,5 +1,5 @@ /* Copyright (c) 2000, 2013, Oracle and/or its affiliates. - Copyright (c) 2009, 2014, SkySQL Ab. + Copyright (c) 2009, 2015, MariaDB This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -98,6 +98,9 @@ mysql_mutex_t LOCK_commit_ordered; static ulonglong binlog_status_var_num_commits; static ulonglong binlog_status_var_num_group_commits; +static ulonglong binlog_status_group_commit_trigger_count; +static ulonglong binlog_status_group_commit_trigger_lock_wait; +static ulonglong binlog_status_group_commit_trigger_timeout; static char binlog_snapshot_file[FN_REFLEN]; static ulonglong binlog_snapshot_position; @@ -107,6 +110,12 @@ static SHOW_VAR binlog_status_vars_detail[]= (char *)&binlog_status_var_num_commits, SHOW_LONGLONG}, {"group_commits", (char *)&binlog_status_var_num_group_commits, SHOW_LONGLONG}, + {"group_commit_trigger_count", + (char *)&binlog_status_group_commit_trigger_count, SHOW_LONGLONG}, + {"group_commit_trigger_lock_wait", + (char *)&binlog_status_group_commit_trigger_lock_wait, SHOW_LONGLONG}, + {"group_commit_trigger_timeout", + (char *)&binlog_status_group_commit_trigger_timeout, SHOW_LONGLONG}, {"snapshot_file", (char *)&binlog_snapshot_file, SHOW_CHAR}, {"snapshot_position", @@ -2612,6 +2621,8 @@ bool MYSQL_LOG::open( char buff[FN_REFLEN]; MY_STAT f_stat; File file= -1; + my_off_t seek_offset; + bool is_fifo = false; int open_flags= O_CREAT | O_BINARY; DBUG_ENTER("MYSQL_LOG::open"); DBUG_PRINT("enter", ("log_type: %d", (int) log_type_arg)); @@ -2628,15 +2639,17 @@ bool MYSQL_LOG::open( log_type_arg, io_cache_type_arg)) goto err; - /* File is regular writable file */ - if (my_stat(log_file_name, &f_stat, MYF(0)) && !MY_S_ISREG(f_stat.st_mode)) - goto err; + is_fifo = my_stat(log_file_name, &f_stat, MYF(0)) && + MY_S_ISFIFO(f_stat.st_mode); if (io_cache_type == SEQ_READ_APPEND) open_flags |= O_RDWR | O_APPEND; else open_flags |= O_WRONLY | (log_type == LOG_BIN ? 0 : O_APPEND); + if (is_fifo) + open_flags |= O_NONBLOCK; + db[0]= 0; #ifdef HAVE_PSI_INTERFACE @@ -2644,11 +2657,16 @@ bool MYSQL_LOG::open( m_log_file_key= log_file_key; #endif - if ((file= mysql_file_open(log_file_key, - log_file_name, open_flags, - MYF(MY_WME | ME_WAITTANG))) < 0 || - init_io_cache(&log_file, file, IO_SIZE, io_cache_type, - mysql_file_tell(file, MYF(MY_WME)), 0, + if ((file= mysql_file_open(log_file_key, log_file_name, open_flags, + MYF(MY_WME | ME_WAITTANG))) < 0) + goto err; + + if (is_fifo) + seek_offset= 0; + else if ((seek_offset= mysql_file_tell(file, MYF(MY_WME)))) + goto err; + + if (init_io_cache(&log_file, file, IO_SIZE, io_cache_type, seek_offset, 0, MYF(MY_WME | MY_NABP | ((log_type == LOG_BIN) ? MY_WAIT_IF_FULL : 0)))) goto err; @@ -2737,17 +2755,17 @@ void MYSQL_LOG::close(uint exiting) { end_io_cache(&log_file); - if (mysql_file_sync(log_file.file, MYF(MY_WME)) && ! write_error) + if (log_type == LOG_BIN && mysql_file_sync(log_file.file, MYF(MY_WME)) && ! write_error) { write_error= 1; - sql_print_error(ER(ER_ERROR_ON_WRITE), name, errno); + sql_print_error(ER_THD_OR_DEFAULT(current_thd, ER_ERROR_ON_WRITE), name, errno); } if (!(exiting & LOG_CLOSE_DELAYED_CLOSE) && mysql_file_close(log_file.file, MYF(MY_WME)) && ! write_error) { write_error= 1; - sql_print_error(ER(ER_ERROR_ON_WRITE), name, errno); + sql_print_error(ER_THD_OR_DEFAULT(current_thd, ER_ERROR_ON_WRITE), name, errno); } } @@ -3163,6 +3181,8 @@ MYSQL_BIN_LOG::MYSQL_BIN_LOG(uint *sync_period) bytes_written(0), file_id(1), open_count(1), group_commit_queue(0), group_commit_queue_busy(FALSE), num_commits(0), num_group_commits(0), + group_commit_trigger_count(0), group_commit_trigger_timeout(0), + group_commit_trigger_lock_wait(0), sync_period_ptr(sync_period), sync_counter(0), state_file_deleted(false), binlog_state_recover_done(false), is_relay_log(0), signal_cnt(0), @@ -4262,8 +4282,7 @@ int MYSQL_BIN_LOG::purge_first_log(Relay_log_info* rli, bool included) included= 1; to_purge_if_included= my_strdup(ir->name, MYF(0)); } - my_atomic_rwlock_destroy(&ir->inuse_relaylog_atomic_lock); - my_free(ir); + rli->free_inuse_relaylog(ir); ir= next; } rli->inuse_relaylog_list= ir; @@ -5791,6 +5810,14 @@ end: } +/* + Initialize the binlog state from the master-bin.state file, at server startup. + + Returns: + 0 for success. + 2 for when .state file did not exist. + 1 for other error. +*/ int MYSQL_BIN_LOG::read_state_from_file() { @@ -5818,7 +5845,7 @@ MYSQL_BIN_LOG::read_state_from_file() with GTID enabled. So initialize to empty state. */ rpl_global_gtid_binlog_state.reset(); - err= 0; + err= 2; goto end; } } @@ -5994,6 +6021,7 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info, my_bool *with_annotate) if (direct) { int res; + uint64 commit_id= 0; DBUG_PRINT("info", ("direct is set")); if ((res= thd->wait_for_prior_commit())) DBUG_RETURN(res); @@ -6001,7 +6029,16 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info, my_bool *with_annotate) my_org_b_tell= my_b_tell(file); mysql_mutex_lock(&LOCK_log); prev_binlog_id= current_binlog_id; - if (write_gtid_event(thd, true, using_trans, 0)) + DBUG_EXECUTE_IF("binlog_force_commit_id", + { + const LEX_STRING name= { C_STRING_WITH_LEN("commit_id") }; + bool null_value; + user_var_entry *entry= + (user_var_entry*) my_hash_search(&thd->user_vars, + (uchar*) name.str, name.length); + commit_id= entry->val_int(&null_value); + }); + if (write_gtid_event(thd, true, using_trans, commit_id)) goto err; } else @@ -6790,6 +6827,10 @@ bool MYSQL_BIN_LOG::write_incident(THD *thd) if (check_purge) checkpoint_and_purge(prev_binlog_id); } + else + { + mysql_mutex_unlock(&LOCK_log); + } DBUG_RETURN(error); } @@ -7179,6 +7220,14 @@ MYSQL_BIN_LOG::queue_for_group_commit(group_commit_entry *orig_entry) } } + /* + Handle the heuristics that if another transaction is waiting for this + transaction (or if it does so later), then we want to trigger group + commit immediately, without waiting for the binlog_commit_wait_usec + timeout to expire. + */ + entry->thd->waiting_on_group_commit= true; + /* Add the entry to the group commit queue. */ next_entry= entry->next; entry->next= group_commit_queue; @@ -7194,7 +7243,7 @@ MYSQL_BIN_LOG::queue_for_group_commit(group_commit_entry *orig_entry) cur= entry->thd->wait_for_commit_ptr; } - if (opt_binlog_commit_wait_count > 0) + if (opt_binlog_commit_wait_count > 0 && orig_queue != NULL) mysql_cond_signal(&COND_prepare_ordered); mysql_mutex_unlock(&LOCK_prepare_ordered); DEBUG_SYNC(orig_entry->thd, "commit_after_release_LOCK_prepare_ordered"); @@ -7368,6 +7417,11 @@ MYSQL_BIN_LOG::trx_group_commit_leader(group_commit_entry *leader) while (current) { group_commit_entry *next= current->next; + /* + Now that group commit is started, we can clear the flag; there is no + longer any use in waiters on this commit trying to trigger it early. + */ + current->thd->waiting_on_group_commit= false; current->next= queue; queue= current; current= next; @@ -7381,6 +7435,15 @@ MYSQL_BIN_LOG::trx_group_commit_leader(group_commit_entry *leader) if (likely(is_open())) // Should always be true { commit_id= (last_in_queue == leader ? 0 : (uint64)leader->thd->query_id); + DBUG_EXECUTE_IF("binlog_force_commit_id", + { + const LEX_STRING name= { C_STRING_WITH_LEN("commit_id") }; + bool null_value; + user_var_entry *entry= + (user_var_entry*) my_hash_search(&leader->thd->user_vars, + (uchar*) name.str, name.length); + commit_id= entry->val_int(&null_value); + }); /* Commit every transaction in the queue. @@ -7497,6 +7560,8 @@ MYSQL_BIN_LOG::trx_group_commit_leader(group_commit_entry *leader) my_error(ER_ERROR_ON_WRITE, MYF(ME_NOREFRESH), name, errno); check_purge= false; } + /* In case of binlog rotate, update the correct current binlog offset. */ + commit_offset= my_b_write_tell(&log_file); } DEBUG_SYNC(leader->thd, "commit_before_get_LOCK_commit_ordered"); @@ -7678,8 +7743,18 @@ MYSQL_BIN_LOG::wait_for_sufficient_commits() mysql_mutex_assert_owner(&LOCK_prepare_ordered); for (e= last_head= group_commit_queue, count= 0; e; e= e->next) + { if (++count >= opt_binlog_commit_wait_count) + { + group_commit_trigger_count++; + return; + } + if (unlikely(e->thd->has_waiter)) + { + group_commit_trigger_lock_wait++; return; + } + } mysql_mutex_unlock(&LOCK_log); set_timespec_nsec(wait_until, (ulonglong)1000*opt_binlog_commit_wait_usec); @@ -7692,14 +7767,33 @@ MYSQL_BIN_LOG::wait_for_sufficient_commits() err= mysql_cond_timedwait(&COND_prepare_ordered, &LOCK_prepare_ordered, &wait_until); if (err == ETIMEDOUT) + { + group_commit_trigger_timeout++; break; + } + if (unlikely(last_head->thd->has_waiter)) + { + group_commit_trigger_lock_wait++; + break; + } head= group_commit_queue; for (e= head; e && e != last_head; e= e->next) + { ++count; + if (unlikely(e->thd->has_waiter)) + { + group_commit_trigger_lock_wait++; + goto after_loop; + } + } if (count >= opt_binlog_commit_wait_count) + { + group_commit_trigger_count++; break; + } last_head= head; } +after_loop: /* We must not wait for LOCK_log while holding LOCK_prepare_ordered. @@ -7723,6 +7817,42 @@ MYSQL_BIN_LOG::wait_for_sufficient_commits() } +void +MYSQL_BIN_LOG::binlog_trigger_immediate_group_commit() +{ + group_commit_entry *head; + mysql_mutex_lock(&LOCK_prepare_ordered); + head= group_commit_queue; + if (head) + { + head->thd->has_waiter= true; + mysql_cond_signal(&COND_prepare_ordered); + } + mysql_mutex_unlock(&LOCK_prepare_ordered); +} + + +/* + This function is called when a transaction T1 goes to wait for another + transaction T2. It is used to cut short any binlog group commit delay from + --binlog-commit-wait-count in the case where another transaction is stalled + on the wait due to conflicting row locks. + + If T2 is already ready to group commit, any waiting group commit will be + signalled to proceed immediately. Otherwise, a flag will be set in T2, and + when T2 later becomes ready, immediate group commit will be triggered. +*/ +void +binlog_report_wait_for(THD *thd1, THD *thd2) +{ + if (opt_binlog_commit_wait_count == 0) + return; + thd2->has_waiter= true; + if (thd2->waiting_on_group_commit) + mysql_bin_log.binlog_trigger_immediate_group_commit(); +} + + /** Wait until we get a signal that the relay log has been updated. @@ -9607,7 +9737,17 @@ MYSQL_BIN_LOG::do_binlog_recovery(const char *opt_name, bool do_xa_recovery) if (error != LOG_INFO_EOF) sql_print_error("find_log_pos() failed (error: %d)", error); else + { error= read_state_from_file(); + if (error == 2) + { + /* + No binlog files and no binlog state is not an error (eg. just initial + server start after fresh installation). + */ + error= 0; + } + } return error; } @@ -9633,15 +9773,42 @@ MYSQL_BIN_LOG::do_binlog_recovery(const char *opt_name, bool do_xa_recovery) if ((ev= Log_event::read_log_event(&log, 0, &fdle, opt_master_verify_checksum)) && - ev->get_type_code() == FORMAT_DESCRIPTION_EVENT && - ev->flags & LOG_EVENT_BINLOG_IN_USE_F) + ev->get_type_code() == FORMAT_DESCRIPTION_EVENT) { - sql_print_information("Recovering after a crash using %s", opt_name); - error= recover(&log_info, log_name, &log, - (Format_description_log_event *)ev, do_xa_recovery); + if (ev->flags & LOG_EVENT_BINLOG_IN_USE_F) + { + sql_print_information("Recovering after a crash using %s", opt_name); + error= recover(&log_info, log_name, &log, + (Format_description_log_event *)ev, do_xa_recovery); + } + else + { + error= read_state_from_file(); + if (error == 2) + { + /* + The binlog exists, but the .state file is missing. This is normal if + this is the first master start after a major upgrade to 10.0 (with + GTID support). + + However, it could also be that the .state file was lost somehow, and + in this case it could be a serious issue, as we would set the wrong + binlog state in the next binlog file to be created, and GTID + processing would be corrupted. A common way would be copying files + from an old server to a new one and forgetting the .state file. + + So in this case, we want to try to recover the binlog state by + scanning the last binlog file (but we do not need any XA recovery). + + ToDo: We could avoid one scan at first start after major upgrade, by + detecting that there is no GTID_LIST event at the start of the + binlog file, and stopping the scan in that case. + */ + error= recover(&log_info, log_name, &log, + (Format_description_log_event *)ev, false); + } + } } - else - error= read_state_from_file(); delete ev; end_io_cache(&log); @@ -9813,6 +9980,11 @@ TC_LOG_BINLOG::set_status_variables(THD *thd) binlog_snapshot_position= last_commit_pos_offset; } mysql_mutex_unlock(&LOCK_commit_ordered); + mysql_mutex_lock(&LOCK_prepare_ordered); + binlog_status_group_commit_trigger_count= this->group_commit_trigger_count; + binlog_status_group_commit_trigger_timeout= this->group_commit_trigger_timeout; + binlog_status_group_commit_trigger_lock_wait= this->group_commit_trigger_lock_wait; + mysql_mutex_unlock(&LOCK_prepare_ordered); if (have_snapshot) { |