summary | refs | log | tree | commit | diff
path: root/sql/log.cc
diff options
context:
space:
mode:
Diffstat (limited to 'sql/log.cc')
-rw-r--r-- sql/log.cc 220
1 files changed, 196 insertions, 24 deletions
diff --git a/sql/log.cc b/sql/log.cc
index 38fe1066896..99d3fb69b18 100644
--- a/sql/log.cc
+++ b/sql/log.cc
@@ -1,5 +1,5 @@
/* Copyright (c) 2000, 2013, Oracle and/or its affiliates.
- Copyright (c) 2009, 2014, SkySQL Ab.
+ Copyright (c) 2009, 2015, MariaDB
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -95,6 +95,9 @@ mysql_mutex_t LOCK_commit_ordered;
static ulonglong binlog_status_var_num_commits;
static ulonglong binlog_status_var_num_group_commits;
+static ulonglong binlog_status_group_commit_trigger_count;
+static ulonglong binlog_status_group_commit_trigger_lock_wait;
+static ulonglong binlog_status_group_commit_trigger_timeout;
static char binlog_snapshot_file[FN_REFLEN];
static ulonglong binlog_snapshot_position;
@@ -104,6 +107,12 @@ static SHOW_VAR binlog_status_vars_detail[]=
(char *)&binlog_status_var_num_commits, SHOW_LONGLONG},
{"group_commits",
(char *)&binlog_status_var_num_group_commits, SHOW_LONGLONG},
+ {"group_commit_trigger_count",
+ (char *)&binlog_status_group_commit_trigger_count, SHOW_LONGLONG},
+ {"group_commit_trigger_lock_wait",
+ (char *)&binlog_status_group_commit_trigger_lock_wait, SHOW_LONGLONG},
+ {"group_commit_trigger_timeout",
+ (char *)&binlog_status_group_commit_trigger_timeout, SHOW_LONGLONG},
{"snapshot_file",
(char *)&binlog_snapshot_file, SHOW_CHAR},
{"snapshot_position",
@@ -2484,6 +2493,8 @@ bool MYSQL_LOG::open(
char buff[FN_REFLEN];
MY_STAT f_stat;
File file= -1;
+ my_off_t seek_offset;
+ bool is_fifo = false;
int open_flags= O_CREAT | O_BINARY;
DBUG_ENTER("MYSQL_LOG::open");
DBUG_PRINT("enter", ("log_type: %d", (int) log_type_arg));
@@ -2500,15 +2511,17 @@ bool MYSQL_LOG::open(
log_type_arg, io_cache_type_arg))
goto err;
- /* File is regular writable file */
- if (my_stat(log_file_name, &f_stat, MYF(0)) && !MY_S_ISREG(f_stat.st_mode))
- goto err;
+ is_fifo = my_stat(log_file_name, &f_stat, MYF(0)) &&
+ MY_S_ISFIFO(f_stat.st_mode);
if (io_cache_type == SEQ_READ_APPEND)
open_flags |= O_RDWR | O_APPEND;
else
open_flags |= O_WRONLY | (log_type == LOG_BIN ? 0 : O_APPEND);
+ if (is_fifo)
+ open_flags |= O_NONBLOCK;
+
db[0]= 0;
#ifdef HAVE_PSI_INTERFACE
@@ -2516,11 +2529,16 @@ bool MYSQL_LOG::open(
m_log_file_key= log_file_key;
#endif
- if ((file= mysql_file_open(log_file_key,
- log_file_name, open_flags,
- MYF(MY_WME | ME_WAITTANG))) < 0 ||
- init_io_cache(&log_file, file, IO_SIZE, io_cache_type,
- mysql_file_tell(file, MYF(MY_WME)), 0,
+ if ((file= mysql_file_open(log_file_key, log_file_name, open_flags,
+ MYF(MY_WME | ME_WAITTANG))) < 0)
+ goto err;
+
+ if (is_fifo)
+ seek_offset= 0;
+ else if ((seek_offset= mysql_file_tell(file, MYF(MY_WME))))
+ goto err;
+
+ if (init_io_cache(&log_file, file, IO_SIZE, io_cache_type, seek_offset, 0,
MYF(MY_WME | MY_NABP |
((log_type == LOG_BIN) ? MY_WAIT_IF_FULL : 0))))
goto err;
@@ -2609,17 +2627,17 @@ void MYSQL_LOG::close(uint exiting)
{
end_io_cache(&log_file);
- if (mysql_file_sync(log_file.file, MYF(MY_WME)) && ! write_error)
+ if (log_type == LOG_BIN && mysql_file_sync(log_file.file, MYF(MY_WME)) && ! write_error)
{
write_error= 1;
- sql_print_error(ER(ER_ERROR_ON_WRITE), name, errno);
+ sql_print_error(ER_THD_OR_DEFAULT(current_thd, ER_ERROR_ON_WRITE), name, errno);
}
if (!(exiting & LOG_CLOSE_DELAYED_CLOSE) &&
mysql_file_close(log_file.file, MYF(MY_WME)) && ! write_error)
{
write_error= 1;
- sql_print_error(ER(ER_ERROR_ON_WRITE), name, errno);
+ sql_print_error(ER_THD_OR_DEFAULT(current_thd, ER_ERROR_ON_WRITE), name, errno);
}
}
@@ -3035,6 +3053,8 @@ MYSQL_BIN_LOG::MYSQL_BIN_LOG(uint *sync_period)
bytes_written(0), file_id(1), open_count(1),
group_commit_queue(0), group_commit_queue_busy(FALSE),
num_commits(0), num_group_commits(0),
+ group_commit_trigger_count(0), group_commit_trigger_timeout(0),
+ group_commit_trigger_lock_wait(0),
sync_period_ptr(sync_period), sync_counter(0),
state_file_deleted(false), binlog_state_recover_done(false),
is_relay_log(0), signal_cnt(0),
@@ -4134,8 +4154,7 @@ int MYSQL_BIN_LOG::purge_first_log(Relay_log_info* rli, bool included)
included= 1;
to_purge_if_included= my_strdup(ir->name, MYF(0));
}
- my_atomic_rwlock_destroy(&ir->inuse_relaylog_atomic_lock);
- my_free(ir);
+ rli->free_inuse_relaylog(ir);
ir= next;
}
rli->inuse_relaylog_list= ir;
@@ -5653,6 +5672,14 @@ end:
}
+/*
+ Initialize the binlog state from the master-bin.state file, at server startup.
+
+ Returns:
+ 0 for success.
+ 2 for when .state file did not exist.
+ 1 for other error.
+*/
int
MYSQL_BIN_LOG::read_state_from_file()
{
@@ -5680,7 +5707,7 @@ MYSQL_BIN_LOG::read_state_from_file()
with GTID enabled. So initialize to empty state.
*/
rpl_global_gtid_binlog_state.reset();
- err= 0;
+ err= 2;
goto end;
}
}
@@ -5850,6 +5877,7 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info, my_bool *with_annotate)
if (direct)
{
int res;
+ uint64 commit_id= 0;
DBUG_PRINT("info", ("direct is set"));
if ((res= thd->wait_for_prior_commit()))
DBUG_RETURN(res);
@@ -5857,7 +5885,16 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info, my_bool *with_annotate)
my_org_b_tell= my_b_tell(file);
mysql_mutex_lock(&LOCK_log);
prev_binlog_id= current_binlog_id;
- if (write_gtid_event(thd, true, using_trans, 0))
+ DBUG_EXECUTE_IF("binlog_force_commit_id",
+ {
+ const LEX_STRING name= { C_STRING_WITH_LEN("commit_id") };
+ bool null_value;
+ user_var_entry *entry=
+ (user_var_entry*) my_hash_search(&thd->user_vars,
+ (uchar*) name.str, name.length);
+ commit_id= entry->val_int(&null_value);
+ });
+ if (write_gtid_event(thd, true, using_trans, commit_id))
goto err;
}
else
@@ -6637,6 +6674,10 @@ bool MYSQL_BIN_LOG::write_incident(THD *thd)
if (check_purge)
checkpoint_and_purge(prev_binlog_id);
}
+ else
+ {
+ mysql_mutex_unlock(&LOCK_log);
+ }
DBUG_RETURN(error);
}
@@ -7022,6 +7063,14 @@ MYSQL_BIN_LOG::queue_for_group_commit(group_commit_entry *orig_entry)
}
}
+ /*
+ Heuristic: if another transaction is already waiting for this
+ transaction (or starts waiting for it later), we want to trigger group
+ commit immediately, without waiting for the binlog_commit_wait_usec
+ timeout to expire.
+ */
+ entry->thd->waiting_on_group_commit= true;
+
/* Add the entry to the group commit queue. */
next_entry= entry->next;
entry->next= group_commit_queue;
@@ -7037,7 +7086,7 @@ MYSQL_BIN_LOG::queue_for_group_commit(group_commit_entry *orig_entry)
cur= entry->thd->wait_for_commit_ptr;
}
- if (opt_binlog_commit_wait_count > 0)
+ if (opt_binlog_commit_wait_count > 0 && orig_queue != NULL)
mysql_cond_signal(&COND_prepare_ordered);
mysql_mutex_unlock(&LOCK_prepare_ordered);
DEBUG_SYNC(orig_entry->thd, "commit_after_release_LOCK_prepare_ordered");
@@ -7211,6 +7260,11 @@ MYSQL_BIN_LOG::trx_group_commit_leader(group_commit_entry *leader)
while (current)
{
group_commit_entry *next= current->next;
+ /*
+ Now that group commit is started, we can clear the flag; there is no
+ longer any use in waiters on this commit trying to trigger it early.
+ */
+ current->thd->waiting_on_group_commit= false;
current->next= queue;
queue= current;
current= next;
@@ -7224,6 +7278,15 @@ MYSQL_BIN_LOG::trx_group_commit_leader(group_commit_entry *leader)
if (likely(is_open())) // Should always be true
{
commit_id= (last_in_queue == leader ? 0 : (uint64)leader->thd->query_id);
+ DBUG_EXECUTE_IF("binlog_force_commit_id",
+ {
+ const LEX_STRING name= { C_STRING_WITH_LEN("commit_id") };
+ bool null_value;
+ user_var_entry *entry=
+ (user_var_entry*) my_hash_search(&leader->thd->user_vars,
+ (uchar*) name.str, name.length);
+ commit_id= entry->val_int(&null_value);
+ });
/*
Commit every transaction in the queue.
@@ -7340,6 +7403,8 @@ MYSQL_BIN_LOG::trx_group_commit_leader(group_commit_entry *leader)
my_error(ER_ERROR_ON_WRITE, MYF(ME_NOREFRESH), name, errno);
check_purge= false;
}
+ /* In case of binlog rotate, update the correct current binlog offset. */
+ commit_offset= my_b_write_tell(&log_file);
}
DEBUG_SYNC(leader->thd, "commit_before_get_LOCK_commit_ordered");
@@ -7521,8 +7586,18 @@ MYSQL_BIN_LOG::wait_for_sufficient_commits()
mysql_mutex_assert_owner(&LOCK_prepare_ordered);
for (e= last_head= group_commit_queue, count= 0; e; e= e->next)
+ {
if (++count >= opt_binlog_commit_wait_count)
+ {
+ group_commit_trigger_count++;
+ return;
+ }
+ if (unlikely(e->thd->has_waiter))
+ {
+ group_commit_trigger_lock_wait++;
return;
+ }
+ }
mysql_mutex_unlock(&LOCK_log);
set_timespec_nsec(wait_until, (ulonglong)1000*opt_binlog_commit_wait_usec);
@@ -7535,14 +7610,33 @@ MYSQL_BIN_LOG::wait_for_sufficient_commits()
err= mysql_cond_timedwait(&COND_prepare_ordered, &LOCK_prepare_ordered,
&wait_until);
if (err == ETIMEDOUT)
+ {
+ group_commit_trigger_timeout++;
break;
+ }
+ if (unlikely(last_head->thd->has_waiter))
+ {
+ group_commit_trigger_lock_wait++;
+ break;
+ }
head= group_commit_queue;
for (e= head; e && e != last_head; e= e->next)
+ {
++count;
+ if (unlikely(e->thd->has_waiter))
+ {
+ group_commit_trigger_lock_wait++;
+ goto after_loop;
+ }
+ }
if (count >= opt_binlog_commit_wait_count)
+ {
+ group_commit_trigger_count++;
break;
+ }
last_head= head;
}
+after_loop:
/*
We must not wait for LOCK_log while holding LOCK_prepare_ordered.
@@ -7566,6 +7660,42 @@ MYSQL_BIN_LOG::wait_for_sufficient_commits()
}
+void  /* Flag the head of the group commit queue and wake a pending wait. */
+MYSQL_BIN_LOG::binlog_trigger_immediate_group_commit()
+{
+ group_commit_entry *head;
+ mysql_mutex_lock(&LOCK_prepare_ordered);  /* queue head is read and flagged under this mutex */
+ head= group_commit_queue;
+ if (head)  /* empty queue: nothing to flag, nobody to wake */
+ {
+ head->thd->has_waiter= true;  /* flag tested by wait_for_sufficient_commits() */
+ mysql_cond_signal(&COND_prepare_ordered);  /* condition that wait_for_sufficient_commits() waits on */
+ }
+ mysql_mutex_unlock(&LOCK_prepare_ordered);
+}
+
+
+/*
+ This function is called when a transaction T1 goes to wait for another
+ transaction T2. It is used to cut short any binlog group commit delay from
+ --binlog-commit-wait-count in the case where another transaction is stalled
+ on the wait due to conflicting row locks.
+
+ If T2 is already ready to group commit, any waiting group commit will be
+ signalled to proceed immediately. Otherwise, a flag will be set in T2, and
+ when T2 later becomes ready, immediate group commit will be triggered.
+
+ Parameters:
+   thd1  THD of the waiting transaction (T1); not read by this function.
+   thd2  THD of the transaction being waited for (T2); its has_waiter
+         flag is set.
+
+ Does nothing when opt_binlog_commit_wait_count is 0.
+*/
+void
+binlog_report_wait_for(THD *thd1, THD *thd2)
+{
+ if (opt_binlog_commit_wait_count == 0)  /* wait count is 0: nothing to do */
+ return;
+ thd2->has_waiter= true;  /* NOTE(review): written without taking a mutex in this function -- confirm intended */
+ if (thd2->waiting_on_group_commit)  /* flag set by queue_for_group_commit() when T2 is queued */
+ mysql_bin_log.binlog_trigger_immediate_group_commit();
+}
+
+
/**
Wait until we get a signal that the relay log has been updated.
@@ -9444,7 +9574,17 @@ MYSQL_BIN_LOG::do_binlog_recovery(const char *opt_name, bool do_xa_recovery)
if (error != LOG_INFO_EOF)
sql_print_error("find_log_pos() failed (error: %d)", error);
else
+ {
error= read_state_from_file();
+ if (error == 2)
+ {
+ /*
+ Having no binlog files and no binlog state is not an error (e.g. the
+ initial server start after a fresh installation).
+ */
+ error= 0;
+ }
+ }
return error;
}
@@ -9470,15 +9610,42 @@ MYSQL_BIN_LOG::do_binlog_recovery(const char *opt_name, bool do_xa_recovery)
if ((ev= Log_event::read_log_event(&log, 0, &fdle,
opt_master_verify_checksum)) &&
- ev->get_type_code() == FORMAT_DESCRIPTION_EVENT &&
- ev->flags & LOG_EVENT_BINLOG_IN_USE_F)
+ ev->get_type_code() == FORMAT_DESCRIPTION_EVENT)
{
- sql_print_information("Recovering after a crash using %s", opt_name);
- error= recover(&log_info, log_name, &log,
- (Format_description_log_event *)ev, do_xa_recovery);
+ if (ev->flags & LOG_EVENT_BINLOG_IN_USE_F)
+ {
+ sql_print_information("Recovering after a crash using %s", opt_name);
+ error= recover(&log_info, log_name, &log,
+ (Format_description_log_event *)ev, do_xa_recovery);
+ }
+ else
+ {
+ error= read_state_from_file();
+ if (error == 2)
+ {
+ /*
+ The binlog exists, but the .state file is missing. This is normal if
+ this is the first master start after a major upgrade to 10.0 (with
+ GTID support).
+
+ However, it could also be that the .state file was lost somehow, and
+ in this case it could be a serious issue, as we would set the wrong
+ binlog state in the next binlog file to be created, and GTID
+ processing would be corrupted. A common way would be copying files
+ from an old server to a new one and forgetting the .state file.
+
+ So in this case, we want to try to recover the binlog state by
+ scanning the last binlog file (but we do not need any XA recovery).
+
+ ToDo: We could avoid one scan at first start after major upgrade, by
+ detecting that there is no GTID_LIST event at the start of the
+ binlog file, and stopping the scan in that case.
+ */
+ error= recover(&log_info, log_name, &log,
+ (Format_description_log_event *)ev, false);
+ }
+ }
}
- else
- error= read_state_from_file();
delete ev;
end_io_cache(&log);
@@ -9650,6 +9817,11 @@ TC_LOG_BINLOG::set_status_variables(THD *thd)
binlog_snapshot_position= last_commit_pos_offset;
}
mysql_mutex_unlock(&LOCK_commit_ordered);
+ mysql_mutex_lock(&LOCK_prepare_ordered);
+ binlog_status_group_commit_trigger_count= this->group_commit_trigger_count;
+ binlog_status_group_commit_trigger_timeout= this->group_commit_trigger_timeout;
+ binlog_status_group_commit_trigger_lock_wait= this->group_commit_trigger_lock_wait;
+ mysql_mutex_unlock(&LOCK_prepare_ordered);
if (have_snapshot)
{