summaryrefslogtreecommitdiff
path: root/sql/log.cc
diff options
context:
space:
mode:
Diffstat (limited to 'sql/log.cc')
-rw-r--r--sql/log.cc259
1 files changed, 252 insertions, 7 deletions
diff --git a/sql/log.cc b/sql/log.cc
index a673e72e26c..225aaae0549 100644
--- a/sql/log.cc
+++ b/sql/log.cc
@@ -25,12 +25,14 @@
#include "mysql_priv.h"
#include "sql_acl.h"
#include "sql_repl.h"
+#include "ha_innodb.h" // necessary to cut the binlog when crash recovery
#include <my_dir.h>
#include <stdarg.h>
#include <m_ctype.h> // For test_if_number
MYSQL_LOG mysql_log, mysql_slow_log, mysql_bin_log;
+ulong sync_binlog_counter= 0;
static bool test_if_number(const char *str,
long *res, bool allow_wildcards);
@@ -299,6 +301,7 @@ bool MYSQL_LOG::open(const char *log_name, enum_log_type log_type_arg,
if ((index_file_nr= my_open(index_file_name,
O_RDWR | O_CREAT | O_BINARY ,
MYF(MY_WME))) < 0 ||
+ my_sync(index_file_nr, MYF(MY_WME)) ||
init_io_cache(&index_file, index_file_nr,
IO_SIZE, WRITE_CACHE,
my_seek(index_file_nr,0L,MY_SEEK_END,MYF(0)),
@@ -359,16 +362,21 @@ bool MYSQL_LOG::open(const char *log_name, enum_log_type log_type_arg,
goto err;
bytes_written+= description_event_for_queue->get_event_len();
}
- if (flush_io_cache(&log_file))
+ if (flush_io_cache(&log_file) ||
+ my_sync(log_file.file, MYF(MY_WME)))
goto err;
if (write_file_name_to_index_file)
{
- /* As this is a new log file, we write the file name to the index file */
+ /*
+ As this is a new log file, we write the file name to the index
+ file. As every time we write to the index file, we sync it.
+ */
if (my_b_write(&index_file, (byte*) log_file_name,
strlen(log_file_name)) ||
my_b_write(&index_file, (byte*) "\n", 1) ||
- flush_io_cache(&index_file))
+ flush_io_cache(&index_file) ||
+ my_sync(index_file.file, MYF(MY_WME)))
goto err;
}
break;
@@ -449,7 +457,8 @@ static bool copy_up_file_and_fill(IO_CACHE *index_file, my_off_t offset)
goto err;
}
/* The following will either truncate the file or fill the end with \n' */
- if (my_chsize(file, offset - init_offset, '\n', MYF(MY_WME)))
+ if (my_chsize(file, offset - init_offset, '\n', MYF(MY_WME)) ||
+ my_sync(file, MYF(MY_WME)))
goto err;
/* Reset data in old index cache */
@@ -1048,6 +1057,8 @@ void MYSQL_LOG::new_file(bool need_lock)
open(old_name, save_log_type, new_name_ptr, index_file_name, io_cache_type,
no_auto_events, max_size, 1);
+ if (this == &mysql_bin_log)
+ report_pos_in_innodb();
my_free(old_name,MYF(0));
end:
@@ -1218,6 +1229,17 @@ bool MYSQL_LOG::write(THD *thd,enum enum_server_command command,
}
+inline bool sync_binlog(IO_CACHE *cache)
+{
+ if (sync_binlog_period == ++sync_binlog_counter && sync_binlog_period)
+ {
+ sync_binlog_counter= 0;
+ return my_sync(cache->file, MYF(MY_WME));
+ }
+ return 0;
+}
+
+
/*
Write an event to the binary log
*/
@@ -1285,6 +1307,54 @@ bool MYSQL_LOG::write(Log_event* event_info)
if (thd)
{
+#if MYSQL_VERSION_ID < 50000
+ /*
+ To make replication of charsets working in 4.1 we are writing values
+ of charset related variables before every statement in the binlog,
+ if values of those variables differ from global server-wide defaults.
+ We are using SET ONE_SHOT command so that the charset vars get reset
+ to default after the first non-SET statement.
+ In the next 5.0 this won't be needed as we will use the new binlog
+ format to store charset info.
+ */
+ if ((thd->variables.character_set_client->number !=
+ global_system_variables.collation_server->number) ||
+ (thd->variables.character_set_client->number !=
+ thd->variables.collation_connection->number) ||
+ (thd->variables.collation_server->number !=
+ thd->variables.collation_connection->number))
+ {
+ char buf[200];
+ int written= my_snprintf(buf, sizeof(buf)-1,
+ "SET ONE_SHOT CHARACTER_SET_CLIENT=%u,\
+COLLATION_CONNECTION=%u,COLLATION_DATABASE=%u,COLLATION_SERVER=%u",
+ (uint) thd->variables.character_set_client->number,
+ (uint) thd->variables.collation_connection->number,
+ (uint) thd->variables.collation_database->number,
+ (uint) thd->variables.collation_server->number);
+ Query_log_event e(thd, buf, written, 0);
+ e.set_log_pos(this);
+ if (e.write(file))
+ goto err;
+ }
+ /*
+ We use the same ONE_SHOT trick for making replication of time zones
+ working in 4.1. Again in 5.0 we have better means for doing this.
+ */
+ if (thd->time_zone_used &&
+ thd->variables.time_zone != global_system_variables.time_zone)
+ {
+ char buf[MAX_TIME_ZONE_NAME_LENGTH + 26];
+ char *buf_end= strxmov(buf, "SET ONE_SHOT TIME_ZONE='",
+ thd->variables.time_zone->get_name()->ptr(),
+ "'", NullS);
+ Query_log_event e(thd, buf, buf_end - buf, 0);
+ e.set_log_pos(this);
+ if (e.write(file))
+ goto err;
+ }
+#endif
+
if (thd->last_insert_id_used)
{
Intvar_log_event e(thd,(uchar) LAST_INSERT_ID_EVENT,
@@ -1398,9 +1468,9 @@ bool MYSQL_LOG::write(Log_event* event_info)
if (file == &log_file) // we are writing to the real log (disk)
{
- if (flush_io_cache(file))
+ if (flush_io_cache(file) || sync_binlog(file))
goto err;
-
+
if (opt_using_transactions && !my_b_tell(&thd->transaction.trans_log))
{
/*
@@ -1413,6 +1483,30 @@ bool MYSQL_LOG::write(Log_event* event_info)
if (event_info->get_type_code() == QUERY_EVENT ||
event_info->get_type_code() == EXEC_LOAD_EVENT)
{
+#ifndef DBUG_OFF
+ if (unlikely(opt_crash_binlog_innodb))
+ {
+ /*
+ This option is for use in rpl_crash_binlog_innodb.test.
+ 1st we want to verify that Binlog_dump thread cannot send the
+ event now (because of LOCK_log): we here tell the Binlog_dump
+ thread to wake up, sleep for the slave to have time to possibly
+ receive data from the master (it should not), and then crash.
+ 2nd we want to verify that at crash recovery the rolled back
+ event is cut from the binlog.
+ */
+ if (!(--opt_crash_binlog_innodb))
+ {
+ signal_update();
+ sleep(2);
+ fprintf(stderr,"This is a normal crash because of"
+ " --crash-binlog-innodb\n");
+ assert(0);
+ }
+ DBUG_PRINT("info",("opt_crash_binlog_innodb: %d",
+ opt_crash_binlog_innodb));
+ }
+#endif
error = ha_report_binlog_offset_and_commit(thd, log_file_name,
file->pos_in_file);
called_handler_commit=1;
@@ -1558,7 +1652,8 @@ bool MYSQL_LOG::write(THD *thd, IO_CACHE *cache, bool commit_or_rollback)
commit_or_rollback ? 6 : 8,
TRUE);
qinfo.set_log_pos(this);
- if (qinfo.write(&log_file) || flush_io_cache(&log_file))
+ if (qinfo.write(&log_file) || flush_io_cache(&log_file) ||
+ sync_binlog(&log_file))
goto err;
}
if (cache->error) // Error on read
@@ -1567,6 +1662,22 @@ bool MYSQL_LOG::write(THD *thd, IO_CACHE *cache, bool commit_or_rollback)
write_error=1; // Don't give more errors
goto err;
}
+#ifndef DBUG_OFF
+ if (unlikely(opt_crash_binlog_innodb))
+ {
+ /* see the previous MYSQL_LOG::write() method for a comment */
+ if (!(--opt_crash_binlog_innodb))
+ {
+ signal_update();
+ sleep(2);
+ fprintf(stderr, "This is a normal crash because of"
+ " --crash-binlog-innodb\n");
+ assert(0);
+ }
+ DBUG_PRINT("info",("opt_crash_binlog_innodb: %d",
+ opt_crash_binlog_innodb));
+ }
+#endif
if ((ha_report_binlog_offset_and_commit(thd, log_file_name,
log_file.pos_in_file)))
goto err;
@@ -1979,4 +2090,138 @@ bool flush_error_log()
}
+/*
+ If the server has InnoDB on, and InnoDB has published the position of the
+ last committed transaction (which happens only if a crash recovery occured at
+ this startup) then truncate the previous binary log at the position given by
+ InnoDB. If binlog is shorter than the position, print a message to the error
+ log.
+
+ SYNOPSIS
+ cut_spurious_tail()
+
+ RETURN VALUES
+ 1 Error
+ 0 Ok
+*/
+
+bool MYSQL_LOG::cut_spurious_tail()
+{
+ int error= 0;
+ char llbuf1[22], llbuf2[22];
+ ulonglong actual_size;
+
+ DBUG_ENTER("cut_spurious_tail");
+#ifdef HAVE_INNOBASE_DB
+ if (have_innodb != SHOW_OPTION_YES)
+ DBUG_RETURN(0);
+ /*
+ This is the place where we use information from InnoDB to cut the
+ binlog.
+ */
+ char *name= ha_innobase::get_mysql_bin_log_name();
+ ulonglong pos= ha_innobase::get_mysql_bin_log_pos();
+ if (name[0] == 0 || pos == ULONGLONG_MAX)
+ {
+ DBUG_PRINT("info", ("InnoDB has not set binlog info"));
+ DBUG_RETURN(0);
+ }
+ /* The binlog given by InnoDB normally is never an active binlog */
+ if (is_open() && is_active(name))
+ {
+ sql_print_error("Warning: after InnoDB crash recovery, InnoDB says that "
+ "the binary log of the previous run has the same name "
+ "'%s' as the current one; this is likely to be abnormal.",
+ name);
+ DBUG_RETURN(1);
+ }
+ sql_print_error("After InnoDB crash recovery, checking if the binary log "
+ "'%s' contains rolled back transactions which must be "
+ "removed from it...", name);
+ /* If we have a too long binlog, cut. If too short, print error */
+ int fd= my_open(name, O_EXCL | O_APPEND | O_BINARY | O_WRONLY, MYF(MY_WME));
+ if (fd < 0)
+ {
+ int save_errno= my_errno;
+ sql_print_error("Could not open the binary log '%s' for truncation.",
+ name);
+ if (save_errno != ENOENT)
+ sql_print_error("The binary log '%s' should not be used for "
+ "replication.", name);
+ DBUG_RETURN(1);
+ }
+
+ if (pos > (actual_size= my_seek(fd, 0L, MY_SEEK_END, MYF(MY_WME))))
+ {
+ /*
+ Note that when we have MyISAM rollback this error message should be
+ reconsidered.
+ */
+ sql_print_error("The binary log '%s' is shorter than its expected size "
+ "(actual: %s, expected: %s) so it misses at least one "
+ "committed transaction; so it should not be used for "
+ "replication or point-in-time recovery. You would need "
+ "to restart slaves from a fresh master's data "
+ "snapshot ",
+ name, llstr(actual_size, llbuf1),
+ llstr(pos, llbuf2));
+ error= 1;
+ goto err;
+ }
+ if (pos < actual_size)
+ {
+ sql_print_error("The binary log '%s' is bigger than its expected size "
+ "(actual: %s, expected: %s) so it contains a rolled back "
+ "transaction; now truncating that.", name,
+ llstr(actual_size, llbuf1), llstr(pos, llbuf2));
+ /*
+ As on some OS, my_chsize() can only pad with 0s instead of really
+ truncating. Then mysqlbinlog (and Binlog_dump thread) will error on
+ these zeroes. This is annoying, but not more (you just need to manually
+ switch replication to the next binlog). Fortunately, in my_chsize.c, it
+ says that all modern machines support real ftruncate().
+
+ */
+ if ((error= my_chsize(fd, pos, 0, MYF(MY_WME))))
+ goto err;
+ }
+err:
+ if (my_close(fd, MYF(MY_WME)))
+ error= 1;
+#endif
+ DBUG_RETURN(error);
+}
+
+
+/*
+ If the server has InnoDB on, store the binlog name and position into
+ InnoDB. This function is used every time we create a new binlog.
+ SYNOPSIS
+ report_pos_in_innodb()
+
+ NOTES
+ This cannot simply be done in MYSQL_LOG::open(), because when we create
+ the first binlog at startup, we have not called ha_init() yet so we cannot
+ write into InnoDB yet.
+
+ RETURN VALUES
+ 1 Error
+ 0 Ok
+*/
+
+void MYSQL_LOG::report_pos_in_innodb()
+{
+ DBUG_ENTER("report_pos_in_innodb");
+#ifdef HAVE_INNOBASE_DB
+ if (is_open() && have_innodb == SHOW_OPTION_YES)
+ {
+ DBUG_PRINT("info", ("Reporting binlog info into InnoDB - "
+ "name: '%s' position: %d",
+ log_file_name, my_b_tell(&log_file)));
+ innobase_store_binlog_offset_and_flush_log(log_file_name,
+ my_b_tell(&log_file));
+ }
+#endif
+ DBUG_VOID_RETURN;
+}