summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorunknown <knielsen@knielsen-hq.org>2011-04-08 09:39:33 +0200
committerunknown <knielsen@knielsen-hq.org>2011-04-08 09:39:33 +0200
commit64e43e1cc88c66bd89f95a5307020bcf60ba4b96 (patch)
tree1ae6299d5a75f92fa97b240b0670c81c9a301afe
parent8b427b7d5548aeb214c70b004cc9056b9f7a6f7c (diff)
parent86008e0ca2b864f1ab7a30e496d7c67c8fce06c2 (diff)
downloadmariadb-git-64e43e1cc88c66bd89f95a5307020bcf60ba4b96.tar.gz
Merge various replication-related patches into MariaDB 5.3:
- MWL#116 Group commit - MWL#136 Enhancements for START TRANSACTION WITH CONSISTENT SNAPSHOT - MWL#47 Annotate_rows_log_event - MWL#163 innodb_release_locks_early - Percona patch enhancing row-based replication for tables with no primary key
-rw-r--r--client/client_priv.h1
-rw-r--r--client/mysqlbinlog.cc149
-rw-r--r--client/mysqldump.c119
-rw-r--r--include/my_sys.h2
-rw-r--r--mysql-test/extra/binlog_tests/mix_innodb_myisam_binlog.test9
-rw-r--r--mysql-test/extra/rpl_tests/rpl_deadlock.test4
-rw-r--r--mysql-test/extra/rpl_tests/rpl_log.test1
-rw-r--r--mysql-test/extra/rpl_tests/rpl_row_annotate.test156
-rw-r--r--mysql-test/include/binlog_start_pos.inc26
-rw-r--r--mysql-test/include/show_binlog_events.inc2
-rw-r--r--mysql-test/include/show_binlog_events2.inc2
-rw-r--r--mysql-test/r/ctype_cp932_binlog_stm.result2
-rw-r--r--mysql-test/r/innodb_release_row_locks_early.result104
-rw-r--r--mysql-test/r/mysqlbinlog.result27
-rw-r--r--mysql-test/r/mysqldump-max.result57
-rw-r--r--mysql-test/r/xa_binlog.result32
-rw-r--r--mysql-test/std_data/words3.dat66
-rw-r--r--mysql-test/suite/binlog/r/binlog_ioerr.result28
-rw-r--r--mysql-test/suite/binlog/r/binlog_row_annotate.result1219
-rw-r--r--mysql-test/suite/binlog/r/binlog_stm_binlog.result4
-rw-r--r--mysql-test/suite/binlog/t/binlog_incident.test3
-rw-r--r--mysql-test/suite/binlog/t/binlog_ioerr.test30
-rw-r--r--mysql-test/suite/binlog/t/binlog_killed.test4
-rw-r--r--mysql-test/suite/binlog/t/binlog_killed_simulate.test3
-rw-r--r--mysql-test/suite/binlog/t/binlog_row_annotate-master.opt1
-rw-r--r--mysql-test/suite/binlog/t/binlog_row_annotate.test189
-rw-r--r--mysql-test/suite/innodb_plugin/r/binlog_consistent.result99
-rw-r--r--mysql-test/suite/innodb_plugin/r/group_commit.result63
-rw-r--r--mysql-test/suite/innodb_plugin/r/group_commit_binlog_pos.result35
-rw-r--r--mysql-test/suite/innodb_plugin/r/group_commit_binlog_pos_no_optimize_thread.result36
-rw-r--r--mysql-test/suite/innodb_plugin/r/group_commit_crash.result125
-rw-r--r--mysql-test/suite/innodb_plugin/r/group_commit_crash_no_optimize_thread.result125
-rw-r--r--mysql-test/suite/innodb_plugin/r/group_commit_no_optimize_thread.result63
-rw-r--r--mysql-test/suite/innodb_plugin/t/binlog_consistent.test88
-rw-r--r--mysql-test/suite/innodb_plugin/t/group_commit.test115
-rw-r--r--mysql-test/suite/innodb_plugin/t/group_commit_binlog_pos-master.opt1
-rw-r--r--mysql-test/suite/innodb_plugin/t/group_commit_binlog_pos.test89
-rw-r--r--mysql-test/suite/innodb_plugin/t/group_commit_binlog_pos_no_optimize_thread-master.opt1
-rw-r--r--mysql-test/suite/innodb_plugin/t/group_commit_binlog_pos_no_optimize_thread.test90
-rw-r--r--mysql-test/suite/innodb_plugin/t/group_commit_crash-master.opt1
-rw-r--r--mysql-test/suite/innodb_plugin/t/group_commit_crash.test82
-rw-r--r--mysql-test/suite/innodb_plugin/t/group_commit_crash_no_optimize_thread-master.opt1
-rw-r--r--mysql-test/suite/innodb_plugin/t/group_commit_crash_no_optimize_thread.test82
-rw-r--r--mysql-test/suite/innodb_plugin/t/group_commit_no_optimize_thread-master.opt1
-rw-r--r--mysql-test/suite/innodb_plugin/t/group_commit_no_optimize_thread.test115
-rw-r--r--mysql-test/suite/maria/r/maria-connect.result8
-rw-r--r--mysql-test/suite/maria/t/maria-connect.test7
-rw-r--r--mysql-test/suite/pbxt/r/pbxt_xa_binlog.result32
-rw-r--r--mysql-test/suite/pbxt/t/pbxt_xa_binlog.test32
-rw-r--r--mysql-test/suite/rpl/r/rpl_deadlock_innodb.result4
-rw-r--r--mysql-test/suite/rpl/r/rpl_row_annotate_do.result141
-rw-r--r--mysql-test/suite/rpl/r/rpl_row_annotate_dont.result123
-rw-r--r--mysql-test/suite/rpl/r/rpl_row_conflicts.result2
-rw-r--r--mysql-test/suite/rpl/r/rpl_row_index_choice.result140
-rw-r--r--mysql-test/suite/rpl/t/rpl_row_annotate_do-slave.opt1
-rw-r--r--mysql-test/suite/rpl/t/rpl_row_annotate_do.test16
-rw-r--r--mysql-test/suite/rpl/t/rpl_row_annotate_dont-slave.opt1
-rw-r--r--mysql-test/suite/rpl/t/rpl_row_annotate_dont.test9
-rw-r--r--mysql-test/suite/rpl/t/rpl_row_flsh_tbls.test3
-rw-r--r--mysql-test/suite/rpl/t/rpl_row_index_choice.test243
-rw-r--r--mysql-test/suite/rpl/t/rpl_row_mysqlbinlog.test18
-rw-r--r--mysql-test/suite/rpl/t/rpl_stm_flsh_tbls.test3
-rw-r--r--mysql-test/t/ctype_cp932_binlog_stm.test8
-rw-r--r--mysql-test/t/innodb_release_row_locks_early-master.opt1
-rw-r--r--mysql-test/t/innodb_release_row_locks_early.test136
-rw-r--r--mysql-test/t/mysqlbinlog-master.opt1
-rw-r--r--mysql-test/t/mysqlbinlog.test99
-rw-r--r--mysql-test/t/mysqlbinlog2.test34
-rw-r--r--mysql-test/t/mysqldump-max.test82
-rw-r--r--mysql-test/t/xa_binlog.test32
-rw-r--r--sql/handler.cc209
-rw-r--r--sql/handler.h90
-rw-r--r--sql/log.cc1278
-rw-r--r--sql/log.h140
-rw-r--r--sql/log_event.cc293
-rw-r--r--sql/log_event.h65
-rw-r--r--sql/mysql_priv.h4
-rw-r--r--sql/mysqld.cc18
-rw-r--r--sql/rpl_rli.cc4
-rw-r--r--sql/rpl_rli.h38
-rw-r--r--sql/set_var.cc3
-rw-r--r--sql/slave.cc63
-rw-r--r--sql/slave.h1
-rw-r--r--sql/sql_binlog.cc2
-rw-r--r--sql/sql_class.cc23
-rw-r--r--sql/sql_class.h22
-rw-r--r--sql/sql_insert.cc5
-rw-r--r--sql/sql_parse.cc4
-rw-r--r--sql/sql_repl.cc75
-rw-r--r--storage/innodb_plugin/handler/ha_innodb.cc271
-rw-r--r--storage/innodb_plugin/handler/ha_innodb.h15
-rw-r--r--storage/innodb_plugin/include/trx0trx.h11
-rw-r--r--storage/innodb_plugin/trx/trx0trx.c2
-rw-r--r--storage/pbxt/src/ha_pbxt.cc39
-rw-r--r--storage/pbxt/src/thread_xt.h3
-rw-r--r--storage/pbxt/src/xaction_xt.cc149
-rw-r--r--storage/pbxt/src/xaction_xt.h2
-rw-r--r--storage/xtradb/handler/ha_innodb.cc348
-rw-r--r--storage/xtradb/handler/ha_innodb.h15
-rw-r--r--storage/xtradb/include/srv0srv.h5
-rw-r--r--storage/xtradb/include/trx0sys.ic9
-rw-r--r--storage/xtradb/include/trx0trx.h11
-rw-r--r--storage/xtradb/srv/srv0srv.c5
-rw-r--r--storage/xtradb/trx/trx0trx.c2
-rwxr-xr-xtests/consistent_snapshot.pl107
105 files changed, 7066 insertions, 893 deletions
diff --git a/client/client_priv.h b/client/client_priv.h
index f6bde7a594c..0b617c564df 100644
--- a/client/client_priv.h
+++ b/client/client_priv.h
@@ -95,5 +95,6 @@ enum options_client
OPT_REWRITE_DB,
OPT_PLUGIN_DIR,
OPT_DEFAULT_PLUGIN,
+ OPT_SKIP_ANNOTATE_ROWS_EVENTS,
OPT_MAX_CLIENT_OPTION /* should be always the last */
};
diff --git a/client/mysqlbinlog.cc b/client/mysqlbinlog.cc
index b6668161ce5..6e0b5cf5234 100644
--- a/client/mysqlbinlog.cc
+++ b/client/mysqlbinlog.cc
@@ -108,6 +108,7 @@ static ulonglong rec_count= 0;
static short binlog_flags = 0;
static MYSQL* mysql = NULL;
static const char* dirname_for_local_load= 0;
+static bool opt_skip_annotate_rows_events= 0;
/**
Pointer to the Format_description_log_event of the currently active binlog.
@@ -129,6 +130,70 @@ enum Exit_status {
OK_STOP
};
+/**
+ Pointer to the last read Annotate_rows_log_event. Having read an
+ Annotate_rows event, we should not print it immediatedly because all
+ subsequent rbr events can be filtered away, and have to keep it for a while.
+ Also because of that when reading a remote Annotate event we have to keep
+ its binary log representation in a separately allocated buffer.
+*/
+static Annotate_rows_log_event *annotate_event= NULL;
+
+void free_annotate_event()
+{
+ if (annotate_event)
+ {
+ delete annotate_event;
+ annotate_event= 0;
+ }
+}
+
+Log_event* read_remote_annotate_event(uchar* net_buf, ulong event_len,
+ const char **error_msg)
+{
+ uchar *event_buf;
+ Log_event* event;
+
+ if (!(event_buf= (uchar*) my_malloc(event_len + 1, MYF(MY_WME))))
+ {
+ error("Out of memory");
+ return 0;
+ }
+
+ memcpy(event_buf, net_buf, event_len);
+ event_buf[event_len]= 0;
+
+ if (!(event= Log_event::read_log_event((const char*) event_buf, event_len,
+ error_msg, glob_description_event)))
+ {
+ my_free(event_buf, MYF(0));
+ return 0;
+ }
+ /*
+ Ensure the event->temp_buf is pointing to the allocated buffer.
+ (TRUE = free temp_buf on the event deletion)
+ */
+ event->register_temp_buf((char*)event_buf, TRUE);
+
+ return event;
+}
+
+void keep_annotate_event(Annotate_rows_log_event* event)
+{
+ free_annotate_event();
+ annotate_event= event;
+}
+
+void print_annotate_event(PRINT_EVENT_INFO *print_event_info)
+{
+ if (annotate_event)
+ {
+ annotate_event->print(result_file, print_event_info);
+ delete annotate_event; // the event should not be printed more than once
+ annotate_event= 0;
+ }
+}
+
static Exit_status dump_local_log_entries(PRINT_EVENT_INFO *print_event_info,
const char* logname);
static Exit_status dump_remote_log_entries(PRINT_EVENT_INFO *print_event_info,
@@ -783,8 +848,19 @@ Exit_status process_event(PRINT_EVENT_INFO *print_event_info, Log_event *ev,
case QUERY_EVENT:
{
Query_log_event *qe= (Query_log_event*)ev;
- if (!qe->is_trans_keyword() && shall_skip_database(qe->db))
- goto end;
+ if (!qe->is_trans_keyword())
+ {
+ if (shall_skip_database(qe->db))
+ goto end;
+ }
+ else
+ {
+ /*
+ In case the event for one of these statements is obtained
+ from binary log 5.0, make it compatible with 5.1
+ */
+ qe->flags|= LOG_EVENT_SUPPRESS_USE_F;
+ }
print_use_stmt(print_event_info, qe);
if (opt_base64_output_mode == BASE64_OUTPUT_ALWAYS)
{
@@ -933,6 +1009,19 @@ Exit_status process_event(PRINT_EVENT_INFO *print_event_info, Log_event *ev,
my_free(fname, MYF(MY_WME));
break;
}
+ case ANNOTATE_ROWS_EVENT:
+ if (!opt_skip_annotate_rows_events)
+ {
+ /*
+ We don't print Annotate event just now because all subsequent
+ rbr-events can be filtered away. Instead we'll keep the event
+ till it will be printed together with the first not filtered
+ away Table map or the last rbr will be processed.
+ */
+ keep_annotate_event((Annotate_rows_log_event*) ev);
+ destroy_evt= FALSE;
+ }
+ break;
case TABLE_MAP_EVENT:
{
Table_map_log_event *map= ((Table_map_log_event *)ev);
@@ -942,6 +1031,13 @@ Exit_status process_event(PRINT_EVENT_INFO *print_event_info, Log_event *ev,
destroy_evt= FALSE;
goto end;
}
+ /*
+ The Table map is to be printed, so it's just the time when we may
+ print the kept Annotate event (if there is any).
+ print_annotate_event() also deletes the kept Annotate event.
+ */
+ print_annotate_event(print_event_info);
+
size_t len_to= 0;
const char* db_to= binlog_filter->get_rewrite_db(map->get_db_name(), &len_to);
if (len_to && map->rewrite_db(db_to, len_to, glob_description_event))
@@ -978,6 +1074,13 @@ Exit_status process_event(PRINT_EVENT_INFO *print_event_info, Log_event *ev,
if (print_event_info->m_table_map_ignored.count() > 0)
print_event_info->m_table_map_ignored.clear_tables();
+ /*
+ If there is a kept Annotate event and all corresponding
+ rbr-events were filtered away, the Annotate event was not
+ freed and it is just the time to do it.
+ */
+ free_annotate_event();
+
/*
One needs to take into account an event that gets
filtered but was last event in the statement. If this is
@@ -1212,6 +1315,11 @@ that may lead to an endless loop.",
"Updates to a database with a different name than the original. \
Example: rewrite-db='from->to'.",
0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
+ {"skip-annotate-rows-events", OPT_SKIP_ANNOTATE_ROWS_EVENTS,
+ "Don't print Annotate_rows events stored in the binary log.",
+ (uchar**) &opt_skip_annotate_rows_events,
+ (uchar**) &opt_skip_annotate_rows_events,
+ 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}
};
@@ -1683,6 +1791,8 @@ static Exit_status dump_remote_log_entries(PRINT_EVENT_INFO *print_event_info,
cast to uint32.
*/
int4store(buf, (uint32)start_position);
+ if (!opt_skip_annotate_rows_events)
+ binlog_flags|= BINLOG_SEND_ANNOTATE_ROWS_EVENT;
int2store(buf + BIN_LOG_HEADER_SIZE, binlog_flags);
size_t tlen = strlen(logname);
@@ -1715,18 +1825,30 @@ static Exit_status dump_remote_log_entries(PRINT_EVENT_INFO *print_event_info,
break; // end of data
DBUG_PRINT("info",( "len: %lu net->read_pos[5]: %d\n",
len, net->read_pos[5]));
- if (!(ev= Log_event::read_log_event((const char*) net->read_pos + 1 ,
- len - 1, &error_msg,
- glob_description_event)))
+ if (net->read_pos[5] == ANNOTATE_ROWS_EVENT)
{
- error("Could not construct log event object: %s", error_msg);
- DBUG_RETURN(ERROR_STOP);
- }
- /*
- If reading from a remote host, ensure the temp_buf for the
- Log_event class is pointing to the incoming stream.
- */
- ev->register_temp_buf((char *) net->read_pos + 1, FALSE);
+ if (!(ev= read_remote_annotate_event(net->read_pos + 1, len - 1,
+ &error_msg)))
+ {
+ error("Could not construct annotate event object: %s", error_msg);
+ DBUG_RETURN(ERROR_STOP);
+ }
+ }
+ else
+ {
+ if (!(ev= Log_event::read_log_event((const char*) net->read_pos + 1 ,
+ len - 1, &error_msg,
+ glob_description_event)))
+ {
+ error("Could not construct log event object: %s", error_msg);
+ DBUG_RETURN(ERROR_STOP);
+ }
+ /*
+ If reading from a remote host, ensure the temp_buf for the
+ Log_event class is pointing to the incoming stream.
+ */
+ ev->register_temp_buf((char *) net->read_pos + 1, FALSE);
+ }
Log_event_type type= ev->get_type_code();
if (glob_description_event->binlog_version >= 3 ||
@@ -2236,6 +2358,7 @@ int main(int argc, char** argv)
if (result_file != stdout)
my_fclose(result_file, MYF(0));
cleanup();
+ free_annotate_event();
delete binlog_filter;
free_root(&s_mem_root, MYF(0));
free_defaults(defaults_argv);
diff --git a/client/mysqldump.c b/client/mysqldump.c
index 7a1590f8a95..efda677b5aa 100644
--- a/client/mysqldump.c
+++ b/client/mysqldump.c
@@ -77,6 +77,9 @@
#define IGNORE_DATA 0x01 /* don't dump data for this table */
#define IGNORE_INSERT_DELAYED 0x02 /* table doesn't support INSERT DELAYED */
+/* Chars needed to store LONGLONG, excluding trailing '\0'. */
+#define LONGLONG_LEN 20
+
static void add_load_option(DYNAMIC_STRING *str, const char *option,
const char *option_value);
static ulong find_set(TYPELIB *lib, const char *x, uint length,
@@ -344,9 +347,9 @@ static struct my_option my_long_options[] =
"This causes the binary log position and filename to be appended to the "
"output. If equal to 1, will print it as a CHANGE MASTER command; if equal"
" to 2, that command will be prefixed with a comment symbol. "
- "This option will turn --lock-all-tables on, unless "
- "--single-transaction is specified too (in which case a "
- "global read lock is only taken a short time at the beginning of the dump; "
+ "This option will turn --lock-all-tables on, unless --single-transaction "
+ "is specified too (on servers before MariaDB 5.3 this will still take a "
+ "global read lock for a short time at the beginning of the dump; "
"don't forget to read about --single-transaction below). In all cases, "
"any action on logs will happen at the exact moment of the dump. "
"Option automatically turns --lock-tables off.",
@@ -1110,6 +1113,44 @@ static int fetch_db_collation(const char *db_name,
}
+/*
+ Check if server supports non-blocking binlog position using the
+ binlog_snapshot_file and binlog_snapshot_position status variables. If it
+ does, also return the position obtained if output pointers are non-NULL.
+ Returns 1 if position available, 0 if not.
+*/
+static int
+check_consistent_binlog_pos(char *binlog_pos_file, char *binlog_pos_offset)
+{
+ MYSQL_RES *res;
+ MYSQL_ROW row;
+ int found;
+
+ if (mysql_query_with_error_report(mysql, &res,
+ "SHOW STATUS LIKE 'binlog_snapshot_%'"))
+ return 1;
+
+ found= 0;
+ while ((row= mysql_fetch_row(res)))
+ {
+ if (0 == strcmp(row[0], "binlog_snapshot_file"))
+ {
+ if (binlog_pos_file)
+ strmake(binlog_pos_file, row[1], FN_REFLEN-1);
+ found++;
+ }
+ else if (0 == strcmp(row[0], "binlog_snapshot_position"))
+ {
+ if (binlog_pos_offset)
+ strmake(binlog_pos_offset, row[1], LONGLONG_LEN);
+ found++;
+ }
+ }
+ mysql_free_result(res);
+
+ return (found == 2);
+}
+
static char *my_case_str(const char *str,
uint str_len,
const char *token,
@@ -4352,42 +4393,65 @@ static int dump_selected_tables(char *db, char **table_names, int tables)
} /* dump_selected_tables */
-static int do_show_master_status(MYSQL *mysql_con)
+static int do_show_master_status(MYSQL *mysql_con, int consistent_binlog_pos)
{
MYSQL_ROW row;
MYSQL_RES *master;
+ char binlog_pos_file[FN_REFLEN];
+ char binlog_pos_offset[LONGLONG_LEN+1];
+ char *file, *offset;
const char *comment_prefix=
(opt_master_data == MYSQL_OPT_MASTER_DATA_COMMENTED_SQL) ? "-- " : "";
- if (mysql_query_with_error_report(mysql_con, &master, "SHOW MASTER STATUS"))
+
+ if (consistent_binlog_pos)
{
- return 1;
+ if(!check_consistent_binlog_pos(binlog_pos_file, binlog_pos_offset))
+ return 1;
+ file= binlog_pos_file;
+ offset= binlog_pos_offset;
}
else
{
+ if (mysql_query_with_error_report(mysql_con, &master, "SHOW MASTER STATUS"))
+ return 1;
+
row= mysql_fetch_row(master);
if (row && row[0] && row[1])
{
- /* SHOW MASTER STATUS reports file and position */
- if (opt_comments)
- fprintf(md_result_file,
- "\n--\n-- Position to start replication or point-in-time "
- "recovery from\n--\n\n");
- fprintf(md_result_file,
- "%sCHANGE MASTER TO MASTER_LOG_FILE='%s', MASTER_LOG_POS=%s;\n",
- comment_prefix, row[0], row[1]);
- check_io(md_result_file);
+ file= row[0];
+ offset= row[1];
}
- else if (!ignore_errors)
+ else
{
- /* SHOW MASTER STATUS reports nothing and --force is not enabled */
- my_printf_error(0, "Error: Binlogging on server not active",
- MYF(0));
mysql_free_result(master);
- maybe_exit(EX_MYSQLERR);
- return 1;
+ if (!ignore_errors)
+ {
+ /* SHOW MASTER STATUS reports nothing and --force is not enabled */
+ my_printf_error(0, "Error: Binlogging on server not active",
+ MYF(0));
+ maybe_exit(EX_MYSQLERR);
+ return 1;
+ }
+ else
+ {
+ return 0;
+ }
}
- mysql_free_result(master);
}
+
+ /* SHOW MASTER STATUS reports file and position */
+ if (opt_comments)
+ fprintf(md_result_file,
+ "\n--\n-- Position to start replication or point-in-time "
+ "recovery from\n--\n\n");
+ fprintf(md_result_file,
+ "%sCHANGE MASTER TO MASTER_LOG_FILE='%s', MASTER_LOG_POS=%s;\n",
+ comment_prefix, file, offset);
+ check_io(md_result_file);
+
+ if (!consistent_binlog_pos)
+ mysql_free_result(master);
+
return 0;
}
@@ -5026,6 +5090,7 @@ int main(int argc, char **argv)
{
char bin_log_name[FN_REFLEN];
int exit_code;
+ int consistent_binlog_pos= 0;
MY_INIT("mysqldump");
compatible_mode_normal_str[0]= 0;
@@ -5056,7 +5121,13 @@ int main(int argc, char **argv)
if (!path)
write_header(md_result_file, *argv);
- if ((opt_lock_all_tables || opt_master_data) &&
+ if (opt_single_transaction && opt_master_data)
+ {
+ /* See if we can avoid FLUSH TABLES WITH READ LOCK (MariaDB 5.3+). */
+ consistent_binlog_pos= check_consistent_binlog_pos(NULL, NULL);
+ }
+
+ if ((opt_lock_all_tables || (opt_master_data && !consistent_binlog_pos)) &&
do_flush_tables_read_lock(mysql))
goto err;
if (opt_single_transaction && start_transaction(mysql))
@@ -5074,7 +5145,7 @@ int main(int argc, char **argv)
goto err;
flush_logs= 0; /* not anymore; that would not be sensible */
}
- if (opt_master_data && do_show_master_status(mysql))
+ if (opt_master_data && do_show_master_status(mysql, consistent_binlog_pos))
goto err;
if (opt_single_transaction && do_unlock_tables(mysql)) /* unlock but no commit! */
goto err;
diff --git a/include/my_sys.h b/include/my_sys.h
index d391492983d..65455990b04 100644
--- a/include/my_sys.h
+++ b/include/my_sys.h
@@ -561,6 +561,8 @@ typedef int (*qsort2_cmp)(const void *, const void *, const void *);
#define my_b_tell(info) ((info)->pos_in_file + \
(size_t) (*(info)->current_pos - (info)->request_pos))
+#define my_b_write_tell(info) ((info)->pos_in_file + \
+ ((info)->write_pos - (info)->write_buffer))
#define my_b_get_buffer_start(info) (info)->request_pos
#define my_b_get_bytes_in_buffer(info) (char*) (info)->read_end - \
diff --git a/mysql-test/extra/binlog_tests/mix_innodb_myisam_binlog.test b/mysql-test/extra/binlog_tests/mix_innodb_myisam_binlog.test
index 5db79e4f848..96c85e024e9 100644
--- a/mysql-test/extra/binlog_tests/mix_innodb_myisam_binlog.test
+++ b/mysql-test/extra/binlog_tests/mix_innodb_myisam_binlog.test
@@ -321,14 +321,19 @@ let $MYSQLD_DATADIR= `select @@datadir`;
# we check that the error code of the "ROLLBACK" event is 0 and not
# ER_SERVER_SHUTDOWN (i.e. disconnection just rolls back transaction
# and does not make slave to stop)
+
+-- source include/binlog_start_pos.inc
+
if (`select @@binlog_format = 'ROW'`)
{
- --exec $MYSQL_BINLOG --start-position=524 $MYSQLD_DATADIR/master-bin.000001 > $MYSQLTEST_VARDIR/tmp/mix_innodb_myisam_binlog.output
+ let $start_pos= `select @binlog_start_pos + 418`;
+ --exec $MYSQL_BINLOG --start-position=$start_pos $MYSQLD_DATADIR/master-bin.000001 > $MYSQLTEST_VARDIR/tmp/mix_innodb_myisam_binlog.output
}
if (`select @@binlog_format = 'STATEMENT' || @@binlog_format = 'MIXED'`)
{
- --exec $MYSQL_BINLOG --start-position=555 $MYSQLD_DATADIR/master-bin.000001 > $MYSQLTEST_VARDIR/tmp/mix_innodb_myisam_binlog.output
+ let $start_pos= `select @binlog_start_pos + 449`;
+ --exec $MYSQL_BINLOG --start-position=$start_pos $MYSQLD_DATADIR/master-bin.000001 > $MYSQLTEST_VARDIR/tmp/mix_innodb_myisam_binlog.output
}
--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
diff --git a/mysql-test/extra/rpl_tests/rpl_deadlock.test b/mysql-test/extra/rpl_tests/rpl_deadlock.test
index bd446c86943..0e862e041e6 100644
--- a/mysql-test/extra/rpl_tests/rpl_deadlock.test
+++ b/mysql-test/extra/rpl_tests/rpl_deadlock.test
@@ -72,7 +72,7 @@ connection slave;
--source include/stop_slave.inc
DELETE FROM t2;
# Set slave position to the BEGIN log event
---replace_result $master_pos_begin MASTER_POS_BEGIN
+--replace_result $master_pos_begin <master_pos_begin>
eval CHANGE MASTER TO MASTER_LOG_POS=$master_pos_begin;
BEGIN;
# Hold lock
@@ -103,7 +103,7 @@ SET global max_relay_log_size=0;
--source include/stop_slave.inc
DELETE FROM t2;
# Set slave position to the BEGIN log event
---replace_result $master_pos_begin MASTER_POS_BEGIN
+--replace_result $master_pos_begin <master_pos_begin>
eval CHANGE MASTER TO MASTER_LOG_POS=$master_pos_begin;
BEGIN;
# Hold lock
diff --git a/mysql-test/extra/rpl_tests/rpl_log.test b/mysql-test/extra/rpl_tests/rpl_log.test
index b812b6ebaee..4de8169a2e6 100644
--- a/mysql-test/extra/rpl_tests/rpl_log.test
+++ b/mysql-test/extra/rpl_tests/rpl_log.test
@@ -14,6 +14,7 @@ source include/stop_slave.inc;
reset master;
reset slave;
source include/start_slave.inc;
+source include/binlog_start_pos.inc;
let $VERSION=`select version()`;
diff --git a/mysql-test/extra/rpl_tests/rpl_row_annotate.test b/mysql-test/extra/rpl_tests/rpl_row_annotate.test
new file mode 100644
index 00000000000..77c5f626e28
--- /dev/null
+++ b/mysql-test/extra/rpl_tests/rpl_row_annotate.test
@@ -0,0 +1,156 @@
+########################################################################
+# WL47: Store in binlog text of statements that caused RBR events
+# new event : ANNOTATE_ROWS_EVENT
+# new master option : --binlog-annotate-rows-events
+# new slave option : --replicate-annotate-rows-events
+########################################################################
+--source include/master-slave.inc
+connect (master2,127.0.0.1,root,,test,$MASTER_MYPORT,);
+
+connection master;
+--disable_query_log
+
+--disable_warnings
+DROP DATABASE IF EXISTS test1;
+--enable_warnings
+
+CREATE DATABASE test1;
+USE test1;
+
+CREATE TABLE t1(a int primary key, b int);
+CREATE TABLE t2(a int, b int);
+CREATE TABLE t3(a int, b int);
+CREATE TABLE t4(a int, b int);
+CREATE TABLE xt1(a int, b int);
+CREATE TABLE xt2(a int, b int);
+
+CREATE TABLE t5 (
+ a INT PRIMARY KEY AUTO_INCREMENT,
+ b VARCHAR(10) CHARACTER SET utf8 COLLATE utf8_bin
+);
+
+SET SESSION binlog_annotate_rows_events = OFF;
+
+INSERT INTO t1 VALUES (0,0), (1,1);
+
+SET SESSION binlog_annotate_rows_events = ON;
+
+UPDATE t1 SET b = b + 1;
+REPLACE t1 VALUES (1,1), (2,2), (3,3);
+
+INSERT INTO t2 VALUES (1,1), (2,2), (3,3);
+INSERT INTO t3 VALUES (1,1), (2,2), (3,3);
+DELETE t1, t2 FROM t1 INNER JOIN t2 INNER JOIN t3 WHERE t1.a=t2.a AND t2.a=t3.a;
+
+INSERT INTO xt1 VALUES (1,1), (2,2), (3,3);
+INSERT INTO t2 VALUES (1,1), (2,2), (3,3);
+DELETE xt1, t2 FROM xt1 INNER JOIN t2 INNER JOIN t3 WHERE xt1.a=t2.a AND t2.a=t3.a;
+
+INSERT INTO xt1 VALUES (1,1), (2,2), (3,3);
+INSERT INTO xt2 VALUES (1,1), (2,2), (3,3);
+DELETE xt1, xt2 FROM xt1 INNER JOIN xt2 INNER JOIN t3 WHERE xt1.a=xt2.a AND xt2.a=t3.a;
+
+INSERT INTO t5(b) VALUES ('foo'), ('bar'), ('baz');
+SET NAMES latin1;
+INSERT INTO t5(b) VALUES ('gås');
+SET NAMES utf8;
+INSERT INTO t5(b) VALUES ('gås');
+SET NAMES latin1;
+
+FLUSH LOGS;
+
+--echo ########################################################################
+--echo # TABLES ON MASTER
+--echo ########################################################################
+--enable_query_log
+
+SELECT * FROM t1 ORDER BY a;
+SELECT * FROM t2 ORDER BY a;
+SELECT * FROM t3 ORDER BY a;
+SELECT * FROM t5 ORDER BY a;
+
+sync_slave_with_master;
+--echo ########################################################################
+--echo # TABLES ON SLAVE: should be the same as on master
+--echo ########################################################################
+--disable_query_log
+USE test1;
+--enable_query_log
+
+SELECT * FROM t1 ORDER BY a;
+SELECT * FROM t2 ORDER BY a;
+SELECT * FROM t3 ORDER BY a;
+SELECT * FROM t5 ORDER BY a;
+
+--echo ########################################################################
+--echo # EVENTS ON SLAVE
+let $annotate= `select @@global.replicate_annotate_rows_events`;
+if ($annotate)
+{
+ --echo # The following Annotate_rows events should appear below:
+ --echo # - UPDATE t1 SET b = b + 1;
+ --echo # - REPLACE t1 VALUES (1,1), (2,2), (3,3);
+ --echo # - INSERT INTO t2 VALUES (1,1), (2,2), (3,3)
+ --echo # - INSERT INTO t3 VALUES (1,1), (2,2), (3,3)
+ --echo # - DELETE t1, t2 FROM <...>
+ --echo # - INSERT INTO t2 VALUES (1,1), (2,2), (3,3)
+ --echo # - DELETE xt1, t2 FROM <...>
+ --echo # - INSERT INTO t5(b) VALUES <...> (3 instances)
+}
+if (!$annotate)
+{
+ --echo # No Annotate_rows events should appear below
+}
+--echo ########################################################################
+FLUSH LOGS;
+
+--source include/binlog_start_pos.inc
+let $start_pos= `select @binlog_start_pos`;
+--replace_column 2 # 5 #
+--replace_result $start_pos <start_pos>
+--replace_regex /table_id: [0-9]+/table_id: #/ /\/\* xid=.* \*\//\/* xid= *\//
+--eval show binlog events in 'slave-bin.000001' from $start_pos
+
+--echo #
+--echo ########################################################################
+--echo # INSERTs DELAYED ON MASTERs
+--echo ########################################################################
+connection master;
+SET SESSION binlog_annotate_rows_events = ON;
+INSERT DELAYED INTO test1.t4 VALUES (1,1);
+FLUSH TABLES;
+SELECT * FROM test1.t4 ORDER BY a;
+
+sync_slave_with_master;
+connection master;
+sync_slave_with_master;
+
+--echo ########################################################################
+--echo # ON SLAVE
+--echo # No Annotate_rows events should appear below
+--echo ########################################################################
+FLUSH LOGS;
+
+--exec $MYSQL --host=127.0.0.1 --port=$SLAVE_MYPORT test -e "show binlog events in 'slave-bin.000002'" > $MYSQLTEST_VARDIR/tmp/annotated_events.txt
+perl;
+ open F, '<', "$ENV{MYSQLTEST_VARDIR}/tmp/annotated_events.txt" or die;
+ binmode STDOUT;
+ while (defined ($_ = <F>)) {
+ if (/Annotate_rows/) {
+ s/[0-9]+\sAnnotate_rows\s[0-9]+\s[0-9]+/# Annotate_rows # #/;
+ print($_);
+ $_ = <F>;
+ s/[0-9]+\sTable_map\s[0-9]+\s[0-9]+\stable_id:\s[0-9]+/# Table_map # # table_id: #/;
+ print($_);
+ }
+ }
+EOF
+
+# Clean-up
+connection master;
+--disable_query_log
+DROP DATABASE test1;
+sync_slave_with_master;
+--enable_query_log
+
+--source include/rpl_end.inc
diff --git a/mysql-test/include/binlog_start_pos.inc b/mysql-test/include/binlog_start_pos.inc
new file mode 100644
index 00000000000..f6d1c4c68a3
--- /dev/null
+++ b/mysql-test/include/binlog_start_pos.inc
@@ -0,0 +1,26 @@
+##############################################################################
+#
+# binlog_start_pos is the postion of the the first event in the binary log
+# which follows the Format description event. Intended to reduce test suite
+# dependance on the Format description event length changes (e.g. in case
+# of adding new events). Evaluated as:
+#
+# binlog_start_pos = 4 /* binlog header */ +
+# (Format_description_log_event length)
+#
+# Format_description_log_event length =
+# 19 /* event common header */ +
+# 57 /* misc stuff in the Format description header */ +
+# number of events.
+#
+# With current number of events = 160,
+#
+# binlog_start_pos = 4 + 19 + 57 + 160 = 240.
+#
+##############################################################################
+
+let $binlog_start_pos=240;
+--disable_query_log
+SET @binlog_start_pos=240;
+--enable_query_log
+
diff --git a/mysql-test/include/show_binlog_events.inc b/mysql-test/include/show_binlog_events.inc
index 6d8c8196102..a55ac983a52 100644
--- a/mysql-test/include/show_binlog_events.inc
+++ b/mysql-test/include/show_binlog_events.inc
@@ -3,7 +3,7 @@
#
# Useage:
# let $binlog_file= master-bin.000002;
-# let $binlog_start= 106;
+# let $binlog_start= 240;
# let $binlog_limit= 1, 3;
# source include/show_binlog_events.inc;
#
diff --git a/mysql-test/include/show_binlog_events2.inc b/mysql-test/include/show_binlog_events2.inc
index 5dd272c562d..96e42e93f90 100644
--- a/mysql-test/include/show_binlog_events2.inc
+++ b/mysql-test/include/show_binlog_events2.inc
@@ -1,4 +1,4 @@
---let $binlog_start=106
+--let $binlog_start=240
--replace_result $binlog_start <binlog_start>
--replace_column 2 # 5 #
--replace_regex /\/\* xid=.* \*\//\/* XID *\// /table_id: [0-9]+/table_id: #/
diff --git a/mysql-test/r/ctype_cp932_binlog_stm.result b/mysql-test/r/ctype_cp932_binlog_stm.result
index 8854a835e25..20eb73f4c42 100644
--- a/mysql-test/r/ctype_cp932_binlog_stm.result
+++ b/mysql-test/r/ctype_cp932_binlog_stm.result
@@ -44,8 +44,6 @@ master-bin.000001 # Query # # use `test`; INSERT INTO t4 VALUES ( NAME_CONST('in
master-bin.000001 # Query # # use `test`; DROP PROCEDURE bug18293
master-bin.000001 # Query # # use `test`; DROP TABLE t4
End of 5.0 tests
-SHOW BINLOG EVENTS FROM 365;
-ERROR HY000: Error when executing command SHOW BINLOG EVENTS: Wrong offset or I/O error
Bug#44352 UPPER/LOWER function doesn't work correctly on cp932 and sjis environment.
CREATE TABLE t1 (a varchar(16)) character set cp932;
INSERT INTO t1 VALUES (0x8372835E),(0x8352835E);
diff --git a/mysql-test/r/innodb_release_row_locks_early.result b/mysql-test/r/innodb_release_row_locks_early.result
new file mode 100644
index 00000000000..d544dc8df1f
--- /dev/null
+++ b/mysql-test/r/innodb_release_row_locks_early.result
@@ -0,0 +1,104 @@
+DROP TABLE IF EXISTS t1;
+CREATE TABLE t1 (k INT NOT NULL, a INT NOT NULL, b INT NOT NULL, c INT NOT NULL, PRIMARY KEY(k)) ENGINE=InnoDB;
+INSERT INTO t1 (k, a, b, c) VALUES (1, 0, 0, 0);
+INSERT INTO t1 (k, a, b, c) VALUES (2, 0, 0, 0);
+INSERT INTO t1 (k, a, b, c) VALUES (3, 0, 0, 0);
+INSERT INTO t1 (k, a, b, c) VALUES (4, 0, 0, 0);
+RESET MASTER;
+SET DEBUG_SYNC= 'RESET';
+# Connection c1
+SET binlog_format= mixed;
+BEGIN;
+UPDATE t1 SET a=10 WHERE k=1;
+SET DEBUG_SYNC="commit_after_release_LOCK_prepare_ordered SIGNAL c1_prepared WAIT_FOR c2_committing";
+COMMIT;
+# Connection c2
+SET binlog_format= mixed;
+SET DEBUG_SYNC="now WAIT_FOR c1_prepared";
+BEGIN;
+SELECT * FROM t1 WHERE k=1 FOR UPDATE;
+k a b c
+1 10 0 0
+UPDATE t1 SET a=20 WHERE k=1;
+SET DEBUG_SYNC="now SIGNAL c2_committing";
+COMMIT;
+# Connection c1
+BEGIN;
+UPDATE t1 SET a=10 WHERE k=2;
+SET DEBUG_SYNC="commit_after_release_LOCK_prepare_ordered SIGNAL c1_prepared WAIT_FOR c2_committed TIMEOUT 2";
+COMMIT;
+# Connection c2
+SET DEBUG_SYNC="now WAIT_FOR c1_prepared";
+BEGIN;
+SELECT * FROM t1 WHERE k=2 FOR UPDATE;
+k a b c
+2 10 0 0
+UPDATE t1 SET a=20 WHERE k=2;
+SET DEBUG_SYNC="binlog_after_log_and_order SIGNAL c2_committed";
+COMMIT;
+# Connection c1
+# This should warn about DEBUG_SYNC timeout
+Warnings:
+Warning 1639 debug sync point wait timed out
+# Connection c2
+SHOW BINLOG EVENTS LIMIT 2,12;
+Log_name Pos Event_type Server_id End_log_pos Info
+master-bin.000001 # Query 1 # use `test`; UPDATE t1 SET a=10 WHERE k=1
+master-bin.000001 # Xid 1 # COMMIT /* xid=XX */
+master-bin.000001 # Query 1 # BEGIN
+master-bin.000001 # Query 1 # use `test`; UPDATE t1 SET a=20 WHERE k=1
+master-bin.000001 # Xid 1 # COMMIT /* xid=XX */
+master-bin.000001 # Query 1 # BEGIN
+master-bin.000001 # Query 1 # use `test`; UPDATE t1 SET a=10 WHERE k=2
+master-bin.000001 # Xid 1 # COMMIT /* xid=XX */
+master-bin.000001 # Query 1 # BEGIN
+master-bin.000001 # Query 1 # use `test`; UPDATE t1 SET a=20 WHERE k=2
+master-bin.000001 # Xid 1 # COMMIT /* xid=XX */
+# Connection c1
+RESET MASTER;
+SET DEBUG_SYNC="commit_after_release_LOCK_prepare_ordered SIGNAL c1_prepared WAIT_FOR c2_committing";
+UPDATE t1 SET a=10 WHERE k=3;
+# Connection c2
+SET DEBUG_SYNC="now WAIT_FOR c1_prepared";
+SELECT * FROM t1 WHERE k=3 FOR UPDATE;
+k a b c
+3 10 0 0
+SET DEBUG_SYNC="commit_after_release_LOCK_prepare_ordered SIGNAL c2_committing";
+UPDATE t1 SET a=20 WHERE k=3;
+# Connection c1
+SET DEBUG_SYNC="commit_after_release_LOCK_prepare_ordered SIGNAL c1_prepared WAIT_FOR c2_committed TIMEOUT 2";
+UPDATE t1 SET a=10 WHERE k=4;
+# Connection c2
+SET DEBUG_SYNC="now WAIT_FOR c1_prepared";
+SELECT * FROM t1 WHERE k=4 FOR UPDATE;
+k a b c
+4 10 0 0
+SET DEBUG_SYNC="binlog_after_log_and_order SIGNAL c2_committed";
+UPDATE t1 SET a=20 WHERE k=4;
+# Connection c1
+# This should warn about DEBUG_SYNC timeout
+Warnings:
+Warning 1639 debug sync point wait timed out
+# Connection c2
+SHOW BINLOG EVENTS LIMIT 1,12;
+Log_name Pos Event_type Server_id End_log_pos Info
+master-bin.000001 # Query 1 # BEGIN
+master-bin.000001 # Query 1 # use `test`; UPDATE t1 SET a=10 WHERE k=3
+master-bin.000001 # Xid 1 # COMMIT /* xid=XX */
+master-bin.000001 # Query 1 # BEGIN
+master-bin.000001 # Query 1 # use `test`; UPDATE t1 SET a=20 WHERE k=3
+master-bin.000001 # Xid 1 # COMMIT /* xid=XX */
+master-bin.000001 # Query 1 # BEGIN
+master-bin.000001 # Query 1 # use `test`; UPDATE t1 SET a=10 WHERE k=4
+master-bin.000001 # Xid 1 # COMMIT /* xid=XX */
+master-bin.000001 # Query 1 # BEGIN
+master-bin.000001 # Query 1 # use `test`; UPDATE t1 SET a=20 WHERE k=4
+master-bin.000001 # Xid 1 # COMMIT /* xid=XX */
+SELECT * FROM t1 ORDER BY k;
+k a b c
+1 20 0 0
+2 20 0 0
+3 20 0 0
+4 20 0 0
+DROP TABLE t1;
+SET DEBUG_SYNC= 'RESET';
diff --git a/mysql-test/r/mysqlbinlog.result b/mysql-test/r/mysqlbinlog.result
index 1f2e1ed67e0..7e228792b0d 100644
--- a/mysql-test/r/mysqlbinlog.result
+++ b/mysql-test/r/mysqlbinlog.result
@@ -1,15 +1,17 @@
reset master;
+SET @save_binlog_size= @@global.max_binlog_size;
+SET @@global.max_binlog_size= 4096;
set timestamp=1000000000;
drop table if exists t1,t2,t3,t4,t5,t03,t04;
create table t1 (word varchar(20));
create table t2 (id int auto_increment not null primary key);
insert into t1 values ("abirvalg");
insert into t2 values ();
-load data infile '../../std_data/words.dat' into table t1;
-load data infile '../../std_data/words.dat' into table t1;
-load data infile '../../std_data/words.dat' into table t1;
-load data infile '../../std_data/words.dat' into table t1;
-load data infile '../../std_data/words.dat' into table t1;
+load data infile '../../std_data/words3.dat' into table t1;
+load data infile '../../std_data/words3.dat' into table t1;
+load data infile '../../std_data/words3.dat' into table t1;
+load data infile '../../std_data/words3.dat' into table t1;
+load data infile '../../std_data/words3.dat' into table t1;
insert into t1 values ("Alas");
flush logs;
@@ -220,7 +222,6 @@ ROLLBACK /* added by mysqlbinlog */;
/*!50003 SET @OLD_COMPLETION_TYPE=@@COMPLETION_TYPE,COMPLETION_TYPE=0*/;
DELIMITER /*!*/;
ROLLBACK/*!*/;
-use test/*!*/;
SET TIMESTAMP=1108844556/*!*/;
SET @@session.pseudo_thread_id=999999999/*!*/;
SET @@session.auto_increment_increment=1, @@session.auto_increment_offset=1/*!*/;
@@ -228,6 +229,7 @@ SET @@session.lc_time_names=0/*!*/;
SET @@session.collation_database=DEFAULT/*!*/;
BEGIN
/*!*/;
+use test/*!*/;
SET TIMESTAMP=1108844555/*!*/;
insert t1 values (1)
/*!*/;
@@ -239,7 +241,6 @@ Warning: The option '--position' is deprecated and will be removed in a future r
/*!40019 SET @@session.max_insert_delayed_threads=0*/;
/*!50003 SET @OLD_COMPLETION_TYPE=@@COMPLETION_TYPE,COMPLETION_TYPE=0*/;
DELIMITER /*!*/;
-use test/*!*/;
SET TIMESTAMP=1108844556/*!*/;
SET @@session.pseudo_thread_id=999999999/*!*/;
SET @@session.auto_increment_increment=1, @@session.auto_increment_offset=1/*!*/;
@@ -247,6 +248,7 @@ SET @@session.lc_time_names=0/*!*/;
SET @@session.collation_database=DEFAULT/*!*/;
BEGIN
/*!*/;
+use test/*!*/;
SET TIMESTAMP=1108844555/*!*/;
insert t1 values (1)
/*!*/;
@@ -255,6 +257,7 @@ DELIMITER ;
ROLLBACK /* added by mysqlbinlog */;
/*!50003 SET COMPLETION_TYPE=@OLD_COMPLETION_TYPE*/;
drop table t1,t2;
+SET @@global.max_binlog_size= @save_binlog_size;
flush logs;
flush logs;
select * from t5 /* must be (1),(1) */;
@@ -377,14 +380,14 @@ LOAD DATA LOCAL INFILE 'MYSQLTEST_VARDIR/tmp/SQL_LOAD_MB-#-#' INTO TABLE `t1` FI
/*!*/;
SET TIMESTAMP=1000000000/*!*/;
SET @@session.collation_database=7/*!*/;
-LOAD DATA LOCAL INFILE 'MYSQLTEST_VARDIR/tmp/SQL_LOAD_MB-a-0' INTO TABLE `t1` FIELDS TERMINATED BY '\t' ENCLOSED BY '' ESCAPED BY '\\' LINES TERMINATED BY '\n' (`a`)
+LOAD DATA LOCAL INFILE 'MYSQLTEST_VARDIR/tmp/SQL_LOAD_MB-#-#' INTO TABLE `t1` FIELDS TERMINATED BY '\t' ENCLOSED BY '' ESCAPED BY '\\' LINES TERMINATED BY '\n' (`a`)
/*!*/;
SET TIMESTAMP=1000000000/*!*/;
SET @@session.collation_database=DEFAULT/*!*/;
-LOAD DATA LOCAL INFILE 'MYSQLTEST_VARDIR/tmp/SQL_LOAD_MB-b-0' INTO TABLE `t1` FIELDS TERMINATED BY '\t' ENCLOSED BY '' ESCAPED BY '\\' LINES TERMINATED BY '\n' (`a`)
+LOAD DATA LOCAL INFILE 'MYSQLTEST_VARDIR/tmp/SQL_LOAD_MB-#-#' INTO TABLE `t1` FIELDS TERMINATED BY '\t' ENCLOSED BY '' ESCAPED BY '\\' LINES TERMINATED BY '\n' (`a`)
/*!*/;
SET TIMESTAMP=1000000000/*!*/;
-LOAD DATA LOCAL INFILE 'MYSQLTEST_VARDIR/tmp/SQL_LOAD_MB-c-0' INTO TABLE `t1` CHARACTER SET koi8r FIELDS TERMINATED BY '\t' ENCLOSED BY '' ESCAPED BY '\\' LINES TERMINATED BY '\n' (`a`)
+LOAD DATA LOCAL INFILE 'MYSQLTEST_VARDIR/tmp/SQL_LOAD_MB-#-#' INTO TABLE `t1` CHARACTER SET koi8r FIELDS TERMINATED BY '\t' ENCLOSED BY '' ESCAPED BY '\\' LINES TERMINATED BY '\n' (`a`)
/*!*/;
SET TIMESTAMP=1000000000/*!*/;
drop table t1
@@ -581,7 +584,6 @@ SET @@session.lc_time_names=0/*!*/;
SET @@session.collation_database=DEFAULT/*!*/;
BEGIN
/*!*/;
-use test/*!*/;
SET TIMESTAMP=1266652094/*!*/;
SavePoint mixed_cases
/*!*/;
@@ -592,11 +594,9 @@ INSERT INTO db1.t2 VALUES("in savepoint mixed_cases")
SET TIMESTAMP=1266652094/*!*/;
INSERT INTO db1.t1 VALUES(40)
/*!*/;
-use test/*!*/;
SET TIMESTAMP=1266652094/*!*/;
ROLLBACK TO mixed_cases
/*!*/;
-use db1/*!*/;
SET TIMESTAMP=1266652094/*!*/;
INSERT INTO db1.t2 VALUES("after rollback to")
/*!*/;
@@ -624,7 +624,6 @@ SET @@session.lc_time_names=0/*!*/;
SET @@session.collation_database=DEFAULT/*!*/;
BEGIN
/*!*/;
-use test/*!*/;
SET TIMESTAMP=1266652094/*!*/;
SavePoint mixed_cases
/*!*/;
diff --git a/mysql-test/r/mysqldump-max.result b/mysql-test/r/mysqldump-max.result
index c300f3d7996..cc5fe207800 100644
--- a/mysql-test/r/mysqldump-max.result
+++ b/mysql-test/r/mysqldump-max.result
@@ -290,3 +290,60 @@ COUNT(*)
DROP VIEW v1;
DROP TABLE t1;
SET GLOBAL storage_engine=@old_engine;
+# Connection default
+SET binlog_format= mixed;
+RESET MASTER;
+CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=InnoDB;
+INSERT INTO t1 VALUES (1),(2);
+CREATE TABLE t2 (a INT PRIMARY KEY, b INT) ENGINE=InnoDB;
+INSERT INTO t2 VALUES (1,0), (2,0);
+SELECT GET_LOCK("block_queries_1", 120);
+GET_LOCK("block_queries_1", 120)
+1
+# Connection c3
+SELECT GET_LOCK("block_queries_2", 120);
+GET_LOCK("block_queries_2", 120)
+1
+# Connection c1
+SET @c= 0;
+SELECT IF(@c<1, @c:=@c+1, GET_LOCK("block_queries_1", 120)) FROM t1 ORDER BY a;
+# Connection c2
+SET binlog_format="row";
+SET @d= 10;
+UPDATE t2 SET b=IF(@d<=10, @d:=@d+1, GET_LOCK("block_queries_2", 120)) ORDER BY a;
+# Connection default
+# Make sure other queries are running (and waiting).
+SELECT RELEASE_LOCK("block_queries_1");
+RELEASE_LOCK("block_queries_1")
+1
+# Connection c3
+SELECT RELEASE_LOCK("block_queries_2");
+RELEASE_LOCK("block_queries_2")
+1
+# Connection c1
+IF(@c<1, @c:=@c+1, GET_LOCK("block_queries_1", 120))
+1
+1
+# Connection c2
+# Connection default
+SELECT * FROM t2 ORDER BY a;
+a b
+1 11
+2 1
+DROP TABLE t1;
+DROP TABLE t2;
+SHOW BINLOG EVENTS LIMIT 6,3;
+Log_name Pos Event_type Server_id End_log_pos Info
+master-bin.000001 658 Query 1 726 BEGIN
+master-bin.000001 726 Query 1 823 use `test`; INSERT INTO t2 VALUES (1,0), (2,0)
+master-bin.000001 823 Xid 1 850 COMMIT /* XID */
+-- CHANGE MASTER TO MASTER_LOG_FILE='master-bin.000001', MASTER_LOG_POS=850;
+SELECT * FROM t1 ORDER BY a;
+a
+1
+2
+SELECT * FROM t2 ORDER BY a;
+a b
+1 0
+2 0
+DROP TABLE t1,t2;
diff --git a/mysql-test/r/xa_binlog.result b/mysql-test/r/xa_binlog.result
new file mode 100644
index 00000000000..3ce64953902
--- /dev/null
+++ b/mysql-test/r/xa_binlog.result
@@ -0,0 +1,32 @@
+CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=InnoDB;
+SET binlog_format= mixed;
+RESET MASTER;
+XA START 'xatest';
+INSERT INTO t1 VALUES (1);
+XA END 'xatest';
+XA PREPARE 'xatest';
+XA COMMIT 'xatest';
+XA START 'xatest';
+INSERT INTO t1 VALUES (2);
+XA END 'xatest';
+XA COMMIT 'xatest' ONE PHASE;
+BEGIN;
+INSERT INTO t1 VALUES (3);
+COMMIT;
+SELECT * FROM t1 ORDER BY a;
+a
+1
+2
+3
+SHOW BINLOG EVENTS LIMIT 1,9;
+Log_name Pos Event_type Server_id End_log_pos Info
+master-bin.000001 # Query 1 # BEGIN
+master-bin.000001 # Query 1 # use `test`; INSERT INTO t1 VALUES (1)
+master-bin.000001 # Query 1 # COMMIT
+master-bin.000001 # Query 1 # BEGIN
+master-bin.000001 # Query 1 # use `test`; INSERT INTO t1 VALUES (2)
+master-bin.000001 # Query 1 # COMMIT
+master-bin.000001 # Query 1 # BEGIN
+master-bin.000001 # Query 1 # use `test`; INSERT INTO t1 VALUES (3)
+master-bin.000001 # Xid 1 # COMMIT /* xid=XX */
+DROP TABLE t1;
diff --git a/mysql-test/std_data/words3.dat b/mysql-test/std_data/words3.dat
new file mode 100644
index 00000000000..dca0819721d
--- /dev/null
+++ b/mysql-test/std_data/words3.dat
@@ -0,0 +1,66 @@
+Aarhus
+Aaron
+Ababa
+aback
+abaft
+abandon
+abandoned
+abandoning
+abandonment
+abandons
+Aarhus
+Aaron
+Ababa
+aback
+abaft
+abandon
+abandoned
+abandoning
+abandonment
+abandons
+abase
+abased
+abasement
+abasements
+abases
+abash
+abashed
+abashes
+abashing
+abasing
+abate
+abated
+abatement
+abatements
+abater
+abates
+abating
+Abba
+abbe
+abbey
+abbeys
+abbot
+abbots
+Abbott
+abbreviate
+abbreviated
+abbreviates
+abbreviating
+abbreviation
+abbreviations
+Abby
+abdomen
+abdomens
+abdominal
+abduct
+abducted
+abduction
+abductions
+abductor
+abductors
+abducts
+Abe
+abed
+Abel
+Abelian
+Abelson
diff --git a/mysql-test/suite/binlog/r/binlog_ioerr.result b/mysql-test/suite/binlog/r/binlog_ioerr.result
new file mode 100644
index 00000000000..04ac0340746
--- /dev/null
+++ b/mysql-test/suite/binlog/r/binlog_ioerr.result
@@ -0,0 +1,28 @@
+CALL mtr.add_suppression("Error writing file 'master-bin'");
+RESET MASTER;
+CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=innodb;
+INSERT INTO t1 VALUES(0);
+SET SESSION debug='+d,fail_binlog_write_1';
+INSERT INTO t1 VALUES(1);
+ERROR HY000: Error writing file 'master-bin' (errno: 28)
+INSERT INTO t1 VALUES(2);
+ERROR HY000: Error writing file 'master-bin' (errno: 28)
+SET SESSION debug='';
+INSERT INTO t1 VALUES(3);
+SELECT * FROM t1;
+a
+0
+3
+SHOW BINLOG EVENTS;
+Log_name Pos Event_type Server_id End_log_pos Info
+BINLOG POS Format_desc 1 ENDPOS Server ver: #, Binlog ver: #
+BINLOG POS Query 1 ENDPOS use `test`; CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=innodb
+BINLOG POS Query 1 ENDPOS BEGIN
+BINLOG POS Query 1 ENDPOS use `test`; INSERT INTO t1 VALUES(0)
+BINLOG POS Xid 1 ENDPOS COMMIT /* XID */
+BINLOG POS Query 1 ENDPOS BEGIN
+BINLOG POS Query 1 ENDPOS BEGIN
+BINLOG POS Query 1 ENDPOS BEGIN
+BINLOG POS Query 1 ENDPOS use `test`; INSERT INTO t1 VALUES(3)
+BINLOG POS Xid 1 ENDPOS COMMIT /* XID */
+DROP TABLE t1;
diff --git a/mysql-test/suite/binlog/r/binlog_row_annotate.result b/mysql-test/suite/binlog/r/binlog_row_annotate.result
new file mode 100644
index 00000000000..647c07af2f8
--- /dev/null
+++ b/mysql-test/suite/binlog/r/binlog_row_annotate.result
@@ -0,0 +1,1219 @@
+#####################################################################################
+# The following Annotate_rows events should appear below:
+# - INSERT INTO test2.t2 VALUES (1), (2), (3)
+# - INSERT INTO test3.t3 VALUES (1), (2), (3)
+# - DELETE test1.t1, test2.t2 FROM <...>
+# - INSERT INTO test2.t2 VALUES (1), (2), (3)
+# - DELETE xtest1.xt1, test2.t2 FROM <...>
+#####################################################################################
+show binlog events in 'master-bin.000001' from <start_pos>;
+Log_name Pos Event_type Server_id End_log_pos Info
+master-bin.000001 # Query 1 # DROP DATABASE IF EXISTS test1
+master-bin.000001 # Query 1 # DROP DATABASE IF EXISTS test2
+master-bin.000001 # Query 1 # DROP DATABASE IF EXISTS test3
+master-bin.000001 # Query 1 # CREATE DATABASE test1
+master-bin.000001 # Query 1 # CREATE DATABASE test2
+master-bin.000001 # Query 1 # CREATE DATABASE test3
+master-bin.000001 # Query 1 # BEGIN
+master-bin.000001 # Table_map 1 # table_id: # (test1.t1)
+master-bin.000001 # Write_rows 1 # table_id: # flags: STMT_END_F
+master-bin.000001 # Query 1 # COMMIT
+master-bin.000001 # Query 1 # BEGIN
+master-bin.000001 # Annotate_rows 1 # INSERT INTO test2.t2 VALUES (1), (2), (3)
+master-bin.000001 # Table_map 1 # table_id: # (test2.t2)
+master-bin.000001 # Write_rows 1 # table_id: # flags: STMT_END_F
+master-bin.000001 # Query 1 # COMMIT
+master-bin.000001 # Query 1 # BEGIN
+master-bin.000001 # Annotate_rows 1 # INSERT INTO test3.t3 VALUES (1), (2), (3)
+master-bin.000001 # Table_map 1 # table_id: # (test3.t3)
+master-bin.000001 # Write_rows 1 # table_id: # flags: STMT_END_F
+master-bin.000001 # Query 1 # COMMIT
+master-bin.000001 # Query 1 # BEGIN
+master-bin.000001 # Annotate_rows 1 # DELETE test1.t1, test2.t2
+FROM test1.t1 INNER JOIN test2.t2 INNER JOIN test3.t3
+WHERE test1.t1.a=test2.t2.a AND test2.t2.a=test3.t3.a
+master-bin.000001 # Table_map 1 # table_id: # (test1.t1)
+master-bin.000001 # Table_map 1 # table_id: # (test2.t2)
+master-bin.000001 # Delete_rows 1 # table_id: #
+master-bin.000001 # Delete_rows 1 # table_id: # flags: STMT_END_F
+master-bin.000001 # Query 1 # COMMIT
+master-bin.000001 # Query 1 # BEGIN
+master-bin.000001 # Annotate_rows 1 # INSERT INTO test2.v2 VALUES (1), (2), (3)
+master-bin.000001 # Table_map 1 # table_id: # (test2.t2)
+master-bin.000001 # Write_rows 1 # table_id: # flags: STMT_END_F
+master-bin.000001 # Query 1 # COMMIT
+master-bin.000001 # Query 1 # BEGIN
+master-bin.000001 # Annotate_rows 1 # DELETE xtest1.xt1, test2.t2
+FROM xtest1.xt1 INNER JOIN test2.t2 INNER JOIN test3.t3
+WHERE xtest1.xt1.a=test2.t2.a AND test2.t2.a=test3.t3.a
+master-bin.000001 # Table_map 1 # table_id: # (test2.t2)
+master-bin.000001 # Delete_rows 1 # table_id: # flags: STMT_END_F
+master-bin.000001 # Query 1 # COMMIT
+master-bin.000001 # Rotate 1 # master-bin.000002;pos=4
+#
+#####################################################################################
+# mysqlbinlog
+# The following Annotates should appear in this output:
+# - INSERT INTO test2.t2 VALUES (1), (2), (3)
+# - INSERT INTO test3.t3 VALUES (1), (2), (3)
+# - DELETE test1.t1, test2.t2 FROM <...> (with two subsequent Table maps)
+# - INSERT INTO test2.t2 VALUES (1), (2), (3)
+# - DELETE xtest1.xt1, test2.t2 FROM <...> (with one subsequent Table map)
+#####################################################################################
+/*!40019 SET @@session.max_insert_delayed_threads=0*/;
+/*!50003 SET @OLD_COMPLETION_TYPE=@@COMPLETION_TYPE,COMPLETION_TYPE=0*/;
+DELIMITER /*!*/;
+# at #
+#010909 4:46:40 server id # end_log_pos # Start: binlog v 4, server v #.##.## created 010909 4:46:40 at startup
+ROLLBACK/*!*/;
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+SET @@session.pseudo_thread_id=#/*!*/;
+SET @@session.foreign_key_checks=1, @@session.sql_auto_is_null=1, @@session.unique_checks=1, @@session.autocommit=1/*!*/;
+SET @@session.sql_mode=0/*!*/;
+SET @@session.auto_increment_increment=1, @@session.auto_increment_offset=1/*!*/;
+/*!\C latin1 *//*!*/;
+SET @@session.character_set_client=8,@@session.collation_connection=8,@@session.collation_server=8/*!*/;
+SET @@session.lc_time_names=0/*!*/;
+SET @@session.collation_database=DEFAULT/*!*/;
+DROP DATABASE IF EXISTS test1
+/*!*/;
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+DROP DATABASE IF EXISTS test2
+/*!*/;
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+DROP DATABASE IF EXISTS test3
+/*!*/;
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+CREATE DATABASE test1
+/*!*/;
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+CREATE DATABASE test2
+/*!*/;
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+CREATE DATABASE test3
+/*!*/;
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+BEGIN
+/*!*/;
+# at #
+# at #
+#010909 4:46:40 server id # end_log_pos # Table_map: `test1`.`t1` mapped to number #
+#010909 4:46:40 server id # end_log_pos # Write_rows: table id # flags: STMT_END_F
+### INSERT INTO test1.t1
+### SET
+### @1=1 /* INT meta=0 nullable=1 is_null=0 */
+### INSERT INTO test1.t1
+### SET
+### @1=2 /* INT meta=0 nullable=1 is_null=0 */
+### INSERT INTO test1.t1
+### SET
+### @1=3 /* INT meta=0 nullable=1 is_null=0 */
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+COMMIT
+/*!*/;
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+BEGIN
+/*!*/;
+# at #
+# at #
+# at #
+#010909 4:46:40 server id # end_log_pos # Annotate_rows:
+#Q> INSERT INTO test2.t2 VALUES (1), (2), (3)
+#010909 4:46:40 server id # end_log_pos # Table_map: `test2`.`t2` mapped to number #
+#010909 4:46:40 server id # end_log_pos # Write_rows: table id # flags: STMT_END_F
+### INSERT INTO test2.t2
+### SET
+### @1=1 /* INT meta=0 nullable=1 is_null=0 */
+### INSERT INTO test2.t2
+### SET
+### @1=2 /* INT meta=0 nullable=1 is_null=0 */
+### INSERT INTO test2.t2
+### SET
+### @1=3 /* INT meta=0 nullable=1 is_null=0 */
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+COMMIT
+/*!*/;
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+BEGIN
+/*!*/;
+# at #
+# at #
+# at #
+#010909 4:46:40 server id # end_log_pos # Annotate_rows:
+#Q> INSERT INTO test3.t3 VALUES (1), (2), (3)
+#010909 4:46:40 server id # end_log_pos # Table_map: `test3`.`t3` mapped to number #
+#010909 4:46:40 server id # end_log_pos # Write_rows: table id # flags: STMT_END_F
+### INSERT INTO test3.t3
+### SET
+### @1=1 /* INT meta=0 nullable=1 is_null=0 */
+### INSERT INTO test3.t3
+### SET
+### @1=2 /* INT meta=0 nullable=1 is_null=0 */
+### INSERT INTO test3.t3
+### SET
+### @1=3 /* INT meta=0 nullable=1 is_null=0 */
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+COMMIT
+/*!*/;
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+BEGIN
+/*!*/;
+# at #
+# at #
+# at #
+# at #
+# at #
+#010909 4:46:40 server id # end_log_pos # Annotate_rows:
+#Q> DELETE test1.t1, test2.t2
+#Q> FROM test1.t1 INNER JOIN test2.t2 INNER JOIN test3.t3
+#Q> WHERE test1.t1.a=test2.t2.a AND test2.t2.a=test3.t3
+#010909 4:46:40 server id # end_log_pos # Table_map: `test1`.`t1` mapped to number #
+#010909 4:46:40 server id # end_log_pos # Table_map: `test2`.`t2` mapped to number #
+#010909 4:46:40 server id # end_log_pos # Delete_rows: table id #
+#010909 4:46:40 server id # end_log_pos # Delete_rows: table id # flags: STMT_END_F
+### DELETE FROM test1.t1
+### WHERE
+### @1=1 /* INT meta=0 nullable=1 is_null=0 */
+### DELETE FROM test1.t1
+### WHERE
+### @1=2 /* INT meta=0 nullable=1 is_null=0 */
+### DELETE FROM test1.t1
+### WHERE
+### @1=3 /* INT meta=0 nullable=1 is_null=0 */
+### DELETE FROM test2.t2
+### WHERE
+### @1=1 /* INT meta=0 nullable=1 is_null=0 */
+### DELETE FROM test2.t2
+### WHERE
+### @1=2 /* INT meta=0 nullable=1 is_null=0 */
+### DELETE FROM test2.t2
+### WHERE
+### @1=3 /* INT meta=0 nullable=1 is_null=0 */
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+COMMIT
+/*!*/;
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+BEGIN
+/*!*/;
+# at #
+# at #
+# at #
+#010909 4:46:40 server id # end_log_pos # Annotate_rows:
+#Q> INSERT INTO test2.v2 VALUES (1), (2), (3)
+#010909 4:46:40 server id # end_log_pos # Table_map: `test2`.`t2` mapped to number #
+#010909 4:46:40 server id # end_log_pos # Write_rows: table id # flags: STMT_END_F
+### INSERT INTO test2.t2
+### SET
+### @1=1 /* INT meta=0 nullable=1 is_null=0 */
+### INSERT INTO test2.t2
+### SET
+### @1=2 /* INT meta=0 nullable=1 is_null=0 */
+### INSERT INTO test2.t2
+### SET
+### @1=3 /* INT meta=0 nullable=1 is_null=0 */
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+COMMIT
+/*!*/;
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+BEGIN
+/*!*/;
+# at #
+# at #
+# at #
+#010909 4:46:40 server id # end_log_pos # Annotate_rows:
+#Q> DELETE xtest1.xt1, test2.t2
+#Q> FROM xtest1.xt1 INNER JOIN test2.t2 INNER JOIN test3.t3
+#Q> WHERE xtest1.xt1.a=test2.t2.a AND test2.t2.a=test3.t3
+#010909 4:46:40 server id # end_log_pos # Table_map: `test2`.`t2` mapped to number #
+#010909 4:46:40 server id # end_log_pos # Delete_rows: table id # flags: STMT_END_F
+### DELETE FROM test2.t2
+### WHERE
+### @1=3 /* INT meta=0 nullable=1 is_null=0 */
+### DELETE FROM test2.t2
+### WHERE
+### @1=2 /* INT meta=0 nullable=1 is_null=0 */
+### DELETE FROM test2.t2
+### WHERE
+### @1=1 /* INT meta=0 nullable=1 is_null=0 */
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+COMMIT
+/*!*/;
+# at #
+#010909 4:46:40 server id # end_log_pos # Rotate to master-bin.000002 pos: 4
+DELIMITER ;
+# End of log file
+ROLLBACK /* added by mysqlbinlog */;
+/*!50003 SET COMPLETION_TYPE=@OLD_COMPLETION_TYPE*/;
+#
+#####################################################################################
+# mysqlbinlog --database=test1
+# The following Annotate should appear in this output:
+# - DELETE test1.t1, test2.t2 FROM <...>
+#####################################################################################
+/*!40019 SET @@session.max_insert_delayed_threads=0*/;
+/*!50003 SET @OLD_COMPLETION_TYPE=@@COMPLETION_TYPE,COMPLETION_TYPE=0*/;
+DELIMITER /*!*/;
+# at #
+#010909 4:46:40 server id # end_log_pos # Start: binlog v 4, server v #.##.## created 010909 4:46:40 at startup
+ROLLBACK/*!*/;
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+SET @@session.pseudo_thread_id=#/*!*/;
+SET @@session.foreign_key_checks=1, @@session.sql_auto_is_null=1, @@session.unique_checks=1, @@session.autocommit=1/*!*/;
+SET @@session.sql_mode=0/*!*/;
+SET @@session.auto_increment_increment=1, @@session.auto_increment_offset=1/*!*/;
+/*!\C latin1 *//*!*/;
+SET @@session.character_set_client=8,@@session.collation_connection=8,@@session.collation_server=8/*!*/;
+SET @@session.lc_time_names=0/*!*/;
+SET @@session.collation_database=DEFAULT/*!*/;
+DROP DATABASE IF EXISTS test1
+/*!*/;
+# at #
+# at #
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+CREATE DATABASE test1
+/*!*/;
+# at #
+# at #
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+BEGIN
+/*!*/;
+# at #
+# at #
+#010909 4:46:40 server id # end_log_pos # Table_map: `test1`.`t1` mapped to number #
+#010909 4:46:40 server id # end_log_pos # Write_rows: table id # flags: STMT_END_F
+### INSERT INTO test1.t1
+### SET
+### @1=1 /* INT meta=0 nullable=1 is_null=0 */
+### INSERT INTO test1.t1
+### SET
+### @1=2 /* INT meta=0 nullable=1 is_null=0 */
+### INSERT INTO test1.t1
+### SET
+### @1=3 /* INT meta=0 nullable=1 is_null=0 */
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+COMMIT
+/*!*/;
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+BEGIN
+/*!*/;
+# at #
+# at #
+# at #
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+COMMIT
+/*!*/;
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+BEGIN
+/*!*/;
+# at #
+# at #
+# at #
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+COMMIT
+/*!*/;
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+BEGIN
+/*!*/;
+# at #
+# at #
+# at #
+# at #
+# at #
+#010909 4:46:40 server id # end_log_pos # Annotate_rows:
+#Q> DELETE test1.t1, test2.t2
+#Q> FROM test1.t1 INNER JOIN test2.t2 INNER JOIN test3.t3
+#Q> WHERE test1.t1.a=test2.t2.a AND test2.t2.a=test3.t3
+#010909 4:46:40 server id # end_log_pos # Table_map: `test1`.`t1` mapped to number #
+#010909 4:46:40 server id # end_log_pos # Delete_rows: table id #
+### DELETE FROM test1.t1
+### WHERE
+### @1=1 /* INT meta=0 nullable=1 is_null=0 */
+### DELETE FROM test1.t1
+### WHERE
+### @1=2 /* INT meta=0 nullable=1 is_null=0 */
+### DELETE FROM test1.t1
+### WHERE
+### @1=3 /* INT meta=0 nullable=1 is_null=0 */
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+COMMIT
+/*!*/;
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+BEGIN
+/*!*/;
+# at #
+# at #
+# at #
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+COMMIT
+/*!*/;
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+BEGIN
+/*!*/;
+# at #
+# at #
+# at #
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+COMMIT
+/*!*/;
+# at #
+#010909 4:46:40 server id # end_log_pos # Rotate to master-bin.000002 pos: 4
+DELIMITER ;
+# End of log file
+ROLLBACK /* added by mysqlbinlog */;
+/*!50003 SET COMPLETION_TYPE=@OLD_COMPLETION_TYPE*/;
+#
+#####################################################################################
+# mysqlbinlog --skip-annotate-rows-events
+# No Annotates should appear in this output
+#####################################################################################
+/*!40019 SET @@session.max_insert_delayed_threads=0*/;
+/*!50003 SET @OLD_COMPLETION_TYPE=@@COMPLETION_TYPE,COMPLETION_TYPE=0*/;
+DELIMITER /*!*/;
+# at #
+#010909 4:46:40 server id # end_log_pos # Start: binlog v 4, server v #.##.## created 010909 4:46:40 at startup
+ROLLBACK/*!*/;
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+SET @@session.pseudo_thread_id=#/*!*/;
+SET @@session.foreign_key_checks=1, @@session.sql_auto_is_null=1, @@session.unique_checks=1, @@session.autocommit=1/*!*/;
+SET @@session.sql_mode=0/*!*/;
+SET @@session.auto_increment_increment=1, @@session.auto_increment_offset=1/*!*/;
+/*!\C latin1 *//*!*/;
+SET @@session.character_set_client=8,@@session.collation_connection=8,@@session.collation_server=8/*!*/;
+SET @@session.lc_time_names=0/*!*/;
+SET @@session.collation_database=DEFAULT/*!*/;
+DROP DATABASE IF EXISTS test1
+/*!*/;
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+DROP DATABASE IF EXISTS test2
+/*!*/;
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+DROP DATABASE IF EXISTS test3
+/*!*/;
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+CREATE DATABASE test1
+/*!*/;
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+CREATE DATABASE test2
+/*!*/;
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+CREATE DATABASE test3
+/*!*/;
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+BEGIN
+/*!*/;
+# at #
+# at #
+#010909 4:46:40 server id # end_log_pos # Table_map: `test1`.`t1` mapped to number #
+#010909 4:46:40 server id # end_log_pos # Write_rows: table id # flags: STMT_END_F
+### INSERT INTO test1.t1
+### SET
+### @1=1 /* INT meta=0 nullable=1 is_null=0 */
+### INSERT INTO test1.t1
+### SET
+### @1=2 /* INT meta=0 nullable=1 is_null=0 */
+### INSERT INTO test1.t1
+### SET
+### @1=3 /* INT meta=0 nullable=1 is_null=0 */
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+COMMIT
+/*!*/;
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+BEGIN
+/*!*/;
+# at #
+# at #
+# at #
+#010909 4:46:40 server id # end_log_pos # Table_map: `test2`.`t2` mapped to number #
+#010909 4:46:40 server id # end_log_pos # Write_rows: table id # flags: STMT_END_F
+### INSERT INTO test2.t2
+### SET
+### @1=1 /* INT meta=0 nullable=1 is_null=0 */
+### INSERT INTO test2.t2
+### SET
+### @1=2 /* INT meta=0 nullable=1 is_null=0 */
+### INSERT INTO test2.t2
+### SET
+### @1=3 /* INT meta=0 nullable=1 is_null=0 */
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+COMMIT
+/*!*/;
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+BEGIN
+/*!*/;
+# at #
+# at #
+# at #
+#010909 4:46:40 server id # end_log_pos # Table_map: `test3`.`t3` mapped to number #
+#010909 4:46:40 server id # end_log_pos # Write_rows: table id # flags: STMT_END_F
+### INSERT INTO test3.t3
+### SET
+### @1=1 /* INT meta=0 nullable=1 is_null=0 */
+### INSERT INTO test3.t3
+### SET
+### @1=2 /* INT meta=0 nullable=1 is_null=0 */
+### INSERT INTO test3.t3
+### SET
+### @1=3 /* INT meta=0 nullable=1 is_null=0 */
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+COMMIT
+/*!*/;
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+BEGIN
+/*!*/;
+# at #
+# at #
+# at #
+# at #
+# at #
+#010909 4:46:40 server id # end_log_pos # Table_map: `test1`.`t1` mapped to number #
+#010909 4:46:40 server id # end_log_pos # Table_map: `test2`.`t2` mapped to number #
+#010909 4:46:40 server id # end_log_pos # Delete_rows: table id #
+#010909 4:46:40 server id # end_log_pos # Delete_rows: table id # flags: STMT_END_F
+### DELETE FROM test1.t1
+### WHERE
+### @1=1 /* INT meta=0 nullable=1 is_null=0 */
+### DELETE FROM test1.t1
+### WHERE
+### @1=2 /* INT meta=0 nullable=1 is_null=0 */
+### DELETE FROM test1.t1
+### WHERE
+### @1=3 /* INT meta=0 nullable=1 is_null=0 */
+### DELETE FROM test2.t2
+### WHERE
+### @1=1 /* INT meta=0 nullable=1 is_null=0 */
+### DELETE FROM test2.t2
+### WHERE
+### @1=2 /* INT meta=0 nullable=1 is_null=0 */
+### DELETE FROM test2.t2
+### WHERE
+### @1=3 /* INT meta=0 nullable=1 is_null=0 */
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+COMMIT
+/*!*/;
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+BEGIN
+/*!*/;
+# at #
+# at #
+# at #
+#010909 4:46:40 server id # end_log_pos # Table_map: `test2`.`t2` mapped to number #
+#010909 4:46:40 server id # end_log_pos # Write_rows: table id # flags: STMT_END_F
+### INSERT INTO test2.t2
+### SET
+### @1=1 /* INT meta=0 nullable=1 is_null=0 */
+### INSERT INTO test2.t2
+### SET
+### @1=2 /* INT meta=0 nullable=1 is_null=0 */
+### INSERT INTO test2.t2
+### SET
+### @1=3 /* INT meta=0 nullable=1 is_null=0 */
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+COMMIT
+/*!*/;
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+BEGIN
+/*!*/;
+# at #
+# at #
+# at #
+#010909 4:46:40 server id # end_log_pos # Table_map: `test2`.`t2` mapped to number #
+#010909 4:46:40 server id # end_log_pos # Delete_rows: table id # flags: STMT_END_F
+### DELETE FROM test2.t2
+### WHERE
+### @1=3 /* INT meta=0 nullable=1 is_null=0 */
+### DELETE FROM test2.t2
+### WHERE
+### @1=2 /* INT meta=0 nullable=1 is_null=0 */
+### DELETE FROM test2.t2
+### WHERE
+### @1=1 /* INT meta=0 nullable=1 is_null=0 */
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+COMMIT
+/*!*/;
+# at #
+#010909 4:46:40 server id # end_log_pos # Rotate to master-bin.000002 pos: 4
+DELIMITER ;
+# End of log file
+ROLLBACK /* added by mysqlbinlog */;
+/*!50003 SET COMPLETION_TYPE=@OLD_COMPLETION_TYPE*/;
+#
+#####################################################################################
+# mysqlbinlog --read-from-remote-server
+# The following Annotates should appear in this output:
+# - INSERT INTO test2.t2 VALUES (1), (2), (3)
+# - INSERT INTO test3.t3 VALUES (1), (2), (3)
+# - DELETE test1.t1, test2.t2 FROM <...> (with two subsequent Table maps)
+# - INSERT INTO test2.t2 VALUES (1), (2), (3)
+# - DELETE xtest1.xt1, test2.t2 FROM <...> (with one subsequent Table map)
+#####################################################################################
+/*!40019 SET @@session.max_insert_delayed_threads=0*/;
+/*!50003 SET @OLD_COMPLETION_TYPE=@@COMPLETION_TYPE,COMPLETION_TYPE=0*/;
+DELIMITER /*!*/;
+# at #
+#010909 4:46:40 server id # end_log_pos # Start: binlog v 4, server v #.##.## created 010909 4:46:40 at startup
+ROLLBACK/*!*/;
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+SET @@session.pseudo_thread_id=#/*!*/;
+SET @@session.foreign_key_checks=1, @@session.sql_auto_is_null=1, @@session.unique_checks=1, @@session.autocommit=1/*!*/;
+SET @@session.sql_mode=0/*!*/;
+SET @@session.auto_increment_increment=1, @@session.auto_increment_offset=1/*!*/;
+/*!\C latin1 *//*!*/;
+SET @@session.character_set_client=8,@@session.collation_connection=8,@@session.collation_server=8/*!*/;
+SET @@session.lc_time_names=0/*!*/;
+SET @@session.collation_database=DEFAULT/*!*/;
+DROP DATABASE IF EXISTS test1
+/*!*/;
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+DROP DATABASE IF EXISTS test2
+/*!*/;
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+DROP DATABASE IF EXISTS test3
+/*!*/;
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+CREATE DATABASE test1
+/*!*/;
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+CREATE DATABASE test2
+/*!*/;
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+CREATE DATABASE test3
+/*!*/;
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+BEGIN
+/*!*/;
+# at #
+# at #
+#010909 4:46:40 server id # end_log_pos # Table_map: `test1`.`t1` mapped to number #
+#010909 4:46:40 server id # end_log_pos # Write_rows: table id # flags: STMT_END_F
+### INSERT INTO test1.t1
+### SET
+### @1=1 /* INT meta=0 nullable=1 is_null=0 */
+### INSERT INTO test1.t1
+### SET
+### @1=2 /* INT meta=0 nullable=1 is_null=0 */
+### INSERT INTO test1.t1
+### SET
+### @1=3 /* INT meta=0 nullable=1 is_null=0 */
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+COMMIT
+/*!*/;
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+BEGIN
+/*!*/;
+# at #
+# at #
+# at #
+#010909 4:46:40 server id # end_log_pos # Annotate_rows:
+#Q> INSERT INTO test2.t2 VALUES (1), (2), (3)
+#010909 4:46:40 server id # end_log_pos # Table_map: `test2`.`t2` mapped to number #
+#010909 4:46:40 server id # end_log_pos # Write_rows: table id # flags: STMT_END_F
+### INSERT INTO test2.t2
+### SET
+### @1=1 /* INT meta=0 nullable=1 is_null=0 */
+### INSERT INTO test2.t2
+### SET
+### @1=2 /* INT meta=0 nullable=1 is_null=0 */
+### INSERT INTO test2.t2
+### SET
+### @1=3 /* INT meta=0 nullable=1 is_null=0 */
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+COMMIT
+/*!*/;
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+BEGIN
+/*!*/;
+# at #
+# at #
+# at #
+#010909 4:46:40 server id # end_log_pos # Annotate_rows:
+#Q> INSERT INTO test3.t3 VALUES (1), (2), (3)
+#010909 4:46:40 server id # end_log_pos # Table_map: `test3`.`t3` mapped to number #
+#010909 4:46:40 server id # end_log_pos # Write_rows: table id # flags: STMT_END_F
+### INSERT INTO test3.t3
+### SET
+### @1=1 /* INT meta=0 nullable=1 is_null=0 */
+### INSERT INTO test3.t3
+### SET
+### @1=2 /* INT meta=0 nullable=1 is_null=0 */
+### INSERT INTO test3.t3
+### SET
+### @1=3 /* INT meta=0 nullable=1 is_null=0 */
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+COMMIT
+/*!*/;
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+BEGIN
+/*!*/;
+# at #
+# at #
+# at #
+# at #
+# at #
+#010909 4:46:40 server id # end_log_pos # Annotate_rows:
+#Q> DELETE test1.t1, test2.t2
+#Q> FROM test1.t1 INNER JOIN test2.t2 INNER JOIN test3.t3
+#Q> WHERE test1.t1.a=test2.t2.a AND test2.t2.a=test3.t3
+#010909 4:46:40 server id # end_log_pos # Table_map: `test1`.`t1` mapped to number #
+#010909 4:46:40 server id # end_log_pos # Table_map: `test2`.`t2` mapped to number #
+#010909 4:46:40 server id # end_log_pos # Delete_rows: table id #
+#010909 4:46:40 server id # end_log_pos # Delete_rows: table id # flags: STMT_END_F
+### DELETE FROM test1.t1
+### WHERE
+### @1=1 /* INT meta=0 nullable=1 is_null=0 */
+### DELETE FROM test1.t1
+### WHERE
+### @1=2 /* INT meta=0 nullable=1 is_null=0 */
+### DELETE FROM test1.t1
+### WHERE
+### @1=3 /* INT meta=0 nullable=1 is_null=0 */
+### DELETE FROM test2.t2
+### WHERE
+### @1=1 /* INT meta=0 nullable=1 is_null=0 */
+### DELETE FROM test2.t2
+### WHERE
+### @1=2 /* INT meta=0 nullable=1 is_null=0 */
+### DELETE FROM test2.t2
+### WHERE
+### @1=3 /* INT meta=0 nullable=1 is_null=0 */
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+COMMIT
+/*!*/;
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+BEGIN
+/*!*/;
+# at #
+# at #
+# at #
+#010909 4:46:40 server id # end_log_pos # Annotate_rows:
+#Q> INSERT INTO test2.v2 VALUES (1), (2), (3)
+#010909 4:46:40 server id # end_log_pos # Table_map: `test2`.`t2` mapped to number #
+#010909 4:46:40 server id # end_log_pos # Write_rows: table id # flags: STMT_END_F
+### INSERT INTO test2.t2
+### SET
+### @1=1 /* INT meta=0 nullable=1 is_null=0 */
+### INSERT INTO test2.t2
+### SET
+### @1=2 /* INT meta=0 nullable=1 is_null=0 */
+### INSERT INTO test2.t2
+### SET
+### @1=3 /* INT meta=0 nullable=1 is_null=0 */
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+COMMIT
+/*!*/;
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+BEGIN
+/*!*/;
+# at #
+# at #
+# at #
+#010909 4:46:40 server id # end_log_pos # Annotate_rows:
+#Q> DELETE xtest1.xt1, test2.t2
+#Q> FROM xtest1.xt1 INNER JOIN test2.t2 INNER JOIN test3.t3
+#Q> WHERE xtest1.xt1.a=test2.t2.a AND test2.t2.a=test3.t3
+#010909 4:46:40 server id # end_log_pos # Table_map: `test2`.`t2` mapped to number #
+#010909 4:46:40 server id # end_log_pos # Delete_rows: table id # flags: STMT_END_F
+### DELETE FROM test2.t2
+### WHERE
+### @1=3 /* INT meta=0 nullable=1 is_null=0 */
+### DELETE FROM test2.t2
+### WHERE
+### @1=2 /* INT meta=0 nullable=1 is_null=0 */
+### DELETE FROM test2.t2
+### WHERE
+### @1=1 /* INT meta=0 nullable=1 is_null=0 */
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+COMMIT
+/*!*/;
+# at #
+#010909 4:46:40 server id # end_log_pos # Rotate to master-bin.000002 pos: 4
+DELIMITER ;
+# End of log file
+ROLLBACK /* added by mysqlbinlog */;
+/*!50003 SET COMPLETION_TYPE=@OLD_COMPLETION_TYPE*/;
+#
+#####################################################################################
+# mysqlbinlog --read-from-remote-server --database=test1
+# The following Annotate should appear in this output:
+# - DELETE test1.t1, test2.t2 FROM <...>
+#####################################################################################
+/*!40019 SET @@session.max_insert_delayed_threads=0*/;
+/*!50003 SET @OLD_COMPLETION_TYPE=@@COMPLETION_TYPE,COMPLETION_TYPE=0*/;
+DELIMITER /*!*/;
+# at #
+#010909 4:46:40 server id # end_log_pos # Start: binlog v 4, server v #.##.## created 010909 4:46:40 at startup
+ROLLBACK/*!*/;
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+SET @@session.pseudo_thread_id=#/*!*/;
+SET @@session.foreign_key_checks=1, @@session.sql_auto_is_null=1, @@session.unique_checks=1, @@session.autocommit=1/*!*/;
+SET @@session.sql_mode=0/*!*/;
+SET @@session.auto_increment_increment=1, @@session.auto_increment_offset=1/*!*/;
+/*!\C latin1 *//*!*/;
+SET @@session.character_set_client=8,@@session.collation_connection=8,@@session.collation_server=8/*!*/;
+SET @@session.lc_time_names=0/*!*/;
+SET @@session.collation_database=DEFAULT/*!*/;
+DROP DATABASE IF EXISTS test1
+/*!*/;
+# at #
+# at #
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+CREATE DATABASE test1
+/*!*/;
+# at #
+# at #
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+BEGIN
+/*!*/;
+# at #
+# at #
+#010909 4:46:40 server id # end_log_pos # Table_map: `test1`.`t1` mapped to number #
+#010909 4:46:40 server id # end_log_pos # Write_rows: table id # flags: STMT_END_F
+### INSERT INTO test1.t1
+### SET
+### @1=1 /* INT meta=0 nullable=1 is_null=0 */
+### INSERT INTO test1.t1
+### SET
+### @1=2 /* INT meta=0 nullable=1 is_null=0 */
+### INSERT INTO test1.t1
+### SET
+### @1=3 /* INT meta=0 nullable=1 is_null=0 */
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+COMMIT
+/*!*/;
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+BEGIN
+/*!*/;
+# at #
+# at #
+# at #
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+COMMIT
+/*!*/;
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+BEGIN
+/*!*/;
+# at #
+# at #
+# at #
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+COMMIT
+/*!*/;
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+BEGIN
+/*!*/;
+# at #
+# at #
+# at #
+# at #
+# at #
+#010909 4:46:40 server id # end_log_pos # Annotate_rows:
+#Q> DELETE test1.t1, test2.t2
+#Q> FROM test1.t1 INNER JOIN test2.t2 INNER JOIN test3.t3
+#Q> WHERE test1.t1.a=test2.t2.a AND test2.t2.a=test3.t3
+#010909 4:46:40 server id # end_log_pos # Table_map: `test1`.`t1` mapped to number #
+#010909 4:46:40 server id # end_log_pos # Delete_rows: table id #
+### DELETE FROM test1.t1
+### WHERE
+### @1=1 /* INT meta=0 nullable=1 is_null=0 */
+### DELETE FROM test1.t1
+### WHERE
+### @1=2 /* INT meta=0 nullable=1 is_null=0 */
+### DELETE FROM test1.t1
+### WHERE
+### @1=3 /* INT meta=0 nullable=1 is_null=0 */
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+COMMIT
+/*!*/;
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+BEGIN
+/*!*/;
+# at #
+# at #
+# at #
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+COMMIT
+/*!*/;
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+BEGIN
+/*!*/;
+# at #
+# at #
+# at #
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+COMMIT
+/*!*/;
+# at #
+#010909 4:46:40 server id # end_log_pos # Rotate to master-bin.000002 pos: 4
+DELIMITER ;
+# End of log file
+ROLLBACK /* added by mysqlbinlog */;
+/*!50003 SET COMPLETION_TYPE=@OLD_COMPLETION_TYPE*/;
+#
+#####################################################################################
+# mysqlbinlog --read-from-remote-server --skip-annotate-rows-events
+# No Annotates should appear in this output
+#####################################################################################
+/*!40019 SET @@session.max_insert_delayed_threads=0*/;
+/*!50003 SET @OLD_COMPLETION_TYPE=@@COMPLETION_TYPE,COMPLETION_TYPE=0*/;
+DELIMITER /*!*/;
+# at #
+#010909 4:46:40 server id # end_log_pos # Start: binlog v 4, server v #.##.## created 010909 4:46:40 at startup
+ROLLBACK/*!*/;
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+SET @@session.pseudo_thread_id=#/*!*/;
+SET @@session.foreign_key_checks=1, @@session.sql_auto_is_null=1, @@session.unique_checks=1, @@session.autocommit=1/*!*/;
+SET @@session.sql_mode=0/*!*/;
+SET @@session.auto_increment_increment=1, @@session.auto_increment_offset=1/*!*/;
+/*!\C latin1 *//*!*/;
+SET @@session.character_set_client=8,@@session.collation_connection=8,@@session.collation_server=8/*!*/;
+SET @@session.lc_time_names=0/*!*/;
+SET @@session.collation_database=DEFAULT/*!*/;
+DROP DATABASE IF EXISTS test1
+/*!*/;
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+DROP DATABASE IF EXISTS test2
+/*!*/;
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+DROP DATABASE IF EXISTS test3
+/*!*/;
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+CREATE DATABASE test1
+/*!*/;
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+CREATE DATABASE test2
+/*!*/;
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+CREATE DATABASE test3
+/*!*/;
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+BEGIN
+/*!*/;
+# at #
+# at #
+#010909 4:46:40 server id # end_log_pos # Table_map: `test1`.`t1` mapped to number #
+#010909 4:46:40 server id # end_log_pos # Write_rows: table id # flags: STMT_END_F
+### INSERT INTO test1.t1
+### SET
+### @1=1 /* INT meta=0 nullable=1 is_null=0 */
+### INSERT INTO test1.t1
+### SET
+### @1=2 /* INT meta=0 nullable=1 is_null=0 */
+### INSERT INTO test1.t1
+### SET
+### @1=3 /* INT meta=0 nullable=1 is_null=0 */
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+COMMIT
+/*!*/;
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+BEGIN
+/*!*/;
+# at #
+# at #
+#010909 4:46:40 server id # end_log_pos # Table_map: `test2`.`t2` mapped to number #
+#010909 4:46:40 server id # end_log_pos # Write_rows: table id # flags: STMT_END_F
+### INSERT INTO test2.t2
+### SET
+### @1=1 /* INT meta=0 nullable=1 is_null=0 */
+### INSERT INTO test2.t2
+### SET
+### @1=2 /* INT meta=0 nullable=1 is_null=0 */
+### INSERT INTO test2.t2
+### SET
+### @1=3 /* INT meta=0 nullable=1 is_null=0 */
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+COMMIT
+/*!*/;
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+BEGIN
+/*!*/;
+# at #
+# at #
+#010909 4:46:40 server id # end_log_pos # Table_map: `test3`.`t3` mapped to number #
+#010909 4:46:40 server id # end_log_pos # Write_rows: table id # flags: STMT_END_F
+### INSERT INTO test3.t3
+### SET
+### @1=1 /* INT meta=0 nullable=1 is_null=0 */
+### INSERT INTO test3.t3
+### SET
+### @1=2 /* INT meta=0 nullable=1 is_null=0 */
+### INSERT INTO test3.t3
+### SET
+### @1=3 /* INT meta=0 nullable=1 is_null=0 */
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+COMMIT
+/*!*/;
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+BEGIN
+/*!*/;
+# at #
+# at #
+# at #
+# at #
+#010909 4:46:40 server id # end_log_pos # Table_map: `test1`.`t1` mapped to number #
+#010909 4:46:40 server id # end_log_pos # Table_map: `test2`.`t2` mapped to number #
+#010909 4:46:40 server id # end_log_pos # Delete_rows: table id #
+#010909 4:46:40 server id # end_log_pos # Delete_rows: table id # flags: STMT_END_F
+### DELETE FROM test1.t1
+### WHERE
+### @1=1 /* INT meta=0 nullable=1 is_null=0 */
+### DELETE FROM test1.t1
+### WHERE
+### @1=2 /* INT meta=0 nullable=1 is_null=0 */
+### DELETE FROM test1.t1
+### WHERE
+### @1=3 /* INT meta=0 nullable=1 is_null=0 */
+### DELETE FROM test2.t2
+### WHERE
+### @1=1 /* INT meta=0 nullable=1 is_null=0 */
+### DELETE FROM test2.t2
+### WHERE
+### @1=2 /* INT meta=0 nullable=1 is_null=0 */
+### DELETE FROM test2.t2
+### WHERE
+### @1=3 /* INT meta=0 nullable=1 is_null=0 */
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+COMMIT
+/*!*/;
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+BEGIN
+/*!*/;
+# at #
+# at #
+#010909 4:46:40 server id # end_log_pos # Table_map: `test2`.`t2` mapped to number #
+#010909 4:46:40 server id # end_log_pos # Write_rows: table id # flags: STMT_END_F
+### INSERT INTO test2.t2
+### SET
+### @1=1 /* INT meta=0 nullable=1 is_null=0 */
+### INSERT INTO test2.t2
+### SET
+### @1=2 /* INT meta=0 nullable=1 is_null=0 */
+### INSERT INTO test2.t2
+### SET
+### @1=3 /* INT meta=0 nullable=1 is_null=0 */
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+COMMIT
+/*!*/;
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+BEGIN
+/*!*/;
+# at #
+# at #
+#010909 4:46:40 server id # end_log_pos # Table_map: `test2`.`t2` mapped to number #
+#010909 4:46:40 server id # end_log_pos # Delete_rows: table id # flags: STMT_END_F
+### DELETE FROM test2.t2
+### WHERE
+### @1=3 /* INT meta=0 nullable=1 is_null=0 */
+### DELETE FROM test2.t2
+### WHERE
+### @1=2 /* INT meta=0 nullable=1 is_null=0 */
+### DELETE FROM test2.t2
+### WHERE
+### @1=1 /* INT meta=0 nullable=1 is_null=0 */
+# at #
+#010909 4:46:40 server id # end_log_pos # Query thread_id=# exec_time=# error_code=0
+SET TIMESTAMP=1000000000/*!*/;
+COMMIT
+/*!*/;
+# at #
+#010909 4:46:40 server id # end_log_pos # Rotate to master-bin.000002 pos: 4
+DELIMITER ;
+# End of log file
+ROLLBACK /* added by mysqlbinlog */;
+/*!50003 SET COMPLETION_TYPE=@OLD_COMPLETION_TYPE*/;
diff --git a/mysql-test/suite/binlog/r/binlog_stm_binlog.result b/mysql-test/suite/binlog/r/binlog_stm_binlog.result
index a923f51feac..92531d9fb54 100644
--- a/mysql-test/suite/binlog/r/binlog_stm_binlog.result
+++ b/mysql-test/suite/binlog/r/binlog_stm_binlog.result
@@ -333,7 +333,7 @@ master-bin.000001 # Query # # use `test`; insert into t1 values( 244 )
master-bin.000001 # Query # # use `test`; insert into t1 values( 243 )
master-bin.000001 # Query # # use `test`; insert into t1 values( 242 )
master-bin.000001 # Query # # use `test`; insert into t1 values( 241 )
-master-bin.000001 # Query # # use `test`; insert into t1 values( 240 )
+master-bin.000001 # Query # # use `test`; insert into t1 values( <binlog_start> )
master-bin.000001 # Query # # use `test`; insert into t1 values( 239 )
master-bin.000001 # Query # # use `test`; insert into t1 values( 238 )
master-bin.000001 # Query # # use `test`; insert into t1 values( 237 )
@@ -467,7 +467,7 @@ master-bin.000001 # Query # # use `test`; insert into t1 values( 110 )
master-bin.000001 # Query # # use `test`; insert into t1 values( 109 )
master-bin.000001 # Query # # use `test`; insert into t1 values( 108 )
master-bin.000001 # Query # # use `test`; insert into t1 values( 107 )
-master-bin.000001 # Query # # use `test`; insert into t1 values( <binlog_start> )
+master-bin.000001 # Query # # use `test`; insert into t1 values( 106 )
master-bin.000001 # Query # # use `test`; insert into t1 values( 105 )
master-bin.000001 # Query # # use `test`; insert into t1 values( 104 )
master-bin.000001 # Query # # use `test`; insert into t1 values( 103 )
diff --git a/mysql-test/suite/binlog/t/binlog_incident.test b/mysql-test/suite/binlog/t/binlog_incident.test
index 901ac49ea24..1c526ca5980 100644
--- a/mysql-test/suite/binlog/t/binlog_incident.test
+++ b/mysql-test/suite/binlog/t/binlog_incident.test
@@ -4,6 +4,7 @@
source include/have_log_bin.inc;
source include/have_debug.inc;
+source include/binlog_start_pos.inc;
let $MYSQLD_DATADIR= `select @@datadir`;
RESET MASTER;
@@ -20,7 +21,7 @@ REPLACE INTO t1 VALUES (4);
DROP TABLE t1;
FLUSH LOGS;
-exec $MYSQL_BINLOG --start-position=106 $MYSQLD_DATADIR/master-bin.000001 >$MYSQLTEST_VARDIR/tmp/binlog_incident-bug44442.sql;
+exec $MYSQL_BINLOG --start-position=$binlog_start_pos $MYSQLD_DATADIR/master-bin.000001 >$MYSQLTEST_VARDIR/tmp/binlog_incident-bug44442.sql;
--disable_query_log
eval SELECT cont LIKE '%RELOAD DATABASE; # Shall generate syntax error%' AS `Contain RELOAD DATABASE` FROM (SELECT load_file('$MYSQLTEST_VARDIR/tmp/binlog_incident-bug44442.sql') AS cont) AS tbl;
--enable_query_log
diff --git a/mysql-test/suite/binlog/t/binlog_ioerr.test b/mysql-test/suite/binlog/t/binlog_ioerr.test
new file mode 100644
index 00000000000..e6f559c1a7b
--- /dev/null
+++ b/mysql-test/suite/binlog/t/binlog_ioerr.test
@@ -0,0 +1,30 @@
+source include/have_debug.inc;
+source include/have_innodb.inc;
+source include/have_log_bin.inc;
+source include/have_binlog_format_mixed_or_statement.inc;
+
+CALL mtr.add_suppression("Error writing file 'master-bin'");
+
+RESET MASTER;
+
+CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=innodb;
+INSERT INTO t1 VALUES(0);
+SET SESSION debug='+d,fail_binlog_write_1';
+--error ER_ERROR_ON_WRITE
+INSERT INTO t1 VALUES(1);
+--error ER_ERROR_ON_WRITE
+INSERT INTO t1 VALUES(2);
+SET SESSION debug='';
+INSERT INTO t1 VALUES(3);
+SELECT * FROM t1;
+
+# Actually the output from this currently shows a bug.
+# The injected IO error leaves partially written transactions in the binlog in
+# the form of stray "BEGIN" events.
+# These should disappear from the output if binlog error handling is improved
+# (see MySQL Bug#37148 and WL#1790).
+--replace_regex /\/\* xid=.* \*\//\/* XID *\// /Server ver: .*, Binlog ver: .*/Server ver: #, Binlog ver: #/ /table_id: [0-9]+/table_id: #/
+--replace_column 1 BINLOG 2 POS 5 ENDPOS
+SHOW BINLOG EVENTS;
+
+DROP TABLE t1;
diff --git a/mysql-test/suite/binlog/t/binlog_killed.test b/mysql-test/suite/binlog/t/binlog_killed.test
index ce6d344af32..a2bd7d8095a 100644
--- a/mysql-test/suite/binlog/t/binlog_killed.test
+++ b/mysql-test/suite/binlog/t/binlog_killed.test
@@ -1,5 +1,6 @@
-- source include/have_innodb.inc
-- source include/have_binlog_format_statement.inc
+-- source include/binlog_start_pos.inc
# You cannot use `KILL' with the Embedded MySQL Server library,
# because the embedded server merely runs inside the threads of the host
@@ -51,7 +52,8 @@ reap;
let $rows= `select count(*) from t2 /* must be 2 or 0 */`;
let $MYSQLD_DATADIR= `select @@datadir`;
---exec $MYSQL_BINLOG --force-if-open --start-position=134 $MYSQLD_DATADIR/master-bin.000001 > $MYSQLTEST_VARDIR/tmp/kill_query_calling_sp.binlog
+let $start_pos= `select @binlog_start_pos + 28`;
+--exec $MYSQL_BINLOG --force-if-open --start-position=$start_pos $MYSQLD_DATADIR/master-bin.000001 > $MYSQLTEST_VARDIR/tmp/kill_query_calling_sp.binlog
--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
eval select
(@a:=load_file("$MYSQLTEST_VARDIR/tmp/kill_query_calling_sp.binlog"))
diff --git a/mysql-test/suite/binlog/t/binlog_killed_simulate.test b/mysql-test/suite/binlog/t/binlog_killed_simulate.test
index b87d47559fe..c5c14137fbb 100644
--- a/mysql-test/suite/binlog/t/binlog_killed_simulate.test
+++ b/mysql-test/suite/binlog/t/binlog_killed_simulate.test
@@ -1,5 +1,6 @@
-- source include/have_debug.inc
-- source include/have_binlog_format_statement.inc
+-- source include/binlog_start_pos.inc
#
# bug#27571 asynchronous setting mysql_$query()'s local error and
# Query_log_event::error_code
@@ -24,7 +25,7 @@ update t1 set a=2 /* will be "killed" after work has been done */;
# for some constants like the offset of the first real event
# that is different between severs versions.
let $MYSQLD_DATADIR= `select @@datadir`;
---exec $MYSQL_BINLOG --force-if-open --start-position=106 $MYSQLD_DATADIR/master-bin.000001 > $MYSQLTEST_VARDIR/tmp/binlog_killed_bug27571.binlog
+--exec $MYSQL_BINLOG --force-if-open --start-position=$binlog_start_pos $MYSQLD_DATADIR/master-bin.000001 > $MYSQLTEST_VARDIR/tmp/binlog_killed_bug27571.binlog
--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
eval select
(@a:=load_file("$MYSQLTEST_VARDIR/tmp/binlog_killed_bug27571.binlog"))
diff --git a/mysql-test/suite/binlog/t/binlog_row_annotate-master.opt b/mysql-test/suite/binlog/t/binlog_row_annotate-master.opt
new file mode 100644
index 00000000000..24d88646b3b
--- /dev/null
+++ b/mysql-test/suite/binlog/t/binlog_row_annotate-master.opt
@@ -0,0 +1 @@
+--timezone=GMT-3 --binlog-do-db=test1 --binlog-do-db=test2 --binlog-do-db=test3
diff --git a/mysql-test/suite/binlog/t/binlog_row_annotate.test b/mysql-test/suite/binlog/t/binlog_row_annotate.test
new file mode 100644
index 00000000000..c5db9ef2148
--- /dev/null
+++ b/mysql-test/suite/binlog/t/binlog_row_annotate.test
@@ -0,0 +1,189 @@
+###############################################################################
+# WL47: Store in binlog text of statements that caused RBR events
+# new event: ANNOTATE_ROWS_EVENT
+# new master option: --binlog-annotate-rows-events
+# new mysqlbinlog option: --skip-annotate-rows-events
+#
+# Intended to test that:
+# *** If the --binlog-annotate-rows-events option is switched on on master
+# then Annotate_rows events:
+# - are generated;
+# - are genrated only once for "multi-table-maps" rbr queries;
+# - are not generated when the corresponding queries are filtered away;
+# - are generated when the corresponding queries are filtered away partialy
+# (e.g. in case of multi-delete).
+# *** Annotate_rows events are printed by mysqlbinlog started without
+# --skip-annotate-rows-events options both in remote and local cases.
+# *** Annotate_rows events are not printed by mysqlbinlog started with
+# --skip-annotate-rows-events options both in remote and local cases.
+###############################################################################
+
+--source include/have_log_bin.inc
+--source include/have_binlog_format_row.inc
+--source include/binlog_start_pos.inc
+
+--disable_query_log
+
+# Fix timestamp to avoid varying results
+SET timestamp=1000000000;
+
+# Delete all existing binary logs
+RESET MASTER;
+
+--disable_warnings
+DROP DATABASE IF EXISTS test1;
+DROP DATABASE IF EXISTS test2;
+DROP DATABASE IF EXISTS test3;
+DROP DATABASE IF EXISTS xtest1;
+DROP DATABASE IF EXISTS xtest2;
+--enable_warnings
+
+CREATE DATABASE test1;
+CREATE TABLE test1.t1(a int);
+
+CREATE DATABASE test2;
+CREATE TABLE test2.t2(a int);
+CREATE VIEW test2.v2 AS SELECT * FROM test2.t2;
+
+CREATE DATABASE test3;
+CREATE TABLE test3.t3(a int);
+
+CREATE DATABASE xtest1;
+CREATE TABLE xtest1.xt1(a int);
+
+CREATE DATABASE xtest2;
+CREATE TABLE xtest2.xt2(a int);
+
+# By default SESSION binlog_annotate_rows_events = OFF
+
+INSERT INTO test1.t1 VALUES (1), (2), (3);
+
+SET SESSION binlog_annotate_rows_events = ON;
+
+INSERT INTO test2.t2 VALUES (1), (2), (3);
+INSERT INTO test3.t3 VALUES (1), (2), (3);
+
+# This query generates two Table maps but the Annotate
+# event should appear only once before the first Table map
+DELETE test1.t1, test2.t2
+ FROM test1.t1 INNER JOIN test2.t2 INNER JOIN test3.t3
+ WHERE test1.t1.a=test2.t2.a AND test2.t2.a=test3.t3.a;
+
+# This event should be filtered out together with Annotate event
+INSERT INTO xtest1.xt1 VALUES (1), (2), (3);
+
+# This event should pass the filter
+INSERT INTO test2.v2 VALUES (1), (2), (3);
+
+# This event should pass the filter only for test2.t2 part
+DELETE xtest1.xt1, test2.t2
+ FROM xtest1.xt1 INNER JOIN test2.t2 INNER JOIN test3.t3
+ WHERE xtest1.xt1.a=test2.t2.a AND test2.t2.a=test3.t3.a;
+
+# These events should be filtered out together with Annotate events
+INSERT INTO xtest1.xt1 VALUES (1), (2), (3);
+INSERT INTO xtest2.xt2 VALUES (1), (2), (3);
+DELETE xtest1.xt1, xtest2.xt2
+ FROM xtest1.xt1 INNER JOIN xtest2.xt2 INNER JOIN test3.t3
+ WHERE xtest1.xt1.a=xtest2.xt2.a AND xtest2.xt2.a=test3.t3.a;
+
+FLUSH LOGS;
+--enable_query_log
+
+--echo #####################################################################################
+--echo # The following Annotate_rows events should appear below:
+--echo # - INSERT INTO test2.t2 VALUES (1), (2), (3)
+--echo # - INSERT INTO test3.t3 VALUES (1), (2), (3)
+--echo # - DELETE test1.t1, test2.t2 FROM <...>
+--echo # - INSERT INTO test2.t2 VALUES (1), (2), (3)
+--echo # - DELETE xtest1.xt1, test2.t2 FROM <...>
+--echo #####################################################################################
+
+let $start_pos= `select @binlog_start_pos`;
+--replace_column 2 # 5 #
+--replace_result $start_pos <start_pos>
+--replace_regex /table_id: [0-9]+/table_id: #/ /\/\* xid=.* \*\//\/* xid= *\//
+--eval show binlog events in 'master-bin.000001' from $start_pos
+
+--echo #
+--echo #####################################################################################
+--echo # mysqlbinlog
+--echo # The following Annotates should appear in this output:
+--echo # - INSERT INTO test2.t2 VALUES (1), (2), (3)
+--echo # - INSERT INTO test3.t3 VALUES (1), (2), (3)
+--echo # - DELETE test1.t1, test2.t2 FROM <...> (with two subsequent Table maps)
+--echo # - INSERT INTO test2.t2 VALUES (1), (2), (3)
+--echo # - DELETE xtest1.xt1, test2.t2 FROM <...> (with one subsequent Table map)
+--echo #####################################################################################
+
+let $MYSQLD_DATADIR= `select @@datadir`;
+--replace_regex /server id [0-9]*/server id #/ /server v [^ ]*/server v #.##.##/ /exec_time=[0-9]*/exec_time=#/ /thread_id=[0-9]*/thread_id=#/ /table id [0-9]*/table id #/ /mapped to number [0-9]*/mapped to number #/ /end_log_pos [0-9]*/end_log_pos #/ /# at [0-9]*/# at #/
+--exec $MYSQL_BINLOG --base64-output=decode-rows -v -v $MYSQLD_DATADIR/master-bin.000001
+
+--echo #
+--echo #####################################################################################
+--echo # mysqlbinlog --database=test1
+--echo # The following Annotate should appear in this output:
+--echo # - DELETE test1.t1, test2.t2 FROM <...>
+--echo #####################################################################################
+
+let $MYSQLD_DATADIR= `select @@datadir`;
+--replace_regex /server id [0-9]*/server id #/ /server v [^ ]*/server v #.##.##/ /exec_time=[0-9]*/exec_time=#/ /thread_id=[0-9]*/thread_id=#/ /table id [0-9]*/table id #/ /mapped to number [0-9]*/mapped to number #/ /end_log_pos [0-9]*/end_log_pos #/ /# at [0-9]*/# at #/
+--exec $MYSQL_BINLOG --base64-output=decode-rows --database=test1 -v -v $MYSQLD_DATADIR/master-bin.000001
+
+--echo #
+--echo #####################################################################################
+--echo # mysqlbinlog --skip-annotate-rows-events
+--echo # No Annotates should appear in this output
+--echo #####################################################################################
+
+let $MYSQLD_DATADIR= `select @@datadir`;
+--replace_regex /server id [0-9]*/server id #/ /server v [^ ]*/server v #.##.##/ /exec_time=[0-9]*/exec_time=#/ /thread_id=[0-9]*/thread_id=#/ /table id [0-9]*/table id #/ /mapped to number [0-9]*/mapped to number #/ /end_log_pos [0-9]*/end_log_pos #/ /# at [0-9]*/# at #/
+--exec $MYSQL_BINLOG --base64-output=decode-rows --skip-annotate-rows-events -v -v $MYSQLD_DATADIR/master-bin.000001
+
+--echo #
+--echo #####################################################################################
+--echo # mysqlbinlog --read-from-remote-server
+--echo # The following Annotates should appear in this output:
+--echo # - INSERT INTO test2.t2 VALUES (1), (2), (3)
+--echo # - INSERT INTO test3.t3 VALUES (1), (2), (3)
+--echo # - DELETE test1.t1, test2.t2 FROM <...> (with two subsequent Table maps)
+--echo # - INSERT INTO test2.t2 VALUES (1), (2), (3)
+--echo # - DELETE xtest1.xt1, test2.t2 FROM <...> (with one subsequent Table map)
+--echo #####################################################################################
+
+let $MYSQLD_DATADIR= `select @@datadir`;
+--replace_regex /server id [0-9]*/server id #/ /server v [^ ]*/server v #.##.##/ /exec_time=[0-9]*/exec_time=#/ /thread_id=[0-9]*/thread_id=#/ /table id [0-9]*/table id #/ /mapped to number [0-9]*/mapped to number #/ /end_log_pos [0-9]*/end_log_pos #/ /# at [0-9]*/# at #/
+--exec $MYSQL_BINLOG --base64-output=decode-rows -v -v --read-from-remote-server --user=root --host=localhost --port=$MASTER_MYPORT master-bin.000001
+
+--echo #
+--echo #####################################################################################
+--echo # mysqlbinlog --read-from-remote-server --database=test1
+--echo # The following Annotate should appear in this output:
+--echo # - DELETE test1.t1, test2.t2 FROM <...>
+--echo #####################################################################################
+
+let $MYSQLD_DATADIR= `select @@datadir`;
+--replace_regex /server id [0-9]*/server id #/ /server v [^ ]*/server v #.##.##/ /exec_time=[0-9]*/exec_time=#/ /thread_id=[0-9]*/thread_id=#/ /table id [0-9]*/table id #/ /mapped to number [0-9]*/mapped to number #/ /end_log_pos [0-9]*/end_log_pos #/ /# at [0-9]*/# at #/
+--exec $MYSQL_BINLOG --base64-output=decode-rows --database=test1 -v -v --read-from-remote-server --user=root --host=localhost --port=$MASTER_MYPORT master-bin.000001
+
+--echo #
+--echo #####################################################################################
+--echo # mysqlbinlog --read-from-remote-server --skip-annotate-rows-events
+--echo # No Annotates should appear in this output
+--echo #####################################################################################
+
+let $MYSQLD_DATADIR= `select @@datadir`;
+--replace_regex /server id [0-9]*/server id #/ /server v [^ ]*/server v #.##.##/ /exec_time=[0-9]*/exec_time=#/ /thread_id=[0-9]*/thread_id=#/ /table id [0-9]*/table id #/ /mapped to number [0-9]*/mapped to number #/ /end_log_pos [0-9]*/end_log_pos #/ /# at [0-9]*/# at #/
+--exec $MYSQL_BINLOG --base64-output=decode-rows --skip-annotate-rows-events -v -v --read-from-remote-server --user=root --host=localhost --port=$MASTER_MYPORT master-bin.000001
+
+# Clean-up
+
+--disable_query_log
+DROP DATABASE test1;
+DROP DATABASE test2;
+DROP DATABASE test3;
+DROP DATABASE xtest1;
+DROP DATABASE xtest2;
+--enable_query_log
+
diff --git a/mysql-test/suite/innodb_plugin/r/binlog_consistent.result b/mysql-test/suite/innodb_plugin/r/binlog_consistent.result
new file mode 100644
index 00000000000..428b828d699
--- /dev/null
+++ b/mysql-test/suite/innodb_plugin/r/binlog_consistent.result
@@ -0,0 +1,99 @@
+RESET MASTER;
+# Connection default
+CREATE TABLE t1 (a INT, b VARCHAR(100), PRIMARY KEY (a,b)) ENGINE=innodb;
+SHOW MASTER STATUS;
+File Position Binlog_Do_DB Binlog_Ignore_DB
+master-bin.000001 375
+SHOW STATUS LIKE 'binlog_snapshot_%';
+Variable_name Value
+binlog_snapshot_file master-bin.000001
+binlog_snapshot_position 375
+BEGIN;
+INSERT INTO t1 VALUES (0, "");
+# Connection con1
+BEGIN;
+INSERT INTO t1 VALUES (1, "");
+# Connection con2
+CREATE TABLE t2 (a INT PRIMARY KEY) ENGINE=myisam;
+BEGIN;
+INSERT INTO t1 VALUES (2, "first");
+INSERT INTO t2 VALUES (2);
+INSERT INTO t1 VALUES (2, "second");
+# Connection default
+COMMIT;
+SET TRANSACTION ISOLATION LEVEL READ COMMITTED;
+START TRANSACTION WITH CONSISTENT SNAPSHOT;
+# Connection con3
+BEGIN;
+INSERT INTO t1 VALUES (3, "");
+INSERT INTO t2 VALUES (3);
+# Connection con4
+BEGIN;
+INSERT INTO t1 VALUES (4, "");
+COMMIT;
+# Connection default
+SELECT * FROM t1 ORDER BY a,b;
+a b
+0
+SHOW STATUS LIKE 'binlog_snapshot_%';
+Variable_name Value
+binlog_snapshot_file master-bin.000001
+binlog_snapshot_position 674
+SHOW MASTER STATUS;
+File Position Binlog_Do_DB Binlog_Ignore_DB
+master-bin.000001 861
+SELECT * FROM t2 ORDER BY a;
+a
+2
+3
+# Connection con1
+COMMIT;
+# Connection con2
+COMMIT;
+# Connection con3
+COMMIT;
+FLUSH LOGS;
+# Connection default
+SELECT * FROM t1 ORDER BY a,b;
+a b
+0
+SHOW STATUS LIKE 'binlog_snapshot_%';
+Variable_name Value
+binlog_snapshot_file master-bin.000001
+binlog_snapshot_position 674
+SHOW MASTER STATUS;
+File Position Binlog_Do_DB Binlog_Ignore_DB
+master-bin.000002 240
+COMMIT;
+SHOW STATUS LIKE 'binlog_snapshot_%';
+Variable_name Value
+binlog_snapshot_file master-bin.000002
+binlog_snapshot_position 240
+SHOW MASTER STATUS;
+File Position Binlog_Do_DB Binlog_Ignore_DB
+master-bin.000002 240
+SHOW BINLOG EVENTS;
+Log_name Pos Event_type Server_id End_log_pos Info
+master-bin.000001 4 Format_desc 1 240 Server ver: #, Binlog ver: #
+master-bin.000001 240 Query 1 375 use `test`; CREATE TABLE t1 (a INT, b VARCHAR(100), PRIMARY KEY (a,b)) ENGINE=innodb
+master-bin.000001 375 Query 1 487 use `test`; CREATE TABLE t2 (a INT PRIMARY KEY) ENGINE=myisam
+master-bin.000001 487 Query 1 555 BEGIN
+master-bin.000001 555 Query 1 647 use `test`; INSERT INTO t1 VALUES (0, "")
+master-bin.000001 647 Xid 1 674 COMMIT /* XID */
+master-bin.000001 674 Query 1 742 BEGIN
+master-bin.000001 742 Query 1 834 use `test`; INSERT INTO t1 VALUES (4, "")
+master-bin.000001 834 Xid 1 861 COMMIT /* XID */
+master-bin.000001 861 Query 1 929 BEGIN
+master-bin.000001 929 Query 1 1021 use `test`; INSERT INTO t1 VALUES (1, "")
+master-bin.000001 1021 Xid 1 1048 COMMIT /* XID */
+master-bin.000001 1048 Query 1 1116 BEGIN
+master-bin.000001 1116 Query 1 1213 use `test`; INSERT INTO t1 VALUES (2, "first")
+master-bin.000001 1213 Query 1 1301 use `test`; INSERT INTO t2 VALUES (2)
+master-bin.000001 1301 Query 1 1399 use `test`; INSERT INTO t1 VALUES (2, "second")
+master-bin.000001 1399 Xid 1 1426 COMMIT /* XID */
+master-bin.000001 1426 Query 1 1494 BEGIN
+master-bin.000001 1494 Query 1 1586 use `test`; INSERT INTO t1 VALUES (3, "")
+master-bin.000001 1586 Query 1 1674 use `test`; INSERT INTO t2 VALUES (3)
+master-bin.000001 1674 Xid 1 1701 COMMIT /* XID */
+master-bin.000001 1701 Rotate 1 1745 master-bin.000002;pos=4
+DROP TABLE t1,t2;
diff --git a/mysql-test/suite/innodb_plugin/r/group_commit.result b/mysql-test/suite/innodb_plugin/r/group_commit.result
new file mode 100644
index 00000000000..9e80dc6da6e
--- /dev/null
+++ b/mysql-test/suite/innodb_plugin/r/group_commit.result
@@ -0,0 +1,63 @@
+CREATE TABLE t1 (a VARCHAR(10) PRIMARY KEY) ENGINE=innodb;
+SELECT variable_value INTO @commits FROM information_schema.global_status
+WHERE variable_name = 'binlog_commits';
+SELECT variable_value INTO @group_commits FROM information_schema.global_status
+WHERE variable_name = 'binlog_group_commits';
+SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL group1_running WAIT_FOR group2_queued";
+INSERT INTO t1 VALUES ("con1");
+set DEBUG_SYNC= "now WAIT_FOR group1_running";
+SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL group2_con2";
+SET DEBUG_SYNC= "commit_after_release_LOCK_log WAIT_FOR group3_committed";
+SET DEBUG_SYNC= "commit_after_group_run_commit_ordered SIGNAL group2_visible WAIT_FOR group2_checked";
+INSERT INTO t1 VALUES ("con2");
+SET DEBUG_SYNC= "now WAIT_FOR group2_con2";
+SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL group2_con3";
+INSERT INTO t1 VALUES ("con3");
+SET DEBUG_SYNC= "now WAIT_FOR group2_con3";
+SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL group2_con4";
+INSERT INTO t1 VALUES ("con4");
+SET DEBUG_SYNC= "now WAIT_FOR group2_con4";
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+SELECT * FROM t1 ORDER BY a;
+a
+SET DEBUG_SYNC= "now SIGNAL group2_queued";
+SELECT * FROM t1 ORDER BY a;
+a
+con1
+SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL group3_con5";
+SET DEBUG_SYNC= "commit_after_get_LOCK_log SIGNAL con5_leader WAIT_FOR con6_queued";
+INSERT INTO t1 VALUES ("con5");
+SET DEBUG_SYNC= "now WAIT_FOR con5_leader";
+SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL con6_queued";
+INSERT INTO t1 VALUES ("con6");
+SET DEBUG_SYNC= "now WAIT_FOR group3_con5";
+SELECT * FROM t1 ORDER BY a;
+a
+con1
+SET DEBUG_SYNC= "now SIGNAL group3_committed";
+SET DEBUG_SYNC= "now WAIT_FOR group2_visible";
+SELECT * FROM t1 ORDER BY a;
+a
+con1
+con2
+con3
+con4
+SET DEBUG_SYNC= "now SIGNAL group2_checked";
+SELECT * FROM t1 ORDER BY a;
+a
+con1
+con2
+con3
+con4
+con5
+con6
+SELECT variable_value - @commits FROM information_schema.global_status
+WHERE variable_name = 'binlog_commits';
+variable_value - @commits
+6
+SELECT variable_value - @group_commits FROM information_schema.global_status
+WHERE variable_name = 'binlog_group_commits';
+variable_value - @group_commits
+3
+SET DEBUG_SYNC= 'RESET';
+DROP TABLE t1;
diff --git a/mysql-test/suite/innodb_plugin/r/group_commit_binlog_pos.result b/mysql-test/suite/innodb_plugin/r/group_commit_binlog_pos.result
new file mode 100644
index 00000000000..bfeb2474edb
--- /dev/null
+++ b/mysql-test/suite/innodb_plugin/r/group_commit_binlog_pos.result
@@ -0,0 +1,35 @@
+CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=innodb;
+INSERT INTO t1 VALUES (0);
+SET DEBUG_SYNC= "commit_after_get_LOCK_log SIGNAL con1_waiting WAIT_FOR con3_queued";
+SET DEBUG_SYNC= "commit_loop_entry_commit_ordered SIGNAL con1_loop WAIT_FOR con1_loop_cont EXECUTE 3";
+INSERT INTO t1 VALUES (1);
+SET DEBUG_SYNC= "now WAIT_FOR con1_waiting";
+SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL con2_queued";
+INSERT INTO t1 VALUES (2);
+SET DEBUG_SYNC= "now WAIT_FOR con2_queued";
+SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL con3_queued";
+INSERT INTO t1 VALUES (3);
+SET DEBUG_SYNC= "now WAIT_FOR con1_loop";
+SET DEBUG_SYNC= "now SIGNAL con1_loop_cont";
+SET DEBUG_SYNC= "now WAIT_FOR con1_loop";
+SET DEBUG_SYNC= "now SIGNAL con1_loop_cont";
+SET DEBUG_SYNC= "now WAIT_FOR con1_loop";
+SELECT * FROM t1 ORDER BY a;
+a
+0
+1
+2
+SET SESSION debug="+d,crash_dispatch_command_before";
+SELECT 1;
+Got one of the listed errors
+Got one of the listed errors
+Got one of the listed errors
+SELECT * FROM t1 ORDER BY a;
+a
+0
+1
+2
+3
+InnoDB: Last MySQL binlog file position 0 901, file name ./master-bin.000001
+SET DEBUG_SYNC= 'RESET';
+DROP TABLE t1;
diff --git a/mysql-test/suite/innodb_plugin/r/group_commit_binlog_pos_no_optimize_thread.result b/mysql-test/suite/innodb_plugin/r/group_commit_binlog_pos_no_optimize_thread.result
new file mode 100644
index 00000000000..a1c31cd411f
--- /dev/null
+++ b/mysql-test/suite/innodb_plugin/r/group_commit_binlog_pos_no_optimize_thread.result
@@ -0,0 +1,36 @@
+CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=innodb;
+INSERT INTO t1 VALUES (0);
+SET DEBUG_SYNC= "commit_after_get_LOCK_log SIGNAL con1_waiting WAIT_FOR con3_queued";
+SET DEBUG_SYNC= "commit_loop_entry_commit_ordered SIGNAL con1_loop WAIT_FOR con1_loop_cont";
+INSERT INTO t1 VALUES (1);
+SET DEBUG_SYNC= "now WAIT_FOR con1_waiting";
+SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL con2_queued";
+SET DEBUG_SYNC= "commit_loop_entry_commit_ordered SIGNAL con1_loop WAIT_FOR con1_loop_cont";
+INSERT INTO t1 VALUES (2);
+SET DEBUG_SYNC= "now WAIT_FOR con2_queued";
+SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL con3_queued";
+SET DEBUG_SYNC= "commit_loop_entry_commit_ordered SIGNAL con1_loop WAIT_FOR con1_loop_cont";
+INSERT INTO t1 VALUES (3);
+SET DEBUG_SYNC= "now WAIT_FOR con1_loop";
+SET DEBUG_SYNC= "now SIGNAL con1_loop_cont";
+SET DEBUG_SYNC= "now WAIT_FOR con1_loop";
+SET DEBUG_SYNC= "now SIGNAL con1_loop_cont";
+SET DEBUG_SYNC= "now WAIT_FOR con1_loop";
+SELECT * FROM t1 ORDER BY a;
+a
+0
+1
+2
+SET SESSION debug="+d,crash_dispatch_command_before";
+SELECT 1;
+Got one of the listed errors
+Got one of the listed errors
+SELECT * FROM t1 ORDER BY a;
+a
+0
+1
+2
+3
+InnoDB: Last MySQL binlog file position 0 901, file name ./master-bin.000001
+SET DEBUG_SYNC= 'RESET';
+DROP TABLE t1;
diff --git a/mysql-test/suite/innodb_plugin/r/group_commit_crash.result b/mysql-test/suite/innodb_plugin/r/group_commit_crash.result
new file mode 100644
index 00000000000..a55d8f29dda
--- /dev/null
+++ b/mysql-test/suite/innodb_plugin/r/group_commit_crash.result
@@ -0,0 +1,125 @@
+CREATE TABLE t1(a CHAR(255),
+b CHAR(255),
+c CHAR(255),
+d CHAR(255),
+id INT AUTO_INCREMENT,
+PRIMARY KEY(id)) ENGINE=InnoDB;
+create table t2 like t1;
+create procedure setcrash(IN i INT)
+begin
+CASE i
+WHEN 1 THEN SET SESSION debug="d,crash_commit_after_prepare";
+WHEN 2 THEN SET SESSION debug="d,crash_commit_after_log";
+WHEN 3 THEN SET SESSION debug="d,crash_commit_before_unlog";
+WHEN 4 THEN SET SESSION debug="d,crash_commit_after";
+WHEN 5 THEN SET SESSION debug="d,crash_commit_before";
+ELSE BEGIN END;
+END CASE;
+end //
+FLUSH TABLES;
+INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd');
+INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd');
+INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd');
+INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd');
+INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd');
+INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd');
+INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd');
+INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd');
+INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd');
+INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd');
+SET binlog_format= mixed;
+RESET MASTER;
+START TRANSACTION;
+insert into t1 select * from t2;
+call setcrash(5);
+COMMIT;
+Got one of the listed errors
+SELECT * FROM t1 ORDER BY id;
+a b c d id
+SHOW BINLOG EVENTS LIMIT 2,1;
+Log_name Pos Event_type Server_id End_log_pos Info
+delete from t1;
+SET binlog_format= mixed;
+RESET MASTER;
+START TRANSACTION;
+insert into t1 select * from t2;
+call setcrash(4);
+COMMIT;
+Got one of the listed errors
+SELECT * FROM t1 ORDER BY id;
+a b c d id
+a b c d 1
+a b c d 2
+a b c d 3
+a b c d 4
+a b c d 5
+a b c d 6
+a b c d 7
+a b c d 8
+a b c d 9
+a b c d 10
+SHOW BINLOG EVENTS LIMIT 2,1;
+Log_name Pos Event_type Server_id End_log_pos Info
+master-bin.000001 # Query 1 # use `test`; insert into t1 select * from t2
+delete from t1;
+SET binlog_format= mixed;
+RESET MASTER;
+START TRANSACTION;
+insert into t1 select * from t2;
+call setcrash(3);
+COMMIT;
+Got one of the listed errors
+SELECT * FROM t1 ORDER BY id;
+a b c d id
+a b c d 1
+a b c d 2
+a b c d 3
+a b c d 4
+a b c d 5
+a b c d 6
+a b c d 7
+a b c d 8
+a b c d 9
+a b c d 10
+SHOW BINLOG EVENTS LIMIT 2,1;
+Log_name Pos Event_type Server_id End_log_pos Info
+master-bin.000001 # Query 1 # use `test`; insert into t1 select * from t2
+delete from t1;
+SET binlog_format= mixed;
+RESET MASTER;
+START TRANSACTION;
+insert into t1 select * from t2;
+call setcrash(2);
+COMMIT;
+Got one of the listed errors
+SELECT * FROM t1 ORDER BY id;
+a b c d id
+a b c d 1
+a b c d 2
+a b c d 3
+a b c d 4
+a b c d 5
+a b c d 6
+a b c d 7
+a b c d 8
+a b c d 9
+a b c d 10
+SHOW BINLOG EVENTS LIMIT 2,1;
+Log_name Pos Event_type Server_id End_log_pos Info
+master-bin.000001 # Query 1 # use `test`; insert into t1 select * from t2
+delete from t1;
+SET binlog_format= mixed;
+RESET MASTER;
+START TRANSACTION;
+insert into t1 select * from t2;
+call setcrash(1);
+COMMIT;
+Got one of the listed errors
+SELECT * FROM t1 ORDER BY id;
+a b c d id
+SHOW BINLOG EVENTS LIMIT 2,1;
+Log_name Pos Event_type Server_id End_log_pos Info
+delete from t1;
+DROP TABLE t1;
+DROP TABLE t2;
+DROP PROCEDURE setcrash;
diff --git a/mysql-test/suite/innodb_plugin/r/group_commit_crash_no_optimize_thread.result b/mysql-test/suite/innodb_plugin/r/group_commit_crash_no_optimize_thread.result
new file mode 100644
index 00000000000..a55d8f29dda
--- /dev/null
+++ b/mysql-test/suite/innodb_plugin/r/group_commit_crash_no_optimize_thread.result
@@ -0,0 +1,125 @@
+CREATE TABLE t1(a CHAR(255),
+b CHAR(255),
+c CHAR(255),
+d CHAR(255),
+id INT AUTO_INCREMENT,
+PRIMARY KEY(id)) ENGINE=InnoDB;
+create table t2 like t1;
+create procedure setcrash(IN i INT)
+begin
+CASE i
+WHEN 1 THEN SET SESSION debug="d,crash_commit_after_prepare";
+WHEN 2 THEN SET SESSION debug="d,crash_commit_after_log";
+WHEN 3 THEN SET SESSION debug="d,crash_commit_before_unlog";
+WHEN 4 THEN SET SESSION debug="d,crash_commit_after";
+WHEN 5 THEN SET SESSION debug="d,crash_commit_before";
+ELSE BEGIN END;
+END CASE;
+end //
+FLUSH TABLES;
+INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd');
+INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd');
+INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd');
+INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd');
+INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd');
+INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd');
+INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd');
+INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd');
+INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd');
+INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd');
+SET binlog_format= mixed;
+RESET MASTER;
+START TRANSACTION;
+insert into t1 select * from t2;
+call setcrash(5);
+COMMIT;
+Got one of the listed errors
+SELECT * FROM t1 ORDER BY id;
+a b c d id
+SHOW BINLOG EVENTS LIMIT 2,1;
+Log_name Pos Event_type Server_id End_log_pos Info
+delete from t1;
+SET binlog_format= mixed;
+RESET MASTER;
+START TRANSACTION;
+insert into t1 select * from t2;
+call setcrash(4);
+COMMIT;
+Got one of the listed errors
+SELECT * FROM t1 ORDER BY id;
+a b c d id
+a b c d 1
+a b c d 2
+a b c d 3
+a b c d 4
+a b c d 5
+a b c d 6
+a b c d 7
+a b c d 8
+a b c d 9
+a b c d 10
+SHOW BINLOG EVENTS LIMIT 2,1;
+Log_name Pos Event_type Server_id End_log_pos Info
+master-bin.000001 # Query 1 # use `test`; insert into t1 select * from t2
+delete from t1;
+SET binlog_format= mixed;
+RESET MASTER;
+START TRANSACTION;
+insert into t1 select * from t2;
+call setcrash(3);
+COMMIT;
+Got one of the listed errors
+SELECT * FROM t1 ORDER BY id;
+a b c d id
+a b c d 1
+a b c d 2
+a b c d 3
+a b c d 4
+a b c d 5
+a b c d 6
+a b c d 7
+a b c d 8
+a b c d 9
+a b c d 10
+SHOW BINLOG EVENTS LIMIT 2,1;
+Log_name Pos Event_type Server_id End_log_pos Info
+master-bin.000001 # Query 1 # use `test`; insert into t1 select * from t2
+delete from t1;
+SET binlog_format= mixed;
+RESET MASTER;
+START TRANSACTION;
+insert into t1 select * from t2;
+call setcrash(2);
+COMMIT;
+Got one of the listed errors
+SELECT * FROM t1 ORDER BY id;
+a b c d id
+a b c d 1
+a b c d 2
+a b c d 3
+a b c d 4
+a b c d 5
+a b c d 6
+a b c d 7
+a b c d 8
+a b c d 9
+a b c d 10
+SHOW BINLOG EVENTS LIMIT 2,1;
+Log_name Pos Event_type Server_id End_log_pos Info
+master-bin.000001 # Query 1 # use `test`; insert into t1 select * from t2
+delete from t1;
+SET binlog_format= mixed;
+RESET MASTER;
+START TRANSACTION;
+insert into t1 select * from t2;
+call setcrash(1);
+COMMIT;
+Got one of the listed errors
+SELECT * FROM t1 ORDER BY id;
+a b c d id
+SHOW BINLOG EVENTS LIMIT 2,1;
+Log_name Pos Event_type Server_id End_log_pos Info
+delete from t1;
+DROP TABLE t1;
+DROP TABLE t2;
+DROP PROCEDURE setcrash;
diff --git a/mysql-test/suite/innodb_plugin/r/group_commit_no_optimize_thread.result b/mysql-test/suite/innodb_plugin/r/group_commit_no_optimize_thread.result
new file mode 100644
index 00000000000..7c2636fb472
--- /dev/null
+++ b/mysql-test/suite/innodb_plugin/r/group_commit_no_optimize_thread.result
@@ -0,0 +1,63 @@
+CREATE TABLE t1 (a VARCHAR(10) PRIMARY KEY) ENGINE=innodb;
+SELECT variable_value INTO @commits FROM information_schema.global_status
+WHERE variable_name = 'binlog_commits';
+SELECT variable_value INTO @group_commits FROM information_schema.global_status
+WHERE variable_name = 'binlog_group_commits';
+SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL group1_running WAIT_FOR group2_queued";
+INSERT INTO t1 VALUES ("con1");
+set DEBUG_SYNC= "now WAIT_FOR group1_running";
+SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL group2_con2";
+SET DEBUG_SYNC= "commit_after_release_LOCK_log WAIT_FOR group3_committed";
+INSERT INTO t1 VALUES ("con2");
+SET DEBUG_SYNC= "now WAIT_FOR group2_con2";
+SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL group2_con3";
+INSERT INTO t1 VALUES ("con3");
+SET DEBUG_SYNC= "now WAIT_FOR group2_con3";
+SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL group2_con4";
+SET DEBUG_SYNC= "commit_after_group_run_commit_ordered SIGNAL group2_visible WAIT_FOR group2_checked";
+INSERT INTO t1 VALUES ("con4");
+SET DEBUG_SYNC= "now WAIT_FOR group2_con4";
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+SELECT * FROM t1 ORDER BY a;
+a
+SET DEBUG_SYNC= "now SIGNAL group2_queued";
+SELECT * FROM t1 ORDER BY a;
+a
+con1
+SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL group3_con5";
+SET DEBUG_SYNC= "commit_after_get_LOCK_log SIGNAL con5_leader WAIT_FOR con6_queued";
+INSERT INTO t1 VALUES ("con5");
+SET DEBUG_SYNC= "now WAIT_FOR con5_leader";
+SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL con6_queued";
+INSERT INTO t1 VALUES ("con6");
+SET DEBUG_SYNC= "now WAIT_FOR group3_con5";
+SELECT * FROM t1 ORDER BY a;
+a
+con1
+SET DEBUG_SYNC= "now SIGNAL group3_committed";
+SET DEBUG_SYNC= "now WAIT_FOR group2_visible";
+SELECT * FROM t1 ORDER BY a;
+a
+con1
+con2
+con3
+con4
+SET DEBUG_SYNC= "now SIGNAL group2_checked";
+SELECT * FROM t1 ORDER BY a;
+a
+con1
+con2
+con3
+con4
+con5
+con6
+SELECT variable_value - @commits FROM information_schema.global_status
+WHERE variable_name = 'binlog_commits';
+variable_value - @commits
+6
+SELECT variable_value - @group_commits FROM information_schema.global_status
+WHERE variable_name = 'binlog_group_commits';
+variable_value - @group_commits
+3
+SET DEBUG_SYNC= 'RESET';
+DROP TABLE t1;
diff --git a/mysql-test/suite/innodb_plugin/t/binlog_consistent.test b/mysql-test/suite/innodb_plugin/t/binlog_consistent.test
new file mode 100644
index 00000000000..1a2c84f3439
--- /dev/null
+++ b/mysql-test/suite/innodb_plugin/t/binlog_consistent.test
@@ -0,0 +1,88 @@
+--source include/have_log_bin.inc
+--source include/have_binlog_format_mixed_or_statement.inc
+--source include/have_innodb_plugin.inc
+
+RESET MASTER;
+
+# Test that we get the correct binlog position from START TRANSACTION WITH
+# CONSISTENT SNAPSHOT even when other transactions are active.
+
+connect(con1,localhost,root,,);
+connect(con2,localhost,root,,);
+connect(con3,localhost,root,,);
+connect(con4,localhost,root,,);
+
+connection default;
+--echo # Connection default
+
+CREATE TABLE t1 (a INT, b VARCHAR(100), PRIMARY KEY (a,b)) ENGINE=innodb;
+SHOW MASTER STATUS;
+SHOW STATUS LIKE 'binlog_snapshot_%';
+BEGIN;
+INSERT INTO t1 VALUES (0, "");
+
+connection con1;
+--echo # Connection con1
+BEGIN;
+INSERT INTO t1 VALUES (1, "");
+
+connection con2;
+--echo # Connection con2
+CREATE TABLE t2 (a INT PRIMARY KEY) ENGINE=myisam;
+BEGIN;
+INSERT INTO t1 VALUES (2, "first");
+INSERT INTO t2 VALUES (2);
+INSERT INTO t1 VALUES (2, "second");
+
+connection default;
+--echo # Connection default
+COMMIT;
+
+SET TRANSACTION ISOLATION LEVEL READ COMMITTED;
+START TRANSACTION WITH CONSISTENT SNAPSHOT;
+
+connection con3;
+--echo # Connection con3
+BEGIN;
+INSERT INTO t1 VALUES (3, "");
+INSERT INTO t2 VALUES (3);
+
+connection con4;
+--echo # Connection con4
+BEGIN;
+INSERT INTO t1 VALUES (4, "");
+COMMIT;
+
+connection default;
+--echo # Connection default
+SELECT * FROM t1 ORDER BY a,b;
+SHOW STATUS LIKE 'binlog_snapshot_%';
+SHOW MASTER STATUS;
+SELECT * FROM t2 ORDER BY a;
+
+connection con1;
+--echo # Connection con1
+COMMIT;
+
+connection con2;
+--echo # Connection con2
+COMMIT;
+
+connection con3;
+--echo # Connection con3
+COMMIT;
+FLUSH LOGS;
+
+connection default;
+--echo # Connection default
+SELECT * FROM t1 ORDER BY a,b;
+SHOW STATUS LIKE 'binlog_snapshot_%';
+SHOW MASTER STATUS;
+COMMIT;
+SHOW STATUS LIKE 'binlog_snapshot_%';
+SHOW MASTER STATUS;
+
+--replace_regex /\/\* xid=.* \*\//\/* XID *\// /Server ver: .*, Binlog ver: .*/Server ver: #, Binlog ver: #/ /table_id: [0-9]+/table_id: #/
+SHOW BINLOG EVENTS;
+
+DROP TABLE t1,t2;
diff --git a/mysql-test/suite/innodb_plugin/t/group_commit.test b/mysql-test/suite/innodb_plugin/t/group_commit.test
new file mode 100644
index 00000000000..4c60b7de331
--- /dev/null
+++ b/mysql-test/suite/innodb_plugin/t/group_commit.test
@@ -0,0 +1,115 @@
+--source include/have_debug_sync.inc
+--source include/have_innodb_plugin.inc
+--source include/have_log_bin.inc
+
+# Test some group commit code paths by using debug_sync to do controlled
+# commits of 6 transactions: first 1 alone, then 3 as a group, then 2 as a
+# group.
+#
+# Group 3 is allowed to race as far as possible ahead before group 2 finishes
+# to check some edge case for concurrency control.
+
+CREATE TABLE t1 (a VARCHAR(10) PRIMARY KEY) ENGINE=innodb;
+
+SELECT variable_value INTO @commits FROM information_schema.global_status
+ WHERE variable_name = 'binlog_commits';
+SELECT variable_value INTO @group_commits FROM information_schema.global_status
+ WHERE variable_name = 'binlog_group_commits';
+
+connect(con1,localhost,root,,);
+connect(con2,localhost,root,,);
+connect(con3,localhost,root,,);
+connect(con4,localhost,root,,);
+connect(con5,localhost,root,,);
+connect(con6,localhost,root,,);
+
+# Start group1 (with one thread) doing commit, waiting for
+# group2 to queue up before finishing.
+
+connection con1;
+SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL group1_running WAIT_FOR group2_queued";
+send INSERT INTO t1 VALUES ("con1");
+
+# Make group2 (with three threads) queue up.
+# Make sure con2 is the group commit leader for group2.
+# Make group2 wait with running commit_ordered() until group3 has committed.
+
+connection con2;
+set DEBUG_SYNC= "now WAIT_FOR group1_running";
+SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL group2_con2";
+SET DEBUG_SYNC= "commit_after_release_LOCK_log WAIT_FOR group3_committed";
+SET DEBUG_SYNC= "commit_after_group_run_commit_ordered SIGNAL group2_visible WAIT_FOR group2_checked";
+send INSERT INTO t1 VALUES ("con2");
+connection con3;
+SET DEBUG_SYNC= "now WAIT_FOR group2_con2";
+SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL group2_con3";
+send INSERT INTO t1 VALUES ("con3");
+connection con4;
+SET DEBUG_SYNC= "now WAIT_FOR group2_con3";
+SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL group2_con4";
+send INSERT INTO t1 VALUES ("con4");
+
+# When group2 is queued, let group1 continue and queue group3.
+
+connection default;
+SET DEBUG_SYNC= "now WAIT_FOR group2_con4";
+
+# At this point, trasaction 1 is still not visible as commit_ordered() has not
+# been called yet.
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+SELECT * FROM t1 ORDER BY a;
+
+SET DEBUG_SYNC= "now SIGNAL group2_queued";
+connection con1;
+reap;
+
+# Now transaction 1 is visible.
+connection default;
+SELECT * FROM t1 ORDER BY a;
+
+connection con5;
+SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL group3_con5";
+SET DEBUG_SYNC= "commit_after_get_LOCK_log SIGNAL con5_leader WAIT_FOR con6_queued";
+send INSERT INTO t1 VALUES ("con5");
+
+connection con6;
+SET DEBUG_SYNC= "now WAIT_FOR con5_leader";
+SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL con6_queued";
+send INSERT INTO t1 VALUES ("con6");
+
+connection default;
+SET DEBUG_SYNC= "now WAIT_FOR group3_con5";
+# Still only transaction 1 visible, as group2 have not yet run commit_ordered().
+SELECT * FROM t1 ORDER BY a;
+SET DEBUG_SYNC= "now SIGNAL group3_committed";
+SET DEBUG_SYNC= "now WAIT_FOR group2_visible";
+# Now transactions 1-4 visible.
+SELECT * FROM t1 ORDER BY a;
+SET DEBUG_SYNC= "now SIGNAL group2_checked";
+
+connection con2;
+reap;
+
+connection con3;
+reap;
+
+connection con4;
+reap;
+
+connection con5;
+reap;
+
+connection con6;
+reap;
+
+connection default;
+# Check all transactions finally visible.
+SELECT * FROM t1 ORDER BY a;
+
+SELECT variable_value - @commits FROM information_schema.global_status
+ WHERE variable_name = 'binlog_commits';
+SELECT variable_value - @group_commits FROM information_schema.global_status
+ WHERE variable_name = 'binlog_group_commits';
+
+SET DEBUG_SYNC= 'RESET';
+DROP TABLE t1;
diff --git a/mysql-test/suite/innodb_plugin/t/group_commit_binlog_pos-master.opt b/mysql-test/suite/innodb_plugin/t/group_commit_binlog_pos-master.opt
new file mode 100644
index 00000000000..425fda95086
--- /dev/null
+++ b/mysql-test/suite/innodb_plugin/t/group_commit_binlog_pos-master.opt
@@ -0,0 +1 @@
+--skip-stack-trace --skip-core-file
diff --git a/mysql-test/suite/innodb_plugin/t/group_commit_binlog_pos.test b/mysql-test/suite/innodb_plugin/t/group_commit_binlog_pos.test
new file mode 100644
index 00000000000..0e5c60f0e94
--- /dev/null
+++ b/mysql-test/suite/innodb_plugin/t/group_commit_binlog_pos.test
@@ -0,0 +1,89 @@
+--source include/have_debug_sync.inc
+--source include/have_innodb_plugin.inc
+--source include/have_log_bin.inc
+--source include/have_binlog_format_mixed_or_statement.inc
+
+# Need DBUG to crash the server intentionally
+--source include/have_debug.inc
+# Don't test this under valgrind, memory leaks will occur as we crash
+--source include/not_valgrind.inc
+
+# The test case currently uses grep and tail, which may be unavailable on
+# some windows systems. But see MWL#191 for how to remove the need for grep.
+--source include/not_windows.inc
+
+# XtraDB stores the binlog position corresponding to the last commit, and
+# prints it during crash recovery.
+# Test that we get the correct position when we group commit several
+# transactions together.
+
+CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=innodb;
+INSERT INTO t1 VALUES (0);
+
+connect(con1,localhost,root,,);
+connect(con2,localhost,root,,);
+connect(con3,localhost,root,,);
+
+# Queue up three commits for group commit.
+
+connection con1;
+SET DEBUG_SYNC= "commit_after_get_LOCK_log SIGNAL con1_waiting WAIT_FOR con3_queued";
+SET DEBUG_SYNC= "commit_loop_entry_commit_ordered SIGNAL con1_loop WAIT_FOR con1_loop_cont EXECUTE 3";
+send INSERT INTO t1 VALUES (1);
+
+connection con2;
+SET DEBUG_SYNC= "now WAIT_FOR con1_waiting";
+SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL con2_queued";
+send INSERT INTO t1 VALUES (2);
+
+connection con3;
+SET DEBUG_SYNC= "now WAIT_FOR con2_queued";
+SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL con3_queued";
+send INSERT INTO t1 VALUES (3);
+
+connection default;
+SET DEBUG_SYNC= "now WAIT_FOR con1_loop";
+# At this point, no transactions are committed.
+SET DEBUG_SYNC= "now SIGNAL con1_loop_cont";
+SET DEBUG_SYNC= "now WAIT_FOR con1_loop";
+# At this point, 1 transaction is committed.
+SET DEBUG_SYNC= "now SIGNAL con1_loop_cont";
+SET DEBUG_SYNC= "now WAIT_FOR con1_loop";
+
+# At this point, 2 transactions are committed.
+SELECT * FROM t1 ORDER BY a;
+
+connection con2;
+reap;
+
+# Now crash the server with 1+2 in-memory committed, 3 only prepared.
+connection default;
+system echo wait-group_commit_binlog_pos.test >> $MYSQLTEST_VARDIR/tmp/mysqld.1.expect;
+SET SESSION debug="+d,crash_dispatch_command_before";
+--error 2006,2013
+SELECT 1;
+
+connection con1;
+--error 2006,2013
+reap;
+connection con3;
+--error 2006,2013
+reap;
+
+system echo restart-group_commit_binlog_pos.test >> $MYSQLTEST_VARDIR/tmp/mysqld.1.expect;
+
+connection default;
+--enable_reconnect
+--source include/wait_until_connected_again.inc
+
+# Crash recovery should recover all three transactions.
+SELECT * FROM t1 ORDER BY a;
+
+# Check that the binlog position reported by InnoDB is the correct one
+# for the end of the second transaction (as can be checked with
+# mysqlbinlog).
+let $MYSQLD_DATADIR= `SELECT @@datadir`;
+--exec grep 'InnoDB: Last MySQL binlog file position' $MYSQLD_DATADIR/../../log/mysqld.1.err | tail -1
+
+SET DEBUG_SYNC= 'RESET';
+DROP TABLE t1;
diff --git a/mysql-test/suite/innodb_plugin/t/group_commit_binlog_pos_no_optimize_thread-master.opt b/mysql-test/suite/innodb_plugin/t/group_commit_binlog_pos_no_optimize_thread-master.opt
new file mode 100644
index 00000000000..18d43988ffd
--- /dev/null
+++ b/mysql-test/suite/innodb_plugin/t/group_commit_binlog_pos_no_optimize_thread-master.opt
@@ -0,0 +1 @@
+--binlog-optimize-thread-scheduling=0 --skip-stack-trace --skip-core-file
diff --git a/mysql-test/suite/innodb_plugin/t/group_commit_binlog_pos_no_optimize_thread.test b/mysql-test/suite/innodb_plugin/t/group_commit_binlog_pos_no_optimize_thread.test
new file mode 100644
index 00000000000..b044fa21ad3
--- /dev/null
+++ b/mysql-test/suite/innodb_plugin/t/group_commit_binlog_pos_no_optimize_thread.test
@@ -0,0 +1,90 @@
+--source include/have_debug_sync.inc
+--source include/have_innodb_plugin.inc
+--source include/have_log_bin.inc
+--source include/have_binlog_format_mixed_or_statement.inc
+
+# Need DBUG to crash the server intentionally
+--source include/have_debug.inc
+# Don't test this under valgrind, memory leaks will occur as we crash
+--source include/not_valgrind.inc
+
+# The test case currently uses grep and tail, which may be unavailable on
+# some windows systems. But see MWL#191 for how to remove the need for grep.
+--source include/not_windows.inc
+
+# XtraDB stores the binlog position corresponding to the last commit, and
+# prints it during crash recovery.
+# Test that we get the correct position when we group commit several
+# transactions together.
+
+CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=innodb;
+INSERT INTO t1 VALUES (0);
+
+connect(con1,localhost,root,,);
+connect(con2,localhost,root,,);
+connect(con3,localhost,root,,);
+
+# Queue up three commits for group commit.
+
+connection con1;
+SET DEBUG_SYNC= "commit_after_get_LOCK_log SIGNAL con1_waiting WAIT_FOR con3_queued";
+SET DEBUG_SYNC= "commit_loop_entry_commit_ordered SIGNAL con1_loop WAIT_FOR con1_loop_cont";
+send INSERT INTO t1 VALUES (1);
+
+connection con2;
+SET DEBUG_SYNC= "now WAIT_FOR con1_waiting";
+SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL con2_queued";
+SET DEBUG_SYNC= "commit_loop_entry_commit_ordered SIGNAL con1_loop WAIT_FOR con1_loop_cont";
+send INSERT INTO t1 VALUES (2);
+
+connection con3;
+SET DEBUG_SYNC= "now WAIT_FOR con2_queued";
+SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL con3_queued";
+SET DEBUG_SYNC= "commit_loop_entry_commit_ordered SIGNAL con1_loop WAIT_FOR con1_loop_cont";
+send INSERT INTO t1 VALUES (3);
+
+connection default;
+SET DEBUG_SYNC= "now WAIT_FOR con1_loop";
+# At this point, no transactions are committed.
+SET DEBUG_SYNC= "now SIGNAL con1_loop_cont";
+SET DEBUG_SYNC= "now WAIT_FOR con1_loop";
+# At this point, 1 transaction is committed.
+SET DEBUG_SYNC= "now SIGNAL con1_loop_cont";
+SET DEBUG_SYNC= "now WAIT_FOR con1_loop";
+
+# At this point, 2 transactions are committed.
+SELECT * FROM t1 ORDER BY a;
+
+connection con1;
+reap;
+connection con2;
+reap;
+
+# Now crash the server with 1+2 in-memory committed, 3 only prepared.
+connection default;
+system echo wait-group_commit_binlog_pos_no_optimize_thread.test >> $MYSQLTEST_VARDIR/tmp/mysqld.1.expect;
+SET SESSION debug="+d,crash_dispatch_command_before";
+--error 2006,2013
+SELECT 1;
+
+connection con3;
+--error 2006,2013
+reap;
+
+system echo restart-group_commit_binlog_pos_no_optimize_thread.test >> $MYSQLTEST_VARDIR/tmp/mysqld.1.expect;
+
+connection default;
+--enable_reconnect
+--source include/wait_until_connected_again.inc
+
+# Crash recovery should recover all three transactions.
+SELECT * FROM t1 ORDER BY a;
+
+# Check that the binlog position reported by InnoDB is the correct one
+# for the end of the second transaction (as can be checked with
+# mysqlbinlog).
+let $MYSQLD_DATADIR= `SELECT @@datadir`;
+--exec grep 'InnoDB: Last MySQL binlog file position' $MYSQLD_DATADIR/../../log/mysqld.1.err | tail -1
+
+SET DEBUG_SYNC= 'RESET';
+DROP TABLE t1;
diff --git a/mysql-test/suite/innodb_plugin/t/group_commit_crash-master.opt b/mysql-test/suite/innodb_plugin/t/group_commit_crash-master.opt
new file mode 100644
index 00000000000..425fda95086
--- /dev/null
+++ b/mysql-test/suite/innodb_plugin/t/group_commit_crash-master.opt
@@ -0,0 +1 @@
+--skip-stack-trace --skip-core-file
diff --git a/mysql-test/suite/innodb_plugin/t/group_commit_crash.test b/mysql-test/suite/innodb_plugin/t/group_commit_crash.test
new file mode 100644
index 00000000000..654ca313aab
--- /dev/null
+++ b/mysql-test/suite/innodb_plugin/t/group_commit_crash.test
@@ -0,0 +1,82 @@
+# Testing group commit by crashing a few times.
+# Test adapted from the Facebook patch: lp:mysqlatfacebook
+--source include/not_embedded.inc
+# Don't test this under valgrind, memory leaks will occur
+--source include/not_valgrind.inc
+
+# Binary must be compiled with debug for crash to occur
+--source include/have_debug.inc
+--source include/have_innodb_plugin.inc
+--source include/have_log_bin.inc
+
+let $file_format_check=`SELECT @@innodb_file_format_check`;
+CREATE TABLE t1(a CHAR(255),
+ b CHAR(255),
+ c CHAR(255),
+ d CHAR(255),
+ id INT AUTO_INCREMENT,
+ PRIMARY KEY(id)) ENGINE=InnoDB;
+create table t2 like t1;
+delimiter //;
+create procedure setcrash(IN i INT)
+begin
+ CASE i
+ WHEN 1 THEN SET SESSION debug="d,crash_commit_after_prepare";
+ WHEN 2 THEN SET SESSION debug="d,crash_commit_after_log";
+ WHEN 3 THEN SET SESSION debug="d,crash_commit_before_unlog";
+ WHEN 4 THEN SET SESSION debug="d,crash_commit_after";
+ WHEN 5 THEN SET SESSION debug="d,crash_commit_before";
+ ELSE BEGIN END;
+ END CASE;
+end //
+delimiter ;//
+# Avoid getting a crashed mysql.proc table.
+FLUSH TABLES;
+
+let $numtests = 5;
+
+let $numinserts = 10;
+while ($numinserts)
+{
+ dec $numinserts;
+ INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd');
+}
+
+--enable_reconnect
+
+while ($numtests)
+{
+ SET binlog_format= mixed;
+ RESET MASTER;
+
+ START TRANSACTION;
+ insert into t1 select * from t2;
+ # Write file to make mysql-test-run.pl expect crash
+ --exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+
+ eval call setcrash($numtests);
+
+ # Run the crashing query
+ --error 2006,2013
+ COMMIT;
+
+ # Poll the server waiting for it to be back online again.
+ --source include/wait_until_connected_again.inc
+
+ # table and binlog should be in sync.
+ SELECT * FROM t1 ORDER BY id;
+--replace_column 2 # 5 #
+ SHOW BINLOG EVENTS LIMIT 2,1;
+
+ delete from t1;
+
+ dec $numtests;
+}
+
+# final cleanup
+DROP TABLE t1;
+DROP TABLE t2;
+DROP PROCEDURE setcrash;
+--disable_query_log
+eval SET GLOBAL innodb_file_format_check=$file_format_check;
+--enable_query_log
diff --git a/mysql-test/suite/innodb_plugin/t/group_commit_crash_no_optimize_thread-master.opt b/mysql-test/suite/innodb_plugin/t/group_commit_crash_no_optimize_thread-master.opt
new file mode 100644
index 00000000000..18d43988ffd
--- /dev/null
+++ b/mysql-test/suite/innodb_plugin/t/group_commit_crash_no_optimize_thread-master.opt
@@ -0,0 +1 @@
+--binlog-optimize-thread-scheduling=0 --skip-stack-trace --skip-core-file
diff --git a/mysql-test/suite/innodb_plugin/t/group_commit_crash_no_optimize_thread.test b/mysql-test/suite/innodb_plugin/t/group_commit_crash_no_optimize_thread.test
new file mode 100644
index 00000000000..654ca313aab
--- /dev/null
+++ b/mysql-test/suite/innodb_plugin/t/group_commit_crash_no_optimize_thread.test
@@ -0,0 +1,82 @@
+# Testing group commit by crashing a few times.
+# Test adapted from the Facebook patch: lp:mysqlatfacebook
+--source include/not_embedded.inc
+# Don't test this under valgrind, memory leaks will occur
+--source include/not_valgrind.inc
+
+# Binary must be compiled with debug for crash to occur
+--source include/have_debug.inc
+--source include/have_innodb_plugin.inc
+--source include/have_log_bin.inc
+
+let $file_format_check=`SELECT @@innodb_file_format_check`;
+CREATE TABLE t1(a CHAR(255),
+ b CHAR(255),
+ c CHAR(255),
+ d CHAR(255),
+ id INT AUTO_INCREMENT,
+ PRIMARY KEY(id)) ENGINE=InnoDB;
+create table t2 like t1;
+delimiter //;
+create procedure setcrash(IN i INT)
+begin
+ CASE i
+ WHEN 1 THEN SET SESSION debug="d,crash_commit_after_prepare";
+ WHEN 2 THEN SET SESSION debug="d,crash_commit_after_log";
+ WHEN 3 THEN SET SESSION debug="d,crash_commit_before_unlog";
+ WHEN 4 THEN SET SESSION debug="d,crash_commit_after";
+ WHEN 5 THEN SET SESSION debug="d,crash_commit_before";
+ ELSE BEGIN END;
+ END CASE;
+end //
+delimiter ;//
+# Avoid getting a crashed mysql.proc table.
+FLUSH TABLES;
+
+let $numtests = 5;
+
+let $numinserts = 10;
+while ($numinserts)
+{
+ dec $numinserts;
+ INSERT INTO t2(a, b, c, d) VALUES ('a', 'b', 'c', 'd');
+}
+
+--enable_reconnect
+
+while ($numtests)
+{
+ SET binlog_format= mixed;
+ RESET MASTER;
+
+ START TRANSACTION;
+ insert into t1 select * from t2;
+ # Write file to make mysql-test-run.pl expect crash
+ --exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
+
+ eval call setcrash($numtests);
+
+ # Run the crashing query
+ --error 2006,2013
+ COMMIT;
+
+ # Poll the server waiting for it to be back online again.
+ --source include/wait_until_connected_again.inc
+
+ # table and binlog should be in sync.
+ SELECT * FROM t1 ORDER BY id;
+--replace_column 2 # 5 #
+ SHOW BINLOG EVENTS LIMIT 2,1;
+
+ delete from t1;
+
+ dec $numtests;
+}
+
+# final cleanup
+DROP TABLE t1;
+DROP TABLE t2;
+DROP PROCEDURE setcrash;
+--disable_query_log
+eval SET GLOBAL innodb_file_format_check=$file_format_check;
+--enable_query_log
diff --git a/mysql-test/suite/innodb_plugin/t/group_commit_no_optimize_thread-master.opt b/mysql-test/suite/innodb_plugin/t/group_commit_no_optimize_thread-master.opt
new file mode 100644
index 00000000000..97d8c106816
--- /dev/null
+++ b/mysql-test/suite/innodb_plugin/t/group_commit_no_optimize_thread-master.opt
@@ -0,0 +1 @@
+--binlog-optimize-thread-scheduling=0
diff --git a/mysql-test/suite/innodb_plugin/t/group_commit_no_optimize_thread.test b/mysql-test/suite/innodb_plugin/t/group_commit_no_optimize_thread.test
new file mode 100644
index 00000000000..ae3f1b516d4
--- /dev/null
+++ b/mysql-test/suite/innodb_plugin/t/group_commit_no_optimize_thread.test
@@ -0,0 +1,115 @@
+--source include/have_debug_sync.inc
+--source include/have_innodb_plugin.inc
+--source include/have_log_bin.inc
+
+# Test some group commit code paths by using debug_sync to do controlled
+# commits of 6 transactions: first 1 alone, then 3 as a group, then 2 as a
+# group.
+#
+# Group 3 is allowed to race as far as possible ahead before group 2 finishes
+# to check some edge case for concurrency control.
+
+CREATE TABLE t1 (a VARCHAR(10) PRIMARY KEY) ENGINE=innodb;
+
+SELECT variable_value INTO @commits FROM information_schema.global_status
+ WHERE variable_name = 'binlog_commits';
+SELECT variable_value INTO @group_commits FROM information_schema.global_status
+ WHERE variable_name = 'binlog_group_commits';
+
+connect(con1,localhost,root,,);
+connect(con2,localhost,root,,);
+connect(con3,localhost,root,,);
+connect(con4,localhost,root,,);
+connect(con5,localhost,root,,);
+connect(con6,localhost,root,,);
+
+# Start group1 (with one thread) doing commit, waiting for
+# group2 to queue up before finishing.
+
+connection con1;
+SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL group1_running WAIT_FOR group2_queued";
+send INSERT INTO t1 VALUES ("con1");
+
+# Make group2 (with three threads) queue up.
+# Make sure con2 is the group commit leader for group2.
+# Make group2 wait with running commit_ordered() until group3 has committed.
+
+connection con2;
+set DEBUG_SYNC= "now WAIT_FOR group1_running";
+SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL group2_con2";
+SET DEBUG_SYNC= "commit_after_release_LOCK_log WAIT_FOR group3_committed";
+send INSERT INTO t1 VALUES ("con2");
+connection con3;
+SET DEBUG_SYNC= "now WAIT_FOR group2_con2";
+SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL group2_con3";
+send INSERT INTO t1 VALUES ("con3");
+connection con4;
+SET DEBUG_SYNC= "now WAIT_FOR group2_con3";
+SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL group2_con4";
+SET DEBUG_SYNC= "commit_after_group_run_commit_ordered SIGNAL group2_visible WAIT_FOR group2_checked";
+send INSERT INTO t1 VALUES ("con4");
+
+# When group2 is queued, let group1 continue and queue group3.
+
+connection default;
+SET DEBUG_SYNC= "now WAIT_FOR group2_con4";
+
+# At this point, trasaction 1 is still not visible as commit_ordered() has not
+# been called yet.
+SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
+SELECT * FROM t1 ORDER BY a;
+
+SET DEBUG_SYNC= "now SIGNAL group2_queued";
+connection con1;
+reap;
+
+# Now transaction 1 is visible.
+connection default;
+SELECT * FROM t1 ORDER BY a;
+
+connection con5;
+SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL group3_con5";
+SET DEBUG_SYNC= "commit_after_get_LOCK_log SIGNAL con5_leader WAIT_FOR con6_queued";
+send INSERT INTO t1 VALUES ("con5");
+
+connection con6;
+SET DEBUG_SYNC= "now WAIT_FOR con5_leader";
+SET DEBUG_SYNC= "commit_after_prepare_ordered SIGNAL con6_queued";
+send INSERT INTO t1 VALUES ("con6");
+
+connection default;
+SET DEBUG_SYNC= "now WAIT_FOR group3_con5";
+# Still only transaction 1 visible, as group2 have not yet run commit_ordered().
+SELECT * FROM t1 ORDER BY a;
+SET DEBUG_SYNC= "now SIGNAL group3_committed";
+SET DEBUG_SYNC= "now WAIT_FOR group2_visible";
+# Now transactions 1-4 visible.
+SELECT * FROM t1 ORDER BY a;
+SET DEBUG_SYNC= "now SIGNAL group2_checked";
+
+connection con2;
+reap;
+
+connection con3;
+reap;
+
+connection con4;
+reap;
+
+connection con5;
+reap;
+
+connection con6;
+reap;
+
+connection default;
+# Check all transactions finally visible.
+SELECT * FROM t1 ORDER BY a;
+
+SELECT variable_value - @commits FROM information_schema.global_status
+ WHERE variable_name = 'binlog_commits';
+SELECT variable_value - @group_commits FROM information_schema.global_status
+ WHERE variable_name = 'binlog_group_commits';
+
+SET DEBUG_SYNC= 'RESET';
+DROP TABLE t1;
diff --git a/mysql-test/suite/maria/r/maria-connect.result b/mysql-test/suite/maria/r/maria-connect.result
index a02c29f3d5f..45675deb098 100644
--- a/mysql-test/suite/maria/r/maria-connect.result
+++ b/mysql-test/suite/maria/r/maria-connect.result
@@ -14,10 +14,10 @@ a
2
3
4
-SHOW BINLOG EVENTS FROM 106;
+SHOW BINLOG EVENTS FROM <start_pos>;
Log_name Pos Event_type Server_id End_log_pos Info
-master-bin.000001 106 Query 1 204 use `test`; CREATE TABLE t1 (a int primary key)
-master-bin.000001 204 Query 1 295 use `test`; insert t1 values (1),(2),(3)
-master-bin.000001 295 Query 1 386 use `test`; insert t1 values (4),(2),(5)
+master-bin.000001 # Query 1 # use `test`; CREATE TABLE t1 (a int primary key)
+master-bin.000001 # Query 1 # use `test`; insert t1 values (1),(2),(3)
+master-bin.000001 # Query 1 # use `test`; insert t1 values (4),(2),(5)
drop table t1;
set binlog_format=default;
diff --git a/mysql-test/suite/maria/t/maria-connect.test b/mysql-test/suite/maria/t/maria-connect.test
index 9efb5844bc2..a1e9bbce4f2 100644
--- a/mysql-test/suite/maria/t/maria-connect.test
+++ b/mysql-test/suite/maria/t/maria-connect.test
@@ -4,6 +4,9 @@
-- source include/have_maria.inc
-- source include/have_log_bin.inc
+-- source include/binlog_start_pos.inc
+
+let $start_pos= `select @binlog_start_pos`;
let $default=`select @@global.storage_engine`;
set global storage_engine=aria;
@@ -27,7 +30,9 @@ insert t1 values (1),(2),(3);
--error ER_DUP_ENTRY
insert t1 values (4),(2),(5);
select * from t1;
-SHOW BINLOG EVENTS FROM 106;
+--replace_result $start_pos <start_pos>
+--replace_column 2 # 5 #
+eval SHOW BINLOG EVENTS FROM $start_pos;
drop table t1;
set binlog_format=default;
diff --git a/mysql-test/suite/pbxt/r/pbxt_xa_binlog.result b/mysql-test/suite/pbxt/r/pbxt_xa_binlog.result
new file mode 100644
index 00000000000..abfeebb5b96
--- /dev/null
+++ b/mysql-test/suite/pbxt/r/pbxt_xa_binlog.result
@@ -0,0 +1,32 @@
+drop table if exists t1, t2;
+SET binlog_format = 'mixed';
+CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=innodb;
+CREATE TABLE t2 (b INT PRIMARY KEY) ENGINE=pbxt;
+BEGIN;
+SELECT @@log_bin;
+@@log_bin
+1
+INSERT INTO t1 VALUES (1);
+INSERT INTO t2 VALUES (2);
+COMMIT;
+select * from t1;
+a
+1
+select * from t2;
+b
+2
+SET sql_log_bin = 0;
+BEGIN;
+INSERT INTO t1 VALUES (3);
+INSERT INTO t2 VALUES (4);
+COMMIT;
+select * from t1 order by a;
+a
+1
+3
+select * from t2 order by b;
+b
+2
+4
+drop table t1, t2;
+drop database pbxt;
diff --git a/mysql-test/suite/pbxt/t/pbxt_xa_binlog.test b/mysql-test/suite/pbxt/t/pbxt_xa_binlog.test
new file mode 100644
index 00000000000..4a4578a5595
--- /dev/null
+++ b/mysql-test/suite/pbxt/t/pbxt_xa_binlog.test
@@ -0,0 +1,32 @@
+--source include/have_innodb.inc
+--source include/have_log_bin.inc
+
+--disable_warnings
+drop table if exists t1, t2;
+--enable_warnings
+
+SET binlog_format = 'mixed';
+
+CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=innodb;
+CREATE TABLE t2 (b INT PRIMARY KEY) ENGINE=pbxt;
+BEGIN;
+# verify that binlog is on
+SELECT @@log_bin;
+INSERT INTO t1 VALUES (1);
+INSERT INTO t2 VALUES (2);
+COMMIT;
+select * from t1;
+select * from t2;
+
+# Test 2-phase commit when we disable binlogging.
+SET sql_log_bin = 0;
+BEGIN;
+INSERT INTO t1 VALUES (3);
+INSERT INTO t2 VALUES (4);
+COMMIT;
+select * from t1 order by a;
+select * from t2 order by b;
+
+drop table t1, t2;
+drop database pbxt;
+
diff --git a/mysql-test/suite/rpl/r/rpl_deadlock_innodb.result b/mysql-test/suite/rpl/r/rpl_deadlock_innodb.result
index c399b408d5c..37d209bbcf6 100644
--- a/mysql-test/suite/rpl/r/rpl_deadlock_innodb.result
+++ b/mysql-test/suite/rpl/r/rpl_deadlock_innodb.result
@@ -51,7 +51,7 @@ include/check_slave_is_running.inc
*** Test lock wait timeout ***
include/stop_slave.inc
DELETE FROM t2;
-CHANGE MASTER TO MASTER_LOG_POS=MASTER_POS_BEGIN;
+CHANGE MASTER TO MASTER_LOG_POS=<master_pos_begin>;
BEGIN;
SELECT * FROM t1 FOR UPDATE;
a
@@ -78,7 +78,7 @@ SET @my_max_relay_log_size= @@global.max_relay_log_size;
SET global max_relay_log_size=0;
include/stop_slave.inc
DELETE FROM t2;
-CHANGE MASTER TO MASTER_LOG_POS=MASTER_POS_BEGIN;
+CHANGE MASTER TO MASTER_LOG_POS=<master_pos_begin>;
BEGIN;
SELECT * FROM t1 FOR UPDATE;
a
diff --git a/mysql-test/suite/rpl/r/rpl_row_annotate_do.result b/mysql-test/suite/rpl/r/rpl_row_annotate_do.result
new file mode 100644
index 00000000000..0ece93e7aa5
--- /dev/null
+++ b/mysql-test/suite/rpl/r/rpl_row_annotate_do.result
@@ -0,0 +1,141 @@
+include/master-slave.inc
+[connection master]
+########################################################################
+# TABLES ON MASTER
+########################################################################
+SELECT * FROM t1 ORDER BY a;
+a b
+0 1
+SELECT * FROM t2 ORDER BY a;
+a b
+SELECT * FROM t3 ORDER BY a;
+a b
+1 1
+2 2
+3 3
+SELECT * FROM t5 ORDER BY a;
+a b
+1 foo
+2 bar
+3 baz
+4 gås
+5 gås
+########################################################################
+# TABLES ON SLAVE: should be the same as on master
+########################################################################
+SELECT * FROM t1 ORDER BY a;
+a b
+0 1
+SELECT * FROM t2 ORDER BY a;
+a b
+SELECT * FROM t3 ORDER BY a;
+a b
+1 1
+2 2
+3 3
+SELECT * FROM t5 ORDER BY a;
+a b
+1 foo
+2 bar
+3 baz
+4 gås
+5 gås
+########################################################################
+# EVENTS ON SLAVE
+# The following Annotate_rows events should appear below:
+# - UPDATE t1 SET b = b + 1;
+# - REPLACE t1 VALUES (1,1), (2,2), (3,3);
+# - INSERT INTO t2 VALUES (1,1), (2,2), (3,3)
+# - INSERT INTO t3 VALUES (1,1), (2,2), (3,3)
+# - DELETE t1, t2 FROM <...>
+# - INSERT INTO t2 VALUES (1,1), (2,2), (3,3)
+# - DELETE xt1, t2 FROM <...>
+# - INSERT INTO t5(b) VALUES <...> (3 instances)
+########################################################################
+FLUSH LOGS;
+show binlog events in 'slave-bin.000001' from <start_pos>;
+Log_name Pos Event_type Server_id End_log_pos Info
+slave-bin.000001 # Query 1 # DROP DATABASE IF EXISTS test1
+slave-bin.000001 # Query 1 # CREATE DATABASE test1
+slave-bin.000001 # Query 1 # use `test1`; CREATE TABLE t1(a int primary key, b int)
+slave-bin.000001 # Query 1 # use `test1`; CREATE TABLE t2(a int, b int)
+slave-bin.000001 # Query 1 # use `test1`; CREATE TABLE t3(a int, b int)
+slave-bin.000001 # Query 1 # use `test1`; CREATE TABLE t4(a int, b int)
+slave-bin.000001 # Query 1 # use `test1`; CREATE TABLE t5 (
+a INT PRIMARY KEY AUTO_INCREMENT,
+b VARCHAR(10) CHARACTER SET utf8 COLLATE utf8_bin
+)
+slave-bin.000001 # Query 1 # BEGIN
+slave-bin.000001 # Table_map 1 # table_id: # (test1.t1)
+slave-bin.000001 # Write_rows 1 # table_id: # flags: STMT_END_F
+slave-bin.000001 # Query 1 # COMMIT
+slave-bin.000001 # Query 1 # BEGIN
+slave-bin.000001 # Annotate_rows 1 # UPDATE t1 SET b = b + 1
+slave-bin.000001 # Table_map 1 # table_id: # (test1.t1)
+slave-bin.000001 # Update_rows 1 # table_id: # flags: STMT_END_F
+slave-bin.000001 # Query 1 # COMMIT
+slave-bin.000001 # Query 1 # BEGIN
+slave-bin.000001 # Annotate_rows 1 # REPLACE t1 VALUES (1,1), (2,2), (3,3)
+slave-bin.000001 # Table_map 1 # table_id: # (test1.t1)
+slave-bin.000001 # Update_rows 1 # table_id: #
+slave-bin.000001 # Write_rows 1 # table_id: # flags: STMT_END_F
+slave-bin.000001 # Query 1 # COMMIT
+slave-bin.000001 # Query 1 # BEGIN
+slave-bin.000001 # Annotate_rows 1 # INSERT INTO t2 VALUES (1,1), (2,2), (3,3)
+slave-bin.000001 # Table_map 1 # table_id: # (test1.t2)
+slave-bin.000001 # Write_rows 1 # table_id: # flags: STMT_END_F
+slave-bin.000001 # Query 1 # COMMIT
+slave-bin.000001 # Query 1 # BEGIN
+slave-bin.000001 # Annotate_rows 1 # INSERT INTO t3 VALUES (1,1), (2,2), (3,3)
+slave-bin.000001 # Table_map 1 # table_id: # (test1.t3)
+slave-bin.000001 # Write_rows 1 # table_id: # flags: STMT_END_F
+slave-bin.000001 # Query 1 # COMMIT
+slave-bin.000001 # Query 1 # BEGIN
+slave-bin.000001 # Annotate_rows 1 # DELETE t1, t2 FROM t1 INNER JOIN t2 INNER JOIN t3 WHERE t1.a=t2.a AND t2.a=t3.a
+slave-bin.000001 # Table_map 1 # table_id: # (test1.t2)
+slave-bin.000001 # Table_map 1 # table_id: # (test1.t1)
+slave-bin.000001 # Delete_rows 1 # table_id: #
+slave-bin.000001 # Delete_rows 1 # table_id: # flags: STMT_END_F
+slave-bin.000001 # Query 1 # COMMIT
+slave-bin.000001 # Query 1 # BEGIN
+slave-bin.000001 # Annotate_rows 1 # INSERT INTO t2 VALUES (1,1), (2,2), (3,3)
+slave-bin.000001 # Table_map 1 # table_id: # (test1.t2)
+slave-bin.000001 # Write_rows 1 # table_id: # flags: STMT_END_F
+slave-bin.000001 # Query 1 # COMMIT
+slave-bin.000001 # Query 1 # BEGIN
+slave-bin.000001 # Annotate_rows 1 # DELETE xt1, t2 FROM xt1 INNER JOIN t2 INNER JOIN t3 WHERE xt1.a=t2.a AND t2.a=t3.a
+slave-bin.000001 # Table_map 1 # table_id: # (test1.t2)
+slave-bin.000001 # Delete_rows 1 # table_id: # flags: STMT_END_F
+slave-bin.000001 # Query 1 # COMMIT
+slave-bin.000001 # Query 1 # BEGIN
+slave-bin.000001 # Annotate_rows 1 # INSERT INTO t5(b) VALUES ('foo'), ('bar'), ('baz')
+slave-bin.000001 # Table_map 1 # table_id: # (test1.t5)
+slave-bin.000001 # Write_rows 1 # table_id: # flags: STMT_END_F
+slave-bin.000001 # Query 1 # COMMIT
+slave-bin.000001 # Query 1 # BEGIN
+slave-bin.000001 # Annotate_rows 1 # INSERT INTO t5(b) VALUES ('gås')
+slave-bin.000001 # Table_map 1 # table_id: # (test1.t5)
+slave-bin.000001 # Write_rows 1 # table_id: # flags: STMT_END_F
+slave-bin.000001 # Query 1 # COMMIT
+slave-bin.000001 # Query 1 # BEGIN
+slave-bin.000001 # Annotate_rows 1 # INSERT INTO t5(b) VALUES ('gås')
+slave-bin.000001 # Table_map 1 # table_id: # (test1.t5)
+slave-bin.000001 # Write_rows 1 # table_id: # flags: STMT_END_F
+slave-bin.000001 # Query 1 # COMMIT
+slave-bin.000001 # Rotate 2 # slave-bin.000002;pos=4
+#
+########################################################################
+# INSERTs DELAYED ON MASTERs
+########################################################################
+SET SESSION binlog_annotate_rows_events = ON;
+INSERT DELAYED INTO test1.t4 VALUES (1,1);
+FLUSH TABLES;
+SELECT * FROM test1.t4 ORDER BY a;
+a b
+1 1
+########################################################################
+# ON SLAVE
+# No Annotate_rows events should appear below
+########################################################################
+FLUSH LOGS;
+include/rpl_end.inc
diff --git a/mysql-test/suite/rpl/r/rpl_row_annotate_dont.result b/mysql-test/suite/rpl/r/rpl_row_annotate_dont.result
new file mode 100644
index 00000000000..8463256d5db
--- /dev/null
+++ b/mysql-test/suite/rpl/r/rpl_row_annotate_dont.result
@@ -0,0 +1,123 @@
+include/master-slave.inc
+[connection master]
+########################################################################
+# TABLES ON MASTER
+########################################################################
+SELECT * FROM t1 ORDER BY a;
+a b
+0 1
+SELECT * FROM t2 ORDER BY a;
+a b
+SELECT * FROM t3 ORDER BY a;
+a b
+1 1
+2 2
+3 3
+SELECT * FROM t5 ORDER BY a;
+a b
+1 foo
+2 bar
+3 baz
+4 gås
+5 gås
+########################################################################
+# TABLES ON SLAVE: should be the same as on master
+########################################################################
+SELECT * FROM t1 ORDER BY a;
+a b
+0 1
+SELECT * FROM t2 ORDER BY a;
+a b
+SELECT * FROM t3 ORDER BY a;
+a b
+1 1
+2 2
+3 3
+SELECT * FROM t5 ORDER BY a;
+a b
+1 foo
+2 bar
+3 baz
+4 gås
+5 gås
+########################################################################
+# EVENTS ON SLAVE
+# No Annotate_rows events should appear below
+########################################################################
+FLUSH LOGS;
+show binlog events in 'slave-bin.000001' from <start_pos>;
+Log_name Pos Event_type Server_id End_log_pos Info
+slave-bin.000001 # Query 1 # DROP DATABASE IF EXISTS test1
+slave-bin.000001 # Query 1 # CREATE DATABASE test1
+slave-bin.000001 # Query 1 # use `test1`; CREATE TABLE t1(a int primary key, b int)
+slave-bin.000001 # Query 1 # use `test1`; CREATE TABLE t2(a int, b int)
+slave-bin.000001 # Query 1 # use `test1`; CREATE TABLE t3(a int, b int)
+slave-bin.000001 # Query 1 # use `test1`; CREATE TABLE t4(a int, b int)
+slave-bin.000001 # Query 1 # use `test1`; CREATE TABLE t5 (
+a INT PRIMARY KEY AUTO_INCREMENT,
+b VARCHAR(10) CHARACTER SET utf8 COLLATE utf8_bin
+)
+slave-bin.000001 # Query 1 # BEGIN
+slave-bin.000001 # Table_map 1 # table_id: # (test1.t1)
+slave-bin.000001 # Write_rows 1 # table_id: # flags: STMT_END_F
+slave-bin.000001 # Query 1 # COMMIT
+slave-bin.000001 # Query 1 # BEGIN
+slave-bin.000001 # Table_map 1 # table_id: # (test1.t1)
+slave-bin.000001 # Update_rows 1 # table_id: # flags: STMT_END_F
+slave-bin.000001 # Query 1 # COMMIT
+slave-bin.000001 # Query 1 # BEGIN
+slave-bin.000001 # Table_map 1 # table_id: # (test1.t1)
+slave-bin.000001 # Update_rows 1 # table_id: #
+slave-bin.000001 # Write_rows 1 # table_id: # flags: STMT_END_F
+slave-bin.000001 # Query 1 # COMMIT
+slave-bin.000001 # Query 1 # BEGIN
+slave-bin.000001 # Table_map 1 # table_id: # (test1.t2)
+slave-bin.000001 # Write_rows 1 # table_id: # flags: STMT_END_F
+slave-bin.000001 # Query 1 # COMMIT
+slave-bin.000001 # Query 1 # BEGIN
+slave-bin.000001 # Table_map 1 # table_id: # (test1.t3)
+slave-bin.000001 # Write_rows 1 # table_id: # flags: STMT_END_F
+slave-bin.000001 # Query 1 # COMMIT
+slave-bin.000001 # Query 1 # BEGIN
+slave-bin.000001 # Table_map 1 # table_id: # (test1.t2)
+slave-bin.000001 # Table_map 1 # table_id: # (test1.t1)
+slave-bin.000001 # Delete_rows 1 # table_id: #
+slave-bin.000001 # Delete_rows 1 # table_id: # flags: STMT_END_F
+slave-bin.000001 # Query 1 # COMMIT
+slave-bin.000001 # Query 1 # BEGIN
+slave-bin.000001 # Table_map 1 # table_id: # (test1.t2)
+slave-bin.000001 # Write_rows 1 # table_id: # flags: STMT_END_F
+slave-bin.000001 # Query 1 # COMMIT
+slave-bin.000001 # Query 1 # BEGIN
+slave-bin.000001 # Table_map 1 # table_id: # (test1.t2)
+slave-bin.000001 # Delete_rows 1 # table_id: # flags: STMT_END_F
+slave-bin.000001 # Query 1 # COMMIT
+slave-bin.000001 # Query 1 # BEGIN
+slave-bin.000001 # Table_map 1 # table_id: # (test1.t5)
+slave-bin.000001 # Write_rows 1 # table_id: # flags: STMT_END_F
+slave-bin.000001 # Query 1 # COMMIT
+slave-bin.000001 # Query 1 # BEGIN
+slave-bin.000001 # Table_map 1 # table_id: # (test1.t5)
+slave-bin.000001 # Write_rows 1 # table_id: # flags: STMT_END_F
+slave-bin.000001 # Query 1 # COMMIT
+slave-bin.000001 # Query 1 # BEGIN
+slave-bin.000001 # Table_map 1 # table_id: # (test1.t5)
+slave-bin.000001 # Write_rows 1 # table_id: # flags: STMT_END_F
+slave-bin.000001 # Query 1 # COMMIT
+slave-bin.000001 # Rotate 2 # slave-bin.000002;pos=4
+#
+########################################################################
+# INSERTs DELAYED ON MASTERs
+########################################################################
+SET SESSION binlog_annotate_rows_events = ON;
+INSERT DELAYED INTO test1.t4 VALUES (1,1);
+FLUSH TABLES;
+SELECT * FROM test1.t4 ORDER BY a;
+a b
+1 1
+########################################################################
+# ON SLAVE
+# No Annotate_rows events should appear below
+########################################################################
+FLUSH LOGS;
+include/rpl_end.inc
diff --git a/mysql-test/suite/rpl/r/rpl_row_conflicts.result b/mysql-test/suite/rpl/r/rpl_row_conflicts.result
index 795fd7d080d..7ad0c06448e 100644
--- a/mysql-test/suite/rpl/r/rpl_row_conflicts.result
+++ b/mysql-test/suite/rpl/r/rpl_row_conflicts.result
@@ -22,7 +22,7 @@ a
[on slave]
---- Wait until slave stops with an error ----
include/wait_for_slave_sql_error.inc [errno=1062]
-Last_SQL_Error = Could not execute Write_rows event on table test.t1; Duplicate entry '1' for key 'PRIMARY', Error_code: 1062; handler error HA_ERR_FOUND_DUPP_KEY; the event's master log master-bin.000001, end_log_pos 346 (expected "duplicate key" error)
+Last_SQL_Error = Could not execute Write_rows event on table test.t1; Duplicate entry '1' for key 'PRIMARY', Error_code: 1062; handler error HA_ERR_FOUND_DUPP_KEY; the event's master log master-bin.000001, end_log_pos 480 (expected "duplicate key" error)
SELECT * FROM t1;
a
1
diff --git a/mysql-test/suite/rpl/r/rpl_row_index_choice.result b/mysql-test/suite/rpl/r/rpl_row_index_choice.result
new file mode 100644
index 00000000000..2d955299e6e
--- /dev/null
+++ b/mysql-test/suite/rpl/r/rpl_row_index_choice.result
@@ -0,0 +1,140 @@
+include/master-slave.inc
+[connection master]
+CREATE TABLE t1 (a int, b varchar(100), fulltext(b)) engine=MyISAM;
+INSERT INTO t1 VALUES (1,"a"), (2,"b");
+UPDATE t1 SET b='A' WHERE a=1;
+DELETE FROM t1 WHERE a=2;
+SELECT * FROM t1 ORDER BY a;
+a b
+1 A
+DROP TABLE t1;
+CREATE TABLE t1 (d INT PRIMARY KEY) ENGINE=myisam;
+INSERT INTO t1 VALUES (0);
+INSERT INTO t1 SELECT d+1 FROM t1;
+INSERT INTO t1 SELECT d+2 FROM t1;
+INSERT INTO t1 SELECT d+4 FROM t1;
+INSERT INTO t1 SELECT d+8 FROM t1;
+INSERT INTO t1 SELECT d+16 FROM t1;
+INSERT INTO t1 SELECT d+32 FROM t1;
+INSERT INTO t1 SELECT d+64 FROM t1;
+INSERT INTO t1 SELECT d+128 FROM t1;
+INSERT INTO t1 SELECT d+256 FROM t1;
+INSERT INTO t1 SELECT d+512 FROM t1;
+CREATE TABLE t2 (a INT, b INT, c INT, d INT,
+KEY wrong_key(a),
+KEY expected_key(b,c),
+KEY wrong_key2(c)) ENGINE=myisam;
+INSERT INTO t2 SELECT d DIV 10, d MOD 41, d MOD 37, d FROM t1;
+ANALYZE TABLE t2;
+Table Op Msg_type Msg_text
+test.t2 analyze status OK
+# Slave will crash if using the wrong or no index
+SET GLOBAL debug="+d,slave_crash_if_wrong_index,slave_crash_if_table_scan";
+UPDATE t2 SET d=10042 WHERE d=42;
+DELETE FROM t2 WHERE d=53;
+SET GLOBAL debug="";
+SELECT * FROM t2 WHERE d IN (10042,53);
+a b c d
+4 1 5 10042
+DROP TABLE t2;
+CREATE TABLE t2 (a INT, b INT, c INT, d INT NOT NULL, e INT,
+UNIQUE wrong_key3(a,e),
+KEY wrong_key4(b,c),
+UNIQUE expected_key(d)) ENGINE=myisam;
+INSERT INTO t2 SELECT d DIV 10, d MOD 41, d MOD 37, d, NULL FROM t1;
+ANALYZE TABLE t2;
+Table Op Msg_type Msg_text
+test.t2 analyze status OK
+# Slave will crash if using the wrong or no index
+SET GLOBAL debug="+d,slave_crash_if_wrong_index,slave_crash_if_table_scan";
+UPDATE t2 SET d=10042 WHERE d=42;
+DELETE FROM t2 WHERE d=53;
+SET GLOBAL debug="";
+SELECT * FROM t2 WHERE d IN (10042,53);
+a b c d e
+4 1 5 10042 NULL
+DROP TABLE t2;
+CREATE TABLE t2 (a INT NOT NULL, b INT NOT NULL, c INT NOT NULL, d INT NOT NULL,
+KEY wrong_key5(b),
+UNIQUE expected_key(d),
+KEY wrong_key6(c)) ENGINE=myisam;
+INSERT INTO t2 SELECT d DIV 10, d MOD 41, d MOD 37, d FROM t1;
+# Slave will crash if using the wrong or no index
+SET GLOBAL debug="+d,slave_crash_if_wrong_index,slave_crash_if_table_scan";
+UPDATE t2 SET d=10042 WHERE d=42;
+DELETE FROM t2 WHERE d=53;
+SET GLOBAL debug="";
+SELECT * FROM t2 WHERE d IN (10042,53);
+a b c d
+4 1 5 10042
+DROP TABLE t2;
+CREATE TABLE t2 (a INT NOT NULL, b INT NOT NULL, c INT NOT NULL, d INT NOT NULL,
+KEY expected_key(b),
+KEY wrong_key7(d),
+KEY wrong_key8(c)) ENGINE=myisam;
+INSERT INTO t2 SELECT d DIV 10, d MOD 41, d MOD 37, d FROM t1;
+# Slave will crash if using the wrong or no index
+SET GLOBAL debug="+d,slave_crash_if_wrong_index,slave_crash_if_table_scan";
+UPDATE t2 SET d=10042 WHERE d=42;
+DELETE FROM t2 WHERE d=53;
+SET GLOBAL debug="";
+SELECT * FROM t2 WHERE d IN (10042,53);
+a b c d
+4 1 5 10042
+DROP TABLE t2;
+CREATE TABLE t2 (a INT NOT NULL, b INT NOT NULL, c INT NOT NULL, d INT,
+UNIQUE wrong_key9(d),
+KEY wrong_key10(a),
+PRIMARY KEY expected_key(c,b)) ENGINE=innodb;
+INSERT INTO t2 SELECT d DIV 10, d MOD 41, d MOD 37, d FROM t1;
+# Slave will crash if using the wrong or no index
+SET GLOBAL debug="+d,slave_crash_if_wrong_index,slave_crash_if_table_scan,slave_crash_if_index_scan";
+UPDATE t2 SET d=10042 WHERE d=42;
+DELETE FROM t2 WHERE d=53;
+SET GLOBAL debug="";
+SELECT * FROM t2 WHERE d IN (10042,53);
+a b c d
+4 1 5 10042
+DROP TABLE t2;
+CREATE TABLE t2 (a INT NOT NULL, b INT NOT NULL, c INT NOT NULL, d INT, e INT,
+UNIQUE wrong_key11(e),
+KEY wrong_key12(a),
+KEY expected_key(c,b)) ENGINE=innodb;
+INSERT INTO t2 SELECT d DIV 10, d MOD 41, d MOD 37, d, IF(d<10, d, NULL) FROM t1;
+ANALYZE TABLE t2;
+Table Op Msg_type Msg_text
+test.t2 analyze status OK
+# Slave will crash if using the wrong or no index
+SET GLOBAL debug="+d,slave_crash_if_wrong_index,slave_crash_if_table_scan";
+UPDATE t2 SET d=10042 WHERE d=42;
+DELETE FROM t2 WHERE d=53;
+SET GLOBAL debug="";
+SELECT * FROM t2 WHERE d IN (10042,53);
+a b c d e
+4 1 5 10042 NULL
+DROP TABLE t2;
+CREATE TABLE t2 (a INT NOT NULL, b INT NOT NULL, c INT NOT NULL, d INT, e INT,
+KEY wrong_key13(a),
+UNIQUE expected_key(e),
+KEY wrong_key14(c,b)) ENGINE=innodb;
+INSERT INTO t2 SELECT d DIV 10, d MOD 41, d MOD 37, d, IF(d<10, d, NULL) FROM t1;
+# Slave will crash if using the wrong or no index
+SET GLOBAL debug="+d,slave_crash_if_wrong_index,slave_crash_if_table_scan";
+UPDATE t2 SET d=10042 WHERE d=42;
+DELETE FROM t2 WHERE d=53;
+SET GLOBAL debug="";
+SELECT * FROM t2 WHERE d IN (10042,53);
+a b c d e
+4 1 5 10042 NULL
+DROP TABLE t2;
+CREATE TABLE t2 (a INT NOT NULL, d INT) ENGINE=innodb;
+INSERT INTO t2 SELECT d DIV 10, d FROM t1;
+UPDATE t2 SET d=10042 WHERE d=42;
+DELETE FROM t2 WHERE d=53;
+SELECT * FROM t2 WHERE d IN (10042,53);
+a d
+4 10042
+DROP TABLE t2;
+DROP TABLE t1;
+SET GLOBAL debug="";
+include/rpl_end.inc
diff --git a/mysql-test/suite/rpl/t/rpl_row_annotate_do-slave.opt b/mysql-test/suite/rpl/t/rpl_row_annotate_do-slave.opt
new file mode 100644
index 00000000000..aa3af621897
--- /dev/null
+++ b/mysql-test/suite/rpl/t/rpl_row_annotate_do-slave.opt
@@ -0,0 +1 @@
+--log-slave-updates --replicate-annotate-rows-events --replicate-ignore-table=test1.xt1 --replicate-ignore-table=test1.xt2 \ No newline at end of file
diff --git a/mysql-test/suite/rpl/t/rpl_row_annotate_do.test b/mysql-test/suite/rpl/t/rpl_row_annotate_do.test
new file mode 100644
index 00000000000..b61ce0ab6d8
--- /dev/null
+++ b/mysql-test/suite/rpl/t/rpl_row_annotate_do.test
@@ -0,0 +1,16 @@
+###############################################################################
+# WL47: Store in binlog text of statements that caused RBR events
+# Wrapper for extra/rpl/rpl_row_annotate.test.
+# Intended to test that if the --replicate-annotate-rows-events option
+# is switched on on slave then Annotate_events:
+# - are reproduced on slave
+# - are reproduced only once for "multi-table-maps" rbr queries
+# - are not reproduced when the corresponding queries are filtered away
+# on replication
+# - are reproduced when the corresponding queries are filtered away partialy
+# (e.g. in case of multi-delete)
+# - are not generated on slave for queries that are not annotated on master.
+###############################################################################
+
+--source include/have_binlog_format_row.inc
+--source extra/rpl_tests/rpl_row_annotate.test
diff --git a/mysql-test/suite/rpl/t/rpl_row_annotate_dont-slave.opt b/mysql-test/suite/rpl/t/rpl_row_annotate_dont-slave.opt
new file mode 100644
index 00000000000..74ac3bfefcb
--- /dev/null
+++ b/mysql-test/suite/rpl/t/rpl_row_annotate_dont-slave.opt
@@ -0,0 +1 @@
+--log-slave-updates --replicate-ignore-table=test1.xt1 --replicate-ignore-table=test1.xt2 \ No newline at end of file
diff --git a/mysql-test/suite/rpl/t/rpl_row_annotate_dont.test b/mysql-test/suite/rpl/t/rpl_row_annotate_dont.test
new file mode 100644
index 00000000000..56765c591aa
--- /dev/null
+++ b/mysql-test/suite/rpl/t/rpl_row_annotate_dont.test
@@ -0,0 +1,9 @@
+###############################################################################
+# WL47: Store in binlog text of statements that caused RBR events
+# Wrapper for extra/rpl/rpl_row_annotate.test.
+# Intended to test that if the --replicate-annotate-rows-events option
+# is switched off on slave then Annotate_events are not reproduced.
+###############################################################################
+
+--source include/have_binlog_format_row.inc
+--source extra/rpl_tests/rpl_row_annotate.test
diff --git a/mysql-test/suite/rpl/t/rpl_row_flsh_tbls.test b/mysql-test/suite/rpl/t/rpl_row_flsh_tbls.test
index f506d0c54f5..2429dbc1142 100644
--- a/mysql-test/suite/rpl/t/rpl_row_flsh_tbls.test
+++ b/mysql-test/suite/rpl/t/rpl_row_flsh_tbls.test
@@ -1,7 +1,8 @@
# depends on the binlog output
-- source include/have_binlog_format_row.inc
+--source include/binlog_start_pos.inc
-let $rename_event_pos= 925;
+let $rename_event_pos= `select @binlog_start_pos + 819`;
# Bug#18326: Do not lock table for writing during prepare of statement
# The use of the ps protocol causes extra table maps in the binlog, so
diff --git a/mysql-test/suite/rpl/t/rpl_row_index_choice.test b/mysql-test/suite/rpl/t/rpl_row_index_choice.test
new file mode 100644
index 00000000000..d393c65438a
--- /dev/null
+++ b/mysql-test/suite/rpl/t/rpl_row_index_choice.test
@@ -0,0 +1,243 @@
+--source include/have_binlog_format_row.inc
+--source include/master-slave.inc
+--source include/have_debug.inc
+--source include/have_innodb.inc
+
+# Bug#58997: Row-based replication breaks on table with only fulltext index:
+connection master;
+CREATE TABLE t1 (a int, b varchar(100), fulltext(b)) engine=MyISAM;
+INSERT INTO t1 VALUES (1,"a"), (2,"b");
+UPDATE t1 SET b='A' WHERE a=1;
+DELETE FROM t1 WHERE a=2;
+
+sync_slave_with_master;
+
+connection slave;
+
+SELECT * FROM t1 ORDER BY a;
+
+connection master;
+DROP TABLE t1;
+
+
+# A utility table used to populate subsequent tables in various ways.
+connection master;
+CREATE TABLE t1 (d INT PRIMARY KEY) ENGINE=myisam;
+INSERT INTO t1 VALUES (0);
+INSERT INTO t1 SELECT d+1 FROM t1;
+INSERT INTO t1 SELECT d+2 FROM t1;
+INSERT INTO t1 SELECT d+4 FROM t1;
+INSERT INTO t1 SELECT d+8 FROM t1;
+INSERT INTO t1 SELECT d+16 FROM t1;
+INSERT INTO t1 SELECT d+32 FROM t1;
+INSERT INTO t1 SELECT d+64 FROM t1;
+INSERT INTO t1 SELECT d+128 FROM t1;
+INSERT INTO t1 SELECT d+256 FROM t1;
+INSERT INTO t1 SELECT d+512 FROM t1;
+
+# Test that we pick the better multi-column index, even if the
+# single-column index is more selective in the first column.
+CREATE TABLE t2 (a INT, b INT, c INT, d INT,
+ KEY wrong_key(a),
+ KEY expected_key(b,c),
+ KEY wrong_key2(c)) ENGINE=myisam;
+INSERT INTO t2 SELECT d DIV 10, d MOD 41, d MOD 37, d FROM t1;
+
+sync_slave_with_master;
+connection slave;
+ANALYZE TABLE t2;
+--echo # Slave will crash if using the wrong or no index
+SET GLOBAL debug="+d,slave_crash_if_wrong_index,slave_crash_if_table_scan";
+
+connection master;
+UPDATE t2 SET d=10042 WHERE d=42;
+DELETE FROM t2 WHERE d=53;
+
+sync_slave_with_master;
+connection slave;
+SET GLOBAL debug="";
+SELECT * FROM t2 WHERE d IN (10042,53);
+
+# Test that we don't pick a unique index with NULLS over a more selective
+# non-unique index.
+connection master;
+DROP TABLE t2;
+CREATE TABLE t2 (a INT, b INT, c INT, d INT NOT NULL, e INT,
+ UNIQUE wrong_key3(a,e),
+ KEY wrong_key4(b,c),
+ UNIQUE expected_key(d)) ENGINE=myisam;
+INSERT INTO t2 SELECT d DIV 10, d MOD 41, d MOD 37, d, NULL FROM t1;
+
+sync_slave_with_master;
+connection slave;
+ANALYZE TABLE t2;
+--echo # Slave will crash if using the wrong or no index
+SET GLOBAL debug="+d,slave_crash_if_wrong_index,slave_crash_if_table_scan";
+
+connection master;
+UPDATE t2 SET d=10042 WHERE d=42;
+DELETE FROM t2 WHERE d=53;
+
+sync_slave_with_master;
+connection slave;
+SET GLOBAL debug="";
+SELECT * FROM t2 WHERE d IN (10042,53);
+
+connection master;
+DROP TABLE t2;
+
+# Test that we pick a reasonable index when there is no rec_per_key[]
+# information (no ANALYZE TABLE).
+CREATE TABLE t2 (a INT NOT NULL, b INT NOT NULL, c INT NOT NULL, d INT NOT NULL,
+ KEY wrong_key5(b),
+ UNIQUE expected_key(d),
+ KEY wrong_key6(c)) ENGINE=myisam;
+INSERT INTO t2 SELECT d DIV 10, d MOD 41, d MOD 37, d FROM t1;
+
+sync_slave_with_master;
+connection slave;
+--echo # Slave will crash if using the wrong or no index
+SET GLOBAL debug="+d,slave_crash_if_wrong_index,slave_crash_if_table_scan";
+
+connection master;
+UPDATE t2 SET d=10042 WHERE d=42;
+DELETE FROM t2 WHERE d=53;
+
+sync_slave_with_master;
+connection slave;
+SET GLOBAL debug="";
+SELECT * FROM t2 WHERE d IN (10042,53);
+
+connection master;
+DROP TABLE t2;
+
+
+# Also test without ANALYZE when we pick the sub-optimal index.
+# In this case the key on (d) is the best one, but without ANALYZE TABLE we
+# have no information and will pick the first one on (b).
+# (This test should be updated if we improve the index selection, of course).
+CREATE TABLE t2 (a INT NOT NULL, b INT NOT NULL, c INT NOT NULL, d INT NOT NULL,
+ KEY expected_key(b),
+ KEY wrong_key7(d),
+ KEY wrong_key8(c)) ENGINE=myisam;
+INSERT INTO t2 SELECT d DIV 10, d MOD 41, d MOD 37, d FROM t1;
+
+sync_slave_with_master;
+connection slave;
+--echo # Slave will crash if using the wrong or no index
+SET GLOBAL debug="+d,slave_crash_if_wrong_index,slave_crash_if_table_scan";
+
+connection master;
+UPDATE t2 SET d=10042 WHERE d=42;
+DELETE FROM t2 WHERE d=53;
+
+sync_slave_with_master;
+connection slave;
+SET GLOBAL debug="";
+SELECT * FROM t2 WHERE d IN (10042,53);
+
+connection master;
+DROP TABLE t2;
+
+
+# Test that we pick the primary key for InnoDB, if available.
+CREATE TABLE t2 (a INT NOT NULL, b INT NOT NULL, c INT NOT NULL, d INT,
+ UNIQUE wrong_key9(d),
+ KEY wrong_key10(a),
+ PRIMARY KEY expected_key(c,b)) ENGINE=innodb;
+INSERT INTO t2 SELECT d DIV 10, d MOD 41, d MOD 37, d FROM t1;
+
+sync_slave_with_master;
+connection slave;
+--echo # Slave will crash if using the wrong or no index
+SET GLOBAL debug="+d,slave_crash_if_wrong_index,slave_crash_if_table_scan,slave_crash_if_index_scan";
+
+connection master;
+UPDATE t2 SET d=10042 WHERE d=42;
+DELETE FROM t2 WHERE d=53;
+
+sync_slave_with_master;
+connection slave;
+SET GLOBAL debug="";
+SELECT * FROM t2 WHERE d IN (10042,53);
+
+connection master;
+DROP TABLE t2;
+
+
+# Test that we pick a good index for InnoDB when primary key is not available.
+CREATE TABLE t2 (a INT NOT NULL, b INT NOT NULL, c INT NOT NULL, d INT, e INT,
+ UNIQUE wrong_key11(e),
+ KEY wrong_key12(a),
+ KEY expected_key(c,b)) ENGINE=innodb;
+INSERT INTO t2 SELECT d DIV 10, d MOD 41, d MOD 37, d, IF(d<10, d, NULL) FROM t1;
+
+sync_slave_with_master;
+connection slave;
+ANALYZE TABLE t2;
+--echo # Slave will crash if using the wrong or no index
+SET GLOBAL debug="+d,slave_crash_if_wrong_index,slave_crash_if_table_scan";
+
+connection master;
+UPDATE t2 SET d=10042 WHERE d=42;
+DELETE FROM t2 WHERE d=53;
+
+sync_slave_with_master;
+connection slave;
+SET GLOBAL debug="";
+SELECT * FROM t2 WHERE d IN (10042,53);
+
+connection master;
+DROP TABLE t2;
+
+
+# When there is no ANALYZE TABLE, InnoDB will just report "1" for index
+# cardinality for all indexes in rec_per_key. So currently we cannot choose
+# index to use intelligently. Just test that we work as expected (select
+# first index, remember that unique keys are sorted first by server).
+CREATE TABLE t2 (a INT NOT NULL, b INT NOT NULL, c INT NOT NULL, d INT, e INT,
+ KEY wrong_key13(a),
+ UNIQUE expected_key(e),
+ KEY wrong_key14(c,b)) ENGINE=innodb;
+INSERT INTO t2 SELECT d DIV 10, d MOD 41, d MOD 37, d, IF(d<10, d, NULL) FROM t1;
+
+sync_slave_with_master;
+connection slave;
+--echo # Slave will crash if using the wrong or no index
+SET GLOBAL debug="+d,slave_crash_if_wrong_index,slave_crash_if_table_scan";
+
+connection master;
+UPDATE t2 SET d=10042 WHERE d=42;
+DELETE FROM t2 WHERE d=53;
+
+sync_slave_with_master;
+connection slave;
+SET GLOBAL debug="";
+SELECT * FROM t2 WHERE d IN (10042,53);
+
+connection master;
+DROP TABLE t2;
+
+
+# Finally, test behaviour when no indexes are available at all.
+CREATE TABLE t2 (a INT NOT NULL, d INT) ENGINE=innodb;
+INSERT INTO t2 SELECT d DIV 10, d FROM t1;
+
+UPDATE t2 SET d=10042 WHERE d=42;
+DELETE FROM t2 WHERE d=53;
+
+sync_slave_with_master;
+connection slave;
+SELECT * FROM t2 WHERE d IN (10042,53);
+
+connection master;
+DROP TABLE t2;
+
+
+connection master;
+DROP TABLE t1;
+sync_slave_with_master;
+connection slave;
+SET GLOBAL debug="";
+
+--source include/rpl_end.inc
diff --git a/mysql-test/suite/rpl/t/rpl_row_mysqlbinlog.test b/mysql-test/suite/rpl/t/rpl_row_mysqlbinlog.test
index eb4eb2e0ce8..dd46eeb393e 100644
--- a/mysql-test/suite/rpl/t/rpl_row_mysqlbinlog.test
+++ b/mysql-test/suite/rpl/t/rpl_row_mysqlbinlog.test
@@ -164,15 +164,18 @@ connection master;
remove_file $MYSQLTEST_VARDIR/tmp/master.sql;
+--source include/binlog_start_pos.inc
# this test for position option
-# By setting this position to 416, we should only get the create of t3
+# By setting this position to start_binlog_pos + 310, we should only get the create of t3
+let $start_pos= `select @binlog_start_pos + 310`;
+let $stop_pos= `select @binlog_start_pos + 463`;
--disable_query_log
select "--- Test 2 position test --" as "";
--enable_query_log
let $MYSQLD_DATADIR= `select @@datadir;`;
--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
---exec $MYSQL_BINLOG --short-form --local-load=$MYSQLTEST_VARDIR/tmp/ --position=416 --stop-position=569 $MYSQLD_DATADIR/master-bin.000001
+--exec $MYSQL_BINLOG --short-form --local-load=$MYSQLTEST_VARDIR/tmp/ --position=$start_pos --stop-position=$stop_pos $MYSQLD_DATADIR/master-bin.000001
# These are tests for remote binlog.
# They should return the same as previous test.
@@ -183,7 +186,7 @@ select "--- Test 3 First Remote test --" as "";
# This is broken now
--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
---exec $MYSQL_BINLOG --short-form --local-load=$MYSQLTEST_VARDIR/tmp/ --stop-position=569 --read-from-remote-server --user=root --host=127.0.0.1 --port=$MASTER_MYPORT master-bin.000001
+--exec $MYSQL_BINLOG --short-form --local-load=$MYSQLTEST_VARDIR/tmp/ --stop-position=$stop_pos --read-from-remote-server --user=root --host=127.0.0.1 --port=$MASTER_MYPORT master-bin.000001
# This part is disabled due to bug #17654
@@ -259,7 +262,7 @@ connection master;
select "--- Test 5 LOAD DATA --" as "";
--enable_query_log
--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
---exec $MYSQL_BINLOG --short-form --local-load=$MYSQLTEST_VARDIR/tmp/ --stop-position=106 --read-from-remote-server --user=root --host=127.0.0.1 --port=$MASTER_MYPORT master-bin.000002
+--exec $MYSQL_BINLOG --short-form --local-load=$MYSQLTEST_VARDIR/tmp/ --stop-position=$binlog_start_pos --read-from-remote-server --user=root --host=127.0.0.1 --port=$MASTER_MYPORT master-bin.000002
# Bug#7853 (mysqlbinlog does not accept input from stdin)
@@ -267,14 +270,17 @@ select "--- Test 5 LOAD DATA --" as "";
select "--- Test 6 reading stdin --" as "";
--enable_query_log
let $MYSQLD_DATADIR= `select @@datadir;`;
+let $stop_pos= `select @binlog_start_pos + 463`;
--replace_result $MYSQL_TEST_DIR MYSQL_TEST_DIR
---exec $MYSQL_BINLOG --short-form --stop-position=569 - < $MYSQLD_DATADIR/master-bin.000001
+--exec $MYSQL_BINLOG --short-form --stop-position=$stop_pos - < $MYSQLD_DATADIR/master-bin.000001
--disable_query_log
select "--- Test 7 reading stdin w/position --" as "";
--enable_query_log
+let $start_pos= `select @binlog_start_pos + 310`;
+let $stop_pos= `select @binlog_start_pos + 463`;
--replace_result $MYSQL_TEST_DIR MYSQL_TEST_DIR
---exec $MYSQL_BINLOG --short-form --position=416 --stop-position=569 - < $MYSQLD_DATADIR/master-bin.000001
+--exec $MYSQL_BINLOG --short-form --position=$start_pos --stop-position=$stop_pos - < $MYSQLD_DATADIR/master-bin.000001
# Bug#16217 (mysql client did not know how not switch its internal charset)
--disable_query_log
diff --git a/mysql-test/suite/rpl/t/rpl_stm_flsh_tbls.test b/mysql-test/suite/rpl/t/rpl_stm_flsh_tbls.test
index f0a2e660830..83ef8699425 100644
--- a/mysql-test/suite/rpl/t/rpl_stm_flsh_tbls.test
+++ b/mysql-test/suite/rpl/t/rpl_stm_flsh_tbls.test
@@ -1,5 +1,6 @@
# depends on the binlog output
--source include/have_binlog_format_mixed_or_statement.inc
+--source include/binlog_start_pos.inc
-let $rename_event_pos= 684;
+let $rename_event_pos= `select @binlog_start_pos + 578`;
-- source extra/rpl_tests/rpl_flsh_tbls.test
diff --git a/mysql-test/t/ctype_cp932_binlog_stm.test b/mysql-test/t/ctype_cp932_binlog_stm.test
index f3038ccfa61..b1f34ec22d4 100644
--- a/mysql-test/t/ctype_cp932_binlog_stm.test
+++ b/mysql-test/t/ctype_cp932_binlog_stm.test
@@ -28,14 +28,6 @@ delimiter ;|
--echo End of 5.0 tests
-#
-# #28436: Incorrect position in SHOW BINLOG EVENTS causes server coredump
-# Note: 364 is a magic position (found experimentally, depends on
-# the log's contents) that caused the server crash.
-
---error 1220
-SHOW BINLOG EVENTS FROM 365;
-
--echo Bug#44352 UPPER/LOWER function doesn't work correctly on cp932 and sjis environment.
CREATE TABLE t1 (a varchar(16)) character set cp932;
INSERT INTO t1 VALUES (0x8372835E),(0x8352835E);
diff --git a/mysql-test/t/innodb_release_row_locks_early-master.opt b/mysql-test/t/innodb_release_row_locks_early-master.opt
new file mode 100644
index 00000000000..57ce087cd25
--- /dev/null
+++ b/mysql-test/t/innodb_release_row_locks_early-master.opt
@@ -0,0 +1 @@
+--innodb-release-locks-early=1
diff --git a/mysql-test/t/innodb_release_row_locks_early.test b/mysql-test/t/innodb_release_row_locks_early.test
new file mode 100644
index 00000000000..b4e99a1c4f8
--- /dev/null
+++ b/mysql-test/t/innodb_release_row_locks_early.test
@@ -0,0 +1,136 @@
+--source include/have_debug_sync.inc
+--source include/have_innodb.inc
+--source include/have_log_bin.inc
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+CREATE TABLE t1 (k INT NOT NULL, a INT NOT NULL, b INT NOT NULL, c INT NOT NULL, PRIMARY KEY(k)) ENGINE=InnoDB;
+INSERT INTO t1 (k, a, b, c) VALUES (1, 0, 0, 0);
+INSERT INTO t1 (k, a, b, c) VALUES (2, 0, 0, 0);
+INSERT INTO t1 (k, a, b, c) VALUES (3, 0, 0, 0);
+INSERT INTO t1 (k, a, b, c) VALUES (4, 0, 0, 0);
+
+RESET MASTER;
+SET DEBUG_SYNC= 'RESET';
+
+# Two transactions A,B that update the same row.
+# A releases row locks during the prepare phase, and waits using DEBUG_SYNC.
+# B then updates the same row.
+# Verify that
+# - B's update can proceed while A is waiting for commit, showing that
+# locks are released early.
+# - B cannot be binlogged before A.
+
+connect(c1,127.0.0.1,root,,test,$MASTER_MYPORT,);
+connect(c2,127.0.0.1,root,,test,$MASTER_MYPORT,);
+
+connection c1;
+--echo # Connection c1
+# Fix binlog format (otherwise SHOW BINLOG EVENTS will fluctuate).
+SET binlog_format= mixed;
+
+# First verify that row locks are released early.
+BEGIN;
+UPDATE t1 SET a=10 WHERE k=1;
+# Wait until c2 starts COMMIT, to verify that we release our locks in prepare.
+SET DEBUG_SYNC="commit_after_release_LOCK_prepare_ordered SIGNAL c1_prepared WAIT_FOR c2_committing";
+send COMMIT;
+
+ connection c2;
+ --echo # Connection c2
+ SET binlog_format= mixed;
+ SET DEBUG_SYNC="now WAIT_FOR c1_prepared";
+ BEGIN;
+ SELECT * FROM t1 WHERE k=1 FOR UPDATE;
+ UPDATE t1 SET a=20 WHERE k=1;
+ SET DEBUG_SYNC="now SIGNAL c2_committing";
+ COMMIT;
+
+connection c1;
+--echo # Connection c1
+reap;
+
+# Now verify that binlog order is correct.
+BEGIN;
+UPDATE t1 SET a=10 WHERE k=2;
+# This time wait until c2 is binlogged. This should time out, as we must not
+# allow c2 to finish commit before c1.
+SET DEBUG_SYNC="commit_after_release_LOCK_prepare_ordered SIGNAL c1_prepared WAIT_FOR c2_committed TIMEOUT 2";
+send COMMIT;
+
+ connection c2;
+ --echo # Connection c2
+ SET DEBUG_SYNC="now WAIT_FOR c1_prepared";
+ BEGIN;
+ SELECT * FROM t1 WHERE k=2 FOR UPDATE;
+ UPDATE t1 SET a=20 WHERE k=2;
+ SET DEBUG_SYNC="binlog_after_log_and_order SIGNAL c2_committed";
+ send COMMIT;
+
+connection c1;
+--echo # Connection c1
+--echo # This should warn about DEBUG_SYNC timeout
+reap;
+
+connection c2;
+--echo # Connection c2
+reap;
+
+--replace_column 2 # 5 #
+--replace_regex /xid=[0-9]+/xid=XX/
+SHOW BINLOG EVENTS LIMIT 2,12;
+
+
+connection c1;
+--echo # Connection c1
+# Now the same thing, but using autocommit.
+RESET MASTER;
+# First verify that row locks are released early.
+# Wait until c2 starts COMMIT, to verify that we release our locks in prepare.
+SET DEBUG_SYNC="commit_after_release_LOCK_prepare_ordered SIGNAL c1_prepared WAIT_FOR c2_committing";
+send UPDATE t1 SET a=10 WHERE k=3;
+
+ connection c2;
+ --echo # Connection c2
+ SET DEBUG_SYNC="now WAIT_FOR c1_prepared";
+ SELECT * FROM t1 WHERE k=3 FOR UPDATE;
+ SET DEBUG_SYNC="commit_after_release_LOCK_prepare_ordered SIGNAL c2_committing";
+ UPDATE t1 SET a=20 WHERE k=3;
+
+connection c1;
+--echo # Connection c1
+reap;
+
+# Now verify that binlog order is correct, this time with autocommit.
+# This time wait until c2 is binlogged. This should time out, as we must not
+# allow c2 to finish commit before c1.
+SET DEBUG_SYNC="commit_after_release_LOCK_prepare_ordered SIGNAL c1_prepared WAIT_FOR c2_committed TIMEOUT 2";
+send UPDATE t1 SET a=10 WHERE k=4;
+
+ connection c2;
+ --echo # Connection c2
+ SET DEBUG_SYNC="now WAIT_FOR c1_prepared";
+ SELECT * FROM t1 WHERE k=4 FOR UPDATE;
+ SET DEBUG_SYNC="binlog_after_log_and_order SIGNAL c2_committed";
+ send UPDATE t1 SET a=20 WHERE k=4;
+
+connection c1;
+--echo # Connection c1
+--echo # This should warn about DEBUG_SYNC timeout
+reap;
+
+connection c2;
+--echo # Connection c2
+reap;
+
+--replace_column 2 # 5 #
+--replace_regex /xid=[0-9]+/xid=XX/
+SHOW BINLOG EVENTS LIMIT 1,12;
+
+
+SELECT * FROM t1 ORDER BY k;
+
+DROP TABLE t1;
+SET DEBUG_SYNC= 'RESET';
diff --git a/mysql-test/t/mysqlbinlog-master.opt b/mysql-test/t/mysqlbinlog-master.opt
index a9f4a6010d8..cef79bc8585 100644
--- a/mysql-test/t/mysqlbinlog-master.opt
+++ b/mysql-test/t/mysqlbinlog-master.opt
@@ -1,2 +1 @@
---max-binlog-size=4096
--force-restart
diff --git a/mysql-test/t/mysqlbinlog.test b/mysql-test/t/mysqlbinlog.test
index d5dd3052269..bf371443779 100644
--- a/mysql-test/t/mysqlbinlog.test
+++ b/mysql-test/t/mysqlbinlog.test
@@ -3,10 +3,18 @@
-- source include/have_binlog_format_statement.inc
-- source include/have_log_bin.inc
+-- source include/binlog_start_pos.inc
# Deletes all the binary logs
reset master;
+# We need small binlog size to break the last LOAD DATA INFILE below so that
+# the corresponding Begin_load_query will be written to master-bin.000001
+# while the Execute_load_query will be written to master-bin.000002.
+
+SET @save_binlog_size= @@global.max_binlog_size;
+SET @@global.max_binlog_size= 4096;
+
# we need this for getting fixed timestamps inside of this test
set timestamp=1000000000;
@@ -26,13 +34,15 @@ insert into t2 values ();
# test for load data and load data distributed among the several
# files (we need to fill up first binlog)
-load data infile '../../std_data/words.dat' into table t1;
-load data infile '../../std_data/words.dat' into table t1;
-load data infile '../../std_data/words.dat' into table t1;
-load data infile '../../std_data/words.dat' into table t1;
-load data infile '../../std_data/words.dat' into table t1;
+load data infile '../../std_data/words3.dat' into table t1;
+load data infile '../../std_data/words3.dat' into table t1;
+load data infile '../../std_data/words3.dat' into table t1;
+load data infile '../../std_data/words3.dat' into table t1;
+load data infile '../../std_data/words3.dat' into table t1;
# simple query to show more in second binlog
insert into t1 values ("Alas");
+
+### Starting master-bin.000003
flush logs;
# delimiters are for easier debugging in future
@@ -46,7 +56,7 @@ select "--- Local --" as "";
#
let $MYSQLD_DATADIR= `select @@datadir`;
--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
---replace_regex /SQL_LOAD_MB-[0-9]-[0-9]/SQL_LOAD_MB-#-#/
+--replace_regex /SQL_LOAD_MB-[0-9a-f]+-[0-9a-f]+/SQL_LOAD_MB-#-#/
--exec $MYSQL_BINLOG --short-form --local-load=$MYSQLTEST_VARDIR/tmp/ $MYSQLD_DATADIR/master-bin.000001
# this should not fail but shouldn't produce any working statements
@@ -54,7 +64,7 @@ let $MYSQLD_DATADIR= `select @@datadir`;
select "--- Broken LOAD DATA --" as "";
--enable_query_log
--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
---replace_regex /SQL_LOAD_MB-[0-9]-[0-9]/SQL_LOAD_MB-#-#/
+--replace_regex /SQL_LOAD_MB-[0-9a-f]+-[0-9a-f]+/SQL_LOAD_MB-#-#/
--exec $MYSQL_BINLOG --short-form --local-load=$MYSQLTEST_VARDIR/tmp/ $MYSQLD_DATADIR/master-bin.000002 2> /dev/null
# this should show almost nothing
@@ -62,17 +72,17 @@ select "--- Broken LOAD DATA --" as "";
select "--- --database --" as "";
--enable_query_log
--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
---replace_regex /SQL_LOAD_MB-[0-9]-[0-9]/SQL_LOAD_MB-#-#/
+--replace_regex /SQL_LOAD_MB-[0-9a-f]+-[0-9a-f]+/SQL_LOAD_MB-#-#/
--exec $MYSQL_BINLOG --short-form --local-load=$MYSQLTEST_VARDIR/tmp/ --database=nottest $MYSQLD_DATADIR/master-bin.000001 2> /dev/null
# this test for position option
--disable_query_log
select "--- --position --" as "";
--enable_query_log
+let $start_pos= `select @binlog_start_pos + 227`;
--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
---replace_regex /SQL_LOAD_MB-[0-9]-[0-9]/SQL_LOAD_MB-#-#/
---exec $MYSQL_BINLOG --short-form --local-load=$MYSQLTEST_VARDIR/tmp/ --position=332 $MYSQLD_DATADIR/master-bin.000002
-
+--replace_regex /SQL_LOAD_MB-[0-9a-f]+-[0-9a-f]+/SQL_LOAD_MB-#-#/
+--exec $MYSQL_BINLOG --short-form --local-load=$MYSQLTEST_VARDIR/tmp/ --position=$start_pos $MYSQLD_DATADIR/master-bin.000002
# These are tests for remote binlog.
# They should return the same as previous test.
@@ -83,7 +93,7 @@ select "--- Remote --" as "";
# This is broken now
--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
---replace_regex /SQL_LOAD_MB-[0-9]-[0-9]/SQL_LOAD_MB-#-#/
+--replace_regex /SQL_LOAD_MB-[0-9a-f]+-[0-9a-f]+/SQL_LOAD_MB-#-#/
--exec $MYSQL_BINLOG --short-form --local-load=$MYSQLTEST_VARDIR/tmp/ --read-from-remote-server --user=root --host=127.0.0.1 --port=$MASTER_MYPORT master-bin.000001
# This is broken too
@@ -91,7 +101,7 @@ select "--- Remote --" as "";
select "--- Broken LOAD DATA --" as "";
--enable_query_log
--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
---replace_regex /SQL_LOAD_MB-[0-9]-[0-9]/SQL_LOAD_MB-#-#/
+--replace_regex /SQL_LOAD_MB-[0-9a-f]+-[0-9a-f]+/SQL_LOAD_MB-#-#/
--exec $MYSQL_BINLOG --short-form --local-load=$MYSQLTEST_VARDIR/tmp/ --read-from-remote-server --user=root --host=127.0.0.1 --port=$MASTER_MYPORT master-bin.000002 2> /dev/null
# And this too ! (altough it is documented)
@@ -99,34 +109,39 @@ select "--- Broken LOAD DATA --" as "";
select "--- --database --" as "";
--enable_query_log
--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
---replace_regex /SQL_LOAD_MB-[0-9]-[0-9]/SQL_LOAD_MB-#-#/
+--replace_regex /SQL_LOAD_MB-[0-9a-f]+-[0-9a-f]+/SQL_LOAD_MB-#-#/
--exec $MYSQL_BINLOG --short-form --local-load=$MYSQLTEST_VARDIR/tmp/ --read-from-remote-server --user=root --host=127.0.0.1 --port=$MASTER_MYPORT --database=nottest master-bin.000001 2> /dev/null
# Strangely but this works
--disable_query_log
select "--- --position --" as "";
--enable_query_log
+let $start_pos= `select @binlog_start_pos + 227`;
--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
---replace_regex /SQL_LOAD_MB-[0-9]-[0-9]/SQL_LOAD_MB-#-#/
---exec $MYSQL_BINLOG --short-form --local-load=$MYSQLTEST_VARDIR/tmp/ --read-from-remote-server --position=332 --user=root --host=127.0.0.1 --port=$MASTER_MYPORT master-bin.000002
+--replace_regex /SQL_LOAD_MB-[0-9a-f]+-[0-9a-f]+/SQL_LOAD_MB-#-#/
+--exec $MYSQL_BINLOG --short-form --local-load=$MYSQLTEST_VARDIR/tmp/ --read-from-remote-server --position=$start_pos --user=root --host=127.0.0.1 --port=$MASTER_MYPORT master-bin.000002
# Bug#7853 mysqlbinlog does not accept input from stdin
--disable_query_log
select "--- reading stdin --" as "";
--enable_query_log
--replace_result $MYSQL_TEST_DIR MYSQL_TEST_DIR
---replace_regex /SQL_LOAD_MB-[0-9]-[0-9]/SQL_LOAD_MB-#-#/
+--replace_regex /SQL_LOAD_MB-[0-9a-f]+-[0-9a-f]+/SQL_LOAD_MB-#-#/
--exec $MYSQL_BINLOG --short-form - < $MYSQL_TEST_DIR/std_data/trunc_binlog.000001
--replace_result $MYSQL_TEST_DIR MYSQL_TEST_DIR
---replace_regex /SQL_LOAD_MB-[0-9]-[0-9]/SQL_LOAD_MB-#-#/
+--replace_regex /SQL_LOAD_MB-[0-9a-f]+-[0-9a-f]+/SQL_LOAD_MB-#-#/
--exec $MYSQL_BINLOG --short-form --position=79 - < $MYSQL_TEST_DIR/std_data/trunc_binlog.000001
drop table t1,t2;
+SET @@global.max_binlog_size= @save_binlog_size;
+
#
# Bug#14157 utf8 encoding in binlog without set character_set_client
#
+### Starting master-bin.000004
flush logs;
+
--write_file $MYSQLTEST_VARDIR/tmp/bug14157.sql
create table if not exists t5 (a int);
set names latin1;
@@ -140,6 +155,8 @@ EOF
# resulted binlog, parly consisting of multi-byte utf8 chars,
# must be digestable for both client and server. In 4.1 the client
# should use default-character-set same as the server.
+
+### Starting master-bin.000005
flush logs;
--exec $MYSQL_BINLOG --short-form $MYSQLD_DATADIR/master-bin.000004 | $MYSQL
select * from t5 /* must be (1),(1) */;
@@ -150,6 +167,8 @@ drop table t5;
# Check that a dump created by mysqlbinlog reproduces
# lc_time_names dependent values correctly
#
+
+### Starting master-bin.000006
flush logs;
create table t5 (c1 int, c2 varchar(128) character set latin1 not null);
insert into t5 values (1, date_format('2001-01-01','%W'));
@@ -158,7 +177,10 @@ insert into t5 values (2, date_format('2001-01-01','%W'));
set lc_time_names=en_US;
insert into t5 values (3, date_format('2001-01-01','%W'));
select * from t5 order by c1;
+
+### Starting master-bin.000007
flush logs;
+
drop table t5;
--exec $MYSQL_BINLOG --short-form $MYSQLD_DATADIR/master-bin.000006 | $MYSQL
select * from t5 order by c1;
@@ -170,7 +192,10 @@ drop table t5;
--disable_warnings
drop procedure if exists p1;
--enable_warnings
+
+### Starting master-bin.000008
flush logs;
+
delimiter //;
create procedure p1()
begin
@@ -178,12 +203,15 @@ select 1;
end;
//
delimiter ;//
+
+### Starting master-bin.000009
flush logs;
+
call p1();
drop procedure p1;
--error ER_SP_DOES_NOT_EXIST
call p1();
---replace_regex /SQL_LOAD_MB-[0-9]-[0-9]/SQL_LOAD_MB-#-#/
+--replace_regex /SQL_LOAD_MB-[0-9a-f]+-[0-9a-f]+/SQL_LOAD_MB-#-#/
--exec $MYSQL_BINLOG --short-form $MYSQLD_DATADIR/master-bin.000008
--exec $MYSQL_BINLOG --short-form $MYSQLD_DATADIR/master-bin.000008 | $MYSQL
call p1();
@@ -202,7 +230,9 @@ drop procedure p1;
# (LOAD DATA INFILE need it)
#
+### Starting master-bin.000010
flush logs;
+
create table t1 (a varchar(64) character set utf8);
load data infile '../../std_data/loaddata6.dat' into table t1;
set character_set_database=koi8r;
@@ -217,9 +247,12 @@ load data infile '../../std_data/loaddata6.dat' into table t1;
load data infile '../../std_data/loaddata6.dat' into table t1 character set koi8r;
select hex(a) from t1;
drop table t1;
+
+### Starting master-bin.000011
flush logs;
+
--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
---replace_regex /SQL_LOAD_MB-[0-9]-[0-9]/SQL_LOAD_MB-#-#/
+--replace_regex /SQL_LOAD_MB-[0-9a-f]+-[0-9a-f]+/SQL_LOAD_MB-#-#/
--exec $MYSQL_BINLOG --short-form --local-load=$MYSQLTEST_VARDIR/tmp/ $MYSQLD_DATADIR/master-bin.000010
#
@@ -229,9 +262,14 @@ flush logs;
CREATE TABLE t1 (c1 CHAR(10));
# we need this for getting fixed timestamps inside of this test
+### Starting master-bin.000012
FLUSH LOGS;
+
INSERT INTO t1 VALUES ('0123456789');
+
+### Starting master-bin.000013
FLUSH LOGS;
+
DROP TABLE t1;
# We create a table, patch, and load the output into it
@@ -257,11 +295,16 @@ DROP TABLE patch;
#
# Bug#29928 incorrect connection_id() restoring from mysqlbinlog out
#
+### Starting master-bin.000014
FLUSH LOGS;
+
CREATE TABLE t1(a INT);
INSERT INTO t1 VALUES(connection_id());
let $a= `SELECT a FROM t1`;
+
+### Starting master-bin.000015
FLUSH LOGS;
+
let $outfile= $MYSQLTEST_VARDIR/tmp/bug29928.sql;
--exec $MYSQL_BINLOG $MYSQLD_DATADIR/master-bin.000014 > $outfile
DROP TABLE t1;
@@ -281,11 +324,12 @@ error 1;
exec $MYSQL_BINLOG $MYSQL_TEST_DIR/std_data/corrupt-relay-bin.000624 > $MYSQLTEST_VARDIR/tmp/bug31793.sql;
--remove_file $MYSQLTEST_VARDIR/tmp/bug31793.sql
-
#
# Test --disable-force-if-open and --force-if-open
#
+### Starting master-bin.000016
FLUSH LOGS;
+
--error 1
--exec $MYSQL_BINLOG $MYSQLD_DATADIR/master-bin.000016 >/dev/null 2>/dev/null
--exec $MYSQL_BINLOG --force-if-open $MYSQLD_DATADIR/master-bin.000016 >/dev/null 2>/dev/null
@@ -300,9 +344,15 @@ GRANT SELECT ON mysqltest1.* TO untrusted@localhost;
SHOW GRANTS FOR untrusted@localhost;
USE mysqltest1;
CREATE TABLE t1 (a INT, b CHAR(64));
+
+### Starting master-bin.000017
flush logs;
+
INSERT INTO t1 VALUES (1,USER());
+
+### Starting master-bin.000018
flush logs;
+
echo mysqlbinlog var/log/master-bin.000017 > var/tmp/bug31611.sql;
exec $MYSQL_BINLOG $MYSQLD_DATADIR/master-bin.000017 > $MYSQLTEST_VARDIR/tmp/bug31611.sql;
connect (unsecure,localhost,untrusted,,mysqltest1);
@@ -326,14 +376,20 @@ DROP USER untrusted@localhost;
connection default;
USE test;
SET BINLOG_FORMAT = STATEMENT;
+
+### Starting master-bin.000019
FLUSH LOGS;
+
CREATE TABLE t1 (a_real FLOAT, an_int INT, a_decimal DECIMAL(5,2), a_string CHAR(32));
SET @a_real = rand(20) * 1000;
SET @an_int = 1000;
SET @a_decimal = CAST(rand(19) * 999 AS DECIMAL(5,2));
SET @a_string = 'Just a test';
INSERT INTO t1 VALUES (@a_real, @an_int, @a_decimal, @a_string);
+
+### Starting master-bin.000020
FLUSH LOGS;
+
query_vertical SELECT * FROM t1;
DROP TABLE t1;
@@ -357,6 +413,7 @@ eval SET @@global.server_id= $s_id_max;
RESET MASTER;
FLUSH LOGS;
+
--exec $MYSQL_BINLOG $MYSQLD_DATADIR/master-bin.000001 > $binlog_file
--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR
eval SELECT
diff --git a/mysql-test/t/mysqlbinlog2.test b/mysql-test/t/mysqlbinlog2.test
index d6be029ea56..57adde7b975 100644
--- a/mysql-test/t/mysqlbinlog2.test
+++ b/mysql-test/t/mysqlbinlog2.test
@@ -3,7 +3,7 @@
# TODO: Need to look at making row based version once new binlog client is complete.
-- source include/have_binlog_format_mixed_or_statement.inc
-
+-- source include/binlog_start_pos.inc
--disable_warnings
drop table if exists t1;
@@ -50,15 +50,19 @@ select "--- offset --" as "";
--disable_query_log
select "--- start-position --" as "";
--enable_query_log
---exec $MYSQL_BINLOG --short-form --start-position=608 $MYSQLD_DATADIR/master-bin.000001
+let $start_pos= `select @binlog_start_pos + 502`;
+--exec $MYSQL_BINLOG --short-form --start-position=$start_pos $MYSQLD_DATADIR/master-bin.000001
--disable_query_log
select "--- stop-position --" as "";
--enable_query_log
---exec $MYSQL_BINLOG --short-form --stop-position=608 $MYSQLD_DATADIR/master-bin.000001
+let $stop_pos= `select @binlog_start_pos + 502`;
+--exec $MYSQL_BINLOG --short-form --stop-position=$stop_pos $MYSQLD_DATADIR/master-bin.000001
--disable_query_log
select "--- start and stop positions ---" as "";
--enable_query_log
---exec $MYSQL_BINLOG --short-form --start-position=608 --stop-position 725 $MYSQLD_DATADIR/master-bin.000001
+let $start_pos= `select @binlog_start_pos + 502`;
+let $stop_pos= `select @binlog_start_pos + 619`;
+--exec $MYSQL_BINLOG --short-form --start-position=$start_pos --stop-position $stop_pos $MYSQLD_DATADIR/master-bin.000001
--disable_query_log
select "--- start-datetime --" as "";
--enable_query_log
@@ -84,11 +88,13 @@ select "--- offset --" as "";
--disable_query_log
select "--- start-position --" as "";
--enable_query_log
---exec $MYSQL_BINLOG --short-form --start-position=608 $MYSQLD_DATADIR/master-bin.000001 $MYSQLD_DATADIR/master-bin.000002
+let $start_pos= `select @binlog_start_pos + 502`;
+--exec $MYSQL_BINLOG --short-form --start-position=$start_pos $MYSQLD_DATADIR/master-bin.000001 $MYSQLD_DATADIR/master-bin.000002
--disable_query_log
select "--- stop-position --" as "";
--enable_query_log
---exec $MYSQL_BINLOG --short-form --stop-position=134 $MYSQLD_DATADIR/master-bin.000001 $MYSQLD_DATADIR/master-bin.000002
+let $stop_pos= `select @binlog_start_pos + 28`;
+--exec $MYSQL_BINLOG --short-form --stop-position=$stop_pos $MYSQLD_DATADIR/master-bin.000001 $MYSQLD_DATADIR/master-bin.000002
--disable_query_log
select "--- start-datetime --" as "";
--enable_query_log
@@ -111,15 +117,19 @@ select "--- offset --" as "";
--disable_query_log
select "--- start-position --" as "";
--enable_query_log
---exec $MYSQL_BINLOG --short-form --start-position=608 --read-from-remote-server --user=root --host=127.0.0.1 --port=$MASTER_MYPORT master-bin.000001
+let $start_pos= `select @binlog_start_pos + 502`;
+--exec $MYSQL_BINLOG --short-form --start-position=$start_pos --read-from-remote-server --user=root --host=127.0.0.1 --port=$MASTER_MYPORT master-bin.000001
--disable_query_log
select "--- stop-position --" as "";
--enable_query_log
---exec $MYSQL_BINLOG --short-form --stop-position=608 --read-from-remote-server --user=root --host=127.0.0.1 --port=$MASTER_MYPORT master-bin.000001
+let $stop_pos= `select @binlog_start_pos + 502`;
+--exec $MYSQL_BINLOG --short-form --stop-position=$stop_pos --read-from-remote-server --user=root --host=127.0.0.1 --port=$MASTER_MYPORT master-bin.000001
--disable_query_log
select "--- start and stop positions ---" as "";
--enable_query_log
---exec $MYSQL_BINLOG --short-form --start-position=608 --stop-position 725 --read-from-remote-server --user=root --host=127.0.0.1 --port=$MASTER_MYPORT master-bin.000001
+let $start_pos= `select @binlog_start_pos + 502`;
+let $stop_pos= `select @binlog_start_pos + 619`;
+--exec $MYSQL_BINLOG --short-form --start-position=$start_pos --stop-position $stop_pos --read-from-remote-server --user=root --host=127.0.0.1 --port=$MASTER_MYPORT master-bin.000001
--disable_query_log
select "--- start-datetime --" as "";
--enable_query_log
@@ -142,11 +152,13 @@ select "--- offset --" as "";
--disable_query_log
select "--- start-position --" as "";
--enable_query_log
---exec $MYSQL_BINLOG --short-form --start-position=608 --read-from-remote-server --user=root --host=127.0.0.1 --port=$MASTER_MYPORT master-bin.000001 master-bin.000002
+let $start_pos= `select @binlog_start_pos + 502`;
+--exec $MYSQL_BINLOG --short-form --start-position=$start_pos --read-from-remote-server --user=root --host=127.0.0.1 --port=$MASTER_MYPORT master-bin.000001 master-bin.000002
--disable_query_log
select "--- stop-position --" as "";
--enable_query_log
---exec $MYSQL_BINLOG --short-form --stop-position=134 --read-from-remote-server --user=root --host=127.0.0.1 --port=$MASTER_MYPORT master-bin.000001 master-bin.000002
+let $stop_pos= `select @binlog_start_pos + 28`;
+--exec $MYSQL_BINLOG --short-form --stop-position=$stop_pos --read-from-remote-server --user=root --host=127.0.0.1 --port=$MASTER_MYPORT master-bin.000001 master-bin.000002
--disable_query_log
select "--- start-datetime --" as "";
--enable_query_log
diff --git a/mysql-test/t/mysqldump-max.test b/mysql-test/t/mysqldump-max.test
index 1e8b9647503..27c1a3ce20c 100644
--- a/mysql-test/t/mysqldump-max.test
+++ b/mysql-test/t/mysqldump-max.test
@@ -2,6 +2,7 @@
--source include/not_embedded.inc
--source include/have_innodb.inc
--source include/have_archive.inc
+--source include/have_log_bin.inc
--disable_warnings
drop table if exists t1, t2, t3, t4, t5, t6;
@@ -1124,3 +1125,84 @@ DROP VIEW v1;
DROP TABLE t1;
SET GLOBAL storage_engine=@old_engine;
+
+# Test fully non-locking mysqldump with consistent binlog position (MWL#136).
+
+connect(c1,127.0.0.1,root,,test,$MASTER_MYPORT,);
+connect(c2,127.0.0.1,root,,test,$MASTER_MYPORT,);
+connect(c3,127.0.0.1,root,,test,$MASTER_MYPORT,);
+
+connection default;
+--echo # Connection default
+SET binlog_format= mixed;
+RESET MASTER;
+CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=InnoDB;
+INSERT INTO t1 VALUES (1),(2);
+CREATE TABLE t2 (a INT PRIMARY KEY, b INT) ENGINE=InnoDB;
+INSERT INTO t2 VALUES (1,0), (2,0);
+SELECT GET_LOCK("block_queries_1", 120);
+
+connection c3;
+--echo # Connection c3
+SELECT GET_LOCK("block_queries_2", 120);
+
+# Start two queries that will be running on the tables during mysqldump
+connection c1;
+--echo # Connection c1
+SET @c= 0;
+send SELECT IF(@c<1, @c:=@c+1, GET_LOCK("block_queries_1", 120)) FROM t1 ORDER BY a;
+
+connection c2;
+--echo # Connection c2
+SET binlog_format="row";
+SET @d= 10;
+send UPDATE t2 SET b=IF(@d<=10, @d:=@d+1, GET_LOCK("block_queries_2", 120)) ORDER BY a;
+
+connection default;
+--echo # Connection default
+--echo # Make sure other queries are running (and waiting).
+let $wait_condition=
+ SELECT COUNT(*) FROM information_schema.processlist
+ WHERE state = "User lock" AND info LIKE 'SELECT%block_queries_1%';
+--source include/wait_condition.inc
+let $wait_condition=
+ SELECT COUNT(*) FROM information_schema.processlist
+ WHERE state = "User lock" AND info LIKE 'UPDATE%block_queries_2%';
+--source include/wait_condition.inc
+
+--exec $MYSQL_DUMP --master-data=2 --single-transaction test t1 t2 > $MYSQLTEST_VARDIR/tmp/mwl136.sql
+
+SELECT RELEASE_LOCK("block_queries_1");
+
+connection c3;
+--echo # Connection c3
+SELECT RELEASE_LOCK("block_queries_2");
+
+connection c1;
+--echo # Connection c1
+reap;
+
+connection c2;
+--echo # Connection c2
+reap;
+
+connection default;
+--echo # Connection default
+SELECT * FROM t2 ORDER BY a;
+DROP TABLE t1;
+DROP TABLE t2;
+--exec $MYSQL test < $MYSQLTEST_VARDIR/tmp/mwl136.sql
+
+--replace_regex /\/\* xid=.* \*\//\/* XID *\// /Server ver: .*, Binlog ver: .*/Server ver: #, Binlog ver: #/ /table_id: [0-9]+/table_id: #/
+SHOW BINLOG EVENTS LIMIT 6,3;
+--perl
+my $f= "$ENV{MYSQLTEST_VARDIR}/tmp/mwl136.sql";
+open F, '<', $f or die "Failed to open $f: $!\n";
+while (<F>) {
+ print if /CHANGE MASTER TO/;
+}
+EOF
+SELECT * FROM t1 ORDER BY a;
+SELECT * FROM t2 ORDER BY a;
+
+DROP TABLE t1,t2;
diff --git a/mysql-test/t/xa_binlog.test b/mysql-test/t/xa_binlog.test
new file mode 100644
index 00000000000..48f1dc6dfaa
--- /dev/null
+++ b/mysql-test/t/xa_binlog.test
@@ -0,0 +1,32 @@
+--source include/have_innodb.inc
+--source include/have_log_bin.inc
+
+CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=InnoDB;
+
+# Fix binlog format (otherwise SHOW BINLOG EVENTS will fluctuate).
+SET binlog_format= mixed;
+
+RESET MASTER;
+
+XA START 'xatest';
+INSERT INTO t1 VALUES (1);
+XA END 'xatest';
+XA PREPARE 'xatest';
+XA COMMIT 'xatest';
+
+XA START 'xatest';
+INSERT INTO t1 VALUES (2);
+XA END 'xatest';
+XA COMMIT 'xatest' ONE PHASE;
+
+BEGIN;
+INSERT INTO t1 VALUES (3);
+COMMIT;
+
+SELECT * FROM t1 ORDER BY a;
+
+--replace_column 2 # 5 #
+--replace_regex /xid=[0-9]+/xid=XX/
+SHOW BINLOG EVENTS LIMIT 1,9;
+
+DROP TABLE t1;
diff --git a/sql/handler.cc b/sql/handler.cc
index c5a870e77ad..df5acb98efe 100644
--- a/sql/handler.cc
+++ b/sql/handler.cc
@@ -79,6 +79,8 @@ TYPELIB tx_isolation_typelib= {array_elements(tx_isolation_names)-1,"",
static TYPELIB known_extensions= {0,"known_exts", NULL, NULL};
uint known_extensions_id= 0;
+static int commit_one_phase_2(THD *thd, bool all, THD_TRANS *trans,
+ bool is_real_trans);
static plugin_ref ha_default_plugin(THD *thd)
@@ -1077,7 +1079,7 @@ ha_check_and_coalesce_trx_read_only(THD *thd, Ha_trx_info *ha_list,
*/
int ha_commit_trans(THD *thd, bool all)
{
- int error= 0, cookie= 0;
+ int error= 0, cookie;
/*
'all' means that this is either an explicit commit issued by
user, or an implicit commit issued by a DDL.
@@ -1092,7 +1094,8 @@ int ha_commit_trans(THD *thd, bool all)
*/
bool is_real_trans= all || thd->transaction.all.ha_list == 0;
Ha_trx_info *ha_info= trans->ha_list;
- my_xid xid= thd->transaction.xid_state.xid.get_my_xid();
+ bool need_prepare_ordered, need_commit_ordered;
+ my_xid xid;
DBUG_ENTER("ha_commit_trans");
/* Just a random warning to test warnings pushed during autocommit. */
@@ -1131,89 +1134,114 @@ int ha_commit_trans(THD *thd, bool all)
DBUG_RETURN(2);
}
#ifdef USING_TRANSACTIONS
- if (ha_info)
+ if (!ha_info)
{
- uint rw_ha_count;
- bool rw_trans;
+ /* Free resources and perform other cleanup even for 'empty' transactions. */
+ if (is_real_trans)
+ thd->transaction.cleanup();
+ DBUG_RETURN(0);
+ }
- DBUG_EXECUTE_IF("crash_commit_before", DBUG_SUICIDE(););
+ DBUG_EXECUTE_IF("crash_commit_before", DBUG_SUICIDE(););
- /* Close all cursors that can not survive COMMIT */
- if (is_real_trans) /* not a statement commit */
- thd->stmt_map.close_transient_cursors();
+ /* Close all cursors that can not survive COMMIT */
+ if (is_real_trans) /* not a statement commit */
+ thd->stmt_map.close_transient_cursors();
- rw_ha_count= ha_check_and_coalesce_trx_read_only(thd, ha_info, all);
- /* rw_trans is TRUE when we in a transaction changing data */
- rw_trans= is_real_trans && (rw_ha_count > 0);
+ uint rw_ha_count= ha_check_and_coalesce_trx_read_only(thd, ha_info, all);
+ /* rw_trans is TRUE when we in a transaction changing data */
+ bool rw_trans= is_real_trans && (rw_ha_count > 0);
- if (rw_trans &&
- wait_if_global_read_lock(thd, 0, 0))
- {
- ha_rollback_trans(thd, all);
- DBUG_RETURN(1);
- }
+ if (rw_trans &&
+ wait_if_global_read_lock(thd, 0, 0))
+ {
+ ha_rollback_trans(thd, all);
+ DBUG_RETURN(1);
+ }
- if (rw_trans &&
- opt_readonly &&
- !(thd->security_ctx->master_access & SUPER_ACL) &&
- !thd->slave_thread)
- {
- my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0), "--read-only");
- ha_rollback_trans(thd, all);
- error= 1;
- goto end;
- }
+ if (rw_trans &&
+ opt_readonly &&
+ !(thd->security_ctx->master_access & SUPER_ACL) &&
+ !thd->slave_thread)
+ {
+ my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0), "--read-only");
+ goto err;
+ }
- if (!trans->no_2pc && (rw_ha_count > 1))
- {
- for (; ha_info && !error; ha_info= ha_info->next())
- {
- int err;
- handlerton *ht= ha_info->ht();
- /*
- Do not call two-phase commit if this particular
- transaction is read-only. This allows for simpler
- implementation in engines that are always read-only.
- */
- if (! ha_info->is_trx_read_write())
- continue;
- /*
- Sic: we know that prepare() is not NULL since otherwise
- trans->no_2pc would have been set.
- */
- if ((err= ht->prepare(ht, thd, all)))
- {
- my_error(ER_ERROR_DURING_COMMIT, MYF(0), err);
- error= 1;
- }
- status_var_increment(thd->status_var.ha_prepare_count);
- }
- DBUG_EXECUTE_IF("crash_commit_after_prepare", DBUG_SUICIDE(););
- if (error || (is_real_trans && xid &&
- (error= !(cookie= tc_log->log_xid(thd, xid)))))
- {
- ha_rollback_trans(thd, all);
- error= 1;
- goto end;
- }
- DBUG_EXECUTE_IF("crash_commit_after_log", DBUG_SUICIDE(););
- }
- error=ha_commit_one_phase(thd, all) ? (cookie ? 2 : 1) : 0;
- DBUG_EXECUTE_IF("crash_commit_before_unlog", DBUG_SUICIDE(););
- if (cookie)
- if(tc_log->unlog(cookie, xid))
- {
- error= 2;
- goto end;
- }
+ if (trans->no_2pc || (rw_ha_count <= 1))
+ {
+ error= ha_commit_one_phase(thd, all);
DBUG_EXECUTE_IF("crash_commit_after", DBUG_SUICIDE(););
-end:
- if (rw_trans)
- start_waiting_global_read_lock(thd);
+ goto end;
}
- /* Free resources and perform other cleanup even for 'empty' transactions. */
- else if (is_real_trans)
- thd->transaction.cleanup();
+
+ need_prepare_ordered= FALSE;
+ need_commit_ordered= FALSE;
+ xid= thd->transaction.xid_state.xid.get_my_xid();
+
+ for (Ha_trx_info *hi= ha_info; hi; hi= hi->next())
+ {
+ int err;
+ handlerton *ht= hi->ht();
+ /*
+ Do not call two-phase commit if this particular
+ transaction is read-only. This allows for simpler
+ implementation in engines that are always read-only.
+ */
+ if (! hi->is_trx_read_write())
+ continue;
+ /*
+ Sic: we know that prepare() is not NULL since otherwise
+ trans->no_2pc would have been set.
+ */
+ err= ht->prepare(ht, thd, all);
+ status_var_increment(thd->status_var.ha_prepare_count);
+ if (err)
+ my_error(ER_ERROR_DURING_COMMIT, MYF(0), err);
+
+ if (err)
+ goto err;
+
+ need_prepare_ordered|= (ht->prepare_ordered != NULL);
+ need_commit_ordered|= (ht->commit_ordered != NULL);
+ }
+ DBUG_EXECUTE_IF("crash_commit_after_prepare", DBUG_SUICIDE(););
+
+ if (!is_real_trans)
+ {
+ error= commit_one_phase_2(thd, all, trans, is_real_trans);
+ DBUG_EXECUTE_IF("crash_commit_after", DBUG_SUICIDE(););
+ goto end;
+ }
+
+ cookie= tc_log->log_and_order(thd, xid, all, need_prepare_ordered,
+ need_commit_ordered);
+ if (!cookie)
+ goto err;
+
+ DBUG_EXECUTE_IF("crash_commit_after_log", DBUG_SUICIDE(););
+
+ error= commit_one_phase_2(thd, all, trans, is_real_trans) ? 2 : 0;
+ DBUG_EXECUTE_IF("crash_commit_after", DBUG_SUICIDE(););
+
+ DBUG_EXECUTE_IF("crash_commit_before_unlog", DBUG_SUICIDE(););
+ if (tc_log->unlog(cookie, xid))
+ {
+ error= 2; /* Error during commit */
+ goto end;
+ }
+
+ DBUG_EXECUTE_IF("crash_commit_after", DBUG_SUICIDE(););
+ goto end;
+
+ /* Come here if error and we need to rollback. */
+err:
+ error= 1; /* Transaction was rolled back */
+ ha_rollback_trans(thd, all);
+
+end:
+ if (rw_trans)
+ start_waiting_global_read_lock(thd);
#endif /* USING_TRANSACTIONS */
DBUG_RETURN(error);
}
@@ -1224,7 +1252,6 @@ end:
*/
int ha_commit_one_phase(THD *thd, bool all)
{
- int error=0;
THD_TRANS *trans=all ? &thd->transaction.all : &thd->transaction.stmt;
/*
"real" is a nick name for a transaction for which a commit will
@@ -1234,8 +1261,16 @@ int ha_commit_one_phase(THD *thd, bool all)
enclosing 'all' transaction is rolled back.
*/
bool is_real_trans=all || thd->transaction.all.ha_list == 0;
- Ha_trx_info *ha_info= trans->ha_list, *ha_info_next;
DBUG_ENTER("ha_commit_one_phase");
+ DBUG_RETURN(commit_one_phase_2(thd, all, trans, is_real_trans));
+}
+
+static int
+commit_one_phase_2(THD *thd, bool all, THD_TRANS *trans, bool is_real_trans)
+{
+ int error= 0;
+ Ha_trx_info *ha_info= trans->ha_list, *ha_info_next;
+ DBUG_ENTER("commit_one_phase_2");
#ifdef USING_TRANSACTIONS
if (ha_info)
{
@@ -1852,7 +1887,16 @@ int ha_start_consistent_snapshot(THD *thd)
{
bool warn= true;
+ /*
+ Holding the LOCK_commit_ordered mutex ensures that we get the same
+ snapshot for all engines (including the binary log). This allows us
+ among other things to do backups with
+ START TRANSACTION WITH CONSISTENT SNAPSHOT and
+ have a consistent binlog position.
+ */
+ pthread_mutex_lock(&LOCK_commit_ordered);
plugin_foreach(thd, snapshot_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, &warn);
+ pthread_mutex_unlock(&LOCK_commit_ordered);
/*
Same idea as when one wants to CREATE TABLE in one engine which does not
@@ -4622,7 +4666,8 @@ static bool check_table_binlog_row_based(THD *thd, TABLE *table)
/** @brief
Write table maps for all (manually or automatically) locked tables
- to the binary log.
+ to the binary log. Also, if binlog_annotate_rows_events is ON,
+ write Annotate_rows event before the first table map.
SYNOPSIS
write_locked_table_maps()
@@ -4659,6 +4704,9 @@ static int write_locked_table_maps(THD *thd)
locks[0]= thd->extra_lock;
locks[1]= thd->lock;
locks[2]= thd->locked_tables;
+ my_bool with_annotate= thd->variables.binlog_annotate_rows_events &&
+ thd->query() && thd->query_length();
+
for (uint i= 0 ; i < sizeof(locks)/sizeof(*locks) ; ++i )
{
MYSQL_LOCK const *const lock= locks[i];
@@ -4676,7 +4724,8 @@ static int write_locked_table_maps(THD *thd)
check_table_binlog_row_based(thd, table))
{
int const has_trans= table->file->has_transactions();
- int const error= thd->binlog_write_table_map(table, has_trans);
+ int const error= thd->binlog_write_table_map(table, has_trans,
+ &with_annotate);
/*
If an error occurs, it is the responsibility of the caller to
roll back the transaction.
diff --git a/sql/handler.h b/sql/handler.h
index 12413d6238a..02b96ebefb8 100644
--- a/sql/handler.h
+++ b/sql/handler.h
@@ -771,9 +771,97 @@ struct handlerton
NOTE 'all' is also false in auto-commit mode where 'end of statement'
and 'real commit' mean the same event.
*/
- int (*commit)(handlerton *hton, THD *thd, bool all);
+ int (*commit)(handlerton *hton, THD *thd, bool all);
+ /*
+ The commit_ordered() method is called prior to the commit() method, after
+ the transaction manager has decided to commit (not rollback) the
+ transaction. Unlike commit(), commit_ordered() is called only when the
+ full transaction is committed, not for each commit of statement
+ transaction in a multi-statement transaction.
+
+ Not that like prepare(), commit_ordered() is only called when 2-phase
+ commit takes place. Ie. when no binary log and only a single engine
+ participates in a transaction, one commit() is called, no
+ commit_ordered(). So engines must be prepared for this.
+
+ The calls to commit_ordered() in multiple parallel transactions is
+ guaranteed to happen in the same order in every participating
+ handler. This can be used to ensure the same commit order among multiple
+ handlers (eg. in table handler and binlog). So if transaction T1 calls
+ into commit_ordered() of handler A before T2, then T1 will also call
+ commit_ordered() of handler B before T2.
+
+ Engines that implement this method should during this call make the
+ transaction visible to other transactions, thereby making the order of
+ transaction commits be defined by the order of commit_ordered() calls.
+
+ The intention is that commit_ordered() should do the minimal amount of
+ work that needs to happen in consistent commit order among handlers. To
+ preserve ordering, calls need to be serialised on a global mutex, so
+ doing any time-consuming or blocking operations in commit_ordered() will
+ limit scalability.
+
+ Handlers can rely on commit_ordered() calls to be serialised (no two
+ calls can run in parallel, so no extra locking on the handler part is
+ required to ensure this).
+
+ Note that commit_ordered() can be called from a different thread than the
+ one handling the transaction! So it can not do anything that depends on
+ thread local storage, in particular it can not call my_error() and
+ friends (instead it can store the error code and delay the call of
+ my_error() to the commit() method).
+
+ Similarly, since commit_ordered() returns void, any return error code
+ must be saved and returned from the commit() method instead.
+
+ The commit_ordered method is optional, and can be left unset if not
+ needed in a particular handler (then there will be no ordering guarantees
+ wrt. other engines and binary log).
+ */
+ void (*commit_ordered)(handlerton *hton, THD *thd, bool all);
int (*rollback)(handlerton *hton, THD *thd, bool all);
int (*prepare)(handlerton *hton, THD *thd, bool all);
+ /*
+ The prepare_ordered method is optional. If set, it will be called after
+ successful prepare() in all handlers participating in 2-phase
+ commit. Like commit_ordered(), it is called only when the full
+ transaction is committed, not for each commit of statement transaction.
+
+ The calls to prepare_ordered() among multiple parallel transactions are
+ ordered consistently with calls to commit_ordered(). This means that
+ calls to prepare_ordered() effectively define the commit order, and that
+ each handler will see the same sequence of transactions calling into
+ prepare_ordered() and commit_ordered().
+
+ Thus, prepare_ordered() can be used to define commit order for handlers
+ that need to do this in the prepare step (like binlog). It can also be
+ used to release transaction's locks early in an order consistent with the
+ order transactions will be eventually committed.
+
+ Like commit_ordered(), prepare_ordered() calls are serialised to maintain
+ ordering, so the intention is that they should execute fast, with only
+ the minimal amount of work needed to define commit order. Handlers can
+ rely on this serialisation, and do not need to do any extra locking to
+ avoid two prepare_ordered() calls running in parallel.
+
+ Like commit_ordered(), prepare_ordered() is not guaranteed to be called
+ in the context of the thread handling the rest of the transaction. So it
+ cannot invoke code that relies on thread local storage, in particular it
+ cannot call my_error().
+
+ prepare_ordered() cannot cause a rollback by returning an error, all
+ possible errors must be handled in prepare() (the prepare_ordered()
+ method returns void). In case of some fatal error, a record of the error
+ must be made internally by the engine and returned from commit() later.
+
+ Note that for user-level XA SQL commands, no consistent ordering among
+ prepare_ordered() and commit_ordered() is guaranteed (as that would
+ require blocking all other commits for an indefinite time).
+
+ When 2-phase commit is not used (eg. only one engine (and no binlog) in
+ transaction), neither prepare() nor prepare_ordered() is called.
+ */
+ void (*prepare_ordered)(handlerton *hton, THD *thd, bool all);
int (*recover)(handlerton *hton, XID *xid_list, uint len);
int (*commit_by_xid)(handlerton *hton, XID *xid);
int (*rollback_by_xid)(handlerton *hton, XID *xid);
diff --git a/sql/log.cc b/sql/log.cc
index 5213bdcc937..53c9843ebbc 100644
--- a/sql/log.cc
+++ b/sql/log.cc
@@ -38,6 +38,7 @@
#endif
#include <mysql/plugin.h>
+#include "debug_sync.h"
/* max size of the log message */
#define MAX_LOG_BUFFER_SIZE 1024
@@ -61,6 +62,38 @@ static int binlog_savepoint_rollback(handlerton *hton, THD *thd, void *sv);
static int binlog_commit(handlerton *hton, THD *thd, bool all);
static int binlog_rollback(handlerton *hton, THD *thd, bool all);
static int binlog_prepare(handlerton *hton, THD *thd, bool all);
+static int binlog_start_consistent_snapshot(handlerton *hton, THD *thd);
+
+static LEX_STRING const write_error_msg=
+ { C_STRING_WITH_LEN("error writing to the binary log") };
+
+static my_bool opt_optimize_thread_scheduling= TRUE;
+#ifndef DBUG_OFF
+static ulong opt_binlog_dbug_fsync_sleep= 0;
+#endif
+
+static my_bool mutexes_inited;
+pthread_mutex_t LOCK_prepare_ordered;
+pthread_mutex_t LOCK_commit_ordered;
+
+static ulonglong binlog_status_var_num_commits;
+static ulonglong binlog_status_var_num_group_commits;
+static char binlog_snapshot_file[FN_REFLEN];
+static ulonglong binlog_snapshot_position;
+
+static SHOW_VAR binlog_status_vars_detail[]=
+{
+ {"commits",
+ (char *)&binlog_status_var_num_commits, SHOW_LONGLONG},
+ {"group_commits",
+ (char *)&binlog_status_var_num_group_commits, SHOW_LONGLONG},
+ {"snapshot_file",
+ (char *)&binlog_snapshot_file, SHOW_CHAR},
+ {"snapshot_position",
+ (char *)&binlog_snapshot_position, SHOW_LONGLONG},
+ {NullS, NullS, SHOW_LONG}
+};
+
/**
Silence all errors and warnings reported when performing a write
@@ -134,50 +167,17 @@ char *make_once_alloced_filename(const char *basename, const char *ext)
/*
- Helper class to hold a mutex for the duration of the
- block.
-
- Eliminates the need for explicit unlocking of mutexes on, e.g.,
- error returns. On passing a null pointer, the sentry will not do
- anything.
- */
-class Mutex_sentry
-{
-public:
- Mutex_sentry(pthread_mutex_t *mutex)
- : m_mutex(mutex)
- {
- if (m_mutex)
- pthread_mutex_lock(mutex);
- }
-
- ~Mutex_sentry()
- {
- if (m_mutex)
- pthread_mutex_unlock(m_mutex);
-#ifndef DBUG_OFF
- m_mutex= 0;
-#endif
- }
-
-private:
- pthread_mutex_t *m_mutex;
-
- // It's not allowed to copy this object in any way
- Mutex_sentry(Mutex_sentry const&);
- void operator=(Mutex_sentry const&);
-};
-
-/*
Helper class to store binary log transaction data.
*/
class binlog_trx_data {
public:
binlog_trx_data()
: at_least_one_stmt_committed(0), incident(FALSE), m_pending(0),
- before_stmt_pos(MY_OFF_T_UNDEF)
+ before_stmt_pos(MY_OFF_T_UNDEF), last_commit_pos_offset(0),
+ using_xa(FALSE), xa_xid(0)
{
trans_log.end_of_file= max_binlog_cache_size;
+ last_commit_pos_file[0]= 0;
}
~binlog_trx_data()
@@ -229,11 +229,14 @@ public:
completely.
*/
void reset() {
- if (!empty())
+ if (trans_log.type != WRITE_CACHE || !empty())
truncate(0);
before_stmt_pos= MY_OFF_T_UNDEF;
incident= FALSE;
trans_log.end_of_file= max_binlog_cache_size;
+ using_xa= FALSE;
+ last_commit_pos_file[0]= 0;
+ last_commit_pos_offset= 0;
DBUG_ASSERT(empty());
}
@@ -278,6 +281,22 @@ public:
Binlog position before the start of the current statement.
*/
my_off_t before_stmt_pos;
+ /*
+ Binlog position for current transaction.
+ For START TRANSACTION WITH CONSISTENT SNAPSHOT, this is the binlog
+ position corresponding to the snapshot taken. During (and after) commit,
+ this is set to the binlog position corresponding to just after the
+ commit (so storage engines can store it in their transaction log).
+ */
+ char last_commit_pos_file[FN_REFLEN];
+ my_off_t last_commit_pos_offset;
+
+ /*
+ Flag set true if this transaction is committed with log_xid() as part of
+ XA, false if not.
+ */
+ bool using_xa;
+ my_xid xa_xid;
};
handlerton *binlog_hton;
@@ -1421,6 +1440,7 @@ int binlog_init(void *p)
binlog_hton->commit= binlog_commit;
binlog_hton->rollback= binlog_rollback;
binlog_hton->prepare= binlog_prepare;
+ binlog_hton->start_consistent_snapshot= binlog_start_consistent_snapshot;
binlog_hton->flags= HTON_NOT_USER_SELECTABLE | HTON_HIDDEN;
return 0;
}
@@ -1437,91 +1457,118 @@ static int binlog_close_connection(handlerton *hton, THD *thd)
}
/*
- End a transaction.
+ End a transaction, writing events to the binary log.
SYNOPSIS
- binlog_end_trans()
+ binlog_flush_trx_cache()
thd The thread whose transaction should be ended
trx_data Pointer to the transaction data to use
- end_ev The end event to use, or NULL
all True if the entire transaction should be ended, false if
only the statement transaction should be ended.
+ end_ev The end event to use (COMMIT, ROLLBACK, or commit XID)
DESCRIPTION
End the currently open transaction. The transaction can be either
- a real transaction (if 'all' is true) or a statement transaction
- (if 'all' is false).
+ a real transaction or a statement transaction.
- If 'end_ev' is NULL, the transaction is a rollback of only
- transactional tables, so the transaction cache will be truncated
- to either just before the last opened statement transaction (if
- 'all' is false), or reset completely (if 'all' is true).
+ This can be to commit a transaction, with a COMMIT query event or an XA
+ commit XID event. But it can also be to rollback a transaction with a
+ ROLLBACK query event, used for rolling back transactions which also
+ contain updates to non-transactional tables.
*/
static int
-binlog_end_trans(THD *thd, binlog_trx_data *trx_data,
- Log_event *end_ev, bool all)
+binlog_flush_trx_cache(THD *thd, binlog_trx_data *trx_data,
+ Log_event *end_ev, bool all)
{
- DBUG_ENTER("binlog_end_trans");
- int error=0;
+ DBUG_ENTER("binlog_flush_trx_cache");
IO_CACHE *trans_log= &trx_data->trans_log;
- DBUG_PRINT("enter", ("transaction: %s end_ev: 0x%lx",
- all ? "all" : "stmt", (long) end_ev));
DBUG_PRINT("info", ("thd->options={ %s%s}",
FLAGSTR(thd->options, OPTION_NOT_AUTOCOMMIT),
FLAGSTR(thd->options, OPTION_BEGIN)));
+ if (thd->binlog_flush_pending_rows_event(TRUE))
+ DBUG_RETURN(1);
+
/*
- NULL denotes ROLLBACK with nothing to replicate: i.e., rollback of
- only transactional tables. If the transaction contain changes to
- any non-transactiona tables, we need write the transaction and log
- a ROLLBACK last.
- */
- if (end_ev != NULL)
- {
- if (thd->binlog_flush_pending_rows_event(TRUE))
- DBUG_RETURN(1);
- /*
- Doing a commit or a rollback including non-transactional tables,
- i.e., ending a transaction where we might write the transaction
- cache to the binary log.
-
- We can always end the statement when ending a transaction since
- transactions are not allowed inside stored functions. If they
- were, we would have to ensure that we're not ending a statement
- inside a stored function.
- */
- error= mysql_bin_log.write(thd, &trx_data->trans_log, end_ev,
- trx_data->has_incident());
- trx_data->reset();
+ Doing a commit or a rollback including non-transactional tables,
+ i.e., ending a transaction where we might write the transaction
+ cache to the binary log.
+
+ We can always end the statement when ending a transaction since
+ transactions are not allowed inside stored functions. If they
+ were, we would have to ensure that we're not ending a statement
+ inside a stored function.
+ */
+ int error= mysql_bin_log.write_transaction_to_binlog(thd, trx_data,
+ end_ev, all);
- statistic_increment(binlog_cache_use, &LOCK_status);
- if (trans_log->disk_writes != 0)
- {
- statistic_increment(binlog_cache_disk_use, &LOCK_status);
- trans_log->disk_writes= 0;
- }
- }
- else
+ trx_data->reset();
+
+ statistic_increment(binlog_cache_use, &LOCK_status);
+ if (trans_log->disk_writes != 0)
{
- /*
- If rolling back an entire transaction or a single statement not
- inside a transaction, we reset the transaction cache.
+ statistic_increment(binlog_cache_disk_use, &LOCK_status);
+ trans_log->disk_writes= 0;
+ }
- If rolling back a statement in a transaction, we truncate the
- transaction cache to remove the statement.
- */
- thd->binlog_remove_pending_rows_event(TRUE);
- if (all || !(thd->options & (OPTION_BEGIN | OPTION_NOT_AUTOCOMMIT)))
- {
- if (trx_data->has_incident())
- error= mysql_bin_log.write_incident(thd, TRUE);
- trx_data->reset();
- }
- else // ...statement
- trx_data->truncate(trx_data->before_stmt_pos);
+ DBUG_ASSERT(thd->binlog_get_pending_rows_event() == NULL);
+ DBUG_RETURN(error);
+}
+
+/*
+ Discard a transaction, ie. ROLLBACK with only transactional table updates.
+
+ SYNOPSIS
+ binlog_truncate_trx_cache()
+
+ thd The thread whose transaction should be ended
+ trx_data Pointer to the transaction data to use
+ all True if the entire transaction should be ended, false if
+ only the statement transaction should be ended.
+
+ DESCRIPTION
+
+ Rollback (and end) a transaction that only modifies transactional
+ tables. The transaction can be either a real transaction (if 'all' is
+ true) or a statement transaction (if 'all' is false).
+
+ The transaction cache will be truncated to either just before the last
+ opened statement transaction (if 'all' is false), or reset completely (if
+ 'all' is true).
+ */
+static int
+binlog_truncate_trx_cache(THD *thd, binlog_trx_data *trx_data, bool all)
+{
+ DBUG_ENTER("binlog_truncate_trx_cache");
+ int error= 0;
+ DBUG_PRINT("enter", ("transaction: %s", all ? "all" : "stmt"));
+ DBUG_PRINT("info", ("thd->options={ %s%s}",
+ FLAGSTR(thd->options, OPTION_NOT_AUTOCOMMIT),
+ FLAGSTR(thd->options, OPTION_BEGIN)));
+
+ /*
+ ROLLBACK with nothing to replicate: i.e., rollback of only transactional
+ tables.
+ */
+
+ /*
+ If rolling back an entire transaction or a single statement not
+ inside a transaction, we reset the transaction cache.
+
+ If rolling back a statement in a transaction, we truncate the
+ transaction cache to remove the statement.
+ */
+ thd->binlog_remove_pending_rows_event(TRUE);
+ if (all || !(thd->options & (OPTION_BEGIN | OPTION_NOT_AUTOCOMMIT)))
+ {
+ if (trx_data->has_incident())
+ error= mysql_bin_log.write_incident(thd);
+ trx_data->reset();
}
+ else // ...statement
+ trx_data->truncate(trx_data->before_stmt_pos);
DBUG_ASSERT(thd->binlog_get_pending_rows_event() == NULL);
DBUG_RETURN(error);
@@ -1533,7 +1580,7 @@ static int binlog_prepare(handlerton *hton, THD *thd, bool all)
do nothing.
just pretend we can do 2pc, so that MySQL won't
switch to 1pc.
- real work will be done in MYSQL_BIN_LOG::log_xid()
+ real work will be done in MYSQL_BIN_LOG::log_and_order()
*/
return 0;
}
@@ -1584,8 +1631,8 @@ static int binlog_commit(handlerton *hton, THD *thd, bool all)
(trans_has_no_stmt_committed(thd, all) &&
!stmt_has_updated_trans_table(thd) && stmt_has_updated_non_trans_table(thd)))
{
- Query_log_event qev(thd, STRING_WITH_LEN("COMMIT"), TRUE, TRUE, 0);
- error= binlog_end_trans(thd, trx_data, &qev, all);
+ Query_log_event end_ev(thd, STRING_WITH_LEN("COMMIT"), TRUE, TRUE, 0);
+ error= binlog_flush_trx_cache(thd, trx_data, &end_ev, all);
}
trx_data->at_least_one_stmt_committed = my_b_tell(&trx_data->trans_log) > 0;
@@ -1649,7 +1696,7 @@ static int binlog_rollback(handlerton *hton, THD *thd, bool all)
(thd->options & OPTION_KEEP_LOG)) &&
mysql_bin_log.check_write_error(thd))
trx_data->set_incident();
- error= binlog_end_trans(thd, trx_data, 0, all);
+ error= binlog_truncate_trx_cache(thd, trx_data, all);
}
else
{
@@ -1668,8 +1715,8 @@ static int binlog_rollback(handlerton *hton, THD *thd, bool all)
stmt_has_updated_non_trans_table(thd) &&
thd->current_stmt_binlog_row_based))
{
- Query_log_event qev(thd, STRING_WITH_LEN("ROLLBACK"), TRUE, TRUE, 0);
- error= binlog_end_trans(thd, trx_data, &qev, all);
+ Query_log_event end_ev(thd, STRING_WITH_LEN("ROLLBACK"), TRUE, TRUE, 0);
+ error= binlog_flush_trx_cache(thd, trx_data, &end_ev, all);
}
/*
Otherwise, we simply truncate the cache as there is no change on
@@ -1677,7 +1724,7 @@ static int binlog_rollback(handlerton *hton, THD *thd, bool all)
*/
else if (ending_trans(thd, all) ||
(!(thd->options & OPTION_KEEP_LOG) && !stmt_has_updated_non_trans_table(thd)))
- error= binlog_end_trans(thd, trx_data, 0, all);
+ error= binlog_truncate_trx_cache(thd, trx_data, all);
}
if (!all)
trx_data->before_stmt_pos = MY_OFF_T_UNDEF; // part of the stmt rollback
@@ -2518,6 +2565,8 @@ const char *MYSQL_LOG::generate_name(const char *log_name,
MYSQL_BIN_LOG::MYSQL_BIN_LOG()
:bytes_written(0), prepared_xids(0), file_id(1), open_count(1),
need_start_event(TRUE),
+ group_commit_queue(0), group_commit_queue_busy(FALSE),
+ num_commits(0), num_group_commits(0),
is_relay_log(0),
description_event_for_exec(0), description_event_for_queue(0)
{
@@ -2574,6 +2623,7 @@ void MYSQL_BIN_LOG::init_pthread_objects()
(void) my_pthread_mutex_init(&LOCK_index, MY_MUTEX_INIT_SLOW, "LOCK_index",
MYF_NO_DEADLOCK_DETECTION);
(void) pthread_cond_init(&update_cond, 0);
+ (void) pthread_cond_init(&COND_queue_busy, 0);
}
@@ -2795,6 +2845,11 @@ bool MYSQL_BIN_LOG::open(const char *log_name,
if (flush_io_cache(&log_file) ||
my_sync(log_file.file, MYF(MY_WME)))
goto err;
+ pthread_mutex_lock(&LOCK_commit_ordered);
+ strmake(last_commit_pos_file, log_file_name,
+ sizeof(last_commit_pos_file)-1);
+ last_commit_pos_offset= my_b_tell(&log_file);
+ pthread_mutex_unlock(&LOCK_commit_ordered);
if (write_file_name_to_index_file)
{
@@ -4078,6 +4133,10 @@ bool MYSQL_BIN_LOG::flush_and_sync()
{
sync_binlog_counter= 0;
err=my_sync(fd, MYF(MY_WME));
+#ifndef DBUG_OFF
+ if (opt_binlog_dbug_fsync_sleep > 0)
+ my_sleep(opt_binlog_dbug_fsync_sleep);
+#endif
}
return err;
}
@@ -4269,12 +4328,33 @@ void THD::binlog_set_stmt_begin() {
trx_data->before_stmt_pos= pos;
}
+static int
+binlog_start_consistent_snapshot(handlerton *hton, THD *thd)
+{
+ int err= 0;
+ binlog_trx_data *trx_data;
+ DBUG_ENTER("binlog_start_consistent_snapshot");
+
+ thd->binlog_setup_trx_data();
+ trx_data= (binlog_trx_data*) thd_get_ha_data(thd, binlog_hton);
+
+ /* Server layer calls us with LOCK_commit_ordered locked, so this is safe. */
+ strmake(trx_data->last_commit_pos_file, mysql_bin_log.last_commit_pos_file,
+ sizeof(trx_data->last_commit_pos_file)-1);
+ trx_data->last_commit_pos_offset= mysql_bin_log.last_commit_pos_offset;
+
+ trans_register_ha(thd, TRUE, hton);
+
+ DBUG_RETURN(err);
+}
/*
- Write a table map to the binary log.
+ Write a table map to the binary log. If with_annotate != NULL and
+ *with_annotate = TRUE write also Annotate_rows before the table map.
*/
-int THD::binlog_write_table_map(TABLE *table, bool is_trans)
+int THD::binlog_write_table_map(TABLE *table, bool is_trans,
+ my_bool *with_annotate)
{
int error;
DBUG_ENTER("THD::binlog_write_table_map");
@@ -4292,7 +4372,7 @@ int THD::binlog_write_table_map(TABLE *table, bool is_trans)
if (is_trans && binlog_table_maps == 0)
binlog_start_trans_and_stmt();
- if ((error= mysql_bin_log.write(&the_event)))
+ if ((error= mysql_bin_log.write(&the_event, with_annotate)))
DBUG_RETURN(error);
binlog_table_maps++;
@@ -4376,44 +4456,48 @@ MYSQL_BIN_LOG::flush_and_set_pending_rows_event(THD *thd,
if (Rows_log_event* pending= trx_data->pending())
{
- IO_CACHE *file= &log_file;
-
/*
Decide if we should write to the log file directly or to the
transaction log.
*/
if (pending->get_cache_stmt() || my_b_tell(&trx_data->trans_log))
- file= &trx_data->trans_log;
-
- /*
- If we are not writing to the log file directly, we could avoid
- locking the log.
- */
- pthread_mutex_lock(&LOCK_log);
-
- /*
- Write pending event to log file or transaction cache
- */
- if (pending->write(file))
{
- pthread_mutex_unlock(&LOCK_log);
- set_write_error(thd);
- DBUG_RETURN(1);
+ /* Write to transaction log/cache. */
+ if (pending->write(&trx_data->trans_log))
+ {
+ set_write_error(thd);
+ DBUG_RETURN(1);
+ }
}
-
- delete pending;
-
- if (file == &log_file)
+ else
{
+ /* Write directly to log file. */
+ pthread_mutex_lock(&LOCK_log);
+ if (pending->write(&log_file))
+ {
+ pthread_mutex_unlock(&LOCK_log);
+ set_write_error(thd);
+ DBUG_RETURN(1);
+ }
+
error= flush_and_sync();
if (!error)
{
signal_update();
error= rotate_and_purge(RP_LOCK_LOG_IS_ALREADY_LOCKED);
}
+
+ /*
+ Take mutex to protect against a reader seeing partial writes of 64-bit
+ offset on 32-bit CPUs.
+ */
+ pthread_mutex_lock(&LOCK_commit_ordered);
+ last_commit_pos_offset= my_b_tell(&log_file);
+ pthread_mutex_unlock(&LOCK_commit_ordered);
+ pthread_mutex_unlock(&LOCK_log);
}
- pthread_mutex_unlock(&LOCK_log);
+ delete pending;
}
thd->binlog_set_pending_rows_event(event);
@@ -4422,10 +4506,12 @@ MYSQL_BIN_LOG::flush_and_set_pending_rows_event(THD *thd,
}
/**
- Write an event to the binary log.
+ Write an event to the binary log. If with_annotate != NULL and
+ *with_annotate = TRUE write also Annotate_rows before the event
+ (this should happen only if the event is a Table_map).
*/
-bool MYSQL_BIN_LOG::write(Log_event *event_info)
+bool MYSQL_BIN_LOG::write(Log_event *event_info, my_bool *with_annotate)
{
THD *thd= event_info->thd;
bool error= 1;
@@ -4443,11 +4529,6 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info)
}
/*
- Flush the pending rows event to the transaction cache or to the
- log file. Since this function potentially aquire the LOCK_log
- mutex, we do this before aquiring the LOCK_log mutex in this
- function.
-
We only end the statement if we are in a top-level statement. If
we are inside a stored function, we do not end the statement since
this will close all tables on the slave.
@@ -4457,8 +4538,6 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info)
if (thd->binlog_flush_pending_rows_event(end_stmt))
DBUG_RETURN(error);
- pthread_mutex_lock(&LOCK_log);
-
/*
In most cases this is only called if 'is_open()' is true; in fact this is
mostly called if is_open() *was* true a few instructions before, but it
@@ -4480,7 +4559,6 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info)
thd->lex->sql_command != SQLCOM_SAVEPOINT &&
!binlog_filter->db_ok(local_db)))
{
- VOID(pthread_mutex_unlock(&LOCK_log));
DBUG_RETURN(0);
}
#endif /* HAVE_REPLICATION */
@@ -4524,15 +4602,23 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info)
thd->binlog_start_trans_and_stmt();
file= trans_log;
}
- /*
- TODO as Mats suggested, for all the cases above where we write to
- trans_log, it sounds unnecessary to lock LOCK_log. We should rather
- test first if we want to write to trans_log, and if not, lock
- LOCK_log.
- */
}
#endif /* USING_TRANSACTIONS */
DBUG_PRINT("info",("event type: %d",event_info->get_type_code()));
+ if (file == &log_file)
+ {
+ pthread_mutex_lock(&LOCK_log);
+ /*
+ We did not want to take LOCK_log unless really necessary.
+ However, now that we hold LOCK_log, we must check is_open() again, lest
+ the log was closed just before.
+ */
+ if (unlikely(!is_open()))
+ {
+ pthread_mutex_unlock(&LOCK_log);
+ DBUG_RETURN(error);
+ }
+ }
/*
No check for auto events flag here - this write method should
@@ -4544,6 +4630,16 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info)
of the SQL command
*/
+ if (with_annotate && *with_annotate)
+ {
+ DBUG_ASSERT(event_info->get_type_code() == TABLE_MAP_EVENT);
+ Annotate_rows_log_event anno(thd);
+ /* Annotate event should be written not more than once */
+ *with_annotate= 0;
+ if (anno.write(file))
+ goto err;
+ }
+
/*
If row-based binlogging, Insert_id, Rand and other kind of "setting
context" events are not needed.
@@ -4556,7 +4652,7 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info)
Intvar_log_event e(thd,(uchar) LAST_INSERT_ID_EVENT,
thd->first_successful_insert_id_in_prev_stmt_for_binlog);
if (e.write(file))
- goto err;
+ goto err_unlock;
}
if (thd->auto_inc_intervals_in_cur_stmt_for_binlog.nb_elements() > 0)
{
@@ -4567,13 +4663,13 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info)
thd->auto_inc_intervals_in_cur_stmt_for_binlog.
minimum());
if (e.write(file))
- goto err;
+ goto err_unlock;
}
if (thd->rand_used)
{
Rand_log_event e(thd,thd->rand_saved_seed1,thd->rand_saved_seed2);
if (e.write(file))
- goto err;
+ goto err_unlock;
}
if (thd->user_var_events.elements)
{
@@ -4588,7 +4684,7 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info)
user_var_event->type,
user_var_event->charset_number);
if (e.write(file))
- goto err;
+ goto err_unlock;
}
}
}
@@ -4597,7 +4693,7 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info)
/* Write the SQL command */
if (event_info->write(file) ||
DBUG_EVALUATE_IF("injecting_fault_writing", 1, 0))
- goto err;
+ goto err_unlock;
if (file == &log_file) // we are writing to the real log (disk)
{
@@ -4605,20 +4701,33 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info)
status_var_add(thd->status_var.binlog_bytes_written, data_written);
if (flush_and_sync())
- goto err;
+ goto err_unlock;
signal_update();
if ((error= rotate_and_purge(RP_LOCK_LOG_IS_ALREADY_LOCKED)))
- goto err;
+ goto err_unlock;
}
error=0;
+err_unlock:
+ if (file == &log_file)
+ {
+ my_off_t offset= my_b_tell(&log_file);
+ /*
+ Take mutex to protect against a reader seeing partial writes of 64-bit
+ offset on 32-bit CPUs.
+ */
+ pthread_mutex_lock(&LOCK_commit_ordered);
+ last_commit_pos_offset= offset;
+ pthread_mutex_unlock(&LOCK_commit_ordered);
+ pthread_mutex_unlock(&LOCK_log);
+ }
+
err:
if (error)
set_write_error(thd);
}
- pthread_mutex_unlock(&LOCK_log);
DBUG_RETURN(error);
}
@@ -4723,7 +4832,7 @@ int MYSQL_BIN_LOG::rotate_and_purge(uint flags)
We give it a shot and try to write an incident event anyway
to the current log.
*/
- if (!write_incident(current_thd, FALSE))
+ if (!write_incident_already_locked(current_thd))
flush_and_sync();
#ifdef HAVE_REPLICATION
@@ -4764,19 +4873,15 @@ uint MYSQL_BIN_LOG::next_file_id()
write_cache()
thd Current_thread
cache Cache to write to the binary log
- lock_log True if the LOCK_log mutex should be aquired, false otherwise
- sync_log True if the log should be flushed and sync:ed
DESCRIPTION
Write the contents of the cache to the binary log. The cache will
be reset as a READ_CACHE to be able to read the contents from it.
*/
-int MYSQL_BIN_LOG::write_cache(THD *thd, IO_CACHE *cache, bool lock_log,
- bool sync_log)
+int MYSQL_BIN_LOG::write_cache(THD *thd, IO_CACHE *cache)
{
- Mutex_sentry sentry(lock_log ? &LOCK_log : NULL);
-
+ safe_mutex_assert_owner(&LOCK_log);
if (reinit_io_cache(cache, READ_CACHE, 0, 0, 0))
return ER_ERROR_ON_WRITE;
uint length= my_b_bytes_in_cache(cache), group, carry, hdr_offs;
@@ -4888,6 +4993,8 @@ int MYSQL_BIN_LOG::write_cache(THD *thd, IO_CACHE *cache, bool lock_log,
}
/* Write data to the binary log file */
+ DBUG_EXECUTE_IF("fail_binlog_write_1",
+ errno= 28; return ER_ERROR_ON_WRITE;);
if (my_b_write(&log_file, cache->read_pos, length))
return ER_ERROR_ON_WRITE;
status_var_add(thd->status_var.binlog_bytes_written, length);
@@ -4897,9 +5004,6 @@ int MYSQL_BIN_LOG::write_cache(THD *thd, IO_CACHE *cache, bool lock_log,
DBUG_ASSERT(carry == 0);
- if (sync_log)
- flush_and_sync();
-
return 0; // All OK
}
@@ -4932,31 +5036,50 @@ int query_error_code(THD *thd, bool not_killed)
return error;
}
-bool MYSQL_BIN_LOG::write_incident(THD *thd, bool lock)
+
+bool MYSQL_BIN_LOG::write_incident_already_locked(THD *thd)
{
uint error= 0;
- DBUG_ENTER("MYSQL_BIN_LOG::write_incident");
-
- if (!is_open())
- DBUG_RETURN(error);
-
- LEX_STRING const write_error_msg=
- { C_STRING_WITH_LEN("error writing to the binary log") };
+ DBUG_ENTER("MYSQL_BIN_LOG::write_incident_already_locked");
Incident incident= INCIDENT_LOST_EVENTS;
Incident_log_event ev(thd, incident, write_error_msg);
- if (lock)
- pthread_mutex_lock(&LOCK_log);
- error= ev.write(&log_file);
- status_var_add(thd->status_var.binlog_bytes_written, ev.data_written);
- if (lock)
+
+ if (likely(is_open()))
+ {
+ error= ev.write(&log_file);
+ status_var_add(thd->status_var.binlog_bytes_written, ev.data_written);
+ }
+
+ DBUG_RETURN(error);
+}
+
+
+bool MYSQL_BIN_LOG::write_incident(THD *thd)
+{
+ uint error= 0;
+ my_off_t offset;
+ DBUG_ENTER("MYSQL_BIN_LOG::write_incident");
+
+ pthread_mutex_lock(&LOCK_log);
+ if (likely(is_open()))
{
- if (!error && !(error= flush_and_sync()))
+ if (!(error= write_incident_already_locked(thd)) &&
+ !(error= flush_and_sync()))
{
signal_update();
error= rotate_and_purge(RP_LOCK_LOG_IS_ALREADY_LOCKED);
}
- pthread_mutex_unlock(&LOCK_log);
+ offset= my_b_tell(&log_file);
+ /*
+ Take mutex to protect against a reader seeing partial writes of 64-bit
+ offset on 32-bit CPUs.
+ */
+ pthread_mutex_lock(&LOCK_commit_ordered);
+ last_commit_pos_offset= offset;
+ pthread_mutex_unlock(&LOCK_commit_ordered);
}
+ pthread_mutex_unlock(&LOCK_log);
+
DBUG_RETURN(error);
}
@@ -4984,111 +5107,367 @@ bool MYSQL_BIN_LOG::write_incident(THD *thd, bool lock)
'cache' needs to be reinitialized after this functions returns.
*/
-bool MYSQL_BIN_LOG::write(THD *thd, IO_CACHE *cache, Log_event *commit_event,
- bool incident)
+bool
+MYSQL_BIN_LOG::write_transaction_to_binlog(THD *thd, binlog_trx_data *trx_data,
+ Log_event *end_ev, bool all)
+{
+ group_commit_entry entry;
+ DBUG_ENTER("MYSQL_BIN_LOG::write_transaction_to_binlog");
+
+ entry.thd= thd;
+ entry.trx_data= trx_data;
+ entry.error= 0;
+ entry.all= all;
+
+ /*
+ Log "BEGIN" at the beginning of every transaction. Here, a transaction is
+ either a BEGIN..COMMIT block or a single statement in autocommit mode.
+
+ Create the necessary events here, where we have the correct THD (and
+ thread context).
+
+ Due to group commit the actual writing to binlog may happen in a different
+ thread.
+ */
+ Query_log_event qinfo(thd, STRING_WITH_LEN("BEGIN"), TRUE, TRUE, 0);
+ entry.begin_event= &qinfo;
+ entry.end_event= end_ev;
+ if (trx_data->has_incident())
+ {
+ Incident_log_event inc_ev(thd, INCIDENT_LOST_EVENTS, write_error_msg);
+ entry.incident_event= &inc_ev;
+ DBUG_RETURN(write_transaction_to_binlog_events(&entry));
+ }
+ else
+ {
+ entry.incident_event= NULL;
+ DBUG_RETURN(write_transaction_to_binlog_events(&entry));
+ }
+}
+
+bool
+MYSQL_BIN_LOG::write_transaction_to_binlog_events(group_commit_entry *entry)
{
- DBUG_ENTER("MYSQL_BIN_LOG::write(THD *, IO_CACHE *, Log_event *)");
+ /*
+ To facilitate group commit for the binlog, we first queue up ourselves in
+ the group commit queue. Then the first thread to enter the queue waits for
+ the LOCK_log mutex, and commits for everyone in the queue once it gets the
+ lock. Any other threads in the queue just wait for the first one to finish
+ the commit and wake them up.
+ */
+
+ entry->thd->clear_wakeup_ready();
+ pthread_mutex_lock(&LOCK_prepare_ordered);
+ group_commit_entry *orig_queue= group_commit_queue;
+ entry->next= orig_queue;
+ group_commit_queue= entry;
+
+ if (entry->trx_data->using_xa)
+ {
+ DEBUG_SYNC(entry->thd, "commit_before_prepare_ordered");
+ run_prepare_ordered(entry->thd, entry->all);
+ DEBUG_SYNC(entry->thd, "commit_after_prepare_ordered");
+ }
+ pthread_mutex_unlock(&LOCK_prepare_ordered);
+ DEBUG_SYNC(entry->thd, "commit_after_release_LOCK_prepare_ordered");
+
+ /*
+ The first in the queue handle group commit for all; the others just wait
+ to be signalled when group commit is done.
+ */
+ if (orig_queue != NULL)
+ entry->thd->wait_for_wakeup_ready();
+ else
+ trx_group_commit_leader(entry);
+
+ if (!opt_optimize_thread_scheduling)
+ {
+ /* For the leader, trx_group_commit_leader() already took the lock. */
+ if (orig_queue != NULL)
+ pthread_mutex_lock(&LOCK_commit_ordered);
+
+ DEBUG_SYNC(entry->thd, "commit_loop_entry_commit_ordered");
+ ++num_commits;
+ if (entry->trx_data->using_xa && !entry->error)
+ run_commit_ordered(entry->thd, entry->all);
+
+ group_commit_entry *next= entry->next;
+ if (!next)
+ {
+ group_commit_queue_busy= FALSE;
+ pthread_cond_signal(&COND_queue_busy);
+ DEBUG_SYNC(entry->thd, "commit_after_group_run_commit_ordered");
+ }
+ pthread_mutex_unlock(&LOCK_commit_ordered);
+
+ if (next)
+ {
+ next->thd->signal_wakeup_ready();
+ }
+ }
+
+ if (likely(!entry->error))
+ return 0;
+
+ switch (entry->error)
+ {
+ case ER_ERROR_ON_WRITE:
+ my_error(ER_ERROR_ON_WRITE, MYF(ME_NOREFRESH), name, entry->commit_errno);
+ break;
+ case ER_ERROR_ON_READ:
+ my_error(ER_ERROR_ON_READ, MYF(ME_NOREFRESH),
+ entry->trx_data->trans_log.file_name, entry->commit_errno);
+ break;
+ default:
+ /*
+ There are not (and should not be) any errors thrown not covered above.
+ But just in case one is added later without updating the above switch
+ statement, include a catch-all.
+ */
+ my_printf_error(entry->error,
+ "Error writing transaction to binary log: %d",
+ MYF(ME_NOREFRESH), entry->error);
+ }
+
+ /*
+ Since we return error, this transaction XID will not be committed, so
+ we need to mark it as not needed for recovery (unlog() is not called
+ for a transaction if log_xid() fails).
+ */
+ if (entry->trx_data->using_xa && entry->trx_data->xa_xid)
+ mark_xid_done();
+
+ return 1;
+}
+
+/*
+ Do binlog group commit as the lead thread.
+
+ This must be called when this thread/transaction is queued at the start of
+ the group_commit_queue. It will wait to obtain the LOCK_log mutex, then group
+ commit all the transactions in the queue (more may have entered while waiting
+ for LOCK_log). After commit is done, all other threads in the queue will be
+ signalled.
+
+ */
+void
+MYSQL_BIN_LOG::trx_group_commit_leader(group_commit_entry *leader)
+{
+ uint xid_count= 0;
+ uint write_count= 0;
+ my_off_t commit_offset;
+ group_commit_entry *current;
+ group_commit_entry *last_in_queue;
+ DBUG_ENTER("MYSQL_BIN_LOG::trx_group_commit_leader");
+ LINT_INIT(commit_offset);
+
+ /*
+ Lock the LOCK_log(), and once we get it, collect any additional writes
+ that queued up while we were waiting.
+ */
VOID(pthread_mutex_lock(&LOCK_log));
+ DEBUG_SYNC(leader->thd, "commit_after_get_LOCK_log");
- /* NULL would represent nothing to replicate after ROLLBACK */
- DBUG_ASSERT(commit_event != NULL);
+ pthread_mutex_lock(&LOCK_prepare_ordered);
+ current= group_commit_queue;
+ group_commit_queue= NULL;
+ pthread_mutex_unlock(&LOCK_prepare_ordered);
+ /* As the queue is in reverse order of entering, reverse it. */
+ group_commit_entry *queue= NULL;
+ last_in_queue= current;
+ while (current)
+ {
+ group_commit_entry *next= current->next;
+ current->next= queue;
+ queue= current;
+ current= next;
+ }
+ DBUG_ASSERT(leader == queue /* the leader should be first in queue */);
+
+ /* Now we have in queue the list of transactions to be committed in order. */
DBUG_ASSERT(is_open());
if (likely(is_open())) // Should always be true
{
/*
- We only bother to write to the binary log if there is anything
- to write.
- */
- if (my_b_tell(cache) > 0)
+ Commit every transaction in the queue.
+
+ Note that we are doing this in a different thread than the one running
+ the transaction! So we are limited in the operations we can do. In
+ particular, we cannot call my_error() on behalf of a transaction, as
+ that obtains the THD from thread local storage. Instead, we must set
+ current->error and let the thread do the error reporting itself once
+ we wake it up.
+ */
+ for (current= queue; current != NULL; current= current->next)
{
- /*
- Log "BEGIN" at the beginning of every transaction. Here, a
- transaction is either a BEGIN..COMMIT block or a single
- statement in autocommit mode.
- */
- Query_log_event qinfo(thd, STRING_WITH_LEN("BEGIN"), TRUE, TRUE, 0);
+ binlog_trx_data *trx_data= current->trx_data;
+ IO_CACHE *cache= &trx_data->trans_log;
/*
- Now this Query_log_event has artificial log_pos 0. It must be
- adjusted to reflect the real position in the log. Not doing it
- would confuse the slave: it would prevent this one from
- knowing where he is in the master's binlog, which would result
- in wrong positions being shown to the user, MASTER_POS_WAIT
- undue waiting etc.
+ We only bother to write to the binary log if there is anything
+ to write.
*/
- if (qinfo.write(&log_file))
- goto err;
- status_var_add(thd->status_var.binlog_bytes_written, qinfo.data_written);
-
- DBUG_EXECUTE_IF("crash_before_writing_xid",
- {
- if ((write_error= write_cache(thd, cache, FALSE,
- TRUE)))
- DBUG_PRINT("info", ("error writing binlog cache: %d",
- write_error));
- DBUG_PRINT("info", ("crashing before writing xid"));
- DBUG_SUICIDE();
- });
-
- if ((write_error= write_cache(thd, cache, FALSE, FALSE)))
- goto err;
-
- if (commit_event)
+ if (my_b_tell(cache) > 0)
{
- if (commit_event->write(&log_file))
- goto err;
- status_var_add(thd->status_var.binlog_bytes_written,
- commit_event->data_written);
+ if ((current->error= write_transaction(current)))
+ current->commit_errno= errno;
+
+ write_count++;
}
- if (incident && write_incident(thd, FALSE))
- goto err;
+ strmake(trx_data->last_commit_pos_file, log_file_name,
+ sizeof(trx_data->last_commit_pos_file)-1);
+ commit_offset= my_b_write_tell(&log_file);
+ trx_data->last_commit_pos_offset= commit_offset;
+ if (trx_data->using_xa && trx_data->xa_xid)
+ xid_count++;
+ }
+ if (write_count > 0)
+ {
if (flush_and_sync())
- goto err;
- DBUG_EXECUTE_IF("half_binlogged_transaction", DBUG_SUICIDE(););
- if (cache->error) // Error on read
{
- sql_print_error(ER(ER_ERROR_ON_READ), cache->file_name, errno);
- write_error=1; // Don't give more errors
- goto err;
+ for (current= queue; current != NULL; current= current->next)
+ {
+ if (!current->error)
+ {
+ current->error= ER_ERROR_ON_WRITE;
+ current->commit_errno= errno;
+ }
+ }
+ }
+ else
+ {
+ signal_update();
}
- signal_update();
}
/*
- if commit_event is Xid_log_event, increase the number of
- prepared_xids (it's decreasd in ::unlog()). Binlog cannot be rotated
+ if any commit_events are Xid_log_event, increase the number of
+ prepared_xids (it's decreased in ::unlog()). Binlog cannot be rotated
if there're prepared xids in it - see the comment in new_file() for
an explanation.
- If the commit_event is not Xid_log_event (then it's a Query_log_event)
- rotate binlog, if necessary.
+ If no Xid_log_events (then it's all Query_log_event) rotate binlog,
+ if necessary.
*/
- if (commit_event && commit_event->get_type_code() == XID_EVENT)
+ if (xid_count > 0)
{
- pthread_mutex_lock(&LOCK_prep_xids);
- prepared_xids++;
- pthread_mutex_unlock(&LOCK_prep_xids);
+ mark_xids_active(xid_count);
}
else
+ {
if (rotate_and_purge(RP_LOCK_LOG_IS_ALREADY_LOCKED))
- goto err;
+ {
+ /*
+ If we fail to rotate, which thread should get the error?
+ We give the error to the *last* transaction thread; that seems to
+ make the most sense, as it was the last to write to the log.
+ */
+ last_in_queue->error= ER_ERROR_ON_WRITE;
+ last_in_queue->commit_errno= errno;
+ }
+ }
}
- VOID(pthread_mutex_unlock(&LOCK_log));
- DBUG_RETURN(0);
+ DEBUG_SYNC(leader->thd, "commit_before_get_LOCK_commit_ordered");
+ pthread_mutex_lock(&LOCK_commit_ordered);
+ last_commit_pos_offset= commit_offset;
+ /*
+ We cannot unlock LOCK_log until we have locked LOCK_commit_ordered;
+ otherwise scheduling could allow the next group commit to run ahead of us,
+ messing up the order of commit_ordered() calls. But as soon as
+ LOCK_commit_ordered is obtained, we can let the next group commit start.
+ */
+ pthread_mutex_unlock(&LOCK_log);
+ DEBUG_SYNC(leader->thd, "commit_after_release_LOCK_log");
+ ++num_group_commits;
-err:
- if (!write_error)
+ if (!opt_optimize_thread_scheduling)
{
- write_error= 1;
- sql_print_error(ER(ER_ERROR_ON_WRITE), name, errno);
+ /*
+ If we want to run commit_ordered() each in the transaction's own thread
+ context, then we need to mark the queue reserved; we need to finish all
+ threads in one group commit before the next group commit can be allowed
+ to proceed, and we cannot unlock a simple pthreads mutex in a different
+ thread from the one that locked it.
+ */
+
+ while (group_commit_queue_busy)
+ pthread_cond_wait(&COND_queue_busy, &LOCK_commit_ordered);
+ group_commit_queue_busy= TRUE;
+
+ /* Note that we return with LOCK_commit_ordered locked! */
+ DBUG_VOID_RETURN;
}
- VOID(pthread_mutex_unlock(&LOCK_log));
- DBUG_RETURN(1);
+
+ /*
+ Wakeup each participant waiting for our group commit, first calling the
+ commit_ordered() methods for any transactions doing 2-phase commit.
+ */
+ current= queue;
+ while (current != NULL)
+ {
+ group_commit_entry *next;
+
+ DEBUG_SYNC(leader->thd, "commit_loop_entry_commit_ordered");
+ ++num_commits;
+ if (current->trx_data->using_xa && !current->error)
+ run_commit_ordered(current->thd, current->all);
+
+ /*
+ Careful not to access current->next after waking up the other thread! As
+ it may change immediately after wakeup.
+ */
+ next= current->next;
+ if (current != leader) // Don't wake up ourself
+ current->thd->signal_wakeup_ready();
+ current= next;
+ }
+ DEBUG_SYNC(leader->thd, "commit_after_group_run_commit_ordered");
+ pthread_mutex_unlock(&LOCK_commit_ordered);
+
+ DBUG_VOID_RETURN;
}
+int
+MYSQL_BIN_LOG::write_transaction(group_commit_entry *entry)
+{
+ binlog_trx_data *trx_data= entry->trx_data;
+ IO_CACHE *cache= &trx_data->trans_log;
+
+ if (entry->begin_event->write(&log_file))
+ return ER_ERROR_ON_WRITE;
+ status_var_add(entry->thd->status_var.binlog_bytes_written,
+ entry->begin_event->data_written);
+
+ DBUG_EXECUTE_IF("crash_before_writing_xid",
+ {
+ if ((write_cache(entry->thd, cache)))
+ DBUG_PRINT("info", ("error writing binlog cache"));
+ else
+ flush_and_sync();
+
+ DBUG_PRINT("info", ("crashing before writing xid"));
+ DBUG_SUICIDE();
+ });
+
+ if (write_cache(entry->thd, cache))
+ return ER_ERROR_ON_WRITE;
+
+ if (entry->end_event->write(&log_file))
+ return ER_ERROR_ON_WRITE;
+ status_var_add(entry->thd->status_var.binlog_bytes_written,
+ entry->end_event->data_written);
+
+ if (entry->incident_event && entry->incident_event->write(&log_file))
+ return ER_ERROR_ON_WRITE;
+
+ if (cache->error) // Error on read
+ return ER_ERROR_ON_READ;
+
+ return 0;
+}
/**
Wait until we get a signal that the binary log has been updated.
@@ -5467,6 +5846,171 @@ void sql_print_information(const char *format, ...)
}
+void
+TC_init()
+{
+ my_pthread_mutex_init(&LOCK_prepare_ordered, MY_MUTEX_INIT_SLOW,
+ "LOCK_prepare_ordered", MYF(0));
+ my_pthread_mutex_init(&LOCK_commit_ordered, MY_MUTEX_INIT_SLOW,
+ "LOCK_commit_ordered", MYF(0));
+ mutexes_inited= TRUE;
+}
+
+
+void
+TC_destroy()
+{
+ if (mutexes_inited)
+ {
+ pthread_mutex_destroy(&LOCK_prepare_ordered);
+ pthread_mutex_destroy(&LOCK_commit_ordered);
+ mutexes_inited= FALSE;
+ }
+}
+
+
+void
+TC_LOG::run_prepare_ordered(THD *thd, bool all)
+{
+ Ha_trx_info *ha_info=
+ all ? thd->transaction.all.ha_list : thd->transaction.stmt.ha_list;
+
+ safe_mutex_assert_owner(&LOCK_prepare_ordered);
+ for (; ha_info; ha_info= ha_info->next())
+ {
+ handlerton *ht= ha_info->ht();
+ if (!ht->prepare_ordered)
+ continue;
+ ht->prepare_ordered(ht, thd, all);
+ }
+}
+
+
+void
+TC_LOG::run_commit_ordered(THD *thd, bool all)
+{
+ Ha_trx_info *ha_info=
+ all ? thd->transaction.all.ha_list : thd->transaction.stmt.ha_list;
+
+ safe_mutex_assert_owner(&LOCK_commit_ordered);
+ for (; ha_info; ha_info= ha_info->next())
+ {
+ handlerton *ht= ha_info->ht();
+ if (!ht->commit_ordered)
+ continue;
+ ht->commit_ordered(ht, thd, all);
+ DEBUG_SYNC(thd, "commit_after_run_commit_ordered");
+ }
+}
+
+
+int TC_LOG_MMAP::log_and_order(THD *thd, my_xid xid, bool all,
+ bool need_prepare_ordered,
+ bool need_commit_ordered)
+{
+ int cookie;
+ struct commit_entry entry;
+ bool is_group_commit_leader;
+ LINT_INIT(is_group_commit_leader);
+
+ if (need_prepare_ordered)
+ {
+ pthread_mutex_lock(&LOCK_prepare_ordered);
+ run_prepare_ordered(thd, all);
+ if (need_commit_ordered)
+ {
+ /*
+ Must put us in queue so we can run_commit_ordered() in same sequence
+ as we did run_prepare_ordered().
+ */
+ thd->clear_wakeup_ready();
+ entry.thd= thd;
+ commit_entry *previous_queue= commit_ordered_queue;
+ entry.next= previous_queue;
+ commit_ordered_queue= &entry;
+ is_group_commit_leader= (previous_queue == NULL);
+ }
+ pthread_mutex_unlock(&LOCK_prepare_ordered);
+ }
+
+ cookie= 0;
+ if (xid)
+ cookie= log_one_transaction(xid);
+
+ if (need_commit_ordered)
+ {
+ if (need_prepare_ordered)
+ {
+ /*
+ We did the run_prepare_ordered() serialised, then ran the log_xid() in
+ parallel. Now we have to do run_commit_ordered() serialised in the
+ same sequence as run_prepare_ordered().
+
+ We do this starting from the head of the queue, each thread doing
+ run_commit_ordered() and signalling the next in queue.
+ */
+ if (is_group_commit_leader)
+ {
+ /* The first in queue starts the ball rolling. */
+ pthread_mutex_lock(&LOCK_prepare_ordered);
+ while (commit_ordered_queue_busy)
+ pthread_cond_wait(&COND_queue_busy, &LOCK_prepare_ordered);
+ commit_entry *queue= commit_ordered_queue;
+ commit_ordered_queue= NULL;
+ /*
+ Mark the queue busy while we bounce it from one thread to the
+ next.
+ */
+ commit_ordered_queue_busy= true;
+ pthread_mutex_unlock(&LOCK_prepare_ordered);
+
+ /* Reverse the queue list so we get correct order. */
+ commit_entry *prev= NULL;
+ while (queue)
+ {
+ commit_entry *next= queue->next;
+ queue->next= prev;
+ prev= queue;
+ queue= next;
+ }
+ DBUG_ASSERT(prev == &entry && prev->thd == thd);
+ }
+ else
+ {
+ /* Not first in queue; just wait until previous thread wakes us up. */
+ thd->wait_for_wakeup_ready();
+ }
+ }
+
+ /* Only run commit_ordered() if log_xid was successful. */
+ if (cookie)
+ {
+ pthread_mutex_lock(&LOCK_commit_ordered);
+ run_commit_ordered(thd, all);
+ pthread_mutex_unlock(&LOCK_commit_ordered);
+ }
+
+ if (need_prepare_ordered)
+ {
+ commit_entry *next= entry.next;
+ if (next)
+ {
+ next->thd->signal_wakeup_ready();
+ }
+ else
+ {
+ pthread_mutex_lock(&LOCK_prepare_ordered);
+ commit_ordered_queue_busy= false;
+ pthread_cond_signal(&COND_queue_busy);
+ pthread_mutex_unlock(&LOCK_prepare_ordered);
+ }
+ }
+ }
+
+ return cookie;
+}
+
+
/********* transaction coordinator log for 2pc - mmap() based solution *******/
/*
@@ -5603,6 +6147,7 @@ int TC_LOG_MMAP::open(const char *opt_name)
pthread_mutex_init(&LOCK_pool, MY_MUTEX_INIT_FAST);
pthread_cond_init(&COND_active, 0);
pthread_cond_init(&COND_pool, 0);
+ pthread_cond_init(&COND_queue_busy, 0);
inited=6;
@@ -5610,6 +6155,8 @@ int TC_LOG_MMAP::open(const char *opt_name)
active=pages;
pool=pages+1;
pool_last=pages+npages-1;
+ commit_ordered_queue= NULL;
+ commit_ordered_queue_busy= false;
return 0;
@@ -5715,7 +6262,7 @@ int TC_LOG_MMAP::overflow()
to the position in memory where xid was logged to.
*/
-int TC_LOG_MMAP::log_xid(THD *thd, my_xid xid)
+int TC_LOG_MMAP::log_one_transaction(my_xid xid)
{
int err;
PAGE *p;
@@ -5885,6 +6432,8 @@ void TC_LOG_MMAP::close()
pthread_mutex_destroy(&LOCK_active);
pthread_mutex_destroy(&LOCK_pool);
pthread_cond_destroy(&COND_pool);
+ pthread_cond_destroy(&COND_active);
+ pthread_cond_destroy(&COND_queue_busy);
case 5:
data[0]='A'; // garble the first (signature) byte, in case my_delete fails
case 4:
@@ -6093,39 +6642,98 @@ void TC_LOG_BINLOG::close()
pthread_cond_destroy (&COND_prep_xids);
}
-/**
- @todo
- group commit
-
- @retval
- 0 error
- @retval
- 1 success
+/*
+ Do a binlog log_xid() for a group of transactions, linked through
+ thd->next_commit_ordered.
*/
-int TC_LOG_BINLOG::log_xid(THD *thd, my_xid xid)
+int
+TC_LOG_BINLOG::log_and_order(THD *thd, my_xid xid, bool all,
+ bool need_prepare_ordered __attribute__((unused)),
+ bool need_commit_ordered __attribute__((unused)))
{
- DBUG_ENTER("TC_LOG_BINLOG::log");
- Xid_log_event xle(thd, xid);
- binlog_trx_data *trx_data=
+ int err;
+ DBUG_ENTER("TC_LOG_BINLOG::log_and_order");
+
+ binlog_trx_data *const trx_data=
(binlog_trx_data*) thd_get_ha_data(thd, binlog_hton);
- /*
- We always commit the entire transaction when writing an XID. Also
- note that the return value is inverted.
- */
- DBUG_RETURN(!binlog_end_trans(thd, trx_data, &xle, TRUE));
+
+ trx_data->using_xa= TRUE;
+ trx_data->xa_xid= xid;
+ if (xid)
+ {
+ Xid_log_event xid_event(thd, xid);
+ err= binlog_flush_trx_cache(thd, trx_data, &xid_event, all);
+ }
+ else
+ {
+ /*
+ Empty xid occurs in XA COMMIT ... ONE PHASE.
+ In this case, we do not have a MySQL xid for the transaction, and the
+ external XA transaction coordinator will have to handle recovery if
+ needed. So we end the transaction with a plain COMMIT query event.
+ */
+ Query_log_event end_event(thd, STRING_WITH_LEN("COMMIT"), TRUE, TRUE, 0);
+ err= binlog_flush_trx_cache(thd, trx_data, &end_event, all);
+ }
+
+ DEBUG_SYNC(thd, "binlog_after_log_and_order");
+
+ DBUG_RETURN(!err);
}
-int TC_LOG_BINLOG::unlog(ulong cookie, my_xid xid)
+/*
+ After an XID is logged, we need to hold on to the current binlog file until
+ it is fully committed in the storage engine. The reason is that crash
+ recovery only looks at the latest binlog, so we must make sure there are no
+ outstanding prepared (but not committed) transactions before rotating the
+ binlog.
+
+ To handle this, we keep a count of outstanding XIDs. This function is used
+ to increase this count when committing one or more transactions to the
+ binary log.
+*/
+void
+TC_LOG_BINLOG::mark_xids_active(uint xid_count)
{
- DBUG_ENTER("TC_LOG_BINLOG::unlog");
+ DBUG_ENTER("TC_LOG_BINLOG::mark_xids_active");
+ DBUG_PRINT("info", ("xid_count=%u", xid_count));
+ pthread_mutex_lock(&LOCK_prep_xids);
+ prepared_xids+= xid_count;
+ pthread_mutex_unlock(&LOCK_prep_xids);
+ DBUG_VOID_RETURN;
+}
+
+/*
+ Once an XID is committed, it is safe to rotate the binary log, as it can no
+ longer be needed during crash recovery.
+
+ This function is called to mark an XID this way. It needs to decrease the
+ count of pending XIDs, and signal the log rotator thread when it reaches zero.
+*/
+void
+TC_LOG_BINLOG::mark_xid_done()
+{
+ my_bool send_signal;
+
+ DBUG_ENTER("TC_LOG_BINLOG::mark_xid_done");
pthread_mutex_lock(&LOCK_prep_xids);
DBUG_ASSERT(prepared_xids > 0);
- if (--prepared_xids == 0) {
+ send_signal= !--prepared_xids;
+ pthread_mutex_unlock(&LOCK_prep_xids);
+ if (send_signal) {
DBUG_PRINT("info", ("prepared_xids=%lu", prepared_xids));
pthread_cond_signal(&COND_prep_xids);
}
- pthread_mutex_unlock(&LOCK_prep_xids);
- DBUG_RETURN(rotate_and_purge(0)); // as ::write() did not rotate
+ DBUG_VOID_RETURN;
+}
+
+int TC_LOG_BINLOG::unlog(ulong cookie, my_xid xid)
+{
+ DBUG_ENTER("TC_LOG_BINLOG::unlog");
+ if (xid)
+ mark_xid_done();
+ /* As ::write_transaction_to_binlog() did not rotate, do it here. */
+ DBUG_RETURN(rotate_and_purge(0));
}
int TC_LOG_BINLOG::recover(IO_CACHE *log, Format_description_log_event *fdle)
@@ -6194,9 +6802,131 @@ ulonglong mysql_bin_log_file_pos(void)
{
return (ulonglong) mysql_bin_log.get_log_file()->pos_in_file;
}
+/*
+ Get the current position of the MySQL binlog for transaction currently being
+ committed.
+
+ This is valid to call from within storage engine commit_ordered() and
+ commit() methods only.
+
+ Since it stores the position inside THD, it is safe to call without any
+ locking.
+*/
+void
+mysql_bin_log_commit_pos(THD *thd, ulonglong *out_pos, const char **out_file)
+{
+ binlog_trx_data *const trx_data=
+ (binlog_trx_data*) thd_get_ha_data(thd, binlog_hton);
+ if (trx_data)
+ {
+ *out_file= trx_data->last_commit_pos_file;
+ *out_pos= (ulonglong)(trx_data->last_commit_pos_offset);
+ }
+ else
+ {
+ *out_file= NULL;
+ *out_pos= 0;
+ }
+}
#endif /* INNODB_COMPATIBILITY_HOOKS */
+static int show_binlog_vars(THD *thd, SHOW_VAR *var, char *buff)
+{
+ mysql_bin_log.set_status_variables(thd);
+ var->type= SHOW_ARRAY;
+ var->value= (char *)&binlog_status_vars_detail;
+ return 0;
+}
+
+static SHOW_VAR binlog_status_vars_top[]= {
+ {"binlog", (char *) &show_binlog_vars, SHOW_FUNC},
+ {NullS, NullS, SHOW_LONG}
+};
+
+static MYSQL_SYSVAR_BOOL(
+ optimize_thread_scheduling,
+ opt_optimize_thread_scheduling,
+ PLUGIN_VAR_READONLY,
+ "Run fast part of group commit in a single thread, to optimize kernel "
+ "thread scheduling. On by default. Disable to run each transaction in group "
+ "commit in its own thread, which can be slower at very high concurrency. "
+ "This option is mostly for testing one algorithm versus the other, and it "
+ "should not normally be necessary to change it.",
+ NULL,
+ NULL,
+ 1);
+
+#ifndef DBUG_OFF
+static MYSQL_SYSVAR_ULONG(
+ dbug_fsync_sleep,
+ opt_binlog_dbug_fsync_sleep,
+ PLUGIN_VAR_RQCMDARG,
+ "Extra sleep (in microseconds) to add to binlog fsync(), for debugging",
+ NULL,
+ NULL,
+ 0,
+ 0,
+ ULONG_MAX,
+ 0);
+#endif
+
+static struct st_mysql_sys_var *binlog_sys_vars[]=
+{
+ MYSQL_SYSVAR(optimize_thread_scheduling),
+#ifndef DBUG_OFF
+ MYSQL_SYSVAR(dbug_fsync_sleep),
+#endif
+ NULL
+};
+
+
+/*
+ Copy out the non-directory part of binlog position filename for the
+ `binlog_snapshot_file' status variable, same way as it is done for
+ SHOW MASTER STATUS.
+*/
+static void
+set_binlog_snapshot_file(const char *src)
+{
+ int dir_len = dirname_length(src);
+ strmake(binlog_snapshot_file, src + dir_len, sizeof(binlog_snapshot_file)-1);
+}
+
+/*
+ Copy out current values of status variables, for SHOW STATUS or
+ information_schema.global_status.
+
+ This is called only under LOCK_status, so we can fill in a static array.
+*/
+void
+TC_LOG_BINLOG::set_status_variables(THD *thd)
+{
+ binlog_trx_data *trx_data;
+
+ if (thd)
+ trx_data= (binlog_trx_data*) thd_get_ha_data(thd, binlog_hton);
+ else
+ trx_data= NULL;
+
+ bool have_snapshot= (trx_data && trx_data->last_commit_pos_file[0] != 0);
+ pthread_mutex_lock(&LOCK_commit_ordered);
+ binlog_status_var_num_commits= this->num_commits;
+ binlog_status_var_num_group_commits= this->num_group_commits;
+ if (!have_snapshot)
+ {
+ set_binlog_snapshot_file(last_commit_pos_file);
+ binlog_snapshot_position= last_commit_pos_offset;
+ }
+ pthread_mutex_unlock(&LOCK_commit_ordered);
+
+ if (have_snapshot)
+ {
+ set_binlog_snapshot_file(trx_data->last_commit_pos_file);
+ binlog_snapshot_position= trx_data->last_commit_pos_offset;
+ }
+}
+
struct st_mysql_storage_engine binlog_storage_engine=
{ MYSQL_HANDLERTON_INTERFACE_VERSION };
@@ -6211,8 +6941,8 @@ mysql_declare_plugin(binlog)
binlog_init, /* Plugin Init */
NULL, /* Plugin Deinit */
0x0100 /* 1.0 */,
- NULL, /* status variables */
- NULL, /* system variables */
+ binlog_status_vars_top, /* status variables */
+ binlog_sys_vars, /* system variables */
NULL /* config options */
}
mysql_declare_plugin_end;
@@ -6227,8 +6957,8 @@ maria_declare_plugin(binlog)
binlog_init, /* Plugin Init */
NULL, /* Plugin Deinit */
0x0100 /* 1.0 */,
- NULL, /* status variables */
- NULL, /* system variables */
+ binlog_status_vars_top, /* status variables */
+ binlog_sys_vars, /* system variables */
"1.0", /* string version */
MariaDB_PLUGIN_MATURITY_STABLE /* maturity */
}
diff --git a/sql/log.h b/sql/log.h
index df475268cc3..503e94df981 100644
--- a/sql/log.h
+++ b/sql/log.h
@@ -38,17 +38,58 @@ class TC_LOG
virtual int open(const char *opt_name)=0;
virtual void close()=0;
- virtual int log_xid(THD *thd, my_xid xid)=0;
+ virtual int log_and_order(THD *thd, my_xid xid, bool all,
+ bool need_prepare_ordered,
+ bool need_commit_ordered) = 0;
virtual int unlog(ulong cookie, my_xid xid)=0;
+
+protected:
+ /*
+ These methods are meant to be invoked from log_and_order() implementations
+ to run any prepare_ordered() respectively commit_ordered() methods in
+ participating handlers.
+
+ They must be called using suitable thread syncronisation to ensure that
+ they are each called in the correct commit order among all
+ transactions. However, it is only necessary to call them if the
+ corresponding flag passed to log_and_order is set (it is safe, but not
+ required, to call them when the flag is false).
+
+ The caller must be holding LOCK_prepare_ordered respectively
+ LOCK_commit_ordered when calling these methods.
+ */
+ void run_prepare_ordered(THD *thd, bool all);
+ void run_commit_ordered(THD *thd, bool all);
};
+/*
+ Locks used to ensure serialised execution of TC_LOG::run_prepare_ordered()
+ and TC_LOG::run_commit_ordered(), or any other code that calls handler
+ prepare_ordered() or commit_ordered() methods.
+*/
+extern pthread_mutex_t LOCK_prepare_ordered;
+extern pthread_mutex_t LOCK_commit_ordered;
+
+extern void TC_init();
+extern void TC_destroy();
+
class TC_LOG_DUMMY: public TC_LOG // use it to disable the logging
{
public:
TC_LOG_DUMMY() {}
int open(const char *opt_name) { return 0; }
void close() { }
- int log_xid(THD *thd, my_xid xid) { return 1; }
+ /*
+ TC_LOG_DUMMY is only used when there are <= 1 XA-capable engines, and we
+ only use internal XA during commit when >= 2 XA-capable engines
+ participate.
+ */
+ int log_and_order(THD *thd, my_xid xid, bool all,
+ bool need_prepare_ordered, bool need_commit_ordered)
+ {
+ DBUG_ASSERT(0 /* Internal error - TC_LOG_DUMMY::log_and_order() called */);
+ return 1;
+ }
int unlog(ulong cookie, my_xid xid) { return 0; }
};
@@ -74,6 +115,13 @@ class TC_LOG_MMAP: public TC_LOG
pthread_cond_t cond; // to wait for a sync
} PAGE;
+ /* List of THDs for which to invoke commit_ordered(), in order. */
+ struct commit_entry
+ {
+ struct commit_entry *next;
+ THD *thd;
+ };
+
char logname[FN_REFLEN];
File fd;
my_off_t file_length;
@@ -88,16 +136,38 @@ class TC_LOG_MMAP: public TC_LOG
*/
pthread_mutex_t LOCK_active, LOCK_pool, LOCK_sync;
pthread_cond_t COND_pool, COND_active;
+ /*
+ Queue of threads that need to call commit_ordered().
+ Access to this queue must be protected by LOCK_prepare_ordered.
+ */
+ commit_entry *commit_ordered_queue;
+ /*
+ This flag and condition is used to reserve the queue while threads in it
+ each run the commit_ordered() methods one after the other. Only once the
+ last commit_ordered() in the queue is done can we start on a new queue
+ run.
+
+ Since we start this process in the first thread in the queue and finish in
+ the last (and possibly different) thread, we need a condition variable for
+ this (we cannot unlock a mutex in a different thread than the one who
+ locked it).
+
+ The condition is used together with the LOCK_prepare_ordered mutex.
+ */
+ my_bool commit_ordered_queue_busy;
+ pthread_cond_t COND_queue_busy;
public:
TC_LOG_MMAP(): inited(0) {}
int open(const char *opt_name);
void close();
- int log_xid(THD *thd, my_xid xid);
+ int log_and_order(THD *thd, my_xid xid, bool all,
+ bool need_prepare_ordered, bool need_commit_ordered);
int unlog(ulong cookie, my_xid xid);
int recover();
private:
+ int log_one_transaction(my_xid xid);
void get_active_from_pool();
int sync();
int overflow();
@@ -232,9 +302,31 @@ private:
time_t last_time;
};
+class binlog_trx_data;
class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG
{
private:
+ struct group_commit_entry
+ {
+ struct group_commit_entry *next;
+ THD *thd;
+ binlog_trx_data *trx_data;
+ /*
+ Extra events (BEGIN, COMMIT/ROLLBACK/XID, and possibly INCIDENT) to be
+ written during group commit. The incident_event is only valid if
+ trx_data->has_incident() is true.
+ */
+ Log_event *begin_event;
+ Log_event *end_event;
+ Log_event *incident_event;
+ /* Set during group commit to record any per-thread error. */
+ int error;
+ int commit_errno;
+ /* This is the `all' parameter for ha_commit_ordered(). */
+ bool all;
+ /* True if we come in through XA log_and_order(), false otherwise. */
+ };
+
/* LOCK_log and LOCK_index are inited by init_pthread_objects() */
pthread_mutex_t LOCK_index;
pthread_mutex_t LOCK_prep_xids;
@@ -276,6 +368,20 @@ class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG
In 5.0 it's 0 for relay logs too!
*/
bool no_auto_events;
+ /* Queue of transactions queued up to participate in group commit. */
+ group_commit_entry *group_commit_queue;
+ /*
+ Condition variable to mark that the group commit queue is busy.
+ Used when each thread does it's own commit_ordered() (when
+ binlog_optimize_thread_scheduling=1).
+ Used with the LOCK_commit_ordered mutex.
+ */
+ my_bool group_commit_queue_busy;
+ pthread_cond_t COND_queue_busy;
+ /* Total number of committed transactions. */
+ ulonglong num_commits;
+ /* Number of group commits done. */
+ ulonglong num_group_commits;
int write_to_file(IO_CACHE *cache);
/*
@@ -285,6 +391,11 @@ class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG
*/
int new_file_without_locking();
int new_file_impl(bool need_lock);
+ int write_transaction(group_commit_entry *entry);
+ bool write_transaction_to_binlog_events(group_commit_entry *entry);
+ void trx_group_commit_leader(group_commit_entry *leader);
+ void mark_xid_done();
+ void mark_xids_active(uint xid_count);
public:
MYSQL_LOG::generate_name;
@@ -303,6 +414,12 @@ public:
*/
Format_description_log_event *description_event_for_exec,
*description_event_for_queue;
+ /*
+ Binlog position of last commit (or non-transactional write) to the binlog.
+ Access to this is protected by LOCK_commit_ordered.
+ */
+ char last_commit_pos_file[FN_REFLEN];
+ my_off_t last_commit_pos_offset;
MYSQL_BIN_LOG();
/*
@@ -313,7 +430,8 @@ public:
int open(const char *opt_name);
void close();
- int log_xid(THD *thd, my_xid xid);
+ int log_and_order(THD *thd, my_xid xid, bool all,
+ bool need_prepare_ordered, bool need_commit_ordered);
int unlog(ulong cookie, my_xid xid);
int recover(IO_CACHE *log, Format_description_log_event *fdle);
#if !defined(MYSQL_CLIENT)
@@ -357,11 +475,14 @@ public:
int new_file();
void reset_gathered_updates(THD *thd);
- bool write(Log_event* event_info); // binary log write
- bool write(THD *thd, IO_CACHE *cache, Log_event *commit_event, bool incident);
- bool write_incident(THD *thd, bool lock);
- int write_cache(THD *thd, IO_CACHE *cache, bool lock_log,
- bool flush_and_sync);
+ bool write(Log_event* event_info,
+ my_bool *with_annotate= 0); // binary log write
+ bool write_transaction_to_binlog(THD *thd, binlog_trx_data *trx_data,
+ Log_event *end_ev, bool all);
+
+ bool write_incident_already_locked(THD *thd);
+ bool write_incident(THD *thd);
+ int write_cache(THD *thd, IO_CACHE *cache);
void set_write_error(THD *thd);
bool check_write_error(THD *thd);
@@ -416,6 +537,7 @@ public:
inline void unlock_index() { pthread_mutex_unlock(&LOCK_index);}
inline IO_CACHE *get_index_file() { return &index_file;}
inline uint32 get_open_count() { return open_count; }
+ void set_status_variables(THD *thd);
};
class Log_event_handler
diff --git a/sql/log_event.cc b/sql/log_event.cc
index 95cf853d9ff..0259ff762ad 100644
--- a/sql/log_event.cc
+++ b/sql/log_event.cc
@@ -649,6 +649,7 @@ const char* Log_event::get_type_str(Log_event_type type)
case BEGIN_LOAD_QUERY_EVENT: return "Begin_load_query";
case EXECUTE_LOAD_QUERY_EVENT: return "Execute_load_query";
case INCIDENT_EVENT: return "Incident";
+ case ANNOTATE_ROWS_EVENT: return "Annotate_rows";
default: return "Unknown"; /* impossible */
}
}
@@ -728,7 +729,7 @@ Log_event::Log_event(const char* buf,
logs are in 4.0 format, until it finds a Format_desc).
*/
if (description_event->binlog_version==3 &&
- buf[EVENT_TYPE_OFFSET]<FORMAT_DESCRIPTION_EVENT && log_pos)
+ (uchar)buf[EVENT_TYPE_OFFSET]<FORMAT_DESCRIPTION_EVENT && log_pos)
{
/*
If log_pos=0, don't change it. log_pos==0 is a marker to mean
@@ -746,8 +747,8 @@ Log_event::Log_event(const char* buf,
DBUG_PRINT("info", ("log_pos: %lu", (ulong) log_pos));
flags= uint2korr(buf + FLAGS_OFFSET);
- if ((buf[EVENT_TYPE_OFFSET] == FORMAT_DESCRIPTION_EVENT) ||
- (buf[EVENT_TYPE_OFFSET] == ROTATE_EVENT))
+ if (((uchar)buf[EVENT_TYPE_OFFSET] == FORMAT_DESCRIPTION_EVENT) ||
+ ((uchar)buf[EVENT_TYPE_OFFSET] == ROTATE_EVENT))
{
/*
These events always have a header which stops here (i.e. their
@@ -1168,14 +1169,14 @@ Log_event* Log_event::read_log_event(const char* buf, uint event_len,
/* Check the integrity */
if (event_len < EVENT_LEN_OFFSET ||
- buf[EVENT_TYPE_OFFSET] >= ENUM_END_EVENT ||
+ (uchar)buf[EVENT_TYPE_OFFSET] >= ENUM_END_EVENT ||
(uint) event_len != uint4korr(buf+EVENT_LEN_OFFSET))
{
*error="Sanity check failed"; // Needed to free buffer
DBUG_RETURN(NULL); // general sanity check - will fail on a partial read
}
- uint event_type= buf[EVENT_TYPE_OFFSET];
+ uint event_type= (uchar)buf[EVENT_TYPE_OFFSET];
if (event_type > description_event->number_of_event_types &&
event_type != FORMAT_DESCRIPTION_EVENT)
{
@@ -1297,6 +1298,9 @@ Log_event* Log_event::read_log_event(const char* buf, uint event_len,
case INCIDENT_EVENT:
ev = new Incident_log_event(buf, event_len, description_event);
break;
+ case ANNOTATE_ROWS_EVENT:
+ ev = new Annotate_rows_log_event(buf, event_len, description_event);
+ break;
default:
DBUG_PRINT("error",("Unknown event code: %d",
(int) buf[EVENT_TYPE_OFFSET]));
@@ -3801,6 +3805,13 @@ Format_description_log_event(uint8 binlog_ver, const char* server_ver)
post_header_len[DELETE_ROWS_EVENT-1]= 6;);
post_header_len[INCIDENT_EVENT-1]= INCIDENT_HEADER_LEN;
+ // Set header length of the reserved events to 0
+ memset(post_header_len + MYSQL_EVENTS_END - 1, 0,
+ (MARIA_EVENTS_BEGIN - MYSQL_EVENTS_END)*sizeof(uint8));
+
+ // Set header lengths of Maria events
+ post_header_len[ANNOTATE_ROWS_EVENT-1]= ANNOTATE_ROWS_HEADER_LEN;
+
// Sanity-check that all post header lengths are initialized.
IF_DBUG({
int i;
@@ -4445,8 +4456,8 @@ Load_log_event::Load_log_event(const char *buf, uint event_len,
*/
if (event_len)
copy_log_event(buf, event_len,
- ((buf[EVENT_TYPE_OFFSET] == LOAD_EVENT) ?
- LOAD_HEADER_LEN +
+ (((uchar)buf[EVENT_TYPE_OFFSET] == LOAD_EVENT) ?
+ LOAD_HEADER_LEN +
description_event->common_header_len :
LOAD_HEADER_LEN + LOG_EVENT_HEADER_LEN),
description_event);
@@ -4483,7 +4494,7 @@ int Load_log_event::copy_log_event(const char *buf, ulong event_len,
*/
if (!(field_lens= (uchar*)sql_ex.init((char*)buf + body_offset,
buf_end,
- buf[EVENT_TYPE_OFFSET] != LOAD_EVENT)))
+ (uchar)buf[EVENT_TYPE_OFFSET] != LOAD_EVENT)))
DBUG_RETURN(1);
data_len = event_len - body_offset;
@@ -6178,7 +6189,7 @@ Create_file_log_event::Create_file_log_event(const char* buf, uint len,
uint8 create_file_header_len= description_event->post_header_len[CREATE_FILE_EVENT-1];
if (!(event_buf= (char*) my_memdup(buf, len, MYF(MY_WME))) ||
copy_log_event(event_buf,len,
- ((buf[EVENT_TYPE_OFFSET] == LOAD_EVENT) ?
+ (((uchar)buf[EVENT_TYPE_OFFSET] == LOAD_EVENT) ?
load_header_len + header_len :
(fake_base ? (header_len+load_header_len) :
(header_len+load_header_len) +
@@ -7134,7 +7145,8 @@ Rows_log_event::Rows_log_event(THD *thd_arg, TABLE *tbl_arg, ulong tid,
m_width(tbl_arg ? tbl_arg->s->fields : 1),
m_rows_buf(0), m_rows_cur(0), m_rows_end(0), m_flags(0)
#ifdef HAVE_REPLICATION
- , m_curr_row(NULL), m_curr_row_end(NULL), m_key(NULL)
+ , m_curr_row(NULL), m_curr_row_end(NULL),
+ m_key(NULL), m_key_info(NULL), m_key_nr(0)
#endif
{
/*
@@ -7182,7 +7194,8 @@ Rows_log_event::Rows_log_event(const char *buf, uint event_len,
#endif
m_table_id(0), m_rows_buf(0), m_rows_cur(0), m_rows_end(0)
#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION)
- , m_curr_row(NULL), m_curr_row_end(NULL), m_key(NULL)
+ , m_curr_row(NULL), m_curr_row_end(NULL),
+ m_key(NULL), m_key_info(NULL), m_key_nr(0)
#endif
{
DBUG_ENTER("Rows_log_event::Rows_log_event(const char*,...)");
@@ -7944,6 +7957,142 @@ void Rows_log_event::print_helper(FILE *file,
#endif
/**************************************************************************
+ Annotate_rows_log_event member functions
+**************************************************************************/
+
+#ifndef MYSQL_CLIENT
+Annotate_rows_log_event::Annotate_rows_log_event(THD *thd)
+ : Log_event(thd, 0, true),
+ m_save_thd_query_txt(0),
+ m_save_thd_query_len(0)
+{
+ m_query_txt= thd->query();
+ m_query_len= thd->query_length();
+}
+#endif
+
+Annotate_rows_log_event::Annotate_rows_log_event(const char *buf,
+ uint event_len,
+ const Format_description_log_event *desc)
+ : Log_event(buf, desc),
+ m_save_thd_query_txt(0),
+ m_save_thd_query_len(0)
+{
+ m_query_len= event_len - desc->common_header_len;
+ m_query_txt= (char*) buf + desc->common_header_len;
+}
+
+Annotate_rows_log_event::~Annotate_rows_log_event()
+{
+#ifndef MYSQL_CLIENT
+ if (m_save_thd_query_txt)
+ thd->set_query(m_save_thd_query_txt, m_save_thd_query_len);
+#endif
+}
+
+int Annotate_rows_log_event::get_data_size()
+{
+ return m_query_len;
+}
+
+Log_event_type Annotate_rows_log_event::get_type_code()
+{
+ return ANNOTATE_ROWS_EVENT;
+}
+
+bool Annotate_rows_log_event::is_valid() const
+{
+ return (m_query_txt != NULL && m_query_len != 0);
+}
+
+#ifndef MYSQL_CLIENT
+bool Annotate_rows_log_event::write_data_header(IO_CACHE *file)
+{
+ return 0;
+}
+#endif
+
+#ifndef MYSQL_CLIENT
+bool Annotate_rows_log_event::write_data_body(IO_CACHE *file)
+{
+ return my_b_safe_write(file, (uchar*) m_query_txt, m_query_len);
+}
+#endif
+
+#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION)
+void Annotate_rows_log_event::pack_info(Protocol* protocol)
+{
+ if (m_query_txt && m_query_len)
+ protocol->store(m_query_txt, m_query_len, &my_charset_bin);
+}
+#endif
+
+#ifdef MYSQL_CLIENT
+void Annotate_rows_log_event::print(FILE *file, PRINT_EVENT_INFO *pinfo)
+{
+ if (pinfo->short_form)
+ return;
+
+ print_header(&pinfo->head_cache, pinfo, TRUE);
+ my_b_printf(&pinfo->head_cache, "\tAnnotate_rows:\n");
+
+ char *pbeg; // beginning of the next line
+ char *pend; // end of the next line
+ uint cnt= 0; // characters counter
+
+ for (pbeg= m_query_txt; ; pbeg= pend)
+ {
+ // skip all \r's and \n's at the beginning of the next line
+ for (;; pbeg++)
+ {
+ if (++cnt > m_query_len)
+ return;
+
+ if (*pbeg != '\r' && *pbeg != '\n')
+ break;
+ }
+
+ // find end of the next line
+ for (pend= pbeg + 1;
+ ++cnt <= m_query_len && *pend != '\r' && *pend != '\n';
+ pend++)
+ ;
+
+ // print next line
+ my_b_write(&pinfo->head_cache, (const uchar*) "#Q> ", 4);
+ my_b_write(&pinfo->head_cache, (const uchar*) pbeg, pend - pbeg);
+ my_b_write(&pinfo->head_cache, (const uchar*) "\n", 1);
+ }
+}
+#endif
+
+#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION)
+int Annotate_rows_log_event::do_apply_event(Relay_log_info const *rli)
+{
+ m_save_thd_query_txt= thd->query();
+ m_save_thd_query_len= thd->query_length();
+ thd->set_query(m_query_txt, m_query_len);
+ return 0;
+}
+#endif
+
+#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION)
+int Annotate_rows_log_event::do_update_pos(Relay_log_info *rli)
+{
+ rli->inc_event_relay_log_pos();
+ return 0;
+}
+#endif
+
+#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION)
+Log_event::enum_skip_reason
+Annotate_rows_log_event::do_shall_skip(Relay_log_info *rli)
+{
+ return continue_group(rli);
+}
+#endif
+
+/**************************************************************************
Table_map_log_event member functions and support functions
**************************************************************************/
@@ -9057,6 +9206,86 @@ record_compare_exit:
return result;
}
+
+/**
+ Find the best key to use when locating the row in @c find_row().
+
+ A primary key is preferred if it exists; otherwise a unique index is
+ preferred. Else we pick the index with the smalles rec_per_key value.
+
+ If a suitable key is found, set @c m_key, @c m_key_nr and @c m_key_info
+ member fields appropriately.
+
+ @returns Error code on failure, 0 on success.
+*/
+int Rows_log_event::find_key()
+{
+ uint i, best_key_nr, last_part;
+ KEY *key, *best_key;
+ ulong best_rec_per_key, tmp;
+ DBUG_ENTER("Rows_log_event::find_key");
+ DBUG_ASSERT(m_table);
+
+ best_key_nr= MAX_KEY;
+ LINT_INIT(best_key);
+ LINT_INIT(best_rec_per_key);
+
+ /*
+ Keys are sorted so that any primary key is first, followed by unique keys,
+ followed by any other. So we will automatically pick the primary key if
+ it exists.
+ */
+ for (i= 0, key= m_table->key_info; i < m_table->s->keys; i++, key++)
+ {
+ if (!m_table->s->keys_in_use.is_set(i))
+ continue;
+ /*
+ We cannot use a unique key with NULL-able columns to uniquely identify
+ a row (but we can still select it for range scan below if nothing better
+ is available).
+ */
+ if ((key->flags & (HA_NOSAME | HA_NULL_PART_KEY)) == HA_NOSAME)
+ {
+ best_key_nr= i;
+ best_key= key;
+ break;
+ }
+ /*
+ We can only use a non-unique key if it allows range scans (ie. skip
+ FULLTEXT indexes and such).
+ */
+ last_part= key->key_parts - 1;
+ DBUG_PRINT("info", ("Index %s rec_per_key[%u]= %lu",
+ key->name, last_part, key->rec_per_key[last_part]));
+ if (!(m_table->file->index_flags(i, last_part, 1) & HA_READ_NEXT))
+ continue;
+
+ tmp= key->rec_per_key[last_part];
+ if (best_key_nr == MAX_KEY || (tmp > 0 && tmp < best_rec_per_key))
+ {
+ best_key_nr= i;
+ best_key= key;
+ best_rec_per_key= tmp;
+ }
+ }
+
+ if (best_key_nr == MAX_KEY)
+ {
+ m_key_info= NULL;
+ DBUG_RETURN(0);
+ }
+
+ // Allocate buffer for key searches
+ m_key= (uchar *) my_malloc(best_key->key_length, MYF(MY_WME));
+ if (m_key == NULL)
+ DBUG_RETURN(HA_ERR_OUT_OF_MEM);
+ m_key_info= best_key;
+ m_key_nr= best_key_nr;
+
+ DBUG_RETURN(0);;
+}
+
+
/**
Locate the current row in event's table.
@@ -9156,12 +9385,17 @@ int Rows_log_event::find_row(const Relay_log_info *rli)
*/
store_record(table,record[1]);
- if (table->s->keys > 0 && table->s->keys_in_use.is_set(0))
+ if (m_key_info)
{
- DBUG_PRINT("info",("locating record using primary key (index_read)"));
+ DBUG_PRINT("info",("locating record using key #%u [%s] (index_read)",
+ m_key_nr, m_key_info->name));
+ /* We use this to test that the correct key is used in test cases. */
+ DBUG_EXECUTE_IF("slave_crash_if_wrong_index",
+ if(0 != strcmp(m_key_info->name,"expected_key")) abort(););
- /* The 0th key is active: search the table using the index */
- if (!table->file->inited && (error= table->file->ha_index_init(0, FALSE)))
+ /* The key is active: search the table using the index */
+ if (!table->file->inited &&
+ (error= table->file->ha_index_init(m_key_nr, FALSE)))
{
DBUG_PRINT("info",("ha_index_init returns error %d",error));
table->file->print_error(error, MYF(0));
@@ -9171,14 +9405,14 @@ int Rows_log_event::find_row(const Relay_log_info *rli)
/* Fill key data for the row */
DBUG_ASSERT(m_key);
- key_copy(m_key, table->record[0], table->key_info, 0);
+ key_copy(m_key, table->record[0], m_key_info, 0);
/*
Don't print debug messages when running valgrind since they can
trigger false warnings.
*/
#ifndef HAVE_valgrind
- DBUG_DUMP("key data", m_key, table->key_info->key_length);
+ DBUG_DUMP("key data", m_key, m_key_info->key_length);
#endif
/*
@@ -9264,6 +9498,8 @@ int Rows_log_event::find_row(const Relay_log_info *rli)
record we are looking for is stored in record[1].
*/
DBUG_PRINT("info",("non-unique index, scanning it to find matching record"));
+ /* We use this to test that the correct key is used in test cases. */
+ DBUG_EXECUTE_IF("slave_crash_if_index_scan", abort(););
while (record_compare(table))
{
@@ -9302,6 +9538,8 @@ int Rows_log_event::find_row(const Relay_log_info *rli)
else
{
DBUG_PRINT("info",("locating record using table scan (rnd_next)"));
+ /* We use this to test that the correct key is used in test cases. */
+ DBUG_EXECUTE_IF("slave_crash_if_table_scan", abort(););
int restart_count= 0; // Number of times scanning has restarted from top
@@ -9421,14 +9659,7 @@ Delete_rows_log_event::do_before_row_operations(const Slave_reporting_capability
return 0;
}
- if (m_table->s->keys > 0)
- {
- // Allocate buffer for key searches
- m_key= (uchar*)my_malloc(m_table->key_info->key_length, MYF(MY_WME));
- if (!m_key)
- return HA_ERR_OUT_OF_MEM;
- }
- return 0;
+ return find_key();
}
int
@@ -9439,6 +9670,7 @@ Delete_rows_log_event::do_after_row_operations(const Slave_reporting_capability
m_table->file->ha_index_or_rnd_end();
my_free(m_key, MYF(MY_ALLOW_ZERO_PTR));
m_key= NULL;
+ m_key_info= NULL;
return error;
}
@@ -9541,13 +9773,9 @@ Update_rows_log_event::Update_rows_log_event(const char *buf, uint event_len,
int
Update_rows_log_event::do_before_row_operations(const Slave_reporting_capability *const)
{
- if (m_table->s->keys > 0)
- {
- // Allocate buffer for key searches
- m_key= (uchar*)my_malloc(m_table->key_info->key_length, MYF(MY_WME));
- if (!m_key)
- return HA_ERR_OUT_OF_MEM;
- }
+ int err;
+ if ((err= find_key()))
+ return err;
m_table->timestamp_field_type= TIMESTAMP_NO_AUTO_SET;
@@ -9562,6 +9790,7 @@ Update_rows_log_event::do_after_row_operations(const Slave_reporting_capability
m_table->file->ha_index_or_rnd_end();
my_free(m_key, MYF(MY_ALLOW_ZERO_PTR)); // Free for multi_malloc
m_key= NULL;
+ m_key_info= NULL;
return error;
}
diff --git a/sql/log_event.h b/sql/log_event.h
index 4ea511f45b5..91a2ad9693c 100644
--- a/sql/log_event.h
+++ b/sql/log_event.h
@@ -250,6 +250,7 @@ struct sql_ex_info
#define EXECUTE_LOAD_QUERY_EXTRA_HEADER_LEN (4 + 4 + 4 + 1)
#define EXECUTE_LOAD_QUERY_HEADER_LEN (QUERY_HEADER_LEN + EXECUTE_LOAD_QUERY_EXTRA_HEADER_LEN)
#define INCIDENT_HEADER_LEN 2
+#define ANNOTATE_ROWS_HEADER_LEN 0
/*
Max number of possible extra bytes in a replication event compared to a
packet (i.e. a query) sent from client to master;
@@ -582,8 +583,14 @@ enum Log_event_type
*/
INCIDENT_EVENT= 26,
+ /* New MySQL/Sun events are to be added right above this comment */
+ MYSQL_EVENTS_END,
+
+ MARIA_EVENTS_BEGIN= 160,
+ /* New Maria event numbers start from here */
+ ANNOTATE_ROWS_EVENT= 160,
+
/*
- Add new events here - right above this comment!
Existing events (except ENUM_END_EVENT) should never change their numbers
*/
@@ -2988,6 +2995,59 @@ public:
char *str_to_hex(char *to, const char *from, uint len);
/**
+ @class Annotate_rows_log_event
+
+ In row-based mode, if binlog_annotate_rows_events = ON, each group of
+ Table_map_log_events is preceded by an Annotate_rows_log_event which
+ contains the query which caused the subsequent rows operations.
+
+ The Annotate_rows_log_event has no post-header and its body contains
+ the corresponding query (without trailing zero). Note. The query length
+ is to be calculated as a difference between the whole event length and
+ the common header length.
+*/
+class Annotate_rows_log_event: public Log_event
+{
+public:
+#ifndef MYSQL_CLIENT
+ Annotate_rows_log_event(THD*);
+#endif
+ Annotate_rows_log_event(const char *buf, uint event_len,
+ const Format_description_log_event*);
+ ~Annotate_rows_log_event();
+
+ virtual int get_data_size();
+ virtual Log_event_type get_type_code();
+ virtual bool is_valid() const;
+
+#ifndef MYSQL_CLIENT
+ virtual bool write_data_header(IO_CACHE*);
+ virtual bool write_data_body(IO_CACHE*);
+#endif
+
+#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION)
+ virtual void pack_info(Protocol*);
+#endif
+
+#ifdef MYSQL_CLIENT
+ virtual void print(FILE*, PRINT_EVENT_INFO*);
+#endif
+
+#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION)
+private:
+ virtual int do_apply_event(Relay_log_info const*);
+ virtual int do_update_pos(Relay_log_info*);
+ virtual enum_skip_reason do_shall_skip(Relay_log_info*);
+#endif
+
+private:
+ char *m_query_txt;
+ uint m_query_len;
+ char *m_save_thd_query_txt;
+ uint m_save_thd_query_len;
+};
+
+/**
@class Table_map_log_event
In row-based mode, every row operation event is preceded by a
@@ -3592,7 +3652,10 @@ protected:
const uchar *m_curr_row; /* Start of the row being processed */
const uchar *m_curr_row_end; /* One-after the end of the current row */
uchar *m_key; /* Buffer to keep key value during searches */
+ KEY *m_key_info; /* Pointer to KEY info for m_key_nr */
+ uint m_key_nr; /* Key number */
+ int find_key(); // Find a best key to use in find_row()
int find_row(const Relay_log_info *const);
int write_row(const Relay_log_info *const, const bool);
diff --git a/sql/mysql_priv.h b/sql/mysql_priv.h
index 5255d0a8403..b9bc99e9747 100644
--- a/sql/mysql_priv.h
+++ b/sql/mysql_priv.h
@@ -688,7 +688,11 @@ protected:
/* BINLOG_DUMP options */
#define BINLOG_DUMP_NON_BLOCK 1
+#endif /* !MYSQL_CLIENT */
+#define BINLOG_SEND_ANNOTATE_ROWS_EVENT 2
+
+#ifndef MYSQL_CLIENT
/* sql_show.cc:show_log_files() */
#define SHOW_LOG_STATUS_FREE "FREE"
#define SHOW_LOG_STATUS_INUSE "IN USE"
diff --git a/sql/mysqld.cc b/sql/mysqld.cc
index 1d8fadd8bb6..68140ae1b5a 100644
--- a/sql/mysqld.cc
+++ b/sql/mysqld.cc
@@ -548,6 +548,7 @@ my_bool opt_local_infile, opt_slave_compressed_protocol;
my_bool opt_safe_user_create = 0, opt_no_mix_types = 0;
my_bool opt_show_slave_auth_info, opt_sql_bin_update = 0;
my_bool opt_log_slave_updates= 0;
+my_bool opt_replicate_annotate_rows_events= 0;
bool slave_warning_issued = false;
/*
@@ -1425,6 +1426,7 @@ void clean_up(bool print_message)
ha_end();
if (tc_log)
tc_log->close();
+ TC_destroy();
xid_cache_free();
wt_end();
delete_elements(&key_caches, (void (*)(const char*, uchar*)) free_key_cache);
@@ -4276,6 +4278,8 @@ a file name for --log-bin-index option", opt_binlog_index_name);
if (!errmesg[0][0])
unireg_abort(1);
+ TC_init();
+
/* We have to initialize the storage engines before CSV logging */
if (ha_init())
{
@@ -5905,6 +5909,8 @@ enum options_mysqld
OPT_REPLICATE_IGNORE_DB, OPT_LOG_SLAVE_UPDATES,
OPT_BINLOG_DO_DB, OPT_BINLOG_IGNORE_DB,
OPT_BINLOG_FORMAT,
+ OPT_BINLOG_ANNOTATE_ROWS_EVENTS,
+ OPT_REPLICATE_ANNOTATE_ROWS_EVENTS,
#ifndef DBUG_OFF
OPT_BINLOG_SHOW_XID,
#endif
@@ -6135,6 +6141,18 @@ struct my_option my_long_options[] =
#endif
, &opt_binlog_format, &opt_binlog_format,
0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
+ {"binlog-annotate-rows-events", OPT_BINLOG_ANNOTATE_ROWS_EVENTS,
+ "Tells the master to annotate RBR events with the statement that "
+ "caused these events.",
+ (uchar**) &global_system_variables.binlog_annotate_rows_events,
+ (uchar**) &max_system_variables.binlog_annotate_rows_events,
+ 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
+ {"replicate-annotate-rows-events", OPT_REPLICATE_ANNOTATE_ROWS_EVENTS,
+ "Tells the slave to write annotate rows events recieved from the master "
+ "to its own binary log. Sensible only in pair with log-slave-updates option.",
+ (uchar**) &opt_replicate_annotate_rows_events,
+ (uchar**) &opt_replicate_annotate_rows_events,
+ 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
{"binlog-do-db", OPT_BINLOG_DO_DB,
"Tells the master it should log updates for the specified database, "
"and exclude all others not explicitly mentioned.",
diff --git a/sql/rpl_rli.cc b/sql/rpl_rli.cc
index 6319bbb60cf..42701bc9e24 100644
--- a/sql/rpl_rli.cc
+++ b/sql/rpl_rli.cc
@@ -38,7 +38,8 @@ Relay_log_info::Relay_log_info()
inited(0), abort_slave(0), slave_running(0), until_condition(UNTIL_NONE),
until_log_pos(0), retried_trans(0),
tables_to_lock(0), tables_to_lock_count(0),
- last_event_start_time(0), m_flags(0)
+ last_event_start_time(0), m_flags(0),
+ m_annotate_event(0)
{
DBUG_ENTER("Relay_log_info::Relay_log_info");
@@ -72,6 +73,7 @@ Relay_log_info::~Relay_log_info()
pthread_cond_destroy(&stop_cond);
pthread_cond_destroy(&log_space_cond);
relay_log.cleanup();
+ free_annotate_event();
DBUG_VOID_RETURN;
}
diff --git a/sql/rpl_rli.h b/sql/rpl_rli.h
index 5cafcf47086..0ea3c23bfd8 100644
--- a/sql/rpl_rli.h
+++ b/sql/rpl_rli.h
@@ -423,8 +423,46 @@ public:
(m_flags & (1UL << IN_STMT));
}
+ /**
+ Save pointer to Annotate_rows event and switch on the
+ binlog_annotate_rows_events for this sql thread.
+ To be called when sql thread recieves an Annotate_rows event.
+ */
+ inline void set_annotate_event(Annotate_rows_log_event *event)
+ {
+ free_annotate_event();
+ m_annotate_event= event;
+ sql_thd->variables.binlog_annotate_rows_events= 1;
+ }
+
+ /**
+ Returns pointer to the saved Annotate_rows event or NULL if there is
+ no saved event.
+ */
+ inline Annotate_rows_log_event* get_annotate_event()
+ {
+ return m_annotate_event;
+ }
+
+ /**
+ Delete saved Annotate_rows event (if any) and switch off the
+ binlog_annotate_rows_events for this sql thread.
+ To be called when sql thread has applied the last (i.e. with
+ STMT_END_F flag) rbr event.
+ */
+ inline void free_annotate_event()
+ {
+ if (m_annotate_event)
+ {
+ sql_thd->variables.binlog_annotate_rows_events= 0;
+ delete m_annotate_event;
+ m_annotate_event= 0;
+ }
+ }
+
private:
uint32 m_flags;
+ Annotate_rows_log_event *m_annotate_event;
};
diff --git a/sql/set_var.cc b/sql/set_var.cc
index b02330727e9..33575de1ccf 100644
--- a/sql/set_var.cc
+++ b/sql/set_var.cc
@@ -183,6 +183,9 @@ static sys_var_const sys_back_log(&vars, "back_log",
OPT_GLOBAL, SHOW_LONG,
(uchar*) &back_log);
static sys_var_const_os_str sys_basedir(&vars, "basedir", mysql_home);
+static sys_var_thd_bool
+sys_binlog_annotate_rows_events(&vars, "binlog_annotate_rows_events",
+ &SV::binlog_annotate_rows_events);
static sys_var_long_ptr sys_binlog_cache_size(&vars, "binlog_cache_size",
&binlog_cache_size);
static sys_var_thd_binlog_format sys_binlog_format(&vars, "binlog_format",
diff --git a/sql/slave.cc b/sql/slave.cc
index 0d14234766b..63b7ce715c9 100644
--- a/sql/slave.cc
+++ b/sql/slave.cc
@@ -1867,6 +1867,9 @@ static int request_dump(MYSQL* mysql, Master_info* mi,
*suppress_warnings= FALSE;
+ if (opt_log_slave_updates && opt_replicate_annotate_rows_events)
+ binlog_flags|= BINLOG_SEND_ANNOTATE_ROWS_EVENT;
+
// TODO if big log files: Change next to int8store()
int4store(buf, (ulong) mi->master_log_pos);
int2store(buf + 4, binlog_flags);
@@ -2261,17 +2264,41 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli)
}
exec_res= apply_event_and_update_pos(ev, thd, rli);
- /*
- Format_description_log_event should not be deleted because it will be
- used to read info about the relay log's format; it will be deleted when
- the SQL thread does not need it, i.e. when this thread terminates.
- */
- if (ev->get_type_code() != FORMAT_DESCRIPTION_EVENT)
- {
- DBUG_PRINT("info", ("Deleting the event after it has been executed"));
- delete ev;
+ switch (ev->get_type_code()) {
+ case FORMAT_DESCRIPTION_EVENT:
+ /*
+ Format_description_log_event should not be deleted because it
+ will be used to read info about the relay log's format;
+ it will be deleted when the SQL thread does not need it,
+ i.e. when this thread terminates.
+ */
+ break;
+ case ANNOTATE_ROWS_EVENT:
+ /*
+ Annotate_rows event should not be deleted because after it has
+ been applied, thd->query points to the string inside this event.
+ The thd->query will be used to generate new Annotate_rows event
+ during applying the subsequent Rows events.
+ */
+ rli->set_annotate_event((Annotate_rows_log_event*) ev);
+ break;
+ case DELETE_ROWS_EVENT:
+ case UPDATE_ROWS_EVENT:
+ case WRITE_ROWS_EVENT:
+ /*
+ After the last Rows event has been applied, the saved Annotate_rows
+ event (if any) is not needed anymore and can be deleted.
+ */
+ if (((Rows_log_event*)ev)->get_flags(Rows_log_event::STMT_END_F))
+ rli->free_annotate_event();
+ /* fall through */
+ default:
+ DBUG_PRINT("info", ("Deleting the event after it has been executed"));
+ delete ev;
+ break;
}
+
/*
update_log_pos failed: this should not happen, so we don't
retry.
@@ -2899,6 +2926,12 @@ pthread_handler_t handle_slave_sql(void *arg)
thd->init_for_queries();
thd->temporary_tables = rli->save_temporary_tables; // restore temp tables
set_thd_in_use_temporary_tables(rli); // (re)set sql_thd in use for saved temp tables
+ /*
+ binlog_annotate_rows_events must be TRUE only after an Annotate_rows event
+ has been recieved and only till the last corresponding rbr event has been
+ applied. In all other cases it must be FALSE.
+ */
+ thd->variables.binlog_annotate_rows_events= 0;
pthread_mutex_lock(&LOCK_thread_count);
threads.append(thd);
pthread_mutex_unlock(&LOCK_thread_count);
@@ -3381,7 +3414,7 @@ static int queue_binlog_ver_1_event(Master_info *mi, const char *buf,
If we get Load event, we need to pass a non-reusable buffer
to read_log_event, so we do a trick
*/
- if (buf[EVENT_TYPE_OFFSET] == LOAD_EVENT)
+ if ((uchar)buf[EVENT_TYPE_OFFSET] == LOAD_EVENT)
{
if (unlikely(!(tmp_buf=(char*)my_malloc(event_len+1,MYF(MY_WME)))))
{
@@ -3588,13 +3621,13 @@ static int queue_event(Master_info* mi,const char* buf, ulong event_len)
LINT_INIT(inc_pos);
if (mi->rli.relay_log.description_event_for_queue->binlog_version<4 &&
- buf[EVENT_TYPE_OFFSET] != FORMAT_DESCRIPTION_EVENT /* a way to escape */)
+ (uchar)buf[EVENT_TYPE_OFFSET] != FORMAT_DESCRIPTION_EVENT /* a way to escape */)
DBUG_RETURN(queue_old_event(mi,buf,event_len));
LINT_INIT(inc_pos);
pthread_mutex_lock(&mi->data_lock);
- switch (buf[EVENT_TYPE_OFFSET]) {
+ switch ((uchar)buf[EVENT_TYPE_OFFSET]) {
case STOP_EVENT:
/*
We needn't write this event to the relay log. Indeed, it just indicates a
@@ -3697,9 +3730,9 @@ static int queue_event(Master_info* mi,const char* buf, ulong event_len)
the master's binlog (i.e. Format_desc, Rotate & Stop) should not increment
mi->master_log_pos.
*/
- if (buf[EVENT_TYPE_OFFSET]!=FORMAT_DESCRIPTION_EVENT &&
- buf[EVENT_TYPE_OFFSET]!=ROTATE_EVENT &&
- buf[EVENT_TYPE_OFFSET]!=STOP_EVENT)
+ if ((uchar)buf[EVENT_TYPE_OFFSET]!=FORMAT_DESCRIPTION_EVENT &&
+ (uchar)buf[EVENT_TYPE_OFFSET]!=ROTATE_EVENT &&
+ (uchar)buf[EVENT_TYPE_OFFSET]!=STOP_EVENT)
{
mi->master_log_pos+= inc_pos;
memcpy(rli->ign_master_log_name_end, mi->master_log_name, FN_REFLEN);
diff --git a/sql/slave.h b/sql/slave.h
index 1aa5b374e4b..3333e583559 100644
--- a/sql/slave.h
+++ b/sql/slave.h
@@ -106,6 +106,7 @@ extern MYSQL_PLUGIN_IMPORT char *relay_log_info_file;
extern char *opt_relay_logname, *opt_relaylog_index_name;
extern my_bool opt_skip_slave_start, opt_reckless_slave;
extern my_bool opt_log_slave_updates;
+extern my_bool opt_replicate_annotate_rows_events;
extern ulonglong relay_log_space_limit;
/*
diff --git a/sql/sql_binlog.cc b/sql/sql_binlog.cc
index 9713ec1ef5c..a59cd631fef 100644
--- a/sql/sql_binlog.cc
+++ b/sql/sql_binlog.cc
@@ -174,7 +174,7 @@ void mysql_client_binlog_statement(THD* thd)
*/
if (!have_fd_event)
{
- int type = bufptr[EVENT_TYPE_OFFSET];
+ int type = (uchar)bufptr[EVENT_TYPE_OFFSET];
if (type == FORMAT_DESCRIPTION_EVENT || type == START_EVENT_V3)
have_fd_event= TRUE;
else
diff --git a/sql/sql_class.cc b/sql/sql_class.cc
index f10655c3e51..f8905cd5f8c 100644
--- a/sql/sql_class.cc
+++ b/sql/sql_class.cc
@@ -763,6 +763,8 @@ THD::THD()
active_vio = 0;
#endif
pthread_mutex_init(&LOCK_thd_data, MY_MUTEX_INIT_FAST);
+ pthread_mutex_init(&LOCK_wakeup_ready, MY_MUTEX_INIT_FAST);
+ pthread_cond_init(&COND_wakeup_ready, 0);
/* Variables with default values */
proc_info="login";
@@ -1149,6 +1151,8 @@ THD::~THD()
free_root(&transaction.mem_root,MYF(0));
#endif
mysys_var=0; // Safety (shouldn't be needed)
+ pthread_cond_destroy(&COND_wakeup_ready);
+ pthread_mutex_destroy(&LOCK_wakeup_ready);
pthread_mutex_destroy(&LOCK_thd_data);
#ifndef DBUG_OFF
dbug_sentry= THD_SENTRY_GONE;
@@ -4243,6 +4247,25 @@ int THD::binlog_query(THD::enum_binlog_query_type qtype, char const *query_arg,
DBUG_RETURN(0);
}
+void
+THD::wait_for_wakeup_ready()
+{
+ pthread_mutex_lock(&LOCK_wakeup_ready);
+ while (!wakeup_ready)
+ pthread_cond_wait(&COND_wakeup_ready, &LOCK_wakeup_ready);
+ pthread_mutex_unlock(&LOCK_wakeup_ready);
+}
+
+void
+THD::signal_wakeup_ready()
+{
+ pthread_mutex_lock(&LOCK_wakeup_ready);
+ wakeup_ready= true;
+ pthread_mutex_unlock(&LOCK_wakeup_ready);
+ pthread_cond_signal(&COND_wakeup_ready);
+}
+
+
bool Discrete_intervals_list::append(ulonglong start, ulonglong val,
ulonglong incr)
{
diff --git a/sql/sql_class.h b/sql/sql_class.h
index 477ae9bf751..9ea4e37e610 100644
--- a/sql/sql_class.h
+++ b/sql/sql_class.h
@@ -451,6 +451,7 @@ struct system_variables
ulong ndb_index_stat_cache_entries;
ulong ndb_index_stat_update_freq;
ulong binlog_format; // binlog format for this thd (see enum_binlog_format)
+ my_bool binlog_annotate_rows_events;
my_bool binlog_direct_non_trans_update;
/*
In slave thread we need to know in behalf of which
@@ -1577,7 +1578,8 @@ public:
*/
void binlog_start_trans_and_stmt();
void binlog_set_stmt_begin();
- int binlog_write_table_map(TABLE *table, bool is_transactional);
+ int binlog_write_table_map(TABLE *table, bool is_transactional,
+ my_bool *with_annotate= 0);
int binlog_write_row(TABLE* table, bool is_transactional,
MY_BITMAP const* cols, size_t colcnt,
const uchar *buf);
@@ -2551,6 +2553,14 @@ public:
return backup;
}
+ void clear_wakeup_ready() { wakeup_ready= false; }
+ /*
+ Sleep waiting for others to wake us up with signal_wakeup_ready().
+ Must call clear_wakeup_ready() before waiting.
+ */
+ void wait_for_wakeup_ready();
+ /* Wake this thread up from wait_for_wakeup_ready(). */
+ void signal_wakeup_ready();
private:
/** The current internal error handler for this thread, or NULL. */
Internal_error_handler *m_internal_handler;
@@ -2589,6 +2599,16 @@ private:
*/
LEX_STRING invoker_user;
LEX_STRING invoker_host;
+ /*
+ Flag, mutex and condition for a thread to wait for a signal from another
+ thread.
+
+ Currently used to wait for group commit to complete, can also be used for
+ other purposes.
+ */
+ bool wakeup_ready;
+ pthread_mutex_t LOCK_wakeup_ready;
+ pthread_cond_t COND_wakeup_ready;
};
/** A short cut for thd->main_da.set_ok_status(). */
diff --git a/sql/sql_insert.cc b/sql/sql_insert.cc
index adc82d791b6..05804c2ee7e 100644
--- a/sql/sql_insert.cc
+++ b/sql/sql_insert.cc
@@ -1970,6 +1970,11 @@ bool delayed_get_table(THD *thd, TABLE_LIST *table_list)
pthread_mutex_lock(&LOCK_thread_count);
thread_count++;
pthread_mutex_unlock(&LOCK_thread_count);
+ /*
+ Annotating delayed inserts is not supported.
+ */
+ di->thd.variables.binlog_annotate_rows_events= 0;
+
di->thd.set_db(table_list->db, (uint) strlen(table_list->db));
di->thd.set_query(my_strdup(table_list->table_name, MYF(MY_WME)), 0);
if (di->thd.db == NULL || di->thd.query() == NULL)
diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc
index 3088d807549..103cf72cf14 100644
--- a/sql/sql_parse.cc
+++ b/sql/sql_parse.cc
@@ -1008,6 +1008,10 @@ bool dispatch_command(enum enum_server_command command, THD *thd,
DBUG_ENTER("dispatch_command");
DBUG_PRINT("info", ("command: %d", command));
+ DBUG_EXECUTE_IF("crash_dispatch_command_before",
+ { DBUG_PRINT("crash_dispatch_command_before", ("now"));
+ DBUG_ABORT(); });
+
thd->command=command;
/*
Commands which always take a long time are logged into
diff --git a/sql/sql_repl.cc b/sql/sql_repl.cc
index 95e48c531be..596f0f5c1e6 100644
--- a/sql/sql_repl.cc
+++ b/sql/sql_repl.cc
@@ -489,7 +489,7 @@ impossible position";
DBUG_PRINT("info",
("Looked for a Format_description_log_event, found event type %d",
(*packet)[EVENT_TYPE_OFFSET+1]));
- if ((*packet)[EVENT_TYPE_OFFSET+1] == FORMAT_DESCRIPTION_EVENT)
+ if ((uchar)(*packet)[EVENT_TYPE_OFFSET+1] == FORMAT_DESCRIPTION_EVENT)
{
binlog_can_be_corrupted= test((*packet)[FLAGS_OFFSET+1] &
LOG_EVENT_BINLOG_IN_USE_F);
@@ -556,32 +556,36 @@ impossible position";
}
#endif
- if ((*packet)[EVENT_TYPE_OFFSET+1] == FORMAT_DESCRIPTION_EVENT)
+ if ((uchar)(*packet)[EVENT_TYPE_OFFSET+1] == FORMAT_DESCRIPTION_EVENT)
{
binlog_can_be_corrupted= test((*packet)[FLAGS_OFFSET+1] &
LOG_EVENT_BINLOG_IN_USE_F);
(*packet)[FLAGS_OFFSET+1] &= ~LOG_EVENT_BINLOG_IN_USE_F;
}
- else if ((*packet)[EVENT_TYPE_OFFSET+1] == STOP_EVENT)
+ else if ((uchar)(*packet)[EVENT_TYPE_OFFSET+1] == STOP_EVENT)
binlog_can_be_corrupted= FALSE;
- if (my_net_write(net, (uchar*) packet->ptr(), packet->length()))
+ if ((uchar)(*packet)[EVENT_TYPE_OFFSET+1] != ANNOTATE_ROWS_EVENT ||
+ (flags & BINLOG_SEND_ANNOTATE_ROWS_EVENT))
{
- errmsg = "Failed on my_net_write()";
- my_errno= ER_UNKNOWN_ERROR;
- goto err;
- }
+ if (my_net_write(net, (uchar*) packet->ptr(), packet->length()))
+ {
+ errmsg = "Failed on my_net_write()";
+ my_errno= ER_UNKNOWN_ERROR;
+ goto err;
+ }
- DBUG_PRINT("info", ("log event code %d",
- (*packet)[LOG_EVENT_OFFSET+1] ));
- if ((*packet)[LOG_EVENT_OFFSET+1] == LOAD_EVENT)
- {
- if (send_file(thd))
- {
- errmsg = "failed in send_file()";
- my_errno= ER_UNKNOWN_ERROR;
- goto err;
- }
+ DBUG_PRINT("info", ("log event code %d",
+ (*packet)[LOG_EVENT_OFFSET+1] ));
+ if ((uchar)(*packet)[LOG_EVENT_OFFSET+1] == LOAD_EVENT)
+ {
+ if (send_file(thd))
+ {
+ errmsg = "failed in send_file()";
+ my_errno= ER_UNKNOWN_ERROR;
+ goto err;
+ }
+ }
}
packet->set("\0", 1, &my_charset_bin);
}
@@ -677,23 +681,27 @@ impossible position";
if (read_packet)
{
- thd_proc_info(thd, "Sending binlog event to slave");
- if (my_net_write(net, (uchar*) packet->ptr(), packet->length()) )
- {
- errmsg = "Failed on my_net_write()";
- my_errno= ER_UNKNOWN_ERROR;
- goto err;
- }
-
- if ((*packet)[LOG_EVENT_OFFSET+1] == LOAD_EVENT)
- {
- if (send_file(thd))
+ if ((uchar)(*packet)[EVENT_TYPE_OFFSET+1] != ANNOTATE_ROWS_EVENT ||
+ (flags & BINLOG_SEND_ANNOTATE_ROWS_EVENT))
+ {
+ thd_proc_info(thd, "Sending binlog event to slave");
+ if (my_net_write(net, (uchar*) packet->ptr(), packet->length()) )
{
- errmsg = "failed in send_file()";
+ errmsg = "Failed on my_net_write()";
my_errno= ER_UNKNOWN_ERROR;
goto err;
}
- }
+
+ if ((uchar)(*packet)[LOG_EVENT_OFFSET+1] == LOAD_EVENT)
+ {
+ if (send_file(thd))
+ {
+ errmsg = "failed in send_file()";
+ my_errno= ER_UNKNOWN_ERROR;
+ goto err;
+ }
+ }
+ }
packet->set("\0", 1, &my_charset_bin);
/*
No need to net_flush because we will get to flush later when
@@ -1774,6 +1782,11 @@ static sys_var_chain vars = { NULL, NULL };
static sys_var_const sys_log_slave_updates(&vars, "log_slave_updates",
OPT_GLOBAL, SHOW_MY_BOOL,
(uchar*) &opt_log_slave_updates);
+static sys_var_const
+sys_replicate_annotate_rows_events(&vars,
+ "replicate_annotate_rows_events",
+ OPT_GLOBAL, SHOW_MY_BOOL,
+ (uchar*) &opt_replicate_annotate_rows_events);
static sys_var_const sys_relay_log(&vars, "relay_log",
OPT_GLOBAL, SHOW_CHAR_PTR,
(uchar*) &opt_relay_logname);
diff --git a/storage/innodb_plugin/handler/ha_innodb.cc b/storage/innodb_plugin/handler/ha_innodb.cc
index 74dba9d2c31..9407a0106e4 100644
--- a/storage/innodb_plugin/handler/ha_innodb.cc
+++ b/storage/innodb_plugin/handler/ha_innodb.cc
@@ -32,7 +32,8 @@ Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/* TODO list for the InnoDB handler in 5.0:
- - Remove the flag trx->active_trans and look at trx->conc_state
+ - Remove the flag trx->active_flag & TRX_ACTIVE_IN_MYSQL and look
+ at trx->conc_state
- fix savepoint functions to use savepoint storage area
- Find out what kind of problems the OS X case-insensitivity causes to
table and database names; should we 'normalize' the names like we do
@@ -102,8 +103,6 @@ bool check_global_access(THD *thd, ulong want_access);
/** to protect innobase_open_files */
static pthread_mutex_t innobase_share_mutex;
-/** to force correct commit order in binlog */
-static pthread_mutex_t prepare_commit_mutex;
static ulong commit_threads = 0;
static pthread_mutex_t commit_threads_m;
static pthread_cond_t commit_cond;
@@ -197,6 +196,7 @@ static const char* innobase_change_buffering_values[IBUF_USE_COUNT] = {
static INNOBASE_SHARE *get_share(const char *table_name);
static void free_share(INNOBASE_SHARE *share);
static int innobase_close_connection(handlerton *hton, THD* thd);
+static void innobase_commit_ordered(handlerton *hton, THD* thd, bool all);
static int innobase_commit(handlerton *hton, THD* thd, bool all);
static int innobase_rollback(handlerton *hton, THD* thd, bool all);
static int innobase_rollback_to_savepoint(handlerton *hton, THD* thd,
@@ -1320,7 +1320,6 @@ innobase_trx_init(
trx_t* trx) /*!< in/out: InnoDB transaction handle */
{
DBUG_ENTER("innobase_trx_init");
- DBUG_ASSERT(EQ_CURRENT_THD(thd));
DBUG_ASSERT(thd == trx->mysql_thd);
trx->check_foreigns = !thd_test_options(
@@ -1369,8 +1368,6 @@ check_trx_exists(
{
trx_t*& trx = thd_to_trx(thd);
- ut_ad(EQ_CURRENT_THD(thd));
-
if (trx == NULL) {
trx = innobase_trx_allocate(thd);
} else if (UNIV_UNLIKELY(trx->magic_n != TRX_MAGIC_N)) {
@@ -1636,10 +1633,10 @@ innobase_query_caching_of_table_permitted(
/* The call of row_search_.. will start a new transaction if it is
not yet started */
- if (trx->active_trans == 0) {
+ if ((trx->active_flag & TRX_ACTIVE_IN_MYSQL) == 0) {
innobase_register_trx_and_stmt(innodb_hton_ptr, thd);
- trx->active_trans = 1;
+ trx->active_flag |= TRX_ACTIVE_IN_MYSQL;
}
if (row_search_check_if_query_cache_permitted(trx, norm_name)) {
@@ -1909,11 +1906,11 @@ ha_innobase::init_table_handle_for_HANDLER(void)
/* Set the MySQL flag to mark that there is an active transaction */
- if (prebuilt->trx->active_trans == 0) {
+ if ((prebuilt->trx->active_flag & TRX_ACTIVE_IN_MYSQL) == 0) {
innobase_register_trx_and_stmt(ht, user_thd);
- prebuilt->trx->active_trans = 1;
+ prebuilt->trx->active_flag |= TRX_ACTIVE_IN_MYSQL;
}
/* We did the necessary inits in this function, no need to repeat them
@@ -1964,6 +1961,8 @@ innobase_init(
innobase_hton->savepoint_set=innobase_savepoint;
innobase_hton->savepoint_rollback=innobase_rollback_to_savepoint;
innobase_hton->savepoint_release=innobase_release_savepoint;
+ innobase_hton->prepare_ordered=NULL;
+ innobase_hton->commit_ordered=innobase_commit_ordered;
innobase_hton->commit=innobase_commit;
innobase_hton->rollback=innobase_rollback;
innobase_hton->prepare=innobase_xa_prepare;
@@ -2268,7 +2267,6 @@ innobase_change_buffering_inited_ok:
innobase_open_tables = hash_create(200);
pthread_mutex_init(&innobase_share_mutex, MY_MUTEX_INIT_FAST);
- pthread_mutex_init(&prepare_commit_mutex, MY_MUTEX_INIT_FAST);
pthread_mutex_init(&commit_threads_m, MY_MUTEX_INIT_FAST);
pthread_mutex_init(&commit_cond_m, MY_MUTEX_INIT_FAST);
pthread_cond_init(&commit_cond, NULL);
@@ -2323,7 +2321,6 @@ innobase_end(
my_free(internal_innobase_data_file_path,
MYF(MY_ALLOW_ZERO_PTR));
pthread_mutex_destroy(&innobase_share_mutex);
- pthread_mutex_destroy(&prepare_commit_mutex);
pthread_mutex_destroy(&commit_threads_m);
pthread_mutex_destroy(&commit_cond_m);
pthread_cond_destroy(&commit_cond);
@@ -2422,14 +2419,118 @@ innobase_start_trx_and_assign_read_view(
/* Set the MySQL flag to mark that there is an active transaction */
- if (trx->active_trans == 0) {
+ if ((trx->active_flag & TRX_ACTIVE_IN_MYSQL) == 0) {
innobase_register_trx_and_stmt(hton, thd);
- trx->active_trans = 1;
+ trx->active_flag |= TRX_ACTIVE_IN_MYSQL;
}
DBUG_RETURN(0);
}
+static
+void
+innobase_commit_ordered_2(
+/*============*/
+ trx_t* trx, /*!< in: Innodb transaction */
+ THD* thd) /*!< in: MySQL thread handle */
+{
+ ulonglong tmp_pos;
+ DBUG_ENTER("innobase_commit_ordered");
+
+ /* We need current binlog position for ibbackup to work.
+ Note, the position is current because commit_ordered is guaranteed
+ to be called in same sequenece as writing to binlog. */
+
+retry:
+ if (innobase_commit_concurrency > 0) {
+ pthread_mutex_lock(&commit_cond_m);
+ commit_threads++;
+
+ if (commit_threads > innobase_commit_concurrency) {
+ commit_threads--;
+ pthread_cond_wait(&commit_cond,
+ &commit_cond_m);
+ pthread_mutex_unlock(&commit_cond_m);
+ goto retry;
+ }
+ else {
+ pthread_mutex_unlock(&commit_cond_m);
+ }
+ }
+
+ mysql_bin_log_commit_pos(thd, &tmp_pos, &(trx->mysql_log_file_name));
+ trx->mysql_log_offset = (ib_int64_t) tmp_pos;
+
+ /* Don't do write + flush right now. For group commit
+ to work we want to do the flush in the innobase_commit()
+ method, which runs without holding any locks. */
+ trx->flush_log_later = TRUE;
+ innobase_commit_low(trx);
+ trx->flush_log_later = FALSE;
+
+ if (innobase_commit_concurrency > 0) {
+ pthread_mutex_lock(&commit_cond_m);
+ commit_threads--;
+ pthread_cond_signal(&commit_cond);
+ pthread_mutex_unlock(&commit_cond_m);
+ }
+
+ DBUG_VOID_RETURN;
+}
+
+/*****************************************************************//**
+Perform the first, fast part of InnoDB commit.
+
+Doing it in this call ensures that we get the same commit order here
+as in binlog and any other participating transactional storage engines.
+
+Note that we want to do as little as really needed here, as we run
+under a global mutex. The expensive fsync() is done later, in
+innobase_commit(), without a lock so group commit can take place.
+
+Note also that this method can be called from a different thread than
+the one handling the rest of the transaction. */
+static
+void
+innobase_commit_ordered(
+/*============*/
+ handlerton *hton, /*!< in: Innodb handlerton */
+ THD* thd, /*!< in: MySQL thread handle of the user for whom
+ the transaction should be committed */
+ bool all) /*!< in: TRUE - commit transaction
+ FALSE - the current SQL statement ended */
+{
+ trx_t* trx;
+ DBUG_ENTER("innobase_commit_ordered");
+ DBUG_ASSERT(hton == innodb_hton_ptr);
+
+ trx = check_trx_exists(thd);
+
+ /* Since we will reserve the kernel mutex, we must not be holding the
+ search system latch, or we will disobey the latching order. But we
+ already released it in innobase_xa_prepare() (if not before), so just
+ have an assert here.*/
+ ut_ad(!trx->has_search_latch);
+
+ if ((trx->active_flag & TRX_ACTIVE_IN_MYSQL) == 0
+ && trx->conc_state != TRX_NOT_STARTED) {
+ /* We cannot throw error here; instead we will catch this error
+ again in innobase_commit() and report it from there. */
+ DBUG_VOID_RETURN;
+ }
+
+ /* commit_ordered is only called when committing the whole transaction
+ (or an SQL statement when autocommit is on). */
+ DBUG_ASSERT(all ||
+ (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)));
+
+ innobase_commit_ordered_2(trx, thd);
+
+ trx->active_flag |= TRX_ACTIVE_COMMIT_ORDERED;
+
+ DBUG_VOID_RETURN;
+}
+
/*****************************************************************//**
Commits a transaction in an InnoDB database or marks an SQL statement
ended.
@@ -2455,11 +2556,12 @@ innobase_commit(
/* Since we will reserve the kernel mutex, we have to release
the search system latch first to obey the latching order. */
- if (trx->has_search_latch) {
+ if (trx->has_search_latch &&
+ (trx->active_flag & TRX_ACTIVE_COMMIT_ORDERED) == 0) {
trx_search_latch_release_if_reserved(trx);
}
- /* The flag trx->active_trans is set to 1 in
+ /* The flag TRX_ACTIVE_IN_MYSQL in trx->active_flag is set in
1. ::external_lock(),
2. ::start_stmt(),
@@ -2469,81 +2571,33 @@ innobase_commit(
6. innobase_start_trx_and_assign_read_view(),
7. ::transactional_table_lock()
- and it is only set to 0 in a commit or a rollback. If it is 0 we know
+ and it is only cleared in a commit or a rollback. If it is unset we know
there cannot be resources to be freed and we could return immediately.
For the time being, we play safe and do the cleanup though there should
be nothing to clean up. */
- if (trx->active_trans == 0
+ if ((trx->active_flag & TRX_ACTIVE_IN_MYSQL) == 0
&& trx->conc_state != TRX_NOT_STARTED) {
- sql_print_error("trx->active_trans == 0, but"
- " trx->conc_state != TRX_NOT_STARTED");
+ sql_print_error("trx->active_flag & TRX_ACTIVE_IN_MYSQL== 0,"
+ " but trx->conc_state != TRX_NOT_STARTED");
}
+
if (all
|| (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) {
- /* We were instructed to commit the whole transaction, or
- this is an SQL statement end and autocommit is on */
-
- /* We need current binlog position for ibbackup to work.
- Note, the position is current because of
- prepare_commit_mutex */
-retry:
- if (innobase_commit_concurrency > 0) {
- pthread_mutex_lock(&commit_cond_m);
- commit_threads++;
-
- if (commit_threads > innobase_commit_concurrency) {
- commit_threads--;
- pthread_cond_wait(&commit_cond,
- &commit_cond_m);
- pthread_mutex_unlock(&commit_cond_m);
- goto retry;
- }
- else {
- pthread_mutex_unlock(&commit_cond_m);
- }
- }
-
- /* The following calls to read the MySQL binary log
- file name and the position return consistent results:
- 1) Other InnoDB transactions cannot intervene between
- these calls as we are holding prepare_commit_mutex.
- 2) Binary logging of other engines is not relevant
- to InnoDB as all InnoDB requires is that committing
- InnoDB transactions appear in the same order in the
- MySQL binary log as they appear in InnoDB logs.
- 3) A MySQL log file rotation cannot happen because
- MySQL protects against this by having a counter of
- transactions in prepared state and it only allows
- a rotation when the counter drops to zero. See
- LOCK_prep_xids and COND_prep_xids in log.cc. */
- trx->mysql_log_file_name = mysql_bin_log_file_name();
- trx->mysql_log_offset = (ib_int64_t) mysql_bin_log_file_pos();
-
- /* Don't do write + flush right now. For group commit
- to work we want to do the flush after releasing the
- prepare_commit_mutex. */
- trx->flush_log_later = TRUE;
- innobase_commit_low(trx);
- trx->flush_log_later = FALSE;
-
- if (innobase_commit_concurrency > 0) {
- pthread_mutex_lock(&commit_cond_m);
- commit_threads--;
- pthread_cond_signal(&commit_cond);
- pthread_mutex_unlock(&commit_cond_m);
+ /* Run the fast part of commit if we did not already. */
+ if ((trx->active_flag & TRX_ACTIVE_COMMIT_ORDERED) == 0) {
+ innobase_commit_ordered_2(trx, thd);
}
- if (trx->active_trans == 2) {
-
- pthread_mutex_unlock(&prepare_commit_mutex);
- }
+ /* We were instructed to commit the whole transaction, or
+ this is an SQL statement end and autocommit is on */
- /* Now do a write + flush of logs. */
+ /* We did the first part already in innobase_commit_ordered(),
+ Now finish by doing a write + flush of logs. */
trx_commit_complete_for_mysql(trx);
- trx->active_trans = 0;
+ trx->active_flag = 0;
} else {
/* We just mark the SQL statement ended and do not do a
@@ -2616,7 +2670,7 @@ innobase_rollback(
|| !thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
error = trx_rollback_for_mysql(trx);
- trx->active_trans = 0;
+ trx->active_flag = 0;
} else {
error = trx_rollback_last_sql_stat_for_mysql(trx);
}
@@ -2760,7 +2814,7 @@ innobase_savepoint(
innobase_release_stat_resources(trx);
/* cannot happen outside of transaction */
- DBUG_ASSERT(trx->active_trans);
+ DBUG_ASSERT(trx->active_flag & TRX_ACTIVE_IN_MYSQL);
/* TODO: use provided savepoint data area to store savepoint data */
char name[64];
@@ -2790,11 +2844,11 @@ innobase_close_connection(
ut_a(trx);
- if (trx->active_trans == 0
+ if ((trx->active_flag & TRX_ACTIVE_IN_MYSQL) == 0
&& trx->conc_state != TRX_NOT_STARTED) {
- sql_print_error("trx->active_trans == 0, but"
- " trx->conc_state != TRX_NOT_STARTED");
+ sql_print_error("trx->active_flag & TRX_ACTIVE_IN_MYSQL == 0,"
+ " but trx->conc_state != TRX_NOT_STARTED");
}
@@ -4691,7 +4745,7 @@ no_commit:
/* Altering to InnoDB format */
innobase_commit(ht, user_thd, 1);
/* Note that this transaction is still active. */
- prebuilt->trx->active_trans = 1;
+ prebuilt->trx->active_flag |= TRX_ACTIVE_IN_MYSQL;
/* We will need an IX lock on the destination table. */
prebuilt->sql_stat_start = TRUE;
} else {
@@ -4707,7 +4761,7 @@ no_commit:
locks, so they have to be acquired again. */
innobase_commit(ht, user_thd, 1);
/* Note that this transaction is still active. */
- prebuilt->trx->active_trans = 1;
+ prebuilt->trx->active_flag |= TRX_ACTIVE_IN_MYSQL;
/* Re-acquire the table lock on the source table. */
row_lock_table_for_mysql(prebuilt, src_table, mode);
/* We will need an IX lock on the destination table. */
@@ -8500,10 +8554,10 @@ ha_innobase::start_stmt(
trx->detailed_error[0] = '\0';
/* Set the MySQL flag to mark that there is an active transaction */
- if (trx->active_trans == 0) {
+ if ((trx->active_flag & TRX_ACTIVE_IN_MYSQL) == 0) {
innobase_register_trx_and_stmt(ht, thd);
- trx->active_trans = 1;
+ trx->active_flag |= TRX_ACTIVE_IN_MYSQL;
} else {
innobase_register_stmt(ht, thd);
}
@@ -8601,10 +8655,10 @@ ha_innobase::external_lock(
/* Set the MySQL flag to mark that there is an active
transaction */
- if (trx->active_trans == 0) {
+ if ((trx->active_flag & TRX_ACTIVE_IN_MYSQL) == 0) {
innobase_register_trx_and_stmt(ht, thd);
- trx->active_trans = 1;
+ trx->active_flag |= TRX_ACTIVE_IN_MYSQL;
} else if (trx->n_mysql_tables_in_use == 0) {
innobase_register_stmt(ht, thd);
}
@@ -8683,7 +8737,7 @@ ha_innobase::external_lock(
prebuilt->used_in_HANDLER = FALSE;
if (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
- if (trx->active_trans != 0) {
+ if ((trx->active_flag & TRX_ACTIVE_IN_MYSQL) != 0) {
innobase_commit(ht, thd, TRUE);
}
} else {
@@ -8764,10 +8818,10 @@ ha_innobase::transactional_table_lock(
/* MySQL is setting a new transactional table lock */
/* Set the MySQL flag to mark that there is an active transaction */
- if (trx->active_trans == 0) {
+ if ((trx->active_flag & TRX_ACTIVE_IN_MYSQL) == 0) {
innobase_register_trx_and_stmt(ht, thd);
- trx->active_trans = 1;
+ trx->active_flag |= TRX_ACTIVE_IN_MYSQL;
}
if (THDVAR(thd, table_locks) && thd_in_lock_tables(thd)) {
@@ -9823,10 +9877,11 @@ innobase_xa_prepare(
innobase_release_stat_resources(trx);
- if (trx->active_trans == 0 && trx->conc_state != TRX_NOT_STARTED) {
+ if ((trx->active_flag & TRX_ACTIVE_IN_MYSQL) == 0 &&
+ trx->conc_state != TRX_NOT_STARTED) {
- sql_print_error("trx->active_trans == 0, but trx->conc_state != "
- "TRX_NOT_STARTED");
+ sql_print_error("trx->active_flag & TRX_ACTIVE_IN_MYSQL == 0, but"
+ " trx->conc_state != TRX_NOT_STARTED");
}
if (all
@@ -9835,7 +9890,7 @@ innobase_xa_prepare(
/* We were instructed to prepare the whole transaction, or
this is an SQL statement end and autocommit is on */
- ut_ad(trx->active_trans);
+ ut_ad(trx->active_flag & TRX_ACTIVE_IN_MYSQL);
error = (int) trx_prepare_for_mysql(trx);
} else {
@@ -9859,32 +9914,6 @@ innobase_xa_prepare(
srv_active_wake_master_thread();
- if (thd_sql_command(thd) != SQLCOM_XA_PREPARE &&
- (all || !thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)))
- {
-
- /* For ibbackup to work the order of transactions in binlog
- and InnoDB must be the same. Consider the situation
-
- thread1> prepare; write to binlog; ...
- <context switch>
- thread2> prepare; write to binlog; commit
- thread1> ... commit
-
- To ensure this will not happen we're taking the mutex on
- prepare, and releasing it on commit.
-
- Note: only do it for normal commits, done via ha_commit_trans.
- If 2pc protocol is executed by external transaction
- coordinator, it will be just a regular MySQL client
- executing XA PREPARE and XA COMMIT commands.
- In this case we cannot know how many minutes or hours
- will be between XA PREPARE and XA COMMIT, and we don't want
- to block for undefined period of time. */
- pthread_mutex_lock(&prepare_commit_mutex);
- trx->active_trans = 2;
- }
-
return(error);
}
diff --git a/storage/innodb_plugin/handler/ha_innodb.h b/storage/innodb_plugin/handler/ha_innodb.h
index 7a8f29853de..1dcd7ce203f 100644
--- a/storage/innodb_plugin/handler/ha_innodb.h
+++ b/storage/innodb_plugin/handler/ha_innodb.h
@@ -238,16 +238,6 @@ LEX_STRING *thd_query_string(MYSQL_THD thd);
char **thd_query(MYSQL_THD thd);
#endif
-/** Get the file name of the MySQL binlog.
- * @return the name of the binlog file
- */
-const char* mysql_bin_log_file_name(void);
-
-/** Get the current position of the MySQL binlog.
- * @return byte offset from the beginning of the binlog
- */
-ulonglong mysql_bin_log_file_pos(void);
-
/**
Check if a user thread is a replication slave thread
@param thd user thread
@@ -288,6 +278,11 @@ bool thd_binlog_filter_ok(const MYSQL_THD thd);
#endif /* MYSQL_VERSION_ID > 50140 */
}
+/** Get the file name and position of the MySQL binlog corresponding to the
+ * current commit.
+ */
+extern void mysql_bin_log_commit_pos(THD *thd, ulonglong *out_pos, const char **out_file);
+
typedef struct trx_struct trx_t;
/********************************************************************//**
@file handler/ha_innodb.h
diff --git a/storage/innodb_plugin/include/trx0trx.h b/storage/innodb_plugin/include/trx0trx.h
index abd175d365b..5ecebdda945 100644
--- a/storage/innodb_plugin/include/trx0trx.h
+++ b/storage/innodb_plugin/include/trx0trx.h
@@ -510,9 +510,10 @@ struct trx_struct{
in that case we must flush the log
in trx_commit_complete_for_mysql() */
ulint duplicates; /*!< TRX_DUP_IGNORE | TRX_DUP_REPLACE */
- ulint active_trans; /*!< 1 - if a transaction in MySQL
- is active. 2 - if prepare_commit_mutex
- was taken */
+ ulint active_flag; /*!< TRX_ACTIVE_IN_MYSQL - set if a
+ transaction in MySQL is active.
+ TRX_ACTIVE_COMMIT_ORDERED - set if
+ innobase_commit_ordered has run */
ulint has_search_latch;
/* TRUE if this trx has latched the
search system latch in S-mode */
@@ -797,6 +798,10 @@ Multiple flags can be combined with bitwise OR. */
#define TRX_SIG_OTHER_SESS 1 /* sent by another session (which
must hold rights to this) */
+/* Flag bits for trx_struct.active_flag */
+#define TRX_ACTIVE_IN_MYSQL (1<<0)
+#define TRX_ACTIVE_COMMIT_ORDERED (1<<1)
+
/** Commit node states */
enum commit_node_state {
COMMIT_NODE_SEND = 1, /*!< about to send a commit signal to
diff --git a/storage/innodb_plugin/trx/trx0trx.c b/storage/innodb_plugin/trx/trx0trx.c
index ee744fd58b1..adece596e1b 100644
--- a/storage/innodb_plugin/trx/trx0trx.c
+++ b/storage/innodb_plugin/trx/trx0trx.c
@@ -119,7 +119,7 @@ trx_create(
trx->table_id = ut_dulint_zero;
trx->mysql_thd = NULL;
- trx->active_trans = 0;
+ trx->active_flag = 0;
trx->duplicates = 0;
trx->n_mysql_tables_in_use = 0;
diff --git a/storage/pbxt/src/ha_pbxt.cc b/storage/pbxt/src/ha_pbxt.cc
index 8640c079a37..b5cc7d7f34a 100644
--- a/storage/pbxt/src/ha_pbxt.cc
+++ b/storage/pbxt/src/ha_pbxt.cc
@@ -108,6 +108,9 @@ static int pbxt_end(void *p);
static int pbxt_panic(handlerton *hton, enum ha_panic_function flag);
static void pbxt_drop_database(handlerton *hton, char *path);
static int pbxt_close_connection(handlerton *hton, THD* thd);
+#ifdef MARIADB_BASE_VERSION
+static void pbxt_commit_ordered(handlerton *hton, THD *thd, bool all);
+#endif
static int pbxt_commit(handlerton *hton, THD *thd, bool all);
static int pbxt_rollback(handlerton *hton, THD *thd, bool all);
static int pbxt_prepare(handlerton *hton, THD *thd, bool all);
@@ -1147,6 +1150,9 @@ static int pbxt_init(void *p)
pbxt_hton->state = SHOW_OPTION_YES;
pbxt_hton->db_type = DB_TYPE_PBXT; // Wow! I have my own!
pbxt_hton->close_connection = pbxt_close_connection; /* close_connection, cleanup thread related data. */
+#ifdef MARIADB_BASE_VERSION
+ pbxt_hton->commit_ordered = pbxt_commit_ordered;
+#endif
pbxt_hton->commit = pbxt_commit; /* commit */
pbxt_hton->rollback = pbxt_rollback; /* rollback */
if (pbxt_support_xa) {
@@ -1484,6 +1490,29 @@ static int pbxt_start_consistent_snapshot(handlerton *hton, THD *thd)
return err;
}
+#ifdef MARIADB_BASE_VERSION
+/*
+ * Quickly commit the transaction to memory and make it visible to others.
+ * The remaining part of commit will happen later, in pbxt_commit().
+ */
+static void pbxt_commit_ordered(handlerton *hton, THD *thd, bool all)
+{
+ XTThreadPtr self;
+
+ if ((self = (XTThreadPtr) *thd_ha_data(thd, hton))) {
+ XT_PRINT2(self, "%s pbxt_commit_ordered all=%d\n", all ? "END CONN XACT" : "END STAT", all);
+
+ if (self->st_xact_data) {
+ if (all || self->st_auto_commit) {
+ self->st_commit_ordered = TRUE;
+ self->st_writer = self->st_xact_writer;
+ self->st_delayed_error= !xt_xn_commit_fast(self, self->st_writer);
+ }
+ }
+ }
+}
+#endif
+
/*
* Commit the PBXT transaction of the given thread.
* thd is the MySQL thread structure.
@@ -1512,7 +1541,13 @@ static int pbxt_commit(handlerton *hton, THD *thd, bool all)
if (all || self->st_auto_commit) {
XT_PRINT0(self, "xt_xn_commit in pbxt_commit\n");
- if (!xt_xn_commit(self))
+ if (self->st_commit_ordered) {
+ self->st_commit_ordered = FALSE;
+ err = !xt_xn_commit_slow(self, self->st_writer) || self->st_delayed_error;
+ } else {
+ err = !xt_xn_commit(self);
+ }
+ if (err)
err = xt_ha_pbxt_thread_error_for_mysql(thd, self, FALSE);
}
}
@@ -6064,7 +6099,7 @@ static MYSQL_SYSVAR_INT(max_threads, pbxt_max_threads,
NULL, NULL, 0, 0, 20000, 1);
#endif
-#ifndef DEBUG
+#if !defined(DEBUG) || defined(MARIADB_BASE_VERSION)
static MYSQL_SYSVAR_BOOL(support_xa, pbxt_support_xa,
PLUGIN_VAR_OPCMDARG,
"Enable PBXT support for the XA two-phase commit, default is enabled",
diff --git a/storage/pbxt/src/thread_xt.h b/storage/pbxt/src/thread_xt.h
index a07f7b7ae01..282df46a5d5 100644
--- a/storage/pbxt/src/thread_xt.h
+++ b/storage/pbxt/src/thread_xt.h
@@ -299,6 +299,9 @@ typedef struct XTThread {
xtBool st_stat_ended; /* TRUE if the statement was ended. */
xtBool st_stat_trans; /* TRUE if a statement transaction is running (started on UPDATE). */
xtBool st_stat_modify; /* TRUE if the statement is an INSERT/UPDATE/DELETE */
+ xtBool st_commit_ordered; /* TRUE if we have run commit_ordered() */
+ xtBool st_delayed_error; /* TRUE if we got an error in commit_ordered() */
+ xtBool st_writer; /* Copy of thread->st_xact_writer (which is clobbered by xlog_append()) */
#ifdef XT_IMPLEMENT_NO_ACTION
XTBasicListRec st_restrict_list; /* These records have been deleted and should have no reference. */
#endif
diff --git a/storage/pbxt/src/xaction_xt.cc b/storage/pbxt/src/xaction_xt.cc
index 48abc5d2b66..c5f07eedabf 100644
--- a/storage/pbxt/src/xaction_xt.cc
+++ b/storage/pbxt/src/xaction_xt.cc
@@ -1287,27 +1287,61 @@ xtPublic xtBool xt_xn_begin(XTThreadPtr self)
return OK;
}
-static xtBool xn_end_xact(XTThreadPtr thread, u_int status)
+static void xn_end_release_locks(XTThreadPtr thread)
+{
+ XTXactDataPtr xact = thread->st_xact_data;
+ XTDatabaseHPtr db = thread->st_database;
+ ASSERT_NS(xact);
+
+ /* {REMOVE-LOCKS} Drop locks if you have any: */
+ thread->st_lock_list.xt_remove_all_locks(db, thread);
+
+ /* Do this afterwards to make sure the sweeper
+ * does not cleanup transactions start cleaning up
+ * before any transactions that were waiting for
+ * this transaction have completed!
+ */
+ xact->xd_end_xn_id = db->db_xn_curr_id;
+
+ /* Now you can sweep! */
+ xact->xd_flags |= XT_XN_XAC_SWEEP;
+}
+
+/* The commit is split into two phases: one "fast" for MariaDB commit_ordered(),
+ * and one "slow" for commit(). When not using internal 2pc, there is only one
+ * call combining both phases.
+ */
+
+enum {
+ XN_END_PHASE_FAST = 1,
+ XN_END_PHASE_SLOW = 2,
+ XN_END_PHASE_BOTH = 3
+};
+
+static xtBool xn_end_xact(XTThreadPtr thread, u_int status, xtBool writer, int phase)
{
XTXactDataPtr xact;
xtBool ok = TRUE;
+ xtBool err;
ASSERT_NS(thread->st_xact_data);
if ((xact = thread->st_xact_data)) {
XTDatabaseHPtr db = thread->st_database;
xtXactID xn_id = xact->xd_start_xn_id;
- xtBool writer;
- if ((writer = thread->st_xact_writer)) {
+ if (writer) {
/* The transaction wrote something: */
XTXactEndEntryDRec entry;
xtWord4 sum;
- sum = XT_CHECKSUM4_XACT(xn_id) ^ XT_CHECKSUM4_XACT(0);
- entry.xe_status_1 = status;
- entry.xe_checksum_1 = XT_CHECKSUM_1(sum);
- XT_SET_DISK_4(entry.xe_xact_id_4, xn_id);
- XT_SET_DISK_4(entry.xe_not_used_4, 0);
+ if (phase & XN_END_PHASE_FAST)
+ {
+ sum = XT_CHECKSUM4_XACT(xn_id) ^ XT_CHECKSUM4_XACT(0);
+ entry.xe_status_1 = status;
+ entry.xe_checksum_1 = XT_CHECKSUM_1(sum);
+ XT_SET_DISK_4(entry.xe_xact_id_4, xn_id);
+ XT_SET_DISK_4(entry.xe_not_used_4, 0);
+ }
#ifdef XT_IMPLEMENT_NO_ACTION
/* This will check any resticts that have been delayed to the end of the statement. */
@@ -1319,20 +1353,35 @@ static xtBool xn_end_xact(XTThreadPtr thread, u_int status)
}
#endif
- /* Flush the data log: */
- if (!thread->st_dlog_buf.dlb_flush_log(TRUE, thread)) {
+ /* Flush the data log (in the "fast" case we already did it in prepare: */
+ if ((phase & XN_END_PHASE_SLOW) && !thread->st_dlog_buf.dlb_flush_log(TRUE, thread)) {
ok = FALSE;
status = XT_LOG_ENT_ABORT;
}
/* Write and flush the transaction log: */
- if (!xt_xlog_log_data(thread, sizeof(XTXactEndEntryDRec), (XTXactLogBufferDPtr) &entry, xt_db_flush_log_at_trx_commit)) {
+ if (phase == XN_END_PHASE_FAST) {
+ /* Fast phase, delay any write or flush to later. */
+ err = !xt_xlog_log_data(thread, sizeof(XTXactEndEntryDRec), (XTXactLogBufferDPtr) &entry, XT_XLOG_NO_WRITE_NO_FLUSH);
+ } else if (phase == XN_END_PHASE_SLOW) {
+ /* We already appended the commit record in the fast phase.
+ * Now just call with empty record to ensure we write/flush
+ * the log as needed for this commit.
+ */
+ err = !xt_xlog_log_data(thread, 0, NULL, xt_db_flush_log_at_trx_commit);
+ } else /* phase == XN_END_PHASE_BOTH */ {
+ /* Both phases at once, append commit record and write/flush normally. */
+ ASSERT_NS(phase == XN_END_PHASE_BOTH);
+ err = !xt_xlog_log_data(thread, sizeof(XTXactEndEntryDRec), (XTXactLogBufferDPtr) &entry, xt_db_flush_log_at_trx_commit);
+ }
+
+ if (err) {
ok = FALSE;
status = XT_LOG_ENT_ABORT;
/* Make sure this is done, if we failed to log
* the transction end!
*/
- if (thread->st_xact_writer) {
+ if (writer) {
/* Adjust this in case of error, but don't forget
* to lock!
*/
@@ -1347,46 +1396,46 @@ static xtBool xn_end_xact(XTThreadPtr thread, u_int status)
}
}
- /* Setting this flag completes the transaction,
- * Do this before we release the locks, because
- * the unlocked transactions expect the
- * transaction they are waiting for to be
- * gone!
- */
- xact->xd_end_time = ++db->db_xn_end_time;
- if (status == XT_LOG_ENT_COMMIT) {
- thread->st_statistics.st_commits++;
- xact->xd_flags |= (XT_XN_XAC_COMMITTED | XT_XN_XAC_ENDED);
- }
- else {
- thread->st_statistics.st_rollbacks++;
- xact->xd_flags |= XT_XN_XAC_ENDED;
+ if (phase & XN_END_PHASE_FAST) {
+ /* Setting this flag completes the transaction,
+ * Do this before we release the locks, because
+ * the unlocked transactions expect the
+ * transaction they are waiting for to be
+ * gone!
+ */
+ xact->xd_end_time = ++db->db_xn_end_time;
+ if (status == XT_LOG_ENT_COMMIT) {
+ thread->st_statistics.st_commits++;
+ xact->xd_flags |= (XT_XN_XAC_COMMITTED | XT_XN_XAC_ENDED);
+ }
+ else {
+ thread->st_statistics.st_rollbacks++;
+ xact->xd_flags |= XT_XN_XAC_ENDED;
+ }
}
- /* {REMOVE-LOCKS} Drop locks is you have any: */
- thread->st_lock_list.xt_remove_all_locks(db, thread);
-
- /* Do this afterwards to make sure the sweeper
- * does not cleanup transactions start cleaning up
- * before any transactions that were waiting for
- * this transaction have completed!
+ /* Be as fast as possible in the "fast" path, as we want to be as
+ * fast as possible here (we will release slow locks immediately
+ * after in the "slow" part).
+ * ToDo: If we ran the fast part, the slow part could release locks
+ * _before_ fsync(), rather than after.
*/
- xact->xd_end_xn_id = db->db_xn_curr_id;
+ if (!(phase & XN_END_PHASE_SLOW))
+ return ok;
- /* Now you can sweep! */
- xact->xd_flags |= XT_XN_XAC_SWEEP;
+ xn_end_release_locks(thread);
}
else {
/* Read-only transaction can be removed, immediately */
- xact->xd_end_time = ++db->db_xn_end_time;
- xact->xd_flags |= (XT_XN_XAC_COMMITTED | XT_XN_XAC_ENDED);
-
- /* Drop locks is you have any: */
- thread->st_lock_list.xt_remove_all_locks(db, thread);
+ if (phase & XN_END_PHASE_FAST) {
+ xact->xd_end_time = ++db->db_xn_end_time;
+ xact->xd_flags |= (XT_XN_XAC_COMMITTED | XT_XN_XAC_ENDED);
- xact->xd_end_xn_id = db->db_xn_curr_id;
+ if (!(phase & XN_END_PHASE_SLOW))
+ return ok;
+ }
- xact->xd_flags |= XT_XN_XAC_SWEEP;
+ xn_end_release_locks(thread);
if (xt_xn_delete_xact(db, xn_id, thread)) {
if (db->db_xn_min_ram_id == xn_id)
@@ -1478,12 +1527,22 @@ static xtBool xn_end_xact(XTThreadPtr thread, u_int status)
xtPublic xtBool xt_xn_commit(XTThreadPtr thread)
{
- return xn_end_xact(thread, XT_LOG_ENT_COMMIT);
+ return xn_end_xact(thread, XT_LOG_ENT_COMMIT, thread->st_xact_writer, XN_END_PHASE_BOTH);
+}
+
+xtPublic xtBool xt_xn_commit_fast(XTThreadPtr thread, xtBool writer)
+{
+ return xn_end_xact(thread, XT_LOG_ENT_COMMIT, writer, XN_END_PHASE_FAST);
+}
+
+xtPublic xtBool xt_xn_commit_slow(XTThreadPtr thread, xtBool writer)
+{
+ return xn_end_xact(thread, XT_LOG_ENT_COMMIT, writer, XN_END_PHASE_SLOW);
}
xtPublic xtBool xt_xn_rollback(XTThreadPtr thread)
{
- return xn_end_xact(thread, XT_LOG_ENT_ABORT);
+ return xn_end_xact(thread, XT_LOG_ENT_ABORT, thread->st_xact_writer, XN_END_PHASE_BOTH);
}
xtPublic xtBool xt_xn_log_tab_id(XTThreadPtr self, xtTableID tab_id)
diff --git a/storage/pbxt/src/xaction_xt.h b/storage/pbxt/src/xaction_xt.h
index e679a0f38f0..cd350200506 100644
--- a/storage/pbxt/src/xaction_xt.h
+++ b/storage/pbxt/src/xaction_xt.h
@@ -193,6 +193,8 @@ void xt_wakeup_sweeper(struct XTDatabase *db);
xtBool xt_xn_begin(struct XTThread *self);
xtBool xt_xn_commit(struct XTThread *self);
+xtBool xt_xn_commit_fast(struct XTThread *self, xtBool writer);
+xtBool xt_xn_commit_slow(struct XTThread *self, xtBool writer);
xtBool xt_xn_rollback(struct XTThread *self);
xtBool xt_xn_log_tab_id(struct XTThread *self, xtTableID tab_id);
int xt_xn_status(struct XTOpenTable *ot, xtXactID xn_id, xtRecordID rec_id);
diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc
index 9ccacf812ae..7389abc464d 100644
--- a/storage/xtradb/handler/ha_innodb.cc
+++ b/storage/xtradb/handler/ha_innodb.cc
@@ -32,7 +32,8 @@ Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
/* TODO list for the InnoDB handler in 5.0:
- - Remove the flag trx->active_trans and look at trx->conc_state
+ - Remove the flag trx->active_flag & TRX_ACTIVE_IN_MYSQL and look
+ at trx->conc_state
- fix savepoint functions to use savepoint storage area
- Find out what kind of problems the OS X case-insensitivity causes to
table and database names; should we 'normalize' the names like we do
@@ -117,8 +118,6 @@ bool check_global_access(THD *thd, ulong want_access);
/** to protect innobase_open_files */
static pthread_mutex_t innobase_share_mutex;
-/** to force correct commit order in binlog */
-static pthread_mutex_t prepare_commit_mutex;
static ulong commit_threads = 0;
static pthread_mutex_t commit_threads_m;
static pthread_cond_t commit_cond;
@@ -230,6 +229,8 @@ static const char* innobase_change_buffering_values[IBUF_USE_COUNT] = {
static INNOBASE_SHARE *get_share(const char *table_name);
static void free_share(INNOBASE_SHARE *share);
static int innobase_close_connection(handlerton *hton, THD* thd);
+static void innobase_prepare_ordered(handlerton *hton, THD* thd, bool all);
+static void innobase_commit_ordered(handlerton *hton, THD* thd, bool all);
static int innobase_commit(handlerton *hton, THD* thd, bool all);
static int innobase_rollback(handlerton *hton, THD* thd, bool all);
static int innobase_rollback_to_savepoint(handlerton *hton, THD* thd,
@@ -1376,7 +1377,6 @@ innobase_trx_init(
trx_t* trx) /*!< in/out: InnoDB transaction handle */
{
DBUG_ENTER("innobase_trx_init");
- DBUG_ASSERT(EQ_CURRENT_THD(thd));
DBUG_ASSERT(thd == trx->mysql_thd);
trx->check_foreigns = !thd_test_options(
@@ -1435,8 +1435,6 @@ check_trx_exists(
{
trx_t*& trx = thd_to_trx(thd);
- ut_ad(EQ_CURRENT_THD(thd));
-
if (trx == NULL) {
trx = innobase_trx_allocate(thd);
} else if (UNIV_UNLIKELY(trx->magic_n != TRX_MAGIC_N)) {
@@ -1728,10 +1726,10 @@ innobase_query_caching_of_table_permitted(
/* The call of row_search_.. will start a new transaction if it is
not yet started */
- if (trx->active_trans == 0) {
+ if ((trx->active_flag & TRX_ACTIVE_IN_MYSQL) == 0) {
innobase_register_trx_and_stmt(innodb_hton_ptr, thd);
- trx->active_trans = 1;
+ trx->active_flag |= TRX_ACTIVE_IN_MYSQL;
}
if (row_search_check_if_query_cache_permitted(trx, norm_name)) {
@@ -2001,11 +1999,11 @@ ha_innobase::init_table_handle_for_HANDLER(void)
/* Set the MySQL flag to mark that there is an active transaction */
- if (prebuilt->trx->active_trans == 0) {
+ if ((prebuilt->trx->active_flag & TRX_ACTIVE_IN_MYSQL) == 0) {
innobase_register_trx_and_stmt(ht, user_thd);
- prebuilt->trx->active_trans = 1;
+ prebuilt->trx->active_flag |= TRX_ACTIVE_IN_MYSQL;
}
/* We did the necessary inits in this function, no need to repeat them
@@ -2056,6 +2054,11 @@ innobase_init(
innobase_hton->savepoint_set=innobase_savepoint;
innobase_hton->savepoint_rollback=innobase_rollback_to_savepoint;
innobase_hton->savepoint_release=innobase_release_savepoint;
+ if (innobase_release_locks_early)
+ innobase_hton->prepare_ordered=innobase_prepare_ordered;
+ else
+ innobase_hton->prepare_ordered=NULL;
+ innobase_hton->commit_ordered=innobase_commit_ordered;
innobase_hton->commit=innobase_commit;
innobase_hton->rollback=innobase_rollback;
innobase_hton->prepare=innobase_xa_prepare;
@@ -2585,7 +2588,6 @@ skip_overwrite:
innobase_open_tables = hash_create(200);
pthread_mutex_init(&innobase_share_mutex, MY_MUTEX_INIT_FAST);
- pthread_mutex_init(&prepare_commit_mutex, MY_MUTEX_INIT_FAST);
pthread_mutex_init(&commit_threads_m, MY_MUTEX_INIT_FAST);
pthread_mutex_init(&commit_cond_m, MY_MUTEX_INIT_FAST);
pthread_cond_init(&commit_cond, NULL);
@@ -2640,7 +2642,6 @@ innobase_end(
my_free(internal_innobase_data_file_path,
MYF(MY_ALLOW_ZERO_PTR));
pthread_mutex_destroy(&innobase_share_mutex);
- pthread_mutex_destroy(&prepare_commit_mutex);
pthread_mutex_destroy(&commit_threads_m);
pthread_mutex_destroy(&commit_cond_m);
pthread_cond_destroy(&commit_cond);
@@ -2761,15 +2762,167 @@ innobase_start_trx_and_assign_read_view(
/* Set the MySQL flag to mark that there is an active transaction */
- if (trx->active_trans == 0) {
+ if ((trx->active_flag & TRX_ACTIVE_IN_MYSQL) == 0) {
innobase_register_trx_and_stmt(hton, thd);
- trx->active_trans = 1;
+ trx->active_flag |= TRX_ACTIVE_IN_MYSQL;
}
DBUG_RETURN(0);
}
/*****************************************************************//**
+Release row locks early during prepare phase.
+
+Only enabled if --innodb-release-locks-early=1. In this case, a prepared
+transaction is treated as committed to memory (but not to disk), and we
+release row locks at the end of the prepare phase.
+
+The consistent commit ordering guarantees of prepare_ordered() calls means
+that transactions will not be binlogged in different order than locks are
+released (which would cause trouble for statement-based replication).
+
+This optimisation is not 100% safe, so is not enabled by default. But some
+applications may decide to enable it to reduce contention on hotspot rows.
+
+The consequences of enabling this are:
+
+ - It is not possible to rollback after successful prepare(). If there is
+ a need to rollback (ie. failure to binlog the transaction), we crash the
+ server (!)
+
+ - If we crash during commit, it is possible that an application/user can have
+ seen another transaction committed that is not recovered by XA crash
+ recovery. Thus durability is partially lost. However, consistency is still
+ guaranteed, we never recover a transaction and not recover another
+ transaction that committed before. */
+static
+void
+innobase_prepare_ordered(
+/*============*/
+ handlerton *hton, /*!< in: Innodb handlerton */
+ THD* thd, /*!< in: MySQL thread handle of the user for whom
+ the transaction should be committed */
+ bool all) /*!< in: TRUE - commit transaction
+ FALSE - the current SQL statement ended */
+{
+ trx_t* trx;
+ DBUG_ENTER("innobase_prepare_ordered");
+ DBUG_ASSERT(hton == innodb_hton_ptr);
+
+ trx = check_trx_exists(thd);
+
+ mutex_enter(&kernel_mutex);
+ lock_release_off_kernel(trx);
+ mutex_exit(&kernel_mutex);
+
+ DBUG_VOID_RETURN;
+}
+
+static
+void
+innobase_commit_ordered_2(
+/*============*/
+ trx_t* trx, /*!< in: Innodb transaction */
+ THD* thd) /*!< in: MySQL thread handle */
+{
+ ulonglong tmp_pos;
+ DBUG_ENTER("innobase_commit_ordered");
+
+ /* We need current binlog position for ibbackup to work.
+ Note, the position is current because commit_ordered is guaranteed
+ to be called in same sequenece as writing to binlog. */
+
+retry:
+ if (innobase_commit_concurrency > 0) {
+ pthread_mutex_lock(&commit_cond_m);
+ commit_threads++;
+
+ if (commit_threads > innobase_commit_concurrency) {
+ commit_threads--;
+ pthread_cond_wait(&commit_cond,
+ &commit_cond_m);
+ pthread_mutex_unlock(&commit_cond_m);
+ goto retry;
+ }
+ else {
+ pthread_mutex_unlock(&commit_cond_m);
+ }
+ }
+
+ mysql_bin_log_commit_pos(thd, &tmp_pos, &(trx->mysql_log_file_name));
+ trx->mysql_log_offset = (ib_int64_t) tmp_pos;
+
+ /* Don't do write + flush right now. For group commit
+ to work we want to do the flush in the innobase_commit()
+ method, which runs without holding any locks. */
+ trx->flush_log_later = TRUE;
+ innobase_commit_low(trx);
+ trx->flush_log_later = FALSE;
+
+ if (innobase_commit_concurrency > 0) {
+ pthread_mutex_lock(&commit_cond_m);
+ commit_threads--;
+ pthread_cond_signal(&commit_cond);
+ pthread_mutex_unlock(&commit_cond_m);
+ }
+
+ DBUG_VOID_RETURN;
+}
+
+/*****************************************************************//**
+Perform the first, fast part of InnoDB commit.
+
+Doing it in this call ensures that we get the same commit order here
+as in binlog and any other participating transactional storage engines.
+
+Note that we want to do as little as really needed here, as we run
+under a global mutex. The expensive fsync() is done later, in
+innobase_commit(), without a lock so group commit can take place.
+
+Note also that this method can be called from a different thread than
+the one handling the rest of the transaction. */
+static
+void
+innobase_commit_ordered(
+/*============*/
+ handlerton *hton, /*!< in: Innodb handlerton */
+ THD* thd, /*!< in: MySQL thread handle of the user for whom
+ the transaction should be committed */
+ bool all) /*!< in: TRUE - commit transaction
+ FALSE - the current SQL statement ended */
+{
+ trx_t* trx;
+ DBUG_ENTER("innobase_commit_ordered");
+ DBUG_ASSERT(hton == innodb_hton_ptr);
+
+ trx = check_trx_exists(thd);
+
+ /* Since we will reserve the kernel mutex, we must not be holding the
+ search system latch, or we will disobey the latching order. But we
+ already released it in innobase_xa_prepare() (if not before), so just
+ have an assert here.*/
+ ut_ad(!trx->has_search_latch);
+
+ if ((trx->active_flag & TRX_ACTIVE_IN_MYSQL) == 0
+ && trx->conc_state != TRX_NOT_STARTED) {
+ /* We cannot throw error here; instead we will catch this error
+ again in innobase_commit() and report it from there. */
+ DBUG_VOID_RETURN;
+ }
+
+ /* commit_ordered is only called when committing the whole transaction
+ (or an SQL statement when autocommit is on). */
+ DBUG_ASSERT(all ||
+ (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)));
+
+ innobase_commit_ordered_2(trx, thd);
+
+ trx->active_flag |= TRX_ACTIVE_COMMIT_ORDERED;
+
+ DBUG_VOID_RETURN;
+}
+
+/*****************************************************************//**
Commits a transaction in an InnoDB database or marks an SQL statement
ended.
@return 0 */
@@ -2794,11 +2947,12 @@ innobase_commit(
/* Since we will reserve the kernel mutex, we have to release
the search system latch first to obey the latching order. */
- if (trx->has_search_latch) {
+ if (trx->has_search_latch &&
+ (trx->active_flag & TRX_ACTIVE_COMMIT_ORDERED) == 0) {
trx_search_latch_release_if_reserved(trx);
}
- /* The flag trx->active_trans is set to 1 in
+ /* The flag TRX_ACTIVE_IN_MYSQL in trx->active_flag is set in
1. ::external_lock(),
2. ::start_stmt(),
@@ -2808,81 +2962,33 @@ innobase_commit(
6. innobase_start_trx_and_assign_read_view(),
7. ::transactional_table_lock()
- and it is only set to 0 in a commit or a rollback. If it is 0 we know
+ and it is only cleared in a commit or a rollback. If it is unset we know
there cannot be resources to be freed and we could return immediately.
For the time being, we play safe and do the cleanup though there should
be nothing to clean up. */
- if (trx->active_trans == 0
+ if ((trx->active_flag & TRX_ACTIVE_IN_MYSQL) == 0
&& trx->conc_state != TRX_NOT_STARTED) {
- sql_print_error("trx->active_trans == 0, but"
- " trx->conc_state != TRX_NOT_STARTED");
+ sql_print_error("trx->active_flag & TRX_ACTIVE_IN_MYSQL== 0,"
+ " but trx->conc_state != TRX_NOT_STARTED");
}
+
if (all
|| (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) {
- /* We were instructed to commit the whole transaction, or
- this is an SQL statement end and autocommit is on */
-
- /* We need current binlog position for ibbackup to work.
- Note, the position is current because of
- prepare_commit_mutex */
-retry:
- if (innobase_commit_concurrency > 0) {
- pthread_mutex_lock(&commit_cond_m);
- commit_threads++;
-
- if (commit_threads > innobase_commit_concurrency) {
- commit_threads--;
- pthread_cond_wait(&commit_cond,
- &commit_cond_m);
- pthread_mutex_unlock(&commit_cond_m);
- goto retry;
- }
- else {
- pthread_mutex_unlock(&commit_cond_m);
- }
- }
-
- /* The following calls to read the MySQL binary log
- file name and the position return consistent results:
- 1) Other InnoDB transactions cannot intervene between
- these calls as we are holding prepare_commit_mutex.
- 2) Binary logging of other engines is not relevant
- to InnoDB as all InnoDB requires is that committing
- InnoDB transactions appear in the same order in the
- MySQL binary log as they appear in InnoDB logs.
- 3) A MySQL log file rotation cannot happen because
- MySQL protects against this by having a counter of
- transactions in prepared state and it only allows
- a rotation when the counter drops to zero. See
- LOCK_prep_xids and COND_prep_xids in log.cc. */
- trx->mysql_log_file_name = mysql_bin_log_file_name();
- trx->mysql_log_offset = (ib_int64_t) mysql_bin_log_file_pos();
-
- /* Don't do write + flush right now. For group commit
- to work we want to do the flush after releasing the
- prepare_commit_mutex. */
- trx->flush_log_later = TRUE;
- innobase_commit_low(trx);
- trx->flush_log_later = FALSE;
-
- if (innobase_commit_concurrency > 0) {
- pthread_mutex_lock(&commit_cond_m);
- commit_threads--;
- pthread_cond_signal(&commit_cond);
- pthread_mutex_unlock(&commit_cond_m);
+ /* Run the fast part of commit if we did not already. */
+ if ((trx->active_flag & TRX_ACTIVE_COMMIT_ORDERED) == 0) {
+ innobase_commit_ordered_2(trx, thd);
}
- if (trx->active_trans == 2) {
-
- pthread_mutex_unlock(&prepare_commit_mutex);
- }
+ /* We were instructed to commit the whole transaction, or
+ this is an SQL statement end and autocommit is on */
- /* Now do a write + flush of logs. */
+ /* We did the first part already in innobase_commit_ordered(),
+ Now finish by doing a write + flush of logs. */
trx_commit_complete_for_mysql(trx);
- trx->active_trans = 0;
+ trx->active_flag = 0;
} else {
/* We just mark the SQL statement ended and do not do a
@@ -2951,11 +3057,21 @@ innobase_rollback(
row_unlock_table_autoinc_for_mysql(trx);
+ /* if transaction has already released locks, it is too late to
+ rollback */
+ if (innobase_release_locks_early && trx->conc_state == TRX_PREPARED
+ && UT_LIST_GET_LEN(trx->trx_locks) == 0) {
+ sql_print_error("Rollback after releasing locks! "
+ "errno=%d, dberr="ULINTPF,
+ errno, trx->error_state);
+ ut_error;
+ }
+
if (all
|| !thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
error = trx_rollback_for_mysql(trx);
- trx->active_trans = 0;
+ trx->active_flag = 0;
} else {
error = trx_rollback_last_sql_stat_for_mysql(trx);
}
@@ -3099,7 +3215,7 @@ innobase_savepoint(
innobase_release_stat_resources(trx);
/* cannot happen outside of transaction */
- DBUG_ASSERT(trx->active_trans);
+ DBUG_ASSERT(trx->active_flag & TRX_ACTIVE_IN_MYSQL);
/* TODO: use provided savepoint data area to store savepoint data */
char name[64];
@@ -3129,11 +3245,11 @@ innobase_close_connection(
ut_a(trx);
- if (trx->active_trans == 0
+ if ((trx->active_flag & TRX_ACTIVE_IN_MYSQL) == 0
&& trx->conc_state != TRX_NOT_STARTED) {
- sql_print_error("trx->active_trans == 0, but"
- " trx->conc_state != TRX_NOT_STARTED");
+ sql_print_error("trx->active_flag & TRX_ACTIVE_IN_MYSQL == 0,"
+ " but trx->conc_state != TRX_NOT_STARTED");
}
@@ -5095,7 +5211,7 @@ no_commit:
/* Altering to InnoDB format */
innobase_commit(ht, user_thd, 1);
/* Note that this transaction is still active. */
- prebuilt->trx->active_trans = 1;
+ prebuilt->trx->active_flag |= TRX_ACTIVE_IN_MYSQL;
/* We will need an IX lock on the destination table. */
prebuilt->sql_stat_start = TRUE;
} else {
@@ -5111,7 +5227,7 @@ no_commit:
locks, so they have to be acquired again. */
innobase_commit(ht, user_thd, 1);
/* Note that this transaction is still active. */
- prebuilt->trx->active_trans = 1;
+ prebuilt->trx->active_flag |= TRX_ACTIVE_IN_MYSQL;
/* Re-acquire the table lock on the source table. */
row_lock_table_for_mysql(prebuilt, src_table, mode);
/* We will need an IX lock on the destination table. */
@@ -9041,10 +9157,10 @@ ha_innobase::start_stmt(
trx->detailed_error[0] = '\0';
/* Set the MySQL flag to mark that there is an active transaction */
- if (trx->active_trans == 0) {
+ if ((trx->active_flag & TRX_ACTIVE_IN_MYSQL) == 0) {
innobase_register_trx_and_stmt(ht, thd);
- trx->active_trans = 1;
+ trx->active_flag |= TRX_ACTIVE_IN_MYSQL;
} else {
innobase_register_stmt(ht, thd);
}
@@ -9142,10 +9258,10 @@ ha_innobase::external_lock(
/* Set the MySQL flag to mark that there is an active
transaction */
- if (trx->active_trans == 0) {
+ if ((trx->active_flag & TRX_ACTIVE_IN_MYSQL) == 0) {
innobase_register_trx_and_stmt(ht, thd);
- trx->active_trans = 1;
+ trx->active_flag |= TRX_ACTIVE_IN_MYSQL;
} else if (trx->n_mysql_tables_in_use == 0) {
innobase_register_stmt(ht, thd);
}
@@ -9243,7 +9359,7 @@ ha_innobase::external_lock(
prebuilt->used_in_HANDLER = FALSE;
if (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
- if (trx->active_trans != 0) {
+ if ((trx->active_flag & TRX_ACTIVE_IN_MYSQL) != 0) {
innobase_commit(ht, thd, TRUE);
}
} else {
@@ -9328,10 +9444,10 @@ ha_innobase::transactional_table_lock(
/* MySQL is setting a new transactional table lock */
/* Set the MySQL flag to mark that there is an active transaction */
- if (trx->active_trans == 0) {
+ if ((trx->active_flag & TRX_ACTIVE_IN_MYSQL) == 0) {
innobase_register_trx_and_stmt(ht, thd);
- trx->active_trans = 1;
+ trx->active_flag |= TRX_ACTIVE_IN_MYSQL;
}
if (THDVAR(thd, table_locks) && thd_in_lock_tables(thd)) {
@@ -10385,10 +10501,11 @@ innobase_xa_prepare(
innobase_release_stat_resources(trx);
- if (trx->active_trans == 0 && trx->conc_state != TRX_NOT_STARTED) {
+ if ((trx->active_flag & TRX_ACTIVE_IN_MYSQL) == 0 &&
+ trx->conc_state != TRX_NOT_STARTED) {
- sql_print_error("trx->active_trans == 0, but trx->conc_state != "
- "TRX_NOT_STARTED");
+ sql_print_error("trx->active_flag & TRX_ACTIVE_IN_MYSQL == 0, but"
+ " trx->conc_state != TRX_NOT_STARTED");
}
if (all
@@ -10397,7 +10514,7 @@ innobase_xa_prepare(
/* We were instructed to prepare the whole transaction, or
this is an SQL statement end and autocommit is on */
- ut_ad(trx->active_trans);
+ ut_ad(trx->active_flag & TRX_ACTIVE_IN_MYSQL);
error = (int) trx_prepare_for_mysql(trx);
} else {
@@ -10421,36 +10538,6 @@ innobase_xa_prepare(
srv_active_wake_master_thread();
- if (thd_sql_command(thd) != SQLCOM_XA_PREPARE &&
- (all || !thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)))
- {
- if (srv_enable_unsafe_group_commit && !THDVAR(thd, support_xa)) {
- /* choose group commit rather than binlog order */
- return(error);
- }
-
- /* For ibbackup to work the order of transactions in binlog
- and InnoDB must be the same. Consider the situation
-
- thread1> prepare; write to binlog; ...
- <context switch>
- thread2> prepare; write to binlog; commit
- thread1> ... commit
-
- To ensure this will not happen we're taking the mutex on
- prepare, and releasing it on commit.
-
- Note: only do it for normal commits, done via ha_commit_trans.
- If 2pc protocol is executed by external transaction
- coordinator, it will be just a regular MySQL client
- executing XA PREPARE and XA COMMIT commands.
- In this case we cannot know how many minutes or hours
- will be between XA PREPARE and XA COMMIT, and we don't want
- to block for undefined period of time. */
- pthread_mutex_lock(&prepare_commit_mutex);
- trx->active_trans = 2;
- }
-
return(error);
}
@@ -11752,9 +11839,10 @@ static MYSQL_SYSVAR_ENUM(adaptive_checkpoint, srv_adaptive_checkpoint,
"Enable/Disable flushing along modified age. (none, reflex, [estimate], keep_average)",
NULL, innodb_adaptive_checkpoint_update, 2, &adaptive_checkpoint_typelib);
-static MYSQL_SYSVAR_ULONG(enable_unsafe_group_commit, srv_enable_unsafe_group_commit,
+static MYSQL_SYSVAR_ULONG(enable_unsafe_group_commit, srv_deprecated_enable_unsafe_group_commit,
PLUGIN_VAR_RQCMDARG,
- "Enable/Disable unsafe group commit when support_xa=OFF and use with binlog or other XA storage engine.",
+ "Enable/Disable unsafe group commit when support_xa=OFF and use with binlog or other XA storage engine. "
+ "(Deprecated, and does nothing, group commit is always enabled in a safe way)",
NULL, NULL, 0, 0, 1, 0);
static MYSQL_SYSVAR_ULONG(expand_import, srv_expand_import,
@@ -11786,6 +11874,11 @@ static MYSQL_SYSVAR_ULINT(pass_corrupt_table, srv_pass_corrupt_table,
"except for the deletion.",
NULL, NULL, 0, 0, 1, 0);
+static MYSQL_SYSVAR_BOOL(release_locks_early, innobase_release_locks_early,
+ PLUGIN_VAR_READONLY,
+ "Release row locks in the prepare stage instead of in the commit stage",
+ NULL, NULL, FALSE);
+
static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(page_size),
MYSQL_SYSVAR(log_block_size),
@@ -11877,6 +11970,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(auto_lru_dump),
MYSQL_SYSVAR(use_purge_thread),
MYSQL_SYSVAR(pass_corrupt_table),
+ MYSQL_SYSVAR(release_locks_early),
NULL
};
diff --git a/storage/xtradb/handler/ha_innodb.h b/storage/xtradb/handler/ha_innodb.h
index 7470ff97fc2..599b48287e3 100644
--- a/storage/xtradb/handler/ha_innodb.h
+++ b/storage/xtradb/handler/ha_innodb.h
@@ -262,16 +262,6 @@ LEX_STRING *thd_query_string(MYSQL_THD thd);
char **thd_query(MYSQL_THD thd);
#endif
-/** Get the file name of the MySQL binlog.
- * @return the name of the binlog file
- */
-const char* mysql_bin_log_file_name(void);
-
-/** Get the current position of the MySQL binlog.
- * @return byte offset from the beginning of the binlog
- */
-ulonglong mysql_bin_log_file_pos(void);
-
/**
Check if a user thread is a replication slave thread
@param thd user thread
@@ -312,6 +302,11 @@ bool thd_binlog_filter_ok(const MYSQL_THD thd);
#endif /* MYSQL_VERSION_ID > 50140 */
}
+/** Get the file name and position of the MySQL binlog corresponding to the
+ * current commit.
+ */
+extern void mysql_bin_log_commit_pos(THD *thd, ulonglong *out_pos, const char **out_file);
+
typedef struct trx_struct trx_t;
/********************************************************************//**
@file handler/ha_innodb.h
diff --git a/storage/xtradb/include/srv0srv.h b/storage/xtradb/include/srv0srv.h
index f4c9704741c..7ecf952d329 100644
--- a/storage/xtradb/include/srv0srv.h
+++ b/storage/xtradb/include/srv0srv.h
@@ -233,7 +233,7 @@ extern ulong srv_ibuf_active_contract;
extern ulong srv_ibuf_accel_rate;
extern ulint srv_checkpoint_age_target;
extern ulong srv_flush_neighbor_pages;
-extern ulong srv_enable_unsafe_group_commit;
+extern ulong srv_deprecated_enable_unsafe_group_commit;
extern ulong srv_read_ahead;
extern ulong srv_adaptive_checkpoint;
@@ -348,6 +348,9 @@ extern ulint srv_buf_pool_reads;
/** Time in seconds between automatic buffer pool dumps */
extern uint srv_auto_lru_dump;
+/** Release row locks already in the prepare phase */
+extern my_bool innobase_release_locks_early;
+
/** Status variables to be passed to MySQL */
typedef struct export_var_struct export_struc;
diff --git a/storage/xtradb/include/trx0sys.ic b/storage/xtradb/include/trx0sys.ic
index 5e0f07c8b9d..f39ba3055be 100644
--- a/storage/xtradb/include/trx0sys.ic
+++ b/storage/xtradb/include/trx0sys.ic
@@ -338,6 +338,12 @@ trx_list_get_min_trx_id(void)
/****************************************************************//**
Checks if a transaction with the given id is active.
+IMPORTANT ASSUMPTION:
+ It is assumed that this function is only used for the purpose of
+ determining if locks need to be created for the input transaction.
+ So if innobase_release_locks_early global option is set, and if
+ the transaction is already in prepared state, this returns FALSE,
+ as locks are no longer needed for the transaction.
@return TRUE if active */
UNIV_INLINE
ibool
@@ -366,7 +372,8 @@ trx_is_active(
trx = trx_get_on_id(trx_id);
if (trx && (trx->conc_state == TRX_ACTIVE
- || trx->conc_state == TRX_PREPARED)) {
+ || (trx->conc_state == TRX_PREPARED &&
+ !innobase_release_locks_early))) {
return(TRUE);
}
diff --git a/storage/xtradb/include/trx0trx.h b/storage/xtradb/include/trx0trx.h
index 4c0ce392bcd..47b935a7efd 100644
--- a/storage/xtradb/include/trx0trx.h
+++ b/storage/xtradb/include/trx0trx.h
@@ -511,9 +511,10 @@ struct trx_struct{
in that case we must flush the log
in trx_commit_complete_for_mysql() */
ulint duplicates; /*!< TRX_DUP_IGNORE | TRX_DUP_REPLACE */
- ulint active_trans; /*!< 1 - if a transaction in MySQL
- is active. 2 - if prepare_commit_mutex
- was taken */
+ ulint active_flag; /*!< TRX_ACTIVE_IN_MYSQL - set if a
+ transaction in MySQL is active.
+ TRX_ACTIVE_COMMIT_ORDERED - set if
+ innobase_commit_ordered has run */
ulint has_search_latch;
/* TRUE if this trx has latched the
search system latch in S-mode */
@@ -824,6 +825,10 @@ Multiple flags can be combined with bitwise OR. */
#define TRX_SIG_OTHER_SESS 1 /* sent by another session (which
must hold rights to this) */
+/* Flag bits for trx_struct.active_flag */
+#define TRX_ACTIVE_IN_MYSQL (1<<0)
+#define TRX_ACTIVE_COMMIT_ORDERED (1<<1)
+
/** Commit node states */
enum commit_node_state {
COMMIT_NODE_SEND = 1, /*!< about to send a commit signal to
diff --git a/storage/xtradb/srv/srv0srv.c b/storage/xtradb/srv/srv0srv.c
index 3184308f573..4565cac41b9 100644
--- a/storage/xtradb/srv/srv0srv.c
+++ b/storage/xtradb/srv/srv0srv.c
@@ -410,7 +410,7 @@ UNIV_INTERN ulong srv_ibuf_accel_rate = 100;
UNIV_INTERN ulint srv_checkpoint_age_target = 0;
UNIV_INTERN ulong srv_flush_neighbor_pages = 1; /* 0:disable 1:enable */
-UNIV_INTERN ulong srv_enable_unsafe_group_commit = 0; /* 0:disable 1:enable */
+UNIV_INTERN ulong srv_deprecated_enable_unsafe_group_commit = 0;
UNIV_INTERN ulong srv_read_ahead = 3; /* 1: random 2: linear 3: Both */
UNIV_INTERN ulong srv_adaptive_checkpoint = 0; /* 0: none 1: reflex 2: estimate */
@@ -490,6 +490,9 @@ UNIV_INTERN FILE* srv_misc_tmpfile;
UNIV_INTERN ulint srv_main_thread_process_no = 0;
UNIV_INTERN ulint srv_main_thread_id = 0;
+/* Release row locks already in the prepare phase */
+UNIV_INTERN my_bool innobase_release_locks_early = FALSE;
+
/* The following count work done by srv_master_thread. */
/* Iterations by the 'once per second' loop. */
diff --git a/storage/xtradb/trx/trx0trx.c b/storage/xtradb/trx/trx0trx.c
index 75bbe1b342a..06778bbbef7 100644
--- a/storage/xtradb/trx/trx0trx.c
+++ b/storage/xtradb/trx/trx0trx.c
@@ -121,7 +121,7 @@ trx_create(
trx->table_id = ut_dulint_zero;
trx->mysql_thd = NULL;
- trx->active_trans = 0;
+ trx->active_flag = 0;
trx->duplicates = 0;
trx->n_mysql_tables_in_use = 0;
diff --git a/tests/consistent_snapshot.pl b/tests/consistent_snapshot.pl
new file mode 100755
index 00000000000..9e53eaea6a1
--- /dev/null
+++ b/tests/consistent_snapshot.pl
@@ -0,0 +1,107 @@
+#! /usr/bin/perl
+
+# Test START TRANSACTION WITH CONSISTENT SNAPSHOT.
+# With MWL#116, this is implemented so it is actually consistent.
+
+use strict;
+use warnings;
+
+use DBI;
+
+my $UPDATERS= 10;
+my $READERS= 5;
+
+my $ROWS= 50;
+my $DURATION= 20;
+
+my $stop_time= time() + $DURATION;
+
+sub my_connect {
+ my $dbh= DBI->connect("dbi:mysql:mysql_socket=/tmp/mysql.sock;database=test",
+ "root", undef, { RaiseError=>1, PrintError=>0, AutoCommit=>0});
+ $dbh->do("SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ");
+ $dbh->do("SET SESSION autocommit = 0");
+ return $dbh;
+}
+
+sub my_setup {
+ my $dbh= my_connect();
+
+ $dbh->do("DROP TABLE IF EXISTS test_consistent_snapshot1, test_consistent_snapshot2");
+ $dbh->do(<<TABLE);
+CREATE TABLE test_consistent_snapshot1 (
+ a INT PRIMARY KEY,
+ b INT NOT NULL
+) ENGINE=InnoDB
+TABLE
+ $dbh->do(<<TABLE);
+CREATE TABLE test_consistent_snapshot2(
+ a INT PRIMARY KEY,
+ b INT NOT NULL
+) ENGINE=PBXT
+TABLE
+
+ for (my $i= 0; $i < $ROWS; $i++) {
+ my $value= int(rand()*1000);
+ $dbh->do("INSERT INTO test_consistent_snapshot1 VALUES (?, ?)", undef,
+ $i, $value);
+ $dbh->do("INSERT INTO test_consistent_snapshot2 VALUES (?, ?)", undef,
+ $i, -$value);
+ }
+ $dbh->commit();
+ $dbh->disconnect();
+}
+
+sub my_updater {
+ my $dbh= my_connect();
+
+ while (time() < $stop_time) {
+ my $i1= int(rand()*$ROWS);
+ my $i2= int(rand()*$ROWS);
+ my $v= int(rand()*99)-49;
+ $dbh->do("UPDATE test_consistent_snapshot1 SET b = b + ? WHERE a = ?",
+ undef, $v, $i1);
+ $dbh->do("UPDATE test_consistent_snapshot2 SET b = b - ? WHERE a = ?",
+ undef, $v, $i2);
+ $dbh->commit();
+ }
+
+ $dbh->disconnect();
+ exit(0);
+}
+
+sub my_reader {
+ my $dbh= my_connect();
+
+ my $iteration= 0;
+ while (time() < $stop_time) {
+ $dbh->do("START TRANSACTION WITH CONSISTENT SNAPSHOT");
+ my $s1= $dbh->selectrow_arrayref("SELECT SUM(b) FROM test_consistent_snapshot1");
+ $s1= $s1->[0];
+ my $s2= $dbh->selectrow_arrayref("SELECT SUM(b) FROM test_consistent_snapshot2");
+ $s2= $s2->[0];
+ $dbh->commit();
+ if ($s1 + $s2 != 0) {
+ print STDERR "Found inconsistency, s1=$s1 s2=$s2 iteration=$iteration\n";
+ last;
+ }
+ ++$iteration;
+ }
+
+ $dbh->disconnect();
+ exit(0);
+}
+
+my_setup();
+
+for (1 .. $UPDATERS) {
+ fork() || my_updater();
+}
+
+for (1 .. $READERS) {
+ fork() || my_reader();
+}
+
+waitpid(-1, 0) for (1 .. ($UPDATERS + $READERS));
+
+print "All checks done\n";