summaryrefslogtreecommitdiff
path: root/sql
diff options
context:
space:
mode:
authorsysprg <sysprg@gmail.com>2019-03-18 06:39:51 +0100
committerJan Lindström <jan.lindstrom@mariadb.com>2019-03-18 07:39:51 +0200
commit26432e49d37a37d09b862bb49a021e44bdf4789c (patch)
treed0cb051292a44079bb7b5fba5d35407416d2f796 /sql
parent5e044f78c0a9a8cd40dedff0e4bc857c0bd76b95 (diff)
downloadmariadb-git-26432e49d37a37d09b862bb49a021e44bdf4789c.tar.gz
MDEV-17262: mysql crashed on galera while node rejoined cluster (#895)
This patch contains a fix for the MDEV-17262/17243 issues and a new mtr test. These issues (MDEV-17262/17243) have two causes: 1) After an intermediate commit, a transaction loses its status of "transaction that registered in the MySQL for 2pc coordinator" (in InnoDB) because, since version 10.2, the write_row() function (which is located in ha_innodb.cc) does not call trx_register_for_2pc(m_prebuilt->trx) during the processing of split transactions. It is necessary to restore this call inside write_row() when an intermediate commit was made (for a split transaction). Similarly, we need to set the flag of the started transaction (m_prebuilt->sql_stat_start) after an intermediate commit. The table->file->extra(HA_EXTRA_FAKE_START_STMT) call made from the wsrep_load_data_split() function (which is located in sql_load.cc) will also do this, but it will be too late. As a result, the call to the wsrep_append_keys() function from the InnoDB engine may be lost, or the function may be called with an invalid transaction identifier. 2) If a transaction with the LOAD DATA statement is divided into logical mini-transactions (of 10K rows each) and the binlog is rotated, then in rare cases, due to the wsrep handler re-registration at the boundary of the split, the last portion of data may be lost. Since splitting LOAD DATA into mini-transactions is a technical detail, I believe that we should not allow these mini-transactions to fall into separate binlogs. Therefore, it is necessary to prohibit the rotation of the binlog in the middle of processing a LOAD DATA statement. https://jira.mariadb.org/browse/MDEV-17262 and https://jira.mariadb.org/browse/MDEV-17243
Diffstat (limited to 'sql')
-rw-r--r--sql/log.cc51
-rw-r--r--sql/sql_class.cc2
-rw-r--r--sql/sql_class.h8
-rw-r--r--sql/sql_load.cc55
-rw-r--r--sql/sql_plugin_services.ic2
-rw-r--r--sql/wsrep_dummy.cc6
-rw-r--r--sql/wsrep_hton.cc1
-rw-r--r--sql/wsrep_thd.cc10
8 files changed, 115 insertions, 20 deletions
diff --git a/sql/log.cc b/sql/log.cc
index 4d62c9783cd..df928e89390 100644
--- a/sql/log.cc
+++ b/sql/log.cc
@@ -6413,8 +6413,25 @@ err:
update_binlog_end_pos(offset);
signal_update();
+ /*
+ If a transaction with the LOAD DATA statement is divided
+ into logical mini-transactions (of the 10K rows) and binlog
+ is rotated, then the last portion of data may be lost due to
+ wsrep handler re-registration at the boundary of the split.
+ Since splitting of the LOAD DATA into mini-transactions is
+ logical, we should not allow these mini-transactions to fall
+ into separate binlogs. Therefore, it is necessary to prohibit
+ the rotation of binlog in the middle of processing LOAD DATA:
+ */
+#ifdef WITH_WSREP
+ if (!thd->wsrep_split_flag)
+ {
+#endif /* WITH_WSREP */
if ((error= rotate(false, &check_purge)))
check_purge= false;
+#ifdef WITH_WSREP
+ }
+#endif /* WITH_WSREP */
}
}
}
@@ -7139,8 +7156,25 @@ bool MYSQL_BIN_LOG::write_incident(THD *thd)
!(error= flush_and_sync(0)))
{
signal_update();
+ /*
+ If a transaction with the LOAD DATA statement is divided
+ into logical mini-transactions (of the 10K rows) and binlog
+ is rotated, then the last portion of data may be lost due to
+ wsrep handler re-registration at the boundary of the split.
+ Since splitting of the LOAD DATA into mini-transactions is
+ logical, we should not allow these mini-transactions to fall
+ into separate binlogs. Therefore, it is necessary to prohibit
+ the rotation of binlog in the middle of processing LOAD DATA:
+ */
+#ifdef WITH_WSREP
+ if (!thd->wsrep_split_flag)
+ {
+#endif /* WITH_WSREP */
if ((error= rotate(false, &check_purge)))
check_purge= false;
+#ifdef WITH_WSREP
+ }
+#endif /* WITH_WSREP */
}
offset= my_b_tell(&log_file);
@@ -7906,6 +7940,20 @@ MYSQL_BIN_LOG::trx_group_commit_leader(group_commit_entry *leader)
mark_xids_active(binlog_id, xid_count);
}
+ /*
+ If a transaction with the LOAD DATA statement is divided
+ into logical mini-transactions (of the 10K rows) and binlog
+ is rotated, then the last portion of data may be lost due to
+ wsrep handler re-registration at the boundary of the split.
+ Since splitting of the LOAD DATA into mini-transactions is
+ logical, we should not allow these mini-transactions to fall
+ into separate binlogs. Therefore, it is necessary to prohibit
+ the rotation of binlog in the middle of processing LOAD DATA:
+ */
+#ifdef WITH_WSREP
+ if (!leader->thd->wsrep_split_flag)
+ {
+#endif /* WITH_WSREP */
if (rotate(false, &check_purge))
{
/*
@@ -7925,6 +7973,9 @@ MYSQL_BIN_LOG::trx_group_commit_leader(group_commit_entry *leader)
my_error(ER_ERROR_ON_WRITE, MYF(ME_NOREFRESH), name, errno);
check_purge= false;
}
+#ifdef WITH_WSREP
+ }
+#endif /* WITH_WSREP */
/* In case of binlog rotate, update the correct current binlog offset. */
commit_offset= my_b_write_tell(&log_file);
}
diff --git a/sql/sql_class.cc b/sql/sql_class.cc
index d6aa6456710..512f7fdfd56 100644
--- a/sql/sql_class.cc
+++ b/sql/sql_class.cc
@@ -784,6 +784,7 @@ THD::THD(my_thread_id id, bool is_wsrep_applier)
wsrep_affected_rows = 0;
wsrep_replicate_GTID = false;
wsrep_skip_wsrep_GTID = false;
+ wsrep_split_flag = false;
#endif
/* Call to init() below requires fully initialized Open_tables_state. */
reset_open_tables_state(this);
@@ -1218,6 +1219,7 @@ void THD::init(void)
wsrep_affected_rows = 0;
wsrep_replicate_GTID = false;
wsrep_skip_wsrep_GTID = false;
+ wsrep_split_flag = false;
#endif /* WITH_WSREP */
if (variables.sql_log_bin)
diff --git a/sql/sql_class.h b/sql/sql_class.h
index d701d4cb46c..cb182f55bf1 100644
--- a/sql/sql_class.h
+++ b/sql/sql_class.h
@@ -4431,6 +4431,14 @@ public:
ulong wsrep_affected_rows;
bool wsrep_replicate_GTID;
bool wsrep_skip_wsrep_GTID;
+ /* This flag is set when InnoDB does an intermediate commit while
+ processing a LOAD DATA INFILE statement that is split into chunks
+ of 10K rows. If the flag is set, binlog rotation is not performed
+ while the intermediate transaction tries to commit, because in this
+ case rotation causes unregistration of the InnoDB handler. The handler
+ is registered again later, but replication of the last chunk of rows
+ is then skipped by the InnoDB engine: */
+ bool wsrep_split_flag;
#endif /* WITH_WSREP */
/* Handling of timeouts for commands */
diff --git a/sql/sql_load.cc b/sql/sql_load.cc
index 8c2f17dac3f..8e0bdcb32b8 100644
--- a/sql/sql_load.cc
+++ b/sql/sql_load.cc
@@ -41,6 +41,7 @@
#include "sql_trigger.h"
#include "sql_derived.h"
#include "sql_show.h"
+#include "debug_sync.h"
extern "C" int _my_b_net_read(IO_CACHE *info, uchar *Buffer, size_t Count);
@@ -119,21 +120,43 @@ static bool wsrep_load_data_split(THD *thd, const TABLE *table,
if (hton->db_type != DB_TYPE_INNODB)
DBUG_RETURN(false);
WSREP_DEBUG("intermediate transaction commit in LOAD DATA");
+ wsrep_set_load_multi_commit(thd, true);
if (wsrep_run_wsrep_commit(thd, true) != WSREP_TRX_OK) DBUG_RETURN(true);
if (binlog_hton->commit(binlog_hton, thd, true)) DBUG_RETURN(true);
wsrep_post_commit(thd, true);
hton->commit(hton, thd, true);
+ wsrep_set_load_multi_commit(thd, false);
+ DEBUG_SYNC(thd, "intermediate_transaction_commit");
table->file->extra(HA_EXTRA_FAKE_START_STMT);
}
DBUG_RETURN(false);
}
-# define WSREP_LOAD_DATA_SPLIT(thd,table,info) \
- if (wsrep_load_data_split(thd,table,info)) DBUG_RETURN(1)
+/*
+ If the intermediate commit fails, the calling function
+ returns early, before write_record() is reached.
+ Therefore we must reset the
+ table->auto_increment_field_not_null flag here, which
+ is normally reset right after calling write_record():
+*/
+#define WSREP_LOAD_DATA_SPLIT(thd,table,info) \
+ if (wsrep_load_data_split(thd,table,info)) \
+ { \
+ table->auto_increment_field_not_null= FALSE; \
+ DBUG_RETURN(1); \
+ }
#else /* WITH_WSREP */
#define WSREP_LOAD_DATA_SPLIT(thd,table,info) /* empty */
#endif /* WITH_WSREP */
+#define WRITE_RECORD(thd,table,info) \
+ do { \
+ int err_= write_record(thd, table, &info); \
+ table->auto_increment_field_not_null= FALSE; \
+ if (err_) \
+ DBUG_RETURN(1); \
+ } while (0)
+
class READ_INFO: public Load_data_param
{
File file;
@@ -911,7 +934,7 @@ read_fixed_length(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
List_iterator_fast<Item> it(fields_vars);
Item *item;
TABLE *table= table_list->table;
- bool err, progress_reports;
+ bool progress_reports;
ulonglong counter, time_to_report_progress;
DBUG_ENTER("read_fixed_length");
@@ -1003,11 +1026,8 @@ read_fixed_length(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
}
WSREP_LOAD_DATA_SPLIT(thd, table, info);
- err= write_record(thd, table, &info);
- table->auto_increment_field_not_null= FALSE;
- if (err)
- DBUG_RETURN(1);
-
+ WRITE_RECORD(thd, table, info);
+
/*
We don't need to reset auto-increment field since we are restoring
its default value at the beginning of each loop iteration.
@@ -1040,7 +1060,7 @@ read_sep_field(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
Item *item;
TABLE *table= table_list->table;
uint enclosed_length;
- bool err, progress_reports;
+ bool progress_reports;
ulonglong counter, time_to_report_progress;
DBUG_ENTER("read_sep_field");
@@ -1124,7 +1144,7 @@ read_sep_field(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
{
Load_data_outvar *dst= item->get_load_data_outvar_or_error();
DBUG_ASSERT(dst);
- if (dst->load_data_set_no_data(thd, &read_info))
+ if (unlikely(dst->load_data_set_no_data(thd, &read_info)))
DBUG_RETURN(1);
}
}
@@ -1146,10 +1166,8 @@ read_sep_field(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
}
WSREP_LOAD_DATA_SPLIT(thd, table, info);
- err= write_record(thd, table, &info);
- table->auto_increment_field_not_null= FALSE;
- if (err)
- DBUG_RETURN(1);
+ WRITE_RECORD(thd, table, info);
+
/*
We don't need to reset auto-increment field since we are restoring
its default value at the beginning of each loop iteration.
@@ -1267,13 +1285,10 @@ read_xml_field(THD *thd, COPY_INFO &info, TABLE_LIST *table_list,
case VIEW_CHECK_ERROR:
DBUG_RETURN(-1);
}
-
+
WSREP_LOAD_DATA_SPLIT(thd, table, info);
- err= write_record(thd, table, &info);
- table->auto_increment_field_not_null= false;
- if (err)
- DBUG_RETURN(1);
-
+ WRITE_RECORD(thd, table, info);
+
/*
We don't need to reset auto-increment field since we are restoring
its default value at the beginning of each loop iteration.
diff --git a/sql/sql_plugin_services.ic b/sql/sql_plugin_services.ic
index 3d6cf0a0723..e7de45b5ee2 100644
--- a/sql/sql_plugin_services.ic
+++ b/sql/sql_plugin_services.ic
@@ -178,6 +178,8 @@ static struct wsrep_service_st wsrep_handler = {
wsrep_thd_trx_seqno,
wsrep_thd_ws_handle,
wsrep_thd_auto_increment_variables,
+ wsrep_set_load_multi_commit,
+ wsrep_is_load_multi_commit,
wsrep_trx_is_aborting,
wsrep_trx_order_before,
wsrep_unlock_rollback,
diff --git a/sql/wsrep_dummy.cc b/sql/wsrep_dummy.cc
index 7297dbfe0fd..aff75cf7790 100644
--- a/sql/wsrep_dummy.cc
+++ b/sql/wsrep_dummy.cc
@@ -133,6 +133,12 @@ void wsrep_thd_auto_increment_variables(THD *thd,
*increment= thd->variables.auto_increment_increment;
}
+void wsrep_set_load_multi_commit(THD *thd, bool split)
+{ }
+
+bool wsrep_is_load_multi_commit(THD *thd)
+{ return false; }
+
int wsrep_trx_is_aborting(THD *)
{ return 0; }
diff --git a/sql/wsrep_hton.cc b/sql/wsrep_hton.cc
index e3da6a79f26..3603e05fd5f 100644
--- a/sql/wsrep_hton.cc
+++ b/sql/wsrep_hton.cc
@@ -45,6 +45,7 @@ void wsrep_cleanup_transaction(THD *thd)
thd->wsrep_exec_mode= LOCAL_STATE;
thd->wsrep_affected_rows= 0;
thd->wsrep_skip_wsrep_GTID= false;
+ thd->wsrep_split_flag= false;
return;
}
diff --git a/sql/wsrep_thd.cc b/sql/wsrep_thd.cc
index dab9f91b381..00afbec290e 100644
--- a/sql/wsrep_thd.cc
+++ b/sql/wsrep_thd.cc
@@ -708,3 +708,13 @@ my_bool wsrep_thd_is_applier(MYSQL_THD thd)
return (is_applier);
}
+
+void wsrep_set_load_multi_commit(THD *thd, bool split)
+{
+ thd->wsrep_split_flag= split;
+}
+
+bool wsrep_is_load_multi_commit(THD *thd)
+{
+ return thd->wsrep_split_flag;
+}